1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 * All rights reserved.
31 */
32
33 /*
34 * Copyright 2018 Nexenta Systems, Inc.
35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 */
37
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/stat.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/kstat.h>
52 #include <sys/dirent.h>
53 #include <sys/cmn_err.h>
54 #include <sys/debug.h>
55 #include <sys/vtrace.h>
56 #include <sys/mode.h>
57 #include <sys/acl.h>
58 #include <sys/nbmlock.h>
59 #include <sys/policy.h>
60 #include <sys/sdt.h>
61
62 #include <rpc/types.h>
63 #include <rpc/auth.h>
64 #include <rpc/svc.h>
65
66 #include <nfs/nfs.h>
67 #include <nfs/export.h>
68 #include <nfs/nfs_cmd.h>
69
70 #include <vm/hat.h>
71 #include <vm/as.h>
72 #include <vm/seg.h>
73 #include <vm/seg_map.h>
74 #include <vm/seg_kmem.h>
75
76 #include <sys/strsubr.h>
77
78 struct rfs_async_write_list;
79
/*
 * Zone globals of NFSv2 server
 *
 * One instance is reached through the per-zone NFS globals; see
 * nfs_get_srv().
 */
typedef struct nfs_srv {
	/* Taken by rfs_write() before it walks async_write_head. */
	kmutex_t			async_write_lock;
	/*
	 * Head of the list of write clusters.  rfs_write() searches it for
	 * an entry whose file handle matches the incoming request.
	 */
	struct rfs_async_write_list	*async_write_head;

	/*
	 * enables write clustering if == 1
	 *
	 * When this is zero rfs_write() passes the request straight to
	 * rfs_write_sync().
	 */
	int				write_async;
} nfs_srv_t;
92
93 /*
94 * These are the interface routines for the server side of the
95 * Network File System. See the NFS version 2 protocol specification
96 * for a description of this interface.
97 */
98
/*
 * Forward declarations of file-local helpers used by the handlers below.
 * sattr_to_vattr() returns an error code that callers feed to puterrno();
 * acl_perm() is called on a vattr before it is converted for the reply.
 */
static int	sattr_to_vattr(struct nfssattr *, struct vattr *);
static void	acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
    cred_t *);
102
103
/*
 * Some "over the wire" UNIX file types.  These are encoded
 * into the mode.  This needs to be fixed in the next rev.
 */
#define	IFMT		0170000	/* type of file */
#define	IFCHR		0020000	/* character special */
#define	IFBLK		0060000	/* block special */
#define	IFSOCK		0140000	/* socket */

/*
 * Caller id that the handlers in this file store in the cc_caller_id
 * member of the caller_context_t they pass to VOP calls.  The value is
 * assigned outside the code shown here.
 */
u_longlong_t nfs2_srv_caller_id;
114
115 static nfs_srv_t *
116 nfs_get_srv(void)
117 {
118 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
119 nfs_srv_t *srv = ng->nfs_srv;
120 ASSERT(srv != NULL);
121 return (srv);
122 }
123
124 /*
125 * Get file attributes.
126 * Returns the current attributes of the file with the given fhandle.
127 */
128 /* ARGSUSED */
129 void
130 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
131 struct svc_req *req, cred_t *cr, bool_t ro)
132 {
133 int error;
134 vnode_t *vp;
135 struct vattr va;
136
137 vp = nfs_fhtovp(fhp, exi);
138 if (vp == NULL) {
139 ns->ns_status = NFSERR_STALE;
140 return;
141 }
142
143 /*
144 * Do the getattr.
145 */
146 va.va_mask = AT_ALL; /* we want all the attributes */
147
148 error = rfs4_delegated_getattr(vp, &va, 0, cr);
149
150 /* check for overflows */
151 if (!error) {
152 /* Lie about the object type for a referral */
153 if (vn_is_nfs_reparse(vp, cr))
154 va.va_type = VLNK;
155
156 acl_perm(vp, exi, &va, cr);
157 error = vattr_to_nattr(&va, &ns->ns_attr);
158 }
159
160 VN_RELE(vp);
161
162 ns->ns_status = puterrno(error);
163 }
164 void *
165 rfs_getattr_getfh(fhandle_t *fhp)
166 {
167 return (fhp);
168 }
169
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 *
 * ns->ns_status is set on every return path except one: when a VOP call
 * fails with EAGAIN and the caller context reports CC_WOULDBLOCK, the
 * thread is flagged T_WOULDBLOCK and the function returns without
 * assigning a status.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;		/* flags argument for VOP_SETATTR() */
	int in_crit = 0;	/* set while inside the nbl critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* current owner/size, read before truncating */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	/* Translate the over-the-wire attributes into a vattr. */
	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	/*
	 * Caller context for the VOP calls below.  CC_DONTBLOCK is set so
	 * that a conflict is reported back (EAGAIN with CC_WOULDBLOCK, see
	 * below) rather than waited for.
	 */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing.  If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so.  To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region of change lies between the old and
			 * the new end of file, whichever is smaller first.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Only the owner gets the VOP_SPACE shortcut.  AT_SIZE is
		 * removed from the mask so that VOP_SETATTR below does not
		 * repeat the size change.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* On success, fetch the resulting attributes for the reply. */
	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	/* The fsync below is issued with the context flags cleared. */
	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 * The result of the fsync is deliberately ignored.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
357 void *
358 rfs_setattr_getfh(struct nfssaargs *args)
359 {
360 return (&args->saa_fh);
361 }
362
363 /* Change and release @exip and @vpp only in success */
364 int
365 rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
366 {
367 struct exportinfo *exi;
368 vnode_t *vp = *vpp;
369 fid_t fid;
370 int error;
371
372 VN_HOLD(vp);
373
374 if ((error = traverse(&vp)) != 0) {
375 VN_RELE(vp);
376 return (error);
377 }
378
379 bzero(&fid, sizeof (fid));
380 fid.fid_len = MAXFIDSZ;
381 error = VOP_FID(vp, &fid, NULL);
382 if (error) {
383 VN_RELE(vp);
384 return (error);
385 }
386
387 exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
388 if (exi == NULL ||
389 (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
390 /*
391 * It is not error, just subdir is not exported
392 * or "nohide" is not set
393 */
394 if (exi != NULL)
395 exi_rele(exi);
396 VN_RELE(vp);
397 } else {
398 /* go to submount */
399 exi_rele(*exip);
400 *exip = exi;
401
402 VN_RELE(*vpp);
403 *vpp = vp;
404 }
405
406 return (0);
407 }
408
409 /*
410 * Given mounted "dvp" and "exi", go upper mountpoint
411 * with dvp/exi correction
412 * Return 0 in success
413 */
414 int
415 rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
416 {
417 struct exportinfo *exi;
418 vnode_t *dvp = *dvpp;
419 vnode_t *zone_rootvp;
420
421 zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp;
422 ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp));
423
424 VN_HOLD(dvp);
425 dvp = untraverse(dvp, zone_rootvp);
426 exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
427 if (exi == NULL) {
428 VN_RELE(dvp);
429 return (-1);
430 }
431
432 ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid);
433 exi_rele(*exip);
434 *exip = exi;
435 VN_RELE(*dvpp);
436 *dvpp = dvp;
437
438 return (0);
439 }
/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 *
 * dr->dr_status receives the result.  On success dr->dr_fhandle and
 * dr->dr_attr are filled in.  A lookup through the public file handle
 * may evaluate a multi-component path and may end in a different export.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;		/* directory being searched */
	vnode_t *vp;		/* result of the lookup */
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;		/* da_name after inbound name conversion */
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = ZONE_ROOTVP();
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/*
	 * Take our own hold on the export; it is dropped at "out".
	 *
	 * NOTE(review): exi is tested against NULL above but dereferenced
	 * unconditionally from here on - confirm that callers never pass
	 * a NULL exi.
	 */
	exi_hold(exi);
	ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);

	/*
	 * Not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 *
	 * NOTE(review): the two assignments below store NFSERR_* values in
	 * "error", which is passed through puterrno() at "out" like an
	 * errno - confirm the values coincide with EACCES/ENOENT.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
		    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
			/*
			 * special case for ".." and 'nohide'exported root
			 */
			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
				error = NFSERR_ACCES;
				goto out;
			}
		} else {
			error = NFSERR_NOENT;
			goto out;
		}
	}

	/*
	 * Convert the name coming from the client.  The result is either
	 * da->da_name itself or a separate MAXPATHLEN buffer that is freed
	 * after the lookup; NULL is treated as an access error.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		error = NFSERR_ACCES;
		goto out;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;

		/*
		 * Drop our hold first; the multi-component lookup hands
		 * back an export through &exi.  exi may still be NULL
		 * afterwards, which "out" tolerates.
		 */
		exi_rele(exi);
		exi = NULL;

		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);

	/*
	 * The result is a mount point: try to cross into the mounted file
	 * system.  rfs_cross_mnt() leaves vp alone on failure, so the
	 * lookup reference is released here in that case.
	 */
	if (error == 0 && vn_ismntpt(vp)) {
		error = rfs_cross_mnt(&vp, &exi);
		if (error)
			VN_RELE(vp);
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				/*
				 * SEC_QUERY set by the public-fh lookup
				 * selects makefh_ol() instead of makefh().
				 */
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

out:
	VN_RELE(dvp);

	if (exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}
601 void *
602 rfs_lookup_getfh(struct nfsdiropargs *da)
603 {
604 return (da->da_fhandle);
605 }
606
607 /*
608 * Read symbolic link.
609 * Returns the string in the symbolic link at the given fhandle.
610 */
611 /* ARGSUSED */
612 void
613 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
614 struct svc_req *req, cred_t *cr, bool_t ro)
615 {
616 int error;
617 struct iovec iov;
618 struct uio uio;
619 vnode_t *vp;
620 struct vattr va;
621 struct sockaddr *ca;
622 char *name = NULL;
623 int is_referral = 0;
624
625 vp = nfs_fhtovp(fhp, exi);
626 if (vp == NULL) {
627 rl->rl_data = NULL;
628 rl->rl_status = NFSERR_STALE;
629 return;
630 }
631
632 va.va_mask = AT_MODE;
633
634 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
635
636 if (error) {
637 VN_RELE(vp);
638 rl->rl_data = NULL;
639 rl->rl_status = puterrno(error);
640 return;
641 }
642
643 if (MANDLOCK(vp, va.va_mode)) {
644 VN_RELE(vp);
645 rl->rl_data = NULL;
646 rl->rl_status = NFSERR_ACCES;
647 return;
648 }
649
650 /* We lied about the object type for a referral */
651 if (vn_is_nfs_reparse(vp, cr))
652 is_referral = 1;
653
654 /*
655 * XNFS and RFC1094 require us to return ENXIO if argument
656 * is not a link. BUGID 1138002.
657 */
658 if (vp->v_type != VLNK && !is_referral) {
659 VN_RELE(vp);
660 rl->rl_data = NULL;
661 rl->rl_status = NFSERR_NXIO;
662 return;
663 }
664
665 /*
666 * Allocate data for pathname. This will be freed by rfs_rlfree.
667 */
668 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
669
670 if (is_referral) {
671 char *s;
672 size_t strsz;
673 kstat_named_t *stat =
674 exi->exi_ne->ne_globals->svstat[NFS_VERSION];
675
676 /* Get an artificial symlink based on a referral */
677 s = build_symlink(vp, cr, &strsz);
678 stat[NFS_REFERLINKS].value.ui64++;
679 DTRACE_PROBE2(nfs2serv__func__referral__reflink,
680 vnode_t *, vp, char *, s);
681 if (s == NULL)
682 error = EINVAL;
683 else {
684 error = 0;
685 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
686 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
687 kmem_free(s, strsz);
688 }
689
690 } else {
691
692 /*
693 * Set up io vector to read sym link data
694 */
695 iov.iov_base = rl->rl_data;
696 iov.iov_len = NFS_MAXPATHLEN;
697 uio.uio_iov = &iov;
698 uio.uio_iovcnt = 1;
699 uio.uio_segflg = UIO_SYSSPACE;
700 uio.uio_extflg = UIO_COPY_CACHED;
701 uio.uio_loffset = (offset_t)0;
702 uio.uio_resid = NFS_MAXPATHLEN;
703
704 /*
705 * Do the readlink.
706 */
707 error = VOP_READLINK(vp, &uio, cr, NULL);
708
709 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
710
711 if (!error)
712 rl->rl_data[rl->rl_count] = '\0';
713
714 }
715
716
717 VN_RELE(vp);
718
719 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
720 name = nfscmd_convname(ca, exi, rl->rl_data,
721 NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
722
723 if (name != NULL && name != rl->rl_data) {
724 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
725 rl->rl_data = name;
726 }
727
728 /*
729 * XNFS and RFC1094 require us to return ENXIO if argument
730 * is not a link. UFS returns EINVAL if this is the case,
731 * so we do the mapping here. BUGID 1138002.
732 */
733 if (error == EINVAL)
734 rl->rl_status = NFSERR_NXIO;
735 else
736 rl->rl_status = puterrno(error);
737
738 }
739 void *
740 rfs_readlink_getfh(fhandle_t *fhp)
741 {
742 return (fhp);
743 }
744 /*
745 * Free data allocated by rfs_readlink
746 */
747 void
748 rfs_rlfree(struct nfsrdlnres *rl)
749 {
750 if (rl->rl_data != NULL)
751 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
752 }
753
/*
 * Forward declaration.  rfs_read() calls this on the RDMA write-list path
 * and treats a zero return as failure.
 */
static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
755
756 /*
757 * Read data.
758 * Returns some data read from the file at the given fhandle.
759 */
760 /* ARGSUSED */
761 void
762 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
763 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
764 {
765 vnode_t *vp;
766 int error;
767 struct vattr va;
768 struct iovec iov;
769 struct uio uio;
770 mblk_t *mp;
771 int alloc_err = 0;
772 int in_crit = 0;
773 caller_context_t ct;
774
775 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
776 if (vp == NULL) {
777 rr->rr_data = NULL;
778 rr->rr_status = NFSERR_STALE;
779 return;
780 }
781
782 if (vp->v_type != VREG) {
783 VN_RELE(vp);
784 rr->rr_data = NULL;
785 rr->rr_status = NFSERR_ISDIR;
786 return;
787 }
788
789 ct.cc_sysid = 0;
790 ct.cc_pid = 0;
791 ct.cc_caller_id = nfs2_srv_caller_id;
792 ct.cc_flags = CC_DONTBLOCK;
793
794 /*
795 * Enter the critical region before calling VOP_RWLOCK
796 * to avoid a deadlock with write requests.
797 */
798 if (nbl_need_check(vp)) {
799 nbl_start_crit(vp, RW_READER);
800 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
801 0, NULL)) {
802 nbl_end_crit(vp);
803 VN_RELE(vp);
804 rr->rr_data = NULL;
805 rr->rr_status = NFSERR_ACCES;
806 return;
807 }
808 in_crit = 1;
809 }
810
811 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
812
813 /* check if a monitor detected a delegation conflict */
814 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
815 if (in_crit)
816 nbl_end_crit(vp);
817 VN_RELE(vp);
818 /* mark as wouldblock so response is dropped */
819 curthread->t_flag |= T_WOULDBLOCK;
820
821 rr->rr_data = NULL;
822 return;
823 }
824
825 va.va_mask = AT_ALL;
826
827 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
828
829 if (error) {
830 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
831 if (in_crit)
832 nbl_end_crit(vp);
833
834 VN_RELE(vp);
835 rr->rr_data = NULL;
836 rr->rr_status = puterrno(error);
837
838 return;
839 }
840
841 /*
842 * This is a kludge to allow reading of files created
843 * with no read permission. The owner of the file
844 * is always allowed to read it.
845 */
846 if (crgetuid(cr) != va.va_uid) {
847 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
848
849 if (error) {
850 /*
851 * Exec is the same as read over the net because
852 * of demand loading.
853 */
854 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
855 }
856 if (error) {
857 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
858 if (in_crit)
859 nbl_end_crit(vp);
860 VN_RELE(vp);
861 rr->rr_data = NULL;
862 rr->rr_status = puterrno(error);
863
864 return;
865 }
866 }
867
868 if (MANDLOCK(vp, va.va_mode)) {
869 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
870 if (in_crit)
871 nbl_end_crit(vp);
872
873 VN_RELE(vp);
874 rr->rr_data = NULL;
875 rr->rr_status = NFSERR_ACCES;
876
877 return;
878 }
879
880 rr->rr_ok.rrok_wlist_len = 0;
881 rr->rr_ok.rrok_wlist = NULL;
882
883 if ((u_offset_t)ra->ra_offset >= va.va_size) {
884 rr->rr_count = 0;
885 rr->rr_data = NULL;
886 /*
887 * In this case, status is NFS_OK, but there is no data
888 * to encode. So set rr_mp to NULL.
889 */
890 rr->rr_mp = NULL;
891 rr->rr_ok.rrok_wlist = ra->ra_wlist;
892 if (rr->rr_ok.rrok_wlist)
893 clist_zero_len(rr->rr_ok.rrok_wlist);
894 goto done;
895 }
896
897 if (ra->ra_wlist) {
898 mp = NULL;
899 rr->rr_mp = NULL;
900 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
901 if (ra->ra_count > iov.iov_len) {
902 rr->rr_data = NULL;
903 rr->rr_status = NFSERR_INVAL;
904 goto done;
905 }
906 } else {
907 /*
908 * mp will contain the data to be sent out in the read reply.
909 * This will be freed after the reply has been sent out (by the
910 * driver).
911 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
912 * that the call to xdrmblk_putmblk() never fails.
913 */
914 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
915 &alloc_err);
916 ASSERT(mp != NULL);
917 ASSERT(alloc_err == 0);
918
919 rr->rr_mp = mp;
920
921 /*
922 * Set up io vector
923 */
924 iov.iov_base = (caddr_t)mp->b_datap->db_base;
925 iov.iov_len = ra->ra_count;
926 }
927
928 uio.uio_iov = &iov;
929 uio.uio_iovcnt = 1;
930 uio.uio_segflg = UIO_SYSSPACE;
931 uio.uio_extflg = UIO_COPY_CACHED;
932 uio.uio_loffset = (offset_t)ra->ra_offset;
933 uio.uio_resid = ra->ra_count;
934
935 error = VOP_READ(vp, &uio, 0, cr, &ct);
936
937 if (error) {
938 if (mp)
939 freeb(mp);
940
941 /*
942 * check if a monitor detected a delegation conflict and
943 * mark as wouldblock so response is dropped
944 */
945 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
946 curthread->t_flag |= T_WOULDBLOCK;
947 else
948 rr->rr_status = puterrno(error);
949
950 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
951 if (in_crit)
952 nbl_end_crit(vp);
953
954 VN_RELE(vp);
955 rr->rr_data = NULL;
956
957 return;
958 }
959
960 /*
961 * Get attributes again so we can send the latest access
962 * time to the client side for its cache.
963 */
964 va.va_mask = AT_ALL;
965
966 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
967
968 if (error) {
969 if (mp)
970 freeb(mp);
971
972 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
973 if (in_crit)
974 nbl_end_crit(vp);
975
976 VN_RELE(vp);
977 rr->rr_data = NULL;
978 rr->rr_status = puterrno(error);
979
980 return;
981 }
982
983 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
984
985 if (mp) {
986 rr->rr_data = (char *)mp->b_datap->db_base;
987 } else {
988 if (ra->ra_wlist) {
989 rr->rr_data = (caddr_t)iov.iov_base;
990 if (!rdma_setup_read_data2(ra, rr)) {
991 rr->rr_data = NULL;
992 rr->rr_status = puterrno(NFSERR_INVAL);
993 }
994 }
995 }
996 done:
997 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
998 if (in_crit)
999 nbl_end_crit(vp);
1000
1001 acl_perm(vp, exi, &va, cr);
1002
1003 /* check for overflows */
1004 error = vattr_to_nattr(&va, &rr->rr_attr);
1005
1006 VN_RELE(vp);
1007
1008 rr->rr_status = puterrno(error);
1009 }
1010
1011 /*
1012 * Free data allocated by rfs_read
1013 */
1014 void
1015 rfs_rdfree(struct nfsrdresult *rr)
1016 {
1017 mblk_t *mp;
1018
1019 if (rr->rr_status == NFS_OK) {
1020 mp = rr->rr_mp;
1021 if (mp != NULL)
1022 freeb(mp);
1023 }
1024 }
1025
1026 void *
1027 rfs_read_getfh(struct nfsreadargs *ra)
1028 {
1029 return (&ra->ra_fhandle);
1030 }
1031
/*
 * Size of the on-stack iovec array in rfs_write_sync().  An mblk chain
 * with more links than this gets a kmem_alloc()ed array instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/* Counts of mblk-chain writes that fit / did not fit in MAX_IOVECS. */
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif
1038
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Any changes made here, especially in error handling might have
 * to also be done in rfs_write (which clusters write requests).
 *
 * The data comes from one of three places: an RDMA read list
 * (wa->wa_rlist), a flat buffer (wa->wa_data), or an mblk chain
 * (wa->wa_mblk).  The write is issued with FSYNC.
 *
 * ns->ns_status receives the result, except when a delegation conflict
 * is detected: the thread is then flagged T_WOULDBLOCK so the response
 * is dropped, and no status is assigned.
 */
/* ARGSUSED */
void
rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];	/* used when the chain is short */
	mblk_t *m;
	struct iovec *iovp;		/* iov, or a kmem_alloc()ed array */
	int iovcnt;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/* Owner and mode are all the checks below need. */
	va.va_mask = AT_UID|AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);

		return;
	}

	if (crgetuid(cr) != va.va_uid) {
		/*
		 * This is a kludge to allow writes of files created
		 * with read only permission.  The owner of the file
		 * is always allowed to write it.
		 */
		error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);

		if (error) {
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}
	}

	/*
	 * Can't access a mandatory lock file.  This might cause
	 * the NFS service thread to block forever waiting for a
	 * lock to be released that will never be released.
	 */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ACCES;
		return;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
		    wa->wa_count, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/*
	 * check if a monitor detected a delegation conflict
	 * (this jumps past the VOP_RWUNLOCK below)
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		goto out;
	}

	if (wa->wa_data || wa->wa_rlist) {
		/* Do the RDMA thing if necessary */
		if (wa->wa_rlist) {
			iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
			iov[0].iov_len = wa->wa_count;
		} else  {
			iov[0].iov_base = wa->wa_data;
			iov[0].iov_len = wa->wa_count;
		}
		uio.uio_iov = iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		/* Clip the request to what fits below the limit. */
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * for now we assume no append mode
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;
	} else {

		/* One iovec per link of the mblk chain. */
		iovcnt = 0;
		for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs_write_sync_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_sync_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		/* Clip the request to what fits below the limit. */
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;

		/* Release the array only if it was allocated above. */
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (!error) {
		/*
		 * Get attributes again so we send the latest mod
		 * time to the client side for its cache.
		 */
		va.va_mask = AT_ALL;	/* now we want everything */

		error = VOP_GETATTR(vp, &va, 0, cr, &ct);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

out:
	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
	else
		ns->ns_status = puterrno(error);

}
1261
/*
 * One pending write request inside a write cluster.  rfs_write() fills
 * an instance from its own arguments and links it into the cluster for
 * the request's file handle.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* arguments of the request */
	struct nfsattrstat *ns;		/* where the result is stored */
	struct svc_req *req;		/* RPC request */
	cred_t *cr;			/* credentials of the request */
	bool_t ro;			/* read-only flag of the request */
	kthread_t *thread;		/* thread that queued the request */
	/* next request; rfs_write() inserts in ascending wa_offset order */
	struct rfs_async_write *list;
};

/*
 * One write cluster: the pending requests that target the same file
 * handle.  Clusters are chained through "next".
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle the cluster is for */
	/* NOTE(review): presumably waited on by queued threads - confirm */
	kcondvar_t cv;
	struct rfs_async_write *list;	/* first request of the cluster */
	struct rfs_async_write_list *next;	/* next cluster */
};
1278
/*
 * NOTE(review): these three file-scope variables mirror the members of
 * nfs_srv_t (async_write_head, async_write_lock, write_async).  The part
 * of rfs_write() that follows reads the nfs_srv_t members, not these -
 * confirm whether anything still references them.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Size of the on-stack iovec array declared in rfs_write(). */
#define	MAXCLIOVECS	42
/*
 * Placeholder that rfs_write() stores in ns_status before processing;
 * 0 cannot be used because it is considered an OK status.
 */
#define	RFSWRITE_INITVAL (enum nfsstat) -1

#ifdef DEBUG
/* Debug-only counters; their use is outside the code shown here. */
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif
1290
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * When nsrv->write_async is zero the request is simply handed to
 * rfs_write_sync().  Otherwise requests for the same file handle are
 * gathered into a "cluster":
 *
 *   - If a cluster for the file handle is already on the list, this
 *     request is inserted into it (ordered by wa_offset) and the thread
 *     sleeps on the cluster's condition variable until the cluster
 *     owner has replaced RFSWRITE_INITVAL in ns->ns_status.
 *   - Otherwise this thread starts a new cluster, becomes its owner,
 *     and processes every request that joined: permission checks,
 *     gathering of contiguous requests into single VOP_WRITE calls,
 *     flushing to stable storage, and finally waking all joiners.
 *
 * wa	WRITE arguments (file handle, offset, count, data)
 * ns	reply: status and, on success, the file attributes
 * exi	export the file handle belongs to
 * req	RPC request
 * cr	credentials of the caller
 * ro	read-only flag, passed to rdonly()
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;
	nfs_srv_t *nsrv;

	ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
	nsrv = nfs_get_srv();
	if (!nsrv->write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	/* The request record lives on this thread's stack. */
	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		/* Find the first entry whose offset is not below ours. */
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Sleep until the cluster owner has filled in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	/* The cluster record lives on this thread's stack as well. */
	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	/* Append the new cluster to the tail of the cluster list. */
	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink the cluster and fail every request in it. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		/* Pass this thread's T_WOULDBLOCK state on to the joiners. */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		/* Unlink the cluster and fail every request in it. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		/* Mark every request that is still pending as wouldblock. */
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 *
	 * Note that "continue" inside this do/while jumps to the loop
	 * condition, which is what advances rp to the next request.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow the cluster's byte range to cover this request. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 *
		 * On exit from the loop lrp points at the first request
		 * that is NOT part of this contiguous run (or is NULL).
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		/* Use the on-stack array when it is large enough. */
		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					/* Trim the last iovec to wa_count. */
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/*
	 * Give any request that still has no status the final error value,
	 * then wake up every thread waiting on this cluster.
	 */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1792
1793 void *
1794 rfs_write_getfh(struct nfswriteargs *wa)
1795 {
1796 return (&wa->wa_fhandle);
1797 }
1798
1799 /*
1800 * Create a file.
1801 * Creates a file with given attributes and returns those attributes
1802 * and an fhandle for the new file.
1803 */
1804 void
1805 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1806 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1807 {
1808 int error;
1809 int lookuperr;
1810 int in_crit = 0;
1811 struct vattr va;
1812 vnode_t *vp;
1813 vnode_t *realvp;
1814 vnode_t *dvp;
1815 char *name = args->ca_da.da_name;
1816 vnode_t *tvp = NULL;
1817 int mode;
1818 int lookup_ok;
1819 bool_t trunc;
1820 struct sockaddr *ca;
1821
1822 /*
1823 * Disallow NULL paths
1824 */
1825 if (name == NULL || *name == '\0') {
1826 dr->dr_status = NFSERR_ACCES;
1827 return;
1828 }
1829
1830 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1831 if (dvp == NULL) {
1832 dr->dr_status = NFSERR_STALE;
1833 return;
1834 }
1835
1836 error = sattr_to_vattr(args->ca_sa, &va);
1837 if (error) {
1838 dr->dr_status = puterrno(error);
1839 return;
1840 }
1841
1842 /*
1843 * Must specify the mode.
1844 */
1845 if (!(va.va_mask & AT_MODE)) {
1846 VN_RELE(dvp);
1847 dr->dr_status = NFSERR_INVAL;
1848 return;
1849 }
1850
1851 /*
1852 * This is a completely gross hack to make mknod
1853 * work over the wire until we can wack the protocol
1854 */
1855 if ((va.va_mode & IFMT) == IFCHR) {
1856 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1857 va.va_type = VFIFO; /* xtra kludge for named pipe */
1858 else {
1859 va.va_type = VCHR;
1860 /*
1861 * uncompress the received dev_t
1862 * if the top half is zero indicating a request
1863 * from an `older style' OS.
1864 */
1865 if ((va.va_size & 0xffff0000) == 0)
1866 va.va_rdev = nfsv2_expdev(va.va_size);
1867 else
1868 va.va_rdev = (dev_t)va.va_size;
1869 }
1870 va.va_mask &= ~AT_SIZE;
1871 } else if ((va.va_mode & IFMT) == IFBLK) {
1872 va.va_type = VBLK;
1873 /*
1874 * uncompress the received dev_t
1875 * if the top half is zero indicating a request
1876 * from an `older style' OS.
1877 */
1878 if ((va.va_size & 0xffff0000) == 0)
1879 va.va_rdev = nfsv2_expdev(va.va_size);
1880 else
1881 va.va_rdev = (dev_t)va.va_size;
1882 va.va_mask &= ~AT_SIZE;
1883 } else if ((va.va_mode & IFMT) == IFSOCK) {
1884 va.va_type = VSOCK;
1885 } else {
1886 va.va_type = VREG;
1887 }
1888 va.va_mode &= ~IFMT;
1889 va.va_mask |= AT_TYPE;
1890
1891 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1892 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1893 MAXPATHLEN);
1894 if (name == NULL) {
1895 dr->dr_status = puterrno(EINVAL);
1896 return;
1897 }
1898
1899 /*
1900 * Why was the choice made to use VWRITE as the mode to the
1901 * call to VOP_CREATE ? This results in a bug. When a client
1902 * opens a file that already exists and is RDONLY, the second
1903 * open fails with an EACESS because of the mode.
1904 * bug ID 1054648.
1905 */
1906 lookup_ok = 0;
1907 mode = VWRITE;
1908 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1909 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1910 NULL, NULL, NULL);
1911 if (!error) {
1912 struct vattr at;
1913
1914 lookup_ok = 1;
1915 at.va_mask = AT_MODE;
1916 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1917 if (!error)
1918 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1919 VN_RELE(tvp);
1920 tvp = NULL;
1921 }
1922 }
1923
1924 if (!lookup_ok) {
1925 if (rdonly(ro, dvp)) {
1926 error = EROFS;
1927 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1928 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1929 error = EPERM;
1930 } else {
1931 error = 0;
1932 }
1933 }
1934
1935 /*
1936 * If file size is being modified on an already existing file
1937 * make sure that there are no conflicting non-blocking mandatory
1938 * locks in the region being manipulated. Return EACCES if there
1939 * are conflicting locks.
1940 */
1941 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1942 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1943 NULL, NULL, NULL);
1944
1945 if (!lookuperr &&
1946 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1947 VN_RELE(tvp);
1948 curthread->t_flag |= T_WOULDBLOCK;
1949 goto out;
1950 }
1951
1952 if (!lookuperr && nbl_need_check(tvp)) {
1953 /*
1954 * The file exists. Now check if it has any
1955 * conflicting non-blocking mandatory locks
1956 * in the region being changed.
1957 */
1958 struct vattr bva;
1959 u_offset_t offset;
1960 ssize_t length;
1961
1962 nbl_start_crit(tvp, RW_READER);
1963 in_crit = 1;
1964
1965 bva.va_mask = AT_SIZE;
1966 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1967 if (!error) {
1968 if (va.va_size < bva.va_size) {
1969 offset = va.va_size;
1970 length = bva.va_size - va.va_size;
1971 } else {
1972 offset = bva.va_size;
1973 length = va.va_size - bva.va_size;
1974 }
1975 if (length) {
1976 if (nbl_conflict(tvp, NBL_WRITE,
1977 offset, length, 0, NULL)) {
1978 error = EACCES;
1979 }
1980 }
1981 }
1982 if (error) {
1983 nbl_end_crit(tvp);
1984 VN_RELE(tvp);
1985 in_crit = 0;
1986 }
1987 } else if (tvp != NULL) {
1988 VN_RELE(tvp);
1989 }
1990 }
1991
1992 if (!error) {
1993 /*
1994 * If filesystem is shared with nosuid the remove any
1995 * setuid/setgid bits on create.
1996 */
1997 if (va.va_type == VREG &&
1998 exi->exi_export.ex_flags & EX_NOSUID)
1999 va.va_mode &= ~(VSUID | VSGID);
2000
2001 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
2002 NULL, NULL);
2003
2004 if (!error) {
2005
2006 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
2007 trunc = TRUE;
2008 else
2009 trunc = FALSE;
2010
2011 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
2012 VN_RELE(vp);
2013 curthread->t_flag |= T_WOULDBLOCK;
2014 goto out;
2015 }
2016 va.va_mask = AT_ALL;
2017
2018 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2019
2020 /* check for overflows */
2021 if (!error) {
2022 acl_perm(vp, exi, &va, cr);
2023 error = vattr_to_nattr(&va, &dr->dr_attr);
2024 if (!error) {
2025 error = makefh(&dr->dr_fhandle, vp,
2026 exi);
2027 }
2028 }
2029 /*
2030 * Force modified metadata out to stable storage.
2031 *
2032 * if a underlying vp exists, pass it to VOP_FSYNC
2033 */
2034 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2035 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2036 else
2037 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2038 VN_RELE(vp);
2039 }
2040
2041 if (in_crit) {
2042 nbl_end_crit(tvp);
2043 VN_RELE(tvp);
2044 }
2045 }
2046
2047 /*
2048 * Force modified data and metadata out to stable storage.
2049 */
2050 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2051
2052 out:
2053
2054 VN_RELE(dvp);
2055
2056 dr->dr_status = puterrno(error);
2057
2058 if (name != args->ca_da.da_name)
2059 kmem_free(name, MAXPATHLEN);
2060 }
2061 void *
2062 rfs_create_getfh(struct nfscreatargs *args)
2063 {
2064 return (args->ca_da.da_fhandle);
2065 }
2066
2067 /*
2068 * Remove a file.
2069 * Remove named file from parent directory.
2070 */
2071 /* ARGSUSED */
2072 void
2073 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
2074 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2075 {
2076 int error = 0;
2077 vnode_t *vp;
2078 vnode_t *targvp;
2079 int in_crit = 0;
2080
2081 /*
2082 * Disallow NULL paths
2083 */
2084 if (da->da_name == NULL || *da->da_name == '\0') {
2085 *status = NFSERR_ACCES;
2086 return;
2087 }
2088
2089 vp = nfs_fhtovp(da->da_fhandle, exi);
2090 if (vp == NULL) {
2091 *status = NFSERR_STALE;
2092 return;
2093 }
2094
2095 if (rdonly(ro, vp)) {
2096 VN_RELE(vp);
2097 *status = NFSERR_ROFS;
2098 return;
2099 }
2100
2101 /*
2102 * Check for a conflict with a non-blocking mandatory share reservation.
2103 */
2104 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
2105 NULL, cr, NULL, NULL, NULL);
2106 if (error != 0) {
2107 VN_RELE(vp);
2108 *status = puterrno(error);
2109 return;
2110 }
2111
2112 /*
2113 * If the file is delegated to an v4 client, then initiate
2114 * recall and drop this request (by setting T_WOULDBLOCK).
2115 * The client will eventually re-transmit the request and
2116 * (hopefully), by then, the v4 client will have returned
2117 * the delegation.
2118 */
2119
2120 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2121 VN_RELE(vp);
2122 VN_RELE(targvp);
2123 curthread->t_flag |= T_WOULDBLOCK;
2124 return;
2125 }
2126
2127 if (nbl_need_check(targvp)) {
2128 nbl_start_crit(targvp, RW_READER);
2129 in_crit = 1;
2130 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2131 error = EACCES;
2132 goto out;
2133 }
2134 }
2135
2136 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2137
2138 /*
2139 * Force modified data and metadata out to stable storage.
2140 */
2141 (void) VOP_FSYNC(vp, 0, cr, NULL);
2142
2143 out:
2144 if (in_crit)
2145 nbl_end_crit(targvp);
2146 VN_RELE(targvp);
2147 VN_RELE(vp);
2148
2149 *status = puterrno(error);
2150
2151 }
2152
2153 void *
2154 rfs_remove_getfh(struct nfsdiropargs *da)
2155 {
2156 return (da->da_fhandle);
2157 }
2158
2159 /*
2160 * rename a file
2161 * Give a file (from) a new name (to).
2162 */
2163 /* ARGSUSED */
2164 void
2165 rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
2166 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2167 {
2168 int error = 0;
2169 vnode_t *fromvp;
2170 vnode_t *tovp;
2171 struct exportinfo *to_exi;
2172 fhandle_t *fh;
2173 vnode_t *srcvp;
2174 vnode_t *targvp;
2175 int in_crit = 0;
2176
2177 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2178 if (fromvp == NULL) {
2179 *status = NFSERR_STALE;
2180 return;
2181 }
2182
2183 fh = args->rna_to.da_fhandle;
2184 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2185 if (to_exi == NULL) {
2186 VN_RELE(fromvp);
2187 *status = NFSERR_ACCES;
2188 return;
2189 }
2190 exi_rele(to_exi);
2191
2192 if (to_exi != exi) {
2193 VN_RELE(fromvp);
2194 *status = NFSERR_XDEV;
2195 return;
2196 }
2197
2198 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2199 if (tovp == NULL) {
2200 VN_RELE(fromvp);
2201 *status = NFSERR_STALE;
2202 return;
2203 }
2204
2205 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2206 VN_RELE(tovp);
2207 VN_RELE(fromvp);
2208 *status = NFSERR_NOTDIR;
2209 return;
2210 }
2211
2212 /*
2213 * Disallow NULL paths
2214 */
2215 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2216 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2217 VN_RELE(tovp);
2218 VN_RELE(fromvp);
2219 *status = NFSERR_ACCES;
2220 return;
2221 }
2222
2223 if (rdonly(ro, tovp)) {
2224 VN_RELE(tovp);
2225 VN_RELE(fromvp);
2226 *status = NFSERR_ROFS;
2227 return;
2228 }
2229
2230 /*
2231 * Check for a conflict with a non-blocking mandatory share reservation.
2232 */
2233 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2234 NULL, cr, NULL, NULL, NULL);
2235 if (error != 0) {
2236 VN_RELE(tovp);
2237 VN_RELE(fromvp);
2238 *status = puterrno(error);
2239 return;
2240 }
2241
2242 /* Check for delegations on the source file */
2243
2244 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2245 VN_RELE(tovp);
2246 VN_RELE(fromvp);
2247 VN_RELE(srcvp);
2248 curthread->t_flag |= T_WOULDBLOCK;
2249 return;
2250 }
2251
2252 /* Check for delegation on the file being renamed over, if it exists */
2253
2254 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2255 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2256 NULL, NULL, NULL) == 0) {
2257
2258 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2259 VN_RELE(tovp);
2260 VN_RELE(fromvp);
2261 VN_RELE(srcvp);
2262 VN_RELE(targvp);
2263 curthread->t_flag |= T_WOULDBLOCK;
2264 return;
2265 }
2266 VN_RELE(targvp);
2267 }
2268
2269
2270 if (nbl_need_check(srcvp)) {
2271 nbl_start_crit(srcvp, RW_READER);
2272 in_crit = 1;
2273 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2274 error = EACCES;
2275 goto out;
2276 }
2277 }
2278
2279 error = VOP_RENAME(fromvp, args->rna_from.da_name,
2280 tovp, args->rna_to.da_name, cr, NULL, 0);
2281
2282 if (error == 0)
2283 vn_renamepath(tovp, srcvp, args->rna_to.da_name,
2284 strlen(args->rna_to.da_name));
2285
2286 /*
2287 * Force modified data and metadata out to stable storage.
2288 */
2289 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2290 (void) VOP_FSYNC(fromvp, 0, cr, NULL);
2291
2292 out:
2293 if (in_crit)
2294 nbl_end_crit(srcvp);
2295 VN_RELE(srcvp);
2296 VN_RELE(tovp);
2297 VN_RELE(fromvp);
2298
2299 *status = puterrno(error);
2300
2301 }
2302 void *
2303 rfs_rename_getfh(struct nfsrnmargs *args)
2304 {
2305 return (args->rna_from.da_fhandle);
2306 }
2307
2308 /*
2309 * Link to a file.
2310 * Create a file (to) which is a hard link to the given file (from).
2311 */
2312 /* ARGSUSED */
2313 void
2314 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2315 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2316 {
2317 int error;
2318 vnode_t *fromvp;
2319 vnode_t *tovp;
2320 struct exportinfo *to_exi;
2321 fhandle_t *fh;
2322
2323 fromvp = nfs_fhtovp(args->la_from, exi);
2324 if (fromvp == NULL) {
2325 *status = NFSERR_STALE;
2326 return;
2327 }
2328
2329 fh = args->la_to.da_fhandle;
2330 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2331 if (to_exi == NULL) {
2332 VN_RELE(fromvp);
2333 *status = NFSERR_ACCES;
2334 return;
2335 }
2336 exi_rele(to_exi);
2337
2338 if (to_exi != exi) {
2339 VN_RELE(fromvp);
2340 *status = NFSERR_XDEV;
2341 return;
2342 }
2343
2344 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2345 if (tovp == NULL) {
2346 VN_RELE(fromvp);
2347 *status = NFSERR_STALE;
2348 return;
2349 }
2350
2351 if (tovp->v_type != VDIR) {
2352 VN_RELE(tovp);
2353 VN_RELE(fromvp);
2354 *status = NFSERR_NOTDIR;
2355 return;
2356 }
2357 /*
2358 * Disallow NULL paths
2359 */
2360 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2361 VN_RELE(tovp);
2362 VN_RELE(fromvp);
2363 *status = NFSERR_ACCES;
2364 return;
2365 }
2366
2367 if (rdonly(ro, tovp)) {
2368 VN_RELE(tovp);
2369 VN_RELE(fromvp);
2370 *status = NFSERR_ROFS;
2371 return;
2372 }
2373
2374 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2375
2376 /*
2377 * Force modified data and metadata out to stable storage.
2378 */
2379 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2380 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2381
2382 VN_RELE(tovp);
2383 VN_RELE(fromvp);
2384
2385 *status = puterrno(error);
2386
2387 }
2388 void *
2389 rfs_link_getfh(struct nfslinkargs *args)
2390 {
2391 return (args->la_from);
2392 }
2393
2394 /*
2395 * Symbolicly link to a file.
2396 * Create a file (to) with the given attributes which is a symbolic link
2397 * to the given path name (to).
2398 */
2399 void
2400 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2401 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2402 {
2403 int error;
2404 struct vattr va;
2405 vnode_t *vp;
2406 vnode_t *svp;
2407 int lerror;
2408 struct sockaddr *ca;
2409 char *name = NULL;
2410
2411 /*
2412 * Disallow NULL paths
2413 */
2414 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2415 *status = NFSERR_ACCES;
2416 return;
2417 }
2418
2419 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2420 if (vp == NULL) {
2421 *status = NFSERR_STALE;
2422 return;
2423 }
2424
2425 if (rdonly(ro, vp)) {
2426 VN_RELE(vp);
2427 *status = NFSERR_ROFS;
2428 return;
2429 }
2430
2431 error = sattr_to_vattr(args->sla_sa, &va);
2432 if (error) {
2433 VN_RELE(vp);
2434 *status = puterrno(error);
2435 return;
2436 }
2437
2438 if (!(va.va_mask & AT_MODE)) {
2439 VN_RELE(vp);
2440 *status = NFSERR_INVAL;
2441 return;
2442 }
2443
2444 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2445 name = nfscmd_convname(ca, exi, args->sla_tnm,
2446 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2447
2448 if (name == NULL) {
2449 *status = NFSERR_ACCES;
2450 return;
2451 }
2452
2453 va.va_type = VLNK;
2454 va.va_mask |= AT_TYPE;
2455
2456 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2457
2458 /*
2459 * Force new data and metadata out to stable storage.
2460 */
2461 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2462 NULL, cr, NULL, NULL, NULL);
2463
2464 if (!lerror) {
2465 (void) VOP_FSYNC(svp, 0, cr, NULL);
2466 VN_RELE(svp);
2467 }
2468
2469 /*
2470 * Force modified data and metadata out to stable storage.
2471 */
2472 (void) VOP_FSYNC(vp, 0, cr, NULL);
2473
2474 VN_RELE(vp);
2475
2476 *status = puterrno(error);
2477 if (name != args->sla_tnm)
2478 kmem_free(name, MAXPATHLEN);
2479
2480 }
2481 void *
2482 rfs_symlink_getfh(struct nfsslargs *args)
2483 {
2484 return (args->sla_from.da_fhandle);
2485 }
2486
2487 /*
2488 * Make a directory.
2489 * Create a directory with the given name, parent directory, and attributes.
2490 * Returns a file handle and attributes for the new directory.
2491 */
2492 /* ARGSUSED */
2493 void
2494 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2495 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2496 {
2497 int error;
2498 struct vattr va;
2499 vnode_t *dvp = NULL;
2500 vnode_t *vp;
2501 char *name = args->ca_da.da_name;
2502
2503 /*
2504 * Disallow NULL paths
2505 */
2506 if (name == NULL || *name == '\0') {
2507 dr->dr_status = NFSERR_ACCES;
2508 return;
2509 }
2510
2511 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2512 if (vp == NULL) {
2513 dr->dr_status = NFSERR_STALE;
2514 return;
2515 }
2516
2517 if (rdonly(ro, vp)) {
2518 VN_RELE(vp);
2519 dr->dr_status = NFSERR_ROFS;
2520 return;
2521 }
2522
2523 error = sattr_to_vattr(args->ca_sa, &va);
2524 if (error) {
2525 VN_RELE(vp);
2526 dr->dr_status = puterrno(error);
2527 return;
2528 }
2529
2530 if (!(va.va_mask & AT_MODE)) {
2531 VN_RELE(vp);
2532 dr->dr_status = NFSERR_INVAL;
2533 return;
2534 }
2535
2536 va.va_type = VDIR;
2537 va.va_mask |= AT_TYPE;
2538
2539 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2540
2541 if (!error) {
2542 /*
2543 * Attribtutes of the newly created directory should
2544 * be returned to the client.
2545 */
2546 va.va_mask = AT_ALL; /* We want everything */
2547 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2548
2549 /* check for overflows */
2550 if (!error) {
2551 acl_perm(vp, exi, &va, cr);
2552 error = vattr_to_nattr(&va, &dr->dr_attr);
2553 if (!error) {
2554 error = makefh(&dr->dr_fhandle, dvp, exi);
2555 }
2556 }
2557 /*
2558 * Force new data and metadata out to stable storage.
2559 */
2560 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2561 VN_RELE(dvp);
2562 }
2563
2564 /*
2565 * Force modified data and metadata out to stable storage.
2566 */
2567 (void) VOP_FSYNC(vp, 0, cr, NULL);
2568
2569 VN_RELE(vp);
2570
2571 dr->dr_status = puterrno(error);
2572
2573 }
2574 void *
2575 rfs_mkdir_getfh(struct nfscreatargs *args)
2576 {
2577 return (args->ca_da.da_fhandle);
2578 }
2579
2580 /*
2581 * Remove a directory.
2582 * Remove the given directory name from the given parent directory.
2583 */
2584 /* ARGSUSED */
2585 void
2586 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2587 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2588 {
2589 int error;
2590 vnode_t *vp;
2591
2592 /*
2593 * Disallow NULL paths
2594 */
2595 if (da->da_name == NULL || *da->da_name == '\0') {
2596 *status = NFSERR_ACCES;
2597 return;
2598 }
2599
2600 vp = nfs_fhtovp(da->da_fhandle, exi);
2601 if (vp == NULL) {
2602 *status = NFSERR_STALE;
2603 return;
2604 }
2605
2606 if (rdonly(ro, vp)) {
2607 VN_RELE(vp);
2608 *status = NFSERR_ROFS;
2609 return;
2610 }
2611
2612 /*
2613 * VOP_RMDIR takes a third argument (the current
2614 * directory of the process). That's because someone
2615 * wants to return EINVAL if one tries to remove ".".
2616 * Of course, NFS servers have no idea what their
2617 * clients' current directories are. We fake it by
2618 * supplying a vnode known to exist and illegal to
2619 * remove.
2620 */
2621 error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2622
2623 /*
2624 * Force modified data and metadata out to stable storage.
2625 */
2626 (void) VOP_FSYNC(vp, 0, cr, NULL);
2627
2628 VN_RELE(vp);
2629
2630 /*
2631 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2632 * if the directory is not empty. A System V NFS server
2633 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2634 * over the wire.
2635 */
2636 if (error == EEXIST)
2637 *status = NFSERR_NOTEMPTY;
2638 else
2639 *status = puterrno(error);
2640
2641 }
2642 void *
2643 rfs_rmdir_getfh(struct nfsdiropargs *da)
2644 {
2645 return (da->da_fhandle);
2646 }
2647
2648 /* ARGSUSED */
2649 void
2650 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2651 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2652 {
2653 int error;
2654 int iseof;
2655 struct iovec iov;
2656 struct uio uio;
2657 vnode_t *vp;
2658 char *ndata = NULL;
2659 struct sockaddr *ca;
2660 size_t nents;
2661 int ret;
2662
2663 vp = nfs_fhtovp(&rda->rda_fh, exi);
2664 if (vp == NULL) {
2665 rd->rd_entries = NULL;
2666 rd->rd_status = NFSERR_STALE;
2667 return;
2668 }
2669
2670 if (vp->v_type != VDIR) {
2671 VN_RELE(vp);
2672 rd->rd_entries = NULL;
2673 rd->rd_status = NFSERR_NOTDIR;
2674 return;
2675 }
2676
2677 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2678
2679 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2680
2681 if (error) {
2682 rd->rd_entries = NULL;
2683 goto bad;
2684 }
2685
2686 if (rda->rda_count == 0) {
2687 rd->rd_entries = NULL;
2688 rd->rd_size = 0;
2689 rd->rd_eof = FALSE;
2690 goto bad;
2691 }
2692
2693 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2694
2695 /*
2696 * Allocate data for entries. This will be freed by rfs_rddirfree.
2697 */
2698 rd->rd_bufsize = (uint_t)rda->rda_count;
2699 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2700
2701 /*
2702 * Set up io vector to read directory data
2703 */
2704 iov.iov_base = (caddr_t)rd->rd_entries;
2705 iov.iov_len = rda->rda_count;
2706 uio.uio_iov = &iov;
2707 uio.uio_iovcnt = 1;
2708 uio.uio_segflg = UIO_SYSSPACE;
2709 uio.uio_extflg = UIO_COPY_CACHED;
2710 uio.uio_loffset = (offset_t)rda->rda_offset;
2711 uio.uio_resid = rda->rda_count;
2712
2713 /*
2714 * read directory
2715 */
2716 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2717
2718 /*
2719 * Clean up
2720 */
2721 if (!error) {
2722 /*
2723 * set size and eof
2724 */
2725 if (uio.uio_resid == rda->rda_count) {
2726 rd->rd_size = 0;
2727 rd->rd_eof = TRUE;
2728 } else {
2729 rd->rd_size = (uint32_t)(rda->rda_count -
2730 uio.uio_resid);
2731 rd->rd_eof = iseof ? TRUE : FALSE;
2732 }
2733 }
2734
2735 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2736 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2737 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2738 rda->rda_count, &ndata);
2739
2740 if (ret != 0) {
2741 size_t dropbytes;
2742 /*
2743 * We had to drop one or more entries in order to fit
2744 * during the character conversion. We need to patch
2745 * up the size and eof info.
2746 */
2747 if (rd->rd_eof)
2748 rd->rd_eof = FALSE;
2749 dropbytes = nfscmd_dropped_entrysize(
2750 (struct dirent64 *)rd->rd_entries, nents, ret);
2751 rd->rd_size -= dropbytes;
2752 }
2753 if (ndata == NULL) {
2754 ndata = (char *)rd->rd_entries;
2755 } else if (ndata != (char *)rd->rd_entries) {
2756 kmem_free(rd->rd_entries, rd->rd_bufsize);
2757 rd->rd_entries = (void *)ndata;
2758 rd->rd_bufsize = rda->rda_count;
2759 }
2760
2761 bad:
2762 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2763
2764 #if 0 /* notyet */
2765 /*
2766 * Don't do this. It causes local disk writes when just
2767 * reading the file and the overhead is deemed larger
2768 * than the benefit.
2769 */
2770 /*
2771 * Force modified metadata out to stable storage.
2772 */
2773 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2774 #endif
2775
2776 VN_RELE(vp);
2777
2778 rd->rd_status = puterrno(error);
2779
2780 }
2781 void *
2782 rfs_readdir_getfh(struct nfsrddirargs *rda)
2783 {
2784 return (&rda->rda_fh);
2785 }
2786 void
2787 rfs_rddirfree(struct nfsrddirres *rd)
2788 {
2789 if (rd->rd_entries != NULL)
2790 kmem_free(rd->rd_entries, rd->rd_bufsize);
2791 }
2792
2793 /* ARGSUSED */
2794 void
2795 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2796 struct svc_req *req, cred_t *cr, bool_t ro)
2797 {
2798 int error;
2799 struct statvfs64 sb;
2800 vnode_t *vp;
2801
2802 vp = nfs_fhtovp(fh, exi);
2803 if (vp == NULL) {
2804 fs->fs_status = NFSERR_STALE;
2805 return;
2806 }
2807
2808 error = VFS_STATVFS(vp->v_vfsp, &sb);
2809
2810 if (!error) {
2811 fs->fs_tsize = nfstsize();
2812 fs->fs_bsize = sb.f_frsize;
2813 fs->fs_blocks = sb.f_blocks;
2814 fs->fs_bfree = sb.f_bfree;
2815 fs->fs_bavail = sb.f_bavail;
2816 }
2817
2818 VN_RELE(vp);
2819
2820 fs->fs_status = puterrno(error);
2821
2822 }
2823 void *
2824 rfs_statfs_getfh(fhandle_t *fh)
2825 {
2826 return (fh);
2827 }
2828
2829 static int
2830 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
2831 {
2832 vap->va_mask = 0;
2833
2834 /*
2835 * There was a sign extension bug in some VFS based systems
2836 * which stored the mode as a short. When it would get
2837 * assigned to a u_long, no sign extension would occur.
2838 * It needed to, but this wasn't noticed because sa_mode
2839 * would then get assigned back to the short, thus ignoring
2840 * the upper 16 bits of sa_mode.
2841 *
2842 * To make this implementation work for both broken
2843 * clients and good clients, we check for both versions
2844 * of the mode.
2845 */
2846 if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
2847 sa->sa_mode != (uint32_t)-1) {
2848 vap->va_mask |= AT_MODE;
2849 vap->va_mode = sa->sa_mode;
2850 }
2851 if (sa->sa_uid != (uint32_t)-1) {
2852 vap->va_mask |= AT_UID;
2853 vap->va_uid = sa->sa_uid;
2854 }
2855 if (sa->sa_gid != (uint32_t)-1) {
2856 vap->va_mask |= AT_GID;
2857 vap->va_gid = sa->sa_gid;
2858 }
2859 if (sa->sa_size != (uint32_t)-1) {
2860 vap->va_mask |= AT_SIZE;
2861 vap->va_size = sa->sa_size;
2862 }
2863 if (sa->sa_atime.tv_sec != (int32_t)-1 &&
2864 sa->sa_atime.tv_usec != (int32_t)-1) {
2865 #ifndef _LP64
2866 /* return error if time overflow */
2867 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
2868 return (EOVERFLOW);
2869 #endif
2870 vap->va_mask |= AT_ATIME;
2871 /*
2872 * nfs protocol defines times as unsigned so don't extend sign,
2873 * unless sysadmin set nfs_allow_preepoch_time.
2874 */
2875 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
2876 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2877 }
2878 if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2879 sa->sa_mtime.tv_usec != (int32_t)-1) {
2880 #ifndef _LP64
2881 /* return error if time overflow */
2882 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2883 return (EOVERFLOW);
2884 #endif
2885 vap->va_mask |= AT_MTIME;
2886 /*
2887 * nfs protocol defines times as unsigned so don't extend sign,
2888 * unless sysadmin set nfs_allow_preepoch_time.
2889 */
2890 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2891 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2892 }
2893 return (0);
2894 }
2895
2896 static const enum nfsftype vt_to_nf[] = {
2897 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
2898 };
2899
2900 /*
2901 * check the following fields for overflow: nodeid, size, and time.
2902 * There could be a problem when converting 64-bit LP64 fields
2903 * into 32-bit ones. Return an error if there is an overflow.
2904 */
2905 int
2906 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2907 {
2908 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2909 na->na_type = vt_to_nf[vap->va_type];
2910
2911 if (vap->va_mode == (unsigned short) -1)
2912 na->na_mode = (uint32_t)-1;
2913 else
2914 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2915
2916 if (vap->va_uid == (unsigned short)(-1))
2917 na->na_uid = (uint32_t)(-1);
2918 else if (vap->va_uid == UID_NOBODY)
2919 na->na_uid = (uint32_t)NFS_UID_NOBODY;
2920 else
2921 na->na_uid = vap->va_uid;
2922
2923 if (vap->va_gid == (unsigned short)(-1))
2924 na->na_gid = (uint32_t)-1;
2925 else if (vap->va_gid == GID_NOBODY)
2926 na->na_gid = (uint32_t)NFS_GID_NOBODY;
2927 else
2928 na->na_gid = vap->va_gid;
2929
2930 /*
2931 * Do we need to check fsid for overflow? It is 64-bit in the
2932 * vattr, but are bigger than 32 bit values supported?
2933 */
2934 na->na_fsid = vap->va_fsid;
2935
2936 na->na_nodeid = vap->va_nodeid;
2937
2938 /*
2939 * Check to make sure that the nodeid is representable over the
2940 * wire without losing bits.
2941 */
2942 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2943 return (EFBIG);
2944 na->na_nlink = vap->va_nlink;
2945
2946 /*
2947 * Check for big files here, instead of at the caller. See
2948 * comments in cstat for large special file explanation.
2949 */
2950 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2951 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2952 return (EFBIG);
2953 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2954 /* UNKNOWN_SIZE | OVERFLOW */
2955 na->na_size = MAXOFF32_T;
2956 } else
2957 na->na_size = vap->va_size;
2958 } else
2959 na->na_size = vap->va_size;
2960
2961 /*
2962 * If the vnode times overflow the 32-bit times that NFS2
2963 * uses on the wire then return an error.
2964 */
2965 if (!NFS_VAP_TIME_OK(vap)) {
2966 return (EOVERFLOW);
2967 }
2968 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2969 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2970
2971 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2972 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2973
2974 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2975 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2976
2977 /*
2978 * If the dev_t will fit into 16 bits then compress
2979 * it, otherwise leave it alone. See comments in
2980 * nfs_client.c.
2981 */
2982 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2983 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2984 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2985 else
2986 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2987
2988 na->na_blocks = vap->va_nblocks;
2989 na->na_blocksize = vap->va_blksize;
2990
2991 /*
2992 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2993 * over-the-wire protocols for named-pipe vnodes. It remaps the
2994 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2995 *
2996 * BUYER BEWARE:
2997 * If you are porting the NFS to a non-Sun server, you probably
2998 * don't want to include the following block of code. The
2999 * over-the-wire special file types will be changing with the
3000 * NFS Protocol Revision.
3001 */
3002 if (vap->va_type == VFIFO)
3003 NA_SETFIFO(na);
3004 return (0);
3005 }
3006
3007 /*
3008 * acl v2 support: returns approximate permission.
3009 * default: returns minimal permission (more restrictive)
3010 * aclok: returns maximal permission (less restrictive)
3011 * This routine changes the permissions that are alaredy in *va.
3012 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
3013 * CLASS_OBJ is always the same as GROUP_OBJ entry.
3014 */
3015 static void
3016 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
3017 {
3018 vsecattr_t vsa;
3019 int aclcnt;
3020 aclent_t *aclentp;
3021 mode_t mask_perm;
3022 mode_t grp_perm;
3023 mode_t other_perm;
3024 mode_t other_orig;
3025 int error;
3026
3027 /* dont care default acl */
3028 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
3029 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
3030
3031 if (!error) {
3032 aclcnt = vsa.vsa_aclcnt;
3033 if (aclcnt > MIN_ACL_ENTRIES) {
3034 /* non-trivial ACL */
3035 aclentp = vsa.vsa_aclentp;
3036 if (exi->exi_export.ex_flags & EX_ACLOK) {
3037 /* maximal permissions */
3038 grp_perm = 0;
3039 other_perm = 0;
3040 for (; aclcnt > 0; aclcnt--, aclentp++) {
3041 switch (aclentp->a_type) {
3042 case USER_OBJ:
3043 break;
3044 case USER:
3045 grp_perm |=
3046 aclentp->a_perm << 3;
3047 other_perm |= aclentp->a_perm;
3048 break;
3049 case GROUP_OBJ:
3050 grp_perm |=
3051 aclentp->a_perm << 3;
3052 break;
3053 case GROUP:
3054 other_perm |= aclentp->a_perm;
3055 break;
3056 case OTHER_OBJ:
3057 other_orig = aclentp->a_perm;
3058 break;
3059 case CLASS_OBJ:
3060 mask_perm = aclentp->a_perm;
3061 break;
3062 default:
3063 break;
3064 }
3065 }
3066 grp_perm &= mask_perm << 3;
3067 other_perm &= mask_perm;
3068 other_perm |= other_orig;
3069
3070 } else {
3071 /* minimal permissions */
3072 grp_perm = 070;
3073 other_perm = 07;
3074 for (; aclcnt > 0; aclcnt--, aclentp++) {
3075 switch (aclentp->a_type) {
3076 case USER_OBJ:
3077 break;
3078 case USER:
3079 case CLASS_OBJ:
3080 grp_perm &=
3081 aclentp->a_perm << 3;
3082 other_perm &=
3083 aclentp->a_perm;
3084 break;
3085 case GROUP_OBJ:
3086 grp_perm &=
3087 aclentp->a_perm << 3;
3088 break;
3089 case GROUP:
3090 other_perm &=
3091 aclentp->a_perm;
3092 break;
3093 case OTHER_OBJ:
3094 other_perm &=
3095 aclentp->a_perm;
3096 break;
3097 default:
3098 break;
3099 }
3100 }
3101 }
3102 /* copy to va */
3103 va->va_mode &= ~077;
3104 va->va_mode |= grp_perm | other_perm;
3105 }
3106 if (vsa.vsa_aclcnt)
3107 kmem_free(vsa.vsa_aclentp,
3108 vsa.vsa_aclcnt * sizeof (aclent_t));
3109 }
3110 }
3111
/*
 * Server initialization hook: obtain a caller id from fs_new_caller_id()
 * and record it in nfs2_srv_caller_id.
 */
void
rfs_srvrinit(void)
{
	nfs2_srv_caller_id = fs_new_caller_id();
}
3117
/*
 * Server teardown hook.  Currently there is nothing to undo, so the
 * body is empty.
 */
void
rfs_srvrfini(void)
{
}
3122
3123 /* ARGSUSED */
3124 void
3125 rfs_srv_zone_init(nfs_globals_t *ng)
3126 {
3127 nfs_srv_t *ns;
3128
3129 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3130
3131 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3132 ns->write_async = 1;
3133
3134 ng->nfs_srv = ns;
3135 }
3136
3137 /* ARGSUSED */
3138 void
3139 rfs_srv_zone_fini(nfs_globals_t *ng)
3140 {
3141 nfs_srv_t *ns = ng->nfs_srv;
3142
3143 ng->nfs_srv = NULL;
3144
3145 mutex_destroy(&ns->async_write_lock);
3146 kmem_free(ns, sizeof (*ns));
3147 }
3148
3149 static int
3150 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3151 {
3152 struct clist *wcl;
3153 int wlist_len;
3154 uint32_t count = rr->rr_count;
3155
3156 wcl = ra->ra_wlist;
3157
3158 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3159 return (FALSE);
3160 }
3161
3162 wcl = ra->ra_wlist;
3163 rr->rr_ok.rrok_wlist_len = wlist_len;
3164 rr->rr_ok.rrok_wlist = wcl;
3165
3166 return (TRUE);
3167 }