1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2018 Nexenta Systems, Inc.
24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 #include <sys/sdt.h>
52
53 #include <rpc/types.h>
54 #include <rpc/auth.h>
55 #include <rpc/svc.h>
56 #include <rpc/rpc_rdma.h>
57
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61
62 #include <sys/strsubr.h>
63 #include <sys/tsol/label.h>
64 #include <sys/tsol/tndb.h>
65
66 #include <sys/zone.h>
67
68 #include <inet/ip.h>
69 #include <inet/ip6.h>
70
71 /*
72 * Zone global variables of NFSv3 server
73 */
74 typedef struct nfs3_srv {
75 writeverf3 write3verf;
76 } nfs3_srv_t;
77
78 /*
79 * These are the interface routines for the server side of the
80 * Network File System. See the NFS version 3 protocol specification
81 * for a description of this interface.
82 */
83
84 static int sattr3_to_vattr(sattr3 *, struct vattr *);
85 static int vattr_to_fattr3(struct vattr *, fattr3 *);
86 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
87 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
88 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
89 static int rdma_setup_read_data3(READ3args *, READ3resok *);
90
91 extern int nfs_loaned_buffers;
92
93 u_longlong_t nfs3_srv_caller_id;
94 static zone_key_t rfs3_zone_key;
95
96 /* ARGSUSED */
97 void
98 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
99 struct svc_req *req, cred_t *cr, bool_t ro)
100 {
101 int error;
102 vnode_t *vp;
103 struct vattr va;
104
105 vp = nfs3_fhtovp(&args->object, exi);
106
107 DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
108 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
109 GETATTR3args *, args);
110
111 if (vp == NULL) {
112 error = ESTALE;
113 goto out;
114 }
115
116 va.va_mask = AT_ALL;
117 error = rfs4_delegated_getattr(vp, &va, 0, cr);
118
119 if (!error) {
120 /* Lie about the object type for a referral */
121 if (vn_is_nfs_reparse(vp, cr))
122 va.va_type = VLNK;
123
124 /* overflow error if time or size is out of range */
125 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
126 if (error)
127 goto out;
128 resp->status = NFS3_OK;
129
130 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
131 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
132 GETATTR3res *, resp);
133
134 VN_RELE(vp);
135
136 return;
137 }
138
139 out:
140 if (curthread->t_flag & T_WOULDBLOCK) {
141 curthread->t_flag &= ~T_WOULDBLOCK;
142 resp->status = NFS3ERR_JUKEBOX;
143 } else
144 resp->status = puterrno3(error);
145
146 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
147 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
148 GETATTR3res *, resp);
149
150 if (vp != NULL)
151 VN_RELE(vp);
152 }
153
154 void *
155 rfs3_getattr_getfh(GETATTR3args *args)
156 {
157
158 return (&args->object);
159 }
160
161 void
162 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
163 struct svc_req *req, cred_t *cr, bool_t ro)
164 {
165 int error;
166 vnode_t *vp;
167 struct vattr *bvap;
168 struct vattr bva;
169 struct vattr *avap;
170 struct vattr ava;
171 int flag;
172 int in_crit = 0;
173 struct flock64 bf;
174 caller_context_t ct;
175
176 bvap = NULL;
177 avap = NULL;
178
179 vp = nfs3_fhtovp(&args->object, exi);
180
181 DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
182 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
183 SETATTR3args *, args);
184
185 if (vp == NULL) {
186 error = ESTALE;
187 goto out;
188 }
189
190 error = sattr3_to_vattr(&args->new_attributes, &ava);
191 if (error)
192 goto out;
193
194 if (is_system_labeled()) {
195 bslabel_t *clabel = req->rq_label;
196
197 ASSERT(clabel != NULL);
198 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
199 "got client label from request(1)", struct svc_req *, req);
200
201 if (!blequal(&l_admin_low->tsl_label, clabel)) {
202 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
203 exi)) {
204 resp->status = NFS3ERR_ACCES;
205 goto out1;
206 }
207 }
208 }
209
210 /*
211 * We need to specially handle size changes because of
212 * possible conflicting NBMAND locks. Get into critical
213 * region before VOP_GETATTR, so the size attribute is
214 * valid when checking conflicts.
215 *
216 * Also, check to see if the v4 side of the server has
217 * delegated this file. If so, then we return JUKEBOX to
218 * allow the client to retrasmit its request.
219 */
220 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
221 if (nbl_need_check(vp)) {
222 nbl_start_crit(vp, RW_READER);
223 in_crit = 1;
224 }
225 }
226
227 bva.va_mask = AT_ALL;
228 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
229
230 /*
231 * If we can't get the attributes, then we can't do the
232 * right access checking. So, we'll fail the request.
233 */
234 if (error)
235 goto out;
236
237 bvap = &bva;
238
239 if (rdonly(ro, vp)) {
240 resp->status = NFS3ERR_ROFS;
241 goto out1;
242 }
243
244 if (args->guard.check &&
245 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
246 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
247 resp->status = NFS3ERR_NOT_SYNC;
248 goto out1;
249 }
250
251 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
252 flag = ATTR_UTIME;
253 else
254 flag = 0;
255
256 /*
257 * If the filesystem is exported with nosuid, then mask off
258 * the setuid and setgid bits.
259 */
260 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
261 (exi->exi_export.ex_flags & EX_NOSUID))
262 ava.va_mode &= ~(VSUID | VSGID);
263
264 ct.cc_sysid = 0;
265 ct.cc_pid = 0;
266 ct.cc_caller_id = nfs3_srv_caller_id;
267 ct.cc_flags = CC_DONTBLOCK;
268
269 /*
270 * We need to specially handle size changes because it is
271 * possible for the client to create a file with modes
272 * which indicate read-only, but with the file opened for
273 * writing. If the client then tries to set the size of
274 * the file, then the normal access checking done in
275 * VOP_SETATTR would prevent the client from doing so,
276 * although it should be legal for it to do so. To get
277 * around this, we do the access checking for ourselves
278 * and then use VOP_SPACE which doesn't do the access
279 * checking which VOP_SETATTR does. VOP_SPACE can only
280 * operate on VREG files, let VOP_SETATTR handle the other
281 * extremely rare cases.
282 * Also the client should not be allowed to change the
283 * size of the file if there is a conflicting non-blocking
284 * mandatory lock in the region the change.
285 */
286 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
287 if (in_crit) {
288 u_offset_t offset;
289 ssize_t length;
290
291 if (ava.va_size < bva.va_size) {
292 offset = ava.va_size;
293 length = bva.va_size - ava.va_size;
294 } else {
295 offset = bva.va_size;
296 length = ava.va_size - bva.va_size;
297 }
298 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
299 NULL)) {
300 error = EACCES;
301 goto out;
302 }
303 }
304
305 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
306 ava.va_mask &= ~AT_SIZE;
307 bf.l_type = F_WRLCK;
308 bf.l_whence = 0;
309 bf.l_start = (off64_t)ava.va_size;
310 bf.l_len = 0;
311 bf.l_sysid = 0;
312 bf.l_pid = 0;
313 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
314 (offset_t)ava.va_size, cr, &ct);
315 }
316 }
317
318 if (!error && ava.va_mask)
319 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
320
321 /* check if a monitor detected a delegation conflict */
322 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
323 resp->status = NFS3ERR_JUKEBOX;
324 goto out1;
325 }
326
327 ava.va_mask = AT_ALL;
328 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
329
330 /*
331 * Force modified metadata out to stable storage.
332 */
333 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
334
335 if (error)
336 goto out;
337
338 if (in_crit)
339 nbl_end_crit(vp);
340
341 resp->status = NFS3_OK;
342 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
343
344 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
345 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
346 SETATTR3res *, resp);
347
348 VN_RELE(vp);
349
350 return;
351
352 out:
353 if (curthread->t_flag & T_WOULDBLOCK) {
354 curthread->t_flag &= ~T_WOULDBLOCK;
355 resp->status = NFS3ERR_JUKEBOX;
356 } else
357 resp->status = puterrno3(error);
358 out1:
359 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
360 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
361 SETATTR3res *, resp);
362
363 if (vp != NULL) {
364 if (in_crit)
365 nbl_end_crit(vp);
366 VN_RELE(vp);
367 }
368 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
369 }
370
371 void *
372 rfs3_setattr_getfh(SETATTR3args *args)
373 {
374
375 return (&args->object);
376 }
377
378 /* ARGSUSED */
379 void
380 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
381 struct svc_req *req, cred_t *cr, bool_t ro)
382 {
383 int error;
384 vnode_t *vp;
385 vnode_t *dvp;
386 struct vattr *vap;
387 struct vattr va;
388 struct vattr *dvap;
389 struct vattr dva;
390 nfs_fh3 *fhp;
391 struct sec_ol sec = {0, 0};
392 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
393 struct sockaddr *ca;
394 char *name = NULL;
395
396 dvap = NULL;
397
398 if (exi != NULL)
399 exi_hold(exi);
400
401 /*
402 * Allow lookups from the root - the default
403 * location of the public filehandle.
404 */
405 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
406 dvp = ZONE_ROOTVP();
407 VN_HOLD(dvp);
408
409 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
410 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
411 LOOKUP3args *, args);
412 } else {
413 dvp = nfs3_fhtovp(&args->what.dir, exi);
414
415 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
416 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
417 LOOKUP3args *, args);
418
419 if (dvp == NULL) {
420 error = ESTALE;
421 goto out;
422 }
423 }
424
425 dva.va_mask = AT_ALL;
426 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
427
428 if (args->what.name == nfs3nametoolong) {
429 resp->status = NFS3ERR_NAMETOOLONG;
430 goto out1;
431 }
432
433 if (args->what.name == NULL || *(args->what.name) == '\0') {
434 resp->status = NFS3ERR_ACCES;
435 goto out1;
436 }
437
438 fhp = &args->what.dir;
439 if (strcmp(args->what.name, "..") == 0 &&
440 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
441 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
442 (dvp->v_flag & VROOT)) {
443 /*
444 * special case for ".." and 'nohide'exported root
445 */
446 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
447 resp->status = NFS3ERR_ACCES;
448 goto out1;
449 }
450 } else {
451 resp->status = NFS3ERR_NOENT;
452 goto out1;
453 }
454 }
455
456 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
457 name = nfscmd_convname(ca, exi, args->what.name,
458 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
459
460 if (name == NULL) {
461 resp->status = NFS3ERR_ACCES;
462 goto out1;
463 }
464
465 /*
466 * If the public filehandle is used then allow
467 * a multi-component lookup
468 */
469 if (PUBLIC_FH3(&args->what.dir)) {
470 publicfh_flag = TRUE;
471
472 exi_rele(exi);
473
474 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
475 &exi, &sec);
476
477 /*
478 * Since WebNFS may bypass MOUNT, we need to ensure this
479 * request didn't come from an unlabeled admin_low client.
480 */
481 if (is_system_labeled() && error == 0) {
482 int addr_type;
483 void *ipaddr;
484 tsol_tpc_t *tp;
485
486 if (ca->sa_family == AF_INET) {
487 addr_type = IPV4_VERSION;
488 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
489 } else if (ca->sa_family == AF_INET6) {
490 addr_type = IPV6_VERSION;
491 ipaddr = &((struct sockaddr_in6 *)
492 ca)->sin6_addr;
493 }
494 tp = find_tpc(ipaddr, addr_type, B_FALSE);
495 if (tp == NULL || tp->tpc_tp.tp_doi !=
496 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
497 SUN_CIPSO) {
498 VN_RELE(vp);
499 error = EACCES;
500 }
501 if (tp != NULL)
502 TPC_RELE(tp);
503 }
504 } else {
505 error = VOP_LOOKUP(dvp, name, &vp,
506 NULL, 0, NULL, cr, NULL, NULL, NULL);
507 }
508
509 if (name != args->what.name)
510 kmem_free(name, MAXPATHLEN + 1);
511
512 if (error == 0 && vn_ismntpt(vp)) {
513 error = rfs_cross_mnt(&vp, &exi);
514 if (error)
515 VN_RELE(vp);
516 }
517
518 if (is_system_labeled() && error == 0) {
519 bslabel_t *clabel = req->rq_label;
520
521 ASSERT(clabel != NULL);
522 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
523 "got client label from request(1)", struct svc_req *, req);
524
525 if (!blequal(&l_admin_low->tsl_label, clabel)) {
526 if (!do_rfs_label_check(clabel, dvp,
527 DOMINANCE_CHECK, exi)) {
528 VN_RELE(vp);
529 error = EACCES;
530 }
531 }
532 }
533
534 dva.va_mask = AT_ALL;
535 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
536
537 if (error)
538 goto out;
539
540 if (sec.sec_flags & SEC_QUERY) {
541 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
542 } else {
543 error = makefh3(&resp->resok.object, vp, exi);
544 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
545 auth_weak = TRUE;
546 }
547
548 if (error) {
549 VN_RELE(vp);
550 goto out;
551 }
552
553 va.va_mask = AT_ALL;
554 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
555
556 exi_rele(exi);
557 VN_RELE(vp);
558
559 resp->status = NFS3_OK;
560 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
561 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
562
563 /*
564 * If it's public fh, no 0x81, and client's flavor is
565 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
566 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
567 */
568 if (auth_weak)
569 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
570
571 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
572 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
573 LOOKUP3res *, resp);
574 VN_RELE(dvp);
575
576 return;
577
578 out:
579 if (curthread->t_flag & T_WOULDBLOCK) {
580 curthread->t_flag &= ~T_WOULDBLOCK;
581 resp->status = NFS3ERR_JUKEBOX;
582 } else
583 resp->status = puterrno3(error);
584 out1:
585 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
586 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
587 LOOKUP3res *, resp);
588
589 if (exi != NULL)
590 exi_rele(exi);
591
592 if (dvp != NULL)
593 VN_RELE(dvp);
594 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
595
596 }
597
598 void *
599 rfs3_lookup_getfh(LOOKUP3args *args)
600 {
601
602 return (&args->what.dir);
603 }
604
605 /* ARGSUSED */
606 void
607 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
608 struct svc_req *req, cred_t *cr, bool_t ro)
609 {
610 int error;
611 vnode_t *vp;
612 struct vattr *vap;
613 struct vattr va;
614 int checkwriteperm;
615 boolean_t dominant_label = B_FALSE;
616 boolean_t equal_label = B_FALSE;
617 boolean_t admin_low_client;
618
619 vap = NULL;
620
621 vp = nfs3_fhtovp(&args->object, exi);
622
623 DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
624 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
625 ACCESS3args *, args);
626
627 if (vp == NULL) {
628 error = ESTALE;
629 goto out;
630 }
631
632 /*
633 * If the file system is exported read only, it is not appropriate
634 * to check write permissions for regular files and directories.
635 * Special files are interpreted by the client, so the underlying
636 * permissions are sent back to the client for interpretation.
637 */
638 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
639 checkwriteperm = 0;
640 else
641 checkwriteperm = 1;
642
643 /*
644 * We need the mode so that we can correctly determine access
645 * permissions relative to a mandatory lock file. Access to
646 * mandatory lock files is denied on the server, so it might
647 * as well be reflected to the server during the open.
648 */
649 va.va_mask = AT_MODE;
650 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
651 if (error)
652 goto out;
653
654 vap = &va;
655
656 resp->resok.access = 0;
657
658 if (is_system_labeled()) {
659 bslabel_t *clabel = req->rq_label;
660
661 ASSERT(clabel != NULL);
662 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
663 "got client label from request(1)", struct svc_req *, req);
664
665 if (!blequal(&l_admin_low->tsl_label, clabel)) {
666 if ((equal_label = do_rfs_label_check(clabel, vp,
667 EQUALITY_CHECK, exi)) == B_FALSE) {
668 dominant_label = do_rfs_label_check(clabel,
669 vp, DOMINANCE_CHECK, exi);
670 } else
671 dominant_label = B_TRUE;
672 admin_low_client = B_FALSE;
673 } else
674 admin_low_client = B_TRUE;
675 }
676
677 if (args->access & ACCESS3_READ) {
678 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
679 if (error) {
680 if (curthread->t_flag & T_WOULDBLOCK)
681 goto out;
682 } else if (!MANDLOCK(vp, va.va_mode) &&
683 (!is_system_labeled() || admin_low_client ||
684 dominant_label))
685 resp->resok.access |= ACCESS3_READ;
686 }
687 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
688 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
689 if (error) {
690 if (curthread->t_flag & T_WOULDBLOCK)
691 goto out;
692 } else if (!is_system_labeled() || admin_low_client ||
693 dominant_label)
694 resp->resok.access |= ACCESS3_LOOKUP;
695 }
696 if (checkwriteperm &&
697 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
698 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
699 if (error) {
700 if (curthread->t_flag & T_WOULDBLOCK)
701 goto out;
702 } else if (!MANDLOCK(vp, va.va_mode) &&
703 (!is_system_labeled() || admin_low_client || equal_label)) {
704 resp->resok.access |=
705 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
706 }
707 }
708 if (checkwriteperm &&
709 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
710 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
711 if (error) {
712 if (curthread->t_flag & T_WOULDBLOCK)
713 goto out;
714 } else if (!is_system_labeled() || admin_low_client ||
715 equal_label)
716 resp->resok.access |= ACCESS3_DELETE;
717 }
718 if (args->access & ACCESS3_EXECUTE) {
719 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
720 if (error) {
721 if (curthread->t_flag & T_WOULDBLOCK)
722 goto out;
723 } else if (!MANDLOCK(vp, va.va_mode) &&
724 (!is_system_labeled() || admin_low_client ||
725 dominant_label))
726 resp->resok.access |= ACCESS3_EXECUTE;
727 }
728
729 va.va_mask = AT_ALL;
730 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
731
732 resp->status = NFS3_OK;
733 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
734
735 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
736 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
737 ACCESS3res *, resp);
738
739 VN_RELE(vp);
740
741 return;
742
743 out:
744 if (curthread->t_flag & T_WOULDBLOCK) {
745 curthread->t_flag &= ~T_WOULDBLOCK;
746 resp->status = NFS3ERR_JUKEBOX;
747 } else
748 resp->status = puterrno3(error);
749 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
750 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
751 ACCESS3res *, resp);
752 if (vp != NULL)
753 VN_RELE(vp);
754 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
755 }
756
757 void *
758 rfs3_access_getfh(ACCESS3args *args)
759 {
760
761 return (&args->object);
762 }
763
764 /* ARGSUSED */
765 void
766 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
767 struct svc_req *req, cred_t *cr, bool_t ro)
768 {
769 int error;
770 vnode_t *vp;
771 struct vattr *vap;
772 struct vattr va;
773 struct iovec iov;
774 struct uio uio;
775 char *data;
776 struct sockaddr *ca;
777 char *name = NULL;
778 int is_referral = 0;
779
780 vap = NULL;
781
782 vp = nfs3_fhtovp(&args->symlink, exi);
783
784 DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
785 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
786 READLINK3args *, args);
787
788 if (vp == NULL) {
789 error = ESTALE;
790 goto out;
791 }
792
793 va.va_mask = AT_ALL;
794 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
795 if (error)
796 goto out;
797
798 vap = &va;
799
800 /* We lied about the object type for a referral */
801 if (vn_is_nfs_reparse(vp, cr))
802 is_referral = 1;
803
804 if (vp->v_type != VLNK && !is_referral) {
805 resp->status = NFS3ERR_INVAL;
806 goto out1;
807 }
808
809 if (MANDLOCK(vp, va.va_mode)) {
810 resp->status = NFS3ERR_ACCES;
811 goto out1;
812 }
813
814 if (is_system_labeled()) {
815 bslabel_t *clabel = req->rq_label;
816
817 ASSERT(clabel != NULL);
818 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
819 "got client label from request(1)", struct svc_req *, req);
820
821 if (!blequal(&l_admin_low->tsl_label, clabel)) {
822 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
823 exi)) {
824 resp->status = NFS3ERR_ACCES;
825 goto out1;
826 }
827 }
828 }
829
830 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
831
832 if (is_referral) {
833 char *s;
834 size_t strsz;
835
836 /* Get an artificial symlink based on a referral */
837 s = build_symlink(vp, cr, &strsz);
838 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
839 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
840 vnode_t *, vp, char *, s);
841 if (s == NULL)
842 error = EINVAL;
843 else {
844 error = 0;
845 (void) strlcpy(data, s, MAXPATHLEN + 1);
846 kmem_free(s, strsz);
847 }
848
849 } else {
850
851 iov.iov_base = data;
852 iov.iov_len = MAXPATHLEN;
853 uio.uio_iov = &iov;
854 uio.uio_iovcnt = 1;
855 uio.uio_segflg = UIO_SYSSPACE;
856 uio.uio_extflg = UIO_COPY_CACHED;
857 uio.uio_loffset = 0;
858 uio.uio_resid = MAXPATHLEN;
859
860 error = VOP_READLINK(vp, &uio, cr, NULL);
861
862 if (!error)
863 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
864 }
865
866 va.va_mask = AT_ALL;
867 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
868
869 /* Lie about object type again just to be consistent */
870 if (is_referral && vap != NULL)
871 vap->va_type = VLNK;
872
873 #if 0 /* notyet */
874 /*
875 * Don't do this. It causes local disk writes when just
876 * reading the file and the overhead is deemed larger
877 * than the benefit.
878 */
879 /*
880 * Force modified metadata out to stable storage.
881 */
882 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
883 #endif
884
885 if (error) {
886 kmem_free(data, MAXPATHLEN + 1);
887 goto out;
888 }
889
890 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
891 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
892 MAXPATHLEN + 1);
893
894 if (name == NULL) {
895 /*
896 * Even though the conversion failed, we return
897 * something. We just don't translate it.
898 */
899 name = data;
900 }
901
902 resp->status = NFS3_OK;
903 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
904 resp->resok.data = name;
905
906 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
907 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
908 READLINK3res *, resp);
909 VN_RELE(vp);
910
911 if (name != data)
912 kmem_free(data, MAXPATHLEN + 1);
913
914 return;
915
916 out:
917 if (curthread->t_flag & T_WOULDBLOCK) {
918 curthread->t_flag &= ~T_WOULDBLOCK;
919 resp->status = NFS3ERR_JUKEBOX;
920 } else
921 resp->status = puterrno3(error);
922 out1:
923 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
924 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
925 READLINK3res *, resp);
926 if (vp != NULL)
927 VN_RELE(vp);
928 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
929 }
930
931 void *
932 rfs3_readlink_getfh(READLINK3args *args)
933 {
934
935 return (&args->symlink);
936 }
937
938 void
939 rfs3_readlink_free(READLINK3res *resp)
940 {
941
942 if (resp->status == NFS3_OK)
943 kmem_free(resp->resok.data, MAXPATHLEN + 1);
944 }
945
946 /*
947 * Server routine to handle read
948 * May handle RDMA data as well as mblks
949 */
950 /* ARGSUSED */
951 void
952 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
953 struct svc_req *req, cred_t *cr, bool_t ro)
954 {
955 int error;
956 vnode_t *vp;
957 struct vattr *vap;
958 struct vattr va;
959 struct iovec iov, *iovp = NULL;
960 int iovcnt;
961 struct uio uio;
962 u_offset_t offset;
963 mblk_t *mp = NULL;
964 int in_crit = 0;
965 int need_rwunlock = 0;
966 caller_context_t ct;
967 int rdma_used = 0;
968 int loaned_buffers;
969 struct uio *uiop;
970
971 vap = NULL;
972
973 vp = nfs3_fhtovp(&args->file, exi);
974
975 DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
976 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
977 READ3args *, args);
978
979
980 if (vp == NULL) {
981 error = ESTALE;
982 goto out;
983 }
984
985 if (args->wlist) {
986 if (args->count > clist_len(args->wlist)) {
987 error = EINVAL;
988 goto out;
989 }
990 rdma_used = 1;
991 }
992
993 /* use loaned buffers for TCP */
994 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
995
996 if (is_system_labeled()) {
997 bslabel_t *clabel = req->rq_label;
998
999 ASSERT(clabel != NULL);
1000 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1001 "got client label from request(1)", struct svc_req *, req);
1002
1003 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1004 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1005 exi)) {
1006 resp->status = NFS3ERR_ACCES;
1007 goto out1;
1008 }
1009 }
1010 }
1011
1012 ct.cc_sysid = 0;
1013 ct.cc_pid = 0;
1014 ct.cc_caller_id = nfs3_srv_caller_id;
1015 ct.cc_flags = CC_DONTBLOCK;
1016
1017 /*
1018 * Enter the critical region before calling VOP_RWLOCK
1019 * to avoid a deadlock with write requests.
1020 */
1021 if (nbl_need_check(vp)) {
1022 nbl_start_crit(vp, RW_READER);
1023 in_crit = 1;
1024 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1025 NULL)) {
1026 error = EACCES;
1027 goto out;
1028 }
1029 }
1030
1031 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1032
1033 /* check if a monitor detected a delegation conflict */
1034 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1035 resp->status = NFS3ERR_JUKEBOX;
1036 goto out1;
1037 }
1038
1039 need_rwunlock = 1;
1040
1041 va.va_mask = AT_ALL;
1042 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1043
1044 /*
1045 * If we can't get the attributes, then we can't do the
1046 * right access checking. So, we'll fail the request.
1047 */
1048 if (error)
1049 goto out;
1050
1051 vap = &va;
1052
1053 if (vp->v_type != VREG) {
1054 resp->status = NFS3ERR_INVAL;
1055 goto out1;
1056 }
1057
1058 if (crgetuid(cr) != va.va_uid) {
1059 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1060 if (error) {
1061 if (curthread->t_flag & T_WOULDBLOCK)
1062 goto out;
1063 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1064 if (error)
1065 goto out;
1066 }
1067 }
1068
1069 if (MANDLOCK(vp, va.va_mode)) {
1070 resp->status = NFS3ERR_ACCES;
1071 goto out1;
1072 }
1073
1074 offset = args->offset;
1075 if (offset >= va.va_size) {
1076 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1077 if (in_crit)
1078 nbl_end_crit(vp);
1079 resp->status = NFS3_OK;
1080 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1081 resp->resok.count = 0;
1082 resp->resok.eof = TRUE;
1083 resp->resok.data.data_len = 0;
1084 resp->resok.data.data_val = NULL;
1085 resp->resok.data.mp = NULL;
1086 /* RDMA */
1087 resp->resok.wlist = args->wlist;
1088 resp->resok.wlist_len = resp->resok.count;
1089 if (resp->resok.wlist)
1090 clist_zero_len(resp->resok.wlist);
1091 goto done;
1092 }
1093
1094 if (args->count == 0) {
1095 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1096 if (in_crit)
1097 nbl_end_crit(vp);
1098 resp->status = NFS3_OK;
1099 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1100 resp->resok.count = 0;
1101 resp->resok.eof = FALSE;
1102 resp->resok.data.data_len = 0;
1103 resp->resok.data.data_val = NULL;
1104 resp->resok.data.mp = NULL;
1105 /* RDMA */
1106 resp->resok.wlist = args->wlist;
1107 resp->resok.wlist_len = resp->resok.count;
1108 if (resp->resok.wlist)
1109 clist_zero_len(resp->resok.wlist);
1110 goto done;
1111 }
1112
1113 /*
1114 * do not allocate memory more the max. allowed
1115 * transfer size
1116 */
1117 if (args->count > rfs3_tsize(req))
1118 args->count = rfs3_tsize(req);
1119
1120 if (loaned_buffers) {
1121 uiop = (uio_t *)rfs_setup_xuio(vp);
1122 ASSERT(uiop != NULL);
1123 uiop->uio_segflg = UIO_SYSSPACE;
1124 uiop->uio_loffset = args->offset;
1125 uiop->uio_resid = args->count;
1126
1127 /* Jump to do the read if successful */
1128 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1129 /*
1130 * Need to hold the vnode until after VOP_RETZCBUF()
1131 * is called.
1132 */
1133 VN_HOLD(vp);
1134 goto doio_read;
1135 }
1136
1137 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1138 uiop->uio_loffset, int, uiop->uio_resid);
1139
1140 uiop->uio_extflg = 0;
1141 /* failure to setup for zero copy */
1142 rfs_free_xuio((void *)uiop);
1143 loaned_buffers = 0;
1144 }
1145
1146 /*
1147 * If returning data via RDMA Write, then grab the chunk list.
1148 * If we aren't returning READ data w/RDMA_WRITE, then grab
1149 * a mblk.
1150 */
1151 if (rdma_used) {
1152 (void) rdma_get_wchunk(req, &iov, args->wlist);
1153 uio.uio_iov = &iov;
1154 uio.uio_iovcnt = 1;
1155 } else {
1156 /*
1157 * mp will contain the data to be sent out in the read reply.
1158 * For UDP, this will be freed after the reply has been sent
1159 * out by the driver. For TCP, it will be freed after the last
1160 * segment associated with the reply has been ACKed by the
1161 * client.
1162 */
1163 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1164 uio.uio_iov = iovp;
1165 uio.uio_iovcnt = iovcnt;
1166 }
1167
1168 uio.uio_segflg = UIO_SYSSPACE;
1169 uio.uio_extflg = UIO_COPY_CACHED;
1170 uio.uio_loffset = args->offset;
1171 uio.uio_resid = args->count;
1172 uiop = &uio;
1173
1174 doio_read:
1175 error = VOP_READ(vp, uiop, 0, cr, &ct);
1176
1177 if (error) {
1178 if (mp)
1179 freemsg(mp);
1180 /* check if a monitor detected a delegation conflict */
1181 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1182 resp->status = NFS3ERR_JUKEBOX;
1183 goto out1;
1184 }
1185 goto out;
1186 }
1187
1188 /* make mblk using zc buffers */
1189 if (loaned_buffers) {
1190 mp = uio_to_mblk(uiop);
1191 ASSERT(mp != NULL);
1192 }
1193
1194 va.va_mask = AT_ALL;
1195 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1196
1197 if (error)
1198 vap = NULL;
1199 else
1200 vap = &va;
1201
1202 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1203
1204 if (in_crit)
1205 nbl_end_crit(vp);
1206
1207 resp->status = NFS3_OK;
1208 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1209 resp->resok.count = args->count - uiop->uio_resid;
1210 if (!error && offset + resp->resok.count == va.va_size)
1211 resp->resok.eof = TRUE;
1212 else
1213 resp->resok.eof = FALSE;
1214 resp->resok.data.data_len = resp->resok.count;
1215
1216 if (mp)
1217 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1218
1219 resp->resok.data.mp = mp;
1220 resp->resok.size = (uint_t)args->count;
1221
1222 if (rdma_used) {
1223 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1224 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1225 resp->status = NFS3ERR_INVAL;
1226 }
1227 } else {
1228 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1229 (resp->resok).wlist = NULL;
1230 }
1231
1232 done:
1233 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1234 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1235 READ3res *, resp);
1236
1237 VN_RELE(vp);
1238
1239 if (iovp != NULL)
1240 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1241
1242 return;
1243
1244 out:
1245 if (curthread->t_flag & T_WOULDBLOCK) {
1246 curthread->t_flag &= ~T_WOULDBLOCK;
1247 resp->status = NFS3ERR_JUKEBOX;
1248 } else
1249 resp->status = puterrno3(error);
1250 out1:
1251 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1252 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1253 READ3res *, resp);
1254
1255 if (vp != NULL) {
1256 if (need_rwunlock)
1257 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1258 if (in_crit)
1259 nbl_end_crit(vp);
1260 VN_RELE(vp);
1261 }
1262 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1263
1264 if (iovp != NULL)
1265 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1266 }
1267
1268 void
1269 rfs3_read_free(READ3res *resp)
1270 {
1271 mblk_t *mp;
1272
1273 if (resp->status == NFS3_OK) {
1274 mp = resp->resok.data.mp;
1275 if (mp != NULL)
1276 freemsg(mp);
1277 }
1278 }
1279
1280 void *
1281 rfs3_read_getfh(READ3args *args)
1282 {
1283
1284 return (&args->file);
1285 }
1286
/*
 * Number of iovec entries rfs3_write() keeps on its stack; longer mblk
 * chains fall back to a kmem_alloc()ed array.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * DEBUG-only tallies of how often rfs3_write() could use the on-stack
 * iovec array (hits) versus having to allocate one (misses).  File-scope
 * statics start out as zero.
 */
static int rfs3_write_hits;
static int rfs3_write_misses;
#endif
1293
1294 void
1295 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1296 struct svc_req *req, cred_t *cr, bool_t ro)
1297 {
1298 nfs3_srv_t *ns;
1299 int error;
1300 vnode_t *vp;
1301 struct vattr *bvap = NULL;
1302 struct vattr bva;
1303 struct vattr *avap = NULL;
1304 struct vattr ava;
1305 u_offset_t rlimit;
1306 struct uio uio;
1307 struct iovec iov[MAX_IOVECS];
1308 mblk_t *m;
1309 struct iovec *iovp;
1310 int iovcnt;
1311 int ioflag;
1312 cred_t *savecred;
1313 int in_crit = 0;
1314 int rwlock_ret = -1;
1315 caller_context_t ct;
1316
1317 vp = nfs3_fhtovp(&args->file, exi);
1318
1319 DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1320 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1321 WRITE3args *, args);
1322
1323 if (vp == NULL) {
1324 error = ESTALE;
1325 goto err;
1326 }
1327
1328 ns = zone_getspecific(rfs3_zone_key, curzone);
1329 if (is_system_labeled()) {
1330 bslabel_t *clabel = req->rq_label;
1331
1332 ASSERT(clabel != NULL);
1333 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1334 "got client label from request(1)", struct svc_req *, req);
1335
1336 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1337 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1338 exi)) {
1339 resp->status = NFS3ERR_ACCES;
1340 goto err1;
1341 }
1342 }
1343 }
1344
1345 ct.cc_sysid = 0;
1346 ct.cc_pid = 0;
1347 ct.cc_caller_id = nfs3_srv_caller_id;
1348 ct.cc_flags = CC_DONTBLOCK;
1349
1350 /*
1351 * We have to enter the critical region before calling VOP_RWLOCK
1352 * to avoid a deadlock with ufs.
1353 */
1354 if (nbl_need_check(vp)) {
1355 nbl_start_crit(vp, RW_READER);
1356 in_crit = 1;
1357 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1358 NULL)) {
1359 error = EACCES;
1360 goto err;
1361 }
1362 }
1363
1364 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1365
1366 /* check if a monitor detected a delegation conflict */
1367 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1368 resp->status = NFS3ERR_JUKEBOX;
1369 rwlock_ret = -1;
1370 goto err1;
1371 }
1372
1373
1374 bva.va_mask = AT_ALL;
1375 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1376
1377 /*
1378 * If we can't get the attributes, then we can't do the
1379 * right access checking. So, we'll fail the request.
1380 */
1381 if (error)
1382 goto err;
1383
1384 bvap = &bva;
1385 avap = bvap;
1386
1387 if (args->count != args->data.data_len) {
1388 resp->status = NFS3ERR_INVAL;
1389 goto err1;
1390 }
1391
1392 if (rdonly(ro, vp)) {
1393 resp->status = NFS3ERR_ROFS;
1394 goto err1;
1395 }
1396
1397 if (vp->v_type != VREG) {
1398 resp->status = NFS3ERR_INVAL;
1399 goto err1;
1400 }
1401
1402 if (crgetuid(cr) != bva.va_uid &&
1403 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1404 goto err;
1405
1406 if (MANDLOCK(vp, bva.va_mode)) {
1407 resp->status = NFS3ERR_ACCES;
1408 goto err1;
1409 }
1410
1411 if (args->count == 0) {
1412 resp->status = NFS3_OK;
1413 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1414 resp->resok.count = 0;
1415 resp->resok.committed = args->stable;
1416 resp->resok.verf = ns->write3verf;
1417 goto out;
1418 }
1419
1420 if (args->mblk != NULL) {
1421 iovcnt = 0;
1422 for (m = args->mblk; m != NULL; m = m->b_cont)
1423 iovcnt++;
1424 if (iovcnt <= MAX_IOVECS) {
1425 #ifdef DEBUG
1426 rfs3_write_hits++;
1427 #endif
1428 iovp = iov;
1429 } else {
1430 #ifdef DEBUG
1431 rfs3_write_misses++;
1432 #endif
1433 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1434 }
1435 mblk_to_iov(args->mblk, iovcnt, iovp);
1436
1437 } else if (args->rlist != NULL) {
1438 iovcnt = 1;
1439 iovp = iov;
1440 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1441 iovp->iov_len = args->count;
1442 } else {
1443 iovcnt = 1;
1444 iovp = iov;
1445 iovp->iov_base = args->data.data_val;
1446 iovp->iov_len = args->count;
1447 }
1448
1449 uio.uio_iov = iovp;
1450 uio.uio_iovcnt = iovcnt;
1451
1452 uio.uio_segflg = UIO_SYSSPACE;
1453 uio.uio_extflg = UIO_COPY_DEFAULT;
1454 uio.uio_loffset = args->offset;
1455 uio.uio_resid = args->count;
1456 uio.uio_llimit = curproc->p_fsz_ctl;
1457 rlimit = uio.uio_llimit - args->offset;
1458 if (rlimit < (u_offset_t)uio.uio_resid)
1459 uio.uio_resid = (int)rlimit;
1460
1461 if (args->stable == UNSTABLE)
1462 ioflag = 0;
1463 else if (args->stable == FILE_SYNC)
1464 ioflag = FSYNC;
1465 else if (args->stable == DATA_SYNC)
1466 ioflag = FDSYNC;
1467 else {
1468 if (iovp != iov)
1469 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1470 resp->status = NFS3ERR_INVAL;
1471 goto err1;
1472 }
1473
1474 /*
1475 * We're changing creds because VM may fault and we need
1476 * the cred of the current thread to be used if quota
1477 * checking is enabled.
1478 */
1479 savecred = curthread->t_cred;
1480 curthread->t_cred = cr;
1481 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1482 curthread->t_cred = savecred;
1483
1484 if (iovp != iov)
1485 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1486
1487 /* check if a monitor detected a delegation conflict */
1488 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1489 resp->status = NFS3ERR_JUKEBOX;
1490 goto err1;
1491 }
1492
1493 ava.va_mask = AT_ALL;
1494 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1495
1496 if (error)
1497 goto err;
1498
1499 /*
1500 * If we were unable to get the V_WRITELOCK_TRUE, then we
1501 * may not have accurate after attrs, so check if
1502 * we have both attributes, they have a non-zero va_seq, and
1503 * va_seq has changed by exactly one,
1504 * if not, turn off the before attr.
1505 */
1506 if (rwlock_ret != V_WRITELOCK_TRUE) {
1507 if (bvap == NULL || avap == NULL ||
1508 bvap->va_seq == 0 || avap->va_seq == 0 ||
1509 avap->va_seq != (bvap->va_seq + 1)) {
1510 bvap = NULL;
1511 }
1512 }
1513
1514 resp->status = NFS3_OK;
1515 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1516 resp->resok.count = args->count - uio.uio_resid;
1517 resp->resok.committed = args->stable;
1518 resp->resok.verf = ns->write3verf;
1519 goto out;
1520
1521 err:
1522 if (curthread->t_flag & T_WOULDBLOCK) {
1523 curthread->t_flag &= ~T_WOULDBLOCK;
1524 resp->status = NFS3ERR_JUKEBOX;
1525 } else
1526 resp->status = puterrno3(error);
1527 err1:
1528 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1529 out:
1530 DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1531 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1532 WRITE3res *, resp);
1533
1534 if (vp != NULL) {
1535 if (rwlock_ret != -1)
1536 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1537 if (in_crit)
1538 nbl_end_crit(vp);
1539 VN_RELE(vp);
1540 }
1541 }
1542
1543 void *
1544 rfs3_write_getfh(WRITE3args *args)
1545 {
1546
1547 return (&args->file);
1548 }
1549
1550 void
1551 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1552 struct svc_req *req, cred_t *cr, bool_t ro)
1553 {
1554 int error;
1555 int in_crit = 0;
1556 vnode_t *vp;
1557 vnode_t *tvp = NULL;
1558 vnode_t *dvp;
1559 struct vattr *vap;
1560 struct vattr va;
1561 struct vattr *dbvap;
1562 struct vattr dbva;
1563 struct vattr *davap;
1564 struct vattr dava;
1565 enum vcexcl excl;
1566 nfstime3 *mtime;
1567 len_t reqsize;
1568 bool_t trunc;
1569 struct sockaddr *ca;
1570 char *name = NULL;
1571
1572 dbvap = NULL;
1573 davap = NULL;
1574
1575 dvp = nfs3_fhtovp(&args->where.dir, exi);
1576
1577 DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1578 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1579 CREATE3args *, args);
1580
1581 if (dvp == NULL) {
1582 error = ESTALE;
1583 goto out;
1584 }
1585
1586 dbva.va_mask = AT_ALL;
1587 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1588 davap = dbvap;
1589
1590 if (args->where.name == nfs3nametoolong) {
1591 resp->status = NFS3ERR_NAMETOOLONG;
1592 goto out1;
1593 }
1594
1595 if (args->where.name == NULL || *(args->where.name) == '\0') {
1596 resp->status = NFS3ERR_ACCES;
1597 goto out1;
1598 }
1599
1600 if (rdonly(ro, dvp)) {
1601 resp->status = NFS3ERR_ROFS;
1602 goto out1;
1603 }
1604
1605 if (is_system_labeled()) {
1606 bslabel_t *clabel = req->rq_label;
1607
1608 ASSERT(clabel != NULL);
1609 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1610 "got client label from request(1)", struct svc_req *, req);
1611
1612 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1613 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1614 exi)) {
1615 resp->status = NFS3ERR_ACCES;
1616 goto out1;
1617 }
1618 }
1619 }
1620
1621 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1622 name = nfscmd_convname(ca, exi, args->where.name,
1623 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1624
1625 if (name == NULL) {
1626 /* This is really a Solaris EILSEQ */
1627 resp->status = NFS3ERR_INVAL;
1628 goto out1;
1629 }
1630
1631 if (args->how.mode == EXCLUSIVE) {
1632 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1633 va.va_type = VREG;
1634 va.va_mode = (mode_t)0;
1635 /*
1636 * Ensure no time overflows and that types match
1637 */
1638 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1639 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1640 va.va_mtime.tv_nsec = mtime->nseconds;
1641 excl = EXCL;
1642 } else {
1643 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1644 &va);
1645 if (error)
1646 goto out;
1647 va.va_mask |= AT_TYPE;
1648 va.va_type = VREG;
1649 if (args->how.mode == GUARDED)
1650 excl = EXCL;
1651 else {
1652 excl = NONEXCL;
1653
1654 /*
1655 * During creation of file in non-exclusive mode
1656 * if size of file is being set then make sure
1657 * that if the file already exists that no conflicting
1658 * non-blocking mandatory locks exists in the region
1659 * being modified. If there are conflicting locks fail
1660 * the operation with EACCES.
1661 */
1662 if (va.va_mask & AT_SIZE) {
1663 struct vattr tva;
1664
1665 /*
1666 * Does file already exist?
1667 */
1668 error = VOP_LOOKUP(dvp, name, &tvp,
1669 NULL, 0, NULL, cr, NULL, NULL, NULL);
1670
1671 /*
1672 * Check to see if the file has been delegated
1673 * to a v4 client. If so, then begin recall of
1674 * the delegation and return JUKEBOX to allow
1675 * the client to retrasmit its request.
1676 */
1677
1678 trunc = va.va_size == 0;
1679 if (!error &&
1680 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1681 resp->status = NFS3ERR_JUKEBOX;
1682 goto out1;
1683 }
1684
1685 /*
1686 * Check for NBMAND lock conflicts
1687 */
1688 if (!error && nbl_need_check(tvp)) {
1689 u_offset_t offset;
1690 ssize_t len;
1691
1692 nbl_start_crit(tvp, RW_READER);
1693 in_crit = 1;
1694
1695 tva.va_mask = AT_SIZE;
1696 error = VOP_GETATTR(tvp, &tva, 0, cr,
1697 NULL);
1698 /*
1699 * Can't check for conflicts, so return
1700 * error.
1701 */
1702 if (error)
1703 goto out;
1704
1705 offset = tva.va_size < va.va_size ?
1706 tva.va_size : va.va_size;
1707 len = tva.va_size < va.va_size ?
1708 va.va_size - tva.va_size :
1709 tva.va_size - va.va_size;
1710 if (nbl_conflict(tvp, NBL_WRITE,
1711 offset, len, 0, NULL)) {
1712 error = EACCES;
1713 goto out;
1714 }
1715 } else if (tvp) {
1716 VN_RELE(tvp);
1717 tvp = NULL;
1718 }
1719 }
1720 }
1721 if (va.va_mask & AT_SIZE)
1722 reqsize = va.va_size;
1723 }
1724
1725 /*
1726 * Must specify the mode.
1727 */
1728 if (!(va.va_mask & AT_MODE)) {
1729 resp->status = NFS3ERR_INVAL;
1730 goto out1;
1731 }
1732
1733 /*
1734 * If the filesystem is exported with nosuid, then mask off
1735 * the setuid and setgid bits.
1736 */
1737 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1738 va.va_mode &= ~(VSUID | VSGID);
1739
1740 tryagain:
1741 /*
1742 * The file open mode used is VWRITE. If the client needs
1743 * some other semantic, then it should do the access checking
1744 * itself. It would have been nice to have the file open mode
1745 * passed as part of the arguments.
1746 */
1747 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1748 &vp, cr, 0, NULL, NULL);
1749
1750 dava.va_mask = AT_ALL;
1751 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1752
1753 if (error) {
1754 /*
1755 * If we got something other than file already exists
1756 * then just return this error. Otherwise, we got
1757 * EEXIST. If we were doing a GUARDED create, then
1758 * just return this error. Otherwise, we need to
1759 * make sure that this wasn't a duplicate of an
1760 * exclusive create request.
1761 *
1762 * The assumption is made that a non-exclusive create
1763 * request will never return EEXIST.
1764 */
1765 if (error != EEXIST || args->how.mode == GUARDED)
1766 goto out;
1767 /*
1768 * Lookup the file so that we can get a vnode for it.
1769 */
1770 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1771 NULL, cr, NULL, NULL, NULL);
1772 if (error) {
1773 /*
1774 * We couldn't find the file that we thought that
1775 * we just created. So, we'll just try creating
1776 * it again.
1777 */
1778 if (error == ENOENT)
1779 goto tryagain;
1780 goto out;
1781 }
1782
1783 /*
1784 * If the file is delegated to a v4 client, go ahead
1785 * and initiate recall, this create is a hint that a
1786 * conflicting v3 open has occurred.
1787 */
1788
1789 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1790 VN_RELE(vp);
1791 resp->status = NFS3ERR_JUKEBOX;
1792 goto out1;
1793 }
1794
1795 va.va_mask = AT_ALL;
1796 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1797
1798 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1799 /* % with INT32_MAX to prevent overflows */
1800 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1801 vap->va_mtime.tv_sec !=
1802 (mtime->seconds % INT32_MAX) ||
1803 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1804 VN_RELE(vp);
1805 error = EEXIST;
1806 goto out;
1807 }
1808 } else {
1809
1810 if ((args->how.mode == UNCHECKED ||
1811 args->how.mode == GUARDED) &&
1812 args->how.createhow3_u.obj_attributes.size.set_it &&
1813 va.va_size == 0)
1814 trunc = TRUE;
1815 else
1816 trunc = FALSE;
1817
1818 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1819 VN_RELE(vp);
1820 resp->status = NFS3ERR_JUKEBOX;
1821 goto out1;
1822 }
1823
1824 va.va_mask = AT_ALL;
1825 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1826
1827 /*
1828 * We need to check to make sure that the file got
1829 * created to the indicated size. If not, we do a
1830 * setattr to try to change the size, but we don't
1831 * try too hard. This shouldn't a problem as most
1832 * clients will only specifiy a size of zero which
1833 * local file systems handle. However, even if
1834 * the client does specify a non-zero size, it can
1835 * still recover by checking the size of the file
1836 * after it has created it and then issue a setattr
1837 * request of its own to set the size of the file.
1838 */
1839 if (vap != NULL &&
1840 (args->how.mode == UNCHECKED ||
1841 args->how.mode == GUARDED) &&
1842 args->how.createhow3_u.obj_attributes.size.set_it &&
1843 vap->va_size != reqsize) {
1844 va.va_mask = AT_SIZE;
1845 va.va_size = reqsize;
1846 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1847 va.va_mask = AT_ALL;
1848 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1849 }
1850 }
1851
1852 if (name != args->where.name)
1853 kmem_free(name, MAXPATHLEN + 1);
1854
1855 error = makefh3(&resp->resok.obj.handle, vp, exi);
1856 if (error)
1857 resp->resok.obj.handle_follows = FALSE;
1858 else
1859 resp->resok.obj.handle_follows = TRUE;
1860
1861 /*
1862 * Force modified data and metadata out to stable storage.
1863 */
1864 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1865 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1866
1867 VN_RELE(vp);
1868 if (tvp != NULL) {
1869 if (in_crit)
1870 nbl_end_crit(tvp);
1871 VN_RELE(tvp);
1872 }
1873
1874 resp->status = NFS3_OK;
1875 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1876 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1877
1878 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1879 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1880 CREATE3res *, resp);
1881
1882 VN_RELE(dvp);
1883 return;
1884
1885 out:
1886 if (curthread->t_flag & T_WOULDBLOCK) {
1887 curthread->t_flag &= ~T_WOULDBLOCK;
1888 resp->status = NFS3ERR_JUKEBOX;
1889 } else
1890 resp->status = puterrno3(error);
1891 out1:
1892 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1893 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1894 CREATE3res *, resp);
1895
1896 if (name != NULL && name != args->where.name)
1897 kmem_free(name, MAXPATHLEN + 1);
1898
1899 if (tvp != NULL) {
1900 if (in_crit)
1901 nbl_end_crit(tvp);
1902 VN_RELE(tvp);
1903 }
1904 if (dvp != NULL)
1905 VN_RELE(dvp);
1906 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1907 }
1908
1909 void *
1910 rfs3_create_getfh(CREATE3args *args)
1911 {
1912
1913 return (&args->where.dir);
1914 }
1915
1916 void
1917 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1918 struct svc_req *req, cred_t *cr, bool_t ro)
1919 {
1920 int error;
1921 vnode_t *vp = NULL;
1922 vnode_t *dvp;
1923 struct vattr *vap;
1924 struct vattr va;
1925 struct vattr *dbvap;
1926 struct vattr dbva;
1927 struct vattr *davap;
1928 struct vattr dava;
1929 struct sockaddr *ca;
1930 char *name = NULL;
1931
1932 dbvap = NULL;
1933 davap = NULL;
1934
1935 dvp = nfs3_fhtovp(&args->where.dir, exi);
1936
1937 DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1938 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1939 MKDIR3args *, args);
1940
1941 if (dvp == NULL) {
1942 error = ESTALE;
1943 goto out;
1944 }
1945
1946 dbva.va_mask = AT_ALL;
1947 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1948 davap = dbvap;
1949
1950 if (args->where.name == nfs3nametoolong) {
1951 resp->status = NFS3ERR_NAMETOOLONG;
1952 goto out1;
1953 }
1954
1955 if (args->where.name == NULL || *(args->where.name) == '\0') {
1956 resp->status = NFS3ERR_ACCES;
1957 goto out1;
1958 }
1959
1960 if (rdonly(ro, dvp)) {
1961 resp->status = NFS3ERR_ROFS;
1962 goto out1;
1963 }
1964
1965 if (is_system_labeled()) {
1966 bslabel_t *clabel = req->rq_label;
1967
1968 ASSERT(clabel != NULL);
1969 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1970 "got client label from request(1)", struct svc_req *, req);
1971
1972 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1973 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1974 exi)) {
1975 resp->status = NFS3ERR_ACCES;
1976 goto out1;
1977 }
1978 }
1979 }
1980
1981 error = sattr3_to_vattr(&args->attributes, &va);
1982 if (error)
1983 goto out;
1984
1985 if (!(va.va_mask & AT_MODE)) {
1986 resp->status = NFS3ERR_INVAL;
1987 goto out1;
1988 }
1989
1990 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1991 name = nfscmd_convname(ca, exi, args->where.name,
1992 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1993
1994 if (name == NULL) {
1995 resp->status = NFS3ERR_INVAL;
1996 goto out1;
1997 }
1998
1999 va.va_mask |= AT_TYPE;
2000 va.va_type = VDIR;
2001
2002 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2003
2004 if (name != args->where.name)
2005 kmem_free(name, MAXPATHLEN + 1);
2006
2007 dava.va_mask = AT_ALL;
2008 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2009
2010 /*
2011 * Force modified data and metadata out to stable storage.
2012 */
2013 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2014
2015 if (error)
2016 goto out;
2017
2018 error = makefh3(&resp->resok.obj.handle, vp, exi);
2019 if (error)
2020 resp->resok.obj.handle_follows = FALSE;
2021 else
2022 resp->resok.obj.handle_follows = TRUE;
2023
2024 va.va_mask = AT_ALL;
2025 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2026
2027 /*
2028 * Force modified data and metadata out to stable storage.
2029 */
2030 (void) VOP_FSYNC(vp, 0, cr, NULL);
2031
2032 VN_RELE(vp);
2033
2034 resp->status = NFS3_OK;
2035 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2036 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2037
2038 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2039 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2040 MKDIR3res *, resp);
2041 VN_RELE(dvp);
2042
2043 return;
2044
2045 out:
2046 if (curthread->t_flag & T_WOULDBLOCK) {
2047 curthread->t_flag &= ~T_WOULDBLOCK;
2048 resp->status = NFS3ERR_JUKEBOX;
2049 } else
2050 resp->status = puterrno3(error);
2051 out1:
2052 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2053 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2054 MKDIR3res *, resp);
2055 if (dvp != NULL)
2056 VN_RELE(dvp);
2057 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2058 }
2059
2060 void *
2061 rfs3_mkdir_getfh(MKDIR3args *args)
2062 {
2063
2064 return (&args->where.dir);
2065 }
2066
2067 void
2068 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2069 struct svc_req *req, cred_t *cr, bool_t ro)
2070 {
2071 int error;
2072 vnode_t *vp;
2073 vnode_t *dvp;
2074 struct vattr *vap;
2075 struct vattr va;
2076 struct vattr *dbvap;
2077 struct vattr dbva;
2078 struct vattr *davap;
2079 struct vattr dava;
2080 struct sockaddr *ca;
2081 char *name = NULL;
2082 char *symdata = NULL;
2083
2084 dbvap = NULL;
2085 davap = NULL;
2086
2087 dvp = nfs3_fhtovp(&args->where.dir, exi);
2088
2089 DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2090 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2091 SYMLINK3args *, args);
2092
2093 if (dvp == NULL) {
2094 error = ESTALE;
2095 goto err;
2096 }
2097
2098 dbva.va_mask = AT_ALL;
2099 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2100 davap = dbvap;
2101
2102 if (args->where.name == nfs3nametoolong) {
2103 resp->status = NFS3ERR_NAMETOOLONG;
2104 goto err1;
2105 }
2106
2107 if (args->where.name == NULL || *(args->where.name) == '\0') {
2108 resp->status = NFS3ERR_ACCES;
2109 goto err1;
2110 }
2111
2112 if (rdonly(ro, dvp)) {
2113 resp->status = NFS3ERR_ROFS;
2114 goto err1;
2115 }
2116
2117 if (is_system_labeled()) {
2118 bslabel_t *clabel = req->rq_label;
2119
2120 ASSERT(clabel != NULL);
2121 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2122 "got client label from request(1)", struct svc_req *, req);
2123
2124 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2125 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2126 exi)) {
2127 resp->status = NFS3ERR_ACCES;
2128 goto err1;
2129 }
2130 }
2131 }
2132
2133 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2134 if (error)
2135 goto err;
2136
2137 if (!(va.va_mask & AT_MODE)) {
2138 resp->status = NFS3ERR_INVAL;
2139 goto err1;
2140 }
2141
2142 if (args->symlink.symlink_data == nfs3nametoolong) {
2143 resp->status = NFS3ERR_NAMETOOLONG;
2144 goto err1;
2145 }
2146
2147 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2148 name = nfscmd_convname(ca, exi, args->where.name,
2149 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2150
2151 if (name == NULL) {
2152 /* This is really a Solaris EILSEQ */
2153 resp->status = NFS3ERR_INVAL;
2154 goto err1;
2155 }
2156
2157 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2158 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2159 if (symdata == NULL) {
2160 /* This is really a Solaris EILSEQ */
2161 resp->status = NFS3ERR_INVAL;
2162 goto err1;
2163 }
2164
2165
2166 va.va_mask |= AT_TYPE;
2167 va.va_type = VLNK;
2168
2169 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2170
2171 dava.va_mask = AT_ALL;
2172 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2173
2174 if (error)
2175 goto err;
2176
2177 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2178 NULL, NULL, NULL);
2179
2180 /*
2181 * Force modified data and metadata out to stable storage.
2182 */
2183 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2184
2185
2186 resp->status = NFS3_OK;
2187 if (error) {
2188 resp->resok.obj.handle_follows = FALSE;
2189 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2190 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2191 goto out;
2192 }
2193
2194 error = makefh3(&resp->resok.obj.handle, vp, exi);
2195 if (error)
2196 resp->resok.obj.handle_follows = FALSE;
2197 else
2198 resp->resok.obj.handle_follows = TRUE;
2199
2200 va.va_mask = AT_ALL;
2201 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2202
2203 /*
2204 * Force modified data and metadata out to stable storage.
2205 */
2206 (void) VOP_FSYNC(vp, 0, cr, NULL);
2207
2208 VN_RELE(vp);
2209
2210 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2211 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2212 goto out;
2213
2214 err:
2215 if (curthread->t_flag & T_WOULDBLOCK) {
2216 curthread->t_flag &= ~T_WOULDBLOCK;
2217 resp->status = NFS3ERR_JUKEBOX;
2218 } else
2219 resp->status = puterrno3(error);
2220 err1:
2221 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2222 out:
2223 if (name != NULL && name != args->where.name)
2224 kmem_free(name, MAXPATHLEN + 1);
2225 if (symdata != NULL && symdata != args->symlink.symlink_data)
2226 kmem_free(symdata, MAXPATHLEN + 1);
2227
2228 DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2229 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2230 SYMLINK3res *, resp);
2231
2232 if (dvp != NULL)
2233 VN_RELE(dvp);
2234 }
2235
2236 void *
2237 rfs3_symlink_getfh(SYMLINK3args *args)
2238 {
2239
2240 return (&args->where.dir);
2241 }
2242
2243 void
2244 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2245 struct svc_req *req, cred_t *cr, bool_t ro)
2246 {
2247 int error;
2248 vnode_t *vp;
2249 vnode_t *realvp;
2250 vnode_t *dvp;
2251 struct vattr *vap;
2252 struct vattr va;
2253 struct vattr *dbvap;
2254 struct vattr dbva;
2255 struct vattr *davap;
2256 struct vattr dava;
2257 int mode;
2258 enum vcexcl excl;
2259 struct sockaddr *ca;
2260 char *name = NULL;
2261
2262 dbvap = NULL;
2263 davap = NULL;
2264
2265 dvp = nfs3_fhtovp(&args->where.dir, exi);
2266
2267 DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2268 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2269 MKNOD3args *, args);
2270
2271 if (dvp == NULL) {
2272 error = ESTALE;
2273 goto out;
2274 }
2275
2276 dbva.va_mask = AT_ALL;
2277 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2278 davap = dbvap;
2279
2280 if (args->where.name == nfs3nametoolong) {
2281 resp->status = NFS3ERR_NAMETOOLONG;
2282 goto out1;
2283 }
2284
2285 if (args->where.name == NULL || *(args->where.name) == '\0') {
2286 resp->status = NFS3ERR_ACCES;
2287 goto out1;
2288 }
2289
2290 if (rdonly(ro, dvp)) {
2291 resp->status = NFS3ERR_ROFS;
2292 goto out1;
2293 }
2294
2295 if (is_system_labeled()) {
2296 bslabel_t *clabel = req->rq_label;
2297
2298 ASSERT(clabel != NULL);
2299 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2300 "got client label from request(1)", struct svc_req *, req);
2301
2302 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2303 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2304 exi)) {
2305 resp->status = NFS3ERR_ACCES;
2306 goto out1;
2307 }
2308 }
2309 }
2310
2311 switch (args->what.type) {
2312 case NF3CHR:
2313 case NF3BLK:
2314 error = sattr3_to_vattr(
2315 &args->what.mknoddata3_u.device.dev_attributes, &va);
2316 if (error)
2317 goto out;
2318 if (secpolicy_sys_devices(cr) != 0) {
2319 resp->status = NFS3ERR_PERM;
2320 goto out1;
2321 }
2322 if (args->what.type == NF3CHR)
2323 va.va_type = VCHR;
2324 else
2325 va.va_type = VBLK;
2326 va.va_rdev = makedevice(
2327 args->what.mknoddata3_u.device.spec.specdata1,
2328 args->what.mknoddata3_u.device.spec.specdata2);
2329 va.va_mask |= AT_TYPE | AT_RDEV;
2330 break;
2331 case NF3SOCK:
2332 error = sattr3_to_vattr(
2333 &args->what.mknoddata3_u.pipe_attributes, &va);
2334 if (error)
2335 goto out;
2336 va.va_type = VSOCK;
2337 va.va_mask |= AT_TYPE;
2338 break;
2339 case NF3FIFO:
2340 error = sattr3_to_vattr(
2341 &args->what.mknoddata3_u.pipe_attributes, &va);
2342 if (error)
2343 goto out;
2344 va.va_type = VFIFO;
2345 va.va_mask |= AT_TYPE;
2346 break;
2347 default:
2348 resp->status = NFS3ERR_BADTYPE;
2349 goto out1;
2350 }
2351
2352 /*
2353 * Must specify the mode.
2354 */
2355 if (!(va.va_mask & AT_MODE)) {
2356 resp->status = NFS3ERR_INVAL;
2357 goto out1;
2358 }
2359
2360 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2361 name = nfscmd_convname(ca, exi, args->where.name,
2362 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2363
2364 if (name == NULL) {
2365 resp->status = NFS3ERR_INVAL;
2366 goto out1;
2367 }
2368
2369 excl = EXCL;
2370
2371 mode = 0;
2372
2373 error = VOP_CREATE(dvp, name, &va, excl, mode,
2374 &vp, cr, 0, NULL, NULL);
2375
2376 if (name != args->where.name)
2377 kmem_free(name, MAXPATHLEN + 1);
2378
2379 dava.va_mask = AT_ALL;
2380 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2381
2382 /*
2383 * Force modified data and metadata out to stable storage.
2384 */
2385 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2386
2387 if (error)
2388 goto out;
2389
2390 resp->status = NFS3_OK;
2391
2392 error = makefh3(&resp->resok.obj.handle, vp, exi);
2393 if (error)
2394 resp->resok.obj.handle_follows = FALSE;
2395 else
2396 resp->resok.obj.handle_follows = TRUE;
2397
2398 va.va_mask = AT_ALL;
2399 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2400
2401 /*
2402 * Force modified metadata out to stable storage.
2403 *
2404 * if a underlying vp exists, pass it to VOP_FSYNC
2405 */
2406 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2407 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2408 else
2409 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2410
2411 VN_RELE(vp);
2412
2413 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2414 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2415 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2416 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2417 MKNOD3res *, resp);
2418 VN_RELE(dvp);
2419 return;
2420
2421 out:
2422 if (curthread->t_flag & T_WOULDBLOCK) {
2423 curthread->t_flag &= ~T_WOULDBLOCK;
2424 resp->status = NFS3ERR_JUKEBOX;
2425 } else
2426 resp->status = puterrno3(error);
2427 out1:
2428 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2429 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2430 MKNOD3res *, resp);
2431 if (dvp != NULL)
2432 VN_RELE(dvp);
2433 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2434 }
2435
2436 void *
2437 rfs3_mknod_getfh(MKNOD3args *args)
2438 {
2439
2440 return (&args->where.dir);
2441 }
2442
2443 void
2444 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2445 struct svc_req *req, cred_t *cr, bool_t ro)
2446 {
2447 int error = 0;
2448 vnode_t *vp;
2449 struct vattr *bvap;
2450 struct vattr bva;
2451 struct vattr *avap;
2452 struct vattr ava;
2453 vnode_t *targvp = NULL;
2454 struct sockaddr *ca;
2455 char *name = NULL;
2456
2457 bvap = NULL;
2458 avap = NULL;
2459
2460 vp = nfs3_fhtovp(&args->object.dir, exi);
2461
2462 DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2463 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2464 REMOVE3args *, args);
2465
2466 if (vp == NULL) {
2467 error = ESTALE;
2468 goto err;
2469 }
2470
2471 bva.va_mask = AT_ALL;
2472 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2473 avap = bvap;
2474
2475 if (vp->v_type != VDIR) {
2476 resp->status = NFS3ERR_NOTDIR;
2477 goto err1;
2478 }
2479
2480 if (args->object.name == nfs3nametoolong) {
2481 resp->status = NFS3ERR_NAMETOOLONG;
2482 goto err1;
2483 }
2484
2485 if (args->object.name == NULL || *(args->object.name) == '\0') {
2486 resp->status = NFS3ERR_ACCES;
2487 goto err1;
2488 }
2489
2490 if (rdonly(ro, vp)) {
2491 resp->status = NFS3ERR_ROFS;
2492 goto err1;
2493 }
2494
2495 if (is_system_labeled()) {
2496 bslabel_t *clabel = req->rq_label;
2497
2498 ASSERT(clabel != NULL);
2499 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2500 "got client label from request(1)", struct svc_req *, req);
2501
2502 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2503 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2504 exi)) {
2505 resp->status = NFS3ERR_ACCES;
2506 goto err1;
2507 }
2508 }
2509 }
2510
2511 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2512 name = nfscmd_convname(ca, exi, args->object.name,
2513 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2514
2515 if (name == NULL) {
2516 resp->status = NFS3ERR_INVAL;
2517 goto err1;
2518 }
2519
2520 /*
2521 * Check for a conflict with a non-blocking mandatory share
2522 * reservation and V4 delegations
2523 */
2524 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2525 NULL, cr, NULL, NULL, NULL);
2526 if (error != 0)
2527 goto err;
2528
2529 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2530 resp->status = NFS3ERR_JUKEBOX;
2531 goto err1;
2532 }
2533
2534 if (!nbl_need_check(targvp)) {
2535 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2536 } else {
2537 nbl_start_crit(targvp, RW_READER);
2538 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2539 error = EACCES;
2540 } else {
2541 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2542 }
2543 nbl_end_crit(targvp);
2544 }
2545 VN_RELE(targvp);
2546 targvp = NULL;
2547
2548 ava.va_mask = AT_ALL;
2549 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2550
2551 /*
2552 * Force modified data and metadata out to stable storage.
2553 */
2554 (void) VOP_FSYNC(vp, 0, cr, NULL);
2555
2556 if (error)
2557 goto err;
2558
2559 resp->status = NFS3_OK;
2560 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2561 goto out;
2562
2563 err:
2564 if (curthread->t_flag & T_WOULDBLOCK) {
2565 curthread->t_flag &= ~T_WOULDBLOCK;
2566 resp->status = NFS3ERR_JUKEBOX;
2567 } else
2568 resp->status = puterrno3(error);
2569 err1:
2570 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2571 out:
2572 DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2573 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2574 REMOVE3res *, resp);
2575
2576 if (name != NULL && name != args->object.name)
2577 kmem_free(name, MAXPATHLEN + 1);
2578
2579 if (vp != NULL)
2580 VN_RELE(vp);
2581 }
2582
2583 void *
2584 rfs3_remove_getfh(REMOVE3args *args)
2585 {
2586
2587 return (&args->object.dir);
2588 }
2589
2590 void
2591 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2592 struct svc_req *req, cred_t *cr, bool_t ro)
2593 {
2594 int error;
2595 vnode_t *vp;
2596 struct vattr *bvap;
2597 struct vattr bva;
2598 struct vattr *avap;
2599 struct vattr ava;
2600 struct sockaddr *ca;
2601 char *name = NULL;
2602
2603 bvap = NULL;
2604 avap = NULL;
2605
2606 vp = nfs3_fhtovp(&args->object.dir, exi);
2607
2608 DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2609 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2610 RMDIR3args *, args);
2611
2612 if (vp == NULL) {
2613 error = ESTALE;
2614 goto err;
2615 }
2616
2617 bva.va_mask = AT_ALL;
2618 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2619 avap = bvap;
2620
2621 if (vp->v_type != VDIR) {
2622 resp->status = NFS3ERR_NOTDIR;
2623 goto err1;
2624 }
2625
2626 if (args->object.name == nfs3nametoolong) {
2627 resp->status = NFS3ERR_NAMETOOLONG;
2628 goto err1;
2629 }
2630
2631 if (args->object.name == NULL || *(args->object.name) == '\0') {
2632 resp->status = NFS3ERR_ACCES;
2633 goto err1;
2634 }
2635
2636 if (rdonly(ro, vp)) {
2637 resp->status = NFS3ERR_ROFS;
2638 goto err1;
2639 }
2640
2641 if (is_system_labeled()) {
2642 bslabel_t *clabel = req->rq_label;
2643
2644 ASSERT(clabel != NULL);
2645 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2646 "got client label from request(1)", struct svc_req *, req);
2647
2648 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2649 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2650 exi)) {
2651 resp->status = NFS3ERR_ACCES;
2652 goto err1;
2653 }
2654 }
2655 }
2656
2657 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2658 name = nfscmd_convname(ca, exi, args->object.name,
2659 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2660
2661 if (name == NULL) {
2662 resp->status = NFS3ERR_INVAL;
2663 goto err1;
2664 }
2665
2666 error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2667
2668 if (name != args->object.name)
2669 kmem_free(name, MAXPATHLEN + 1);
2670
2671 ava.va_mask = AT_ALL;
2672 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2673
2674 /*
2675 * Force modified data and metadata out to stable storage.
2676 */
2677 (void) VOP_FSYNC(vp, 0, cr, NULL);
2678
2679 if (error) {
2680 /*
2681 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2682 * if the directory is not empty. A System V NFS server
2683 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2684 * over the wire.
2685 */
2686 if (error == EEXIST)
2687 error = ENOTEMPTY;
2688 goto err;
2689 }
2690
2691 resp->status = NFS3_OK;
2692 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2693 goto out;
2694
2695 err:
2696 if (curthread->t_flag & T_WOULDBLOCK) {
2697 curthread->t_flag &= ~T_WOULDBLOCK;
2698 resp->status = NFS3ERR_JUKEBOX;
2699 } else
2700 resp->status = puterrno3(error);
2701 err1:
2702 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2703 out:
2704 DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2705 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2706 RMDIR3res *, resp);
2707 if (vp != NULL)
2708 VN_RELE(vp);
2709
2710 }
2711
2712 void *
2713 rfs3_rmdir_getfh(RMDIR3args *args)
2714 {
2715
2716 return (&args->object.dir);
2717 }
2718
2719 void
2720 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2721 struct svc_req *req, cred_t *cr, bool_t ro)
2722 {
2723 int error = 0;
2724 vnode_t *fvp;
2725 vnode_t *tvp;
2726 vnode_t *targvp;
2727 struct vattr *fbvap;
2728 struct vattr fbva;
2729 struct vattr *favap;
2730 struct vattr fava;
2731 struct vattr *tbvap;
2732 struct vattr tbva;
2733 struct vattr *tavap;
2734 struct vattr tava;
2735 nfs_fh3 *fh3;
2736 struct exportinfo *to_exi;
2737 vnode_t *srcvp = NULL;
2738 bslabel_t *clabel;
2739 struct sockaddr *ca;
2740 char *name = NULL;
2741 char *toname = NULL;
2742
2743 fbvap = NULL;
2744 favap = NULL;
2745 tbvap = NULL;
2746 tavap = NULL;
2747 tvp = NULL;
2748
2749 fvp = nfs3_fhtovp(&args->from.dir, exi);
2750
2751 DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2752 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2753 RENAME3args *, args);
2754
2755 if (fvp == NULL) {
2756 error = ESTALE;
2757 goto err;
2758 }
2759
2760 if (is_system_labeled()) {
2761 clabel = req->rq_label;
2762 ASSERT(clabel != NULL);
2763 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2764 "got client label from request(1)", struct svc_req *, req);
2765
2766 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2767 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2768 exi)) {
2769 resp->status = NFS3ERR_ACCES;
2770 goto err1;
2771 }
2772 }
2773 }
2774
2775 fbva.va_mask = AT_ALL;
2776 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2777 favap = fbvap;
2778
2779 fh3 = &args->to.dir;
2780 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2781 if (to_exi == NULL) {
2782 resp->status = NFS3ERR_ACCES;
2783 goto err1;
2784 }
2785 exi_rele(to_exi);
2786
2787 if (to_exi != exi) {
2788 resp->status = NFS3ERR_XDEV;
2789 goto err1;
2790 }
2791
2792 tvp = nfs3_fhtovp(&args->to.dir, exi);
2793 if (tvp == NULL) {
2794 error = ESTALE;
2795 goto err;
2796 }
2797
2798 tbva.va_mask = AT_ALL;
2799 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2800 tavap = tbvap;
2801
2802 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2803 resp->status = NFS3ERR_NOTDIR;
2804 goto err1;
2805 }
2806
2807 if (args->from.name == nfs3nametoolong ||
2808 args->to.name == nfs3nametoolong) {
2809 resp->status = NFS3ERR_NAMETOOLONG;
2810 goto err1;
2811 }
2812 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2813 args->to.name == NULL || *(args->to.name) == '\0') {
2814 resp->status = NFS3ERR_ACCES;
2815 goto err1;
2816 }
2817
2818 if (rdonly(ro, tvp)) {
2819 resp->status = NFS3ERR_ROFS;
2820 goto err1;
2821 }
2822
2823 if (is_system_labeled()) {
2824 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2825 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2826 exi)) {
2827 resp->status = NFS3ERR_ACCES;
2828 goto err1;
2829 }
2830 }
2831 }
2832
2833 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2834 name = nfscmd_convname(ca, exi, args->from.name,
2835 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2836
2837 if (name == NULL) {
2838 resp->status = NFS3ERR_INVAL;
2839 goto err1;
2840 }
2841
2842 toname = nfscmd_convname(ca, exi, args->to.name,
2843 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2844
2845 if (toname == NULL) {
2846 resp->status = NFS3ERR_INVAL;
2847 goto err1;
2848 }
2849
2850 /*
2851 * Check for a conflict with a non-blocking mandatory share
2852 * reservation or V4 delegations.
2853 */
2854 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2855 NULL, cr, NULL, NULL, NULL);
2856 if (error != 0)
2857 goto err;
2858
2859 /*
2860 * If we rename a delegated file we should recall the
2861 * delegation, since future opens should fail or would
2862 * refer to a new file.
2863 */
2864 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2865 resp->status = NFS3ERR_JUKEBOX;
2866 goto err1;
2867 }
2868
2869 /*
2870 * Check for renaming over a delegated file. Check nfs4_deleg_policy
2871 * first to avoid VOP_LOOKUP if possible.
2872 */
2873 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2874 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2875 NULL, NULL, NULL) == 0) {
2876
2877 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2878 VN_RELE(targvp);
2879 resp->status = NFS3ERR_JUKEBOX;
2880 goto err1;
2881 }
2882 VN_RELE(targvp);
2883 }
2884
2885 if (!nbl_need_check(srcvp)) {
2886 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2887 } else {
2888 nbl_start_crit(srcvp, RW_READER);
2889 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2890 error = EACCES;
2891 else
2892 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2893 nbl_end_crit(srcvp);
2894 }
2895 if (error == 0)
2896 vn_renamepath(tvp, srcvp, args->to.name,
2897 strlen(args->to.name));
2898 VN_RELE(srcvp);
2899 srcvp = NULL;
2900
2901 fava.va_mask = AT_ALL;
2902 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2903 tava.va_mask = AT_ALL;
2904 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2905
2906 /*
2907 * Force modified data and metadata out to stable storage.
2908 */
2909 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2910 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2911
2912 if (error)
2913 goto err;
2914
2915 resp->status = NFS3_OK;
2916 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2917 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2918 goto out;
2919
2920 err:
2921 if (curthread->t_flag & T_WOULDBLOCK) {
2922 curthread->t_flag &= ~T_WOULDBLOCK;
2923 resp->status = NFS3ERR_JUKEBOX;
2924 } else {
2925 resp->status = puterrno3(error);
2926 }
2927 err1:
2928 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2929 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2930
2931 out:
2932 if (name != NULL && name != args->from.name)
2933 kmem_free(name, MAXPATHLEN + 1);
2934 if (toname != NULL && toname != args->to.name)
2935 kmem_free(toname, MAXPATHLEN + 1);
2936
2937 DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2938 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2939 RENAME3res *, resp);
2940 if (fvp != NULL)
2941 VN_RELE(fvp);
2942 if (tvp != NULL)
2943 VN_RELE(tvp);
2944 }
2945
2946 void *
2947 rfs3_rename_getfh(RENAME3args *args)
2948 {
2949
2950 return (&args->from.dir);
2951 }
2952
2953 void
2954 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2955 struct svc_req *req, cred_t *cr, bool_t ro)
2956 {
2957 int error;
2958 vnode_t *vp;
2959 vnode_t *dvp;
2960 struct vattr *vap;
2961 struct vattr va;
2962 struct vattr *bvap;
2963 struct vattr bva;
2964 struct vattr *avap;
2965 struct vattr ava;
2966 nfs_fh3 *fh3;
2967 struct exportinfo *to_exi;
2968 bslabel_t *clabel;
2969 struct sockaddr *ca;
2970 char *name = NULL;
2971
2972 vap = NULL;
2973 bvap = NULL;
2974 avap = NULL;
2975 dvp = NULL;
2976
2977 vp = nfs3_fhtovp(&args->file, exi);
2978
2979 DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2980 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2981 LINK3args *, args);
2982
2983 if (vp == NULL) {
2984 error = ESTALE;
2985 goto out;
2986 }
2987
2988 va.va_mask = AT_ALL;
2989 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2990
2991 fh3 = &args->link.dir;
2992 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2993 if (to_exi == NULL) {
2994 resp->status = NFS3ERR_ACCES;
2995 goto out1;
2996 }
2997 exi_rele(to_exi);
2998
2999 if (to_exi != exi) {
3000 resp->status = NFS3ERR_XDEV;
3001 goto out1;
3002 }
3003
3004 if (is_system_labeled()) {
3005 clabel = req->rq_label;
3006
3007 ASSERT(clabel != NULL);
3008 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3009 "got client label from request(1)", struct svc_req *, req);
3010
3011 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3012 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3013 exi)) {
3014 resp->status = NFS3ERR_ACCES;
3015 goto out1;
3016 }
3017 }
3018 }
3019
3020 dvp = nfs3_fhtovp(&args->link.dir, exi);
3021 if (dvp == NULL) {
3022 error = ESTALE;
3023 goto out;
3024 }
3025
3026 bva.va_mask = AT_ALL;
3027 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3028
3029 if (dvp->v_type != VDIR) {
3030 resp->status = NFS3ERR_NOTDIR;
3031 goto out1;
3032 }
3033
3034 if (args->link.name == nfs3nametoolong) {
3035 resp->status = NFS3ERR_NAMETOOLONG;
3036 goto out1;
3037 }
3038
3039 if (args->link.name == NULL || *(args->link.name) == '\0') {
3040 resp->status = NFS3ERR_ACCES;
3041 goto out1;
3042 }
3043
3044 if (rdonly(ro, dvp)) {
3045 resp->status = NFS3ERR_ROFS;
3046 goto out1;
3047 }
3048
3049 if (is_system_labeled()) {
3050 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3051 "got client label from request(1)", struct svc_req *, req);
3052
3053 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3054 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3055 exi)) {
3056 resp->status = NFS3ERR_ACCES;
3057 goto out1;
3058 }
3059 }
3060 }
3061
3062 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3063 name = nfscmd_convname(ca, exi, args->link.name,
3064 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3065
3066 if (name == NULL) {
3067 resp->status = NFS3ERR_SERVERFAULT;
3068 goto out1;
3069 }
3070
3071 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3072
3073 va.va_mask = AT_ALL;
3074 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3075 ava.va_mask = AT_ALL;
3076 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3077
3078 /*
3079 * Force modified data and metadata out to stable storage.
3080 */
3081 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3082 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3083
3084 if (error)
3085 goto out;
3086
3087 VN_RELE(dvp);
3088
3089 resp->status = NFS3_OK;
3090 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3091 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3092
3093 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3094 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3095 LINK3res *, resp);
3096
3097 VN_RELE(vp);
3098
3099 return;
3100
3101 out:
3102 if (curthread->t_flag & T_WOULDBLOCK) {
3103 curthread->t_flag &= ~T_WOULDBLOCK;
3104 resp->status = NFS3ERR_JUKEBOX;
3105 } else
3106 resp->status = puterrno3(error);
3107 out1:
3108 if (name != NULL && name != args->link.name)
3109 kmem_free(name, MAXPATHLEN + 1);
3110
3111 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3112 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3113 LINK3res *, resp);
3114
3115 if (vp != NULL)
3116 VN_RELE(vp);
3117 if (dvp != NULL)
3118 VN_RELE(dvp);
3119 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3120 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3121 }
3122
3123 void *
3124 rfs3_link_getfh(LINK3args *args)
3125 {
3126
3127 return (&args->file);
3128 }
3129
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose length is specified by
 * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
 * is not, then we need to check to make sure that this error does not
 * need to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is comprised of the following:
 *
 * Reply header:
 *	status			- 1 * BYTES_PER_XDR_UNIT
 *	attr. flag		- 1 * BYTES_PER_XDR_UNIT
 *	cookie verifier		- 2 * BYTES_PER_XDR_UNIT
 *	attributes		- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * One entry:
 *	boolean			- 1 * BYTES_PER_XDR_UNIT
 *	file id			- 2 * BYTES_PER_XDR_UNIT
 *	directory name length	- 1 * BYTES_PER_XDR_UNIT
 *	cookie			- 2 * BYTES_PER_XDR_UNIT
 * Trailer:
 *	end of list		- 1 * BYTES_PER_XDR_UNIT
 *	end of file		- 1 * BYTES_PER_XDR_UNIT
 *
 * plus the length of the entry's name rounded up to a multiple of
 * BYTES_PER_XDR_UNIT.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(((1 + 1 + 2 + NFS3_SIZEOF_FATTR3) + (1 + 2 + 1 + 2) + (1 + 1)) * \
	BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT))
3158
3159 /* ARGSUSED */
3160 void
3161 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3162 struct svc_req *req, cred_t *cr, bool_t ro)
3163 {
3164 int error;
3165 vnode_t *vp;
3166 struct vattr *vap;
3167 struct vattr va;
3168 struct iovec iov;
3169 struct uio uio;
3170 char *data;
3171 int iseof;
3172 int bufsize;
3173 int namlen;
3174 uint_t count;
3175 struct sockaddr *ca;
3176
3177 vap = NULL;
3178
3179 vp = nfs3_fhtovp(&args->dir, exi);
3180
3181 DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3182 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3183 READDIR3args *, args);
3184
3185 if (vp == NULL) {
3186 error = ESTALE;
3187 goto out;
3188 }
3189
3190 if (is_system_labeled()) {
3191 bslabel_t *clabel = req->rq_label;
3192
3193 ASSERT(clabel != NULL);
3194 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3195 "got client label from request(1)", struct svc_req *, req);
3196
3197 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3198 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3199 exi)) {
3200 resp->status = NFS3ERR_ACCES;
3201 goto out1;
3202 }
3203 }
3204 }
3205
3206 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3207
3208 va.va_mask = AT_ALL;
3209 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3210
3211 if (vp->v_type != VDIR) {
3212 resp->status = NFS3ERR_NOTDIR;
3213 goto out1;
3214 }
3215
3216 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3217 if (error)
3218 goto out;
3219
3220 /*
3221 * Now don't allow arbitrary count to alloc;
3222 * allow the maximum not to exceed rfs3_tsize()
3223 */
3224 if (args->count > rfs3_tsize(req))
3225 args->count = rfs3_tsize(req);
3226
3227 /*
3228 * Make sure that there is room to read at least one entry
3229 * if any are available.
3230 */
3231 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3232 count = DIRENT64_RECLEN(MAXNAMELEN);
3233 else
3234 count = args->count;
3235
3236 data = kmem_alloc(count, KM_SLEEP);
3237
3238 iov.iov_base = data;
3239 iov.iov_len = count;
3240 uio.uio_iov = &iov;
3241 uio.uio_iovcnt = 1;
3242 uio.uio_segflg = UIO_SYSSPACE;
3243 uio.uio_extflg = UIO_COPY_CACHED;
3244 uio.uio_loffset = (offset_t)args->cookie;
3245 uio.uio_resid = count;
3246
3247 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3248
3249 va.va_mask = AT_ALL;
3250 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3251
3252 if (error) {
3253 kmem_free(data, count);
3254 goto out;
3255 }
3256
3257 /*
3258 * If the count was not large enough to be able to guarantee
3259 * to be able to return at least one entry, then need to
3260 * check to see if NFS3ERR_TOOSMALL should be returned.
3261 */
3262 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3263 /*
3264 * bufsize is used to keep track of the size of the response.
3265 * It is primed with:
3266 * 1 for the status +
3267 * 1 for the dir_attributes.attributes boolean +
3268 * 2 for the cookie verifier
3269 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3270 * to bytes. If there are directory attributes to be
3271 * returned, then:
3272 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3273 * time BYTES_PER_XDR_UNIT is added to account for them.
3274 */
3275 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3276 if (vap != NULL)
3277 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3278 /*
3279 * An entry is composed of:
3280 * 1 for the true/false list indicator +
3281 * 2 for the fileid +
3282 * 1 for the length of the name +
3283 * 2 for the cookie +
3284 * all times BYTES_PER_XDR_UNIT to convert from
3285 * XDR units to bytes, plus the length of the name
3286 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3287 */
3288 if (count != uio.uio_resid) {
3289 namlen = strlen(((struct dirent64 *)data)->d_name);
3290 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3291 roundup(namlen, BYTES_PER_XDR_UNIT);
3292 }
3293 /*
3294 * We need to check to see if the number of bytes left
3295 * to go into the buffer will actually fit into the
3296 * buffer. This is calculated as the size of this
3297 * entry plus:
3298 * 1 for the true/false list indicator +
3299 * 1 for the eof indicator
3300 * times BYTES_PER_XDR_UNIT to convert from from
3301 * XDR units to bytes.
3302 */
3303 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3304 if (bufsize > args->count) {
3305 kmem_free(data, count);
3306 resp->status = NFS3ERR_TOOSMALL;
3307 goto out1;
3308 }
3309 }
3310
3311 /*
3312 * Have a valid readir buffer for the native character
3313 * set. Need to check if a conversion is necessary and
3314 * potentially rewrite the whole buffer. Note that if the
3315 * conversion expands names enough, the structure may not
3316 * fit. In this case, we need to drop entries until if fits
3317 * and patch the counts in order that the next readdir will
3318 * get the correct entries.
3319 */
3320 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3321 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3322
3323
3324 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3325
3326 #if 0 /* notyet */
3327 /*
3328 * Don't do this. It causes local disk writes when just
3329 * reading the file and the overhead is deemed larger
3330 * than the benefit.
3331 */
3332 /*
3333 * Force modified metadata out to stable storage.
3334 */
3335 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3336 #endif
3337
3338 resp->status = NFS3_OK;
3339 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3340 resp->resok.cookieverf = 0;
3341 resp->resok.reply.entries = (entry3 *)data;
3342 resp->resok.reply.eof = iseof;
3343 resp->resok.size = count - uio.uio_resid;
3344 resp->resok.count = args->count;
3345 resp->resok.freecount = count;
3346
3347 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3348 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3349 READDIR3res *, resp);
3350
3351 VN_RELE(vp);
3352
3353 return;
3354
3355 out:
3356 if (curthread->t_flag & T_WOULDBLOCK) {
3357 curthread->t_flag &= ~T_WOULDBLOCK;
3358 resp->status = NFS3ERR_JUKEBOX;
3359 } else
3360 resp->status = puterrno3(error);
3361 out1:
3362 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3363
3364 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3365 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3366 READDIR3res *, resp);
3367
3368 if (vp != NULL) {
3369 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3370 VN_RELE(vp);
3371 }
3372 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3373 }
3374
3375 void *
3376 rfs3_readdir_getfh(READDIR3args *args)
3377 {
3378
3379 return (&args->dir);
3380 }
3381
3382 void
3383 rfs3_readdir_free(READDIR3res *resp)
3384 {
3385
3386 if (resp->status == NFS3_OK)
3387 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3388 }
3389
/*
 * Step from one dirent64 record to the record that follows it in the
 * same buffer, i.e. d_reclen bytes further on.  Any earlier definition
 * of nextdp is discarded first (#undef of an undefined name is a no-op).
 */
#undef nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3394
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 *	boolean				- 1 * BYTES_PER_XDR_UNIT
 *	file id				- 2 * BYTES_PER_XDR_UNIT
 *	directory name length		- 1 * BYTES_PER_XDR_UNIT
 *	cookie				- 2 * BYTES_PER_XDR_UNIT
 *	attribute flag			- 1 * BYTES_PER_XDR_UNIT
 *	attributes			- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	status byte for file handle	- 1 * BYTES_PER_XDR_UNIT
 *	length of a file handle		- 1 * BYTES_PER_XDR_UNIT
 *	Maximum length of a file handle (NFS3_MAXFHSIZE)
 *	name length of the entry rounded up to a multiple of
 *	BYTES_PER_XDR_UNIT
 *
 * The namelen argument is parenthesized in the expansion, matching
 * NFS3_READDIR_MIN_COUNT, so that any expression may be passed safely.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
	BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3418
/*
 * Starting value for the per-call read size (rd_unit) that
 * rfs3_readdirplus() passes to VOP_READDIR; it is further limited there
 * to the space remaining in the entry buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3420
3421 /* ARGSUSED */
3422 void
3423 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3424 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3425 {
3426 int error;
3427 vnode_t *vp;
3428 struct vattr *vap;
3429 struct vattr va;
3430 struct iovec iov;
3431 struct uio uio;
3432 char *data;
3433 int iseof;
3434 struct dirent64 *dp;
3435 vnode_t *nvp;
3436 struct vattr *nvap;
3437 struct vattr nva;
3438 entryplus3_info *infop = NULL;
3439 int size = 0;
3440 int nents = 0;
3441 int bufsize = 0;
3442 int entrysize = 0;
3443 int tofit = 0;
3444 int rd_unit = rfs3_readdir_unit;
3445 int prev_len;
3446 int space_left;
3447 int i;
3448 uint_t *namlen = NULL;
3449 char *ndata = NULL;
3450 struct sockaddr *ca;
3451 size_t ret;
3452
3453 vap = NULL;
3454
3455 vp = nfs3_fhtovp(&args->dir, exi);
3456
3457 DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3458 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3459 READDIRPLUS3args *, args);
3460
3461 if (vp == NULL) {
3462 error = ESTALE;
3463 goto out;
3464 }
3465
3466 if (is_system_labeled()) {
3467 bslabel_t *clabel = req->rq_label;
3468
3469 ASSERT(clabel != NULL);
3470 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3471 char *, "got client label from request(1)",
3472 struct svc_req *, req);
3473
3474 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3475 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3476 exi)) {
3477 resp->status = NFS3ERR_ACCES;
3478 goto out1;
3479 }
3480 }
3481 }
3482
3483 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3484
3485 va.va_mask = AT_ALL;
3486 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3487
3488 if (vp->v_type != VDIR) {
3489 error = ENOTDIR;
3490 goto out;
3491 }
3492
3493 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3494 if (error)
3495 goto out;
3496
3497 /*
3498 * Don't allow arbitrary counts for allocation
3499 */
3500 if (args->maxcount > rfs3_tsize(req))
3501 args->maxcount = rfs3_tsize(req);
3502
3503 /*
3504 * Make sure that there is room to read at least one entry
3505 * if any are available
3506 */
3507 args->dircount = MIN(args->dircount, args->maxcount);
3508
3509 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3510 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3511
3512 /*
3513 * This allocation relies on a minimum directory entry
3514 * being roughly 24 bytes. Therefore, the namlen array
3515 * will have enough space based on the maximum number of
3516 * entries to read.
3517 */
3518 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3519
3520 space_left = args->dircount;
3521 data = kmem_alloc(args->dircount, KM_SLEEP);
3522 dp = (struct dirent64 *)data;
3523 uio.uio_iov = &iov;
3524 uio.uio_iovcnt = 1;
3525 uio.uio_segflg = UIO_SYSSPACE;
3526 uio.uio_extflg = UIO_COPY_CACHED;
3527 uio.uio_loffset = (offset_t)args->cookie;
3528
3529 /*
3530 * bufsize is used to keep track of the size of the response as we
3531 * get post op attributes and filehandles for each entry. This is
3532 * an optimization as the server may have read more entries than will
3533 * fit in the buffer specified by maxcount. We stop calculating
3534 * post op attributes and filehandles once we have exceeded maxcount.
3535 * This will minimize the effect of truncation.
3536 *
3537 * It is primed with:
3538 * 1 for the status +
3539 * 1 for the dir_attributes.attributes boolean +
3540 * 2 for the cookie verifier
3541 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3542 * to bytes. If there are directory attributes to be
3543 * returned, then:
3544 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3545 * time BYTES_PER_XDR_UNIT is added to account for them.
3546 */
3547 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3548 if (vap != NULL)
3549 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3550
3551 getmoredents:
3552 /*
3553 * Here we make a check so that our read unit is not larger than
3554 * the space left in the buffer.
3555 */
3556 rd_unit = MIN(rd_unit, space_left);
3557 iov.iov_base = (char *)dp;
3558 iov.iov_len = rd_unit;
3559 uio.uio_resid = rd_unit;
3560 prev_len = rd_unit;
3561
3562 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3563
3564 if (error) {
3565 kmem_free(data, args->dircount);
3566 goto out;
3567 }
3568
3569 if (uio.uio_resid == prev_len && !iseof) {
3570 if (nents == 0) {
3571 kmem_free(data, args->dircount);
3572 resp->status = NFS3ERR_TOOSMALL;
3573 goto out1;
3574 }
3575
3576 /*
3577 * We could not get any more entries, so get the attributes
3578 * and filehandle for the entries already obtained.
3579 */
3580 goto good;
3581 }
3582
3583 /*
3584 * We estimate the size of the response by assuming the
3585 * entry exists and attributes and filehandle are also valid
3586 */
3587 for (size = prev_len - uio.uio_resid;
3588 size > 0;
3589 size -= dp->d_reclen, dp = nextdp(dp)) {
3590
3591 if (dp->d_ino == 0) {
3592 nents++;
3593 continue;
3594 }
3595
3596 namlen[nents] = strlen(dp->d_name);
3597 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3598
3599 /*
3600 * We need to check to see if the number of bytes left
3601 * to go into the buffer will actually fit into the
3602 * buffer. This is calculated as the size of this
3603 * entry plus:
3604 * 1 for the true/false list indicator +
3605 * 1 for the eof indicator
3606 * times BYTES_PER_XDR_UNIT to convert from XDR units
3607 * to bytes.
3608 *
3609 * Also check the dircount limit against the first entry read
3610 *
3611 */
3612 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3613 if (bufsize + tofit > args->maxcount) {
3614 /*
3615 * We make a check here to see if this was the
3616 * first entry being measured. If so, then maxcount
3617 * was too small to begin with and so we need to
3618 * return with NFS3ERR_TOOSMALL.
3619 */
3620 if (nents == 0) {
3621 kmem_free(data, args->dircount);
3622 resp->status = NFS3ERR_TOOSMALL;
3623 goto out1;
3624 }
3625 iseof = FALSE;
3626 goto good;
3627 }
3628 bufsize += entrysize;
3629 nents++;
3630 }
3631
3632 /*
3633 * If there is enough room to fit at least 1 more entry including
3634 * post op attributes and filehandle in the buffer AND that we haven't
3635 * exceeded dircount then go back and get some more.
3636 */
3637 if (!iseof &&
3638 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3639 space_left -= (prev_len - uio.uio_resid);
3640 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3641 goto getmoredents;
3642
3643 /* else, fall through */
3644 }
3645 good:
3646 va.va_mask = AT_ALL;
3647 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3648
3649 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3650
3651 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3652 resp->resok.infop = infop;
3653
3654 dp = (struct dirent64 *)data;
3655 for (i = 0; i < nents; i++) {
3656
3657 if (dp->d_ino == 0) {
3658 infop[i].attr.attributes = FALSE;
3659 infop[i].fh.handle_follows = FALSE;
3660 dp = nextdp(dp);
3661 continue;
3662 }
3663
3664 infop[i].namelen = namlen[i];
3665
3666 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3667 NULL, NULL, NULL);
3668 if (error) {
3669 infop[i].attr.attributes = FALSE;
3670 infop[i].fh.handle_follows = FALSE;
3671 dp = nextdp(dp);
3672 continue;
3673 }
3674
3675 nva.va_mask = AT_ALL;
3676 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3677
3678 /* Lie about the object type for a referral */
3679 if (vn_is_nfs_reparse(nvp, cr))
3680 nvap->va_type = VLNK;
3681
3682 if (vn_ismntpt(nvp)) {
3683 infop[i].attr.attributes = FALSE;
3684 infop[i].fh.handle_follows = FALSE;
3685 } else {
3686 vattr_to_post_op_attr(nvap, &infop[i].attr);
3687
3688 error = makefh3(&infop[i].fh.handle, nvp, exi);
3689 if (!error)
3690 infop[i].fh.handle_follows = TRUE;
3691 else
3692 infop[i].fh.handle_follows = FALSE;
3693 }
3694
3695 VN_RELE(nvp);
3696 dp = nextdp(dp);
3697 }
3698
3699 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3700 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3701 if (ndata == NULL)
3702 ndata = data;
3703
3704 if (ret > 0) {
3705 /*
3706 * We had to drop one or more entries in order to fit
3707 * during the character conversion. We need to patch
3708 * up the size and eof info.
3709 */
3710 if (iseof)
3711 iseof = FALSE;
3712
3713 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3714 nents, ret);
3715 }
3716
3717
3718 #if 0 /* notyet */
3719 /*
3720 * Don't do this. It causes local disk writes when just
3721 * reading the file and the overhead is deemed larger
3722 * than the benefit.
3723 */
3724 /*
3725 * Force modified metadata out to stable storage.
3726 */
3727 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3728 #endif
3729
3730 kmem_free(namlen, args->dircount);
3731
3732 resp->status = NFS3_OK;
3733 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3734 resp->resok.cookieverf = 0;
3735 resp->resok.reply.entries = (entryplus3 *)ndata;
3736 resp->resok.reply.eof = iseof;
3737 resp->resok.size = nents;
3738 resp->resok.count = args->dircount - ret;
3739 resp->resok.maxcount = args->maxcount;
3740
3741 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3742 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3743 READDIRPLUS3res *, resp);
3744
3745 VN_RELE(vp);
3746
3747 return;
3748
3749 out:
3750 if (curthread->t_flag & T_WOULDBLOCK) {
3751 curthread->t_flag &= ~T_WOULDBLOCK;
3752 resp->status = NFS3ERR_JUKEBOX;
3753 } else {
3754 resp->status = puterrno3(error);
3755 }
3756 out1:
3757 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3758
3759 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3760 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3761 READDIRPLUS3res *, resp);
3762
3763 if (vp != NULL) {
3764 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3765 VN_RELE(vp);
3766 }
3767
3768 if (namlen != NULL)
3769 kmem_free(namlen, args->dircount);
3770
3771 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3772 }
3773
3774 void *
3775 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3776 {
3777
3778 return (&args->dir);
3779 }
3780
3781 void
3782 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3783 {
3784
3785 if (resp->status == NFS3_OK) {
3786 kmem_free(resp->resok.reply.entries, resp->resok.count);
3787 kmem_free(resp->resok.infop,
3788 resp->resok.size * sizeof (struct entryplus3_info));
3789 }
3790 }
3791
3792 /* ARGSUSED */
3793 void
3794 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3795 struct svc_req *req, cred_t *cr, bool_t ro)
3796 {
3797 int error;
3798 vnode_t *vp;
3799 struct vattr *vap;
3800 struct vattr va;
3801 struct statvfs64 sb;
3802
3803 vap = NULL;
3804
3805 vp = nfs3_fhtovp(&args->fsroot, exi);
3806
3807 DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3808 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3809 FSSTAT3args *, args);
3810
3811 if (vp == NULL) {
3812 error = ESTALE;
3813 goto out;
3814 }
3815
3816 if (is_system_labeled()) {
3817 bslabel_t *clabel = req->rq_label;
3818
3819 ASSERT(clabel != NULL);
3820 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3821 "got client label from request(1)", struct svc_req *, req);
3822
3823 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3824 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3825 exi)) {
3826 resp->status = NFS3ERR_ACCES;
3827 goto out1;
3828 }
3829 }
3830 }
3831
3832 error = VFS_STATVFS(vp->v_vfsp, &sb);
3833
3834 va.va_mask = AT_ALL;
3835 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3836
3837 if (error)
3838 goto out;
3839
3840 resp->status = NFS3_OK;
3841 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3842 if (sb.f_blocks != (fsblkcnt64_t)-1)
3843 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3844 else
3845 resp->resok.tbytes = (size3)sb.f_blocks;
3846 if (sb.f_bfree != (fsblkcnt64_t)-1)
3847 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3848 else
3849 resp->resok.fbytes = (size3)sb.f_bfree;
3850 if (sb.f_bavail != (fsblkcnt64_t)-1)
3851 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3852 else
3853 resp->resok.abytes = (size3)sb.f_bavail;
3854 resp->resok.tfiles = (size3)sb.f_files;
3855 resp->resok.ffiles = (size3)sb.f_ffree;
3856 resp->resok.afiles = (size3)sb.f_favail;
3857 resp->resok.invarsec = 0;
3858
3859 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3860 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3861 FSSTAT3res *, resp);
3862 VN_RELE(vp);
3863
3864 return;
3865
3866 out:
3867 if (curthread->t_flag & T_WOULDBLOCK) {
3868 curthread->t_flag &= ~T_WOULDBLOCK;
3869 resp->status = NFS3ERR_JUKEBOX;
3870 } else
3871 resp->status = puterrno3(error);
3872 out1:
3873 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3874 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3875 FSSTAT3res *, resp);
3876
3877 if (vp != NULL)
3878 VN_RELE(vp);
3879 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3880 }
3881
3882 void *
3883 rfs3_fsstat_getfh(FSSTAT3args *args)
3884 {
3885
3886 return (&args->fsroot);
3887 }
3888
3889 /* ARGSUSED */
3890 void
3891 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3892 struct svc_req *req, cred_t *cr, bool_t ro)
3893 {
3894 vnode_t *vp;
3895 struct vattr *vap;
3896 struct vattr va;
3897 uint32_t xfer_size;
3898 ulong_t l = 0;
3899 int error;
3900
3901 vp = nfs3_fhtovp(&args->fsroot, exi);
3902
3903 DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3904 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3905 FSINFO3args *, args);
3906
3907 if (vp == NULL) {
3908 if (curthread->t_flag & T_WOULDBLOCK) {
3909 curthread->t_flag &= ~T_WOULDBLOCK;
3910 resp->status = NFS3ERR_JUKEBOX;
3911 } else
3912 resp->status = NFS3ERR_STALE;
3913 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3914 goto out;
3915 }
3916
3917 if (is_system_labeled()) {
3918 bslabel_t *clabel = req->rq_label;
3919
3920 ASSERT(clabel != NULL);
3921 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3922 "got client label from request(1)", struct svc_req *, req);
3923
3924 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3925 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3926 exi)) {
3927 resp->status = NFS3ERR_STALE;
3928 vattr_to_post_op_attr(NULL,
3929 &resp->resfail.obj_attributes);
3930 goto out;
3931 }
3932 }
3933 }
3934
3935 va.va_mask = AT_ALL;
3936 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3937
3938 resp->status = NFS3_OK;
3939 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3940 xfer_size = rfs3_tsize(req);
3941 resp->resok.rtmax = xfer_size;
3942 resp->resok.rtpref = xfer_size;
3943 resp->resok.rtmult = DEV_BSIZE;
3944 resp->resok.wtmax = xfer_size;
3945 resp->resok.wtpref = xfer_size;
3946 resp->resok.wtmult = DEV_BSIZE;
3947 resp->resok.dtpref = MAXBSIZE;
3948
3949 /*
3950 * Large file spec: want maxfilesize based on limit of
3951 * underlying filesystem. We can guess 2^31-1 if need be.
3952 */
3953 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3954 if (error) {
3955 resp->status = puterrno3(error);
3956 goto out;
3957 }
3958
3959 /*
3960 * If the underlying file system does not support _PC_FILESIZEBITS,
3961 * return a reasonable default. Note that error code on VOP_PATHCONF
3962 * will be 0, even if the underlying file system does not support
3963 * _PC_FILESIZEBITS.
3964 */
3965 if (l == (ulong_t)-1) {
3966 resp->resok.maxfilesize = MAXOFF32_T;
3967 } else {
3968 if (l >= (sizeof (uint64_t) * 8))
3969 resp->resok.maxfilesize = INT64_MAX;
3970 else
3971 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3972 }
3973
3974 resp->resok.time_delta.seconds = 0;
3975 resp->resok.time_delta.nseconds = 1000;
3976 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3977 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3978
3979 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3980 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3981 FSINFO3res *, resp);
3982
3983 VN_RELE(vp);
3984
3985 return;
3986
3987 out:
3988 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3989 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
3990 FSINFO3res *, resp);
3991 if (vp != NULL)
3992 VN_RELE(vp);
3993 }
3994
3995 void *
3996 rfs3_fsinfo_getfh(FSINFO3args *args)
3997 {
3998 return (&args->fsroot);
3999 }
4000
4001 /* ARGSUSED */
4002 void
4003 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4004 struct svc_req *req, cred_t *cr, bool_t ro)
4005 {
4006 int error;
4007 vnode_t *vp;
4008 struct vattr *vap;
4009 struct vattr va;
4010 ulong_t val;
4011
4012 vap = NULL;
4013
4014 vp = nfs3_fhtovp(&args->object, exi);
4015
4016 DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4017 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4018 PATHCONF3args *, args);
4019
4020 if (vp == NULL) {
4021 error = ESTALE;
4022 goto out;
4023 }
4024
4025 if (is_system_labeled()) {
4026 bslabel_t *clabel = req->rq_label;
4027
4028 ASSERT(clabel != NULL);
4029 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4030 "got client label from request(1)", struct svc_req *, req);
4031
4032 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4033 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4034 exi)) {
4035 resp->status = NFS3ERR_ACCES;
4036 goto out1;
4037 }
4038 }
4039 }
4040
4041 va.va_mask = AT_ALL;
4042 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4043
4044 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4045 if (error)
4046 goto out;
4047 resp->resok.info.link_max = (uint32)val;
4048
4049 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4050 if (error)
4051 goto out;
4052 resp->resok.info.name_max = (uint32)val;
4053
4054 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4055 if (error)
4056 goto out;
4057 if (val == 1)
4058 resp->resok.info.no_trunc = TRUE;
4059 else
4060 resp->resok.info.no_trunc = FALSE;
4061
4062 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4063 if (error)
4064 goto out;
4065 if (val == 1)
4066 resp->resok.info.chown_restricted = TRUE;
4067 else
4068 resp->resok.info.chown_restricted = FALSE;
4069
4070 resp->status = NFS3_OK;
4071 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4072 resp->resok.info.case_insensitive = FALSE;
4073 resp->resok.info.case_preserving = TRUE;
4074 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4075 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4076 PATHCONF3res *, resp);
4077 VN_RELE(vp);
4078 return;
4079
4080 out:
4081 if (curthread->t_flag & T_WOULDBLOCK) {
4082 curthread->t_flag &= ~T_WOULDBLOCK;
4083 resp->status = NFS3ERR_JUKEBOX;
4084 } else
4085 resp->status = puterrno3(error);
4086 out1:
4087 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4088 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4089 PATHCONF3res *, resp);
4090 if (vp != NULL)
4091 VN_RELE(vp);
4092 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4093 }
4094
4095 void *
4096 rfs3_pathconf_getfh(PATHCONF3args *args)
4097 {
4098
4099 return (&args->object);
4100 }
4101
4102 void
4103 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4104 struct svc_req *req, cred_t *cr, bool_t ro)
4105 {
4106 nfs3_srv_t *ns;
4107 int error;
4108 vnode_t *vp;
4109 struct vattr *bvap;
4110 struct vattr bva;
4111 struct vattr *avap;
4112 struct vattr ava;
4113
4114 bvap = NULL;
4115 avap = NULL;
4116
4117 vp = nfs3_fhtovp(&args->file, exi);
4118
4119 DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4120 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4121 COMMIT3args *, args);
4122
4123 if (vp == NULL) {
4124 error = ESTALE;
4125 goto out;
4126 }
4127
4128 ns = zone_getspecific(rfs3_zone_key, curzone);
4129 bva.va_mask = AT_ALL;
4130 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4131
4132 /*
4133 * If we can't get the attributes, then we can't do the
4134 * right access checking. So, we'll fail the request.
4135 */
4136 if (error)
4137 goto out;
4138
4139 bvap = &bva;
4140
4141 if (rdonly(ro, vp)) {
4142 resp->status = NFS3ERR_ROFS;
4143 goto out1;
4144 }
4145
4146 if (vp->v_type != VREG) {
4147 resp->status = NFS3ERR_INVAL;
4148 goto out1;
4149 }
4150
4151 if (is_system_labeled()) {
4152 bslabel_t *clabel = req->rq_label;
4153
4154 ASSERT(clabel != NULL);
4155 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4156 "got client label from request(1)", struct svc_req *, req);
4157
4158 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4159 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4160 exi)) {
4161 resp->status = NFS3ERR_ACCES;
4162 goto out1;
4163 }
4164 }
4165 }
4166
4167 if (crgetuid(cr) != bva.va_uid &&
4168 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4169 goto out;
4170
4171 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4172
4173 ava.va_mask = AT_ALL;
4174 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4175
4176 if (error)
4177 goto out;
4178
4179 resp->status = NFS3_OK;
4180 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4181 resp->resok.verf = ns->write3verf;
4182
4183 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4184 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4185 COMMIT3res *, resp);
4186
4187 VN_RELE(vp);
4188
4189 return;
4190
4191 out:
4192 if (curthread->t_flag & T_WOULDBLOCK) {
4193 curthread->t_flag &= ~T_WOULDBLOCK;
4194 resp->status = NFS3ERR_JUKEBOX;
4195 } else
4196 resp->status = puterrno3(error);
4197 out1:
4198 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4199 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4200 COMMIT3res *, resp);
4201
4202 if (vp != NULL)
4203 VN_RELE(vp);
4204 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4205 }
4206
4207 void *
4208 rfs3_commit_getfh(COMMIT3args *args)
4209 {
4210
4211 return (&args->file);
4212 }
4213
4214 static int
4215 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4216 {
4217
4218 vap->va_mask = 0;
4219
4220 if (sap->mode.set_it) {
4221 vap->va_mode = (mode_t)sap->mode.mode;
4222 vap->va_mask |= AT_MODE;
4223 }
4224 if (sap->uid.set_it) {
4225 vap->va_uid = (uid_t)sap->uid.uid;
4226 vap->va_mask |= AT_UID;
4227 }
4228 if (sap->gid.set_it) {
4229 vap->va_gid = (gid_t)sap->gid.gid;
4230 vap->va_mask |= AT_GID;
4231 }
4232 if (sap->size.set_it) {
4233 if (sap->size.size > (size3)((u_longlong_t)-1))
4234 return (EINVAL);
4235 vap->va_size = sap->size.size;
4236 vap->va_mask |= AT_SIZE;
4237 }
4238 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4239 #ifndef _LP64
4240 /* check time validity */
4241 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4242 return (EOVERFLOW);
4243 #endif
4244 /*
4245 * nfs protocol defines times as unsigned so don't extend sign,
4246 * unless sysadmin set nfs_allow_preepoch_time.
4247 */
4248 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4249 sap->atime.atime.seconds);
4250 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4251 vap->va_mask |= AT_ATIME;
4252 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4253 gethrestime(&vap->va_atime);
4254 vap->va_mask |= AT_ATIME;
4255 }
4256 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4257 #ifndef _LP64
4258 /* check time validity */
4259 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4260 return (EOVERFLOW);
4261 #endif
4262 /*
4263 * nfs protocol defines times as unsigned so don't extend sign,
4264 * unless sysadmin set nfs_allow_preepoch_time.
4265 */
4266 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4267 sap->mtime.mtime.seconds);
4268 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4269 vap->va_mask |= AT_MTIME;
4270 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4271 gethrestime(&vap->va_mtime);
4272 vap->va_mask |= AT_MTIME;
4273 }
4274
4275 return (0);
4276 }
4277
4278 static const ftype3 vt_to_nf3[] = {
4279 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4280 };
4281
4282 static int
4283 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4284 {
4285
4286 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4287 /* Return error if time or size overflow */
4288 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4289 return (EOVERFLOW);
4290 }
4291 fap->type = vt_to_nf3[vap->va_type];
4292 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4293 fap->nlink = (uint32)vap->va_nlink;
4294 if (vap->va_uid == UID_NOBODY)
4295 fap->uid = (uid3)NFS_UID_NOBODY;
4296 else
4297 fap->uid = (uid3)vap->va_uid;
4298 if (vap->va_gid == GID_NOBODY)
4299 fap->gid = (gid3)NFS_GID_NOBODY;
4300 else
4301 fap->gid = (gid3)vap->va_gid;
4302 fap->size = (size3)vap->va_size;
4303 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4304 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4305 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4306 fap->fsid = (uint64)vap->va_fsid;
4307 fap->fileid = (fileid3)vap->va_nodeid;
4308 fap->atime.seconds = vap->va_atime.tv_sec;
4309 fap->atime.nseconds = vap->va_atime.tv_nsec;
4310 fap->mtime.seconds = vap->va_mtime.tv_sec;
4311 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4312 fap->ctime.seconds = vap->va_ctime.tv_sec;
4313 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4314 return (0);
4315 }
4316
4317 static int
4318 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4319 {
4320
4321 /* Return error if time or size overflow */
4322 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4323 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4324 NFS3_SIZE_OK(vap->va_size))) {
4325 return (EOVERFLOW);
4326 }
4327 wccap->size = (size3)vap->va_size;
4328 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4329 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4330 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4331 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4332 return (0);
4333 }
4334
4335 static void
4336 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4337 {
4338
4339 /* don't return attrs if time overflow */
4340 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4341 poap->attributes = TRUE;
4342 } else
4343 poap->attributes = FALSE;
4344 }
4345
4346 void
4347 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4348 {
4349
4350 /* don't return attrs if time overflow */
4351 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4352 poap->attributes = TRUE;
4353 } else
4354 poap->attributes = FALSE;
4355 }
4356
4357 static void
4358 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4359 {
4360 vattr_to_pre_op_attr(bvap, &wccp->before);
4361 vattr_to_post_op_attr(avap, &wccp->after);
4362 }
4363
4364 static int
4365 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4366 {
4367 struct clist *wcl;
4368 int wlist_len;
4369 count3 count = rok->count;
4370
4371 wcl = args->wlist;
4372 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4373 return (FALSE);
4374
4375 wcl = args->wlist;
4376 rok->wlist_len = wlist_len;
4377 rok->wlist = wcl;
4378 return (TRUE);
4379 }
4380
4381 /* ARGSUSED */
4382 static void *
4383 rfs3_zone_init(zoneid_t zoneid)
4384 {
4385 nfs3_srv_t *ns;
4386 struct rfs3_verf_overlay {
4387 uint_t id; /* a "unique" identifier */
4388 int ts; /* a unique timestamp */
4389 } *verfp;
4390 timestruc_t now;
4391
4392 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4393
4394 /*
4395 * The following algorithm attempts to find a unique verifier
4396 * to be used as the write verifier returned from the server
4397 * to the client. It is important that this verifier change
4398 * whenever the server reboots. Of secondary importance, it
4399 * is important for the verifier to be unique between two
4400 * different servers.
4401 *
4402 * Thus, an attempt is made to use the system hostid and the
4403 * current time in seconds when the nfssrv kernel module is
4404 * loaded. It is assumed that an NFS server will not be able
4405 * to boot and then to reboot in less than a second. If the
4406 * hostid has not been set, then the current high resolution
4407 * time is used. This will ensure different verifiers each
4408 * time the server reboots and minimize the chances that two
4409 * different servers will have the same verifier.
4410 */
4411
4412 #ifndef lint
4413 /*
4414 * We ASSERT that this constant logic expression is
4415 * always true because in the past, it wasn't.
4416 */
4417 ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4418 #endif
4419
4420 gethrestime(&now);
4421 verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4422 verfp->ts = (int)now.tv_sec;
4423 verfp->id = zone_get_hostid(NULL);
4424
4425 if (verfp->id == 0)
4426 verfp->id = (uint_t)now.tv_nsec;
4427
4428 return (ns);
4429 }
4430
4431 /* ARGSUSED */
4432 static void
4433 rfs3_zone_fini(zoneid_t zoneid, void *data)
4434 {
4435 nfs3_srv_t *ns = data;
4436
4437 kmem_free(ns, sizeof (*ns));
4438 }
4439
/*
 * NFSv3 server initialization.  Stores a caller id obtained from
 * fs_new_caller_id() in nfs3_srv_caller_id, then creates rfs3_zone_key
 * with rfs3_zone_init() and rfs3_zone_fini() as its callbacks (NULL is
 * passed for the callback between them).
 */
void
rfs3_srvrinit(void)
{
	nfs3_srv_caller_id = fs_new_caller_id();
	zone_key_create(&rfs3_zone_key, rfs3_zone_init, NULL, rfs3_zone_fini);
}
4446
/*
 * NFSv3 server teardown counterpart of rfs3_srvrinit().  Currently an
 * empty function.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}