1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2018 Nexenta Systems, Inc.
24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 #include <sys/sdt.h>
52
53 #include <rpc/types.h>
54 #include <rpc/auth.h>
55 #include <rpc/svc.h>
56 #include <rpc/rpc_rdma.h>
57
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61
62 #include <sys/strsubr.h>
63 #include <sys/tsol/label.h>
64 #include <sys/tsol/tndb.h>
65
66 #include <sys/zone.h>
67
68 #include <inet/ip.h>
69 #include <inet/ip6.h>
70
71 /*
72 * Zone global variables of NFSv3 server
73 */
74 typedef struct nfs3_srv {
75 writeverf3 write3verf;
76 } nfs3_srv_t;
77
78 /*
79 * These are the interface routines for the server side of the
80 * Network File System. See the NFS version 3 protocol specification
81 * for a description of this interface.
82 */
83
84 static int sattr3_to_vattr(sattr3 *, struct vattr *);
85 static int vattr_to_fattr3(struct vattr *, fattr3 *);
86 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
87 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
88 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
89 static int rdma_setup_read_data3(READ3args *, READ3resok *);
90
91 extern int nfs_loaned_buffers;
92
93 u_longlong_t nfs3_srv_caller_id;
94
95 static nfs3_srv_t *
96 nfs3_get_srv(void)
97 {
98 nfs_globals_t *ng = nfs_srv_getzg();
99 nfs3_srv_t *srv = ng->nfs3_srv;
100 ASSERT(srv != NULL);
101 return (srv);
102 }
103
104 /* ARGSUSED */
105 void
106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
107 struct svc_req *req, cred_t *cr, bool_t ro)
108 {
109 int error;
110 vnode_t *vp;
111 struct vattr va;
112
113 vp = nfs3_fhtovp(&args->object, exi);
114
115 DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
116 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
117 GETATTR3args *, args);
118
119 if (vp == NULL) {
120 error = ESTALE;
121 goto out;
122 }
123
124 va.va_mask = AT_ALL;
125 error = rfs4_delegated_getattr(vp, &va, 0, cr);
126
127 if (!error) {
128 /* Lie about the object type for a referral */
129 if (vn_is_nfs_reparse(vp, cr))
130 va.va_type = VLNK;
131
132 /* overflow error if time or size is out of range */
133 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
134 if (error)
135 goto out;
136 resp->status = NFS3_OK;
137
138 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
139 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
140 GETATTR3res *, resp);
141
142 VN_RELE(vp);
143
144 return;
145 }
146
147 out:
148 if (curthread->t_flag & T_WOULDBLOCK) {
149 curthread->t_flag &= ~T_WOULDBLOCK;
150 resp->status = NFS3ERR_JUKEBOX;
151 } else
152 resp->status = puterrno3(error);
153
154 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
155 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
156 GETATTR3res *, resp);
157
158 if (vp != NULL)
159 VN_RELE(vp);
160 }
161
162 void *
163 rfs3_getattr_getfh(GETATTR3args *args)
164 {
165
166 return (&args->object);
167 }
168
169 void
170 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
171 struct svc_req *req, cred_t *cr, bool_t ro)
172 {
173 int error;
174 vnode_t *vp;
175 struct vattr *bvap;
176 struct vattr bva;
177 struct vattr *avap;
178 struct vattr ava;
179 int flag;
180 int in_crit = 0;
181 struct flock64 bf;
182 caller_context_t ct;
183
184 bvap = NULL;
185 avap = NULL;
186
187 vp = nfs3_fhtovp(&args->object, exi);
188
189 DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
190 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
191 SETATTR3args *, args);
192
193 if (vp == NULL) {
194 error = ESTALE;
195 goto out;
196 }
197
198 error = sattr3_to_vattr(&args->new_attributes, &ava);
199 if (error)
200 goto out;
201
202 if (is_system_labeled()) {
203 bslabel_t *clabel = req->rq_label;
204
205 ASSERT(clabel != NULL);
206 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
207 "got client label from request(1)", struct svc_req *, req);
208
209 if (!blequal(&l_admin_low->tsl_label, clabel)) {
210 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
211 exi)) {
212 resp->status = NFS3ERR_ACCES;
213 goto out1;
214 }
215 }
216 }
217
218 /*
219 * We need to specially handle size changes because of
220 * possible conflicting NBMAND locks. Get into critical
221 * region before VOP_GETATTR, so the size attribute is
222 * valid when checking conflicts.
223 *
224 * Also, check to see if the v4 side of the server has
225 * delegated this file. If so, then we return JUKEBOX to
226 * allow the client to retrasmit its request.
227 */
228 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
229 if (nbl_need_check(vp)) {
230 nbl_start_crit(vp, RW_READER);
231 in_crit = 1;
232 }
233 }
234
235 bva.va_mask = AT_ALL;
236 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
237
238 /*
239 * If we can't get the attributes, then we can't do the
240 * right access checking. So, we'll fail the request.
241 */
242 if (error)
243 goto out;
244
245 bvap = &bva;
246
247 if (rdonly(ro, vp)) {
248 resp->status = NFS3ERR_ROFS;
249 goto out1;
250 }
251
252 if (args->guard.check &&
253 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
254 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
255 resp->status = NFS3ERR_NOT_SYNC;
256 goto out1;
257 }
258
259 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
260 flag = ATTR_UTIME;
261 else
262 flag = 0;
263
264 /*
265 * If the filesystem is exported with nosuid, then mask off
266 * the setuid and setgid bits.
267 */
268 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
269 (exi->exi_export.ex_flags & EX_NOSUID))
270 ava.va_mode &= ~(VSUID | VSGID);
271
272 ct.cc_sysid = 0;
273 ct.cc_pid = 0;
274 ct.cc_caller_id = nfs3_srv_caller_id;
275 ct.cc_flags = CC_DONTBLOCK;
276
277 /*
278 * We need to specially handle size changes because it is
279 * possible for the client to create a file with modes
280 * which indicate read-only, but with the file opened for
281 * writing. If the client then tries to set the size of
282 * the file, then the normal access checking done in
283 * VOP_SETATTR would prevent the client from doing so,
284 * although it should be legal for it to do so. To get
285 * around this, we do the access checking for ourselves
286 * and then use VOP_SPACE which doesn't do the access
287 * checking which VOP_SETATTR does. VOP_SPACE can only
288 * operate on VREG files, let VOP_SETATTR handle the other
289 * extremely rare cases.
290 * Also the client should not be allowed to change the
291 * size of the file if there is a conflicting non-blocking
292 * mandatory lock in the region the change.
293 */
294 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
295 if (in_crit) {
296 u_offset_t offset;
297 ssize_t length;
298
299 if (ava.va_size < bva.va_size) {
300 offset = ava.va_size;
301 length = bva.va_size - ava.va_size;
302 } else {
303 offset = bva.va_size;
304 length = ava.va_size - bva.va_size;
305 }
306 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
307 NULL)) {
308 error = EACCES;
309 goto out;
310 }
311 }
312
313 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
314 ava.va_mask &= ~AT_SIZE;
315 bf.l_type = F_WRLCK;
316 bf.l_whence = 0;
317 bf.l_start = (off64_t)ava.va_size;
318 bf.l_len = 0;
319 bf.l_sysid = 0;
320 bf.l_pid = 0;
321 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
322 (offset_t)ava.va_size, cr, &ct);
323 }
324 }
325
326 if (!error && ava.va_mask)
327 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
328
329 /* check if a monitor detected a delegation conflict */
330 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
331 resp->status = NFS3ERR_JUKEBOX;
332 goto out1;
333 }
334
335 ava.va_mask = AT_ALL;
336 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
337
338 /*
339 * Force modified metadata out to stable storage.
340 */
341 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
342
343 if (error)
344 goto out;
345
346 if (in_crit)
347 nbl_end_crit(vp);
348
349 resp->status = NFS3_OK;
350 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
351
352 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
353 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
354 SETATTR3res *, resp);
355
356 VN_RELE(vp);
357
358 return;
359
360 out:
361 if (curthread->t_flag & T_WOULDBLOCK) {
362 curthread->t_flag &= ~T_WOULDBLOCK;
363 resp->status = NFS3ERR_JUKEBOX;
364 } else
365 resp->status = puterrno3(error);
366 out1:
367 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
368 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
369 SETATTR3res *, resp);
370
371 if (vp != NULL) {
372 if (in_crit)
373 nbl_end_crit(vp);
374 VN_RELE(vp);
375 }
376 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
377 }
378
379 void *
380 rfs3_setattr_getfh(SETATTR3args *args)
381 {
382
383 return (&args->object);
384 }
385
386 /* ARGSUSED */
387 void
388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
389 struct svc_req *req, cred_t *cr, bool_t ro)
390 {
391 int error;
392 vnode_t *vp;
393 vnode_t *dvp;
394 struct vattr *vap;
395 struct vattr va;
396 struct vattr *dvap;
397 struct vattr dva;
398 nfs_fh3 *fhp;
399 struct sec_ol sec = {0, 0};
400 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
401 struct sockaddr *ca;
402 char *name = NULL;
403
404 dvap = NULL;
405
406 if (exi != NULL)
407 exi_hold(exi);
408
409 /*
410 * Allow lookups from the root - the default
411 * location of the public filehandle.
412 */
413 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
414 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
415 dvp = ZONE_ROOTVP();
416 VN_HOLD(dvp);
417
418 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
419 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
420 LOOKUP3args *, args);
421 } else {
422 dvp = nfs3_fhtovp(&args->what.dir, exi);
423
424 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
425 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
426 LOOKUP3args *, args);
427
428 if (dvp == NULL) {
429 error = ESTALE;
430 goto out;
431 }
432 }
433
434 dva.va_mask = AT_ALL;
435 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
436
437 if (args->what.name == nfs3nametoolong) {
438 resp->status = NFS3ERR_NAMETOOLONG;
439 goto out1;
440 }
441
442 if (args->what.name == NULL || *(args->what.name) == '\0') {
443 resp->status = NFS3ERR_ACCES;
444 goto out1;
445 }
446
447 fhp = &args->what.dir;
448 ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL */
449 if (strcmp(args->what.name, "..") == 0 &&
450 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
451 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
452 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
453 /*
454 * special case for ".." and 'nohide'exported root
455 */
456 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
457 resp->status = NFS3ERR_ACCES;
458 goto out1;
459 }
460 } else {
461 resp->status = NFS3ERR_NOENT;
462 goto out1;
463 }
464 }
465
466 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
467 name = nfscmd_convname(ca, exi, args->what.name,
468 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
469
470 if (name == NULL) {
471 resp->status = NFS3ERR_ACCES;
472 goto out1;
473 }
474
475 /*
476 * If the public filehandle is used then allow
477 * a multi-component lookup
478 */
479 if (PUBLIC_FH3(&args->what.dir)) {
480 publicfh_flag = TRUE;
481
482 exi_rele(exi);
483 exi = NULL;
484
485 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
486 &exi, &sec);
487
488 /*
489 * Since WebNFS may bypass MOUNT, we need to ensure this
490 * request didn't come from an unlabeled admin_low client.
491 */
492 if (is_system_labeled() && error == 0) {
493 int addr_type;
494 void *ipaddr;
495 tsol_tpc_t *tp;
496
497 if (ca->sa_family == AF_INET) {
498 addr_type = IPV4_VERSION;
499 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
500 } else if (ca->sa_family == AF_INET6) {
501 addr_type = IPV6_VERSION;
502 ipaddr = &((struct sockaddr_in6 *)
503 ca)->sin6_addr;
504 }
505 tp = find_tpc(ipaddr, addr_type, B_FALSE);
506 if (tp == NULL || tp->tpc_tp.tp_doi !=
507 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
508 SUN_CIPSO) {
509 VN_RELE(vp);
510 error = EACCES;
511 }
512 if (tp != NULL)
513 TPC_RELE(tp);
514 }
515 } else {
516 error = VOP_LOOKUP(dvp, name, &vp,
517 NULL, 0, NULL, cr, NULL, NULL, NULL);
518 }
519
520 if (name != args->what.name)
521 kmem_free(name, MAXPATHLEN + 1);
522
523 if (error == 0 && vn_ismntpt(vp)) {
524 error = rfs_cross_mnt(&vp, &exi);
525 if (error)
526 VN_RELE(vp);
527 }
528
529 if (is_system_labeled() && error == 0) {
530 bslabel_t *clabel = req->rq_label;
531
532 ASSERT(clabel != NULL);
533 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
534 "got client label from request(1)", struct svc_req *, req);
535
536 if (!blequal(&l_admin_low->tsl_label, clabel)) {
537 if (!do_rfs_label_check(clabel, dvp,
538 DOMINANCE_CHECK, exi)) {
539 VN_RELE(vp);
540 error = EACCES;
541 }
542 }
543 }
544
545 dva.va_mask = AT_ALL;
546 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
547
548 if (error)
549 goto out;
550
551 if (sec.sec_flags & SEC_QUERY) {
552 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
553 } else {
554 error = makefh3(&resp->resok.object, vp, exi);
555 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
556 auth_weak = TRUE;
557 }
558
559 if (error) {
560 VN_RELE(vp);
561 goto out;
562 }
563
564 va.va_mask = AT_ALL;
565 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
566
567 VN_RELE(vp);
568
569 resp->status = NFS3_OK;
570 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
571 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
572
573 /*
574 * If it's public fh, no 0x81, and client's flavor is
575 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
576 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
577 */
578 if (auth_weak)
579 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
580
581 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
582 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
583 LOOKUP3res *, resp);
584 VN_RELE(dvp);
585 exi_rele(exi);
586
587 return;
588
589 out:
590 if (curthread->t_flag & T_WOULDBLOCK) {
591 curthread->t_flag &= ~T_WOULDBLOCK;
592 resp->status = NFS3ERR_JUKEBOX;
593 } else
594 resp->status = puterrno3(error);
595 out1:
596 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
597 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
598 LOOKUP3res *, resp);
599
600 if (exi != NULL)
601 exi_rele(exi);
602
603 if (dvp != NULL)
604 VN_RELE(dvp);
605 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
606
607 }
608
609 void *
610 rfs3_lookup_getfh(LOOKUP3args *args)
611 {
612
613 return (&args->what.dir);
614 }
615
616 /* ARGSUSED */
617 void
618 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
619 struct svc_req *req, cred_t *cr, bool_t ro)
620 {
621 int error;
622 vnode_t *vp;
623 struct vattr *vap;
624 struct vattr va;
625 int checkwriteperm;
626 boolean_t dominant_label = B_FALSE;
627 boolean_t equal_label = B_FALSE;
628 boolean_t admin_low_client;
629
630 vap = NULL;
631
632 vp = nfs3_fhtovp(&args->object, exi);
633
634 DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
635 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
636 ACCESS3args *, args);
637
638 if (vp == NULL) {
639 error = ESTALE;
640 goto out;
641 }
642
643 /*
644 * If the file system is exported read only, it is not appropriate
645 * to check write permissions for regular files and directories.
646 * Special files are interpreted by the client, so the underlying
647 * permissions are sent back to the client for interpretation.
648 */
649 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
650 checkwriteperm = 0;
651 else
652 checkwriteperm = 1;
653
654 /*
655 * We need the mode so that we can correctly determine access
656 * permissions relative to a mandatory lock file. Access to
657 * mandatory lock files is denied on the server, so it might
658 * as well be reflected to the server during the open.
659 */
660 va.va_mask = AT_MODE;
661 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
662 if (error)
663 goto out;
664
665 vap = &va;
666
667 resp->resok.access = 0;
668
669 if (is_system_labeled()) {
670 bslabel_t *clabel = req->rq_label;
671
672 ASSERT(clabel != NULL);
673 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
674 "got client label from request(1)", struct svc_req *, req);
675
676 if (!blequal(&l_admin_low->tsl_label, clabel)) {
677 if ((equal_label = do_rfs_label_check(clabel, vp,
678 EQUALITY_CHECK, exi)) == B_FALSE) {
679 dominant_label = do_rfs_label_check(clabel,
680 vp, DOMINANCE_CHECK, exi);
681 } else
682 dominant_label = B_TRUE;
683 admin_low_client = B_FALSE;
684 } else
685 admin_low_client = B_TRUE;
686 }
687
688 if (args->access & ACCESS3_READ) {
689 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
690 if (error) {
691 if (curthread->t_flag & T_WOULDBLOCK)
692 goto out;
693 } else if (!MANDLOCK(vp, va.va_mode) &&
694 (!is_system_labeled() || admin_low_client ||
695 dominant_label))
696 resp->resok.access |= ACCESS3_READ;
697 }
698 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
699 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
700 if (error) {
701 if (curthread->t_flag & T_WOULDBLOCK)
702 goto out;
703 } else if (!is_system_labeled() || admin_low_client ||
704 dominant_label)
705 resp->resok.access |= ACCESS3_LOOKUP;
706 }
707 if (checkwriteperm &&
708 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
709 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
710 if (error) {
711 if (curthread->t_flag & T_WOULDBLOCK)
712 goto out;
713 } else if (!MANDLOCK(vp, va.va_mode) &&
714 (!is_system_labeled() || admin_low_client || equal_label)) {
715 resp->resok.access |=
716 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
717 }
718 }
719 if (checkwriteperm &&
720 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
721 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
722 if (error) {
723 if (curthread->t_flag & T_WOULDBLOCK)
724 goto out;
725 } else if (!is_system_labeled() || admin_low_client ||
726 equal_label)
727 resp->resok.access |= ACCESS3_DELETE;
728 }
729 if (args->access & ACCESS3_EXECUTE) {
730 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
731 if (error) {
732 if (curthread->t_flag & T_WOULDBLOCK)
733 goto out;
734 } else if (!MANDLOCK(vp, va.va_mode) &&
735 (!is_system_labeled() || admin_low_client ||
736 dominant_label))
737 resp->resok.access |= ACCESS3_EXECUTE;
738 }
739
740 va.va_mask = AT_ALL;
741 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
742
743 resp->status = NFS3_OK;
744 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
745
746 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
747 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
748 ACCESS3res *, resp);
749
750 VN_RELE(vp);
751
752 return;
753
754 out:
755 if (curthread->t_flag & T_WOULDBLOCK) {
756 curthread->t_flag &= ~T_WOULDBLOCK;
757 resp->status = NFS3ERR_JUKEBOX;
758 } else
759 resp->status = puterrno3(error);
760 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
761 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
762 ACCESS3res *, resp);
763 if (vp != NULL)
764 VN_RELE(vp);
765 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
766 }
767
768 void *
769 rfs3_access_getfh(ACCESS3args *args)
770 {
771
772 return (&args->object);
773 }
774
775 /* ARGSUSED */
776 void
777 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
778 struct svc_req *req, cred_t *cr, bool_t ro)
779 {
780 int error;
781 vnode_t *vp;
782 struct vattr *vap;
783 struct vattr va;
784 struct iovec iov;
785 struct uio uio;
786 char *data;
787 struct sockaddr *ca;
788 char *name = NULL;
789 int is_referral = 0;
790
791 vap = NULL;
792
793 vp = nfs3_fhtovp(&args->symlink, exi);
794
795 DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
796 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
797 READLINK3args *, args);
798
799 if (vp == NULL) {
800 error = ESTALE;
801 goto out;
802 }
803
804 va.va_mask = AT_ALL;
805 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
806 if (error)
807 goto out;
808
809 vap = &va;
810
811 /* We lied about the object type for a referral */
812 if (vn_is_nfs_reparse(vp, cr))
813 is_referral = 1;
814
815 if (vp->v_type != VLNK && !is_referral) {
816 resp->status = NFS3ERR_INVAL;
817 goto out1;
818 }
819
820 if (MANDLOCK(vp, va.va_mode)) {
821 resp->status = NFS3ERR_ACCES;
822 goto out1;
823 }
824
825 if (is_system_labeled()) {
826 bslabel_t *clabel = req->rq_label;
827
828 ASSERT(clabel != NULL);
829 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
830 "got client label from request(1)", struct svc_req *, req);
831
832 if (!blequal(&l_admin_low->tsl_label, clabel)) {
833 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
834 exi)) {
835 resp->status = NFS3ERR_ACCES;
836 goto out1;
837 }
838 }
839 }
840
841 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
842
843 if (is_referral) {
844 char *s;
845 size_t strsz;
846 kstat_named_t *stat = exi->exi_ne->ne_globals->svstat[NFS_V3];
847
848 /* Get an artificial symlink based on a referral */
849 s = build_symlink(vp, cr, &strsz);
850 stat[NFS_REFERLINKS].value.ui64++;
851 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
852 vnode_t *, vp, char *, s);
853 if (s == NULL)
854 error = EINVAL;
855 else {
856 error = 0;
857 (void) strlcpy(data, s, MAXPATHLEN + 1);
858 kmem_free(s, strsz);
859 }
860
861 } else {
862
863 iov.iov_base = data;
864 iov.iov_len = MAXPATHLEN;
865 uio.uio_iov = &iov;
866 uio.uio_iovcnt = 1;
867 uio.uio_segflg = UIO_SYSSPACE;
868 uio.uio_extflg = UIO_COPY_CACHED;
869 uio.uio_loffset = 0;
870 uio.uio_resid = MAXPATHLEN;
871
872 error = VOP_READLINK(vp, &uio, cr, NULL);
873
874 if (!error)
875 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
876 }
877
878 va.va_mask = AT_ALL;
879 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
880
881 /* Lie about object type again just to be consistent */
882 if (is_referral && vap != NULL)
883 vap->va_type = VLNK;
884
885 #if 0 /* notyet */
886 /*
887 * Don't do this. It causes local disk writes when just
888 * reading the file and the overhead is deemed larger
889 * than the benefit.
890 */
891 /*
892 * Force modified metadata out to stable storage.
893 */
894 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
895 #endif
896
897 if (error) {
898 kmem_free(data, MAXPATHLEN + 1);
899 goto out;
900 }
901
902 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
903 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
904 MAXPATHLEN + 1);
905
906 if (name == NULL) {
907 /*
908 * Even though the conversion failed, we return
909 * something. We just don't translate it.
910 */
911 name = data;
912 }
913
914 resp->status = NFS3_OK;
915 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
916 resp->resok.data = name;
917
918 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
919 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
920 READLINK3res *, resp);
921 VN_RELE(vp);
922
923 if (name != data)
924 kmem_free(data, MAXPATHLEN + 1);
925
926 return;
927
928 out:
929 if (curthread->t_flag & T_WOULDBLOCK) {
930 curthread->t_flag &= ~T_WOULDBLOCK;
931 resp->status = NFS3ERR_JUKEBOX;
932 } else
933 resp->status = puterrno3(error);
934 out1:
935 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
936 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
937 READLINK3res *, resp);
938 if (vp != NULL)
939 VN_RELE(vp);
940 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
941 }
942
943 void *
944 rfs3_readlink_getfh(READLINK3args *args)
945 {
946
947 return (&args->symlink);
948 }
949
950 void
951 rfs3_readlink_free(READLINK3res *resp)
952 {
953
954 if (resp->status == NFS3_OK)
955 kmem_free(resp->resok.data, MAXPATHLEN + 1);
956 }
957
958 /*
959 * Server routine to handle read
960 * May handle RDMA data as well as mblks
961 */
962 /* ARGSUSED */
963 void
964 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
965 struct svc_req *req, cred_t *cr, bool_t ro)
966 {
967 int error;
968 vnode_t *vp;
969 struct vattr *vap;
970 struct vattr va;
971 struct iovec iov, *iovp = NULL;
972 int iovcnt;
973 struct uio uio;
974 u_offset_t offset;
975 mblk_t *mp = NULL;
976 int in_crit = 0;
977 int need_rwunlock = 0;
978 caller_context_t ct;
979 int rdma_used = 0;
980 int loaned_buffers;
981 struct uio *uiop;
982
983 vap = NULL;
984
985 vp = nfs3_fhtovp(&args->file, exi);
986
987 DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
988 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
989 READ3args *, args);
990
991
992 if (vp == NULL) {
993 error = ESTALE;
994 goto out;
995 }
996
997 if (args->wlist) {
998 if (args->count > clist_len(args->wlist)) {
999 error = EINVAL;
1000 goto out;
1001 }
1002 rdma_used = 1;
1003 }
1004
1005 /* use loaned buffers for TCP */
1006 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1007
1008 if (is_system_labeled()) {
1009 bslabel_t *clabel = req->rq_label;
1010
1011 ASSERT(clabel != NULL);
1012 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1013 "got client label from request(1)", struct svc_req *, req);
1014
1015 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1016 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1017 exi)) {
1018 resp->status = NFS3ERR_ACCES;
1019 goto out1;
1020 }
1021 }
1022 }
1023
1024 ct.cc_sysid = 0;
1025 ct.cc_pid = 0;
1026 ct.cc_caller_id = nfs3_srv_caller_id;
1027 ct.cc_flags = CC_DONTBLOCK;
1028
1029 /*
1030 * Enter the critical region before calling VOP_RWLOCK
1031 * to avoid a deadlock with write requests.
1032 */
1033 if (nbl_need_check(vp)) {
1034 nbl_start_crit(vp, RW_READER);
1035 in_crit = 1;
1036 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1037 NULL)) {
1038 error = EACCES;
1039 goto out;
1040 }
1041 }
1042
1043 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1044
1045 /* check if a monitor detected a delegation conflict */
1046 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1047 resp->status = NFS3ERR_JUKEBOX;
1048 goto out1;
1049 }
1050
1051 need_rwunlock = 1;
1052
1053 va.va_mask = AT_ALL;
1054 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1055
1056 /*
1057 * If we can't get the attributes, then we can't do the
1058 * right access checking. So, we'll fail the request.
1059 */
1060 if (error)
1061 goto out;
1062
1063 vap = &va;
1064
1065 if (vp->v_type != VREG) {
1066 resp->status = NFS3ERR_INVAL;
1067 goto out1;
1068 }
1069
1070 if (crgetuid(cr) != va.va_uid) {
1071 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1072 if (error) {
1073 if (curthread->t_flag & T_WOULDBLOCK)
1074 goto out;
1075 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1076 if (error)
1077 goto out;
1078 }
1079 }
1080
1081 if (MANDLOCK(vp, va.va_mode)) {
1082 resp->status = NFS3ERR_ACCES;
1083 goto out1;
1084 }
1085
1086 offset = args->offset;
1087 if (offset >= va.va_size) {
1088 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1089 if (in_crit)
1090 nbl_end_crit(vp);
1091 resp->status = NFS3_OK;
1092 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1093 resp->resok.count = 0;
1094 resp->resok.eof = TRUE;
1095 resp->resok.data.data_len = 0;
1096 resp->resok.data.data_val = NULL;
1097 resp->resok.data.mp = NULL;
1098 /* RDMA */
1099 resp->resok.wlist = args->wlist;
1100 resp->resok.wlist_len = resp->resok.count;
1101 if (resp->resok.wlist)
1102 clist_zero_len(resp->resok.wlist);
1103 goto done;
1104 }
1105
1106 if (args->count == 0) {
1107 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1108 if (in_crit)
1109 nbl_end_crit(vp);
1110 resp->status = NFS3_OK;
1111 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1112 resp->resok.count = 0;
1113 resp->resok.eof = FALSE;
1114 resp->resok.data.data_len = 0;
1115 resp->resok.data.data_val = NULL;
1116 resp->resok.data.mp = NULL;
1117 /* RDMA */
1118 resp->resok.wlist = args->wlist;
1119 resp->resok.wlist_len = resp->resok.count;
1120 if (resp->resok.wlist)
1121 clist_zero_len(resp->resok.wlist);
1122 goto done;
1123 }
1124
1125 /*
1126 * do not allocate memory more the max. allowed
1127 * transfer size
1128 */
1129 if (args->count > rfs3_tsize(req))
1130 args->count = rfs3_tsize(req);
1131
1132 if (loaned_buffers) {
1133 uiop = (uio_t *)rfs_setup_xuio(vp);
1134 ASSERT(uiop != NULL);
1135 uiop->uio_segflg = UIO_SYSSPACE;
1136 uiop->uio_loffset = args->offset;
1137 uiop->uio_resid = args->count;
1138
1139 /* Jump to do the read if successful */
1140 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1141 /*
1142 * Need to hold the vnode until after VOP_RETZCBUF()
1143 * is called.
1144 */
1145 VN_HOLD(vp);
1146 goto doio_read;
1147 }
1148
1149 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1150 uiop->uio_loffset, int, uiop->uio_resid);
1151
1152 uiop->uio_extflg = 0;
1153 /* failure to setup for zero copy */
1154 rfs_free_xuio((void *)uiop);
1155 loaned_buffers = 0;
1156 }
1157
1158 /*
1159 * If returning data via RDMA Write, then grab the chunk list.
1160 * If we aren't returning READ data w/RDMA_WRITE, then grab
1161 * a mblk.
1162 */
1163 if (rdma_used) {
1164 (void) rdma_get_wchunk(req, &iov, args->wlist);
1165 uio.uio_iov = &iov;
1166 uio.uio_iovcnt = 1;
1167 } else {
1168 /*
1169 * mp will contain the data to be sent out in the read reply.
1170 * For UDP, this will be freed after the reply has been sent
1171 * out by the driver. For TCP, it will be freed after the last
1172 * segment associated with the reply has been ACKed by the
1173 * client.
1174 */
1175 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1176 uio.uio_iov = iovp;
1177 uio.uio_iovcnt = iovcnt;
1178 }
1179
1180 uio.uio_segflg = UIO_SYSSPACE;
1181 uio.uio_extflg = UIO_COPY_CACHED;
1182 uio.uio_loffset = args->offset;
1183 uio.uio_resid = args->count;
1184 uiop = &uio;
1185
1186 doio_read:
1187 error = VOP_READ(vp, uiop, 0, cr, &ct);
1188
1189 if (error) {
1190 if (mp)
1191 freemsg(mp);
1192 /* check if a monitor detected a delegation conflict */
1193 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1194 resp->status = NFS3ERR_JUKEBOX;
1195 goto out1;
1196 }
1197 goto out;
1198 }
1199
1200 /* make mblk using zc buffers */
1201 if (loaned_buffers) {
1202 mp = uio_to_mblk(uiop);
1203 ASSERT(mp != NULL);
1204 }
1205
1206 va.va_mask = AT_ALL;
1207 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1208
1209 if (error)
1210 vap = NULL;
1211 else
1212 vap = &va;
1213
1214 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1215
1216 if (in_crit)
1217 nbl_end_crit(vp);
1218
1219 resp->status = NFS3_OK;
1220 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1221 resp->resok.count = args->count - uiop->uio_resid;
1222 if (!error && offset + resp->resok.count == va.va_size)
1223 resp->resok.eof = TRUE;
1224 else
1225 resp->resok.eof = FALSE;
1226 resp->resok.data.data_len = resp->resok.count;
1227
1228 if (mp)
1229 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1230
1231 resp->resok.data.mp = mp;
1232 resp->resok.size = (uint_t)args->count;
1233
1234 if (rdma_used) {
1235 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1236 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1237 resp->status = NFS3ERR_INVAL;
1238 }
1239 } else {
1240 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1241 (resp->resok).wlist = NULL;
1242 }
1243
1244 done:
1245 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1246 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1247 READ3res *, resp);
1248
1249 VN_RELE(vp);
1250
1251 if (iovp != NULL)
1252 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1253
1254 return;
1255
1256 out:
1257 if (curthread->t_flag & T_WOULDBLOCK) {
1258 curthread->t_flag &= ~T_WOULDBLOCK;
1259 resp->status = NFS3ERR_JUKEBOX;
1260 } else
1261 resp->status = puterrno3(error);
1262 out1:
1263 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1264 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1265 READ3res *, resp);
1266
1267 if (vp != NULL) {
1268 if (need_rwunlock)
1269 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1270 if (in_crit)
1271 nbl_end_crit(vp);
1272 VN_RELE(vp);
1273 }
1274 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1275
1276 if (iovp != NULL)
1277 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1278 }
1279
1280 void
1281 rfs3_read_free(READ3res *resp)
1282 {
1283 mblk_t *mp;
1284
1285 if (resp->status == NFS3_OK) {
1286 mp = resp->resok.data.mp;
1287 if (mp != NULL)
1288 freemsg(mp);
1289 }
1290 }
1291
1292 void *
1293 rfs3_read_getfh(READ3args *args)
1294 {
1295
1296 return (&args->file);
1297 }
1298
/*
 * Size of the on-stack iovec array used by rfs3_write().  An incoming
 * mblk chain with more links than this is described by a kmem_alloc()'d
 * array instead.
 */
1299 #define MAX_IOVECS 12
1300
/*
 * DEBUG-only counters: rfs3_write_hits is bumped when the mblk chain fits
 * in the on-stack iovec array, rfs3_write_misses when an array had to be
 * allocated.
 */
1301 #ifdef DEBUG
1302 static int rfs3_write_hits = 0;
1303 static int rfs3_write_misses = 0;
1304 #endif
1305
/*
 * rfs3_write - service an NFSv3 WRITE request.
 *
 *	args	- WRITE3args: file handle, offset, count, stability level
 *		  and the data (args->mblk chain, args->rlist, or
 *		  args->data.data_val)
 *	resp	- WRITE3res to fill in; status is set on every path and
 *		  either resok or resfail receives the wcc data
 *	exi	- export the file handle is resolved against
 *	req	- RPC request (client label, DTrace probes)
 *	cr	- credentials used for the VOP calls
 *	ro	- passed to rdonly() to decide whether the write is refused
 *
 * All paths converge on "out", which fires the op__write__done probe and,
 * when a vnode was obtained, drops the rwlock (if taken), leaves the
 * nbmand critical region (if entered) and releases the vnode hold.
 */
1306 void
1307 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1308 struct svc_req *req, cred_t *cr, bool_t ro)
1309 {
1310 nfs3_srv_t *ns;
1311 int error;
1312 vnode_t *vp;
1313 struct vattr *bvap = NULL;
1314 struct vattr bva;
1315 struct vattr *avap = NULL;
1316 struct vattr ava;
1317 u_offset_t rlimit;
1318 struct uio uio;
1319 struct iovec iov[MAX_IOVECS];
1320 mblk_t *m;
1321 struct iovec *iovp;
1322 int iovcnt;
1323 int ioflag;
1324 cred_t *savecred;
1325 int in_crit = 0;
/* -1 means "rwlock not held"; the cleanup at "out" keys off this value */
1326 int rwlock_ret = -1;
1327 caller_context_t ct;
1328
1329 vp = nfs3_fhtovp(&args->file, exi);
1330
1331 DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1332 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1333 WRITE3args *, args);
1334
/* No vnode could be obtained for the file handle: fail with ESTALE. */
1335 if (vp == NULL) {
1336 error = ESTALE;
1337 goto err;
1338 }
1339
1340 ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
1341 ns = nfs3_get_srv();
1342
/*
 * On a labeled system, a client whose label is not equal to
 * l_admin_low must pass an EQUALITY_CHECK label check on the vnode.
 */
1343 if (is_system_labeled()) {
1344 bslabel_t *clabel = req->rq_label;
1345
1346 ASSERT(clabel != NULL);
1347 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1348 "got client label from request(1)", struct svc_req *, req);
1349
1350 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1351 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1352 exi)) {
1353 resp->status = NFS3ERR_ACCES;
1354 goto err1;
1355 }
1356 }
1357 }
1358
/*
 * Caller context handed to the VOP calls below.  CC_DONTBLOCK is
 * requested; CC_WOULDBLOCK is tested after VOP_RWLOCK and VOP_WRITE.
 */
1359 ct.cc_sysid = 0;
1360 ct.cc_pid = 0;
1361 ct.cc_caller_id = nfs3_srv_caller_id;
1362 ct.cc_flags = CC_DONTBLOCK;
1363
1364 /*
1365 * We have to enter the critical region before calling VOP_RWLOCK
1366 * to avoid a deadlock with ufs.
1367 */
1368 if (nbl_need_check(vp)) {
1369 nbl_start_crit(vp, RW_READER);
1370 in_crit = 1;
1371 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1372 NULL)) {
1373 error = EACCES;
1374 goto err;
1375 }
1376 }
1377
1378 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1379
1380 /* check if a monitor detected a delegation conflict */
1381 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1382 resp->status = NFS3ERR_JUKEBOX;
/* reset so that "out" does not call VOP_RWUNLOCK */
1383 rwlock_ret = -1;
1384 goto err1;
1385 }
1386
1387
1388 bva.va_mask = AT_ALL;
1389 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1390
1391 /*
1392 * If we can't get the attributes, then we can't do the
1393 * right access checking. So, we'll fail the request.
1394 */
1395 if (error)
1396 goto err;
1397
/*
 * Until the write has been attempted, the "before" attributes also
 * stand in as the "after" attributes for any reply built below.
 */
1398 bvap = &bva;
1399 avap = bvap;
1400
/* The declared count must match the length of the data received. */
1401 if (args->count != args->data.data_len) {
1402 resp->status = NFS3ERR_INVAL;
1403 goto err1;
1404 }
1405
1406 if (rdonly(ro, vp)) {
1407 resp->status = NFS3ERR_ROFS;
1408 goto err1;
1409 }
1410
/* Only regular files may be written. */
1411 if (vp->v_type != VREG) {
1412 resp->status = NFS3ERR_INVAL;
1413 goto err1;
1414 }
1415
/*
 * The VWRITE access check is skipped when the caller's uid matches
 * the file's owner.
 */
1416 if (crgetuid(cr) != bva.va_uid &&
1417 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1418 goto err;
1419
1420 if (MANDLOCK(vp, bva.va_mode)) {
1421 resp->status = NFS3ERR_ACCES;
1422 goto err1;
1423 }
1424
/* Zero-length write: report success without calling VOP_WRITE. */
1425 if (args->count == 0) {
1426 resp->status = NFS3_OK;
1427 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1428 resp->resok.count = 0;
1429 resp->resok.committed = args->stable;
1430 resp->resok.verf = ns->write3verf;
1431 goto out;
1432 }
1433
/*
 * Describe the data with an iovec array: one entry per mblk when the
 * data arrived as an mblk chain, otherwise a single entry pointing at
 * either the rlist's c_daddr3 address or args->data.data_val.
 */
1434 if (args->mblk != NULL) {
1435 iovcnt = 0;
1436 for (m = args->mblk; m != NULL; m = m->b_cont)
1437 iovcnt++;
1438 if (iovcnt <= MAX_IOVECS) {
1439 #ifdef DEBUG
1440 rfs3_write_hits++;
1441 #endif
1442 iovp = iov;
1443 } else {
1444 #ifdef DEBUG
1445 rfs3_write_misses++;
1446 #endif
1447 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1448 }
1449 mblk_to_iov(args->mblk, iovcnt, iovp);
1450
1451 } else if (args->rlist != NULL) {
1452 iovcnt = 1;
1453 iovp = iov;
1454 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1455 iovp->iov_len = args->count;
1456 } else {
1457 iovcnt = 1;
1458 iovp = iov;
1459 iovp->iov_base = args->data.data_val;
1460 iovp->iov_len = args->count;
1461 }
1462
1463 uio.uio_iov = iovp;
1464 uio.uio_iovcnt = iovcnt;
1465
1466 uio.uio_segflg = UIO_SYSSPACE;
1467 uio.uio_extflg = UIO_COPY_DEFAULT;
1468 uio.uio_loffset = args->offset;
1469 uio.uio_resid = args->count;
/*
 * The limit comes from curproc->p_fsz_ctl.  If fewer than count bytes
 * lie between the offset and that limit, shrink the transfer to fit.
 */
1470 uio.uio_llimit = curproc->p_fsz_ctl;
1471 rlimit = uio.uio_llimit - args->offset;
1472 if (rlimit < (u_offset_t)uio.uio_resid)
1473 uio.uio_resid = (int)rlimit;
1474
/*
 * Translate the requested stability level into VOP_WRITE ioflags.
 * An unrecognized level is rejected; a heap-allocated iovec array is
 * freed here because the err1 path does not free it.
 */
1475 if (args->stable == UNSTABLE)
1476 ioflag = 0;
1477 else if (args->stable == FILE_SYNC)
1478 ioflag = FSYNC;
1479 else if (args->stable == DATA_SYNC)
1480 ioflag = FDSYNC;
1481 else {
1482 if (iovp != iov)
1483 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1484 resp->status = NFS3ERR_INVAL;
1485 goto err1;
1486 }
1487
1488 /*
1489 * We're changing creds because VM may fault and we need
1490 * the cred of the current thread to be used if quota
1491 * checking is enabled.
1492 */
1493 savecred = curthread->t_cred;
1494 curthread->t_cred = cr;
1495 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1496 curthread->t_cred = savecred;
1497
/* The iovec array is not used again after VOP_WRITE returns. */
1498 if (iovp != iov)
1499 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1500
1501 /* check if a monitor detected a delegation conflict */
1502 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1503 resp->status = NFS3ERR_JUKEBOX;
1504 goto err1;
1505 }
1506
/*
 * Fetch the post-write attributes before acting on the write error so
 * that a failure reply can carry them too.
 */
1507 ava.va_mask = AT_ALL;
1508 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1509
1510 if (error)
1511 goto err;
1512
1513 /*
1514 * If we were unable to get the V_WRITELOCK_TRUE, then we
1515 * may not have accurate after attrs, so check if
1516 * we have both attributes, they have a non-zero va_seq, and
1517 * va_seq has changed by exactly one,
1518 * if not, turn off the before attr.
1519 */
1520 if (rwlock_ret != V_WRITELOCK_TRUE) {
1521 if (bvap == NULL || avap == NULL ||
1522 bvap->va_seq == 0 || avap->va_seq == 0 ||
1523 avap->va_seq != (bvap->va_seq + 1)) {
1524 bvap = NULL;
1525 }
1526 }
1527
/* Success: report how many bytes VOP_WRITE consumed. */
1528 resp->status = NFS3_OK;
1529 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1530 resp->resok.count = args->count - uio.uio_resid;
1531 resp->resok.committed = args->stable;
1532 resp->resok.verf = ns->write3verf;
1533 goto out;
1534
/*
 * err:  turn "error" (or a pending T_WOULDBLOCK) into resp->status.
 * err1: status is already set; fill in the failure wcc data.
 * out:  common exit for success and failure.
 */
1535 err:
1536 if (curthread->t_flag & T_WOULDBLOCK) {
1537 curthread->t_flag &= ~T_WOULDBLOCK;
1538 resp->status = NFS3ERR_JUKEBOX;
1539 } else
1540 resp->status = puterrno3(error);
1541 err1:
1542 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1543 out:
1544 DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1545 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1546 WRITE3res *, resp);
1547
1548 if (vp != NULL) {
1549 if (rwlock_ret != -1)
1550 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1551 if (in_crit)
1552 nbl_end_crit(vp);
1553 VN_RELE(vp);
1554 }
1555 }
1556
1557 void *
1558 rfs3_write_getfh(WRITE3args *args)
1559 {
1560
1561 return (&args->file);
1562 }
1563
/*
 * rfs3_create - service an NFSv3 CREATE request.
 *
 *	args	- CREATE3args: directory file handle, name, and "how"
 *		  (UNCHECKED, GUARDED or EXCLUSIVE, with either object
 *		  attributes or a verifier)
 *	resp	- CREATE3res to fill in
 *	exi	- export the directory handle is resolved against
 *	req	- RPC request (client label, caller address, DTrace probes)
 *	cr	- credentials used for the VOP calls
 *	ro	- passed to rdonly() to decide whether the create is refused
 *
 * The success path returns from the middle of the function after
 * releasing vp, tvp and dvp.  Failures go through "out" (status derived
 * from "error") or "out1" (status already set), which free the converted
 * name and release tvp and dvp.
 */
1564 void
1565 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1566 struct svc_req *req, cred_t *cr, bool_t ro)
1567 {
1568 int error;
1569 int in_crit = 0;
1570 vnode_t *vp;
1571 vnode_t *tvp = NULL;
1572 vnode_t *dvp;
1573 struct vattr *vap;
1574 struct vattr va;
1575 struct vattr *dbvap;
1576 struct vattr dbva;
1577 struct vattr *davap;
1578 struct vattr dava;
1579 enum vcexcl excl;
1580 nfstime3 *mtime;
1581 len_t reqsize;
1582 bool_t trunc;
1583 struct sockaddr *ca;
1584 char *name = NULL;
1585
1586 dbvap = NULL;
1587 davap = NULL;
1588
1589 dvp = nfs3_fhtovp(&args->where.dir, exi);
1590
1591 DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1592 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1593 CREATE3args *, args);
1594
1595 if (dvp == NULL) {
1596 error = ESTALE;
1597 goto out;
1598 }
1599
/*
 * Capture the directory's pre-operation attributes.  They also serve
 * as the post-operation attributes until the create is attempted.
 */
1600 dbva.va_mask = AT_ALL;
1601 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1602 davap = dbvap;
1603
1604 if (args->where.name == nfs3nametoolong) {
1605 resp->status = NFS3ERR_NAMETOOLONG;
1606 goto out1;
1607 }
1608
1609 if (args->where.name == NULL || *(args->where.name) == '\0') {
1610 resp->status = NFS3ERR_ACCES;
1611 goto out1;
1612 }
1613
1614 if (rdonly(ro, dvp)) {
1615 resp->status = NFS3ERR_ROFS;
1616 goto out1;
1617 }
1618
/*
 * On a labeled system, a client whose label is not equal to
 * l_admin_low must pass an EQUALITY_CHECK label check on the directory.
 */
1619 if (is_system_labeled()) {
1620 bslabel_t *clabel = req->rq_label;
1621
1622 ASSERT(clabel != NULL);
1623 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1624 "got client label from request(1)", struct svc_req *, req);
1625
1626 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1627 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1628 exi)) {
1629 resp->status = NFS3ERR_ACCES;
1630 goto out1;
1631 }
1632 }
1633 }
1634
/*
 * Convert the client-supplied name.  The result is either
 * args->where.name itself or a separate MAXPATHLEN + 1 byte buffer;
 * in the latter case it is freed on every exit path below.
 */
1635 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1636 name = nfscmd_convname(ca, exi, args->where.name,
1637 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1638
1639 if (name == NULL) {
1640 /* This is really a Solaris EILSEQ */
1641 resp->status = NFS3ERR_INVAL;
1642 goto out1;
1643 }
1644
/*
 * EXCLUSIVE create: make a mode-0 regular file and record the
 * client's verifier in the mtime, which is compared again below if
 * the create reports EEXIST.
 */
1645 if (args->how.mode == EXCLUSIVE) {
1646 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1647 va.va_type = VREG;
1648 va.va_mode = (mode_t)0;
1649 /*
1650 * Ensure no time overflows and that types match
1651 */
1652 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1653 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1654 va.va_mtime.tv_nsec = mtime->nseconds;
1655 excl = EXCL;
1656 } else {
1657 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1658 &va);
1659 if (error)
1660 goto out;
1661 va.va_mask |= AT_TYPE;
1662 va.va_type = VREG;
1663 if (args->how.mode == GUARDED)
1664 excl = EXCL;
1665 else {
1666 excl = NONEXCL;
1667
1668 /*
1669 * During creation of file in non-exclusive mode
1670 * if size of file is being set then make sure
1671 * that if the file already exists that no conflicting
1672 * non-blocking mandatory locks exists in the region
1673 * being modified. If there are conflicting locks fail
1674 * the operation with EACCES.
1675 */
1676 if (va.va_mask & AT_SIZE) {
1677 struct vattr tva;
1678
1679 /*
1680 * Does file already exist?
1681 */
1682 error = VOP_LOOKUP(dvp, name, &tvp,
1683 NULL, 0, NULL, cr, NULL, NULL, NULL);
1684
1685 /*
1686 * Check to see if the file has been delegated
1687 * to a v4 client. If so, then begin recall of
1688 * the delegation and return JUKEBOX to allow
1689 * the client to retransmit its request.
1690 */
1691
1692 trunc = va.va_size == 0;
1693 if (!error &&
1694 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1695 resp->status = NFS3ERR_JUKEBOX;
1696 goto out1;
1697 }
1698
1699 /*
1700 * Check for NBMAND lock conflicts
1701 */
1702 if (!error && nbl_need_check(tvp)) {
1703 u_offset_t offset;
1704 ssize_t len;
1705
1706 nbl_start_crit(tvp, RW_READER);
1707 in_crit = 1;
1708
1709 tva.va_mask = AT_SIZE;
1710 error = VOP_GETATTR(tvp, &tva, 0, cr,
1711 NULL);
1712 /*
1713 * Can't check for conflicts, so return
1714 * error.
1715 */
1716 if (error)
1717 goto out;
1718
/*
 * The region checked runs from the smaller of the current and
 * requested sizes for the absolute difference between them.
 */
1719 offset = tva.va_size < va.va_size ?
1720 tva.va_size : va.va_size;
1721 len = tva.va_size < va.va_size ?
1722 va.va_size - tva.va_size :
1723 tva.va_size - va.va_size;
1724 if (nbl_conflict(tvp, NBL_WRITE,
1725 offset, len, 0, NULL)) {
1726 error = EACCES;
1727 goto out;
1728 }
1729 } else if (tvp) {
/* No NBMAND check needed: drop the lookup's hold now. */
1730 VN_RELE(tvp);
1731 tvp = NULL;
1732 }
1733 }
1734 }
/*
 * Remember the requested size for the post-create size check.
 * NOTE(review): reqsize is assigned only here, yet it is read below
 * whenever size.set_it is true; this presumably relies on
 * sattr3_to_vattr() setting AT_SIZE in that case - confirm.
 */
1735 if (va.va_mask & AT_SIZE)
1736 reqsize = va.va_size;
1737 }
1738
1739 /*
1740 * Must specify the mode.
1741 */
1742 if (!(va.va_mask & AT_MODE)) {
1743 resp->status = NFS3ERR_INVAL;
1744 goto out1;
1745 }
1746
1747 /*
1748 * If the filesystem is exported with nosuid, then mask off
1749 * the setuid and setgid bits.
1750 */
1751 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1752 va.va_mode &= ~(VSUID | VSGID);
1753
1754 tryagain:
1755 /*
1756 * The file open mode used is VWRITE. If the client needs
1757 * some other semantic, then it should do the access checking
1758 * itself. It would have been nice to have the file open mode
1759 * passed as part of the arguments.
1760 */
1761 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1762 &vp, cr, 0, NULL, NULL);
1763
/*
 * Refresh the directory's post-operation attributes whether or not
 * the create succeeded.
 */
1764 dava.va_mask = AT_ALL;
1765 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1766
1767 if (error) {
1768 /*
1769 * If we got something other than file already exists
1770 * then just return this error. Otherwise, we got
1771 * EEXIST. If we were doing a GUARDED create, then
1772 * just return this error. Otherwise, we need to
1773 * make sure that this wasn't a duplicate of an
1774 * exclusive create request.
1775 *
1776 * The assumption is made that a non-exclusive create
1777 * request will never return EEXIST.
1778 */
1779 if (error != EEXIST || args->how.mode == GUARDED)
1780 goto out;
1781 /*
1782 * Lookup the file so that we can get a vnode for it.
1783 */
1784 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1785 NULL, cr, NULL, NULL, NULL);
1786 if (error) {
1787 /*
1788 * We couldn't find the file that we thought that
1789 * we just created. So, we'll just try creating
1790 * it again.
1791 */
1792 if (error == ENOENT)
1793 goto tryagain;
1794 goto out;
1795 }
1796
1797 /*
1798 * If the file is delegated to a v4 client, go ahead
1799 * and initiate recall, this create is a hint that a
1800 * conflicting v3 open has occurred.
1801 */
1802
1803 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1804 VN_RELE(vp);
1805 resp->status = NFS3ERR_JUKEBOX;
1806 goto out1;
1807 }
1808
1809 va.va_mask = AT_ALL;
1810 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1811
/*
 * For an EXCLUSIVE create the existing file is accepted only if its
 * mtime matches the verifier stored by the original request;
 * otherwise the request fails with EEXIST.
 */
1812 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1813 /* % with INT32_MAX to prevent overflows */
1814 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1815 vap->va_mtime.tv_sec !=
1816 (mtime->seconds % INT32_MAX) ||
1817 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1818 VN_RELE(vp);
1819 error = EEXIST;
1820 goto out;
1821 }
1822 } else {
/* VOP_CREATE succeeded and vp is held. */
1823
1824 if ((args->how.mode == UNCHECKED ||
1825 args->how.mode == GUARDED) &&
1826 args->how.createhow3_u.obj_attributes.size.set_it &&
1827 va.va_size == 0)
1828 trunc = TRUE;
1829 else
1830 trunc = FALSE;
1831
1832 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1833 VN_RELE(vp);
1834 resp->status = NFS3ERR_JUKEBOX;
1835 goto out1;
1836 }
1837
1838 va.va_mask = AT_ALL;
1839 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1840
1841 /*
1842 * We need to check to make sure that the file got
1843 * created to the indicated size. If not, we do a
1844 * setattr to try to change the size, but we don't
1845 * try too hard. This shouldn't be a problem as most
1846 * clients will only specify a size of zero which
1847 * local file systems handle. However, even if
1848 * the client does specify a non-zero size, it can
1849 * still recover by checking the size of the file
1850 * after it has created it and then issue a setattr
1851 * request of its own to set the size of the file.
1852 */
1853 if (vap != NULL &&
1854 (args->how.mode == UNCHECKED ||
1855 args->how.mode == GUARDED) &&
1856 args->how.createhow3_u.obj_attributes.size.set_it &&
1857 vap->va_size != reqsize) {
1858 va.va_mask = AT_SIZE;
1859 va.va_size = reqsize;
1860 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1861 va.va_mask = AT_ALL;
1862 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1863 }
1864 }
1865
/* Success from here on: vp is held and is released below. */
1866 if (name != args->where.name)
1867 kmem_free(name, MAXPATHLEN + 1);
1868
/* A makefh3() failure only suppresses the handle; the reply is still OK. */
1869 error = makefh3(&resp->resok.obj.handle, vp, exi);
1870 if (error)
1871 resp->resok.obj.handle_follows = FALSE;
1872 else
1873 resp->resok.obj.handle_follows = TRUE;
1874
1875 /*
1876 * Force modified data and metadata out to stable storage.
1877 */
1878 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1879 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1880
1881 VN_RELE(vp);
/*
 * tvp is still held here only when the NBMAND check above was
 * performed on an already existing file.
 */
1882 if (tvp != NULL) {
1883 if (in_crit)
1884 nbl_end_crit(tvp);
1885 VN_RELE(tvp);
1886 }
1887
1888 resp->status = NFS3_OK;
1889 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1890 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1891
1892 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1893 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1894 CREATE3res *, resp);
1895
1896 VN_RELE(dvp);
1897 return;
1898
/*
 * out:  turn "error" (or a pending T_WOULDBLOCK) into resp->status.
 * out1: status is already set; fire the done probe and clean up.
 */
1899 out:
1900 if (curthread->t_flag & T_WOULDBLOCK) {
1901 curthread->t_flag &= ~T_WOULDBLOCK;
1902 resp->status = NFS3ERR_JUKEBOX;
1903 } else
1904 resp->status = puterrno3(error);
1905 out1:
1906 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1907 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1908 CREATE3res *, resp);
1909
1910 if (name != NULL && name != args->where.name)
1911 kmem_free(name, MAXPATHLEN + 1);
1912
1913 if (tvp != NULL) {
1914 if (in_crit)
1915 nbl_end_crit(tvp);
1916 VN_RELE(tvp);
1917 }
1918 if (dvp != NULL)
1919 VN_RELE(dvp);
1920 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1921 }
1922
1923 void *
1924 rfs3_create_getfh(CREATE3args *args)
1925 {
1926
1927 return (&args->where.dir);
1928 }
1929
/*
 * rfs3_mkdir - service an NFSv3 MKDIR request.
 *
 *	args	- MKDIR3args: parent directory file handle, name and the
 *		  attributes for the new directory (a mode is required)
 *	resp	- MKDIR3res to fill in
 *	exi	- export the directory handle is resolved against
 *	req	- RPC request (client label, caller address, DTrace probes)
 *	cr	- credentials used for the VOP calls
 *	ro	- passed to rdonly() to decide whether the mkdir is refused
 *
 * The success path returns directly after releasing vp and dvp.
 * Failures go through "out" (status derived from "error") or "out1"
 * (status already set), which release dvp and fill in the failure wcc
 * data.
 */
1930 void
1931 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1932 struct svc_req *req, cred_t *cr, bool_t ro)
1933 {
1934 int error;
1935 vnode_t *vp = NULL;
1936 vnode_t *dvp;
1937 struct vattr *vap;
1938 struct vattr va;
1939 struct vattr *dbvap;
1940 struct vattr dbva;
1941 struct vattr *davap;
1942 struct vattr dava;
1943 struct sockaddr *ca;
1944 char *name = NULL;
1945
1946 dbvap = NULL;
1947 davap = NULL;
1948
1949 dvp = nfs3_fhtovp(&args->where.dir, exi);
1950
1951 DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1952 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1953 MKDIR3args *, args);
1954
1955 if (dvp == NULL) {
1956 error = ESTALE;
1957 goto out;
1958 }
1959
/*
 * Capture the parent's pre-operation attributes.  They also serve as
 * the post-operation attributes until the mkdir is attempted.
 */
1960 dbva.va_mask = AT_ALL;
1961 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1962 davap = dbvap;
1963
1964 if (args->where.name == nfs3nametoolong) {
1965 resp->status = NFS3ERR_NAMETOOLONG;
1966 goto out1;
1967 }
1968
1969 if (args->where.name == NULL || *(args->where.name) == '\0') {
1970 resp->status = NFS3ERR_ACCES;
1971 goto out1;
1972 }
1973
1974 if (rdonly(ro, dvp)) {
1975 resp->status = NFS3ERR_ROFS;
1976 goto out1;
1977 }
1978
/*
 * On a labeled system, a client whose label is not equal to
 * l_admin_low must pass an EQUALITY_CHECK label check on the directory.
 */
1979 if (is_system_labeled()) {
1980 bslabel_t *clabel = req->rq_label;
1981
1982 ASSERT(clabel != NULL);
1983 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1984 "got client label from request(1)", struct svc_req *, req);
1985
1986 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1987 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1988 exi)) {
1989 resp->status = NFS3ERR_ACCES;
1990 goto out1;
1991 }
1992 }
1993 }
1994
1995 error = sattr3_to_vattr(&args->attributes, &va);
1996 if (error)
1997 goto out;
1998
/* The request must specify a mode for the new directory. */
1999 if (!(va.va_mask & AT_MODE)) {
2000 resp->status = NFS3ERR_INVAL;
2001 goto out1;
2002 }
2003
/*
 * Convert the client-supplied name.  The result is either
 * args->where.name itself or a separate MAXPATHLEN + 1 byte buffer,
 * which is freed right after VOP_MKDIR.
 */
2004 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2005 name = nfscmd_convname(ca, exi, args->where.name,
2006 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2007
2008 if (name == NULL) {
2009 resp->status = NFS3ERR_INVAL;
2010 goto out1;
2011 }
2012
2013 va.va_mask |= AT_TYPE;
2014 va.va_type = VDIR;
2015
2016 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2017
2018 if (name != args->where.name)
2019 kmem_free(name, MAXPATHLEN + 1);
2020
/*
 * Refresh the parent's post-operation attributes and sync the parent
 * whether or not the mkdir succeeded; "error" is examined afterwards.
 */
2021 dava.va_mask = AT_ALL;
2022 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2023
2024 /*
2025 * Force modified data and metadata out to stable storage.
2026 */
2027 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2028
2029 if (error)
2030 goto out;
2031
/* A makefh3() failure only suppresses the handle; the reply is still OK. */
2032 error = makefh3(&resp->resok.obj.handle, vp, exi);
2033 if (error)
2034 resp->resok.obj.handle_follows = FALSE;
2035 else
2036 resp->resok.obj.handle_follows = TRUE;
2037
2038 va.va_mask = AT_ALL;
2039 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2040
2041 /*
2042 * Force modified data and metadata out to stable storage.
2043 */
2044 (void) VOP_FSYNC(vp, 0, cr, NULL);
2045
2046 VN_RELE(vp);
2047
2048 resp->status = NFS3_OK;
2049 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2050 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2051
2052 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2053 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2054 MKDIR3res *, resp);
2055 VN_RELE(dvp);
2056
2057 return;
2058
/*
 * out:  turn "error" (or a pending T_WOULDBLOCK) into resp->status.
 * out1: status is already set; fire the done probe and clean up.
 */
2059 out:
2060 if (curthread->t_flag & T_WOULDBLOCK) {
2061 curthread->t_flag &= ~T_WOULDBLOCK;
2062 resp->status = NFS3ERR_JUKEBOX;
2063 } else
2064 resp->status = puterrno3(error);
2065 out1:
2066 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2067 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2068 MKDIR3res *, resp);
2069 if (dvp != NULL)
2070 VN_RELE(dvp);
2071 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2072 }
2073
2074 void *
2075 rfs3_mkdir_getfh(MKDIR3args *args)
2076 {
2077
2078 return (&args->where.dir);
2079 }
2080
/*
 * rfs3_symlink - service an NFSv3 SYMLINK request.
 *
 *	args	- SYMLINK3args: parent directory file handle, link name,
 *		  link attributes (a mode is required) and link contents
 *	resp	- SYMLINK3res to fill in
 *	exi	- export the directory handle is resolved against
 *	req	- RPC request (client label, caller address, DTrace probes)
 *	cr	- credentials used for the VOP calls
 *	ro	- passed to rdonly() to decide whether the request is refused
 *
 * Every path, success or failure, ends at "out", which frees the
 * converted name and link data, fires the op__symlink__done probe and
 * releases dvp.
 */
2081 void
2082 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2083 struct svc_req *req, cred_t *cr, bool_t ro)
2084 {
2085 int error;
2086 vnode_t *vp;
2087 vnode_t *dvp;
2088 struct vattr *vap;
2089 struct vattr va;
2090 struct vattr *dbvap;
2091 struct vattr dbva;
2092 struct vattr *davap;
2093 struct vattr dava;
2094 struct sockaddr *ca;
2095 char *name = NULL;
2096 char *symdata = NULL;
2097
2098 dbvap = NULL;
2099 davap = NULL;
2100
2101 dvp = nfs3_fhtovp(&args->where.dir, exi);
2102
2103 DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2104 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2105 SYMLINK3args *, args);
2106
2107 if (dvp == NULL) {
2108 error = ESTALE;
2109 goto err;
2110 }
2111
/*
 * Capture the parent's pre-operation attributes.  They also serve as
 * the post-operation attributes until the symlink is attempted.
 */
2112 dbva.va_mask = AT_ALL;
2113 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2114 davap = dbvap;
2115
2116 if (args->where.name == nfs3nametoolong) {
2117 resp->status = NFS3ERR_NAMETOOLONG;
2118 goto err1;
2119 }
2120
2121 if (args->where.name == NULL || *(args->where.name) == '\0') {
2122 resp->status = NFS3ERR_ACCES;
2123 goto err1;
2124 }
2125
2126 if (rdonly(ro, dvp)) {
2127 resp->status = NFS3ERR_ROFS;
2128 goto err1;
2129 }
2130
/*
 * On a labeled system, a client whose label is not equal to
 * l_admin_low must pass an EQUALITY_CHECK label check on the directory.
 */
2131 if (is_system_labeled()) {
2132 bslabel_t *clabel = req->rq_label;
2133
2134 ASSERT(clabel != NULL);
2135 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2136 "got client label from request(1)", struct svc_req *, req);
2137
2138 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2139 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2140 exi)) {
2141 resp->status = NFS3ERR_ACCES;
2142 goto err1;
2143 }
2144 }
2145 }
2146
2147 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2148 if (error)
2149 goto err;
2150
/* The request must specify a mode for the new link. */
2151 if (!(va.va_mask & AT_MODE)) {
2152 resp->status = NFS3ERR_INVAL;
2153 goto err1;
2154 }
2155
2156 if (args->symlink.symlink_data == nfs3nametoolong) {
2157 resp->status = NFS3ERR_NAMETOOLONG;
2158 goto err1;
2159 }
2160
/*
 * Convert both the link name and the link contents.  Each result is
 * either the original string or a separate MAXPATHLEN + 1 byte
 * buffer; the separate buffers are freed at "out".
 */
2161 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2162 name = nfscmd_convname(ca, exi, args->where.name,
2163 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2164
2165 if (name == NULL) {
2166 /* This is really a Solaris EILSEQ */
2167 resp->status = NFS3ERR_INVAL;
2168 goto err1;
2169 }
2170
2171 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2172 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2173 if (symdata == NULL) {
2174 /* This is really a Solaris EILSEQ */
2175 resp->status = NFS3ERR_INVAL;
2176 goto err1;
2177 }
2178
2179
2180 va.va_mask |= AT_TYPE;
2181 va.va_type = VLNK;
2182
2183 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2184
/*
 * Refresh the parent's post-operation attributes whether or not the
 * symlink was created.
 */
2185 dava.va_mask = AT_ALL;
2186 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2187
2188 if (error)
2189 goto err;
2190
/*
 * VOP_SYMLINK is not passed a vnode pointer to fill in, so look the
 * new link up to obtain a vnode for the file handle and attributes.
 */
2191 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2192 NULL, NULL, NULL);
2193
2194 /*
2195 * Force modified data and metadata out to stable storage.
2196 */
2197 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2198
2199
/*
 * The link exists, so the reply is NFS3_OK even if the lookup failed;
 * in that case it simply carries no handle and no object attributes.
 */
2200 resp->status = NFS3_OK;
2201 if (error) {
2202 resp->resok.obj.handle_follows = FALSE;
2203 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2204 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2205 goto out;
2206 }
2207
2208 error = makefh3(&resp->resok.obj.handle, vp, exi);
2209 if (error)
2210 resp->resok.obj.handle_follows = FALSE;
2211 else
2212 resp->resok.obj.handle_follows = TRUE;
2213
2214 va.va_mask = AT_ALL;
2215 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2216
2217 /*
2218 * Force modified data and metadata out to stable storage.
2219 */
2220 (void) VOP_FSYNC(vp, 0, cr, NULL);
2221
2222 VN_RELE(vp);
2223
2224 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2225 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2226 goto out;
2227
/*
 * err:  turn "error" (or a pending T_WOULDBLOCK) into resp->status.
 * err1: status is already set; fill in the failure wcc data.
 * out:  common exit for success and failure.
 */
2228 err:
2229 if (curthread->t_flag & T_WOULDBLOCK) {
2230 curthread->t_flag &= ~T_WOULDBLOCK;
2231 resp->status = NFS3ERR_JUKEBOX;
2232 } else
2233 resp->status = puterrno3(error);
2234 err1:
2235 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2236 out:
2237 if (name != NULL && name != args->where.name)
2238 kmem_free(name, MAXPATHLEN + 1);
2239 if (symdata != NULL && symdata != args->symlink.symlink_data)
2240 kmem_free(symdata, MAXPATHLEN + 1);
2241
2242 DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2243 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2244 SYMLINK3res *, resp);
2245
2246 if (dvp != NULL)
2247 VN_RELE(dvp);
2248 }
2249
2250 void *
2251 rfs3_symlink_getfh(SYMLINK3args *args)
2252 {
2253
2254 return (&args->where.dir);
2255 }
2256
/*
 * rfs3_mknod - service an NFSv3 MKNOD request.
 *
 *	args	- MKNOD3args: parent directory file handle, name, and the
 *		  node type (NF3CHR, NF3BLK, NF3SOCK or NF3FIFO) with its
 *		  attributes; any other type yields NFS3ERR_BADTYPE
 *	resp	- MKNOD3res to fill in
 *	exi	- export the directory handle is resolved against
 *	req	- RPC request (client label, caller address, DTrace probes)
 *	cr	- credentials used for the VOP calls and the device
 *		  privilege check
 *	ro	- passed to rdonly() to decide whether the request is refused
 *
 * The success path returns directly after releasing vp and dvp.
 * Failures go through "out" (status derived from "error") or "out1"
 * (status already set), which release dvp and fill in the failure wcc
 * data.
 */
2257 void
2258 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2259 struct svc_req *req, cred_t *cr, bool_t ro)
2260 {
2261 int error;
2262 vnode_t *vp;
2263 vnode_t *realvp;
2264 vnode_t *dvp;
2265 struct vattr *vap;
2266 struct vattr va;
2267 struct vattr *dbvap;
2268 struct vattr dbva;
2269 struct vattr *davap;
2270 struct vattr dava;
2271 int mode;
2272 enum vcexcl excl;
2273 struct sockaddr *ca;
2274 char *name = NULL;
2275
2276 dbvap = NULL;
2277 davap = NULL;
2278
2279 dvp = nfs3_fhtovp(&args->where.dir, exi);
2280
2281 DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2282 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2283 MKNOD3args *, args);
2284
2285 if (dvp == NULL) {
2286 error = ESTALE;
2287 goto out;
2288 }
2289
/*
 * Capture the parent's pre-operation attributes.  They also serve as
 * the post-operation attributes until the create is attempted.
 */
2290 dbva.va_mask = AT_ALL;
2291 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2292 davap = dbvap;
2293
2294 if (args->where.name == nfs3nametoolong) {
2295 resp->status = NFS3ERR_NAMETOOLONG;
2296 goto out1;
2297 }
2298
2299 if (args->where.name == NULL || *(args->where.name) == '\0') {
2300 resp->status = NFS3ERR_ACCES;
2301 goto out1;
2302 }
2303
2304 if (rdonly(ro, dvp)) {
2305 resp->status = NFS3ERR_ROFS;
2306 goto out1;
2307 }
2308
/*
 * On a labeled system, a client whose label is not equal to
 * l_admin_low must pass an EQUALITY_CHECK label check on the directory.
 */
2309 if (is_system_labeled()) {
2310 bslabel_t *clabel = req->rq_label;
2311
2312 ASSERT(clabel != NULL);
2313 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2314 "got client label from request(1)", struct svc_req *, req);
2315
2316 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2317 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2318 exi)) {
2319 resp->status = NFS3ERR_ACCES;
2320 goto out1;
2321 }
2322 }
2323 }
2324
/*
 * Build the vattr for the requested node type.  Device nodes take
 * their attributes and device numbers from the "device" arm and
 * require secpolicy_sys_devices() to succeed; sockets and FIFOs use
 * the "pipe_attributes" arm.
 */
2325 switch (args->what.type) {
2326 case NF3CHR:
2327 case NF3BLK:
2328 error = sattr3_to_vattr(
2329 &args->what.mknoddata3_u.device.dev_attributes, &va);
2330 if (error)
2331 goto out;
2332 if (secpolicy_sys_devices(cr) != 0) {
2333 resp->status = NFS3ERR_PERM;
2334 goto out1;
2335 }
2336 if (args->what.type == NF3CHR)
2337 va.va_type = VCHR;
2338 else
2339 va.va_type = VBLK;
2340 va.va_rdev = makedevice(
2341 args->what.mknoddata3_u.device.spec.specdata1,
2342 args->what.mknoddata3_u.device.spec.specdata2);
2343 va.va_mask |= AT_TYPE | AT_RDEV;
2344 break;
2345 case NF3SOCK:
2346 error = sattr3_to_vattr(
2347 &args->what.mknoddata3_u.pipe_attributes, &va);
2348 if (error)
2349 goto out;
2350 va.va_type = VSOCK;
2351 va.va_mask |= AT_TYPE;
2352 break;
2353 case NF3FIFO:
2354 error = sattr3_to_vattr(
2355 &args->what.mknoddata3_u.pipe_attributes, &va);
2356 if (error)
2357 goto out;
2358 va.va_type = VFIFO;
2359 va.va_mask |= AT_TYPE;
2360 break;
2361 default:
2362 resp->status = NFS3ERR_BADTYPE;
2363 goto out1;
2364 }
2365
2366 /*
2367 * Must specify the mode.
2368 */
2369 if (!(va.va_mask & AT_MODE)) {
2370 resp->status = NFS3ERR_INVAL;
2371 goto out1;
2372 }
2373
/*
 * Convert the client-supplied name.  The result is either
 * args->where.name itself or a separate MAXPATHLEN + 1 byte buffer,
 * which is freed right after VOP_CREATE.
 */
2374 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2375 name = nfscmd_convname(ca, exi, args->where.name,
2376 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2377
2378 if (name == NULL) {
2379 resp->status = NFS3ERR_INVAL;
2380 goto out1;
2381 }
2382
/* The node is always created with EXCL and an open mode of 0. */
2383 excl = EXCL;
2384
2385 mode = 0;
2386
2387 error = VOP_CREATE(dvp, name, &va, excl, mode,
2388 &vp, cr, 0, NULL, NULL);
2389
2390 if (name != args->where.name)
2391 kmem_free(name, MAXPATHLEN + 1);
2392
/*
 * Refresh the parent's post-operation attributes and sync the parent
 * whether or not the create succeeded; "error" is examined afterwards.
 */
2393 dava.va_mask = AT_ALL;
2394 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2395
2396 /*
2397 * Force modified data and metadata out to stable storage.
2398 */
2399 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2400
2401 if (error)
2402 goto out;
2403
2404 resp->status = NFS3_OK;
2405
/* A makefh3() failure only suppresses the handle; the reply is still OK. */
2406 error = makefh3(&resp->resok.obj.handle, vp, exi);
2407 if (error)
2408 resp->resok.obj.handle_follows = FALSE;
2409 else
2410 resp->resok.obj.handle_follows = TRUE;
2411
2412 va.va_mask = AT_ALL;
2413 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2414
2415 /*
2416 * Force modified metadata out to stable storage.
2417 *
2418 * If an underlying vp exists, pass it to VOP_FSYNC.
2419 */
2420 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2421 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2422 else
2423 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2424
2425 VN_RELE(vp);
2426
2427 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2428 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2429 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2430 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2431 MKNOD3res *, resp);
2432 VN_RELE(dvp);
2433 return;
2434
/*
 * out:  turn "error" (or a pending T_WOULDBLOCK) into resp->status.
 * out1: status is already set; fire the done probe and clean up.
 */
2435 out:
2436 if (curthread->t_flag & T_WOULDBLOCK) {
2437 curthread->t_flag &= ~T_WOULDBLOCK;
2438 resp->status = NFS3ERR_JUKEBOX;
2439 } else
2440 resp->status = puterrno3(error);
2441 out1:
2442 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2443 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2444 MKNOD3res *, resp);
2445 if (dvp != NULL)
2446 VN_RELE(dvp);
2447 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2448 }
2449
2450 void *
2451 rfs3_mknod_getfh(MKNOD3args *args)
2452 {
2453
2454 return (&args->where.dir);
2455 }
2456
2457 void
2458 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2459 struct svc_req *req, cred_t *cr, bool_t ro)
2460 {
2461 int error = 0;
2462 vnode_t *vp;
2463 struct vattr *bvap;
2464 struct vattr bva;
2465 struct vattr *avap;
2466 struct vattr ava;
2467 vnode_t *targvp = NULL;
2468 struct sockaddr *ca;
2469 char *name = NULL;
2470
2471 bvap = NULL;
2472 avap = NULL;
2473
2474 vp = nfs3_fhtovp(&args->object.dir, exi);
2475
2476 DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2477 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2478 REMOVE3args *, args);
2479
2480 if (vp == NULL) {
2481 error = ESTALE;
2482 goto err;
2483 }
2484
2485 bva.va_mask = AT_ALL;
2486 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2487 avap = bvap;
2488
2489 if (vp->v_type != VDIR) {
2490 resp->status = NFS3ERR_NOTDIR;
2491 goto err1;
2492 }
2493
2494 if (args->object.name == nfs3nametoolong) {
2495 resp->status = NFS3ERR_NAMETOOLONG;
2496 goto err1;
2497 }
2498
2499 if (args->object.name == NULL || *(args->object.name) == '\0') {
2500 resp->status = NFS3ERR_ACCES;
2501 goto err1;
2502 }
2503
2504 if (rdonly(ro, vp)) {
2505 resp->status = NFS3ERR_ROFS;
2506 goto err1;
2507 }
2508
2509 if (is_system_labeled()) {
2510 bslabel_t *clabel = req->rq_label;
2511
2512 ASSERT(clabel != NULL);
2513 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2514 "got client label from request(1)", struct svc_req *, req);
2515
2516 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2517 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2518 exi)) {
2519 resp->status = NFS3ERR_ACCES;
2520 goto err1;
2521 }
2522 }
2523 }
2524
2525 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2526 name = nfscmd_convname(ca, exi, args->object.name,
2527 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2528
2529 if (name == NULL) {
2530 resp->status = NFS3ERR_INVAL;
2531 goto err1;
2532 }
2533
2534 /*
2535 * Check for a conflict with a non-blocking mandatory share
2536 * reservation and V4 delegations
2537 */
2538 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2539 NULL, cr, NULL, NULL, NULL);
2540 if (error != 0)
2541 goto err;
2542
2543 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2544 resp->status = NFS3ERR_JUKEBOX;
2545 goto err1;
2546 }
2547
2548 if (!nbl_need_check(targvp)) {
2549 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2550 } else {
2551 nbl_start_crit(targvp, RW_READER);
2552 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2553 error = EACCES;
2554 } else {
2555 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2556 }
2557 nbl_end_crit(targvp);
2558 }
2559 VN_RELE(targvp);
2560 targvp = NULL;
2561
2562 ava.va_mask = AT_ALL;
2563 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2564
2565 /*
2566 * Force modified data and metadata out to stable storage.
2567 */
2568 (void) VOP_FSYNC(vp, 0, cr, NULL);
2569
2570 if (error)
2571 goto err;
2572
2573 resp->status = NFS3_OK;
2574 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2575 goto out;
2576
2577 err:
2578 if (curthread->t_flag & T_WOULDBLOCK) {
2579 curthread->t_flag &= ~T_WOULDBLOCK;
2580 resp->status = NFS3ERR_JUKEBOX;
2581 } else
2582 resp->status = puterrno3(error);
2583 err1:
2584 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2585 out:
2586 DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2587 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2588 REMOVE3res *, resp);
2589
2590 if (name != NULL && name != args->object.name)
2591 kmem_free(name, MAXPATHLEN + 1);
2592
2593 if (vp != NULL)
2594 VN_RELE(vp);
2595 }
2596
2597 void *
2598 rfs3_remove_getfh(REMOVE3args *args)
2599 {
2600
2601 return (&args->object.dir);
2602 }
2603
2604 void
2605 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2606 struct svc_req *req, cred_t *cr, bool_t ro)
2607 {
2608 int error;
2609 vnode_t *vp;
2610 struct vattr *bvap;
2611 struct vattr bva;
2612 struct vattr *avap;
2613 struct vattr ava;
2614 struct sockaddr *ca;
2615 char *name = NULL;
2616
2617 bvap = NULL;
2618 avap = NULL;
2619
2620 vp = nfs3_fhtovp(&args->object.dir, exi);
2621
2622 DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2623 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2624 RMDIR3args *, args);
2625
2626 if (vp == NULL) {
2627 error = ESTALE;
2628 goto err;
2629 }
2630
2631 bva.va_mask = AT_ALL;
2632 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2633 avap = bvap;
2634
2635 if (vp->v_type != VDIR) {
2636 resp->status = NFS3ERR_NOTDIR;
2637 goto err1;
2638 }
2639
2640 if (args->object.name == nfs3nametoolong) {
2641 resp->status = NFS3ERR_NAMETOOLONG;
2642 goto err1;
2643 }
2644
2645 if (args->object.name == NULL || *(args->object.name) == '\0') {
2646 resp->status = NFS3ERR_ACCES;
2647 goto err1;
2648 }
2649
2650 if (rdonly(ro, vp)) {
2651 resp->status = NFS3ERR_ROFS;
2652 goto err1;
2653 }
2654
2655 if (is_system_labeled()) {
2656 bslabel_t *clabel = req->rq_label;
2657
2658 ASSERT(clabel != NULL);
2659 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2660 "got client label from request(1)", struct svc_req *, req);
2661
2662 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2663 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2664 exi)) {
2665 resp->status = NFS3ERR_ACCES;
2666 goto err1;
2667 }
2668 }
2669 }
2670
2671 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2672 name = nfscmd_convname(ca, exi, args->object.name,
2673 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2674
2675 if (name == NULL) {
2676 resp->status = NFS3ERR_INVAL;
2677 goto err1;
2678 }
2679
2680 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
2681 error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2682
2683 if (name != args->object.name)
2684 kmem_free(name, MAXPATHLEN + 1);
2685
2686 ava.va_mask = AT_ALL;
2687 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2688
2689 /*
2690 * Force modified data and metadata out to stable storage.
2691 */
2692 (void) VOP_FSYNC(vp, 0, cr, NULL);
2693
2694 if (error) {
2695 /*
2696 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2697 * if the directory is not empty. A System V NFS server
2698 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2699 * over the wire.
2700 */
2701 if (error == EEXIST)
2702 error = ENOTEMPTY;
2703 goto err;
2704 }
2705
2706 resp->status = NFS3_OK;
2707 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2708 goto out;
2709
2710 err:
2711 if (curthread->t_flag & T_WOULDBLOCK) {
2712 curthread->t_flag &= ~T_WOULDBLOCK;
2713 resp->status = NFS3ERR_JUKEBOX;
2714 } else
2715 resp->status = puterrno3(error);
2716 err1:
2717 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2718 out:
2719 DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2720 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2721 RMDIR3res *, resp);
2722 if (vp != NULL)
2723 VN_RELE(vp);
2724
2725 }
2726
2727 void *
2728 rfs3_rmdir_getfh(RMDIR3args *args)
2729 {
2730
2731 return (&args->object.dir);
2732 }
2733
2734 void
2735 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2736 struct svc_req *req, cred_t *cr, bool_t ro)
2737 {
2738 int error = 0;
2739 vnode_t *fvp;
2740 vnode_t *tvp;
2741 vnode_t *targvp;
2742 struct vattr *fbvap;
2743 struct vattr fbva;
2744 struct vattr *favap;
2745 struct vattr fava;
2746 struct vattr *tbvap;
2747 struct vattr tbva;
2748 struct vattr *tavap;
2749 struct vattr tava;
2750 nfs_fh3 *fh3;
2751 struct exportinfo *to_exi;
2752 vnode_t *srcvp = NULL;
2753 bslabel_t *clabel;
2754 struct sockaddr *ca;
2755 char *name = NULL;
2756 char *toname = NULL;
2757
2758 fbvap = NULL;
2759 favap = NULL;
2760 tbvap = NULL;
2761 tavap = NULL;
2762 tvp = NULL;
2763
2764 fvp = nfs3_fhtovp(&args->from.dir, exi);
2765
2766 DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2767 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2768 RENAME3args *, args);
2769
2770 if (fvp == NULL) {
2771 error = ESTALE;
2772 goto err;
2773 }
2774
2775 if (is_system_labeled()) {
2776 clabel = req->rq_label;
2777 ASSERT(clabel != NULL);
2778 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2779 "got client label from request(1)", struct svc_req *, req);
2780
2781 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2782 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2783 exi)) {
2784 resp->status = NFS3ERR_ACCES;
2785 goto err1;
2786 }
2787 }
2788 }
2789
2790 fbva.va_mask = AT_ALL;
2791 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2792 favap = fbvap;
2793
2794 fh3 = &args->to.dir;
2795 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2796 if (to_exi == NULL) {
2797 resp->status = NFS3ERR_ACCES;
2798 goto err1;
2799 }
2800 exi_rele(to_exi);
2801
2802 if (to_exi != exi) {
2803 resp->status = NFS3ERR_XDEV;
2804 goto err1;
2805 }
2806
2807 tvp = nfs3_fhtovp(&args->to.dir, exi);
2808 if (tvp == NULL) {
2809 error = ESTALE;
2810 goto err;
2811 }
2812
2813 tbva.va_mask = AT_ALL;
2814 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2815 tavap = tbvap;
2816
2817 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2818 resp->status = NFS3ERR_NOTDIR;
2819 goto err1;
2820 }
2821
2822 if (args->from.name == nfs3nametoolong ||
2823 args->to.name == nfs3nametoolong) {
2824 resp->status = NFS3ERR_NAMETOOLONG;
2825 goto err1;
2826 }
2827 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2828 args->to.name == NULL || *(args->to.name) == '\0') {
2829 resp->status = NFS3ERR_ACCES;
2830 goto err1;
2831 }
2832
2833 if (rdonly(ro, tvp)) {
2834 resp->status = NFS3ERR_ROFS;
2835 goto err1;
2836 }
2837
2838 if (is_system_labeled()) {
2839 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2840 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2841 exi)) {
2842 resp->status = NFS3ERR_ACCES;
2843 goto err1;
2844 }
2845 }
2846 }
2847
2848 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2849 name = nfscmd_convname(ca, exi, args->from.name,
2850 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2851
2852 if (name == NULL) {
2853 resp->status = NFS3ERR_INVAL;
2854 goto err1;
2855 }
2856
2857 toname = nfscmd_convname(ca, exi, args->to.name,
2858 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2859
2860 if (toname == NULL) {
2861 resp->status = NFS3ERR_INVAL;
2862 goto err1;
2863 }
2864
2865 /*
2866 * Check for a conflict with a non-blocking mandatory share
2867 * reservation or V4 delegations.
2868 */
2869 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2870 NULL, cr, NULL, NULL, NULL);
2871 if (error != 0)
2872 goto err;
2873
2874 /*
2875 * If we rename a delegated file we should recall the
2876 * delegation, since future opens should fail or would
2877 * refer to a new file.
2878 */
2879 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2880 resp->status = NFS3ERR_JUKEBOX;
2881 goto err1;
2882 }
2883
2884 /*
2885 * Check for renaming over a delegated file. Check nfs4_deleg_policy
2886 * first to avoid VOP_LOOKUP if possible.
2887 */
2888 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2889 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2890 NULL, NULL, NULL) == 0) {
2891
2892 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2893 VN_RELE(targvp);
2894 resp->status = NFS3ERR_JUKEBOX;
2895 goto err1;
2896 }
2897 VN_RELE(targvp);
2898 }
2899
2900 if (!nbl_need_check(srcvp)) {
2901 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2902 } else {
2903 nbl_start_crit(srcvp, RW_READER);
2904 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2905 error = EACCES;
2906 else
2907 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2908 nbl_end_crit(srcvp);
2909 }
2910 if (error == 0)
2911 vn_renamepath(tvp, srcvp, args->to.name,
2912 strlen(args->to.name));
2913 VN_RELE(srcvp);
2914 srcvp = NULL;
2915
2916 fava.va_mask = AT_ALL;
2917 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2918 tava.va_mask = AT_ALL;
2919 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2920
2921 /*
2922 * Force modified data and metadata out to stable storage.
2923 */
2924 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2925 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2926
2927 if (error)
2928 goto err;
2929
2930 resp->status = NFS3_OK;
2931 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2932 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2933 goto out;
2934
2935 err:
2936 if (curthread->t_flag & T_WOULDBLOCK) {
2937 curthread->t_flag &= ~T_WOULDBLOCK;
2938 resp->status = NFS3ERR_JUKEBOX;
2939 } else {
2940 resp->status = puterrno3(error);
2941 }
2942 err1:
2943 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2944 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2945
2946 out:
2947 if (name != NULL && name != args->from.name)
2948 kmem_free(name, MAXPATHLEN + 1);
2949 if (toname != NULL && toname != args->to.name)
2950 kmem_free(toname, MAXPATHLEN + 1);
2951
2952 DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2953 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2954 RENAME3res *, resp);
2955 if (fvp != NULL)
2956 VN_RELE(fvp);
2957 if (tvp != NULL)
2958 VN_RELE(tvp);
2959 }
2960
2961 void *
2962 rfs3_rename_getfh(RENAME3args *args)
2963 {
2964
2965 return (&args->from.dir);
2966 }
2967
2968 void
2969 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2970 struct svc_req *req, cred_t *cr, bool_t ro)
2971 {
2972 int error;
2973 vnode_t *vp;
2974 vnode_t *dvp;
2975 struct vattr *vap;
2976 struct vattr va;
2977 struct vattr *bvap;
2978 struct vattr bva;
2979 struct vattr *avap;
2980 struct vattr ava;
2981 nfs_fh3 *fh3;
2982 struct exportinfo *to_exi;
2983 bslabel_t *clabel;
2984 struct sockaddr *ca;
2985 char *name = NULL;
2986
2987 vap = NULL;
2988 bvap = NULL;
2989 avap = NULL;
2990 dvp = NULL;
2991
2992 vp = nfs3_fhtovp(&args->file, exi);
2993
2994 DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2995 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2996 LINK3args *, args);
2997
2998 if (vp == NULL) {
2999 error = ESTALE;
3000 goto out;
3001 }
3002
3003 va.va_mask = AT_ALL;
3004 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3005
3006 fh3 = &args->link.dir;
3007 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3008 if (to_exi == NULL) {
3009 resp->status = NFS3ERR_ACCES;
3010 goto out1;
3011 }
3012 exi_rele(to_exi);
3013
3014 if (to_exi != exi) {
3015 resp->status = NFS3ERR_XDEV;
3016 goto out1;
3017 }
3018
3019 if (is_system_labeled()) {
3020 clabel = req->rq_label;
3021
3022 ASSERT(clabel != NULL);
3023 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3024 "got client label from request(1)", struct svc_req *, req);
3025
3026 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3027 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3028 exi)) {
3029 resp->status = NFS3ERR_ACCES;
3030 goto out1;
3031 }
3032 }
3033 }
3034
3035 dvp = nfs3_fhtovp(&args->link.dir, exi);
3036 if (dvp == NULL) {
3037 error = ESTALE;
3038 goto out;
3039 }
3040
3041 bva.va_mask = AT_ALL;
3042 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3043
3044 if (dvp->v_type != VDIR) {
3045 resp->status = NFS3ERR_NOTDIR;
3046 goto out1;
3047 }
3048
3049 if (args->link.name == nfs3nametoolong) {
3050 resp->status = NFS3ERR_NAMETOOLONG;
3051 goto out1;
3052 }
3053
3054 if (args->link.name == NULL || *(args->link.name) == '\0') {
3055 resp->status = NFS3ERR_ACCES;
3056 goto out1;
3057 }
3058
3059 if (rdonly(ro, dvp)) {
3060 resp->status = NFS3ERR_ROFS;
3061 goto out1;
3062 }
3063
3064 if (is_system_labeled()) {
3065 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3066 "got client label from request(1)", struct svc_req *, req);
3067
3068 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3069 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3070 exi)) {
3071 resp->status = NFS3ERR_ACCES;
3072 goto out1;
3073 }
3074 }
3075 }
3076
3077 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3078 name = nfscmd_convname(ca, exi, args->link.name,
3079 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3080
3081 if (name == NULL) {
3082 resp->status = NFS3ERR_SERVERFAULT;
3083 goto out1;
3084 }
3085
3086 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3087
3088 va.va_mask = AT_ALL;
3089 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3090 ava.va_mask = AT_ALL;
3091 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3092
3093 /*
3094 * Force modified data and metadata out to stable storage.
3095 */
3096 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3097 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3098
3099 if (error)
3100 goto out;
3101
3102 VN_RELE(dvp);
3103
3104 resp->status = NFS3_OK;
3105 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3106 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3107
3108 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3109 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3110 LINK3res *, resp);
3111
3112 VN_RELE(vp);
3113
3114 return;
3115
3116 out:
3117 if (curthread->t_flag & T_WOULDBLOCK) {
3118 curthread->t_flag &= ~T_WOULDBLOCK;
3119 resp->status = NFS3ERR_JUKEBOX;
3120 } else
3121 resp->status = puterrno3(error);
3122 out1:
3123 if (name != NULL && name != args->link.name)
3124 kmem_free(name, MAXPATHLEN + 1);
3125
3126 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3127 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3128 LINK3res *, resp);
3129
3130 if (vp != NULL)
3131 VN_RELE(vp);
3132 if (dvp != NULL)
3133 VN_RELE(dvp);
3134 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3135 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3136 }
3137
3138 void *
3139 rfs3_link_getfh(LINK3args *args)
3140 {
3141
3142 return (&args->file);
3143 }
3144
/*
 * Size, in bytes, of a READDIR response that carries directory attributes
 * plus exactly one directory entry whose name is `length' bytes long.
 *
 * If the count supplied in the request is at least this large, the reply
 * is guaranteed to have room for at least one directory entry (if one
 * exists), so no NFS3ERR_TOOSMALL check is needed.  If the requested
 * count is smaller than this, the caller must check whether
 * NFS3ERR_TOOSMALL has to be returned.
 *
 * The total is made up of the following, in XDR units:
 *
 *	status			- 1
 *	attr. flag		- 1
 *	cookie verifier		- 2
 *	attributes		- NFS3_SIZEOF_FATTR3
 *	boolean			- 1
 *	file id			- 2
 *	directory name length	- 1
 *	cookie			- 2
 *	end of list		- 1
 *	end of file		- 1
 *
 * each multiplied by BYTES_PER_XDR_UNIT, plus the length of the name
 * rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	BYTES_PER_XDR_UNIT * \
	(1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3173
3174 /* ARGSUSED */
3175 void
3176 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3177 struct svc_req *req, cred_t *cr, bool_t ro)
3178 {
3179 int error;
3180 vnode_t *vp;
3181 struct vattr *vap;
3182 struct vattr va;
3183 struct iovec iov;
3184 struct uio uio;
3185 char *data;
3186 int iseof;
3187 int bufsize;
3188 int namlen;
3189 uint_t count;
3190 struct sockaddr *ca;
3191
3192 vap = NULL;
3193
3194 vp = nfs3_fhtovp(&args->dir, exi);
3195
3196 DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3197 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3198 READDIR3args *, args);
3199
3200 if (vp == NULL) {
3201 error = ESTALE;
3202 goto out;
3203 }
3204
3205 if (is_system_labeled()) {
3206 bslabel_t *clabel = req->rq_label;
3207
3208 ASSERT(clabel != NULL);
3209 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3210 "got client label from request(1)", struct svc_req *, req);
3211
3212 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3213 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3214 exi)) {
3215 resp->status = NFS3ERR_ACCES;
3216 goto out1;
3217 }
3218 }
3219 }
3220
3221 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3222
3223 va.va_mask = AT_ALL;
3224 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3225
3226 if (vp->v_type != VDIR) {
3227 resp->status = NFS3ERR_NOTDIR;
3228 goto out1;
3229 }
3230
3231 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3232 if (error)
3233 goto out;
3234
3235 /*
3236 * Now don't allow arbitrary count to alloc;
3237 * allow the maximum not to exceed rfs3_tsize()
3238 */
3239 if (args->count > rfs3_tsize(req))
3240 args->count = rfs3_tsize(req);
3241
3242 /*
3243 * Make sure that there is room to read at least one entry
3244 * if any are available.
3245 */
3246 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3247 count = DIRENT64_RECLEN(MAXNAMELEN);
3248 else
3249 count = args->count;
3250
3251 data = kmem_alloc(count, KM_SLEEP);
3252
3253 iov.iov_base = data;
3254 iov.iov_len = count;
3255 uio.uio_iov = &iov;
3256 uio.uio_iovcnt = 1;
3257 uio.uio_segflg = UIO_SYSSPACE;
3258 uio.uio_extflg = UIO_COPY_CACHED;
3259 uio.uio_loffset = (offset_t)args->cookie;
3260 uio.uio_resid = count;
3261
3262 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3263
3264 va.va_mask = AT_ALL;
3265 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3266
3267 if (error) {
3268 kmem_free(data, count);
3269 goto out;
3270 }
3271
3272 /*
3273 * If the count was not large enough to be able to guarantee
3274 * to be able to return at least one entry, then need to
3275 * check to see if NFS3ERR_TOOSMALL should be returned.
3276 */
3277 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3278 /*
3279 * bufsize is used to keep track of the size of the response.
3280 * It is primed with:
3281 * 1 for the status +
3282 * 1 for the dir_attributes.attributes boolean +
3283 * 2 for the cookie verifier
3284 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3285 * to bytes. If there are directory attributes to be
3286 * returned, then:
3287 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3288 * time BYTES_PER_XDR_UNIT is added to account for them.
3289 */
3290 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3291 if (vap != NULL)
3292 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3293 /*
3294 * An entry is composed of:
3295 * 1 for the true/false list indicator +
3296 * 2 for the fileid +
3297 * 1 for the length of the name +
3298 * 2 for the cookie +
3299 * all times BYTES_PER_XDR_UNIT to convert from
3300 * XDR units to bytes, plus the length of the name
3301 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3302 */
3303 if (count != uio.uio_resid) {
3304 namlen = strlen(((struct dirent64 *)data)->d_name);
3305 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3306 roundup(namlen, BYTES_PER_XDR_UNIT);
3307 }
3308 /*
3309 * We need to check to see if the number of bytes left
3310 * to go into the buffer will actually fit into the
3311 * buffer. This is calculated as the size of this
3312 * entry plus:
3313 * 1 for the true/false list indicator +
3314 * 1 for the eof indicator
3315 * times BYTES_PER_XDR_UNIT to convert from from
3316 * XDR units to bytes.
3317 */
3318 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3319 if (bufsize > args->count) {
3320 kmem_free(data, count);
3321 resp->status = NFS3ERR_TOOSMALL;
3322 goto out1;
3323 }
3324 }
3325
3326 /*
3327 * Have a valid readir buffer for the native character
3328 * set. Need to check if a conversion is necessary and
3329 * potentially rewrite the whole buffer. Note that if the
3330 * conversion expands names enough, the structure may not
3331 * fit. In this case, we need to drop entries until if fits
3332 * and patch the counts in order that the next readdir will
3333 * get the correct entries.
3334 */
3335 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3336 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3337
3338
3339 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3340
3341 #if 0 /* notyet */
3342 /*
3343 * Don't do this. It causes local disk writes when just
3344 * reading the file and the overhead is deemed larger
3345 * than the benefit.
3346 */
3347 /*
3348 * Force modified metadata out to stable storage.
3349 */
3350 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3351 #endif
3352
3353 resp->status = NFS3_OK;
3354 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3355 resp->resok.cookieverf = 0;
3356 resp->resok.reply.entries = (entry3 *)data;
3357 resp->resok.reply.eof = iseof;
3358 resp->resok.size = count - uio.uio_resid;
3359 resp->resok.count = args->count;
3360 resp->resok.freecount = count;
3361
3362 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3363 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3364 READDIR3res *, resp);
3365
3366 VN_RELE(vp);
3367
3368 return;
3369
3370 out:
3371 if (curthread->t_flag & T_WOULDBLOCK) {
3372 curthread->t_flag &= ~T_WOULDBLOCK;
3373 resp->status = NFS3ERR_JUKEBOX;
3374 } else
3375 resp->status = puterrno3(error);
3376 out1:
3377 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3378
3379 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3380 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3381 READDIR3res *, resp);
3382
3383 if (vp != NULL) {
3384 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3385 VN_RELE(vp);
3386 }
3387 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3388 }
3389
3390 void *
3391 rfs3_readdir_getfh(READDIR3args *args)
3392 {
3393
3394 return (&args->dir);
3395 }
3396
3397 void
3398 rfs3_readdir_free(READDIR3res *resp)
3399 {
3400
3401 if (resp->status == NFS3_OK)
3402 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3403 }
3404
/*
 * Step from one struct dirent64 record to the next one in a buffer by
 * advancing d_reclen bytes from the start of the current record.  Any
 * earlier definition of nextdp is discarded first (#undef of a name that
 * is not defined is a no-op).
 */
#undef	nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3409
3410 /*
3411 * This macro computes the size of a response which contains
3412 * one directory entry including the attributes as well as file handle.
3413 * If the incoming request is larger than this, then we are guaranteed to be
3414 * able to return at least one more directory entry if one exists.
3415 *
3416 * NFS3_READDIRPLUS_ENTRY is made up of the following:
3417 *
3418 * boolean - 1 * BYTES_PER_XDR_UNIT
3419 * file id - 2 * BYTES_PER_XDR_UNIT
3420 * directory name length - 1 * BYTES_PER_XDR_UNIT
3421 * cookie - 2 * BYTES_PER_XDR_UNIT
3422 * attribute flag - 1 * BYTES_PER_XDR_UNIT
3423 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
3424 * status byte for file handle - 1 * BYTES_PER_XDR_UNIT
3425 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
3426 * Maximum length of a file handle (NFS3_MAXFHSIZE)
3427 * name length of the entry to the nearest bytes
3428 */
3429 #define NFS3_READDIRPLUS_ENTRY(namelen) \
3430 ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
3431 BYTES_PER_XDR_UNIT + \
3432 NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT))
3433
3434 static int rfs3_readdir_unit = MAXBSIZE;
3435
3436 /* ARGSUSED */
3437 void
3438 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3439 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3440 {
3441 int error;
3442 vnode_t *vp;
3443 struct vattr *vap;
3444 struct vattr va;
3445 struct iovec iov;
3446 struct uio uio;
3447 char *data;
3448 int iseof;
3449 struct dirent64 *dp;
3450 vnode_t *nvp;
3451 struct vattr *nvap;
3452 struct vattr nva;
3453 entryplus3_info *infop = NULL;
3454 int size = 0;
3455 int nents = 0;
3456 int bufsize = 0;
3457 int entrysize = 0;
3458 int tofit = 0;
3459 int rd_unit = rfs3_readdir_unit;
3460 int prev_len;
3461 int space_left;
3462 int i;
3463 uint_t *namlen = NULL;
3464 char *ndata = NULL;
3465 struct sockaddr *ca;
3466 size_t ret;
3467
3468 vap = NULL;
3469
3470 vp = nfs3_fhtovp(&args->dir, exi);
3471
3472 DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3473 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3474 READDIRPLUS3args *, args);
3475
3476 if (vp == NULL) {
3477 error = ESTALE;
3478 goto out;
3479 }
3480
3481 if (is_system_labeled()) {
3482 bslabel_t *clabel = req->rq_label;
3483
3484 ASSERT(clabel != NULL);
3485 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3486 char *, "got client label from request(1)",
3487 struct svc_req *, req);
3488
3489 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3490 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3491 exi)) {
3492 resp->status = NFS3ERR_ACCES;
3493 goto out1;
3494 }
3495 }
3496 }
3497
3498 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3499
3500 va.va_mask = AT_ALL;
3501 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3502
3503 if (vp->v_type != VDIR) {
3504 error = ENOTDIR;
3505 goto out;
3506 }
3507
3508 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3509 if (error)
3510 goto out;
3511
3512 /*
3513 * Don't allow arbitrary counts for allocation
3514 */
3515 if (args->maxcount > rfs3_tsize(req))
3516 args->maxcount = rfs3_tsize(req);
3517
3518 /*
3519 * Make sure that there is room to read at least one entry
3520 * if any are available
3521 */
3522 args->dircount = MIN(args->dircount, args->maxcount);
3523
3524 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3525 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3526
3527 /*
3528 * This allocation relies on a minimum directory entry
3529 * being roughly 24 bytes. Therefore, the namlen array
3530 * will have enough space based on the maximum number of
3531 * entries to read.
3532 */
3533 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3534
3535 space_left = args->dircount;
3536 data = kmem_alloc(args->dircount, KM_SLEEP);
3537 dp = (struct dirent64 *)data;
3538 uio.uio_iov = &iov;
3539 uio.uio_iovcnt = 1;
3540 uio.uio_segflg = UIO_SYSSPACE;
3541 uio.uio_extflg = UIO_COPY_CACHED;
3542 uio.uio_loffset = (offset_t)args->cookie;
3543
3544 /*
3545 * bufsize is used to keep track of the size of the response as we
3546 * get post op attributes and filehandles for each entry. This is
3547 * an optimization as the server may have read more entries than will
3548 * fit in the buffer specified by maxcount. We stop calculating
3549 * post op attributes and filehandles once we have exceeded maxcount.
3550 * This will minimize the effect of truncation.
3551 *
3552 * It is primed with:
3553 * 1 for the status +
3554 * 1 for the dir_attributes.attributes boolean +
3555 * 2 for the cookie verifier
3556 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3557 * to bytes. If there are directory attributes to be
3558 * returned, then:
3559 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3560 * time BYTES_PER_XDR_UNIT is added to account for them.
3561 */
3562 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3563 if (vap != NULL)
3564 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3565
3566 getmoredents:
3567 /*
3568 * Here we make a check so that our read unit is not larger than
3569 * the space left in the buffer.
3570 */
3571 rd_unit = MIN(rd_unit, space_left);
3572 iov.iov_base = (char *)dp;
3573 iov.iov_len = rd_unit;
3574 uio.uio_resid = rd_unit;
3575 prev_len = rd_unit;
3576
3577 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3578
3579 if (error) {
3580 kmem_free(data, args->dircount);
3581 goto out;
3582 }
3583
3584 if (uio.uio_resid == prev_len && !iseof) {
3585 if (nents == 0) {
3586 kmem_free(data, args->dircount);
3587 resp->status = NFS3ERR_TOOSMALL;
3588 goto out1;
3589 }
3590
3591 /*
3592 * We could not get any more entries, so get the attributes
3593 * and filehandle for the entries already obtained.
3594 */
3595 goto good;
3596 }
3597
3598 /*
3599 * We estimate the size of the response by assuming the
3600 * entry exists and attributes and filehandle are also valid
3601 */
3602 for (size = prev_len - uio.uio_resid;
3603 size > 0;
3604 size -= dp->d_reclen, dp = nextdp(dp)) {
3605
3606 if (dp->d_ino == 0) {
3607 nents++;
3608 continue;
3609 }
3610
3611 namlen[nents] = strlen(dp->d_name);
3612 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3613
3614 /*
3615 * We need to check to see if the number of bytes left
3616 * to go into the buffer will actually fit into the
3617 * buffer. This is calculated as the size of this
3618 * entry plus:
3619 * 1 for the true/false list indicator +
3620 * 1 for the eof indicator
3621 * times BYTES_PER_XDR_UNIT to convert from XDR units
3622 * to bytes.
3623 *
3624 * Also check the dircount limit against the first entry read
3625 *
3626 */
3627 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3628 if (bufsize + tofit > args->maxcount) {
3629 /*
3630 * We make a check here to see if this was the
3631 * first entry being measured. If so, then maxcount
3632 * was too small to begin with and so we need to
3633 * return with NFS3ERR_TOOSMALL.
3634 */
3635 if (nents == 0) {
3636 kmem_free(data, args->dircount);
3637 resp->status = NFS3ERR_TOOSMALL;
3638 goto out1;
3639 }
3640 iseof = FALSE;
3641 goto good;
3642 }
3643 bufsize += entrysize;
3644 nents++;
3645 }
3646
3647 /*
3648 * If there is enough room to fit at least 1 more entry including
3649 * post op attributes and filehandle in the buffer AND that we haven't
3650 * exceeded dircount then go back and get some more.
3651 */
3652 if (!iseof &&
3653 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3654 space_left -= (prev_len - uio.uio_resid);
3655 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3656 goto getmoredents;
3657
3658 /* else, fall through */
3659 }
3660 good:
3661 va.va_mask = AT_ALL;
3662 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3663
3664 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3665
3666 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3667 resp->resok.infop = infop;
3668
3669 dp = (struct dirent64 *)data;
3670 for (i = 0; i < nents; i++) {
3671
3672 if (dp->d_ino == 0) {
3673 infop[i].attr.attributes = FALSE;
3674 infop[i].fh.handle_follows = FALSE;
3675 dp = nextdp(dp);
3676 continue;
3677 }
3678
3679 infop[i].namelen = namlen[i];
3680
3681 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3682 NULL, NULL, NULL);
3683 if (error) {
3684 infop[i].attr.attributes = FALSE;
3685 infop[i].fh.handle_follows = FALSE;
3686 dp = nextdp(dp);
3687 continue;
3688 }
3689
3690 nva.va_mask = AT_ALL;
3691 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3692
3693 /* Lie about the object type for a referral */
3694 if (vn_is_nfs_reparse(nvp, cr))
3695 nvap->va_type = VLNK;
3696
3697 if (vn_ismntpt(nvp)) {
3698 infop[i].attr.attributes = FALSE;
3699 infop[i].fh.handle_follows = FALSE;
3700 } else {
3701 vattr_to_post_op_attr(nvap, &infop[i].attr);
3702
3703 error = makefh3(&infop[i].fh.handle, nvp, exi);
3704 if (!error)
3705 infop[i].fh.handle_follows = TRUE;
3706 else
3707 infop[i].fh.handle_follows = FALSE;
3708 }
3709
3710 VN_RELE(nvp);
3711 dp = nextdp(dp);
3712 }
3713
3714 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3715 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3716 if (ndata == NULL)
3717 ndata = data;
3718
3719 if (ret > 0) {
3720 /*
3721 * We had to drop one or more entries in order to fit
3722 * during the character conversion. We need to patch
3723 * up the size and eof info.
3724 */
3725 if (iseof)
3726 iseof = FALSE;
3727
3728 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3729 nents, ret);
3730 }
3731
3732
3733 #if 0 /* notyet */
3734 /*
3735 * Don't do this. It causes local disk writes when just
3736 * reading the file and the overhead is deemed larger
3737 * than the benefit.
3738 */
3739 /*
3740 * Force modified metadata out to stable storage.
3741 */
3742 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3743 #endif
3744
3745 kmem_free(namlen, args->dircount);
3746
3747 resp->status = NFS3_OK;
3748 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3749 resp->resok.cookieverf = 0;
3750 resp->resok.reply.entries = (entryplus3 *)ndata;
3751 resp->resok.reply.eof = iseof;
3752 resp->resok.size = nents;
3753 resp->resok.count = args->dircount - ret;
3754 resp->resok.maxcount = args->maxcount;
3755
3756 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3757 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3758 READDIRPLUS3res *, resp);
3759
3760 VN_RELE(vp);
3761
3762 return;
3763
3764 out:
3765 if (curthread->t_flag & T_WOULDBLOCK) {
3766 curthread->t_flag &= ~T_WOULDBLOCK;
3767 resp->status = NFS3ERR_JUKEBOX;
3768 } else {
3769 resp->status = puterrno3(error);
3770 }
3771 out1:
3772 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3773
3774 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3775 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3776 READDIRPLUS3res *, resp);
3777
3778 if (vp != NULL) {
3779 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3780 VN_RELE(vp);
3781 }
3782
3783 if (namlen != NULL)
3784 kmem_free(namlen, args->dircount);
3785
3786 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3787 }
3788
3789 void *
3790 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3791 {
3792
3793 return (&args->dir);
3794 }
3795
3796 void
3797 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3798 {
3799
3800 if (resp->status == NFS3_OK) {
3801 kmem_free(resp->resok.reply.entries, resp->resok.count);
3802 kmem_free(resp->resok.infop,
3803 resp->resok.size * sizeof (struct entryplus3_info));
3804 }
3805 }
3806
3807 /* ARGSUSED */
3808 void
3809 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3810 struct svc_req *req, cred_t *cr, bool_t ro)
3811 {
3812 int error;
3813 vnode_t *vp;
3814 struct vattr *vap;
3815 struct vattr va;
3816 struct statvfs64 sb;
3817
3818 vap = NULL;
3819
3820 vp = nfs3_fhtovp(&args->fsroot, exi);
3821
3822 DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3823 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3824 FSSTAT3args *, args);
3825
3826 if (vp == NULL) {
3827 error = ESTALE;
3828 goto out;
3829 }
3830
3831 if (is_system_labeled()) {
3832 bslabel_t *clabel = req->rq_label;
3833
3834 ASSERT(clabel != NULL);
3835 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3836 "got client label from request(1)", struct svc_req *, req);
3837
3838 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3839 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3840 exi)) {
3841 resp->status = NFS3ERR_ACCES;
3842 goto out1;
3843 }
3844 }
3845 }
3846
3847 error = VFS_STATVFS(vp->v_vfsp, &sb);
3848
3849 va.va_mask = AT_ALL;
3850 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3851
3852 if (error)
3853 goto out;
3854
3855 resp->status = NFS3_OK;
3856 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3857 if (sb.f_blocks != (fsblkcnt64_t)-1)
3858 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3859 else
3860 resp->resok.tbytes = (size3)sb.f_blocks;
3861 if (sb.f_bfree != (fsblkcnt64_t)-1)
3862 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3863 else
3864 resp->resok.fbytes = (size3)sb.f_bfree;
3865 if (sb.f_bavail != (fsblkcnt64_t)-1)
3866 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3867 else
3868 resp->resok.abytes = (size3)sb.f_bavail;
3869 resp->resok.tfiles = (size3)sb.f_files;
3870 resp->resok.ffiles = (size3)sb.f_ffree;
3871 resp->resok.afiles = (size3)sb.f_favail;
3872 resp->resok.invarsec = 0;
3873
3874 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3875 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3876 FSSTAT3res *, resp);
3877 VN_RELE(vp);
3878
3879 return;
3880
3881 out:
3882 if (curthread->t_flag & T_WOULDBLOCK) {
3883 curthread->t_flag &= ~T_WOULDBLOCK;
3884 resp->status = NFS3ERR_JUKEBOX;
3885 } else
3886 resp->status = puterrno3(error);
3887 out1:
3888 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3889 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3890 FSSTAT3res *, resp);
3891
3892 if (vp != NULL)
3893 VN_RELE(vp);
3894 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3895 }
3896
3897 void *
3898 rfs3_fsstat_getfh(FSSTAT3args *args)
3899 {
3900
3901 return (&args->fsroot);
3902 }
3903
3904 /* ARGSUSED */
3905 void
3906 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3907 struct svc_req *req, cred_t *cr, bool_t ro)
3908 {
3909 vnode_t *vp;
3910 struct vattr *vap;
3911 struct vattr va;
3912 uint32_t xfer_size;
3913 ulong_t l = 0;
3914 int error;
3915
3916 vp = nfs3_fhtovp(&args->fsroot, exi);
3917
3918 DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3919 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3920 FSINFO3args *, args);
3921
3922 if (vp == NULL) {
3923 if (curthread->t_flag & T_WOULDBLOCK) {
3924 curthread->t_flag &= ~T_WOULDBLOCK;
3925 resp->status = NFS3ERR_JUKEBOX;
3926 } else
3927 resp->status = NFS3ERR_STALE;
3928 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3929 goto out;
3930 }
3931
3932 if (is_system_labeled()) {
3933 bslabel_t *clabel = req->rq_label;
3934
3935 ASSERT(clabel != NULL);
3936 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3937 "got client label from request(1)", struct svc_req *, req);
3938
3939 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3940 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3941 exi)) {
3942 resp->status = NFS3ERR_STALE;
3943 vattr_to_post_op_attr(NULL,
3944 &resp->resfail.obj_attributes);
3945 goto out;
3946 }
3947 }
3948 }
3949
3950 va.va_mask = AT_ALL;
3951 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3952
3953 resp->status = NFS3_OK;
3954 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3955 xfer_size = rfs3_tsize(req);
3956 resp->resok.rtmax = xfer_size;
3957 resp->resok.rtpref = xfer_size;
3958 resp->resok.rtmult = DEV_BSIZE;
3959 resp->resok.wtmax = xfer_size;
3960 resp->resok.wtpref = xfer_size;
3961 resp->resok.wtmult = DEV_BSIZE;
3962 resp->resok.dtpref = MAXBSIZE;
3963
3964 /*
3965 * Large file spec: want maxfilesize based on limit of
3966 * underlying filesystem. We can guess 2^31-1 if need be.
3967 */
3968 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3969 if (error) {
3970 resp->status = puterrno3(error);
3971 goto out;
3972 }
3973
3974 /*
3975 * If the underlying file system does not support _PC_FILESIZEBITS,
3976 * return a reasonable default. Note that error code on VOP_PATHCONF
3977 * will be 0, even if the underlying file system does not support
3978 * _PC_FILESIZEBITS.
3979 */
3980 if (l == (ulong_t)-1) {
3981 resp->resok.maxfilesize = MAXOFF32_T;
3982 } else {
3983 if (l >= (sizeof (uint64_t) * 8))
3984 resp->resok.maxfilesize = INT64_MAX;
3985 else
3986 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3987 }
3988
3989 resp->resok.time_delta.seconds = 0;
3990 resp->resok.time_delta.nseconds = 1000;
3991 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3992 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3993
3994 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3995 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3996 FSINFO3res *, resp);
3997
3998 VN_RELE(vp);
3999
4000 return;
4001
4002 out:
4003 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
4004 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
4005 FSINFO3res *, resp);
4006 if (vp != NULL)
4007 VN_RELE(vp);
4008 }
4009
4010 void *
4011 rfs3_fsinfo_getfh(FSINFO3args *args)
4012 {
4013 return (&args->fsroot);
4014 }
4015
4016 /* ARGSUSED */
4017 void
4018 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4019 struct svc_req *req, cred_t *cr, bool_t ro)
4020 {
4021 int error;
4022 vnode_t *vp;
4023 struct vattr *vap;
4024 struct vattr va;
4025 ulong_t val;
4026
4027 vap = NULL;
4028
4029 vp = nfs3_fhtovp(&args->object, exi);
4030
4031 DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4032 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4033 PATHCONF3args *, args);
4034
4035 if (vp == NULL) {
4036 error = ESTALE;
4037 goto out;
4038 }
4039
4040 if (is_system_labeled()) {
4041 bslabel_t *clabel = req->rq_label;
4042
4043 ASSERT(clabel != NULL);
4044 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4045 "got client label from request(1)", struct svc_req *, req);
4046
4047 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4048 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4049 exi)) {
4050 resp->status = NFS3ERR_ACCES;
4051 goto out1;
4052 }
4053 }
4054 }
4055
4056 va.va_mask = AT_ALL;
4057 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4058
4059 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4060 if (error)
4061 goto out;
4062 resp->resok.info.link_max = (uint32)val;
4063
4064 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4065 if (error)
4066 goto out;
4067 resp->resok.info.name_max = (uint32)val;
4068
4069 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4070 if (error)
4071 goto out;
4072 if (val == 1)
4073 resp->resok.info.no_trunc = TRUE;
4074 else
4075 resp->resok.info.no_trunc = FALSE;
4076
4077 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4078 if (error)
4079 goto out;
4080 if (val == 1)
4081 resp->resok.info.chown_restricted = TRUE;
4082 else
4083 resp->resok.info.chown_restricted = FALSE;
4084
4085 resp->status = NFS3_OK;
4086 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4087 resp->resok.info.case_insensitive = FALSE;
4088 resp->resok.info.case_preserving = TRUE;
4089 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4090 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4091 PATHCONF3res *, resp);
4092 VN_RELE(vp);
4093 return;
4094
4095 out:
4096 if (curthread->t_flag & T_WOULDBLOCK) {
4097 curthread->t_flag &= ~T_WOULDBLOCK;
4098 resp->status = NFS3ERR_JUKEBOX;
4099 } else
4100 resp->status = puterrno3(error);
4101 out1:
4102 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4103 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4104 PATHCONF3res *, resp);
4105 if (vp != NULL)
4106 VN_RELE(vp);
4107 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4108 }
4109
4110 void *
4111 rfs3_pathconf_getfh(PATHCONF3args *args)
4112 {
4113
4114 return (&args->object);
4115 }
4116
4117 void
4118 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4119 struct svc_req *req, cred_t *cr, bool_t ro)
4120 {
4121 nfs3_srv_t *ns;
4122 int error;
4123 vnode_t *vp;
4124 struct vattr *bvap;
4125 struct vattr bva;
4126 struct vattr *avap;
4127 struct vattr ava;
4128
4129 bvap = NULL;
4130 avap = NULL;
4131
4132 vp = nfs3_fhtovp(&args->file, exi);
4133
4134 DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4135 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4136 COMMIT3args *, args);
4137
4138 if (vp == NULL) {
4139 error = ESTALE;
4140 goto out;
4141 }
4142
4143 ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
4144 ns = nfs3_get_srv();
4145 bva.va_mask = AT_ALL;
4146 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4147
4148 /*
4149 * If we can't get the attributes, then we can't do the
4150 * right access checking. So, we'll fail the request.
4151 */
4152 if (error)
4153 goto out;
4154
4155 bvap = &bva;
4156
4157 if (rdonly(ro, vp)) {
4158 resp->status = NFS3ERR_ROFS;
4159 goto out1;
4160 }
4161
4162 if (vp->v_type != VREG) {
4163 resp->status = NFS3ERR_INVAL;
4164 goto out1;
4165 }
4166
4167 if (is_system_labeled()) {
4168 bslabel_t *clabel = req->rq_label;
4169
4170 ASSERT(clabel != NULL);
4171 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4172 "got client label from request(1)", struct svc_req *, req);
4173
4174 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4175 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4176 exi)) {
4177 resp->status = NFS3ERR_ACCES;
4178 goto out1;
4179 }
4180 }
4181 }
4182
4183 if (crgetuid(cr) != bva.va_uid &&
4184 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4185 goto out;
4186
4187 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4188
4189 ava.va_mask = AT_ALL;
4190 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4191
4192 if (error)
4193 goto out;
4194
4195 resp->status = NFS3_OK;
4196 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4197 resp->resok.verf = ns->write3verf;
4198
4199 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4200 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4201 COMMIT3res *, resp);
4202
4203 VN_RELE(vp);
4204
4205 return;
4206
4207 out:
4208 if (curthread->t_flag & T_WOULDBLOCK) {
4209 curthread->t_flag &= ~T_WOULDBLOCK;
4210 resp->status = NFS3ERR_JUKEBOX;
4211 } else
4212 resp->status = puterrno3(error);
4213 out1:
4214 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4215 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4216 COMMIT3res *, resp);
4217
4218 if (vp != NULL)
4219 VN_RELE(vp);
4220 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4221 }
4222
4223 void *
4224 rfs3_commit_getfh(COMMIT3args *args)
4225 {
4226
4227 return (&args->file);
4228 }
4229
4230 static int
4231 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4232 {
4233
4234 vap->va_mask = 0;
4235
4236 if (sap->mode.set_it) {
4237 vap->va_mode = (mode_t)sap->mode.mode;
4238 vap->va_mask |= AT_MODE;
4239 }
4240 if (sap->uid.set_it) {
4241 vap->va_uid = (uid_t)sap->uid.uid;
4242 vap->va_mask |= AT_UID;
4243 }
4244 if (sap->gid.set_it) {
4245 vap->va_gid = (gid_t)sap->gid.gid;
4246 vap->va_mask |= AT_GID;
4247 }
4248 if (sap->size.set_it) {
4249 if (sap->size.size > (size3)((u_longlong_t)-1))
4250 return (EINVAL);
4251 vap->va_size = sap->size.size;
4252 vap->va_mask |= AT_SIZE;
4253 }
4254 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4255 #ifndef _LP64
4256 /* check time validity */
4257 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4258 return (EOVERFLOW);
4259 #endif
4260 /*
4261 * nfs protocol defines times as unsigned so don't extend sign,
4262 * unless sysadmin set nfs_allow_preepoch_time.
4263 */
4264 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4265 sap->atime.atime.seconds);
4266 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4267 vap->va_mask |= AT_ATIME;
4268 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4269 gethrestime(&vap->va_atime);
4270 vap->va_mask |= AT_ATIME;
4271 }
4272 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4273 #ifndef _LP64
4274 /* check time validity */
4275 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4276 return (EOVERFLOW);
4277 #endif
4278 /*
4279 * nfs protocol defines times as unsigned so don't extend sign,
4280 * unless sysadmin set nfs_allow_preepoch_time.
4281 */
4282 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4283 sap->mtime.mtime.seconds);
4284 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4285 vap->va_mask |= AT_MTIME;
4286 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4287 gethrestime(&vap->va_mtime);
4288 vap->va_mask |= AT_MTIME;
4289 }
4290
4291 return (0);
4292 }
4293
4294 static const ftype3 vt_to_nf3[] = {
4295 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4296 };
4297
4298 static int
4299 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4300 {
4301
4302 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4303 /* Return error if time or size overflow */
4304 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4305 return (EOVERFLOW);
4306 }
4307 fap->type = vt_to_nf3[vap->va_type];
4308 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4309 fap->nlink = (uint32)vap->va_nlink;
4310 if (vap->va_uid == UID_NOBODY)
4311 fap->uid = (uid3)NFS_UID_NOBODY;
4312 else
4313 fap->uid = (uid3)vap->va_uid;
4314 if (vap->va_gid == GID_NOBODY)
4315 fap->gid = (gid3)NFS_GID_NOBODY;
4316 else
4317 fap->gid = (gid3)vap->va_gid;
4318 fap->size = (size3)vap->va_size;
4319 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4320 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4321 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4322 fap->fsid = (uint64)vap->va_fsid;
4323 fap->fileid = (fileid3)vap->va_nodeid;
4324 fap->atime.seconds = vap->va_atime.tv_sec;
4325 fap->atime.nseconds = vap->va_atime.tv_nsec;
4326 fap->mtime.seconds = vap->va_mtime.tv_sec;
4327 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4328 fap->ctime.seconds = vap->va_ctime.tv_sec;
4329 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4330 return (0);
4331 }
4332
4333 static int
4334 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4335 {
4336
4337 /* Return error if time or size overflow */
4338 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4339 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4340 NFS3_SIZE_OK(vap->va_size))) {
4341 return (EOVERFLOW);
4342 }
4343 wccap->size = (size3)vap->va_size;
4344 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4345 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4346 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4347 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4348 return (0);
4349 }
4350
4351 static void
4352 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4353 {
4354
4355 /* don't return attrs if time overflow */
4356 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4357 poap->attributes = TRUE;
4358 } else
4359 poap->attributes = FALSE;
4360 }
4361
4362 void
4363 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4364 {
4365
4366 /* don't return attrs if time overflow */
4367 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4368 poap->attributes = TRUE;
4369 } else
4370 poap->attributes = FALSE;
4371 }
4372
4373 static void
4374 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4375 {
4376 vattr_to_pre_op_attr(bvap, &wccp->before);
4377 vattr_to_post_op_attr(avap, &wccp->after);
4378 }
4379
4380 static int
4381 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4382 {
4383 struct clist *wcl;
4384 int wlist_len;
4385 count3 count = rok->count;
4386
4387 wcl = args->wlist;
4388 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4389 return (FALSE);
4390
4391 wcl = args->wlist;
4392 rok->wlist_len = wlist_len;
4393 rok->wlist = wcl;
4394 return (TRUE);
4395 }
4396
4397 void
4398 rfs3_srv_zone_init(nfs_globals_t *ng)
4399 {
4400 nfs3_srv_t *ns;
4401 struct rfs3_verf_overlay {
4402 uint_t id; /* a "unique" identifier */
4403 int ts; /* a unique timestamp */
4404 } *verfp;
4405 timestruc_t now;
4406
4407 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4408
4409 /*
4410 * The following algorithm attempts to find a unique verifier
4411 * to be used as the write verifier returned from the server
4412 * to the client. It is important that this verifier change
4413 * whenever the server reboots. Of secondary importance, it
4414 * is important for the verifier to be unique between two
4415 * different servers.
4416 *
4417 * Thus, an attempt is made to use the system hostid and the
4418 * current time in seconds when the nfssrv kernel module is
4419 * loaded. It is assumed that an NFS server will not be able
4420 * to boot and then to reboot in less than a second. If the
4421 * hostid has not been set, then the current high resolution
4422 * time is used. This will ensure different verifiers each
4423 * time the server reboots and minimize the chances that two
4424 * different servers will have the same verifier.
4425 */
4426
4427 #ifndef lint
4428 /*
4429 * We ASSERT that this constant logic expression is
4430 * always true because in the past, it wasn't.
4431 */
4432 ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4433 #endif
4434
4435 gethrestime(&now);
4436 verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4437 verfp->ts = (int)now.tv_sec;
4438 verfp->id = zone_get_hostid(NULL);
4439
4440 if (verfp->id == 0)
4441 verfp->id = (uint_t)now.tv_nsec;
4442
4443 ng->nfs3_srv = ns;
4444 }
4445
4446 void
4447 rfs3_srv_zone_fini(nfs_globals_t *ng)
4448 {
4449 nfs3_srv_t *ns = ng->nfs3_srv;
4450
4451 ng->nfs3_srv = NULL;
4452
4453 kmem_free(ns, sizeof (*ns));
4454 }
4455
4456 void
4457 rfs3_srvrinit(void)
4458 {
4459 nfs3_srv_caller_id = fs_new_caller_id();
4460 }
4461
/*
 * Counterpart of rfs3_srvrinit().  There is currently nothing to undo.
 */
void
rfs3_srvrfini(void)
{

	/* Nothing to do */
}