1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2018 Nexenta Systems, Inc.
24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 #include <sys/sdt.h>
52
53 #include <rpc/types.h>
54 #include <rpc/auth.h>
55 #include <rpc/svc.h>
56 #include <rpc/rpc_rdma.h>
57
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61
62 #include <sys/strsubr.h>
63 #include <sys/tsol/label.h>
64 #include <sys/tsol/tndb.h>
65
66 #include <sys/zone.h>
67
68 #include <inet/ip.h>
69 #include <inet/ip6.h>
70
71 /*
72 * Zone global variables of NFSv3 server
73 */
74 typedef struct nfs3_srv {
75 writeverf3 write3verf;
76 } nfs3_srv_t;
77
78 /*
79 * These are the interface routines for the server side of the
80 * Network File System. See the NFS version 3 protocol specification
81 * for a description of this interface.
82 */
83
84 static int sattr3_to_vattr(sattr3 *, struct vattr *);
85 static int vattr_to_fattr3(struct vattr *, fattr3 *);
86 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
87 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
88 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
89 static int rdma_setup_read_data3(READ3args *, READ3resok *);
90
91 extern int nfs_loaned_buffers;
92
93 u_longlong_t nfs3_srv_caller_id;
94
95 static nfs3_srv_t *
96 nfs3_get_srv(void)
97 {
98 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
99 nfs3_srv_t *srv = ng->nfs3_srv;
100 ASSERT(srv != NULL);
101 return (srv);
102 }
103
104 /* ARGSUSED */
105 void
106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
107 struct svc_req *req, cred_t *cr, bool_t ro)
108 {
109 int error;
110 vnode_t *vp;
111 struct vattr va;
112
113 vp = nfs3_fhtovp(&args->object, exi);
114
115 DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
116 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
117 GETATTR3args *, args);
118
119 if (vp == NULL) {
120 error = ESTALE;
121 goto out;
122 }
123
124 va.va_mask = AT_ALL;
125 error = rfs4_delegated_getattr(vp, &va, 0, cr);
126
127 if (!error) {
128 /* Lie about the object type for a referral */
129 if (vn_is_nfs_reparse(vp, cr))
130 va.va_type = VLNK;
131
132 /* overflow error if time or size is out of range */
133 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
134 if (error)
135 goto out;
136 resp->status = NFS3_OK;
137
138 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
139 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
140 GETATTR3res *, resp);
141
142 VN_RELE(vp);
143
144 return;
145 }
146
147 out:
148 if (curthread->t_flag & T_WOULDBLOCK) {
149 curthread->t_flag &= ~T_WOULDBLOCK;
150 resp->status = NFS3ERR_JUKEBOX;
151 } else
152 resp->status = puterrno3(error);
153
154 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
155 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
156 GETATTR3res *, resp);
157
158 if (vp != NULL)
159 VN_RELE(vp);
160 }
161
162 void *
163 rfs3_getattr_getfh(GETATTR3args *args)
164 {
165
166 return (&args->object);
167 }
168
169 void
170 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
171 struct svc_req *req, cred_t *cr, bool_t ro)
172 {
173 int error;
174 vnode_t *vp;
175 struct vattr *bvap;
176 struct vattr bva;
177 struct vattr *avap;
178 struct vattr ava;
179 int flag;
180 int in_crit = 0;
181 struct flock64 bf;
182 caller_context_t ct;
183
184 bvap = NULL;
185 avap = NULL;
186
187 vp = nfs3_fhtovp(&args->object, exi);
188
189 DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
190 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
191 SETATTR3args *, args);
192
193 if (vp == NULL) {
194 error = ESTALE;
195 goto out;
196 }
197
198 error = sattr3_to_vattr(&args->new_attributes, &ava);
199 if (error)
200 goto out;
201
202 if (is_system_labeled()) {
203 bslabel_t *clabel = req->rq_label;
204
205 ASSERT(clabel != NULL);
206 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
207 "got client label from request(1)", struct svc_req *, req);
208
209 if (!blequal(&l_admin_low->tsl_label, clabel)) {
210 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
211 exi)) {
212 resp->status = NFS3ERR_ACCES;
213 goto out1;
214 }
215 }
216 }
217
218 /*
219 * We need to specially handle size changes because of
220 * possible conflicting NBMAND locks. Get into critical
221 * region before VOP_GETATTR, so the size attribute is
222 * valid when checking conflicts.
223 *
224 * Also, check to see if the v4 side of the server has
225 * delegated this file. If so, then we return JUKEBOX to
226 * allow the client to retrasmit its request.
227 */
228 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
229 if (nbl_need_check(vp)) {
230 nbl_start_crit(vp, RW_READER);
231 in_crit = 1;
232 }
233 }
234
235 bva.va_mask = AT_ALL;
236 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
237
238 /*
239 * If we can't get the attributes, then we can't do the
240 * right access checking. So, we'll fail the request.
241 */
242 if (error)
243 goto out;
244
245 bvap = &bva;
246
247 if (rdonly(ro, vp)) {
248 resp->status = NFS3ERR_ROFS;
249 goto out1;
250 }
251
252 if (args->guard.check &&
253 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
254 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
255 resp->status = NFS3ERR_NOT_SYNC;
256 goto out1;
257 }
258
259 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
260 flag = ATTR_UTIME;
261 else
262 flag = 0;
263
264 /*
265 * If the filesystem is exported with nosuid, then mask off
266 * the setuid and setgid bits.
267 */
268 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
269 (exi->exi_export.ex_flags & EX_NOSUID))
270 ava.va_mode &= ~(VSUID | VSGID);
271
272 ct.cc_sysid = 0;
273 ct.cc_pid = 0;
274 ct.cc_caller_id = nfs3_srv_caller_id;
275 ct.cc_flags = CC_DONTBLOCK;
276
277 /*
278 * We need to specially handle size changes because it is
279 * possible for the client to create a file with modes
280 * which indicate read-only, but with the file opened for
281 * writing. If the client then tries to set the size of
282 * the file, then the normal access checking done in
283 * VOP_SETATTR would prevent the client from doing so,
284 * although it should be legal for it to do so. To get
285 * around this, we do the access checking for ourselves
286 * and then use VOP_SPACE which doesn't do the access
287 * checking which VOP_SETATTR does. VOP_SPACE can only
288 * operate on VREG files, let VOP_SETATTR handle the other
289 * extremely rare cases.
290 * Also the client should not be allowed to change the
291 * size of the file if there is a conflicting non-blocking
292 * mandatory lock in the region the change.
293 */
294 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
295 if (in_crit) {
296 u_offset_t offset;
297 ssize_t length;
298
299 if (ava.va_size < bva.va_size) {
300 offset = ava.va_size;
301 length = bva.va_size - ava.va_size;
302 } else {
303 offset = bva.va_size;
304 length = ava.va_size - bva.va_size;
305 }
306 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
307 NULL)) {
308 error = EACCES;
309 goto out;
310 }
311 }
312
313 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
314 ava.va_mask &= ~AT_SIZE;
315 bf.l_type = F_WRLCK;
316 bf.l_whence = 0;
317 bf.l_start = (off64_t)ava.va_size;
318 bf.l_len = 0;
319 bf.l_sysid = 0;
320 bf.l_pid = 0;
321 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
322 (offset_t)ava.va_size, cr, &ct);
323 }
324 }
325
326 if (!error && ava.va_mask)
327 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
328
329 /* check if a monitor detected a delegation conflict */
330 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
331 resp->status = NFS3ERR_JUKEBOX;
332 goto out1;
333 }
334
335 ava.va_mask = AT_ALL;
336 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
337
338 /*
339 * Force modified metadata out to stable storage.
340 */
341 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
342
343 if (error)
344 goto out;
345
346 if (in_crit)
347 nbl_end_crit(vp);
348
349 resp->status = NFS3_OK;
350 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
351
352 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
353 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
354 SETATTR3res *, resp);
355
356 VN_RELE(vp);
357
358 return;
359
360 out:
361 if (curthread->t_flag & T_WOULDBLOCK) {
362 curthread->t_flag &= ~T_WOULDBLOCK;
363 resp->status = NFS3ERR_JUKEBOX;
364 } else
365 resp->status = puterrno3(error);
366 out1:
367 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
368 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
369 SETATTR3res *, resp);
370
371 if (vp != NULL) {
372 if (in_crit)
373 nbl_end_crit(vp);
374 VN_RELE(vp);
375 }
376 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
377 }
378
379 void *
380 rfs3_setattr_getfh(SETATTR3args *args)
381 {
382
383 return (&args->object);
384 }
385
386 /* ARGSUSED */
387 void
388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
389 struct svc_req *req, cred_t *cr, bool_t ro)
390 {
391 int error;
392 vnode_t *vp;
393 vnode_t *dvp;
394 struct vattr *vap;
395 struct vattr va;
396 struct vattr *dvap;
397 struct vattr dva;
398 nfs_fh3 *fhp;
399 struct sec_ol sec = {0, 0};
400 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
401 struct sockaddr *ca;
402 char *name = NULL;
403
404 dvap = NULL;
405
406 if (exi != NULL)
407 exi_hold(exi);
408
409 /*
410 * Allow lookups from the root - the default
411 * location of the public filehandle.
412 */
413 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
414 dvp = ZONE_ROOTVP();
415 VN_HOLD(dvp);
416
417 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
418 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
419 LOOKUP3args *, args);
420 } else {
421 dvp = nfs3_fhtovp(&args->what.dir, exi);
422
423 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
424 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
425 LOOKUP3args *, args);
426
427 if (dvp == NULL) {
428 error = ESTALE;
429 goto out;
430 }
431 }
432
433 dva.va_mask = AT_ALL;
434 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
435
436 if (args->what.name == nfs3nametoolong) {
437 resp->status = NFS3ERR_NAMETOOLONG;
438 goto out1;
439 }
440
441 if (args->what.name == NULL || *(args->what.name) == '\0') {
442 resp->status = NFS3ERR_ACCES;
443 goto out1;
444 }
445
446 fhp = &args->what.dir;
447 ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
448 if (strcmp(args->what.name, "..") == 0 &&
449 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
450 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
451 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
452 /*
453 * special case for ".." and 'nohide'exported root
454 */
455 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
456 resp->status = NFS3ERR_ACCES;
457 goto out1;
458 }
459 } else {
460 resp->status = NFS3ERR_NOENT;
461 goto out1;
462 }
463 }
464
465 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
466 name = nfscmd_convname(ca, exi, args->what.name,
467 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
468
469 if (name == NULL) {
470 resp->status = NFS3ERR_ACCES;
471 goto out1;
472 }
473
474 /*
475 * If the public filehandle is used then allow
476 * a multi-component lookup
477 */
478 if (PUBLIC_FH3(&args->what.dir)) {
479 publicfh_flag = TRUE;
480
481 exi_rele(exi);
482
483 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
484 &exi, &sec);
485
486 /*
487 * Since WebNFS may bypass MOUNT, we need to ensure this
488 * request didn't come from an unlabeled admin_low client.
489 */
490 if (is_system_labeled() && error == 0) {
491 int addr_type;
492 void *ipaddr;
493 tsol_tpc_t *tp;
494
495 if (ca->sa_family == AF_INET) {
496 addr_type = IPV4_VERSION;
497 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
498 } else if (ca->sa_family == AF_INET6) {
499 addr_type = IPV6_VERSION;
500 ipaddr = &((struct sockaddr_in6 *)
501 ca)->sin6_addr;
502 }
503 tp = find_tpc(ipaddr, addr_type, B_FALSE);
504 if (tp == NULL || tp->tpc_tp.tp_doi !=
505 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
506 SUN_CIPSO) {
507 VN_RELE(vp);
508 error = EACCES;
509 }
510 if (tp != NULL)
511 TPC_RELE(tp);
512 }
513 } else {
514 error = VOP_LOOKUP(dvp, name, &vp,
515 NULL, 0, NULL, cr, NULL, NULL, NULL);
516 }
517
518 if (name != args->what.name)
519 kmem_free(name, MAXPATHLEN + 1);
520
521 if (error == 0 && vn_ismntpt(vp)) {
522 error = rfs_cross_mnt(&vp, &exi);
523 if (error)
524 VN_RELE(vp);
525 }
526
527 if (is_system_labeled() && error == 0) {
528 bslabel_t *clabel = req->rq_label;
529
530 ASSERT(clabel != NULL);
531 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
532 "got client label from request(1)", struct svc_req *, req);
533
534 if (!blequal(&l_admin_low->tsl_label, clabel)) {
535 if (!do_rfs_label_check(clabel, dvp,
536 DOMINANCE_CHECK, exi)) {
537 VN_RELE(vp);
538 error = EACCES;
539 }
540 }
541 }
542
543 dva.va_mask = AT_ALL;
544 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
545
546 if (error)
547 goto out;
548
549 if (sec.sec_flags & SEC_QUERY) {
550 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
551 } else {
552 error = makefh3(&resp->resok.object, vp, exi);
553 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
554 auth_weak = TRUE;
555 }
556
557 if (error) {
558 VN_RELE(vp);
559 goto out;
560 }
561
562 va.va_mask = AT_ALL;
563 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
564
565 exi_rele(exi);
566 VN_RELE(vp);
567
568 resp->status = NFS3_OK;
569 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
570 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
571
572 /*
573 * If it's public fh, no 0x81, and client's flavor is
574 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
575 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
576 */
577 if (auth_weak)
578 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
579
580 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
581 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
582 LOOKUP3res *, resp);
583 VN_RELE(dvp);
584
585 return;
586
587 out:
588 if (curthread->t_flag & T_WOULDBLOCK) {
589 curthread->t_flag &= ~T_WOULDBLOCK;
590 resp->status = NFS3ERR_JUKEBOX;
591 } else
592 resp->status = puterrno3(error);
593 out1:
594 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
595 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
596 LOOKUP3res *, resp);
597
598 if (exi != NULL)
599 exi_rele(exi);
600
601 if (dvp != NULL)
602 VN_RELE(dvp);
603 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
604
605 }
606
607 void *
608 rfs3_lookup_getfh(LOOKUP3args *args)
609 {
610
611 return (&args->what.dir);
612 }
613
614 /* ARGSUSED */
615 void
616 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
617 struct svc_req *req, cred_t *cr, bool_t ro)
618 {
619 int error;
620 vnode_t *vp;
621 struct vattr *vap;
622 struct vattr va;
623 int checkwriteperm;
624 boolean_t dominant_label = B_FALSE;
625 boolean_t equal_label = B_FALSE;
626 boolean_t admin_low_client;
627
628 vap = NULL;
629
630 vp = nfs3_fhtovp(&args->object, exi);
631
632 DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
633 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
634 ACCESS3args *, args);
635
636 if (vp == NULL) {
637 error = ESTALE;
638 goto out;
639 }
640
641 /*
642 * If the file system is exported read only, it is not appropriate
643 * to check write permissions for regular files and directories.
644 * Special files are interpreted by the client, so the underlying
645 * permissions are sent back to the client for interpretation.
646 */
647 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
648 checkwriteperm = 0;
649 else
650 checkwriteperm = 1;
651
652 /*
653 * We need the mode so that we can correctly determine access
654 * permissions relative to a mandatory lock file. Access to
655 * mandatory lock files is denied on the server, so it might
656 * as well be reflected to the server during the open.
657 */
658 va.va_mask = AT_MODE;
659 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
660 if (error)
661 goto out;
662
663 vap = &va;
664
665 resp->resok.access = 0;
666
667 if (is_system_labeled()) {
668 bslabel_t *clabel = req->rq_label;
669
670 ASSERT(clabel != NULL);
671 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
672 "got client label from request(1)", struct svc_req *, req);
673
674 if (!blequal(&l_admin_low->tsl_label, clabel)) {
675 if ((equal_label = do_rfs_label_check(clabel, vp,
676 EQUALITY_CHECK, exi)) == B_FALSE) {
677 dominant_label = do_rfs_label_check(clabel,
678 vp, DOMINANCE_CHECK, exi);
679 } else
680 dominant_label = B_TRUE;
681 admin_low_client = B_FALSE;
682 } else
683 admin_low_client = B_TRUE;
684 }
685
686 if (args->access & ACCESS3_READ) {
687 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
688 if (error) {
689 if (curthread->t_flag & T_WOULDBLOCK)
690 goto out;
691 } else if (!MANDLOCK(vp, va.va_mode) &&
692 (!is_system_labeled() || admin_low_client ||
693 dominant_label))
694 resp->resok.access |= ACCESS3_READ;
695 }
696 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
697 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
698 if (error) {
699 if (curthread->t_flag & T_WOULDBLOCK)
700 goto out;
701 } else if (!is_system_labeled() || admin_low_client ||
702 dominant_label)
703 resp->resok.access |= ACCESS3_LOOKUP;
704 }
705 if (checkwriteperm &&
706 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
707 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
708 if (error) {
709 if (curthread->t_flag & T_WOULDBLOCK)
710 goto out;
711 } else if (!MANDLOCK(vp, va.va_mode) &&
712 (!is_system_labeled() || admin_low_client || equal_label)) {
713 resp->resok.access |=
714 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
715 }
716 }
717 if (checkwriteperm &&
718 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
719 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
720 if (error) {
721 if (curthread->t_flag & T_WOULDBLOCK)
722 goto out;
723 } else if (!is_system_labeled() || admin_low_client ||
724 equal_label)
725 resp->resok.access |= ACCESS3_DELETE;
726 }
727 if (args->access & ACCESS3_EXECUTE) {
728 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
729 if (error) {
730 if (curthread->t_flag & T_WOULDBLOCK)
731 goto out;
732 } else if (!MANDLOCK(vp, va.va_mode) &&
733 (!is_system_labeled() || admin_low_client ||
734 dominant_label))
735 resp->resok.access |= ACCESS3_EXECUTE;
736 }
737
738 va.va_mask = AT_ALL;
739 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
740
741 resp->status = NFS3_OK;
742 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
743
744 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
745 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
746 ACCESS3res *, resp);
747
748 VN_RELE(vp);
749
750 return;
751
752 out:
753 if (curthread->t_flag & T_WOULDBLOCK) {
754 curthread->t_flag &= ~T_WOULDBLOCK;
755 resp->status = NFS3ERR_JUKEBOX;
756 } else
757 resp->status = puterrno3(error);
758 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
759 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
760 ACCESS3res *, resp);
761 if (vp != NULL)
762 VN_RELE(vp);
763 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
764 }
765
766 void *
767 rfs3_access_getfh(ACCESS3args *args)
768 {
769
770 return (&args->object);
771 }
772
773 /* ARGSUSED */
774 void
775 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
776 struct svc_req *req, cred_t *cr, bool_t ro)
777 {
778 int error;
779 vnode_t *vp;
780 struct vattr *vap;
781 struct vattr va;
782 struct iovec iov;
783 struct uio uio;
784 char *data;
785 struct sockaddr *ca;
786 char *name = NULL;
787 int is_referral = 0;
788
789 vap = NULL;
790
791 vp = nfs3_fhtovp(&args->symlink, exi);
792
793 DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
794 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
795 READLINK3args *, args);
796
797 if (vp == NULL) {
798 error = ESTALE;
799 goto out;
800 }
801
802 va.va_mask = AT_ALL;
803 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
804 if (error)
805 goto out;
806
807 vap = &va;
808
809 /* We lied about the object type for a referral */
810 if (vn_is_nfs_reparse(vp, cr))
811 is_referral = 1;
812
813 if (vp->v_type != VLNK && !is_referral) {
814 resp->status = NFS3ERR_INVAL;
815 goto out1;
816 }
817
818 if (MANDLOCK(vp, va.va_mode)) {
819 resp->status = NFS3ERR_ACCES;
820 goto out1;
821 }
822
823 if (is_system_labeled()) {
824 bslabel_t *clabel = req->rq_label;
825
826 ASSERT(clabel != NULL);
827 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
828 "got client label from request(1)", struct svc_req *, req);
829
830 if (!blequal(&l_admin_low->tsl_label, clabel)) {
831 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
832 exi)) {
833 resp->status = NFS3ERR_ACCES;
834 goto out1;
835 }
836 }
837 }
838
839 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
840
841 if (is_referral) {
842 char *s;
843 size_t strsz;
844
845 /* Get an artificial symlink based on a referral */
846 s = build_symlink(vp, cr, &strsz);
847 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
848 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
849 vnode_t *, vp, char *, s);
850 if (s == NULL)
851 error = EINVAL;
852 else {
853 error = 0;
854 (void) strlcpy(data, s, MAXPATHLEN + 1);
855 kmem_free(s, strsz);
856 }
857
858 } else {
859
860 iov.iov_base = data;
861 iov.iov_len = MAXPATHLEN;
862 uio.uio_iov = &iov;
863 uio.uio_iovcnt = 1;
864 uio.uio_segflg = UIO_SYSSPACE;
865 uio.uio_extflg = UIO_COPY_CACHED;
866 uio.uio_loffset = 0;
867 uio.uio_resid = MAXPATHLEN;
868
869 error = VOP_READLINK(vp, &uio, cr, NULL);
870
871 if (!error)
872 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
873 }
874
875 va.va_mask = AT_ALL;
876 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
877
878 /* Lie about object type again just to be consistent */
879 if (is_referral && vap != NULL)
880 vap->va_type = VLNK;
881
882 #if 0 /* notyet */
883 /*
884 * Don't do this. It causes local disk writes when just
885 * reading the file and the overhead is deemed larger
886 * than the benefit.
887 */
888 /*
889 * Force modified metadata out to stable storage.
890 */
891 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
892 #endif
893
894 if (error) {
895 kmem_free(data, MAXPATHLEN + 1);
896 goto out;
897 }
898
899 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
900 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
901 MAXPATHLEN + 1);
902
903 if (name == NULL) {
904 /*
905 * Even though the conversion failed, we return
906 * something. We just don't translate it.
907 */
908 name = data;
909 }
910
911 resp->status = NFS3_OK;
912 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
913 resp->resok.data = name;
914
915 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
916 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
917 READLINK3res *, resp);
918 VN_RELE(vp);
919
920 if (name != data)
921 kmem_free(data, MAXPATHLEN + 1);
922
923 return;
924
925 out:
926 if (curthread->t_flag & T_WOULDBLOCK) {
927 curthread->t_flag &= ~T_WOULDBLOCK;
928 resp->status = NFS3ERR_JUKEBOX;
929 } else
930 resp->status = puterrno3(error);
931 out1:
932 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
933 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
934 READLINK3res *, resp);
935 if (vp != NULL)
936 VN_RELE(vp);
937 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
938 }
939
940 void *
941 rfs3_readlink_getfh(READLINK3args *args)
942 {
943
944 return (&args->symlink);
945 }
946
947 void
948 rfs3_readlink_free(READLINK3res *resp)
949 {
950
951 if (resp->status == NFS3_OK)
952 kmem_free(resp->resok.data, MAXPATHLEN + 1);
953 }
954
955 /*
956 * Server routine to handle read
957 * May handle RDMA data as well as mblks
958 */
959 /* ARGSUSED */
960 void
961 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
962 struct svc_req *req, cred_t *cr, bool_t ro)
963 {
964 int error;
965 vnode_t *vp;
966 struct vattr *vap;
967 struct vattr va;
968 struct iovec iov, *iovp = NULL;
969 int iovcnt;
970 struct uio uio;
971 u_offset_t offset;
972 mblk_t *mp = NULL;
973 int in_crit = 0;
974 int need_rwunlock = 0;
975 caller_context_t ct;
976 int rdma_used = 0;
977 int loaned_buffers;
978 struct uio *uiop;
979
980 vap = NULL;
981
982 vp = nfs3_fhtovp(&args->file, exi);
983
984 DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
985 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
986 READ3args *, args);
987
988
989 if (vp == NULL) {
990 error = ESTALE;
991 goto out;
992 }
993
994 if (args->wlist) {
995 if (args->count > clist_len(args->wlist)) {
996 error = EINVAL;
997 goto out;
998 }
999 rdma_used = 1;
1000 }
1001
1002 /* use loaned buffers for TCP */
1003 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1004
1005 if (is_system_labeled()) {
1006 bslabel_t *clabel = req->rq_label;
1007
1008 ASSERT(clabel != NULL);
1009 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1010 "got client label from request(1)", struct svc_req *, req);
1011
1012 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1013 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1014 exi)) {
1015 resp->status = NFS3ERR_ACCES;
1016 goto out1;
1017 }
1018 }
1019 }
1020
1021 ct.cc_sysid = 0;
1022 ct.cc_pid = 0;
1023 ct.cc_caller_id = nfs3_srv_caller_id;
1024 ct.cc_flags = CC_DONTBLOCK;
1025
1026 /*
1027 * Enter the critical region before calling VOP_RWLOCK
1028 * to avoid a deadlock with write requests.
1029 */
1030 if (nbl_need_check(vp)) {
1031 nbl_start_crit(vp, RW_READER);
1032 in_crit = 1;
1033 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1034 NULL)) {
1035 error = EACCES;
1036 goto out;
1037 }
1038 }
1039
1040 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1041
1042 /* check if a monitor detected a delegation conflict */
1043 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1044 resp->status = NFS3ERR_JUKEBOX;
1045 goto out1;
1046 }
1047
1048 need_rwunlock = 1;
1049
1050 va.va_mask = AT_ALL;
1051 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1052
1053 /*
1054 * If we can't get the attributes, then we can't do the
1055 * right access checking. So, we'll fail the request.
1056 */
1057 if (error)
1058 goto out;
1059
1060 vap = &va;
1061
1062 if (vp->v_type != VREG) {
1063 resp->status = NFS3ERR_INVAL;
1064 goto out1;
1065 }
1066
1067 if (crgetuid(cr) != va.va_uid) {
1068 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1069 if (error) {
1070 if (curthread->t_flag & T_WOULDBLOCK)
1071 goto out;
1072 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1073 if (error)
1074 goto out;
1075 }
1076 }
1077
1078 if (MANDLOCK(vp, va.va_mode)) {
1079 resp->status = NFS3ERR_ACCES;
1080 goto out1;
1081 }
1082
1083 offset = args->offset;
1084 if (offset >= va.va_size) {
1085 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1086 if (in_crit)
1087 nbl_end_crit(vp);
1088 resp->status = NFS3_OK;
1089 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1090 resp->resok.count = 0;
1091 resp->resok.eof = TRUE;
1092 resp->resok.data.data_len = 0;
1093 resp->resok.data.data_val = NULL;
1094 resp->resok.data.mp = NULL;
1095 /* RDMA */
1096 resp->resok.wlist = args->wlist;
1097 resp->resok.wlist_len = resp->resok.count;
1098 if (resp->resok.wlist)
1099 clist_zero_len(resp->resok.wlist);
1100 goto done;
1101 }
1102
1103 if (args->count == 0) {
1104 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1105 if (in_crit)
1106 nbl_end_crit(vp);
1107 resp->status = NFS3_OK;
1108 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1109 resp->resok.count = 0;
1110 resp->resok.eof = FALSE;
1111 resp->resok.data.data_len = 0;
1112 resp->resok.data.data_val = NULL;
1113 resp->resok.data.mp = NULL;
1114 /* RDMA */
1115 resp->resok.wlist = args->wlist;
1116 resp->resok.wlist_len = resp->resok.count;
1117 if (resp->resok.wlist)
1118 clist_zero_len(resp->resok.wlist);
1119 goto done;
1120 }
1121
1122 /*
1123 * do not allocate memory more the max. allowed
1124 * transfer size
1125 */
1126 if (args->count > rfs3_tsize(req))
1127 args->count = rfs3_tsize(req);
1128
1129 if (loaned_buffers) {
1130 uiop = (uio_t *)rfs_setup_xuio(vp);
1131 ASSERT(uiop != NULL);
1132 uiop->uio_segflg = UIO_SYSSPACE;
1133 uiop->uio_loffset = args->offset;
1134 uiop->uio_resid = args->count;
1135
1136 /* Jump to do the read if successful */
1137 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1138 /*
1139 * Need to hold the vnode until after VOP_RETZCBUF()
1140 * is called.
1141 */
1142 VN_HOLD(vp);
1143 goto doio_read;
1144 }
1145
1146 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1147 uiop->uio_loffset, int, uiop->uio_resid);
1148
1149 uiop->uio_extflg = 0;
1150 /* failure to setup for zero copy */
1151 rfs_free_xuio((void *)uiop);
1152 loaned_buffers = 0;
1153 }
1154
1155 /*
1156 * If returning data via RDMA Write, then grab the chunk list.
1157 * If we aren't returning READ data w/RDMA_WRITE, then grab
1158 * a mblk.
1159 */
1160 if (rdma_used) {
1161 (void) rdma_get_wchunk(req, &iov, args->wlist);
1162 uio.uio_iov = &iov;
1163 uio.uio_iovcnt = 1;
1164 } else {
1165 /*
1166 * mp will contain the data to be sent out in the read reply.
1167 * For UDP, this will be freed after the reply has been sent
1168 * out by the driver. For TCP, it will be freed after the last
1169 * segment associated with the reply has been ACKed by the
1170 * client.
1171 */
1172 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1173 uio.uio_iov = iovp;
1174 uio.uio_iovcnt = iovcnt;
1175 }
1176
1177 uio.uio_segflg = UIO_SYSSPACE;
1178 uio.uio_extflg = UIO_COPY_CACHED;
1179 uio.uio_loffset = args->offset;
1180 uio.uio_resid = args->count;
1181 uiop = &uio;
1182
1183 doio_read:
1184 error = VOP_READ(vp, uiop, 0, cr, &ct);
1185
1186 if (error) {
1187 if (mp)
1188 freemsg(mp);
1189 /* check if a monitor detected a delegation conflict */
1190 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1191 resp->status = NFS3ERR_JUKEBOX;
1192 goto out1;
1193 }
1194 goto out;
1195 }
1196
1197 /* make mblk using zc buffers */
1198 if (loaned_buffers) {
1199 mp = uio_to_mblk(uiop);
1200 ASSERT(mp != NULL);
1201 }
1202
1203 va.va_mask = AT_ALL;
1204 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1205
1206 if (error)
1207 vap = NULL;
1208 else
1209 vap = &va;
1210
1211 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1212
1213 if (in_crit)
1214 nbl_end_crit(vp);
1215
1216 resp->status = NFS3_OK;
1217 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1218 resp->resok.count = args->count - uiop->uio_resid;
1219 if (!error && offset + resp->resok.count == va.va_size)
1220 resp->resok.eof = TRUE;
1221 else
1222 resp->resok.eof = FALSE;
1223 resp->resok.data.data_len = resp->resok.count;
1224
1225 if (mp)
1226 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1227
1228 resp->resok.data.mp = mp;
1229 resp->resok.size = (uint_t)args->count;
1230
1231 if (rdma_used) {
1232 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1233 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1234 resp->status = NFS3ERR_INVAL;
1235 }
1236 } else {
1237 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1238 (resp->resok).wlist = NULL;
1239 }
1240
1241 done:
1242 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1243 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1244 READ3res *, resp);
1245
1246 VN_RELE(vp);
1247
1248 if (iovp != NULL)
1249 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1250
1251 return;
1252
1253 out:
1254 if (curthread->t_flag & T_WOULDBLOCK) {
1255 curthread->t_flag &= ~T_WOULDBLOCK;
1256 resp->status = NFS3ERR_JUKEBOX;
1257 } else
1258 resp->status = puterrno3(error);
1259 out1:
1260 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1261 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1262 READ3res *, resp);
1263
1264 if (vp != NULL) {
1265 if (need_rwunlock)
1266 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1267 if (in_crit)
1268 nbl_end_crit(vp);
1269 VN_RELE(vp);
1270 }
1271 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1272
1273 if (iovp != NULL)
1274 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1275 }
1276
1277 void
1278 rfs3_read_free(READ3res *resp)
1279 {
1280 mblk_t *mp;
1281
1282 if (resp->status == NFS3_OK) {
1283 mp = resp->resok.data.mp;
1284 if (mp != NULL)
1285 freemsg(mp);
1286 }
1287 }
1288
1289 void *
1290 rfs3_read_getfh(READ3args *args)
1291 {
1292
1293 return (&args->file);
1294 }
1295
1296 #define MAX_IOVECS 12
1297
1298 #ifdef DEBUG
1299 static int rfs3_write_hits = 0;
1300 static int rfs3_write_misses = 0;
1301 #endif
1302
1303 void
1304 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1305 struct svc_req *req, cred_t *cr, bool_t ro)
1306 {
1307 nfs3_srv_t *ns;
1308 int error;
1309 vnode_t *vp;
1310 struct vattr *bvap = NULL;
1311 struct vattr bva;
1312 struct vattr *avap = NULL;
1313 struct vattr ava;
1314 u_offset_t rlimit;
1315 struct uio uio;
1316 struct iovec iov[MAX_IOVECS];
1317 mblk_t *m;
1318 struct iovec *iovp;
1319 int iovcnt;
1320 int ioflag;
1321 cred_t *savecred;
1322 int in_crit = 0;
1323 int rwlock_ret = -1;
1324 caller_context_t ct;
1325
1326 vp = nfs3_fhtovp(&args->file, exi);
1327
1328 DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1329 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1330 WRITE3args *, args);
1331
1332 if (vp == NULL) {
1333 error = ESTALE;
1334 goto err;
1335 }
1336
1337 ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
1338 ns = nfs3_get_srv();
1339
1340 if (is_system_labeled()) {
1341 bslabel_t *clabel = req->rq_label;
1342
1343 ASSERT(clabel != NULL);
1344 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1345 "got client label from request(1)", struct svc_req *, req);
1346
1347 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1348 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1349 exi)) {
1350 resp->status = NFS3ERR_ACCES;
1351 goto err1;
1352 }
1353 }
1354 }
1355
1356 ct.cc_sysid = 0;
1357 ct.cc_pid = 0;
1358 ct.cc_caller_id = nfs3_srv_caller_id;
1359 ct.cc_flags = CC_DONTBLOCK;
1360
1361 /*
1362 * We have to enter the critical region before calling VOP_RWLOCK
1363 * to avoid a deadlock with ufs.
1364 */
1365 if (nbl_need_check(vp)) {
1366 nbl_start_crit(vp, RW_READER);
1367 in_crit = 1;
1368 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1369 NULL)) {
1370 error = EACCES;
1371 goto err;
1372 }
1373 }
1374
1375 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1376
1377 /* check if a monitor detected a delegation conflict */
1378 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1379 resp->status = NFS3ERR_JUKEBOX;
1380 rwlock_ret = -1;
1381 goto err1;
1382 }
1383
1384
1385 bva.va_mask = AT_ALL;
1386 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1387
1388 /*
1389 * If we can't get the attributes, then we can't do the
1390 * right access checking. So, we'll fail the request.
1391 */
1392 if (error)
1393 goto err;
1394
1395 bvap = &bva;
1396 avap = bvap;
1397
1398 if (args->count != args->data.data_len) {
1399 resp->status = NFS3ERR_INVAL;
1400 goto err1;
1401 }
1402
1403 if (rdonly(ro, vp)) {
1404 resp->status = NFS3ERR_ROFS;
1405 goto err1;
1406 }
1407
1408 if (vp->v_type != VREG) {
1409 resp->status = NFS3ERR_INVAL;
1410 goto err1;
1411 }
1412
1413 if (crgetuid(cr) != bva.va_uid &&
1414 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1415 goto err;
1416
1417 if (MANDLOCK(vp, bva.va_mode)) {
1418 resp->status = NFS3ERR_ACCES;
1419 goto err1;
1420 }
1421
1422 if (args->count == 0) {
1423 resp->status = NFS3_OK;
1424 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1425 resp->resok.count = 0;
1426 resp->resok.committed = args->stable;
1427 resp->resok.verf = ns->write3verf;
1428 goto out;
1429 }
1430
1431 if (args->mblk != NULL) {
1432 iovcnt = 0;
1433 for (m = args->mblk; m != NULL; m = m->b_cont)
1434 iovcnt++;
1435 if (iovcnt <= MAX_IOVECS) {
1436 #ifdef DEBUG
1437 rfs3_write_hits++;
1438 #endif
1439 iovp = iov;
1440 } else {
1441 #ifdef DEBUG
1442 rfs3_write_misses++;
1443 #endif
1444 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1445 }
1446 mblk_to_iov(args->mblk, iovcnt, iovp);
1447
1448 } else if (args->rlist != NULL) {
1449 iovcnt = 1;
1450 iovp = iov;
1451 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1452 iovp->iov_len = args->count;
1453 } else {
1454 iovcnt = 1;
1455 iovp = iov;
1456 iovp->iov_base = args->data.data_val;
1457 iovp->iov_len = args->count;
1458 }
1459
1460 uio.uio_iov = iovp;
1461 uio.uio_iovcnt = iovcnt;
1462
1463 uio.uio_segflg = UIO_SYSSPACE;
1464 uio.uio_extflg = UIO_COPY_DEFAULT;
1465 uio.uio_loffset = args->offset;
1466 uio.uio_resid = args->count;
1467 uio.uio_llimit = curproc->p_fsz_ctl;
1468 rlimit = uio.uio_llimit - args->offset;
1469 if (rlimit < (u_offset_t)uio.uio_resid)
1470 uio.uio_resid = (int)rlimit;
1471
1472 if (args->stable == UNSTABLE)
1473 ioflag = 0;
1474 else if (args->stable == FILE_SYNC)
1475 ioflag = FSYNC;
1476 else if (args->stable == DATA_SYNC)
1477 ioflag = FDSYNC;
1478 else {
1479 if (iovp != iov)
1480 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1481 resp->status = NFS3ERR_INVAL;
1482 goto err1;
1483 }
1484
1485 /*
1486 * We're changing creds because VM may fault and we need
1487 * the cred of the current thread to be used if quota
1488 * checking is enabled.
1489 */
1490 savecred = curthread->t_cred;
1491 curthread->t_cred = cr;
1492 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1493 curthread->t_cred = savecred;
1494
1495 if (iovp != iov)
1496 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1497
1498 /* check if a monitor detected a delegation conflict */
1499 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1500 resp->status = NFS3ERR_JUKEBOX;
1501 goto err1;
1502 }
1503
1504 ava.va_mask = AT_ALL;
1505 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1506
1507 if (error)
1508 goto err;
1509
1510 /*
1511 * If we were unable to get the V_WRITELOCK_TRUE, then we
1512 * may not have accurate after attrs, so check if
1513 * we have both attributes, they have a non-zero va_seq, and
1514 * va_seq has changed by exactly one,
1515 * if not, turn off the before attr.
1516 */
1517 if (rwlock_ret != V_WRITELOCK_TRUE) {
1518 if (bvap == NULL || avap == NULL ||
1519 bvap->va_seq == 0 || avap->va_seq == 0 ||
1520 avap->va_seq != (bvap->va_seq + 1)) {
1521 bvap = NULL;
1522 }
1523 }
1524
1525 resp->status = NFS3_OK;
1526 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1527 resp->resok.count = args->count - uio.uio_resid;
1528 resp->resok.committed = args->stable;
1529 resp->resok.verf = ns->write3verf;
1530 goto out;
1531
1532 err:
1533 if (curthread->t_flag & T_WOULDBLOCK) {
1534 curthread->t_flag &= ~T_WOULDBLOCK;
1535 resp->status = NFS3ERR_JUKEBOX;
1536 } else
1537 resp->status = puterrno3(error);
1538 err1:
1539 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1540 out:
1541 DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1542 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1543 WRITE3res *, resp);
1544
1545 if (vp != NULL) {
1546 if (rwlock_ret != -1)
1547 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1548 if (in_crit)
1549 nbl_end_crit(vp);
1550 VN_RELE(vp);
1551 }
1552 }
1553
1554 void *
1555 rfs3_write_getfh(WRITE3args *args)
1556 {
1557
1558 return (&args->file);
1559 }
1560
1561 void
1562 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1563 struct svc_req *req, cred_t *cr, bool_t ro)
1564 {
1565 int error;
1566 int in_crit = 0;
1567 vnode_t *vp;
1568 vnode_t *tvp = NULL;
1569 vnode_t *dvp;
1570 struct vattr *vap;
1571 struct vattr va;
1572 struct vattr *dbvap;
1573 struct vattr dbva;
1574 struct vattr *davap;
1575 struct vattr dava;
1576 enum vcexcl excl;
1577 nfstime3 *mtime;
1578 len_t reqsize;
1579 bool_t trunc;
1580 struct sockaddr *ca;
1581 char *name = NULL;
1582
1583 dbvap = NULL;
1584 davap = NULL;
1585
1586 dvp = nfs3_fhtovp(&args->where.dir, exi);
1587
1588 DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1589 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1590 CREATE3args *, args);
1591
1592 if (dvp == NULL) {
1593 error = ESTALE;
1594 goto out;
1595 }
1596
1597 dbva.va_mask = AT_ALL;
1598 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1599 davap = dbvap;
1600
1601 if (args->where.name == nfs3nametoolong) {
1602 resp->status = NFS3ERR_NAMETOOLONG;
1603 goto out1;
1604 }
1605
1606 if (args->where.name == NULL || *(args->where.name) == '\0') {
1607 resp->status = NFS3ERR_ACCES;
1608 goto out1;
1609 }
1610
1611 if (rdonly(ro, dvp)) {
1612 resp->status = NFS3ERR_ROFS;
1613 goto out1;
1614 }
1615
1616 if (is_system_labeled()) {
1617 bslabel_t *clabel = req->rq_label;
1618
1619 ASSERT(clabel != NULL);
1620 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1621 "got client label from request(1)", struct svc_req *, req);
1622
1623 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1624 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1625 exi)) {
1626 resp->status = NFS3ERR_ACCES;
1627 goto out1;
1628 }
1629 }
1630 }
1631
1632 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1633 name = nfscmd_convname(ca, exi, args->where.name,
1634 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1635
1636 if (name == NULL) {
1637 /* This is really a Solaris EILSEQ */
1638 resp->status = NFS3ERR_INVAL;
1639 goto out1;
1640 }
1641
1642 if (args->how.mode == EXCLUSIVE) {
1643 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1644 va.va_type = VREG;
1645 va.va_mode = (mode_t)0;
1646 /*
1647 * Ensure no time overflows and that types match
1648 */
1649 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1650 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1651 va.va_mtime.tv_nsec = mtime->nseconds;
1652 excl = EXCL;
1653 } else {
1654 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1655 &va);
1656 if (error)
1657 goto out;
1658 va.va_mask |= AT_TYPE;
1659 va.va_type = VREG;
1660 if (args->how.mode == GUARDED)
1661 excl = EXCL;
1662 else {
1663 excl = NONEXCL;
1664
1665 /*
1666 * During creation of file in non-exclusive mode
1667 * if size of file is being set then make sure
1668 * that if the file already exists that no conflicting
1669 * non-blocking mandatory locks exists in the region
1670 * being modified. If there are conflicting locks fail
1671 * the operation with EACCES.
1672 */
1673 if (va.va_mask & AT_SIZE) {
1674 struct vattr tva;
1675
1676 /*
1677 * Does file already exist?
1678 */
1679 error = VOP_LOOKUP(dvp, name, &tvp,
1680 NULL, 0, NULL, cr, NULL, NULL, NULL);
1681
1682 /*
1683 * Check to see if the file has been delegated
1684 * to a v4 client. If so, then begin recall of
1685 * the delegation and return JUKEBOX to allow
1686 * the client to retrasmit its request.
1687 */
1688
1689 trunc = va.va_size == 0;
1690 if (!error &&
1691 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1692 resp->status = NFS3ERR_JUKEBOX;
1693 goto out1;
1694 }
1695
1696 /*
1697 * Check for NBMAND lock conflicts
1698 */
1699 if (!error && nbl_need_check(tvp)) {
1700 u_offset_t offset;
1701 ssize_t len;
1702
1703 nbl_start_crit(tvp, RW_READER);
1704 in_crit = 1;
1705
1706 tva.va_mask = AT_SIZE;
1707 error = VOP_GETATTR(tvp, &tva, 0, cr,
1708 NULL);
1709 /*
1710 * Can't check for conflicts, so return
1711 * error.
1712 */
1713 if (error)
1714 goto out;
1715
1716 offset = tva.va_size < va.va_size ?
1717 tva.va_size : va.va_size;
1718 len = tva.va_size < va.va_size ?
1719 va.va_size - tva.va_size :
1720 tva.va_size - va.va_size;
1721 if (nbl_conflict(tvp, NBL_WRITE,
1722 offset, len, 0, NULL)) {
1723 error = EACCES;
1724 goto out;
1725 }
1726 } else if (tvp) {
1727 VN_RELE(tvp);
1728 tvp = NULL;
1729 }
1730 }
1731 }
1732 if (va.va_mask & AT_SIZE)
1733 reqsize = va.va_size;
1734 }
1735
1736 /*
1737 * Must specify the mode.
1738 */
1739 if (!(va.va_mask & AT_MODE)) {
1740 resp->status = NFS3ERR_INVAL;
1741 goto out1;
1742 }
1743
1744 /*
1745 * If the filesystem is exported with nosuid, then mask off
1746 * the setuid and setgid bits.
1747 */
1748 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1749 va.va_mode &= ~(VSUID | VSGID);
1750
1751 tryagain:
1752 /*
1753 * The file open mode used is VWRITE. If the client needs
1754 * some other semantic, then it should do the access checking
1755 * itself. It would have been nice to have the file open mode
1756 * passed as part of the arguments.
1757 */
1758 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1759 &vp, cr, 0, NULL, NULL);
1760
1761 dava.va_mask = AT_ALL;
1762 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1763
1764 if (error) {
1765 /*
1766 * If we got something other than file already exists
1767 * then just return this error. Otherwise, we got
1768 * EEXIST. If we were doing a GUARDED create, then
1769 * just return this error. Otherwise, we need to
1770 * make sure that this wasn't a duplicate of an
1771 * exclusive create request.
1772 *
1773 * The assumption is made that a non-exclusive create
1774 * request will never return EEXIST.
1775 */
1776 if (error != EEXIST || args->how.mode == GUARDED)
1777 goto out;
1778 /*
1779 * Lookup the file so that we can get a vnode for it.
1780 */
1781 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1782 NULL, cr, NULL, NULL, NULL);
1783 if (error) {
1784 /*
1785 * We couldn't find the file that we thought that
1786 * we just created. So, we'll just try creating
1787 * it again.
1788 */
1789 if (error == ENOENT)
1790 goto tryagain;
1791 goto out;
1792 }
1793
1794 /*
1795 * If the file is delegated to a v4 client, go ahead
1796 * and initiate recall, this create is a hint that a
1797 * conflicting v3 open has occurred.
1798 */
1799
1800 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1801 VN_RELE(vp);
1802 resp->status = NFS3ERR_JUKEBOX;
1803 goto out1;
1804 }
1805
1806 va.va_mask = AT_ALL;
1807 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1808
1809 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1810 /* % with INT32_MAX to prevent overflows */
1811 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1812 vap->va_mtime.tv_sec !=
1813 (mtime->seconds % INT32_MAX) ||
1814 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1815 VN_RELE(vp);
1816 error = EEXIST;
1817 goto out;
1818 }
1819 } else {
1820
1821 if ((args->how.mode == UNCHECKED ||
1822 args->how.mode == GUARDED) &&
1823 args->how.createhow3_u.obj_attributes.size.set_it &&
1824 va.va_size == 0)
1825 trunc = TRUE;
1826 else
1827 trunc = FALSE;
1828
1829 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1830 VN_RELE(vp);
1831 resp->status = NFS3ERR_JUKEBOX;
1832 goto out1;
1833 }
1834
1835 va.va_mask = AT_ALL;
1836 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1837
1838 /*
1839 * We need to check to make sure that the file got
1840 * created to the indicated size. If not, we do a
1841 * setattr to try to change the size, but we don't
1842 * try too hard. This shouldn't a problem as most
1843 * clients will only specifiy a size of zero which
1844 * local file systems handle. However, even if
1845 * the client does specify a non-zero size, it can
1846 * still recover by checking the size of the file
1847 * after it has created it and then issue a setattr
1848 * request of its own to set the size of the file.
1849 */
1850 if (vap != NULL &&
1851 (args->how.mode == UNCHECKED ||
1852 args->how.mode == GUARDED) &&
1853 args->how.createhow3_u.obj_attributes.size.set_it &&
1854 vap->va_size != reqsize) {
1855 va.va_mask = AT_SIZE;
1856 va.va_size = reqsize;
1857 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1858 va.va_mask = AT_ALL;
1859 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1860 }
1861 }
1862
1863 if (name != args->where.name)
1864 kmem_free(name, MAXPATHLEN + 1);
1865
1866 error = makefh3(&resp->resok.obj.handle, vp, exi);
1867 if (error)
1868 resp->resok.obj.handle_follows = FALSE;
1869 else
1870 resp->resok.obj.handle_follows = TRUE;
1871
1872 /*
1873 * Force modified data and metadata out to stable storage.
1874 */
1875 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1876 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1877
1878 VN_RELE(vp);
1879 if (tvp != NULL) {
1880 if (in_crit)
1881 nbl_end_crit(tvp);
1882 VN_RELE(tvp);
1883 }
1884
1885 resp->status = NFS3_OK;
1886 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1887 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1888
1889 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1890 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1891 CREATE3res *, resp);
1892
1893 VN_RELE(dvp);
1894 return;
1895
1896 out:
1897 if (curthread->t_flag & T_WOULDBLOCK) {
1898 curthread->t_flag &= ~T_WOULDBLOCK;
1899 resp->status = NFS3ERR_JUKEBOX;
1900 } else
1901 resp->status = puterrno3(error);
1902 out1:
1903 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1904 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1905 CREATE3res *, resp);
1906
1907 if (name != NULL && name != args->where.name)
1908 kmem_free(name, MAXPATHLEN + 1);
1909
1910 if (tvp != NULL) {
1911 if (in_crit)
1912 nbl_end_crit(tvp);
1913 VN_RELE(tvp);
1914 }
1915 if (dvp != NULL)
1916 VN_RELE(dvp);
1917 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1918 }
1919
1920 void *
1921 rfs3_create_getfh(CREATE3args *args)
1922 {
1923
1924 return (&args->where.dir);
1925 }
1926
1927 void
1928 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1929 struct svc_req *req, cred_t *cr, bool_t ro)
1930 {
1931 int error;
1932 vnode_t *vp = NULL;
1933 vnode_t *dvp;
1934 struct vattr *vap;
1935 struct vattr va;
1936 struct vattr *dbvap;
1937 struct vattr dbva;
1938 struct vattr *davap;
1939 struct vattr dava;
1940 struct sockaddr *ca;
1941 char *name = NULL;
1942
1943 dbvap = NULL;
1944 davap = NULL;
1945
1946 dvp = nfs3_fhtovp(&args->where.dir, exi);
1947
1948 DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1949 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1950 MKDIR3args *, args);
1951
1952 if (dvp == NULL) {
1953 error = ESTALE;
1954 goto out;
1955 }
1956
1957 dbva.va_mask = AT_ALL;
1958 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1959 davap = dbvap;
1960
1961 if (args->where.name == nfs3nametoolong) {
1962 resp->status = NFS3ERR_NAMETOOLONG;
1963 goto out1;
1964 }
1965
1966 if (args->where.name == NULL || *(args->where.name) == '\0') {
1967 resp->status = NFS3ERR_ACCES;
1968 goto out1;
1969 }
1970
1971 if (rdonly(ro, dvp)) {
1972 resp->status = NFS3ERR_ROFS;
1973 goto out1;
1974 }
1975
1976 if (is_system_labeled()) {
1977 bslabel_t *clabel = req->rq_label;
1978
1979 ASSERT(clabel != NULL);
1980 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1981 "got client label from request(1)", struct svc_req *, req);
1982
1983 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1984 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1985 exi)) {
1986 resp->status = NFS3ERR_ACCES;
1987 goto out1;
1988 }
1989 }
1990 }
1991
1992 error = sattr3_to_vattr(&args->attributes, &va);
1993 if (error)
1994 goto out;
1995
1996 if (!(va.va_mask & AT_MODE)) {
1997 resp->status = NFS3ERR_INVAL;
1998 goto out1;
1999 }
2000
2001 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2002 name = nfscmd_convname(ca, exi, args->where.name,
2003 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2004
2005 if (name == NULL) {
2006 resp->status = NFS3ERR_INVAL;
2007 goto out1;
2008 }
2009
2010 va.va_mask |= AT_TYPE;
2011 va.va_type = VDIR;
2012
2013 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2014
2015 if (name != args->where.name)
2016 kmem_free(name, MAXPATHLEN + 1);
2017
2018 dava.va_mask = AT_ALL;
2019 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2020
2021 /*
2022 * Force modified data and metadata out to stable storage.
2023 */
2024 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2025
2026 if (error)
2027 goto out;
2028
2029 error = makefh3(&resp->resok.obj.handle, vp, exi);
2030 if (error)
2031 resp->resok.obj.handle_follows = FALSE;
2032 else
2033 resp->resok.obj.handle_follows = TRUE;
2034
2035 va.va_mask = AT_ALL;
2036 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2037
2038 /*
2039 * Force modified data and metadata out to stable storage.
2040 */
2041 (void) VOP_FSYNC(vp, 0, cr, NULL);
2042
2043 VN_RELE(vp);
2044
2045 resp->status = NFS3_OK;
2046 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2047 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2048
2049 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2050 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2051 MKDIR3res *, resp);
2052 VN_RELE(dvp);
2053
2054 return;
2055
2056 out:
2057 if (curthread->t_flag & T_WOULDBLOCK) {
2058 curthread->t_flag &= ~T_WOULDBLOCK;
2059 resp->status = NFS3ERR_JUKEBOX;
2060 } else
2061 resp->status = puterrno3(error);
2062 out1:
2063 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2064 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2065 MKDIR3res *, resp);
2066 if (dvp != NULL)
2067 VN_RELE(dvp);
2068 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2069 }
2070
2071 void *
2072 rfs3_mkdir_getfh(MKDIR3args *args)
2073 {
2074
2075 return (&args->where.dir);
2076 }
2077
2078 void
2079 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2080 struct svc_req *req, cred_t *cr, bool_t ro)
2081 {
2082 int error;
2083 vnode_t *vp;
2084 vnode_t *dvp;
2085 struct vattr *vap;
2086 struct vattr va;
2087 struct vattr *dbvap;
2088 struct vattr dbva;
2089 struct vattr *davap;
2090 struct vattr dava;
2091 struct sockaddr *ca;
2092 char *name = NULL;
2093 char *symdata = NULL;
2094
2095 dbvap = NULL;
2096 davap = NULL;
2097
2098 dvp = nfs3_fhtovp(&args->where.dir, exi);
2099
2100 DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2101 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2102 SYMLINK3args *, args);
2103
2104 if (dvp == NULL) {
2105 error = ESTALE;
2106 goto err;
2107 }
2108
2109 dbva.va_mask = AT_ALL;
2110 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2111 davap = dbvap;
2112
2113 if (args->where.name == nfs3nametoolong) {
2114 resp->status = NFS3ERR_NAMETOOLONG;
2115 goto err1;
2116 }
2117
2118 if (args->where.name == NULL || *(args->where.name) == '\0') {
2119 resp->status = NFS3ERR_ACCES;
2120 goto err1;
2121 }
2122
2123 if (rdonly(ro, dvp)) {
2124 resp->status = NFS3ERR_ROFS;
2125 goto err1;
2126 }
2127
2128 if (is_system_labeled()) {
2129 bslabel_t *clabel = req->rq_label;
2130
2131 ASSERT(clabel != NULL);
2132 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2133 "got client label from request(1)", struct svc_req *, req);
2134
2135 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2136 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2137 exi)) {
2138 resp->status = NFS3ERR_ACCES;
2139 goto err1;
2140 }
2141 }
2142 }
2143
2144 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2145 if (error)
2146 goto err;
2147
2148 if (!(va.va_mask & AT_MODE)) {
2149 resp->status = NFS3ERR_INVAL;
2150 goto err1;
2151 }
2152
2153 if (args->symlink.symlink_data == nfs3nametoolong) {
2154 resp->status = NFS3ERR_NAMETOOLONG;
2155 goto err1;
2156 }
2157
2158 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2159 name = nfscmd_convname(ca, exi, args->where.name,
2160 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2161
2162 if (name == NULL) {
2163 /* This is really a Solaris EILSEQ */
2164 resp->status = NFS3ERR_INVAL;
2165 goto err1;
2166 }
2167
2168 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2169 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2170 if (symdata == NULL) {
2171 /* This is really a Solaris EILSEQ */
2172 resp->status = NFS3ERR_INVAL;
2173 goto err1;
2174 }
2175
2176
2177 va.va_mask |= AT_TYPE;
2178 va.va_type = VLNK;
2179
2180 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2181
2182 dava.va_mask = AT_ALL;
2183 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2184
2185 if (error)
2186 goto err;
2187
2188 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2189 NULL, NULL, NULL);
2190
2191 /*
2192 * Force modified data and metadata out to stable storage.
2193 */
2194 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2195
2196
2197 resp->status = NFS3_OK;
2198 if (error) {
2199 resp->resok.obj.handle_follows = FALSE;
2200 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2201 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2202 goto out;
2203 }
2204
2205 error = makefh3(&resp->resok.obj.handle, vp, exi);
2206 if (error)
2207 resp->resok.obj.handle_follows = FALSE;
2208 else
2209 resp->resok.obj.handle_follows = TRUE;
2210
2211 va.va_mask = AT_ALL;
2212 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2213
2214 /*
2215 * Force modified data and metadata out to stable storage.
2216 */
2217 (void) VOP_FSYNC(vp, 0, cr, NULL);
2218
2219 VN_RELE(vp);
2220
2221 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2222 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2223 goto out;
2224
2225 err:
2226 if (curthread->t_flag & T_WOULDBLOCK) {
2227 curthread->t_flag &= ~T_WOULDBLOCK;
2228 resp->status = NFS3ERR_JUKEBOX;
2229 } else
2230 resp->status = puterrno3(error);
2231 err1:
2232 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2233 out:
2234 if (name != NULL && name != args->where.name)
2235 kmem_free(name, MAXPATHLEN + 1);
2236 if (symdata != NULL && symdata != args->symlink.symlink_data)
2237 kmem_free(symdata, MAXPATHLEN + 1);
2238
2239 DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2240 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2241 SYMLINK3res *, resp);
2242
2243 if (dvp != NULL)
2244 VN_RELE(dvp);
2245 }
2246
2247 void *
2248 rfs3_symlink_getfh(SYMLINK3args *args)
2249 {
2250
2251 return (&args->where.dir);
2252 }
2253
2254 void
2255 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2256 struct svc_req *req, cred_t *cr, bool_t ro)
2257 {
2258 int error;
2259 vnode_t *vp;
2260 vnode_t *realvp;
2261 vnode_t *dvp;
2262 struct vattr *vap;
2263 struct vattr va;
2264 struct vattr *dbvap;
2265 struct vattr dbva;
2266 struct vattr *davap;
2267 struct vattr dava;
2268 int mode;
2269 enum vcexcl excl;
2270 struct sockaddr *ca;
2271 char *name = NULL;
2272
2273 dbvap = NULL;
2274 davap = NULL;
2275
2276 dvp = nfs3_fhtovp(&args->where.dir, exi);
2277
2278 DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2279 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2280 MKNOD3args *, args);
2281
2282 if (dvp == NULL) {
2283 error = ESTALE;
2284 goto out;
2285 }
2286
2287 dbva.va_mask = AT_ALL;
2288 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2289 davap = dbvap;
2290
2291 if (args->where.name == nfs3nametoolong) {
2292 resp->status = NFS3ERR_NAMETOOLONG;
2293 goto out1;
2294 }
2295
2296 if (args->where.name == NULL || *(args->where.name) == '\0') {
2297 resp->status = NFS3ERR_ACCES;
2298 goto out1;
2299 }
2300
2301 if (rdonly(ro, dvp)) {
2302 resp->status = NFS3ERR_ROFS;
2303 goto out1;
2304 }
2305
2306 if (is_system_labeled()) {
2307 bslabel_t *clabel = req->rq_label;
2308
2309 ASSERT(clabel != NULL);
2310 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2311 "got client label from request(1)", struct svc_req *, req);
2312
2313 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2314 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2315 exi)) {
2316 resp->status = NFS3ERR_ACCES;
2317 goto out1;
2318 }
2319 }
2320 }
2321
2322 switch (args->what.type) {
2323 case NF3CHR:
2324 case NF3BLK:
2325 error = sattr3_to_vattr(
2326 &args->what.mknoddata3_u.device.dev_attributes, &va);
2327 if (error)
2328 goto out;
2329 if (secpolicy_sys_devices(cr) != 0) {
2330 resp->status = NFS3ERR_PERM;
2331 goto out1;
2332 }
2333 if (args->what.type == NF3CHR)
2334 va.va_type = VCHR;
2335 else
2336 va.va_type = VBLK;
2337 va.va_rdev = makedevice(
2338 args->what.mknoddata3_u.device.spec.specdata1,
2339 args->what.mknoddata3_u.device.spec.specdata2);
2340 va.va_mask |= AT_TYPE | AT_RDEV;
2341 break;
2342 case NF3SOCK:
2343 error = sattr3_to_vattr(
2344 &args->what.mknoddata3_u.pipe_attributes, &va);
2345 if (error)
2346 goto out;
2347 va.va_type = VSOCK;
2348 va.va_mask |= AT_TYPE;
2349 break;
2350 case NF3FIFO:
2351 error = sattr3_to_vattr(
2352 &args->what.mknoddata3_u.pipe_attributes, &va);
2353 if (error)
2354 goto out;
2355 va.va_type = VFIFO;
2356 va.va_mask |= AT_TYPE;
2357 break;
2358 default:
2359 resp->status = NFS3ERR_BADTYPE;
2360 goto out1;
2361 }
2362
2363 /*
2364 * Must specify the mode.
2365 */
2366 if (!(va.va_mask & AT_MODE)) {
2367 resp->status = NFS3ERR_INVAL;
2368 goto out1;
2369 }
2370
2371 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2372 name = nfscmd_convname(ca, exi, args->where.name,
2373 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2374
2375 if (name == NULL) {
2376 resp->status = NFS3ERR_INVAL;
2377 goto out1;
2378 }
2379
2380 excl = EXCL;
2381
2382 mode = 0;
2383
2384 error = VOP_CREATE(dvp, name, &va, excl, mode,
2385 &vp, cr, 0, NULL, NULL);
2386
2387 if (name != args->where.name)
2388 kmem_free(name, MAXPATHLEN + 1);
2389
2390 dava.va_mask = AT_ALL;
2391 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2392
2393 /*
2394 * Force modified data and metadata out to stable storage.
2395 */
2396 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2397
2398 if (error)
2399 goto out;
2400
2401 resp->status = NFS3_OK;
2402
2403 error = makefh3(&resp->resok.obj.handle, vp, exi);
2404 if (error)
2405 resp->resok.obj.handle_follows = FALSE;
2406 else
2407 resp->resok.obj.handle_follows = TRUE;
2408
2409 va.va_mask = AT_ALL;
2410 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2411
2412 /*
2413 * Force modified metadata out to stable storage.
2414 *
2415 * if a underlying vp exists, pass it to VOP_FSYNC
2416 */
2417 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2418 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2419 else
2420 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2421
2422 VN_RELE(vp);
2423
2424 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2425 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2426 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2427 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2428 MKNOD3res *, resp);
2429 VN_RELE(dvp);
2430 return;
2431
2432 out:
2433 if (curthread->t_flag & T_WOULDBLOCK) {
2434 curthread->t_flag &= ~T_WOULDBLOCK;
2435 resp->status = NFS3ERR_JUKEBOX;
2436 } else
2437 resp->status = puterrno3(error);
2438 out1:
2439 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2440 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2441 MKNOD3res *, resp);
2442 if (dvp != NULL)
2443 VN_RELE(dvp);
2444 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2445 }
2446
2447 void *
2448 rfs3_mknod_getfh(MKNOD3args *args)
2449 {
2450
2451 return (&args->where.dir);
2452 }
2453
2454 void
2455 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2456 struct svc_req *req, cred_t *cr, bool_t ro)
2457 {
2458 int error = 0;
2459 vnode_t *vp;
2460 struct vattr *bvap;
2461 struct vattr bva;
2462 struct vattr *avap;
2463 struct vattr ava;
2464 vnode_t *targvp = NULL;
2465 struct sockaddr *ca;
2466 char *name = NULL;
2467
2468 bvap = NULL;
2469 avap = NULL;
2470
2471 vp = nfs3_fhtovp(&args->object.dir, exi);
2472
2473 DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2474 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2475 REMOVE3args *, args);
2476
2477 if (vp == NULL) {
2478 error = ESTALE;
2479 goto err;
2480 }
2481
2482 bva.va_mask = AT_ALL;
2483 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2484 avap = bvap;
2485
2486 if (vp->v_type != VDIR) {
2487 resp->status = NFS3ERR_NOTDIR;
2488 goto err1;
2489 }
2490
2491 if (args->object.name == nfs3nametoolong) {
2492 resp->status = NFS3ERR_NAMETOOLONG;
2493 goto err1;
2494 }
2495
2496 if (args->object.name == NULL || *(args->object.name) == '\0') {
2497 resp->status = NFS3ERR_ACCES;
2498 goto err1;
2499 }
2500
2501 if (rdonly(ro, vp)) {
2502 resp->status = NFS3ERR_ROFS;
2503 goto err1;
2504 }
2505
2506 if (is_system_labeled()) {
2507 bslabel_t *clabel = req->rq_label;
2508
2509 ASSERT(clabel != NULL);
2510 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2511 "got client label from request(1)", struct svc_req *, req);
2512
2513 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2514 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2515 exi)) {
2516 resp->status = NFS3ERR_ACCES;
2517 goto err1;
2518 }
2519 }
2520 }
2521
2522 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2523 name = nfscmd_convname(ca, exi, args->object.name,
2524 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2525
2526 if (name == NULL) {
2527 resp->status = NFS3ERR_INVAL;
2528 goto err1;
2529 }
2530
2531 /*
2532 * Check for a conflict with a non-blocking mandatory share
2533 * reservation and V4 delegations
2534 */
2535 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2536 NULL, cr, NULL, NULL, NULL);
2537 if (error != 0)
2538 goto err;
2539
2540 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2541 resp->status = NFS3ERR_JUKEBOX;
2542 goto err1;
2543 }
2544
2545 if (!nbl_need_check(targvp)) {
2546 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2547 } else {
2548 nbl_start_crit(targvp, RW_READER);
2549 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2550 error = EACCES;
2551 } else {
2552 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2553 }
2554 nbl_end_crit(targvp);
2555 }
2556 VN_RELE(targvp);
2557 targvp = NULL;
2558
2559 ava.va_mask = AT_ALL;
2560 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2561
2562 /*
2563 * Force modified data and metadata out to stable storage.
2564 */
2565 (void) VOP_FSYNC(vp, 0, cr, NULL);
2566
2567 if (error)
2568 goto err;
2569
2570 resp->status = NFS3_OK;
2571 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2572 goto out;
2573
2574 err:
2575 if (curthread->t_flag & T_WOULDBLOCK) {
2576 curthread->t_flag &= ~T_WOULDBLOCK;
2577 resp->status = NFS3ERR_JUKEBOX;
2578 } else
2579 resp->status = puterrno3(error);
2580 err1:
2581 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2582 out:
2583 DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2584 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2585 REMOVE3res *, resp);
2586
2587 if (name != NULL && name != args->object.name)
2588 kmem_free(name, MAXPATHLEN + 1);
2589
2590 if (vp != NULL)
2591 VN_RELE(vp);
2592 }
2593
2594 void *
2595 rfs3_remove_getfh(REMOVE3args *args)
2596 {
2597
2598 return (&args->object.dir);
2599 }
2600
2601 void
2602 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2603 struct svc_req *req, cred_t *cr, bool_t ro)
2604 {
2605 int error;
2606 vnode_t *vp;
2607 struct vattr *bvap;
2608 struct vattr bva;
2609 struct vattr *avap;
2610 struct vattr ava;
2611 struct sockaddr *ca;
2612 char *name = NULL;
2613
2614 bvap = NULL;
2615 avap = NULL;
2616
2617 vp = nfs3_fhtovp(&args->object.dir, exi);
2618
2619 DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2620 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2621 RMDIR3args *, args);
2622
2623 if (vp == NULL) {
2624 error = ESTALE;
2625 goto err;
2626 }
2627
2628 bva.va_mask = AT_ALL;
2629 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2630 avap = bvap;
2631
2632 if (vp->v_type != VDIR) {
2633 resp->status = NFS3ERR_NOTDIR;
2634 goto err1;
2635 }
2636
2637 if (args->object.name == nfs3nametoolong) {
2638 resp->status = NFS3ERR_NAMETOOLONG;
2639 goto err1;
2640 }
2641
2642 if (args->object.name == NULL || *(args->object.name) == '\0') {
2643 resp->status = NFS3ERR_ACCES;
2644 goto err1;
2645 }
2646
2647 if (rdonly(ro, vp)) {
2648 resp->status = NFS3ERR_ROFS;
2649 goto err1;
2650 }
2651
2652 if (is_system_labeled()) {
2653 bslabel_t *clabel = req->rq_label;
2654
2655 ASSERT(clabel != NULL);
2656 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2657 "got client label from request(1)", struct svc_req *, req);
2658
2659 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2660 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2661 exi)) {
2662 resp->status = NFS3ERR_ACCES;
2663 goto err1;
2664 }
2665 }
2666 }
2667
2668 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2669 name = nfscmd_convname(ca, exi, args->object.name,
2670 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2671
2672 if (name == NULL) {
2673 resp->status = NFS3ERR_INVAL;
2674 goto err1;
2675 }
2676
2677 error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2678
2679 if (name != args->object.name)
2680 kmem_free(name, MAXPATHLEN + 1);
2681
2682 ava.va_mask = AT_ALL;
2683 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2684
2685 /*
2686 * Force modified data and metadata out to stable storage.
2687 */
2688 (void) VOP_FSYNC(vp, 0, cr, NULL);
2689
2690 if (error) {
2691 /*
2692 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2693 * if the directory is not empty. A System V NFS server
2694 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2695 * over the wire.
2696 */
2697 if (error == EEXIST)
2698 error = ENOTEMPTY;
2699 goto err;
2700 }
2701
2702 resp->status = NFS3_OK;
2703 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2704 goto out;
2705
2706 err:
2707 if (curthread->t_flag & T_WOULDBLOCK) {
2708 curthread->t_flag &= ~T_WOULDBLOCK;
2709 resp->status = NFS3ERR_JUKEBOX;
2710 } else
2711 resp->status = puterrno3(error);
2712 err1:
2713 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2714 out:
2715 DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2716 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2717 RMDIR3res *, resp);
2718 if (vp != NULL)
2719 VN_RELE(vp);
2720
2721 }
2722
2723 void *
2724 rfs3_rmdir_getfh(RMDIR3args *args)
2725 {
2726
2727 return (&args->object.dir);
2728 }
2729
2730 void
2731 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2732 struct svc_req *req, cred_t *cr, bool_t ro)
2733 {
2734 int error = 0;
2735 vnode_t *fvp;
2736 vnode_t *tvp;
2737 vnode_t *targvp;
2738 struct vattr *fbvap;
2739 struct vattr fbva;
2740 struct vattr *favap;
2741 struct vattr fava;
2742 struct vattr *tbvap;
2743 struct vattr tbva;
2744 struct vattr *tavap;
2745 struct vattr tava;
2746 nfs_fh3 *fh3;
2747 struct exportinfo *to_exi;
2748 vnode_t *srcvp = NULL;
2749 bslabel_t *clabel;
2750 struct sockaddr *ca;
2751 char *name = NULL;
2752 char *toname = NULL;
2753
2754 fbvap = NULL;
2755 favap = NULL;
2756 tbvap = NULL;
2757 tavap = NULL;
2758 tvp = NULL;
2759
2760 fvp = nfs3_fhtovp(&args->from.dir, exi);
2761
2762 DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2763 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2764 RENAME3args *, args);
2765
2766 if (fvp == NULL) {
2767 error = ESTALE;
2768 goto err;
2769 }
2770
2771 if (is_system_labeled()) {
2772 clabel = req->rq_label;
2773 ASSERT(clabel != NULL);
2774 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2775 "got client label from request(1)", struct svc_req *, req);
2776
2777 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2778 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2779 exi)) {
2780 resp->status = NFS3ERR_ACCES;
2781 goto err1;
2782 }
2783 }
2784 }
2785
2786 fbva.va_mask = AT_ALL;
2787 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2788 favap = fbvap;
2789
2790 fh3 = &args->to.dir;
2791 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2792 if (to_exi == NULL) {
2793 resp->status = NFS3ERR_ACCES;
2794 goto err1;
2795 }
2796 exi_rele(to_exi);
2797
2798 if (to_exi != exi) {
2799 resp->status = NFS3ERR_XDEV;
2800 goto err1;
2801 }
2802
2803 tvp = nfs3_fhtovp(&args->to.dir, exi);
2804 if (tvp == NULL) {
2805 error = ESTALE;
2806 goto err;
2807 }
2808
2809 tbva.va_mask = AT_ALL;
2810 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2811 tavap = tbvap;
2812
2813 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2814 resp->status = NFS3ERR_NOTDIR;
2815 goto err1;
2816 }
2817
2818 if (args->from.name == nfs3nametoolong ||
2819 args->to.name == nfs3nametoolong) {
2820 resp->status = NFS3ERR_NAMETOOLONG;
2821 goto err1;
2822 }
2823 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2824 args->to.name == NULL || *(args->to.name) == '\0') {
2825 resp->status = NFS3ERR_ACCES;
2826 goto err1;
2827 }
2828
2829 if (rdonly(ro, tvp)) {
2830 resp->status = NFS3ERR_ROFS;
2831 goto err1;
2832 }
2833
2834 if (is_system_labeled()) {
2835 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2836 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2837 exi)) {
2838 resp->status = NFS3ERR_ACCES;
2839 goto err1;
2840 }
2841 }
2842 }
2843
2844 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2845 name = nfscmd_convname(ca, exi, args->from.name,
2846 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2847
2848 if (name == NULL) {
2849 resp->status = NFS3ERR_INVAL;
2850 goto err1;
2851 }
2852
2853 toname = nfscmd_convname(ca, exi, args->to.name,
2854 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2855
2856 if (toname == NULL) {
2857 resp->status = NFS3ERR_INVAL;
2858 goto err1;
2859 }
2860
2861 /*
2862 * Check for a conflict with a non-blocking mandatory share
2863 * reservation or V4 delegations.
2864 */
2865 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2866 NULL, cr, NULL, NULL, NULL);
2867 if (error != 0)
2868 goto err;
2869
2870 /*
2871 * If we rename a delegated file we should recall the
2872 * delegation, since future opens should fail or would
2873 * refer to a new file.
2874 */
2875 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2876 resp->status = NFS3ERR_JUKEBOX;
2877 goto err1;
2878 }
2879
2880 /*
2881 * Check for renaming over a delegated file. Check nfs4_deleg_policy
2882 * first to avoid VOP_LOOKUP if possible.
2883 */
2884 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2885 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2886 NULL, NULL, NULL) == 0) {
2887
2888 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2889 VN_RELE(targvp);
2890 resp->status = NFS3ERR_JUKEBOX;
2891 goto err1;
2892 }
2893 VN_RELE(targvp);
2894 }
2895
2896 if (!nbl_need_check(srcvp)) {
2897 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2898 } else {
2899 nbl_start_crit(srcvp, RW_READER);
2900 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2901 error = EACCES;
2902 else
2903 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2904 nbl_end_crit(srcvp);
2905 }
2906 if (error == 0)
2907 vn_renamepath(tvp, srcvp, args->to.name,
2908 strlen(args->to.name));
2909 VN_RELE(srcvp);
2910 srcvp = NULL;
2911
2912 fava.va_mask = AT_ALL;
2913 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2914 tava.va_mask = AT_ALL;
2915 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2916
2917 /*
2918 * Force modified data and metadata out to stable storage.
2919 */
2920 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2921 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2922
2923 if (error)
2924 goto err;
2925
2926 resp->status = NFS3_OK;
2927 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2928 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2929 goto out;
2930
2931 err:
2932 if (curthread->t_flag & T_WOULDBLOCK) {
2933 curthread->t_flag &= ~T_WOULDBLOCK;
2934 resp->status = NFS3ERR_JUKEBOX;
2935 } else {
2936 resp->status = puterrno3(error);
2937 }
2938 err1:
2939 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2940 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2941
2942 out:
2943 if (name != NULL && name != args->from.name)
2944 kmem_free(name, MAXPATHLEN + 1);
2945 if (toname != NULL && toname != args->to.name)
2946 kmem_free(toname, MAXPATHLEN + 1);
2947
2948 DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2949 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2950 RENAME3res *, resp);
2951 if (fvp != NULL)
2952 VN_RELE(fvp);
2953 if (tvp != NULL)
2954 VN_RELE(tvp);
2955 }
2956
2957 void *
2958 rfs3_rename_getfh(RENAME3args *args)
2959 {
2960
2961 return (&args->from.dir);
2962 }
2963
2964 void
2965 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2966 struct svc_req *req, cred_t *cr, bool_t ro)
2967 {
2968 int error;
2969 vnode_t *vp;
2970 vnode_t *dvp;
2971 struct vattr *vap;
2972 struct vattr va;
2973 struct vattr *bvap;
2974 struct vattr bva;
2975 struct vattr *avap;
2976 struct vattr ava;
2977 nfs_fh3 *fh3;
2978 struct exportinfo *to_exi;
2979 bslabel_t *clabel;
2980 struct sockaddr *ca;
2981 char *name = NULL;
2982
2983 vap = NULL;
2984 bvap = NULL;
2985 avap = NULL;
2986 dvp = NULL;
2987
2988 vp = nfs3_fhtovp(&args->file, exi);
2989
2990 DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2991 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2992 LINK3args *, args);
2993
2994 if (vp == NULL) {
2995 error = ESTALE;
2996 goto out;
2997 }
2998
2999 va.va_mask = AT_ALL;
3000 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3001
3002 fh3 = &args->link.dir;
3003 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3004 if (to_exi == NULL) {
3005 resp->status = NFS3ERR_ACCES;
3006 goto out1;
3007 }
3008 exi_rele(to_exi);
3009
3010 if (to_exi != exi) {
3011 resp->status = NFS3ERR_XDEV;
3012 goto out1;
3013 }
3014
3015 if (is_system_labeled()) {
3016 clabel = req->rq_label;
3017
3018 ASSERT(clabel != NULL);
3019 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3020 "got client label from request(1)", struct svc_req *, req);
3021
3022 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3023 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3024 exi)) {
3025 resp->status = NFS3ERR_ACCES;
3026 goto out1;
3027 }
3028 }
3029 }
3030
3031 dvp = nfs3_fhtovp(&args->link.dir, exi);
3032 if (dvp == NULL) {
3033 error = ESTALE;
3034 goto out;
3035 }
3036
3037 bva.va_mask = AT_ALL;
3038 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3039
3040 if (dvp->v_type != VDIR) {
3041 resp->status = NFS3ERR_NOTDIR;
3042 goto out1;
3043 }
3044
3045 if (args->link.name == nfs3nametoolong) {
3046 resp->status = NFS3ERR_NAMETOOLONG;
3047 goto out1;
3048 }
3049
3050 if (args->link.name == NULL || *(args->link.name) == '\0') {
3051 resp->status = NFS3ERR_ACCES;
3052 goto out1;
3053 }
3054
3055 if (rdonly(ro, dvp)) {
3056 resp->status = NFS3ERR_ROFS;
3057 goto out1;
3058 }
3059
3060 if (is_system_labeled()) {
3061 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3062 "got client label from request(1)", struct svc_req *, req);
3063
3064 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3065 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3066 exi)) {
3067 resp->status = NFS3ERR_ACCES;
3068 goto out1;
3069 }
3070 }
3071 }
3072
3073 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3074 name = nfscmd_convname(ca, exi, args->link.name,
3075 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3076
3077 if (name == NULL) {
3078 resp->status = NFS3ERR_SERVERFAULT;
3079 goto out1;
3080 }
3081
3082 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3083
3084 va.va_mask = AT_ALL;
3085 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3086 ava.va_mask = AT_ALL;
3087 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3088
3089 /*
3090 * Force modified data and metadata out to stable storage.
3091 */
3092 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3093 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3094
3095 if (error)
3096 goto out;
3097
3098 VN_RELE(dvp);
3099
3100 resp->status = NFS3_OK;
3101 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3102 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3103
3104 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3105 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3106 LINK3res *, resp);
3107
3108 VN_RELE(vp);
3109
3110 return;
3111
3112 out:
3113 if (curthread->t_flag & T_WOULDBLOCK) {
3114 curthread->t_flag &= ~T_WOULDBLOCK;
3115 resp->status = NFS3ERR_JUKEBOX;
3116 } else
3117 resp->status = puterrno3(error);
3118 out1:
3119 if (name != NULL && name != args->link.name)
3120 kmem_free(name, MAXPATHLEN + 1);
3121
3122 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3123 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3124 LINK3res *, resp);
3125
3126 if (vp != NULL)
3127 VN_RELE(vp);
3128 if (dvp != NULL)
3129 VN_RELE(dvp);
3130 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3131 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3132 }
3133
3134 void *
3135 rfs3_link_getfh(LINK3args *args)
3136 {
3137
3138 return (&args->file);
3139 }
3140
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose name length is specified
 * by the macro parameter).  If the count in the incoming request is at
 * least this large, then we are guaranteed to be able to return at
 * least one directory entry if one exists.  Therefore, we do not need
 * to check for NFS3ERR_TOOSMALL in that case.  If the requested count
 * is smaller than this, then we need to check whether that error has
 * to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is made up of the following:
 *
 *	status			- 1 * BYTES_PER_XDR_UNIT
 *	attr. flag		- 1 * BYTES_PER_XDR_UNIT
 *	cookie verifier		- 2 * BYTES_PER_XDR_UNIT
 *	attributes		- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	boolean			- 1 * BYTES_PER_XDR_UNIT
 *	file id			- 2 * BYTES_PER_XDR_UNIT
 *	directory name length	- 1 * BYTES_PER_XDR_UNIT
 *	cookie			- 2 * BYTES_PER_XDR_UNIT
 *	end of list		- 1 * BYTES_PER_XDR_UNIT
 *	end of file		- 1 * BYTES_PER_XDR_UNIT
 *	the name itself, rounded up to a multiple of BYTES_PER_XDR_UNIT
 */

#define NFS3_READDIR_MIN_COUNT(length) \
	((1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
	BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT))
3169
3170 /* ARGSUSED */
3171 void
3172 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3173 struct svc_req *req, cred_t *cr, bool_t ro)
3174 {
3175 int error;
3176 vnode_t *vp;
3177 struct vattr *vap;
3178 struct vattr va;
3179 struct iovec iov;
3180 struct uio uio;
3181 char *data;
3182 int iseof;
3183 int bufsize;
3184 int namlen;
3185 uint_t count;
3186 struct sockaddr *ca;
3187
3188 vap = NULL;
3189
3190 vp = nfs3_fhtovp(&args->dir, exi);
3191
3192 DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3193 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3194 READDIR3args *, args);
3195
3196 if (vp == NULL) {
3197 error = ESTALE;
3198 goto out;
3199 }
3200
3201 if (is_system_labeled()) {
3202 bslabel_t *clabel = req->rq_label;
3203
3204 ASSERT(clabel != NULL);
3205 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3206 "got client label from request(1)", struct svc_req *, req);
3207
3208 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3209 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3210 exi)) {
3211 resp->status = NFS3ERR_ACCES;
3212 goto out1;
3213 }
3214 }
3215 }
3216
3217 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3218
3219 va.va_mask = AT_ALL;
3220 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3221
3222 if (vp->v_type != VDIR) {
3223 resp->status = NFS3ERR_NOTDIR;
3224 goto out1;
3225 }
3226
3227 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3228 if (error)
3229 goto out;
3230
3231 /*
3232 * Now don't allow arbitrary count to alloc;
3233 * allow the maximum not to exceed rfs3_tsize()
3234 */
3235 if (args->count > rfs3_tsize(req))
3236 args->count = rfs3_tsize(req);
3237
3238 /*
3239 * Make sure that there is room to read at least one entry
3240 * if any are available.
3241 */
3242 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3243 count = DIRENT64_RECLEN(MAXNAMELEN);
3244 else
3245 count = args->count;
3246
3247 data = kmem_alloc(count, KM_SLEEP);
3248
3249 iov.iov_base = data;
3250 iov.iov_len = count;
3251 uio.uio_iov = &iov;
3252 uio.uio_iovcnt = 1;
3253 uio.uio_segflg = UIO_SYSSPACE;
3254 uio.uio_extflg = UIO_COPY_CACHED;
3255 uio.uio_loffset = (offset_t)args->cookie;
3256 uio.uio_resid = count;
3257
3258 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3259
3260 va.va_mask = AT_ALL;
3261 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3262
3263 if (error) {
3264 kmem_free(data, count);
3265 goto out;
3266 }
3267
3268 /*
3269 * If the count was not large enough to be able to guarantee
3270 * to be able to return at least one entry, then need to
3271 * check to see if NFS3ERR_TOOSMALL should be returned.
3272 */
3273 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3274 /*
3275 * bufsize is used to keep track of the size of the response.
3276 * It is primed with:
3277 * 1 for the status +
3278 * 1 for the dir_attributes.attributes boolean +
3279 * 2 for the cookie verifier
3280 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3281 * to bytes. If there are directory attributes to be
3282 * returned, then:
3283 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3284 * time BYTES_PER_XDR_UNIT is added to account for them.
3285 */
3286 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3287 if (vap != NULL)
3288 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3289 /*
3290 * An entry is composed of:
3291 * 1 for the true/false list indicator +
3292 * 2 for the fileid +
3293 * 1 for the length of the name +
3294 * 2 for the cookie +
3295 * all times BYTES_PER_XDR_UNIT to convert from
3296 * XDR units to bytes, plus the length of the name
3297 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3298 */
3299 if (count != uio.uio_resid) {
3300 namlen = strlen(((struct dirent64 *)data)->d_name);
3301 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3302 roundup(namlen, BYTES_PER_XDR_UNIT);
3303 }
3304 /*
3305 * We need to check to see if the number of bytes left
3306 * to go into the buffer will actually fit into the
3307 * buffer. This is calculated as the size of this
3308 * entry plus:
3309 * 1 for the true/false list indicator +
3310 * 1 for the eof indicator
3311 * times BYTES_PER_XDR_UNIT to convert from from
3312 * XDR units to bytes.
3313 */
3314 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3315 if (bufsize > args->count) {
3316 kmem_free(data, count);
3317 resp->status = NFS3ERR_TOOSMALL;
3318 goto out1;
3319 }
3320 }
3321
3322 /*
3323 * Have a valid readir buffer for the native character
3324 * set. Need to check if a conversion is necessary and
3325 * potentially rewrite the whole buffer. Note that if the
3326 * conversion expands names enough, the structure may not
3327 * fit. In this case, we need to drop entries until if fits
3328 * and patch the counts in order that the next readdir will
3329 * get the correct entries.
3330 */
3331 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3332 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3333
3334
3335 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3336
3337 #if 0 /* notyet */
3338 /*
3339 * Don't do this. It causes local disk writes when just
3340 * reading the file and the overhead is deemed larger
3341 * than the benefit.
3342 */
3343 /*
3344 * Force modified metadata out to stable storage.
3345 */
3346 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3347 #endif
3348
3349 resp->status = NFS3_OK;
3350 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3351 resp->resok.cookieverf = 0;
3352 resp->resok.reply.entries = (entry3 *)data;
3353 resp->resok.reply.eof = iseof;
3354 resp->resok.size = count - uio.uio_resid;
3355 resp->resok.count = args->count;
3356 resp->resok.freecount = count;
3357
3358 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3359 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3360 READDIR3res *, resp);
3361
3362 VN_RELE(vp);
3363
3364 return;
3365
3366 out:
3367 if (curthread->t_flag & T_WOULDBLOCK) {
3368 curthread->t_flag &= ~T_WOULDBLOCK;
3369 resp->status = NFS3ERR_JUKEBOX;
3370 } else
3371 resp->status = puterrno3(error);
3372 out1:
3373 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3374
3375 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3376 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3377 READDIR3res *, resp);
3378
3379 if (vp != NULL) {
3380 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3381 VN_RELE(vp);
3382 }
3383 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3384 }
3385
3386 void *
3387 rfs3_readdir_getfh(READDIR3args *args)
3388 {
3389
3390 return (&args->dir);
3391 }
3392
3393 void
3394 rfs3_readdir_free(READDIR3res *resp)
3395 {
3396
3397 if (resp->status == NFS3_OK)
3398 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3399 }
3400
/*
 * Step from one dirent64 record to the record that follows it in a
 * buffer: the next record starts d_reclen bytes past the start of dp.
 * Any earlier definition of nextdp is discarded first.  Note that dp
 * is evaluated twice.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3405
/*
 * This macro computes the size of a response entry which contains
 * one directory entry including the attributes as well as file handle.
 * If the space left in the response is at least this large, then we are
 * guaranteed to be able to return at least one more directory entry if
 * one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 *	boolean			- 1 * BYTES_PER_XDR_UNIT
 *	file id			- 2 * BYTES_PER_XDR_UNIT
 *	directory name length	- 1 * BYTES_PER_XDR_UNIT
 *	cookie			- 2 * BYTES_PER_XDR_UNIT
 *	attribute flag		- 1 * BYTES_PER_XDR_UNIT
 *	attributes		- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	status for file handle	- 1 * BYTES_PER_XDR_UNIT
 *	length of a file handle	- 1 * BYTES_PER_XDR_UNIT
 *	maximum length of a file handle (NFS3_MAXFHSIZE)
 *	the name itself, rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The namelen argument is parenthesized in the expansion so that an
 * arbitrary expression can be passed safely.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
	BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3429
/*
 * Initial size, in bytes, of each VOP_READDIR() request issued by
 * rfs3_readdirplus(); that function further limits it to the space
 * left in its buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3431
3432 /* ARGSUSED */
3433 void
3434 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3435 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3436 {
3437 int error;
3438 vnode_t *vp;
3439 struct vattr *vap;
3440 struct vattr va;
3441 struct iovec iov;
3442 struct uio uio;
3443 char *data;
3444 int iseof;
3445 struct dirent64 *dp;
3446 vnode_t *nvp;
3447 struct vattr *nvap;
3448 struct vattr nva;
3449 entryplus3_info *infop = NULL;
3450 int size = 0;
3451 int nents = 0;
3452 int bufsize = 0;
3453 int entrysize = 0;
3454 int tofit = 0;
3455 int rd_unit = rfs3_readdir_unit;
3456 int prev_len;
3457 int space_left;
3458 int i;
3459 uint_t *namlen = NULL;
3460 char *ndata = NULL;
3461 struct sockaddr *ca;
3462 size_t ret;
3463
3464 vap = NULL;
3465
3466 vp = nfs3_fhtovp(&args->dir, exi);
3467
3468 DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3469 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3470 READDIRPLUS3args *, args);
3471
3472 if (vp == NULL) {
3473 error = ESTALE;
3474 goto out;
3475 }
3476
3477 if (is_system_labeled()) {
3478 bslabel_t *clabel = req->rq_label;
3479
3480 ASSERT(clabel != NULL);
3481 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3482 char *, "got client label from request(1)",
3483 struct svc_req *, req);
3484
3485 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3486 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3487 exi)) {
3488 resp->status = NFS3ERR_ACCES;
3489 goto out1;
3490 }
3491 }
3492 }
3493
3494 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3495
3496 va.va_mask = AT_ALL;
3497 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3498
3499 if (vp->v_type != VDIR) {
3500 error = ENOTDIR;
3501 goto out;
3502 }
3503
3504 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3505 if (error)
3506 goto out;
3507
3508 /*
3509 * Don't allow arbitrary counts for allocation
3510 */
3511 if (args->maxcount > rfs3_tsize(req))
3512 args->maxcount = rfs3_tsize(req);
3513
3514 /*
3515 * Make sure that there is room to read at least one entry
3516 * if any are available
3517 */
3518 args->dircount = MIN(args->dircount, args->maxcount);
3519
3520 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3521 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3522
3523 /*
3524 * This allocation relies on a minimum directory entry
3525 * being roughly 24 bytes. Therefore, the namlen array
3526 * will have enough space based on the maximum number of
3527 * entries to read.
3528 */
3529 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3530
3531 space_left = args->dircount;
3532 data = kmem_alloc(args->dircount, KM_SLEEP);
3533 dp = (struct dirent64 *)data;
3534 uio.uio_iov = &iov;
3535 uio.uio_iovcnt = 1;
3536 uio.uio_segflg = UIO_SYSSPACE;
3537 uio.uio_extflg = UIO_COPY_CACHED;
3538 uio.uio_loffset = (offset_t)args->cookie;
3539
3540 /*
3541 * bufsize is used to keep track of the size of the response as we
3542 * get post op attributes and filehandles for each entry. This is
3543 * an optimization as the server may have read more entries than will
3544 * fit in the buffer specified by maxcount. We stop calculating
3545 * post op attributes and filehandles once we have exceeded maxcount.
3546 * This will minimize the effect of truncation.
3547 *
3548 * It is primed with:
3549 * 1 for the status +
3550 * 1 for the dir_attributes.attributes boolean +
3551 * 2 for the cookie verifier
3552 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3553 * to bytes. If there are directory attributes to be
3554 * returned, then:
3555 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3556 * time BYTES_PER_XDR_UNIT is added to account for them.
3557 */
3558 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3559 if (vap != NULL)
3560 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3561
3562 getmoredents:
3563 /*
3564 * Here we make a check so that our read unit is not larger than
3565 * the space left in the buffer.
3566 */
3567 rd_unit = MIN(rd_unit, space_left);
3568 iov.iov_base = (char *)dp;
3569 iov.iov_len = rd_unit;
3570 uio.uio_resid = rd_unit;
3571 prev_len = rd_unit;
3572
3573 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3574
3575 if (error) {
3576 kmem_free(data, args->dircount);
3577 goto out;
3578 }
3579
3580 if (uio.uio_resid == prev_len && !iseof) {
3581 if (nents == 0) {
3582 kmem_free(data, args->dircount);
3583 resp->status = NFS3ERR_TOOSMALL;
3584 goto out1;
3585 }
3586
3587 /*
3588 * We could not get any more entries, so get the attributes
3589 * and filehandle for the entries already obtained.
3590 */
3591 goto good;
3592 }
3593
3594 /*
3595 * We estimate the size of the response by assuming the
3596 * entry exists and attributes and filehandle are also valid
3597 */
3598 for (size = prev_len - uio.uio_resid;
3599 size > 0;
3600 size -= dp->d_reclen, dp = nextdp(dp)) {
3601
3602 if (dp->d_ino == 0) {
3603 nents++;
3604 continue;
3605 }
3606
3607 namlen[nents] = strlen(dp->d_name);
3608 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3609
3610 /*
3611 * We need to check to see if the number of bytes left
3612 * to go into the buffer will actually fit into the
3613 * buffer. This is calculated as the size of this
3614 * entry plus:
3615 * 1 for the true/false list indicator +
3616 * 1 for the eof indicator
3617 * times BYTES_PER_XDR_UNIT to convert from XDR units
3618 * to bytes.
3619 *
3620 * Also check the dircount limit against the first entry read
3621 *
3622 */
3623 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3624 if (bufsize + tofit > args->maxcount) {
3625 /*
3626 * We make a check here to see if this was the
3627 * first entry being measured. If so, then maxcount
3628 * was too small to begin with and so we need to
3629 * return with NFS3ERR_TOOSMALL.
3630 */
3631 if (nents == 0) {
3632 kmem_free(data, args->dircount);
3633 resp->status = NFS3ERR_TOOSMALL;
3634 goto out1;
3635 }
3636 iseof = FALSE;
3637 goto good;
3638 }
3639 bufsize += entrysize;
3640 nents++;
3641 }
3642
3643 /*
3644 * If there is enough room to fit at least 1 more entry including
3645 * post op attributes and filehandle in the buffer AND that we haven't
3646 * exceeded dircount then go back and get some more.
3647 */
3648 if (!iseof &&
3649 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3650 space_left -= (prev_len - uio.uio_resid);
3651 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3652 goto getmoredents;
3653
3654 /* else, fall through */
3655 }
3656 good:
3657 va.va_mask = AT_ALL;
3658 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3659
3660 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3661
3662 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3663 resp->resok.infop = infop;
3664
3665 dp = (struct dirent64 *)data;
3666 for (i = 0; i < nents; i++) {
3667
3668 if (dp->d_ino == 0) {
3669 infop[i].attr.attributes = FALSE;
3670 infop[i].fh.handle_follows = FALSE;
3671 dp = nextdp(dp);
3672 continue;
3673 }
3674
3675 infop[i].namelen = namlen[i];
3676
3677 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3678 NULL, NULL, NULL);
3679 if (error) {
3680 infop[i].attr.attributes = FALSE;
3681 infop[i].fh.handle_follows = FALSE;
3682 dp = nextdp(dp);
3683 continue;
3684 }
3685
3686 nva.va_mask = AT_ALL;
3687 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3688
3689 /* Lie about the object type for a referral */
3690 if (vn_is_nfs_reparse(nvp, cr))
3691 nvap->va_type = VLNK;
3692
3693 if (vn_ismntpt(nvp)) {
3694 infop[i].attr.attributes = FALSE;
3695 infop[i].fh.handle_follows = FALSE;
3696 } else {
3697 vattr_to_post_op_attr(nvap, &infop[i].attr);
3698
3699 error = makefh3(&infop[i].fh.handle, nvp, exi);
3700 if (!error)
3701 infop[i].fh.handle_follows = TRUE;
3702 else
3703 infop[i].fh.handle_follows = FALSE;
3704 }
3705
3706 VN_RELE(nvp);
3707 dp = nextdp(dp);
3708 }
3709
3710 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3711 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3712 if (ndata == NULL)
3713 ndata = data;
3714
3715 if (ret > 0) {
3716 /*
3717 * We had to drop one or more entries in order to fit
3718 * during the character conversion. We need to patch
3719 * up the size and eof info.
3720 */
3721 if (iseof)
3722 iseof = FALSE;
3723
3724 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3725 nents, ret);
3726 }
3727
3728
3729 #if 0 /* notyet */
3730 /*
3731 * Don't do this. It causes local disk writes when just
3732 * reading the file and the overhead is deemed larger
3733 * than the benefit.
3734 */
3735 /*
3736 * Force modified metadata out to stable storage.
3737 */
3738 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3739 #endif
3740
3741 kmem_free(namlen, args->dircount);
3742
3743 resp->status = NFS3_OK;
3744 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3745 resp->resok.cookieverf = 0;
3746 resp->resok.reply.entries = (entryplus3 *)ndata;
3747 resp->resok.reply.eof = iseof;
3748 resp->resok.size = nents;
3749 resp->resok.count = args->dircount - ret;
3750 resp->resok.maxcount = args->maxcount;
3751
3752 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3753 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3754 READDIRPLUS3res *, resp);
3755
3756 VN_RELE(vp);
3757
3758 return;
3759
3760 out:
3761 if (curthread->t_flag & T_WOULDBLOCK) {
3762 curthread->t_flag &= ~T_WOULDBLOCK;
3763 resp->status = NFS3ERR_JUKEBOX;
3764 } else {
3765 resp->status = puterrno3(error);
3766 }
3767 out1:
3768 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3769
3770 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3771 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3772 READDIRPLUS3res *, resp);
3773
3774 if (vp != NULL) {
3775 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3776 VN_RELE(vp);
3777 }
3778
3779 if (namlen != NULL)
3780 kmem_free(namlen, args->dircount);
3781
3782 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3783 }
3784
3785 void *
3786 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3787 {
3788
3789 return (&args->dir);
3790 }
3791
3792 void
3793 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3794 {
3795
3796 if (resp->status == NFS3_OK) {
3797 kmem_free(resp->resok.reply.entries, resp->resok.count);
3798 kmem_free(resp->resok.infop,
3799 resp->resok.size * sizeof (struct entryplus3_info));
3800 }
3801 }
3802
3803 /* ARGSUSED */
3804 void
3805 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3806 struct svc_req *req, cred_t *cr, bool_t ro)
3807 {
3808 int error;
3809 vnode_t *vp;
3810 struct vattr *vap;
3811 struct vattr va;
3812 struct statvfs64 sb;
3813
3814 vap = NULL;
3815
3816 vp = nfs3_fhtovp(&args->fsroot, exi);
3817
3818 DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3819 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3820 FSSTAT3args *, args);
3821
3822 if (vp == NULL) {
3823 error = ESTALE;
3824 goto out;
3825 }
3826
3827 if (is_system_labeled()) {
3828 bslabel_t *clabel = req->rq_label;
3829
3830 ASSERT(clabel != NULL);
3831 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3832 "got client label from request(1)", struct svc_req *, req);
3833
3834 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3835 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3836 exi)) {
3837 resp->status = NFS3ERR_ACCES;
3838 goto out1;
3839 }
3840 }
3841 }
3842
3843 error = VFS_STATVFS(vp->v_vfsp, &sb);
3844
3845 va.va_mask = AT_ALL;
3846 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3847
3848 if (error)
3849 goto out;
3850
3851 resp->status = NFS3_OK;
3852 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3853 if (sb.f_blocks != (fsblkcnt64_t)-1)
3854 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3855 else
3856 resp->resok.tbytes = (size3)sb.f_blocks;
3857 if (sb.f_bfree != (fsblkcnt64_t)-1)
3858 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3859 else
3860 resp->resok.fbytes = (size3)sb.f_bfree;
3861 if (sb.f_bavail != (fsblkcnt64_t)-1)
3862 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3863 else
3864 resp->resok.abytes = (size3)sb.f_bavail;
3865 resp->resok.tfiles = (size3)sb.f_files;
3866 resp->resok.ffiles = (size3)sb.f_ffree;
3867 resp->resok.afiles = (size3)sb.f_favail;
3868 resp->resok.invarsec = 0;
3869
3870 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3871 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3872 FSSTAT3res *, resp);
3873 VN_RELE(vp);
3874
3875 return;
3876
3877 out:
3878 if (curthread->t_flag & T_WOULDBLOCK) {
3879 curthread->t_flag &= ~T_WOULDBLOCK;
3880 resp->status = NFS3ERR_JUKEBOX;
3881 } else
3882 resp->status = puterrno3(error);
3883 out1:
3884 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3885 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3886 FSSTAT3res *, resp);
3887
3888 if (vp != NULL)
3889 VN_RELE(vp);
3890 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3891 }
3892
3893 void *
3894 rfs3_fsstat_getfh(FSSTAT3args *args)
3895 {
3896
3897 return (&args->fsroot);
3898 }
3899
3900 /* ARGSUSED */
3901 void
3902 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3903 struct svc_req *req, cred_t *cr, bool_t ro)
3904 {
3905 vnode_t *vp;
3906 struct vattr *vap;
3907 struct vattr va;
3908 uint32_t xfer_size;
3909 ulong_t l = 0;
3910 int error;
3911
3912 vp = nfs3_fhtovp(&args->fsroot, exi);
3913
3914 DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3915 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3916 FSINFO3args *, args);
3917
3918 if (vp == NULL) {
3919 if (curthread->t_flag & T_WOULDBLOCK) {
3920 curthread->t_flag &= ~T_WOULDBLOCK;
3921 resp->status = NFS3ERR_JUKEBOX;
3922 } else
3923 resp->status = NFS3ERR_STALE;
3924 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3925 goto out;
3926 }
3927
3928 if (is_system_labeled()) {
3929 bslabel_t *clabel = req->rq_label;
3930
3931 ASSERT(clabel != NULL);
3932 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3933 "got client label from request(1)", struct svc_req *, req);
3934
3935 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3936 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3937 exi)) {
3938 resp->status = NFS3ERR_STALE;
3939 vattr_to_post_op_attr(NULL,
3940 &resp->resfail.obj_attributes);
3941 goto out;
3942 }
3943 }
3944 }
3945
3946 va.va_mask = AT_ALL;
3947 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3948
3949 resp->status = NFS3_OK;
3950 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3951 xfer_size = rfs3_tsize(req);
3952 resp->resok.rtmax = xfer_size;
3953 resp->resok.rtpref = xfer_size;
3954 resp->resok.rtmult = DEV_BSIZE;
3955 resp->resok.wtmax = xfer_size;
3956 resp->resok.wtpref = xfer_size;
3957 resp->resok.wtmult = DEV_BSIZE;
3958 resp->resok.dtpref = MAXBSIZE;
3959
3960 /*
3961 * Large file spec: want maxfilesize based on limit of
3962 * underlying filesystem. We can guess 2^31-1 if need be.
3963 */
3964 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3965 if (error) {
3966 resp->status = puterrno3(error);
3967 goto out;
3968 }
3969
3970 /*
3971 * If the underlying file system does not support _PC_FILESIZEBITS,
3972 * return a reasonable default. Note that error code on VOP_PATHCONF
3973 * will be 0, even if the underlying file system does not support
3974 * _PC_FILESIZEBITS.
3975 */
3976 if (l == (ulong_t)-1) {
3977 resp->resok.maxfilesize = MAXOFF32_T;
3978 } else {
3979 if (l >= (sizeof (uint64_t) * 8))
3980 resp->resok.maxfilesize = INT64_MAX;
3981 else
3982 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3983 }
3984
3985 resp->resok.time_delta.seconds = 0;
3986 resp->resok.time_delta.nseconds = 1000;
3987 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3988 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3989
3990 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3991 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3992 FSINFO3res *, resp);
3993
3994 VN_RELE(vp);
3995
3996 return;
3997
3998 out:
3999 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
4000 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
4001 FSINFO3res *, resp);
4002 if (vp != NULL)
4003 VN_RELE(vp);
4004 }
4005
4006 void *
4007 rfs3_fsinfo_getfh(FSINFO3args *args)
4008 {
4009 return (&args->fsroot);
4010 }
4011
4012 /* ARGSUSED */
4013 void
4014 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4015 struct svc_req *req, cred_t *cr, bool_t ro)
4016 {
4017 int error;
4018 vnode_t *vp;
4019 struct vattr *vap;
4020 struct vattr va;
4021 ulong_t val;
4022
4023 vap = NULL;
4024
4025 vp = nfs3_fhtovp(&args->object, exi);
4026
4027 DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4028 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4029 PATHCONF3args *, args);
4030
4031 if (vp == NULL) {
4032 error = ESTALE;
4033 goto out;
4034 }
4035
4036 if (is_system_labeled()) {
4037 bslabel_t *clabel = req->rq_label;
4038
4039 ASSERT(clabel != NULL);
4040 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4041 "got client label from request(1)", struct svc_req *, req);
4042
4043 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4044 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4045 exi)) {
4046 resp->status = NFS3ERR_ACCES;
4047 goto out1;
4048 }
4049 }
4050 }
4051
4052 va.va_mask = AT_ALL;
4053 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4054
4055 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4056 if (error)
4057 goto out;
4058 resp->resok.info.link_max = (uint32)val;
4059
4060 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4061 if (error)
4062 goto out;
4063 resp->resok.info.name_max = (uint32)val;
4064
4065 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4066 if (error)
4067 goto out;
4068 if (val == 1)
4069 resp->resok.info.no_trunc = TRUE;
4070 else
4071 resp->resok.info.no_trunc = FALSE;
4072
4073 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4074 if (error)
4075 goto out;
4076 if (val == 1)
4077 resp->resok.info.chown_restricted = TRUE;
4078 else
4079 resp->resok.info.chown_restricted = FALSE;
4080
4081 resp->status = NFS3_OK;
4082 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4083 resp->resok.info.case_insensitive = FALSE;
4084 resp->resok.info.case_preserving = TRUE;
4085 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4086 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4087 PATHCONF3res *, resp);
4088 VN_RELE(vp);
4089 return;
4090
4091 out:
4092 if (curthread->t_flag & T_WOULDBLOCK) {
4093 curthread->t_flag &= ~T_WOULDBLOCK;
4094 resp->status = NFS3ERR_JUKEBOX;
4095 } else
4096 resp->status = puterrno3(error);
4097 out1:
4098 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4099 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4100 PATHCONF3res *, resp);
4101 if (vp != NULL)
4102 VN_RELE(vp);
4103 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4104 }
4105
4106 void *
4107 rfs3_pathconf_getfh(PATHCONF3args *args)
4108 {
4109
4110 return (&args->object);
4111 }
4112
4113 void
4114 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4115 struct svc_req *req, cred_t *cr, bool_t ro)
4116 {
4117 nfs3_srv_t *ns;
4118 int error;
4119 vnode_t *vp;
4120 struct vattr *bvap;
4121 struct vattr bva;
4122 struct vattr *avap;
4123 struct vattr ava;
4124
4125 bvap = NULL;
4126 avap = NULL;
4127
4128 vp = nfs3_fhtovp(&args->file, exi);
4129
4130 DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4131 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4132 COMMIT3args *, args);
4133
4134 if (vp == NULL) {
4135 error = ESTALE;
4136 goto out;
4137 }
4138
4139 ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
4140 ns = nfs3_get_srv();
4141 bva.va_mask = AT_ALL;
4142 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4143
4144 /*
4145 * If we can't get the attributes, then we can't do the
4146 * right access checking. So, we'll fail the request.
4147 */
4148 if (error)
4149 goto out;
4150
4151 bvap = &bva;
4152
4153 if (rdonly(ro, vp)) {
4154 resp->status = NFS3ERR_ROFS;
4155 goto out1;
4156 }
4157
4158 if (vp->v_type != VREG) {
4159 resp->status = NFS3ERR_INVAL;
4160 goto out1;
4161 }
4162
4163 if (is_system_labeled()) {
4164 bslabel_t *clabel = req->rq_label;
4165
4166 ASSERT(clabel != NULL);
4167 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4168 "got client label from request(1)", struct svc_req *, req);
4169
4170 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4171 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4172 exi)) {
4173 resp->status = NFS3ERR_ACCES;
4174 goto out1;
4175 }
4176 }
4177 }
4178
4179 if (crgetuid(cr) != bva.va_uid &&
4180 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4181 goto out;
4182
4183 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4184
4185 ava.va_mask = AT_ALL;
4186 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4187
4188 if (error)
4189 goto out;
4190
4191 resp->status = NFS3_OK;
4192 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4193 resp->resok.verf = ns->write3verf;
4194
4195 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4196 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4197 COMMIT3res *, resp);
4198
4199 VN_RELE(vp);
4200
4201 return;
4202
4203 out:
4204 if (curthread->t_flag & T_WOULDBLOCK) {
4205 curthread->t_flag &= ~T_WOULDBLOCK;
4206 resp->status = NFS3ERR_JUKEBOX;
4207 } else
4208 resp->status = puterrno3(error);
4209 out1:
4210 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4211 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4212 COMMIT3res *, resp);
4213
4214 if (vp != NULL)
4215 VN_RELE(vp);
4216 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4217 }
4218
4219 void *
4220 rfs3_commit_getfh(COMMIT3args *args)
4221 {
4222
4223 return (&args->file);
4224 }
4225
4226 static int
4227 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4228 {
4229
4230 vap->va_mask = 0;
4231
4232 if (sap->mode.set_it) {
4233 vap->va_mode = (mode_t)sap->mode.mode;
4234 vap->va_mask |= AT_MODE;
4235 }
4236 if (sap->uid.set_it) {
4237 vap->va_uid = (uid_t)sap->uid.uid;
4238 vap->va_mask |= AT_UID;
4239 }
4240 if (sap->gid.set_it) {
4241 vap->va_gid = (gid_t)sap->gid.gid;
4242 vap->va_mask |= AT_GID;
4243 }
4244 if (sap->size.set_it) {
4245 if (sap->size.size > (size3)((u_longlong_t)-1))
4246 return (EINVAL);
4247 vap->va_size = sap->size.size;
4248 vap->va_mask |= AT_SIZE;
4249 }
4250 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4251 #ifndef _LP64
4252 /* check time validity */
4253 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4254 return (EOVERFLOW);
4255 #endif
4256 /*
4257 * nfs protocol defines times as unsigned so don't extend sign,
4258 * unless sysadmin set nfs_allow_preepoch_time.
4259 */
4260 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4261 sap->atime.atime.seconds);
4262 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4263 vap->va_mask |= AT_ATIME;
4264 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4265 gethrestime(&vap->va_atime);
4266 vap->va_mask |= AT_ATIME;
4267 }
4268 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4269 #ifndef _LP64
4270 /* check time validity */
4271 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4272 return (EOVERFLOW);
4273 #endif
4274 /*
4275 * nfs protocol defines times as unsigned so don't extend sign,
4276 * unless sysadmin set nfs_allow_preepoch_time.
4277 */
4278 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4279 sap->mtime.mtime.seconds);
4280 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4281 vap->va_mask |= AT_MTIME;
4282 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4283 gethrestime(&vap->va_mtime);
4284 vap->va_mask |= AT_MTIME;
4285 }
4286
4287 return (0);
4288 }
4289
4290 static const ftype3 vt_to_nf3[] = {
4291 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4292 };
4293
4294 static int
4295 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4296 {
4297
4298 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4299 /* Return error if time or size overflow */
4300 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4301 return (EOVERFLOW);
4302 }
4303 fap->type = vt_to_nf3[vap->va_type];
4304 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4305 fap->nlink = (uint32)vap->va_nlink;
4306 if (vap->va_uid == UID_NOBODY)
4307 fap->uid = (uid3)NFS_UID_NOBODY;
4308 else
4309 fap->uid = (uid3)vap->va_uid;
4310 if (vap->va_gid == GID_NOBODY)
4311 fap->gid = (gid3)NFS_GID_NOBODY;
4312 else
4313 fap->gid = (gid3)vap->va_gid;
4314 fap->size = (size3)vap->va_size;
4315 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4316 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4317 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4318 fap->fsid = (uint64)vap->va_fsid;
4319 fap->fileid = (fileid3)vap->va_nodeid;
4320 fap->atime.seconds = vap->va_atime.tv_sec;
4321 fap->atime.nseconds = vap->va_atime.tv_nsec;
4322 fap->mtime.seconds = vap->va_mtime.tv_sec;
4323 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4324 fap->ctime.seconds = vap->va_ctime.tv_sec;
4325 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4326 return (0);
4327 }
4328
4329 static int
4330 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4331 {
4332
4333 /* Return error if time or size overflow */
4334 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4335 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4336 NFS3_SIZE_OK(vap->va_size))) {
4337 return (EOVERFLOW);
4338 }
4339 wccap->size = (size3)vap->va_size;
4340 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4341 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4342 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4343 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4344 return (0);
4345 }
4346
4347 static void
4348 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4349 {
4350
4351 /* don't return attrs if time overflow */
4352 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4353 poap->attributes = TRUE;
4354 } else
4355 poap->attributes = FALSE;
4356 }
4357
4358 void
4359 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4360 {
4361
4362 /* don't return attrs if time overflow */
4363 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4364 poap->attributes = TRUE;
4365 } else
4366 poap->attributes = FALSE;
4367 }
4368
4369 static void
4370 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4371 {
4372 vattr_to_pre_op_attr(bvap, &wccp->before);
4373 vattr_to_post_op_attr(avap, &wccp->after);
4374 }
4375
4376 static int
4377 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4378 {
4379 struct clist *wcl;
4380 int wlist_len;
4381 count3 count = rok->count;
4382
4383 wcl = args->wlist;
4384 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4385 return (FALSE);
4386
4387 wcl = args->wlist;
4388 rok->wlist_len = wlist_len;
4389 rok->wlist = wcl;
4390 return (TRUE);
4391 }
4392
4393 void
4394 rfs3_srv_zone_init(nfs_globals_t *ng)
4395 {
4396 nfs3_srv_t *ns;
4397 struct rfs3_verf_overlay {
4398 uint_t id; /* a "unique" identifier */
4399 int ts; /* a unique timestamp */
4400 } *verfp;
4401 timestruc_t now;
4402
4403 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4404
4405 /*
4406 * The following algorithm attempts to find a unique verifier
4407 * to be used as the write verifier returned from the server
4408 * to the client. It is important that this verifier change
4409 * whenever the server reboots. Of secondary importance, it
4410 * is important for the verifier to be unique between two
4411 * different servers.
4412 *
4413 * Thus, an attempt is made to use the system hostid and the
4414 * current time in seconds when the nfssrv kernel module is
4415 * loaded. It is assumed that an NFS server will not be able
4416 * to boot and then to reboot in less than a second. If the
4417 * hostid has not been set, then the current high resolution
4418 * time is used. This will ensure different verifiers each
4419 * time the server reboots and minimize the chances that two
4420 * different servers will have the same verifier.
4421 */
4422
4423 #ifndef lint
4424 /*
4425 * We ASSERT that this constant logic expression is
4426 * always true because in the past, it wasn't.
4427 */
4428 ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4429 #endif
4430
4431 gethrestime(&now);
4432 verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4433 verfp->ts = (int)now.tv_sec;
4434 verfp->id = zone_get_hostid(NULL);
4435
4436 if (verfp->id == 0)
4437 verfp->id = (uint_t)now.tv_nsec;
4438
4439 ng->nfs3_srv = ns;
4440 }
4441
4442 void
4443 rfs3_srv_zone_fini(nfs_globals_t *ng)
4444 {
4445 nfs3_srv_t *ns = ng->nfs3_srv;
4446
4447 ng->nfs3_srv = NULL;
4448
4449 kmem_free(ns, sizeof (*ns));
4450 }
4451
4452 void
4453 rfs3_srvrinit(void)
4454 {
4455 nfs3_srv_caller_id = fs_new_caller_id();
4456 }
4457
/*
 * NFSv3 server teardown counterpart of rfs3_srvrinit().  The function
 * is deliberately empty.
 */
void
rfs3_srvrfini(void)
{
	/* Intentionally empty. */
}