1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2018 Nexenta Systems, Inc.
24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 #include <sys/sdt.h>
52
53 #include <rpc/types.h>
54 #include <rpc/auth.h>
55 #include <rpc/svc.h>
56 #include <rpc/rpc_rdma.h>
57
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61
62 #include <sys/strsubr.h>
63 #include <sys/tsol/label.h>
64 #include <sys/tsol/tndb.h>
65
66 #include <sys/zone.h>
67
68 #include <inet/ip.h>
69 #include <inet/ip6.h>
70
/*
 * Zone global variables of NFSv3 server.
 *
 * The instance belonging to the calling zone is obtained with
 * nfs3_get_srv(), which reads it from the nfs3_srv member of that
 * zone's nfs_globals_t.
 */
typedef struct nfs3_srv {
	/*
	 * NOTE(review): presumably the write verifier handed back in
	 * WRITE/COMMIT replies -- its users are outside this part of the
	 * file, confirm there.
	 */
	writeverf3	write3verf;
} nfs3_srv_t;
77
78 /*
79 * These are the interface routines for the server side of the
80 * Network File System. See the NFS version 3 protocol specification
81 * for a description of this interface.
82 */
83
/* Local helpers converting between NFSv3 attribute types and struct vattr. */
static int sattr3_to_vattr(sattr3 *, struct vattr *);
static int vattr_to_fattr3(struct vattr *, fattr3 *);
static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
/* Called by rfs3_read() on the RDMA path; a zero return fails the READ. */
static int rdma_setup_read_data3(READ3args *, READ3resok *);

/*
 * Defined elsewhere.  rfs3_read() only attempts loaned (zero copy) buffers
 * when this is non-zero and the request does not use RDMA.
 */
extern int nfs_loaned_buffers;

/*
 * Value placed in cc_caller_id of the caller_context_t that the handlers
 * below pass to VOP calls.
 */
u_longlong_t nfs3_srv_caller_id;
94
95 static nfs3_srv_t *
96 nfs3_get_srv(void)
97 {
98 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
99 nfs3_srv_t *srv = ng->nfs3_srv;
100 ASSERT(srv != NULL);
101 return (srv);
102 }
103
104 /* ARGSUSED */
105 void
106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
107 struct svc_req *req, cred_t *cr, bool_t ro)
108 {
109 int error;
110 vnode_t *vp;
111 struct vattr va;
112
113 vp = nfs3_fhtovp(&args->object, exi);
114
115 DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
116 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
117 GETATTR3args *, args);
118
119 if (vp == NULL) {
120 error = ESTALE;
121 goto out;
122 }
123
124 va.va_mask = AT_ALL;
125 error = rfs4_delegated_getattr(vp, &va, 0, cr);
126
127 if (!error) {
128 /* Lie about the object type for a referral */
129 if (vn_is_nfs_reparse(vp, cr))
130 va.va_type = VLNK;
131
132 /* overflow error if time or size is out of range */
133 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
134 if (error)
135 goto out;
136 resp->status = NFS3_OK;
137
138 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
139 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
140 GETATTR3res *, resp);
141
142 VN_RELE(vp);
143
144 return;
145 }
146
147 out:
148 if (curthread->t_flag & T_WOULDBLOCK) {
149 curthread->t_flag &= ~T_WOULDBLOCK;
150 resp->status = NFS3ERR_JUKEBOX;
151 } else
152 resp->status = puterrno3(error);
153
154 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
155 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
156 GETATTR3res *, resp);
157
158 if (vp != NULL)
159 VN_RELE(vp);
160 }
161
162 void *
163 rfs3_getattr_getfh(GETATTR3args *args)
164 {
165
166 return (&args->object);
167 }
168
/*
 * NFSv3 SETATTR: apply args->new_attributes to the object named by
 * args->object.
 *
 * resp->status receives the result.  On success the before/after
 * attributes are returned in resp->resok.obj_wcc; on failure whatever
 * before/after attributes were obtained are returned in
 * resp->resfail.obj_wcc.
 *
 * The request is refused with NFS3ERR_ROFS when rdonly() says so, with
 * NFS3ERR_NOT_SYNC when the ctime guard does not match, with
 * NFS3ERR_ACCES when the label check fails, and with NFS3ERR_JUKEBOX when
 * a VOP reports EAGAIN with CC_WOULDBLOCK set or the thread is flagged
 * T_WOULDBLOCK.
 */
void
rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr *bvap;	/* attributes before the change, or NULL */
	struct vattr bva;
	struct vattr *avap;	/* attributes after the change, or NULL */
	struct vattr ava;	/* requested attributes, later reused for "after" */
	int flag;
	int in_crit = 0;	/* non-zero while inside the nbl critical region */
	struct flock64 bf;
	caller_context_t ct;

	bvap = NULL;
	avap = NULL;

	vp = nfs3_fhtovp(&args->object, exi);

	DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    SETATTR3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	error = sattr3_to_vattr(&args->new_attributes, &ava);
	if (error)
		goto out;

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		/* Clients that are not admin_low must match the label exactly. */
		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/*
	 * We need to specially handle size changes because of
	 * possible conflicting NBMAND locks. Get into critical
	 * region before VOP_GETATTR, so the size attribute is
	 * valid when checking conflicts.
	 *
	 * Also, check to see if the v4 side of the server has
	 * delegated this file.  If so, then we return JUKEBOX to
	 * allow the client to retransmit its request.
	 */
	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}
	}

	bva.va_mask = AT_ALL;
	error = rfs4_delegated_getattr(vp, &bva, 0, cr);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error)
		goto out;

	bvap = &bva;

	if (rdonly(ro, vp)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	/* Optional guard: the client's idea of ctime must match exactly. */
	if (args->guard.check &&
	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
		resp->status = NFS3ERR_NOT_SYNC;
		goto out1;
	}

	/* ATTR_UTIME is passed only when the client supplied the mtime. */
	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
		flag = ATTR_UTIME;
	else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		ava.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing.  If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so.  To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of the change.
	 */
	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region checked is the byte range between the
			 * old and the new size, whichever way it changes.
			 */
			if (ava.va_size < bva.va_size) {
				offset = ava.va_size;
				length = bva.va_size - ava.va_size;
			} else {
				offset = bva.va_size;
				length = ava.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
				goto out;
			}
		}

		/*
		 * Only the owner takes the VOP_SPACE route; AT_SIZE is then
		 * dropped from the mask so VOP_SETATTR below does not
		 * repeat the size change.
		 */
		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
			ava.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)ava.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)ava.va_size, cr, &ct);
		}
	}

	/* Anything still left in the mask is applied with VOP_SETATTR. */
	if (!error && ava.va_mask)
		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto out1;
	}

	/* ava is reused from here on to hold the post-operation attributes. */
	ava.va_mask = AT_ALL;
	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	if (error)
		goto out;

	if (in_crit)
		nbl_end_crit(vp);

	resp->status = NFS3_OK;
	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);

	DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    SETATTR3res *, resp);

	VN_RELE(vp);

	return;

out:
	/* Reached with an errno in 'error'; translate it into a status. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	/* Reached directly when resp->status has already been set. */
	DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    SETATTR3res *, resp);

	if (vp != NULL) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
}
378
379 void *
380 rfs3_setattr_getfh(SETATTR3args *args)
381 {
382
383 return (&args->object);
384 }
385
386 /* ARGSUSED */
387 void
388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
389 struct svc_req *req, cred_t *cr, bool_t ro)
390 {
391 int error;
392 vnode_t *vp;
393 vnode_t *dvp;
394 struct vattr *vap;
395 struct vattr va;
396 struct vattr *dvap;
397 struct vattr dva;
398 nfs_fh3 *fhp;
399 struct sec_ol sec = {0, 0};
400 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
401 struct sockaddr *ca;
402 char *name = NULL;
403
404 dvap = NULL;
405
406 if (exi != NULL)
407 exi_hold(exi);
408
409 /*
410 * Allow lookups from the root - the default
411 * location of the public filehandle.
412 */
413 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
414 dvp = ZONE_ROOTVP();
415 VN_HOLD(dvp);
416
417 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
418 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
419 LOOKUP3args *, args);
420 } else {
421 dvp = nfs3_fhtovp(&args->what.dir, exi);
422
423 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
424 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
425 LOOKUP3args *, args);
426
427 if (dvp == NULL) {
428 error = ESTALE;
429 goto out;
430 }
431 }
432
433 dva.va_mask = AT_ALL;
434 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
435
436 if (args->what.name == nfs3nametoolong) {
437 resp->status = NFS3ERR_NAMETOOLONG;
438 goto out1;
439 }
440
441 if (args->what.name == NULL || *(args->what.name) == '\0') {
442 resp->status = NFS3ERR_ACCES;
443 goto out1;
444 }
445
446 fhp = &args->what.dir;
447 if (strcmp(args->what.name, "..") == 0 &&
448 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
449 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
450 (dvp->v_flag & VROOT)) {
451 /*
452 * special case for ".." and 'nohide'exported root
453 */
454 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
455 resp->status = NFS3ERR_ACCES;
456 goto out1;
457 }
458 } else {
459 resp->status = NFS3ERR_NOENT;
460 goto out1;
461 }
462 }
463
464 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
465 name = nfscmd_convname(ca, exi, args->what.name,
466 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
467
468 if (name == NULL) {
469 resp->status = NFS3ERR_ACCES;
470 goto out1;
471 }
472
473 /*
474 * If the public filehandle is used then allow
475 * a multi-component lookup
476 */
477 if (PUBLIC_FH3(&args->what.dir)) {
478 publicfh_flag = TRUE;
479
480 exi_rele(exi);
481
482 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
483 &exi, &sec);
484
485 /*
486 * Since WebNFS may bypass MOUNT, we need to ensure this
487 * request didn't come from an unlabeled admin_low client.
488 */
489 if (is_system_labeled() && error == 0) {
490 int addr_type;
491 void *ipaddr;
492 tsol_tpc_t *tp;
493
494 if (ca->sa_family == AF_INET) {
495 addr_type = IPV4_VERSION;
496 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
497 } else if (ca->sa_family == AF_INET6) {
498 addr_type = IPV6_VERSION;
499 ipaddr = &((struct sockaddr_in6 *)
500 ca)->sin6_addr;
501 }
502 tp = find_tpc(ipaddr, addr_type, B_FALSE);
503 if (tp == NULL || tp->tpc_tp.tp_doi !=
504 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
505 SUN_CIPSO) {
506 VN_RELE(vp);
507 error = EACCES;
508 }
509 if (tp != NULL)
510 TPC_RELE(tp);
511 }
512 } else {
513 error = VOP_LOOKUP(dvp, name, &vp,
514 NULL, 0, NULL, cr, NULL, NULL, NULL);
515 }
516
517 if (name != args->what.name)
518 kmem_free(name, MAXPATHLEN + 1);
519
520 if (error == 0 && vn_ismntpt(vp)) {
521 error = rfs_cross_mnt(&vp, &exi);
522 if (error)
523 VN_RELE(vp);
524 }
525
526 if (is_system_labeled() && error == 0) {
527 bslabel_t *clabel = req->rq_label;
528
529 ASSERT(clabel != NULL);
530 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
531 "got client label from request(1)", struct svc_req *, req);
532
533 if (!blequal(&l_admin_low->tsl_label, clabel)) {
534 if (!do_rfs_label_check(clabel, dvp,
535 DOMINANCE_CHECK, exi)) {
536 VN_RELE(vp);
537 error = EACCES;
538 }
539 }
540 }
541
542 dva.va_mask = AT_ALL;
543 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
544
545 if (error)
546 goto out;
547
548 if (sec.sec_flags & SEC_QUERY) {
549 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
550 } else {
551 error = makefh3(&resp->resok.object, vp, exi);
552 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
553 auth_weak = TRUE;
554 }
555
556 if (error) {
557 VN_RELE(vp);
558 goto out;
559 }
560
561 va.va_mask = AT_ALL;
562 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
563
564 exi_rele(exi);
565 VN_RELE(vp);
566
567 resp->status = NFS3_OK;
568 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
569 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
570
571 /*
572 * If it's public fh, no 0x81, and client's flavor is
573 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
574 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
575 */
576 if (auth_weak)
577 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
578
579 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
580 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
581 LOOKUP3res *, resp);
582 VN_RELE(dvp);
583
584 return;
585
586 out:
587 if (curthread->t_flag & T_WOULDBLOCK) {
588 curthread->t_flag &= ~T_WOULDBLOCK;
589 resp->status = NFS3ERR_JUKEBOX;
590 } else
591 resp->status = puterrno3(error);
592 out1:
593 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
594 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
595 LOOKUP3res *, resp);
596
597 if (exi != NULL)
598 exi_rele(exi);
599
600 if (dvp != NULL)
601 VN_RELE(dvp);
602 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
603
604 }
605
606 void *
607 rfs3_lookup_getfh(LOOKUP3args *args)
608 {
609
610 return (&args->what.dir);
611 }
612
613 /* ARGSUSED */
614 void
615 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
616 struct svc_req *req, cred_t *cr, bool_t ro)
617 {
618 int error;
619 vnode_t *vp;
620 struct vattr *vap;
621 struct vattr va;
622 int checkwriteperm;
623 boolean_t dominant_label = B_FALSE;
624 boolean_t equal_label = B_FALSE;
625 boolean_t admin_low_client;
626
627 vap = NULL;
628
629 vp = nfs3_fhtovp(&args->object, exi);
630
631 DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
632 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
633 ACCESS3args *, args);
634
635 if (vp == NULL) {
636 error = ESTALE;
637 goto out;
638 }
639
640 /*
641 * If the file system is exported read only, it is not appropriate
642 * to check write permissions for regular files and directories.
643 * Special files are interpreted by the client, so the underlying
644 * permissions are sent back to the client for interpretation.
645 */
646 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
647 checkwriteperm = 0;
648 else
649 checkwriteperm = 1;
650
651 /*
652 * We need the mode so that we can correctly determine access
653 * permissions relative to a mandatory lock file. Access to
654 * mandatory lock files is denied on the server, so it might
655 * as well be reflected to the server during the open.
656 */
657 va.va_mask = AT_MODE;
658 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
659 if (error)
660 goto out;
661
662 vap = &va;
663
664 resp->resok.access = 0;
665
666 if (is_system_labeled()) {
667 bslabel_t *clabel = req->rq_label;
668
669 ASSERT(clabel != NULL);
670 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
671 "got client label from request(1)", struct svc_req *, req);
672
673 if (!blequal(&l_admin_low->tsl_label, clabel)) {
674 if ((equal_label = do_rfs_label_check(clabel, vp,
675 EQUALITY_CHECK, exi)) == B_FALSE) {
676 dominant_label = do_rfs_label_check(clabel,
677 vp, DOMINANCE_CHECK, exi);
678 } else
679 dominant_label = B_TRUE;
680 admin_low_client = B_FALSE;
681 } else
682 admin_low_client = B_TRUE;
683 }
684
685 if (args->access & ACCESS3_READ) {
686 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
687 if (error) {
688 if (curthread->t_flag & T_WOULDBLOCK)
689 goto out;
690 } else if (!MANDLOCK(vp, va.va_mode) &&
691 (!is_system_labeled() || admin_low_client ||
692 dominant_label))
693 resp->resok.access |= ACCESS3_READ;
694 }
695 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
696 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
697 if (error) {
698 if (curthread->t_flag & T_WOULDBLOCK)
699 goto out;
700 } else if (!is_system_labeled() || admin_low_client ||
701 dominant_label)
702 resp->resok.access |= ACCESS3_LOOKUP;
703 }
704 if (checkwriteperm &&
705 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
706 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
707 if (error) {
708 if (curthread->t_flag & T_WOULDBLOCK)
709 goto out;
710 } else if (!MANDLOCK(vp, va.va_mode) &&
711 (!is_system_labeled() || admin_low_client || equal_label)) {
712 resp->resok.access |=
713 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
714 }
715 }
716 if (checkwriteperm &&
717 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
718 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
719 if (error) {
720 if (curthread->t_flag & T_WOULDBLOCK)
721 goto out;
722 } else if (!is_system_labeled() || admin_low_client ||
723 equal_label)
724 resp->resok.access |= ACCESS3_DELETE;
725 }
726 if (args->access & ACCESS3_EXECUTE) {
727 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
728 if (error) {
729 if (curthread->t_flag & T_WOULDBLOCK)
730 goto out;
731 } else if (!MANDLOCK(vp, va.va_mode) &&
732 (!is_system_labeled() || admin_low_client ||
733 dominant_label))
734 resp->resok.access |= ACCESS3_EXECUTE;
735 }
736
737 va.va_mask = AT_ALL;
738 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
739
740 resp->status = NFS3_OK;
741 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
742
743 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
744 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
745 ACCESS3res *, resp);
746
747 VN_RELE(vp);
748
749 return;
750
751 out:
752 if (curthread->t_flag & T_WOULDBLOCK) {
753 curthread->t_flag &= ~T_WOULDBLOCK;
754 resp->status = NFS3ERR_JUKEBOX;
755 } else
756 resp->status = puterrno3(error);
757 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
758 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
759 ACCESS3res *, resp);
760 if (vp != NULL)
761 VN_RELE(vp);
762 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
763 }
764
765 void *
766 rfs3_access_getfh(ACCESS3args *args)
767 {
768
769 return (&args->object);
770 }
771
772 /* ARGSUSED */
773 void
774 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
775 struct svc_req *req, cred_t *cr, bool_t ro)
776 {
777 int error;
778 vnode_t *vp;
779 struct vattr *vap;
780 struct vattr va;
781 struct iovec iov;
782 struct uio uio;
783 char *data;
784 struct sockaddr *ca;
785 char *name = NULL;
786 int is_referral = 0;
787
788 vap = NULL;
789
790 vp = nfs3_fhtovp(&args->symlink, exi);
791
792 DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
793 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
794 READLINK3args *, args);
795
796 if (vp == NULL) {
797 error = ESTALE;
798 goto out;
799 }
800
801 va.va_mask = AT_ALL;
802 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
803 if (error)
804 goto out;
805
806 vap = &va;
807
808 /* We lied about the object type for a referral */
809 if (vn_is_nfs_reparse(vp, cr))
810 is_referral = 1;
811
812 if (vp->v_type != VLNK && !is_referral) {
813 resp->status = NFS3ERR_INVAL;
814 goto out1;
815 }
816
817 if (MANDLOCK(vp, va.va_mode)) {
818 resp->status = NFS3ERR_ACCES;
819 goto out1;
820 }
821
822 if (is_system_labeled()) {
823 bslabel_t *clabel = req->rq_label;
824
825 ASSERT(clabel != NULL);
826 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
827 "got client label from request(1)", struct svc_req *, req);
828
829 if (!blequal(&l_admin_low->tsl_label, clabel)) {
830 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
831 exi)) {
832 resp->status = NFS3ERR_ACCES;
833 goto out1;
834 }
835 }
836 }
837
838 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
839
840 if (is_referral) {
841 char *s;
842 size_t strsz;
843
844 /* Get an artificial symlink based on a referral */
845 s = build_symlink(vp, cr, &strsz);
846 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
847 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
848 vnode_t *, vp, char *, s);
849 if (s == NULL)
850 error = EINVAL;
851 else {
852 error = 0;
853 (void) strlcpy(data, s, MAXPATHLEN + 1);
854 kmem_free(s, strsz);
855 }
856
857 } else {
858
859 iov.iov_base = data;
860 iov.iov_len = MAXPATHLEN;
861 uio.uio_iov = &iov;
862 uio.uio_iovcnt = 1;
863 uio.uio_segflg = UIO_SYSSPACE;
864 uio.uio_extflg = UIO_COPY_CACHED;
865 uio.uio_loffset = 0;
866 uio.uio_resid = MAXPATHLEN;
867
868 error = VOP_READLINK(vp, &uio, cr, NULL);
869
870 if (!error)
871 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
872 }
873
874 va.va_mask = AT_ALL;
875 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
876
877 /* Lie about object type again just to be consistent */
878 if (is_referral && vap != NULL)
879 vap->va_type = VLNK;
880
881 #if 0 /* notyet */
882 /*
883 * Don't do this. It causes local disk writes when just
884 * reading the file and the overhead is deemed larger
885 * than the benefit.
886 */
887 /*
888 * Force modified metadata out to stable storage.
889 */
890 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
891 #endif
892
893 if (error) {
894 kmem_free(data, MAXPATHLEN + 1);
895 goto out;
896 }
897
898 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
899 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
900 MAXPATHLEN + 1);
901
902 if (name == NULL) {
903 /*
904 * Even though the conversion failed, we return
905 * something. We just don't translate it.
906 */
907 name = data;
908 }
909
910 resp->status = NFS3_OK;
911 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
912 resp->resok.data = name;
913
914 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
915 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
916 READLINK3res *, resp);
917 VN_RELE(vp);
918
919 if (name != data)
920 kmem_free(data, MAXPATHLEN + 1);
921
922 return;
923
924 out:
925 if (curthread->t_flag & T_WOULDBLOCK) {
926 curthread->t_flag &= ~T_WOULDBLOCK;
927 resp->status = NFS3ERR_JUKEBOX;
928 } else
929 resp->status = puterrno3(error);
930 out1:
931 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
932 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
933 READLINK3res *, resp);
934 if (vp != NULL)
935 VN_RELE(vp);
936 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
937 }
938
939 void *
940 rfs3_readlink_getfh(READLINK3args *args)
941 {
942
943 return (&args->symlink);
944 }
945
946 void
947 rfs3_readlink_free(READLINK3res *resp)
948 {
949
950 if (resp->status == NFS3_OK)
951 kmem_free(resp->resok.data, MAXPATHLEN + 1);
952 }
953
/*
 * Server routine to handle read
 * May handle RDMA data as well as mblks
 *
 * Reads up to args->count bytes at args->offset from the regular file
 * named by args->file.  On success resp->status is NFS3_OK and
 * resp->resok describes the data: through resok.data.mp for the mblk
 * paths, or through resok.wlist for RDMA.  On failure resp->status carries
 * the error and resp->resfail.file_attributes the attributes, when
 * available.
 *
 * Three data paths exist:
 *   - RDMA, when the request carries a write chunk list (args->wlist);
 *   - loaned (zero copy) buffers, when nfs_loaned_buffers is set, RDMA is
 *     not in use and VOP_REQZCBUF() succeeds;
 *   - otherwise an mblk chain allocated by rfs_read_alloc().
 */
/* ARGSUSED */
void
rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr *vap;
	struct vattr va;
	struct iovec iov, *iovp = NULL;
	int iovcnt;
	struct uio uio;
	u_offset_t offset;
	mblk_t *mp = NULL;
	int in_crit = 0;	/* non-zero while inside the nbl critical region */
	int need_rwunlock = 0;	/* non-zero once VOP_RWLOCK has been called */
	caller_context_t ct;
	int rdma_used = 0;
	int loaned_buffers;
	struct uio *uiop;	/* the uio actually handed to VOP_READ */

	vap = NULL;

	vp = nfs3_fhtovp(&args->file, exi);

	DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    READ3args *, args);


	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* With RDMA the chunk list must be able to hold the whole request. */
	if (args->wlist) {
		if (args->count > clist_len(args->wlist)) {
			error = EINVAL;
			goto out;
		}
		rdma_used = 1;
	}

	/* use loaned buffers for TCP */
	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto out1;
	}

	/* From here on every exit path has to drop the rwlock. */
	need_rwunlock = 1;

	va.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error)
		goto out;

	vap = &va;

	if (vp->v_type != VREG) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * The owner is not access-checked.  Anybody else needs read
	 * permission or, failing that, execute permission.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
		if (error) {
			if (curthread->t_flag & T_WOULDBLOCK)
				goto out;
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
			if (error)
				goto out;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	/* Reading at or beyond end of file: empty reply with eof set. */
	offset = args->offset;
	if (offset >= va.va_size) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		resp->status = NFS3_OK;
		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
		resp->resok.count = 0;
		resp->resok.eof = TRUE;
		resp->resok.data.data_len = 0;
		resp->resok.data.data_val = NULL;
		resp->resok.data.mp = NULL;
		/* RDMA */
		resp->resok.wlist = args->wlist;
		resp->resok.wlist_len = resp->resok.count;
		if (resp->resok.wlist)
			clist_zero_len(resp->resok.wlist);
		goto done;
	}

	/* Zero-length read: empty reply, eof not set. */
	if (args->count == 0) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		resp->status = NFS3_OK;
		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
		resp->resok.count = 0;
		resp->resok.eof = FALSE;
		resp->resok.data.data_len = 0;
		resp->resok.data.data_val = NULL;
		resp->resok.data.mp = NULL;
		/* RDMA */
		resp->resok.wlist = args->wlist;
		resp->resok.wlist_len = resp->resok.count;
		if (resp->resok.wlist)
			clist_zero_len(resp->resok.wlist);
		goto done;
	}

	/*
	 * do not allocate more memory than the max. allowed
	 * transfer size
	 */
	if (args->count > rfs3_tsize(req))
		args->count = rfs3_tsize(req);

	if (loaned_buffers) {
		uiop = (uio_t *)rfs_setup_xuio(vp);
		ASSERT(uiop != NULL);
		uiop->uio_segflg = UIO_SYSSPACE;
		uiop->uio_loffset = args->offset;
		uiop->uio_resid = args->count;

		/* Jump to do the read if successful */
		if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
			/*
			 * Need to hold the vnode until after VOP_RETZCBUF()
			 * is called.
			 */
			VN_HOLD(vp);
			goto doio_read;
		}

		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
		    uiop->uio_loffset, int, uiop->uio_resid);

		uiop->uio_extflg = 0;
		/* failure to setup for zero copy */
		rfs_free_xuio((void *)uiop);
		loaned_buffers = 0;
	}

	/*
	 * If returning data via RDMA Write, then grab the chunk list.
	 * If we aren't returning READ data w/RDMA_WRITE, then grab
	 * a mblk.
	 */
	if (rdma_used) {
		(void) rdma_get_wchunk(req, &iov, args->wlist);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * For UDP, this will be freed after the reply has been sent
		 * out by the driver.  For TCP, it will be freed after the last
		 * segment associated with the reply has been ACKed by the
		 * client.
		 */
		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
	}

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uiop = &uio;

doio_read:
	/* uiop is either the loaned-buffer xuio or the local uio set up above. */
	error = VOP_READ(vp, uiop, 0, cr, &ct);

	if (error) {
		/*
		 * NOTE(review): when this is reached on the loaned-buffer
		 * path, mp is still NULL and neither the xuio obtained above
		 * nor the extra VN_HOLD() appears to be released in this
		 * function -- confirm whether that cleanup happens
		 * elsewhere.
		 */
		if (mp)
			freemsg(mp);
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}
		goto out;
	}

	/* make mblk using zc buffers */
	if (loaned_buffers) {
		mp = uio_to_mblk(uiop);
		ASSERT(mp != NULL);
	}

	/* Post-read attributes; a failure only suppresses them in the reply. */
	va.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error)
		vap = NULL;
	else
		vap = &va;

	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);

	if (in_crit)
		nbl_end_crit(vp);

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
	/* Bytes transferred is the request size minus what was left over. */
	resp->resok.count = args->count - uiop->uio_resid;
	/* eof is only claimed when the fresh size confirms it. */
	if (!error && offset + resp->resok.count == va.va_size)
		resp->resok.eof = TRUE;
	else
		resp->resok.eof = FALSE;
	resp->resok.data.data_len = resp->resok.count;

	if (mp)
		rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);

	resp->resok.data.mp = mp;
	resp->resok.size = (uint_t)args->count;

	if (rdma_used) {
		resp->resok.data.data_val = (caddr_t)iov.iov_base;
		if (!rdma_setup_read_data3(args, &(resp->resok))) {
			resp->status = NFS3ERR_INVAL;
		}
	} else {
		resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
		(resp->resok).wlist = NULL;
	}

done:
	/* Success exit; locks have already been dropped by this point. */
	DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    READ3res *, resp);

	VN_RELE(vp);

	/* iovp is only set when rfs_read_alloc() supplied an iovec array. */
	if (iovp != NULL)
		kmem_free(iovp, iovcnt * sizeof (struct iovec));

	return;

out:
	/* Reached with an errno in 'error'; translate it into a status. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	/* Reached directly when resp->status has already been set. */
	DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    READ3res *, resp);

	if (vp != NULL) {
		if (need_rwunlock)
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);

	if (iovp != NULL)
		kmem_free(iovp, iovcnt * sizeof (struct iovec));
}
1275
1276 void
1277 rfs3_read_free(READ3res *resp)
1278 {
1279 mblk_t *mp;
1280
1281 if (resp->status == NFS3_OK) {
1282 mp = resp->resok.data.mp;
1283 if (mp != NULL)
1284 freemsg(mp);
1285 }
1286 }
1287
1288 void *
1289 rfs3_read_getfh(READ3args *args)
1290 {
1291
1292 return (&args->file);
1293 }
1294
/*
 * Size of the on-stack iovec array used by rfs3_write().  A WRITE whose mblk
 * chain has more segments than this gets its iovec array from kmem_alloc()
 * instead.
 */
1295 #define MAX_IOVECS 12
1296
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts requests
 * whose mblk chain fit in the on-stack iovec array, "misses" counts requests
 * for which the array had to be allocated.
 */
1297 #ifdef DEBUG
1298 static int rfs3_write_hits = 0;
1299 static int rfs3_write_misses = 0;
1300 #endif
1301
/*
 * NFSv3 WRITE service routine.
 *
 * Converts the file handle in args into a vnode, validates the request and
 * passes the client's data to VOP_WRITE().  The outcome is stored in *resp:
 * on success resp->resok receives the wcc data, the number of bytes written
 * (args->count minus the residual), args->stable as the committed level and
 * ns->write3verf as the verifier; on failure resp->status is set and
 * resp->resfail receives whatever wcc data was collected.
 *
 * args - WRITE3 arguments (file handle, offset, count, stable, data)
 * resp - WRITE3 result, filled in here
 * exi  - export information passed to nfs3_fhtovp() and the label check
 * req  - RPC request; used for the DTrace probes and the client label
 * cr   - credentials used for the VOP calls
 * ro   - passed to rdonly() to decide whether the write is refused
 *
 * Exit labels: "err" converts `error' into an NFS status (or JUKEBOX when
 * the thread has T_WOULDBLOCK set), "err1" is entered with resp->status
 * already set, and "out" is the common tail that fires the done probe and
 * drops the rwlock, the nbmand critical region and the vnode hold.
 */
1302 void
1303 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1304 struct svc_req *req, cred_t *cr, bool_t ro)
1305 {
1306 nfs3_srv_t *ns;
1307 int error;
1308 vnode_t *vp;
1309 struct vattr *bvap = NULL;
1310 struct vattr bva;
1311 struct vattr *avap = NULL;
1312 struct vattr ava;
1313 u_offset_t rlimit;
1314 struct uio uio;
1315 struct iovec iov[MAX_IOVECS];
1316 mblk_t *m;
1317 struct iovec *iovp;
1318 int iovcnt;
1319 int ioflag;
1320 cred_t *savecred;
1321 int in_crit = 0;
/* -1 tells the exit path below not to call VOP_RWUNLOCK() */
1322 int rwlock_ret = -1;
1323 caller_context_t ct;
1324
1325 vp = nfs3_fhtovp(&args->file, exi);
1326
1327 DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1328 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1329 WRITE3args *, args);
1330
1331 if (vp == NULL) {
1332 error = ESTALE;
1333 goto err;
1334 }
1335
1336 ns = nfs3_get_srv();
1337 if (is_system_labeled()) {
1338 bslabel_t *clabel = req->rq_label;
1339
1340 ASSERT(clabel != NULL);
1341 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1342 "got client label from request(1)", struct svc_req *, req);
1343
1344 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1345 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1346 exi)) {
1347 resp->status = NFS3ERR_ACCES;
1348 goto err1;
1349 }
1350 }
1351 }
1352
/* Caller context handed to the VOP calls below; CC_DONTBLOCK is requested */
1353 ct.cc_sysid = 0;
1354 ct.cc_pid = 0;
1355 ct.cc_caller_id = nfs3_srv_caller_id;
1356 ct.cc_flags = CC_DONTBLOCK;
1357
1358 /*
1359 * We have to enter the critical region before calling VOP_RWLOCK
1360 * to avoid a deadlock with ufs.
1361 */
1362 if (nbl_need_check(vp)) {
1363 nbl_start_crit(vp, RW_READER);
1364 in_crit = 1;
1365 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1366 NULL)) {
1367 error = EACCES;
1368 goto err;
1369 }
1370 }
1371
1372 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1373
1374 /* check if a monitor detected a delegation conflict */
1375 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1376 resp->status = NFS3ERR_JUKEBOX;
1377 rwlock_ret = -1;
1378 goto err1;
1379 }
1380
1381
1382 bva.va_mask = AT_ALL;
1383 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1384
1385 /*
1386 * If we can't get the attributes, then we can't do the
1387 * right access checking. So, we'll fail the request.
1388 */
1389 if (error)
1390 goto err;
1391
1392 bvap = &bva;
1393 avap = bvap;
1394
/* The count argument must agree with the length of the data supplied */
1395 if (args->count != args->data.data_len) {
1396 resp->status = NFS3ERR_INVAL;
1397 goto err1;
1398 }
1399
1400 if (rdonly(ro, vp)) {
1401 resp->status = NFS3ERR_ROFS;
1402 goto err1;
1403 }
1404
1405 if (vp->v_type != VREG) {
1406 resp->status = NFS3ERR_INVAL;
1407 goto err1;
1408 }
1409
/* VOP_ACCESS() is only consulted when the caller is not the file's owner */
1410 if (crgetuid(cr) != bva.va_uid &&
1411 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1412 goto err;
1413
1414 if (MANDLOCK(vp, bva.va_mode)) {
1415 resp->status = NFS3ERR_ACCES;
1416 goto err1;
1417 }
1418
/* A zero-length write succeeds without calling VOP_WRITE() */
1419 if (args->count == 0) {
1420 resp->status = NFS3_OK;
1421 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1422 resp->resok.count = 0;
1423 resp->resok.committed = args->stable;
1424 resp->resok.verf = ns->write3verf;
1425 goto out;
1426 }
1427
/*
 * Describe the data with an iovec array, built from whichever form it
 * arrived in: an mblk chain (args->mblk), the first entry of args->rlist,
 * or the flat buffer args->data.data_val.
 */
1428 if (args->mblk != NULL) {
1429 iovcnt = 0;
1430 for (m = args->mblk; m != NULL; m = m->b_cont)
1431 iovcnt++;
1432 if (iovcnt <= MAX_IOVECS) {
1433 #ifdef DEBUG
1434 rfs3_write_hits++;
1435 #endif
1436 iovp = iov;
1437 } else {
1438 #ifdef DEBUG
1439 rfs3_write_misses++;
1440 #endif
1441 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1442 }
1443 mblk_to_iov(args->mblk, iovcnt, iovp);
1444
1445 } else if (args->rlist != NULL) {
1446 iovcnt = 1;
1447 iovp = iov;
1448 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1449 iovp->iov_len = args->count;
1450 } else {
1451 iovcnt = 1;
1452 iovp = iov;
1453 iovp->iov_base = args->data.data_val;
1454 iovp->iov_len = args->count;
1455 }
1456
1457 uio.uio_iov = iovp;
1458 uio.uio_iovcnt = iovcnt;
1459
1460 uio.uio_segflg = UIO_SYSSPACE;
1461 uio.uio_extflg = UIO_COPY_DEFAULT;
1462 uio.uio_loffset = args->offset;
1463 uio.uio_resid = args->count;
/* Shrink the transfer when fewer bytes remain below uio_llimit than asked */
1464 uio.uio_llimit = curproc->p_fsz_ctl;
1465 rlimit = uio.uio_llimit - args->offset;
1466 if (rlimit < (u_offset_t)uio.uio_resid)
1467 uio.uio_resid = (int)rlimit;
1468
/* Map the requested stability level onto the VOP_WRITE() ioflag */
1469 if (args->stable == UNSTABLE)
1470 ioflag = 0;
1471 else if (args->stable == FILE_SYNC)
1472 ioflag = FSYNC;
1473 else if (args->stable == DATA_SYNC)
1474 ioflag = FDSYNC;
1475 else {
1476 if (iovp != iov)
1477 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1478 resp->status = NFS3ERR_INVAL;
1479 goto err1;
1480 }
1481
1482 /*
1483 * We're changing creds because VM may fault and we need
1484 * the cred of the current thread to be used if quota
1485 * checking is enabled.
1486 */
1487 savecred = curthread->t_cred;
1488 curthread->t_cred = cr;
1489 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1490 curthread->t_cred = savecred;
1491
1492 if (iovp != iov)
1493 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1494
1495 /* check if a monitor detected a delegation conflict */
1496 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1497 resp->status = NFS3ERR_JUKEBOX;
1498 goto err1;
1499 }
1500
/* The after-attributes are fetched before the write error is examined */
1501 ava.va_mask = AT_ALL;
1502 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1503
1504 if (error)
1505 goto err;
1506
1507 /*
1508 * If we were unable to get the V_WRITELOCK_TRUE, then we
1509 * may not have accurate after attrs, so check if
1510 * we have both attributes, they have a non-zero va_seq, and
1511 * va_seq has changed by exactly one,
1512 * if not, turn off the before attr.
1513 */
1514 if (rwlock_ret != V_WRITELOCK_TRUE) {
1515 if (bvap == NULL || avap == NULL ||
1516 bvap->va_seq == 0 || avap->va_seq == 0 ||
1517 avap->va_seq != (bvap->va_seq + 1)) {
1518 bvap = NULL;
1519 }
1520 }
1521
1522 resp->status = NFS3_OK;
1523 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1524 resp->resok.count = args->count - uio.uio_resid;
1525 resp->resok.committed = args->stable;
1526 resp->resok.verf = ns->write3verf;
1527 goto out;
1528
1529 err:
1530 if (curthread->t_flag & T_WOULDBLOCK) {
1531 curthread->t_flag &= ~T_WOULDBLOCK;
1532 resp->status = NFS3ERR_JUKEBOX;
1533 } else
1534 resp->status = puterrno3(error);
1535 err1:
1536 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1537 out:
1538 DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1539 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1540 WRITE3res *, resp);
1541
1542 if (vp != NULL) {
1543 if (rwlock_ret != -1)
1544 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1545 if (in_crit)
1546 nbl_end_crit(vp);
1547 VN_RELE(vp);
1548 }
1549 }
1550
1551 void *
1552 rfs3_write_getfh(WRITE3args *args)
1553 {
1554
1555 return (&args->file);
1556 }
1557
/*
 * NFSv3 CREATE service routine.
 *
 * Creates the regular file args->where.name in the directory identified by
 * args->where.dir, following args->how.mode (UNCHECKED, GUARDED or
 * EXCLUSIVE).  On success resp->resok receives the object's file handle
 * (flagged as present only when makefh3() succeeds), its attributes and the
 * directory wcc data; on failure resp->status is set and resp->resfail
 * receives the directory wcc data.
 *
 * For an EXCLUSIVE create the client's verifier is stored in the new file's
 * mtime.  If VOP_CREATE() then reports EEXIST, the existing file's mtime is
 * compared with the verifier to tell a duplicate of the same request apart
 * from a genuine name collision.
 *
 * args - CREATE3 arguments (directory handle, name, creation mode/attributes)
 * resp - CREATE3 result, filled in here
 * exi  - export information passed to nfs3_fhtovp(), makefh3() and others
 * req  - RPC request; used for the DTrace probes, the client label and the
 *        caller address
 * cr   - credentials used for the VOP calls
 * ro   - passed to rdonly() to decide whether the create is refused
 *
 * Exit labels: "out" converts `error' into an NFS status (or JUKEBOX when
 * the thread has T_WOULDBLOCK set); "out1" is entered with resp->status
 * already set and releases the converted name, tvp and dvp.
 */
1558 void
1559 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1560 struct svc_req *req, cred_t *cr, bool_t ro)
1561 {
1562 int error;
1563 int in_crit = 0;
1564 vnode_t *vp;
1565 vnode_t *tvp = NULL;
1566 vnode_t *dvp;
1567 struct vattr *vap;
1568 struct vattr va;
1569 struct vattr *dbvap;
1570 struct vattr dbva;
1571 struct vattr *davap;
1572 struct vattr dava;
1573 enum vcexcl excl;
1574 nfstime3 *mtime;
1575 len_t reqsize;
1576 bool_t trunc;
1577 struct sockaddr *ca;
1578 char *name = NULL;
1579
1580 dbvap = NULL;
1581 davap = NULL;
1582
1583 dvp = nfs3_fhtovp(&args->where.dir, exi);
1584
1585 DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1586 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1587 CREATE3args *, args);
1588
1589 if (dvp == NULL) {
1590 error = ESTALE;
1591 goto out;
1592 }
1593
/* Before-attributes of the directory; they double as after-attributes */
/* until VOP_CREATE() has been attempted. */
1594 dbva.va_mask = AT_ALL;
1595 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1596 davap = dbvap;
1597
1598 if (args->where.name == nfs3nametoolong) {
1599 resp->status = NFS3ERR_NAMETOOLONG;
1600 goto out1;
1601 }
1602
1603 if (args->where.name == NULL || *(args->where.name) == '\0') {
1604 resp->status = NFS3ERR_ACCES;
1605 goto out1;
1606 }
1607
1608 if (rdonly(ro, dvp)) {
1609 resp->status = NFS3ERR_ROFS;
1610 goto out1;
1611 }
1612
1613 if (is_system_labeled()) {
1614 bslabel_t *clabel = req->rq_label;
1615
1616 ASSERT(clabel != NULL);
1617 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1618 "got client label from request(1)", struct svc_req *, req);
1619
1620 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1621 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1622 exi)) {
1623 resp->status = NFS3ERR_ACCES;
1624 goto out1;
1625 }
1626 }
1627 }
1628
/*
 * Convert the client-supplied name.  The result is freed below (as a
 * MAXPATHLEN + 1 byte buffer) whenever it differs from args->where.name.
 */
1629 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1630 name = nfscmd_convname(ca, exi, args->where.name,
1631 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1632
1633 if (name == NULL) {
1634 /* This is really a Solaris EILSEQ */
1635 resp->status = NFS3ERR_INVAL;
1636 goto out1;
1637 }
1638
1639 if (args->how.mode == EXCLUSIVE) {
1640 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1641 va.va_type = VREG;
1642 va.va_mode = (mode_t)0;
1643 /*
1644 * Ensure no time overflows and that types match
1645 */
1646 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1647 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1648 va.va_mtime.tv_nsec = mtime->nseconds;
1649 excl = EXCL;
1650 } else {
1651 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1652 &va);
1653 if (error)
1654 goto out;
1655 va.va_mask |= AT_TYPE;
1656 va.va_type = VREG;
1657 if (args->how.mode == GUARDED)
1658 excl = EXCL;
1659 else {
1660 excl = NONEXCL;
1661
1662 /*
1663 * During creation of file in non-exclusive mode
1664 * if size of file is being set then make sure
1665 * that if the file already exists that no conflicting
1666 * non-blocking mandatory locks exists in the region
1667 * being modified. If there are conflicting locks fail
1668 * the operation with EACCES.
1669 */
1670 if (va.va_mask & AT_SIZE) {
1671 struct vattr tva;
1672
1673 /*
1674 * Does file already exist?
1675 */
1676 error = VOP_LOOKUP(dvp, name, &tvp,
1677 NULL, 0, NULL, cr, NULL, NULL, NULL);
1678
1679 /*
1680 * Check to see if the file has been delegated
1681 * to a v4 client. If so, then begin recall of
1682 * the delegation and return JUKEBOX to allow
1683 * the client to retransmit its request.
1684 */
1685
1686 trunc = va.va_size == 0;
1687 if (!error &&
1688 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1689 resp->status = NFS3ERR_JUKEBOX;
1690 goto out1;
1691 }
1692
1693 /*
1694 * Check for NBMAND lock conflicts
1695 */
1696 if (!error && nbl_need_check(tvp)) {
1697 u_offset_t offset;
1698 ssize_t len;
1699
1700 nbl_start_crit(tvp, RW_READER);
1701 in_crit = 1;
1702
1703 tva.va_mask = AT_SIZE;
1704 error = VOP_GETATTR(tvp, &tva, 0, cr,
1705 NULL);
1706 /*
1707 * Can't check for conflicts, so return
1708 * error.
1709 */
1710 if (error)
1711 goto out;
1712
/* The region checked runs between the current size and the requested size */
1713 offset = tva.va_size < va.va_size ?
1714 tva.va_size : va.va_size;
1715 len = tva.va_size < va.va_size ?
1716 va.va_size - tva.va_size :
1717 tva.va_size - va.va_size;
1718 if (nbl_conflict(tvp, NBL_WRITE,
1719 offset, len, 0, NULL)) {
1720 error = EACCES;
1721 goto out;
1722 }
1723 } else if (tvp) {
1724 VN_RELE(tvp);
1725 tvp = NULL;
1726 }
1727 }
1728 }
/* Remember the requested size; va is reused for VOP_GETATTR() further down */
1729 if (va.va_mask & AT_SIZE)
1730 reqsize = va.va_size;
1731 }
1732
1733 /*
1734 * Must specify the mode.
1735 */
1736 if (!(va.va_mask & AT_MODE)) {
1737 resp->status = NFS3ERR_INVAL;
1738 goto out1;
1739 }
1740
1741 /*
1742 * If the filesystem is exported with nosuid, then mask off
1743 * the setuid and setgid bits.
1744 */
1745 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1746 va.va_mode &= ~(VSUID | VSGID);
1747
1748 tryagain:
1749 /*
1750 * The file open mode used is VWRITE. If the client needs
1751 * some other semantic, then it should do the access checking
1752 * itself. It would have been nice to have the file open mode
1753 * passed as part of the arguments.
1754 */
1755 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1756 &vp, cr, 0, NULL, NULL);
1757
1758 dava.va_mask = AT_ALL;
1759 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1760
1761 if (error) {
1762 /*
1763 * If we got something other than file already exists
1764 * then just return this error. Otherwise, we got
1765 * EEXIST. If we were doing a GUARDED create, then
1766 * just return this error. Otherwise, we need to
1767 * make sure that this wasn't a duplicate of an
1768 * exclusive create request.
1769 *
1770 * The assumption is made that a non-exclusive create
1771 * request will never return EEXIST.
1772 */
1773 if (error != EEXIST || args->how.mode == GUARDED)
1774 goto out;
1775 /*
1776 * Lookup the file so that we can get a vnode for it.
1777 */
1778 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1779 NULL, cr, NULL, NULL, NULL);
1780 if (error) {
1781 /*
1782 * We couldn't find the file that we thought that
1783 * we just created. So, we'll just try creating
1784 * it again.
1785 */
1786 if (error == ENOENT)
1787 goto tryagain;
1788 goto out;
1789 }
1790
1791 /*
1792 * If the file is delegated to a v4 client, go ahead
1793 * and initiate recall, this create is a hint that a
1794 * conflicting v3 open has occurred.
1795 */
1796
1797 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1798 VN_RELE(vp);
1799 resp->status = NFS3ERR_JUKEBOX;
1800 goto out1;
1801 }
1802
1803 va.va_mask = AT_ALL;
1804 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1805
1806 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1807 /* % with INT32_MAX to prevent overflows */
1808 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1809 vap->va_mtime.tv_sec !=
1810 (mtime->seconds % INT32_MAX) ||
1811 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1812 VN_RELE(vp);
1813 error = EEXIST;
1814 goto out;
1815 }
1816 } else {
1817
1818 if ((args->how.mode == UNCHECKED ||
1819 args->how.mode == GUARDED) &&
1820 args->how.createhow3_u.obj_attributes.size.set_it &&
1821 va.va_size == 0)
1822 trunc = TRUE;
1823 else
1824 trunc = FALSE;
1825
1826 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1827 VN_RELE(vp);
1828 resp->status = NFS3ERR_JUKEBOX;
1829 goto out1;
1830 }
1831
1832 va.va_mask = AT_ALL;
1833 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1834
1835 /*
1836 * We need to check to make sure that the file got
1837 * created to the indicated size. If not, we do a
1838 * setattr to try to change the size, but we don't
1839 * try too hard. This shouldn't be a problem as most
1840 * clients will only specify a size of zero which
1841 * local file systems handle. However, even if
1842 * the client does specify a non-zero size, it can
1843 * still recover by checking the size of the file
1844 * after it has created it and then issue a setattr
1845 * request of its own to set the size of the file.
1846 */
1847 if (vap != NULL &&
1848 (args->how.mode == UNCHECKED ||
1849 args->how.mode == GUARDED) &&
1850 args->how.createhow3_u.obj_attributes.size.set_it &&
1851 vap->va_size != reqsize) {
1852 va.va_mask = AT_SIZE;
1853 va.va_size = reqsize;
1854 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1855 va.va_mask = AT_ALL;
1856 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1857 }
1858 }
1859
1860 if (name != args->where.name)
1861 kmem_free(name, MAXPATHLEN + 1);
1862
/* A makefh3() failure does not fail the create; the handle is just omitted */
1863 error = makefh3(&resp->resok.obj.handle, vp, exi);
1864 if (error)
1865 resp->resok.obj.handle_follows = FALSE;
1866 else
1867 resp->resok.obj.handle_follows = TRUE;
1868
1869 /*
1870 * Force modified data and metadata out to stable storage.
1871 */
1872 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1873 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1874
1875 VN_RELE(vp);
1876 if (tvp != NULL) {
1877 if (in_crit)
1878 nbl_end_crit(tvp);
1879 VN_RELE(tvp);
1880 }
1881
1882 resp->status = NFS3_OK;
1883 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1884 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1885
1886 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1887 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1888 CREATE3res *, resp);
1889
1890 VN_RELE(dvp);
1891 return;
1892
1893 out:
1894 if (curthread->t_flag & T_WOULDBLOCK) {
1895 curthread->t_flag &= ~T_WOULDBLOCK;
1896 resp->status = NFS3ERR_JUKEBOX;
1897 } else
1898 resp->status = puterrno3(error);
1899 out1:
1900 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1901 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1902 CREATE3res *, resp);
1903
1904 if (name != NULL && name != args->where.name)
1905 kmem_free(name, MAXPATHLEN + 1);
1906
1907 if (tvp != NULL) {
1908 if (in_crit)
1909 nbl_end_crit(tvp);
1910 VN_RELE(tvp);
1911 }
1912 if (dvp != NULL)
1913 VN_RELE(dvp);
1914 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1915 }
1916
1917 void *
1918 rfs3_create_getfh(CREATE3args *args)
1919 {
1920
1921 return (&args->where.dir);
1922 }
1923
/*
 * NFSv3 MKDIR service routine.
 *
 * Creates the directory args->where.name inside the directory identified by
 * args->where.dir, using the attributes in args->attributes (a mode must be
 * supplied).  On success resp->resok receives the new directory's file
 * handle (flagged as present only when makefh3() succeeds), its attributes
 * and the parent's wcc data; on failure resp->status is set and
 * resp->resfail receives the parent's wcc data.
 *
 * args - MKDIR3 arguments (parent handle, name, attributes)
 * resp - MKDIR3 result, filled in here
 * exi  - export information passed to nfs3_fhtovp(), makefh3() and others
 * req  - RPC request; used for the DTrace probes, the client label and the
 *        caller address
 * cr   - credentials used for the VOP calls
 * ro   - passed to rdonly() to decide whether the request is refused
 *
 * Exit labels: "out" converts `error' into an NFS status (or JUKEBOX when
 * the thread has T_WOULDBLOCK set); "out1" is entered with resp->status
 * already set.
 */
1924 void
1925 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1926 struct svc_req *req, cred_t *cr, bool_t ro)
1927 {
1928 int error;
1929 vnode_t *vp = NULL;
1930 vnode_t *dvp;
1931 struct vattr *vap;
1932 struct vattr va;
1933 struct vattr *dbvap;
1934 struct vattr dbva;
1935 struct vattr *davap;
1936 struct vattr dava;
1937 struct sockaddr *ca;
1938 char *name = NULL;
1939
1940 dbvap = NULL;
1941 davap = NULL;
1942
1943 dvp = nfs3_fhtovp(&args->where.dir, exi);
1944
1945 DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1946 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1947 MKDIR3args *, args);
1948
1949 if (dvp == NULL) {
1950 error = ESTALE;
1951 goto out;
1952 }
1953
/* Before-attributes of the parent; they double as after-attributes until */
/* VOP_MKDIR() has been attempted. */
1954 dbva.va_mask = AT_ALL;
1955 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1956 davap = dbvap;
1957
1958 if (args->where.name == nfs3nametoolong) {
1959 resp->status = NFS3ERR_NAMETOOLONG;
1960 goto out1;
1961 }
1962
1963 if (args->where.name == NULL || *(args->where.name) == '\0') {
1964 resp->status = NFS3ERR_ACCES;
1965 goto out1;
1966 }
1967
1968 if (rdonly(ro, dvp)) {
1969 resp->status = NFS3ERR_ROFS;
1970 goto out1;
1971 }
1972
1973 if (is_system_labeled()) {
1974 bslabel_t *clabel = req->rq_label;
1975
1976 ASSERT(clabel != NULL);
1977 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1978 "got client label from request(1)", struct svc_req *, req);
1979
1980 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1981 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1982 exi)) {
1983 resp->status = NFS3ERR_ACCES;
1984 goto out1;
1985 }
1986 }
1987 }
1988
1989 error = sattr3_to_vattr(&args->attributes, &va);
1990 if (error)
1991 goto out;
1992
/* Must specify the mode. */
1993 if (!(va.va_mask & AT_MODE)) {
1994 resp->status = NFS3ERR_INVAL;
1995 goto out1;
1996 }
1997
/*
 * Convert the client-supplied name.  The result is freed right after
 * VOP_MKDIR() (as a MAXPATHLEN + 1 byte buffer) whenever it differs from
 * args->where.name.
 */
1998 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1999 name = nfscmd_convname(ca, exi, args->where.name,
2000 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2001
2002 if (name == NULL) {
2003 resp->status = NFS3ERR_INVAL;
2004 goto out1;
2005 }
2006
2007 va.va_mask |= AT_TYPE;
2008 va.va_type = VDIR;
2009
2010 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2011
2012 if (name != args->where.name)
2013 kmem_free(name, MAXPATHLEN + 1);
2014
/* The parent's after-attributes are fetched whether or not mkdir worked */
2015 dava.va_mask = AT_ALL;
2016 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2017
2018 /*
2019 * Force modified data and metadata out to stable storage.
2020 */
2021 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2022
2023 if (error)
2024 goto out;
2025
/* A makefh3() failure does not fail the mkdir; the handle is just omitted */
2026 error = makefh3(&resp->resok.obj.handle, vp, exi);
2027 if (error)
2028 resp->resok.obj.handle_follows = FALSE;
2029 else
2030 resp->resok.obj.handle_follows = TRUE;
2031
2032 va.va_mask = AT_ALL;
2033 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2034
2035 /*
2036 * Force modified data and metadata out to stable storage.
2037 */
2038 (void) VOP_FSYNC(vp, 0, cr, NULL);
2039
2040 VN_RELE(vp);
2041
2042 resp->status = NFS3_OK;
2043 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2044 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2045
2046 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2047 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2048 MKDIR3res *, resp);
2049 VN_RELE(dvp);
2050
2051 return;
2052
2053 out:
2054 if (curthread->t_flag & T_WOULDBLOCK) {
2055 curthread->t_flag &= ~T_WOULDBLOCK;
2056 resp->status = NFS3ERR_JUKEBOX;
2057 } else
2058 resp->status = puterrno3(error);
2059 out1:
2060 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2061 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2062 MKDIR3res *, resp);
2063 if (dvp != NULL)
2064 VN_RELE(dvp);
2065 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2066 }
2067
2068 void *
2069 rfs3_mkdir_getfh(MKDIR3args *args)
2070 {
2071
2072 return (&args->where.dir);
2073 }
2074
/*
 * NFSv3 SYMLINK service routine.
 *
 * Creates the symbolic link args->where.name, whose contents are
 * args->symlink.symlink_data, in the directory identified by
 * args->where.dir.  Once VOP_SYMLINK() succeeds the reply status is NFS3_OK
 * even if the follow-up VOP_LOOKUP() or makefh3() fails; in that case the
 * reply simply omits the file handle (and, for a failed lookup, the object
 * attributes).  On failure resp->status is set and resp->resfail receives
 * the directory wcc data.
 *
 * args - SYMLINK3 arguments (directory handle, name, attributes, link data)
 * resp - SYMLINK3 result, filled in here
 * exi  - export information passed to nfs3_fhtovp(), makefh3() and others
 * req  - RPC request; used for the DTrace probes, the client label and the
 *        caller address
 * cr   - credentials used for the VOP calls
 * ro   - passed to rdonly() to decide whether the request is refused
 *
 * Exit labels: "err" converts `error' into an NFS status (or JUKEBOX when
 * the thread has T_WOULDBLOCK set), "err1" is entered with resp->status
 * already set, and "out" is the common tail that frees the converted
 * strings, fires the done probe and releases dvp.
 */
2075 void
2076 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2077 struct svc_req *req, cred_t *cr, bool_t ro)
2078 {
2079 int error;
2080 vnode_t *vp;
2081 vnode_t *dvp;
2082 struct vattr *vap;
2083 struct vattr va;
2084 struct vattr *dbvap;
2085 struct vattr dbva;
2086 struct vattr *davap;
2087 struct vattr dava;
2088 struct sockaddr *ca;
2089 char *name = NULL;
2090 char *symdata = NULL;
2091
2092 dbvap = NULL;
2093 davap = NULL;
2094
2095 dvp = nfs3_fhtovp(&args->where.dir, exi);
2096
2097 DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2098 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2099 SYMLINK3args *, args);
2100
2101 if (dvp == NULL) {
2102 error = ESTALE;
2103 goto err;
2104 }
2105
/* Before-attributes of the directory; they double as after-attributes */
/* until VOP_SYMLINK() has been attempted. */
2106 dbva.va_mask = AT_ALL;
2107 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2108 davap = dbvap;
2109
2110 if (args->where.name == nfs3nametoolong) {
2111 resp->status = NFS3ERR_NAMETOOLONG;
2112 goto err1;
2113 }
2114
2115 if (args->where.name == NULL || *(args->where.name) == '\0') {
2116 resp->status = NFS3ERR_ACCES;
2117 goto err1;
2118 }
2119
2120 if (rdonly(ro, dvp)) {
2121 resp->status = NFS3ERR_ROFS;
2122 goto err1;
2123 }
2124
2125 if (is_system_labeled()) {
2126 bslabel_t *clabel = req->rq_label;
2127
2128 ASSERT(clabel != NULL);
2129 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2130 "got client label from request(1)", struct svc_req *, req);
2131
2132 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2133 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2134 exi)) {
2135 resp->status = NFS3ERR_ACCES;
2136 goto err1;
2137 }
2138 }
2139 }
2140
2141 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2142 if (error)
2143 goto err;
2144
/* Must specify the mode. */
2145 if (!(va.va_mask & AT_MODE)) {
2146 resp->status = NFS3ERR_INVAL;
2147 goto err1;
2148 }
2149
2150 if (args->symlink.symlink_data == nfs3nametoolong) {
2151 resp->status = NFS3ERR_NAMETOOLONG;
2152 goto err1;
2153 }
2154
/*
 * Convert both the link name and the link contents.  Each result is freed
 * at "out" (as a MAXPATHLEN + 1 byte buffer) whenever it differs from the
 * string it was converted from.
 */
2155 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2156 name = nfscmd_convname(ca, exi, args->where.name,
2157 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2158
2159 if (name == NULL) {
2160 /* This is really a Solaris EILSEQ */
2161 resp->status = NFS3ERR_INVAL;
2162 goto err1;
2163 }
2164
2165 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2166 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2167 if (symdata == NULL) {
2168 /* This is really a Solaris EILSEQ */
2169 resp->status = NFS3ERR_INVAL;
2170 goto err1;
2171 }
2172
2173
2174 va.va_mask |= AT_TYPE;
2175 va.va_type = VLNK;
2176
2177 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2178
2179 dava.va_mask = AT_ALL;
2180 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2181
2182 if (error)
2183 goto err;
2184
/* VOP_SYMLINK() is not given a vnode pointer to fill in; look the link up */
2185 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2186 NULL, NULL, NULL);
2187
2188 /*
2189 * Force modified data and metadata out to stable storage.
2190 */
2191 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2192
2193
/* The link exists at this point, so a failed lookup only trims the reply */
2194 resp->status = NFS3_OK;
2195 if (error) {
2196 resp->resok.obj.handle_follows = FALSE;
2197 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2198 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2199 goto out;
2200 }
2201
2202 error = makefh3(&resp->resok.obj.handle, vp, exi);
2203 if (error)
2204 resp->resok.obj.handle_follows = FALSE;
2205 else
2206 resp->resok.obj.handle_follows = TRUE;
2207
2208 va.va_mask = AT_ALL;
2209 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2210
2211 /*
2212 * Force modified data and metadata out to stable storage.
2213 */
2214 (void) VOP_FSYNC(vp, 0, cr, NULL);
2215
2216 VN_RELE(vp);
2217
2218 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2219 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2220 goto out;
2221
2222 err:
2223 if (curthread->t_flag & T_WOULDBLOCK) {
2224 curthread->t_flag &= ~T_WOULDBLOCK;
2225 resp->status = NFS3ERR_JUKEBOX;
2226 } else
2227 resp->status = puterrno3(error);
2228 err1:
2229 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2230 out:
2231 if (name != NULL && name != args->where.name)
2232 kmem_free(name, MAXPATHLEN + 1);
2233 if (symdata != NULL && symdata != args->symlink.symlink_data)
2234 kmem_free(symdata, MAXPATHLEN + 1);
2235
2236 DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2237 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2238 SYMLINK3res *, resp);
2239
2240 if (dvp != NULL)
2241 VN_RELE(dvp);
2242 }
2243
2244 void *
2245 rfs3_symlink_getfh(SYMLINK3args *args)
2246 {
2247
2248 return (&args->where.dir);
2249 }
2250
/*
 * NFSv3 MKNOD service routine.
 *
 * Creates a special file named args->where.name in the directory identified
 * by args->where.dir.  args->what.type selects a character device, block
 * device, socket or FIFO; any other type is answered with NFS3ERR_BADTYPE.
 * On success resp->resok receives the new object's file handle (flagged as
 * present only when makefh3() succeeds), its attributes and the directory
 * wcc data; on failure resp->status is set and resp->resfail receives the
 * directory wcc data.
 *
 * args - MKNOD3 arguments (directory handle, name, type and type data)
 * resp - MKNOD3 result, filled in here
 * exi  - export information passed to nfs3_fhtovp(), makefh3() and others
 * req  - RPC request; used for the DTrace probes, the client label and the
 *        caller address
 * cr   - credentials used for the VOP calls and the privilege check
 * ro   - passed to rdonly() to decide whether the request is refused
 *
 * Exit labels: "out" converts `error' into an NFS status (or JUKEBOX when
 * the thread has T_WOULDBLOCK set); "out1" is entered with resp->status
 * already set.
 */
2251 void
2252 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2253 struct svc_req *req, cred_t *cr, bool_t ro)
2254 {
2255 int error;
2256 vnode_t *vp;
2257 vnode_t *realvp;
2258 vnode_t *dvp;
2259 struct vattr *vap;
2260 struct vattr va;
2261 struct vattr *dbvap;
2262 struct vattr dbva;
2263 struct vattr *davap;
2264 struct vattr dava;
2265 int mode;
2266 enum vcexcl excl;
2267 struct sockaddr *ca;
2268 char *name = NULL;
2269
2270 dbvap = NULL;
2271 davap = NULL;
2272
2273 dvp = nfs3_fhtovp(&args->where.dir, exi);
2274
2275 DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2276 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2277 MKNOD3args *, args);
2278
2279 if (dvp == NULL) {
2280 error = ESTALE;
2281 goto out;
2282 }
2283
/* Before-attributes of the directory; they double as after-attributes */
/* until VOP_CREATE() has been attempted. */
2284 dbva.va_mask = AT_ALL;
2285 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2286 davap = dbvap;
2287
2288 if (args->where.name == nfs3nametoolong) {
2289 resp->status = NFS3ERR_NAMETOOLONG;
2290 goto out1;
2291 }
2292
2293 if (args->where.name == NULL || *(args->where.name) == '\0') {
2294 resp->status = NFS3ERR_ACCES;
2295 goto out1;
2296 }
2297
2298 if (rdonly(ro, dvp)) {
2299 resp->status = NFS3ERR_ROFS;
2300 goto out1;
2301 }
2302
2303 if (is_system_labeled()) {
2304 bslabel_t *clabel = req->rq_label;
2305
2306 ASSERT(clabel != NULL);
2307 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2308 "got client label from request(1)", struct svc_req *, req);
2309
2310 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2311 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2312 exi)) {
2313 resp->status = NFS3ERR_ACCES;
2314 goto out1;
2315 }
2316 }
2317 }
2318
/* Fill in va (attributes, vnode type, device number) per requested type */
2319 switch (args->what.type) {
2320 case NF3CHR:
2321 case NF3BLK:
2322 error = sattr3_to_vattr(
2323 &args->what.mknoddata3_u.device.dev_attributes, &va);
2324 if (error)
2325 goto out;
/* Device nodes are refused unless secpolicy_sys_devices() returns 0 */
2326 if (secpolicy_sys_devices(cr) != 0) {
2327 resp->status = NFS3ERR_PERM;
2328 goto out1;
2329 }
2330 if (args->what.type == NF3CHR)
2331 va.va_type = VCHR;
2332 else
2333 va.va_type = VBLK;
2334 va.va_rdev = makedevice(
2335 args->what.mknoddata3_u.device.spec.specdata1,
2336 args->what.mknoddata3_u.device.spec.specdata2);
2337 va.va_mask |= AT_TYPE | AT_RDEV;
2338 break;
2339 case NF3SOCK:
2340 error = sattr3_to_vattr(
2341 &args->what.mknoddata3_u.pipe_attributes, &va);
2342 if (error)
2343 goto out;
2344 va.va_type = VSOCK;
2345 va.va_mask |= AT_TYPE;
2346 break;
2347 case NF3FIFO:
2348 error = sattr3_to_vattr(
2349 &args->what.mknoddata3_u.pipe_attributes, &va);
2350 if (error)
2351 goto out;
2352 va.va_type = VFIFO;
2353 va.va_mask |= AT_TYPE;
2354 break;
2355 default:
2356 resp->status = NFS3ERR_BADTYPE;
2357 goto out1;
2358 }
2359
2360 /*
2361 * Must specify the mode.
2362 */
2363 if (!(va.va_mask & AT_MODE)) {
2364 resp->status = NFS3ERR_INVAL;
2365 goto out1;
2366 }
2367
/*
 * Convert the client-supplied name.  The result is freed right after
 * VOP_CREATE() (as a MAXPATHLEN + 1 byte buffer) whenever it differs from
 * args->where.name.
 */
2368 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2369 name = nfscmd_convname(ca, exi, args->where.name,
2370 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2371
2372 if (name == NULL) {
2373 resp->status = NFS3ERR_INVAL;
2374 goto out1;
2375 }
2376
/* The node is always created exclusively, with 0 as the open mode */
2377 excl = EXCL;
2378
2379 mode = 0;
2380
2381 error = VOP_CREATE(dvp, name, &va, excl, mode,
2382 &vp, cr, 0, NULL, NULL);
2383
2384 if (name != args->where.name)
2385 kmem_free(name, MAXPATHLEN + 1);
2386
/* The directory's after-attributes are fetched whether or not create worked */
2387 dava.va_mask = AT_ALL;
2388 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2389
2390 /*
2391 * Force modified data and metadata out to stable storage.
2392 */
2393 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2394
2395 if (error)
2396 goto out;
2397
2398 resp->status = NFS3_OK;
2399
/* A makefh3() failure does not fail the mknod; the handle is just omitted */
2400 error = makefh3(&resp->resok.obj.handle, vp, exi);
2401 if (error)
2402 resp->resok.obj.handle_follows = FALSE;
2403 else
2404 resp->resok.obj.handle_follows = TRUE;
2405
2406 va.va_mask = AT_ALL;
2407 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2408
2409 /*
2410 * Force modified metadata out to stable storage.
2411 *
2412 * if a underlying vp exists, pass it to VOP_FSYNC
2413 */
2414 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2415 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2416 else
2417 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2418
2419 VN_RELE(vp);
2420
2421 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2422 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2423 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2424 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2425 MKNOD3res *, resp);
2426 VN_RELE(dvp);
2427 return;
2428
2429 out:
2430 if (curthread->t_flag & T_WOULDBLOCK) {
2431 curthread->t_flag &= ~T_WOULDBLOCK;
2432 resp->status = NFS3ERR_JUKEBOX;
2433 } else
2434 resp->status = puterrno3(error);
2435 out1:
2436 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2437 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2438 MKNOD3res *, resp);
2439 if (dvp != NULL)
2440 VN_RELE(dvp);
2441 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2442 }
2443
2444 void *
2445 rfs3_mknod_getfh(MKNOD3args *args)
2446 {
2447
2448 return (&args->where.dir);
2449 }
2450
2451 void
2452 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2453 struct svc_req *req, cred_t *cr, bool_t ro)
2454 {
2455 int error = 0;
2456 vnode_t *vp;
2457 struct vattr *bvap;
2458 struct vattr bva;
2459 struct vattr *avap;
2460 struct vattr ava;
2461 vnode_t *targvp = NULL;
2462 struct sockaddr *ca;
2463 char *name = NULL;
2464
2465 bvap = NULL;
2466 avap = NULL;
2467
2468 vp = nfs3_fhtovp(&args->object.dir, exi);
2469
2470 DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2471 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2472 REMOVE3args *, args);
2473
2474 if (vp == NULL) {
2475 error = ESTALE;
2476 goto err;
2477 }
2478
2479 bva.va_mask = AT_ALL;
2480 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2481 avap = bvap;
2482
2483 if (vp->v_type != VDIR) {
2484 resp->status = NFS3ERR_NOTDIR;
2485 goto err1;
2486 }
2487
2488 if (args->object.name == nfs3nametoolong) {
2489 resp->status = NFS3ERR_NAMETOOLONG;
2490 goto err1;
2491 }
2492
2493 if (args->object.name == NULL || *(args->object.name) == '\0') {
2494 resp->status = NFS3ERR_ACCES;
2495 goto err1;
2496 }
2497
2498 if (rdonly(ro, vp)) {
2499 resp->status = NFS3ERR_ROFS;
2500 goto err1;
2501 }
2502
2503 if (is_system_labeled()) {
2504 bslabel_t *clabel = req->rq_label;
2505
2506 ASSERT(clabel != NULL);
2507 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2508 "got client label from request(1)", struct svc_req *, req);
2509
2510 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2511 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2512 exi)) {
2513 resp->status = NFS3ERR_ACCES;
2514 goto err1;
2515 }
2516 }
2517 }
2518
2519 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2520 name = nfscmd_convname(ca, exi, args->object.name,
2521 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2522
2523 if (name == NULL) {
2524 resp->status = NFS3ERR_INVAL;
2525 goto err1;
2526 }
2527
2528 /*
2529 * Check for a conflict with a non-blocking mandatory share
2530 * reservation and V4 delegations
2531 */
2532 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2533 NULL, cr, NULL, NULL, NULL);
2534 if (error != 0)
2535 goto err;
2536
2537 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2538 resp->status = NFS3ERR_JUKEBOX;
2539 goto err1;
2540 }
2541
2542 if (!nbl_need_check(targvp)) {
2543 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2544 } else {
2545 nbl_start_crit(targvp, RW_READER);
2546 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2547 error = EACCES;
2548 } else {
2549 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2550 }
2551 nbl_end_crit(targvp);
2552 }
2553 VN_RELE(targvp);
2554 targvp = NULL;
2555
2556 ava.va_mask = AT_ALL;
2557 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2558
2559 /*
2560 * Force modified data and metadata out to stable storage.
2561 */
2562 (void) VOP_FSYNC(vp, 0, cr, NULL);
2563
2564 if (error)
2565 goto err;
2566
2567 resp->status = NFS3_OK;
2568 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2569 goto out;
2570
2571 err:
2572 if (curthread->t_flag & T_WOULDBLOCK) {
2573 curthread->t_flag &= ~T_WOULDBLOCK;
2574 resp->status = NFS3ERR_JUKEBOX;
2575 } else
2576 resp->status = puterrno3(error);
2577 err1:
2578 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2579 out:
2580 DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2581 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2582 REMOVE3res *, resp);
2583
2584 if (name != NULL && name != args->object.name)
2585 kmem_free(name, MAXPATHLEN + 1);
2586
2587 if (vp != NULL)
2588 VN_RELE(vp);
2589 }
2590
2591 void *
2592 rfs3_remove_getfh(REMOVE3args *args)
2593 {
2594
2595 return (&args->object.dir);
2596 }
2597
2598 void
2599 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2600 struct svc_req *req, cred_t *cr, bool_t ro)
2601 {
2602 int error;
2603 vnode_t *vp;
2604 struct vattr *bvap;
2605 struct vattr bva;
2606 struct vattr *avap;
2607 struct vattr ava;
2608 struct sockaddr *ca;
2609 char *name = NULL;
2610
2611 bvap = NULL;
2612 avap = NULL;
2613
2614 vp = nfs3_fhtovp(&args->object.dir, exi);
2615
2616 DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2617 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2618 RMDIR3args *, args);
2619
2620 if (vp == NULL) {
2621 error = ESTALE;
2622 goto err;
2623 }
2624
2625 bva.va_mask = AT_ALL;
2626 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2627 avap = bvap;
2628
2629 if (vp->v_type != VDIR) {
2630 resp->status = NFS3ERR_NOTDIR;
2631 goto err1;
2632 }
2633
2634 if (args->object.name == nfs3nametoolong) {
2635 resp->status = NFS3ERR_NAMETOOLONG;
2636 goto err1;
2637 }
2638
2639 if (args->object.name == NULL || *(args->object.name) == '\0') {
2640 resp->status = NFS3ERR_ACCES;
2641 goto err1;
2642 }
2643
2644 if (rdonly(ro, vp)) {
2645 resp->status = NFS3ERR_ROFS;
2646 goto err1;
2647 }
2648
2649 if (is_system_labeled()) {
2650 bslabel_t *clabel = req->rq_label;
2651
2652 ASSERT(clabel != NULL);
2653 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2654 "got client label from request(1)", struct svc_req *, req);
2655
2656 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2657 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2658 exi)) {
2659 resp->status = NFS3ERR_ACCES;
2660 goto err1;
2661 }
2662 }
2663 }
2664
2665 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2666 name = nfscmd_convname(ca, exi, args->object.name,
2667 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2668
2669 if (name == NULL) {
2670 resp->status = NFS3ERR_INVAL;
2671 goto err1;
2672 }
2673
2674 error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2675
2676 if (name != args->object.name)
2677 kmem_free(name, MAXPATHLEN + 1);
2678
2679 ava.va_mask = AT_ALL;
2680 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2681
2682 /*
2683 * Force modified data and metadata out to stable storage.
2684 */
2685 (void) VOP_FSYNC(vp, 0, cr, NULL);
2686
2687 if (error) {
2688 /*
2689 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2690 * if the directory is not empty. A System V NFS server
2691 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2692 * over the wire.
2693 */
2694 if (error == EEXIST)
2695 error = ENOTEMPTY;
2696 goto err;
2697 }
2698
2699 resp->status = NFS3_OK;
2700 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2701 goto out;
2702
2703 err:
2704 if (curthread->t_flag & T_WOULDBLOCK) {
2705 curthread->t_flag &= ~T_WOULDBLOCK;
2706 resp->status = NFS3ERR_JUKEBOX;
2707 } else
2708 resp->status = puterrno3(error);
2709 err1:
2710 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2711 out:
2712 DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2713 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2714 RMDIR3res *, resp);
2715 if (vp != NULL)
2716 VN_RELE(vp);
2717
2718 }
2719
2720 void *
2721 rfs3_rmdir_getfh(RMDIR3args *args)
2722 {
2723
2724 return (&args->object.dir);
2725 }
2726
2727 void
2728 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2729 struct svc_req *req, cred_t *cr, bool_t ro)
2730 {
2731 int error = 0;
2732 vnode_t *fvp;
2733 vnode_t *tvp;
2734 vnode_t *targvp;
2735 struct vattr *fbvap;
2736 struct vattr fbva;
2737 struct vattr *favap;
2738 struct vattr fava;
2739 struct vattr *tbvap;
2740 struct vattr tbva;
2741 struct vattr *tavap;
2742 struct vattr tava;
2743 nfs_fh3 *fh3;
2744 struct exportinfo *to_exi;
2745 vnode_t *srcvp = NULL;
2746 bslabel_t *clabel;
2747 struct sockaddr *ca;
2748 char *name = NULL;
2749 char *toname = NULL;
2750
2751 fbvap = NULL;
2752 favap = NULL;
2753 tbvap = NULL;
2754 tavap = NULL;
2755 tvp = NULL;
2756
2757 fvp = nfs3_fhtovp(&args->from.dir, exi);
2758
2759 DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2760 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2761 RENAME3args *, args);
2762
2763 if (fvp == NULL) {
2764 error = ESTALE;
2765 goto err;
2766 }
2767
2768 if (is_system_labeled()) {
2769 clabel = req->rq_label;
2770 ASSERT(clabel != NULL);
2771 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2772 "got client label from request(1)", struct svc_req *, req);
2773
2774 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2775 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2776 exi)) {
2777 resp->status = NFS3ERR_ACCES;
2778 goto err1;
2779 }
2780 }
2781 }
2782
2783 fbva.va_mask = AT_ALL;
2784 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2785 favap = fbvap;
2786
2787 fh3 = &args->to.dir;
2788 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2789 if (to_exi == NULL) {
2790 resp->status = NFS3ERR_ACCES;
2791 goto err1;
2792 }
2793 exi_rele(to_exi);
2794
2795 if (to_exi != exi) {
2796 resp->status = NFS3ERR_XDEV;
2797 goto err1;
2798 }
2799
2800 tvp = nfs3_fhtovp(&args->to.dir, exi);
2801 if (tvp == NULL) {
2802 error = ESTALE;
2803 goto err;
2804 }
2805
2806 tbva.va_mask = AT_ALL;
2807 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2808 tavap = tbvap;
2809
2810 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2811 resp->status = NFS3ERR_NOTDIR;
2812 goto err1;
2813 }
2814
2815 if (args->from.name == nfs3nametoolong ||
2816 args->to.name == nfs3nametoolong) {
2817 resp->status = NFS3ERR_NAMETOOLONG;
2818 goto err1;
2819 }
2820 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2821 args->to.name == NULL || *(args->to.name) == '\0') {
2822 resp->status = NFS3ERR_ACCES;
2823 goto err1;
2824 }
2825
2826 if (rdonly(ro, tvp)) {
2827 resp->status = NFS3ERR_ROFS;
2828 goto err1;
2829 }
2830
2831 if (is_system_labeled()) {
2832 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2833 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2834 exi)) {
2835 resp->status = NFS3ERR_ACCES;
2836 goto err1;
2837 }
2838 }
2839 }
2840
2841 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2842 name = nfscmd_convname(ca, exi, args->from.name,
2843 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2844
2845 if (name == NULL) {
2846 resp->status = NFS3ERR_INVAL;
2847 goto err1;
2848 }
2849
2850 toname = nfscmd_convname(ca, exi, args->to.name,
2851 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2852
2853 if (toname == NULL) {
2854 resp->status = NFS3ERR_INVAL;
2855 goto err1;
2856 }
2857
2858 /*
2859 * Check for a conflict with a non-blocking mandatory share
2860 * reservation or V4 delegations.
2861 */
2862 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2863 NULL, cr, NULL, NULL, NULL);
2864 if (error != 0)
2865 goto err;
2866
2867 /*
2868 * If we rename a delegated file we should recall the
2869 * delegation, since future opens should fail or would
2870 * refer to a new file.
2871 */
2872 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2873 resp->status = NFS3ERR_JUKEBOX;
2874 goto err1;
2875 }
2876
2877 /*
2878 * Check for renaming over a delegated file. Check nfs4_deleg_policy
2879 * first to avoid VOP_LOOKUP if possible.
2880 */
2881 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2882 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2883 NULL, NULL, NULL) == 0) {
2884
2885 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2886 VN_RELE(targvp);
2887 resp->status = NFS3ERR_JUKEBOX;
2888 goto err1;
2889 }
2890 VN_RELE(targvp);
2891 }
2892
2893 if (!nbl_need_check(srcvp)) {
2894 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2895 } else {
2896 nbl_start_crit(srcvp, RW_READER);
2897 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2898 error = EACCES;
2899 else
2900 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2901 nbl_end_crit(srcvp);
2902 }
2903 if (error == 0)
2904 vn_renamepath(tvp, srcvp, args->to.name,
2905 strlen(args->to.name));
2906 VN_RELE(srcvp);
2907 srcvp = NULL;
2908
2909 fava.va_mask = AT_ALL;
2910 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2911 tava.va_mask = AT_ALL;
2912 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2913
2914 /*
2915 * Force modified data and metadata out to stable storage.
2916 */
2917 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2918 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2919
2920 if (error)
2921 goto err;
2922
2923 resp->status = NFS3_OK;
2924 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2925 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2926 goto out;
2927
2928 err:
2929 if (curthread->t_flag & T_WOULDBLOCK) {
2930 curthread->t_flag &= ~T_WOULDBLOCK;
2931 resp->status = NFS3ERR_JUKEBOX;
2932 } else {
2933 resp->status = puterrno3(error);
2934 }
2935 err1:
2936 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2937 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2938
2939 out:
2940 if (name != NULL && name != args->from.name)
2941 kmem_free(name, MAXPATHLEN + 1);
2942 if (toname != NULL && toname != args->to.name)
2943 kmem_free(toname, MAXPATHLEN + 1);
2944
2945 DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2946 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2947 RENAME3res *, resp);
2948 if (fvp != NULL)
2949 VN_RELE(fvp);
2950 if (tvp != NULL)
2951 VN_RELE(tvp);
2952 }
2953
2954 void *
2955 rfs3_rename_getfh(RENAME3args *args)
2956 {
2957
2958 return (&args->from.dir);
2959 }
2960
2961 void
2962 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2963 struct svc_req *req, cred_t *cr, bool_t ro)
2964 {
2965 int error;
2966 vnode_t *vp;
2967 vnode_t *dvp;
2968 struct vattr *vap;
2969 struct vattr va;
2970 struct vattr *bvap;
2971 struct vattr bva;
2972 struct vattr *avap;
2973 struct vattr ava;
2974 nfs_fh3 *fh3;
2975 struct exportinfo *to_exi;
2976 bslabel_t *clabel;
2977 struct sockaddr *ca;
2978 char *name = NULL;
2979
2980 vap = NULL;
2981 bvap = NULL;
2982 avap = NULL;
2983 dvp = NULL;
2984
2985 vp = nfs3_fhtovp(&args->file, exi);
2986
2987 DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2988 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2989 LINK3args *, args);
2990
2991 if (vp == NULL) {
2992 error = ESTALE;
2993 goto out;
2994 }
2995
2996 va.va_mask = AT_ALL;
2997 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2998
2999 fh3 = &args->link.dir;
3000 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3001 if (to_exi == NULL) {
3002 resp->status = NFS3ERR_ACCES;
3003 goto out1;
3004 }
3005 exi_rele(to_exi);
3006
3007 if (to_exi != exi) {
3008 resp->status = NFS3ERR_XDEV;
3009 goto out1;
3010 }
3011
3012 if (is_system_labeled()) {
3013 clabel = req->rq_label;
3014
3015 ASSERT(clabel != NULL);
3016 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3017 "got client label from request(1)", struct svc_req *, req);
3018
3019 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3020 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3021 exi)) {
3022 resp->status = NFS3ERR_ACCES;
3023 goto out1;
3024 }
3025 }
3026 }
3027
3028 dvp = nfs3_fhtovp(&args->link.dir, exi);
3029 if (dvp == NULL) {
3030 error = ESTALE;
3031 goto out;
3032 }
3033
3034 bva.va_mask = AT_ALL;
3035 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3036
3037 if (dvp->v_type != VDIR) {
3038 resp->status = NFS3ERR_NOTDIR;
3039 goto out1;
3040 }
3041
3042 if (args->link.name == nfs3nametoolong) {
3043 resp->status = NFS3ERR_NAMETOOLONG;
3044 goto out1;
3045 }
3046
3047 if (args->link.name == NULL || *(args->link.name) == '\0') {
3048 resp->status = NFS3ERR_ACCES;
3049 goto out1;
3050 }
3051
3052 if (rdonly(ro, dvp)) {
3053 resp->status = NFS3ERR_ROFS;
3054 goto out1;
3055 }
3056
3057 if (is_system_labeled()) {
3058 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3059 "got client label from request(1)", struct svc_req *, req);
3060
3061 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3062 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3063 exi)) {
3064 resp->status = NFS3ERR_ACCES;
3065 goto out1;
3066 }
3067 }
3068 }
3069
3070 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3071 name = nfscmd_convname(ca, exi, args->link.name,
3072 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3073
3074 if (name == NULL) {
3075 resp->status = NFS3ERR_SERVERFAULT;
3076 goto out1;
3077 }
3078
3079 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3080
3081 va.va_mask = AT_ALL;
3082 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3083 ava.va_mask = AT_ALL;
3084 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3085
3086 /*
3087 * Force modified data and metadata out to stable storage.
3088 */
3089 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3090 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3091
3092 if (error)
3093 goto out;
3094
3095 VN_RELE(dvp);
3096
3097 resp->status = NFS3_OK;
3098 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3099 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3100
3101 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3102 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3103 LINK3res *, resp);
3104
3105 VN_RELE(vp);
3106
3107 return;
3108
3109 out:
3110 if (curthread->t_flag & T_WOULDBLOCK) {
3111 curthread->t_flag &= ~T_WOULDBLOCK;
3112 resp->status = NFS3ERR_JUKEBOX;
3113 } else
3114 resp->status = puterrno3(error);
3115 out1:
3116 if (name != NULL && name != args->link.name)
3117 kmem_free(name, MAXPATHLEN + 1);
3118
3119 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3120 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3121 LINK3res *, resp);
3122
3123 if (vp != NULL)
3124 VN_RELE(vp);
3125 if (dvp != NULL)
3126 VN_RELE(dvp);
3127 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3128 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3129 }
3130
3131 void *
3132 rfs3_link_getfh(LINK3args *args)
3133 {
3134
3135 return (&args->file);
3136 }
3137
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose length is specified by
 * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
 * is not, then we need to check to make sure that this error does not
 * need to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is made up of the following:
 *
 * status			- 1 * BYTES_PER_XDR_UNIT
 * attr. flag			- 1 * BYTES_PER_XDR_UNIT
 * cookie verifier		- 2 * BYTES_PER_XDR_UNIT
 * attributes			- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * boolean			- 1 * BYTES_PER_XDR_UNIT
 * file id			- 2 * BYTES_PER_XDR_UNIT
 * directory name length	- 1 * BYTES_PER_XDR_UNIT
 * cookie			- 2 * BYTES_PER_XDR_UNIT
 * end of list			- 1 * BYTES_PER_XDR_UNIT
 * end of file			- 1 * BYTES_PER_XDR_UNIT
 * the length of the entry name, rounded up to a multiple of
 * BYTES_PER_XDR_UNIT
 */

#define NFS3_READDIR_MIN_COUNT(length) \
	((1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
	BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT))
3166
3167 /* ARGSUSED */
3168 void
3169 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3170 struct svc_req *req, cred_t *cr, bool_t ro)
3171 {
3172 int error;
3173 vnode_t *vp;
3174 struct vattr *vap;
3175 struct vattr va;
3176 struct iovec iov;
3177 struct uio uio;
3178 char *data;
3179 int iseof;
3180 int bufsize;
3181 int namlen;
3182 uint_t count;
3183 struct sockaddr *ca;
3184
3185 vap = NULL;
3186
3187 vp = nfs3_fhtovp(&args->dir, exi);
3188
3189 DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3190 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3191 READDIR3args *, args);
3192
3193 if (vp == NULL) {
3194 error = ESTALE;
3195 goto out;
3196 }
3197
3198 if (is_system_labeled()) {
3199 bslabel_t *clabel = req->rq_label;
3200
3201 ASSERT(clabel != NULL);
3202 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3203 "got client label from request(1)", struct svc_req *, req);
3204
3205 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3206 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3207 exi)) {
3208 resp->status = NFS3ERR_ACCES;
3209 goto out1;
3210 }
3211 }
3212 }
3213
3214 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3215
3216 va.va_mask = AT_ALL;
3217 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3218
3219 if (vp->v_type != VDIR) {
3220 resp->status = NFS3ERR_NOTDIR;
3221 goto out1;
3222 }
3223
3224 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3225 if (error)
3226 goto out;
3227
3228 /*
3229 * Now don't allow arbitrary count to alloc;
3230 * allow the maximum not to exceed rfs3_tsize()
3231 */
3232 if (args->count > rfs3_tsize(req))
3233 args->count = rfs3_tsize(req);
3234
3235 /*
3236 * Make sure that there is room to read at least one entry
3237 * if any are available.
3238 */
3239 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3240 count = DIRENT64_RECLEN(MAXNAMELEN);
3241 else
3242 count = args->count;
3243
3244 data = kmem_alloc(count, KM_SLEEP);
3245
3246 iov.iov_base = data;
3247 iov.iov_len = count;
3248 uio.uio_iov = &iov;
3249 uio.uio_iovcnt = 1;
3250 uio.uio_segflg = UIO_SYSSPACE;
3251 uio.uio_extflg = UIO_COPY_CACHED;
3252 uio.uio_loffset = (offset_t)args->cookie;
3253 uio.uio_resid = count;
3254
3255 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3256
3257 va.va_mask = AT_ALL;
3258 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3259
3260 if (error) {
3261 kmem_free(data, count);
3262 goto out;
3263 }
3264
3265 /*
3266 * If the count was not large enough to be able to guarantee
3267 * to be able to return at least one entry, then need to
3268 * check to see if NFS3ERR_TOOSMALL should be returned.
3269 */
3270 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3271 /*
3272 * bufsize is used to keep track of the size of the response.
3273 * It is primed with:
3274 * 1 for the status +
3275 * 1 for the dir_attributes.attributes boolean +
3276 * 2 for the cookie verifier
3277 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3278 * to bytes. If there are directory attributes to be
3279 * returned, then:
3280 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3281 * time BYTES_PER_XDR_UNIT is added to account for them.
3282 */
3283 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3284 if (vap != NULL)
3285 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3286 /*
3287 * An entry is composed of:
3288 * 1 for the true/false list indicator +
3289 * 2 for the fileid +
3290 * 1 for the length of the name +
3291 * 2 for the cookie +
3292 * all times BYTES_PER_XDR_UNIT to convert from
3293 * XDR units to bytes, plus the length of the name
3294 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3295 */
3296 if (count != uio.uio_resid) {
3297 namlen = strlen(((struct dirent64 *)data)->d_name);
3298 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3299 roundup(namlen, BYTES_PER_XDR_UNIT);
3300 }
3301 /*
3302 * We need to check to see if the number of bytes left
3303 * to go into the buffer will actually fit into the
3304 * buffer. This is calculated as the size of this
3305 * entry plus:
3306 * 1 for the true/false list indicator +
3307 * 1 for the eof indicator
3308 * times BYTES_PER_XDR_UNIT to convert from from
3309 * XDR units to bytes.
3310 */
3311 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3312 if (bufsize > args->count) {
3313 kmem_free(data, count);
3314 resp->status = NFS3ERR_TOOSMALL;
3315 goto out1;
3316 }
3317 }
3318
3319 /*
3320 * Have a valid readir buffer for the native character
3321 * set. Need to check if a conversion is necessary and
3322 * potentially rewrite the whole buffer. Note that if the
3323 * conversion expands names enough, the structure may not
3324 * fit. In this case, we need to drop entries until if fits
3325 * and patch the counts in order that the next readdir will
3326 * get the correct entries.
3327 */
3328 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3329 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3330
3331
3332 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3333
3334 #if 0 /* notyet */
3335 /*
3336 * Don't do this. It causes local disk writes when just
3337 * reading the file and the overhead is deemed larger
3338 * than the benefit.
3339 */
3340 /*
3341 * Force modified metadata out to stable storage.
3342 */
3343 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3344 #endif
3345
3346 resp->status = NFS3_OK;
3347 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3348 resp->resok.cookieverf = 0;
3349 resp->resok.reply.entries = (entry3 *)data;
3350 resp->resok.reply.eof = iseof;
3351 resp->resok.size = count - uio.uio_resid;
3352 resp->resok.count = args->count;
3353 resp->resok.freecount = count;
3354
3355 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3356 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3357 READDIR3res *, resp);
3358
3359 VN_RELE(vp);
3360
3361 return;
3362
3363 out:
3364 if (curthread->t_flag & T_WOULDBLOCK) {
3365 curthread->t_flag &= ~T_WOULDBLOCK;
3366 resp->status = NFS3ERR_JUKEBOX;
3367 } else
3368 resp->status = puterrno3(error);
3369 out1:
3370 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3371
3372 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3373 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3374 READDIR3res *, resp);
3375
3376 if (vp != NULL) {
3377 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3378 VN_RELE(vp);
3379 }
3380 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3381 }
3382
3383 void *
3384 rfs3_readdir_getfh(READDIR3args *args)
3385 {
3386
3387 return (&args->dir);
3388 }
3389
3390 void
3391 rfs3_readdir_free(READDIR3res *resp)
3392 {
3393
3394 if (resp->status == NFS3_OK)
3395 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3396 }
3397
/*
 * nextdp(dp) yields a pointer to the dirent64 record that starts
 * dp->d_reclen bytes after dp.  Any earlier definition of nextdp is
 * discarded first.  Note that the argument is evaluated twice.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3402
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean			- 1 * BYTES_PER_XDR_UNIT
 * file id			- 2 * BYTES_PER_XDR_UNIT
 * directory name length	- 1 * BYTES_PER_XDR_UNIT
 * cookie			- 2 * BYTES_PER_XDR_UNIT
 * attribute flag		- 1 * BYTES_PER_XDR_UNIT
 * attributes			- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle	- 1 * BYTES_PER_XDR_UNIT
 * length of a file handle	- 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * the length of the entry name, rounded up to a multiple of
 * BYTES_PER_XDR_UNIT
 *
 * The argument is parenthesized in the expansion so that an expression
 * can be passed safely, as in NFS3_READDIR_MIN_COUNT.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
	    BYTES_PER_XDR_UNIT + \
	    NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3426
/*
 * Initial size, in bytes, of each VOP_READDIR() request issued by
 * rfs3_readdirplus(); that function clamps it to the space left in its
 * buffer before every read.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3428
3429 /* ARGSUSED */
3430 void
3431 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3432 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3433 {
3434 int error;
3435 vnode_t *vp;
3436 struct vattr *vap;
3437 struct vattr va;
3438 struct iovec iov;
3439 struct uio uio;
3440 char *data;
3441 int iseof;
3442 struct dirent64 *dp;
3443 vnode_t *nvp;
3444 struct vattr *nvap;
3445 struct vattr nva;
3446 entryplus3_info *infop = NULL;
3447 int size = 0;
3448 int nents = 0;
3449 int bufsize = 0;
3450 int entrysize = 0;
3451 int tofit = 0;
3452 int rd_unit = rfs3_readdir_unit;
3453 int prev_len;
3454 int space_left;
3455 int i;
3456 uint_t *namlen = NULL;
3457 char *ndata = NULL;
3458 struct sockaddr *ca;
3459 size_t ret;
3460
3461 vap = NULL;
3462
3463 vp = nfs3_fhtovp(&args->dir, exi);
3464
3465 DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3466 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3467 READDIRPLUS3args *, args);
3468
3469 if (vp == NULL) {
3470 error = ESTALE;
3471 goto out;
3472 }
3473
3474 if (is_system_labeled()) {
3475 bslabel_t *clabel = req->rq_label;
3476
3477 ASSERT(clabel != NULL);
3478 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3479 char *, "got client label from request(1)",
3480 struct svc_req *, req);
3481
3482 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3483 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3484 exi)) {
3485 resp->status = NFS3ERR_ACCES;
3486 goto out1;
3487 }
3488 }
3489 }
3490
3491 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3492
3493 va.va_mask = AT_ALL;
3494 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3495
3496 if (vp->v_type != VDIR) {
3497 error = ENOTDIR;
3498 goto out;
3499 }
3500
3501 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3502 if (error)
3503 goto out;
3504
3505 /*
3506 * Don't allow arbitrary counts for allocation
3507 */
3508 if (args->maxcount > rfs3_tsize(req))
3509 args->maxcount = rfs3_tsize(req);
3510
3511 /*
3512 * Make sure that there is room to read at least one entry
3513 * if any are available
3514 */
3515 args->dircount = MIN(args->dircount, args->maxcount);
3516
3517 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3518 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3519
3520 /*
3521 * This allocation relies on a minimum directory entry
3522 * being roughly 24 bytes. Therefore, the namlen array
3523 * will have enough space based on the maximum number of
3524 * entries to read.
3525 */
3526 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3527
3528 space_left = args->dircount;
3529 data = kmem_alloc(args->dircount, KM_SLEEP);
3530 dp = (struct dirent64 *)data;
3531 uio.uio_iov = &iov;
3532 uio.uio_iovcnt = 1;
3533 uio.uio_segflg = UIO_SYSSPACE;
3534 uio.uio_extflg = UIO_COPY_CACHED;
3535 uio.uio_loffset = (offset_t)args->cookie;
3536
3537 /*
3538 * bufsize is used to keep track of the size of the response as we
3539 * get post op attributes and filehandles for each entry. This is
3540 * an optimization as the server may have read more entries than will
3541 * fit in the buffer specified by maxcount. We stop calculating
3542 * post op attributes and filehandles once we have exceeded maxcount.
3543 * This will minimize the effect of truncation.
3544 *
3545 * It is primed with:
3546 * 1 for the status +
3547 * 1 for the dir_attributes.attributes boolean +
3548 * 2 for the cookie verifier
3549 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3550 * to bytes. If there are directory attributes to be
3551 * returned, then:
3552 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3553 * time BYTES_PER_XDR_UNIT is added to account for them.
3554 */
3555 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3556 if (vap != NULL)
3557 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3558
3559 getmoredents:
3560 /*
3561 * Here we make a check so that our read unit is not larger than
3562 * the space left in the buffer.
3563 */
3564 rd_unit = MIN(rd_unit, space_left);
3565 iov.iov_base = (char *)dp;
3566 iov.iov_len = rd_unit;
3567 uio.uio_resid = rd_unit;
3568 prev_len = rd_unit;
3569
3570 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3571
3572 if (error) {
3573 kmem_free(data, args->dircount);
3574 goto out;
3575 }
3576
3577 if (uio.uio_resid == prev_len && !iseof) {
3578 if (nents == 0) {
3579 kmem_free(data, args->dircount);
3580 resp->status = NFS3ERR_TOOSMALL;
3581 goto out1;
3582 }
3583
3584 /*
3585 * We could not get any more entries, so get the attributes
3586 * and filehandle for the entries already obtained.
3587 */
3588 goto good;
3589 }
3590
3591 /*
3592 * We estimate the size of the response by assuming the
3593 * entry exists and attributes and filehandle are also valid
3594 */
3595 for (size = prev_len - uio.uio_resid;
3596 size > 0;
3597 size -= dp->d_reclen, dp = nextdp(dp)) {
3598
3599 if (dp->d_ino == 0) {
3600 nents++;
3601 continue;
3602 }
3603
3604 namlen[nents] = strlen(dp->d_name);
3605 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3606
3607 /*
3608 * We need to check to see if the number of bytes left
3609 * to go into the buffer will actually fit into the
3610 * buffer. This is calculated as the size of this
3611 * entry plus:
3612 * 1 for the true/false list indicator +
3613 * 1 for the eof indicator
3614 * times BYTES_PER_XDR_UNIT to convert from XDR units
3615 * to bytes.
3616 *
3617 * Also check the dircount limit against the first entry read
3618 *
3619 */
3620 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3621 if (bufsize + tofit > args->maxcount) {
3622 /*
3623 * We make a check here to see if this was the
3624 * first entry being measured. If so, then maxcount
3625 * was too small to begin with and so we need to
3626 * return with NFS3ERR_TOOSMALL.
3627 */
3628 if (nents == 0) {
3629 kmem_free(data, args->dircount);
3630 resp->status = NFS3ERR_TOOSMALL;
3631 goto out1;
3632 }
3633 iseof = FALSE;
3634 goto good;
3635 }
3636 bufsize += entrysize;
3637 nents++;
3638 }
3639
3640 /*
3641 * If there is enough room to fit at least 1 more entry including
3642 * post op attributes and filehandle in the buffer AND that we haven't
3643 * exceeded dircount then go back and get some more.
3644 */
3645 if (!iseof &&
3646 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3647 space_left -= (prev_len - uio.uio_resid);
3648 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3649 goto getmoredents;
3650
3651 /* else, fall through */
3652 }
3653 good:
3654 va.va_mask = AT_ALL;
3655 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3656
3657 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3658
3659 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3660 resp->resok.infop = infop;
3661
3662 dp = (struct dirent64 *)data;
3663 for (i = 0; i < nents; i++) {
3664
3665 if (dp->d_ino == 0) {
3666 infop[i].attr.attributes = FALSE;
3667 infop[i].fh.handle_follows = FALSE;
3668 dp = nextdp(dp);
3669 continue;
3670 }
3671
3672 infop[i].namelen = namlen[i];
3673
3674 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3675 NULL, NULL, NULL);
3676 if (error) {
3677 infop[i].attr.attributes = FALSE;
3678 infop[i].fh.handle_follows = FALSE;
3679 dp = nextdp(dp);
3680 continue;
3681 }
3682
3683 nva.va_mask = AT_ALL;
3684 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3685
3686 /* Lie about the object type for a referral */
3687 if (vn_is_nfs_reparse(nvp, cr))
3688 nvap->va_type = VLNK;
3689
3690 if (vn_ismntpt(nvp)) {
3691 infop[i].attr.attributes = FALSE;
3692 infop[i].fh.handle_follows = FALSE;
3693 } else {
3694 vattr_to_post_op_attr(nvap, &infop[i].attr);
3695
3696 error = makefh3(&infop[i].fh.handle, nvp, exi);
3697 if (!error)
3698 infop[i].fh.handle_follows = TRUE;
3699 else
3700 infop[i].fh.handle_follows = FALSE;
3701 }
3702
3703 VN_RELE(nvp);
3704 dp = nextdp(dp);
3705 }
3706
3707 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3708 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3709 if (ndata == NULL)
3710 ndata = data;
3711
3712 if (ret > 0) {
3713 /*
3714 * We had to drop one or more entries in order to fit
3715 * during the character conversion. We need to patch
3716 * up the size and eof info.
3717 */
3718 if (iseof)
3719 iseof = FALSE;
3720
3721 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3722 nents, ret);
3723 }
3724
3725
3726 #if 0 /* notyet */
3727 /*
3728 * Don't do this. It causes local disk writes when just
3729 * reading the file and the overhead is deemed larger
3730 * than the benefit.
3731 */
3732 /*
3733 * Force modified metadata out to stable storage.
3734 */
3735 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3736 #endif
3737
3738 kmem_free(namlen, args->dircount);
3739
3740 resp->status = NFS3_OK;
3741 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3742 resp->resok.cookieverf = 0;
3743 resp->resok.reply.entries = (entryplus3 *)ndata;
3744 resp->resok.reply.eof = iseof;
3745 resp->resok.size = nents;
3746 resp->resok.count = args->dircount - ret;
3747 resp->resok.maxcount = args->maxcount;
3748
3749 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3750 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3751 READDIRPLUS3res *, resp);
3752
3753 VN_RELE(vp);
3754
3755 return;
3756
3757 out:
3758 if (curthread->t_flag & T_WOULDBLOCK) {
3759 curthread->t_flag &= ~T_WOULDBLOCK;
3760 resp->status = NFS3ERR_JUKEBOX;
3761 } else {
3762 resp->status = puterrno3(error);
3763 }
3764 out1:
3765 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3766
3767 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3768 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3769 READDIRPLUS3res *, resp);
3770
3771 if (vp != NULL) {
3772 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3773 VN_RELE(vp);
3774 }
3775
3776 if (namlen != NULL)
3777 kmem_free(namlen, args->dircount);
3778
3779 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3780 }
3781
3782 void *
3783 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3784 {
3785
3786 return (&args->dir);
3787 }
3788
3789 void
3790 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3791 {
3792
3793 if (resp->status == NFS3_OK) {
3794 kmem_free(resp->resok.reply.entries, resp->resok.count);
3795 kmem_free(resp->resok.infop,
3796 resp->resok.size * sizeof (struct entryplus3_info));
3797 }
3798 }
3799
3800 /* ARGSUSED */
3801 void
3802 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3803 struct svc_req *req, cred_t *cr, bool_t ro)
3804 {
3805 int error;
3806 vnode_t *vp;
3807 struct vattr *vap;
3808 struct vattr va;
3809 struct statvfs64 sb;
3810
3811 vap = NULL;
3812
3813 vp = nfs3_fhtovp(&args->fsroot, exi);
3814
3815 DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3816 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3817 FSSTAT3args *, args);
3818
3819 if (vp == NULL) {
3820 error = ESTALE;
3821 goto out;
3822 }
3823
3824 if (is_system_labeled()) {
3825 bslabel_t *clabel = req->rq_label;
3826
3827 ASSERT(clabel != NULL);
3828 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3829 "got client label from request(1)", struct svc_req *, req);
3830
3831 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3832 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3833 exi)) {
3834 resp->status = NFS3ERR_ACCES;
3835 goto out1;
3836 }
3837 }
3838 }
3839
3840 error = VFS_STATVFS(vp->v_vfsp, &sb);
3841
3842 va.va_mask = AT_ALL;
3843 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3844
3845 if (error)
3846 goto out;
3847
3848 resp->status = NFS3_OK;
3849 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3850 if (sb.f_blocks != (fsblkcnt64_t)-1)
3851 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3852 else
3853 resp->resok.tbytes = (size3)sb.f_blocks;
3854 if (sb.f_bfree != (fsblkcnt64_t)-1)
3855 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3856 else
3857 resp->resok.fbytes = (size3)sb.f_bfree;
3858 if (sb.f_bavail != (fsblkcnt64_t)-1)
3859 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3860 else
3861 resp->resok.abytes = (size3)sb.f_bavail;
3862 resp->resok.tfiles = (size3)sb.f_files;
3863 resp->resok.ffiles = (size3)sb.f_ffree;
3864 resp->resok.afiles = (size3)sb.f_favail;
3865 resp->resok.invarsec = 0;
3866
3867 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3868 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3869 FSSTAT3res *, resp);
3870 VN_RELE(vp);
3871
3872 return;
3873
3874 out:
3875 if (curthread->t_flag & T_WOULDBLOCK) {
3876 curthread->t_flag &= ~T_WOULDBLOCK;
3877 resp->status = NFS3ERR_JUKEBOX;
3878 } else
3879 resp->status = puterrno3(error);
3880 out1:
3881 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3882 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3883 FSSTAT3res *, resp);
3884
3885 if (vp != NULL)
3886 VN_RELE(vp);
3887 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3888 }
3889
3890 void *
3891 rfs3_fsstat_getfh(FSSTAT3args *args)
3892 {
3893
3894 return (&args->fsroot);
3895 }
3896
3897 /* ARGSUSED */
3898 void
3899 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3900 struct svc_req *req, cred_t *cr, bool_t ro)
3901 {
3902 vnode_t *vp;
3903 struct vattr *vap;
3904 struct vattr va;
3905 uint32_t xfer_size;
3906 ulong_t l = 0;
3907 int error;
3908
3909 vp = nfs3_fhtovp(&args->fsroot, exi);
3910
3911 DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3912 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3913 FSINFO3args *, args);
3914
3915 if (vp == NULL) {
3916 if (curthread->t_flag & T_WOULDBLOCK) {
3917 curthread->t_flag &= ~T_WOULDBLOCK;
3918 resp->status = NFS3ERR_JUKEBOX;
3919 } else
3920 resp->status = NFS3ERR_STALE;
3921 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3922 goto out;
3923 }
3924
3925 if (is_system_labeled()) {
3926 bslabel_t *clabel = req->rq_label;
3927
3928 ASSERT(clabel != NULL);
3929 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3930 "got client label from request(1)", struct svc_req *, req);
3931
3932 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3933 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3934 exi)) {
3935 resp->status = NFS3ERR_STALE;
3936 vattr_to_post_op_attr(NULL,
3937 &resp->resfail.obj_attributes);
3938 goto out;
3939 }
3940 }
3941 }
3942
3943 va.va_mask = AT_ALL;
3944 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3945
3946 resp->status = NFS3_OK;
3947 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3948 xfer_size = rfs3_tsize(req);
3949 resp->resok.rtmax = xfer_size;
3950 resp->resok.rtpref = xfer_size;
3951 resp->resok.rtmult = DEV_BSIZE;
3952 resp->resok.wtmax = xfer_size;
3953 resp->resok.wtpref = xfer_size;
3954 resp->resok.wtmult = DEV_BSIZE;
3955 resp->resok.dtpref = MAXBSIZE;
3956
3957 /*
3958 * Large file spec: want maxfilesize based on limit of
3959 * underlying filesystem. We can guess 2^31-1 if need be.
3960 */
3961 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3962 if (error) {
3963 resp->status = puterrno3(error);
3964 goto out;
3965 }
3966
3967 /*
3968 * If the underlying file system does not support _PC_FILESIZEBITS,
3969 * return a reasonable default. Note that error code on VOP_PATHCONF
3970 * will be 0, even if the underlying file system does not support
3971 * _PC_FILESIZEBITS.
3972 */
3973 if (l == (ulong_t)-1) {
3974 resp->resok.maxfilesize = MAXOFF32_T;
3975 } else {
3976 if (l >= (sizeof (uint64_t) * 8))
3977 resp->resok.maxfilesize = INT64_MAX;
3978 else
3979 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3980 }
3981
3982 resp->resok.time_delta.seconds = 0;
3983 resp->resok.time_delta.nseconds = 1000;
3984 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3985 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3986
3987 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3988 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3989 FSINFO3res *, resp);
3990
3991 VN_RELE(vp);
3992
3993 return;
3994
3995 out:
3996 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3997 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
3998 FSINFO3res *, resp);
3999 if (vp != NULL)
4000 VN_RELE(vp);
4001 }
4002
4003 void *
4004 rfs3_fsinfo_getfh(FSINFO3args *args)
4005 {
4006 return (&args->fsroot);
4007 }
4008
4009 /* ARGSUSED */
4010 void
4011 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4012 struct svc_req *req, cred_t *cr, bool_t ro)
4013 {
4014 int error;
4015 vnode_t *vp;
4016 struct vattr *vap;
4017 struct vattr va;
4018 ulong_t val;
4019
4020 vap = NULL;
4021
4022 vp = nfs3_fhtovp(&args->object, exi);
4023
4024 DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4025 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4026 PATHCONF3args *, args);
4027
4028 if (vp == NULL) {
4029 error = ESTALE;
4030 goto out;
4031 }
4032
4033 if (is_system_labeled()) {
4034 bslabel_t *clabel = req->rq_label;
4035
4036 ASSERT(clabel != NULL);
4037 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4038 "got client label from request(1)", struct svc_req *, req);
4039
4040 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4041 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4042 exi)) {
4043 resp->status = NFS3ERR_ACCES;
4044 goto out1;
4045 }
4046 }
4047 }
4048
4049 va.va_mask = AT_ALL;
4050 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4051
4052 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4053 if (error)
4054 goto out;
4055 resp->resok.info.link_max = (uint32)val;
4056
4057 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4058 if (error)
4059 goto out;
4060 resp->resok.info.name_max = (uint32)val;
4061
4062 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4063 if (error)
4064 goto out;
4065 if (val == 1)
4066 resp->resok.info.no_trunc = TRUE;
4067 else
4068 resp->resok.info.no_trunc = FALSE;
4069
4070 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4071 if (error)
4072 goto out;
4073 if (val == 1)
4074 resp->resok.info.chown_restricted = TRUE;
4075 else
4076 resp->resok.info.chown_restricted = FALSE;
4077
4078 resp->status = NFS3_OK;
4079 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4080 resp->resok.info.case_insensitive = FALSE;
4081 resp->resok.info.case_preserving = TRUE;
4082 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4083 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4084 PATHCONF3res *, resp);
4085 VN_RELE(vp);
4086 return;
4087
4088 out:
4089 if (curthread->t_flag & T_WOULDBLOCK) {
4090 curthread->t_flag &= ~T_WOULDBLOCK;
4091 resp->status = NFS3ERR_JUKEBOX;
4092 } else
4093 resp->status = puterrno3(error);
4094 out1:
4095 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4096 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4097 PATHCONF3res *, resp);
4098 if (vp != NULL)
4099 VN_RELE(vp);
4100 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4101 }
4102
4103 void *
4104 rfs3_pathconf_getfh(PATHCONF3args *args)
4105 {
4106
4107 return (&args->object);
4108 }
4109
4110 void
4111 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4112 struct svc_req *req, cred_t *cr, bool_t ro)
4113 {
4114 nfs3_srv_t *ns;
4115 int error;
4116 vnode_t *vp;
4117 struct vattr *bvap;
4118 struct vattr bva;
4119 struct vattr *avap;
4120 struct vattr ava;
4121
4122 bvap = NULL;
4123 avap = NULL;
4124
4125 vp = nfs3_fhtovp(&args->file, exi);
4126
4127 DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4128 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4129 COMMIT3args *, args);
4130
4131 if (vp == NULL) {
4132 error = ESTALE;
4133 goto out;
4134 }
4135
4136 ns = nfs3_get_srv();
4137 bva.va_mask = AT_ALL;
4138 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4139
4140 /*
4141 * If we can't get the attributes, then we can't do the
4142 * right access checking. So, we'll fail the request.
4143 */
4144 if (error)
4145 goto out;
4146
4147 bvap = &bva;
4148
4149 if (rdonly(ro, vp)) {
4150 resp->status = NFS3ERR_ROFS;
4151 goto out1;
4152 }
4153
4154 if (vp->v_type != VREG) {
4155 resp->status = NFS3ERR_INVAL;
4156 goto out1;
4157 }
4158
4159 if (is_system_labeled()) {
4160 bslabel_t *clabel = req->rq_label;
4161
4162 ASSERT(clabel != NULL);
4163 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4164 "got client label from request(1)", struct svc_req *, req);
4165
4166 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4167 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4168 exi)) {
4169 resp->status = NFS3ERR_ACCES;
4170 goto out1;
4171 }
4172 }
4173 }
4174
4175 if (crgetuid(cr) != bva.va_uid &&
4176 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4177 goto out;
4178
4179 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4180
4181 ava.va_mask = AT_ALL;
4182 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4183
4184 if (error)
4185 goto out;
4186
4187 resp->status = NFS3_OK;
4188 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4189 resp->resok.verf = ns->write3verf;
4190
4191 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4192 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4193 COMMIT3res *, resp);
4194
4195 VN_RELE(vp);
4196
4197 return;
4198
4199 out:
4200 if (curthread->t_flag & T_WOULDBLOCK) {
4201 curthread->t_flag &= ~T_WOULDBLOCK;
4202 resp->status = NFS3ERR_JUKEBOX;
4203 } else
4204 resp->status = puterrno3(error);
4205 out1:
4206 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4207 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4208 COMMIT3res *, resp);
4209
4210 if (vp != NULL)
4211 VN_RELE(vp);
4212 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4213 }
4214
4215 void *
4216 rfs3_commit_getfh(COMMIT3args *args)
4217 {
4218
4219 return (&args->file);
4220 }
4221
4222 static int
4223 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4224 {
4225
4226 vap->va_mask = 0;
4227
4228 if (sap->mode.set_it) {
4229 vap->va_mode = (mode_t)sap->mode.mode;
4230 vap->va_mask |= AT_MODE;
4231 }
4232 if (sap->uid.set_it) {
4233 vap->va_uid = (uid_t)sap->uid.uid;
4234 vap->va_mask |= AT_UID;
4235 }
4236 if (sap->gid.set_it) {
4237 vap->va_gid = (gid_t)sap->gid.gid;
4238 vap->va_mask |= AT_GID;
4239 }
4240 if (sap->size.set_it) {
4241 if (sap->size.size > (size3)((u_longlong_t)-1))
4242 return (EINVAL);
4243 vap->va_size = sap->size.size;
4244 vap->va_mask |= AT_SIZE;
4245 }
4246 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4247 #ifndef _LP64
4248 /* check time validity */
4249 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4250 return (EOVERFLOW);
4251 #endif
4252 /*
4253 * nfs protocol defines times as unsigned so don't extend sign,
4254 * unless sysadmin set nfs_allow_preepoch_time.
4255 */
4256 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4257 sap->atime.atime.seconds);
4258 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4259 vap->va_mask |= AT_ATIME;
4260 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4261 gethrestime(&vap->va_atime);
4262 vap->va_mask |= AT_ATIME;
4263 }
4264 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4265 #ifndef _LP64
4266 /* check time validity */
4267 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4268 return (EOVERFLOW);
4269 #endif
4270 /*
4271 * nfs protocol defines times as unsigned so don't extend sign,
4272 * unless sysadmin set nfs_allow_preepoch_time.
4273 */
4274 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4275 sap->mtime.mtime.seconds);
4276 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4277 vap->va_mask |= AT_MTIME;
4278 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4279 gethrestime(&vap->va_mtime);
4280 vap->va_mask |= AT_MTIME;
4281 }
4282
4283 return (0);
4284 }
4285
4286 static const ftype3 vt_to_nf3[] = {
4287 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4288 };
4289
4290 static int
4291 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4292 {
4293
4294 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4295 /* Return error if time or size overflow */
4296 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4297 return (EOVERFLOW);
4298 }
4299 fap->type = vt_to_nf3[vap->va_type];
4300 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4301 fap->nlink = (uint32)vap->va_nlink;
4302 if (vap->va_uid == UID_NOBODY)
4303 fap->uid = (uid3)NFS_UID_NOBODY;
4304 else
4305 fap->uid = (uid3)vap->va_uid;
4306 if (vap->va_gid == GID_NOBODY)
4307 fap->gid = (gid3)NFS_GID_NOBODY;
4308 else
4309 fap->gid = (gid3)vap->va_gid;
4310 fap->size = (size3)vap->va_size;
4311 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4312 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4313 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4314 fap->fsid = (uint64)vap->va_fsid;
4315 fap->fileid = (fileid3)vap->va_nodeid;
4316 fap->atime.seconds = vap->va_atime.tv_sec;
4317 fap->atime.nseconds = vap->va_atime.tv_nsec;
4318 fap->mtime.seconds = vap->va_mtime.tv_sec;
4319 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4320 fap->ctime.seconds = vap->va_ctime.tv_sec;
4321 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4322 return (0);
4323 }
4324
4325 static int
4326 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4327 {
4328
4329 /* Return error if time or size overflow */
4330 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4331 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4332 NFS3_SIZE_OK(vap->va_size))) {
4333 return (EOVERFLOW);
4334 }
4335 wccap->size = (size3)vap->va_size;
4336 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4337 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4338 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4339 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4340 return (0);
4341 }
4342
4343 static void
4344 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4345 {
4346
4347 /* don't return attrs if time overflow */
4348 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4349 poap->attributes = TRUE;
4350 } else
4351 poap->attributes = FALSE;
4352 }
4353
4354 void
4355 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4356 {
4357
4358 /* don't return attrs if time overflow */
4359 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4360 poap->attributes = TRUE;
4361 } else
4362 poap->attributes = FALSE;
4363 }
4364
4365 static void
4366 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4367 {
4368 vattr_to_pre_op_attr(bvap, &wccp->before);
4369 vattr_to_post_op_attr(avap, &wccp->after);
4370 }
4371
4372 static int
4373 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4374 {
4375 struct clist *wcl;
4376 int wlist_len;
4377 count3 count = rok->count;
4378
4379 wcl = args->wlist;
4380 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4381 return (FALSE);
4382
4383 wcl = args->wlist;
4384 rok->wlist_len = wlist_len;
4385 rok->wlist = wcl;
4386 return (TRUE);
4387 }
4388
4389 void
4390 rfs3_srv_zone_init(nfs_globals_t *ng)
4391 {
4392 nfs3_srv_t *ns;
4393 struct rfs3_verf_overlay {
4394 uint_t id; /* a "unique" identifier */
4395 int ts; /* a unique timestamp */
4396 } *verfp;
4397 timestruc_t now;
4398
4399 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4400
4401 /*
4402 * The following algorithm attempts to find a unique verifier
4403 * to be used as the write verifier returned from the server
4404 * to the client. It is important that this verifier change
4405 * whenever the server reboots. Of secondary importance, it
4406 * is important for the verifier to be unique between two
4407 * different servers.
4408 *
4409 * Thus, an attempt is made to use the system hostid and the
4410 * current time in seconds when the nfssrv kernel module is
4411 * loaded. It is assumed that an NFS server will not be able
4412 * to boot and then to reboot in less than a second. If the
4413 * hostid has not been set, then the current high resolution
4414 * time is used. This will ensure different verifiers each
4415 * time the server reboots and minimize the chances that two
4416 * different servers will have the same verifier.
4417 */
4418
4419 #ifndef lint
4420 /*
4421 * We ASSERT that this constant logic expression is
4422 * always true because in the past, it wasn't.
4423 */
4424 ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4425 #endif
4426
4427 gethrestime(&now);
4428 verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4429 verfp->ts = (int)now.tv_sec;
4430 verfp->id = zone_get_hostid(NULL);
4431
4432 if (verfp->id == 0)
4433 verfp->id = (uint_t)now.tv_nsec;
4434
4435 ng->nfs3_srv = ns;
4436 }
4437
4438 void
4439 rfs3_srv_zone_fini(nfs_globals_t *ng)
4440 {
4441 nfs3_srv_t *ns = ng->nfs3_srv;
4442
4443 ng->nfs3_srv = NULL;
4444
4445 kmem_free(ns, sizeof (*ns));
4446 }
4447
4448 void
4449 rfs3_srvrinit(void)
4450 {
4451 nfs3_srv_caller_id = fs_new_caller_id();
4452 }
4453
/*
 * Counterpart of rfs3_srvrinit().  There is currently nothing to undo.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}