1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2018 Nexenta Systems, Inc.
24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 #include <sys/sdt.h>
52
53 #include <rpc/types.h>
54 #include <rpc/auth.h>
55 #include <rpc/svc.h>
56 #include <rpc/rpc_rdma.h>
57
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61
62 #include <sys/strsubr.h>
63 #include <sys/tsol/label.h>
64 #include <sys/tsol/tndb.h>
65
66 #include <sys/zone.h>
67
68 #include <inet/ip.h>
69 #include <inet/ip6.h>
70
71 /*
72 * Zone global variables of NFSv3 server
73 */
74 typedef struct nfs3_srv {
75 writeverf3 write3verf;
76 } nfs3_srv_t;
77
78 /*
79 * These are the interface routines for the server side of the
80 * Network File System. See the NFS version 3 protocol specification
81 * for a description of this interface.
82 */
83
84 static int sattr3_to_vattr(sattr3 *, struct vattr *);
85 static int vattr_to_fattr3(struct vattr *, fattr3 *);
86 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
87 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
88 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
89 static int rdma_setup_read_data3(READ3args *, READ3resok *);
90
91 extern int nfs_loaned_buffers;
92
93 u_longlong_t nfs3_srv_caller_id;
94
95 static nfs3_srv_t *
96 nfs3_get_srv(void)
97 {
98 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
99 nfs3_srv_t *srv = ng->nfs3_srv;
100 ASSERT(srv != NULL);
101 return (srv);
102 }
103
104 /* ARGSUSED */
105 void
106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
107 struct svc_req *req, cred_t *cr, bool_t ro)
108 {
109 int error;
110 vnode_t *vp;
111 struct vattr va;
112
113 vp = nfs3_fhtovp(&args->object, exi);
114
115 DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
116 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
117 GETATTR3args *, args);
118
119 if (vp == NULL) {
120 error = ESTALE;
121 goto out;
122 }
123
124 va.va_mask = AT_ALL;
125 error = rfs4_delegated_getattr(vp, &va, 0, cr);
126
127 if (!error) {
128 /* Lie about the object type for a referral */
129 if (vn_is_nfs_reparse(vp, cr))
130 va.va_type = VLNK;
131
132 /* overflow error if time or size is out of range */
133 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
134 if (error)
135 goto out;
136 resp->status = NFS3_OK;
137
138 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
139 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
140 GETATTR3res *, resp);
141
142 VN_RELE(vp);
143
144 return;
145 }
146
147 out:
148 if (curthread->t_flag & T_WOULDBLOCK) {
149 curthread->t_flag &= ~T_WOULDBLOCK;
150 resp->status = NFS3ERR_JUKEBOX;
151 } else
152 resp->status = puterrno3(error);
153
154 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
155 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
156 GETATTR3res *, resp);
157
158 if (vp != NULL)
159 VN_RELE(vp);
160 }
161
162 void *
163 rfs3_getattr_getfh(GETATTR3args *args)
164 {
165
166 return (&args->object);
167 }
168
169 void
170 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
171 struct svc_req *req, cred_t *cr, bool_t ro)
172 {
173 int error;
174 vnode_t *vp;
175 struct vattr *bvap;
176 struct vattr bva;
177 struct vattr *avap;
178 struct vattr ava;
179 int flag;
180 int in_crit = 0;
181 struct flock64 bf;
182 caller_context_t ct;
183
184 bvap = NULL;
185 avap = NULL;
186
187 vp = nfs3_fhtovp(&args->object, exi);
188
189 DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
190 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
191 SETATTR3args *, args);
192
193 if (vp == NULL) {
194 error = ESTALE;
195 goto out;
196 }
197
198 error = sattr3_to_vattr(&args->new_attributes, &ava);
199 if (error)
200 goto out;
201
202 if (is_system_labeled()) {
203 bslabel_t *clabel = req->rq_label;
204
205 ASSERT(clabel != NULL);
206 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
207 "got client label from request(1)", struct svc_req *, req);
208
209 if (!blequal(&l_admin_low->tsl_label, clabel)) {
210 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
211 exi)) {
212 resp->status = NFS3ERR_ACCES;
213 goto out1;
214 }
215 }
216 }
217
218 /*
219 * We need to specially handle size changes because of
220 * possible conflicting NBMAND locks. Get into critical
221 * region before VOP_GETATTR, so the size attribute is
222 * valid when checking conflicts.
223 *
224 * Also, check to see if the v4 side of the server has
225 * delegated this file. If so, then we return JUKEBOX to
226 * allow the client to retrasmit its request.
227 */
228 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
229 if (nbl_need_check(vp)) {
230 nbl_start_crit(vp, RW_READER);
231 in_crit = 1;
232 }
233 }
234
235 bva.va_mask = AT_ALL;
236 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
237
238 /*
239 * If we can't get the attributes, then we can't do the
240 * right access checking. So, we'll fail the request.
241 */
242 if (error)
243 goto out;
244
245 bvap = &bva;
246
247 if (rdonly(ro, vp)) {
248 resp->status = NFS3ERR_ROFS;
249 goto out1;
250 }
251
252 if (args->guard.check &&
253 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
254 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
255 resp->status = NFS3ERR_NOT_SYNC;
256 goto out1;
257 }
258
259 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
260 flag = ATTR_UTIME;
261 else
262 flag = 0;
263
264 /*
265 * If the filesystem is exported with nosuid, then mask off
266 * the setuid and setgid bits.
267 */
268 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
269 (exi->exi_export.ex_flags & EX_NOSUID))
270 ava.va_mode &= ~(VSUID | VSGID);
271
272 ct.cc_sysid = 0;
273 ct.cc_pid = 0;
274 ct.cc_caller_id = nfs3_srv_caller_id;
275 ct.cc_flags = CC_DONTBLOCK;
276
277 /*
278 * We need to specially handle size changes because it is
279 * possible for the client to create a file with modes
280 * which indicate read-only, but with the file opened for
281 * writing. If the client then tries to set the size of
282 * the file, then the normal access checking done in
283 * VOP_SETATTR would prevent the client from doing so,
284 * although it should be legal for it to do so. To get
285 * around this, we do the access checking for ourselves
286 * and then use VOP_SPACE which doesn't do the access
287 * checking which VOP_SETATTR does. VOP_SPACE can only
288 * operate on VREG files, let VOP_SETATTR handle the other
289 * extremely rare cases.
290 * Also the client should not be allowed to change the
291 * size of the file if there is a conflicting non-blocking
292 * mandatory lock in the region the change.
293 */
294 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
295 if (in_crit) {
296 u_offset_t offset;
297 ssize_t length;
298
299 if (ava.va_size < bva.va_size) {
300 offset = ava.va_size;
301 length = bva.va_size - ava.va_size;
302 } else {
303 offset = bva.va_size;
304 length = ava.va_size - bva.va_size;
305 }
306 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
307 NULL)) {
308 error = EACCES;
309 goto out;
310 }
311 }
312
313 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
314 ava.va_mask &= ~AT_SIZE;
315 bf.l_type = F_WRLCK;
316 bf.l_whence = 0;
317 bf.l_start = (off64_t)ava.va_size;
318 bf.l_len = 0;
319 bf.l_sysid = 0;
320 bf.l_pid = 0;
321 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
322 (offset_t)ava.va_size, cr, &ct);
323 }
324 }
325
326 if (!error && ava.va_mask)
327 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
328
329 /* check if a monitor detected a delegation conflict */
330 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
331 resp->status = NFS3ERR_JUKEBOX;
332 goto out1;
333 }
334
335 ava.va_mask = AT_ALL;
336 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
337
338 /*
339 * Force modified metadata out to stable storage.
340 */
341 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
342
343 if (error)
344 goto out;
345
346 if (in_crit)
347 nbl_end_crit(vp);
348
349 resp->status = NFS3_OK;
350 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
351
352 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
353 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
354 SETATTR3res *, resp);
355
356 VN_RELE(vp);
357
358 return;
359
360 out:
361 if (curthread->t_flag & T_WOULDBLOCK) {
362 curthread->t_flag &= ~T_WOULDBLOCK;
363 resp->status = NFS3ERR_JUKEBOX;
364 } else
365 resp->status = puterrno3(error);
366 out1:
367 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
368 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
369 SETATTR3res *, resp);
370
371 if (vp != NULL) {
372 if (in_crit)
373 nbl_end_crit(vp);
374 VN_RELE(vp);
375 }
376 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
377 }
378
379 void *
380 rfs3_setattr_getfh(SETATTR3args *args)
381 {
382
383 return (&args->object);
384 }
385
386 /* ARGSUSED */
387 void
388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
389 struct svc_req *req, cred_t *cr, bool_t ro)
390 {
391 int error;
392 vnode_t *vp;
393 vnode_t *dvp;
394 struct vattr *vap;
395 struct vattr va;
396 struct vattr *dvap;
397 struct vattr dva;
398 nfs_fh3 *fhp;
399 struct sec_ol sec = {0, 0};
400 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
401 struct sockaddr *ca;
402 char *name = NULL;
403
404 dvap = NULL;
405
406 if (exi != NULL)
407 exi_hold(exi);
408
409 /*
410 * Allow lookups from the root - the default
411 * location of the public filehandle.
412 */
413 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
414 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
415 dvp = ZONE_ROOTVP();
416 VN_HOLD(dvp);
417
418 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
419 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
420 LOOKUP3args *, args);
421 } else {
422 dvp = nfs3_fhtovp(&args->what.dir, exi);
423
424 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
425 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
426 LOOKUP3args *, args);
427
428 if (dvp == NULL) {
429 error = ESTALE;
430 goto out;
431 }
432 }
433
434 dva.va_mask = AT_ALL;
435 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
436
437 if (args->what.name == nfs3nametoolong) {
438 resp->status = NFS3ERR_NAMETOOLONG;
439 goto out1;
440 }
441
442 if (args->what.name == NULL || *(args->what.name) == '\0') {
443 resp->status = NFS3ERR_ACCES;
444 goto out1;
445 }
446
447 fhp = &args->what.dir;
448 ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
449 if (strcmp(args->what.name, "..") == 0 &&
450 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
451 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
452 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
453 /*
454 * special case for ".." and 'nohide'exported root
455 */
456 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
457 resp->status = NFS3ERR_ACCES;
458 goto out1;
459 }
460 } else {
461 resp->status = NFS3ERR_NOENT;
462 goto out1;
463 }
464 }
465
466 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
467 name = nfscmd_convname(ca, exi, args->what.name,
468 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
469
470 if (name == NULL) {
471 resp->status = NFS3ERR_ACCES;
472 goto out1;
473 }
474
475 /*
476 * If the public filehandle is used then allow
477 * a multi-component lookup
478 */
479 if (PUBLIC_FH3(&args->what.dir)) {
480 publicfh_flag = TRUE;
481
482 exi_rele(exi);
483 exi = NULL;
484
485 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
486 &exi, &sec);
487
488 /*
489 * Since WebNFS may bypass MOUNT, we need to ensure this
490 * request didn't come from an unlabeled admin_low client.
491 */
492 if (is_system_labeled() && error == 0) {
493 int addr_type;
494 void *ipaddr;
495 tsol_tpc_t *tp;
496
497 if (ca->sa_family == AF_INET) {
498 addr_type = IPV4_VERSION;
499 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
500 } else if (ca->sa_family == AF_INET6) {
501 addr_type = IPV6_VERSION;
502 ipaddr = &((struct sockaddr_in6 *)
503 ca)->sin6_addr;
504 }
505 tp = find_tpc(ipaddr, addr_type, B_FALSE);
506 if (tp == NULL || tp->tpc_tp.tp_doi !=
507 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
508 SUN_CIPSO) {
509 VN_RELE(vp);
510 error = EACCES;
511 }
512 if (tp != NULL)
513 TPC_RELE(tp);
514 }
515 } else {
516 error = VOP_LOOKUP(dvp, name, &vp,
517 NULL, 0, NULL, cr, NULL, NULL, NULL);
518 }
519
520 if (name != args->what.name)
521 kmem_free(name, MAXPATHLEN + 1);
522
523 if (error == 0 && vn_ismntpt(vp)) {
524 error = rfs_cross_mnt(&vp, &exi);
525 if (error)
526 VN_RELE(vp);
527 }
528
529 if (is_system_labeled() && error == 0) {
530 bslabel_t *clabel = req->rq_label;
531
532 ASSERT(clabel != NULL);
533 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
534 "got client label from request(1)", struct svc_req *, req);
535
536 if (!blequal(&l_admin_low->tsl_label, clabel)) {
537 if (!do_rfs_label_check(clabel, dvp,
538 DOMINANCE_CHECK, exi)) {
539 VN_RELE(vp);
540 error = EACCES;
541 }
542 }
543 }
544
545 dva.va_mask = AT_ALL;
546 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
547
548 if (error)
549 goto out;
550
551 if (sec.sec_flags & SEC_QUERY) {
552 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
553 } else {
554 error = makefh3(&resp->resok.object, vp, exi);
555 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
556 auth_weak = TRUE;
557 }
558
559 if (error) {
560 VN_RELE(vp);
561 goto out;
562 }
563
564 va.va_mask = AT_ALL;
565 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
566
567 VN_RELE(vp);
568
569 resp->status = NFS3_OK;
570 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
571 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
572
573 /*
574 * If it's public fh, no 0x81, and client's flavor is
575 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
576 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
577 */
578 if (auth_weak)
579 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
580
581 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
582 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
583 LOOKUP3res *, resp);
584 VN_RELE(dvp);
585 exi_rele(exi);
586
587 return;
588
589 out:
590 if (curthread->t_flag & T_WOULDBLOCK) {
591 curthread->t_flag &= ~T_WOULDBLOCK;
592 resp->status = NFS3ERR_JUKEBOX;
593 } else
594 resp->status = puterrno3(error);
595 out1:
596 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
597 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
598 LOOKUP3res *, resp);
599
600 if (exi != NULL)
601 exi_rele(exi);
602
603 if (dvp != NULL)
604 VN_RELE(dvp);
605 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
606
607 }
608
609 void *
610 rfs3_lookup_getfh(LOOKUP3args *args)
611 {
612
613 return (&args->what.dir);
614 }
615
616 /* ARGSUSED */
617 void
618 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
619 struct svc_req *req, cred_t *cr, bool_t ro)
620 {
621 int error;
622 vnode_t *vp;
623 struct vattr *vap;
624 struct vattr va;
625 int checkwriteperm;
626 boolean_t dominant_label = B_FALSE;
627 boolean_t equal_label = B_FALSE;
628 boolean_t admin_low_client;
629
630 vap = NULL;
631
632 vp = nfs3_fhtovp(&args->object, exi);
633
634 DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
635 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
636 ACCESS3args *, args);
637
638 if (vp == NULL) {
639 error = ESTALE;
640 goto out;
641 }
642
643 /*
644 * If the file system is exported read only, it is not appropriate
645 * to check write permissions for regular files and directories.
646 * Special files are interpreted by the client, so the underlying
647 * permissions are sent back to the client for interpretation.
648 */
649 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
650 checkwriteperm = 0;
651 else
652 checkwriteperm = 1;
653
654 /*
655 * We need the mode so that we can correctly determine access
656 * permissions relative to a mandatory lock file. Access to
657 * mandatory lock files is denied on the server, so it might
658 * as well be reflected to the server during the open.
659 */
660 va.va_mask = AT_MODE;
661 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
662 if (error)
663 goto out;
664
665 vap = &va;
666
667 resp->resok.access = 0;
668
669 if (is_system_labeled()) {
670 bslabel_t *clabel = req->rq_label;
671
672 ASSERT(clabel != NULL);
673 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
674 "got client label from request(1)", struct svc_req *, req);
675
676 if (!blequal(&l_admin_low->tsl_label, clabel)) {
677 if ((equal_label = do_rfs_label_check(clabel, vp,
678 EQUALITY_CHECK, exi)) == B_FALSE) {
679 dominant_label = do_rfs_label_check(clabel,
680 vp, DOMINANCE_CHECK, exi);
681 } else
682 dominant_label = B_TRUE;
683 admin_low_client = B_FALSE;
684 } else
685 admin_low_client = B_TRUE;
686 }
687
688 if (args->access & ACCESS3_READ) {
689 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
690 if (error) {
691 if (curthread->t_flag & T_WOULDBLOCK)
692 goto out;
693 } else if (!MANDLOCK(vp, va.va_mode) &&
694 (!is_system_labeled() || admin_low_client ||
695 dominant_label))
696 resp->resok.access |= ACCESS3_READ;
697 }
698 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
699 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
700 if (error) {
701 if (curthread->t_flag & T_WOULDBLOCK)
702 goto out;
703 } else if (!is_system_labeled() || admin_low_client ||
704 dominant_label)
705 resp->resok.access |= ACCESS3_LOOKUP;
706 }
707 if (checkwriteperm &&
708 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
709 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
710 if (error) {
711 if (curthread->t_flag & T_WOULDBLOCK)
712 goto out;
713 } else if (!MANDLOCK(vp, va.va_mode) &&
714 (!is_system_labeled() || admin_low_client || equal_label)) {
715 resp->resok.access |=
716 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
717 }
718 }
719 if (checkwriteperm &&
720 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
721 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
722 if (error) {
723 if (curthread->t_flag & T_WOULDBLOCK)
724 goto out;
725 } else if (!is_system_labeled() || admin_low_client ||
726 equal_label)
727 resp->resok.access |= ACCESS3_DELETE;
728 }
729 if (args->access & ACCESS3_EXECUTE) {
730 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
731 if (error) {
732 if (curthread->t_flag & T_WOULDBLOCK)
733 goto out;
734 } else if (!MANDLOCK(vp, va.va_mode) &&
735 (!is_system_labeled() || admin_low_client ||
736 dominant_label))
737 resp->resok.access |= ACCESS3_EXECUTE;
738 }
739
740 va.va_mask = AT_ALL;
741 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
742
743 resp->status = NFS3_OK;
744 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
745
746 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
747 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
748 ACCESS3res *, resp);
749
750 VN_RELE(vp);
751
752 return;
753
754 out:
755 if (curthread->t_flag & T_WOULDBLOCK) {
756 curthread->t_flag &= ~T_WOULDBLOCK;
757 resp->status = NFS3ERR_JUKEBOX;
758 } else
759 resp->status = puterrno3(error);
760 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
761 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
762 ACCESS3res *, resp);
763 if (vp != NULL)
764 VN_RELE(vp);
765 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
766 }
767
768 void *
769 rfs3_access_getfh(ACCESS3args *args)
770 {
771
772 return (&args->object);
773 }
774
775 /* ARGSUSED */
776 void
777 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
778 struct svc_req *req, cred_t *cr, bool_t ro)
779 {
780 int error;
781 vnode_t *vp;
782 struct vattr *vap;
783 struct vattr va;
784 struct iovec iov;
785 struct uio uio;
786 char *data;
787 struct sockaddr *ca;
788 char *name = NULL;
789 int is_referral = 0;
790
791 vap = NULL;
792
793 vp = nfs3_fhtovp(&args->symlink, exi);
794
795 DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
796 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
797 READLINK3args *, args);
798
799 if (vp == NULL) {
800 error = ESTALE;
801 goto out;
802 }
803
804 va.va_mask = AT_ALL;
805 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
806 if (error)
807 goto out;
808
809 vap = &va;
810
811 /* We lied about the object type for a referral */
812 if (vn_is_nfs_reparse(vp, cr))
813 is_referral = 1;
814
815 if (vp->v_type != VLNK && !is_referral) {
816 resp->status = NFS3ERR_INVAL;
817 goto out1;
818 }
819
820 if (MANDLOCK(vp, va.va_mode)) {
821 resp->status = NFS3ERR_ACCES;
822 goto out1;
823 }
824
825 if (is_system_labeled()) {
826 bslabel_t *clabel = req->rq_label;
827
828 ASSERT(clabel != NULL);
829 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
830 "got client label from request(1)", struct svc_req *, req);
831
832 if (!blequal(&l_admin_low->tsl_label, clabel)) {
833 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
834 exi)) {
835 resp->status = NFS3ERR_ACCES;
836 goto out1;
837 }
838 }
839 }
840
841 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
842
843 if (is_referral) {
844 char *s;
845 size_t strsz;
846
847 /* Get an artificial symlink based on a referral */
848 s = build_symlink(vp, cr, &strsz);
849 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
850 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
851 vnode_t *, vp, char *, s);
852 if (s == NULL)
853 error = EINVAL;
854 else {
855 error = 0;
856 (void) strlcpy(data, s, MAXPATHLEN + 1);
857 kmem_free(s, strsz);
858 }
859
860 } else {
861
862 iov.iov_base = data;
863 iov.iov_len = MAXPATHLEN;
864 uio.uio_iov = &iov;
865 uio.uio_iovcnt = 1;
866 uio.uio_segflg = UIO_SYSSPACE;
867 uio.uio_extflg = UIO_COPY_CACHED;
868 uio.uio_loffset = 0;
869 uio.uio_resid = MAXPATHLEN;
870
871 error = VOP_READLINK(vp, &uio, cr, NULL);
872
873 if (!error)
874 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
875 }
876
877 va.va_mask = AT_ALL;
878 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
879
880 /* Lie about object type again just to be consistent */
881 if (is_referral && vap != NULL)
882 vap->va_type = VLNK;
883
884 #if 0 /* notyet */
885 /*
886 * Don't do this. It causes local disk writes when just
887 * reading the file and the overhead is deemed larger
888 * than the benefit.
889 */
890 /*
891 * Force modified metadata out to stable storage.
892 */
893 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
894 #endif
895
896 if (error) {
897 kmem_free(data, MAXPATHLEN + 1);
898 goto out;
899 }
900
901 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
902 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
903 MAXPATHLEN + 1);
904
905 if (name == NULL) {
906 /*
907 * Even though the conversion failed, we return
908 * something. We just don't translate it.
909 */
910 name = data;
911 }
912
913 resp->status = NFS3_OK;
914 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
915 resp->resok.data = name;
916
917 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
918 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
919 READLINK3res *, resp);
920 VN_RELE(vp);
921
922 if (name != data)
923 kmem_free(data, MAXPATHLEN + 1);
924
925 return;
926
927 out:
928 if (curthread->t_flag & T_WOULDBLOCK) {
929 curthread->t_flag &= ~T_WOULDBLOCK;
930 resp->status = NFS3ERR_JUKEBOX;
931 } else
932 resp->status = puterrno3(error);
933 out1:
934 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
935 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
936 READLINK3res *, resp);
937 if (vp != NULL)
938 VN_RELE(vp);
939 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
940 }
941
942 void *
943 rfs3_readlink_getfh(READLINK3args *args)
944 {
945
946 return (&args->symlink);
947 }
948
949 void
950 rfs3_readlink_free(READLINK3res *resp)
951 {
952
953 if (resp->status == NFS3_OK)
954 kmem_free(resp->resok.data, MAXPATHLEN + 1);
955 }
956
957 /*
958 * Server routine to handle read
959 * May handle RDMA data as well as mblks
960 */
961 /* ARGSUSED */
962 void
963 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
964 struct svc_req *req, cred_t *cr, bool_t ro)
965 {
966 int error;
967 vnode_t *vp;
968 struct vattr *vap;
969 struct vattr va;
970 struct iovec iov, *iovp = NULL;
971 int iovcnt;
972 struct uio uio;
973 u_offset_t offset;
974 mblk_t *mp = NULL;
975 int in_crit = 0;
976 int need_rwunlock = 0;
977 caller_context_t ct;
978 int rdma_used = 0;
979 int loaned_buffers;
980 struct uio *uiop;
981
982 vap = NULL;
983
984 vp = nfs3_fhtovp(&args->file, exi);
985
986 DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
987 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
988 READ3args *, args);
989
990
991 if (vp == NULL) {
992 error = ESTALE;
993 goto out;
994 }
995
996 if (args->wlist) {
997 if (args->count > clist_len(args->wlist)) {
998 error = EINVAL;
999 goto out;
1000 }
1001 rdma_used = 1;
1002 }
1003
1004 /* use loaned buffers for TCP */
1005 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1006
1007 if (is_system_labeled()) {
1008 bslabel_t *clabel = req->rq_label;
1009
1010 ASSERT(clabel != NULL);
1011 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1012 "got client label from request(1)", struct svc_req *, req);
1013
1014 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1015 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1016 exi)) {
1017 resp->status = NFS3ERR_ACCES;
1018 goto out1;
1019 }
1020 }
1021 }
1022
1023 ct.cc_sysid = 0;
1024 ct.cc_pid = 0;
1025 ct.cc_caller_id = nfs3_srv_caller_id;
1026 ct.cc_flags = CC_DONTBLOCK;
1027
1028 /*
1029 * Enter the critical region before calling VOP_RWLOCK
1030 * to avoid a deadlock with write requests.
1031 */
1032 if (nbl_need_check(vp)) {
1033 nbl_start_crit(vp, RW_READER);
1034 in_crit = 1;
1035 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1036 NULL)) {
1037 error = EACCES;
1038 goto out;
1039 }
1040 }
1041
1042 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1043
1044 /* check if a monitor detected a delegation conflict */
1045 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1046 resp->status = NFS3ERR_JUKEBOX;
1047 goto out1;
1048 }
1049
1050 need_rwunlock = 1;
1051
1052 va.va_mask = AT_ALL;
1053 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1054
1055 /*
1056 * If we can't get the attributes, then we can't do the
1057 * right access checking. So, we'll fail the request.
1058 */
1059 if (error)
1060 goto out;
1061
1062 vap = &va;
1063
1064 if (vp->v_type != VREG) {
1065 resp->status = NFS3ERR_INVAL;
1066 goto out1;
1067 }
1068
1069 if (crgetuid(cr) != va.va_uid) {
1070 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1071 if (error) {
1072 if (curthread->t_flag & T_WOULDBLOCK)
1073 goto out;
1074 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1075 if (error)
1076 goto out;
1077 }
1078 }
1079
1080 if (MANDLOCK(vp, va.va_mode)) {
1081 resp->status = NFS3ERR_ACCES;
1082 goto out1;
1083 }
1084
1085 offset = args->offset;
1086 if (offset >= va.va_size) {
1087 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1088 if (in_crit)
1089 nbl_end_crit(vp);
1090 resp->status = NFS3_OK;
1091 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1092 resp->resok.count = 0;
1093 resp->resok.eof = TRUE;
1094 resp->resok.data.data_len = 0;
1095 resp->resok.data.data_val = NULL;
1096 resp->resok.data.mp = NULL;
1097 /* RDMA */
1098 resp->resok.wlist = args->wlist;
1099 resp->resok.wlist_len = resp->resok.count;
1100 if (resp->resok.wlist)
1101 clist_zero_len(resp->resok.wlist);
1102 goto done;
1103 }
1104
1105 if (args->count == 0) {
1106 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1107 if (in_crit)
1108 nbl_end_crit(vp);
1109 resp->status = NFS3_OK;
1110 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1111 resp->resok.count = 0;
1112 resp->resok.eof = FALSE;
1113 resp->resok.data.data_len = 0;
1114 resp->resok.data.data_val = NULL;
1115 resp->resok.data.mp = NULL;
1116 /* RDMA */
1117 resp->resok.wlist = args->wlist;
1118 resp->resok.wlist_len = resp->resok.count;
1119 if (resp->resok.wlist)
1120 clist_zero_len(resp->resok.wlist);
1121 goto done;
1122 }
1123
1124 /*
1125 * do not allocate memory more the max. allowed
1126 * transfer size
1127 */
1128 if (args->count > rfs3_tsize(req))
1129 args->count = rfs3_tsize(req);
1130
1131 if (loaned_buffers) {
1132 uiop = (uio_t *)rfs_setup_xuio(vp);
1133 ASSERT(uiop != NULL);
1134 uiop->uio_segflg = UIO_SYSSPACE;
1135 uiop->uio_loffset = args->offset;
1136 uiop->uio_resid = args->count;
1137
1138 /* Jump to do the read if successful */
1139 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1140 /*
1141 * Need to hold the vnode until after VOP_RETZCBUF()
1142 * is called.
1143 */
1144 VN_HOLD(vp);
1145 goto doio_read;
1146 }
1147
1148 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1149 uiop->uio_loffset, int, uiop->uio_resid);
1150
1151 uiop->uio_extflg = 0;
1152 /* failure to setup for zero copy */
1153 rfs_free_xuio((void *)uiop);
1154 loaned_buffers = 0;
1155 }
1156
1157 /*
1158 * If returning data via RDMA Write, then grab the chunk list.
1159 * If we aren't returning READ data w/RDMA_WRITE, then grab
1160 * a mblk.
1161 */
1162 if (rdma_used) {
1163 (void) rdma_get_wchunk(req, &iov, args->wlist);
1164 uio.uio_iov = &iov;
1165 uio.uio_iovcnt = 1;
1166 } else {
1167 /*
1168 * mp will contain the data to be sent out in the read reply.
1169 * For UDP, this will be freed after the reply has been sent
1170 * out by the driver. For TCP, it will be freed after the last
1171 * segment associated with the reply has been ACKed by the
1172 * client.
1173 */
1174 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1175 uio.uio_iov = iovp;
1176 uio.uio_iovcnt = iovcnt;
1177 }
1178
1179 uio.uio_segflg = UIO_SYSSPACE;
1180 uio.uio_extflg = UIO_COPY_CACHED;
1181 uio.uio_loffset = args->offset;
1182 uio.uio_resid = args->count;
1183 uiop = &uio;
1184
1185 doio_read:
1186 error = VOP_READ(vp, uiop, 0, cr, &ct);
1187
1188 if (error) {
1189 if (mp)
1190 freemsg(mp);
1191 /* check if a monitor detected a delegation conflict */
1192 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1193 resp->status = NFS3ERR_JUKEBOX;
1194 goto out1;
1195 }
1196 goto out;
1197 }
1198
1199 /* make mblk using zc buffers */
1200 if (loaned_buffers) {
1201 mp = uio_to_mblk(uiop);
1202 ASSERT(mp != NULL);
1203 }
1204
1205 va.va_mask = AT_ALL;
1206 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1207
1208 if (error)
1209 vap = NULL;
1210 else
1211 vap = &va;
1212
1213 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1214
1215 if (in_crit)
1216 nbl_end_crit(vp);
1217
1218 resp->status = NFS3_OK;
1219 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1220 resp->resok.count = args->count - uiop->uio_resid;
1221 if (!error && offset + resp->resok.count == va.va_size)
1222 resp->resok.eof = TRUE;
1223 else
1224 resp->resok.eof = FALSE;
1225 resp->resok.data.data_len = resp->resok.count;
1226
1227 if (mp)
1228 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1229
1230 resp->resok.data.mp = mp;
1231 resp->resok.size = (uint_t)args->count;
1232
1233 if (rdma_used) {
1234 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1235 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1236 resp->status = NFS3ERR_INVAL;
1237 }
1238 } else {
1239 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1240 (resp->resok).wlist = NULL;
1241 }
1242
1243 done:
1244 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1245 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1246 READ3res *, resp);
1247
1248 VN_RELE(vp);
1249
1250 if (iovp != NULL)
1251 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1252
1253 return;
1254
1255 out:
1256 if (curthread->t_flag & T_WOULDBLOCK) {
1257 curthread->t_flag &= ~T_WOULDBLOCK;
1258 resp->status = NFS3ERR_JUKEBOX;
1259 } else
1260 resp->status = puterrno3(error);
1261 out1:
1262 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1263 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1264 READ3res *, resp);
1265
1266 if (vp != NULL) {
1267 if (need_rwunlock)
1268 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1269 if (in_crit)
1270 nbl_end_crit(vp);
1271 VN_RELE(vp);
1272 }
1273 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1274
1275 if (iovp != NULL)
1276 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1277 }
1278
1279 void
1280 rfs3_read_free(READ3res *resp)
1281 {
1282 mblk_t *mp;
1283
1284 if (resp->status == NFS3_OK) {
1285 mp = resp->resok.data.mp;
1286 if (mp != NULL)
1287 freemsg(mp);
1288 }
1289 }
1290
1291 void *
1292 rfs3_read_getfh(READ3args *args)
1293 {
1294
1295 return (&args->file);
1296 }
1297
/*
 * Number of entries in the on-stack iovec array used by rfs3_write().
 * A request whose mblk chain has more links than this gets a
 * kmem_alloc()ed iovec array instead.
 */
#define MAX_IOVECS 12

#ifdef DEBUG
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts requests
 * whose mblk chain fit in the on-stack iovec array, "misses" counts those
 * that needed a dynamically allocated array.
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1304
/*
 * NFS version 3 WRITE procedure.
 *
 * Writes args->count bytes at args->offset to the regular file named by
 * args->file.  The data is taken from args->mblk when present, otherwise
 * from args->rlist, otherwise from args->data.data_val.
 *
 *	args	decoded WRITE3 arguments
 *	resp	reply to fill in; resok on success, resfail on failure
 *	exi	export the file handle belongs to
 *	req	RPC request (client label source on labeled systems)
 *	cr	credentials the VOP calls are made with
 *	ro	passed to rdonly() to decide whether to fail with NFS3ERR_ROFS
 *
 * On success resp->resok carries the wcc data, the number of bytes
 * written, the stability level echoed from the request and the server's
 * write verifier.  On failure resp->status holds the NFSv3 error and
 * resp->resfail.file_wcc whatever before/after attributes were obtained.
 *
 * Exit labels:
 *	err	translate "error" (or T_WOULDBLOCK) into resp->status,
 *		then fall into err1
 *	err1	resp->status already set; fill in the failure wcc data
 *	out	common cleanup: fire the done probe, drop the rwlock if
 *		VOP_RWLOCK() was called, leave the nbmand critical region
 *		and release the vnode
 */
void
rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	nfs3_srv_t *ns;
	int error;
	vnode_t *vp;
	struct vattr *bvap = NULL;
	struct vattr bva;
	struct vattr *avap = NULL;
	struct vattr ava;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	int ioflag;
	cred_t *savecred;
	int in_crit = 0;
	/* -1 means VOP_RWLOCK() has not been taken; checked at "out". */
	int rwlock_ret = -1;
	caller_context_t ct;

	vp = nfs3_fhtovp(&args->file, exi);

	DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    WRITE3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto err;
	}

	ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
	ns = nfs3_get_srv();

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		/*
		 * Clients whose label differs from admin_low must pass an
		 * equality label check against the file.
		 */
		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto err1;
			}
		}
	}

	/* Caller context handed to every VOP call below. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
		    NULL)) {
			error = EACCES;
			goto err;
		}
	}

	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		/* Reset so the exit path does not call VOP_RWUNLOCK(). */
		rwlock_ret = -1;
		goto err1;
	}


	bva.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error)
		goto err;

	/*
	 * Until the write has happened the "after" attributes are the
	 * same as the "before" ones.
	 */
	bvap = &bva;
	avap = bvap;

	/* The count field must agree with the length of the data sent. */
	if (args->count != args->data.data_len) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	if (rdonly(ro, vp)) {
		resp->status = NFS3ERR_ROFS;
		goto err1;
	}

	if (vp->v_type != VREG) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/* The owner of the file skips the VOP_ACCESS() write check. */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
		goto err;

	if (MANDLOCK(vp, bva.va_mode)) {
		resp->status = NFS3ERR_ACCES;
		goto err1;
	}

	/* Zero-length write: reply OK without calling VOP_WRITE(). */
	if (args->count == 0) {
		resp->status = NFS3_OK;
		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
		resp->resok.count = 0;
		resp->resok.committed = args->stable;
		resp->resok.verf = ns->write3verf;
		goto out;
	}

	/*
	 * Build the iovec array describing the data to write.  An mblk
	 * chain needs one iovec per link; it uses the on-stack array when
	 * it fits and a kmem_alloc()ed one otherwise.  The other two
	 * sources are a single contiguous buffer.
	 */
	if (args->mblk != NULL) {
		iovcnt = 0;
		for (m = args->mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs3_write_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs3_write_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);

	} else if (args->rlist != NULL) {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->count;
	} else {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data.data_val;
		iovp->iov_len = args->count;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	/*
	 * Clamp the transfer to the distance between the starting offset
	 * and the limit taken from curproc->p_fsz_ctl.
	 */
	uio.uio_llimit = curproc->p_fsz_ctl;
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	/* Map the requested stability level onto VOP_WRITE() ioflags. */
	if (args->stable == UNSTABLE)
		ioflag = 0;
	else if (args->stable == FILE_SYNC)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC)
		ioflag = FDSYNC;
	else {
		/* Unknown stability level; free a heap iovec before failing. */
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto err1;
	}

	/*
	 * Fetch the "after" attributes even when the write failed so the
	 * failure reply can carry them.
	 */
	ava.va_mask = AT_ALL;
	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;

	if (error)
		goto err;

	/*
	 * If we were unable to get the V_WRITELOCK_TRUE, then we
	 * may not have accurate after attrs, so check if
	 * we have both attributes, they have a non-zero va_seq, and
	 * va_seq has changed by exactly one,
	 * if not, turn off the before attr.
	 */
	if (rwlock_ret != V_WRITELOCK_TRUE) {
		if (bvap == NULL || avap == NULL ||
		    bvap->va_seq == 0 || avap->va_seq == 0 ||
		    avap->va_seq != (bvap->va_seq + 1)) {
			bvap = NULL;
		}
	}

	resp->status = NFS3_OK;
	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
	resp->resok.count = args->count - uio.uio_resid;
	resp->resok.committed = args->stable;
	resp->resok.verf = ns->write3verf;
	goto out;

err:
	/* A pending T_WOULDBLOCK on the thread overrides "error". */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
err1:
	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
out:
	DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    WRITE3res *, resp);

	if (vp != NULL) {
		if (rwlock_ret != -1)
			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
}
1555
1556 void *
1557 rfs3_write_getfh(WRITE3args *args)
1558 {
1559
1560 return (&args->file);
1561 }
1562
/*
 * NFS version 3 CREATE procedure.
 *
 * Creates the regular file args->where.name in the directory named by
 * args->where.dir, according to args->how.mode:
 *
 *	EXCLUSIVE	VOP_CREATE() is called with EXCL and the client's
 *			verifier stored as the file's mtime.  If the file
 *			already exists, the request succeeds only when the
 *			existing file's mtime matches the verifier.
 *	GUARDED		VOP_CREATE() is called with EXCL; EEXIST is returned
 *			to the client as is.
 *	otherwise	VOP_CREATE() is called with NONEXCL.  When a size is
 *			being set, an existing file is first checked for a
 *			v4 delegation and for conflicting non-blocking
 *			mandatory locks.
 *
 *	args	decoded CREATE3 arguments
 *	resp	reply to fill in; resok on success, resfail on failure
 *	exi	export the directory handle belongs to
 *	req	RPC request (client address and label source)
 *	cr	credentials the VOP calls are made with
 *	ro	passed to rdonly() to decide whether to fail with NFS3ERR_ROFS
 *
 * Exit labels:
 *	out	translate "error" (or T_WOULDBLOCK) into resp->status,
 *		then fall into out1
 *	out1	resp->status already set; free the converted name, leave
 *		the critical region and release tvp if held, release dvp
 *		and fill in the failure wcc data
 */
void
rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int in_crit = 0;
	vnode_t *vp;
	/* Pre-existing target looked up for the lock-conflict check. */
	vnode_t *tvp = NULL;
	vnode_t *dvp;
	struct vattr *vap;
	struct vattr va;
	struct vattr *dbvap;
	struct vattr dbva;
	struct vattr *davap;
	struct vattr dava;
	enum vcexcl excl;
	nfstime3 *mtime;
	/* Size requested by the client; set only when AT_SIZE is in va. */
	len_t reqsize;
	bool_t trunc;
	struct sockaddr *ca;
	char *name = NULL;

	dbvap = NULL;
	davap = NULL;

	dvp = nfs3_fhtovp(&args->where.dir, exi);

	DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    CREATE3args *, args);

	if (dvp == NULL) {
		error = ESTALE;
		goto out;
	}

	/*
	 * Directory attributes before the operation; until the create has
	 * been attempted they also serve as the "after" attributes.
	 */
	dbva.va_mask = AT_ALL;
	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
	davap = dbvap;

	if (args->where.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto out1;
	}

	if (args->where.name == NULL || *(args->where.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	if (rdonly(ro, dvp)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/*
	 * Convert the client-supplied name.  The result may be
	 * args->where.name itself; the cleanup code frees it only when it
	 * is a different buffer.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, args->where.name,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

	if (name == NULL) {
		/* This is really a Solaris EILSEQ */
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	if (args->how.mode == EXCLUSIVE) {
		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
		va.va_type = VREG;
		va.va_mode = (mode_t)0;
		/*
		 * Ensure no time overflows and that types match
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
		va.va_mtime.tv_nsec = mtime->nseconds;
		excl = EXCL;
	} else {
		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
		    &va);
		if (error)
			goto out;
		va.va_mask |= AT_TYPE;
		va.va_type = VREG;
		if (args->how.mode == GUARDED)
			excl = EXCL;
		else {
			excl = NONEXCL;

			/*
			 * During creation of file in non-exclusive mode
			 * if size of file is being set then make sure
			 * that if the file already exists that no conflicting
			 * non-blocking mandatory locks exists in the region
			 * being modified. If there are conflicting locks fail
			 * the operation with EACCES.
			 */
			if (va.va_mask & AT_SIZE) {
				struct vattr tva;

				/*
				 * Does file already exist?
				 */
				error = VOP_LOOKUP(dvp, name, &tvp,
				    NULL, 0, NULL, cr, NULL, NULL, NULL);

				/*
				 * Check to see if the file has been delegated
				 * to a v4 client.  If so, then begin recall of
				 * the delegation and return JUKEBOX to allow
				 * the client to retransmit its request.
				 */

				trunc = va.va_size == 0;
				if (!error &&
				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
					resp->status = NFS3ERR_JUKEBOX;
					goto out1;
				}

				/*
				 * Check for NBMAND lock conflicts
				 */
				if (!error && nbl_need_check(tvp)) {
					u_offset_t offset;
					ssize_t len;

					nbl_start_crit(tvp, RW_READER);
					in_crit = 1;

					tva.va_mask = AT_SIZE;
					error = VOP_GETATTR(tvp, &tva, 0, cr,
					    NULL);
					/*
					 * Can't check for conflicts, so return
					 * error.
					 */
					if (error)
						goto out;

					/*
					 * The region being modified lies
					 * between the smaller and the larger
					 * of the current and requested sizes.
					 */
					offset = tva.va_size < va.va_size ?
					    tva.va_size : va.va_size;
					len = tva.va_size < va.va_size ?
					    va.va_size - tva.va_size :
					    tva.va_size - va.va_size;
					if (nbl_conflict(tvp, NBL_WRITE,
					    offset, len, 0, NULL)) {
						error = EACCES;
						goto out;
					}
				} else if (tvp) {
					/*
					 * No critical region needed; drop the
					 * hold now.  Otherwise tvp stays held
					 * until the exit paths release it.
					 */
					VN_RELE(tvp);
					tvp = NULL;
				}
			}
		}
		if (va.va_mask & AT_SIZE)
			reqsize = va.va_size;
	}

	/*
	 * Must specify the mode.
	 */
	if (!(va.va_mask & AT_MODE)) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

tryagain:
	/*
	 * The file open mode used is VWRITE.  If the client needs
	 * some other semantic, then it should do the access checking
	 * itself.  It would have been nice to have the file open mode
	 * passed as part of the arguments.
	 */
	error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
	    &vp, cr, 0, NULL, NULL);

	/* Directory attributes after the attempt, success or not. */
	dava.va_mask = AT_ALL;
	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;

	if (error) {
		/*
		 * If we got something other than file already exists
		 * then just return this error.  Otherwise, we got
		 * EEXIST.  If we were doing a GUARDED create, then
		 * just return this error.  Otherwise, we need to
		 * make sure that this wasn't a duplicate of an
		 * exclusive create request.
		 *
		 * The assumption is made that a non-exclusive create
		 * request will never return EEXIST.
		 */
		if (error != EEXIST || args->how.mode == GUARDED)
			goto out;
		/*
		 * Lookup the file so that we can get a vnode for it.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
		    NULL, cr, NULL, NULL, NULL);
		if (error) {
			/*
			 * We couldn't find the file that we thought that
			 * we just created.  So, we'll just try creating
			 * it again.
			 */
			if (error == ENOENT)
				goto tryagain;
			goto out;
		}

		/*
		 * If the file is delegated to a v4 client, go ahead
		 * and initiate recall, this create is a hint that a
		 * conflicting v3 open has occurred.
		 */

		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		/* From here on va holds the existing file's attributes. */
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		/* % with INT32_MAX to prevent overflows */
		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
		    vap->va_mtime.tv_sec !=
		    (mtime->seconds % INT32_MAX) ||
		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
			VN_RELE(vp);
			error = EEXIST;
			goto out;
		}
	} else {

		/* A requested size of zero counts as a truncation. */
		if ((args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    va.va_size == 0)
			trunc = TRUE;
		else
			trunc = FALSE;

		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * We need to check to make sure that the file got
		 * created to the indicated size.  If not, we do a
		 * setattr to try to change the size, but we don't
		 * try too hard.  This shouldn't be a problem as most
		 * clients will only specify a size of zero which
		 * local file systems handle.  However, even if
		 * the client does specify a non-zero size, it can
		 * still recover by checking the size of the file
		 * after it has created it and then issue a setattr
		 * request of its own to set the size of the file.
		 */
		if (vap != NULL &&
		    (args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    vap->va_size != reqsize) {
			va.va_mask = AT_SIZE;
			va.va_size = reqsize;
			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
			va.va_mask = AT_ALL;
			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
		}
	}

	if (name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	/*
	 * A failure to build the file handle is not fatal; the reply
	 * simply omits it.
	 */
	error = makefh3(&resp->resok.obj.handle, vp, exi);
	if (error)
		resp->resok.obj.handle_follows = FALSE;
	else
		resp->resok.obj.handle_follows = TRUE;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	VN_RELE(vp);
	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);

	DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    CREATE3res *, resp);

	VN_RELE(dvp);
	return;

out:
	/* A pending T_WOULDBLOCK on the thread overrides "error". */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    CREATE3res *, resp);

	if (name != NULL && name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}
	if (dvp != NULL)
		VN_RELE(dvp);
	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
}
1921
1922 void *
1923 rfs3_create_getfh(CREATE3args *args)
1924 {
1925
1926 return (&args->where.dir);
1927 }
1928
1929 void
1930 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1931 struct svc_req *req, cred_t *cr, bool_t ro)
1932 {
1933 int error;
1934 vnode_t *vp = NULL;
1935 vnode_t *dvp;
1936 struct vattr *vap;
1937 struct vattr va;
1938 struct vattr *dbvap;
1939 struct vattr dbva;
1940 struct vattr *davap;
1941 struct vattr dava;
1942 struct sockaddr *ca;
1943 char *name = NULL;
1944
1945 dbvap = NULL;
1946 davap = NULL;
1947
1948 dvp = nfs3_fhtovp(&args->where.dir, exi);
1949
1950 DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1951 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1952 MKDIR3args *, args);
1953
1954 if (dvp == NULL) {
1955 error = ESTALE;
1956 goto out;
1957 }
1958
1959 dbva.va_mask = AT_ALL;
1960 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1961 davap = dbvap;
1962
1963 if (args->where.name == nfs3nametoolong) {
1964 resp->status = NFS3ERR_NAMETOOLONG;
1965 goto out1;
1966 }
1967
1968 if (args->where.name == NULL || *(args->where.name) == '\0') {
1969 resp->status = NFS3ERR_ACCES;
1970 goto out1;
1971 }
1972
1973 if (rdonly(ro, dvp)) {
1974 resp->status = NFS3ERR_ROFS;
1975 goto out1;
1976 }
1977
1978 if (is_system_labeled()) {
1979 bslabel_t *clabel = req->rq_label;
1980
1981 ASSERT(clabel != NULL);
1982 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1983 "got client label from request(1)", struct svc_req *, req);
1984
1985 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1986 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1987 exi)) {
1988 resp->status = NFS3ERR_ACCES;
1989 goto out1;
1990 }
1991 }
1992 }
1993
1994 error = sattr3_to_vattr(&args->attributes, &va);
1995 if (error)
1996 goto out;
1997
1998 if (!(va.va_mask & AT_MODE)) {
1999 resp->status = NFS3ERR_INVAL;
2000 goto out1;
2001 }
2002
2003 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2004 name = nfscmd_convname(ca, exi, args->where.name,
2005 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2006
2007 if (name == NULL) {
2008 resp->status = NFS3ERR_INVAL;
2009 goto out1;
2010 }
2011
2012 va.va_mask |= AT_TYPE;
2013 va.va_type = VDIR;
2014
2015 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2016
2017 if (name != args->where.name)
2018 kmem_free(name, MAXPATHLEN + 1);
2019
2020 dava.va_mask = AT_ALL;
2021 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2022
2023 /*
2024 * Force modified data and metadata out to stable storage.
2025 */
2026 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2027
2028 if (error)
2029 goto out;
2030
2031 error = makefh3(&resp->resok.obj.handle, vp, exi);
2032 if (error)
2033 resp->resok.obj.handle_follows = FALSE;
2034 else
2035 resp->resok.obj.handle_follows = TRUE;
2036
2037 va.va_mask = AT_ALL;
2038 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2039
2040 /*
2041 * Force modified data and metadata out to stable storage.
2042 */
2043 (void) VOP_FSYNC(vp, 0, cr, NULL);
2044
2045 VN_RELE(vp);
2046
2047 resp->status = NFS3_OK;
2048 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2049 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2050
2051 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2052 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2053 MKDIR3res *, resp);
2054 VN_RELE(dvp);
2055
2056 return;
2057
2058 out:
2059 if (curthread->t_flag & T_WOULDBLOCK) {
2060 curthread->t_flag &= ~T_WOULDBLOCK;
2061 resp->status = NFS3ERR_JUKEBOX;
2062 } else
2063 resp->status = puterrno3(error);
2064 out1:
2065 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2066 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2067 MKDIR3res *, resp);
2068 if (dvp != NULL)
2069 VN_RELE(dvp);
2070 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2071 }
2072
2073 void *
2074 rfs3_mkdir_getfh(MKDIR3args *args)
2075 {
2076
2077 return (&args->where.dir);
2078 }
2079
2080 void
2081 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2082 struct svc_req *req, cred_t *cr, bool_t ro)
2083 {
2084 int error;
2085 vnode_t *vp;
2086 vnode_t *dvp;
2087 struct vattr *vap;
2088 struct vattr va;
2089 struct vattr *dbvap;
2090 struct vattr dbva;
2091 struct vattr *davap;
2092 struct vattr dava;
2093 struct sockaddr *ca;
2094 char *name = NULL;
2095 char *symdata = NULL;
2096
2097 dbvap = NULL;
2098 davap = NULL;
2099
2100 dvp = nfs3_fhtovp(&args->where.dir, exi);
2101
2102 DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2103 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2104 SYMLINK3args *, args);
2105
2106 if (dvp == NULL) {
2107 error = ESTALE;
2108 goto err;
2109 }
2110
2111 dbva.va_mask = AT_ALL;
2112 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2113 davap = dbvap;
2114
2115 if (args->where.name == nfs3nametoolong) {
2116 resp->status = NFS3ERR_NAMETOOLONG;
2117 goto err1;
2118 }
2119
2120 if (args->where.name == NULL || *(args->where.name) == '\0') {
2121 resp->status = NFS3ERR_ACCES;
2122 goto err1;
2123 }
2124
2125 if (rdonly(ro, dvp)) {
2126 resp->status = NFS3ERR_ROFS;
2127 goto err1;
2128 }
2129
2130 if (is_system_labeled()) {
2131 bslabel_t *clabel = req->rq_label;
2132
2133 ASSERT(clabel != NULL);
2134 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2135 "got client label from request(1)", struct svc_req *, req);
2136
2137 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2138 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2139 exi)) {
2140 resp->status = NFS3ERR_ACCES;
2141 goto err1;
2142 }
2143 }
2144 }
2145
2146 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2147 if (error)
2148 goto err;
2149
2150 if (!(va.va_mask & AT_MODE)) {
2151 resp->status = NFS3ERR_INVAL;
2152 goto err1;
2153 }
2154
2155 if (args->symlink.symlink_data == nfs3nametoolong) {
2156 resp->status = NFS3ERR_NAMETOOLONG;
2157 goto err1;
2158 }
2159
2160 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2161 name = nfscmd_convname(ca, exi, args->where.name,
2162 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2163
2164 if (name == NULL) {
2165 /* This is really a Solaris EILSEQ */
2166 resp->status = NFS3ERR_INVAL;
2167 goto err1;
2168 }
2169
2170 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2171 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2172 if (symdata == NULL) {
2173 /* This is really a Solaris EILSEQ */
2174 resp->status = NFS3ERR_INVAL;
2175 goto err1;
2176 }
2177
2178
2179 va.va_mask |= AT_TYPE;
2180 va.va_type = VLNK;
2181
2182 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2183
2184 dava.va_mask = AT_ALL;
2185 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2186
2187 if (error)
2188 goto err;
2189
2190 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2191 NULL, NULL, NULL);
2192
2193 /*
2194 * Force modified data and metadata out to stable storage.
2195 */
2196 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2197
2198
2199 resp->status = NFS3_OK;
2200 if (error) {
2201 resp->resok.obj.handle_follows = FALSE;
2202 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2203 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2204 goto out;
2205 }
2206
2207 error = makefh3(&resp->resok.obj.handle, vp, exi);
2208 if (error)
2209 resp->resok.obj.handle_follows = FALSE;
2210 else
2211 resp->resok.obj.handle_follows = TRUE;
2212
2213 va.va_mask = AT_ALL;
2214 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2215
2216 /*
2217 * Force modified data and metadata out to stable storage.
2218 */
2219 (void) VOP_FSYNC(vp, 0, cr, NULL);
2220
2221 VN_RELE(vp);
2222
2223 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2224 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2225 goto out;
2226
2227 err:
2228 if (curthread->t_flag & T_WOULDBLOCK) {
2229 curthread->t_flag &= ~T_WOULDBLOCK;
2230 resp->status = NFS3ERR_JUKEBOX;
2231 } else
2232 resp->status = puterrno3(error);
2233 err1:
2234 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2235 out:
2236 if (name != NULL && name != args->where.name)
2237 kmem_free(name, MAXPATHLEN + 1);
2238 if (symdata != NULL && symdata != args->symlink.symlink_data)
2239 kmem_free(symdata, MAXPATHLEN + 1);
2240
2241 DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2242 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2243 SYMLINK3res *, resp);
2244
2245 if (dvp != NULL)
2246 VN_RELE(dvp);
2247 }
2248
2249 void *
2250 rfs3_symlink_getfh(SYMLINK3args *args)
2251 {
2252
2253 return (&args->where.dir);
2254 }
2255
2256 void
2257 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2258 struct svc_req *req, cred_t *cr, bool_t ro)
2259 {
2260 int error;
2261 vnode_t *vp;
2262 vnode_t *realvp;
2263 vnode_t *dvp;
2264 struct vattr *vap;
2265 struct vattr va;
2266 struct vattr *dbvap;
2267 struct vattr dbva;
2268 struct vattr *davap;
2269 struct vattr dava;
2270 int mode;
2271 enum vcexcl excl;
2272 struct sockaddr *ca;
2273 char *name = NULL;
2274
2275 dbvap = NULL;
2276 davap = NULL;
2277
2278 dvp = nfs3_fhtovp(&args->where.dir, exi);
2279
2280 DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2281 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2282 MKNOD3args *, args);
2283
2284 if (dvp == NULL) {
2285 error = ESTALE;
2286 goto out;
2287 }
2288
2289 dbva.va_mask = AT_ALL;
2290 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2291 davap = dbvap;
2292
2293 if (args->where.name == nfs3nametoolong) {
2294 resp->status = NFS3ERR_NAMETOOLONG;
2295 goto out1;
2296 }
2297
2298 if (args->where.name == NULL || *(args->where.name) == '\0') {
2299 resp->status = NFS3ERR_ACCES;
2300 goto out1;
2301 }
2302
2303 if (rdonly(ro, dvp)) {
2304 resp->status = NFS3ERR_ROFS;
2305 goto out1;
2306 }
2307
2308 if (is_system_labeled()) {
2309 bslabel_t *clabel = req->rq_label;
2310
2311 ASSERT(clabel != NULL);
2312 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2313 "got client label from request(1)", struct svc_req *, req);
2314
2315 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2316 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2317 exi)) {
2318 resp->status = NFS3ERR_ACCES;
2319 goto out1;
2320 }
2321 }
2322 }
2323
2324 switch (args->what.type) {
2325 case NF3CHR:
2326 case NF3BLK:
2327 error = sattr3_to_vattr(
2328 &args->what.mknoddata3_u.device.dev_attributes, &va);
2329 if (error)
2330 goto out;
2331 if (secpolicy_sys_devices(cr) != 0) {
2332 resp->status = NFS3ERR_PERM;
2333 goto out1;
2334 }
2335 if (args->what.type == NF3CHR)
2336 va.va_type = VCHR;
2337 else
2338 va.va_type = VBLK;
2339 va.va_rdev = makedevice(
2340 args->what.mknoddata3_u.device.spec.specdata1,
2341 args->what.mknoddata3_u.device.spec.specdata2);
2342 va.va_mask |= AT_TYPE | AT_RDEV;
2343 break;
2344 case NF3SOCK:
2345 error = sattr3_to_vattr(
2346 &args->what.mknoddata3_u.pipe_attributes, &va);
2347 if (error)
2348 goto out;
2349 va.va_type = VSOCK;
2350 va.va_mask |= AT_TYPE;
2351 break;
2352 case NF3FIFO:
2353 error = sattr3_to_vattr(
2354 &args->what.mknoddata3_u.pipe_attributes, &va);
2355 if (error)
2356 goto out;
2357 va.va_type = VFIFO;
2358 va.va_mask |= AT_TYPE;
2359 break;
2360 default:
2361 resp->status = NFS3ERR_BADTYPE;
2362 goto out1;
2363 }
2364
2365 /*
2366 * Must specify the mode.
2367 */
2368 if (!(va.va_mask & AT_MODE)) {
2369 resp->status = NFS3ERR_INVAL;
2370 goto out1;
2371 }
2372
2373 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2374 name = nfscmd_convname(ca, exi, args->where.name,
2375 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2376
2377 if (name == NULL) {
2378 resp->status = NFS3ERR_INVAL;
2379 goto out1;
2380 }
2381
2382 excl = EXCL;
2383
2384 mode = 0;
2385
2386 error = VOP_CREATE(dvp, name, &va, excl, mode,
2387 &vp, cr, 0, NULL, NULL);
2388
2389 if (name != args->where.name)
2390 kmem_free(name, MAXPATHLEN + 1);
2391
2392 dava.va_mask = AT_ALL;
2393 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2394
2395 /*
2396 * Force modified data and metadata out to stable storage.
2397 */
2398 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2399
2400 if (error)
2401 goto out;
2402
2403 resp->status = NFS3_OK;
2404
2405 error = makefh3(&resp->resok.obj.handle, vp, exi);
2406 if (error)
2407 resp->resok.obj.handle_follows = FALSE;
2408 else
2409 resp->resok.obj.handle_follows = TRUE;
2410
2411 va.va_mask = AT_ALL;
2412 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2413
2414 /*
2415 * Force modified metadata out to stable storage.
2416 *
2417 * if a underlying vp exists, pass it to VOP_FSYNC
2418 */
2419 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2420 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2421 else
2422 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2423
2424 VN_RELE(vp);
2425
2426 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2427 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2428 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2429 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2430 MKNOD3res *, resp);
2431 VN_RELE(dvp);
2432 return;
2433
2434 out:
2435 if (curthread->t_flag & T_WOULDBLOCK) {
2436 curthread->t_flag &= ~T_WOULDBLOCK;
2437 resp->status = NFS3ERR_JUKEBOX;
2438 } else
2439 resp->status = puterrno3(error);
2440 out1:
2441 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2442 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2443 MKNOD3res *, resp);
2444 if (dvp != NULL)
2445 VN_RELE(dvp);
2446 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2447 }
2448
2449 void *
2450 rfs3_mknod_getfh(MKNOD3args *args)
2451 {
2452
2453 return (&args->where.dir);
2454 }
2455
2456 void
2457 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2458 struct svc_req *req, cred_t *cr, bool_t ro)
2459 {
2460 int error = 0;
2461 vnode_t *vp;
2462 struct vattr *bvap;
2463 struct vattr bva;
2464 struct vattr *avap;
2465 struct vattr ava;
2466 vnode_t *targvp = NULL;
2467 struct sockaddr *ca;
2468 char *name = NULL;
2469
2470 bvap = NULL;
2471 avap = NULL;
2472
2473 vp = nfs3_fhtovp(&args->object.dir, exi);
2474
2475 DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2476 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2477 REMOVE3args *, args);
2478
2479 if (vp == NULL) {
2480 error = ESTALE;
2481 goto err;
2482 }
2483
2484 bva.va_mask = AT_ALL;
2485 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2486 avap = bvap;
2487
2488 if (vp->v_type != VDIR) {
2489 resp->status = NFS3ERR_NOTDIR;
2490 goto err1;
2491 }
2492
2493 if (args->object.name == nfs3nametoolong) {
2494 resp->status = NFS3ERR_NAMETOOLONG;
2495 goto err1;
2496 }
2497
2498 if (args->object.name == NULL || *(args->object.name) == '\0') {
2499 resp->status = NFS3ERR_ACCES;
2500 goto err1;
2501 }
2502
2503 if (rdonly(ro, vp)) {
2504 resp->status = NFS3ERR_ROFS;
2505 goto err1;
2506 }
2507
2508 if (is_system_labeled()) {
2509 bslabel_t *clabel = req->rq_label;
2510
2511 ASSERT(clabel != NULL);
2512 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2513 "got client label from request(1)", struct svc_req *, req);
2514
2515 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2516 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2517 exi)) {
2518 resp->status = NFS3ERR_ACCES;
2519 goto err1;
2520 }
2521 }
2522 }
2523
2524 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2525 name = nfscmd_convname(ca, exi, args->object.name,
2526 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2527
2528 if (name == NULL) {
2529 resp->status = NFS3ERR_INVAL;
2530 goto err1;
2531 }
2532
2533 /*
2534 * Check for a conflict with a non-blocking mandatory share
2535 * reservation and V4 delegations
2536 */
2537 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2538 NULL, cr, NULL, NULL, NULL);
2539 if (error != 0)
2540 goto err;
2541
2542 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2543 resp->status = NFS3ERR_JUKEBOX;
2544 goto err1;
2545 }
2546
2547 if (!nbl_need_check(targvp)) {
2548 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2549 } else {
2550 nbl_start_crit(targvp, RW_READER);
2551 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2552 error = EACCES;
2553 } else {
2554 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2555 }
2556 nbl_end_crit(targvp);
2557 }
2558 VN_RELE(targvp);
2559 targvp = NULL;
2560
2561 ava.va_mask = AT_ALL;
2562 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2563
2564 /*
2565 * Force modified data and metadata out to stable storage.
2566 */
2567 (void) VOP_FSYNC(vp, 0, cr, NULL);
2568
2569 if (error)
2570 goto err;
2571
2572 resp->status = NFS3_OK;
2573 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2574 goto out;
2575
2576 err:
2577 if (curthread->t_flag & T_WOULDBLOCK) {
2578 curthread->t_flag &= ~T_WOULDBLOCK;
2579 resp->status = NFS3ERR_JUKEBOX;
2580 } else
2581 resp->status = puterrno3(error);
2582 err1:
2583 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2584 out:
2585 DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2586 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2587 REMOVE3res *, resp);
2588
2589 if (name != NULL && name != args->object.name)
2590 kmem_free(name, MAXPATHLEN + 1);
2591
2592 if (vp != NULL)
2593 VN_RELE(vp);
2594 }
2595
2596 void *
2597 rfs3_remove_getfh(REMOVE3args *args)
2598 {
2599
2600 return (&args->object.dir);
2601 }
2602
2603 void
2604 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2605 struct svc_req *req, cred_t *cr, bool_t ro)
2606 {
2607 int error;
2608 vnode_t *vp;
2609 struct vattr *bvap;
2610 struct vattr bva;
2611 struct vattr *avap;
2612 struct vattr ava;
2613 struct sockaddr *ca;
2614 char *name = NULL;
2615
2616 bvap = NULL;
2617 avap = NULL;
2618
2619 vp = nfs3_fhtovp(&args->object.dir, exi);
2620
2621 DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2622 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2623 RMDIR3args *, args);
2624
2625 if (vp == NULL) {
2626 error = ESTALE;
2627 goto err;
2628 }
2629
2630 bva.va_mask = AT_ALL;
2631 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2632 avap = bvap;
2633
2634 if (vp->v_type != VDIR) {
2635 resp->status = NFS3ERR_NOTDIR;
2636 goto err1;
2637 }
2638
2639 if (args->object.name == nfs3nametoolong) {
2640 resp->status = NFS3ERR_NAMETOOLONG;
2641 goto err1;
2642 }
2643
2644 if (args->object.name == NULL || *(args->object.name) == '\0') {
2645 resp->status = NFS3ERR_ACCES;
2646 goto err1;
2647 }
2648
2649 if (rdonly(ro, vp)) {
2650 resp->status = NFS3ERR_ROFS;
2651 goto err1;
2652 }
2653
2654 if (is_system_labeled()) {
2655 bslabel_t *clabel = req->rq_label;
2656
2657 ASSERT(clabel != NULL);
2658 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2659 "got client label from request(1)", struct svc_req *, req);
2660
2661 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2662 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2663 exi)) {
2664 resp->status = NFS3ERR_ACCES;
2665 goto err1;
2666 }
2667 }
2668 }
2669
2670 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2671 name = nfscmd_convname(ca, exi, args->object.name,
2672 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2673
2674 if (name == NULL) {
2675 resp->status = NFS3ERR_INVAL;
2676 goto err1;
2677 }
2678
2679 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
2680 error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2681
2682 if (name != args->object.name)
2683 kmem_free(name, MAXPATHLEN + 1);
2684
2685 ava.va_mask = AT_ALL;
2686 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2687
2688 /*
2689 * Force modified data and metadata out to stable storage.
2690 */
2691 (void) VOP_FSYNC(vp, 0, cr, NULL);
2692
2693 if (error) {
2694 /*
2695 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2696 * if the directory is not empty. A System V NFS server
2697 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2698 * over the wire.
2699 */
2700 if (error == EEXIST)
2701 error = ENOTEMPTY;
2702 goto err;
2703 }
2704
2705 resp->status = NFS3_OK;
2706 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2707 goto out;
2708
2709 err:
2710 if (curthread->t_flag & T_WOULDBLOCK) {
2711 curthread->t_flag &= ~T_WOULDBLOCK;
2712 resp->status = NFS3ERR_JUKEBOX;
2713 } else
2714 resp->status = puterrno3(error);
2715 err1:
2716 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2717 out:
2718 DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2719 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2720 RMDIR3res *, resp);
2721 if (vp != NULL)
2722 VN_RELE(vp);
2723
2724 }
2725
2726 void *
2727 rfs3_rmdir_getfh(RMDIR3args *args)
2728 {
2729
2730 return (&args->object.dir);
2731 }
2732
2733 void
2734 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2735 struct svc_req *req, cred_t *cr, bool_t ro)
2736 {
2737 int error = 0;
2738 vnode_t *fvp;
2739 vnode_t *tvp;
2740 vnode_t *targvp;
2741 struct vattr *fbvap;
2742 struct vattr fbva;
2743 struct vattr *favap;
2744 struct vattr fava;
2745 struct vattr *tbvap;
2746 struct vattr tbva;
2747 struct vattr *tavap;
2748 struct vattr tava;
2749 nfs_fh3 *fh3;
2750 struct exportinfo *to_exi;
2751 vnode_t *srcvp = NULL;
2752 bslabel_t *clabel;
2753 struct sockaddr *ca;
2754 char *name = NULL;
2755 char *toname = NULL;
2756
2757 fbvap = NULL;
2758 favap = NULL;
2759 tbvap = NULL;
2760 tavap = NULL;
2761 tvp = NULL;
2762
2763 fvp = nfs3_fhtovp(&args->from.dir, exi);
2764
2765 DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2766 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2767 RENAME3args *, args);
2768
2769 if (fvp == NULL) {
2770 error = ESTALE;
2771 goto err;
2772 }
2773
2774 if (is_system_labeled()) {
2775 clabel = req->rq_label;
2776 ASSERT(clabel != NULL);
2777 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2778 "got client label from request(1)", struct svc_req *, req);
2779
2780 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2781 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2782 exi)) {
2783 resp->status = NFS3ERR_ACCES;
2784 goto err1;
2785 }
2786 }
2787 }
2788
2789 fbva.va_mask = AT_ALL;
2790 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2791 favap = fbvap;
2792
2793 fh3 = &args->to.dir;
2794 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2795 if (to_exi == NULL) {
2796 resp->status = NFS3ERR_ACCES;
2797 goto err1;
2798 }
2799 exi_rele(to_exi);
2800
2801 if (to_exi != exi) {
2802 resp->status = NFS3ERR_XDEV;
2803 goto err1;
2804 }
2805
2806 tvp = nfs3_fhtovp(&args->to.dir, exi);
2807 if (tvp == NULL) {
2808 error = ESTALE;
2809 goto err;
2810 }
2811
2812 tbva.va_mask = AT_ALL;
2813 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2814 tavap = tbvap;
2815
2816 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2817 resp->status = NFS3ERR_NOTDIR;
2818 goto err1;
2819 }
2820
2821 if (args->from.name == nfs3nametoolong ||
2822 args->to.name == nfs3nametoolong) {
2823 resp->status = NFS3ERR_NAMETOOLONG;
2824 goto err1;
2825 }
2826 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2827 args->to.name == NULL || *(args->to.name) == '\0') {
2828 resp->status = NFS3ERR_ACCES;
2829 goto err1;
2830 }
2831
2832 if (rdonly(ro, tvp)) {
2833 resp->status = NFS3ERR_ROFS;
2834 goto err1;
2835 }
2836
2837 if (is_system_labeled()) {
2838 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2839 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2840 exi)) {
2841 resp->status = NFS3ERR_ACCES;
2842 goto err1;
2843 }
2844 }
2845 }
2846
2847 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2848 name = nfscmd_convname(ca, exi, args->from.name,
2849 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2850
2851 if (name == NULL) {
2852 resp->status = NFS3ERR_INVAL;
2853 goto err1;
2854 }
2855
2856 toname = nfscmd_convname(ca, exi, args->to.name,
2857 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2858
2859 if (toname == NULL) {
2860 resp->status = NFS3ERR_INVAL;
2861 goto err1;
2862 }
2863
2864 /*
2865 * Check for a conflict with a non-blocking mandatory share
2866 * reservation or V4 delegations.
2867 */
2868 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2869 NULL, cr, NULL, NULL, NULL);
2870 if (error != 0)
2871 goto err;
2872
2873 /*
2874 * If we rename a delegated file we should recall the
2875 * delegation, since future opens should fail or would
2876 * refer to a new file.
2877 */
2878 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2879 resp->status = NFS3ERR_JUKEBOX;
2880 goto err1;
2881 }
2882
2883 /*
2884 * Check for renaming over a delegated file. Check nfs4_deleg_policy
2885 * first to avoid VOP_LOOKUP if possible.
2886 */
2887 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2888 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2889 NULL, NULL, NULL) == 0) {
2890
2891 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2892 VN_RELE(targvp);
2893 resp->status = NFS3ERR_JUKEBOX;
2894 goto err1;
2895 }
2896 VN_RELE(targvp);
2897 }
2898
2899 if (!nbl_need_check(srcvp)) {
2900 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2901 } else {
2902 nbl_start_crit(srcvp, RW_READER);
2903 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2904 error = EACCES;
2905 else
2906 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2907 nbl_end_crit(srcvp);
2908 }
2909 if (error == 0)
2910 vn_renamepath(tvp, srcvp, args->to.name,
2911 strlen(args->to.name));
2912 VN_RELE(srcvp);
2913 srcvp = NULL;
2914
2915 fava.va_mask = AT_ALL;
2916 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2917 tava.va_mask = AT_ALL;
2918 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2919
2920 /*
2921 * Force modified data and metadata out to stable storage.
2922 */
2923 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2924 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2925
2926 if (error)
2927 goto err;
2928
2929 resp->status = NFS3_OK;
2930 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2931 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2932 goto out;
2933
2934 err:
2935 if (curthread->t_flag & T_WOULDBLOCK) {
2936 curthread->t_flag &= ~T_WOULDBLOCK;
2937 resp->status = NFS3ERR_JUKEBOX;
2938 } else {
2939 resp->status = puterrno3(error);
2940 }
2941 err1:
2942 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2943 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2944
2945 out:
2946 if (name != NULL && name != args->from.name)
2947 kmem_free(name, MAXPATHLEN + 1);
2948 if (toname != NULL && toname != args->to.name)
2949 kmem_free(toname, MAXPATHLEN + 1);
2950
2951 DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2952 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2953 RENAME3res *, resp);
2954 if (fvp != NULL)
2955 VN_RELE(fvp);
2956 if (tvp != NULL)
2957 VN_RELE(tvp);
2958 }
2959
2960 void *
2961 rfs3_rename_getfh(RENAME3args *args)
2962 {
2963
2964 return (&args->from.dir);
2965 }
2966
2967 void
2968 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2969 struct svc_req *req, cred_t *cr, bool_t ro)
2970 {
2971 int error;
2972 vnode_t *vp;
2973 vnode_t *dvp;
2974 struct vattr *vap;
2975 struct vattr va;
2976 struct vattr *bvap;
2977 struct vattr bva;
2978 struct vattr *avap;
2979 struct vattr ava;
2980 nfs_fh3 *fh3;
2981 struct exportinfo *to_exi;
2982 bslabel_t *clabel;
2983 struct sockaddr *ca;
2984 char *name = NULL;
2985
2986 vap = NULL;
2987 bvap = NULL;
2988 avap = NULL;
2989 dvp = NULL;
2990
2991 vp = nfs3_fhtovp(&args->file, exi);
2992
2993 DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2994 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2995 LINK3args *, args);
2996
2997 if (vp == NULL) {
2998 error = ESTALE;
2999 goto out;
3000 }
3001
3002 va.va_mask = AT_ALL;
3003 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3004
3005 fh3 = &args->link.dir;
3006 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3007 if (to_exi == NULL) {
3008 resp->status = NFS3ERR_ACCES;
3009 goto out1;
3010 }
3011 exi_rele(to_exi);
3012
3013 if (to_exi != exi) {
3014 resp->status = NFS3ERR_XDEV;
3015 goto out1;
3016 }
3017
3018 if (is_system_labeled()) {
3019 clabel = req->rq_label;
3020
3021 ASSERT(clabel != NULL);
3022 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3023 "got client label from request(1)", struct svc_req *, req);
3024
3025 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3026 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3027 exi)) {
3028 resp->status = NFS3ERR_ACCES;
3029 goto out1;
3030 }
3031 }
3032 }
3033
3034 dvp = nfs3_fhtovp(&args->link.dir, exi);
3035 if (dvp == NULL) {
3036 error = ESTALE;
3037 goto out;
3038 }
3039
3040 bva.va_mask = AT_ALL;
3041 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3042
3043 if (dvp->v_type != VDIR) {
3044 resp->status = NFS3ERR_NOTDIR;
3045 goto out1;
3046 }
3047
3048 if (args->link.name == nfs3nametoolong) {
3049 resp->status = NFS3ERR_NAMETOOLONG;
3050 goto out1;
3051 }
3052
3053 if (args->link.name == NULL || *(args->link.name) == '\0') {
3054 resp->status = NFS3ERR_ACCES;
3055 goto out1;
3056 }
3057
3058 if (rdonly(ro, dvp)) {
3059 resp->status = NFS3ERR_ROFS;
3060 goto out1;
3061 }
3062
3063 if (is_system_labeled()) {
3064 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3065 "got client label from request(1)", struct svc_req *, req);
3066
3067 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3068 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3069 exi)) {
3070 resp->status = NFS3ERR_ACCES;
3071 goto out1;
3072 }
3073 }
3074 }
3075
3076 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3077 name = nfscmd_convname(ca, exi, args->link.name,
3078 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3079
3080 if (name == NULL) {
3081 resp->status = NFS3ERR_SERVERFAULT;
3082 goto out1;
3083 }
3084
3085 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3086
3087 va.va_mask = AT_ALL;
3088 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3089 ava.va_mask = AT_ALL;
3090 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3091
3092 /*
3093 * Force modified data and metadata out to stable storage.
3094 */
3095 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3096 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3097
3098 if (error)
3099 goto out;
3100
3101 VN_RELE(dvp);
3102
3103 resp->status = NFS3_OK;
3104 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3105 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3106
3107 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3108 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3109 LINK3res *, resp);
3110
3111 VN_RELE(vp);
3112
3113 return;
3114
3115 out:
3116 if (curthread->t_flag & T_WOULDBLOCK) {
3117 curthread->t_flag &= ~T_WOULDBLOCK;
3118 resp->status = NFS3ERR_JUKEBOX;
3119 } else
3120 resp->status = puterrno3(error);
3121 out1:
3122 if (name != NULL && name != args->link.name)
3123 kmem_free(name, MAXPATHLEN + 1);
3124
3125 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3126 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3127 LINK3res *, resp);
3128
3129 if (vp != NULL)
3130 VN_RELE(vp);
3131 if (dvp != NULL)
3132 VN_RELE(dvp);
3133 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3134 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3135 }
3136
3137 void *
3138 rfs3_link_getfh(LINK3args *args)
3139 {
3140
3141 return (&args->file);
3142 }
3143
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries attribute information plus a single directory
 * entry whose name is `length' bytes long.  A request whose count is at
 * least this large is guaranteed to have room for at least one directory
 * entry if one exists, so NFS3ERR_TOOSMALL only has to be considered
 * when the requested count is smaller than this.
 *
 * The total is built from the following pieces, counted in XDR units:
 *
 *	status			1
 *	attribute flag		1
 *	cookie verifier		2
 *	attributes		NFS3_SIZEOF_FATTR3
 *	boolean			1
 *	file id			2
 *	directory name length	1
 *	cookie			2
 *	end of list		1
 *	end of file		1
 *
 * Each unit is BYTES_PER_XDR_UNIT bytes.  The name itself is added on top,
 * rounded up to a whole number of XDR units.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + BYTES_PER_XDR_UNIT * \
	(1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3172
3173 /* ARGSUSED */
3174 void
3175 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3176 struct svc_req *req, cred_t *cr, bool_t ro)
3177 {
3178 int error;
3179 vnode_t *vp;
3180 struct vattr *vap;
3181 struct vattr va;
3182 struct iovec iov;
3183 struct uio uio;
3184 char *data;
3185 int iseof;
3186 int bufsize;
3187 int namlen;
3188 uint_t count;
3189 struct sockaddr *ca;
3190
3191 vap = NULL;
3192
3193 vp = nfs3_fhtovp(&args->dir, exi);
3194
3195 DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3196 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3197 READDIR3args *, args);
3198
3199 if (vp == NULL) {
3200 error = ESTALE;
3201 goto out;
3202 }
3203
3204 if (is_system_labeled()) {
3205 bslabel_t *clabel = req->rq_label;
3206
3207 ASSERT(clabel != NULL);
3208 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3209 "got client label from request(1)", struct svc_req *, req);
3210
3211 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3212 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3213 exi)) {
3214 resp->status = NFS3ERR_ACCES;
3215 goto out1;
3216 }
3217 }
3218 }
3219
3220 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3221
3222 va.va_mask = AT_ALL;
3223 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3224
3225 if (vp->v_type != VDIR) {
3226 resp->status = NFS3ERR_NOTDIR;
3227 goto out1;
3228 }
3229
3230 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3231 if (error)
3232 goto out;
3233
3234 /*
3235 * Now don't allow arbitrary count to alloc;
3236 * allow the maximum not to exceed rfs3_tsize()
3237 */
3238 if (args->count > rfs3_tsize(req))
3239 args->count = rfs3_tsize(req);
3240
3241 /*
3242 * Make sure that there is room to read at least one entry
3243 * if any are available.
3244 */
3245 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3246 count = DIRENT64_RECLEN(MAXNAMELEN);
3247 else
3248 count = args->count;
3249
3250 data = kmem_alloc(count, KM_SLEEP);
3251
3252 iov.iov_base = data;
3253 iov.iov_len = count;
3254 uio.uio_iov = &iov;
3255 uio.uio_iovcnt = 1;
3256 uio.uio_segflg = UIO_SYSSPACE;
3257 uio.uio_extflg = UIO_COPY_CACHED;
3258 uio.uio_loffset = (offset_t)args->cookie;
3259 uio.uio_resid = count;
3260
3261 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3262
3263 va.va_mask = AT_ALL;
3264 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3265
3266 if (error) {
3267 kmem_free(data, count);
3268 goto out;
3269 }
3270
3271 /*
3272 * If the count was not large enough to be able to guarantee
3273 * to be able to return at least one entry, then need to
3274 * check to see if NFS3ERR_TOOSMALL should be returned.
3275 */
3276 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3277 /*
3278 * bufsize is used to keep track of the size of the response.
3279 * It is primed with:
3280 * 1 for the status +
3281 * 1 for the dir_attributes.attributes boolean +
3282 * 2 for the cookie verifier
3283 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3284 * to bytes. If there are directory attributes to be
3285 * returned, then:
3286 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3287 * time BYTES_PER_XDR_UNIT is added to account for them.
3288 */
3289 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3290 if (vap != NULL)
3291 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3292 /*
3293 * An entry is composed of:
3294 * 1 for the true/false list indicator +
3295 * 2 for the fileid +
3296 * 1 for the length of the name +
3297 * 2 for the cookie +
3298 * all times BYTES_PER_XDR_UNIT to convert from
3299 * XDR units to bytes, plus the length of the name
3300 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3301 */
3302 if (count != uio.uio_resid) {
3303 namlen = strlen(((struct dirent64 *)data)->d_name);
3304 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3305 roundup(namlen, BYTES_PER_XDR_UNIT);
3306 }
3307 /*
3308 * We need to check to see if the number of bytes left
3309 * to go into the buffer will actually fit into the
3310 * buffer. This is calculated as the size of this
3311 * entry plus:
3312 * 1 for the true/false list indicator +
3313 * 1 for the eof indicator
3314 * times BYTES_PER_XDR_UNIT to convert from from
3315 * XDR units to bytes.
3316 */
3317 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3318 if (bufsize > args->count) {
3319 kmem_free(data, count);
3320 resp->status = NFS3ERR_TOOSMALL;
3321 goto out1;
3322 }
3323 }
3324
3325 /*
3326 * Have a valid readir buffer for the native character
3327 * set. Need to check if a conversion is necessary and
3328 * potentially rewrite the whole buffer. Note that if the
3329 * conversion expands names enough, the structure may not
3330 * fit. In this case, we need to drop entries until if fits
3331 * and patch the counts in order that the next readdir will
3332 * get the correct entries.
3333 */
3334 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3335 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3336
3337
3338 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3339
3340 #if 0 /* notyet */
3341 /*
3342 * Don't do this. It causes local disk writes when just
3343 * reading the file and the overhead is deemed larger
3344 * than the benefit.
3345 */
3346 /*
3347 * Force modified metadata out to stable storage.
3348 */
3349 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3350 #endif
3351
3352 resp->status = NFS3_OK;
3353 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3354 resp->resok.cookieverf = 0;
3355 resp->resok.reply.entries = (entry3 *)data;
3356 resp->resok.reply.eof = iseof;
3357 resp->resok.size = count - uio.uio_resid;
3358 resp->resok.count = args->count;
3359 resp->resok.freecount = count;
3360
3361 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3362 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3363 READDIR3res *, resp);
3364
3365 VN_RELE(vp);
3366
3367 return;
3368
3369 out:
3370 if (curthread->t_flag & T_WOULDBLOCK) {
3371 curthread->t_flag &= ~T_WOULDBLOCK;
3372 resp->status = NFS3ERR_JUKEBOX;
3373 } else
3374 resp->status = puterrno3(error);
3375 out1:
3376 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3377
3378 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3379 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3380 READDIR3res *, resp);
3381
3382 if (vp != NULL) {
3383 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3384 VN_RELE(vp);
3385 }
3386 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3387 }
3388
3389 void *
3390 rfs3_readdir_getfh(READDIR3args *args)
3391 {
3392
3393 return (&args->dir);
3394 }
3395
3396 void
3397 rfs3_readdir_free(READDIR3res *resp)
3398 {
3399
3400 if (resp->status == NFS3_OK)
3401 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3402 }
3403
/*
 * nextdp(dp) yields the dirent64 record that starts d_reclen bytes after
 * `dp'.  Any definition of nextdp already in effect is discarded first.
 */
#undef	nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)&((char *)(dp))[(dp)->d_reclen])
3408
/*
 * This macro computes the size, in bytes, of one READDIRPLUS directory
 * entry including its attributes and file handle.  If the space left in
 * the response is at least this large, then we are guaranteed to be able
 * to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 *	boolean - 1 * BYTES_PER_XDR_UNIT
 *	file id - 2 * BYTES_PER_XDR_UNIT
 *	directory name length - 1 * BYTES_PER_XDR_UNIT
 *	cookie - 2 * BYTES_PER_XDR_UNIT
 *	attribute flag - 1 * BYTES_PER_XDR_UNIT
 *	attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	status byte for file handle - 1 * BYTES_PER_XDR_UNIT
 *	length of a file handle - 1 * BYTES_PER_XDR_UNIT
 *	Maximum length of a file handle (NFS3_MAXFHSIZE)
 *	name length of the entry rounded up to a multiple of
 *	BYTES_PER_XDR_UNIT
 *
 * The argument is parenthesized before it is handed to roundup() so that
 * an expression argument is always treated as a single value, matching
 * NFS3_READDIR_MIN_COUNT.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
	BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3432
/*
 * Initial size, in bytes, of each VOP_READDIR request issued by
 * rfs3_readdirplus(); that function further limits it to the space left
 * in its buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3434
3435 /* ARGSUSED */
3436 void
3437 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3438 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3439 {
3440 int error;
3441 vnode_t *vp;
3442 struct vattr *vap;
3443 struct vattr va;
3444 struct iovec iov;
3445 struct uio uio;
3446 char *data;
3447 int iseof;
3448 struct dirent64 *dp;
3449 vnode_t *nvp;
3450 struct vattr *nvap;
3451 struct vattr nva;
3452 entryplus3_info *infop = NULL;
3453 int size = 0;
3454 int nents = 0;
3455 int bufsize = 0;
3456 int entrysize = 0;
3457 int tofit = 0;
3458 int rd_unit = rfs3_readdir_unit;
3459 int prev_len;
3460 int space_left;
3461 int i;
3462 uint_t *namlen = NULL;
3463 char *ndata = NULL;
3464 struct sockaddr *ca;
3465 size_t ret;
3466
3467 vap = NULL;
3468
3469 vp = nfs3_fhtovp(&args->dir, exi);
3470
3471 DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3472 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3473 READDIRPLUS3args *, args);
3474
3475 if (vp == NULL) {
3476 error = ESTALE;
3477 goto out;
3478 }
3479
3480 if (is_system_labeled()) {
3481 bslabel_t *clabel = req->rq_label;
3482
3483 ASSERT(clabel != NULL);
3484 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3485 char *, "got client label from request(1)",
3486 struct svc_req *, req);
3487
3488 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3489 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3490 exi)) {
3491 resp->status = NFS3ERR_ACCES;
3492 goto out1;
3493 }
3494 }
3495 }
3496
3497 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3498
3499 va.va_mask = AT_ALL;
3500 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3501
3502 if (vp->v_type != VDIR) {
3503 error = ENOTDIR;
3504 goto out;
3505 }
3506
3507 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3508 if (error)
3509 goto out;
3510
3511 /*
3512 * Don't allow arbitrary counts for allocation
3513 */
3514 if (args->maxcount > rfs3_tsize(req))
3515 args->maxcount = rfs3_tsize(req);
3516
3517 /*
3518 * Make sure that there is room to read at least one entry
3519 * if any are available
3520 */
3521 args->dircount = MIN(args->dircount, args->maxcount);
3522
3523 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3524 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3525
3526 /*
3527 * This allocation relies on a minimum directory entry
3528 * being roughly 24 bytes. Therefore, the namlen array
3529 * will have enough space based on the maximum number of
3530 * entries to read.
3531 */
3532 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3533
3534 space_left = args->dircount;
3535 data = kmem_alloc(args->dircount, KM_SLEEP);
3536 dp = (struct dirent64 *)data;
3537 uio.uio_iov = &iov;
3538 uio.uio_iovcnt = 1;
3539 uio.uio_segflg = UIO_SYSSPACE;
3540 uio.uio_extflg = UIO_COPY_CACHED;
3541 uio.uio_loffset = (offset_t)args->cookie;
3542
3543 /*
3544 * bufsize is used to keep track of the size of the response as we
3545 * get post op attributes and filehandles for each entry. This is
3546 * an optimization as the server may have read more entries than will
3547 * fit in the buffer specified by maxcount. We stop calculating
3548 * post op attributes and filehandles once we have exceeded maxcount.
3549 * This will minimize the effect of truncation.
3550 *
3551 * It is primed with:
3552 * 1 for the status +
3553 * 1 for the dir_attributes.attributes boolean +
3554 * 2 for the cookie verifier
3555 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3556 * to bytes. If there are directory attributes to be
3557 * returned, then:
3558 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3559 * time BYTES_PER_XDR_UNIT is added to account for them.
3560 */
3561 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3562 if (vap != NULL)
3563 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3564
3565 getmoredents:
3566 /*
3567 * Here we make a check so that our read unit is not larger than
3568 * the space left in the buffer.
3569 */
3570 rd_unit = MIN(rd_unit, space_left);
3571 iov.iov_base = (char *)dp;
3572 iov.iov_len = rd_unit;
3573 uio.uio_resid = rd_unit;
3574 prev_len = rd_unit;
3575
3576 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3577
3578 if (error) {
3579 kmem_free(data, args->dircount);
3580 goto out;
3581 }
3582
3583 if (uio.uio_resid == prev_len && !iseof) {
3584 if (nents == 0) {
3585 kmem_free(data, args->dircount);
3586 resp->status = NFS3ERR_TOOSMALL;
3587 goto out1;
3588 }
3589
3590 /*
3591 * We could not get any more entries, so get the attributes
3592 * and filehandle for the entries already obtained.
3593 */
3594 goto good;
3595 }
3596
3597 /*
3598 * We estimate the size of the response by assuming the
3599 * entry exists and attributes and filehandle are also valid
3600 */
3601 for (size = prev_len - uio.uio_resid;
3602 size > 0;
3603 size -= dp->d_reclen, dp = nextdp(dp)) {
3604
3605 if (dp->d_ino == 0) {
3606 nents++;
3607 continue;
3608 }
3609
3610 namlen[nents] = strlen(dp->d_name);
3611 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3612
3613 /*
3614 * We need to check to see if the number of bytes left
3615 * to go into the buffer will actually fit into the
3616 * buffer. This is calculated as the size of this
3617 * entry plus:
3618 * 1 for the true/false list indicator +
3619 * 1 for the eof indicator
3620 * times BYTES_PER_XDR_UNIT to convert from XDR units
3621 * to bytes.
3622 *
3623 * Also check the dircount limit against the first entry read
3624 *
3625 */
3626 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3627 if (bufsize + tofit > args->maxcount) {
3628 /*
3629 * We make a check here to see if this was the
3630 * first entry being measured. If so, then maxcount
3631 * was too small to begin with and so we need to
3632 * return with NFS3ERR_TOOSMALL.
3633 */
3634 if (nents == 0) {
3635 kmem_free(data, args->dircount);
3636 resp->status = NFS3ERR_TOOSMALL;
3637 goto out1;
3638 }
3639 iseof = FALSE;
3640 goto good;
3641 }
3642 bufsize += entrysize;
3643 nents++;
3644 }
3645
3646 /*
3647 * If there is enough room to fit at least 1 more entry including
3648 * post op attributes and filehandle in the buffer AND that we haven't
3649 * exceeded dircount then go back and get some more.
3650 */
3651 if (!iseof &&
3652 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3653 space_left -= (prev_len - uio.uio_resid);
3654 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3655 goto getmoredents;
3656
3657 /* else, fall through */
3658 }
3659 good:
3660 va.va_mask = AT_ALL;
3661 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3662
3663 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3664
3665 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3666 resp->resok.infop = infop;
3667
3668 dp = (struct dirent64 *)data;
3669 for (i = 0; i < nents; i++) {
3670
3671 if (dp->d_ino == 0) {
3672 infop[i].attr.attributes = FALSE;
3673 infop[i].fh.handle_follows = FALSE;
3674 dp = nextdp(dp);
3675 continue;
3676 }
3677
3678 infop[i].namelen = namlen[i];
3679
3680 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3681 NULL, NULL, NULL);
3682 if (error) {
3683 infop[i].attr.attributes = FALSE;
3684 infop[i].fh.handle_follows = FALSE;
3685 dp = nextdp(dp);
3686 continue;
3687 }
3688
3689 nva.va_mask = AT_ALL;
3690 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3691
3692 /* Lie about the object type for a referral */
3693 if (vn_is_nfs_reparse(nvp, cr))
3694 nvap->va_type = VLNK;
3695
3696 if (vn_ismntpt(nvp)) {
3697 infop[i].attr.attributes = FALSE;
3698 infop[i].fh.handle_follows = FALSE;
3699 } else {
3700 vattr_to_post_op_attr(nvap, &infop[i].attr);
3701
3702 error = makefh3(&infop[i].fh.handle, nvp, exi);
3703 if (!error)
3704 infop[i].fh.handle_follows = TRUE;
3705 else
3706 infop[i].fh.handle_follows = FALSE;
3707 }
3708
3709 VN_RELE(nvp);
3710 dp = nextdp(dp);
3711 }
3712
3713 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3714 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3715 if (ndata == NULL)
3716 ndata = data;
3717
3718 if (ret > 0) {
3719 /*
3720 * We had to drop one or more entries in order to fit
3721 * during the character conversion. We need to patch
3722 * up the size and eof info.
3723 */
3724 if (iseof)
3725 iseof = FALSE;
3726
3727 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3728 nents, ret);
3729 }
3730
3731
3732 #if 0 /* notyet */
3733 /*
3734 * Don't do this. It causes local disk writes when just
3735 * reading the file and the overhead is deemed larger
3736 * than the benefit.
3737 */
3738 /*
3739 * Force modified metadata out to stable storage.
3740 */
3741 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3742 #endif
3743
3744 kmem_free(namlen, args->dircount);
3745
3746 resp->status = NFS3_OK;
3747 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3748 resp->resok.cookieverf = 0;
3749 resp->resok.reply.entries = (entryplus3 *)ndata;
3750 resp->resok.reply.eof = iseof;
3751 resp->resok.size = nents;
3752 resp->resok.count = args->dircount - ret;
3753 resp->resok.maxcount = args->maxcount;
3754
3755 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3756 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3757 READDIRPLUS3res *, resp);
3758
3759 VN_RELE(vp);
3760
3761 return;
3762
3763 out:
3764 if (curthread->t_flag & T_WOULDBLOCK) {
3765 curthread->t_flag &= ~T_WOULDBLOCK;
3766 resp->status = NFS3ERR_JUKEBOX;
3767 } else {
3768 resp->status = puterrno3(error);
3769 }
3770 out1:
3771 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3772
3773 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3774 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3775 READDIRPLUS3res *, resp);
3776
3777 if (vp != NULL) {
3778 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3779 VN_RELE(vp);
3780 }
3781
3782 if (namlen != NULL)
3783 kmem_free(namlen, args->dircount);
3784
3785 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3786 }
3787
3788 void *
3789 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3790 {
3791
3792 return (&args->dir);
3793 }
3794
3795 void
3796 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3797 {
3798
3799 if (resp->status == NFS3_OK) {
3800 kmem_free(resp->resok.reply.entries, resp->resok.count);
3801 kmem_free(resp->resok.infop,
3802 resp->resok.size * sizeof (struct entryplus3_info));
3803 }
3804 }
3805
3806 /* ARGSUSED */
3807 void
3808 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3809 struct svc_req *req, cred_t *cr, bool_t ro)
3810 {
3811 int error;
3812 vnode_t *vp;
3813 struct vattr *vap;
3814 struct vattr va;
3815 struct statvfs64 sb;
3816
3817 vap = NULL;
3818
3819 vp = nfs3_fhtovp(&args->fsroot, exi);
3820
3821 DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3822 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3823 FSSTAT3args *, args);
3824
3825 if (vp == NULL) {
3826 error = ESTALE;
3827 goto out;
3828 }
3829
3830 if (is_system_labeled()) {
3831 bslabel_t *clabel = req->rq_label;
3832
3833 ASSERT(clabel != NULL);
3834 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3835 "got client label from request(1)", struct svc_req *, req);
3836
3837 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3838 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3839 exi)) {
3840 resp->status = NFS3ERR_ACCES;
3841 goto out1;
3842 }
3843 }
3844 }
3845
3846 error = VFS_STATVFS(vp->v_vfsp, &sb);
3847
3848 va.va_mask = AT_ALL;
3849 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3850
3851 if (error)
3852 goto out;
3853
3854 resp->status = NFS3_OK;
3855 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3856 if (sb.f_blocks != (fsblkcnt64_t)-1)
3857 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3858 else
3859 resp->resok.tbytes = (size3)sb.f_blocks;
3860 if (sb.f_bfree != (fsblkcnt64_t)-1)
3861 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3862 else
3863 resp->resok.fbytes = (size3)sb.f_bfree;
3864 if (sb.f_bavail != (fsblkcnt64_t)-1)
3865 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3866 else
3867 resp->resok.abytes = (size3)sb.f_bavail;
3868 resp->resok.tfiles = (size3)sb.f_files;
3869 resp->resok.ffiles = (size3)sb.f_ffree;
3870 resp->resok.afiles = (size3)sb.f_favail;
3871 resp->resok.invarsec = 0;
3872
3873 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3874 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3875 FSSTAT3res *, resp);
3876 VN_RELE(vp);
3877
3878 return;
3879
3880 out:
3881 if (curthread->t_flag & T_WOULDBLOCK) {
3882 curthread->t_flag &= ~T_WOULDBLOCK;
3883 resp->status = NFS3ERR_JUKEBOX;
3884 } else
3885 resp->status = puterrno3(error);
3886 out1:
3887 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3888 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3889 FSSTAT3res *, resp);
3890
3891 if (vp != NULL)
3892 VN_RELE(vp);
3893 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3894 }
3895
3896 void *
3897 rfs3_fsstat_getfh(FSSTAT3args *args)
3898 {
3899
3900 return (&args->fsroot);
3901 }
3902
3903 /* ARGSUSED */
3904 void
3905 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3906 struct svc_req *req, cred_t *cr, bool_t ro)
3907 {
3908 vnode_t *vp;
3909 struct vattr *vap;
3910 struct vattr va;
3911 uint32_t xfer_size;
3912 ulong_t l = 0;
3913 int error;
3914
3915 vp = nfs3_fhtovp(&args->fsroot, exi);
3916
3917 DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3918 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3919 FSINFO3args *, args);
3920
3921 if (vp == NULL) {
3922 if (curthread->t_flag & T_WOULDBLOCK) {
3923 curthread->t_flag &= ~T_WOULDBLOCK;
3924 resp->status = NFS3ERR_JUKEBOX;
3925 } else
3926 resp->status = NFS3ERR_STALE;
3927 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3928 goto out;
3929 }
3930
3931 if (is_system_labeled()) {
3932 bslabel_t *clabel = req->rq_label;
3933
3934 ASSERT(clabel != NULL);
3935 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3936 "got client label from request(1)", struct svc_req *, req);
3937
3938 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3939 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3940 exi)) {
3941 resp->status = NFS3ERR_STALE;
3942 vattr_to_post_op_attr(NULL,
3943 &resp->resfail.obj_attributes);
3944 goto out;
3945 }
3946 }
3947 }
3948
3949 va.va_mask = AT_ALL;
3950 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3951
3952 resp->status = NFS3_OK;
3953 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3954 xfer_size = rfs3_tsize(req);
3955 resp->resok.rtmax = xfer_size;
3956 resp->resok.rtpref = xfer_size;
3957 resp->resok.rtmult = DEV_BSIZE;
3958 resp->resok.wtmax = xfer_size;
3959 resp->resok.wtpref = xfer_size;
3960 resp->resok.wtmult = DEV_BSIZE;
3961 resp->resok.dtpref = MAXBSIZE;
3962
3963 /*
3964 * Large file spec: want maxfilesize based on limit of
3965 * underlying filesystem. We can guess 2^31-1 if need be.
3966 */
3967 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3968 if (error) {
3969 resp->status = puterrno3(error);
3970 goto out;
3971 }
3972
3973 /*
3974 * If the underlying file system does not support _PC_FILESIZEBITS,
3975 * return a reasonable default. Note that error code on VOP_PATHCONF
3976 * will be 0, even if the underlying file system does not support
3977 * _PC_FILESIZEBITS.
3978 */
3979 if (l == (ulong_t)-1) {
3980 resp->resok.maxfilesize = MAXOFF32_T;
3981 } else {
3982 if (l >= (sizeof (uint64_t) * 8))
3983 resp->resok.maxfilesize = INT64_MAX;
3984 else
3985 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3986 }
3987
3988 resp->resok.time_delta.seconds = 0;
3989 resp->resok.time_delta.nseconds = 1000;
3990 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3991 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3992
3993 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3994 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3995 FSINFO3res *, resp);
3996
3997 VN_RELE(vp);
3998
3999 return;
4000
4001 out:
4002 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
4003 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
4004 FSINFO3res *, resp);
4005 if (vp != NULL)
4006 VN_RELE(vp);
4007 }
4008
4009 void *
4010 rfs3_fsinfo_getfh(FSINFO3args *args)
4011 {
4012 return (&args->fsroot);
4013 }
4014
4015 /* ARGSUSED */
4016 void
4017 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4018 struct svc_req *req, cred_t *cr, bool_t ro)
4019 {
4020 int error;
4021 vnode_t *vp;
4022 struct vattr *vap;
4023 struct vattr va;
4024 ulong_t val;
4025
4026 vap = NULL;
4027
4028 vp = nfs3_fhtovp(&args->object, exi);
4029
4030 DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4031 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4032 PATHCONF3args *, args);
4033
4034 if (vp == NULL) {
4035 error = ESTALE;
4036 goto out;
4037 }
4038
4039 if (is_system_labeled()) {
4040 bslabel_t *clabel = req->rq_label;
4041
4042 ASSERT(clabel != NULL);
4043 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4044 "got client label from request(1)", struct svc_req *, req);
4045
4046 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4047 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4048 exi)) {
4049 resp->status = NFS3ERR_ACCES;
4050 goto out1;
4051 }
4052 }
4053 }
4054
4055 va.va_mask = AT_ALL;
4056 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4057
4058 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4059 if (error)
4060 goto out;
4061 resp->resok.info.link_max = (uint32)val;
4062
4063 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4064 if (error)
4065 goto out;
4066 resp->resok.info.name_max = (uint32)val;
4067
4068 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4069 if (error)
4070 goto out;
4071 if (val == 1)
4072 resp->resok.info.no_trunc = TRUE;
4073 else
4074 resp->resok.info.no_trunc = FALSE;
4075
4076 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4077 if (error)
4078 goto out;
4079 if (val == 1)
4080 resp->resok.info.chown_restricted = TRUE;
4081 else
4082 resp->resok.info.chown_restricted = FALSE;
4083
4084 resp->status = NFS3_OK;
4085 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4086 resp->resok.info.case_insensitive = FALSE;
4087 resp->resok.info.case_preserving = TRUE;
4088 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4089 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4090 PATHCONF3res *, resp);
4091 VN_RELE(vp);
4092 return;
4093
4094 out:
4095 if (curthread->t_flag & T_WOULDBLOCK) {
4096 curthread->t_flag &= ~T_WOULDBLOCK;
4097 resp->status = NFS3ERR_JUKEBOX;
4098 } else
4099 resp->status = puterrno3(error);
4100 out1:
4101 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4102 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4103 PATHCONF3res *, resp);
4104 if (vp != NULL)
4105 VN_RELE(vp);
4106 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4107 }
4108
4109 void *
4110 rfs3_pathconf_getfh(PATHCONF3args *args)
4111 {
4112
4113 return (&args->object);
4114 }
4115
4116 void
4117 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4118 struct svc_req *req, cred_t *cr, bool_t ro)
4119 {
4120 nfs3_srv_t *ns;
4121 int error;
4122 vnode_t *vp;
4123 struct vattr *bvap;
4124 struct vattr bva;
4125 struct vattr *avap;
4126 struct vattr ava;
4127
4128 bvap = NULL;
4129 avap = NULL;
4130
4131 vp = nfs3_fhtovp(&args->file, exi);
4132
4133 DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4134 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4135 COMMIT3args *, args);
4136
4137 if (vp == NULL) {
4138 error = ESTALE;
4139 goto out;
4140 }
4141
4142 ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
4143 ns = nfs3_get_srv();
4144 bva.va_mask = AT_ALL;
4145 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4146
4147 /*
4148 * If we can't get the attributes, then we can't do the
4149 * right access checking. So, we'll fail the request.
4150 */
4151 if (error)
4152 goto out;
4153
4154 bvap = &bva;
4155
4156 if (rdonly(ro, vp)) {
4157 resp->status = NFS3ERR_ROFS;
4158 goto out1;
4159 }
4160
4161 if (vp->v_type != VREG) {
4162 resp->status = NFS3ERR_INVAL;
4163 goto out1;
4164 }
4165
4166 if (is_system_labeled()) {
4167 bslabel_t *clabel = req->rq_label;
4168
4169 ASSERT(clabel != NULL);
4170 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4171 "got client label from request(1)", struct svc_req *, req);
4172
4173 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4174 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4175 exi)) {
4176 resp->status = NFS3ERR_ACCES;
4177 goto out1;
4178 }
4179 }
4180 }
4181
4182 if (crgetuid(cr) != bva.va_uid &&
4183 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4184 goto out;
4185
4186 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4187
4188 ava.va_mask = AT_ALL;
4189 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4190
4191 if (error)
4192 goto out;
4193
4194 resp->status = NFS3_OK;
4195 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4196 resp->resok.verf = ns->write3verf;
4197
4198 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4199 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4200 COMMIT3res *, resp);
4201
4202 VN_RELE(vp);
4203
4204 return;
4205
4206 out:
4207 if (curthread->t_flag & T_WOULDBLOCK) {
4208 curthread->t_flag &= ~T_WOULDBLOCK;
4209 resp->status = NFS3ERR_JUKEBOX;
4210 } else
4211 resp->status = puterrno3(error);
4212 out1:
4213 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4214 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4215 COMMIT3res *, resp);
4216
4217 if (vp != NULL)
4218 VN_RELE(vp);
4219 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4220 }
4221
4222 void *
4223 rfs3_commit_getfh(COMMIT3args *args)
4224 {
4225
4226 return (&args->file);
4227 }
4228
4229 static int
4230 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4231 {
4232
4233 vap->va_mask = 0;
4234
4235 if (sap->mode.set_it) {
4236 vap->va_mode = (mode_t)sap->mode.mode;
4237 vap->va_mask |= AT_MODE;
4238 }
4239 if (sap->uid.set_it) {
4240 vap->va_uid = (uid_t)sap->uid.uid;
4241 vap->va_mask |= AT_UID;
4242 }
4243 if (sap->gid.set_it) {
4244 vap->va_gid = (gid_t)sap->gid.gid;
4245 vap->va_mask |= AT_GID;
4246 }
4247 if (sap->size.set_it) {
4248 if (sap->size.size > (size3)((u_longlong_t)-1))
4249 return (EINVAL);
4250 vap->va_size = sap->size.size;
4251 vap->va_mask |= AT_SIZE;
4252 }
4253 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4254 #ifndef _LP64
4255 /* check time validity */
4256 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4257 return (EOVERFLOW);
4258 #endif
4259 /*
4260 * nfs protocol defines times as unsigned so don't extend sign,
4261 * unless sysadmin set nfs_allow_preepoch_time.
4262 */
4263 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4264 sap->atime.atime.seconds);
4265 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4266 vap->va_mask |= AT_ATIME;
4267 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4268 gethrestime(&vap->va_atime);
4269 vap->va_mask |= AT_ATIME;
4270 }
4271 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4272 #ifndef _LP64
4273 /* check time validity */
4274 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4275 return (EOVERFLOW);
4276 #endif
4277 /*
4278 * nfs protocol defines times as unsigned so don't extend sign,
4279 * unless sysadmin set nfs_allow_preepoch_time.
4280 */
4281 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4282 sap->mtime.mtime.seconds);
4283 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4284 vap->va_mask |= AT_MTIME;
4285 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4286 gethrestime(&vap->va_mtime);
4287 vap->va_mask |= AT_MTIME;
4288 }
4289
4290 return (0);
4291 }
4292
/*
 * Table translating a vnode type into the NFSv3 ftype3 value stored in
 * fattr3.type; vattr_to_fattr3() indexes it directly with va_type.
 * NOTE(review): the entries are presumably in vtype enum order starting
 * at VNON, with 0 marking vnode types that have no ftype3 counterpart --
 * confirm against the vtype definition, including that the table has an
 * entry for every value the ASSERT in vattr_to_fattr3() admits.
 */
static const ftype3 vt_to_nf3[] = {
	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
};
4296
4297 static int
4298 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4299 {
4300
4301 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4302 /* Return error if time or size overflow */
4303 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4304 return (EOVERFLOW);
4305 }
4306 fap->type = vt_to_nf3[vap->va_type];
4307 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4308 fap->nlink = (uint32)vap->va_nlink;
4309 if (vap->va_uid == UID_NOBODY)
4310 fap->uid = (uid3)NFS_UID_NOBODY;
4311 else
4312 fap->uid = (uid3)vap->va_uid;
4313 if (vap->va_gid == GID_NOBODY)
4314 fap->gid = (gid3)NFS_GID_NOBODY;
4315 else
4316 fap->gid = (gid3)vap->va_gid;
4317 fap->size = (size3)vap->va_size;
4318 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4319 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4320 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4321 fap->fsid = (uint64)vap->va_fsid;
4322 fap->fileid = (fileid3)vap->va_nodeid;
4323 fap->atime.seconds = vap->va_atime.tv_sec;
4324 fap->atime.nseconds = vap->va_atime.tv_nsec;
4325 fap->mtime.seconds = vap->va_mtime.tv_sec;
4326 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4327 fap->ctime.seconds = vap->va_ctime.tv_sec;
4328 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4329 return (0);
4330 }
4331
4332 static int
4333 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4334 {
4335
4336 /* Return error if time or size overflow */
4337 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4338 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4339 NFS3_SIZE_OK(vap->va_size))) {
4340 return (EOVERFLOW);
4341 }
4342 wccap->size = (size3)vap->va_size;
4343 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4344 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4345 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4346 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4347 return (0);
4348 }
4349
4350 static void
4351 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4352 {
4353
4354 /* don't return attrs if time overflow */
4355 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4356 poap->attributes = TRUE;
4357 } else
4358 poap->attributes = FALSE;
4359 }
4360
4361 void
4362 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4363 {
4364
4365 /* don't return attrs if time overflow */
4366 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4367 poap->attributes = TRUE;
4368 } else
4369 poap->attributes = FALSE;
4370 }
4371
4372 static void
4373 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4374 {
4375 vattr_to_pre_op_attr(bvap, &wccp->before);
4376 vattr_to_post_op_attr(avap, &wccp->after);
4377 }
4378
4379 static int
4380 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4381 {
4382 struct clist *wcl;
4383 int wlist_len;
4384 count3 count = rok->count;
4385
4386 wcl = args->wlist;
4387 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4388 return (FALSE);
4389
4390 wcl = args->wlist;
4391 rok->wlist_len = wlist_len;
4392 rok->wlist = wcl;
4393 return (TRUE);
4394 }
4395
4396 void
4397 rfs3_srv_zone_init(nfs_globals_t *ng)
4398 {
4399 nfs3_srv_t *ns;
4400 struct rfs3_verf_overlay {
4401 uint_t id; /* a "unique" identifier */
4402 int ts; /* a unique timestamp */
4403 } *verfp;
4404 timestruc_t now;
4405
4406 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4407
4408 /*
4409 * The following algorithm attempts to find a unique verifier
4410 * to be used as the write verifier returned from the server
4411 * to the client. It is important that this verifier change
4412 * whenever the server reboots. Of secondary importance, it
4413 * is important for the verifier to be unique between two
4414 * different servers.
4415 *
4416 * Thus, an attempt is made to use the system hostid and the
4417 * current time in seconds when the nfssrv kernel module is
4418 * loaded. It is assumed that an NFS server will not be able
4419 * to boot and then to reboot in less than a second. If the
4420 * hostid has not been set, then the current high resolution
4421 * time is used. This will ensure different verifiers each
4422 * time the server reboots and minimize the chances that two
4423 * different servers will have the same verifier.
4424 */
4425
4426 #ifndef lint
4427 /*
4428 * We ASSERT that this constant logic expression is
4429 * always true because in the past, it wasn't.
4430 */
4431 ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4432 #endif
4433
4434 gethrestime(&now);
4435 verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4436 verfp->ts = (int)now.tv_sec;
4437 verfp->id = zone_get_hostid(NULL);
4438
4439 if (verfp->id == 0)
4440 verfp->id = (uint_t)now.tv_nsec;
4441
4442 ng->nfs3_srv = ns;
4443 }
4444
4445 void
4446 rfs3_srv_zone_fini(nfs_globals_t *ng)
4447 {
4448 nfs3_srv_t *ns = ng->nfs3_srv;
4449
4450 ng->nfs3_srv = NULL;
4451
4452 kmem_free(ns, sizeof (*ns));
4453 }
4454
4455 void
4456 rfs3_srvrinit(void)
4457 {
4458 nfs3_srv_caller_id = fs_new_caller_id();
4459 }
4460
/*
 * Global NFSv3 server teardown, the counterpart of rfs3_srvrinit().
 */
void
rfs3_srvrfini(void)
{

	/* There is nothing to undo here. */
}