1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2018 Nexenta Systems, Inc.
24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 #include <sys/sdt.h>
52
53 #include <rpc/types.h>
54 #include <rpc/auth.h>
55 #include <rpc/svc.h>
56 #include <rpc/rpc_rdma.h>
57
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61
62 #include <sys/strsubr.h>
63 #include <sys/tsol/label.h>
64 #include <sys/tsol/tndb.h>
65
66 #include <sys/zone.h>
67
68 #include <inet/ip.h>
69 #include <inet/ip6.h>
70
71 /*
72 * Zone global variables of NFSv3 server
73 */
typedef struct nfs3_srv {
	/*
	 * NOTE(review): presumably the write verifier handed back in
	 * WRITE3/COMMIT3 replies; its users are outside this view -- confirm.
	 */
	writeverf3 write3verf;
} nfs3_srv_t;
77
78 /*
79 * These are the interface routines for the server side of the
80 * Network File System. See the NFS version 3 protocol specification
81 * for a description of this interface.
82 */
83
/*
 * Converters between the NFSv3 attribute types and struct vattr.  The
 * int-returning ones report a conversion error to the caller (see the
 * overflow handling in rfs3_getattr()).
 */
static int sattr3_to_vattr(sattr3 *, struct vattr *);
static int vattr_to_fattr3(struct vattr *, fattr3 *);
static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
/* Finishes the RDMA portion of a READ3 reply; a zero return means failure. */
static int rdma_setup_read_data3(READ3args *, READ3resok *);

/*
 * Defined elsewhere.  When nonzero, rfs3_read() first tries loaned
 * (zero-copy) buffers for reads that do not use RDMA.
 */
extern int nfs_loaned_buffers;

/* Stored in caller_context_t.cc_caller_id for VOP calls made by this server. */
u_longlong_t nfs3_srv_caller_id;
/*
 * NOTE(review): presumably the zone key under which the per-zone nfs3_srv_t
 * is registered; the registration is not visible here -- confirm.
 */
static zone_key_t rfs3_zone_key;
95
96 /* ARGSUSED */
97 void
98 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
99 struct svc_req *req, cred_t *cr, bool_t ro)
100 {
101 int error;
102 vnode_t *vp;
103 struct vattr va;
104
105 vp = nfs3_fhtovp(&args->object, exi);
106
107 DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
108 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
109 GETATTR3args *, args);
110
111 if (vp == NULL) {
112 error = ESTALE;
113 goto out;
114 }
115
116 va.va_mask = AT_ALL;
117 error = rfs4_delegated_getattr(vp, &va, 0, cr);
118
119 if (!error) {
120 /* Lie about the object type for a referral */
121 if (vn_is_nfs_reparse(vp, cr))
122 va.va_type = VLNK;
123
124 /* overflow error if time or size is out of range */
125 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
126 if (error)
127 goto out;
128 resp->status = NFS3_OK;
129
130 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
131 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
132 GETATTR3res *, resp);
133
134 VN_RELE(vp);
135
136 return;
137 }
138
139 out:
140 if (curthread->t_flag & T_WOULDBLOCK) {
141 curthread->t_flag &= ~T_WOULDBLOCK;
142 resp->status = NFS3ERR_JUKEBOX;
143 } else
144 resp->status = puterrno3(error);
145
146 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
147 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
148 GETATTR3res *, resp);
149
150 if (vp != NULL)
151 VN_RELE(vp);
152 }
153
154 void *
155 rfs3_getattr_getfh(GETATTR3args *args)
156 {
157
158 return (&args->object);
159 }
160
/*
 * SETATTR: apply args->new_attributes to the object named by args->object.
 *
 * On success resp->status is NFS3_OK and resp->resok.obj_wcc carries the
 * before/after attributes.  On failure resp->status holds the error and
 * resp->resfail.obj_wcc carries whichever of those attributes were obtained
 * before the failure.
 *
 * Two failure labels are used: "out" maps `error' (or a pending
 * T_WOULDBLOCK) to an NFS status, while "out1" is entered with resp->status
 * already set.
 */
void
rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr *bvap;	/* pre-op attributes; NULL until fetched */
	struct vattr bva;
	struct vattr *avap;	/* post-op attributes; NULL until fetched */
	struct vattr ava;	/* requested attributes, later reused post-op */
	int flag;
	int in_crit = 0;	/* nonzero while in the nbmand critical region */
	struct flock64 bf;
	caller_context_t ct;

	bvap = NULL;
	avap = NULL;

	vp = nfs3_fhtovp(&args->object, exi);

	DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    SETATTR3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* Convert the requested attributes; ava.va_mask says which are set. */
	error = sattr3_to_vattr(&args->new_attributes, &ava);
	if (error)
		goto out;

	/*
	 * On a labeled system a client that is not admin_low must pass an
	 * equality label check against the object before modifying it.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/*
	 * We need to specially handle size changes because of
	 * possible conflicting NBMAND locks. Get into critical
	 * region before VOP_GETATTR, so the size attribute is
	 * valid when checking conflicts.
	 *
	 * Also, check to see if the v4 side of the server has
	 * delegated this file. If so, then we return JUKEBOX to
	 * allow the client to retransmit its request.
	 */
	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}
	}

	bva.va_mask = AT_ALL;
	error = rfs4_delegated_getattr(vp, &bva, 0, cr);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking. So, we'll fail the request.
	 */
	if (error)
		goto out;

	bvap = &bva;

	if (rdonly(ro, vp)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	/*
	 * Guarded setattr: the client-supplied ctime must match the
	 * object's current ctime exactly, otherwise the request is refused.
	 */
	if (args->guard.check &&
	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
		resp->status = NFS3ERR_NOT_SYNC;
		goto out1;
	}

	/* ATTR_UTIME is passed only when the client supplied the mtime. */
	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
		flag = ATTR_UTIME;
	else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		ava.va_mode &= ~(VSUID | VSGID);

	/* Caller context for the VOP calls below; do not block on conflicts. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of the change.
	 */
	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The affected region lies between the old and
			 * the new size, whichever direction the change
			 * goes.
			 */
			if (ava.va_size < bva.va_size) {
				offset = ava.va_size;
				length = bva.va_size - ava.va_size;
			} else {
				offset = bva.va_size;
				length = ava.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
				goto out;
			}
		}

		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
			/*
			 * The owner's size change is done with VOP_SPACE;
			 * drop AT_SIZE so VOP_SETATTR does not repeat it.
			 */
			ava.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)ava.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)ava.va_size, cr, &ct);
		}
	}

	/* Apply whatever attributes remain to be set, if any. */
	if (!error && ava.va_mask)
		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto out1;
	}

	/* Reuse ava for the post-op attributes; NULL if they are unavailable. */
	ava.va_mask = AT_ALL;
	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	/* `error' here is still the result of VOP_SPACE/VOP_SETATTR above. */
	if (error)
		goto out;

	if (in_crit)
		nbl_end_crit(vp);

	resp->status = NFS3_OK;
	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);

	DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    SETATTR3res *, resp);

	VN_RELE(vp);

	return;

out:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    SETATTR3res *, resp);

	if (vp != NULL) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
	/* bvap/avap point at locals (or are NULL), so this is safe after VN_RELE. */
	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
}
370
371 void *
372 rfs3_setattr_getfh(SETATTR3args *args)
373 {
374
375 return (&args->object);
376 }
377
378 /* ARGSUSED */
379 void
380 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
381 struct svc_req *req, cred_t *cr, bool_t ro)
382 {
383 int error;
384 vnode_t *vp;
385 vnode_t *dvp;
386 struct vattr *vap;
387 struct vattr va;
388 struct vattr *dvap;
389 struct vattr dva;
390 nfs_fh3 *fhp;
391 struct sec_ol sec = {0, 0};
392 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
393 struct sockaddr *ca;
394 char *name = NULL;
395
396 dvap = NULL;
397
398 if (exi != NULL)
399 exi_hold(exi);
400
401 /*
402 * Allow lookups from the root - the default
403 * location of the public filehandle.
404 */
405 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
406 dvp = ZONE_ROOTVP();
407 VN_HOLD(dvp);
408
409 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
410 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
411 LOOKUP3args *, args);
412 } else {
413 dvp = nfs3_fhtovp(&args->what.dir, exi);
414
415 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
416 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
417 LOOKUP3args *, args);
418
419 if (dvp == NULL) {
420 error = ESTALE;
421 goto out;
422 }
423 }
424
425 dva.va_mask = AT_ALL;
426 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
427
428 if (args->what.name == nfs3nametoolong) {
429 resp->status = NFS3ERR_NAMETOOLONG;
430 goto out1;
431 }
432
433 if (args->what.name == NULL || *(args->what.name) == '\0') {
434 resp->status = NFS3ERR_ACCES;
435 goto out1;
436 }
437
438 fhp = &args->what.dir;
439 ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
440 if (strcmp(args->what.name, "..") == 0 &&
441 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
442 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
443 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
444 /*
445 * special case for ".." and 'nohide'exported root
446 */
447 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
448 resp->status = NFS3ERR_ACCES;
449 goto out1;
450 }
451 } else {
452 resp->status = NFS3ERR_NOENT;
453 goto out1;
454 }
455 }
456
457 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
458 name = nfscmd_convname(ca, exi, args->what.name,
459 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
460
461 if (name == NULL) {
462 resp->status = NFS3ERR_ACCES;
463 goto out1;
464 }
465
466 /*
467 * If the public filehandle is used then allow
468 * a multi-component lookup
469 */
470 if (PUBLIC_FH3(&args->what.dir)) {
471 publicfh_flag = TRUE;
472
473 exi_rele(exi);
474
475 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
476 &exi, &sec);
477
478 /*
479 * Since WebNFS may bypass MOUNT, we need to ensure this
480 * request didn't come from an unlabeled admin_low client.
481 */
482 if (is_system_labeled() && error == 0) {
483 int addr_type;
484 void *ipaddr;
485 tsol_tpc_t *tp;
486
487 if (ca->sa_family == AF_INET) {
488 addr_type = IPV4_VERSION;
489 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
490 } else if (ca->sa_family == AF_INET6) {
491 addr_type = IPV6_VERSION;
492 ipaddr = &((struct sockaddr_in6 *)
493 ca)->sin6_addr;
494 }
495 tp = find_tpc(ipaddr, addr_type, B_FALSE);
496 if (tp == NULL || tp->tpc_tp.tp_doi !=
497 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
498 SUN_CIPSO) {
499 VN_RELE(vp);
500 error = EACCES;
501 }
502 if (tp != NULL)
503 TPC_RELE(tp);
504 }
505 } else {
506 error = VOP_LOOKUP(dvp, name, &vp,
507 NULL, 0, NULL, cr, NULL, NULL, NULL);
508 }
509
510 if (name != args->what.name)
511 kmem_free(name, MAXPATHLEN + 1);
512
513 if (error == 0 && vn_ismntpt(vp)) {
514 error = rfs_cross_mnt(&vp, &exi);
515 if (error)
516 VN_RELE(vp);
517 }
518
519 if (is_system_labeled() && error == 0) {
520 bslabel_t *clabel = req->rq_label;
521
522 ASSERT(clabel != NULL);
523 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
524 "got client label from request(1)", struct svc_req *, req);
525
526 if (!blequal(&l_admin_low->tsl_label, clabel)) {
527 if (!do_rfs_label_check(clabel, dvp,
528 DOMINANCE_CHECK, exi)) {
529 VN_RELE(vp);
530 error = EACCES;
531 }
532 }
533 }
534
535 dva.va_mask = AT_ALL;
536 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
537
538 if (error)
539 goto out;
540
541 if (sec.sec_flags & SEC_QUERY) {
542 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
543 } else {
544 error = makefh3(&resp->resok.object, vp, exi);
545 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
546 auth_weak = TRUE;
547 }
548
549 if (error) {
550 VN_RELE(vp);
551 goto out;
552 }
553
554 va.va_mask = AT_ALL;
555 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
556
557 exi_rele(exi);
558 VN_RELE(vp);
559
560 resp->status = NFS3_OK;
561 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
562 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
563
564 /*
565 * If it's public fh, no 0x81, and client's flavor is
566 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
567 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
568 */
569 if (auth_weak)
570 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
571
572 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
573 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
574 LOOKUP3res *, resp);
575 VN_RELE(dvp);
576
577 return;
578
579 out:
580 if (curthread->t_flag & T_WOULDBLOCK) {
581 curthread->t_flag &= ~T_WOULDBLOCK;
582 resp->status = NFS3ERR_JUKEBOX;
583 } else
584 resp->status = puterrno3(error);
585 out1:
586 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
587 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
588 LOOKUP3res *, resp);
589
590 if (exi != NULL)
591 exi_rele(exi);
592
593 if (dvp != NULL)
594 VN_RELE(dvp);
595 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
596
597 }
598
599 void *
600 rfs3_lookup_getfh(LOOKUP3args *args)
601 {
602
603 return (&args->what.dir);
604 }
605
606 /* ARGSUSED */
607 void
608 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
609 struct svc_req *req, cred_t *cr, bool_t ro)
610 {
611 int error;
612 vnode_t *vp;
613 struct vattr *vap;
614 struct vattr va;
615 int checkwriteperm;
616 boolean_t dominant_label = B_FALSE;
617 boolean_t equal_label = B_FALSE;
618 boolean_t admin_low_client;
619
620 vap = NULL;
621
622 vp = nfs3_fhtovp(&args->object, exi);
623
624 DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
625 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
626 ACCESS3args *, args);
627
628 if (vp == NULL) {
629 error = ESTALE;
630 goto out;
631 }
632
633 /*
634 * If the file system is exported read only, it is not appropriate
635 * to check write permissions for regular files and directories.
636 * Special files are interpreted by the client, so the underlying
637 * permissions are sent back to the client for interpretation.
638 */
639 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
640 checkwriteperm = 0;
641 else
642 checkwriteperm = 1;
643
644 /*
645 * We need the mode so that we can correctly determine access
646 * permissions relative to a mandatory lock file. Access to
647 * mandatory lock files is denied on the server, so it might
648 * as well be reflected to the server during the open.
649 */
650 va.va_mask = AT_MODE;
651 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
652 if (error)
653 goto out;
654
655 vap = &va;
656
657 resp->resok.access = 0;
658
659 if (is_system_labeled()) {
660 bslabel_t *clabel = req->rq_label;
661
662 ASSERT(clabel != NULL);
663 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
664 "got client label from request(1)", struct svc_req *, req);
665
666 if (!blequal(&l_admin_low->tsl_label, clabel)) {
667 if ((equal_label = do_rfs_label_check(clabel, vp,
668 EQUALITY_CHECK, exi)) == B_FALSE) {
669 dominant_label = do_rfs_label_check(clabel,
670 vp, DOMINANCE_CHECK, exi);
671 } else
672 dominant_label = B_TRUE;
673 admin_low_client = B_FALSE;
674 } else
675 admin_low_client = B_TRUE;
676 }
677
678 if (args->access & ACCESS3_READ) {
679 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
680 if (error) {
681 if (curthread->t_flag & T_WOULDBLOCK)
682 goto out;
683 } else if (!MANDLOCK(vp, va.va_mode) &&
684 (!is_system_labeled() || admin_low_client ||
685 dominant_label))
686 resp->resok.access |= ACCESS3_READ;
687 }
688 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
689 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
690 if (error) {
691 if (curthread->t_flag & T_WOULDBLOCK)
692 goto out;
693 } else if (!is_system_labeled() || admin_low_client ||
694 dominant_label)
695 resp->resok.access |= ACCESS3_LOOKUP;
696 }
697 if (checkwriteperm &&
698 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
699 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
700 if (error) {
701 if (curthread->t_flag & T_WOULDBLOCK)
702 goto out;
703 } else if (!MANDLOCK(vp, va.va_mode) &&
704 (!is_system_labeled() || admin_low_client || equal_label)) {
705 resp->resok.access |=
706 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
707 }
708 }
709 if (checkwriteperm &&
710 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
711 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
712 if (error) {
713 if (curthread->t_flag & T_WOULDBLOCK)
714 goto out;
715 } else if (!is_system_labeled() || admin_low_client ||
716 equal_label)
717 resp->resok.access |= ACCESS3_DELETE;
718 }
719 if (args->access & ACCESS3_EXECUTE) {
720 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
721 if (error) {
722 if (curthread->t_flag & T_WOULDBLOCK)
723 goto out;
724 } else if (!MANDLOCK(vp, va.va_mode) &&
725 (!is_system_labeled() || admin_low_client ||
726 dominant_label))
727 resp->resok.access |= ACCESS3_EXECUTE;
728 }
729
730 va.va_mask = AT_ALL;
731 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
732
733 resp->status = NFS3_OK;
734 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
735
736 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
737 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
738 ACCESS3res *, resp);
739
740 VN_RELE(vp);
741
742 return;
743
744 out:
745 if (curthread->t_flag & T_WOULDBLOCK) {
746 curthread->t_flag &= ~T_WOULDBLOCK;
747 resp->status = NFS3ERR_JUKEBOX;
748 } else
749 resp->status = puterrno3(error);
750 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
751 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
752 ACCESS3res *, resp);
753 if (vp != NULL)
754 VN_RELE(vp);
755 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
756 }
757
758 void *
759 rfs3_access_getfh(ACCESS3args *args)
760 {
761
762 return (&args->object);
763 }
764
765 /* ARGSUSED */
766 void
767 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
768 struct svc_req *req, cred_t *cr, bool_t ro)
769 {
770 int error;
771 vnode_t *vp;
772 struct vattr *vap;
773 struct vattr va;
774 struct iovec iov;
775 struct uio uio;
776 char *data;
777 struct sockaddr *ca;
778 char *name = NULL;
779 int is_referral = 0;
780
781 vap = NULL;
782
783 vp = nfs3_fhtovp(&args->symlink, exi);
784
785 DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
786 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
787 READLINK3args *, args);
788
789 if (vp == NULL) {
790 error = ESTALE;
791 goto out;
792 }
793
794 va.va_mask = AT_ALL;
795 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
796 if (error)
797 goto out;
798
799 vap = &va;
800
801 /* We lied about the object type for a referral */
802 if (vn_is_nfs_reparse(vp, cr))
803 is_referral = 1;
804
805 if (vp->v_type != VLNK && !is_referral) {
806 resp->status = NFS3ERR_INVAL;
807 goto out1;
808 }
809
810 if (MANDLOCK(vp, va.va_mode)) {
811 resp->status = NFS3ERR_ACCES;
812 goto out1;
813 }
814
815 if (is_system_labeled()) {
816 bslabel_t *clabel = req->rq_label;
817
818 ASSERT(clabel != NULL);
819 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
820 "got client label from request(1)", struct svc_req *, req);
821
822 if (!blequal(&l_admin_low->tsl_label, clabel)) {
823 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
824 exi)) {
825 resp->status = NFS3ERR_ACCES;
826 goto out1;
827 }
828 }
829 }
830
831 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
832
833 if (is_referral) {
834 char *s;
835 size_t strsz;
836
837 /* Get an artificial symlink based on a referral */
838 s = build_symlink(vp, cr, &strsz);
839 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
840 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
841 vnode_t *, vp, char *, s);
842 if (s == NULL)
843 error = EINVAL;
844 else {
845 error = 0;
846 (void) strlcpy(data, s, MAXPATHLEN + 1);
847 kmem_free(s, strsz);
848 }
849
850 } else {
851
852 iov.iov_base = data;
853 iov.iov_len = MAXPATHLEN;
854 uio.uio_iov = &iov;
855 uio.uio_iovcnt = 1;
856 uio.uio_segflg = UIO_SYSSPACE;
857 uio.uio_extflg = UIO_COPY_CACHED;
858 uio.uio_loffset = 0;
859 uio.uio_resid = MAXPATHLEN;
860
861 error = VOP_READLINK(vp, &uio, cr, NULL);
862
863 if (!error)
864 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
865 }
866
867 va.va_mask = AT_ALL;
868 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
869
870 /* Lie about object type again just to be consistent */
871 if (is_referral && vap != NULL)
872 vap->va_type = VLNK;
873
874 #if 0 /* notyet */
875 /*
876 * Don't do this. It causes local disk writes when just
877 * reading the file and the overhead is deemed larger
878 * than the benefit.
879 */
880 /*
881 * Force modified metadata out to stable storage.
882 */
883 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
884 #endif
885
886 if (error) {
887 kmem_free(data, MAXPATHLEN + 1);
888 goto out;
889 }
890
891 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
892 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
893 MAXPATHLEN + 1);
894
895 if (name == NULL) {
896 /*
897 * Even though the conversion failed, we return
898 * something. We just don't translate it.
899 */
900 name = data;
901 }
902
903 resp->status = NFS3_OK;
904 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
905 resp->resok.data = name;
906
907 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
908 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
909 READLINK3res *, resp);
910 VN_RELE(vp);
911
912 if (name != data)
913 kmem_free(data, MAXPATHLEN + 1);
914
915 return;
916
917 out:
918 if (curthread->t_flag & T_WOULDBLOCK) {
919 curthread->t_flag &= ~T_WOULDBLOCK;
920 resp->status = NFS3ERR_JUKEBOX;
921 } else
922 resp->status = puterrno3(error);
923 out1:
924 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
925 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
926 READLINK3res *, resp);
927 if (vp != NULL)
928 VN_RELE(vp);
929 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
930 }
931
932 void *
933 rfs3_readlink_getfh(READLINK3args *args)
934 {
935
936 return (&args->symlink);
937 }
938
939 void
940 rfs3_readlink_free(READLINK3res *resp)
941 {
942
943 if (resp->status == NFS3_OK)
944 kmem_free(resp->resok.data, MAXPATHLEN + 1);
945 }
946
/*
 * Server routine to handle read
 * May handle RDMA data as well as mblks
 *
 * Reads up to args->count bytes at args->offset from the regular file named
 * by args->file.  The data is returned in one of three ways: via the
 * client's RDMA write list (args->wlist), via loaned zero-copy buffers when
 * nfs_loaned_buffers is set and RDMA is not in use, or via freshly
 * allocated mblks.
 *
 * Successful replies leave through "done".  Failures needing `error'
 * mapped to a status go to "out"; failures with resp->status already set
 * go to "out1".  Both failure paths drop the rwlock and the nbmand
 * critical region if they are held.
 */
/* ARGSUSED */
void
rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr *vap;
	struct vattr va;
	struct iovec iov, *iovp = NULL;
	int iovcnt;
	struct uio uio;
	u_offset_t offset;
	mblk_t *mp = NULL;
	int in_crit = 0;	/* nonzero while in the nbmand critical region */
	int need_rwunlock = 0;	/* nonzero once VOP_RWLOCK has been called */
	caller_context_t ct;
	int rdma_used = 0;	/* nonzero when the client sent a write list */
	int loaned_buffers;	/* nonzero while the zero-copy path is in use */
	struct uio *uiop;

	vap = NULL;

	vp = nfs3_fhtovp(&args->file, exi);

	DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    READ3args *, args);


	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* With RDMA the write list must be able to hold the whole request. */
	if (args->wlist) {
		if (args->count > clist_len(args->wlist)) {
			error = EINVAL;
			goto out;
		}
		rdma_used = 1;
	}

	/* use loaned buffers for TCP */
	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

	/*
	 * On a labeled system a client that is not admin_low must pass a
	 * dominance label check against the file.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/* Caller context for the VOP calls below; do not block on conflicts. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto out1;
	}

	need_rwunlock = 1;

	va.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking. So, we'll fail the request.
	 */
	if (error)
		goto out;

	vap = &va;

	if (vp->v_type != VREG) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * The owner is not access-checked.  Anyone else needs read
	 * permission or, failing that, execute permission.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
		if (error) {
			if (curthread->t_flag & T_WOULDBLOCK)
				goto out;
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
			if (error)
				goto out;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	/* Reading at or beyond EOF: reply OK with no data and eof set. */
	offset = args->offset;
	if (offset >= va.va_size) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		resp->status = NFS3_OK;
		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
		resp->resok.count = 0;
		resp->resok.eof = TRUE;
		resp->resok.data.data_len = 0;
		resp->resok.data.data_val = NULL;
		resp->resok.data.mp = NULL;
		/* RDMA */
		resp->resok.wlist = args->wlist;
		resp->resok.wlist_len = resp->resok.count;
		if (resp->resok.wlist)
			clist_zero_len(resp->resok.wlist);
		goto done;
	}

	/* Zero-length read: reply OK with no data and eof clear. */
	if (args->count == 0) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		resp->status = NFS3_OK;
		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
		resp->resok.count = 0;
		resp->resok.eof = FALSE;
		resp->resok.data.data_len = 0;
		resp->resok.data.data_val = NULL;
		resp->resok.data.mp = NULL;
		/* RDMA */
		resp->resok.wlist = args->wlist;
		resp->resok.wlist_len = resp->resok.count;
		if (resp->resok.wlist)
			clist_zero_len(resp->resok.wlist);
		goto done;
	}

	/*
	 * do not allocate memory more the max. allowed
	 * transfer size
	 */
	if (args->count > rfs3_tsize(req))
		args->count = rfs3_tsize(req);

	/*
	 * Try the zero-copy path first; if the file system cannot supply
	 * loaned buffers, fall back to the ordinary copy path below.
	 */
	if (loaned_buffers) {
		uiop = (uio_t *)rfs_setup_xuio(vp);
		ASSERT(uiop != NULL);
		uiop->uio_segflg = UIO_SYSSPACE;
		uiop->uio_loffset = args->offset;
		uiop->uio_resid = args->count;

		/* Jump to do the read if successful */
		if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
			/*
			 * Need to hold the vnode until after VOP_RETZCBUF()
			 * is called.
			 */
			VN_HOLD(vp);
			goto doio_read;
		}

		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
		    uiop->uio_loffset, int, uiop->uio_resid);

		uiop->uio_extflg = 0;
		/* failure to setup for zero copy */
		rfs_free_xuio((void *)uiop);
		loaned_buffers = 0;
	}

	/*
	 * If returning data via RDMA Write, then grab the chunk list.
	 * If we aren't returning READ data w/RDMA_WRITE, then grab
	 * a mblk.
	 */
	if (rdma_used) {
		(void) rdma_get_wchunk(req, &iov, args->wlist);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * For UDP, this will be freed after the reply has been sent
		 * out by the driver.  For TCP, it will be freed after the last
		 * segment associated with the reply has been ACKed by the
		 * client.
		 */
		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
	}

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uiop = &uio;

doio_read:
	error = VOP_READ(vp, uiop, 0, cr, &ct);

	if (error) {
		/*
		 * NOTE(review): when this is reached on the loaned-buffer
		 * path, neither the xuio nor the extra VN_HOLD taken above
		 * is visibly released here -- confirm whether that cleanup
		 * happens elsewhere.
		 */
		if (mp)
			freemsg(mp);
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}
		goto out;
	}

	/* make mblk using zc buffers */
	if (loaned_buffers) {
		mp = uio_to_mblk(uiop);
		ASSERT(mp != NULL);
	}

	/*
	 * Post-op attributes.  A failure here does not fail the read; the
	 * attributes are simply omitted and eof is reported as FALSE.
	 */
	va.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error)
		vap = NULL;
	else
		vap = &va;

	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);

	if (in_crit)
		nbl_end_crit(vp);

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
	/* Bytes actually read: requested count minus what was left over. */
	resp->resok.count = args->count - uiop->uio_resid;
	if (!error && offset + resp->resok.count == va.va_size)
		resp->resok.eof = TRUE;
	else
		resp->resok.eof = FALSE;
	resp->resok.data.data_len = resp->resok.count;

	if (mp)
		rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);

	resp->resok.data.mp = mp;
	resp->resok.size = (uint_t)args->count;

	if (rdma_used) {
		resp->resok.data.data_val = (caddr_t)iov.iov_base;
		if (!rdma_setup_read_data3(args, &(resp->resok))) {
			resp->status = NFS3ERR_INVAL;
		}
	} else {
		resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
		(resp->resok).wlist = NULL;
	}

done:
	DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    READ3res *, resp);

	VN_RELE(vp);

	/* iovp is non-NULL only if rfs_read_alloc() supplied an iovec array. */
	if (iovp != NULL)
		kmem_free(iovp, iovcnt * sizeof (struct iovec));

	return;

out:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    READ3res *, resp);

	if (vp != NULL) {
		if (need_rwunlock)
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);

	if (iovp != NULL)
		kmem_free(iovp, iovcnt * sizeof (struct iovec));
}
1268
1269 void
1270 rfs3_read_free(READ3res *resp)
1271 {
1272 mblk_t *mp;
1273
1274 if (resp->status == NFS3_OK) {
1275 mp = resp->resok.data.mp;
1276 if (mp != NULL)
1277 freemsg(mp);
1278 }
1279 }
1280
1281 void *
1282 rfs3_read_getfh(READ3args *args)
1283 {
1284
1285 return (&args->file);
1286 }
1287
/*
 * Number of iovec entries rfs3_write() keeps in its on-stack array.
 * An mblk chain with more segments than this gets a kmem_alloc()ed
 * iovec array instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * DEBUG-only counters bumped by rfs3_write(): "hits" when the incoming
 * mblk chain fits in the on-stack iovec array, "misses" when it does not.
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1294
1295 void
1296 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1297 struct svc_req *req, cred_t *cr, bool_t ro)
1298 {
1299 nfs3_srv_t *ns;
1300 int error;
1301 vnode_t *vp;
1302 struct vattr *bvap = NULL;
1303 struct vattr bva;
1304 struct vattr *avap = NULL;
1305 struct vattr ava;
1306 u_offset_t rlimit;
1307 struct uio uio;
1308 struct iovec iov[MAX_IOVECS];
1309 mblk_t *m;
1310 struct iovec *iovp;
1311 int iovcnt;
1312 int ioflag;
1313 cred_t *savecred;
1314 int in_crit = 0;
1315 int rwlock_ret = -1;
1316 caller_context_t ct;
1317
1318 vp = nfs3_fhtovp(&args->file, exi);
1319
1320 DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1321 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1322 WRITE3args *, args);
1323
1324 if (vp == NULL) {
1325 error = ESTALE;
1326 goto err;
1327 }
1328
1329 ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
1330 ns = zone_getspecific(rfs3_zone_key, curzone);
1331 if (is_system_labeled()) {
1332 bslabel_t *clabel = req->rq_label;
1333
1334 ASSERT(clabel != NULL);
1335 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1336 "got client label from request(1)", struct svc_req *, req);
1337
1338 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1339 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1340 exi)) {
1341 resp->status = NFS3ERR_ACCES;
1342 goto err1;
1343 }
1344 }
1345 }
1346
1347 ct.cc_sysid = 0;
1348 ct.cc_pid = 0;
1349 ct.cc_caller_id = nfs3_srv_caller_id;
1350 ct.cc_flags = CC_DONTBLOCK;
1351
1352 /*
1353 * We have to enter the critical region before calling VOP_RWLOCK
1354 * to avoid a deadlock with ufs.
1355 */
1356 if (nbl_need_check(vp)) {
1357 nbl_start_crit(vp, RW_READER);
1358 in_crit = 1;
1359 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1360 NULL)) {
1361 error = EACCES;
1362 goto err;
1363 }
1364 }
1365
1366 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1367
1368 /* check if a monitor detected a delegation conflict */
1369 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1370 resp->status = NFS3ERR_JUKEBOX;
1371 rwlock_ret = -1;
1372 goto err1;
1373 }
1374
1375
1376 bva.va_mask = AT_ALL;
1377 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1378
1379 /*
1380 * If we can't get the attributes, then we can't do the
1381 * right access checking. So, we'll fail the request.
1382 */
1383 if (error)
1384 goto err;
1385
1386 bvap = &bva;
1387 avap = bvap;
1388
1389 if (args->count != args->data.data_len) {
1390 resp->status = NFS3ERR_INVAL;
1391 goto err1;
1392 }
1393
1394 if (rdonly(ro, vp)) {
1395 resp->status = NFS3ERR_ROFS;
1396 goto err1;
1397 }
1398
1399 if (vp->v_type != VREG) {
1400 resp->status = NFS3ERR_INVAL;
1401 goto err1;
1402 }
1403
1404 if (crgetuid(cr) != bva.va_uid &&
1405 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1406 goto err;
1407
1408 if (MANDLOCK(vp, bva.va_mode)) {
1409 resp->status = NFS3ERR_ACCES;
1410 goto err1;
1411 }
1412
1413 if (args->count == 0) {
1414 resp->status = NFS3_OK;
1415 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1416 resp->resok.count = 0;
1417 resp->resok.committed = args->stable;
1418 resp->resok.verf = ns->write3verf;
1419 goto out;
1420 }
1421
1422 if (args->mblk != NULL) {
1423 iovcnt = 0;
1424 for (m = args->mblk; m != NULL; m = m->b_cont)
1425 iovcnt++;
1426 if (iovcnt <= MAX_IOVECS) {
1427 #ifdef DEBUG
1428 rfs3_write_hits++;
1429 #endif
1430 iovp = iov;
1431 } else {
1432 #ifdef DEBUG
1433 rfs3_write_misses++;
1434 #endif
1435 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1436 }
1437 mblk_to_iov(args->mblk, iovcnt, iovp);
1438
1439 } else if (args->rlist != NULL) {
1440 iovcnt = 1;
1441 iovp = iov;
1442 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1443 iovp->iov_len = args->count;
1444 } else {
1445 iovcnt = 1;
1446 iovp = iov;
1447 iovp->iov_base = args->data.data_val;
1448 iovp->iov_len = args->count;
1449 }
1450
1451 uio.uio_iov = iovp;
1452 uio.uio_iovcnt = iovcnt;
1453
1454 uio.uio_segflg = UIO_SYSSPACE;
1455 uio.uio_extflg = UIO_COPY_DEFAULT;
1456 uio.uio_loffset = args->offset;
1457 uio.uio_resid = args->count;
1458 uio.uio_llimit = curproc->p_fsz_ctl;
1459 rlimit = uio.uio_llimit - args->offset;
1460 if (rlimit < (u_offset_t)uio.uio_resid)
1461 uio.uio_resid = (int)rlimit;
1462
1463 if (args->stable == UNSTABLE)
1464 ioflag = 0;
1465 else if (args->stable == FILE_SYNC)
1466 ioflag = FSYNC;
1467 else if (args->stable == DATA_SYNC)
1468 ioflag = FDSYNC;
1469 else {
1470 if (iovp != iov)
1471 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1472 resp->status = NFS3ERR_INVAL;
1473 goto err1;
1474 }
1475
1476 /*
1477 * We're changing creds because VM may fault and we need
1478 * the cred of the current thread to be used if quota
1479 * checking is enabled.
1480 */
1481 savecred = curthread->t_cred;
1482 curthread->t_cred = cr;
1483 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1484 curthread->t_cred = savecred;
1485
1486 if (iovp != iov)
1487 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1488
1489 /* check if a monitor detected a delegation conflict */
1490 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1491 resp->status = NFS3ERR_JUKEBOX;
1492 goto err1;
1493 }
1494
1495 ava.va_mask = AT_ALL;
1496 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1497
1498 if (error)
1499 goto err;
1500
1501 /*
1502 * If we were unable to get the V_WRITELOCK_TRUE, then we
1503 * may not have accurate after attrs, so check if
1504 * we have both attributes, they have a non-zero va_seq, and
1505 * va_seq has changed by exactly one,
1506 * if not, turn off the before attr.
1507 */
1508 if (rwlock_ret != V_WRITELOCK_TRUE) {
1509 if (bvap == NULL || avap == NULL ||
1510 bvap->va_seq == 0 || avap->va_seq == 0 ||
1511 avap->va_seq != (bvap->va_seq + 1)) {
1512 bvap = NULL;
1513 }
1514 }
1515
1516 resp->status = NFS3_OK;
1517 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1518 resp->resok.count = args->count - uio.uio_resid;
1519 resp->resok.committed = args->stable;
1520 resp->resok.verf = ns->write3verf;
1521 goto out;
1522
1523 err:
1524 if (curthread->t_flag & T_WOULDBLOCK) {
1525 curthread->t_flag &= ~T_WOULDBLOCK;
1526 resp->status = NFS3ERR_JUKEBOX;
1527 } else
1528 resp->status = puterrno3(error);
1529 err1:
1530 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1531 out:
1532 DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1533 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1534 WRITE3res *, resp);
1535
1536 if (vp != NULL) {
1537 if (rwlock_ret != -1)
1538 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1539 if (in_crit)
1540 nbl_end_crit(vp);
1541 VN_RELE(vp);
1542 }
1543 }
1544
1545 void *
1546 rfs3_write_getfh(WRITE3args *args)
1547 {
1548
1549 return (&args->file);
1550 }
1551
1552 void
1553 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1554 struct svc_req *req, cred_t *cr, bool_t ro)
1555 {
1556 int error;
1557 int in_crit = 0;
1558 vnode_t *vp;
1559 vnode_t *tvp = NULL;
1560 vnode_t *dvp;
1561 struct vattr *vap;
1562 struct vattr va;
1563 struct vattr *dbvap;
1564 struct vattr dbva;
1565 struct vattr *davap;
1566 struct vattr dava;
1567 enum vcexcl excl;
1568 nfstime3 *mtime;
1569 len_t reqsize;
1570 bool_t trunc;
1571 struct sockaddr *ca;
1572 char *name = NULL;
1573
1574 dbvap = NULL;
1575 davap = NULL;
1576
1577 dvp = nfs3_fhtovp(&args->where.dir, exi);
1578
1579 DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1580 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1581 CREATE3args *, args);
1582
1583 if (dvp == NULL) {
1584 error = ESTALE;
1585 goto out;
1586 }
1587
1588 dbva.va_mask = AT_ALL;
1589 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1590 davap = dbvap;
1591
1592 if (args->where.name == nfs3nametoolong) {
1593 resp->status = NFS3ERR_NAMETOOLONG;
1594 goto out1;
1595 }
1596
1597 if (args->where.name == NULL || *(args->where.name) == '\0') {
1598 resp->status = NFS3ERR_ACCES;
1599 goto out1;
1600 }
1601
1602 if (rdonly(ro, dvp)) {
1603 resp->status = NFS3ERR_ROFS;
1604 goto out1;
1605 }
1606
1607 if (is_system_labeled()) {
1608 bslabel_t *clabel = req->rq_label;
1609
1610 ASSERT(clabel != NULL);
1611 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1612 "got client label from request(1)", struct svc_req *, req);
1613
1614 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1615 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1616 exi)) {
1617 resp->status = NFS3ERR_ACCES;
1618 goto out1;
1619 }
1620 }
1621 }
1622
1623 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1624 name = nfscmd_convname(ca, exi, args->where.name,
1625 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1626
1627 if (name == NULL) {
1628 /* This is really a Solaris EILSEQ */
1629 resp->status = NFS3ERR_INVAL;
1630 goto out1;
1631 }
1632
1633 if (args->how.mode == EXCLUSIVE) {
1634 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1635 va.va_type = VREG;
1636 va.va_mode = (mode_t)0;
1637 /*
1638 * Ensure no time overflows and that types match
1639 */
1640 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1641 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1642 va.va_mtime.tv_nsec = mtime->nseconds;
1643 excl = EXCL;
1644 } else {
1645 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1646 &va);
1647 if (error)
1648 goto out;
1649 va.va_mask |= AT_TYPE;
1650 va.va_type = VREG;
1651 if (args->how.mode == GUARDED)
1652 excl = EXCL;
1653 else {
1654 excl = NONEXCL;
1655
1656 /*
1657 * During creation of file in non-exclusive mode
1658 * if size of file is being set then make sure
1659 * that if the file already exists that no conflicting
1660 * non-blocking mandatory locks exists in the region
1661 * being modified. If there are conflicting locks fail
1662 * the operation with EACCES.
1663 */
1664 if (va.va_mask & AT_SIZE) {
1665 struct vattr tva;
1666
1667 /*
1668 * Does file already exist?
1669 */
1670 error = VOP_LOOKUP(dvp, name, &tvp,
1671 NULL, 0, NULL, cr, NULL, NULL, NULL);
1672
1673 /*
1674 * Check to see if the file has been delegated
1675 * to a v4 client. If so, then begin recall of
1676 * the delegation and return JUKEBOX to allow
1677 * the client to retrasmit its request.
1678 */
1679
1680 trunc = va.va_size == 0;
1681 if (!error &&
1682 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1683 resp->status = NFS3ERR_JUKEBOX;
1684 goto out1;
1685 }
1686
1687 /*
1688 * Check for NBMAND lock conflicts
1689 */
1690 if (!error && nbl_need_check(tvp)) {
1691 u_offset_t offset;
1692 ssize_t len;
1693
1694 nbl_start_crit(tvp, RW_READER);
1695 in_crit = 1;
1696
1697 tva.va_mask = AT_SIZE;
1698 error = VOP_GETATTR(tvp, &tva, 0, cr,
1699 NULL);
1700 /*
1701 * Can't check for conflicts, so return
1702 * error.
1703 */
1704 if (error)
1705 goto out;
1706
1707 offset = tva.va_size < va.va_size ?
1708 tva.va_size : va.va_size;
1709 len = tva.va_size < va.va_size ?
1710 va.va_size - tva.va_size :
1711 tva.va_size - va.va_size;
1712 if (nbl_conflict(tvp, NBL_WRITE,
1713 offset, len, 0, NULL)) {
1714 error = EACCES;
1715 goto out;
1716 }
1717 } else if (tvp) {
1718 VN_RELE(tvp);
1719 tvp = NULL;
1720 }
1721 }
1722 }
1723 if (va.va_mask & AT_SIZE)
1724 reqsize = va.va_size;
1725 }
1726
1727 /*
1728 * Must specify the mode.
1729 */
1730 if (!(va.va_mask & AT_MODE)) {
1731 resp->status = NFS3ERR_INVAL;
1732 goto out1;
1733 }
1734
1735 /*
1736 * If the filesystem is exported with nosuid, then mask off
1737 * the setuid and setgid bits.
1738 */
1739 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1740 va.va_mode &= ~(VSUID | VSGID);
1741
1742 tryagain:
1743 /*
1744 * The file open mode used is VWRITE. If the client needs
1745 * some other semantic, then it should do the access checking
1746 * itself. It would have been nice to have the file open mode
1747 * passed as part of the arguments.
1748 */
1749 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1750 &vp, cr, 0, NULL, NULL);
1751
1752 dava.va_mask = AT_ALL;
1753 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1754
1755 if (error) {
1756 /*
1757 * If we got something other than file already exists
1758 * then just return this error. Otherwise, we got
1759 * EEXIST. If we were doing a GUARDED create, then
1760 * just return this error. Otherwise, we need to
1761 * make sure that this wasn't a duplicate of an
1762 * exclusive create request.
1763 *
1764 * The assumption is made that a non-exclusive create
1765 * request will never return EEXIST.
1766 */
1767 if (error != EEXIST || args->how.mode == GUARDED)
1768 goto out;
1769 /*
1770 * Lookup the file so that we can get a vnode for it.
1771 */
1772 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1773 NULL, cr, NULL, NULL, NULL);
1774 if (error) {
1775 /*
1776 * We couldn't find the file that we thought that
1777 * we just created. So, we'll just try creating
1778 * it again.
1779 */
1780 if (error == ENOENT)
1781 goto tryagain;
1782 goto out;
1783 }
1784
1785 /*
1786 * If the file is delegated to a v4 client, go ahead
1787 * and initiate recall, this create is a hint that a
1788 * conflicting v3 open has occurred.
1789 */
1790
1791 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1792 VN_RELE(vp);
1793 resp->status = NFS3ERR_JUKEBOX;
1794 goto out1;
1795 }
1796
1797 va.va_mask = AT_ALL;
1798 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1799
1800 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1801 /* % with INT32_MAX to prevent overflows */
1802 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1803 vap->va_mtime.tv_sec !=
1804 (mtime->seconds % INT32_MAX) ||
1805 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1806 VN_RELE(vp);
1807 error = EEXIST;
1808 goto out;
1809 }
1810 } else {
1811
1812 if ((args->how.mode == UNCHECKED ||
1813 args->how.mode == GUARDED) &&
1814 args->how.createhow3_u.obj_attributes.size.set_it &&
1815 va.va_size == 0)
1816 trunc = TRUE;
1817 else
1818 trunc = FALSE;
1819
1820 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1821 VN_RELE(vp);
1822 resp->status = NFS3ERR_JUKEBOX;
1823 goto out1;
1824 }
1825
1826 va.va_mask = AT_ALL;
1827 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1828
1829 /*
1830 * We need to check to make sure that the file got
1831 * created to the indicated size. If not, we do a
1832 * setattr to try to change the size, but we don't
1833 * try too hard. This shouldn't a problem as most
1834 * clients will only specifiy a size of zero which
1835 * local file systems handle. However, even if
1836 * the client does specify a non-zero size, it can
1837 * still recover by checking the size of the file
1838 * after it has created it and then issue a setattr
1839 * request of its own to set the size of the file.
1840 */
1841 if (vap != NULL &&
1842 (args->how.mode == UNCHECKED ||
1843 args->how.mode == GUARDED) &&
1844 args->how.createhow3_u.obj_attributes.size.set_it &&
1845 vap->va_size != reqsize) {
1846 va.va_mask = AT_SIZE;
1847 va.va_size = reqsize;
1848 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1849 va.va_mask = AT_ALL;
1850 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1851 }
1852 }
1853
1854 if (name != args->where.name)
1855 kmem_free(name, MAXPATHLEN + 1);
1856
1857 error = makefh3(&resp->resok.obj.handle, vp, exi);
1858 if (error)
1859 resp->resok.obj.handle_follows = FALSE;
1860 else
1861 resp->resok.obj.handle_follows = TRUE;
1862
1863 /*
1864 * Force modified data and metadata out to stable storage.
1865 */
1866 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1867 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1868
1869 VN_RELE(vp);
1870 if (tvp != NULL) {
1871 if (in_crit)
1872 nbl_end_crit(tvp);
1873 VN_RELE(tvp);
1874 }
1875
1876 resp->status = NFS3_OK;
1877 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1878 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1879
1880 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1881 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1882 CREATE3res *, resp);
1883
1884 VN_RELE(dvp);
1885 return;
1886
1887 out:
1888 if (curthread->t_flag & T_WOULDBLOCK) {
1889 curthread->t_flag &= ~T_WOULDBLOCK;
1890 resp->status = NFS3ERR_JUKEBOX;
1891 } else
1892 resp->status = puterrno3(error);
1893 out1:
1894 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1895 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1896 CREATE3res *, resp);
1897
1898 if (name != NULL && name != args->where.name)
1899 kmem_free(name, MAXPATHLEN + 1);
1900
1901 if (tvp != NULL) {
1902 if (in_crit)
1903 nbl_end_crit(tvp);
1904 VN_RELE(tvp);
1905 }
1906 if (dvp != NULL)
1907 VN_RELE(dvp);
1908 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1909 }
1910
1911 void *
1912 rfs3_create_getfh(CREATE3args *args)
1913 {
1914
1915 return (&args->where.dir);
1916 }
1917
1918 void
1919 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1920 struct svc_req *req, cred_t *cr, bool_t ro)
1921 {
1922 int error;
1923 vnode_t *vp = NULL;
1924 vnode_t *dvp;
1925 struct vattr *vap;
1926 struct vattr va;
1927 struct vattr *dbvap;
1928 struct vattr dbva;
1929 struct vattr *davap;
1930 struct vattr dava;
1931 struct sockaddr *ca;
1932 char *name = NULL;
1933
1934 dbvap = NULL;
1935 davap = NULL;
1936
1937 dvp = nfs3_fhtovp(&args->where.dir, exi);
1938
1939 DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1940 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1941 MKDIR3args *, args);
1942
1943 if (dvp == NULL) {
1944 error = ESTALE;
1945 goto out;
1946 }
1947
1948 dbva.va_mask = AT_ALL;
1949 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1950 davap = dbvap;
1951
1952 if (args->where.name == nfs3nametoolong) {
1953 resp->status = NFS3ERR_NAMETOOLONG;
1954 goto out1;
1955 }
1956
1957 if (args->where.name == NULL || *(args->where.name) == '\0') {
1958 resp->status = NFS3ERR_ACCES;
1959 goto out1;
1960 }
1961
1962 if (rdonly(ro, dvp)) {
1963 resp->status = NFS3ERR_ROFS;
1964 goto out1;
1965 }
1966
1967 if (is_system_labeled()) {
1968 bslabel_t *clabel = req->rq_label;
1969
1970 ASSERT(clabel != NULL);
1971 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1972 "got client label from request(1)", struct svc_req *, req);
1973
1974 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1975 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1976 exi)) {
1977 resp->status = NFS3ERR_ACCES;
1978 goto out1;
1979 }
1980 }
1981 }
1982
1983 error = sattr3_to_vattr(&args->attributes, &va);
1984 if (error)
1985 goto out;
1986
1987 if (!(va.va_mask & AT_MODE)) {
1988 resp->status = NFS3ERR_INVAL;
1989 goto out1;
1990 }
1991
1992 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1993 name = nfscmd_convname(ca, exi, args->where.name,
1994 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1995
1996 if (name == NULL) {
1997 resp->status = NFS3ERR_INVAL;
1998 goto out1;
1999 }
2000
2001 va.va_mask |= AT_TYPE;
2002 va.va_type = VDIR;
2003
2004 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2005
2006 if (name != args->where.name)
2007 kmem_free(name, MAXPATHLEN + 1);
2008
2009 dava.va_mask = AT_ALL;
2010 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2011
2012 /*
2013 * Force modified data and metadata out to stable storage.
2014 */
2015 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2016
2017 if (error)
2018 goto out;
2019
2020 error = makefh3(&resp->resok.obj.handle, vp, exi);
2021 if (error)
2022 resp->resok.obj.handle_follows = FALSE;
2023 else
2024 resp->resok.obj.handle_follows = TRUE;
2025
2026 va.va_mask = AT_ALL;
2027 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2028
2029 /*
2030 * Force modified data and metadata out to stable storage.
2031 */
2032 (void) VOP_FSYNC(vp, 0, cr, NULL);
2033
2034 VN_RELE(vp);
2035
2036 resp->status = NFS3_OK;
2037 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2038 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2039
2040 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2041 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2042 MKDIR3res *, resp);
2043 VN_RELE(dvp);
2044
2045 return;
2046
2047 out:
2048 if (curthread->t_flag & T_WOULDBLOCK) {
2049 curthread->t_flag &= ~T_WOULDBLOCK;
2050 resp->status = NFS3ERR_JUKEBOX;
2051 } else
2052 resp->status = puterrno3(error);
2053 out1:
2054 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2055 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2056 MKDIR3res *, resp);
2057 if (dvp != NULL)
2058 VN_RELE(dvp);
2059 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2060 }
2061
2062 void *
2063 rfs3_mkdir_getfh(MKDIR3args *args)
2064 {
2065
2066 return (&args->where.dir);
2067 }
2068
/*
 * NFSv3 SYMLINK: create the symbolic link args->where.name in the
 * directory identified by args->where.dir, pointing at
 * args->symlink.symlink_data.
 *
 *   args - decoded request
 *   resp - reply to fill in (resok on success, resfail otherwise)
 *   exi  - export the directory handle belongs to
 *   req  - RPC request (client address, client label, DTrace probes)
 *   cr   - credentials the operation is performed with
 *   ro   - passed to rdonly() to decide whether the export is read-only
 *
 * Once VOP_SYMLINK succeeds the reply status is NFS3_OK even if the
 * follow-up lookup of the new link fails; in that case the reply simply
 * carries no file handle and no object attributes.
 *
 * All exits, success included, funnel through the "out" label, which
 * frees the converted name buffers, fires the done probe and releases
 * the directory vnode.
 */
void
rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	vnode_t *dvp;
	struct vattr *vap;
	struct vattr va;
	struct vattr *dbvap;	/* directory attributes before the op */
	struct vattr dbva;
	struct vattr *davap;	/* directory attributes after the op */
	struct vattr dava;
	struct sockaddr *ca;
	char *name = NULL;	/* converted link name, see "out" */
	char *symdata = NULL;	/* converted link target, see "out" */

	dbvap = NULL;
	davap = NULL;

	dvp = nfs3_fhtovp(&args->where.dir, exi);

	DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    SYMLINK3args *, args);

	if (dvp == NULL) {
		error = ESTALE;
		goto err;
	}

	/* Until the directory is modified, "after" equals "before". */
	dbva.va_mask = AT_ALL;
	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
	davap = dbvap;

	if (args->where.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto err1;
	}

	if (args->where.name == NULL || *(args->where.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto err1;
	}

	if (rdonly(ro, dvp)) {
		resp->status = NFS3ERR_ROFS;
		goto err1;
	}

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto err1;
			}
		}
	}

	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
	if (error)
		goto err;

	/* Must specify the mode. */
	if (!(va.va_mask & AT_MODE)) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	if (args->symlink.symlink_data == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto err1;
	}

	/*
	 * Convert both the link name and the link target.  Each result is
	 * either the original string or a separate MAXPATHLEN + 1 byte
	 * buffer; the "out" label frees whichever were allocated.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, args->where.name,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

	if (name == NULL) {
		/* This is really a Solaris EILSEQ */
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
	if (symdata == NULL) {
		/* This is really a Solaris EILSEQ */
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}


	va.va_mask |= AT_TYPE;
	va.va_type = VLNK;

	error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);

	/* Refresh the directory's "after" attributes even on failure. */
	dava.va_mask = AT_ALL;
	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;

	if (error)
		goto err;

	/* Look the new link up to obtain a vnode for the reply. */
	error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);


	/* The link exists; a failed lookup only drops the optional fields. */
	resp->status = NFS3_OK;
	if (error) {
		resp->resok.obj.handle_follows = FALSE;
		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
		goto out;
	}

	error = makefh3(&resp->resok.obj.handle, vp, exi);
	if (error)
		resp->resok.obj.handle_follows = FALSE;
	else
		resp->resok.obj.handle_follows = TRUE;

	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
	goto out;

err:
	/* Map the errno, or report JUKEBOX if the thread would have blocked. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
err1:
	/* resp->status is already set; fill in the failure wcc data. */
	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
out:
	/* Free only the buffers nfscmd_convname() allocated separately. */
	if (name != NULL && name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);
	if (symdata != NULL && symdata != args->symlink.symlink_data)
		kmem_free(symdata, MAXPATHLEN + 1);

	DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    SYMLINK3res *, resp);

	if (dvp != NULL)
		VN_RELE(dvp);
}
2237
2238 void *
2239 rfs3_symlink_getfh(SYMLINK3args *args)
2240 {
2241
2242 return (&args->where.dir);
2243 }
2244
2245 void
2246 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2247 struct svc_req *req, cred_t *cr, bool_t ro)
2248 {
2249 int error;
2250 vnode_t *vp;
2251 vnode_t *realvp;
2252 vnode_t *dvp;
2253 struct vattr *vap;
2254 struct vattr va;
2255 struct vattr *dbvap;
2256 struct vattr dbva;
2257 struct vattr *davap;
2258 struct vattr dava;
2259 int mode;
2260 enum vcexcl excl;
2261 struct sockaddr *ca;
2262 char *name = NULL;
2263
2264 dbvap = NULL;
2265 davap = NULL;
2266
2267 dvp = nfs3_fhtovp(&args->where.dir, exi);
2268
2269 DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2270 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2271 MKNOD3args *, args);
2272
2273 if (dvp == NULL) {
2274 error = ESTALE;
2275 goto out;
2276 }
2277
2278 dbva.va_mask = AT_ALL;
2279 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2280 davap = dbvap;
2281
2282 if (args->where.name == nfs3nametoolong) {
2283 resp->status = NFS3ERR_NAMETOOLONG;
2284 goto out1;
2285 }
2286
2287 if (args->where.name == NULL || *(args->where.name) == '\0') {
2288 resp->status = NFS3ERR_ACCES;
2289 goto out1;
2290 }
2291
2292 if (rdonly(ro, dvp)) {
2293 resp->status = NFS3ERR_ROFS;
2294 goto out1;
2295 }
2296
2297 if (is_system_labeled()) {
2298 bslabel_t *clabel = req->rq_label;
2299
2300 ASSERT(clabel != NULL);
2301 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2302 "got client label from request(1)", struct svc_req *, req);
2303
2304 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2305 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2306 exi)) {
2307 resp->status = NFS3ERR_ACCES;
2308 goto out1;
2309 }
2310 }
2311 }
2312
2313 switch (args->what.type) {
2314 case NF3CHR:
2315 case NF3BLK:
2316 error = sattr3_to_vattr(
2317 &args->what.mknoddata3_u.device.dev_attributes, &va);
2318 if (error)
2319 goto out;
2320 if (secpolicy_sys_devices(cr) != 0) {
2321 resp->status = NFS3ERR_PERM;
2322 goto out1;
2323 }
2324 if (args->what.type == NF3CHR)
2325 va.va_type = VCHR;
2326 else
2327 va.va_type = VBLK;
2328 va.va_rdev = makedevice(
2329 args->what.mknoddata3_u.device.spec.specdata1,
2330 args->what.mknoddata3_u.device.spec.specdata2);
2331 va.va_mask |= AT_TYPE | AT_RDEV;
2332 break;
2333 case NF3SOCK:
2334 error = sattr3_to_vattr(
2335 &args->what.mknoddata3_u.pipe_attributes, &va);
2336 if (error)
2337 goto out;
2338 va.va_type = VSOCK;
2339 va.va_mask |= AT_TYPE;
2340 break;
2341 case NF3FIFO:
2342 error = sattr3_to_vattr(
2343 &args->what.mknoddata3_u.pipe_attributes, &va);
2344 if (error)
2345 goto out;
2346 va.va_type = VFIFO;
2347 va.va_mask |= AT_TYPE;
2348 break;
2349 default:
2350 resp->status = NFS3ERR_BADTYPE;
2351 goto out1;
2352 }
2353
2354 /*
2355 * Must specify the mode.
2356 */
2357 if (!(va.va_mask & AT_MODE)) {
2358 resp->status = NFS3ERR_INVAL;
2359 goto out1;
2360 }
2361
2362 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2363 name = nfscmd_convname(ca, exi, args->where.name,
2364 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2365
2366 if (name == NULL) {
2367 resp->status = NFS3ERR_INVAL;
2368 goto out1;
2369 }
2370
2371 excl = EXCL;
2372
2373 mode = 0;
2374
2375 error = VOP_CREATE(dvp, name, &va, excl, mode,
2376 &vp, cr, 0, NULL, NULL);
2377
2378 if (name != args->where.name)
2379 kmem_free(name, MAXPATHLEN + 1);
2380
2381 dava.va_mask = AT_ALL;
2382 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2383
2384 /*
2385 * Force modified data and metadata out to stable storage.
2386 */
2387 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2388
2389 if (error)
2390 goto out;
2391
2392 resp->status = NFS3_OK;
2393
2394 error = makefh3(&resp->resok.obj.handle, vp, exi);
2395 if (error)
2396 resp->resok.obj.handle_follows = FALSE;
2397 else
2398 resp->resok.obj.handle_follows = TRUE;
2399
2400 va.va_mask = AT_ALL;
2401 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2402
2403 /*
2404 * Force modified metadata out to stable storage.
2405 *
2406 * if a underlying vp exists, pass it to VOP_FSYNC
2407 */
2408 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2409 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2410 else
2411 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2412
2413 VN_RELE(vp);
2414
2415 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2416 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2417 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2418 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2419 MKNOD3res *, resp);
2420 VN_RELE(dvp);
2421 return;
2422
2423 out:
2424 if (curthread->t_flag & T_WOULDBLOCK) {
2425 curthread->t_flag &= ~T_WOULDBLOCK;
2426 resp->status = NFS3ERR_JUKEBOX;
2427 } else
2428 resp->status = puterrno3(error);
2429 out1:
2430 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2431 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2432 MKNOD3res *, resp);
2433 if (dvp != NULL)
2434 VN_RELE(dvp);
2435 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2436 }
2437
2438 void *
2439 rfs3_mknod_getfh(MKNOD3args *args)
2440 {
2441
2442 return (&args->where.dir);
2443 }
2444
2445 void
2446 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2447 struct svc_req *req, cred_t *cr, bool_t ro)
2448 {
2449 int error = 0;
2450 vnode_t *vp;
2451 struct vattr *bvap;
2452 struct vattr bva;
2453 struct vattr *avap;
2454 struct vattr ava;
2455 vnode_t *targvp = NULL;
2456 struct sockaddr *ca;
2457 char *name = NULL;
2458
2459 bvap = NULL;
2460 avap = NULL;
2461
2462 vp = nfs3_fhtovp(&args->object.dir, exi);
2463
2464 DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2465 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2466 REMOVE3args *, args);
2467
2468 if (vp == NULL) {
2469 error = ESTALE;
2470 goto err;
2471 }
2472
2473 bva.va_mask = AT_ALL;
2474 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2475 avap = bvap;
2476
2477 if (vp->v_type != VDIR) {
2478 resp->status = NFS3ERR_NOTDIR;
2479 goto err1;
2480 }
2481
2482 if (args->object.name == nfs3nametoolong) {
2483 resp->status = NFS3ERR_NAMETOOLONG;
2484 goto err1;
2485 }
2486
2487 if (args->object.name == NULL || *(args->object.name) == '\0') {
2488 resp->status = NFS3ERR_ACCES;
2489 goto err1;
2490 }
2491
2492 if (rdonly(ro, vp)) {
2493 resp->status = NFS3ERR_ROFS;
2494 goto err1;
2495 }
2496
2497 if (is_system_labeled()) {
2498 bslabel_t *clabel = req->rq_label;
2499
2500 ASSERT(clabel != NULL);
2501 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2502 "got client label from request(1)", struct svc_req *, req);
2503
2504 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2505 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2506 exi)) {
2507 resp->status = NFS3ERR_ACCES;
2508 goto err1;
2509 }
2510 }
2511 }
2512
2513 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2514 name = nfscmd_convname(ca, exi, args->object.name,
2515 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2516
2517 if (name == NULL) {
2518 resp->status = NFS3ERR_INVAL;
2519 goto err1;
2520 }
2521
2522 /*
2523 * Check for a conflict with a non-blocking mandatory share
2524 * reservation and V4 delegations
2525 */
2526 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2527 NULL, cr, NULL, NULL, NULL);
2528 if (error != 0)
2529 goto err;
2530
2531 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2532 resp->status = NFS3ERR_JUKEBOX;
2533 goto err1;
2534 }
2535
2536 if (!nbl_need_check(targvp)) {
2537 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2538 } else {
2539 nbl_start_crit(targvp, RW_READER);
2540 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2541 error = EACCES;
2542 } else {
2543 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2544 }
2545 nbl_end_crit(targvp);
2546 }
2547 VN_RELE(targvp);
2548 targvp = NULL;
2549
2550 ava.va_mask = AT_ALL;
2551 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2552
2553 /*
2554 * Force modified data and metadata out to stable storage.
2555 */
2556 (void) VOP_FSYNC(vp, 0, cr, NULL);
2557
2558 if (error)
2559 goto err;
2560
2561 resp->status = NFS3_OK;
2562 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2563 goto out;
2564
2565 err:
2566 if (curthread->t_flag & T_WOULDBLOCK) {
2567 curthread->t_flag &= ~T_WOULDBLOCK;
2568 resp->status = NFS3ERR_JUKEBOX;
2569 } else
2570 resp->status = puterrno3(error);
2571 err1:
2572 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2573 out:
2574 DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2575 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2576 REMOVE3res *, resp);
2577
2578 if (name != NULL && name != args->object.name)
2579 kmem_free(name, MAXPATHLEN + 1);
2580
2581 if (vp != NULL)
2582 VN_RELE(vp);
2583 }
2584
2585 void *
2586 rfs3_remove_getfh(REMOVE3args *args)
2587 {
2588
2589 return (&args->object.dir);
2590 }
2591
2592 void
2593 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2594 struct svc_req *req, cred_t *cr, bool_t ro)
2595 {
2596 int error;
2597 vnode_t *vp;
2598 struct vattr *bvap;
2599 struct vattr bva;
2600 struct vattr *avap;
2601 struct vattr ava;
2602 struct sockaddr *ca;
2603 char *name = NULL;
2604
2605 bvap = NULL;
2606 avap = NULL;
2607
2608 vp = nfs3_fhtovp(&args->object.dir, exi);
2609
2610 DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2611 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2612 RMDIR3args *, args);
2613
2614 if (vp == NULL) {
2615 error = ESTALE;
2616 goto err;
2617 }
2618
2619 bva.va_mask = AT_ALL;
2620 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2621 avap = bvap;
2622
2623 if (vp->v_type != VDIR) {
2624 resp->status = NFS3ERR_NOTDIR;
2625 goto err1;
2626 }
2627
2628 if (args->object.name == nfs3nametoolong) {
2629 resp->status = NFS3ERR_NAMETOOLONG;
2630 goto err1;
2631 }
2632
2633 if (args->object.name == NULL || *(args->object.name) == '\0') {
2634 resp->status = NFS3ERR_ACCES;
2635 goto err1;
2636 }
2637
2638 if (rdonly(ro, vp)) {
2639 resp->status = NFS3ERR_ROFS;
2640 goto err1;
2641 }
2642
2643 if (is_system_labeled()) {
2644 bslabel_t *clabel = req->rq_label;
2645
2646 ASSERT(clabel != NULL);
2647 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2648 "got client label from request(1)", struct svc_req *, req);
2649
2650 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2651 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2652 exi)) {
2653 resp->status = NFS3ERR_ACCES;
2654 goto err1;
2655 }
2656 }
2657 }
2658
2659 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2660 name = nfscmd_convname(ca, exi, args->object.name,
2661 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2662
2663 if (name == NULL) {
2664 resp->status = NFS3ERR_INVAL;
2665 goto err1;
2666 }
2667
2668 error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2669
2670 if (name != args->object.name)
2671 kmem_free(name, MAXPATHLEN + 1);
2672
2673 ava.va_mask = AT_ALL;
2674 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2675
2676 /*
2677 * Force modified data and metadata out to stable storage.
2678 */
2679 (void) VOP_FSYNC(vp, 0, cr, NULL);
2680
2681 if (error) {
2682 /*
2683 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2684 * if the directory is not empty. A System V NFS server
2685 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2686 * over the wire.
2687 */
2688 if (error == EEXIST)
2689 error = ENOTEMPTY;
2690 goto err;
2691 }
2692
2693 resp->status = NFS3_OK;
2694 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2695 goto out;
2696
2697 err:
2698 if (curthread->t_flag & T_WOULDBLOCK) {
2699 curthread->t_flag &= ~T_WOULDBLOCK;
2700 resp->status = NFS3ERR_JUKEBOX;
2701 } else
2702 resp->status = puterrno3(error);
2703 err1:
2704 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2705 out:
2706 DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2707 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2708 RMDIR3res *, resp);
2709 if (vp != NULL)
2710 VN_RELE(vp);
2711
2712 }
2713
2714 void *
2715 rfs3_rmdir_getfh(RMDIR3args *args)
2716 {
2717
2718 return (&args->object.dir);
2719 }
2720
2721 void
2722 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2723 struct svc_req *req, cred_t *cr, bool_t ro)
2724 {
2725 int error = 0;
2726 vnode_t *fvp;
2727 vnode_t *tvp;
2728 vnode_t *targvp;
2729 struct vattr *fbvap;
2730 struct vattr fbva;
2731 struct vattr *favap;
2732 struct vattr fava;
2733 struct vattr *tbvap;
2734 struct vattr tbva;
2735 struct vattr *tavap;
2736 struct vattr tava;
2737 nfs_fh3 *fh3;
2738 struct exportinfo *to_exi;
2739 vnode_t *srcvp = NULL;
2740 bslabel_t *clabel;
2741 struct sockaddr *ca;
2742 char *name = NULL;
2743 char *toname = NULL;
2744
2745 fbvap = NULL;
2746 favap = NULL;
2747 tbvap = NULL;
2748 tavap = NULL;
2749 tvp = NULL;
2750
2751 fvp = nfs3_fhtovp(&args->from.dir, exi);
2752
2753 DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2754 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2755 RENAME3args *, args);
2756
2757 if (fvp == NULL) {
2758 error = ESTALE;
2759 goto err;
2760 }
2761
2762 if (is_system_labeled()) {
2763 clabel = req->rq_label;
2764 ASSERT(clabel != NULL);
2765 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2766 "got client label from request(1)", struct svc_req *, req);
2767
2768 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2769 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2770 exi)) {
2771 resp->status = NFS3ERR_ACCES;
2772 goto err1;
2773 }
2774 }
2775 }
2776
2777 fbva.va_mask = AT_ALL;
2778 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2779 favap = fbvap;
2780
2781 fh3 = &args->to.dir;
2782 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2783 if (to_exi == NULL) {
2784 resp->status = NFS3ERR_ACCES;
2785 goto err1;
2786 }
2787 exi_rele(to_exi);
2788
2789 if (to_exi != exi) {
2790 resp->status = NFS3ERR_XDEV;
2791 goto err1;
2792 }
2793
2794 tvp = nfs3_fhtovp(&args->to.dir, exi);
2795 if (tvp == NULL) {
2796 error = ESTALE;
2797 goto err;
2798 }
2799
2800 tbva.va_mask = AT_ALL;
2801 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2802 tavap = tbvap;
2803
2804 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2805 resp->status = NFS3ERR_NOTDIR;
2806 goto err1;
2807 }
2808
2809 if (args->from.name == nfs3nametoolong ||
2810 args->to.name == nfs3nametoolong) {
2811 resp->status = NFS3ERR_NAMETOOLONG;
2812 goto err1;
2813 }
2814 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2815 args->to.name == NULL || *(args->to.name) == '\0') {
2816 resp->status = NFS3ERR_ACCES;
2817 goto err1;
2818 }
2819
2820 if (rdonly(ro, tvp)) {
2821 resp->status = NFS3ERR_ROFS;
2822 goto err1;
2823 }
2824
2825 if (is_system_labeled()) {
2826 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2827 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2828 exi)) {
2829 resp->status = NFS3ERR_ACCES;
2830 goto err1;
2831 }
2832 }
2833 }
2834
2835 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2836 name = nfscmd_convname(ca, exi, args->from.name,
2837 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2838
2839 if (name == NULL) {
2840 resp->status = NFS3ERR_INVAL;
2841 goto err1;
2842 }
2843
2844 toname = nfscmd_convname(ca, exi, args->to.name,
2845 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2846
2847 if (toname == NULL) {
2848 resp->status = NFS3ERR_INVAL;
2849 goto err1;
2850 }
2851
2852 /*
2853 * Check for a conflict with a non-blocking mandatory share
2854 * reservation or V4 delegations.
2855 */
2856 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2857 NULL, cr, NULL, NULL, NULL);
2858 if (error != 0)
2859 goto err;
2860
2861 /*
2862 * If we rename a delegated file we should recall the
2863 * delegation, since future opens should fail or would
2864 * refer to a new file.
2865 */
2866 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2867 resp->status = NFS3ERR_JUKEBOX;
2868 goto err1;
2869 }
2870
2871 /*
2872 * Check for renaming over a delegated file. Check nfs4_deleg_policy
2873 * first to avoid VOP_LOOKUP if possible.
2874 */
2875 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2876 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2877 NULL, NULL, NULL) == 0) {
2878
2879 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2880 VN_RELE(targvp);
2881 resp->status = NFS3ERR_JUKEBOX;
2882 goto err1;
2883 }
2884 VN_RELE(targvp);
2885 }
2886
2887 if (!nbl_need_check(srcvp)) {
2888 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2889 } else {
2890 nbl_start_crit(srcvp, RW_READER);
2891 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2892 error = EACCES;
2893 else
2894 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2895 nbl_end_crit(srcvp);
2896 }
2897 if (error == 0)
2898 vn_renamepath(tvp, srcvp, args->to.name,
2899 strlen(args->to.name));
2900 VN_RELE(srcvp);
2901 srcvp = NULL;
2902
2903 fava.va_mask = AT_ALL;
2904 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2905 tava.va_mask = AT_ALL;
2906 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2907
2908 /*
2909 * Force modified data and metadata out to stable storage.
2910 */
2911 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2912 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2913
2914 if (error)
2915 goto err;
2916
2917 resp->status = NFS3_OK;
2918 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2919 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2920 goto out;
2921
2922 err:
2923 if (curthread->t_flag & T_WOULDBLOCK) {
2924 curthread->t_flag &= ~T_WOULDBLOCK;
2925 resp->status = NFS3ERR_JUKEBOX;
2926 } else {
2927 resp->status = puterrno3(error);
2928 }
2929 err1:
2930 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2931 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2932
2933 out:
2934 if (name != NULL && name != args->from.name)
2935 kmem_free(name, MAXPATHLEN + 1);
2936 if (toname != NULL && toname != args->to.name)
2937 kmem_free(toname, MAXPATHLEN + 1);
2938
2939 DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2940 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2941 RENAME3res *, resp);
2942 if (fvp != NULL)
2943 VN_RELE(fvp);
2944 if (tvp != NULL)
2945 VN_RELE(tvp);
2946 }
2947
2948 void *
2949 rfs3_rename_getfh(RENAME3args *args)
2950 {
2951
2952 return (&args->from.dir);
2953 }
2954
2955 void
2956 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2957 struct svc_req *req, cred_t *cr, bool_t ro)
2958 {
2959 int error;
2960 vnode_t *vp;
2961 vnode_t *dvp;
2962 struct vattr *vap;
2963 struct vattr va;
2964 struct vattr *bvap;
2965 struct vattr bva;
2966 struct vattr *avap;
2967 struct vattr ava;
2968 nfs_fh3 *fh3;
2969 struct exportinfo *to_exi;
2970 bslabel_t *clabel;
2971 struct sockaddr *ca;
2972 char *name = NULL;
2973
2974 vap = NULL;
2975 bvap = NULL;
2976 avap = NULL;
2977 dvp = NULL;
2978
2979 vp = nfs3_fhtovp(&args->file, exi);
2980
2981 DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2982 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2983 LINK3args *, args);
2984
2985 if (vp == NULL) {
2986 error = ESTALE;
2987 goto out;
2988 }
2989
2990 va.va_mask = AT_ALL;
2991 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2992
2993 fh3 = &args->link.dir;
2994 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2995 if (to_exi == NULL) {
2996 resp->status = NFS3ERR_ACCES;
2997 goto out1;
2998 }
2999 exi_rele(to_exi);
3000
3001 if (to_exi != exi) {
3002 resp->status = NFS3ERR_XDEV;
3003 goto out1;
3004 }
3005
3006 if (is_system_labeled()) {
3007 clabel = req->rq_label;
3008
3009 ASSERT(clabel != NULL);
3010 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3011 "got client label from request(1)", struct svc_req *, req);
3012
3013 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3014 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3015 exi)) {
3016 resp->status = NFS3ERR_ACCES;
3017 goto out1;
3018 }
3019 }
3020 }
3021
3022 dvp = nfs3_fhtovp(&args->link.dir, exi);
3023 if (dvp == NULL) {
3024 error = ESTALE;
3025 goto out;
3026 }
3027
3028 bva.va_mask = AT_ALL;
3029 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3030
3031 if (dvp->v_type != VDIR) {
3032 resp->status = NFS3ERR_NOTDIR;
3033 goto out1;
3034 }
3035
3036 if (args->link.name == nfs3nametoolong) {
3037 resp->status = NFS3ERR_NAMETOOLONG;
3038 goto out1;
3039 }
3040
3041 if (args->link.name == NULL || *(args->link.name) == '\0') {
3042 resp->status = NFS3ERR_ACCES;
3043 goto out1;
3044 }
3045
3046 if (rdonly(ro, dvp)) {
3047 resp->status = NFS3ERR_ROFS;
3048 goto out1;
3049 }
3050
3051 if (is_system_labeled()) {
3052 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3053 "got client label from request(1)", struct svc_req *, req);
3054
3055 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3056 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3057 exi)) {
3058 resp->status = NFS3ERR_ACCES;
3059 goto out1;
3060 }
3061 }
3062 }
3063
3064 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3065 name = nfscmd_convname(ca, exi, args->link.name,
3066 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3067
3068 if (name == NULL) {
3069 resp->status = NFS3ERR_SERVERFAULT;
3070 goto out1;
3071 }
3072
3073 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3074
3075 va.va_mask = AT_ALL;
3076 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3077 ava.va_mask = AT_ALL;
3078 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3079
3080 /*
3081 * Force modified data and metadata out to stable storage.
3082 */
3083 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3084 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3085
3086 if (error)
3087 goto out;
3088
3089 VN_RELE(dvp);
3090
3091 resp->status = NFS3_OK;
3092 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3093 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3094
3095 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3096 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3097 LINK3res *, resp);
3098
3099 VN_RELE(vp);
3100
3101 return;
3102
3103 out:
3104 if (curthread->t_flag & T_WOULDBLOCK) {
3105 curthread->t_flag &= ~T_WOULDBLOCK;
3106 resp->status = NFS3ERR_JUKEBOX;
3107 } else
3108 resp->status = puterrno3(error);
3109 out1:
3110 if (name != NULL && name != args->link.name)
3111 kmem_free(name, MAXPATHLEN + 1);
3112
3113 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3114 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3115 LINK3res *, resp);
3116
3117 if (vp != NULL)
3118 VN_RELE(vp);
3119 if (dvp != NULL)
3120 VN_RELE(dvp);
3121 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3122 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3123 }
3124
3125 void *
3126 rfs3_link_getfh(LINK3args *args)
3127 {
3128
3129 return (&args->file);
3130 }
3131
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose name length is specified by
 * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
 * is not, then we need to check to make sure that this error does not
 * need to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is comprised of the following:
 *
 * status - 1 * BYTES_PER_XDR_UNIT
 * attr. flag - 1 * BYTES_PER_XDR_UNIT
 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * end of list - 1 * BYTES_PER_XDR_UNIT
 * end of file - 1 * BYTES_PER_XDR_UNIT
 * Length of the directory entry name, rounded up to a multiple of
 * BYTES_PER_XDR_UNIT
 */
3156
/*
 * Fixed XDR overhead of a one-entry READDIR3 reply plus the entry name,
 * padded to a whole number of XDR units.
 */
#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	(1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
	BYTES_PER_XDR_UNIT)
3160
3161 /* ARGSUSED */
3162 void
3163 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3164 struct svc_req *req, cred_t *cr, bool_t ro)
3165 {
3166 int error;
3167 vnode_t *vp;
3168 struct vattr *vap;
3169 struct vattr va;
3170 struct iovec iov;
3171 struct uio uio;
3172 char *data;
3173 int iseof;
3174 int bufsize;
3175 int namlen;
3176 uint_t count;
3177 struct sockaddr *ca;
3178
3179 vap = NULL;
3180
3181 vp = nfs3_fhtovp(&args->dir, exi);
3182
3183 DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3184 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3185 READDIR3args *, args);
3186
3187 if (vp == NULL) {
3188 error = ESTALE;
3189 goto out;
3190 }
3191
3192 if (is_system_labeled()) {
3193 bslabel_t *clabel = req->rq_label;
3194
3195 ASSERT(clabel != NULL);
3196 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3197 "got client label from request(1)", struct svc_req *, req);
3198
3199 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3200 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3201 exi)) {
3202 resp->status = NFS3ERR_ACCES;
3203 goto out1;
3204 }
3205 }
3206 }
3207
3208 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3209
3210 va.va_mask = AT_ALL;
3211 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3212
3213 if (vp->v_type != VDIR) {
3214 resp->status = NFS3ERR_NOTDIR;
3215 goto out1;
3216 }
3217
3218 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3219 if (error)
3220 goto out;
3221
3222 /*
3223 * Now don't allow arbitrary count to alloc;
3224 * allow the maximum not to exceed rfs3_tsize()
3225 */
3226 if (args->count > rfs3_tsize(req))
3227 args->count = rfs3_tsize(req);
3228
3229 /*
3230 * Make sure that there is room to read at least one entry
3231 * if any are available.
3232 */
3233 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3234 count = DIRENT64_RECLEN(MAXNAMELEN);
3235 else
3236 count = args->count;
3237
3238 data = kmem_alloc(count, KM_SLEEP);
3239
3240 iov.iov_base = data;
3241 iov.iov_len = count;
3242 uio.uio_iov = &iov;
3243 uio.uio_iovcnt = 1;
3244 uio.uio_segflg = UIO_SYSSPACE;
3245 uio.uio_extflg = UIO_COPY_CACHED;
3246 uio.uio_loffset = (offset_t)args->cookie;
3247 uio.uio_resid = count;
3248
3249 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3250
3251 va.va_mask = AT_ALL;
3252 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3253
3254 if (error) {
3255 kmem_free(data, count);
3256 goto out;
3257 }
3258
3259 /*
3260 * If the count was not large enough to be able to guarantee
3261 * to be able to return at least one entry, then need to
3262 * check to see if NFS3ERR_TOOSMALL should be returned.
3263 */
3264 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3265 /*
3266 * bufsize is used to keep track of the size of the response.
3267 * It is primed with:
3268 * 1 for the status +
3269 * 1 for the dir_attributes.attributes boolean +
3270 * 2 for the cookie verifier
3271 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3272 * to bytes. If there are directory attributes to be
3273 * returned, then:
3274 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3275 * time BYTES_PER_XDR_UNIT is added to account for them.
3276 */
3277 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3278 if (vap != NULL)
3279 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3280 /*
3281 * An entry is composed of:
3282 * 1 for the true/false list indicator +
3283 * 2 for the fileid +
3284 * 1 for the length of the name +
3285 * 2 for the cookie +
3286 * all times BYTES_PER_XDR_UNIT to convert from
3287 * XDR units to bytes, plus the length of the name
3288 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3289 */
3290 if (count != uio.uio_resid) {
3291 namlen = strlen(((struct dirent64 *)data)->d_name);
3292 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3293 roundup(namlen, BYTES_PER_XDR_UNIT);
3294 }
3295 /*
3296 * We need to check to see if the number of bytes left
3297 * to go into the buffer will actually fit into the
3298 * buffer. This is calculated as the size of this
3299 * entry plus:
3300 * 1 for the true/false list indicator +
3301 * 1 for the eof indicator
3302 * times BYTES_PER_XDR_UNIT to convert from from
3303 * XDR units to bytes.
3304 */
3305 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3306 if (bufsize > args->count) {
3307 kmem_free(data, count);
3308 resp->status = NFS3ERR_TOOSMALL;
3309 goto out1;
3310 }
3311 }
3312
3313 /*
3314 * Have a valid readir buffer for the native character
3315 * set. Need to check if a conversion is necessary and
3316 * potentially rewrite the whole buffer. Note that if the
3317 * conversion expands names enough, the structure may not
3318 * fit. In this case, we need to drop entries until if fits
3319 * and patch the counts in order that the next readdir will
3320 * get the correct entries.
3321 */
3322 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3323 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3324
3325
3326 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3327
3328 #if 0 /* notyet */
3329 /*
3330 * Don't do this. It causes local disk writes when just
3331 * reading the file and the overhead is deemed larger
3332 * than the benefit.
3333 */
3334 /*
3335 * Force modified metadata out to stable storage.
3336 */
3337 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3338 #endif
3339
3340 resp->status = NFS3_OK;
3341 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3342 resp->resok.cookieverf = 0;
3343 resp->resok.reply.entries = (entry3 *)data;
3344 resp->resok.reply.eof = iseof;
3345 resp->resok.size = count - uio.uio_resid;
3346 resp->resok.count = args->count;
3347 resp->resok.freecount = count;
3348
3349 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3350 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3351 READDIR3res *, resp);
3352
3353 VN_RELE(vp);
3354
3355 return;
3356
3357 out:
3358 if (curthread->t_flag & T_WOULDBLOCK) {
3359 curthread->t_flag &= ~T_WOULDBLOCK;
3360 resp->status = NFS3ERR_JUKEBOX;
3361 } else
3362 resp->status = puterrno3(error);
3363 out1:
3364 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3365
3366 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3367 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3368 READDIR3res *, resp);
3369
3370 if (vp != NULL) {
3371 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3372 VN_RELE(vp);
3373 }
3374 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3375 }
3376
3377 void *
3378 rfs3_readdir_getfh(READDIR3args *args)
3379 {
3380
3381 return (&args->dir);
3382 }
3383
3384 void
3385 rfs3_readdir_free(READDIR3res *resp)
3386 {
3387
3388 if (resp->status == NFS3_OK)
3389 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3390 }
3391
/*
 * Advance from one dirent64 record to the record that follows it, using
 * the record's own d_reclen as the byte offset.  Any earlier definition
 * of nextdp is discarded first.
 */
#undef	nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3396
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 * BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * Length of the entry name, rounded up to a multiple of
 * BYTES_PER_XDR_UNIT
 */
3416 #define NFS3_READDIRPLUS_ENTRY(namelen) \
3417 ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
3418 BYTES_PER_XDR_UNIT + \
3419 NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT))
3420
3421 static int rfs3_readdir_unit = MAXBSIZE;
3422
3423 /* ARGSUSED */
3424 void
3425 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3426 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3427 {
3428 int error;
3429 vnode_t *vp;
3430 struct vattr *vap;
3431 struct vattr va;
3432 struct iovec iov;
3433 struct uio uio;
3434 char *data;
3435 int iseof;
3436 struct dirent64 *dp;
3437 vnode_t *nvp;
3438 struct vattr *nvap;
3439 struct vattr nva;
3440 entryplus3_info *infop = NULL;
3441 int size = 0;
3442 int nents = 0;
3443 int bufsize = 0;
3444 int entrysize = 0;
3445 int tofit = 0;
3446 int rd_unit = rfs3_readdir_unit;
3447 int prev_len;
3448 int space_left;
3449 int i;
3450 uint_t *namlen = NULL;
3451 char *ndata = NULL;
3452 struct sockaddr *ca;
3453 size_t ret;
3454
3455 vap = NULL;
3456
3457 vp = nfs3_fhtovp(&args->dir, exi);
3458
3459 DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3460 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3461 READDIRPLUS3args *, args);
3462
3463 if (vp == NULL) {
3464 error = ESTALE;
3465 goto out;
3466 }
3467
3468 if (is_system_labeled()) {
3469 bslabel_t *clabel = req->rq_label;
3470
3471 ASSERT(clabel != NULL);
3472 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3473 char *, "got client label from request(1)",
3474 struct svc_req *, req);
3475
3476 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3477 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3478 exi)) {
3479 resp->status = NFS3ERR_ACCES;
3480 goto out1;
3481 }
3482 }
3483 }
3484
3485 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3486
3487 va.va_mask = AT_ALL;
3488 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3489
3490 if (vp->v_type != VDIR) {
3491 error = ENOTDIR;
3492 goto out;
3493 }
3494
3495 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3496 if (error)
3497 goto out;
3498
3499 /*
3500 * Don't allow arbitrary counts for allocation
3501 */
3502 if (args->maxcount > rfs3_tsize(req))
3503 args->maxcount = rfs3_tsize(req);
3504
3505 /*
3506 * Make sure that there is room to read at least one entry
3507 * if any are available
3508 */
3509 args->dircount = MIN(args->dircount, args->maxcount);
3510
3511 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3512 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3513
3514 /*
3515 * This allocation relies on a minimum directory entry
3516 * being roughly 24 bytes. Therefore, the namlen array
3517 * will have enough space based on the maximum number of
3518 * entries to read.
3519 */
3520 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3521
3522 space_left = args->dircount;
3523 data = kmem_alloc(args->dircount, KM_SLEEP);
3524 dp = (struct dirent64 *)data;
3525 uio.uio_iov = &iov;
3526 uio.uio_iovcnt = 1;
3527 uio.uio_segflg = UIO_SYSSPACE;
3528 uio.uio_extflg = UIO_COPY_CACHED;
3529 uio.uio_loffset = (offset_t)args->cookie;
3530
3531 /*
3532 * bufsize is used to keep track of the size of the response as we
3533 * get post op attributes and filehandles for each entry. This is
3534 * an optimization as the server may have read more entries than will
3535 * fit in the buffer specified by maxcount. We stop calculating
3536 * post op attributes and filehandles once we have exceeded maxcount.
3537 * This will minimize the effect of truncation.
3538 *
3539 * It is primed with:
3540 * 1 for the status +
3541 * 1 for the dir_attributes.attributes boolean +
3542 * 2 for the cookie verifier
3543 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3544 * to bytes. If there are directory attributes to be
3545 * returned, then:
3546 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3547 * time BYTES_PER_XDR_UNIT is added to account for them.
3548 */
3549 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3550 if (vap != NULL)
3551 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3552
3553 getmoredents:
3554 /*
3555 * Here we make a check so that our read unit is not larger than
3556 * the space left in the buffer.
3557 */
3558 rd_unit = MIN(rd_unit, space_left);
3559 iov.iov_base = (char *)dp;
3560 iov.iov_len = rd_unit;
3561 uio.uio_resid = rd_unit;
3562 prev_len = rd_unit;
3563
3564 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3565
3566 if (error) {
3567 kmem_free(data, args->dircount);
3568 goto out;
3569 }
3570
3571 if (uio.uio_resid == prev_len && !iseof) {
3572 if (nents == 0) {
3573 kmem_free(data, args->dircount);
3574 resp->status = NFS3ERR_TOOSMALL;
3575 goto out1;
3576 }
3577
3578 /*
3579 * We could not get any more entries, so get the attributes
3580 * and filehandle for the entries already obtained.
3581 */
3582 goto good;
3583 }
3584
3585 /*
3586 * We estimate the size of the response by assuming the
3587 * entry exists and attributes and filehandle are also valid
3588 */
3589 for (size = prev_len - uio.uio_resid;
3590 size > 0;
3591 size -= dp->d_reclen, dp = nextdp(dp)) {
3592
3593 if (dp->d_ino == 0) {
3594 nents++;
3595 continue;
3596 }
3597
3598 namlen[nents] = strlen(dp->d_name);
3599 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3600
3601 /*
3602 * We need to check to see if the number of bytes left
3603 * to go into the buffer will actually fit into the
3604 * buffer. This is calculated as the size of this
3605 * entry plus:
3606 * 1 for the true/false list indicator +
3607 * 1 for the eof indicator
3608 * times BYTES_PER_XDR_UNIT to convert from XDR units
3609 * to bytes.
3610 *
3611 * Also check the dircount limit against the first entry read
3612 *
3613 */
3614 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3615 if (bufsize + tofit > args->maxcount) {
3616 /*
3617 * We make a check here to see if this was the
3618 * first entry being measured. If so, then maxcount
3619 * was too small to begin with and so we need to
3620 * return with NFS3ERR_TOOSMALL.
3621 */
3622 if (nents == 0) {
3623 kmem_free(data, args->dircount);
3624 resp->status = NFS3ERR_TOOSMALL;
3625 goto out1;
3626 }
3627 iseof = FALSE;
3628 goto good;
3629 }
3630 bufsize += entrysize;
3631 nents++;
3632 }
3633
3634 /*
3635 * If there is enough room to fit at least 1 more entry including
3636 * post op attributes and filehandle in the buffer AND that we haven't
3637 * exceeded dircount then go back and get some more.
3638 */
3639 if (!iseof &&
3640 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3641 space_left -= (prev_len - uio.uio_resid);
3642 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3643 goto getmoredents;
3644
3645 /* else, fall through */
3646 }
3647 good:
3648 va.va_mask = AT_ALL;
3649 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3650
3651 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3652
3653 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3654 resp->resok.infop = infop;
3655
3656 dp = (struct dirent64 *)data;
3657 for (i = 0; i < nents; i++) {
3658
3659 if (dp->d_ino == 0) {
3660 infop[i].attr.attributes = FALSE;
3661 infop[i].fh.handle_follows = FALSE;
3662 dp = nextdp(dp);
3663 continue;
3664 }
3665
3666 infop[i].namelen = namlen[i];
3667
3668 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3669 NULL, NULL, NULL);
3670 if (error) {
3671 infop[i].attr.attributes = FALSE;
3672 infop[i].fh.handle_follows = FALSE;
3673 dp = nextdp(dp);
3674 continue;
3675 }
3676
3677 nva.va_mask = AT_ALL;
3678 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3679
3680 /* Lie about the object type for a referral */
3681 if (vn_is_nfs_reparse(nvp, cr))
3682 nvap->va_type = VLNK;
3683
3684 if (vn_ismntpt(nvp)) {
3685 infop[i].attr.attributes = FALSE;
3686 infop[i].fh.handle_follows = FALSE;
3687 } else {
3688 vattr_to_post_op_attr(nvap, &infop[i].attr);
3689
3690 error = makefh3(&infop[i].fh.handle, nvp, exi);
3691 if (!error)
3692 infop[i].fh.handle_follows = TRUE;
3693 else
3694 infop[i].fh.handle_follows = FALSE;
3695 }
3696
3697 VN_RELE(nvp);
3698 dp = nextdp(dp);
3699 }
3700
3701 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3702 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3703 if (ndata == NULL)
3704 ndata = data;
3705
3706 if (ret > 0) {
3707 /*
3708 * We had to drop one or more entries in order to fit
3709 * during the character conversion. We need to patch
3710 * up the size and eof info.
3711 */
3712 if (iseof)
3713 iseof = FALSE;
3714
3715 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3716 nents, ret);
3717 }
3718
3719
3720 #if 0 /* notyet */
3721 /*
3722 * Don't do this. It causes local disk writes when just
3723 * reading the file and the overhead is deemed larger
3724 * than the benefit.
3725 */
3726 /*
3727 * Force modified metadata out to stable storage.
3728 */
3729 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3730 #endif
3731
3732 kmem_free(namlen, args->dircount);
3733
3734 resp->status = NFS3_OK;
3735 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3736 resp->resok.cookieverf = 0;
3737 resp->resok.reply.entries = (entryplus3 *)ndata;
3738 resp->resok.reply.eof = iseof;
3739 resp->resok.size = nents;
3740 resp->resok.count = args->dircount - ret;
3741 resp->resok.maxcount = args->maxcount;
3742
3743 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3744 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3745 READDIRPLUS3res *, resp);
3746
3747 VN_RELE(vp);
3748
3749 return;
3750
3751 out:
3752 if (curthread->t_flag & T_WOULDBLOCK) {
3753 curthread->t_flag &= ~T_WOULDBLOCK;
3754 resp->status = NFS3ERR_JUKEBOX;
3755 } else {
3756 resp->status = puterrno3(error);
3757 }
3758 out1:
3759 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3760
3761 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3762 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3763 READDIRPLUS3res *, resp);
3764
3765 if (vp != NULL) {
3766 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3767 VN_RELE(vp);
3768 }
3769
3770 if (namlen != NULL)
3771 kmem_free(namlen, args->dircount);
3772
3773 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3774 }
3775
3776 void *
3777 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3778 {
3779
3780 return (&args->dir);
3781 }
3782
3783 void
3784 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3785 {
3786
3787 if (resp->status == NFS3_OK) {
3788 kmem_free(resp->resok.reply.entries, resp->resok.count);
3789 kmem_free(resp->resok.infop,
3790 resp->resok.size * sizeof (struct entryplus3_info));
3791 }
3792 }
3793
3794 /* ARGSUSED */
3795 void
3796 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3797 struct svc_req *req, cred_t *cr, bool_t ro)
3798 {
3799 int error;
3800 vnode_t *vp;
3801 struct vattr *vap;
3802 struct vattr va;
3803 struct statvfs64 sb;
3804
3805 vap = NULL;
3806
3807 vp = nfs3_fhtovp(&args->fsroot, exi);
3808
3809 DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3810 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3811 FSSTAT3args *, args);
3812
3813 if (vp == NULL) {
3814 error = ESTALE;
3815 goto out;
3816 }
3817
3818 if (is_system_labeled()) {
3819 bslabel_t *clabel = req->rq_label;
3820
3821 ASSERT(clabel != NULL);
3822 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3823 "got client label from request(1)", struct svc_req *, req);
3824
3825 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3826 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3827 exi)) {
3828 resp->status = NFS3ERR_ACCES;
3829 goto out1;
3830 }
3831 }
3832 }
3833
3834 error = VFS_STATVFS(vp->v_vfsp, &sb);
3835
3836 va.va_mask = AT_ALL;
3837 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3838
3839 if (error)
3840 goto out;
3841
3842 resp->status = NFS3_OK;
3843 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3844 if (sb.f_blocks != (fsblkcnt64_t)-1)
3845 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3846 else
3847 resp->resok.tbytes = (size3)sb.f_blocks;
3848 if (sb.f_bfree != (fsblkcnt64_t)-1)
3849 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3850 else
3851 resp->resok.fbytes = (size3)sb.f_bfree;
3852 if (sb.f_bavail != (fsblkcnt64_t)-1)
3853 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3854 else
3855 resp->resok.abytes = (size3)sb.f_bavail;
3856 resp->resok.tfiles = (size3)sb.f_files;
3857 resp->resok.ffiles = (size3)sb.f_ffree;
3858 resp->resok.afiles = (size3)sb.f_favail;
3859 resp->resok.invarsec = 0;
3860
3861 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3862 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3863 FSSTAT3res *, resp);
3864 VN_RELE(vp);
3865
3866 return;
3867
3868 out:
3869 if (curthread->t_flag & T_WOULDBLOCK) {
3870 curthread->t_flag &= ~T_WOULDBLOCK;
3871 resp->status = NFS3ERR_JUKEBOX;
3872 } else
3873 resp->status = puterrno3(error);
3874 out1:
3875 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3876 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3877 FSSTAT3res *, resp);
3878
3879 if (vp != NULL)
3880 VN_RELE(vp);
3881 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3882 }
3883
3884 void *
3885 rfs3_fsstat_getfh(FSSTAT3args *args)
3886 {
3887
3888 return (&args->fsroot);
3889 }
3890
3891 /* ARGSUSED */
3892 void
3893 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3894 struct svc_req *req, cred_t *cr, bool_t ro)
3895 {
3896 vnode_t *vp;
3897 struct vattr *vap;
3898 struct vattr va;
3899 uint32_t xfer_size;
3900 ulong_t l = 0;
3901 int error;
3902
3903 vp = nfs3_fhtovp(&args->fsroot, exi);
3904
3905 DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3906 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3907 FSINFO3args *, args);
3908
3909 if (vp == NULL) {
3910 if (curthread->t_flag & T_WOULDBLOCK) {
3911 curthread->t_flag &= ~T_WOULDBLOCK;
3912 resp->status = NFS3ERR_JUKEBOX;
3913 } else
3914 resp->status = NFS3ERR_STALE;
3915 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3916 goto out;
3917 }
3918
3919 if (is_system_labeled()) {
3920 bslabel_t *clabel = req->rq_label;
3921
3922 ASSERT(clabel != NULL);
3923 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3924 "got client label from request(1)", struct svc_req *, req);
3925
3926 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3927 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3928 exi)) {
3929 resp->status = NFS3ERR_STALE;
3930 vattr_to_post_op_attr(NULL,
3931 &resp->resfail.obj_attributes);
3932 goto out;
3933 }
3934 }
3935 }
3936
3937 va.va_mask = AT_ALL;
3938 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3939
3940 resp->status = NFS3_OK;
3941 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3942 xfer_size = rfs3_tsize(req);
3943 resp->resok.rtmax = xfer_size;
3944 resp->resok.rtpref = xfer_size;
3945 resp->resok.rtmult = DEV_BSIZE;
3946 resp->resok.wtmax = xfer_size;
3947 resp->resok.wtpref = xfer_size;
3948 resp->resok.wtmult = DEV_BSIZE;
3949 resp->resok.dtpref = MAXBSIZE;
3950
3951 /*
3952 * Large file spec: want maxfilesize based on limit of
3953 * underlying filesystem. We can guess 2^31-1 if need be.
3954 */
3955 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3956 if (error) {
3957 resp->status = puterrno3(error);
3958 goto out;
3959 }
3960
3961 /*
3962 * If the underlying file system does not support _PC_FILESIZEBITS,
3963 * return a reasonable default. Note that error code on VOP_PATHCONF
3964 * will be 0, even if the underlying file system does not support
3965 * _PC_FILESIZEBITS.
3966 */
3967 if (l == (ulong_t)-1) {
3968 resp->resok.maxfilesize = MAXOFF32_T;
3969 } else {
3970 if (l >= (sizeof (uint64_t) * 8))
3971 resp->resok.maxfilesize = INT64_MAX;
3972 else
3973 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3974 }
3975
3976 resp->resok.time_delta.seconds = 0;
3977 resp->resok.time_delta.nseconds = 1000;
3978 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3979 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3980
3981 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3982 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3983 FSINFO3res *, resp);
3984
3985 VN_RELE(vp);
3986
3987 return;
3988
3989 out:
3990 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3991 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
3992 FSINFO3res *, resp);
3993 if (vp != NULL)
3994 VN_RELE(vp);
3995 }
3996
3997 void *
3998 rfs3_fsinfo_getfh(FSINFO3args *args)
3999 {
4000 return (&args->fsroot);
4001 }
4002
4003 /* ARGSUSED */
4004 void
4005 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4006 struct svc_req *req, cred_t *cr, bool_t ro)
4007 {
4008 int error;
4009 vnode_t *vp;
4010 struct vattr *vap;
4011 struct vattr va;
4012 ulong_t val;
4013
4014 vap = NULL;
4015
4016 vp = nfs3_fhtovp(&args->object, exi);
4017
4018 DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4019 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4020 PATHCONF3args *, args);
4021
4022 if (vp == NULL) {
4023 error = ESTALE;
4024 goto out;
4025 }
4026
4027 if (is_system_labeled()) {
4028 bslabel_t *clabel = req->rq_label;
4029
4030 ASSERT(clabel != NULL);
4031 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4032 "got client label from request(1)", struct svc_req *, req);
4033
4034 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4035 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4036 exi)) {
4037 resp->status = NFS3ERR_ACCES;
4038 goto out1;
4039 }
4040 }
4041 }
4042
4043 va.va_mask = AT_ALL;
4044 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4045
4046 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4047 if (error)
4048 goto out;
4049 resp->resok.info.link_max = (uint32)val;
4050
4051 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4052 if (error)
4053 goto out;
4054 resp->resok.info.name_max = (uint32)val;
4055
4056 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4057 if (error)
4058 goto out;
4059 if (val == 1)
4060 resp->resok.info.no_trunc = TRUE;
4061 else
4062 resp->resok.info.no_trunc = FALSE;
4063
4064 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4065 if (error)
4066 goto out;
4067 if (val == 1)
4068 resp->resok.info.chown_restricted = TRUE;
4069 else
4070 resp->resok.info.chown_restricted = FALSE;
4071
4072 resp->status = NFS3_OK;
4073 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4074 resp->resok.info.case_insensitive = FALSE;
4075 resp->resok.info.case_preserving = TRUE;
4076 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4077 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4078 PATHCONF3res *, resp);
4079 VN_RELE(vp);
4080 return;
4081
4082 out:
4083 if (curthread->t_flag & T_WOULDBLOCK) {
4084 curthread->t_flag &= ~T_WOULDBLOCK;
4085 resp->status = NFS3ERR_JUKEBOX;
4086 } else
4087 resp->status = puterrno3(error);
4088 out1:
4089 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4090 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4091 PATHCONF3res *, resp);
4092 if (vp != NULL)
4093 VN_RELE(vp);
4094 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4095 }
4096
4097 void *
4098 rfs3_pathconf_getfh(PATHCONF3args *args)
4099 {
4100
4101 return (&args->object);
4102 }
4103
4104 void
4105 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4106 struct svc_req *req, cred_t *cr, bool_t ro)
4107 {
4108 nfs3_srv_t *ns;
4109 int error;
4110 vnode_t *vp;
4111 struct vattr *bvap;
4112 struct vattr bva;
4113 struct vattr *avap;
4114 struct vattr ava;
4115
4116 bvap = NULL;
4117 avap = NULL;
4118
4119 vp = nfs3_fhtovp(&args->file, exi);
4120
4121 DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4122 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4123 COMMIT3args *, args);
4124
4125 if (vp == NULL) {
4126 error = ESTALE;
4127 goto out;
4128 }
4129
4130 ASSERT3P(curzone, ==, exi->exi_zone); /* exi is guaranteed non-NULL. */
4131 ns = zone_getspecific(rfs3_zone_key, curzone);
4132 bva.va_mask = AT_ALL;
4133 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4134
4135 /*
4136 * If we can't get the attributes, then we can't do the
4137 * right access checking. So, we'll fail the request.
4138 */
4139 if (error)
4140 goto out;
4141
4142 bvap = &bva;
4143
4144 if (rdonly(ro, vp)) {
4145 resp->status = NFS3ERR_ROFS;
4146 goto out1;
4147 }
4148
4149 if (vp->v_type != VREG) {
4150 resp->status = NFS3ERR_INVAL;
4151 goto out1;
4152 }
4153
4154 if (is_system_labeled()) {
4155 bslabel_t *clabel = req->rq_label;
4156
4157 ASSERT(clabel != NULL);
4158 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4159 "got client label from request(1)", struct svc_req *, req);
4160
4161 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4162 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4163 exi)) {
4164 resp->status = NFS3ERR_ACCES;
4165 goto out1;
4166 }
4167 }
4168 }
4169
4170 if (crgetuid(cr) != bva.va_uid &&
4171 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4172 goto out;
4173
4174 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4175
4176 ava.va_mask = AT_ALL;
4177 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4178
4179 if (error)
4180 goto out;
4181
4182 resp->status = NFS3_OK;
4183 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4184 resp->resok.verf = ns->write3verf;
4185
4186 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4187 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4188 COMMIT3res *, resp);
4189
4190 VN_RELE(vp);
4191
4192 return;
4193
4194 out:
4195 if (curthread->t_flag & T_WOULDBLOCK) {
4196 curthread->t_flag &= ~T_WOULDBLOCK;
4197 resp->status = NFS3ERR_JUKEBOX;
4198 } else
4199 resp->status = puterrno3(error);
4200 out1:
4201 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4202 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4203 COMMIT3res *, resp);
4204
4205 if (vp != NULL)
4206 VN_RELE(vp);
4207 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4208 }
4209
4210 void *
4211 rfs3_commit_getfh(COMMIT3args *args)
4212 {
4213
4214 return (&args->file);
4215 }
4216
4217 static int
4218 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4219 {
4220
4221 vap->va_mask = 0;
4222
4223 if (sap->mode.set_it) {
4224 vap->va_mode = (mode_t)sap->mode.mode;
4225 vap->va_mask |= AT_MODE;
4226 }
4227 if (sap->uid.set_it) {
4228 vap->va_uid = (uid_t)sap->uid.uid;
4229 vap->va_mask |= AT_UID;
4230 }
4231 if (sap->gid.set_it) {
4232 vap->va_gid = (gid_t)sap->gid.gid;
4233 vap->va_mask |= AT_GID;
4234 }
4235 if (sap->size.set_it) {
4236 if (sap->size.size > (size3)((u_longlong_t)-1))
4237 return (EINVAL);
4238 vap->va_size = sap->size.size;
4239 vap->va_mask |= AT_SIZE;
4240 }
4241 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4242 #ifndef _LP64
4243 /* check time validity */
4244 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4245 return (EOVERFLOW);
4246 #endif
4247 /*
4248 * nfs protocol defines times as unsigned so don't extend sign,
4249 * unless sysadmin set nfs_allow_preepoch_time.
4250 */
4251 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4252 sap->atime.atime.seconds);
4253 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4254 vap->va_mask |= AT_ATIME;
4255 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4256 gethrestime(&vap->va_atime);
4257 vap->va_mask |= AT_ATIME;
4258 }
4259 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4260 #ifndef _LP64
4261 /* check time validity */
4262 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4263 return (EOVERFLOW);
4264 #endif
4265 /*
4266 * nfs protocol defines times as unsigned so don't extend sign,
4267 * unless sysadmin set nfs_allow_preepoch_time.
4268 */
4269 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4270 sap->mtime.mtime.seconds);
4271 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4272 vap->va_mask |= AT_MTIME;
4273 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4274 gethrestime(&vap->va_mtime);
4275 vap->va_mask |= AT_MTIME;
4276 }
4277
4278 return (0);
4279 }
4280
4281 static const ftype3 vt_to_nf3[] = {
4282 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4283 };
4284
4285 static int
4286 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4287 {
4288
4289 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4290 /* Return error if time or size overflow */
4291 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4292 return (EOVERFLOW);
4293 }
4294 fap->type = vt_to_nf3[vap->va_type];
4295 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4296 fap->nlink = (uint32)vap->va_nlink;
4297 if (vap->va_uid == UID_NOBODY)
4298 fap->uid = (uid3)NFS_UID_NOBODY;
4299 else
4300 fap->uid = (uid3)vap->va_uid;
4301 if (vap->va_gid == GID_NOBODY)
4302 fap->gid = (gid3)NFS_GID_NOBODY;
4303 else
4304 fap->gid = (gid3)vap->va_gid;
4305 fap->size = (size3)vap->va_size;
4306 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4307 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4308 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4309 fap->fsid = (uint64)vap->va_fsid;
4310 fap->fileid = (fileid3)vap->va_nodeid;
4311 fap->atime.seconds = vap->va_atime.tv_sec;
4312 fap->atime.nseconds = vap->va_atime.tv_nsec;
4313 fap->mtime.seconds = vap->va_mtime.tv_sec;
4314 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4315 fap->ctime.seconds = vap->va_ctime.tv_sec;
4316 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4317 return (0);
4318 }
4319
4320 static int
4321 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4322 {
4323
4324 /* Return error if time or size overflow */
4325 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4326 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4327 NFS3_SIZE_OK(vap->va_size))) {
4328 return (EOVERFLOW);
4329 }
4330 wccap->size = (size3)vap->va_size;
4331 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4332 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4333 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4334 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4335 return (0);
4336 }
4337
4338 static void
4339 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4340 {
4341
4342 /* don't return attrs if time overflow */
4343 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4344 poap->attributes = TRUE;
4345 } else
4346 poap->attributes = FALSE;
4347 }
4348
4349 void
4350 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4351 {
4352
4353 /* don't return attrs if time overflow */
4354 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4355 poap->attributes = TRUE;
4356 } else
4357 poap->attributes = FALSE;
4358 }
4359
4360 static void
4361 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4362 {
4363 vattr_to_pre_op_attr(bvap, &wccp->before);
4364 vattr_to_post_op_attr(avap, &wccp->after);
4365 }
4366
4367 static int
4368 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4369 {
4370 struct clist *wcl;
4371 int wlist_len;
4372 count3 count = rok->count;
4373
4374 wcl = args->wlist;
4375 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4376 return (FALSE);
4377
4378 wcl = args->wlist;
4379 rok->wlist_len = wlist_len;
4380 rok->wlist = wcl;
4381 return (TRUE);
4382 }
4383
4384 /* ARGSUSED */
4385 static void *
4386 rfs3_zone_init(zoneid_t zoneid)
4387 {
4388 nfs3_srv_t *ns;
4389 struct rfs3_verf_overlay {
4390 uint_t id; /* a "unique" identifier */
4391 int ts; /* a unique timestamp */
4392 } *verfp;
4393 timestruc_t now;
4394
4395 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4396
4397 /*
4398 * The following algorithm attempts to find a unique verifier
4399 * to be used as the write verifier returned from the server
4400 * to the client. It is important that this verifier change
4401 * whenever the server reboots. Of secondary importance, it
4402 * is important for the verifier to be unique between two
4403 * different servers.
4404 *
4405 * Thus, an attempt is made to use the system hostid and the
4406 * current time in seconds when the nfssrv kernel module is
4407 * loaded. It is assumed that an NFS server will not be able
4408 * to boot and then to reboot in less than a second. If the
4409 * hostid has not been set, then the current high resolution
4410 * time is used. This will ensure different verifiers each
4411 * time the server reboots and minimize the chances that two
4412 * different servers will have the same verifier.
4413 */
4414
4415 #ifndef lint
4416 /*
4417 * We ASSERT that this constant logic expression is
4418 * always true because in the past, it wasn't.
4419 */
4420 ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4421 #endif
4422
4423 gethrestime(&now);
4424 verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4425 verfp->ts = (int)now.tv_sec;
4426 verfp->id = zone_get_hostid(NULL);
4427
4428 if (verfp->id == 0)
4429 verfp->id = (uint_t)now.tv_nsec;
4430
4431 return (ns);
4432 }
4433
4434 /* ARGSUSED */
4435 static void
4436 rfs3_zone_fini(zoneid_t zoneid, void *data)
4437 {
4438 nfs3_srv_t *ns = data;
4439
4440 kmem_free(ns, sizeof (*ns));
4441 }
4442
4443 void
4444 rfs3_srvrinit(void)
4445 {
4446 nfs3_srv_caller_id = fs_new_caller_id();
4447 zone_key_create(&rfs3_zone_key, rfs3_zone_init, NULL, rfs3_zone_fini);
4448 }
4449
/*
 * NFSv3 server teardown hook.
 *
 * NOTE(review): the zone key registered by rfs3_srvrinit() is not deleted
 * here -- confirm that this is intentional.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}