1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2018 Nexenta Systems, Inc.
24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 #include <sys/sdt.h>
52
53 #include <rpc/types.h>
54 #include <rpc/auth.h>
55 #include <rpc/svc.h>
56 #include <rpc/rpc_rdma.h>
57
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61
62 #include <sys/strsubr.h>
63 #include <sys/tsol/label.h>
64 #include <sys/tsol/tndb.h>
65
66 #include <sys/zone.h>
67
68 #include <inet/ip.h>
69 #include <inet/ip6.h>
70
71 /*
72 * Zone global variables of NFSv3 server
73 */
typedef struct nfs3_srv {
	/*
	 * Write verifier value (writeverf3).
	 * NOTE(review): presumably the verifier handed back by WRITE/COMMIT
	 * replies; its consumers are outside this part of the file - confirm.
	 */
	writeverf3	write3verf;
} nfs3_srv_t;
77
78 /*
79 * These are the interface routines for the server side of the
80 * Network File System. See the NFS version 3 protocol specification
81 * for a description of this interface.
82 */
83
/*
 * File-local conversion helpers between the NFSv3 wire attribute types
 * and struct vattr, plus the RDMA read-reply setup helper.  Their
 * definitions are not in this part of the file.
 */
static int	sattr3_to_vattr(sattr3 *, struct vattr *);
static int	vattr_to_fattr3(struct vattr *, fattr3 *);
static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
static int	rdma_setup_read_data3(READ3args *, READ3resok *);

/*
 * Defined elsewhere.  When non-zero (and the request does not use RDMA)
 * rfs3_read() first tries to read through loaned zero-copy buffers.
 */
extern int nfs_loaned_buffers;

/*
 * Stored in caller_context_t.cc_caller_id for the VOP calls issued by
 * rfs3_setattr() and rfs3_read().
 */
u_longlong_t nfs3_srv_caller_id;
/*
 * NOTE(review): presumably the zone key under which the per-zone
 * nfs3_srv_t is registered; its uses are outside this view - confirm.
 */
static zone_key_t rfs3_zone_key;
95
96 /* ARGSUSED */
97 void
98 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
99 struct svc_req *req, cred_t *cr, bool_t ro)
100 {
101 int error;
102 vnode_t *vp;
103 struct vattr va;
104
105 vp = nfs3_fhtovp(&args->object, exi);
106
107 DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
108 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
109 GETATTR3args *, args);
110
111 if (vp == NULL) {
112 error = ESTALE;
113 goto out;
114 }
115
116 va.va_mask = AT_ALL;
117 error = rfs4_delegated_getattr(vp, &va, 0, cr);
118
119 if (!error) {
120 /* Lie about the object type for a referral */
121 if (vn_is_nfs_reparse(vp, cr))
122 va.va_type = VLNK;
123
124 /* overflow error if time or size is out of range */
125 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
126 if (error)
127 goto out;
128 resp->status = NFS3_OK;
129
130 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
131 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
132 GETATTR3res *, resp);
133
134 VN_RELE(vp);
135
136 return;
137 }
138
139 out:
140 if (curthread->t_flag & T_WOULDBLOCK) {
141 curthread->t_flag &= ~T_WOULDBLOCK;
142 resp->status = NFS3ERR_JUKEBOX;
143 } else
144 resp->status = puterrno3(error);
145
146 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
147 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
148 GETATTR3res *, resp);
149
150 if (vp != NULL)
151 VN_RELE(vp);
152 }
153
154 void *
155 rfs3_getattr_getfh(GETATTR3args *args)
156 {
157 return (&args->object);
158 }
159
160 void
161 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
162 struct svc_req *req, cred_t *cr, bool_t ro)
163 {
164 int error;
165 vnode_t *vp;
166 struct vattr *bvap;
167 struct vattr bva;
168 struct vattr *avap;
169 struct vattr ava;
170 int flag;
171 int in_crit = 0;
172 struct flock64 bf;
173 caller_context_t ct;
174
175 bvap = NULL;
176 avap = NULL;
177
178 vp = nfs3_fhtovp(&args->object, exi);
179
180 DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
181 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
182 SETATTR3args *, args);
183
184 if (vp == NULL) {
185 error = ESTALE;
186 goto out;
187 }
188
189 error = sattr3_to_vattr(&args->new_attributes, &ava);
190 if (error)
191 goto out;
192
193 if (is_system_labeled()) {
194 bslabel_t *clabel = req->rq_label;
195
196 ASSERT(clabel != NULL);
197 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
198 "got client label from request(1)", struct svc_req *, req);
199
200 if (!blequal(&l_admin_low->tsl_label, clabel)) {
201 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
202 exi)) {
203 resp->status = NFS3ERR_ACCES;
204 goto out1;
205 }
206 }
207 }
208
209 /*
210 * We need to specially handle size changes because of
211 * possible conflicting NBMAND locks. Get into critical
212 * region before VOP_GETATTR, so the size attribute is
213 * valid when checking conflicts.
214 *
215 * Also, check to see if the v4 side of the server has
216 * delegated this file. If so, then we return JUKEBOX to
217 * allow the client to retrasmit its request.
218 */
219 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
220 if (nbl_need_check(vp)) {
221 nbl_start_crit(vp, RW_READER);
222 in_crit = 1;
223 }
224 }
225
226 bva.va_mask = AT_ALL;
227 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
228
229 /*
230 * If we can't get the attributes, then we can't do the
231 * right access checking. So, we'll fail the request.
232 */
233 if (error)
234 goto out;
235
236 bvap = &bva;
237
238 if (rdonly(ro, vp)) {
239 resp->status = NFS3ERR_ROFS;
240 goto out1;
241 }
242
243 if (args->guard.check &&
244 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
245 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
246 resp->status = NFS3ERR_NOT_SYNC;
247 goto out1;
248 }
249
250 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
251 flag = ATTR_UTIME;
252 else
253 flag = 0;
254
255 /*
256 * If the filesystem is exported with nosuid, then mask off
257 * the setuid and setgid bits.
258 */
259 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
260 (exi->exi_export.ex_flags & EX_NOSUID))
261 ava.va_mode &= ~(VSUID | VSGID);
262
263 ct.cc_sysid = 0;
264 ct.cc_pid = 0;
265 ct.cc_caller_id = nfs3_srv_caller_id;
266 ct.cc_flags = CC_DONTBLOCK;
267
268 /*
269 * We need to specially handle size changes because it is
270 * possible for the client to create a file with modes
271 * which indicate read-only, but with the file opened for
272 * writing. If the client then tries to set the size of
273 * the file, then the normal access checking done in
274 * VOP_SETATTR would prevent the client from doing so,
275 * although it should be legal for it to do so. To get
276 * around this, we do the access checking for ourselves
277 * and then use VOP_SPACE which doesn't do the access
278 * checking which VOP_SETATTR does. VOP_SPACE can only
279 * operate on VREG files, let VOP_SETATTR handle the other
280 * extremely rare cases.
281 * Also the client should not be allowed to change the
282 * size of the file if there is a conflicting non-blocking
283 * mandatory lock in the region the change.
284 */
285 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
286 if (in_crit) {
287 u_offset_t offset;
288 ssize_t length;
289
290 if (ava.va_size < bva.va_size) {
291 offset = ava.va_size;
292 length = bva.va_size - ava.va_size;
293 } else {
294 offset = bva.va_size;
295 length = ava.va_size - bva.va_size;
296 }
297 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
298 NULL)) {
299 error = EACCES;
300 goto out;
301 }
302 }
303
304 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
305 ava.va_mask &= ~AT_SIZE;
306 bf.l_type = F_WRLCK;
307 bf.l_whence = 0;
308 bf.l_start = (off64_t)ava.va_size;
309 bf.l_len = 0;
310 bf.l_sysid = 0;
311 bf.l_pid = 0;
312 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
313 (offset_t)ava.va_size, cr, &ct);
314 }
315 }
316
317 if (!error && ava.va_mask)
318 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
319
320 /* check if a monitor detected a delegation conflict */
321 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
322 resp->status = NFS3ERR_JUKEBOX;
323 goto out1;
324 }
325
326 ava.va_mask = AT_ALL;
327 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
328
329 /*
330 * Force modified metadata out to stable storage.
331 */
332 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
333
334 if (error)
335 goto out;
336
337 if (in_crit)
338 nbl_end_crit(vp);
339
340 resp->status = NFS3_OK;
341 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
342
343 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
344 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
345 SETATTR3res *, resp);
346
347 VN_RELE(vp);
348
349 return;
350
351 out:
352 if (curthread->t_flag & T_WOULDBLOCK) {
353 curthread->t_flag &= ~T_WOULDBLOCK;
354 resp->status = NFS3ERR_JUKEBOX;
355 } else
356 resp->status = puterrno3(error);
357 out1:
358 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
359 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
360 SETATTR3res *, resp);
361
362 if (vp != NULL) {
363 if (in_crit)
364 nbl_end_crit(vp);
365 VN_RELE(vp);
366 }
367 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
368 }
369
370 void *
371 rfs3_setattr_getfh(SETATTR3args *args)
372 {
373 return (&args->object);
374 }
375
376 /* ARGSUSED */
377 void
378 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
379 struct svc_req *req, cred_t *cr, bool_t ro)
380 {
381 int error;
382 vnode_t *vp;
383 vnode_t *dvp;
384 struct vattr *vap;
385 struct vattr va;
386 struct vattr *dvap;
387 struct vattr dva;
388 nfs_fh3 *fhp;
389 struct sec_ol sec = {0, 0};
390 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
391 struct sockaddr *ca;
392 char *name = NULL;
393
394 dvap = NULL;
395
396 if (exi != NULL)
397 exi_hold(exi);
398
399 /*
400 * Allow lookups from the root - the default
401 * location of the public filehandle.
402 */
403 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
404 dvp = ZONE_ROOTVP();
405 VN_HOLD(dvp);
406
407 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
408 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
409 LOOKUP3args *, args);
410 } else {
411 dvp = nfs3_fhtovp(&args->what.dir, exi);
412
413 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
414 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
415 LOOKUP3args *, args);
416
417 if (dvp == NULL) {
418 error = ESTALE;
419 goto out;
420 }
421 }
422
423 dva.va_mask = AT_ALL;
424 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
425
426 if (args->what.name == nfs3nametoolong) {
427 resp->status = NFS3ERR_NAMETOOLONG;
428 goto out1;
429 }
430
431 if (args->what.name == NULL || *(args->what.name) == '\0') {
432 resp->status = NFS3ERR_ACCES;
433 goto out1;
434 }
435
436 fhp = &args->what.dir;
437 if (strcmp(args->what.name, "..") == 0 &&
438 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
439 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
440 (dvp->v_flag & VROOT)) {
441 /*
442 * special case for ".." and 'nohide'exported root
443 */
444 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
445 resp->status = NFS3ERR_ACCES;
446 goto out1;
447 }
448 } else {
449 resp->status = NFS3ERR_NOENT;
450 goto out1;
451 }
452 }
453
454 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
455 name = nfscmd_convname(ca, exi, args->what.name,
456 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
457
458 if (name == NULL) {
459 resp->status = NFS3ERR_ACCES;
460 goto out1;
461 }
462
463 /*
464 * If the public filehandle is used then allow
465 * a multi-component lookup
466 */
467 if (PUBLIC_FH3(&args->what.dir)) {
468 publicfh_flag = TRUE;
469
470 exi_rele(&exi);
471
472 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
473 &exi, &sec);
474
475 /*
476 * Since WebNFS may bypass MOUNT, we need to ensure this
477 * request didn't come from an unlabeled admin_low client.
478 */
479 if (is_system_labeled() && error == 0) {
480 int addr_type;
481 void *ipaddr;
482 tsol_tpc_t *tp;
483
484 if (ca->sa_family == AF_INET) {
485 addr_type = IPV4_VERSION;
486 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
487 } else if (ca->sa_family == AF_INET6) {
488 addr_type = IPV6_VERSION;
489 ipaddr = &((struct sockaddr_in6 *)
490 ca)->sin6_addr;
491 }
492 tp = find_tpc(ipaddr, addr_type, B_FALSE);
493 if (tp == NULL || tp->tpc_tp.tp_doi !=
494 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
495 SUN_CIPSO) {
496 VN_RELE(vp);
497 error = EACCES;
498 }
499 if (tp != NULL)
500 TPC_RELE(tp);
501 }
502 } else {
503 error = VOP_LOOKUP(dvp, name, &vp,
504 NULL, 0, NULL, cr, NULL, NULL, NULL);
505 }
506
507 if (name != args->what.name)
508 kmem_free(name, MAXPATHLEN + 1);
509
510 if (error == 0 && vn_ismntpt(vp)) {
511 error = rfs_cross_mnt(&vp, &exi);
512 if (error)
513 VN_RELE(vp);
514 }
515
516 if (is_system_labeled() && error == 0) {
517 bslabel_t *clabel = req->rq_label;
518
519 ASSERT(clabel != NULL);
520 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
521 "got client label from request(1)", struct svc_req *, req);
522
523 if (!blequal(&l_admin_low->tsl_label, clabel)) {
524 if (!do_rfs_label_check(clabel, dvp,
525 DOMINANCE_CHECK, exi)) {
526 VN_RELE(vp);
527 error = EACCES;
528 }
529 }
530 }
531
532 dva.va_mask = AT_ALL;
533 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
534
535 if (error)
536 goto out;
537
538 if (sec.sec_flags & SEC_QUERY) {
539 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
540 } else {
541 error = makefh3(&resp->resok.object, vp, exi);
542 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
543 auth_weak = TRUE;
544 }
545
546 if (error) {
547 VN_RELE(vp);
548 goto out;
549 }
550
551 va.va_mask = AT_ALL;
552 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
553
554 exi_rele(&exi);
555 VN_RELE(vp);
556
557 resp->status = NFS3_OK;
558 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
559 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
560
561 /*
562 * If it's public fh, no 0x81, and client's flavor is
563 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
564 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
565 */
566 if (auth_weak)
567 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
568
569 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
570 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
571 LOOKUP3res *, resp);
572 VN_RELE(dvp);
573
574 return;
575
576 out:
577 if (curthread->t_flag & T_WOULDBLOCK) {
578 curthread->t_flag &= ~T_WOULDBLOCK;
579 resp->status = NFS3ERR_JUKEBOX;
580 } else
581 resp->status = puterrno3(error);
582 out1:
583 if (exi != NULL)
584 exi_rele(&exi);
585
586 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
587 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
588 LOOKUP3res *, resp);
589
590 if (dvp != NULL)
591 VN_RELE(dvp);
592 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
593
594 }
595
596 void *
597 rfs3_lookup_getfh(LOOKUP3args *args)
598 {
599 return (&args->what.dir);
600 }
601
602 void
603 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
604 struct svc_req *req, cred_t *cr, bool_t ro)
605 {
606 int error;
607 vnode_t *vp;
608 struct vattr *vap;
609 struct vattr va;
610 int checkwriteperm;
611 boolean_t dominant_label = B_FALSE;
612 boolean_t equal_label = B_FALSE;
613 boolean_t admin_low_client;
614
615 vap = NULL;
616
617 vp = nfs3_fhtovp(&args->object, exi);
618
619 DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
620 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
621 ACCESS3args *, args);
622
623 if (vp == NULL) {
624 error = ESTALE;
625 goto out;
626 }
627
628 /*
629 * If the file system is exported read only, it is not appropriate
630 * to check write permissions for regular files and directories.
631 * Special files are interpreted by the client, so the underlying
632 * permissions are sent back to the client for interpretation.
633 */
634 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
635 checkwriteperm = 0;
636 else
637 checkwriteperm = 1;
638
639 /*
640 * We need the mode so that we can correctly determine access
641 * permissions relative to a mandatory lock file. Access to
642 * mandatory lock files is denied on the server, so it might
643 * as well be reflected to the server during the open.
644 */
645 va.va_mask = AT_MODE;
646 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
647 if (error)
648 goto out;
649
650 vap = &va;
651
652 resp->resok.access = 0;
653
654 if (is_system_labeled()) {
655 bslabel_t *clabel = req->rq_label;
656
657 ASSERT(clabel != NULL);
658 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
659 "got client label from request(1)", struct svc_req *, req);
660
661 if (!blequal(&l_admin_low->tsl_label, clabel)) {
662 if ((equal_label = do_rfs_label_check(clabel, vp,
663 EQUALITY_CHECK, exi)) == B_FALSE) {
664 dominant_label = do_rfs_label_check(clabel,
665 vp, DOMINANCE_CHECK, exi);
666 } else
667 dominant_label = B_TRUE;
668 admin_low_client = B_FALSE;
669 } else
670 admin_low_client = B_TRUE;
671 }
672
673 if (args->access & ACCESS3_READ) {
674 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
675 if (error) {
676 if (curthread->t_flag & T_WOULDBLOCK)
677 goto out;
678 } else if (!MANDLOCK(vp, va.va_mode) &&
679 (!is_system_labeled() || admin_low_client ||
680 dominant_label))
681 resp->resok.access |= ACCESS3_READ;
682 }
683 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
684 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
685 if (error) {
686 if (curthread->t_flag & T_WOULDBLOCK)
687 goto out;
688 } else if (!is_system_labeled() || admin_low_client ||
689 dominant_label)
690 resp->resok.access |= ACCESS3_LOOKUP;
691 }
692 if (checkwriteperm &&
693 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
694 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
695 if (error) {
696 if (curthread->t_flag & T_WOULDBLOCK)
697 goto out;
698 } else if (!MANDLOCK(vp, va.va_mode) &&
699 (!is_system_labeled() || admin_low_client || equal_label)) {
700 resp->resok.access |=
701 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
702 }
703 }
704 if (checkwriteperm &&
705 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
706 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
707 if (error) {
708 if (curthread->t_flag & T_WOULDBLOCK)
709 goto out;
710 } else if (!is_system_labeled() || admin_low_client ||
711 equal_label)
712 resp->resok.access |= ACCESS3_DELETE;
713 }
714 if (args->access & ACCESS3_EXECUTE) {
715 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
716 if (error) {
717 if (curthread->t_flag & T_WOULDBLOCK)
718 goto out;
719 } else if (!MANDLOCK(vp, va.va_mode) &&
720 (!is_system_labeled() || admin_low_client ||
721 dominant_label))
722 resp->resok.access |= ACCESS3_EXECUTE;
723 }
724
725 va.va_mask = AT_ALL;
726 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
727
728 resp->status = NFS3_OK;
729 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
730
731 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
732 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
733 ACCESS3res *, resp);
734
735 VN_RELE(vp);
736
737 return;
738
739 out:
740 if (curthread->t_flag & T_WOULDBLOCK) {
741 curthread->t_flag &= ~T_WOULDBLOCK;
742 resp->status = NFS3ERR_JUKEBOX;
743 } else
744 resp->status = puterrno3(error);
745 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
746 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
747 ACCESS3res *, resp);
748 if (vp != NULL)
749 VN_RELE(vp);
750 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
751 }
752
753 void *
754 rfs3_access_getfh(ACCESS3args *args)
755 {
756 return (&args->object);
757 }
758
759 /* ARGSUSED */
760 void
761 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
762 struct svc_req *req, cred_t *cr, bool_t ro)
763 {
764 int error;
765 vnode_t *vp;
766 struct vattr *vap;
767 struct vattr va;
768 struct iovec iov;
769 struct uio uio;
770 char *data;
771 struct sockaddr *ca;
772 char *name = NULL;
773 int is_referral = 0;
774
775 vap = NULL;
776
777 vp = nfs3_fhtovp(&args->symlink, exi);
778
779 DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
780 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
781 READLINK3args *, args);
782
783 if (vp == NULL) {
784 error = ESTALE;
785 goto out;
786 }
787
788 va.va_mask = AT_ALL;
789 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
790 if (error)
791 goto out;
792
793 vap = &va;
794
795 /* We lied about the object type for a referral */
796 if (vn_is_nfs_reparse(vp, cr))
797 is_referral = 1;
798
799 if (vp->v_type != VLNK && !is_referral) {
800 resp->status = NFS3ERR_INVAL;
801 goto out1;
802 }
803
804 if (MANDLOCK(vp, va.va_mode)) {
805 resp->status = NFS3ERR_ACCES;
806 goto out1;
807 }
808
809 if (is_system_labeled()) {
810 bslabel_t *clabel = req->rq_label;
811
812 ASSERT(clabel != NULL);
813 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
814 "got client label from request(1)", struct svc_req *, req);
815
816 if (!blequal(&l_admin_low->tsl_label, clabel)) {
817 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
818 exi)) {
819 resp->status = NFS3ERR_ACCES;
820 goto out1;
821 }
822 }
823 }
824
825 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
826
827 if (is_referral) {
828 char *s;
829 size_t strsz;
830
831 /* Get an artificial symlink based on a referral */
832 s = build_symlink(vp, cr, &strsz);
833 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
834 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
835 vnode_t *, vp, char *, s);
836 if (s == NULL)
837 error = EINVAL;
838 else {
839 error = 0;
840 (void) strlcpy(data, s, MAXPATHLEN + 1);
841 kmem_free(s, strsz);
842 }
843
844 } else {
845
846 iov.iov_base = data;
847 iov.iov_len = MAXPATHLEN;
848 uio.uio_iov = &iov;
849 uio.uio_iovcnt = 1;
850 uio.uio_segflg = UIO_SYSSPACE;
851 uio.uio_extflg = UIO_COPY_CACHED;
852 uio.uio_loffset = 0;
853 uio.uio_resid = MAXPATHLEN;
854
855 error = VOP_READLINK(vp, &uio, cr, NULL);
856
857 if (!error)
858 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
859 }
860
861 va.va_mask = AT_ALL;
862 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
863
864 /* Lie about object type again just to be consistent */
865 if (is_referral && vap != NULL)
866 vap->va_type = VLNK;
867
868 #if 0 /* notyet */
869 /*
870 * Don't do this. It causes local disk writes when just
871 * reading the file and the overhead is deemed larger
872 * than the benefit.
873 */
874 /*
875 * Force modified metadata out to stable storage.
876 */
877 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
878 #endif
879
880 if (error) {
881 kmem_free(data, MAXPATHLEN + 1);
882 goto out;
883 }
884
885 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
886 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
887 MAXPATHLEN + 1);
888
889 if (name == NULL) {
890 /*
891 * Even though the conversion failed, we return
892 * something. We just don't translate it.
893 */
894 name = data;
895 }
896
897 resp->status = NFS3_OK;
898 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
899 resp->resok.data = name;
900
901 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
902 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
903 READLINK3res *, resp);
904 VN_RELE(vp);
905
906 if (name != data)
907 kmem_free(data, MAXPATHLEN + 1);
908
909 return;
910
911 out:
912 if (curthread->t_flag & T_WOULDBLOCK) {
913 curthread->t_flag &= ~T_WOULDBLOCK;
914 resp->status = NFS3ERR_JUKEBOX;
915 } else
916 resp->status = puterrno3(error);
917 out1:
918 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
919 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
920 READLINK3res *, resp);
921 if (vp != NULL)
922 VN_RELE(vp);
923 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
924 }
925
926 void *
927 rfs3_readlink_getfh(READLINK3args *args)
928 {
929 return (&args->symlink);
930 }
931
932 void
933 rfs3_readlink_free(READLINK3res *resp)
934 {
935 if (resp->status == NFS3_OK)
936 kmem_free(resp->resok.data, MAXPATHLEN + 1);
937 }
938
939 /*
940 * Server routine to handle read
941 * May handle RDMA data as well as mblks
942 */
943 /* ARGSUSED */
944 void
945 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
946 struct svc_req *req, cred_t *cr, bool_t ro)
947 {
948 int error;
949 vnode_t *vp;
950 struct vattr *vap;
951 struct vattr va;
952 struct iovec iov, *iovp = NULL;
953 int iovcnt;
954 struct uio uio;
955 u_offset_t offset;
956 mblk_t *mp = NULL;
957 int in_crit = 0;
958 int need_rwunlock = 0;
959 caller_context_t ct;
960 int rdma_used = 0;
961 int loaned_buffers;
962 struct uio *uiop;
963
964 vap = NULL;
965
966 vp = nfs3_fhtovp(&args->file, exi);
967
968 DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
969 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
970 READ3args *, args);
971
972
973 if (vp == NULL) {
974 error = ESTALE;
975 goto out;
976 }
977
978 if (args->wlist) {
979 if (args->count > clist_len(args->wlist)) {
980 error = EINVAL;
981 goto out;
982 }
983 rdma_used = 1;
984 }
985
986 /* use loaned buffers for TCP */
987 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
988
989 if (is_system_labeled()) {
990 bslabel_t *clabel = req->rq_label;
991
992 ASSERT(clabel != NULL);
993 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
994 "got client label from request(1)", struct svc_req *, req);
995
996 if (!blequal(&l_admin_low->tsl_label, clabel)) {
997 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
998 exi)) {
999 resp->status = NFS3ERR_ACCES;
1000 goto out1;
1001 }
1002 }
1003 }
1004
1005 ct.cc_sysid = 0;
1006 ct.cc_pid = 0;
1007 ct.cc_caller_id = nfs3_srv_caller_id;
1008 ct.cc_flags = CC_DONTBLOCK;
1009
1010 /*
1011 * Enter the critical region before calling VOP_RWLOCK
1012 * to avoid a deadlock with write requests.
1013 */
1014 if (nbl_need_check(vp)) {
1015 nbl_start_crit(vp, RW_READER);
1016 in_crit = 1;
1017 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1018 NULL)) {
1019 error = EACCES;
1020 goto out;
1021 }
1022 }
1023
1024 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1025
1026 /* check if a monitor detected a delegation conflict */
1027 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1028 resp->status = NFS3ERR_JUKEBOX;
1029 goto out1;
1030 }
1031
1032 need_rwunlock = 1;
1033
1034 va.va_mask = AT_ALL;
1035 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1036
1037 /*
1038 * If we can't get the attributes, then we can't do the
1039 * right access checking. So, we'll fail the request.
1040 */
1041 if (error)
1042 goto out;
1043
1044 vap = &va;
1045
1046 if (vp->v_type != VREG) {
1047 resp->status = NFS3ERR_INVAL;
1048 goto out1;
1049 }
1050
1051 if (crgetuid(cr) != va.va_uid) {
1052 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1053 if (error) {
1054 if (curthread->t_flag & T_WOULDBLOCK)
1055 goto out;
1056 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1057 if (error)
1058 goto out;
1059 }
1060 }
1061
1062 if (MANDLOCK(vp, va.va_mode)) {
1063 resp->status = NFS3ERR_ACCES;
1064 goto out1;
1065 }
1066
1067 offset = args->offset;
1068 if (offset >= va.va_size) {
1069 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1070 if (in_crit)
1071 nbl_end_crit(vp);
1072 resp->status = NFS3_OK;
1073 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1074 resp->resok.count = 0;
1075 resp->resok.eof = TRUE;
1076 resp->resok.data.data_len = 0;
1077 resp->resok.data.data_val = NULL;
1078 resp->resok.data.mp = NULL;
1079 /* RDMA */
1080 resp->resok.wlist = args->wlist;
1081 resp->resok.wlist_len = resp->resok.count;
1082 if (resp->resok.wlist)
1083 clist_zero_len(resp->resok.wlist);
1084 goto done;
1085 }
1086
1087 if (args->count == 0) {
1088 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1089 if (in_crit)
1090 nbl_end_crit(vp);
1091 resp->status = NFS3_OK;
1092 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1093 resp->resok.count = 0;
1094 resp->resok.eof = FALSE;
1095 resp->resok.data.data_len = 0;
1096 resp->resok.data.data_val = NULL;
1097 resp->resok.data.mp = NULL;
1098 /* RDMA */
1099 resp->resok.wlist = args->wlist;
1100 resp->resok.wlist_len = resp->resok.count;
1101 if (resp->resok.wlist)
1102 clist_zero_len(resp->resok.wlist);
1103 goto done;
1104 }
1105
1106 /*
1107 * do not allocate memory more the max. allowed
1108 * transfer size
1109 */
1110 if (args->count > rfs3_tsize(req))
1111 args->count = rfs3_tsize(req);
1112
1113 if (loaned_buffers) {
1114 uiop = (uio_t *)rfs_setup_xuio(vp);
1115 ASSERT(uiop != NULL);
1116 uiop->uio_segflg = UIO_SYSSPACE;
1117 uiop->uio_loffset = args->offset;
1118 uiop->uio_resid = args->count;
1119
1120 /* Jump to do the read if successful */
1121 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1122 /*
1123 * Need to hold the vnode until after VOP_RETZCBUF()
1124 * is called.
1125 */
1126 VN_HOLD(vp);
1127 goto doio_read;
1128 }
1129
1130 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1131 uiop->uio_loffset, int, uiop->uio_resid);
1132
1133 uiop->uio_extflg = 0;
1134 /* failure to setup for zero copy */
1135 rfs_free_xuio((void *)uiop);
1136 loaned_buffers = 0;
1137 }
1138
1139 /*
1140 * If returning data via RDMA Write, then grab the chunk list.
1141 * If we aren't returning READ data w/RDMA_WRITE, then grab
1142 * a mblk.
1143 */
1144 if (rdma_used) {
1145 (void) rdma_get_wchunk(req, &iov, args->wlist);
1146 uio.uio_iov = &iov;
1147 uio.uio_iovcnt = 1;
1148 } else {
1149 /*
1150 * mp will contain the data to be sent out in the read reply.
1151 * For UDP, this will be freed after the reply has been sent
1152 * out by the driver. For TCP, it will be freed after the last
1153 * segment associated with the reply has been ACKed by the
1154 * client.
1155 */
1156 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1157 uio.uio_iov = iovp;
1158 uio.uio_iovcnt = iovcnt;
1159 }
1160
1161 uio.uio_segflg = UIO_SYSSPACE;
1162 uio.uio_extflg = UIO_COPY_CACHED;
1163 uio.uio_loffset = args->offset;
1164 uio.uio_resid = args->count;
1165 uiop = &uio;
1166
1167 doio_read:
1168 error = VOP_READ(vp, uiop, 0, cr, &ct);
1169
1170 if (error) {
1171 if (mp)
1172 freemsg(mp);
1173 /* check if a monitor detected a delegation conflict */
1174 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1175 resp->status = NFS3ERR_JUKEBOX;
1176 goto out1;
1177 }
1178 goto out;
1179 }
1180
1181 /* make mblk using zc buffers */
1182 if (loaned_buffers) {
1183 mp = uio_to_mblk(uiop);
1184 ASSERT(mp != NULL);
1185 }
1186
1187 va.va_mask = AT_ALL;
1188 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1189
1190 if (error)
1191 vap = NULL;
1192 else
1193 vap = &va;
1194
1195 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1196
1197 if (in_crit)
1198 nbl_end_crit(vp);
1199
1200 resp->status = NFS3_OK;
1201 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1202 resp->resok.count = args->count - uiop->uio_resid;
1203 if (!error && offset + resp->resok.count == va.va_size)
1204 resp->resok.eof = TRUE;
1205 else
1206 resp->resok.eof = FALSE;
1207 resp->resok.data.data_len = resp->resok.count;
1208
1209 if (mp)
1210 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1211
1212 resp->resok.data.mp = mp;
1213 resp->resok.size = (uint_t)args->count;
1214
1215 if (rdma_used) {
1216 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1217 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1218 resp->status = NFS3ERR_INVAL;
1219 }
1220 } else {
1221 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1222 (resp->resok).wlist = NULL;
1223 }
1224
1225 done:
1226 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1227 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1228 READ3res *, resp);
1229
1230 VN_RELE(vp);
1231
1232 if (iovp != NULL)
1233 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1234
1235 return;
1236
1237 out:
1238 if (curthread->t_flag & T_WOULDBLOCK) {
1239 curthread->t_flag &= ~T_WOULDBLOCK;
1240 resp->status = NFS3ERR_JUKEBOX;
1241 } else
1242 resp->status = puterrno3(error);
1243 out1:
1244 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1245 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1246 READ3res *, resp);
1247
1248 if (vp != NULL) {
1249 if (need_rwunlock)
1250 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1251 if (in_crit)
1252 nbl_end_crit(vp);
1253 VN_RELE(vp);
1254 }
1255 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1256
1257 if (iovp != NULL)
1258 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1259 }
1260
1261 void
1262 rfs3_read_free(READ3res *resp)
1263 {
1264 mblk_t *mp;
1265
1266 if (resp->status == NFS3_OK) {
1267 mp = resp->resok.data.mp;
1268 if (mp != NULL)
1269 freemsg(mp);
1270 }
1271 }
1272
1273 void *
1274 rfs3_read_getfh(READ3args *args)
1275 {
1276 return (&args->file);
1277 }
1278
/*
 * Number of iovec entries rfs3_write() keeps in its on-stack array.
 * When the request's mblk chain has more segments than this, an iovec
 * array of the required size is obtained with kmem_alloc() instead.
 */
#define MAX_IOVECS 12

#ifdef DEBUG
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts requests
 * whose mblk chain fit in the on-stack iovec array, "misses" counts
 * requests that needed an allocated array.
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1285
/*
 * NFSv3 WRITE service routine.
 *
 * Writes args->count bytes at args->offset into the regular file named by
 * args->file.  The data is taken from the args->mblk chain when present,
 * otherwise from the address held in args->rlist, otherwise from
 * args->data.data_val.
 *
 * On success resp->resok holds the before/after attributes, the number of
 * bytes written, args->stable as the commitment level and write3verf from
 * the zone's nfs3_srv_t.  On failure resp->status is set and
 * resp->resfail.file_wcc is filled in.
 *
 * Exit labels:
 *	err  - derive resp->status from "error" (NFS3ERR_JUKEBOX when the
 *	       thread has T_WOULDBLOCK set), then fall into err1
 *	err1 - resp->status is already set; fill in the failure wcc data
 *	out  - fire the done probe, then drop the rwlock, the nbmand
 *	       critical region and the vnode hold, as applicable
 */
void
rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	nfs3_srv_t *ns;
	int error;
	vnode_t *vp;
	struct vattr *bvap = NULL;
	struct vattr bva;
	struct vattr *avap = NULL;
	struct vattr ava;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	int ioflag;
	cred_t *savecred;
	int in_crit = 0;
	/* -1 means VOP_RWLOCK is not held; see the unlock test at "out". */
	int rwlock_ret = -1;
	caller_context_t ct;

	vp = nfs3_fhtovp(&args->file, exi);

	DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    WRITE3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto err;
	}

	ns = zone_getspecific(rfs3_zone_key, curzone);
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto err1;
			}
		}
	}

	/* Caller context handed to every VOP call below. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
		    NULL)) {
			error = EACCES;
			goto err;
		}
	}

	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		/* Reset so that the exit path does not call VOP_RWUNLOCK. */
		rwlock_ret = -1;
		goto err1;
	}


	bva.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error)
		goto err;

	/*
	 * Until the write has been attempted the "after" attributes are
	 * the same as the "before" attributes; failures below report both.
	 */
	bvap = &bva;
	avap = bvap;

	if (args->count != args->data.data_len) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	if (rdonly(ro, vp)) {
		resp->status = NFS3ERR_ROFS;
		goto err1;
	}

	if (vp->v_type != VREG) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/* The access check is skipped when the caller owns the file. */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
		goto err;

	if (MANDLOCK(vp, bva.va_mode)) {
		resp->status = NFS3ERR_ACCES;
		goto err1;
	}

	/* Zero-length write: report success without calling VOP_WRITE. */
	if (args->count == 0) {
		resp->status = NFS3_OK;
		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
		resp->resok.count = 0;
		resp->resok.committed = args->stable;
		resp->resok.verf = ns->write3verf;
		goto out;
	}

	/*
	 * Build the iovec array describing the data.  For an mblk chain
	 * there is one entry per mblk, using the on-stack array when it is
	 * large enough and an allocated one otherwise; the other two
	 * sources are a single contiguous buffer.
	 */
	if (args->mblk != NULL) {
		iovcnt = 0;
		for (m = args->mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs3_write_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs3_write_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);

	} else if (args->rlist != NULL) {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->count;
	} else {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data.data_val;
		iovp->iov_len = args->count;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uio.uio_llimit = curproc->p_fsz_ctl;
	/*
	 * Shorten the transfer when fewer than uio_resid bytes remain
	 * between the offset and uio_llimit.
	 * NOTE(review): the (int) cast narrows a u_offset_t; presumably
	 * harmless because the count is bounded elsewhere -- confirm.
	 */
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	/* Translate the requested stability level into VOP_WRITE flags. */
	if (args->stable == UNSTABLE)
		ioflag = 0;
	else if (args->stable == FILE_SYNC)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC)
		ioflag = FDSYNC;
	else {
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	/* The iovec array is no longer needed, whatever the outcome. */
	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto err1;
	}

	/*
	 * Fetch the after attributes before looking at the write result so
	 * that a failure reply carries them as well.
	 */
	ava.va_mask = AT_ALL;
	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;

	if (error)
		goto err;

	/*
	 * If we were unable to get the V_WRITELOCK_TRUE, then we
	 * may not have accurate after attrs, so check if
	 * we have both attributes, they have a non-zero va_seq, and
	 * va_seq has changed by exactly one,
	 * if not, turn off the before attr.
	 */
	if (rwlock_ret != V_WRITELOCK_TRUE) {
		if (bvap == NULL || avap == NULL ||
		    bvap->va_seq == 0 || avap->va_seq == 0 ||
		    avap->va_seq != (bvap->va_seq + 1)) {
			bvap = NULL;
		}
	}

	resp->status = NFS3_OK;
	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
	resp->resok.count = args->count - uio.uio_resid;
	resp->resok.committed = args->stable;
	resp->resok.verf = ns->write3verf;
	goto out;

err:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
err1:
	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
out:
	DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    WRITE3res *, resp);

	if (vp != NULL) {
		if (rwlock_ret != -1)
			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
}
1534
1535 void *
1536 rfs3_write_getfh(WRITE3args *args)
1537 {
1538 return (&args->file);
1539 }
1540
1541 void
1542 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1543 struct svc_req *req, cred_t *cr, bool_t ro)
1544 {
1545 int error;
1546 int in_crit = 0;
1547 vnode_t *vp;
1548 vnode_t *tvp = NULL;
1549 vnode_t *dvp;
1550 struct vattr *vap;
1551 struct vattr va;
1552 struct vattr *dbvap;
1553 struct vattr dbva;
1554 struct vattr *davap;
1555 struct vattr dava;
1556 enum vcexcl excl;
1557 nfstime3 *mtime;
1558 len_t reqsize;
1559 bool_t trunc;
1560 struct sockaddr *ca;
1561 char *name = NULL;
1562
1563 dbvap = NULL;
1564 davap = NULL;
1565
1566 dvp = nfs3_fhtovp(&args->where.dir, exi);
1567
1568 DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1569 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1570 CREATE3args *, args);
1571
1572 if (dvp == NULL) {
1573 error = ESTALE;
1574 goto out;
1575 }
1576
1577 dbva.va_mask = AT_ALL;
1578 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1579 davap = dbvap;
1580
1581 if (args->where.name == nfs3nametoolong) {
1582 resp->status = NFS3ERR_NAMETOOLONG;
1583 goto out1;
1584 }
1585
1586 if (args->where.name == NULL || *(args->where.name) == '\0') {
1587 resp->status = NFS3ERR_ACCES;
1588 goto out1;
1589 }
1590
1591 if (rdonly(ro, dvp)) {
1592 resp->status = NFS3ERR_ROFS;
1593 goto out1;
1594 }
1595
1596 if (protect_zfs_mntpt(dvp) != 0) {
1597 resp->status = NFS3ERR_ACCES;
1598 goto out1;
1599 }
1600
1601 if (is_system_labeled()) {
1602 bslabel_t *clabel = req->rq_label;
1603
1604 ASSERT(clabel != NULL);
1605 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1606 "got client label from request(1)", struct svc_req *, req);
1607
1608 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1609 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1610 exi)) {
1611 resp->status = NFS3ERR_ACCES;
1612 goto out1;
1613 }
1614 }
1615 }
1616
1617 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1618 name = nfscmd_convname(ca, exi, args->where.name,
1619 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1620
1621 if (name == NULL) {
1622 /* This is really a Solaris EILSEQ */
1623 resp->status = NFS3ERR_INVAL;
1624 goto out1;
1625 }
1626
1627 if (args->how.mode == EXCLUSIVE) {
1628 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1629 va.va_type = VREG;
1630 va.va_mode = (mode_t)0;
1631 /*
1632 * Ensure no time overflows and that types match
1633 */
1634 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1635 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1636 va.va_mtime.tv_nsec = mtime->nseconds;
1637 excl = EXCL;
1638 } else {
1639 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1640 &va);
1641 if (error)
1642 goto out;
1643 va.va_mask |= AT_TYPE;
1644 va.va_type = VREG;
1645 if (args->how.mode == GUARDED)
1646 excl = EXCL;
1647 else {
1648 excl = NONEXCL;
1649
1650 /*
1651 * During creation of file in non-exclusive mode
1652 * if size of file is being set then make sure
1653 * that if the file already exists that no conflicting
1654 * non-blocking mandatory locks exists in the region
1655 * being modified. If there are conflicting locks fail
1656 * the operation with EACCES.
1657 */
1658 if (va.va_mask & AT_SIZE) {
1659 struct vattr tva;
1660
1661 /*
1662 * Does file already exist?
1663 */
1664 error = VOP_LOOKUP(dvp, name, &tvp,
1665 NULL, 0, NULL, cr, NULL, NULL, NULL);
1666
1667 /*
1668 * Check to see if the file has been delegated
1669 * to a v4 client. If so, then begin recall of
1670 * the delegation and return JUKEBOX to allow
1671 * the client to retrasmit its request.
1672 */
1673
1674 trunc = va.va_size == 0;
1675 if (!error &&
1676 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1677 resp->status = NFS3ERR_JUKEBOX;
1678 goto out1;
1679 }
1680
1681 /*
1682 * Check for NBMAND lock conflicts
1683 */
1684 if (!error && nbl_need_check(tvp)) {
1685 u_offset_t offset;
1686 ssize_t len;
1687
1688 nbl_start_crit(tvp, RW_READER);
1689 in_crit = 1;
1690
1691 tva.va_mask = AT_SIZE;
1692 error = VOP_GETATTR(tvp, &tva, 0, cr,
1693 NULL);
1694 /*
1695 * Can't check for conflicts, so return
1696 * error.
1697 */
1698 if (error)
1699 goto out;
1700
1701 offset = tva.va_size < va.va_size ?
1702 tva.va_size : va.va_size;
1703 len = tva.va_size < va.va_size ?
1704 va.va_size - tva.va_size :
1705 tva.va_size - va.va_size;
1706 if (nbl_conflict(tvp, NBL_WRITE,
1707 offset, len, 0, NULL)) {
1708 error = EACCES;
1709 goto out;
1710 }
1711 } else if (tvp) {
1712 VN_RELE(tvp);
1713 tvp = NULL;
1714 }
1715 }
1716 }
1717 if (va.va_mask & AT_SIZE)
1718 reqsize = va.va_size;
1719 }
1720
1721 /*
1722 * Must specify the mode.
1723 */
1724 if (!(va.va_mask & AT_MODE)) {
1725 resp->status = NFS3ERR_INVAL;
1726 goto out1;
1727 }
1728
1729 /*
1730 * If the filesystem is exported with nosuid, then mask off
1731 * the setuid and setgid bits.
1732 */
1733 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1734 va.va_mode &= ~(VSUID | VSGID);
1735
1736 tryagain:
1737 /*
1738 * The file open mode used is VWRITE. If the client needs
1739 * some other semantic, then it should do the access checking
1740 * itself. It would have been nice to have the file open mode
1741 * passed as part of the arguments.
1742 */
1743 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1744 &vp, cr, 0, NULL, NULL);
1745
1746 dava.va_mask = AT_ALL;
1747 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1748
1749 if (error) {
1750 /*
1751 * If we got something other than file already exists
1752 * then just return this error. Otherwise, we got
1753 * EEXIST. If we were doing a GUARDED create, then
1754 * just return this error. Otherwise, we need to
1755 * make sure that this wasn't a duplicate of an
1756 * exclusive create request.
1757 *
1758 * The assumption is made that a non-exclusive create
1759 * request will never return EEXIST.
1760 */
1761 if (error != EEXIST || args->how.mode == GUARDED)
1762 goto out;
1763 /*
1764 * Lookup the file so that we can get a vnode for it.
1765 */
1766 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1767 NULL, cr, NULL, NULL, NULL);
1768 if (error) {
1769 /*
1770 * We couldn't find the file that we thought that
1771 * we just created. So, we'll just try creating
1772 * it again.
1773 */
1774 if (error == ENOENT)
1775 goto tryagain;
1776 goto out;
1777 }
1778
1779 /*
1780 * If the file is delegated to a v4 client, go ahead
1781 * and initiate recall, this create is a hint that a
1782 * conflicting v3 open has occurred.
1783 */
1784
1785 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1786 VN_RELE(vp);
1787 resp->status = NFS3ERR_JUKEBOX;
1788 goto out1;
1789 }
1790
1791 va.va_mask = AT_ALL;
1792 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1793
1794 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1795 /* % with INT32_MAX to prevent overflows */
1796 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1797 vap->va_mtime.tv_sec !=
1798 (mtime->seconds % INT32_MAX) ||
1799 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1800 VN_RELE(vp);
1801 error = EEXIST;
1802 goto out;
1803 }
1804 } else {
1805
1806 if ((args->how.mode == UNCHECKED ||
1807 args->how.mode == GUARDED) &&
1808 args->how.createhow3_u.obj_attributes.size.set_it &&
1809 va.va_size == 0)
1810 trunc = TRUE;
1811 else
1812 trunc = FALSE;
1813
1814 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1815 VN_RELE(vp);
1816 resp->status = NFS3ERR_JUKEBOX;
1817 goto out1;
1818 }
1819
1820 va.va_mask = AT_ALL;
1821 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1822
1823 /*
1824 * We need to check to make sure that the file got
1825 * created to the indicated size. If not, we do a
1826 * setattr to try to change the size, but we don't
1827 * try too hard. This shouldn't a problem as most
1828 * clients will only specifiy a size of zero which
1829 * local file systems handle. However, even if
1830 * the client does specify a non-zero size, it can
1831 * still recover by checking the size of the file
1832 * after it has created it and then issue a setattr
1833 * request of its own to set the size of the file.
1834 */
1835 if (vap != NULL &&
1836 (args->how.mode == UNCHECKED ||
1837 args->how.mode == GUARDED) &&
1838 args->how.createhow3_u.obj_attributes.size.set_it &&
1839 vap->va_size != reqsize) {
1840 va.va_mask = AT_SIZE;
1841 va.va_size = reqsize;
1842 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1843 va.va_mask = AT_ALL;
1844 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1845 }
1846 }
1847
1848 if (name != args->where.name)
1849 kmem_free(name, MAXPATHLEN + 1);
1850
1851 error = makefh3(&resp->resok.obj.handle, vp, exi);
1852 if (error)
1853 resp->resok.obj.handle_follows = FALSE;
1854 else
1855 resp->resok.obj.handle_follows = TRUE;
1856
1857 /*
1858 * Force modified data and metadata out to stable storage.
1859 */
1860 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1861 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1862
1863 VN_RELE(vp);
1864 if (tvp != NULL) {
1865 if (in_crit)
1866 nbl_end_crit(tvp);
1867 VN_RELE(tvp);
1868 }
1869
1870 resp->status = NFS3_OK;
1871 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1872 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1873
1874 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1875 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1876 CREATE3res *, resp);
1877
1878 VN_RELE(dvp);
1879 return;
1880
1881 out:
1882 if (curthread->t_flag & T_WOULDBLOCK) {
1883 curthread->t_flag &= ~T_WOULDBLOCK;
1884 resp->status = NFS3ERR_JUKEBOX;
1885 } else
1886 resp->status = puterrno3(error);
1887 out1:
1888 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1889 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1890 CREATE3res *, resp);
1891
1892 if (name != NULL && name != args->where.name)
1893 kmem_free(name, MAXPATHLEN + 1);
1894
1895 if (tvp != NULL) {
1896 if (in_crit)
1897 nbl_end_crit(tvp);
1898 VN_RELE(tvp);
1899 }
1900 if (dvp != NULL)
1901 VN_RELE(dvp);
1902 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1903 }
1904
1905 void *
1906 rfs3_create_getfh(CREATE3args *args)
1907 {
1908 return (&args->where.dir);
1909 }
1910
1911 void
1912 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1913 struct svc_req *req, cred_t *cr, bool_t ro)
1914 {
1915 int error;
1916 vnode_t *vp = NULL;
1917 vnode_t *dvp;
1918 struct vattr *vap;
1919 struct vattr va;
1920 struct vattr *dbvap;
1921 struct vattr dbva;
1922 struct vattr *davap;
1923 struct vattr dava;
1924 struct sockaddr *ca;
1925 char *name = NULL;
1926
1927 dbvap = NULL;
1928 davap = NULL;
1929
1930 dvp = nfs3_fhtovp(&args->where.dir, exi);
1931
1932 DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1933 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1934 MKDIR3args *, args);
1935
1936 if (dvp == NULL) {
1937 error = ESTALE;
1938 goto out;
1939 }
1940
1941 dbva.va_mask = AT_ALL;
1942 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1943 davap = dbvap;
1944
1945 if (args->where.name == nfs3nametoolong) {
1946 resp->status = NFS3ERR_NAMETOOLONG;
1947 goto out1;
1948 }
1949
1950 if (args->where.name == NULL || *(args->where.name) == '\0') {
1951 resp->status = NFS3ERR_ACCES;
1952 goto out1;
1953 }
1954
1955 if (rdonly(ro, dvp)) {
1956 resp->status = NFS3ERR_ROFS;
1957 goto out1;
1958 }
1959
1960 if (protect_zfs_mntpt(dvp) != 0) {
1961 resp->status = NFS3ERR_ACCES;
1962 goto out1;
1963 }
1964
1965 if (is_system_labeled()) {
1966 bslabel_t *clabel = req->rq_label;
1967
1968 ASSERT(clabel != NULL);
1969 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1970 "got client label from request(1)", struct svc_req *, req);
1971
1972 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1973 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1974 exi)) {
1975 resp->status = NFS3ERR_ACCES;
1976 goto out1;
1977 }
1978 }
1979 }
1980
1981 error = sattr3_to_vattr(&args->attributes, &va);
1982 if (error)
1983 goto out;
1984
1985 if (!(va.va_mask & AT_MODE)) {
1986 resp->status = NFS3ERR_INVAL;
1987 goto out1;
1988 }
1989
1990 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1991 name = nfscmd_convname(ca, exi, args->where.name,
1992 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1993
1994 if (name == NULL) {
1995 resp->status = NFS3ERR_INVAL;
1996 goto out1;
1997 }
1998
1999 va.va_mask |= AT_TYPE;
2000 va.va_type = VDIR;
2001
2002 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2003
2004 if (name != args->where.name)
2005 kmem_free(name, MAXPATHLEN + 1);
2006
2007 dava.va_mask = AT_ALL;
2008 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2009
2010 /*
2011 * Force modified data and metadata out to stable storage.
2012 */
2013 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2014
2015 if (error)
2016 goto out;
2017
2018 error = makefh3(&resp->resok.obj.handle, vp, exi);
2019 if (error)
2020 resp->resok.obj.handle_follows = FALSE;
2021 else
2022 resp->resok.obj.handle_follows = TRUE;
2023
2024 va.va_mask = AT_ALL;
2025 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2026
2027 /*
2028 * Force modified data and metadata out to stable storage.
2029 */
2030 (void) VOP_FSYNC(vp, 0, cr, NULL);
2031
2032 VN_RELE(vp);
2033
2034 resp->status = NFS3_OK;
2035 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2036 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2037
2038 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2039 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2040 MKDIR3res *, resp);
2041 VN_RELE(dvp);
2042
2043 return;
2044
2045 out:
2046 if (curthread->t_flag & T_WOULDBLOCK) {
2047 curthread->t_flag &= ~T_WOULDBLOCK;
2048 resp->status = NFS3ERR_JUKEBOX;
2049 } else
2050 resp->status = puterrno3(error);
2051 out1:
2052 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2053 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2054 MKDIR3res *, resp);
2055 if (dvp != NULL)
2056 VN_RELE(dvp);
2057 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2058 }
2059
2060 void *
2061 rfs3_mkdir_getfh(MKDIR3args *args)
2062 {
2063 return (&args->where.dir);
2064 }
2065
2066 void
2067 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2068 struct svc_req *req, cred_t *cr, bool_t ro)
2069 {
2070 int error;
2071 vnode_t *vp;
2072 vnode_t *dvp;
2073 struct vattr *vap;
2074 struct vattr va;
2075 struct vattr *dbvap;
2076 struct vattr dbva;
2077 struct vattr *davap;
2078 struct vattr dava;
2079 struct sockaddr *ca;
2080 char *name = NULL;
2081 char *symdata = NULL;
2082
2083 dbvap = NULL;
2084 davap = NULL;
2085
2086 dvp = nfs3_fhtovp(&args->where.dir, exi);
2087
2088 DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2089 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2090 SYMLINK3args *, args);
2091
2092 if (dvp == NULL) {
2093 error = ESTALE;
2094 goto err;
2095 }
2096
2097 dbva.va_mask = AT_ALL;
2098 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2099 davap = dbvap;
2100
2101 if (args->where.name == nfs3nametoolong) {
2102 resp->status = NFS3ERR_NAMETOOLONG;
2103 goto err1;
2104 }
2105
2106 if (args->where.name == NULL || *(args->where.name) == '\0') {
2107 resp->status = NFS3ERR_ACCES;
2108 goto err1;
2109 }
2110
2111 if (rdonly(ro, dvp)) {
2112 resp->status = NFS3ERR_ROFS;
2113 goto err1;
2114 }
2115
2116 if (protect_zfs_mntpt(dvp) != 0) {
2117 resp->status = NFS3ERR_ACCES;
2118 goto err1;
2119 }
2120
2121 if (is_system_labeled()) {
2122 bslabel_t *clabel = req->rq_label;
2123
2124 ASSERT(clabel != NULL);
2125 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2126 "got client label from request(1)", struct svc_req *, req);
2127
2128 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2129 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2130 exi)) {
2131 resp->status = NFS3ERR_ACCES;
2132 goto err1;
2133 }
2134 }
2135 }
2136
2137 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2138 if (error)
2139 goto err;
2140
2141 if (!(va.va_mask & AT_MODE)) {
2142 resp->status = NFS3ERR_INVAL;
2143 goto err1;
2144 }
2145
2146 if (args->symlink.symlink_data == nfs3nametoolong) {
2147 resp->status = NFS3ERR_NAMETOOLONG;
2148 goto err1;
2149 }
2150
2151 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2152 name = nfscmd_convname(ca, exi, args->where.name,
2153 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2154
2155 if (name == NULL) {
2156 /* This is really a Solaris EILSEQ */
2157 resp->status = NFS3ERR_INVAL;
2158 goto err1;
2159 }
2160
2161 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2162 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2163 if (symdata == NULL) {
2164 /* This is really a Solaris EILSEQ */
2165 resp->status = NFS3ERR_INVAL;
2166 goto err1;
2167 }
2168
2169
2170 va.va_mask |= AT_TYPE;
2171 va.va_type = VLNK;
2172
2173 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2174
2175 dava.va_mask = AT_ALL;
2176 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2177
2178 if (error)
2179 goto err;
2180
2181 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2182 NULL, NULL, NULL);
2183
2184 /*
2185 * Force modified data and metadata out to stable storage.
2186 */
2187 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2188
2189
2190 resp->status = NFS3_OK;
2191 if (error) {
2192 resp->resok.obj.handle_follows = FALSE;
2193 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2194 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2195 goto out;
2196 }
2197
2198 error = makefh3(&resp->resok.obj.handle, vp, exi);
2199 if (error)
2200 resp->resok.obj.handle_follows = FALSE;
2201 else
2202 resp->resok.obj.handle_follows = TRUE;
2203
2204 va.va_mask = AT_ALL;
2205 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2206
2207 /*
2208 * Force modified data and metadata out to stable storage.
2209 */
2210 (void) VOP_FSYNC(vp, 0, cr, NULL);
2211
2212 VN_RELE(vp);
2213
2214 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2215 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2216 goto out;
2217
2218 err:
2219 if (curthread->t_flag & T_WOULDBLOCK) {
2220 curthread->t_flag &= ~T_WOULDBLOCK;
2221 resp->status = NFS3ERR_JUKEBOX;
2222 } else
2223 resp->status = puterrno3(error);
2224 err1:
2225 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2226 out:
2227 if (name != NULL && name != args->where.name)
2228 kmem_free(name, MAXPATHLEN + 1);
2229 if (symdata != NULL && symdata != args->symlink.symlink_data)
2230 kmem_free(symdata, MAXPATHLEN + 1);
2231
2232 DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2233 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2234 SYMLINK3res *, resp);
2235
2236 if (dvp != NULL)
2237 VN_RELE(dvp);
2238 }
2239
2240 void *
2241 rfs3_symlink_getfh(SYMLINK3args *args)
2242 {
2243 return (&args->where.dir);
2244 }
2245
2246 void
2247 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2248 struct svc_req *req, cred_t *cr, bool_t ro)
2249 {
2250 int error;
2251 vnode_t *vp;
2252 vnode_t *realvp;
2253 vnode_t *dvp;
2254 struct vattr *vap;
2255 struct vattr va;
2256 struct vattr *dbvap;
2257 struct vattr dbva;
2258 struct vattr *davap;
2259 struct vattr dava;
2260 int mode;
2261 enum vcexcl excl;
2262 struct sockaddr *ca;
2263 char *name = NULL;
2264
2265 dbvap = NULL;
2266 davap = NULL;
2267
2268 dvp = nfs3_fhtovp(&args->where.dir, exi);
2269
2270 DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2271 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2272 MKNOD3args *, args);
2273
2274 if (dvp == NULL) {
2275 error = ESTALE;
2276 goto out;
2277 }
2278
2279 dbva.va_mask = AT_ALL;
2280 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2281 davap = dbvap;
2282
2283 if (args->where.name == nfs3nametoolong) {
2284 resp->status = NFS3ERR_NAMETOOLONG;
2285 goto out1;
2286 }
2287
2288 if (args->where.name == NULL || *(args->where.name) == '\0') {
2289 resp->status = NFS3ERR_ACCES;
2290 goto out1;
2291 }
2292
2293 if (rdonly(ro, dvp)) {
2294 resp->status = NFS3ERR_ROFS;
2295 goto out1;
2296 }
2297
2298 if (protect_zfs_mntpt(dvp) != 0) {
2299 resp->status = NFS3ERR_ACCES;
2300 goto out1;
2301 }
2302
2303 if (is_system_labeled()) {
2304 bslabel_t *clabel = req->rq_label;
2305
2306 ASSERT(clabel != NULL);
2307 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2308 "got client label from request(1)", struct svc_req *, req);
2309
2310 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2311 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2312 exi)) {
2313 resp->status = NFS3ERR_ACCES;
2314 goto out1;
2315 }
2316 }
2317 }
2318
2319 switch (args->what.type) {
2320 case NF3CHR:
2321 case NF3BLK:
2322 error = sattr3_to_vattr(
2323 &args->what.mknoddata3_u.device.dev_attributes, &va);
2324 if (error)
2325 goto out;
2326 if (secpolicy_sys_devices(cr) != 0) {
2327 resp->status = NFS3ERR_PERM;
2328 goto out1;
2329 }
2330 if (args->what.type == NF3CHR)
2331 va.va_type = VCHR;
2332 else
2333 va.va_type = VBLK;
2334 va.va_rdev = makedevice(
2335 args->what.mknoddata3_u.device.spec.specdata1,
2336 args->what.mknoddata3_u.device.spec.specdata2);
2337 va.va_mask |= AT_TYPE | AT_RDEV;
2338 break;
2339 case NF3SOCK:
2340 error = sattr3_to_vattr(
2341 &args->what.mknoddata3_u.pipe_attributes, &va);
2342 if (error)
2343 goto out;
2344 va.va_type = VSOCK;
2345 va.va_mask |= AT_TYPE;
2346 break;
2347 case NF3FIFO:
2348 error = sattr3_to_vattr(
2349 &args->what.mknoddata3_u.pipe_attributes, &va);
2350 if (error)
2351 goto out;
2352 va.va_type = VFIFO;
2353 va.va_mask |= AT_TYPE;
2354 break;
2355 default:
2356 resp->status = NFS3ERR_BADTYPE;
2357 goto out1;
2358 }
2359
2360 /*
2361 * Must specify the mode.
2362 */
2363 if (!(va.va_mask & AT_MODE)) {
2364 resp->status = NFS3ERR_INVAL;
2365 goto out1;
2366 }
2367
2368 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2369 name = nfscmd_convname(ca, exi, args->where.name,
2370 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2371
2372 if (name == NULL) {
2373 resp->status = NFS3ERR_INVAL;
2374 goto out1;
2375 }
2376
2377 excl = EXCL;
2378
2379 mode = 0;
2380
2381 error = VOP_CREATE(dvp, name, &va, excl, mode,
2382 &vp, cr, 0, NULL, NULL);
2383
2384 if (name != args->where.name)
2385 kmem_free(name, MAXPATHLEN + 1);
2386
2387 dava.va_mask = AT_ALL;
2388 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2389
2390 /*
2391 * Force modified data and metadata out to stable storage.
2392 */
2393 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2394
2395 if (error)
2396 goto out;
2397
2398 resp->status = NFS3_OK;
2399
2400 error = makefh3(&resp->resok.obj.handle, vp, exi);
2401 if (error)
2402 resp->resok.obj.handle_follows = FALSE;
2403 else
2404 resp->resok.obj.handle_follows = TRUE;
2405
2406 va.va_mask = AT_ALL;
2407 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2408
2409 /*
2410 * Force modified metadata out to stable storage.
2411 *
2412 * if a underlying vp exists, pass it to VOP_FSYNC
2413 */
2414 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2415 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2416 else
2417 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2418
2419 VN_RELE(vp);
2420
2421 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2422 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2423 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2424 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2425 MKNOD3res *, resp);
2426 VN_RELE(dvp);
2427 return;
2428
2429 out:
2430 if (curthread->t_flag & T_WOULDBLOCK) {
2431 curthread->t_flag &= ~T_WOULDBLOCK;
2432 resp->status = NFS3ERR_JUKEBOX;
2433 } else
2434 resp->status = puterrno3(error);
2435 out1:
2436 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2437 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2438 MKNOD3res *, resp);
2439 if (dvp != NULL)
2440 VN_RELE(dvp);
2441 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2442 }
2443
2444 void *
2445 rfs3_mknod_getfh(MKNOD3args *args)
2446 {
2447 return (&args->where.dir);
2448 }
2449
2450 void
2451 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2452 struct svc_req *req, cred_t *cr, bool_t ro)
2453 {
2454 int error = 0;
2455 vnode_t *vp;
2456 struct vattr *bvap;
2457 struct vattr bva;
2458 struct vattr *avap;
2459 struct vattr ava;
2460 vnode_t *targvp = NULL;
2461 struct sockaddr *ca;
2462 char *name = NULL;
2463
2464 bvap = NULL;
2465 avap = NULL;
2466
2467 vp = nfs3_fhtovp(&args->object.dir, exi);
2468
2469 DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2470 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2471 REMOVE3args *, args);
2472
2473 if (vp == NULL) {
2474 error = ESTALE;
2475 goto err;
2476 }
2477
2478 bva.va_mask = AT_ALL;
2479 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2480 avap = bvap;
2481
2482 if (vp->v_type != VDIR) {
2483 resp->status = NFS3ERR_NOTDIR;
2484 goto err1;
2485 }
2486
2487 if (args->object.name == nfs3nametoolong) {
2488 resp->status = NFS3ERR_NAMETOOLONG;
2489 goto err1;
2490 }
2491
2492 if (args->object.name == NULL || *(args->object.name) == '\0') {
2493 resp->status = NFS3ERR_ACCES;
2494 goto err1;
2495 }
2496
2497 if (rdonly(ro, vp)) {
2498 resp->status = NFS3ERR_ROFS;
2499 goto err1;
2500 }
2501
2502 if (is_system_labeled()) {
2503 bslabel_t *clabel = req->rq_label;
2504
2505 ASSERT(clabel != NULL);
2506 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2507 "got client label from request(1)", struct svc_req *, req);
2508
2509 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2510 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2511 exi)) {
2512 resp->status = NFS3ERR_ACCES;
2513 goto err1;
2514 }
2515 }
2516 }
2517
2518 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2519 name = nfscmd_convname(ca, exi, args->object.name,
2520 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2521
2522 if (name == NULL) {
2523 resp->status = NFS3ERR_INVAL;
2524 goto err1;
2525 }
2526
2527 /*
2528 * Check for a conflict with a non-blocking mandatory share
2529 * reservation and V4 delegations
2530 */
2531 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2532 NULL, cr, NULL, NULL, NULL);
2533 if (error != 0)
2534 goto err;
2535
2536 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2537 resp->status = NFS3ERR_JUKEBOX;
2538 goto err1;
2539 }
2540
2541 if (!nbl_need_check(targvp)) {
2542 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2543 } else {
2544 nbl_start_crit(targvp, RW_READER);
2545 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2546 error = EACCES;
2547 } else {
2548 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2549 }
2550 nbl_end_crit(targvp);
2551 }
2552 VN_RELE(targvp);
2553 targvp = NULL;
2554
2555 ava.va_mask = AT_ALL;
2556 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2557
2558 /*
2559 * Force modified data and metadata out to stable storage.
2560 */
2561 (void) VOP_FSYNC(vp, 0, cr, NULL);
2562
2563 if (error)
2564 goto err;
2565
2566 resp->status = NFS3_OK;
2567 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2568 goto out;
2569
2570 err:
2571 if (curthread->t_flag & T_WOULDBLOCK) {
2572 curthread->t_flag &= ~T_WOULDBLOCK;
2573 resp->status = NFS3ERR_JUKEBOX;
2574 } else
2575 resp->status = puterrno3(error);
2576 err1:
2577 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2578 out:
2579 DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2580 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2581 REMOVE3res *, resp);
2582
2583 if (name != NULL && name != args->object.name)
2584 kmem_free(name, MAXPATHLEN + 1);
2585
2586 if (vp != NULL)
2587 VN_RELE(vp);
2588 }
2589
2590 void *
2591 rfs3_remove_getfh(REMOVE3args *args)
2592 {
2593 return (&args->object.dir);
2594 }
2595
2596 void
2597 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2598 struct svc_req *req, cred_t *cr, bool_t ro)
2599 {
2600 int error;
2601 vnode_t *vp;
2602 struct vattr *bvap;
2603 struct vattr bva;
2604 struct vattr *avap;
2605 struct vattr ava;
2606 struct sockaddr *ca;
2607 char *name = NULL;
2608
2609 bvap = NULL;
2610 avap = NULL;
2611
2612 vp = nfs3_fhtovp(&args->object.dir, exi);
2613
2614 DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2615 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2616 RMDIR3args *, args);
2617
2618 if (vp == NULL) {
2619 error = ESTALE;
2620 goto err;
2621 }
2622
2623 bva.va_mask = AT_ALL;
2624 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2625 avap = bvap;
2626
2627 if (vp->v_type != VDIR) {
2628 resp->status = NFS3ERR_NOTDIR;
2629 goto err1;
2630 }
2631
2632 if (args->object.name == nfs3nametoolong) {
2633 resp->status = NFS3ERR_NAMETOOLONG;
2634 goto err1;
2635 }
2636
2637 if (args->object.name == NULL || *(args->object.name) == '\0') {
2638 resp->status = NFS3ERR_ACCES;
2639 goto err1;
2640 }
2641
2642 if (rdonly(ro, vp)) {
2643 resp->status = NFS3ERR_ROFS;
2644 goto err1;
2645 }
2646
2647 if (is_system_labeled()) {
2648 bslabel_t *clabel = req->rq_label;
2649
2650 ASSERT(clabel != NULL);
2651 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2652 "got client label from request(1)", struct svc_req *, req);
2653
2654 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2655 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2656 exi)) {
2657 resp->status = NFS3ERR_ACCES;
2658 goto err1;
2659 }
2660 }
2661 }
2662
2663 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2664 name = nfscmd_convname(ca, exi, args->object.name,
2665 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2666
2667 if (name == NULL) {
2668 resp->status = NFS3ERR_INVAL;
2669 goto err1;
2670 }
2671
2672 error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2673
2674 if (name != args->object.name)
2675 kmem_free(name, MAXPATHLEN + 1);
2676
2677 ava.va_mask = AT_ALL;
2678 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2679
2680 /*
2681 * Force modified data and metadata out to stable storage.
2682 */
2683 (void) VOP_FSYNC(vp, 0, cr, NULL);
2684
2685 if (error) {
2686 /*
2687 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2688 * if the directory is not empty. A System V NFS server
2689 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2690 * over the wire.
2691 */
2692 if (error == EEXIST)
2693 error = ENOTEMPTY;
2694 goto err;
2695 }
2696
2697 resp->status = NFS3_OK;
2698 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2699 goto out;
2700
2701 err:
2702 if (curthread->t_flag & T_WOULDBLOCK) {
2703 curthread->t_flag &= ~T_WOULDBLOCK;
2704 resp->status = NFS3ERR_JUKEBOX;
2705 } else
2706 resp->status = puterrno3(error);
2707 err1:
2708 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2709 out:
2710 DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2711 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2712 RMDIR3res *, resp);
2713 if (vp != NULL)
2714 VN_RELE(vp);
2715
2716 }
2717
2718 void *
2719 rfs3_rmdir_getfh(RMDIR3args *args)
2720 {
2721 return (&args->object.dir);
2722 }
2723
2724 void
2725 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2726 struct svc_req *req, cred_t *cr, bool_t ro)
2727 {
2728 int error = 0;
2729 vnode_t *fvp;
2730 vnode_t *tvp;
2731 vnode_t *targvp;
2732 struct vattr *fbvap;
2733 struct vattr fbva;
2734 struct vattr *favap;
2735 struct vattr fava;
2736 struct vattr *tbvap;
2737 struct vattr tbva;
2738 struct vattr *tavap;
2739 struct vattr tava;
2740 nfs_fh3 *fh3;
2741 struct exportinfo *to_exi;
2742 vnode_t *srcvp = NULL;
2743 bslabel_t *clabel;
2744 struct sockaddr *ca;
2745 char *name = NULL;
2746 char *toname = NULL;
2747
2748 fbvap = NULL;
2749 favap = NULL;
2750 tbvap = NULL;
2751 tavap = NULL;
2752 tvp = NULL;
2753
2754 fvp = nfs3_fhtovp(&args->from.dir, exi);
2755
2756 DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2757 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2758 RENAME3args *, args);
2759
2760 if (fvp == NULL) {
2761 error = ESTALE;
2762 goto err;
2763 }
2764
2765 if (is_system_labeled()) {
2766 clabel = req->rq_label;
2767 ASSERT(clabel != NULL);
2768 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2769 "got client label from request(1)", struct svc_req *, req);
2770
2771 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2772 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2773 exi)) {
2774 resp->status = NFS3ERR_ACCES;
2775 goto err1;
2776 }
2777 }
2778 }
2779
2780 fbva.va_mask = AT_ALL;
2781 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2782 favap = fbvap;
2783
2784 fh3 = &args->to.dir;
2785 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2786 if (to_exi == NULL) {
2787 resp->status = NFS3ERR_ACCES;
2788 goto err1;
2789 }
2790 exi_rele(&to_exi);
2791
2792 if (to_exi != exi) {
2793 resp->status = NFS3ERR_XDEV;
2794 goto err1;
2795 }
2796
2797 tvp = nfs3_fhtovp(&args->to.dir, exi);
2798 if (tvp == NULL) {
2799 error = ESTALE;
2800 goto err;
2801 }
2802
2803 tbva.va_mask = AT_ALL;
2804 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2805 tavap = tbvap;
2806
2807 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2808 resp->status = NFS3ERR_NOTDIR;
2809 goto err1;
2810 }
2811
2812 if (args->from.name == nfs3nametoolong ||
2813 args->to.name == nfs3nametoolong) {
2814 resp->status = NFS3ERR_NAMETOOLONG;
2815 goto err1;
2816 }
2817 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2818 args->to.name == NULL || *(args->to.name) == '\0') {
2819 resp->status = NFS3ERR_ACCES;
2820 goto err1;
2821 }
2822
2823 if (rdonly(ro, tvp)) {
2824 resp->status = NFS3ERR_ROFS;
2825 goto err1;
2826 }
2827
2828 if (protect_zfs_mntpt(tvp) != 0) {
2829 resp->status = NFS3ERR_ACCES;
2830 goto err1;
2831 }
2832
2833 if (is_system_labeled()) {
2834 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2835 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2836 exi)) {
2837 resp->status = NFS3ERR_ACCES;
2838 goto err1;
2839 }
2840 }
2841 }
2842
2843 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2844 name = nfscmd_convname(ca, exi, args->from.name,
2845 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2846
2847 if (name == NULL) {
2848 resp->status = NFS3ERR_INVAL;
2849 goto err1;
2850 }
2851
2852 toname = nfscmd_convname(ca, exi, args->to.name,
2853 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2854
2855 if (toname == NULL) {
2856 resp->status = NFS3ERR_INVAL;
2857 goto err1;
2858 }
2859
2860 /*
2861 * Check for a conflict with a non-blocking mandatory share
2862 * reservation or V4 delegations.
2863 */
2864 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2865 NULL, cr, NULL, NULL, NULL);
2866 if (error != 0)
2867 goto err;
2868
2869 /*
2870 * If we rename a delegated file we should recall the
2871 * delegation, since future opens should fail or would
2872 * refer to a new file.
2873 */
2874 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2875 resp->status = NFS3ERR_JUKEBOX;
2876 goto err1;
2877 }
2878
2879 /*
2880 * Check for renaming over a delegated file. Check nfs4_deleg_policy
2881 * first to avoid VOP_LOOKUP if possible.
2882 */
2883 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2884 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2885 NULL, NULL, NULL) == 0) {
2886
2887 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2888 VN_RELE(targvp);
2889 resp->status = NFS3ERR_JUKEBOX;
2890 goto err1;
2891 }
2892 VN_RELE(targvp);
2893 }
2894
2895 if (!nbl_need_check(srcvp)) {
2896 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2897 } else {
2898 nbl_start_crit(srcvp, RW_READER);
2899 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2900 error = EACCES;
2901 else
2902 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2903 nbl_end_crit(srcvp);
2904 }
2905 if (error == 0)
2906 vn_renamepath(tvp, srcvp, args->to.name,
2907 strlen(args->to.name));
2908 VN_RELE(srcvp);
2909 srcvp = NULL;
2910
2911 fava.va_mask = AT_ALL;
2912 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2913 tava.va_mask = AT_ALL;
2914 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2915
2916 /*
2917 * Force modified data and metadata out to stable storage.
2918 */
2919 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2920 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2921
2922 if (error)
2923 goto err;
2924
2925 resp->status = NFS3_OK;
2926 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2927 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2928 goto out;
2929
2930 err:
2931 if (curthread->t_flag & T_WOULDBLOCK) {
2932 curthread->t_flag &= ~T_WOULDBLOCK;
2933 resp->status = NFS3ERR_JUKEBOX;
2934 } else {
2935 resp->status = puterrno3(error);
2936 }
2937 err1:
2938 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2939 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2940
2941 out:
2942 if (name != NULL && name != args->from.name)
2943 kmem_free(name, MAXPATHLEN + 1);
2944 if (toname != NULL && toname != args->to.name)
2945 kmem_free(toname, MAXPATHLEN + 1);
2946
2947 DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2948 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2949 RENAME3res *, resp);
2950 if (fvp != NULL)
2951 VN_RELE(fvp);
2952 if (tvp != NULL)
2953 VN_RELE(tvp);
2954 }
2955
2956 void *
2957 rfs3_rename_getfh(RENAME3args *args)
2958 {
2959 return (&args->from.dir);
2960 }
2961
2962 void
2963 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2964 struct svc_req *req, cred_t *cr, bool_t ro)
2965 {
2966 int error;
2967 vnode_t *vp;
2968 vnode_t *dvp;
2969 struct vattr *vap;
2970 struct vattr va;
2971 struct vattr *bvap;
2972 struct vattr bva;
2973 struct vattr *avap;
2974 struct vattr ava;
2975 nfs_fh3 *fh3;
2976 struct exportinfo *to_exi;
2977 bslabel_t *clabel;
2978 struct sockaddr *ca;
2979 char *name = NULL;
2980
2981 vap = NULL;
2982 bvap = NULL;
2983 avap = NULL;
2984 dvp = NULL;
2985
2986 vp = nfs3_fhtovp(&args->file, exi);
2987
2988 DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2989 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2990 LINK3args *, args);
2991
2992 if (vp == NULL) {
2993 error = ESTALE;
2994 goto out;
2995 }
2996
2997 va.va_mask = AT_ALL;
2998 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2999
3000 fh3 = &args->link.dir;
3001 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3002 if (to_exi == NULL) {
3003 resp->status = NFS3ERR_ACCES;
3004 goto out1;
3005 }
3006 exi_rele(&to_exi);
3007
3008 if (to_exi != exi) {
3009 resp->status = NFS3ERR_XDEV;
3010 goto out1;
3011 }
3012
3013 if (is_system_labeled()) {
3014 clabel = req->rq_label;
3015
3016 ASSERT(clabel != NULL);
3017 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3018 "got client label from request(1)", struct svc_req *, req);
3019
3020 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3021 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3022 exi)) {
3023 resp->status = NFS3ERR_ACCES;
3024 goto out1;
3025 }
3026 }
3027 }
3028
3029 dvp = nfs3_fhtovp(&args->link.dir, exi);
3030 if (dvp == NULL) {
3031 error = ESTALE;
3032 goto out;
3033 }
3034
3035 bva.va_mask = AT_ALL;
3036 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3037
3038 if (dvp->v_type != VDIR) {
3039 resp->status = NFS3ERR_NOTDIR;
3040 goto out1;
3041 }
3042
3043 if (args->link.name == nfs3nametoolong) {
3044 resp->status = NFS3ERR_NAMETOOLONG;
3045 goto out1;
3046 }
3047
3048 if (args->link.name == NULL || *(args->link.name) == '\0') {
3049 resp->status = NFS3ERR_ACCES;
3050 goto out1;
3051 }
3052
3053 if (rdonly(ro, dvp)) {
3054 resp->status = NFS3ERR_ROFS;
3055 goto out1;
3056 }
3057
3058 if (protect_zfs_mntpt(dvp) != 0) {
3059 resp->status = NFS3ERR_ACCES;
3060 goto out1;
3061 }
3062
3063 if (is_system_labeled()) {
3064 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3065 "got client label from request(1)", struct svc_req *, req);
3066
3067 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3068 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3069 exi)) {
3070 resp->status = NFS3ERR_ACCES;
3071 goto out1;
3072 }
3073 }
3074 }
3075
3076 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3077 name = nfscmd_convname(ca, exi, args->link.name,
3078 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3079
3080 if (name == NULL) {
3081 resp->status = NFS3ERR_SERVERFAULT;
3082 goto out1;
3083 }
3084
3085 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3086
3087 va.va_mask = AT_ALL;
3088 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3089 ava.va_mask = AT_ALL;
3090 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3091
3092 /*
3093 * Force modified data and metadata out to stable storage.
3094 */
3095 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3096 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3097
3098 if (error)
3099 goto out;
3100
3101 VN_RELE(dvp);
3102
3103 resp->status = NFS3_OK;
3104 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3105 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3106
3107 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3108 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3109 LINK3res *, resp);
3110
3111 VN_RELE(vp);
3112
3113 return;
3114
3115 out:
3116 if (curthread->t_flag & T_WOULDBLOCK) {
3117 curthread->t_flag &= ~T_WOULDBLOCK;
3118 resp->status = NFS3ERR_JUKEBOX;
3119 } else
3120 resp->status = puterrno3(error);
3121 out1:
3122 if (name != NULL && name != args->link.name)
3123 kmem_free(name, MAXPATHLEN + 1);
3124
3125 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3126 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3127 LINK3res *, resp);
3128
3129 if (vp != NULL)
3130 VN_RELE(vp);
3131 if (dvp != NULL)
3132 VN_RELE(dvp);
3133 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3134 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3135 }
3136
3137 void *
3138 rfs3_link_getfh(LINK3args *args)
3139 {
3140 return (&args->file);
3141 }
3142
/*
 * nextdp(dp) yields a struct dirent64 pointer located (dp)->d_reclen bytes
 * past dp; it is used to step through consecutive directory records.
 * Any previous definition of nextdp is dropped first.
 */
#undef nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3147
3148 /* ARGSUSED */
3149 void
3150 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3151 struct svc_req *req, cred_t *cr, bool_t ro)
3152 {
3153 int error;
3154 vnode_t *vp;
3155 struct vattr *vap;
3156 struct vattr va;
3157 struct iovec iov;
3158 struct uio uio;
3159 int iseof;
3160
3161 count3 count = args->count;
3162 count3 size; /* size of the READDIR3resok structure */
3163
3164 size_t datasz;
3165 char *data = NULL;
3166 dirent64_t *dp;
3167
3168 struct sockaddr *ca;
3169 entry3 **eptr;
3170 entry3 *entry;
3171
3172 vp = nfs3_fhtovp(&args->dir, exi);
3173
3174 DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3175 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3176 READDIR3args *, args);
3177
3178 if (vp == NULL) {
3179 resp->status = NFS3ERR_STALE;
3180 vap = NULL;
3181 goto out1;
3182 }
3183
3184 if (vp->v_type != VDIR) {
3185 resp->status = NFS3ERR_NOTDIR;
3186 vap = NULL;
3187 goto out1;
3188 }
3189
3190 if (is_system_labeled()) {
3191 bslabel_t *clabel = req->rq_label;
3192
3193 ASSERT(clabel != NULL);
3194 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3195 "got client label from request(1)", struct svc_req *, req);
3196
3197 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3198 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3199 exi)) {
3200 resp->status = NFS3ERR_ACCES;
3201 vap = NULL;
3202 goto out1;
3203 }
3204 }
3205 }
3206
3207 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3208
3209 va.va_mask = AT_ALL;
3210 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3211
3212 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3213 if (error)
3214 goto out;
3215
3216 /*
3217 * Don't allow arbitrary counts for allocation
3218 */
3219 if (count > rfs3_tsize(req))
3220 count = rfs3_tsize(req);
3221
3222 /*
3223 * struct READDIR3resok:
3224 * dir_attributes: 1 + NFS3_SIZEOF_FATTR3
3225 * cookieverf: 2
3226 * entries (bool): 1
3227 * eof: 1
3228 */
3229 size = (1 + NFS3_SIZEOF_FATTR3 + 2 + 1 + 1) * BYTES_PER_XDR_UNIT;
3230
3231 if (size > count) {
3232 resp->status = NFS3ERR_TOOSMALL;
3233 goto out1;
3234 }
3235
3236 /*
3237 * This is simplification. The dirent64_t size is not the same as the
3238 * size of XDR representation of entry3, but the sizes are similar so
3239 * we'll assume they are same. This assumption should not cause any
3240 * harm. In worst case we will need to issue VOP_READDIR() once more.
3241 */
3242 datasz = count;
3243
3244 /*
3245 * Make sure that there is room to read at least one entry
3246 * if any are available.
3247 */
3248 if (datasz < DIRENT64_RECLEN(MAXNAMELEN))
3249 datasz = DIRENT64_RECLEN(MAXNAMELEN);
3250
3251 data = kmem_alloc(datasz, KM_NOSLEEP);
3252 if (data == NULL) {
3253 /* The allocation failed; downsize and wait for it this time */
3254 if (datasz > MAXBSIZE)
3255 datasz = MAXBSIZE;
3256 data = kmem_alloc(datasz, KM_SLEEP);
3257 }
3258
3259 uio.uio_iov = &iov;
3260 uio.uio_iovcnt = 1;
3261 uio.uio_segflg = UIO_SYSSPACE;
3262 uio.uio_extflg = UIO_COPY_CACHED;
3263 uio.uio_loffset = (offset_t)args->cookie;
3264 uio.uio_resid = datasz;
3265
3266 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3267 eptr = &resp->resok.reply.entries;
3268 entry = NULL;
3269
3270 getmoredents:
3271 iov.iov_base = data;
3272 iov.iov_len = datasz;
3273
3274 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3275 if (error) {
3276 iseof = 0;
3277 goto done;
3278 }
3279
3280 if (iov.iov_len == datasz)
3281 goto done;
3282
3283 for (dp = (dirent64_t *)data; (char *)dp - data < datasz - iov.iov_len;
3284 dp = nextdp(dp)) {
3285 char *name;
3286 count3 esize;
3287
3288 if (dp->d_ino == 0) {
3289 if (entry != NULL)
3290 entry->cookie = (cookie3)dp->d_off;
3291 continue;
3292 }
3293
3294 name = nfscmd_convname(ca, exi, dp->d_name,
3295 NFSCMD_CONV_OUTBOUND, MAXPATHLEN + 1);
3296 if (name == NULL) {
3297 if (entry != NULL)
3298 entry->cookie = (cookie3)dp->d_off;
3299 continue;
3300 }
3301
3302 /*
3303 * struct entry3:
3304 * fileid: 2
3305 * name (length): 1
3306 * name (data): length (rounded up)
3307 * cookie: 2
3308 * nextentry (bool): 1
3309 */
3310 esize = (2 + 1 + 2 + 1) * BYTES_PER_XDR_UNIT +
3311 RNDUP(strlen(name));
3312
3313 /* If the new entry does not fit, discard it */
3314 if (esize > count - size) {
3315 if (name != dp->d_name)
3316 kmem_free(name, MAXPATHLEN + 1);
3317 iseof = 0;
3318 goto done;
3319 }
3320
3321 entry = kmem_alloc(sizeof (entry3), KM_SLEEP);
3322
3323 entry->fileid = (fileid3)dp->d_ino;
3324 entry->name = strdup(name);
3325 if (name != dp->d_name)
3326 kmem_free(name, MAXPATHLEN + 1);
3327 entry->cookie = (cookie3)dp->d_off;
3328
3329 size += esize;
3330
3331 /* Add the entry to the linked list */
3332 *eptr = entry;
3333 eptr = &entry->nextentry;
3334 }
3335
3336 if (!iseof && size < count) {
3337 uio.uio_resid = MIN(datasz, MAXBSIZE);
3338 goto getmoredents;
3339 }
3340
3341 done:
3342 *eptr = NULL;
3343
3344 va.va_mask = AT_ALL;
3345 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3346
3347 if (!iseof && resp->resok.reply.entries == NULL) {
3348 if (error)
3349 goto out;
3350 resp->status = NFS3ERR_TOOSMALL;
3351 goto out1;
3352 }
3353
3354 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3355
3356 #if 0 /* notyet */
3357 /*
3358 * Don't do this. It causes local disk writes when just
3359 * reading the file and the overhead is deemed larger
3360 * than the benefit.
3361 */
3362 /*
3363 * Force modified metadata out to stable storage.
3364 */
3365 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3366 #endif
3367
3368 resp->status = NFS3_OK;
3369 resp->resok.cookieverf = 0;
3370 resp->resok.reply.eof = iseof ? TRUE : FALSE;
3371
3372 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3373
3374 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3375 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3376 READDIR3res *, resp);
3377
3378 VN_RELE(vp);
3379
3380 if (data != NULL)
3381 kmem_free(data, datasz);
3382
3383 return;
3384
3385 out:
3386 if (curthread->t_flag & T_WOULDBLOCK) {
3387 curthread->t_flag &= ~T_WOULDBLOCK;
3388 resp->status = NFS3ERR_JUKEBOX;
3389 } else
3390 resp->status = puterrno3(error);
3391 out1:
3392 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3393
3394 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3395 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3396 READDIR3res *, resp);
3397
3398 if (vp != NULL) {
3399 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3400 VN_RELE(vp);
3401 }
3402
3403 if (data != NULL)
3404 kmem_free(data, datasz);
3405 }
3406
3407 void *
3408 rfs3_readdir_getfh(READDIR3args *args)
3409 {
3410 return (&args->dir);
3411 }
3412
3413 void
3414 rfs3_readdir_free(READDIR3res *resp)
3415 {
3416 if (resp->status == NFS3_OK) {
3417 entry3 *entry, *nentry;
3418
3419 for (entry = resp->resok.reply.entries; entry != NULL;
3420 entry = nentry) {
3421 nentry = entry->nextentry;
3422 strfree(entry->name);
3423 kmem_free(entry, sizeof (entry3));
3424 }
3425 }
3426 }
3427
/*
 * nextdp(dp) yields a struct dirent64 pointer located (dp)->d_reclen bytes
 * past dp; it is used to step through consecutive directory records.
 * Any previous definition of nextdp is dropped first.
 */
#undef nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3432
3433 /* ARGSUSED */
3434 void
3435 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3436 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3437 {
3438 int error;
3439 vnode_t *vp;
3440 struct vattr *vap;
3441 struct vattr va;
3442 struct iovec iov;
3443 struct uio uio;
3444 int iseof;
3445
3446 count3 dircount = args->dircount;
3447 count3 maxcount = args->maxcount;
3448 count3 dirsize = 0;
3449 count3 size; /* size of the READDIRPLUS3resok structure */
3450
3451 size_t datasz;
3452 char *data = NULL;
3453 dirent64_t *dp;
3454
3455 struct sockaddr *ca;
3456 entryplus3 **eptr;
3457 entryplus3 *entry;
3458
3459 vp = nfs3_fhtovp(&args->dir, exi);
3460
3461 DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3462 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3463 READDIRPLUS3args *, args);
3464
3465 if (vp == NULL) {
3466 resp->status = NFS3ERR_STALE;
3467 vap = NULL;
3468 goto out1;
3469 }
3470
3471 if (vp->v_type != VDIR) {
3472 resp->status = NFS3ERR_NOTDIR;
3473 vap = NULL;
3474 goto out1;
3475 }
3476
3477 if (is_system_labeled()) {
3478 bslabel_t *clabel = req->rq_label;
3479
3480 ASSERT(clabel != NULL);
3481 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3482 char *, "got client label from request(1)",
3483 struct svc_req *, req);
3484
3485 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3486 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3487 exi)) {
3488 resp->status = NFS3ERR_ACCES;
3489 vap = NULL;
3490 goto out1;
3491 }
3492 }
3493 }
3494
3495 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3496
3497 va.va_mask = AT_ALL;
3498 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3499
3500 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3501 if (error)
3502 goto out;
3503
3504 /*
3505 * Don't allow arbitrary counts for allocation
3506 */
3507 if (maxcount > rfs3_tsize(req))
3508 maxcount = rfs3_tsize(req);
3509
3510 /*
3511 * struct READDIRPLUS3resok:
3512 * dir_attributes: 1 + NFS3_SIZEOF_FATTR3
3513 * cookieverf: 2
3514 * entries (bool): 1
3515 * eof: 1
3516 */
3517 size = (1 + NFS3_SIZEOF_FATTR3 + 2 + 1 + 1) * BYTES_PER_XDR_UNIT;
3518
3519 if (size > maxcount) {
3520 resp->status = NFS3ERR_TOOSMALL;
3521 goto out1;
3522 }
3523
3524 /*
3525 * This is simplification. The dirent64_t size is not the same as the
3526 * size of XDR representation of entryplus3 (excluding attributes and
3527 * handle), but the sizes are similar so we'll assume they are same.
3528 * This assumption should not cause any harm. In worst case we will
3529 * need to issue VOP_READDIR() once more.
3530 */
3531
3532 datasz = MIN(dircount, maxcount);
3533
3534 /*
3535 * Make sure that there is room to read at least one entry
3536 * if any are available.
3537 */
3538 if (datasz < DIRENT64_RECLEN(MAXNAMELEN))
3539 datasz = DIRENT64_RECLEN(MAXNAMELEN);
3540
3541 data = kmem_alloc(datasz, KM_NOSLEEP);
3542 if (data == NULL) {
3543 /* The allocation failed; downsize and wait for it this time */
3544 if (datasz > MAXBSIZE)
3545 datasz = MAXBSIZE;
3546 data = kmem_alloc(datasz, KM_SLEEP);
3547 }
3548
3549 uio.uio_iov = &iov;
3550 uio.uio_iovcnt = 1;
3551 uio.uio_segflg = UIO_SYSSPACE;
3552 uio.uio_extflg = UIO_COPY_CACHED;
3553 uio.uio_loffset = (offset_t)args->cookie;
3554 uio.uio_resid = datasz;
3555
3556 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3557 eptr = &resp->resok.reply.entries;
3558 entry = NULL;
3559
3560 getmoredents:
3561 iov.iov_base = data;
3562 iov.iov_len = datasz;
3563
3564 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3565 if (error) {
3566 iseof = 0;
3567 goto done;
3568 }
3569
3570 if (iov.iov_len == datasz)
3571 goto done;
3572
3573 for (dp = (dirent64_t *)data; (char *)dp - data < datasz - iov.iov_len;
3574 dp = nextdp(dp)) {
3575 char *name;
3576 vnode_t *nvp;
3577 count3 edirsize;
3578 count3 esize;
3579
3580 if (dp->d_ino == 0) {
3581 if (entry != NULL)
3582 entry->cookie = (cookie3)dp->d_off;
3583 continue;
3584 }
3585
3586 name = nfscmd_convname(ca, exi, dp->d_name,
3587 NFSCMD_CONV_OUTBOUND, MAXPATHLEN + 1);
3588 if (name == NULL) {
3589 if (entry != NULL)
3590 entry->cookie = (cookie3)dp->d_off;
3591 continue;
3592 }
3593
3594 /*
3595 * struct entryplus3:
3596 * fileid: 2
3597 * name (length): 1
3598 * name (data): length (rounded up)
3599 * cookie: 2
3600 */
3601 edirsize = (2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3602 RNDUP(strlen(name));
3603
3604 /*
3605 * struct entryplus3:
3606 * attributes_follow: 1
3607 * handle_follows: 1
3608 * nextentry (bool): 1
3609 */
3610 esize = edirsize + (1 + 1 + 1) * BYTES_PER_XDR_UNIT;
3611
3612 /* If the new entry does not fit, we are done */
3613 if (edirsize > dircount - dirsize || esize > maxcount - size) {
3614 if (name != dp->d_name)
3615 kmem_free(name, MAXPATHLEN + 1);
3616 iseof = 0;
3617 error = 0;
3618 goto done;
3619 }
3620
3621 entry = kmem_alloc(sizeof (entryplus3), KM_SLEEP);
3622
3623 entry->fileid = (fileid3)dp->d_ino;
3624 entry->name = strdup(name);
3625 if (name != dp->d_name)
3626 kmem_free(name, MAXPATHLEN + 1);
3627 entry->cookie = (cookie3)dp->d_off;
3628
3629 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3630 NULL, NULL, NULL);
3631 if (error) {
3632 entry->name_attributes.attributes = FALSE;
3633 entry->name_handle.handle_follows = FALSE;
3634 } else {
3635 struct vattr nva;
3636 struct vattr *nvap;
3637
3638 nva.va_mask = AT_ALL;
3639 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL :
3640 &nva;
3641
3642 /* Lie about the object type for a referral */
3643 if (nvap != NULL && vn_is_nfs_reparse(nvp, cr))
3644 nvap->va_type = VLNK;
3645
3646 if (vn_ismntpt(nvp)) {
3647 entry->name_attributes.attributes = FALSE;
3648 entry->name_handle.handle_follows = FALSE;
3649 } else {
3650 vattr_to_post_op_attr(nvap,
3651 &entry->name_attributes);
3652
3653 error = makefh3(&entry->name_handle.handle, nvp,
3654 exi);
3655 if (!error)
3656 entry->name_handle.handle_follows =
3657 TRUE;
3658 else
3659 entry->name_handle.handle_follows =
3660 FALSE;
3661 }
3662
3663 VN_RELE(nvp);
3664 }
3665
3666 /*
3667 * struct entryplus3 (optionally):
3668 * attributes: NFS3_SIZEOF_FATTR3
3669 * handle length: 1
3670 * handle data: length (rounded up)
3671 */
3672 if (entry->name_attributes.attributes == TRUE)
3673 esize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3674 if (entry->name_handle.handle_follows == TRUE)
3675 esize += 1 * BYTES_PER_XDR_UNIT +
3676 RNDUP(entry->name_handle.handle.fh3_length);
3677
3678 /* If the new entry does not fit, discard it */
3679 if (esize > maxcount - size) {
3680 strfree(entry->name);
3681 kmem_free(entry, sizeof (entryplus3));
3682 iseof = 0;
3683 error = 0;
3684 goto done;
3685 }
3686
3687 dirsize += edirsize;
3688 size += esize;
3689
3690 /* Add the entry to the linked list */
3691 *eptr = entry;
3692 eptr = &entry->nextentry;
3693 }
3694
3695 if (!iseof && dirsize < dircount && size < maxcount) {
3696 uio.uio_resid = MIN(datasz, MAXBSIZE);
3697 goto getmoredents;
3698 }
3699
3700 done:
3701 *eptr = NULL;
3702
3703 va.va_mask = AT_ALL;
3704 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3705
3706 if (!iseof && resp->resok.reply.entries == NULL) {
3707 if (error)
3708 goto out;
3709 resp->status = NFS3ERR_TOOSMALL;
3710 goto out1;
3711 }
3712
3713 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3714
3715 #if 0 /* notyet */
3716 /*
3717 * Don't do this. It causes local disk writes when just
3718 * reading the file and the overhead is deemed larger
3719 * than the benefit.
3720 */
3721 /*
3722 * Force modified metadata out to stable storage.
3723 */
3724 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3725 #endif
3726
3727 resp->status = NFS3_OK;
3728 resp->resok.cookieverf = 0;
3729 resp->resok.reply.eof = iseof ? TRUE : FALSE;
3730
3731 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3732
3733 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3734 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3735 READDIRPLUS3res *, resp);
3736
3737 VN_RELE(vp);
3738
3739 if (data != NULL)
3740 kmem_free(data, datasz);
3741
3742 return;
3743
3744 out:
3745 if (curthread->t_flag & T_WOULDBLOCK) {
3746 curthread->t_flag &= ~T_WOULDBLOCK;
3747 resp->status = NFS3ERR_JUKEBOX;
3748 } else {
3749 resp->status = puterrno3(error);
3750 }
3751 out1:
3752 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3753
3754 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3755 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3756 READDIRPLUS3res *, resp);
3757
3758 if (vp != NULL) {
3759 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3760 VN_RELE(vp);
3761 }
3762
3763 if (data != NULL)
3764 kmem_free(data, datasz);
3765 }
3766
3767 void *
3768 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3769 {
3770 return (&args->dir);
3771 }
3772
3773 void
3774 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3775 {
3776 if (resp->status == NFS3_OK) {
3777 entryplus3 *entry, *nentry;
3778
3779 for (entry = resp->resok.reply.entries; entry != NULL;
3780 entry = nentry) {
3781 nentry = entry->nextentry;
3782 strfree(entry->name);
3783 kmem_free(entry, sizeof (entryplus3));
3784 }
3785 }
3786 }
3787
3788 /* ARGSUSED */
3789 void
3790 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3791 struct svc_req *req, cred_t *cr, bool_t ro)
3792 {
3793 int error;
3794 vnode_t *vp;
3795 struct vattr *vap;
3796 struct vattr va;
3797 struct statvfs64 sb;
3798
3799 vap = NULL;
3800
3801 vp = nfs3_fhtovp(&args->fsroot, exi);
3802
3803 DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3804 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3805 FSSTAT3args *, args);
3806
3807 if (vp == NULL) {
3808 error = ESTALE;
3809 goto out;
3810 }
3811
3812 if (is_system_labeled()) {
3813 bslabel_t *clabel = req->rq_label;
3814
3815 ASSERT(clabel != NULL);
3816 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3817 "got client label from request(1)", struct svc_req *, req);
3818
3819 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3820 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3821 exi)) {
3822 resp->status = NFS3ERR_ACCES;
3823 goto out1;
3824 }
3825 }
3826 }
3827
3828 error = VFS_STATVFS(vp->v_vfsp, &sb);
3829
3830 va.va_mask = AT_ALL;
3831 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3832
3833 if (error)
3834 goto out;
3835
3836 resp->status = NFS3_OK;
3837 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3838 if (sb.f_blocks != (fsblkcnt64_t)-1)
3839 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3840 else
3841 resp->resok.tbytes = (size3)sb.f_blocks;
3842 if (sb.f_bfree != (fsblkcnt64_t)-1)
3843 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3844 else
3845 resp->resok.fbytes = (size3)sb.f_bfree;
3846 if (sb.f_bavail != (fsblkcnt64_t)-1)
3847 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3848 else
3849 resp->resok.abytes = (size3)sb.f_bavail;
3850 resp->resok.tfiles = (size3)sb.f_files;
3851 resp->resok.ffiles = (size3)sb.f_ffree;
3852 resp->resok.afiles = (size3)sb.f_favail;
3853 resp->resok.invarsec = 0;
3854
3855 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3856 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3857 FSSTAT3res *, resp);
3858 VN_RELE(vp);
3859
3860 return;
3861
3862 out:
3863 if (curthread->t_flag & T_WOULDBLOCK) {
3864 curthread->t_flag &= ~T_WOULDBLOCK;
3865 resp->status = NFS3ERR_JUKEBOX;
3866 } else
3867 resp->status = puterrno3(error);
3868 out1:
3869 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3870 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3871 FSSTAT3res *, resp);
3872
3873 if (vp != NULL)
3874 VN_RELE(vp);
3875 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3876 }
3877
3878 void *
3879 rfs3_fsstat_getfh(FSSTAT3args *args)
3880 {
3881 return (&args->fsroot);
3882 }
3883
3884 /* ARGSUSED */
3885 void
3886 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3887 struct svc_req *req, cred_t *cr, bool_t ro)
3888 {
3889 vnode_t *vp;
3890 struct vattr *vap;
3891 struct vattr va;
3892 uint32_t xfer_size;
3893 ulong_t l = 0;
3894 int error;
3895
3896 vp = nfs3_fhtovp(&args->fsroot, exi);
3897
3898 DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3899 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3900 FSINFO3args *, args);
3901
3902 if (vp == NULL) {
3903 if (curthread->t_flag & T_WOULDBLOCK) {
3904 curthread->t_flag &= ~T_WOULDBLOCK;
3905 resp->status = NFS3ERR_JUKEBOX;
3906 } else
3907 resp->status = NFS3ERR_STALE;
3908 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3909 goto out;
3910 }
3911
3912 if (is_system_labeled()) {
3913 bslabel_t *clabel = req->rq_label;
3914
3915 ASSERT(clabel != NULL);
3916 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3917 "got client label from request(1)", struct svc_req *, req);
3918
3919 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3920 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3921 exi)) {
3922 resp->status = NFS3ERR_STALE;
3923 vattr_to_post_op_attr(NULL,
3924 &resp->resfail.obj_attributes);
3925 goto out;
3926 }
3927 }
3928 }
3929
3930 va.va_mask = AT_ALL;
3931 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3932
3933 resp->status = NFS3_OK;
3934 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3935 xfer_size = rfs3_tsize(req);
3936 resp->resok.rtmax = xfer_size;
3937 resp->resok.rtpref = xfer_size;
3938 resp->resok.rtmult = DEV_BSIZE;
3939 resp->resok.wtmax = xfer_size;
3940 resp->resok.wtpref = xfer_size;
3941 resp->resok.wtmult = DEV_BSIZE;
3942 resp->resok.dtpref = MAXBSIZE;
3943
3944 /*
3945 * Large file spec: want maxfilesize based on limit of
3946 * underlying filesystem. We can guess 2^31-1 if need be.
3947 */
3948 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3949 if (error) {
3950 resp->status = puterrno3(error);
3951 goto out;
3952 }
3953
3954 /*
3955 * If the underlying file system does not support _PC_FILESIZEBITS,
3956 * return a reasonable default. Note that error code on VOP_PATHCONF
3957 * will be 0, even if the underlying file system does not support
3958 * _PC_FILESIZEBITS.
3959 */
3960 if (l == (ulong_t)-1) {
3961 resp->resok.maxfilesize = MAXOFF32_T;
3962 } else {
3963 if (l >= (sizeof (uint64_t) * 8))
3964 resp->resok.maxfilesize = INT64_MAX;
3965 else
3966 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3967 }
3968
3969 resp->resok.time_delta.seconds = 0;
3970 resp->resok.time_delta.nseconds = 1000;
3971 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3972 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3973
3974 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3975 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3976 FSINFO3res *, resp);
3977
3978 VN_RELE(vp);
3979
3980 return;
3981
3982 out:
3983 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3984 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
3985 FSINFO3res *, resp);
3986 if (vp != NULL)
3987 VN_RELE(vp);
3988 }
3989
3990 void *
3991 rfs3_fsinfo_getfh(FSINFO3args *args)
3992 {
3993 return (&args->fsroot);
3994 }
3995
3996 /* ARGSUSED */
3997 void
3998 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3999 struct svc_req *req, cred_t *cr, bool_t ro)
4000 {
4001 int error;
4002 vnode_t *vp;
4003 struct vattr *vap;
4004 struct vattr va;
4005 ulong_t val;
4006
4007 vap = NULL;
4008
4009 vp = nfs3_fhtovp(&args->object, exi);
4010
4011 DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4012 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4013 PATHCONF3args *, args);
4014
4015 if (vp == NULL) {
4016 error = ESTALE;
4017 goto out;
4018 }
4019
4020 if (is_system_labeled()) {
4021 bslabel_t *clabel = req->rq_label;
4022
4023 ASSERT(clabel != NULL);
4024 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4025 "got client label from request(1)", struct svc_req *, req);
4026
4027 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4028 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4029 exi)) {
4030 resp->status = NFS3ERR_ACCES;
4031 goto out1;
4032 }
4033 }
4034 }
4035
4036 va.va_mask = AT_ALL;
4037 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4038
4039 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4040 if (error)
4041 goto out;
4042 resp->resok.info.link_max = (uint32)val;
4043
4044 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4045 if (error)
4046 goto out;
4047 resp->resok.info.name_max = (uint32)val;
4048
4049 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4050 if (error)
4051 goto out;
4052 if (val == 1)
4053 resp->resok.info.no_trunc = TRUE;
4054 else
4055 resp->resok.info.no_trunc = FALSE;
4056
4057 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4058 if (error)
4059 goto out;
4060 if (val == 1)
4061 resp->resok.info.chown_restricted = TRUE;
4062 else
4063 resp->resok.info.chown_restricted = FALSE;
4064
4065 resp->status = NFS3_OK;
4066 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4067 resp->resok.info.case_insensitive = FALSE;
4068 resp->resok.info.case_preserving = TRUE;
4069 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4070 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4071 PATHCONF3res *, resp);
4072 VN_RELE(vp);
4073 return;
4074
4075 out:
4076 if (curthread->t_flag & T_WOULDBLOCK) {
4077 curthread->t_flag &= ~T_WOULDBLOCK;
4078 resp->status = NFS3ERR_JUKEBOX;
4079 } else
4080 resp->status = puterrno3(error);
4081 out1:
4082 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4083 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4084 PATHCONF3res *, resp);
4085 if (vp != NULL)
4086 VN_RELE(vp);
4087 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4088 }
4089
4090 void *
4091 rfs3_pathconf_getfh(PATHCONF3args *args)
4092 {
4093 return (&args->object);
4094 }
4095
4096 void
4097 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4098 struct svc_req *req, cred_t *cr, bool_t ro)
4099 {
4100 nfs3_srv_t *ns;
4101 int error;
4102 vnode_t *vp;
4103 struct vattr *bvap;
4104 struct vattr bva;
4105 struct vattr *avap;
4106 struct vattr ava;
4107
4108 bvap = NULL;
4109 avap = NULL;
4110
4111 vp = nfs3_fhtovp(&args->file, exi);
4112
4113 DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4114 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4115 COMMIT3args *, args);
4116
4117 if (vp == NULL) {
4118 error = ESTALE;
4119 goto out;
4120 }
4121
4122 ns = zone_getspecific(rfs3_zone_key, curzone);
4123 bva.va_mask = AT_ALL;
4124 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4125
4126 /*
4127 * If we can't get the attributes, then we can't do the
4128 * right access checking. So, we'll fail the request.
4129 */
4130 if (error)
4131 goto out;
4132
4133 bvap = &bva;
4134
4135 if (rdonly(ro, vp)) {
4136 resp->status = NFS3ERR_ROFS;
4137 goto out1;
4138 }
4139
4140 if (vp->v_type != VREG) {
4141 resp->status = NFS3ERR_INVAL;
4142 goto out1;
4143 }
4144
4145 if (is_system_labeled()) {
4146 bslabel_t *clabel = req->rq_label;
4147
4148 ASSERT(clabel != NULL);
4149 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4150 "got client label from request(1)", struct svc_req *, req);
4151
4152 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4153 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4154 exi)) {
4155 resp->status = NFS3ERR_ACCES;
4156 goto out1;
4157 }
4158 }
4159 }
4160
4161 if (crgetuid(cr) != bva.va_uid &&
4162 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4163 goto out;
4164
4165 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4166
4167 ava.va_mask = AT_ALL;
4168 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4169
4170 if (error)
4171 goto out;
4172
4173 resp->status = NFS3_OK;
4174 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4175 resp->resok.verf = ns->write3verf;
4176
4177 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4178 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4179 COMMIT3res *, resp);
4180
4181 VN_RELE(vp);
4182
4183 return;
4184
4185 out:
4186 if (curthread->t_flag & T_WOULDBLOCK) {
4187 curthread->t_flag &= ~T_WOULDBLOCK;
4188 resp->status = NFS3ERR_JUKEBOX;
4189 } else
4190 resp->status = puterrno3(error);
4191 out1:
4192 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4193 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4194 COMMIT3res *, resp);
4195
4196 if (vp != NULL)
4197 VN_RELE(vp);
4198 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4199 }
4200
4201 void *
4202 rfs3_commit_getfh(COMMIT3args *args)
4203 {
4204 return (&args->file);
4205 }
4206
4207 static int
4208 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4209 {
4210
4211 vap->va_mask = 0;
4212
4213 if (sap->mode.set_it) {
4214 vap->va_mode = (mode_t)sap->mode.mode;
4215 vap->va_mask |= AT_MODE;
4216 }
4217 if (sap->uid.set_it) {
4218 vap->va_uid = (uid_t)sap->uid.uid;
4219 vap->va_mask |= AT_UID;
4220 }
4221 if (sap->gid.set_it) {
4222 vap->va_gid = (gid_t)sap->gid.gid;
4223 vap->va_mask |= AT_GID;
4224 }
4225 if (sap->size.set_it) {
4226 if (sap->size.size > (size3)((u_longlong_t)-1))
4227 return (EINVAL);
4228 vap->va_size = sap->size.size;
4229 vap->va_mask |= AT_SIZE;
4230 }
4231 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4232 #ifndef _LP64
4233 /* check time validity */
4234 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4235 return (EOVERFLOW);
4236 #endif
4237 /*
4238 * nfs protocol defines times as unsigned so don't extend sign,
4239 * unless sysadmin set nfs_allow_preepoch_time.
4240 */
4241 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4242 sap->atime.atime.seconds);
4243 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4244 vap->va_mask |= AT_ATIME;
4245 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4246 gethrestime(&vap->va_atime);
4247 vap->va_mask |= AT_ATIME;
4248 }
4249 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4250 #ifndef _LP64
4251 /* check time validity */
4252 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4253 return (EOVERFLOW);
4254 #endif
4255 /*
4256 * nfs protocol defines times as unsigned so don't extend sign,
4257 * unless sysadmin set nfs_allow_preepoch_time.
4258 */
4259 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4260 sap->mtime.mtime.seconds);
4261 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4262 vap->va_mask |= AT_MTIME;
4263 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4264 gethrestime(&vap->va_mtime);
4265 vap->va_mask |= AT_MTIME;
4266 }
4267
4268 return (0);
4269 }
4270
/*
 * Lookup table indexed by a vnode type value (the va_type of a struct
 * vattr) that yields the ftype3 stored in an fattr3.  It has 11 slots;
 * slots holding 0 are type values for which no NF3* constant is
 * listed here.
 */
static const ftype3 vt_to_nf3[] = {
	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
};
4274
4275 static int
4276 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4277 {
4278
4279 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4280 /* Return error if time or size overflow */
4281 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4282 return (EOVERFLOW);
4283 }
4284 fap->type = vt_to_nf3[vap->va_type];
4285 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4286 fap->nlink = (uint32)vap->va_nlink;
4287 if (vap->va_uid == UID_NOBODY)
4288 fap->uid = (uid3)NFS_UID_NOBODY;
4289 else
4290 fap->uid = (uid3)vap->va_uid;
4291 if (vap->va_gid == GID_NOBODY)
4292 fap->gid = (gid3)NFS_GID_NOBODY;
4293 else
4294 fap->gid = (gid3)vap->va_gid;
4295 fap->size = (size3)vap->va_size;
4296 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4297 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4298 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4299 fap->fsid = (uint64)vap->va_fsid;
4300 fap->fileid = (fileid3)vap->va_nodeid;
4301 fap->atime.seconds = vap->va_atime.tv_sec;
4302 fap->atime.nseconds = vap->va_atime.tv_nsec;
4303 fap->mtime.seconds = vap->va_mtime.tv_sec;
4304 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4305 fap->ctime.seconds = vap->va_ctime.tv_sec;
4306 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4307 return (0);
4308 }
4309
4310 static int
4311 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4312 {
4313
4314 /* Return error if time or size overflow */
4315 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4316 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4317 NFS3_SIZE_OK(vap->va_size))) {
4318 return (EOVERFLOW);
4319 }
4320 wccap->size = (size3)vap->va_size;
4321 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4322 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4323 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4324 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4325 return (0);
4326 }
4327
4328 static void
4329 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4330 {
4331
4332 /* don't return attrs if time overflow */
4333 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4334 poap->attributes = TRUE;
4335 } else
4336 poap->attributes = FALSE;
4337 }
4338
4339 void
4340 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4341 {
4342
4343 /* don't return attrs if time overflow */
4344 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4345 poap->attributes = TRUE;
4346 } else
4347 poap->attributes = FALSE;
4348 }
4349
4350 static void
4351 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4352 {
4353 vattr_to_pre_op_attr(bvap, &wccp->before);
4354 vattr_to_post_op_attr(avap, &wccp->after);
4355 }
4356
4357 static int
4358 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4359 {
4360 struct clist *wcl;
4361 int wlist_len;
4362 count3 count = rok->count;
4363
4364 wcl = args->wlist;
4365 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4366 return (FALSE);
4367
4368 wcl = args->wlist;
4369 rok->wlist_len = wlist_len;
4370 rok->wlist = wcl;
4371 return (TRUE);
4372 }
4373
4374 /* ARGSUSED */
4375 static void *
4376 rfs3_zone_init(zoneid_t zoneid)
4377 {
4378 nfs3_srv_t *ns;
4379 struct rfs3_verf_overlay {
4380 uint_t id; /* a "unique" identifier */
4381 int ts; /* a unique timestamp */
4382 } *verfp;
4383 timestruc_t now;
4384
4385 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4386
4387 /*
4388 * The following algorithm attempts to find a unique verifier
4389 * to be used as the write verifier returned from the server
4390 * to the client. It is important that this verifier change
4391 * whenever the server reboots. Of secondary importance, it
4392 * is important for the verifier to be unique between two
4393 * different servers.
4394 *
4395 * Thus, an attempt is made to use the system hostid and the
4396 * current time in seconds when the nfssrv kernel module is
4397 * loaded. It is assumed that an NFS server will not be able
4398 * to boot and then to reboot in less than a second. If the
4399 * hostid has not been set, then the current high resolution
4400 * time is used. This will ensure different verifiers each
4401 * time the server reboots and minimize the chances that two
4402 * different servers will have the same verifier.
4403 */
4404
4405 #ifndef lint
4406 /*
4407 * We ASSERT that this constant logic expression is
4408 * always true because in the past, it wasn't.
4409 */
4410 ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4411 #endif
4412
4413 gethrestime(&now);
4414 verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4415 verfp->ts = (int)now.tv_sec;
4416 verfp->id = zone_get_hostid(NULL);
4417
4418 if (verfp->id == 0)
4419 verfp->id = (uint_t)now.tv_nsec;
4420
4421 return (ns);
4422 }
4423
4424 /* ARGSUSED */
4425 static void
4426 rfs3_zone_fini(zoneid_t zoneid, void *data)
4427 {
4428 nfs3_srv_t *ns = data;
4429
4430 kmem_free(ns, sizeof (*ns));
4431 }
4432
4433 void
4434 rfs3_srvrinit(void)
4435 {
4436 nfs3_srv_caller_id = fs_new_caller_id();
4437 zone_key_create(&rfs3_zone_key, rfs3_zone_init, NULL, rfs3_zone_fini);
4438 }
4439
/*
 * Counterpart of rfs3_srvrinit().  It currently performs no work; in
 * particular it does not undo the zone key registration made there.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}