3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 * All Rights Reserved
31 */
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 #include <sys/sdt.h>
58 #include <sys/ddi.h>
59 #include <sys/zone.h>
60
61 #include <fs/fs_reparse.h>
62
63 #include <rpc/types.h>
64 #include <rpc/auth.h>
65 #include <rpc/rpcsec_gss.h>
66 #include <rpc/svc.h>
67
68 #include <nfs/nfs.h>
69 #include <nfs/export.h>
70 #include <nfs/nfs_cmd.h>
71 #include <nfs/lm.h>
72 #include <nfs/nfs4.h>
73
74 #include <sys/strsubr.h>
75 #include <sys/strsun.h>
76
77 #include <inet/common.h>
78 #include <inet/ip.h>
79 #include <inet/ip6.h>
80
81 #include <sys/tsol/label.h>
82 #include <sys/tsol/tndb.h>
83
/*
 * Lock retry tunables.  Each file-scope variable is seeded from the
 * macro defined just above it.
 */
#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
/* The next two objects are defined outside this file. */
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */
91
92 static int rdma_setup_read_data4(READ4args *, READ4res *);
130 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
131 #define RFS4_MINLEN_RDDIR_BUF \
132 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
133
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * Solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 *
 * DIRENT64_TO_DIRCOUNT(dp) evaluates to three XDR units plus the name
 * length that DIRENT64_NAMELEN() derives from dp->d_reclen.
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
149
150 time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */
151
152 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
153
154 u_longlong_t nfs4_srv_caller_id;
155 uint_t nfs4_srv_vkey = 0;
156
157 verifier4 Write4verf;
158 verifier4 Readdir4verf;
159
160 void rfs4_init_compound_state(struct compound_state *);
161
162 static void nullfree(caddr_t);
163 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
164 struct compound_state *);
165 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
166 struct compound_state *);
167 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
168 struct compound_state *);
169 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 struct compound_state *);
171 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 struct compound_state *);
173 static void rfs4_op_create_free(nfs_resop4 *resop);
174 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
175 struct svc_req *, struct compound_state *);
176 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
177 struct svc_req *, struct compound_state *);
178 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
179 struct compound_state *);
228 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 struct compound_state *);
230 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 struct compound_state *);
232 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 struct compound_state *);
234 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 struct compound_state *);
236 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 struct compound_state *);
238 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 struct compound_state *);
240 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
241 struct svc_req *, struct compound_state *);
242 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
243 struct svc_req *req, struct compound_state *);
244 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
245 struct compound_state *);
246 static void rfs4_op_secinfo_free(nfs_resop4 *);
247
248 static nfsstat4 check_open_access(uint32_t,
249 struct compound_state *, struct svc_req *);
250 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
251 void rfs4_ss_clid(rfs4_client_t *);
252
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and released by
 * nfs4_ntov_table_free(); also handed to do_rfs4_set_attrs().
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only -- confirm against the code that fills them in.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably the attribute values */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably attr index map */
	int attrcnt;			/* presumably number of entries used */
	bool_t vfsstat;			/* presumably "statvfs data needed" */
};
262
263 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
264 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
265 struct nfs4_svgetit_arg *sargp);
266
267 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
268 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
269 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
270
271 fem_t *deleg_rdops;
272 fem_t *deleg_wrops;
273
274 rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */
275 kmutex_t rfs4_servinst_lock; /* protects linked list */
276 int rfs4_seen_first_compound; /* set first time we see one */
277
/*
 * NFS4 op dispatch table
 *
 * struct rfsv4disp is the element type of rfsv4disptab[].  Both function
 * pointers are declared with empty (unspecified) parameter lists, so the
 * compiler does not check the arguments passed through them.
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
287
288 static struct rfsv4disp rfsv4disptab[] = {
289 /*
290 * NFS VERSION 4
291 */
292
293 /* RFS_NULL = 0 */
294 {rfs4_op_illegal, nullfree, 0},
295
296 /* UNUSED = 1 */
297 {rfs4_op_illegal, nullfree, 0},
449 "rfs4_op_putrootfh",
450 "rfs4_op_read",
451 "rfs4_op_readdir",
452 "rfs4_op_readlink",
453 "rfs4_op_remove",
454 "rfs4_op_rename",
455 "rfs4_op_renew",
456 "rfs4_op_restorefh",
457 "rfs4_op_savefh",
458 "rfs4_op_secinfo",
459 "rfs4_op_setattr",
460 "rfs4_op_setclientid",
461 "rfs4_op_setclient_confirm",
462 "rfs4_op_verify",
463 "rfs4_op_write",
464 "rfs4_op_release_lockowner",
465 "rfs4_op_illegal"
466 };
467 #endif
468
469 void rfs4_ss_chkclid(rfs4_client_t *);
470
471 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
472
473 extern void rfs4_free_fs_locations4(fs_locations4 *);
474
/*
 * nextdp(dp): step from one struct dirent64 record to the one that
 * follows it by advancing dp->d_reclen bytes.  Any earlier definition of
 * nextdp is discarded first so this one always takes effect.
 */
#ifdef	nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
479
480 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
481 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
482 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
483 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
484 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
485 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
486 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
487 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
488 NULL, NULL
489 };
490 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
491 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
492 VOPNAME_READ, { .femop_read = deleg_wr_read },
493 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
494 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
495 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
496 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
497 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
498 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
499 NULL, NULL
500 };
501
502 int
503 rfs4_srvrinit(void)
504 {
505 timespec32_t verf;
506 int error;
507 extern void rfs4_attr_init();
508 extern krwlock_t rfs4_deleg_policy_lock;
509
510 /*
511 * The following algorithm attempts to find a unique verifier
512 * to be used as the write verifier returned from the server
513 * to the client. It is important that this verifier change
514 * whenever the server reboots. Of secondary importance, it
515 * is important for the verifier to be unique between two
516 * different servers.
517 *
518 * Thus, an attempt is made to use the system hostid and the
519 * current time in seconds when the nfssrv kernel module is
520 * loaded. It is assumed that an NFS server will not be able
521 * to boot and then to reboot in less than a second. If the
522 * hostid has not been set, then the current high resolution
523 * time is used. This will ensure different verifiers each
524 * time the server reboots and minimize the chances that two
525 * different servers will have the same verifier.
526 * XXX - this is broken on LP64 kernels.
527 */
528 verf.tv_sec = (time_t)zone_get_hostid(NULL);
529 if (verf.tv_sec != 0) {
530 verf.tv_nsec = gethrestime_sec();
531 } else {
532 timespec_t tverf;
533
534 gethrestime(&tverf);
535 verf.tv_sec = (time_t)tverf.tv_sec;
536 verf.tv_nsec = tverf.tv_nsec;
537 }
538
539 Write4verf = *(uint64_t *)&verf;
540
541 rfs4_attr_init();
542 mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
543
544 /* Used to manage create/destroy of server state */
545 mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
546
547 /* Used to manage access to server instance linked list */
548 mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
549
550 /* Used to manage access to rfs4_deleg_policy */
551 rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
552
553 error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
554 if (error != 0) {
555 rfs4_disable_delegation();
556 } else {
557 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
558 &deleg_wrops);
559 if (error != 0) {
560 rfs4_disable_delegation();
561 fem_free(deleg_rdops);
562 }
563 }
564
565 nfs4_srv_caller_id = fs_new_caller_id();
566
567 lockt_sysid = lm_alloc_sysidt();
568
569 vsd_create(&nfs4_srv_vkey, NULL);
570
571 return (0);
572 }
573
574 void
575 rfs4_srvrfini(void)
576 {
577 extern krwlock_t rfs4_deleg_policy_lock;
578
579 if (lockt_sysid != LM_NOSYSID) {
580 lm_free_sysidt(lockt_sysid);
581 lockt_sysid = LM_NOSYSID;
582 }
583
584 mutex_destroy(&rfs4_deleg_lock);
585 mutex_destroy(&rfs4_state_lock);
586 rw_destroy(&rfs4_deleg_policy_lock);
587
588 fem_free(deleg_rdops);
589 fem_free(deleg_wrops);
590 }
591
592 void
593 rfs4_init_compound_state(struct compound_state *cs)
594 {
595 bzero(cs, sizeof (*cs));
596 cs->cont = TRUE;
597 cs->access = CS_ACCESS_DENIED;
598 cs->deleg = FALSE;
599 cs->mandlock = FALSE;
600 cs->fh.nfs_fh4_val = cs->fhbuf;
601 }
602
603 void
604 rfs4_grace_start(rfs4_servinst_t *sip)
605 {
606 rw_enter(&sip->rwlock, RW_WRITER);
607 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
608 sip->grace_period = rfs4_grace_period;
609 rw_exit(&sip->rwlock);
610 }
611
612 /*
635
636 rw_enter(&sip->rwlock, RW_READER);
637 grace_expiry = sip->start_time + sip->grace_period;
638 rw_exit(&sip->rwlock);
639
640 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
641 }
642
643 int
644 rfs4_clnt_in_grace(rfs4_client_t *cp)
645 {
646 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
647
648 return (rfs4_servinst_in_grace(cp->rc_server_instance));
649 }
650
651 /*
652 * reset all currently active grace periods
653 */
654 void
655 rfs4_grace_reset_all(void)
656 {
657 rfs4_servinst_t *sip;
658
659 mutex_enter(&rfs4_servinst_lock);
660 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
661 if (rfs4_servinst_in_grace(sip))
662 rfs4_grace_start(sip);
663 mutex_exit(&rfs4_servinst_lock);
664 }
665
666 /*
667 * start any new instances' grace periods
668 */
669 void
670 rfs4_grace_start_new(void)
671 {
672 rfs4_servinst_t *sip;
673
674 mutex_enter(&rfs4_servinst_lock);
675 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
676 if (rfs4_servinst_grace_new(sip))
677 rfs4_grace_start(sip);
678 mutex_exit(&rfs4_servinst_lock);
679 }
680
681 static rfs4_dss_path_t *
682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
683 {
684 size_t len;
685 rfs4_dss_path_t *dss_path;
686
687 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
688
689 /*
690 * Take a copy of the string, since the original may be overwritten.
691 * Sadly, no strdup() in the kernel.
692 */
693 /* allow for NUL */
694 len = strlen(path) + 1;
695 dss_path->path = kmem_alloc(len, KM_SLEEP);
696 (void) strlcpy(dss_path->path, path, len);
697
698 /* associate with servinst */
699 dss_path->sip = sip;
700 dss_path->index = index;
701
702 /*
703 * Add to list of served paths.
704 * No locking required, as we're only ever called at startup.
705 */
706 if (rfs4_dss_pathlist == NULL) {
707 /* this is the first dss_path_t */
708
709 /* needed for insque/remque */
710 dss_path->next = dss_path->prev = dss_path;
711
712 rfs4_dss_pathlist = dss_path;
713 } else {
714 insque(dss_path, rfs4_dss_pathlist);
715 }
716
717 return (dss_path);
718 }
719
720 /*
721 * Create a new server instance, and make it the currently active instance.
722 * Note that starting the grace period too early will reduce the clients'
723 * recovery window.
724 */
725 void
726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
727 {
728 unsigned i;
729 rfs4_servinst_t *sip;
730 rfs4_oldstate_t *oldstate;
731
732 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
733 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
734
735 sip->start_time = (time_t)0;
736 sip->grace_period = (time_t)0;
737 sip->next = NULL;
738 sip->prev = NULL;
739
740 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
741 /*
742 * This initial dummy entry is required to setup for insque/remque.
743 * It must be skipped over whenever the list is traversed.
744 */
745 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
746 /* insque/remque require initial list entry to be self-terminated */
747 oldstate->next = oldstate;
748 oldstate->prev = oldstate;
749 sip->oldstate = oldstate;
750
751
752 sip->dss_npaths = dss_npaths;
753 sip->dss_paths = kmem_alloc(dss_npaths *
754 sizeof (rfs4_dss_path_t *), KM_SLEEP);
755
756 for (i = 0; i < dss_npaths; i++) {
757 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
758 }
759
760 mutex_enter(&rfs4_servinst_lock);
761 if (rfs4_cur_servinst != NULL) {
762 /* add to linked list */
763 sip->prev = rfs4_cur_servinst;
764 rfs4_cur_servinst->next = sip;
765 }
766 if (start_grace)
767 rfs4_grace_start(sip);
768 /* make the new instance "current" */
769 rfs4_cur_servinst = sip;
770
771 mutex_exit(&rfs4_servinst_lock);
772 }
773
774 /*
775 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
776 * all instances directly.
777 */
778 void
779 rfs4_servinst_destroy_all(void)
780 {
781 rfs4_servinst_t *sip, *prev, *current;
782 #ifdef DEBUG
783 int n = 0;
784 #endif
785
786 mutex_enter(&rfs4_servinst_lock);
787 ASSERT(rfs4_cur_servinst != NULL);
788 current = rfs4_cur_servinst;
789 rfs4_cur_servinst = NULL;
790 for (sip = current; sip != NULL; sip = prev) {
791 prev = sip->prev;
792 rw_destroy(&sip->rwlock);
793 if (sip->oldstate)
794 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
795 if (sip->dss_paths)
796 kmem_free(sip->dss_paths,
797 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
798 kmem_free(sip, sizeof (rfs4_servinst_t));
799 #ifdef DEBUG
800 n++;
801 #endif
802 }
803 mutex_exit(&rfs4_servinst_lock);
804 }
805
806 /*
807 * Assign the current server instance to a client_t.
808 * Should be called with cp->rc_dbe held.
809 */
810 void
811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
812 {
813 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
814
815 /*
816 * The lock ensures that if the current instance is in the process
817 * of changing, we will see the new one.
818 */
819 mutex_enter(&rfs4_servinst_lock);
820 cp->rc_server_instance = sip;
821 mutex_exit(&rfs4_servinst_lock);
822 }
823
824 rfs4_servinst_t *
825 rfs4_servinst(rfs4_client_t *cp)
826 {
827 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
828
829 return (cp->rc_server_instance);
830 }
831
832 /* ARGSUSED */
833 static void
834 nullfree(caddr_t resop)
835 {
836 }
837
838 /*
839 * This is a fall-through for invalid or not implemented (yet) ops
840 */
841 /* ARGSUSED */
854 {
855 int i;
856
857 for (i = 0; i < count; i++) {
858 if (nfsnum == flavor_list[i])
859 return (TRUE);
860 }
861 return (FALSE);
862 }
863
864 /*
865 * Used by rfs4_op_secinfo to get the security information from the
866 * export structure associated with the component.
867 */
868 /* ARGSUSED */
869 static nfsstat4
870 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
871 {
872 int error, different_export = 0;
873 vnode_t *dvp, *vp;
874 struct exportinfo *exi = NULL;
875 fid_t fid;
876 uint_t count, i;
877 secinfo4 *resok_val;
878 struct secinfo *secp;
879 seconfig_t *si;
880 bool_t did_traverse = FALSE;
881 int dotdot, walk;
882
883 dvp = cs->vp;
884 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
885
886 /*
887 * If dotdotting, then need to check whether it's above the
888 * root of a filesystem, or above an export point.
889 */
890 if (dotdot) {
891
892 /*
893 * If dotdotting at the root of a filesystem, then
894 * need to traverse back to the mounted-on filesystem
895 * and do the dotdot lookup there.
896 */
897 if (cs->vp->v_flag & VROOT) {
898
899 /*
900 * If at the system root, then can
901 * go up no further.
902 */
903 if (VN_CMP(dvp, rootdir))
904 return (puterrno4(ENOENT));
905
906 /*
907 * Traverse back to the mounted-on filesystem
908 */
909 dvp = untraverse(cs->vp);
910
911 /*
912 * Set the different_export flag so we remember
913 * to pick up a new exportinfo entry for
914 * this new filesystem.
915 */
916 different_export = 1;
917 } else {
918
919 /*
920 * If dotdotting above an export point then set
921 * the different_export to get new export info.
922 */
923 different_export = nfs_exported(cs->exi, cs->vp);
924 }
925 }
926
927 /*
928 * Get the vnode for the component "nm".
929 */
930 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
931 NULL, NULL, NULL);
932 if (error)
933 return (puterrno4(error));
934
935 /*
936 * If the vnode is in a pseudo filesystem, or if the security flavor
937 * used in the request is valid but not an explicitly shared flavor,
938 * or the access bit indicates that this is a limited access,
939 * check whether this vnode is visible.
940 */
941 if (!different_export &&
942 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
943 cs->access & CS_ACCESS_LIMITED)) {
944 if (! nfs_visible(cs->exi, vp, &different_export)) {
945 VN_RELE(vp);
946 return (puterrno4(ENOENT));
947 }
948 }
949
950 /*
951 * If it's a mountpoint, then traverse it.
952 */
953 if (vn_ismntpt(vp)) {
954 if ((error = traverse(&vp)) != 0) {
955 VN_RELE(vp);
956 return (puterrno4(error));
957 }
958 /* remember that we had to traverse mountpoint */
959 did_traverse = TRUE;
960 different_export = 1;
961 } else if (vp->v_vfsp != dvp->v_vfsp) {
962 /*
963 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
964 * then vp is probably an LOFS object. We don't need the
966 * a server fs boundary and need to call checkexport4.
967 * (LOFS lookup hides server fs mountpoints, and actually calls
968 * traverse)
969 */
970 different_export = 1;
971 }
972
973 /*
974 * Get the export information for it.
975 */
976 if (different_export) {
977
978 bzero(&fid, sizeof (fid));
979 fid.fid_len = MAXFIDSZ;
980 error = vop_fid_pseudo(vp, &fid);
981 if (error) {
982 VN_RELE(vp);
983 return (puterrno4(error));
984 }
985
986 if (dotdot)
987 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
988 else
989 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
990
991 if (exi == NULL) {
992 if (did_traverse == TRUE) {
993 /*
994 * If this vnode is a mounted-on vnode,
995 * but the mounted-on file system is not
996 * exported, send back the secinfo for
997 * the exported node that the mounted-on
998 * vnode lives in.
999 */
1000 exi = cs->exi;
1001 } else {
1002 VN_RELE(vp);
1003 return (puterrno4(EACCES));
1004 }
1005 }
1006 } else {
1007 exi = cs->exi;
1008 }
1009 ASSERT(exi != NULL);
1010
1011
1012 /*
1013 * Create the secinfo result based on the security information
1014 * from the exportinfo structure (exi).
1015 *
1016 * Return all flavors for a pseudo node.
1017 * For a real export node, return the flavor that the client
1018 * has access with.
1019 */
1020 ASSERT(RW_LOCK_HELD(&exported_lock));
1021 if (PSEUDO(exi)) {
1022 count = exi->exi_export.ex_seccnt; /* total sec count */
1023 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1024 secp = exi->exi_export.ex_secinfo;
1025
1026 for (i = 0; i < count; i++) {
1027 si = &secp[i].s_secinfo;
1028 resok_val[i].flavor = si->sc_rpcnum;
1029 if (resok_val[i].flavor == RPCSEC_GSS) {
1030 rpcsec_gss_info *info;
1031
1032 info = &resok_val[i].flavor_info;
1033 info->qop = si->sc_qop;
1034 info->service = (rpc_gss_svc_t)si->sc_service;
1035
1036 /* get oid opaque data */
1037 info->oid.sec_oid4_len =
1038 si->sc_gss_mech_type->length;
1039 info->oid.sec_oid4_val = kmem_alloc(
1040 si->sc_gss_mech_type->length, KM_SLEEP);
1363 if (is_system_labeled() && !admin_low_client)
1364 label_rele(tslabel);
1365
1366 *cs->statusp = resp->status = NFS4_OK;
1367 out:
1368 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1369 ACCESS4res *, resp);
1370 }
1371
1372 /* ARGSUSED */
1373 static void
1374 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375 struct compound_state *cs)
1376 {
1377 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379 int error;
1380 vnode_t *vp = cs->vp;
1381 cred_t *cr = cs->cr;
1382 vattr_t va;
1383
1384 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385 COMMIT4args *, args);
1386
1387 if (vp == NULL) {
1388 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389 goto out;
1390 }
1391 if (cs->access == CS_ACCESS_DENIED) {
1392 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1393 goto out;
1394 }
1395
1396 if (args->offset + args->count < args->offset) {
1397 *cs->statusp = resp->status = NFS4ERR_INVAL;
1398 goto out;
1399 }
1400
1401 va.va_mask = AT_UID;
1402 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1419 resp->status = NFS4ERR_ISDIR;
1420 else
1421 resp->status = NFS4ERR_INVAL;
1422 *cs->statusp = resp->status;
1423 goto out;
1424 }
1425
1426 if (crgetuid(cr) != va.va_uid &&
1427 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1428 *cs->statusp = resp->status = puterrno4(error);
1429 goto out;
1430 }
1431
1432 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433
1434 if (error) {
1435 *cs->statusp = resp->status = puterrno4(error);
1436 goto out;
1437 }
1438
1439 *cs->statusp = resp->status = NFS4_OK;
1440 resp->writeverf = Write4verf;
1441 out:
1442 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443 COMMIT4res *, resp);
1444 }
1445
1446 /*
1447 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1448 * was completed. It does the nfsv4 create for special files.
1449 */
1450 /* ARGSUSED */
1451 static vnode_t *
1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1453 struct compound_state *cs, vattr_t *vap, char *nm)
1454 {
1455 int error;
1456 cred_t *cr = cs->cr;
1457 vnode_t *dvp = cs->vp;
1458 vnode_t *vp = NULL;
1459 int mode;
1460 enum vcexcl excl;
2616 fid_t fid;
2617 int attrdir, dotdot, walk;
2618 bool_t is_newvp = FALSE;
2619
2620 if (cs->vp->v_flag & V_XATTRDIR) {
2621 attrdir = 1;
2622 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2623 } else {
2624 attrdir = 0;
2625 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2626 }
2627
2628 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629
2630 /*
2631 * If dotdotting, then need to check whether it's
2632 * above the root of a filesystem, or above an
2633 * export point.
2634 */
2635 if (dotdot) {
2636
2637 /*
2638 * If dotdotting at the root of a filesystem, then
2639 * need to traverse back to the mounted-on filesystem
2640 * and do the dotdot lookup there.
2641 */
2642 if (cs->vp->v_flag & VROOT) {
2643
2644 /*
2645 * If at the system root, then can
2646 * go up no further.
2647 */
2648 if (VN_CMP(cs->vp, rootdir))
2649 return (puterrno4(ENOENT));
2650
2651 /*
2652 * Traverse back to the mounted-on filesystem
2653 */
2654 cs->vp = untraverse(cs->vp);
2655
2656 /*
2657 * Set the different_export flag so we remember
2658 * to pick up a new exportinfo entry for
2659 * this new filesystem.
2660 */
2661 different_export = 1;
2662 } else {
2663
2664 /*
2665 * If dotdotting above an export point then set
2666 * the different_export to get new export info.
2667 */
2668 different_export = nfs_exported(cs->exi, cs->vp);
2669 }
2670 }
2671
2672 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2673 NULL, NULL, NULL);
2674 if (error)
3392 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3393
3394 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3395 freeb(resp->mblk);
3396 resp->mblk = NULL;
3397 resp->data_len = 0;
3398 }
3399 }
3400
3401
3402 /* ARGSUSED */
3403 static void
3404 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405 struct compound_state *cs)
3406 {
3407 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3408 int error;
3409 vnode_t *vp;
3410 struct exportinfo *exi, *sav_exi;
3411 nfs_fh4_fmt_t *fh_fmtp;
3412
3413 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414
3415 if (cs->vp) {
3416 VN_RELE(cs->vp);
3417 cs->vp = NULL;
3418 }
3419
3420 if (cs->cr)
3421 crfree(cs->cr);
3422
3423 cs->cr = crdup(cs->basecr);
3424
3425 vp = exi_public->exi_vp;
3426 if (vp == NULL) {
3427 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428 goto out;
3429 }
3430
3431 error = makefh4(&cs->fh, vp, exi_public);
3432 if (error != 0) {
3433 *cs->statusp = resp->status = puterrno4(error);
3434 goto out;
3435 }
3436 sav_exi = cs->exi;
3437 if (exi_public == exi_root) {
3438 /*
3439 * No filesystem is actually shared public, so we default
3440 * to exi_root. In this case, we must check whether root
3441 * is exported.
3442 */
3443 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444
3445 /*
3446 * if root filesystem is exported, the exportinfo struct that we
3447 * should use is what checkexport4 returns, because root_exi is
3448 * actually a mostly empty struct.
3449 */
3450 exi = checkexport4(&fh_fmtp->fh4_fsid,
3451 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452 cs->exi = ((exi != NULL) ? exi : exi_public);
3453 } else {
3454 /*
3455 * it's a properly shared filesystem
3456 */
3457 cs->exi = exi_public;
3458 }
3459
3460 if (is_system_labeled()) {
3461 bslabel_t *clabel;
3462
3463 ASSERT(req->rq_label != NULL);
3464 clabel = req->rq_label;
3465 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466 "got client label from request(1)",
3467 struct svc_req *, req);
3468 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3469 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3470 cs->exi)) {
3471 *cs->statusp = resp->status =
3472 NFS4ERR_SERVERFAULT;
3473 goto out;
3474 }
3475 }
3476 }
3477
3579 struct exportinfo *exi, *sav_exi;
3580
3581 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3582
3583 if (cs->vp) {
3584 VN_RELE(cs->vp);
3585 cs->vp = NULL;
3586 }
3587
3588 if (cs->cr)
3589 crfree(cs->cr);
3590
3591 cs->cr = crdup(cs->basecr);
3592
3593 /*
3594 * Using rootdir, the system root vnode,
3595 * get its fid.
3596 */
3597 bzero(&fid, sizeof (fid));
3598 fid.fid_len = MAXFIDSZ;
3599 error = vop_fid_pseudo(rootdir, &fid);
3600 if (error != 0) {
3601 *cs->statusp = resp->status = puterrno4(error);
3602 goto out;
3603 }
3604
3605 /*
3606 * Then use the root fsid & fid it to find out if it's exported
3607 *
3608 * If the server root isn't exported directly, then
3609 * it should at least be a pseudo export based on
3610 * one or more exports further down in the server's
3611 * file tree.
3612 */
3613 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3614 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615 NFS4_DEBUG(rfs4_debug,
3616 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618 goto out;
3619 }
3620
3621 /*
3622 * Now make a filehandle based on the root
3623 * export and root vnode.
3624 */
3625 error = makefh4(&cs->fh, rootdir, exi);
3626 if (error != 0) {
3627 *cs->statusp = resp->status = puterrno4(error);
3628 goto out;
3629 }
3630
3631 sav_exi = cs->exi;
3632 cs->exi = exi;
3633
3634 VN_HOLD(rootdir);
3635 cs->vp = rootdir;
3636
3637 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638 VN_RELE(rootdir);
3639 cs->vp = NULL;
3640 cs->exi = sav_exi;
3641 goto out;
3642 }
3643
3644 *cs->statusp = resp->status = NFS4_OK;
3645 cs->deleg = FALSE;
3646 out:
3647 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648 PUTROOTFH4res *, resp);
3649 }
3650
3651 /*
3652 * readlink: args: CURRENT_FH.
3653 * res: status. If success - CURRENT_FH unchanged, return linktext.
3654 */
3655
3656 /* ARGSUSED */
3657 static void
3658 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3704
3705 }
3706
3707 va.va_mask = AT_MODE;
3708 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3709 if (error) {
3710 *cs->statusp = resp->status = puterrno4(error);
3711 goto out;
3712 }
3713
3714 if (MANDLOCK(vp, va.va_mode)) {
3715 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3716 goto out;
3717 }
3718
3719 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3720
3721 if (is_referral) {
3722 char *s;
3723 size_t strsz;
3724
3725 /* Get an artificial symlink based on a referral */
3726 s = build_symlink(vp, cs->cr, &strsz);
3727 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3728 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3729 vnode_t *, vp, char *, s);
3730 if (s == NULL)
3731 error = EINVAL;
3732 else {
3733 error = 0;
3734 (void) strlcpy(data, s, MAXPATHLEN + 1);
3735 kmem_free(s, strsz);
3736 }
3737
3738 } else {
3739
3740 iov.iov_base = data;
3741 iov.iov_len = MAXPATHLEN;
3742 uio.uio_iov = &iov;
3743 uio.uio_iovcnt = 1;
3744 uio.uio_segflg = UIO_SYSSPACE;
3745 uio.uio_extflg = UIO_COPY_CACHED;
3746 uio.uio_loffset = 0;
3747 uio.uio_resid = MAXPATHLEN;
4154 }
4155 goto out;
4156 }
4157 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4158
4159 /* Actually do the REMOVE operation */
4160 if (vp->v_type == VDIR) {
4161 /*
4162 * Can't remove a directory that has a mounted-on filesystem.
4163 */
4164 if (vn_ismntpt(vp)) {
4165 error = EACCES;
4166 } else {
4167 /*
4168 * System V defines rmdir to return EEXIST,
4169 * not ENOTEMPTY, if the directory is not
4170 * empty. A System V NFS server needs to map
4171 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4172 * transmit over the wire.
4173 */
4174 if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4175 NULL, 0)) == EEXIST)
4176 error = ENOTEMPTY;
4177 }
4178 } else {
4179 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4180 fp != NULL) {
4181 struct vattr va;
4182 vnode_t *tvp;
4183
4184 rfs4_dbe_lock(fp->rf_dbe);
4185 tvp = fp->rf_vp;
4186 if (tvp)
4187 VN_HOLD(tvp);
4188 rfs4_dbe_unlock(fp->rf_dbe);
4189
4190 if (tvp) {
4191 /*
4192 * This is va_seq safe because we are not
4193 * manipulating dvp.
4194 */
4266 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4267 REMOVE4res *, resp);
4268 }
4269
4270 /*
4271 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4272 * oldname and newname.
4273 * res: status. If success - CURRENT_FH unchanged, return change_info
4274 * for both from and target directories.
4275 */
4276 /* ARGSUSED */
4277 static void
4278 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4279 struct compound_state *cs)
4280 {
4281 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4282 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4283 int error;
4284 vnode_t *odvp;
4285 vnode_t *ndvp;
4286 vnode_t *srcvp, *targvp;
4287 struct vattr obdva, oidva, oadva;
4288 struct vattr nbdva, nidva, nadva;
4289 char *onm, *nnm;
4290 uint_t olen, nlen;
4291 rfs4_file_t *fp, *sfp;
4292 int in_crit_src, in_crit_targ;
4293 int fp_rele_grant_hold, sfp_rele_grant_hold;
4294 bslabel_t *clabel;
4295 struct sockaddr *ca;
4296 char *converted_onm = NULL;
4297 char *converted_nnm = NULL;
4298 nfsstat4 status;
4299
4300 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4301 RENAME4args *, args);
4302
4303 fp = sfp = NULL;
4304 srcvp = targvp = NULL;
4305 in_crit_src = in_crit_targ = 0;
4306 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4307
4308 /* CURRENT_FH: target directory */
4309 ndvp = cs->vp;
4310 if (ndvp == NULL) {
4311 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4312 goto out;
4313 }
4314
4315 /* SAVED_FH: from directory */
4316 odvp = cs->saved_vp;
4317 if (odvp == NULL) {
4318 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4319 goto out;
4320 }
4321
4322 if (cs->access == CS_ACCESS_DENIED) {
4323 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4324 goto out;
4325 }
4326
4459 kmem_free(onm, olen);
4460 if (nnm != converted_nnm)
4461 kmem_free(converted_nnm, MAXPATHLEN + 1);
4462 kmem_free(nnm, nlen);
4463 goto out;
4464 }
4465
4466 sfp_rele_grant_hold = 1;
4467
4468 /* Is the destination an existing file that has a delegation? */
4469 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4470 NULL, cs->cr)) {
4471 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4472 NULL)) {
4473 *cs->statusp = resp->status = NFS4ERR_DELAY;
4474 goto err_out;
4475 }
4476 }
4477 fp_rele_grant_hold = 1;
4478
4479
4480 /* Check for NBMAND lock on both source and target */
4481 if (nbl_need_check(srcvp)) {
4482 nbl_start_crit(srcvp, RW_READER);
4483 in_crit_src = 1;
4484 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4485 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4486 goto err_out;
4487 }
4488 }
4489
4490 if (targvp && nbl_need_check(targvp)) {
4491 nbl_start_crit(targvp, RW_READER);
4492 in_crit_targ = 1;
4493 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4494 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4495 goto err_out;
4496 }
4497 }
4498
4499 /* Get source "before" change value */
4500 obdva.va_mask = AT_CTIME|AT_SEQ;
4501 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4502 if (!error) {
4503 nbdva.va_mask = AT_CTIME|AT_SEQ;
4504 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4505 }
4506 if (error) {
4507 *cs->statusp = resp->status = puterrno4(error);
4508 goto err_out;
4509 }
4510
4511 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4512 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4513
4514 if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4515 cs->cr, NULL, 0)) == 0 && fp != NULL) {
4516 struct vattr va;
4517 vnode_t *tvp;
4518
4519 rfs4_dbe_lock(fp->rf_dbe);
4520 tvp = fp->rf_vp;
4521 if (tvp)
4522 VN_HOLD(tvp);
4523 rfs4_dbe_unlock(fp->rf_dbe);
4524
4525 if (tvp) {
4526 va.va_mask = AT_NLINK;
4527 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4528 va.va_nlink == 0) {
4529 /* The file is gone and so should the state */
4530 if (in_crit_targ) {
4531 nbl_end_crit(targvp);
4532 in_crit_targ = 0;
4533 }
4534 rfs4_close_all_state(fp);
4535 }
4536 VN_RELE(tvp);
4537 }
4538 }
4539 if (error == 0)
4540 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4541
4542 if (in_crit_src)
4543 nbl_end_crit(srcvp);
4544 if (srcvp)
4545 VN_RELE(srcvp);
4546 if (in_crit_targ)
4547 nbl_end_crit(targvp);
4548 if (targvp)
4549 VN_RELE(targvp);
4550
4551 if (sfp) {
4552 rfs4_clear_dont_grant(sfp);
4553 rfs4_file_rele(sfp);
4554 }
4555 if (fp) {
4556 rfs4_clear_dont_grant(fp);
4557 rfs4_file_rele(fp);
4558 }
4559
4560 if (converted_onm != onm)
4561 kmem_free(converted_onm, MAXPATHLEN + 1);
4562 kmem_free(onm, olen);
4563 if (converted_nnm != nnm)
4564 kmem_free(converted_nnm, MAXPATHLEN + 1);
4565 kmem_free(nnm, nlen);
4566
4567 /*
4568 * Get the initial "after" sequence number, if it fails, set to zero
4569 */
4570 oidva.va_mask = AT_SEQ;
5467 static void
5468 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5469 struct compound_state *cs)
5470 {
5471 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5472 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5473 int error;
5474 vnode_t *vp;
5475 struct vattr bva;
5476 u_offset_t rlimit;
5477 struct uio uio;
5478 struct iovec iov[MAX_IOVECS];
5479 struct iovec *iovp;
5480 int iovcnt;
5481 int ioflag;
5482 cred_t *savecred, *cr;
5483 bool_t *deleg = &cs->deleg;
5484 nfsstat4 stat;
5485 int in_crit = 0;
5486 caller_context_t ct;
5487
5488 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5489 WRITE4args *, args);
5490
5491 vp = cs->vp;
5492 if (vp == NULL) {
5493 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5494 goto out;
5495 }
5496 if (cs->access == CS_ACCESS_DENIED) {
5497 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5498 goto out;
5499 }
5500
5501 cr = cs->cr;
5502
5503 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5504 deleg, TRUE, &ct)) != NFS4_OK) {
5505 *cs->statusp = resp->status = stat;
5506 goto out;
5537 goto out;
5538 }
5539
5540 if (vp->v_type != VREG) {
5541 *cs->statusp = resp->status =
5542 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5543 goto out;
5544 }
5545
5546 if (crgetuid(cr) != bva.va_uid &&
5547 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5548 *cs->statusp = resp->status = puterrno4(error);
5549 goto out;
5550 }
5551
5552 if (MANDLOCK(vp, bva.va_mode)) {
5553 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5554 goto out;
5555 }
5556
5557 if (args->data_len == 0) {
5558 *cs->statusp = resp->status = NFS4_OK;
5559 resp->count = 0;
5560 resp->committed = args->stable;
5561 resp->writeverf = Write4verf;
5562 goto out;
5563 }
5564
5565 if (args->mblk != NULL) {
5566 mblk_t *m;
5567 uint_t bytes, round_len;
5568
5569 iovcnt = 0;
5570 bytes = 0;
5571 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5572 for (m = args->mblk;
5573 m != NULL && bytes < round_len;
5574 m = m->b_cont) {
5575 iovcnt++;
5576 bytes += MBLKL(m);
5577 }
5578 #ifdef DEBUG
5579 /* should have ended on an mblk boundary */
5580 if (bytes != round_len) {
5581 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5637 curthread->t_cred = cr;
5638 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5639 curthread->t_cred = savecred;
5640
5641 if (iovp != iov)
5642 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5643
5644 if (error) {
5645 *cs->statusp = resp->status = puterrno4(error);
5646 goto out;
5647 }
5648
5649 *cs->statusp = resp->status = NFS4_OK;
5650 resp->count = args->data_len - uio.uio_resid;
5651
5652 if (ioflag == 0)
5653 resp->committed = UNSTABLE4;
5654 else
5655 resp->committed = FILE_SYNC4;
5656
5657 resp->writeverf = Write4verf;
5658
5659 out:
5660 if (in_crit)
5661 nbl_end_crit(vp);
5662
5663 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5664 WRITE4res *, resp);
5665 }
5666
5667
5668 /* XXX put in a header file */
5669 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5670
5671 void
5672 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5673 struct svc_req *req, cred_t *cr, int *rv)
5674 {
5675 uint_t i;
5676 struct compound_state cs;
5677
5678 if (rv != NULL)
5679 *rv = 0;
5680 rfs4_init_compound_state(&cs);
5681 /*
5682 * Form a reply tag by copying over the request tag.
5683 */
5684 resp->tag.utf8string_val =
5685 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5686 resp->tag.utf8string_len = args->tag.utf8string_len;
5687 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5688 resp->tag.utf8string_len);
5689
5690 cs.statusp = &resp->status;
5691 cs.req = req;
5692 resp->array = NULL;
5693 resp->array_len = 0;
5694
5695 /*
5696 * XXX for now, minorversion should be zero
5697 */
5698 if (args->minorversion != NFS4_MINORVERSION) {
5699 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5700 &cs, COMPOUND4args *, args);
5701 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5702 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5703 &cs, COMPOUND4res *, resp);
5704 return;
5705 }
5706
5707 if (args->array_len == 0) {
5708 resp->status = NFS4_OK;
5714
5715 cr = crget();
5716 ASSERT(cr != NULL);
5717
5718 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5719 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5720 &cs, COMPOUND4args *, args);
5721 crfree(cr);
5722 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5723 &cs, COMPOUND4res *, resp);
5724 svcerr_badcred(req->rq_xprt);
5725 if (rv != NULL)
5726 *rv = 1;
5727 return;
5728 }
5729 resp->array_len = args->array_len;
5730 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5731 KM_SLEEP);
5732
5733 cs.basecr = cr;
5734
5735 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5736 COMPOUND4args *, args);
5737
5738 /*
5739 * For now, NFS4 compound processing must be protected by
5740 * exported_lock because it can access more than one exportinfo
5741 * per compound and share/unshare can now change multiple
5742 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5743 * per proc (excluding public exinfo), and exi_count design
5744 * is sufficient to protect concurrent execution of NFS2/3
5745 * ops along with unexport. This lock will be removed as
5746 * part of the NFSv4 phase 2 namespace redesign work.
5747 */
5748 rw_enter(&exported_lock, RW_READER);
5749
5750 /*
5751 * If this is the first compound we've seen, we need to start all
5752 * new instances' grace periods.
5753 */
5754 if (rfs4_seen_first_compound == 0) {
5755 rfs4_grace_start_new();
5756 /*
5757 * This must be set after rfs4_grace_start_new(), otherwise
5758 * another thread could proceed past here before the former
5759 * is finished.
5760 */
5761 rfs4_seen_first_compound = 1;
5762 }
5763
5764 for (i = 0; i < args->array_len && cs.cont; i++) {
5765 nfs_argop4 *argop;
5766 nfs_resop4 *resop;
5767 uint_t op;
5768
5769 argop = &args->array[i];
5770 resop = &resp->array[i];
5771 resop->resop = argop->argop;
5772 op = (uint_t)resop->resop;
5773
5774 if (op < rfsv4disp_cnt) {
5775 /*
5776 * Count the individual ops here; NULL and COMPOUND
5777 * are counted in common_dispatch()
5778 */
5779 rfsproccnt_v4_ptr[op].value.ui64++;
5780
5781 NFS4_DEBUG(rfs4_debug > 1,
5782 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5783 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5784 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5785 rfs4_op_string[op], *cs.statusp));
5786 if (*cs.statusp != NFS4_OK)
5787 cs.cont = FALSE;
5788 } else {
5789 /*
5790 * This is effectively dead code since XDR code
5791 * will have already returned BADXDR if op doesn't
5792 * decode to legal value. This is only done for a
5793 * day when XDR code doesn't verify v4 opcodes.
5794 */
5795 op = OP_ILLEGAL;
5796 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5797
5798 rfs4_op_illegal(argop, resop, req, &cs);
5799 cs.cont = FALSE;
5800 }
5801
5802 /*
5803 * If not at last op, and if we are to stop, then
5804 * compact the results array.
5805 */
5806 if ((i + 1) < args->array_len && !cs.cont) {
5807 nfs_resop4 *new_res = kmem_alloc(
5808 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5809 bcopy(resp->array,
5810 new_res, (i+1) * sizeof (nfs_resop4));
5811 kmem_free(resp->array,
5812 args->array_len * sizeof (nfs_resop4));
5813
5814 resp->array_len = i + 1;
5815 resp->array = new_res;
5816 }
5817 }
5818
5819 rw_exit(&exported_lock);
5820
5821 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5822 COMPOUND4res *, resp);
5823
5824 if (cs.vp)
5825 VN_RELE(cs.vp);
5826 if (cs.saved_vp)
5827 VN_RELE(cs.saved_vp);
5828 if (cs.saved_fh.nfs_fh4_val)
5829 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5830
5831 if (cs.basecr)
5832 crfree(cs.basecr);
5833 if (cs.cr)
5834 crfree(cs.cr);
5835 /*
5836 * done with this compound request, free the label
5837 */
5838
5839 if (req->rq_label != NULL) {
5840 kmem_free(req->rq_label, sizeof (bslabel_t));
5841 req->rq_label = NULL;
5842 }
5843 }
5844
5845 /*
5846 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5847 * XXX zero out the tag and array values. Need to investigate why the
6511
6512 /* Check for mandatory locking and that the size gets set. */
6513 cva.va_mask = AT_MODE;
6514 if (setsize)
6515 cva.va_mask |= AT_SIZE;
6516
6517 /* Assume the worst */
6518 cs->mandlock = TRUE;
6519
6520 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6521 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6522
6523 /*
6524 * Truncate the file if necessary; this would be
6525 * the case for create over an existing file.
6526 */
6527
6528 if (trunc) {
6529 int in_crit = 0;
6530 rfs4_file_t *fp;
6531 bool_t create = FALSE;
6532
6533 /*
6534 * We are writing over an existing file.
6535 * Check to see if we need to recall a delegation.
6536 */
6537 rfs4_hold_deleg_policy();
6538 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6539 if (rfs4_check_delegated_byfp(FWRITE, fp,
6540 (reqsize == 0), FALSE, FALSE, &clientid)) {
6541 rfs4_file_rele(fp);
6542 rfs4_rele_deleg_policy();
6543 VN_RELE(vp);
6544 *attrset = 0;
6545 return (NFS4ERR_DELAY);
6546 }
6547 rfs4_file_rele(fp);
6548 }
6549 rfs4_rele_deleg_policy();
6550
6551 if (nbl_need_check(vp)) {
6552 in_crit = 1;
6553
6554 ASSERT(reqsize == 0);
6555
6556 nbl_start_crit(vp, RW_READER);
6557 if (nbl_conflict(vp, NBL_WRITE, 0,
6558 cva.va_size, 0, NULL)) {
6559 in_crit = 0;
6560 nbl_end_crit(vp);
6561 VN_RELE(vp);
6562 *attrset = 0;
6563 return (NFS4ERR_ACCESS);
6564 }
6565 }
6566 ct.cc_sysid = 0;
6567 ct.cc_pid = 0;
6568 ct.cc_caller_id = nfs4_srv_caller_id;
6569 ct.cc_flags = CC_DONTBLOCK;
8087
8088 newcp->rc_cp_confirmed = cp_confirmed;
8089
8090 rfs4_client_rele(newcp);
8091
8092 out:
8093 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8094 SETCLIENTID4res *, res);
8095 }
8096
8097 /*ARGSUSED*/
8098 void
8099 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8100 struct svc_req *req, struct compound_state *cs)
8101 {
8102 SETCLIENTID_CONFIRM4args *args =
8103 &argop->nfs_argop4_u.opsetclientid_confirm;
8104 SETCLIENTID_CONFIRM4res *res =
8105 &resop->nfs_resop4_u.opsetclientid_confirm;
8106 rfs4_client_t *cp, *cptoclose = NULL;
8107
8108 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8109 struct compound_state *, cs,
8110 SETCLIENTID_CONFIRM4args *, args);
8111
8112 *cs->statusp = res->status = NFS4_OK;
8113
8114 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8115
8116 if (cp == NULL) {
8117 *cs->statusp = res->status =
8118 rfs4_check_clientid(&args->clientid, 1);
8119 goto out;
8120 }
8121
8122 if (!creds_ok(cp, req, cs)) {
8123 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8124 rfs4_client_rele(cp);
8125 goto out;
8126 }
8127
8128 /* If the verifier doesn't match, the record doesn't match */
8129 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8130 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8131 rfs4_client_rele(cp);
8132 goto out;
8133 }
8134
8135 rfs4_dbe_lock(cp->rc_dbe);
8136 cp->rc_need_confirm = FALSE;
8137 if (cp->rc_cp_confirmed) {
8138 cptoclose = cp->rc_cp_confirmed;
8139 cptoclose->rc_ss_remove = 1;
8140 cp->rc_cp_confirmed = NULL;
8141 }
8142
8143 /*
8144 * Update the client's associated server instance, if it's changed
8145 * since the client was created.
8146 */
8147 if (rfs4_servinst(cp) != rfs4_cur_servinst)
8148 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8149
8150 /*
8151 * Record clientid in stable storage.
8152 * Must be done after server instance has been assigned.
8153 */
8154 rfs4_ss_clid(cp);
8155
8156 rfs4_dbe_unlock(cp->rc_dbe);
8157
8158 if (cptoclose)
8159 /* don't need to rele, client_close does it */
8160 rfs4_client_close(cptoclose);
8161
8162 /* If needed, initiate CB_NULL call for callback path */
8163 rfs4_deleg_cb_check(cp);
8164 rfs4_update_lease(cp);
8165
8166 /*
8167 * Check to see if client can perform reclaims
8168 */
8169 rfs4_ss_chkclid(cp);
8170
8171 rfs4_client_rele(cp);
8172
8173 out:
8174 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8175 struct compound_state *, cs,
8176 SETCLIENTID_CONFIRM4 *, res);
8177 }
8178
8179
8180 /*ARGSUSED*/
8181 void
8182 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8183 struct svc_req *req, struct compound_state *cs)
8184 {
8185 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8186 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8187 rfs4_state_t *sp;
8188 nfsstat4 status;
8189
9793 /*
9794 * Check to see if we have a downrev Solaris client, so that we
9795 * can send it a symlink instead of a referral.
9796 */
9797 int
9798 client_is_downrev(struct svc_req *req)
9799 {
9800 struct sockaddr *ca;
9801 rfs4_clntip_t *ci;
9802 bool_t create = FALSE;
9803 int is_downrev;
9804
9805 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9806 ASSERT(ca);
9807 ci = rfs4_find_clntip(ca, &create);
9808 if (ci == NULL)
9809 return (0);
9810 is_downrev = ci->ri_no_referrals;
9811 rfs4_dbe_rele(ci->ri_dbe);
9812 return (is_downrev);
9813 }
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 * Copyright 2019 Nexenta Systems, Inc.
34 * Copyright 2019 Nexenta by DDN, Inc.
35 */
36
37 #include <sys/param.h>
38 #include <sys/types.h>
39 #include <sys/systm.h>
40 #include <sys/cred.h>
41 #include <sys/buf.h>
42 #include <sys/vfs.h>
43 #include <sys/vfs_opreg.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/sysmacros.h>
48 #include <sys/statvfs.h>
49 #include <sys/kmem.h>
50 #include <sys/dirent.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/systeminfo.h>
54 #include <sys/flock.h>
55 #include <sys/pathname.h>
56 #include <sys/nbmlock.h>
57 #include <sys/share.h>
58 #include <sys/atomic.h>
59 #include <sys/policy.h>
60 #include <sys/fem.h>
61 #include <sys/sdt.h>
62 #include <sys/ddi.h>
63 #include <sys/zone.h>
64
65 #include <fs/fs_reparse.h>
66
67 #include <rpc/types.h>
68 #include <rpc/auth.h>
69 #include <rpc/rpcsec_gss.h>
70 #include <rpc/svc.h>
71
72 #include <nfs/nfs.h>
73 #include <nfs/nfssys.h>
74 #include <nfs/export.h>
75 #include <nfs/nfs_cmd.h>
76 #include <nfs/lm.h>
77 #include <nfs/nfs4.h>
78 #include <nfs/nfs4_drc.h>
79
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82
83 #include <inet/common.h>
84 #include <inet/ip.h>
85 #include <inet/ip6.h>
86
87 #include <sys/tsol/label.h>
88 #include <sys/tsol/tndb.h>
89
90 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
92 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
93 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
94 extern struct svc_ops rdma_svc_ops;
95 extern int nfs_loaned_buffers;
96 /* End of Tunables */
97
98 static int rdma_setup_read_data4(READ4args *, READ4res *);
136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
137 #define RFS4_MINLEN_RDDIR_BUF \
138 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139
140 /*
141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 * but the dirents UFS gives us are already padded to 8, so just take
143 * what we're given. Dircount is only a hint anyway. Currently the
144 * solaris kernel is ASCII only, so there's no point in calling the
145 * UTF8 functions.
146 *
147 * dirent64: name padded to provide 8 byte struct alignment
148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 *
150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 *
152 */
153 #define DIRENT64_TO_DIRCOUNT(dp) \
154 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155
156
157 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
158
159 u_longlong_t nfs4_srv_caller_id;
160 uint_t nfs4_srv_vkey = 0;
161
162 void rfs4_init_compound_state(struct compound_state *);
163
164 static void nullfree(caddr_t);
165 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
166 struct compound_state *);
167 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
168 struct compound_state *);
169 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 struct compound_state *);
171 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 struct compound_state *);
173 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 struct compound_state *);
175 static void rfs4_op_create_free(nfs_resop4 *resop);
176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 struct svc_req *, struct compound_state *);
178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 struct svc_req *, struct compound_state *);
180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 struct compound_state *);
230 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 struct compound_state *);
232 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 struct compound_state *);
234 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 struct compound_state *);
236 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 struct compound_state *);
238 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 struct compound_state *);
240 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 struct compound_state *);
242 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
243 struct svc_req *, struct compound_state *);
244 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
245 struct svc_req *req, struct compound_state *);
246 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
247 struct compound_state *);
248 static void rfs4_op_secinfo_free(nfs_resop4 *);
249
250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
251 struct svc_req *);
252 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
253 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
254
255
256 /*
257 * translation table for attrs
258 */
259 struct nfs4_ntov_table {
260 union nfs4_attr_u *na;
261 uint8_t amap[NFS4_MAXNUM_ATTRS];
262 int attrcnt;
263 bool_t vfsstat;
264 };
265
266 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
267 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
268 struct nfs4_svgetit_arg *sargp);
269
270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
271 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
272 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
273
274 static void hanfsv4_failover(nfs4_srv_t *);
275
276 fem_t *deleg_rdops;
277 fem_t *deleg_wrops;
278
/*
 * NFS4 op dispatch table
 *
 * Each rfsv4disp entry describes one operation: the handler to run, the
 * routine that releases whatever the handler allocated, and its flags.
 */

struct rfsv4disp {
	/* proc to call */
	void	(*dis_proc)();
	/* frees space allocated by proc */
	void	(*dis_resfree)();
	/* RPC_IDEMPOTENT, etc... */
	int	dis_flags;
};
288
289 static struct rfsv4disp rfsv4disptab[] = {
290 /*
291 * NFS VERSION 4
292 */
293
294 /* RFS_NULL = 0 */
295 {rfs4_op_illegal, nullfree, 0},
296
297 /* UNUSED = 1 */
298 {rfs4_op_illegal, nullfree, 0},
450 "rfs4_op_putrootfh",
451 "rfs4_op_read",
452 "rfs4_op_readdir",
453 "rfs4_op_readlink",
454 "rfs4_op_remove",
455 "rfs4_op_rename",
456 "rfs4_op_renew",
457 "rfs4_op_restorefh",
458 "rfs4_op_savefh",
459 "rfs4_op_secinfo",
460 "rfs4_op_setattr",
461 "rfs4_op_setclientid",
462 "rfs4_op_setclient_confirm",
463 "rfs4_op_verify",
464 "rfs4_op_write",
465 "rfs4_op_release_lockowner",
466 "rfs4_op_illegal"
467 };
468 #endif
469
470 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
471
472 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
473
474 extern void rfs4_free_fs_locations4(fs_locations4 *);
475
476 #ifdef nextdp
477 #undef nextdp
478 #endif
479 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
480
481 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
482 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
483 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
484 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
485 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
486 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
487 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
488 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
489 NULL, NULL
490 };
491 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
492 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
493 VOPNAME_READ, { .femop_read = deleg_wr_read },
494 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
495 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
496 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
497 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
498 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
499 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
500 NULL, NULL
501 };
502
503 nfs4_srv_t *
504 nfs4_get_srv(void)
505 {
506 nfs_globals_t *ng = nfs_srv_getzg();
507 nfs4_srv_t *srv = ng->nfs4_srv;
508 ASSERT(srv != NULL);
509 return (srv);
510 }
511
512 void
513 rfs4_srv_zone_init(nfs_globals_t *ng)
514 {
515 nfs4_srv_t *nsrv4;
516 timespec32_t verf;
517
518 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
519
520 /*
521 * The following algorithm attempts to find a unique verifier
522 * to be used as the write verifier returned from the server
523 * to the client. It is important that this verifier change
524 * whenever the server reboots. Of secondary importance, it
525 * is important for the verifier to be unique between two
526 * different servers.
527 *
528 * Thus, an attempt is made to use the system hostid and the
529 * current time in seconds when the nfssrv kernel module is
530 * loaded. It is assumed that an NFS server will not be able
531 * to boot and then to reboot in less than a second. If the
532 * hostid has not been set, then the current high resolution
533 * time is used. This will ensure different verifiers each
534 * time the server reboots and minimize the chances that two
535 * different servers will have the same verifier.
536 * XXX - this is broken on LP64 kernels.
537 */
538 verf.tv_sec = (time_t)zone_get_hostid(NULL);
539 if (verf.tv_sec != 0) {
540 verf.tv_nsec = gethrestime_sec();
541 } else {
542 timespec_t tverf;
543
544 gethrestime(&tverf);
545 verf.tv_sec = (time_t)tverf.tv_sec;
546 verf.tv_nsec = tverf.tv_nsec;
547 }
548 nsrv4->write4verf = *(uint64_t *)&verf;
549
550 /* Used to manage create/destroy of server state */
551 nsrv4->nfs4_server_state = NULL;
552 nsrv4->nfs4_cur_servinst = NULL;
553 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
554 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
555 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
556 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
557 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
558
559 ng->nfs4_srv = nsrv4;
560 }
561
562 void
563 rfs4_srv_zone_fini(nfs_globals_t *ng)
564 {
565 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
566
567 ng->nfs4_srv = NULL;
568
569 mutex_destroy(&nsrv4->deleg_lock);
570 mutex_destroy(&nsrv4->state_lock);
571 mutex_destroy(&nsrv4->servinst_lock);
572 rw_destroy(&nsrv4->deleg_policy_lock);
573
574 kmem_free(nsrv4, sizeof (*nsrv4));
575 }
576
577 void
578 rfs4_srvrinit(void)
579 {
580 extern void rfs4_attr_init();
581
582 rfs4_attr_init();
583
584 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
585 rfs4_disable_delegation();
586 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
587 &deleg_wrops) != 0) {
588 rfs4_disable_delegation();
589 fem_free(deleg_rdops);
590 }
591
592 nfs4_srv_caller_id = fs_new_caller_id();
593 lockt_sysid = lm_alloc_sysidt();
594 vsd_create(&nfs4_srv_vkey, NULL);
595 rfs4_state_g_init();
596 }
597
598 void
599 rfs4_srvrfini(void)
600 {
601 if (lockt_sysid != LM_NOSYSID) {
602 lm_free_sysidt(lockt_sysid);
603 lockt_sysid = LM_NOSYSID;
604 }
605
606 rfs4_state_g_fini();
607
608 fem_free(deleg_rdops);
609 fem_free(deleg_wrops);
610 }
611
612 void
613 rfs4_do_server_start(int server_upordown,
614 int srv_delegation, int cluster_booted)
615 {
616 nfs4_srv_t *nsrv4 = nfs4_get_srv();
617
618 /* Is this a warm start? */
619 if (server_upordown == NFS_SERVER_QUIESCED) {
620 cmn_err(CE_NOTE, "nfs4_srv: "
621 "server was previously quiesced; "
622 "existing NFSv4 state will be re-used");
623
624 /*
625 * HA-NFSv4: this is also the signal
626 * that a Resource Group failover has
627 * occurred.
628 */
629 if (cluster_booted)
630 hanfsv4_failover(nsrv4);
631 } else {
632 /* Cold start */
633 nsrv4->rfs4_start_time = 0;
634 rfs4_state_zone_init(nsrv4);
635 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
636 nfs4_drc_hash);
637
638 /*
639 * The nfsd service was started with the -s option
640 * we need to pull in any state from the paths indicated.
641 */
642 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
643 /* read in the stable storage state from these paths */
644 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
645 rfs4_dss_newpaths);
646 }
647 }
648
649 /* Check if delegation is to be enabled */
650 if (srv_delegation != FALSE)
651 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
652 }
653
654 void
655 rfs4_init_compound_state(struct compound_state *cs)
656 {
657 bzero(cs, sizeof (*cs));
658 cs->cont = TRUE;
659 cs->access = CS_ACCESS_DENIED;
660 cs->deleg = FALSE;
661 cs->mandlock = FALSE;
662 cs->fh.nfs_fh4_val = cs->fhbuf;
663 }
664
665 void
666 rfs4_grace_start(rfs4_servinst_t *sip)
667 {
668 rw_enter(&sip->rwlock, RW_WRITER);
669 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
670 sip->grace_period = rfs4_grace_period;
671 rw_exit(&sip->rwlock);
672 }
673
674 /*
697
698 rw_enter(&sip->rwlock, RW_READER);
699 grace_expiry = sip->start_time + sip->grace_period;
700 rw_exit(&sip->rwlock);
701
702 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
703 }
704
705 int
706 rfs4_clnt_in_grace(rfs4_client_t *cp)
707 {
708 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
709
710 return (rfs4_servinst_in_grace(cp->rc_server_instance));
711 }
712
713 /*
714 * reset all currently active grace periods
715 */
716 void
717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
718 {
719 rfs4_servinst_t *sip;
720
721 mutex_enter(&nsrv4->servinst_lock);
722 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
723 if (rfs4_servinst_in_grace(sip))
724 rfs4_grace_start(sip);
725 mutex_exit(&nsrv4->servinst_lock);
726 }
727
728 /*
729 * start any new instances' grace periods
730 */
731 void
732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
733 {
734 rfs4_servinst_t *sip;
735
736 mutex_enter(&nsrv4->servinst_lock);
737 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
738 if (rfs4_servinst_grace_new(sip))
739 rfs4_grace_start(sip);
740 mutex_exit(&nsrv4->servinst_lock);
741 }
742
743 static rfs4_dss_path_t *
744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
745 char *path, unsigned index)
746 {
747 size_t len;
748 rfs4_dss_path_t *dss_path;
749
750 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
751
752 /*
753 * Take a copy of the string, since the original may be overwritten.
754 * Sadly, no strdup() in the kernel.
755 */
756 /* allow for NUL */
757 len = strlen(path) + 1;
758 dss_path->path = kmem_alloc(len, KM_SLEEP);
759 (void) strlcpy(dss_path->path, path, len);
760
761 /* associate with servinst */
762 dss_path->sip = sip;
763 dss_path->index = index;
764
765 /*
766 * Add to list of served paths.
767 * No locking required, as we're only ever called at startup.
768 */
769 if (nsrv4->dss_pathlist == NULL) {
770 /* this is the first dss_path_t */
771
772 /* needed for insque/remque */
773 dss_path->next = dss_path->prev = dss_path;
774
775 nsrv4->dss_pathlist = dss_path;
776 } else {
777 insque(dss_path, nsrv4->dss_pathlist);
778 }
779
780 return (dss_path);
781 }
782
783 /*
784 * Create a new server instance, and make it the currently active instance.
785 * Note that starting the grace period too early will reduce the clients'
786 * recovery window.
787 */
788 void
789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
790 int dss_npaths, char **dss_paths)
791 {
792 unsigned i;
793 rfs4_servinst_t *sip;
794 rfs4_oldstate_t *oldstate;
795
796 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
797 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
798
799 sip->start_time = (time_t)0;
800 sip->grace_period = (time_t)0;
801 sip->next = NULL;
802 sip->prev = NULL;
803
804 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
805 /*
806 * This initial dummy entry is required to setup for insque/remque.
807 * It must be skipped over whenever the list is traversed.
808 */
809 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
810 /* insque/remque require initial list entry to be self-terminated */
811 oldstate->next = oldstate;
812 oldstate->prev = oldstate;
813 sip->oldstate = oldstate;
814
815
816 sip->dss_npaths = dss_npaths;
817 sip->dss_paths = kmem_alloc(dss_npaths *
818 sizeof (rfs4_dss_path_t *), KM_SLEEP);
819
820 for (i = 0; i < dss_npaths; i++) {
821 sip->dss_paths[i] =
822 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
823 }
824
825 mutex_enter(&nsrv4->servinst_lock);
826 if (nsrv4->nfs4_cur_servinst != NULL) {
827 /* add to linked list */
828 sip->prev = nsrv4->nfs4_cur_servinst;
829 nsrv4->nfs4_cur_servinst->next = sip;
830 }
831 if (start_grace)
832 rfs4_grace_start(sip);
833 /* make the new instance "current" */
834 nsrv4->nfs4_cur_servinst = sip;
835
836 mutex_exit(&nsrv4->servinst_lock);
837 }
838
839 /*
840 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
841 * all instances directly.
842 */
843 void
844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
845 {
846 rfs4_servinst_t *sip, *prev, *current;
847 #ifdef DEBUG
848 int n = 0;
849 #endif
850
851 mutex_enter(&nsrv4->servinst_lock);
852 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
853 current = nsrv4->nfs4_cur_servinst;
854 nsrv4->nfs4_cur_servinst = NULL;
855 for (sip = current; sip != NULL; sip = prev) {
856 prev = sip->prev;
857 rw_destroy(&sip->rwlock);
858 if (sip->oldstate)
859 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
860 if (sip->dss_paths) {
861 int i = sip->dss_npaths;
862
863 while (i > 0) {
864 i--;
865 if (sip->dss_paths[i] != NULL) {
866 char *path = sip->dss_paths[i]->path;
867
868 if (path != NULL) {
869 kmem_free(path,
870 strlen(path) + 1);
871 }
872 kmem_free(sip->dss_paths[i],
873 sizeof (rfs4_dss_path_t));
874 }
875 }
876 kmem_free(sip->dss_paths,
877 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
878 }
879 kmem_free(sip, sizeof (rfs4_servinst_t));
880 #ifdef DEBUG
881 n++;
882 #endif
883 }
884 mutex_exit(&nsrv4->servinst_lock);
885 }
886
887 /*
888 * Assign the current server instance to a client_t.
889 * Should be called with cp->rc_dbe held.
890 */
891 void
892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
893 rfs4_servinst_t *sip)
894 {
895 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
896
897 /*
898 * The lock ensures that if the current instance is in the process
899 * of changing, we will see the new one.
900 */
901 mutex_enter(&nsrv4->servinst_lock);
902 cp->rc_server_instance = sip;
903 mutex_exit(&nsrv4->servinst_lock);
904 }
905
906 rfs4_servinst_t *
907 rfs4_servinst(rfs4_client_t *cp)
908 {
909 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
910
911 return (cp->rc_server_instance);
912 }
913
914 /* ARGSUSED */
915 static void
916 nullfree(caddr_t resop)
917 {
918 }
919
920 /*
921 * This is a fall-through for invalid or not implemented (yet) ops
922 */
923 /* ARGSUSED */
936 {
937 int i;
938
939 for (i = 0; i < count; i++) {
940 if (nfsnum == flavor_list[i])
941 return (TRUE);
942 }
943 return (FALSE);
944 }
945
946 /*
947 * Used by rfs4_op_secinfo to get the security information from the
948 * export structure associated with the component.
949 */
950 /* ARGSUSED */
951 static nfsstat4
952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
953 {
954 int error, different_export = 0;
955 vnode_t *dvp, *vp;
956 struct exportinfo *exi;
957 fid_t fid;
958 uint_t count, i;
959 secinfo4 *resok_val;
960 struct secinfo *secp;
961 seconfig_t *si;
962 bool_t did_traverse = FALSE;
963 int dotdot, walk;
964 nfs_export_t *ne = nfs_get_export();
965
966 dvp = cs->vp;
967 exi = cs->exi;
968 ASSERT(exi != NULL);
969 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
970
971 /*
972 * If dotdotting, then need to check whether it's above the
973 * root of a filesystem, or above an export point.
974 */
975 if (dotdot) {
976 vnode_t *zone_rootvp = ne->exi_root->exi_vp;
977
978 ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
979 /*
980 * If dotdotting at the root of a filesystem, then
981 * need to traverse back to the mounted-on filesystem
982 * and do the dotdot lookup there.
983 */
984 if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
985
986 /*
987 * If at the system root, then can
988 * go up no further.
989 */
990 if (VN_CMP(dvp, zone_rootvp))
991 return (puterrno4(ENOENT));
992
993 /*
994 * Traverse back to the mounted-on filesystem
995 */
996 dvp = untraverse(dvp, zone_rootvp);
997
998 /*
999 * Set the different_export flag so we remember
1000 * to pick up a new exportinfo entry for
1001 * this new filesystem.
1002 */
1003 different_export = 1;
1004 } else {
1005
1006 /*
1007 * If dotdotting above an export point then set
1008 * the different_export to get new export info.
1009 */
1010 different_export = nfs_exported(exi, dvp);
1011 }
1012 }
1013
1014 /*
1015 * Get the vnode for the component "nm".
1016 */
1017 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1018 NULL, NULL, NULL);
1019 if (error)
1020 return (puterrno4(error));
1021
1022 /*
1023 * If the vnode is in a pseudo filesystem, or if the security flavor
1024 * used in the request is valid but not an explicitly shared flavor,
1025 * or the access bit indicates that this is a limited access,
1026 * check whether this vnode is visible.
1027 */
1028 if (!different_export &&
1029 (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1030 cs->access & CS_ACCESS_LIMITED)) {
1031 if (! nfs_visible(exi, vp, &different_export)) {
1032 VN_RELE(vp);
1033 return (puterrno4(ENOENT));
1034 }
1035 }
1036
1037 /*
1038 * If it's a mountpoint, then traverse it.
1039 */
1040 if (vn_ismntpt(vp)) {
1041 if ((error = traverse(&vp)) != 0) {
1042 VN_RELE(vp);
1043 return (puterrno4(error));
1044 }
1045 /* remember that we had to traverse mountpoint */
1046 did_traverse = TRUE;
1047 different_export = 1;
1048 } else if (vp->v_vfsp != dvp->v_vfsp) {
1049 /*
1050 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1051 * then vp is probably an LOFS object. We don't need the
1053 * a server fs boundary and need to call checkexport4.
1054 * (LOFS lookup hides server fs mountpoints, and actually calls
1055 * traverse)
1056 */
1057 different_export = 1;
1058 }
1059
1060 /*
1061 * Get the export information for it.
1062 */
1063 if (different_export) {
1064
1065 bzero(&fid, sizeof (fid));
1066 fid.fid_len = MAXFIDSZ;
1067 error = vop_fid_pseudo(vp, &fid);
1068 if (error) {
1069 VN_RELE(vp);
1070 return (puterrno4(error));
1071 }
1072
1073 /* We'll need to reassign "exi". */
1074 if (dotdot)
1075 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1076 else
1077 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1078
1079 if (exi == NULL) {
1080 if (did_traverse == TRUE) {
1081 /*
1082 * If this vnode is a mounted-on vnode,
1083 * but the mounted-on file system is not
1084 * exported, send back the secinfo for
1085 * the exported node that the mounted-on
1086 * vnode lives in.
1087 */
1088 exi = cs->exi;
1089 } else {
1090 VN_RELE(vp);
1091 return (puterrno4(EACCES));
1092 }
1093 }
1094 }
1095 ASSERT(exi != NULL);
1096
1097
1098 /*
1099 * Create the secinfo result based on the security information
1100 * from the exportinfo structure (exi).
1101 *
1102 * Return all flavors for a pseudo node.
1103 * For a real export node, return the flavor that the client
1104 * has access with.
1105 */
1106 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1107 if (PSEUDO(exi)) {
1108 count = exi->exi_export.ex_seccnt; /* total sec count */
1109 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1110 secp = exi->exi_export.ex_secinfo;
1111
1112 for (i = 0; i < count; i++) {
1113 si = &secp[i].s_secinfo;
1114 resok_val[i].flavor = si->sc_rpcnum;
1115 if (resok_val[i].flavor == RPCSEC_GSS) {
1116 rpcsec_gss_info *info;
1117
1118 info = &resok_val[i].flavor_info;
1119 info->qop = si->sc_qop;
1120 info->service = (rpc_gss_svc_t)si->sc_service;
1121
1122 /* get oid opaque data */
1123 info->oid.sec_oid4_len =
1124 si->sc_gss_mech_type->length;
1125 info->oid.sec_oid4_val = kmem_alloc(
1126 si->sc_gss_mech_type->length, KM_SLEEP);
1449 if (is_system_labeled() && !admin_low_client)
1450 label_rele(tslabel);
1451
1452 *cs->statusp = resp->status = NFS4_OK;
1453 out:
1454 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1455 ACCESS4res *, resp);
1456 }
1457
1458 /* ARGSUSED */
1459 static void
1460 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1461 struct compound_state *cs)
1462 {
1463 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1464 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1465 int error;
1466 vnode_t *vp = cs->vp;
1467 cred_t *cr = cs->cr;
1468 vattr_t va;
1469 nfs4_srv_t *nsrv4;
1470
1471 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1472 COMMIT4args *, args);
1473
1474 if (vp == NULL) {
1475 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1476 goto out;
1477 }
1478 if (cs->access == CS_ACCESS_DENIED) {
1479 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1480 goto out;
1481 }
1482
1483 if (args->offset + args->count < args->offset) {
1484 *cs->statusp = resp->status = NFS4ERR_INVAL;
1485 goto out;
1486 }
1487
1488 va.va_mask = AT_UID;
1489 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1506 resp->status = NFS4ERR_ISDIR;
1507 else
1508 resp->status = NFS4ERR_INVAL;
1509 *cs->statusp = resp->status;
1510 goto out;
1511 }
1512
1513 if (crgetuid(cr) != va.va_uid &&
1514 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1515 *cs->statusp = resp->status = puterrno4(error);
1516 goto out;
1517 }
1518
1519 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1520
1521 if (error) {
1522 *cs->statusp = resp->status = puterrno4(error);
1523 goto out;
1524 }
1525
1526 nsrv4 = nfs4_get_srv();
1527 *cs->statusp = resp->status = NFS4_OK;
1528 resp->writeverf = nsrv4->write4verf;
1529 out:
1530 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1531 COMMIT4res *, resp);
1532 }
1533
1534 /*
1535 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1536 * was completed. It does the nfsv4 create for special files.
1537 */
1538 /* ARGSUSED */
1539 static vnode_t *
1540 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1541 struct compound_state *cs, vattr_t *vap, char *nm)
1542 {
1543 int error;
1544 cred_t *cr = cs->cr;
1545 vnode_t *dvp = cs->vp;
1546 vnode_t *vp = NULL;
1547 int mode;
1548 enum vcexcl excl;
2704 fid_t fid;
2705 int attrdir, dotdot, walk;
2706 bool_t is_newvp = FALSE;
2707
2708 if (cs->vp->v_flag & V_XATTRDIR) {
2709 attrdir = 1;
2710 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2711 } else {
2712 attrdir = 0;
2713 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2714 }
2715
2716 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2717
2718 /*
2719 * If dotdotting, then need to check whether it's
2720 * above the root of a filesystem, or above an
2721 * export point.
2722 */
2723 if (dotdot) {
2724 vnode_t *zone_rootvp;
2725
2726 ASSERT(cs->exi != NULL);
2727 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2728 /*
2729 * If dotdotting at the root of a filesystem, then
2730 * need to traverse back to the mounted-on filesystem
2731 * and do the dotdot lookup there.
2732 */
2733 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2734
2735 /*
2736 * If at the system root, then can
2737 * go up no further.
2738 */
2739 if (VN_CMP(cs->vp, zone_rootvp))
2740 return (puterrno4(ENOENT));
2741
2742 /*
2743 * Traverse back to the mounted-on filesystem
2744 */
2745 cs->vp = untraverse(cs->vp, zone_rootvp);
2746
2747 /*
2748 * Set the different_export flag so we remember
2749 * to pick up a new exportinfo entry for
2750 * this new filesystem.
2751 */
2752 different_export = 1;
2753 } else {
2754
2755 /*
2756 * If dotdotting above an export point then set
2757 * the different_export to get new export info.
2758 */
2759 different_export = nfs_exported(cs->exi, cs->vp);
2760 }
2761 }
2762
2763 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2764 NULL, NULL, NULL);
2765 if (error)
3483 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3484
3485 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3486 freeb(resp->mblk);
3487 resp->mblk = NULL;
3488 resp->data_len = 0;
3489 }
3490 }
3491
3492
3493 /* ARGSUSED */
3494 static void
3495 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3496 struct compound_state *cs)
3497 {
3498 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3499 int error;
3500 vnode_t *vp;
3501 struct exportinfo *exi, *sav_exi;
3502 nfs_fh4_fmt_t *fh_fmtp;
3503 nfs_export_t *ne = nfs_get_export();
3504
3505 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3506
3507 if (cs->vp) {
3508 VN_RELE(cs->vp);
3509 cs->vp = NULL;
3510 }
3511
3512 if (cs->cr)
3513 crfree(cs->cr);
3514
3515 cs->cr = crdup(cs->basecr);
3516
3517 vp = ne->exi_public->exi_vp;
3518 if (vp == NULL) {
3519 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3520 goto out;
3521 }
3522
3523 error = makefh4(&cs->fh, vp, ne->exi_public);
3524 if (error != 0) {
3525 *cs->statusp = resp->status = puterrno4(error);
3526 goto out;
3527 }
3528 sav_exi = cs->exi;
3529 if (ne->exi_public == ne->exi_root) {
3530 /*
3531 * No filesystem is actually shared public, so we default
3532 * to exi_root. In this case, we must check whether root
3533 * is exported.
3534 */
3535 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3536
3537 /*
3538 * if root filesystem is exported, the exportinfo struct that we
3539 * should use is what checkexport4 returns, because root_exi is
3540 * actually a mostly empty struct.
3541 */
3542 exi = checkexport4(&fh_fmtp->fh4_fsid,
3543 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3544 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3545 } else {
3546 /*
3547 * it's a properly shared filesystem
3548 */
3549 cs->exi = ne->exi_public;
3550 }
3551
3552 if (is_system_labeled()) {
3553 bslabel_t *clabel;
3554
3555 ASSERT(req->rq_label != NULL);
3556 clabel = req->rq_label;
3557 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3558 "got client label from request(1)",
3559 struct svc_req *, req);
3560 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3561 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3562 cs->exi)) {
3563 *cs->statusp = resp->status =
3564 NFS4ERR_SERVERFAULT;
3565 goto out;
3566 }
3567 }
3568 }
3569
3671 struct exportinfo *exi, *sav_exi;
3672
3673 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3674
3675 if (cs->vp) {
3676 VN_RELE(cs->vp);
3677 cs->vp = NULL;
3678 }
3679
3680 if (cs->cr)
3681 crfree(cs->cr);
3682
3683 cs->cr = crdup(cs->basecr);
3684
3685 /*
3686 * Using rootdir, the system root vnode,
3687 * get its fid.
3688 */
3689 bzero(&fid, sizeof (fid));
3690 fid.fid_len = MAXFIDSZ;
3691 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3692 if (error != 0) {
3693 *cs->statusp = resp->status = puterrno4(error);
3694 goto out;
3695 }
3696
3697 /*
3698 * Then use the root fsid & fid to find out if it's exported
3699 *
3700 * If the server root isn't exported directly, then
3701 * it should at least be a pseudo export based on
3702 * one or more exports further down in the server's
3703 * file tree.
3704 */
3705 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3706 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3707 NFS4_DEBUG(rfs4_debug,
3708 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3709 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3710 goto out;
3711 }
3712
3713 /*
3714 * Now make a filehandle based on the root
3715 * export and root vnode.
3716 */
3717 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3718 if (error != 0) {
3719 *cs->statusp = resp->status = puterrno4(error);
3720 goto out;
3721 }
3722
3723 sav_exi = cs->exi;
3724 cs->exi = exi;
3725
3726 VN_HOLD(ZONE_ROOTVP());
3727 cs->vp = ZONE_ROOTVP();
3728
3729 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3730 VN_RELE(cs->vp);
3731 cs->vp = NULL;
3732 cs->exi = sav_exi;
3733 goto out;
3734 }
3735
3736 *cs->statusp = resp->status = NFS4_OK;
3737 cs->deleg = FALSE;
3738 out:
3739 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3740 PUTROOTFH4res *, resp);
3741 }
3742
3743 /*
3744 * readlink: args: CURRENT_FH.
3745 * res: status. If success - CURRENT_FH unchanged, return linktext.
3746 */
3747
3748 /* ARGSUSED */
3749 static void
3750 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3796
3797 }
3798
3799 va.va_mask = AT_MODE;
3800 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3801 if (error) {
3802 *cs->statusp = resp->status = puterrno4(error);
3803 goto out;
3804 }
3805
3806 if (MANDLOCK(vp, va.va_mode)) {
3807 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3808 goto out;
3809 }
3810
3811 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3812
3813 if (is_referral) {
3814 char *s;
3815 size_t strsz;
3816 kstat_named_t *stat =
3817 cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3818
3819 /* Get an artificial symlink based on a referral */
3820 s = build_symlink(vp, cs->cr, &strsz);
3821 stat[NFS_REFERLINKS].value.ui64++;
3822 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3823 vnode_t *, vp, char *, s);
3824 if (s == NULL)
3825 error = EINVAL;
3826 else {
3827 error = 0;
3828 (void) strlcpy(data, s, MAXPATHLEN + 1);
3829 kmem_free(s, strsz);
3830 }
3831
3832 } else {
3833
3834 iov.iov_base = data;
3835 iov.iov_len = MAXPATHLEN;
3836 uio.uio_iov = &iov;
3837 uio.uio_iovcnt = 1;
3838 uio.uio_segflg = UIO_SYSSPACE;
3839 uio.uio_extflg = UIO_COPY_CACHED;
3840 uio.uio_loffset = 0;
3841 uio.uio_resid = MAXPATHLEN;
4248 }
4249 goto out;
4250 }
4251 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4252
4253 /* Actually do the REMOVE operation */
4254 if (vp->v_type == VDIR) {
4255 /*
4256 * Can't remove a directory that has a mounted-on filesystem.
4257 */
4258 if (vn_ismntpt(vp)) {
4259 error = EACCES;
4260 } else {
4261 /*
4262 * System V defines rmdir to return EEXIST,
4263 * not ENOTEMPTY, if the directory is not
4264 * empty. A System V NFS server needs to map
4265 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4266 * transmit over the wire.
4267 */
4268 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4269 NULL, 0)) == EEXIST)
4270 error = ENOTEMPTY;
4271 }
4272 } else {
4273 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4274 fp != NULL) {
4275 struct vattr va;
4276 vnode_t *tvp;
4277
4278 rfs4_dbe_lock(fp->rf_dbe);
4279 tvp = fp->rf_vp;
4280 if (tvp)
4281 VN_HOLD(tvp);
4282 rfs4_dbe_unlock(fp->rf_dbe);
4283
4284 if (tvp) {
4285 /*
4286 * This is va_seq safe because we are not
4287 * manipulating dvp.
4288 */
4360 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4361 REMOVE4res *, resp);
4362 }
4363
4364 /*
4365 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4366 * oldname and newname.
4367 * res: status. If success - CURRENT_FH unchanged, return change_info
4368 * for both from and target directories.
4369 */
4370 /* ARGSUSED */
4371 static void
4372 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4373 struct compound_state *cs)
4374 {
4375 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4376 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4377 int error;
4378 vnode_t *odvp;
4379 vnode_t *ndvp;
4380 vnode_t *srcvp, *targvp, *tvp;
4381 struct vattr obdva, oidva, oadva;
4382 struct vattr nbdva, nidva, nadva;
4383 char *onm, *nnm;
4384 uint_t olen, nlen;
4385 rfs4_file_t *fp, *sfp;
4386 int in_crit_src, in_crit_targ;
4387 int fp_rele_grant_hold, sfp_rele_grant_hold;
4388 int unlinked;
4389 bslabel_t *clabel;
4390 struct sockaddr *ca;
4391 char *converted_onm = NULL;
4392 char *converted_nnm = NULL;
4393 nfsstat4 status;
4394
4395 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4396 RENAME4args *, args);
4397
4398 fp = sfp = NULL;
4399 srcvp = targvp = tvp = NULL;
4400 in_crit_src = in_crit_targ = 0;
4401 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4402 unlinked = 0;
4403
4404 /* CURRENT_FH: target directory */
4405 ndvp = cs->vp;
4406 if (ndvp == NULL) {
4407 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4408 goto out;
4409 }
4410
4411 /* SAVED_FH: from directory */
4412 odvp = cs->saved_vp;
4413 if (odvp == NULL) {
4414 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4415 goto out;
4416 }
4417
4418 if (cs->access == CS_ACCESS_DENIED) {
4419 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4420 goto out;
4421 }
4422
4555 kmem_free(onm, olen);
4556 if (nnm != converted_nnm)
4557 kmem_free(converted_nnm, MAXPATHLEN + 1);
4558 kmem_free(nnm, nlen);
4559 goto out;
4560 }
4561
4562 sfp_rele_grant_hold = 1;
4563
4564 /* Does the destination exist as a file and have a delegation? */
4565 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4566 NULL, cs->cr)) {
4567 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4568 NULL)) {
4569 *cs->statusp = resp->status = NFS4ERR_DELAY;
4570 goto err_out;
4571 }
4572 }
4573 fp_rele_grant_hold = 1;
4574
4575 /* Check for NBMAND lock on both source and target */
4576 if (nbl_need_check(srcvp)) {
4577 nbl_start_crit(srcvp, RW_READER);
4578 in_crit_src = 1;
4579 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4580 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4581 goto err_out;
4582 }
4583 }
4584
4585 if (targvp && nbl_need_check(targvp)) {
4586 nbl_start_crit(targvp, RW_READER);
4587 in_crit_targ = 1;
4588 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4589 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4590 goto err_out;
4591 }
4592 }
4593
4594 /* Get source "before" change value */
4595 obdva.va_mask = AT_CTIME|AT_SEQ;
4596 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4597 if (!error) {
4598 nbdva.va_mask = AT_CTIME|AT_SEQ;
4599 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4600 }
4601 if (error) {
4602 *cs->statusp = resp->status = puterrno4(error);
4603 goto err_out;
4604 }
4605
4606 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4607 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4608
4609 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4610 NULL, 0);
4611
4612 /*
4613 * If target existed and was unlinked by VOP_RENAME, its state will need to
4614 * be closed. To avoid deadlock, rfs4_close_all_state will be done after
4615 * any necessary nbl_end_crit on srcvp and targvp.
4616 */
4617 if (error == 0 && fp != NULL) {
4618 rfs4_dbe_lock(fp->rf_dbe);
4619 tvp = fp->rf_vp;
4620 if (tvp)
4621 VN_HOLD(tvp);
4622 rfs4_dbe_unlock(fp->rf_dbe);
4623
4624 if (tvp) {
4625 struct vattr va;
4626 va.va_mask = AT_NLINK;
4627
4628 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4629 va.va_nlink == 0) {
4630 unlinked = 1;
4631
4632 /* DEBUG data */
4633 if ((srcvp == targvp) || (tvp != targvp)) {
4634 cmn_err(CE_WARN, "rfs4_op_rename: "
4635 "srcvp %p, targvp: %p, tvp: %p",
4636 (void *)srcvp, (void *)targvp,
4637 (void *)tvp);
4638 }
4639 } else {
4640 VN_RELE(tvp);
4641 }
4642 }
4643 }
4644 if (error == 0)
4645 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4646
4647 if (in_crit_src)
4648 nbl_end_crit(srcvp);
4649 if (srcvp)
4650 VN_RELE(srcvp);
4651 if (in_crit_targ)
4652 nbl_end_crit(targvp);
4653 if (targvp)
4654 VN_RELE(targvp);
4655
4656 if (unlinked) {
4657 ASSERT(fp != NULL);
4658 ASSERT(tvp != NULL);
4659
4660 /* DEBUG data */
4661 if (RW_READ_HELD(&tvp->v_nbllock)) {
4662 cmn_err(CE_WARN, "rfs4_op_rename: "
4663 "RW_READ_HELD(%p)", (void *)tvp);
4664 }
4665
4666 /* The file is gone and so should the state */
4667 rfs4_close_all_state(fp);
4668 VN_RELE(tvp);
4669 }
4670
4671 if (sfp) {
4672 rfs4_clear_dont_grant(sfp);
4673 rfs4_file_rele(sfp);
4674 }
4675 if (fp) {
4676 rfs4_clear_dont_grant(fp);
4677 rfs4_file_rele(fp);
4678 }
4679
4680 if (converted_onm != onm)
4681 kmem_free(converted_onm, MAXPATHLEN + 1);
4682 kmem_free(onm, olen);
4683 if (converted_nnm != nnm)
4684 kmem_free(converted_nnm, MAXPATHLEN + 1);
4685 kmem_free(nnm, nlen);
4686
4687 /*
4688 * Get the initial "after" sequence number, if it fails, set to zero
4689 */
4690 oidva.va_mask = AT_SEQ;
5587 static void
5588 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5589 struct compound_state *cs)
5590 {
5591 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5592 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5593 int error;
5594 vnode_t *vp;
5595 struct vattr bva;
5596 u_offset_t rlimit;
5597 struct uio uio;
5598 struct iovec iov[MAX_IOVECS];
5599 struct iovec *iovp;
5600 int iovcnt;
5601 int ioflag;
5602 cred_t *savecred, *cr;
5603 bool_t *deleg = &cs->deleg;
5604 nfsstat4 stat;
5605 int in_crit = 0;
5606 caller_context_t ct;
5607 nfs4_srv_t *nsrv4;
5608
5609 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5610 WRITE4args *, args);
5611
5612 vp = cs->vp;
5613 if (vp == NULL) {
5614 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5615 goto out;
5616 }
5617 if (cs->access == CS_ACCESS_DENIED) {
5618 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5619 goto out;
5620 }
5621
5622 cr = cs->cr;
5623
5624 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5625 deleg, TRUE, &ct)) != NFS4_OK) {
5626 *cs->statusp = resp->status = stat;
5627 goto out;
5658 goto out;
5659 }
5660
5661 if (vp->v_type != VREG) {
5662 *cs->statusp = resp->status =
5663 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5664 goto out;
5665 }
5666
5667 if (crgetuid(cr) != bva.va_uid &&
5668 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5669 *cs->statusp = resp->status = puterrno4(error);
5670 goto out;
5671 }
5672
5673 if (MANDLOCK(vp, bva.va_mode)) {
5674 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5675 goto out;
5676 }
5677
5678 nsrv4 = nfs4_get_srv();
5679 if (args->data_len == 0) {
5680 *cs->statusp = resp->status = NFS4_OK;
5681 resp->count = 0;
5682 resp->committed = args->stable;
5683 resp->writeverf = nsrv4->write4verf;
5684 goto out;
5685 }
5686
5687 if (args->mblk != NULL) {
5688 mblk_t *m;
5689 uint_t bytes, round_len;
5690
5691 iovcnt = 0;
5692 bytes = 0;
5693 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5694 for (m = args->mblk;
5695 m != NULL && bytes < round_len;
5696 m = m->b_cont) {
5697 iovcnt++;
5698 bytes += MBLKL(m);
5699 }
5700 #ifdef DEBUG
5701 /* should have ended on an mblk boundary */
5702 if (bytes != round_len) {
5703 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5759 curthread->t_cred = cr;
5760 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5761 curthread->t_cred = savecred;
5762
5763 if (iovp != iov)
5764 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5765
5766 if (error) {
5767 *cs->statusp = resp->status = puterrno4(error);
5768 goto out;
5769 }
5770
5771 *cs->statusp = resp->status = NFS4_OK;
5772 resp->count = args->data_len - uio.uio_resid;
5773
5774 if (ioflag == 0)
5775 resp->committed = UNSTABLE4;
5776 else
5777 resp->committed = FILE_SYNC4;
5778
5779 resp->writeverf = nsrv4->write4verf;
5780
5781 out:
5782 if (in_crit)
5783 nbl_end_crit(vp);
5784
5785 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5786 WRITE4res *, resp);
5787 }
5788
5789
5790 /* XXX put in a header file */
5791 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5792
/*
 * rfs4_compound: run the operations of one NFSv4 COMPOUND request.
 *
 * args - decoded request: tag, minorversion and the array of operations.
 * resp - filled in here: a copy of the tag, the overall status, and an
 *        array holding one result per executed operation.
 * exi  - not referenced in the lines visible here.
 * req  - the RPC request; handed to sec_svc_getcred() and to every op
 *        handler.  Its rq_label, if set, is freed before returning from
 *        the main path.
 * cr   - the incoming value is overwritten by crget() before it is used.
 * rv   - if non-NULL, set to 0 on entry and to 1 when no credential could
 *        be obtained and svcerr_badcred() has been called.
 *
 * Operations are dispatched in order until one leaves a status other than
 * NFS4_OK; the result array is then shrunk to the operations actually run.
 * The reply tag buffer allocated below is not freed in this function;
 * presumably the caller's result-free path releases it -- TODO confirm.
 */
5793 void
5794 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5795 struct svc_req *req, cred_t *cr, int *rv)
5796 {
5797 uint_t i;
5798 struct compound_state cs;
5799 nfs4_srv_t *nsrv4;
5800 nfs_export_t *ne = nfs_get_export();
5801
5802 if (rv != NULL)
5803 *rv = 0;
5804 rfs4_init_compound_state(&cs);
5805 /*
5806 * Form a reply tag by copying over the request tag.
5807 */
5808 resp->tag.utf8string_len = args->tag.utf8string_len;
5809 if (args->tag.utf8string_len != 0) {
5810 resp->tag.utf8string_val =
5811 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5812 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5813 resp->tag.utf8string_len);
5814 } else {
5815 resp->tag.utf8string_val = NULL;
5816 }
5817
/* Op handlers report their status through cs.statusp, i.e. resp->status. */
5818 cs.statusp = &resp->status;
5819 cs.req = req;
5820 resp->array = NULL;
5821 resp->array_len = 0;
5822
5823 /*
5824 * XXX for now, minorversion should be zero
5825 */
5826 if (args->minorversion != NFS4_MINORVERSION) {
5827 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5828 &cs, COMPOUND4args *, args);
5829 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5830 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5831 &cs, COMPOUND4res *, resp);
5832 return;
5833 }
5834
5835 if (args->array_len == 0) {
5836 resp->status = NFS4_OK;
5842
5843 cr = crget();
5844 ASSERT(cr != NULL);
5845
/*
 * A zero return from sec_svc_getcred() means no credential could be
 * built: drop the new cred, report a bad credential on the transport and
 * tell the caller through *rv.
 */
5846 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5847 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5848 &cs, COMPOUND4args *, args);
5849 crfree(cr);
5850 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5851 &cs, COMPOUND4res *, resp);
5852 svcerr_badcred(req->rq_xprt);
5853 if (rv != NULL)
5854 *rv = 1;
5855 return;
5856 }
/* One zero-filled result slot per requested operation. */
5857 resp->array_len = args->array_len;
5858 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5859 KM_SLEEP);
5860
5861 cs.basecr = cr;
5862 nsrv4 = nfs4_get_srv();
5863
5864 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5865 COMPOUND4args *, args);
5866
5867 /*
5868 * For now, NFS4 compound processing must be protected by
5869 * exported_lock because it can access more than one exportinfo
5870 * per compound and share/unshare can now change multiple
5871 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5872 * per proc (excluding public exinfo), and exi_count design
5873 * is sufficient to protect concurrent execution of NFS2/3
5874 * ops along with unexport. This lock will be removed as
5875 * part of the NFSv4 phase 2 namespace redesign work.
5876 */
5877 rw_enter(&ne->exported_lock, RW_READER);
5878
5879 /*
5880 * If this is the first compound we've seen, we need to start all
5881 * new instances' grace periods.
5882 */
5883 if (nsrv4->seen_first_compound == 0) {
5884 rfs4_grace_start_new(nsrv4);
5885 /*
5886 * This must be set after rfs4_grace_start_new(), otherwise
5887 * another thread could proceed past here before the former
5888 * is finished.
5889 */
5890 nsrv4->seen_first_compound = 1;
5891 }
5892
/* Run the operations in order; cs.cont goes FALSE to stop the loop. */
5893 for (i = 0; i < args->array_len && cs.cont; i++) {
5894 nfs_argop4 *argop;
5895 nfs_resop4 *resop;
5896 uint_t op;
5897 kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
5898
5899 argop = &args->array[i];
5900 resop = &resp->array[i];
/* Echo the opcode into the result slot before dispatching. */
5901 resop->resop = argop->argop;
5902 op = (uint_t)resop->resop;
5903
5904 if (op < rfsv4disp_cnt) {
5905 /*
5906 * Count the individual ops here; NULL and COMPOUND
5907 * are counted in common_dispatch()
5908 */
5909 stat[op].value.ui64++;
5910
5911 NFS4_DEBUG(rfs4_debug > 1,
5912 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5913 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5914 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5915 rfs4_op_string[op], *cs.statusp));
/* Any status other than NFS4_OK ends the compound. */
5916 if (*cs.statusp != NFS4_OK)
5917 cs.cont = FALSE;
5918 } else {
5919 /*
5920 * This is effectively dead code since XDR code
5921 * will have already returned BADXDR if op doesn't
5922 * decode to legal value. This only done for a
5923 * day when XDR code doesn't verify v4 opcodes.
5924 */
5925 op = OP_ILLEGAL;
5926 stat[OP_ILLEGAL_IDX].value.ui64++;
5927
5928 rfs4_op_illegal(argop, resop, req, &cs);
5929 cs.cont = FALSE;
5930 }
5931
5932 /*
5933 * If not at last op, and if we are to stop, then
5934 * compact the results array.
5935 */
/*
 * Compaction copies the i + 1 results produced so far into an array of
 * exactly that size and frees the original full-size array.
 */
5936 if ((i + 1) < args->array_len && !cs.cont) {
5937 nfs_resop4 *new_res = kmem_alloc(
5938 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5939 bcopy(resp->array,
5940 new_res, (i+1) * sizeof (nfs_resop4));
5941 kmem_free(resp->array,
5942 args->array_len * sizeof (nfs_resop4));
5943
5944 resp->array_len = i + 1;
5945 resp->array = new_res;
5946 }
5947 }
5948
5949 rw_exit(&ne->exported_lock);
5950
5951 /*
5952 * clear exportinfo and vnode fields from compound_state before dtrace
5953 * probe, to avoid tracing residual values for path and share path.
5954 */
5955 if (cs.vp)
5956 VN_RELE(cs.vp);
5957 if (cs.saved_vp)
5958 VN_RELE(cs.saved_vp);
5959 cs.exi = cs.saved_exi = NULL;
5960 cs.vp = cs.saved_vp = NULL;
5961
5962 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5963 COMPOUND4res *, resp);
5964
/* Release the remaining per-compound resources recorded in cs. */
5965 if (cs.saved_fh.nfs_fh4_val)
5966 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5967
5968 if (cs.basecr)
5969 crfree(cs.basecr);
5970 if (cs.cr)
5971 crfree(cs.cr);
5972 /*
5973 * done with this compound request, free the label
5974 */
5975
5976 if (req->rq_label != NULL) {
5977 kmem_free(req->rq_label, sizeof (bslabel_t));
5978 req->rq_label = NULL;
5979 }
5980 }
5981
5982 /*
5983 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5984 * XXX zero out the tag and array values. Need to investigate why the
6648
6649 /* Check for mandatory locking and that the size gets set. */
6650 cva.va_mask = AT_MODE;
6651 if (setsize)
6652 cva.va_mask |= AT_SIZE;
6653
6654 /* Assume the worst */
6655 cs->mandlock = TRUE;
6656
6657 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6658 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6659
6660 /*
6661 * Truncate the file if necessary; this would be
6662 * the case for create over an existing file.
6663 */
6664
6665 if (trunc) {
6666 int in_crit = 0;
6667 rfs4_file_t *fp;
6668 nfs4_srv_t *nsrv4;
6669 bool_t create = FALSE;
6670
6671 /*
6672 * We are writing over an existing file.
6673 * Check to see if we need to recall a delegation.
6674 */
6675 nsrv4 = nfs4_get_srv();
6676 rfs4_hold_deleg_policy(nsrv4);
6677 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6678 if (rfs4_check_delegated_byfp(FWRITE, fp,
6679 (reqsize == 0), FALSE, FALSE, &clientid)) {
6680 rfs4_file_rele(fp);
6681 rfs4_rele_deleg_policy(nsrv4);
6682 VN_RELE(vp);
6683 *attrset = 0;
6684 return (NFS4ERR_DELAY);
6685 }
6686 rfs4_file_rele(fp);
6687 }
6688 rfs4_rele_deleg_policy(nsrv4);
6689
6690 if (nbl_need_check(vp)) {
6691 in_crit = 1;
6692
6693 ASSERT(reqsize == 0);
6694
6695 nbl_start_crit(vp, RW_READER);
6696 if (nbl_conflict(vp, NBL_WRITE, 0,
6697 cva.va_size, 0, NULL)) {
6698 in_crit = 0;
6699 nbl_end_crit(vp);
6700 VN_RELE(vp);
6701 *attrset = 0;
6702 return (NFS4ERR_ACCESS);
6703 }
6704 }
6705 ct.cc_sysid = 0;
6706 ct.cc_pid = 0;
6707 ct.cc_caller_id = nfs4_srv_caller_id;
6708 ct.cc_flags = CC_DONTBLOCK;
8226
8227 newcp->rc_cp_confirmed = cp_confirmed;
8228
8229 rfs4_client_rele(newcp);
8230
8231 out:
8232 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8233 SETCLIENTID4res *, res);
8234 }
8235
8236 /*ARGSUSED*/
8237 void
8238 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8239 struct svc_req *req, struct compound_state *cs)
8240 {
8241 SETCLIENTID_CONFIRM4args *args =
8242 &argop->nfs_argop4_u.opsetclientid_confirm;
8243 SETCLIENTID_CONFIRM4res *res =
8244 &resop->nfs_resop4_u.opsetclientid_confirm;
8245 rfs4_client_t *cp, *cptoclose = NULL;
8246 nfs4_srv_t *nsrv4;
8247
8248 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8249 struct compound_state *, cs,
8250 SETCLIENTID_CONFIRM4args *, args);
8251
8252 nsrv4 = nfs4_get_srv();
8253 *cs->statusp = res->status = NFS4_OK;
8254
8255 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8256
8257 if (cp == NULL) {
8258 *cs->statusp = res->status =
8259 rfs4_check_clientid(&args->clientid, 1);
8260 goto out;
8261 }
8262
8263 if (!creds_ok(cp, req, cs)) {
8264 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8265 rfs4_client_rele(cp);
8266 goto out;
8267 }
8268
8269 /* If the verifier doesn't match, the record doesn't match */
8270 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8271 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8272 rfs4_client_rele(cp);
8273 goto out;
8274 }
8275
8276 rfs4_dbe_lock(cp->rc_dbe);
8277 cp->rc_need_confirm = FALSE;
8278 if (cp->rc_cp_confirmed) {
8279 cptoclose = cp->rc_cp_confirmed;
8280 cptoclose->rc_ss_remove = 1;
8281 cp->rc_cp_confirmed = NULL;
8282 }
8283
8284 /*
8285 * Update the client's associated server instance, if it's changed
8286 * since the client was created.
8287 */
8288 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8289 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8290
8291 /*
8292 * Record clientid in stable storage.
8293 * Must be done after server instance has been assigned.
8294 */
8295 rfs4_ss_clid(nsrv4, cp);
8296
8297 rfs4_dbe_unlock(cp->rc_dbe);
8298
8299 if (cptoclose)
8300 /* don't need to rele, client_close does it */
8301 rfs4_client_close(cptoclose);
8302
8303 /* If needed, initiate CB_NULL call for callback path */
8304 rfs4_deleg_cb_check(cp);
8305 rfs4_update_lease(cp);
8306
8307 /*
8308 * Check to see if client can perform reclaims
8309 */
8310 rfs4_ss_chkclid(nsrv4, cp);
8311
8312 rfs4_client_rele(cp);
8313
8314 out:
8315 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8316 struct compound_state *, cs,
8317 SETCLIENTID_CONFIRM4 *, res);
8318 }
8319
8320
8321 /*ARGSUSED*/
8322 void
8323 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8324 struct svc_req *req, struct compound_state *cs)
8325 {
8326 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8327 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8328 rfs4_state_t *sp;
8329 nfsstat4 status;
8330
9934 /*
9935 * Check to see if we have a downrev Solaris client, so that we
9936 * can send it a symlink instead of a referral.
9937 */
9938 int
9939 client_is_downrev(struct svc_req *req)
9940 {
9941 struct sockaddr *ca;
9942 rfs4_clntip_t *ci;
9943 bool_t create = FALSE;
9944 int is_downrev;
9945
9946 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9947 ASSERT(ca);
9948 ci = rfs4_find_clntip(ca, &create);
9949 if (ci == NULL)
9950 return (0);
9951 is_downrev = ci->ri_no_referrals;
9952 rfs4_dbe_rele(ci->ri_dbe);
9953 return (is_downrev);
9954 }
9955
9956 /*
9957 * Do the main work of handling HA-NFSv4 Resource Group failover on
9958 * Sun Cluster.
9959 * We need to detect whether any RG admin paths have been added or removed,
9960 * and adjust resources accordingly.
9961 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9962 * order to scale, the list and array of paths need to be held in more
9963 * suitable data structures.
9964 */
9965 static void
9966 hanfsv4_failover(nfs4_srv_t *nsrv4)
9967 {
9968 int i, start_grace, numadded_paths = 0;
9969 char **added_paths = NULL;
9970 rfs4_dss_path_t *dss_path;
9971
9972 /*
9973 * Note: currently, dss_pathlist cannot be NULL, since
9974 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9975 * make the latter dynamically specified too, the following will
9976 * need to be adjusted.
9977 */
9978
9979 /*
9980 * First, look for removed paths: RGs that have been failed-over
9981 * away from this node.
9982 * Walk the "currently-serving" dss_pathlist and, for each
9983 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9984 * from nfsd. If not, that RG path has been removed.
9985 *
9986 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9987 * any duplicates.
9988 */
9989 dss_path = nsrv4->dss_pathlist;
9990 do {
9991 int found = 0;
9992 char *path = dss_path->path;
9993
9994 /* used only for non-HA so may not be removed */
9995 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9996 dss_path = dss_path->next;
9997 continue;
9998 }
9999
10000 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10001 int cmpret;
10002 char *newpath = rfs4_dss_newpaths[i];
10003
10004 /*
10005 * Since nfsd has sorted rfs4_dss_newpaths for us,
10006 * once the return from strcmp is negative we know
10007 * we've passed the point where "path" should be,
10008 * and can stop searching: "path" has been removed.
10009 */
10010 cmpret = strcmp(path, newpath);
10011 if (cmpret < 0)
10012 break;
10013 if (cmpret == 0) {
10014 found = 1;
10015 break;
10016 }
10017 }
10018
10019 if (found == 0) {
10020 unsigned index = dss_path->index;
10021 rfs4_servinst_t *sip = dss_path->sip;
10022 rfs4_dss_path_t *path_next = dss_path->next;
10023
10024 /*
10025 * This path has been removed.
10026 * We must clear out the servinst reference to
10027 * it, since it's now owned by another
10028 * node: we should not attempt to touch it.
10029 */
10030 ASSERT(dss_path == sip->dss_paths[index]);
10031 sip->dss_paths[index] = NULL;
10032
10033 /* remove from "currently-serving" list, and destroy */
10034 remque(dss_path);
10035 /* allow for NUL */
10036 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10037 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10038
10039 dss_path = path_next;
10040 } else {
10041 /* path was found; not removed */
10042 dss_path = dss_path->next;
10043 }
10044 } while (dss_path != nsrv4->dss_pathlist);
10045
10046 /*
10047 * Now, look for added paths: RGs that have been failed-over
10048 * to this node.
10049 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10050 * for each path, check if it is on the "currently-serving"
10051 * dss_pathlist. If not, that RG path has been added.
10052 *
10053 * Note: we don't do duplicate detection here; nfsd does that for us.
10054 *
10055 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10056 * an upper bound for the size needed for added_paths[numadded_paths].
10057 */
10058
10059 /* probably more space than we need, but guaranteed to be enough */
10060 if (rfs4_dss_numnewpaths > 0) {
10061 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10062 added_paths = kmem_zalloc(sz, KM_SLEEP);
10063 }
10064
10065 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10066 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10067 int found = 0;
10068 char *newpath = rfs4_dss_newpaths[i];
10069
10070 dss_path = nsrv4->dss_pathlist;
10071 do {
10072 char *path = dss_path->path;
10073
10074 /* used only for non-HA */
10075 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10076 dss_path = dss_path->next;
10077 continue;
10078 }
10079
10080 if (strncmp(path, newpath, strlen(path)) == 0) {
10081 found = 1;
10082 break;
10083 }
10084
10085 dss_path = dss_path->next;
10086 } while (dss_path != nsrv4->dss_pathlist);
10087
10088 if (found == 0) {
10089 added_paths[numadded_paths] = newpath;
10090 numadded_paths++;
10091 }
10092 }
10093
10094 /* did we find any added paths? */
10095 if (numadded_paths > 0) {
10096
10097 /* create a new server instance, and start its grace period */
10098 start_grace = 1;
10099 /* CSTYLED */
10100 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10101
10102 /* read in the stable storage state from these paths */
10103 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10104
10105 /*
10106 * Multiple failovers during a grace period will cause
10107 * clients of the same resource group to be partitioned
10108 * into different server instances, with different
10109 * grace periods. Since clients of the same resource
10110 * group must be subject to the same grace period,
10111 * we need to reset all currently active grace periods.
10112 */
10113 rfs4_grace_reset_all(nsrv4);
10114 }
10115
10116 if (rfs4_dss_numnewpaths > 0)
10117 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10118 }
|