3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 /*
  29  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30  *      All Rights Reserved
  31  */
  32 
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36 #include <sys/cred.h>
  37 #include <sys/buf.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/uio.h>
  42 #include <sys/errno.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/statvfs.h>
  45 #include <sys/kmem.h>
  46 #include <sys/dirent.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/debug.h>
  49 #include <sys/systeminfo.h>
  50 #include <sys/flock.h>
  51 #include <sys/pathname.h>
  52 #include <sys/nbmlock.h>
  53 #include <sys/share.h>
  54 #include <sys/atomic.h>
  55 #include <sys/policy.h>
  56 #include <sys/fem.h>
  57 #include <sys/sdt.h>
  58 #include <sys/ddi.h>
  59 #include <sys/zone.h>
  60 
  61 #include <fs/fs_reparse.h>
  62 
  63 #include <rpc/types.h>
  64 #include <rpc/auth.h>
  65 #include <rpc/rpcsec_gss.h>
  66 #include <rpc/svc.h>
  67 
  68 #include <nfs/nfs.h>
  69 #include <nfs/export.h>
  70 #include <nfs/nfs_cmd.h>
  71 #include <nfs/lm.h>
  72 #include <nfs/nfs4.h>
  73 
  74 #include <sys/strsubr.h>
  75 #include <sys/strsun.h>
  76 
  77 #include <inet/common.h>
  78 #include <inet/ip.h>
  79 #include <inet/ip6.h>
  80 
  81 #include <sys/tsol/label.h>
  82 #include <sys/tsol/tndb.h>
  83 
  84 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
     /* Variable seeded from RFS4_MAXLOCK_TRIES; listed among the tunables. */
  85 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  86 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
     /* Variable seeded from RFS4_LOCK_DELAY (milliseconds per the macro above). */
  87 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
     /* Both of the following are defined outside this file. */
  88 extern struct svc_ops rdma_svc_ops;
  89 extern int nfs_loaned_buffers;
  90 /* End of Tunables */
  91 
  92 static int rdma_setup_read_data4(READ4args *, READ4res *);
 
 
 130 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 131 #define RFS4_MINLEN_RDDIR_BUF \
 132         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 133 
 134 /*
 135  * It would be better to pad to 4 bytes since that's what XDR would do,
 136  * but the dirents UFS gives us are already padded to 8, so just take
 137  * what we're given.  Dircount is only a hint anyway.  Currently the
 138  * solaris kernel is ASCII only, so there's no point in calling the
 139  * UTF8 functions.
 140  *
 141  * dirent64: name padded to provide 8 byte struct alignment
 142  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 143  *
 144  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 145  *
 146  */
 147 #define DIRENT64_TO_DIRCOUNT(dp) \
 148         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 149 
 150 time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */
     /*
      * NOTE(review): the rfs4_srvrinit() visible in this file does not assign
      * rfs4_start_time; confirm where it is really initialized.
      */
 151 
     /* Allocated by lm_alloc_sysidt() in rfs4_srvrinit(); freed in rfs4_srvrfini(). */
 152 static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */
 153 
     /* Set from fs_new_caller_id() in rfs4_srvrinit(). */
 154 u_longlong_t    nfs4_srv_caller_id;
     /* Key passed to vsd_create() in rfs4_srvrinit(). */
 155 uint_t          nfs4_srv_vkey = 0;
 156 
     /* Built in rfs4_srvrinit(); rfs4_op_commit() returns it as writeverf. */
 157 verifier4       Write4verf;
     /* NOTE(review): initialization of Readdir4verf is not visible in this part of the file. */
 158 verifier4       Readdir4verf;
 159 
 160 void    rfs4_init_compound_state(struct compound_state *);
 161 
 162 static void     nullfree(caddr_t);
 163 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 164                         struct compound_state *);
 165 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                         struct compound_state *);
 167 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                         struct compound_state *);
 169 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170                         struct compound_state *);
 171 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172                         struct compound_state *);
 173 static void     rfs4_op_create_free(nfs_resop4 *resop);
 174 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 175                         struct svc_req *, struct compound_state *);
 176 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 177                         struct svc_req *, struct compound_state *);
 178 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 179                         struct compound_state *);
 
 
 228 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229                         struct compound_state *);
 230 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                         struct compound_state *);
 232 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                         struct compound_state *);
 234 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                         struct compound_state *);
 236 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237                         struct compound_state *);
 238 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239                         struct compound_state *);
 240 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 241                         struct svc_req *, struct compound_state *);
 242 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 243                         struct svc_req *req, struct compound_state *);
 244 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 245                         struct compound_state *);
 246 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 247 
 248 static nfsstat4 check_open_access(uint32_t,
 249                                 struct compound_state *, struct svc_req *);
 250 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 251 void rfs4_ss_clid(rfs4_client_t *);
 252 
 253 /*
 254  * translation table for attrs
 255  */
 256 struct nfs4_ntov_table {
             /*
              * Working state for attribute translation; set up and torn down by
              * nfs4_ntov_table_init() / nfs4_ntov_table_free() and passed to
              * do_rfs4_set_attrs().
              * NOTE(review): the field descriptions below are inferred from the
              * declarations only; confirm against the code that fills them in.
              */
 257         union nfs4_attr_u *na;          /* attribute value storage */
 258         uint8_t amap[NFS4_MAXNUM_ATTRS]; /* one byte per possible attribute */
 259         int attrcnt;                    /* presumably entries in use - verify */
 260         bool_t vfsstat;                 /* boolean; meaning not visible here */
 261 };
 262 
 263 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 264 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 265                                     struct nfs4_svgetit_arg *sargp);
 266 
 267 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 268                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 269                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 270 
 271 fem_t           *deleg_rdops;
 272 fem_t           *deleg_wrops;
 273 
 274 rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
 275 kmutex_t        rfs4_servinst_lock;     /* protects linked list */
 276 int             rfs4_seen_first_compound;       /* set first time we see one */
 277 
 278 /*
 279  * NFS4 op dispatch table
 280  */
 281 
 282 struct rfsv4disp {
             /*
              * One entry of rfsv4disptab[]: the handler for an operation, the
              * routine that releases what the handler allocated, and flags for
              * the operation (e.g. {rfs4_op_illegal, nullfree, 0}).  Both
              * function pointers are declared without a parameter list, so
              * calls made through them are not type-checked by the compiler.
              */
 283         void    (*dis_proc)();          /* proc to call */
 284         void    (*dis_resfree)();       /* frees space allocated by proc */
 285         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 286 };
 287 
 288 static struct rfsv4disp rfsv4disptab[] = {
 289         /*
 290          * NFS VERSION 4
 291          */
 292 
 293         /* RFS_NULL = 0 */
 294         {rfs4_op_illegal, nullfree, 0},
 295 
 296         /* UNUSED = 1 */
 297         {rfs4_op_illegal, nullfree, 0},
 
 449         "rfs4_op_putrootfh",
 450         "rfs4_op_read",
 451         "rfs4_op_readdir",
 452         "rfs4_op_readlink",
 453         "rfs4_op_remove",
 454         "rfs4_op_rename",
 455         "rfs4_op_renew",
 456         "rfs4_op_restorefh",
 457         "rfs4_op_savefh",
 458         "rfs4_op_secinfo",
 459         "rfs4_op_setattr",
 460         "rfs4_op_setclientid",
 461         "rfs4_op_setclient_confirm",
 462         "rfs4_op_verify",
 463         "rfs4_op_write",
 464         "rfs4_op_release_lockowner",
 465         "rfs4_op_illegal"
 466 };
 467 #endif
 468 
 469 void    rfs4_ss_chkclid(rfs4_client_t *);
 470 
 471 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 472 
 473 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 474 
 475 #ifdef  nextdp
     /* Discard any earlier definition so the dirent64 version below is used. */
 476 #undef nextdp
 477 #endif
     /*
      * Step to the directory entry that follows dp: the address d_reclen bytes
      * past dp, viewed as a struct dirent64.  dp is evaluated twice, so it must
      * not have side effects.
      */
 478 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 479 
 480 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
             /*
              * Operation template for read delegations.  Each row pairs a vnode
              * operation name with the deleg_rd_* routine registered for it;
              * rfs4_srvrinit() hands this table to fem_create() to build
              * deleg_rdops.  The NULL, NULL row ends the table.
              */
 481         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 482         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 483         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 484         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 485         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 486         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 487         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 488         NULL,                   NULL
 489 };
 490 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
             /*
              * Operation template for write delegations.  Each row pairs a vnode
              * operation name with the deleg_wr_* routine registered for it;
              * rfs4_srvrinit() hands this table to fem_create() to build
              * deleg_wrops.  Unlike the read-delegation template, this one also
              * covers VOPNAME_READ.  The NULL, NULL row ends the table.
              */
 491         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 492         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 493         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 494         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 495         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 496         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 497         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 498         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 499         NULL,                   NULL
 500 };
 501 
 502 int
 503 rfs4_srvrinit(void)
 504 {
 505         timespec32_t verf;
 506         int error;
 507         extern void rfs4_attr_init();
 508         extern krwlock_t rfs4_deleg_policy_lock;
 509 
 510         /*
 511          * The following algorithm attempts to find a unique verifier
 512          * to be used as the write verifier returned from the server
 513          * to the client.  It is important that this verifier change
 514          * whenever the server reboots.  Of secondary importance, it
 515          * is important for the verifier to be unique between two
 516          * different servers.
 517          *
 518          * Thus, an attempt is made to use the system hostid and the
 519          * current time in seconds when the nfssrv kernel module is
 520          * loaded.  It is assumed that an NFS server will not be able
 521          * to boot and then to reboot in less than a second.  If the
 522          * hostid has not been set, then the current high resolution
 523          * time is used.  This will ensure different verifiers each
 524          * time the server reboots and minimize the chances that two
 525          * different servers will have the same verifier.
 526          * XXX - this is broken on LP64 kernels.
 527          */
 528         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 529         if (verf.tv_sec != 0) {
 530                 verf.tv_nsec = gethrestime_sec();
 531         } else {
 532                 timespec_t tverf;
 533 
 534                 gethrestime(&tverf);
 535                 verf.tv_sec = (time_t)tverf.tv_sec;
 536                 verf.tv_nsec = tverf.tv_nsec;
 537         }
 538 
 539         Write4verf = *(uint64_t *)&verf;
 540 
 541         rfs4_attr_init();
 542         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 543 
 544         /* Used to manage create/destroy of server state */
 545         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 546 
 547         /* Used to manage access to server instance linked list */
 548         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 549 
 550         /* Used to manage access to rfs4_deleg_policy */
 551         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 552 
 553         error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 554         if (error != 0) {
 555                 rfs4_disable_delegation();
 556         } else {
 557                 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 558                     &deleg_wrops);
 559                 if (error != 0) {
 560                         rfs4_disable_delegation();
 561                         fem_free(deleg_rdops);
 562                 }
 563         }
 564 
 565         nfs4_srv_caller_id = fs_new_caller_id();
 566 
 567         lockt_sysid = lm_alloc_sysidt();
 568 
 569         vsd_create(&nfs4_srv_vkey, NULL);
 570 
 571         return (0);
 572 }
 573 
 574 void
 575 rfs4_srvrfini(void)
 576 {
 577         extern krwlock_t rfs4_deleg_policy_lock;
 578 
 579         if (lockt_sysid != LM_NOSYSID) {
 580                 lm_free_sysidt(lockt_sysid);
 581                 lockt_sysid = LM_NOSYSID;
 582         }
 583 
 584         mutex_destroy(&rfs4_deleg_lock);
 585         mutex_destroy(&rfs4_state_lock);
 586         rw_destroy(&rfs4_deleg_policy_lock);
 587 
 588         fem_free(deleg_rdops);
 589         fem_free(deleg_wrops);
 590 }
 591 
 592 void
 593 rfs4_init_compound_state(struct compound_state *cs)
 594 {
 595         bzero(cs, sizeof (*cs));
 596         cs->cont = TRUE;
 597         cs->access = CS_ACCESS_DENIED;
 598         cs->deleg = FALSE;
 599         cs->mandlock = FALSE;
 600         cs->fh.nfs_fh4_val = cs->fhbuf;
 601 }
 602 
 603 void
 604 rfs4_grace_start(rfs4_servinst_t *sip)
 605 {
 606         rw_enter(&sip->rwlock, RW_WRITER);
 607         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 608         sip->grace_period = rfs4_grace_period;
 609         rw_exit(&sip->rwlock);
 610 }
 611 
 612 /*
 
 635 
 636         rw_enter(&sip->rwlock, RW_READER);
 637         grace_expiry = sip->start_time + sip->grace_period;
 638         rw_exit(&sip->rwlock);
 639 
 640         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 641 }
 642 
 643 int
 644 rfs4_clnt_in_grace(rfs4_client_t *cp)
 645 {
 646         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 647 
 648         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 649 }
 650 
 651 /*
 652  * reset all currently active grace periods
 653  */
 654 void
 655 rfs4_grace_reset_all(void)
 656 {
 657         rfs4_servinst_t *sip;
 658 
 659         mutex_enter(&rfs4_servinst_lock);
 660         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 661                 if (rfs4_servinst_in_grace(sip))
 662                         rfs4_grace_start(sip);
 663         mutex_exit(&rfs4_servinst_lock);
 664 }
 665 
 666 /*
 667  * start any new instances' grace periods
 668  */
 669 void
 670 rfs4_grace_start_new(void)
 671 {
 672         rfs4_servinst_t *sip;
 673 
 674         mutex_enter(&rfs4_servinst_lock);
 675         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 676                 if (rfs4_servinst_grace_new(sip))
 677                         rfs4_grace_start(sip);
 678         mutex_exit(&rfs4_servinst_lock);
 679 }
 680 
 681 static rfs4_dss_path_t *
 682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 683 {
 684         size_t len;
 685         rfs4_dss_path_t *dss_path;
 686 
 687         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 688 
 689         /*
 690          * Take a copy of the string, since the original may be overwritten.
 691          * Sadly, no strdup() in the kernel.
 692          */
 693         /* allow for NUL */
 694         len = strlen(path) + 1;
 695         dss_path->path = kmem_alloc(len, KM_SLEEP);
 696         (void) strlcpy(dss_path->path, path, len);
 697 
 698         /* associate with servinst */
 699         dss_path->sip = sip;
 700         dss_path->index = index;
 701 
 702         /*
 703          * Add to list of served paths.
 704          * No locking required, as we're only ever called at startup.
 705          */
 706         if (rfs4_dss_pathlist == NULL) {
 707                 /* this is the first dss_path_t */
 708 
 709                 /* needed for insque/remque */
 710                 dss_path->next = dss_path->prev = dss_path;
 711 
 712                 rfs4_dss_pathlist = dss_path;
 713         } else {
 714                 insque(dss_path, rfs4_dss_pathlist);
 715         }
 716 
 717         return (dss_path);
 718 }
 719 
 720 /*
 721  * Create a new server instance, and make it the currently active instance.
 722  * Note that starting the grace period too early will reduce the clients'
 723  * recovery window.
 724  */
 725 void
 726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 727 {
 728         unsigned i;
 729         rfs4_servinst_t *sip;
 730         rfs4_oldstate_t *oldstate;
 731 
 732         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 733         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 734 
 735         sip->start_time = (time_t)0;
 736         sip->grace_period = (time_t)0;
 737         sip->next = NULL;
 738         sip->prev = NULL;
 739 
 740         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 741         /*
 742          * This initial dummy entry is required to setup for insque/remque.
 743          * It must be skipped over whenever the list is traversed.
 744          */
 745         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 746         /* insque/remque require initial list entry to be self-terminated */
 747         oldstate->next = oldstate;
 748         oldstate->prev = oldstate;
 749         sip->oldstate = oldstate;
 750 
 751 
 752         sip->dss_npaths = dss_npaths;
 753         sip->dss_paths = kmem_alloc(dss_npaths *
 754             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 755 
 756         for (i = 0; i < dss_npaths; i++) {
 757                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 758         }
 759 
 760         mutex_enter(&rfs4_servinst_lock);
 761         if (rfs4_cur_servinst != NULL) {
 762                 /* add to linked list */
 763                 sip->prev = rfs4_cur_servinst;
 764                 rfs4_cur_servinst->next = sip;
 765         }
 766         if (start_grace)
 767                 rfs4_grace_start(sip);
 768         /* make the new instance "current" */
 769         rfs4_cur_servinst = sip;
 770 
 771         mutex_exit(&rfs4_servinst_lock);
 772 }
 773 
 774 /*
 775  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 776  * all instances directly.
 777  */
 778 void
 779 rfs4_servinst_destroy_all(void)
 780 {
 781         rfs4_servinst_t *sip, *prev, *current;
 782 #ifdef DEBUG
 783         int n = 0;
 784 #endif
 785 
 786         mutex_enter(&rfs4_servinst_lock);
 787         ASSERT(rfs4_cur_servinst != NULL);
 788         current = rfs4_cur_servinst;
 789         rfs4_cur_servinst = NULL;
 790         for (sip = current; sip != NULL; sip = prev) {
 791                 prev = sip->prev;
 792                 rw_destroy(&sip->rwlock);
 793                 if (sip->oldstate)
 794                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 795                 if (sip->dss_paths)
 796                         kmem_free(sip->dss_paths,
 797                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 798                 kmem_free(sip, sizeof (rfs4_servinst_t));
 799 #ifdef DEBUG
 800                 n++;
 801 #endif
 802         }
 803         mutex_exit(&rfs4_servinst_lock);
 804 }
 805 
 806 /*
 807  * Assign the current server instance to a client_t.
 808  * Should be called with cp->rc_dbe held.
 809  */
 810 void
 811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 812 {
 813         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 814 
 815         /*
 816          * The lock ensures that if the current instance is in the process
 817          * of changing, we will see the new one.
 818          */
 819         mutex_enter(&rfs4_servinst_lock);
 820         cp->rc_server_instance = sip;
 821         mutex_exit(&rfs4_servinst_lock);
 822 }
 823 
 824 rfs4_servinst_t *
 825 rfs4_servinst(rfs4_client_t *cp)
 826 {
 827         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 828 
 829         return (cp->rc_server_instance);
 830 }
 831 
 832 /* ARGSUSED */
 833 static void
 834 nullfree(caddr_t resop)
 835 {
 836 }
 837 
 838 /*
 839  * This is a fall-through for invalid or not implemented (yet) ops
 840  */
 841 /* ARGSUSED */
 
 854 {
 855         int i;
 856 
 857         for (i = 0; i < count; i++) {
 858                 if (nfsnum == flavor_list[i])
 859                         return (TRUE);
 860         }
 861         return (FALSE);
 862 }
 863 
 864 /*
 865  * Used by rfs4_op_secinfo to get the security information from the
 866  * export structure associated with the component.
 867  */
 868 /* ARGSUSED */
 869 static nfsstat4
 870 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 871 {
 872         int error, different_export = 0;
 873         vnode_t *dvp, *vp;
 874         struct exportinfo *exi = NULL;
 875         fid_t fid;
 876         uint_t count, i;
 877         secinfo4 *resok_val;
 878         struct secinfo *secp;
 879         seconfig_t *si;
 880         bool_t did_traverse = FALSE;
 881         int dotdot, walk;
 882 
 883         dvp = cs->vp;
 884         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 885 
 886         /*
 887          * If dotdotting, then need to check whether it's above the
 888          * root of a filesystem, or above an export point.
 889          */
 890         if (dotdot) {
 891 
 892                 /*
 893                  * If dotdotting at the root of a filesystem, then
 894                  * need to traverse back to the mounted-on filesystem
 895                  * and do the dotdot lookup there.
 896                  */
 897                 if (cs->vp->v_flag & VROOT) {
 898 
 899                         /*
 900                          * If at the system root, then can
 901                          * go up no further.
 902                          */
 903                         if (VN_CMP(dvp, rootdir))
 904                                 return (puterrno4(ENOENT));
 905 
 906                         /*
 907                          * Traverse back to the mounted-on filesystem
 908                          */
 909                         dvp = untraverse(cs->vp);
 910 
 911                         /*
 912                          * Set the different_export flag so we remember
 913                          * to pick up a new exportinfo entry for
 914                          * this new filesystem.
 915                          */
 916                         different_export = 1;
 917                 } else {
 918 
 919                         /*
 920                          * If dotdotting above an export point then set
 921                          * the different_export to get new export info.
 922                          */
 923                         different_export = nfs_exported(cs->exi, cs->vp);
 924                 }
 925         }
 926 
 927         /*
 928          * Get the vnode for the component "nm".
 929          */
 930         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 931             NULL, NULL, NULL);
 932         if (error)
 933                 return (puterrno4(error));
 934 
 935         /*
 936          * If the vnode is in a pseudo filesystem, or if the security flavor
 937          * used in the request is valid but not an explicitly shared flavor,
 938          * or the access bit indicates that this is a limited access,
 939          * check whether this vnode is visible.
 940          */
 941         if (!different_export &&
 942             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 943             cs->access & CS_ACCESS_LIMITED)) {
 944                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 945                         VN_RELE(vp);
 946                         return (puterrno4(ENOENT));
 947                 }
 948         }
 949 
 950         /*
 951          * If it's a mountpoint, then traverse it.
 952          */
 953         if (vn_ismntpt(vp)) {
 954                 if ((error = traverse(&vp)) != 0) {
 955                         VN_RELE(vp);
 956                         return (puterrno4(error));
 957                 }
 958                 /* remember that we had to traverse mountpoint */
 959                 did_traverse = TRUE;
 960                 different_export = 1;
 961         } else if (vp->v_vfsp != dvp->v_vfsp) {
 962                 /*
 963                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 964                  * then vp is probably an LOFS object.  We don't need the
 
 966                  * a server fs boundary and need to call checkexport4.
 967                  * (LOFS lookup hides server fs mountpoints, and actually calls
 968                  * traverse)
 969                  */
 970                 different_export = 1;
 971         }
 972 
 973         /*
 974          * Get the export information for it.
 975          */
 976         if (different_export) {
 977 
 978                 bzero(&fid, sizeof (fid));
 979                 fid.fid_len = MAXFIDSZ;
 980                 error = vop_fid_pseudo(vp, &fid);
 981                 if (error) {
 982                         VN_RELE(vp);
 983                         return (puterrno4(error));
 984                 }
 985 
 986                 if (dotdot)
 987                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 988                 else
 989                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 990 
 991                 if (exi == NULL) {
 992                         if (did_traverse == TRUE) {
 993                                 /*
 994                                  * If this vnode is a mounted-on vnode,
 995                                  * but the mounted-on file system is not
 996                                  * exported, send back the secinfo for
 997                                  * the exported node that the mounted-on
 998                                  * vnode lives in.
 999                                  */
1000                                 exi = cs->exi;
1001                         } else {
1002                                 VN_RELE(vp);
1003                                 return (puterrno4(EACCES));
1004                         }
1005                 }
1006         } else {
1007                 exi = cs->exi;
1008         }
1009         ASSERT(exi != NULL);
1010 
1011 
1012         /*
1013          * Create the secinfo result based on the security information
1014          * from the exportinfo structure (exi).
1015          *
1016          * Return all flavors for a pseudo node.
1017          * For a real export node, return the flavor that the client
1018          * has access with.
1019          */
1020         ASSERT(RW_LOCK_HELD(&exported_lock));
1021         if (PSEUDO(exi)) {
1022                 count = exi->exi_export.ex_seccnt; /* total sec count */
1023                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1024                 secp = exi->exi_export.ex_secinfo;
1025 
1026                 for (i = 0; i < count; i++) {
1027                         si = &secp[i].s_secinfo;
1028                         resok_val[i].flavor = si->sc_rpcnum;
1029                         if (resok_val[i].flavor == RPCSEC_GSS) {
1030                                 rpcsec_gss_info *info;
1031 
1032                                 info = &resok_val[i].flavor_info;
1033                                 info->qop = si->sc_qop;
1034                                 info->service = (rpc_gss_svc_t)si->sc_service;
1035 
1036                                 /* get oid opaque data */
1037                                 info->oid.sec_oid4_len =
1038                                     si->sc_gss_mech_type->length;
1039                                 info->oid.sec_oid4_val = kmem_alloc(
1040                                     si->sc_gss_mech_type->length, KM_SLEEP);
 
1363         if (is_system_labeled() && !admin_low_client)
1364                 label_rele(tslabel);
1365 
1366         *cs->statusp = resp->status = NFS4_OK;
1367 out:
1368         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1369             ACCESS4res *, resp);
1370 }
1371 
1372 /* ARGSUSED */
1373 static void
1374 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375     struct compound_state *cs)
1376 {
1377         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379         int error;
1380         vnode_t *vp = cs->vp;
1381         cred_t *cr = cs->cr;
1382         vattr_t va;
1383 
1384         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385             COMMIT4args *, args);
1386 
1387         if (vp == NULL) {
1388                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389                 goto out;
1390         }
1391         if (cs->access == CS_ACCESS_DENIED) {
1392                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1393                 goto out;
1394         }
1395 
1396         if (args->offset + args->count < args->offset) {
1397                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1398                 goto out;
1399         }
1400 
1401         va.va_mask = AT_UID;
1402         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 
1419                         resp->status = NFS4ERR_ISDIR;
1420                 else
1421                         resp->status = NFS4ERR_INVAL;
1422                 *cs->statusp = resp->status;
1423                 goto out;
1424         }
1425 
1426         if (crgetuid(cr) != va.va_uid &&
1427             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1428                 *cs->statusp = resp->status = puterrno4(error);
1429                 goto out;
1430         }
1431 
1432         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433 
1434         if (error) {
1435                 *cs->statusp = resp->status = puterrno4(error);
1436                 goto out;
1437         }
1438 
1439         *cs->statusp = resp->status = NFS4_OK;
1440         resp->writeverf = Write4verf;
1441 out:
1442         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443             COMMIT4res *, resp);
1444 }
1445 
1446 /*
1447  * do_rfs4_op_mknod is called from rfs4_op_create after all initial
1448  * verification has completed.  It does the NFSv4 create for special files.
1449  */
1450 /* ARGSUSED */
1451 static vnode_t *
1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1453     struct compound_state *cs, vattr_t *vap, char *nm)
1454 {
1455         int error;
1456         cred_t *cr = cs->cr;
1457         vnode_t *dvp = cs->vp;
1458         vnode_t *vp = NULL;
1459         int mode;
1460         enum vcexcl excl;
 
2616         fid_t fid;
2617         int attrdir, dotdot, walk;
2618         bool_t is_newvp = FALSE;
2619 
2620         if (cs->vp->v_flag & V_XATTRDIR) {
2621                 attrdir = 1;
2622                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2623         } else {
2624                 attrdir = 0;
2625                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2626         }
2627 
2628         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629 
2630         /*
2631          * If dotdotting, then need to check whether it's
2632          * above the root of a filesystem, or above an
2633          * export point.
2634          */
2635         if (dotdot) {
2636 
2637                 /*
2638                  * If dotdotting at the root of a filesystem, then
2639                  * need to traverse back to the mounted-on filesystem
2640                  * and do the dotdot lookup there.
2641                  */
2642                 if (cs->vp->v_flag & VROOT) {
2643 
2644                         /*
2645                          * If at the system root, then can
2646                          * go up no further.
2647                          */
2648                         if (VN_CMP(cs->vp, rootdir))
2649                                 return (puterrno4(ENOENT));
2650 
2651                         /*
2652                          * Traverse back to the mounted-on filesystem
2653                          */
2654                         cs->vp = untraverse(cs->vp);
2655 
2656                         /*
2657                          * Set the different_export flag so we remember
2658                          * to pick up a new exportinfo entry for
2659                          * this new filesystem.
2660                          */
2661                         different_export = 1;
2662                 } else {
2663 
2664                         /*
2665                          * If dotdotting above an export point then set
2666                          * the different_export to get new export info.
2667                          */
2668                         different_export = nfs_exported(cs->exi, cs->vp);
2669                 }
2670         }
2671 
2672         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2673             NULL, NULL, NULL);
2674         if (error)
 
3392         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3393 
3394         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3395                 freeb(resp->mblk);
3396                 resp->mblk = NULL;
3397                 resp->data_len = 0;
3398         }
3399 }
3400 
3401 
3402 /* ARGSUSED */
3403 static void
3404 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405     struct compound_state *cs)
3406 {
3407         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3408         int             error;
3409         vnode_t         *vp;
3410         struct exportinfo *exi, *sav_exi;
3411         nfs_fh4_fmt_t   *fh_fmtp;
3412 
3413         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414 
3415         if (cs->vp) {
3416                 VN_RELE(cs->vp);
3417                 cs->vp = NULL;
3418         }
3419 
3420         if (cs->cr)
3421                 crfree(cs->cr);
3422 
3423         cs->cr = crdup(cs->basecr);
3424 
3425         vp = exi_public->exi_vp;
3426         if (vp == NULL) {
3427                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428                 goto out;
3429         }
3430 
3431         error = makefh4(&cs->fh, vp, exi_public);
3432         if (error != 0) {
3433                 *cs->statusp = resp->status = puterrno4(error);
3434                 goto out;
3435         }
3436         sav_exi = cs->exi;
3437         if (exi_public == exi_root) {
3438                 /*
3439                  * No filesystem is actually shared public, so we default
3440                  * to exi_root. In this case, we must check whether root
3441                  * is exported.
3442                  */
3443                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444 
3445                 /*
3446                  * if root filesystem is exported, the exportinfo struct that we
3447                          * should use is what checkexport4 returns, because exi_root is
3448                  * actually a mostly empty struct.
3449                  */
3450                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3451                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452                 cs->exi = ((exi != NULL) ? exi : exi_public);
3453         } else {
3454                 /*
3455                  * it's a properly shared filesystem
3456                  */
3457                 cs->exi = exi_public;
3458         }
3459 
3460         if (is_system_labeled()) {
3461                 bslabel_t *clabel;
3462 
3463                 ASSERT(req->rq_label != NULL);
3464                 clabel = req->rq_label;
3465                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466                     "got client label from request(1)",
3467                     struct svc_req *, req);
3468                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3469                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3470                             cs->exi)) {
3471                                 *cs->statusp = resp->status =
3472                                     NFS4ERR_SERVERFAULT;
3473                                 goto out;
3474                         }
3475                 }
3476         }
3477 
 
3579         struct exportinfo *exi, *sav_exi;
3580 
3581         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3582 
3583         if (cs->vp) {
3584                 VN_RELE(cs->vp);
3585                 cs->vp = NULL;
3586         }
3587 
3588         if (cs->cr)
3589                 crfree(cs->cr);
3590 
3591         cs->cr = crdup(cs->basecr);
3592 
3593         /*
3594          * Using rootdir, the system root vnode,
3595          * get its fid.
3596          */
3597         bzero(&fid, sizeof (fid));
3598         fid.fid_len = MAXFIDSZ;
3599         error = vop_fid_pseudo(rootdir, &fid);
3600         if (error != 0) {
3601                 *cs->statusp = resp->status = puterrno4(error);
3602                 goto out;
3603         }
3604 
3605         /*
3606          * Then use the root fsid & fid to find out if it's exported
3607          *
3608          * If the server root isn't exported directly, then
3609          * it should at least be a pseudo export based on
3610          * one or more exports further down in the server's
3611          * file tree.
3612          */
3613         exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3614         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615                 NFS4_DEBUG(rfs4_debug,
3616                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618                 goto out;
3619         }
3620 
3621         /*
3622          * Now make a filehandle based on the root
3623          * export and root vnode.
3624          */
3625         error = makefh4(&cs->fh, rootdir, exi);
3626         if (error != 0) {
3627                 *cs->statusp = resp->status = puterrno4(error);
3628                 goto out;
3629         }
3630 
3631         sav_exi = cs->exi;
3632         cs->exi = exi;
3633 
3634         VN_HOLD(rootdir);
3635         cs->vp = rootdir;
3636 
3637         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638                 VN_RELE(rootdir);
3639                 cs->vp = NULL;
3640                 cs->exi = sav_exi;
3641                 goto out;
3642         }
3643 
3644         *cs->statusp = resp->status = NFS4_OK;
3645         cs->deleg = FALSE;
3646 out:
3647         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648             PUTROOTFH4res *, resp);
3649 }
3650 
3651 /*
3652  * readlink: args: CURRENT_FH.
3653  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3654  */
3655 
3656 /* ARGSUSED */
3657 static void
3658 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 
3704 
3705         }
3706 
3707         va.va_mask = AT_MODE;
3708         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3709         if (error) {
3710                 *cs->statusp = resp->status = puterrno4(error);
3711                 goto out;
3712         }
3713 
3714         if (MANDLOCK(vp, va.va_mode)) {
3715                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3716                 goto out;
3717         }
3718 
3719         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3720 
3721         if (is_referral) {
3722                 char *s;
3723                 size_t strsz;
3724 
3725                 /* Get an artificial symlink based on a referral */
3726                 s = build_symlink(vp, cs->cr, &strsz);
3727                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3728                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3729                     vnode_t *, vp, char *, s);
3730                 if (s == NULL)
3731                         error = EINVAL;
3732                 else {
3733                         error = 0;
3734                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3735                         kmem_free(s, strsz);
3736                 }
3737 
3738         } else {
3739 
3740                 iov.iov_base = data;
3741                 iov.iov_len = MAXPATHLEN;
3742                 uio.uio_iov = &iov;
3743                 uio.uio_iovcnt = 1;
3744                 uio.uio_segflg = UIO_SYSSPACE;
3745                 uio.uio_extflg = UIO_COPY_CACHED;
3746                 uio.uio_loffset = 0;
3747                 uio.uio_resid = MAXPATHLEN;
 
4154                 }
4155                 goto out;
4156         }
4157         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4158 
4159         /* Actually do the REMOVE operation */
4160         if (vp->v_type == VDIR) {
4161                 /*
4162                  * Can't remove a directory that has a mounted-on filesystem.
4163                  */
4164                 if (vn_ismntpt(vp)) {
4165                         error = EACCES;
4166                 } else {
4167                         /*
4168                          * System V defines rmdir to return EEXIST,
4169                          * not ENOTEMPTY, if the directory is not
4170                          * empty.  A System V NFS server needs to map
4171                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4172                          * transmit over the wire.
4173                          */
4174                         if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4175                             NULL, 0)) == EEXIST)
4176                                 error = ENOTEMPTY;
4177                 }
4178         } else {
4179                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4180                     fp != NULL) {
4181                         struct vattr va;
4182                         vnode_t *tvp;
4183 
4184                         rfs4_dbe_lock(fp->rf_dbe);
4185                         tvp = fp->rf_vp;
4186                         if (tvp)
4187                                 VN_HOLD(tvp);
4188                         rfs4_dbe_unlock(fp->rf_dbe);
4189 
4190                         if (tvp) {
4191                                 /*
4192                                  * This is va_seq safe because we are not
4193                                  * manipulating dvp.
4194                                  */
 
4266         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4267             REMOVE4res *, resp);
4268 }
4269 
4270 /*
4271  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4272  *              oldname and newname.
4273  *      res: status. If success - CURRENT_FH unchanged, return change_info
4274  *              for both from and target directories.
4275  */
4276 /* ARGSUSED */
4277 static void
4278 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4279     struct compound_state *cs)
4280 {
4281         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4282         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4283         int error;
4284         vnode_t *odvp;
4285         vnode_t *ndvp;
4286         vnode_t *srcvp, *targvp;
4287         struct vattr obdva, oidva, oadva;
4288         struct vattr nbdva, nidva, nadva;
4289         char *onm, *nnm;
4290         uint_t olen, nlen;
4291         rfs4_file_t *fp, *sfp;
4292         int in_crit_src, in_crit_targ;
4293         int fp_rele_grant_hold, sfp_rele_grant_hold;
4294         bslabel_t *clabel;
4295         struct sockaddr *ca;
4296         char *converted_onm = NULL;
4297         char *converted_nnm = NULL;
4298         nfsstat4 status;
4299 
4300         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4301             RENAME4args *, args);
4302 
4303         fp = sfp = NULL;
4304         srcvp = targvp = NULL;
4305         in_crit_src = in_crit_targ = 0;
4306         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4307 
4308         /* CURRENT_FH: target directory */
4309         ndvp = cs->vp;
4310         if (ndvp == NULL) {
4311                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4312                 goto out;
4313         }
4314 
4315         /* SAVED_FH: from directory */
4316         odvp = cs->saved_vp;
4317         if (odvp == NULL) {
4318                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4319                 goto out;
4320         }
4321 
4322         if (cs->access == CS_ACCESS_DENIED) {
4323                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4324                 goto out;
4325         }
4326 
 
4459                 kmem_free(onm, olen);
4460                 if (nnm != converted_nnm)
4461                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4462                 kmem_free(nnm, nlen);
4463                 goto out;
4464         }
4465 
4466         sfp_rele_grant_hold = 1;
4467 
4468         /* Is the destination an existing file that has a delegation? */
4469         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4470             NULL, cs->cr)) {
4471                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4472                     NULL)) {
4473                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4474                         goto err_out;
4475                 }
4476         }
4477         fp_rele_grant_hold = 1;
4478 
4479 
4480         /* Check for NBMAND lock on both source and target */
4481         if (nbl_need_check(srcvp)) {
4482                 nbl_start_crit(srcvp, RW_READER);
4483                 in_crit_src = 1;
4484                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4485                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4486                         goto err_out;
4487                 }
4488         }
4489 
4490         if (targvp && nbl_need_check(targvp)) {
4491                 nbl_start_crit(targvp, RW_READER);
4492                 in_crit_targ = 1;
4493                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4494                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4495                         goto err_out;
4496                 }
4497         }
4498 
4499         /* Get source "before" change value */
4500         obdva.va_mask = AT_CTIME|AT_SEQ;
4501         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4502         if (!error) {
4503                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4504                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4505         }
4506         if (error) {
4507                 *cs->statusp = resp->status = puterrno4(error);
4508                 goto err_out;
4509         }
4510 
4511         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4512         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4513 
4514         if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4515             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4516                 struct vattr va;
4517                 vnode_t *tvp;
4518 
4519                 rfs4_dbe_lock(fp->rf_dbe);
4520                 tvp = fp->rf_vp;
4521                 if (tvp)
4522                         VN_HOLD(tvp);
4523                 rfs4_dbe_unlock(fp->rf_dbe);
4524 
4525                 if (tvp) {
4526                         va.va_mask = AT_NLINK;
4527                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4528                             va.va_nlink == 0) {
4529                                 /* The file is gone and so should the state */
4530                                 if (in_crit_targ) {
4531                                         nbl_end_crit(targvp);
4532                                         in_crit_targ = 0;
4533                                 }
4534                                 rfs4_close_all_state(fp);
4535                         }
4536                         VN_RELE(tvp);
4537                 }
4538         }
4539         if (error == 0)
4540                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4541 
4542         if (in_crit_src)
4543                 nbl_end_crit(srcvp);
4544         if (srcvp)
4545                 VN_RELE(srcvp);
4546         if (in_crit_targ)
4547                 nbl_end_crit(targvp);
4548         if (targvp)
4549                 VN_RELE(targvp);
4550 
4551         if (sfp) {
4552                 rfs4_clear_dont_grant(sfp);
4553                 rfs4_file_rele(sfp);
4554         }
4555         if (fp) {
4556                 rfs4_clear_dont_grant(fp);
4557                 rfs4_file_rele(fp);
4558         }
4559 
4560         if (converted_onm != onm)
4561                 kmem_free(converted_onm, MAXPATHLEN + 1);
4562         kmem_free(onm, olen);
4563         if (converted_nnm != nnm)
4564                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4565         kmem_free(nnm, nlen);
4566 
4567         /*
4568          * Get the initial "after" sequence number; if it fails, set to zero
4569          */
4570         oidva.va_mask = AT_SEQ;
 
5467 static void
5468 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5469     struct compound_state *cs)
5470 {
5471         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5472         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5473         int error;
5474         vnode_t *vp;
5475         struct vattr bva;
5476         u_offset_t rlimit;
5477         struct uio uio;
5478         struct iovec iov[MAX_IOVECS];
5479         struct iovec *iovp;
5480         int iovcnt;
5481         int ioflag;
5482         cred_t *savecred, *cr;
5483         bool_t *deleg = &cs->deleg;
5484         nfsstat4 stat;
5485         int in_crit = 0;
5486         caller_context_t ct;
5487 
5488         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5489             WRITE4args *, args);
5490 
5491         vp = cs->vp;
5492         if (vp == NULL) {
5493                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5494                 goto out;
5495         }
5496         if (cs->access == CS_ACCESS_DENIED) {
5497                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5498                 goto out;
5499         }
5500 
5501         cr = cs->cr;
5502 
5503         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5504             deleg, TRUE, &ct)) != NFS4_OK) {
5505                 *cs->statusp = resp->status = stat;
5506                 goto out;
 
5537                 goto out;
5538         }
5539 
5540         if (vp->v_type != VREG) {
5541                 *cs->statusp = resp->status =
5542                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5543                 goto out;
5544         }
5545 
5546         if (crgetuid(cr) != bva.va_uid &&
5547             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5548                 *cs->statusp = resp->status = puterrno4(error);
5549                 goto out;
5550         }
5551 
5552         if (MANDLOCK(vp, bva.va_mode)) {
5553                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5554                 goto out;
5555         }
5556 
5557         if (args->data_len == 0) {
5558                 *cs->statusp = resp->status = NFS4_OK;
5559                 resp->count = 0;
5560                 resp->committed = args->stable;
5561                 resp->writeverf = Write4verf;
5562                 goto out;
5563         }
5564 
5565         if (args->mblk != NULL) {
5566                 mblk_t *m;
5567                 uint_t bytes, round_len;
5568 
5569                 iovcnt = 0;
5570                 bytes = 0;
5571                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5572                 for (m = args->mblk;
5573                     m != NULL && bytes < round_len;
5574                     m = m->b_cont) {
5575                         iovcnt++;
5576                         bytes += MBLKL(m);
5577                 }
5578 #ifdef DEBUG
5579                 /* should have ended on an mblk boundary */
5580                 if (bytes != round_len) {
5581                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
 
5637         curthread->t_cred = cr;
5638         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5639         curthread->t_cred = savecred;
5640 
5641         if (iovp != iov)
5642                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5643 
5644         if (error) {
5645                 *cs->statusp = resp->status = puterrno4(error);
5646                 goto out;
5647         }
5648 
5649         *cs->statusp = resp->status = NFS4_OK;
5650         resp->count = args->data_len - uio.uio_resid;
5651 
5652         if (ioflag == 0)
5653                 resp->committed = UNSTABLE4;
5654         else
5655                 resp->committed = FILE_SYNC4;
5656 
5657         resp->writeverf = Write4verf;
5658 
5659 out:
5660         if (in_crit)
5661                 nbl_end_crit(vp);
5662 
5663         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5664             WRITE4res *, resp);
5665 }
5666 
5667 
5668 /* XXX put in a header file */
5669 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5670 
5671 void
5672 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5673     struct svc_req *req, cred_t *cr, int *rv)
5674 {
5675         uint_t i;
5676         struct compound_state cs;
5677 
5678         if (rv != NULL)
5679                 *rv = 0;
5680         rfs4_init_compound_state(&cs);
5681         /*
5682          * Form a reply tag by copying over the request tag.
5683          */
5684         resp->tag.utf8string_val =
5685             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5686         resp->tag.utf8string_len = args->tag.utf8string_len;
5687         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5688             resp->tag.utf8string_len);
5689 
5690         cs.statusp = &resp->status;
5691         cs.req = req;
5692         resp->array = NULL;
5693         resp->array_len = 0;
5694 
5695         /*
5696          * XXX for now, minorversion should be zero
5697          */
5698         if (args->minorversion != NFS4_MINORVERSION) {
5699                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5700                     &cs, COMPOUND4args *, args);
5701                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5702                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5703                     &cs, COMPOUND4res *, resp);
5704                 return;
5705         }
5706 
5707         if (args->array_len == 0) {
5708                 resp->status = NFS4_OK;
 
5714 
5715         cr = crget();
5716         ASSERT(cr != NULL);
5717 
5718         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5719                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5720                     &cs, COMPOUND4args *, args);
5721                 crfree(cr);
5722                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5723                     &cs, COMPOUND4res *, resp);
5724                 svcerr_badcred(req->rq_xprt);
5725                 if (rv != NULL)
5726                         *rv = 1;
5727                 return;
5728         }
5729         resp->array_len = args->array_len;
5730         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5731             KM_SLEEP);
5732 
5733         cs.basecr = cr;
5734 
5735         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5736             COMPOUND4args *, args);
5737 
5738         /*
5739          * For now, NFS4 compound processing must be protected by
5740          * exported_lock because it can access more than one exportinfo
5741          * per compound and share/unshare can now change multiple
5742          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5743          * per proc (excluding public exinfo), and exi_count design
5744          * is sufficient to protect concurrent execution of NFS2/3
5745          * ops along with unexport.  This lock will be removed as
5746          * part of the NFSv4 phase 2 namespace redesign work.
5747          */
5748         rw_enter(&exported_lock, RW_READER);
5749 
5750         /*
5751          * If this is the first compound we've seen, we need to start all
5752          * new instances' grace periods.
5753          */
5754         if (rfs4_seen_first_compound == 0) {
5755                 rfs4_grace_start_new();
5756                 /*
5757                  * This must be set after rfs4_grace_start_new(), otherwise
5758                  * another thread could proceed past here before the former
5759                  * is finished.
5760                  */
5761                 rfs4_seen_first_compound = 1;
5762         }
5763 
5764         for (i = 0; i < args->array_len && cs.cont; i++) {
5765                 nfs_argop4 *argop;
5766                 nfs_resop4 *resop;
5767                 uint_t op;
5768 
5769                 argop = &args->array[i];
5770                 resop = &resp->array[i];
5771                 resop->resop = argop->argop;
5772                 op = (uint_t)resop->resop;
5773 
5774                 if (op < rfsv4disp_cnt) {
5775                         /*
5776                          * Count the individual ops here; NULL and COMPOUND
5777                          * are counted in common_dispatch()
5778                          */
5779                         rfsproccnt_v4_ptr[op].value.ui64++;
5780 
5781                         NFS4_DEBUG(rfs4_debug > 1,
5782                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5783                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5784                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5785                             rfs4_op_string[op], *cs.statusp));
5786                         if (*cs.statusp != NFS4_OK)
5787                                 cs.cont = FALSE;
5788                 } else {
5789                         /*
5790                          * This is effectively dead code since XDR code
5791                          * will have already returned BADXDR if op doesn't
5792                          * decode to a legal value.  This is only done for a
5793                          * day when XDR code doesn't verify v4 opcodes.
5794                          */
5795                         op = OP_ILLEGAL;
5796                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5797 
5798                         rfs4_op_illegal(argop, resop, req, &cs);
5799                         cs.cont = FALSE;
5800                 }
5801 
5802                 /*
5803                  * If not at last op, and if we are to stop, then
5804                  * compact the results array.
5805                  */
5806                 if ((i + 1) < args->array_len && !cs.cont) {
5807                         nfs_resop4 *new_res = kmem_alloc(
5808                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5809                         bcopy(resp->array,
5810                             new_res, (i+1) * sizeof (nfs_resop4));
5811                         kmem_free(resp->array,
5812                             args->array_len * sizeof (nfs_resop4));
5813 
5814                         resp->array_len =  i + 1;
5815                         resp->array = new_res;
5816                 }
5817         }
5818 
5819         rw_exit(&exported_lock);
5820 
5821         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5822             COMPOUND4res *, resp);
5823 
5824         if (cs.vp)
5825                 VN_RELE(cs.vp);
5826         if (cs.saved_vp)
5827                 VN_RELE(cs.saved_vp);
5828         if (cs.saved_fh.nfs_fh4_val)
5829                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5830 
5831         if (cs.basecr)
5832                 crfree(cs.basecr);
5833         if (cs.cr)
5834                 crfree(cs.cr);
5835         /*
5836          * done with this compound request, free the label
5837          */
5838 
5839         if (req->rq_label != NULL) {
5840                 kmem_free(req->rq_label, sizeof (bslabel_t));
5841                 req->rq_label = NULL;
5842         }
5843 }
5844 
5845 /*
5846  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5847  * XXX zero out the tag and array values. Need to investigate why the
 
6511 
6512         /* Check for mandatory locking and that the size gets set. */
6513         cva.va_mask = AT_MODE;
6514         if (setsize)
6515                 cva.va_mask |= AT_SIZE;
6516 
6517         /* Assume the worst */
6518         cs->mandlock = TRUE;
6519 
6520         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6521                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6522 
6523                 /*
6524                  * Truncate the file if necessary; this would be
6525                  * the case for create over an existing file.
6526                  */
6527 
6528                 if (trunc) {
6529                         int in_crit = 0;
6530                         rfs4_file_t *fp;
6531                         bool_t create = FALSE;
6532 
6533                         /*
6534                          * We are writing over an existing file.
6535                          * Check to see if we need to recall a delegation.
6536                          */
6537                         rfs4_hold_deleg_policy();
6538                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6539                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6540                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6541                                         rfs4_file_rele(fp);
6542                                         rfs4_rele_deleg_policy();
6543                                         VN_RELE(vp);
6544                                         *attrset = 0;
6545                                         return (NFS4ERR_DELAY);
6546                                 }
6547                                 rfs4_file_rele(fp);
6548                         }
6549                         rfs4_rele_deleg_policy();
6550 
6551                         if (nbl_need_check(vp)) {
6552                                 in_crit = 1;
6553 
6554                                 ASSERT(reqsize == 0);
6555 
6556                                 nbl_start_crit(vp, RW_READER);
6557                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6558                                     cva.va_size, 0, NULL)) {
6559                                         in_crit = 0;
6560                                         nbl_end_crit(vp);
6561                                         VN_RELE(vp);
6562                                         *attrset = 0;
6563                                         return (NFS4ERR_ACCESS);
6564                                 }
6565                         }
6566                         ct.cc_sysid = 0;
6567                         ct.cc_pid = 0;
6568                         ct.cc_caller_id = nfs4_srv_caller_id;
6569                         ct.cc_flags = CC_DONTBLOCK;
 
8087 
8088         newcp->rc_cp_confirmed = cp_confirmed;
8089 
8090         rfs4_client_rele(newcp);
8091 
8092 out:
8093         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8094             SETCLIENTID4res *, res);
8095 }
8096 
     /*
      * SETCLIENTID_CONFIRM operation handler.
      *
      * Looks up the client record named by args->clientid and, if the
      * caller's credentials and the confirm verifier both match, clears the
      * record's rc_need_confirm flag, writes the clientid to stable storage,
      * closes any previously confirmed record hanging off rc_cp_confirmed,
      * and updates the lease.
      *
      * The outcome is stored in both res->status and *cs->statusp:
      *   - no record found:      whatever rfs4_check_clientid() returns
      *   - creds_ok() fails:     NFS4ERR_CLID_INUSE
      *   - verifier mismatch:    NFS4ERR_STALE_CLIENTID
      *   - otherwise:            NFS4_OK
      *
      * The reference obtained from rfs4_findclient_by_id() is released on
      * every path that obtained one.
      */
8097 /*ARGSUSED*/
8098 void
8099 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8100     struct svc_req *req, struct compound_state *cs)
8101 {
8102         SETCLIENTID_CONFIRM4args *args =
8103             &argop->nfs_argop4_u.opsetclientid_confirm;
8104         SETCLIENTID_CONFIRM4res *res =
8105             &resop->nfs_resop4_u.opsetclientid_confirm;
8106         rfs4_client_t *cp, *cptoclose = NULL;
8107 
8108         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8109             struct compound_state *, cs,
8110             SETCLIENTID_CONFIRM4args *, args);
8111 
8112         *cs->statusp = res->status = NFS4_OK;
8113 
8114         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8115 
8116         if (cp == NULL) {
                     /* No record: let rfs4_check_clientid() pick the error. */
8117                 *cs->statusp = res->status =
8118                     rfs4_check_clientid(&args->clientid, 1);
8119                 goto out;
8120         }
8121 
8122         if (!creds_ok(cp, req, cs)) {
8123                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8124                 rfs4_client_rele(cp);
8125                 goto out;
8126         }
8127 
8128         /* If the verifier doesn't match, the record doesn't match */
8129         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8130                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8131                 rfs4_client_rele(cp);
8132                 goto out;
8133         }
8134 
             /*
              * Everything up to rfs4_dbe_unlock() below runs with the
              * client's database entry locked.
              */
8135         rfs4_dbe_lock(cp->rc_dbe);
8136         cp->rc_need_confirm = FALSE;
8137         if (cp->rc_cp_confirmed) {
                     /*
                      * Detach the previously confirmed record; it is closed
                      * after the lock is dropped.
                      */
8138                 cptoclose = cp->rc_cp_confirmed;
8139                 cptoclose->rc_ss_remove = 1;
8140                 cp->rc_cp_confirmed = NULL;
8141         }
8142 
8143         /*
8144          * Update the client's associated server instance, if it's changed
8145          * since the client was created.
8146          */
8147         if (rfs4_servinst(cp) != rfs4_cur_servinst)
8148                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8149 
8150         /*
8151          * Record clientid in stable storage.
8152          * Must be done after server instance has been assigned.
8153          */
8154         rfs4_ss_clid(cp);
8155 
8156         rfs4_dbe_unlock(cp->rc_dbe);
8157 
8158         if (cptoclose)
8159                 /* don't need to rele, client_close does it */
8160                 rfs4_client_close(cptoclose);
8161 
8162         /* If needed, initiate CB_NULL call for callback path */
8163         rfs4_deleg_cb_check(cp);
8164         rfs4_update_lease(cp);
8165 
8166         /*
8167          * Check to see if client can perform reclaims
8168          */
8169         rfs4_ss_chkclid(cp);
8170 
8171         rfs4_client_rele(cp);
8172 
8173 out:
             /*
              * NOTE(review): the probe names its second argument type as
              * "SETCLIENTID_CONFIRM4 *" while res is declared as
              * SETCLIENTID_CONFIRM4res * -- confirm which name the probe
              * consumers expect.
              */
8174         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8175             struct compound_state *, cs,
8176             SETCLIENTID_CONFIRM4 *, res);
8177 }
8178 
8179 
8180 /*ARGSUSED*/
8181 void
8182 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8183     struct svc_req *req, struct compound_state *cs)
8184 {
8185         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8186         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8187         rfs4_state_t *sp;
8188         nfsstat4 status;
8189 
 
9793 /*
9794  * Check to see if we have a downrev Solaris client, so that we
9795  * can send it a symlink instead of a referral.
9796  */
9797 int
9798 client_is_downrev(struct svc_req *req)
9799 {
9800         struct sockaddr *ca;
9801         rfs4_clntip_t *ci;
9802         bool_t create = FALSE;
9803         int is_downrev;
9804 
9805         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9806         ASSERT(ca);
9807         ci = rfs4_find_clntip(ca, &create);
9808         if (ci == NULL)
9809                 return (0);
9810         is_downrev = ci->ri_no_referrals;
9811         rfs4_dbe_rele(ci->ri_dbe);
9812         return (is_downrev);
9813 }
 | 
 
 
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30 
  31 /*
  32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33  * Copyright 2019 Nexenta Systems, Inc.
  34  * Copyright 2019 Nexenta by DDN, Inc.
  35  */
  36 
  37 #include <sys/param.h>
  38 #include <sys/types.h>
  39 #include <sys/systm.h>
  40 #include <sys/cred.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 #include <sys/vnode.h>
  45 #include <sys/uio.h>
  46 #include <sys/errno.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/statvfs.h>
  49 #include <sys/kmem.h>
  50 #include <sys/dirent.h>
  51 #include <sys/cmn_err.h>
  52 #include <sys/debug.h>
  53 #include <sys/systeminfo.h>
  54 #include <sys/flock.h>
  55 #include <sys/pathname.h>
  56 #include <sys/nbmlock.h>
  57 #include <sys/share.h>
  58 #include <sys/atomic.h>
  59 #include <sys/policy.h>
  60 #include <sys/fem.h>
  61 #include <sys/sdt.h>
  62 #include <sys/ddi.h>
  63 #include <sys/zone.h>
  64 
  65 #include <fs/fs_reparse.h>
  66 
  67 #include <rpc/types.h>
  68 #include <rpc/auth.h>
  69 #include <rpc/rpcsec_gss.h>
  70 #include <rpc/svc.h>
  71 
  72 #include <nfs/nfs.h>
  73 #include <nfs/nfssys.h>
  74 #include <nfs/export.h>
  75 #include <nfs/nfs_cmd.h>
  76 #include <nfs/lm.h>
  77 #include <nfs/nfs4.h>
  78 #include <nfs/nfs4_drc.h>
  79 
  80 #include <sys/strsubr.h>
  81 #include <sys/strsun.h>
  82 
  83 #include <inet/common.h>
  84 #include <inet/ip.h>
  85 #include <inet/ip6.h>
  86 
  87 #include <sys/tsol/label.h>
  88 #include <sys/tsol/tndb.h>
  89 
  90 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  92 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  93 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  94 extern struct svc_ops rdma_svc_ops;
  95 extern int nfs_loaned_buffers;
  96 /* End of Tunables */
  97 
  98 static int rdma_setup_read_data4(READ4args *, READ4res *);
 
 
 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137 #define RFS4_MINLEN_RDDIR_BUF \
 138         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139 
 140 /*
 141  * It would be better to pad to 4 bytes since that's what XDR would do,
 142  * but the dirents UFS gives us are already padded to 8, so just take
 143  * what we're given.  Dircount is only a hint anyway.  Currently the
 144  * solaris kernel is ASCII only, so there's no point in calling the
 145  * UTF8 functions.
 146  *
 147  * dirent64: name padded to provide 8 byte struct alignment
 148  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  *
 150  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  *
 152  */
 153 #define DIRENT64_TO_DIRCOUNT(dp) \
 154         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155 
 156 
 157 static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 158 
 159 u_longlong_t    nfs4_srv_caller_id;
 160 uint_t          nfs4_srv_vkey = 0;
 161 
 162 void    rfs4_init_compound_state(struct compound_state *);
 163 
 164 static void     nullfree(caddr_t);
 165 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                     struct compound_state *);
 167 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                     struct compound_state *);
 169 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170                     struct compound_state *);
 171 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172                     struct compound_state *);
 173 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 174                     struct compound_state *);
 175 static void     rfs4_op_create_free(nfs_resop4 *resop);
 176 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 177                     struct svc_req *, struct compound_state *);
 178 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 179                     struct svc_req *, struct compound_state *);
 180 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181                     struct compound_state *);
 
 
 230 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                     struct compound_state *);
 232 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                     struct compound_state *);
 234 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                     struct compound_state *);
 236 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237                     struct compound_state *);
 238 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239                     struct compound_state *);
 240 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 241                     struct compound_state *);
 242 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 243                     struct svc_req *, struct compound_state *);
 244 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 245                     struct svc_req *req, struct compound_state *);
 246 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 247                     struct compound_state *);
 248 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 249 
 250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 251                     struct svc_req *);
 252 nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 253 void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 254 
 255 
 256 /*
 257  * translation table for attrs
 258  */
 259 struct nfs4_ntov_table {
 260         union nfs4_attr_u *na;
 261         uint8_t amap[NFS4_MAXNUM_ATTRS];
 262         int attrcnt;
 263         bool_t vfsstat;
 264 };
 265 
 266 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 267 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 268                     struct nfs4_svgetit_arg *sargp);
 269 
 270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 271                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 272                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 273 
 274 static void     hanfsv4_failover(nfs4_srv_t *);
 275 
 276 fem_t           *deleg_rdops;
 277 fem_t           *deleg_wrops;
 278 
 279 /*
 280  * NFS4 op dispatch table
 281  */
 282 
 283 struct rfsv4disp {
 284         void    (*dis_proc)();          /* proc to call */
 285         void    (*dis_resfree)();       /* frees space allocated by proc */
 286         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 287 };
 288 
 289 static struct rfsv4disp rfsv4disptab[] = {
 290         /*
 291          * NFS VERSION 4
 292          */
 293 
 294         /* RFS_NULL = 0 */
 295         {rfs4_op_illegal, nullfree, 0},
 296 
 297         /* UNUSED = 1 */
 298         {rfs4_op_illegal, nullfree, 0},
 
 450         "rfs4_op_putrootfh",
 451         "rfs4_op_read",
 452         "rfs4_op_readdir",
 453         "rfs4_op_readlink",
 454         "rfs4_op_remove",
 455         "rfs4_op_rename",
 456         "rfs4_op_renew",
 457         "rfs4_op_restorefh",
 458         "rfs4_op_savefh",
 459         "rfs4_op_secinfo",
 460         "rfs4_op_setattr",
 461         "rfs4_op_setclientid",
 462         "rfs4_op_setclient_confirm",
 463         "rfs4_op_verify",
 464         "rfs4_op_write",
 465         "rfs4_op_release_lockowner",
 466         "rfs4_op_illegal"
 467 };
 468 #endif
 469 
 470 void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 471 
 472 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 473 
 474 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 475 
     /*
      * nextdp(dp): step from the dirent64 record at dp to the record that
      * follows it, by advancing dp->d_reclen bytes.  Any definition of
      * nextdp already in effect is discarded first so that this one is used.
      */
 476 #ifdef  nextdp
 477 #undef nextdp
 478 #endif
 479 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 480 
     /*
      * FEM operation templates for delegated files.  rfs4_srvrinit() passes
      * nfs4_rd_deleg_tmpl to fem_create() to build deleg_rdops and
      * nfs4_wr_deleg_tmpl to build deleg_wrops.  Each entry pairs a vnode
      * operation name with the delegation monitor routine installed for it,
      * and each table ends with a NULL/NULL entry.
      *
      * The read-delegation table has no VOPNAME_READ entry; the
      * write-delegation table installs a monitor for reads as well.
      */
 481 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 482         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 483         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 484         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 485         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 486         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 487         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 488         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 489         NULL,                   NULL
 490 };
 491 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 492         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 493         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 494         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 495         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 496         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 497         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 498         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 499         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 500         NULL,                   NULL
 501 };
 502 
 503 nfs4_srv_t *
 504 nfs4_get_srv(void)
 505 {
 506         nfs_globals_t *ng = nfs_srv_getzg();
 507         nfs4_srv_t *srv = ng->nfs4_srv;
 508         ASSERT(srv != NULL);
 509         return (srv);
 510 }
 511 
 512 void
 513 rfs4_srv_zone_init(nfs_globals_t *ng)
 514 {
 515         nfs4_srv_t *nsrv4;
 516         timespec32_t verf;
 517 
 518         nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 519 
 520         /*
 521          * The following algorithm attempts to find a unique verifier
 522          * to be used as the write verifier returned from the server
 523          * to the client.  It is important that this verifier change
 524          * whenever the server reboots.  Of secondary importance, it
 525          * is important for the verifier to be unique between two
 526          * different servers.
 527          *
 528          * Thus, an attempt is made to use the system hostid and the
 529          * current time in seconds when the nfssrv kernel module is
 530          * loaded.  It is assumed that an NFS server will not be able
 531          * to boot and then to reboot in less than a second.  If the
 532          * hostid has not been set, then the current high resolution
 533          * time is used.  This will ensure different verifiers each
 534          * time the server reboots and minimize the chances that two
 535          * different servers will have the same verifier.
 536          * XXX - this is broken on LP64 kernels.
 537          */
 538         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 539         if (verf.tv_sec != 0) {
 540                 verf.tv_nsec = gethrestime_sec();
 541         } else {
 542                 timespec_t tverf;
 543 
 544                 gethrestime(&tverf);
 545                 verf.tv_sec = (time_t)tverf.tv_sec;
 546                 verf.tv_nsec = tverf.tv_nsec;
 547         }
 548         nsrv4->write4verf = *(uint64_t *)&verf;
 549 
 550         /* Used to manage create/destroy of server state */
 551         nsrv4->nfs4_server_state = NULL;
 552         nsrv4->nfs4_cur_servinst = NULL;
 553         nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 554         mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 555         mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 556         mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 557         rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 558 
 559         ng->nfs4_srv = nsrv4;
 560 }
 561 
 562 void
 563 rfs4_srv_zone_fini(nfs_globals_t *ng)
 564 {
 565         nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 566 
 567         ng->nfs4_srv = NULL;
 568 
 569         mutex_destroy(&nsrv4->deleg_lock);
 570         mutex_destroy(&nsrv4->state_lock);
 571         mutex_destroy(&nsrv4->servinst_lock);
 572         rw_destroy(&nsrv4->deleg_policy_lock);
 573 
 574         kmem_free(nsrv4, sizeof (*nsrv4));
 575 }
 576 
 577 void
 578 rfs4_srvrinit(void)
 579 {
 580         extern void rfs4_attr_init();
 581 
 582         rfs4_attr_init();
 583 
 584         if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 585                 rfs4_disable_delegation();
 586         } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 587             &deleg_wrops) != 0) {
 588                 rfs4_disable_delegation();
 589                 fem_free(deleg_rdops);
 590         }
 591 
 592         nfs4_srv_caller_id = fs_new_caller_id();
 593         lockt_sysid = lm_alloc_sysidt();
 594         vsd_create(&nfs4_srv_vkey, NULL);
 595         rfs4_state_g_init();
 596 }
 597 
 598 void
 599 rfs4_srvrfini(void)
 600 {
 601         if (lockt_sysid != LM_NOSYSID) {
 602                 lm_free_sysidt(lockt_sysid);
 603                 lockt_sysid = LM_NOSYSID;
 604         }
 605 
 606         rfs4_state_g_fini();
 607 
 608         fem_free(deleg_rdops);
 609         fem_free(deleg_wrops);
 610 }
 611 
 612 void
 613 rfs4_do_server_start(int server_upordown,
 614     int srv_delegation, int cluster_booted)
 615 {
 616         nfs4_srv_t *nsrv4 = nfs4_get_srv();
 617 
 618         /* Is this a warm start? */
 619         if (server_upordown == NFS_SERVER_QUIESCED) {
 620                 cmn_err(CE_NOTE, "nfs4_srv: "
 621                     "server was previously quiesced; "
 622                     "existing NFSv4 state will be re-used");
 623 
 624                 /*
 625                  * HA-NFSv4: this is also the signal
 626                  * that a Resource Group failover has
 627                  * occurred.
 628                  */
 629                 if (cluster_booted)
 630                         hanfsv4_failover(nsrv4);
 631         } else {
 632                 /* Cold start */
 633                 nsrv4->rfs4_start_time = 0;
 634                 rfs4_state_zone_init(nsrv4);
 635                 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 636                     nfs4_drc_hash);
 637 
 638                 /*
 639                  * The nfsd service was started with the -s option
 640                  * we need to pull in any state from the paths indicated.
 641                  */
 642                 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 643                         /* read in the stable storage state from these paths */
 644                         rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 645                             rfs4_dss_newpaths);
 646                 }
 647         }
 648 
 649         /* Check if delegation is to be enabled */
 650         if (srv_delegation != FALSE)
 651                 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 652 }
 653 
 654 void
 655 rfs4_init_compound_state(struct compound_state *cs)
 656 {
 657         bzero(cs, sizeof (*cs));
 658         cs->cont = TRUE;
 659         cs->access = CS_ACCESS_DENIED;
 660         cs->deleg = FALSE;
 661         cs->mandlock = FALSE;
 662         cs->fh.nfs_fh4_val = cs->fhbuf;
 663 }
 664 
 665 void
 666 rfs4_grace_start(rfs4_servinst_t *sip)
 667 {
 668         rw_enter(&sip->rwlock, RW_WRITER);
 669         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 670         sip->grace_period = rfs4_grace_period;
 671         rw_exit(&sip->rwlock);
 672 }
 673 
 674 /*
 
 697 
 698         rw_enter(&sip->rwlock, RW_READER);
 699         grace_expiry = sip->start_time + sip->grace_period;
 700         rw_exit(&sip->rwlock);
 701 
 702         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 703 }
 704 
 705 int
 706 rfs4_clnt_in_grace(rfs4_client_t *cp)
 707 {
 708         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 709 
 710         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 711 }
 712 
 713 /*
 714  * reset all currently active grace periods
 715  */
 716 void
 717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 718 {
 719         rfs4_servinst_t *sip;
 720 
 721         mutex_enter(&nsrv4->servinst_lock);
 722         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 723                 if (rfs4_servinst_in_grace(sip))
 724                         rfs4_grace_start(sip);
 725         mutex_exit(&nsrv4->servinst_lock);
 726 }
 727 
 728 /*
 729  * start any new instances' grace periods
 730  */
 731 void
 732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 733 {
 734         rfs4_servinst_t *sip;
 735 
 736         mutex_enter(&nsrv4->servinst_lock);
 737         for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 738                 if (rfs4_servinst_grace_new(sip))
 739                         rfs4_grace_start(sip);
 740         mutex_exit(&nsrv4->servinst_lock);
 741 }
 742 
 743 static rfs4_dss_path_t *
 744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 745     char *path, unsigned index)
 746 {
 747         size_t len;
 748         rfs4_dss_path_t *dss_path;
 749 
 750         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 751 
 752         /*
 753          * Take a copy of the string, since the original may be overwritten.
 754          * Sadly, no strdup() in the kernel.
 755          */
 756         /* allow for NUL */
 757         len = strlen(path) + 1;
 758         dss_path->path = kmem_alloc(len, KM_SLEEP);
 759         (void) strlcpy(dss_path->path, path, len);
 760 
 761         /* associate with servinst */
 762         dss_path->sip = sip;
 763         dss_path->index = index;
 764 
 765         /*
 766          * Add to list of served paths.
 767          * No locking required, as we're only ever called at startup.
 768          */
 769         if (nsrv4->dss_pathlist == NULL) {
 770                 /* this is the first dss_path_t */
 771 
 772                 /* needed for insque/remque */
 773                 dss_path->next = dss_path->prev = dss_path;
 774 
 775                 nsrv4->dss_pathlist = dss_path;
 776         } else {
 777                 insque(dss_path, nsrv4->dss_pathlist);
 778         }
 779 
 780         return (dss_path);
 781 }
 782 
 783 /*
 784  * Create a new server instance, and make it the currently active instance.
 785  * Note that starting the grace period too early will reduce the clients'
 786  * recovery window.
 787  */
 788 void
 789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 790     int dss_npaths, char **dss_paths)
 791 {
 792         unsigned i;
 793         rfs4_servinst_t *sip;
 794         rfs4_oldstate_t *oldstate;
 795 
 796         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 797         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 798 
 799         sip->start_time = (time_t)0;
 800         sip->grace_period = (time_t)0;
 801         sip->next = NULL;
 802         sip->prev = NULL;
 803 
 804         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 805         /*
 806          * This initial dummy entry is required to setup for insque/remque.
 807          * It must be skipped over whenever the list is traversed.
 808          */
 809         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 810         /* insque/remque require initial list entry to be self-terminated */
 811         oldstate->next = oldstate;
 812         oldstate->prev = oldstate;
 813         sip->oldstate = oldstate;
 814 
 815 
 816         sip->dss_npaths = dss_npaths;
 817         sip->dss_paths = kmem_alloc(dss_npaths *
 818             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 819 
 820         for (i = 0; i < dss_npaths; i++) {
 821                 sip->dss_paths[i] =
 822                     rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 823         }
 824 
 825         mutex_enter(&nsrv4->servinst_lock);
 826         if (nsrv4->nfs4_cur_servinst != NULL) {
 827                 /* add to linked list */
 828                 sip->prev = nsrv4->nfs4_cur_servinst;
 829                 nsrv4->nfs4_cur_servinst->next = sip;
 830         }
 831         if (start_grace)
 832                 rfs4_grace_start(sip);
 833         /* make the new instance "current" */
 834         nsrv4->nfs4_cur_servinst = sip;
 835 
 836         mutex_exit(&nsrv4->servinst_lock);
 837 }
 838 
 839 /*
 840  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 841  * all instances directly.
 842  */
 843 void
 844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 845 {
 846         rfs4_servinst_t *sip, *prev, *current;
 847 #ifdef DEBUG
 848         int n = 0;
 849 #endif
 850 
 851         mutex_enter(&nsrv4->servinst_lock);
 852         ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 853         current = nsrv4->nfs4_cur_servinst;
 854         nsrv4->nfs4_cur_servinst = NULL;
 855         for (sip = current; sip != NULL; sip = prev) {
 856                 prev = sip->prev;
 857                 rw_destroy(&sip->rwlock);
 858                 if (sip->oldstate)
 859                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 860                 if (sip->dss_paths) {
 861                         int i = sip->dss_npaths;
 862 
 863                         while (i > 0) {
 864                                 i--;
 865                                 if (sip->dss_paths[i] != NULL) {
 866                                         char *path = sip->dss_paths[i]->path;
 867 
 868                                         if (path != NULL) {
 869                                                 kmem_free(path,
 870                                                     strlen(path) + 1);
 871                                         }
 872                                         kmem_free(sip->dss_paths[i],
 873                                             sizeof (rfs4_dss_path_t));
 874                                 }
 875                         }
 876                         kmem_free(sip->dss_paths,
 877                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 878                 }
 879                 kmem_free(sip, sizeof (rfs4_servinst_t));
 880 #ifdef DEBUG
 881                 n++;
 882 #endif
 883         }
 884         mutex_exit(&nsrv4->servinst_lock);
 885 }
 886 
 887 /*
 888  * Assign the current server instance to a client_t.
 889  * Should be called with cp->rc_dbe held.
 890  */
 891 void
 892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 893     rfs4_servinst_t *sip)
 894 {
 895         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 896 
 897         /*
 898          * The lock ensures that if the current instance is in the process
 899          * of changing, we will see the new one.
 900          */
 901         mutex_enter(&nsrv4->servinst_lock);
 902         cp->rc_server_instance = sip;
 903         mutex_exit(&nsrv4->servinst_lock);
 904 }
 905 
 906 rfs4_servinst_t *
 907 rfs4_servinst(rfs4_client_t *cp)
 908 {
 909         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 910 
 911         return (cp->rc_server_instance);
 912 }
 913 
     /*
      * Result-free routine that does nothing.  It appears in the dis_resfree
      * slot of rfsv4disptab entries; the argument is ignored.
      */
 914 /* ARGSUSED */
 915 static void
 916 nullfree(caddr_t resop)
 917 {
 918 }
 919 
 920 /*
 921  * This is a fall-through for invalid or not implemented (yet) ops
 922  */
 923 /* ARGSUSED */
 
 936 {
 937         int i;
 938 
 939         for (i = 0; i < count; i++) {
 940                 if (nfsnum == flavor_list[i])
 941                         return (TRUE);
 942         }
 943         return (FALSE);
 944 }
 945 
 946 /*
 947  * Used by rfs4_op_secinfo to get the security information from the
 948  * export structure associated with the component.
 949  */
 950 /* ARGSUSED */
 951 static nfsstat4
 952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 953 {
 954         int error, different_export = 0;
 955         vnode_t *dvp, *vp;
 956         struct exportinfo *exi;
 957         fid_t fid;
 958         uint_t count, i;
 959         secinfo4 *resok_val;
 960         struct secinfo *secp;
 961         seconfig_t *si;
 962         bool_t did_traverse = FALSE;
 963         int dotdot, walk;
 964         nfs_export_t *ne = nfs_get_export();
 965 
 966         dvp = cs->vp;
 967         exi = cs->exi;
 968         ASSERT(exi != NULL);
 969         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 970 
 971         /*
 972          * If dotdotting, then need to check whether it's above the
 973          * root of a filesystem, or above an export point.
 974          */
 975         if (dotdot) {
 976                 vnode_t *zone_rootvp = ne->exi_root->exi_vp;
 977 
 978                 ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
 979                 /*
 980                  * If dotdotting at the root of a filesystem, then
 981                  * need to traverse back to the mounted-on filesystem
 982                  * and do the dotdot lookup there.
 983                  */
 984                 if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
 985 
 986                         /*
 987                          * If at the system root, then can
 988                          * go up no further.
 989                          */
 990                         if (VN_CMP(dvp, zone_rootvp))
 991                                 return (puterrno4(ENOENT));
 992 
 993                         /*
 994                          * Traverse back to the mounted-on filesystem
 995                          */
 996                         dvp = untraverse(dvp, zone_rootvp);
 997 
 998                         /*
 999                          * Set the different_export flag so we remember
1000                          * to pick up a new exportinfo entry for
1001                          * this new filesystem.
1002                          */
1003                         different_export = 1;
1004                 } else {
1005 
1006                         /*
1007                          * If dotdotting above an export point then set
1008                          * the different_export to get new export info.
1009                          */
1010                         different_export = nfs_exported(exi, dvp);
1011                 }
1012         }
1013 
1014         /*
1015          * Get the vnode for the component "nm".
1016          */
1017         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1018             NULL, NULL, NULL);
1019         if (error)
1020                 return (puterrno4(error));
1021 
1022         /*
1023          * If the vnode is in a pseudo filesystem, or if the security flavor
1024          * used in the request is valid but not an explicitly shared flavor,
1025          * or the access bit indicates that this is a limited access,
1026          * check whether this vnode is visible.
1027          */
1028         if (!different_export &&
1029             (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1030             cs->access & CS_ACCESS_LIMITED)) {
1031                 if (! nfs_visible(exi, vp, &different_export)) {
1032                         VN_RELE(vp);
1033                         return (puterrno4(ENOENT));
1034                 }
1035         }
1036 
1037         /*
1038          * If it's a mountpoint, then traverse it.
1039          */
1040         if (vn_ismntpt(vp)) {
1041                 if ((error = traverse(&vp)) != 0) {
1042                         VN_RELE(vp);
1043                         return (puterrno4(error));
1044                 }
1045                 /* remember that we had to traverse mountpoint */
1046                 did_traverse = TRUE;
1047                 different_export = 1;
1048         } else if (vp->v_vfsp != dvp->v_vfsp) {
1049                 /*
1050                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1051                  * then vp is probably an LOFS object.  We don't need the
 
1053                  * a server fs boundary and need to call checkexport4.
1054                  * (LOFS lookup hides server fs mountpoints, and actually calls
1055                  * traverse)
1056                  */
1057                 different_export = 1;
1058         }
1059 
1060         /*
1061          * Get the export information for it.
1062          */
1063         if (different_export) {
1064 
1065                 bzero(&fid, sizeof (fid));
1066                 fid.fid_len = MAXFIDSZ;
1067                 error = vop_fid_pseudo(vp, &fid);
1068                 if (error) {
1069                         VN_RELE(vp);
1070                         return (puterrno4(error));
1071                 }
1072 
1073                 /* We'll need to reassign "exi". */
1074                 if (dotdot)
1075                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1076                 else
1077                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1078 
1079                 if (exi == NULL) {
1080                         if (did_traverse == TRUE) {
1081                                 /*
1082                                  * If this vnode is a mounted-on vnode,
1083                                  * but the mounted-on file system is not
1084                                  * exported, send back the secinfo for
1085                                  * the exported node that the mounted-on
1086                                  * vnode lives in.
1087                                  */
1088                                 exi = cs->exi;
1089                         } else {
1090                                 VN_RELE(vp);
1091                                 return (puterrno4(EACCES));
1092                         }
1093                 }
1094         }
1095         ASSERT(exi != NULL);
1096 
1097 
1098         /*
1099          * Create the secinfo result based on the security information
1100          * from the exportinfo structure (exi).
1101          *
1102          * Return all flavors for a pseudo node.
1103          * For a real export node, return the flavor that the client
1104          * has access with.
1105          */
1106         ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1107         if (PSEUDO(exi)) {
1108                 count = exi->exi_export.ex_seccnt; /* total sec count */
1109                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1110                 secp = exi->exi_export.ex_secinfo;
1111 
1112                 for (i = 0; i < count; i++) {
1113                         si = &secp[i].s_secinfo;
1114                         resok_val[i].flavor = si->sc_rpcnum;
1115                         if (resok_val[i].flavor == RPCSEC_GSS) {
1116                                 rpcsec_gss_info *info;
1117 
1118                                 info = &resok_val[i].flavor_info;
1119                                 info->qop = si->sc_qop;
1120                                 info->service = (rpc_gss_svc_t)si->sc_service;
1121 
1122                                 /* get oid opaque data */
1123                                 info->oid.sec_oid4_len =
1124                                     si->sc_gss_mech_type->length;
1125                                 info->oid.sec_oid4_val = kmem_alloc(
1126                                     si->sc_gss_mech_type->length, KM_SLEEP);
 
1449         if (is_system_labeled() && !admin_low_client)
1450                 label_rele(tslabel);
1451 
1452         *cs->statusp = resp->status = NFS4_OK;
1453 out:
1454         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1455             ACCESS4res *, resp);
1456 }
1457 
1458 /* ARGSUSED */
1459 static void
1460 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1461     struct compound_state *cs)
1462 {
1463         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1464         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1465         int error;
1466         vnode_t *vp = cs->vp;
1467         cred_t *cr = cs->cr;
1468         vattr_t va;
1469         nfs4_srv_t *nsrv4;
1470 
1471         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1472             COMMIT4args *, args);
1473 
1474         if (vp == NULL) {
1475                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1476                 goto out;
1477         }
1478         if (cs->access == CS_ACCESS_DENIED) {
1479                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1480                 goto out;
1481         }
1482 
1483         if (args->offset + args->count < args->offset) {
1484                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1485                 goto out;
1486         }
1487 
1488         va.va_mask = AT_UID;
1489         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 
1506                         resp->status = NFS4ERR_ISDIR;
1507                 else
1508                         resp->status = NFS4ERR_INVAL;
1509                 *cs->statusp = resp->status;
1510                 goto out;
1511         }
1512 
1513         if (crgetuid(cr) != va.va_uid &&
1514             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1515                 *cs->statusp = resp->status = puterrno4(error);
1516                 goto out;
1517         }
1518 
1519         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1520 
1521         if (error) {
1522                 *cs->statusp = resp->status = puterrno4(error);
1523                 goto out;
1524         }
1525 
1526         nsrv4 = nfs4_get_srv();
1527         *cs->statusp = resp->status = NFS4_OK;
1528         resp->writeverf = nsrv4->write4verf;
1529 out:
1530         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1531             COMMIT4res *, resp);
1532 }
1533 
1534 /*
1535  * do_rfs4_op_mknod is called from rfs4_op_create after all initial
1536  * verification has completed. It does the NFSv4 create for special files.
1537  */
1538 /* ARGSUSED */
1539 static vnode_t *
1540 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1541     struct compound_state *cs, vattr_t *vap, char *nm)
1542 {
1543         int error;
1544         cred_t *cr = cs->cr;
1545         vnode_t *dvp = cs->vp;
1546         vnode_t *vp = NULL;
1547         int mode;
1548         enum vcexcl excl;
 
2704         fid_t fid;
2705         int attrdir, dotdot, walk;
2706         bool_t is_newvp = FALSE;
2707 
2708         if (cs->vp->v_flag & V_XATTRDIR) {
2709                 attrdir = 1;
2710                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2711         } else {
2712                 attrdir = 0;
2713                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2714         }
2715 
2716         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2717 
2718         /*
2719          * If dotdotting, then need to check whether it's
2720          * above the root of a filesystem, or above an
2721          * export point.
2722          */
2723         if (dotdot) {
2724                 vnode_t *zone_rootvp;
2725 
2726                 ASSERT(cs->exi != NULL);
2727                 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2728                 /*
2729                  * If dotdotting at the root of a filesystem, then
2730                  * need to traverse back to the mounted-on filesystem
2731                  * and do the dotdot lookup there.
2732                  */
2733                 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2734 
2735                         /*
2736                          * If at the system root, then can
2737                          * go up no further.
2738                          */
2739                         if (VN_CMP(cs->vp, zone_rootvp))
2740                                 return (puterrno4(ENOENT));
2741 
2742                         /*
2743                          * Traverse back to the mounted-on filesystem
2744                          */
2745                         cs->vp = untraverse(cs->vp, zone_rootvp);
2746 
2747                         /*
2748                          * Set the different_export flag so we remember
2749                          * to pick up a new exportinfo entry for
2750                          * this new filesystem.
2751                          */
2752                         different_export = 1;
2753                 } else {
2754 
2755                         /*
2756                          * If dotdotting above an export point then set
2757                          * the different_export to get new export info.
2758                          */
2759                         different_export = nfs_exported(cs->exi, cs->vp);
2760                 }
2761         }
2762 
2763         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2764             NULL, NULL, NULL);
2765         if (error)
 
3483         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3484 
3485         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3486                 freeb(resp->mblk);
3487                 resp->mblk = NULL;
3488                 resp->data_len = 0;
3489         }
3490 }
3491 
3492 
3493 /* ARGSUSED */
3494 static void
3495 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3496     struct compound_state *cs)
3497 {
3498         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3499         int             error;
3500         vnode_t         *vp;
3501         struct exportinfo *exi, *sav_exi;
3502         nfs_fh4_fmt_t   *fh_fmtp;
3503         nfs_export_t *ne = nfs_get_export();
3504 
3505         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3506 
3507         if (cs->vp) {
3508                 VN_RELE(cs->vp);
3509                 cs->vp = NULL;
3510         }
3511 
3512         if (cs->cr)
3513                 crfree(cs->cr);
3514 
3515         cs->cr = crdup(cs->basecr);
3516 
3517         vp = ne->exi_public->exi_vp;
3518         if (vp == NULL) {
3519                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3520                 goto out;
3521         }
3522 
3523         error = makefh4(&cs->fh, vp, ne->exi_public);
3524         if (error != 0) {
3525                 *cs->statusp = resp->status = puterrno4(error);
3526                 goto out;
3527         }
3528         sav_exi = cs->exi;
3529         if (ne->exi_public == ne->exi_root) {
3530                 /*
3531                  * No filesystem is actually shared public, so we default
3532                  * to exi_root. In this case, we must check whether root
3533                  * is exported.
3534                  */
3535                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3536 
3537                 /*
3538                  * If the root filesystem is exported, the exportinfo struct
3539                  * that we should use is what checkexport4 returns, because
3540                  * exi_root is actually a mostly empty struct.
3541                  */
3542                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3543                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3544                 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3545         } else {
3546                 /*
3547                  * it's a properly shared filesystem
3548                  */
3549                 cs->exi = ne->exi_public;
3550         }
3551 
3552         if (is_system_labeled()) {
3553                 bslabel_t *clabel;
3554 
3555                 ASSERT(req->rq_label != NULL);
3556                 clabel = req->rq_label;
3557                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3558                     "got client label from request(1)",
3559                     struct svc_req *, req);
3560                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3561                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3562                             cs->exi)) {
3563                                 *cs->statusp = resp->status =
3564                                     NFS4ERR_SERVERFAULT;
3565                                 goto out;
3566                         }
3567                 }
3568         }
3569 
 
3671         struct exportinfo *exi, *sav_exi;
3672 
3673         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3674 
3675         if (cs->vp) {
3676                 VN_RELE(cs->vp);
3677                 cs->vp = NULL;
3678         }
3679 
3680         if (cs->cr)
3681                 crfree(cs->cr);
3682 
3683         cs->cr = crdup(cs->basecr);
3684 
3685         /*
3686          * Using ZONE_ROOTVP(), the root vnode,
3687          * get its fid.
3688          */
3689         bzero(&fid, sizeof (fid));
3690         fid.fid_len = MAXFIDSZ;
3691         error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3692         if (error != 0) {
3693                 *cs->statusp = resp->status = puterrno4(error);
3694                 goto out;
3695         }
3696 
3697         /*
3698          * Then use the root fsid & fid to find out if it's exported
3699          *
3700          * If the server root isn't exported directly, then
3701          * it should at least be a pseudo export based on
3702          * one or more exports further down in the server's
3703          * file tree.
3704          */
3705         exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3706         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3707                 NFS4_DEBUG(rfs4_debug,
3708                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3709                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3710                 goto out;
3711         }
3712 
3713         /*
3714          * Now make a filehandle based on the root
3715          * export and root vnode.
3716          */
3717         error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3718         if (error != 0) {
3719                 *cs->statusp = resp->status = puterrno4(error);
3720                 goto out;
3721         }
3722 
3723         sav_exi = cs->exi;
3724         cs->exi = exi;
3725 
3726         VN_HOLD(ZONE_ROOTVP());
3727         cs->vp = ZONE_ROOTVP();
3728 
3729         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3730                 VN_RELE(cs->vp);
3731                 cs->vp = NULL;
3732                 cs->exi = sav_exi;
3733                 goto out;
3734         }
3735 
3736         *cs->statusp = resp->status = NFS4_OK;
3737         cs->deleg = FALSE;
3738 out:
3739         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3740             PUTROOTFH4res *, resp);
3741 }
3742 
3743 /*
3744  * readlink: args: CURRENT_FH.
3745  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3746  */
3747 
3748 /* ARGSUSED */
3749 static void
3750 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 
3796 
3797         }
3798 
3799         va.va_mask = AT_MODE;
3800         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3801         if (error) {
3802                 *cs->statusp = resp->status = puterrno4(error);
3803                 goto out;
3804         }
3805 
3806         if (MANDLOCK(vp, va.va_mode)) {
3807                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3808                 goto out;
3809         }
3810 
3811         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3812 
3813         if (is_referral) {
3814                 char *s;
3815                 size_t strsz;
3816                 kstat_named_t *stat =
3817                     cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3818 
3819                 /* Get an artificial symlink based on a referral */
3820                 s = build_symlink(vp, cs->cr, &strsz);
3821                 stat[NFS_REFERLINKS].value.ui64++;
3822                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3823                     vnode_t *, vp, char *, s);
3824                 if (s == NULL)
3825                         error = EINVAL;
3826                 else {
3827                         error = 0;
3828                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3829                         kmem_free(s, strsz);
3830                 }
3831 
3832         } else {
3833 
3834                 iov.iov_base = data;
3835                 iov.iov_len = MAXPATHLEN;
3836                 uio.uio_iov = &iov;
3837                 uio.uio_iovcnt = 1;
3838                 uio.uio_segflg = UIO_SYSSPACE;
3839                 uio.uio_extflg = UIO_COPY_CACHED;
3840                 uio.uio_loffset = 0;
3841                 uio.uio_resid = MAXPATHLEN;
 
4248                 }
4249                 goto out;
4250         }
4251         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4252 
4253         /* Actually do the REMOVE operation */
4254         if (vp->v_type == VDIR) {
4255                 /*
4256                  * Can't remove a directory that has a mounted-on filesystem.
4257                  */
4258                 if (vn_ismntpt(vp)) {
4259                         error = EACCES;
4260                 } else {
4261                         /*
4262                          * System V defines rmdir to return EEXIST,
4263                          * not ENOTEMPTY, if the directory is not
4264                          * empty.  A System V NFS server needs to map
4265                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4266                          * transmit over the wire.
4267                          */
4268                         if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4269                             NULL, 0)) == EEXIST)
4270                                 error = ENOTEMPTY;
4271                 }
4272         } else {
4273                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4274                     fp != NULL) {
4275                         struct vattr va;
4276                         vnode_t *tvp;
4277 
4278                         rfs4_dbe_lock(fp->rf_dbe);
4279                         tvp = fp->rf_vp;
4280                         if (tvp)
4281                                 VN_HOLD(tvp);
4282                         rfs4_dbe_unlock(fp->rf_dbe);
4283 
4284                         if (tvp) {
4285                                 /*
4286                                  * This is va_seq safe because we are not
4287                                  * manipulating dvp.
4288                                  */
 
4360         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4361             REMOVE4res *, resp);
4362 }
4363 
4364 /*
4365  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4366  *              oldname and newname.
4367  *      res: status. If success - CURRENT_FH unchanged, return change_info
4368  *              for both from and target directories.
4369  */
4370 /* ARGSUSED */
4371 static void
4372 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4373     struct compound_state *cs)
4374 {
4375         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4376         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4377         int error;
4378         vnode_t *odvp;
4379         vnode_t *ndvp;
4380         vnode_t *srcvp, *targvp, *tvp;
4381         struct vattr obdva, oidva, oadva;
4382         struct vattr nbdva, nidva, nadva;
4383         char *onm, *nnm;
4384         uint_t olen, nlen;
4385         rfs4_file_t *fp, *sfp;
4386         int in_crit_src, in_crit_targ;
4387         int fp_rele_grant_hold, sfp_rele_grant_hold;
4388         int unlinked;
4389         bslabel_t *clabel;
4390         struct sockaddr *ca;
4391         char *converted_onm = NULL;
4392         char *converted_nnm = NULL;
4393         nfsstat4 status;
4394 
4395         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4396             RENAME4args *, args);
4397 
4398         fp = sfp = NULL;
4399         srcvp = targvp = tvp = NULL;
4400         in_crit_src = in_crit_targ = 0;
4401         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4402         unlinked = 0;
4403 
4404         /* CURRENT_FH: target directory */
4405         ndvp = cs->vp;
4406         if (ndvp == NULL) {
4407                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4408                 goto out;
4409         }
4410 
4411         /* SAVED_FH: from directory */
4412         odvp = cs->saved_vp;
4413         if (odvp == NULL) {
4414                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4415                 goto out;
4416         }
4417 
4418         if (cs->access == CS_ACCESS_DENIED) {
4419                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4420                 goto out;
4421         }
4422 
 
4555                 kmem_free(onm, olen);
4556                 if (nnm != converted_nnm)
4557                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4558                 kmem_free(nnm, nlen);
4559                 goto out;
4560         }
4561 
4562         sfp_rele_grant_hold = 1;
4563 
4564         /* Does the destination exist as a file and have a delegation? */
4565         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4566             NULL, cs->cr)) {
4567                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4568                     NULL)) {
4569                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4570                         goto err_out;
4571                 }
4572         }
4573         fp_rele_grant_hold = 1;
4574 
4575         /* Check for NBMAND lock on both source and target */
4576         if (nbl_need_check(srcvp)) {
4577                 nbl_start_crit(srcvp, RW_READER);
4578                 in_crit_src = 1;
4579                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4580                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4581                         goto err_out;
4582                 }
4583         }
4584 
4585         if (targvp && nbl_need_check(targvp)) {
4586                 nbl_start_crit(targvp, RW_READER);
4587                 in_crit_targ = 1;
4588                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4589                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4590                         goto err_out;
4591                 }
4592         }
4593 
4594         /* Get source "before" change value */
4595         obdva.va_mask = AT_CTIME|AT_SEQ;
4596         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4597         if (!error) {
4598                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4599                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4600         }
4601         if (error) {
4602                 *cs->statusp = resp->status = puterrno4(error);
4603                 goto err_out;
4604         }
4605 
4606         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4607         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4608 
4609         error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4610             NULL, 0);
4611 
4612         /*
4613          * If target existed and was unlinked by VOP_RENAME, state will need
4614          * to be closed. To avoid deadlock, rfs4_close_all_state will be done
4615          * after any necessary nbl_end_crit on srcvp and targvp.
4616          */
4617         if (error == 0 && fp != NULL) {
4618                 rfs4_dbe_lock(fp->rf_dbe);
4619                 tvp = fp->rf_vp;
4620                 if (tvp)
4621                         VN_HOLD(tvp);
4622                 rfs4_dbe_unlock(fp->rf_dbe);
4623 
4624                 if (tvp) {
4625                         struct vattr va;
4626                         va.va_mask = AT_NLINK;
4627 
4628                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4629                             va.va_nlink == 0) {
4630                                 unlinked = 1;
4631 
4632                                 /* DEBUG data */
4633                                 if ((srcvp == targvp) || (tvp != targvp)) {
4634                                         cmn_err(CE_WARN, "rfs4_op_rename: "
4635                                             "srcvp %p, targvp: %p, tvp: %p",
4636                                             (void *)srcvp, (void *)targvp,
4637                                             (void *)tvp);
4638                                 }
4639                         } else {
4640                                 VN_RELE(tvp);
4641                         }
4642                 }
4643         }
4644         if (error == 0)
4645                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4646 
4647         if (in_crit_src)
4648                 nbl_end_crit(srcvp);
4649         if (srcvp)
4650                 VN_RELE(srcvp);
4651         if (in_crit_targ)
4652                 nbl_end_crit(targvp);
4653         if (targvp)
4654                 VN_RELE(targvp);
4655 
4656         if (unlinked) {
4657                 ASSERT(fp != NULL);
4658                 ASSERT(tvp != NULL);
4659 
4660                 /* DEBUG data */
4661                 if (RW_READ_HELD(&tvp->v_nbllock)) {
4662                         cmn_err(CE_WARN, "rfs4_op_rename: "
4663                             "RW_READ_HELD(%p)", (void *)tvp);
4664                 }
4665 
4666                 /* The file is gone and so should the state */
4667                 rfs4_close_all_state(fp);
4668                 VN_RELE(tvp);
4669         }
4670 
4671         if (sfp) {
4672                 rfs4_clear_dont_grant(sfp);
4673                 rfs4_file_rele(sfp);
4674         }
4675         if (fp) {
4676                 rfs4_clear_dont_grant(fp);
4677                 rfs4_file_rele(fp);
4678         }
4679 
4680         if (converted_onm != onm)
4681                 kmem_free(converted_onm, MAXPATHLEN + 1);
4682         kmem_free(onm, olen);
4683         if (converted_nnm != nnm)
4684                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4685         kmem_free(nnm, nlen);
4686 
4687         /*
4688          * Get the initial "after" sequence number, if it fails, set to zero
4689          */
4690         oidva.va_mask = AT_SEQ;
 
5587 static void
5588 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5589     struct compound_state *cs)
5590 {
5591         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5592         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5593         int error;
5594         vnode_t *vp;
5595         struct vattr bva;
5596         u_offset_t rlimit;
5597         struct uio uio;
5598         struct iovec iov[MAX_IOVECS];
5599         struct iovec *iovp;
5600         int iovcnt;
5601         int ioflag;
5602         cred_t *savecred, *cr;
5603         bool_t *deleg = &cs->deleg;
5604         nfsstat4 stat;
5605         int in_crit = 0;
5606         caller_context_t ct;
5607         nfs4_srv_t *nsrv4;
5608 
5609         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5610             WRITE4args *, args);
5611 
5612         vp = cs->vp;
5613         if (vp == NULL) {
5614                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5615                 goto out;
5616         }
5617         if (cs->access == CS_ACCESS_DENIED) {
5618                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5619                 goto out;
5620         }
5621 
5622         cr = cs->cr;
5623 
5624         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5625             deleg, TRUE, &ct)) != NFS4_OK) {
5626                 *cs->statusp = resp->status = stat;
5627                 goto out;
 
5658                 goto out;
5659         }
5660 
5661         if (vp->v_type != VREG) {
5662                 *cs->statusp = resp->status =
5663                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5664                 goto out;
5665         }
5666 
5667         if (crgetuid(cr) != bva.va_uid &&
5668             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5669                 *cs->statusp = resp->status = puterrno4(error);
5670                 goto out;
5671         }
5672 
5673         if (MANDLOCK(vp, bva.va_mode)) {
5674                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5675                 goto out;
5676         }
5677 
5678         nsrv4 = nfs4_get_srv();
5679         if (args->data_len == 0) {
5680                 *cs->statusp = resp->status = NFS4_OK;
5681                 resp->count = 0;
5682                 resp->committed = args->stable;
5683                 resp->writeverf = nsrv4->write4verf;
5684                 goto out;
5685         }
5686 
5687         if (args->mblk != NULL) {
5688                 mblk_t *m;
5689                 uint_t bytes, round_len;
5690 
5691                 iovcnt = 0;
5692                 bytes = 0;
5693                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5694                 for (m = args->mblk;
5695                     m != NULL && bytes < round_len;
5696                     m = m->b_cont) {
5697                         iovcnt++;
5698                         bytes += MBLKL(m);
5699                 }
5700 #ifdef DEBUG
5701                 /* should have ended on an mblk boundary */
5702                 if (bytes != round_len) {
5703                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
 
5759         curthread->t_cred = cr;
5760         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5761         curthread->t_cred = savecred;
5762 
5763         if (iovp != iov)
5764                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5765 
5766         if (error) {
5767                 *cs->statusp = resp->status = puterrno4(error);
5768                 goto out;
5769         }
5770 
5771         *cs->statusp = resp->status = NFS4_OK;
5772         resp->count = args->data_len - uio.uio_resid;
5773 
5774         if (ioflag == 0)
5775                 resp->committed = UNSTABLE4;
5776         else
5777                 resp->committed = FILE_SYNC4;
5778 
5779         resp->writeverf = nsrv4->write4verf;
5780 
5781 out:
5782         if (in_crit)
5783                 nbl_end_crit(vp);
5784 
5785         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5786             WRITE4res *, resp);
5787 }
5788 
5789 
5790 /* XXX put in a header file */
5791 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5792 
5793 void
5794 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5795     struct svc_req *req, cred_t *cr, int *rv)
5796 {
5797         uint_t i;
5798         struct compound_state cs;
5799         nfs4_srv_t *nsrv4;
5800         nfs_export_t *ne = nfs_get_export();
5801 
5802         if (rv != NULL)
5803                 *rv = 0;
5804         rfs4_init_compound_state(&cs);
5805         /*
5806          * Form a reply tag by copying over the request tag.
5807          */
5808         resp->tag.utf8string_len = args->tag.utf8string_len;
5809         if (args->tag.utf8string_len != 0) {
5810                 resp->tag.utf8string_val =
5811                     kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5812                 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5813                     resp->tag.utf8string_len);
5814         } else {
5815                 resp->tag.utf8string_val = NULL;
5816         }
5817 
5818         cs.statusp = &resp->status;
5819         cs.req = req;
5820         resp->array = NULL;
5821         resp->array_len = 0;
5822 
5823         /*
5824          * XXX for now, minorversion should be zero
5825          */
5826         if (args->minorversion != NFS4_MINORVERSION) {
5827                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5828                     &cs, COMPOUND4args *, args);
5829                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5830                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5831                     &cs, COMPOUND4res *, resp);
5832                 return;
5833         }
5834 
5835         if (args->array_len == 0) {
5836                 resp->status = NFS4_OK;
 
5842 
5843         cr = crget();
5844         ASSERT(cr != NULL);
5845 
5846         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5847                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5848                     &cs, COMPOUND4args *, args);
5849                 crfree(cr);
5850                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5851                     &cs, COMPOUND4res *, resp);
5852                 svcerr_badcred(req->rq_xprt);
5853                 if (rv != NULL)
5854                         *rv = 1;
5855                 return;
5856         }
5857         resp->array_len = args->array_len;
5858         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5859             KM_SLEEP);
5860 
5861         cs.basecr = cr;
5862         nsrv4 = nfs4_get_srv();
5863 
5864         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5865             COMPOUND4args *, args);
5866 
5867         /*
5868          * For now, NFS4 compound processing must be protected by
5869          * exported_lock because it can access more than one exportinfo
5870          * per compound and share/unshare can now change multiple
5871          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5872          * per proc (excluding public exinfo), and exi_count design
5873          * is sufficient to protect concurrent execution of NFS2/3
5874          * ops along with unexport.  This lock will be removed as
5875          * part of the NFSv4 phase 2 namespace redesign work.
5876          */
5877         rw_enter(&ne->exported_lock, RW_READER);
5878 
5879         /*
5880          * If this is the first compound we've seen, we need to start all
5881          * new instances' grace periods.
5882          */
5883         if (nsrv4->seen_first_compound == 0) {
5884                 rfs4_grace_start_new(nsrv4);
5885                 /*
5886                  * This must be set after rfs4_grace_start_new(), otherwise
5887                  * another thread could proceed past here before the former
5888                  * is finished.
5889                  */
5890                 nsrv4->seen_first_compound = 1;
5891         }
5892 
5893         for (i = 0; i < args->array_len && cs.cont; i++) {
5894                 nfs_argop4 *argop;
5895                 nfs_resop4 *resop;
5896                 uint_t op;
5897                 kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
5898 
5899                 argop = &args->array[i];
5900                 resop = &resp->array[i];
5901                 resop->resop = argop->argop;
5902                 op = (uint_t)resop->resop;
5903 
5904                 if (op < rfsv4disp_cnt) {
5905                         /*
5906                          * Count the individual ops here; NULL and COMPOUND
5907                          * are counted in common_dispatch()
5908                          */
5909                         stat[op].value.ui64++;
5910 
5911                         NFS4_DEBUG(rfs4_debug > 1,
5912                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5913                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5914                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5915                             rfs4_op_string[op], *cs.statusp));
5916                         if (*cs.statusp != NFS4_OK)
5917                                 cs.cont = FALSE;
5918                 } else {
5919                         /*
5920                          * This is effectively dead code since XDR code
5921                          * will have already returned BADXDR if op doesn't
5922                          * decode to a legal value.  This is only done for the
5923                          * day when XDR code doesn't verify v4 opcodes.
5924                          */
5925                         op = OP_ILLEGAL;
5926                         stat[OP_ILLEGAL_IDX].value.ui64++;
5927 
5928                         rfs4_op_illegal(argop, resop, req, &cs);
5929                         cs.cont = FALSE;
5930                 }
5931 
5932                 /*
5933                  * If not at last op, and if we are to stop, then
5934                  * compact the results array.
5935                  */
5936                 if ((i + 1) < args->array_len && !cs.cont) {
5937                         nfs_resop4 *new_res = kmem_alloc(
5938                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5939                         bcopy(resp->array,
5940                             new_res, (i+1) * sizeof (nfs_resop4));
5941                         kmem_free(resp->array,
5942                             args->array_len * sizeof (nfs_resop4));
5943 
5944                         resp->array_len =  i + 1;
5945                         resp->array = new_res;
5946                 }
5947         }
5948 
5949         rw_exit(&ne->exported_lock);
5950 
5951         /*
5952          * clear exportinfo and vnode fields from compound_state before dtrace
5953          * probe, to avoid tracing residual values for path and share path.
5954          */
5955         if (cs.vp)
5956                 VN_RELE(cs.vp);
5957         if (cs.saved_vp)
5958                 VN_RELE(cs.saved_vp);
5959         cs.exi = cs.saved_exi = NULL;
5960         cs.vp = cs.saved_vp = NULL;
5961 
5962         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5963             COMPOUND4res *, resp);
5964 
5965         if (cs.saved_fh.nfs_fh4_val)
5966                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5967 
5968         if (cs.basecr)
5969                 crfree(cs.basecr);
5970         if (cs.cr)
5971                 crfree(cs.cr);
5972         /*
5973          * done with this compound request, free the label
5974          */
5975 
5976         if (req->rq_label != NULL) {
5977                 kmem_free(req->rq_label, sizeof (bslabel_t));
5978                 req->rq_label = NULL;
5979         }
5980 }
5981 
5982 /*
5983  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5984  * XXX zero out the tag and array values. Need to investigate why the
 
6648 
6649         /* Check for mandatory locking and that the size gets set. */
6650         cva.va_mask = AT_MODE;
6651         if (setsize)
6652                 cva.va_mask |= AT_SIZE;
6653 
6654         /* Assume the worst */
6655         cs->mandlock = TRUE;
6656 
6657         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6658                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6659 
6660                 /*
6661                  * Truncate the file if necessary; this would be
6662                  * the case for create over an existing file.
6663                  */
6664 
6665                 if (trunc) {
6666                         int in_crit = 0;
6667                         rfs4_file_t *fp;
6668                         nfs4_srv_t *nsrv4;
6669                         bool_t create = FALSE;
6670 
6671                         /*
6672                          * We are writing over an existing file.
6673                          * Check to see if we need to recall a delegation.
6674                          */
6675                         nsrv4 = nfs4_get_srv();
6676                         rfs4_hold_deleg_policy(nsrv4);
6677                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6678                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6679                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6680                                         rfs4_file_rele(fp);
6681                                         rfs4_rele_deleg_policy(nsrv4);
6682                                         VN_RELE(vp);
6683                                         *attrset = 0;
6684                                         return (NFS4ERR_DELAY);
6685                                 }
6686                                 rfs4_file_rele(fp);
6687                         }
6688                         rfs4_rele_deleg_policy(nsrv4);
6689 
6690                         if (nbl_need_check(vp)) {
6691                                 in_crit = 1;
6692 
6693                                 ASSERT(reqsize == 0);
6694 
6695                                 nbl_start_crit(vp, RW_READER);
6696                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6697                                     cva.va_size, 0, NULL)) {
6698                                         in_crit = 0;
6699                                         nbl_end_crit(vp);
6700                                         VN_RELE(vp);
6701                                         *attrset = 0;
6702                                         return (NFS4ERR_ACCESS);
6703                                 }
6704                         }
6705                         ct.cc_sysid = 0;
6706                         ct.cc_pid = 0;
6707                         ct.cc_caller_id = nfs4_srv_caller_id;
6708                         ct.cc_flags = CC_DONTBLOCK;
 
8226 
8227         newcp->rc_cp_confirmed = cp_confirmed;
8228 
8229         rfs4_client_rele(newcp);
8230 
8231 out:
8232         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8233             SETCLIENTID4res *, res);
8234 }
8235 
8236 /*ARGSUSED*/
8237 void
8238 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8239     struct svc_req *req, struct compound_state *cs)
8240 {
8241         SETCLIENTID_CONFIRM4args *args =
8242             &argop->nfs_argop4_u.opsetclientid_confirm;
8243         SETCLIENTID_CONFIRM4res *res =
8244             &resop->nfs_resop4_u.opsetclientid_confirm;
8245         rfs4_client_t *cp, *cptoclose = NULL;
8246         nfs4_srv_t *nsrv4;
8247 
8248         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8249             struct compound_state *, cs,
8250             SETCLIENTID_CONFIRM4args *, args);
8251 
8252         nsrv4 = nfs4_get_srv();
8253         *cs->statusp = res->status = NFS4_OK;
8254 
8255         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8256 
8257         if (cp == NULL) {
8258                 *cs->statusp = res->status =
8259                     rfs4_check_clientid(&args->clientid, 1);
8260                 goto out;
8261         }
8262 
8263         if (!creds_ok(cp, req, cs)) {
8264                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8265                 rfs4_client_rele(cp);
8266                 goto out;
8267         }
8268 
8269         /* If the verifier doesn't match, the record doesn't match */
8270         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8271                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8272                 rfs4_client_rele(cp);
8273                 goto out;
8274         }
8275 
8276         rfs4_dbe_lock(cp->rc_dbe);
8277         cp->rc_need_confirm = FALSE;
8278         if (cp->rc_cp_confirmed) {
8279                 cptoclose = cp->rc_cp_confirmed;
8280                 cptoclose->rc_ss_remove = 1;
8281                 cp->rc_cp_confirmed = NULL;
8282         }
8283 
8284         /*
8285          * Update the client's associated server instance, if it's changed
8286          * since the client was created.
8287          */
8288         if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8289                 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8290 
8291         /*
8292          * Record clientid in stable storage.
8293          * Must be done after server instance has been assigned.
8294          */
8295         rfs4_ss_clid(nsrv4, cp);
8296 
8297         rfs4_dbe_unlock(cp->rc_dbe);
8298 
8299         if (cptoclose)
8300                 /* don't need to rele, client_close does it */
8301                 rfs4_client_close(cptoclose);
8302 
8303         /* If needed, initiate CB_NULL call for callback path */
8304         rfs4_deleg_cb_check(cp);
8305         rfs4_update_lease(cp);
8306 
8307         /*
8308          * Check to see if client can perform reclaims
8309          */
8310         rfs4_ss_chkclid(nsrv4, cp);
8311 
8312         rfs4_client_rele(cp);
8313 
8314 out:
8315         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8316             struct compound_state *, cs,
8317             SETCLIENTID_CONFIRM4 *, res);
8318 }
8319 
8320 
8321 /*ARGSUSED*/
8322 void
8323 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8324     struct svc_req *req, struct compound_state *cs)
8325 {
8326         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8327         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8328         rfs4_state_t *sp;
8329         nfsstat4 status;
8330 
 
9934 /*
9935  * Check to see if we have a downrev Solaris client, so that we
9936  * can send it a symlink instead of a referral.
9937  */
9938 int
9939 client_is_downrev(struct svc_req *req)
9940 {
9941         struct sockaddr *ca;
9942         rfs4_clntip_t *ci;
9943         bool_t create = FALSE;
9944         int is_downrev;
9945 
9946         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9947         ASSERT(ca);
9948         ci = rfs4_find_clntip(ca, &create);
9949         if (ci == NULL)
9950                 return (0);
9951         is_downrev = ci->ri_no_referrals;
9952         rfs4_dbe_rele(ci->ri_dbe);
9953         return (is_downrev);
9954 }
9955 
9956 /*
9957  * Do the main work of handling HA-NFSv4 Resource Group failover on
9958  * Sun Cluster.
9959  * We need to detect whether any RG admin paths have been added or removed,
9960  * and adjust resources accordingly.
9961  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9962  * order to scale, the list and array of paths need to be held in more
9963  * suitable data structures.
9964  */
9965 static void
9966 hanfsv4_failover(nfs4_srv_t *nsrv4)
9967 {
9968         int i, start_grace, numadded_paths = 0;
9969         char **added_paths = NULL;
9970         rfs4_dss_path_t *dss_path;
9971 
9972         /*
9973          * Note: currently, dss_pathlist cannot be NULL, since
9974          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9975          * make the latter dynamically specified too, the following will
9976          * need to be adjusted.
9977          */
9978 
9979         /*
9980          * First, look for removed paths: RGs that have been failed-over
9981          * away from this node.
9982          * Walk the "currently-serving" dss_pathlist and, for each
9983          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9984          * from nfsd. If not, that RG path has been removed.
9985          *
9986          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9987          * any duplicates.
9988          */
9989         dss_path = nsrv4->dss_pathlist;
9990         do {
9991                 int found = 0;
9992                 char *path = dss_path->path;
9993 
9994                 /* used only for non-HA so may not be removed */
9995                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9996                         dss_path = dss_path->next;
9997                         continue;
9998                 }
9999 
10000                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10001                         int cmpret;
10002                         char *newpath = rfs4_dss_newpaths[i];
10003 
10004                         /*
10005                          * Since nfsd has sorted rfs4_dss_newpaths for us,
10006                          * once the return from strcmp is negative we know
10007                          * we've passed the point where "path" should be,
10008                          * and can stop searching: "path" has been removed.
10009                          */
10010                         cmpret = strcmp(path, newpath);
10011                         if (cmpret < 0)
10012                                 break;
10013                         if (cmpret == 0) {
10014                                 found = 1;
10015                                 break;
10016                         }
10017                 }
10018 
10019                 if (found == 0) {
10020                         unsigned index = dss_path->index;
10021                         rfs4_servinst_t *sip = dss_path->sip;
10022                         rfs4_dss_path_t *path_next = dss_path->next;
10023 
10024                         /*
10025                          * This path has been removed.
10026                          * We must clear out the servinst reference to
10027                          * it, since it's now owned by another
10028                          * node: we should not attempt to touch it.
10029                          */
10030                         ASSERT(dss_path == sip->dss_paths[index]);
10031                         sip->dss_paths[index] = NULL;
10032 
10033                         /* remove from "currently-serving" list, and destroy */
10034                         remque(dss_path);
10035                         /* allow for NUL */
10036                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10037                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10038 
10039                         dss_path = path_next;
10040                 } else {
10041                         /* path was found; not removed */
10042                         dss_path = dss_path->next;
10043                 }
10044         } while (dss_path != nsrv4->dss_pathlist);
10045 
10046         /*
10047          * Now, look for added paths: RGs that have been failed-over
10048          * to this node.
10049          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10050          * for each path, check if it is on the "currently-serving"
10051          * dss_pathlist. If not, that RG path has been added.
10052          *
10053          * Note: we don't do duplicate detection here; nfsd does that for us.
10054          *
10055          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10056          * an upper bound for the size needed for added_paths[numadded_paths].
10057          */
10058 
10059         /* probably more space than we need, but guaranteed to be enough */
10060         if (rfs4_dss_numnewpaths > 0) {
10061                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10062                 added_paths = kmem_zalloc(sz, KM_SLEEP);
10063         }
10064 
10065         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10066         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10067                 int found = 0;
10068                 char *newpath = rfs4_dss_newpaths[i];
10069 
10070                 dss_path = nsrv4->dss_pathlist;
10071                 do {
10072                         char *path = dss_path->path;
10073 
10074                         /* used only for non-HA */
10075                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10076                                 dss_path = dss_path->next;
10077                                 continue;
10078                         }
10079 
10080                         if (strncmp(path, newpath, strlen(path)) == 0) {
10081                                 found = 1;
10082                                 break;
10083                         }
10084 
10085                         dss_path = dss_path->next;
10086                 } while (dss_path != nsrv4->dss_pathlist);
10087 
10088                 if (found == 0) {
10089                         added_paths[numadded_paths] = newpath;
10090                         numadded_paths++;
10091                 }
10092         }
10093 
10094         /* did we find any added paths? */
10095         if (numadded_paths > 0) {
10096 
10097                 /* create a new server instance, and start its grace period */
10098                 start_grace = 1;
10099                 /* CSTYLED */
10100                 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10101 
10102                 /* read in the stable storage state from these paths */
10103                 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10104 
10105                 /*
10106                  * Multiple failovers during a grace period will cause
10107                  * clients of the same resource group to be partitioned
10108                  * into different server instances, with different
10109                  * grace periods.  Since clients of the same resource
10110                  * group must be subject to the same grace period,
10111                  * we need to reset all currently active grace periods.
10112                  */
10113                 rfs4_grace_reset_all(nsrv4);
10114         }
10115 
10116         if (rfs4_dss_numnewpaths > 0)
10117                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10118 }
 |