Print this page
    
dss_paths[] entries need cleanup too
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28   28   *      All Rights Reserved
  29   29   */
  30   30  
  31   31  /*
  32   32   * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33   33   * Copyright 2019 Nexenta Systems, Inc.
  34   34   * Copyright 2019 Nexenta by DDN, Inc.
  35   35   */
  36   36  
  37   37  #include <sys/param.h>
  38   38  #include <sys/types.h>
  39   39  #include <sys/systm.h>
  40   40  #include <sys/cred.h>
  41   41  #include <sys/buf.h>
  42   42  #include <sys/vfs.h>
  43   43  #include <sys/vfs_opreg.h>
  44   44  #include <sys/vnode.h>
  45   45  #include <sys/uio.h>
  46   46  #include <sys/errno.h>
  47   47  #include <sys/sysmacros.h>
  48   48  #include <sys/statvfs.h>
  49   49  #include <sys/kmem.h>
  50   50  #include <sys/dirent.h>
  51   51  #include <sys/cmn_err.h>
  52   52  #include <sys/debug.h>
  53   53  #include <sys/systeminfo.h>
  54   54  #include <sys/flock.h>
  55   55  #include <sys/pathname.h>
  56   56  #include <sys/nbmlock.h>
  57   57  #include <sys/share.h>
  58   58  #include <sys/atomic.h>
  59   59  #include <sys/policy.h>
  60   60  #include <sys/fem.h>
  61   61  #include <sys/sdt.h>
  62   62  #include <sys/ddi.h>
  63   63  #include <sys/zone.h>
  64   64  
  65   65  #include <fs/fs_reparse.h>
  66   66  
  67   67  #include <rpc/types.h>
  68   68  #include <rpc/auth.h>
  69   69  #include <rpc/rpcsec_gss.h>
  70   70  #include <rpc/svc.h>
  71   71  
  72   72  #include <nfs/nfs.h>
  73   73  #include <nfs/nfssys.h>
  74   74  #include <nfs/export.h>
  75   75  #include <nfs/nfs_cmd.h>
  76   76  #include <nfs/lm.h>
  77   77  #include <nfs/nfs4.h>
  78   78  #include <nfs/nfs4_drc.h>
  79   79  
  80   80  #include <sys/strsubr.h>
  81   81  #include <sys/strsun.h>
  82   82  
  83   83  #include <inet/common.h>
  84   84  #include <inet/ip.h>
  85   85  #include <inet/ip6.h>
  86   86  
  87   87  #include <sys/tsol/label.h>
  88   88  #include <sys/tsol/tndb.h>
  89   89  
  90   90  #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  91   91  static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  92   92  #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  93   93  static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  94   94  extern struct svc_ops rdma_svc_ops;
  95   95  extern int nfs_loaned_buffers;
  96   96  /* End of Tunables */
  97   97  
  98   98  static int rdma_setup_read_data4(READ4args *, READ4res *);
  99   99  
 100  100  /*
 101  101   * Used to bump the stateid4.seqid value and show changes in the stateid
 102  102   */
 103  103  #define next_stateid(sp) (++(sp)->bits.chgseq)
 104  104  
 105  105  /*
 106  106   * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  107   *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  108   *      maxcount that isn't large enough to hold the smallest possible
 109  109   *      XDR encoded dirent.
 110  110   *
 111  111   *          sizeof cookie (8 bytes) +
 112  112   *          sizeof name_len (4 bytes) +
 113  113   *          sizeof smallest (padded) name (4 bytes) +
 114  114   *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  115   *          sizeof attrlist4_len (4 bytes) +
 116  116   *          sizeof next boolean (4 bytes)
 117  117   *
 118  118   * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  119   * the smallest possible entry4 (assumes no attrs requested).
 120  120   *      sizeof nfsstat4 (4 bytes) +
 121  121   *      sizeof verifier4 (8 bytes) +
 122  122   *      sizeof entry4list bool (4 bytes) +
 123  123   *      sizeof entry4   (36 bytes) +
 124  124   *      sizeof eof bool  (4 bytes)
 125  125   *
 126  126   * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  127   *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  128   *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  129   *      required for a given name length.  MAXNAMELEN is the maximum
 130  130   *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  131   *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  132   *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  133   *      to hold ., .., and the largest possible solaris dirent64.
 134  134   */
 135  135  #define RFS4_MINLEN_ENTRY4 36
 136  136  #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137  137  #define RFS4_MINLEN_RDDIR_BUF \
 138  138          (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139  139  
 140  140  /*
 141  141   * It would be better to pad to 4 bytes since that's what XDR would do,
 142  142   * but the dirents UFS gives us are already padded to 8, so just take
 143  143   * what we're given.  Dircount is only a hint anyway.  Currently the
 144  144   * solaris kernel is ASCII only, so there's no point in calling the
 145  145   * UTF8 functions.
 146  146   *
  147  147   * dirent64: name padded to provide 8 byte struct alignment
 148  148   *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  149   *
 150  150   * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  151   *
 152  152   */
 153  153  #define DIRENT64_TO_DIRCOUNT(dp) \
 154  154          (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155  155  
 156  156  zone_key_t      rfs4_zone_key;
 157  157  
 158  158  static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 159  159  
 160  160  u_longlong_t    nfs4_srv_caller_id;
 161  161  uint_t          nfs4_srv_vkey = 0;
 162  162  
 163  163  void    rfs4_init_compound_state(struct compound_state *);
 164  164  
 165  165  static void     nullfree(caddr_t);
 166  166  static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 167  167                      struct compound_state *);
 168  168  static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 169  169                      struct compound_state *);
 170  170  static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 171  171                      struct compound_state *);
 172  172  static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 173  173                      struct compound_state *);
 174  174  static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 175  175                      struct compound_state *);
 176  176  static void     rfs4_op_create_free(nfs_resop4 *resop);
 177  177  static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 178  178                      struct svc_req *, struct compound_state *);
 179  179  static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 180  180                      struct svc_req *, struct compound_state *);
 181  181  static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 182  182                      struct compound_state *);
 183  183  static void     rfs4_op_getattr_free(nfs_resop4 *);
 184  184  static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185  185                      struct compound_state *);
 186  186  static void     rfs4_op_getfh_free(nfs_resop4 *);
 187  187  static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 188  188                      struct compound_state *);
 189  189  static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 190  190                      struct compound_state *);
 191  191  static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192  192                      struct compound_state *);
 193  193  static void     lock_denied_free(nfs_resop4 *);
 194  194  static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 195  195                      struct compound_state *);
 196  196  static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 197  197                      struct compound_state *);
 198  198  static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 199  199                      struct compound_state *);
 200  200  static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 201  201                      struct compound_state *);
 202  202  static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 203  203                      struct svc_req *req, struct compound_state *cs);
 204  204  static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 205  205                      struct compound_state *);
 206  206  static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 207  207                      struct compound_state *);
 208  208  static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 209  209                      struct svc_req *, struct compound_state *);
 210  210  static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 211  211                      struct svc_req *, struct compound_state *);
 212  212  static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 213  213                      struct compound_state *);
 214  214  static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 215  215                      struct compound_state *);
 216  216  static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 217  217                      struct compound_state *);
 218  218  static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 219  219                      struct compound_state *);
 220  220  static void     rfs4_op_read_free(nfs_resop4 *);
 221  221  static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 222  222  static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 223  223                      struct compound_state *);
 224  224  static void     rfs4_op_readlink_free(nfs_resop4 *);
 225  225  static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 226  226                      struct svc_req *, struct compound_state *);
 227  227  static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 228  228                      struct compound_state *);
 229  229  static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 230  230                      struct compound_state *);
 231  231  static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 232  232                      struct compound_state *);
 233  233  static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 234  234                      struct compound_state *);
 235  235  static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 236  236                      struct compound_state *);
 237  237  static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 238  238                      struct compound_state *);
 239  239  static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 240  240                      struct compound_state *);
 241  241  static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 242  242                      struct compound_state *);
 243  243  static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 244  244                      struct svc_req *, struct compound_state *);
 245  245  static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 246  246                      struct svc_req *req, struct compound_state *);
 247  247  static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 248  248                      struct compound_state *);
 249  249  static void     rfs4_op_secinfo_free(nfs_resop4 *);
 250  250  
 251  251  static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 252  252                      struct svc_req *);
 253  253  nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 254  254  void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 255  255  
 256  256  
 257  257  /*
 258  258   * translation table for attrs
 259  259   */
 260  260  struct nfs4_ntov_table {
 261  261          union nfs4_attr_u *na;
 262  262          uint8_t amap[NFS4_MAXNUM_ATTRS];
 263  263          int attrcnt;
 264  264          bool_t vfsstat;
 265  265  };
 266  266  
 267  267  static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 268  268  static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 269  269                      struct nfs4_svgetit_arg *sargp);
 270  270  
 271  271  static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 272  272                      struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 273  273                      struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 274  274  
 275  275  static void     hanfsv4_failover(nfs4_srv_t *);
 276  276  
 277  277  fem_t           *deleg_rdops;
 278  278  fem_t           *deleg_wrops;
 279  279  
 280  280  /*
 281  281   * NFS4 op dispatch table
 282  282   */
 283  283  
 284  284  struct rfsv4disp {
 285  285          void    (*dis_proc)();          /* proc to call */
 286  286          void    (*dis_resfree)();       /* frees space allocated by proc */
 287  287          int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 288  288  };
 289  289  
 290  290  static struct rfsv4disp rfsv4disptab[] = {
 291  291          /*
 292  292           * NFS VERSION 4
 293  293           */
 294  294  
 295  295          /* RFS_NULL = 0 */
 296  296          {rfs4_op_illegal, nullfree, 0},
 297  297  
 298  298          /* UNUSED = 1 */
 299  299          {rfs4_op_illegal, nullfree, 0},
 300  300  
 301  301          /* UNUSED = 2 */
 302  302          {rfs4_op_illegal, nullfree, 0},
 303  303  
 304  304          /* OP_ACCESS = 3 */
 305  305          {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 306  306  
 307  307          /* OP_CLOSE = 4 */
 308  308          {rfs4_op_close, nullfree, 0},
 309  309  
 310  310          /* OP_COMMIT = 5 */
 311  311          {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 312  312  
 313  313          /* OP_CREATE = 6 */
 314  314          {rfs4_op_create, nullfree, 0},
 315  315  
 316  316          /* OP_DELEGPURGE = 7 */
 317  317          {rfs4_op_delegpurge, nullfree, 0},
 318  318  
 319  319          /* OP_DELEGRETURN = 8 */
 320  320          {rfs4_op_delegreturn, nullfree, 0},
 321  321  
 322  322          /* OP_GETATTR = 9 */
 323  323          {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 324  324  
 325  325          /* OP_GETFH = 10 */
 326  326          {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 327  327  
 328  328          /* OP_LINK = 11 */
 329  329          {rfs4_op_link, nullfree, 0},
 330  330  
 331  331          /* OP_LOCK = 12 */
 332  332          {rfs4_op_lock, lock_denied_free, 0},
 333  333  
 334  334          /* OP_LOCKT = 13 */
 335  335          {rfs4_op_lockt, lock_denied_free, 0},
 336  336  
 337  337          /* OP_LOCKU = 14 */
 338  338          {rfs4_op_locku, nullfree, 0},
 339  339  
 340  340          /* OP_LOOKUP = 15 */
 341  341          {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 342  342  
 343  343          /* OP_LOOKUPP = 16 */
 344  344          {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 345  345  
 346  346          /* OP_NVERIFY = 17 */
 347  347          {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 348  348  
 349  349          /* OP_OPEN = 18 */
 350  350          {rfs4_op_open, rfs4_free_reply, 0},
 351  351  
 352  352          /* OP_OPENATTR = 19 */
 353  353          {rfs4_op_openattr, nullfree, 0},
 354  354  
 355  355          /* OP_OPEN_CONFIRM = 20 */
 356  356          {rfs4_op_open_confirm, nullfree, 0},
 357  357  
 358  358          /* OP_OPEN_DOWNGRADE = 21 */
 359  359          {rfs4_op_open_downgrade, nullfree, 0},
 360  360  
  361  361          /* OP_PUTFH = 22 */
 362  362          {rfs4_op_putfh, nullfree, RPC_ALL},
 363  363  
 364  364          /* OP_PUTPUBFH = 23 */
 365  365          {rfs4_op_putpubfh, nullfree, RPC_ALL},
 366  366  
 367  367          /* OP_PUTROOTFH = 24 */
 368  368          {rfs4_op_putrootfh, nullfree, RPC_ALL},
 369  369  
 370  370          /* OP_READ = 25 */
 371  371          {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 372  372  
 373  373          /* OP_READDIR = 26 */
 374  374          {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 375  375  
 376  376          /* OP_READLINK = 27 */
 377  377          {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 378  378  
 379  379          /* OP_REMOVE = 28 */
 380  380          {rfs4_op_remove, nullfree, 0},
 381  381  
 382  382          /* OP_RENAME = 29 */
 383  383          {rfs4_op_rename, nullfree, 0},
 384  384  
 385  385          /* OP_RENEW = 30 */
 386  386          {rfs4_op_renew, nullfree, 0},
 387  387  
 388  388          /* OP_RESTOREFH = 31 */
 389  389          {rfs4_op_restorefh, nullfree, RPC_ALL},
 390  390  
 391  391          /* OP_SAVEFH = 32 */
 392  392          {rfs4_op_savefh, nullfree, RPC_ALL},
 393  393  
 394  394          /* OP_SECINFO = 33 */
 395  395          {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 396  396  
 397  397          /* OP_SETATTR = 34 */
 398  398          {rfs4_op_setattr, nullfree, 0},
 399  399  
 400  400          /* OP_SETCLIENTID = 35 */
 401  401          {rfs4_op_setclientid, nullfree, 0},
 402  402  
 403  403          /* OP_SETCLIENTID_CONFIRM = 36 */
 404  404          {rfs4_op_setclientid_confirm, nullfree, 0},
 405  405  
 406  406          /* OP_VERIFY = 37 */
 407  407          {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 408  408  
 409  409          /* OP_WRITE = 38 */
 410  410          {rfs4_op_write, nullfree, 0},
 411  411  
 412  412          /* OP_RELEASE_LOCKOWNER = 39 */
 413  413          {rfs4_op_release_lockowner, nullfree, 0},
 414  414  };
 415  415  
 416  416  static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 417  417  
 418  418  #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 419  419  
 420  420  #ifdef DEBUG
 421  421  
 422  422  int             rfs4_fillone_debug = 0;
 423  423  int             rfs4_no_stub_access = 1;
 424  424  int             rfs4_rddir_debug = 0;
 425  425  
 426  426  static char    *rfs4_op_string[] = {
 427  427          "rfs4_op_null",
 428  428          "rfs4_op_1 unused",
 429  429          "rfs4_op_2 unused",
 430  430          "rfs4_op_access",
 431  431          "rfs4_op_close",
 432  432          "rfs4_op_commit",
 433  433          "rfs4_op_create",
 434  434          "rfs4_op_delegpurge",
 435  435          "rfs4_op_delegreturn",
 436  436          "rfs4_op_getattr",
 437  437          "rfs4_op_getfh",
 438  438          "rfs4_op_link",
 439  439          "rfs4_op_lock",
 440  440          "rfs4_op_lockt",
 441  441          "rfs4_op_locku",
 442  442          "rfs4_op_lookup",
 443  443          "rfs4_op_lookupp",
 444  444          "rfs4_op_nverify",
 445  445          "rfs4_op_open",
 446  446          "rfs4_op_openattr",
 447  447          "rfs4_op_open_confirm",
 448  448          "rfs4_op_open_downgrade",
 449  449          "rfs4_op_putfh",
 450  450          "rfs4_op_putpubfh",
 451  451          "rfs4_op_putrootfh",
 452  452          "rfs4_op_read",
 453  453          "rfs4_op_readdir",
 454  454          "rfs4_op_readlink",
 455  455          "rfs4_op_remove",
 456  456          "rfs4_op_rename",
 457  457          "rfs4_op_renew",
 458  458          "rfs4_op_restorefh",
 459  459          "rfs4_op_savefh",
 460  460          "rfs4_op_secinfo",
 461  461          "rfs4_op_setattr",
 462  462          "rfs4_op_setclientid",
 463  463          "rfs4_op_setclient_confirm",
 464  464          "rfs4_op_verify",
 465  465          "rfs4_op_write",
 466  466          "rfs4_op_release_lockowner",
 467  467          "rfs4_op_illegal"
 468  468  };
 469  469  #endif
 470  470  
 471  471  void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 472  472  
 473  473  extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 474  474  
 475  475  extern void     rfs4_free_fs_locations4(fs_locations4 *);
 476  476  
 477  477  #ifdef  nextdp
 478  478  #undef nextdp
 479  479  #endif
 480  480  #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 481  481  
 482  482  static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 483  483          VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 484  484          VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 485  485          VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 486  486          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 487  487          VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 488  488          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 489  489          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 490  490          NULL,                   NULL
 491  491  };
 492  492  static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 493  493          VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 494  494          VOPNAME_READ,           { .femop_read = deleg_wr_read },
 495  495          VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 496  496          VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 497  497          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 498  498          VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 499  499          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 500  500          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 501  501          NULL,                   NULL
 502  502  };
 503  503  
 504  504  /* ARGSUSED */
 505  505  static void *
 506  506  rfs4_zone_init(zoneid_t zoneid)
 507  507  {
 508  508          nfs4_srv_t *nsrv4;
 509  509          timespec32_t verf;
 510  510  
 511  511          nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 512  512  
 513  513          /*
 514  514           * The following algorithm attempts to find a unique verifier
 515  515           * to be used as the write verifier returned from the server
 516  516           * to the client.  It is important that this verifier change
 517  517           * whenever the server reboots.  Of secondary importance, it
 518  518           * is important for the verifier to be unique between two
 519  519           * different servers.
 520  520           *
 521  521           * Thus, an attempt is made to use the system hostid and the
 522  522           * current time in seconds when the nfssrv kernel module is
 523  523           * loaded.  It is assumed that an NFS server will not be able
 524  524           * to boot and then to reboot in less than a second.  If the
 525  525           * hostid has not been set, then the current high resolution
 526  526           * time is used.  This will ensure different verifiers each
 527  527           * time the server reboots and minimize the chances that two
 528  528           * different servers will have the same verifier.
 529  529           * XXX - this is broken on LP64 kernels.
 530  530           */
 531  531          verf.tv_sec = (time_t)zone_get_hostid(NULL);
 532  532          if (verf.tv_sec != 0) {
 533  533                  verf.tv_nsec = gethrestime_sec();
 534  534          } else {
 535  535                  timespec_t tverf;
 536  536  
 537  537                  gethrestime(&tverf);
 538  538                  verf.tv_sec = (time_t)tverf.tv_sec;
 539  539                  verf.tv_nsec = tverf.tv_nsec;
 540  540          }
 541  541          nsrv4->write4verf = *(uint64_t *)&verf;
 542  542  
 543  543          /* Used to manage create/destroy of server state */
 544  544          nsrv4->nfs4_server_state = NULL;
 545  545          nsrv4->nfs4_cur_servinst = NULL;
 546  546          nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 547  547          mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 548  548          mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 549  549          mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 550  550          rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 551  551  
 552  552          return (nsrv4);
 553  553  }
 554  554  
 555  555  /* ARGSUSED */
 556  556  static void
 557  557  rfs4_zone_fini(zoneid_t zoneid, void *data)
 558  558  {
 559  559          nfs4_srv_t *nsrv4 = data;
 560  560  
 561  561          mutex_destroy(&nsrv4->deleg_lock);
 562  562          mutex_destroy(&nsrv4->state_lock);
 563  563          mutex_destroy(&nsrv4->servinst_lock);
 564  564          rw_destroy(&nsrv4->deleg_policy_lock);
 565  565  
 566  566          kmem_free(nsrv4, sizeof (*nsrv4));
 567  567  }
 568  568  
 569  569  void
 570  570  rfs4_srvrinit(void)
 571  571  {
 572  572          extern void rfs4_attr_init();
 573  573  
 574  574          zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
 575  575  
 576  576          rfs4_attr_init();
 577  577  
 578  578  
 579  579          if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 580  580                  rfs4_disable_delegation();
 581  581          } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 582  582              &deleg_wrops) != 0) {
 583  583                  rfs4_disable_delegation();
 584  584                  fem_free(deleg_rdops);
 585  585          }
 586  586  
 587  587          nfs4_srv_caller_id = fs_new_caller_id();
 588  588          lockt_sysid = lm_alloc_sysidt();
 589  589          vsd_create(&nfs4_srv_vkey, NULL);
 590  590          rfs4_state_g_init();
 591  591  }
 592  592  
 593  593  void
 594  594  rfs4_srvrfini(void)
 595  595  {
 596  596          if (lockt_sysid != LM_NOSYSID) {
 597  597                  lm_free_sysidt(lockt_sysid);
 598  598                  lockt_sysid = LM_NOSYSID;
 599  599          }
 600  600  
 601  601          rfs4_state_g_fini();
 602  602  
 603  603          fem_free(deleg_rdops);
 604  604          fem_free(deleg_wrops);
 605  605  
 606  606          (void) zone_key_delete(rfs4_zone_key);
 607  607  }
 608  608  
 609  609  void
 610  610  rfs4_do_server_start(int server_upordown,
 611  611      int srv_delegation, int cluster_booted)
 612  612  {
 613  613          nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
 614  614  
 615  615          /* Is this a warm start? */
 616  616          if (server_upordown == NFS_SERVER_QUIESCED) {
 617  617                  cmn_err(CE_NOTE, "nfs4_srv: "
 618  618                      "server was previously quiesced; "
 619  619                      "existing NFSv4 state will be re-used");
 620  620  
 621  621                  /*
 622  622                   * HA-NFSv4: this is also the signal
 623  623                   * that a Resource Group failover has
 624  624                   * occurred.
 625  625                   */
 626  626                  if (cluster_booted)
 627  627                          hanfsv4_failover(nsrv4);
 628  628          } else {
 629  629                  /* Cold start */
 630  630                  nsrv4->rfs4_start_time = 0;
 631  631                  rfs4_state_zone_init(nsrv4);
 632  632                  nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 633  633                      nfs4_drc_hash);
 634  634  
 635  635                  /*
 636  636                   * The nfsd service was started with the -s option
 637  637                   * we need to pull in any state from the paths indicated.
 638  638                   */
 639  639                  if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 640  640                          /* read in the stable storage state from these paths */
 641  641                          rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 642  642                              rfs4_dss_newpaths);
 643  643                  }
 644  644          }
 645  645  
 646  646          /* Check if delegation is to be enabled */
 647  647          if (srv_delegation != FALSE)
 648  648                  rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 649  649  }
 650  650  
 651  651  void
 652  652  rfs4_init_compound_state(struct compound_state *cs)
 653  653  {
 654  654          bzero(cs, sizeof (*cs));
 655  655          cs->cont = TRUE;
 656  656          cs->access = CS_ACCESS_DENIED;
 657  657          cs->deleg = FALSE;
 658  658          cs->mandlock = FALSE;
 659  659          cs->fh.nfs_fh4_val = cs->fhbuf;
 660  660  }
 661  661  
 662  662  void
 663  663  rfs4_grace_start(rfs4_servinst_t *sip)
 664  664  {
 665  665          rw_enter(&sip->rwlock, RW_WRITER);
 666  666          sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 667  667          sip->grace_period = rfs4_grace_period;
 668  668          rw_exit(&sip->rwlock);
 669  669  }
 670  670  
 671  671  /*
 672  672   * returns true if the instance's grace period has never been started
 673  673   */
 674  674  int
 675  675  rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 676  676  {
 677  677          time_t start_time;
 678  678  
 679  679          rw_enter(&sip->rwlock, RW_READER);
 680  680          start_time = sip->start_time;
 681  681          rw_exit(&sip->rwlock);
 682  682  
 683  683          return (start_time == 0);
 684  684  }
 685  685  
 686  686  /*
 687  687   * Indicates if server instance is within the
 688  688   * grace period.
 689  689   */
 690  690  int
 691  691  rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 692  692  {
 693  693          time_t grace_expiry;
 694  694  
 695  695          rw_enter(&sip->rwlock, RW_READER);
 696  696          grace_expiry = sip->start_time + sip->grace_period;
 697  697          rw_exit(&sip->rwlock);
 698  698  
 699  699          return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 700  700  }
 701  701  
 702  702  int
 703  703  rfs4_clnt_in_grace(rfs4_client_t *cp)
 704  704  {
 705  705          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 706  706  
 707  707          return (rfs4_servinst_in_grace(cp->rc_server_instance));
 708  708  }
 709  709  
 710  710  /*
 711  711   * reset all currently active grace periods
 712  712   */
 713  713  void
 714  714  rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 715  715  {
 716  716          rfs4_servinst_t *sip;
 717  717  
 718  718          mutex_enter(&nsrv4->servinst_lock);
 719  719          for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 720  720                  if (rfs4_servinst_in_grace(sip))
 721  721                          rfs4_grace_start(sip);
 722  722          mutex_exit(&nsrv4->servinst_lock);
 723  723  }
 724  724  
 725  725  /*
 726  726   * start any new instances' grace periods
 727  727   */
 728  728  void
 729  729  rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 730  730  {
 731  731          rfs4_servinst_t *sip;
 732  732  
 733  733          mutex_enter(&nsrv4->servinst_lock);
 734  734          for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 735  735                  if (rfs4_servinst_grace_new(sip))
 736  736                          rfs4_grace_start(sip);
 737  737          mutex_exit(&nsrv4->servinst_lock);
 738  738  }
 739  739  
 740  740  static rfs4_dss_path_t *
 741  741  rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 742  742      char *path, unsigned index)
 743  743  {
 744  744          size_t len;
 745  745          rfs4_dss_path_t *dss_path;
 746  746  
 747  747          dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 748  748  
 749  749          /*
 750  750           * Take a copy of the string, since the original may be overwritten.
 751  751           * Sadly, no strdup() in the kernel.
 752  752           */
 753  753          /* allow for NUL */
 754  754          len = strlen(path) + 1;
 755  755          dss_path->path = kmem_alloc(len, KM_SLEEP);
 756  756          (void) strlcpy(dss_path->path, path, len);
 757  757  
 758  758          /* associate with servinst */
 759  759          dss_path->sip = sip;
 760  760          dss_path->index = index;
 761  761  
 762  762          /*
 763  763           * Add to list of served paths.
 764  764           * No locking required, as we're only ever called at startup.
 765  765           */
 766  766          if (nsrv4->dss_pathlist == NULL) {
 767  767                  /* this is the first dss_path_t */
 768  768  
 769  769                  /* needed for insque/remque */
 770  770                  dss_path->next = dss_path->prev = dss_path;
 771  771  
 772  772                  nsrv4->dss_pathlist = dss_path;
 773  773          } else {
 774  774                  insque(dss_path, nsrv4->dss_pathlist);
 775  775          }
 776  776  
 777  777          return (dss_path);
 778  778  }
 779  779  
 780  780  /*
 781  781   * Create a new server instance, and make it the currently active instance.
 782  782   * Note that starting the grace period too early will reduce the clients'
 783  783   * recovery window.
 784  784   */
 785  785  void
 786  786  rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 787  787      int dss_npaths, char **dss_paths)
 788  788  {
 789  789          unsigned i;
 790  790          rfs4_servinst_t *sip;
 791  791          rfs4_oldstate_t *oldstate;
 792  792  
 793  793          sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 794  794          rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 795  795  
 796  796          sip->start_time = (time_t)0;
 797  797          sip->grace_period = (time_t)0;
 798  798          sip->next = NULL;
 799  799          sip->prev = NULL;
 800  800  
 801  801          rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 802  802          /*
 803  803           * This initial dummy entry is required to setup for insque/remque.
 804  804           * It must be skipped over whenever the list is traversed.
 805  805           */
 806  806          oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 807  807          /* insque/remque require initial list entry to be self-terminated */
  
    | 
      ↓ open down ↓ | 
    807 lines elided | 
    
      ↑ open up ↑ | 
  
 808  808          oldstate->next = oldstate;
 809  809          oldstate->prev = oldstate;
 810  810          sip->oldstate = oldstate;
 811  811  
 812  812  
 813  813          sip->dss_npaths = dss_npaths;
 814  814          sip->dss_paths = kmem_alloc(dss_npaths *
 815  815              sizeof (rfs4_dss_path_t *), KM_SLEEP);
 816  816  
 817  817          for (i = 0; i < dss_npaths; i++) {
 818      -                /* CSTYLED */
 819      -                sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
      818 +                sip->dss_paths[i] =
      819 +                    rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 820  820          }
 821  821  
 822  822          mutex_enter(&nsrv4->servinst_lock);
 823  823          if (nsrv4->nfs4_cur_servinst != NULL) {
 824  824                  /* add to linked list */
 825  825                  sip->prev = nsrv4->nfs4_cur_servinst;
 826  826                  nsrv4->nfs4_cur_servinst->next = sip;
 827  827          }
 828  828          if (start_grace)
 829  829                  rfs4_grace_start(sip);
 830  830          /* make the new instance "current" */
 831  831          nsrv4->nfs4_cur_servinst = sip;
 832  832  
 833  833          mutex_exit(&nsrv4->servinst_lock);
 834  834  }
 835  835  
 836  836  /*
 837  837   * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 838  838   * all instances directly.
 839  839   */
 840  840  void
 841  841  rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 842  842  {
 843  843          rfs4_servinst_t *sip, *prev, *current;
 844  844  #ifdef DEBUG
 845  845          int n = 0;
 846  846  #endif
  
    | 
      ↓ open down ↓ | 
    17 lines elided | 
    
      ↑ open up ↑ | 
  
 847  847  
 848  848          mutex_enter(&nsrv4->servinst_lock);
 849  849          ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 850  850          current = nsrv4->nfs4_cur_servinst;
 851  851          nsrv4->nfs4_cur_servinst = NULL;
 852  852          for (sip = current; sip != NULL; sip = prev) {
 853  853                  prev = sip->prev;
 854  854                  rw_destroy(&sip->rwlock);
 855  855                  if (sip->oldstate)
 856  856                          kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 857      -                if (sip->dss_paths)
      857 +                if (sip->dss_paths) {
      858 +                        int i = sip->dss_npaths;
      859 +
      860 +                        while (i > 0) {
      861 +                                i--;
      862 +                                if (sip->dss_paths[i] != NULL) {
      863 +                                        char *path = sip->dss_paths[i]->path;
      864 +
      865 +                                        if (path != NULL) {
      866 +                                                kmem_free(path,
      867 +                                                    strlen(path) + 1);
      868 +                                        }
      869 +                                        kmem_free(sip->dss_paths[i],
      870 +                                            sizeof (rfs4_dss_path_t));
      871 +                                }
      872 +                        }
 858  873                          kmem_free(sip->dss_paths,
 859  874                              sip->dss_npaths * sizeof (rfs4_dss_path_t *));
      875 +                }
 860  876                  kmem_free(sip, sizeof (rfs4_servinst_t));
 861  877  #ifdef DEBUG
 862  878                  n++;
 863  879  #endif
 864  880          }
 865  881          mutex_exit(&nsrv4->servinst_lock);
 866  882  }
 867  883  
 868  884  /*
 869  885   * Assign the current server instance to a client_t.
 870  886   * Should be called with cp->rc_dbe held.
 871  887   */
 872  888  void
 873  889  rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 874  890      rfs4_servinst_t *sip)
 875  891  {
 876  892          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 877  893  
 878  894          /*
 879  895           * The lock ensures that if the current instance is in the process
 880  896           * of changing, we will see the new one.
 881  897           */
 882  898          mutex_enter(&nsrv4->servinst_lock);
 883  899          cp->rc_server_instance = sip;
 884  900          mutex_exit(&nsrv4->servinst_lock);
 885  901  }
 886  902  
 887  903  rfs4_servinst_t *
 888  904  rfs4_servinst(rfs4_client_t *cp)
 889  905  {
 890  906          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 891  907  
 892  908          return (cp->rc_server_instance);
 893  909  }
 894  910  
 895  911  /* ARGSUSED */
 896  912  static void
 897  913  nullfree(caddr_t resop)
 898  914  {
 899  915  }
 900  916  
 901  917  /*
 902  918   * This is a fall-through for invalid or not implemented (yet) ops
 903  919   */
 904  920  /* ARGSUSED */
 905  921  static void
 906  922  rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 907  923      struct compound_state *cs)
 908  924  {
 909  925          *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 910  926  }
 911  927  
 912  928  /*
 913  929   * Check if the security flavor, nfsnum, is in the flavor_list.
 914  930   */
 915  931  bool_t
 916  932  in_flavor_list(int nfsnum, int *flavor_list, int count)
 917  933  {
 918  934          int i;
 919  935  
 920  936          for (i = 0; i < count; i++) {
 921  937                  if (nfsnum == flavor_list[i])
 922  938                          return (TRUE);
 923  939          }
 924  940          return (FALSE);
 925  941  }
 926  942  
 927  943  /*
 928  944   * Used by rfs4_op_secinfo to get the security information from the
 929  945   * export structure associated with the component.
 930  946   */
 931  947  /* ARGSUSED */
 932  948  static nfsstat4
 933  949  do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 934  950  {
 935  951          int error, different_export = 0;
 936  952          vnode_t *dvp, *vp;
 937  953          struct exportinfo *exi = NULL;
 938  954          fid_t fid;
 939  955          uint_t count, i;
 940  956          secinfo4 *resok_val;
 941  957          struct secinfo *secp;
 942  958          seconfig_t *si;
 943  959          bool_t did_traverse = FALSE;
 944  960          int dotdot, walk;
 945  961          nfs_export_t *ne = nfs_get_export();
 946  962  
 947  963          dvp = cs->vp;
 948  964          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 949  965  
 950  966          /*
 951  967           * If dotdotting, then need to check whether it's above the
 952  968           * root of a filesystem, or above an export point.
 953  969           */
 954  970          if (dotdot) {
 955  971  
 956  972                  /*
 957  973                   * If dotdotting at the root of a filesystem, then
 958  974                   * need to traverse back to the mounted-on filesystem
 959  975                   * and do the dotdot lookup there.
 960  976                   */
 961  977                  if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
 962  978  
 963  979                          /*
 964  980                           * If at the system root, then can
 965  981                           * go up no further.
 966  982                           */
 967  983                          if (VN_CMP(dvp, ZONE_ROOTVP()))
 968  984                                  return (puterrno4(ENOENT));
 969  985  
 970  986                          /*
 971  987                           * Traverse back to the mounted-on filesystem
 972  988                           */
 973  989                          dvp = untraverse(cs->vp);
 974  990  
 975  991                          /*
 976  992                           * Set the different_export flag so we remember
 977  993                           * to pick up a new exportinfo entry for
 978  994                           * this new filesystem.
 979  995                           */
 980  996                          different_export = 1;
 981  997                  } else {
 982  998  
 983  999                          /*
 984 1000                           * If dotdotting above an export point then set
 985 1001                           * the different_export to get new export info.
 986 1002                           */
 987 1003                          different_export = nfs_exported(cs->exi, cs->vp);
 988 1004                  }
 989 1005          }
 990 1006  
 991 1007          /*
 992 1008           * Get the vnode for the component "nm".
 993 1009           */
 994 1010          error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 995 1011              NULL, NULL, NULL);
 996 1012          if (error)
 997 1013                  return (puterrno4(error));
 998 1014  
 999 1015          /*
1000 1016           * If the vnode is in a pseudo filesystem, or if the security flavor
1001 1017           * used in the request is valid but not an explicitly shared flavor,
1002 1018           * or the access bit indicates that this is a limited access,
1003 1019           * check whether this vnode is visible.
1004 1020           */
1005 1021          if (!different_export &&
1006 1022              (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1007 1023              cs->access & CS_ACCESS_LIMITED)) {
1008 1024                  if (! nfs_visible(cs->exi, vp, &different_export)) {
1009 1025                          VN_RELE(vp);
1010 1026                          return (puterrno4(ENOENT));
1011 1027                  }
1012 1028          }
1013 1029  
1014 1030          /*
1015 1031           * If it's a mountpoint, then traverse it.
1016 1032           */
1017 1033          if (vn_ismntpt(vp)) {
1018 1034                  if ((error = traverse(&vp)) != 0) {
1019 1035                          VN_RELE(vp);
1020 1036                          return (puterrno4(error));
1021 1037                  }
1022 1038                  /* remember that we had to traverse mountpoint */
1023 1039                  did_traverse = TRUE;
1024 1040                  different_export = 1;
1025 1041          } else if (vp->v_vfsp != dvp->v_vfsp) {
1026 1042                  /*
1027 1043                   * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1028 1044                   * then vp is probably an LOFS object.  We don't need the
1029 1045                   * realvp, we just need to know that we might have crossed
1030 1046                   * a server fs boundary and need to call checkexport4.
1031 1047                   * (LOFS lookup hides server fs mountpoints, and actually calls
1032 1048                   * traverse)
1033 1049                   */
1034 1050                  different_export = 1;
1035 1051          }
1036 1052  
1037 1053          /*
1038 1054           * Get the export information for it.
1039 1055           */
1040 1056          if (different_export) {
1041 1057  
1042 1058                  bzero(&fid, sizeof (fid));
1043 1059                  fid.fid_len = MAXFIDSZ;
1044 1060                  error = vop_fid_pseudo(vp, &fid);
1045 1061                  if (error) {
1046 1062                          VN_RELE(vp);
1047 1063                          return (puterrno4(error));
1048 1064                  }
1049 1065  
1050 1066                  if (dotdot)
1051 1067                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1052 1068                  else
1053 1069                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1054 1070  
1055 1071                  if (exi == NULL) {
1056 1072                          if (did_traverse == TRUE) {
1057 1073                                  /*
1058 1074                                   * If this vnode is a mounted-on vnode,
1059 1075                                   * but the mounted-on file system is not
1060 1076                                   * exported, send back the secinfo for
1061 1077                                   * the exported node that the mounted-on
1062 1078                                   * vnode lives in.
1063 1079                                   */
1064 1080                                  exi = cs->exi;
1065 1081                          } else {
1066 1082                                  VN_RELE(vp);
1067 1083                                  return (puterrno4(EACCES));
1068 1084                          }
1069 1085                  }
1070 1086          } else {
1071 1087                  exi = cs->exi;
1072 1088          }
1073 1089          ASSERT(exi != NULL);
1074 1090  
1075 1091  
1076 1092          /*
1077 1093           * Create the secinfo result based on the security information
1078 1094           * from the exportinfo structure (exi).
1079 1095           *
1080 1096           * Return all flavors for a pseudo node.
1081 1097           * For a real export node, return the flavor that the client
1082 1098           * has access with.
1083 1099           */
1084 1100          ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1085 1101          if (PSEUDO(exi)) {
1086 1102                  count = exi->exi_export.ex_seccnt; /* total sec count */
1087 1103                  resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1088 1104                  secp = exi->exi_export.ex_secinfo;
1089 1105  
1090 1106                  for (i = 0; i < count; i++) {
1091 1107                          si = &secp[i].s_secinfo;
1092 1108                          resok_val[i].flavor = si->sc_rpcnum;
1093 1109                          if (resok_val[i].flavor == RPCSEC_GSS) {
1094 1110                                  rpcsec_gss_info *info;
1095 1111  
1096 1112                                  info = &resok_val[i].flavor_info;
1097 1113                                  info->qop = si->sc_qop;
1098 1114                                  info->service = (rpc_gss_svc_t)si->sc_service;
1099 1115  
1100 1116                                  /* get oid opaque data */
1101 1117                                  info->oid.sec_oid4_len =
1102 1118                                      si->sc_gss_mech_type->length;
1103 1119                                  info->oid.sec_oid4_val = kmem_alloc(
1104 1120                                      si->sc_gss_mech_type->length, KM_SLEEP);
1105 1121                                  bcopy(
1106 1122                                      si->sc_gss_mech_type->elements,
1107 1123                                      info->oid.sec_oid4_val,
1108 1124                                      info->oid.sec_oid4_len);
1109 1125                          }
1110 1126                  }
1111 1127                  resp->SECINFO4resok_len = count;
1112 1128                  resp->SECINFO4resok_val = resok_val;
1113 1129          } else {
1114 1130                  int ret_cnt = 0, k = 0;
1115 1131                  int *flavor_list;
1116 1132  
1117 1133                  count = exi->exi_export.ex_seccnt; /* total sec count */
1118 1134                  secp = exi->exi_export.ex_secinfo;
1119 1135  
1120 1136                  flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1121 1137                  /* find out which flavors to return */
1122 1138                  for (i = 0; i < count; i ++) {
1123 1139                          int access, flavor, perm;
1124 1140  
1125 1141                          flavor = secp[i].s_secinfo.sc_nfsnum;
1126 1142                          perm = secp[i].s_flags;
1127 1143  
1128 1144                          access = nfsauth4_secinfo_access(exi, cs->req,
1129 1145                              flavor, perm, cs->basecr);
1130 1146  
1131 1147                          if (! (access & NFSAUTH_DENIED) &&
1132 1148                              ! (access & NFSAUTH_WRONGSEC)) {
1133 1149                                  flavor_list[ret_cnt] = flavor;
1134 1150                                  ret_cnt++;
1135 1151                          }
1136 1152                  }
1137 1153  
1138 1154                  /* Create the returning SECINFO value */
1139 1155                  resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1140 1156  
1141 1157                  for (i = 0; i < count; i++) {
1142 1158                          /*
1143 1159                           * If the flavor is in the flavor list,
1144 1160                           * fill in resok_val.
1145 1161                           */
1146 1162                          si = &secp[i].s_secinfo;
1147 1163                          if (in_flavor_list(si->sc_nfsnum,
1148 1164                              flavor_list, ret_cnt)) {
1149 1165                                  resok_val[k].flavor = si->sc_rpcnum;
1150 1166                                  if (resok_val[k].flavor == RPCSEC_GSS) {
1151 1167                                          rpcsec_gss_info *info;
1152 1168  
1153 1169                                          info = &resok_val[k].flavor_info;
1154 1170                                          info->qop = si->sc_qop;
1155 1171                                          info->service = (rpc_gss_svc_t)
1156 1172                                              si->sc_service;
1157 1173  
1158 1174                                          /* get oid opaque data */
1159 1175                                          info->oid.sec_oid4_len =
1160 1176                                              si->sc_gss_mech_type->length;
1161 1177                                          info->oid.sec_oid4_val = kmem_alloc(
1162 1178                                              si->sc_gss_mech_type->length,
1163 1179                                              KM_SLEEP);
1164 1180                                          bcopy(si->sc_gss_mech_type->elements,
1165 1181                                              info->oid.sec_oid4_val,
1166 1182                                              info->oid.sec_oid4_len);
1167 1183                                  }
1168 1184                                  k++;
1169 1185                          }
1170 1186                          if (k >= ret_cnt)
1171 1187                                  break;
1172 1188                  }
1173 1189                  resp->SECINFO4resok_len = ret_cnt;
1174 1190                  resp->SECINFO4resok_val = resok_val;
1175 1191                  kmem_free(flavor_list, count * sizeof (int));
1176 1192          }
1177 1193  
1178 1194          VN_RELE(vp);
1179 1195          return (NFS4_OK);
1180 1196  }
1181 1197  
1182 1198  /*
1183 1199   * SECINFO (Operation 33): Obtain required security information on
1184 1200   * the component name in the format of (security-mechanism-oid, qop, service)
1185 1201   * triplets.
1186 1202   */
1187 1203  /* ARGSUSED */
1188 1204  static void
1189 1205  rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1190 1206      struct compound_state *cs)
1191 1207  {
1192 1208          SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1193 1209          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1194 1210          utf8string *utfnm = &args->name;
1195 1211          uint_t len;
1196 1212          char *nm;
1197 1213          struct sockaddr *ca;
1198 1214          char *name = NULL;
1199 1215          nfsstat4 status = NFS4_OK;
1200 1216  
1201 1217          DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1202 1218              SECINFO4args *, args);
1203 1219  
1204 1220          /*
1205 1221           * Current file handle (cfh) should have been set before getting
1206 1222           * into this function. If not, return error.
1207 1223           */
1208 1224          if (cs->vp == NULL) {
1209 1225                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1210 1226                  goto out;
1211 1227          }
1212 1228  
1213 1229          if (cs->vp->v_type != VDIR) {
1214 1230                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1215 1231                  goto out;
1216 1232          }
1217 1233  
1218 1234          /*
1219 1235           * Verify the component name. If failed, error out, but
1220 1236           * do not error out if the component name is a "..".
1221 1237           * SECINFO will return its parents secinfo data for SECINFO "..".
1222 1238           */
1223 1239          status = utf8_dir_verify(utfnm);
1224 1240          if (status != NFS4_OK) {
1225 1241                  if (utfnm->utf8string_len != 2 ||
1226 1242                      utfnm->utf8string_val[0] != '.' ||
1227 1243                      utfnm->utf8string_val[1] != '.') {
1228 1244                          *cs->statusp = resp->status = status;
1229 1245                          goto out;
1230 1246                  }
1231 1247          }
1232 1248  
1233 1249          nm = utf8_to_str(utfnm, &len, NULL);
1234 1250          if (nm == NULL) {
1235 1251                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1236 1252                  goto out;
1237 1253          }
1238 1254  
1239 1255          if (len > MAXNAMELEN) {
1240 1256                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1241 1257                  kmem_free(nm, len);
1242 1258                  goto out;
1243 1259          }
1244 1260  
1245 1261          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1246 1262          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1247 1263              MAXPATHLEN  + 1);
1248 1264  
1249 1265          if (name == NULL) {
1250 1266                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1251 1267                  kmem_free(nm, len);
1252 1268                  goto out;
1253 1269          }
1254 1270  
1255 1271  
1256 1272          *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1257 1273  
1258 1274          if (name != nm)
1259 1275                  kmem_free(name, MAXPATHLEN + 1);
1260 1276          kmem_free(nm, len);
1261 1277  
1262 1278  out:
1263 1279          DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1264 1280              SECINFO4res *, resp);
1265 1281  }
1266 1282  
1267 1283  /*
1268 1284   * Free SECINFO result.
1269 1285   */
1270 1286  /* ARGSUSED */
1271 1287  static void
1272 1288  rfs4_op_secinfo_free(nfs_resop4 *resop)
1273 1289  {
1274 1290          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1275 1291          int count, i;
1276 1292          secinfo4 *resok_val;
1277 1293  
1278 1294          /* If this is not an Ok result, nothing to free. */
1279 1295          if (resp->status != NFS4_OK) {
1280 1296                  return;
1281 1297          }
1282 1298  
1283 1299          count = resp->SECINFO4resok_len;
1284 1300          resok_val = resp->SECINFO4resok_val;
1285 1301  
1286 1302          for (i = 0; i < count; i++) {
1287 1303                  if (resok_val[i].flavor == RPCSEC_GSS) {
1288 1304                          rpcsec_gss_info *info;
1289 1305  
1290 1306                          info = &resok_val[i].flavor_info;
1291 1307                          kmem_free(info->oid.sec_oid4_val,
1292 1308                              info->oid.sec_oid4_len);
1293 1309                  }
1294 1310          }
1295 1311          kmem_free(resok_val, count * sizeof (secinfo4));
1296 1312          resp->SECINFO4resok_len = 0;
1297 1313          resp->SECINFO4resok_val = NULL;
1298 1314  }
1299 1315  
1300 1316  /* ARGSUSED */
1301 1317  static void
1302 1318  rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1303 1319      struct compound_state *cs)
1304 1320  {
1305 1321          ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1306 1322          ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1307 1323          int error;
1308 1324          vnode_t *vp;
1309 1325          struct vattr va;
1310 1326          int checkwriteperm;
1311 1327          cred_t *cr = cs->cr;
1312 1328          bslabel_t *clabel, *slabel;
1313 1329          ts_label_t *tslabel;
1314 1330          boolean_t admin_low_client;
1315 1331  
1316 1332          DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1317 1333              ACCESS4args *, args);
1318 1334  
1319 1335  #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1320 1336          if (cs->access == CS_ACCESS_DENIED) {
1321 1337                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1322 1338                  goto out;
1323 1339          }
1324 1340  #endif
1325 1341          if (cs->vp == NULL) {
1326 1342                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1327 1343                  goto out;
1328 1344          }
1329 1345  
1330 1346          ASSERT(cr != NULL);
1331 1347  
1332 1348          vp = cs->vp;
1333 1349  
1334 1350          /*
1335 1351           * If the file system is exported read only, it is not appropriate
1336 1352           * to check write permissions for regular files and directories.
1337 1353           * Special files are interpreted by the client, so the underlying
1338 1354           * permissions are sent back to the client for interpretation.
1339 1355           */
1340 1356          if (rdonly4(req, cs) &&
1341 1357              (vp->v_type == VREG || vp->v_type == VDIR))
1342 1358                  checkwriteperm = 0;
1343 1359          else
1344 1360                  checkwriteperm = 1;
1345 1361  
1346 1362          /*
1347 1363           * XXX
1348 1364           * We need the mode so that we can correctly determine access
1349 1365           * permissions relative to a mandatory lock file.  Access to
1350 1366           * mandatory lock files is denied on the server, so it might
1351 1367           * as well be reflected to the server during the open.
1352 1368           */
1353 1369          va.va_mask = AT_MODE;
1354 1370          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1355 1371          if (error) {
1356 1372                  *cs->statusp = resp->status = puterrno4(error);
1357 1373                  goto out;
1358 1374          }
1359 1375          resp->access = 0;
1360 1376          resp->supported = 0;
1361 1377  
1362 1378          if (is_system_labeled()) {
1363 1379                  ASSERT(req->rq_label != NULL);
1364 1380                  clabel = req->rq_label;
1365 1381                  DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1366 1382                      "got client label from request(1)",
1367 1383                      struct svc_req *, req);
1368 1384                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
1369 1385                          if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1370 1386                                  *cs->statusp = resp->status = puterrno4(EACCES);
1371 1387                                  goto out;
1372 1388                          }
1373 1389                          slabel = label2bslabel(tslabel);
1374 1390                          DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1375 1391                              char *, "got server label(1) for vp(2)",
1376 1392                              bslabel_t *, slabel, vnode_t *, vp);
1377 1393  
1378 1394                          admin_low_client = B_FALSE;
1379 1395                  } else
1380 1396                          admin_low_client = B_TRUE;
1381 1397          }
1382 1398  
1383 1399          if (args->access & ACCESS4_READ) {
1384 1400                  error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1385 1401                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1386 1402                      (!is_system_labeled() || admin_low_client ||
1387 1403                      bldominates(clabel, slabel)))
1388 1404                          resp->access |= ACCESS4_READ;
1389 1405                  resp->supported |= ACCESS4_READ;
1390 1406          }
1391 1407          if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1392 1408                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1393 1409                  if (!error && (!is_system_labeled() || admin_low_client ||
1394 1410                      bldominates(clabel, slabel)))
1395 1411                          resp->access |= ACCESS4_LOOKUP;
1396 1412                  resp->supported |= ACCESS4_LOOKUP;
1397 1413          }
1398 1414          if (checkwriteperm &&
1399 1415              (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1400 1416                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1401 1417                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1402 1418                      (!is_system_labeled() || admin_low_client ||
1403 1419                      blequal(clabel, slabel)))
1404 1420                          resp->access |=
1405 1421                              (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1406 1422                  resp->supported |=
1407 1423                      resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1408 1424          }
1409 1425  
1410 1426          if (checkwriteperm &&
1411 1427              (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1412 1428                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1413 1429                  if (!error && (!is_system_labeled() || admin_low_client ||
1414 1430                      blequal(clabel, slabel)))
1415 1431                          resp->access |= ACCESS4_DELETE;
1416 1432                  resp->supported |= ACCESS4_DELETE;
1417 1433          }
1418 1434          if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1419 1435                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1420 1436                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1421 1437                      (!is_system_labeled() || admin_low_client ||
1422 1438                      bldominates(clabel, slabel)))
1423 1439                          resp->access |= ACCESS4_EXECUTE;
1424 1440                  resp->supported |= ACCESS4_EXECUTE;
1425 1441          }
1426 1442  
1427 1443          if (is_system_labeled() && !admin_low_client)
1428 1444                  label_rele(tslabel);
1429 1445  
1430 1446          *cs->statusp = resp->status = NFS4_OK;
1431 1447  out:
1432 1448          DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1433 1449              ACCESS4res *, resp);
1434 1450  }
1435 1451  
1436 1452  /* ARGSUSED */
1437 1453  static void
1438 1454  rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1439 1455      struct compound_state *cs)
1440 1456  {
1441 1457          COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1442 1458          COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1443 1459          int error;
1444 1460          vnode_t *vp = cs->vp;
1445 1461          cred_t *cr = cs->cr;
1446 1462          vattr_t va;
1447 1463          nfs4_srv_t *nsrv4;
1448 1464  
1449 1465          DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1450 1466              COMMIT4args *, args);
1451 1467  
1452 1468          if (vp == NULL) {
1453 1469                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1454 1470                  goto out;
1455 1471          }
1456 1472          if (cs->access == CS_ACCESS_DENIED) {
1457 1473                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1458 1474                  goto out;
1459 1475          }
1460 1476  
1461 1477          if (args->offset + args->count < args->offset) {
1462 1478                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1463 1479                  goto out;
1464 1480          }
1465 1481  
1466 1482          va.va_mask = AT_UID;
1467 1483          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1468 1484  
1469 1485          /*
1470 1486           * If we can't get the attributes, then we can't do the
1471 1487           * right access checking.  So, we'll fail the request.
1472 1488           */
1473 1489          if (error) {
1474 1490                  *cs->statusp = resp->status = puterrno4(error);
1475 1491                  goto out;
1476 1492          }
1477 1493          if (rdonly4(req, cs)) {
1478 1494                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1479 1495                  goto out;
1480 1496          }
1481 1497  
1482 1498          if (vp->v_type != VREG) {
1483 1499                  if (vp->v_type == VDIR)
1484 1500                          resp->status = NFS4ERR_ISDIR;
1485 1501                  else
1486 1502                          resp->status = NFS4ERR_INVAL;
1487 1503                  *cs->statusp = resp->status;
1488 1504                  goto out;
1489 1505          }
1490 1506  
1491 1507          if (crgetuid(cr) != va.va_uid &&
1492 1508              (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1493 1509                  *cs->statusp = resp->status = puterrno4(error);
1494 1510                  goto out;
1495 1511          }
1496 1512  
1497 1513          error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1498 1514  
1499 1515          if (error) {
1500 1516                  *cs->statusp = resp->status = puterrno4(error);
1501 1517                  goto out;
1502 1518          }
1503 1519  
1504 1520          nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1505 1521          *cs->statusp = resp->status = NFS4_OK;
1506 1522          resp->writeverf = nsrv4->write4verf;
1507 1523  out:
1508 1524          DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1509 1525              COMMIT4res *, resp);
1510 1526  }
1511 1527  
1512 1528  /*
1513 1529   * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1514 1530   * was completed. It does the nfsv4 create for special files.
1515 1531   */
1516 1532  /* ARGSUSED */
1517 1533  static vnode_t *
1518 1534  do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1519 1535      struct compound_state *cs, vattr_t *vap, char *nm)
1520 1536  {
1521 1537          int error;
1522 1538          cred_t *cr = cs->cr;
1523 1539          vnode_t *dvp = cs->vp;
1524 1540          vnode_t *vp = NULL;
1525 1541          int mode;
1526 1542          enum vcexcl excl;
1527 1543  
1528 1544          switch (args->type) {
1529 1545          case NF4CHR:
1530 1546          case NF4BLK:
1531 1547                  if (secpolicy_sys_devices(cr) != 0) {
1532 1548                          *cs->statusp = resp->status = NFS4ERR_PERM;
1533 1549                          return (NULL);
1534 1550                  }
1535 1551                  if (args->type == NF4CHR)
1536 1552                          vap->va_type = VCHR;
1537 1553                  else
1538 1554                          vap->va_type = VBLK;
1539 1555                  vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1540 1556                      args->ftype4_u.devdata.specdata2);
1541 1557                  vap->va_mask |= AT_RDEV;
1542 1558                  break;
1543 1559          case NF4SOCK:
1544 1560                  vap->va_type = VSOCK;
1545 1561                  break;
1546 1562          case NF4FIFO:
1547 1563                  vap->va_type = VFIFO;
1548 1564                  break;
1549 1565          default:
1550 1566                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1551 1567                  return (NULL);
1552 1568          }
1553 1569  
1554 1570          /*
1555 1571           * Must specify the mode.
1556 1572           */
1557 1573          if (!(vap->va_mask & AT_MODE)) {
1558 1574                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1559 1575                  return (NULL);
1560 1576          }
1561 1577  
1562 1578          excl = EXCL;
1563 1579  
1564 1580          mode = 0;
1565 1581  
1566 1582          error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1567 1583          if (error) {
1568 1584                  *cs->statusp = resp->status = puterrno4(error);
1569 1585                  return (NULL);
1570 1586          }
1571 1587          return (vp);
1572 1588  }
1573 1589  
1574 1590  /*
1575 1591   * nfsv4 create is used to create non-regular files. For regular files,
1576 1592   * use nfsv4 open.
1577 1593   */
1578 1594  /* ARGSUSED */
1579 1595  static void
1580 1596  rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1581 1597      struct compound_state *cs)
1582 1598  {
1583 1599          CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1584 1600          CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1585 1601          int error;
1586 1602          struct vattr bva, iva, iva2, ava, *vap;
1587 1603          cred_t *cr = cs->cr;
1588 1604          vnode_t *dvp = cs->vp;
1589 1605          vnode_t *vp = NULL;
1590 1606          vnode_t *realvp;
1591 1607          char *nm, *lnm;
1592 1608          uint_t len, llen;
1593 1609          int syncval = 0;
1594 1610          struct nfs4_svgetit_arg sarg;
1595 1611          struct nfs4_ntov_table ntov;
1596 1612          struct statvfs64 sb;
1597 1613          nfsstat4 status;
1598 1614          struct sockaddr *ca;
1599 1615          char *name = NULL;
1600 1616          char *lname = NULL;
1601 1617  
1602 1618          DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1603 1619              CREATE4args *, args);
1604 1620  
1605 1621          resp->attrset = 0;
1606 1622  
1607 1623          if (dvp == NULL) {
1608 1624                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1609 1625                  goto out;
1610 1626          }
1611 1627  
1612 1628          /*
1613 1629           * If there is an unshared filesystem mounted on this vnode,
1614 1630           * do not allow to create an object in this directory.
1615 1631           */
1616 1632          if (vn_ismntpt(dvp)) {
1617 1633                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1618 1634                  goto out;
1619 1635          }
1620 1636  
1621 1637          /* Verify that type is correct */
1622 1638          switch (args->type) {
1623 1639          case NF4LNK:
1624 1640          case NF4BLK:
1625 1641          case NF4CHR:
1626 1642          case NF4SOCK:
1627 1643          case NF4FIFO:
1628 1644          case NF4DIR:
1629 1645                  break;
1630 1646          default:
1631 1647                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1632 1648                  goto out;
1633 1649          };
1634 1650  
1635 1651          if (cs->access == CS_ACCESS_DENIED) {
1636 1652                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1637 1653                  goto out;
1638 1654          }
1639 1655          if (dvp->v_type != VDIR) {
1640 1656                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1641 1657                  goto out;
1642 1658          }
1643 1659          status = utf8_dir_verify(&args->objname);
1644 1660          if (status != NFS4_OK) {
1645 1661                  *cs->statusp = resp->status = status;
1646 1662                  goto out;
1647 1663          }
1648 1664  
1649 1665          if (rdonly4(req, cs)) {
1650 1666                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1651 1667                  goto out;
1652 1668          }
1653 1669  
1654 1670          /*
1655 1671           * Name of newly created object
1656 1672           */
1657 1673          nm = utf8_to_fn(&args->objname, &len, NULL);
1658 1674          if (nm == NULL) {
1659 1675                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1660 1676                  goto out;
1661 1677          }
1662 1678  
1663 1679          if (len > MAXNAMELEN) {
1664 1680                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1665 1681                  kmem_free(nm, len);
1666 1682                  goto out;
1667 1683          }
1668 1684  
1669 1685          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1670 1686          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1671 1687              MAXPATHLEN  + 1);
1672 1688  
1673 1689          if (name == NULL) {
1674 1690                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1675 1691                  kmem_free(nm, len);
1676 1692                  goto out;
1677 1693          }
1678 1694  
1679 1695          resp->attrset = 0;
1680 1696  
1681 1697          sarg.sbp = &sb;
1682 1698          sarg.is_referral = B_FALSE;
1683 1699          nfs4_ntov_table_init(&ntov);
1684 1700  
1685 1701          status = do_rfs4_set_attrs(&resp->attrset,
1686 1702              &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1687 1703  
1688 1704          if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1689 1705                  status = NFS4ERR_INVAL;
1690 1706  
1691 1707          if (status != NFS4_OK) {
1692 1708                  *cs->statusp = resp->status = status;
1693 1709                  if (name != nm)
1694 1710                          kmem_free(name, MAXPATHLEN + 1);
1695 1711                  kmem_free(nm, len);
1696 1712                  nfs4_ntov_table_free(&ntov, &sarg);
1697 1713                  resp->attrset = 0;
1698 1714                  goto out;
1699 1715          }
1700 1716  
1701 1717          /* Get "before" change value */
1702 1718          bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1703 1719          error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1704 1720          if (error) {
1705 1721                  *cs->statusp = resp->status = puterrno4(error);
1706 1722                  if (name != nm)
1707 1723                          kmem_free(name, MAXPATHLEN + 1);
1708 1724                  kmem_free(nm, len);
1709 1725                  nfs4_ntov_table_free(&ntov, &sarg);
1710 1726                  resp->attrset = 0;
1711 1727                  goto out;
1712 1728          }
1713 1729          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1714 1730  
1715 1731          vap = sarg.vap;
1716 1732  
1717 1733          /*
1718 1734           * Set the default initial values for attributes when the parent
1719 1735           * directory does not have the VSUID/VSGID bit set and they have
1720 1736           * not been specified in createattrs.
1721 1737           */
1722 1738          if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1723 1739                  vap->va_uid = crgetuid(cr);
1724 1740                  vap->va_mask |= AT_UID;
1725 1741          }
1726 1742          if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1727 1743                  vap->va_gid = crgetgid(cr);
1728 1744                  vap->va_mask |= AT_GID;
1729 1745          }
1730 1746  
1731 1747          vap->va_mask |= AT_TYPE;
1732 1748          switch (args->type) {
1733 1749          case NF4DIR:
1734 1750                  vap->va_type = VDIR;
1735 1751                  if ((vap->va_mask & AT_MODE) == 0) {
1736 1752                          vap->va_mode = 0700;    /* default: owner rwx only */
1737 1753                          vap->va_mask |= AT_MODE;
1738 1754                  }
1739 1755                  error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1740 1756                  if (error)
1741 1757                          break;
1742 1758  
1743 1759                  /*
1744 1760                   * Get the initial "after" sequence number, if it fails,
1745 1761                   * set to zero
1746 1762                   */
1747 1763                  iva.va_mask = AT_SEQ;
1748 1764                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1749 1765                          iva.va_seq = 0;
1750 1766                  break;
1751 1767          case NF4LNK:
1752 1768                  vap->va_type = VLNK;
1753 1769                  if ((vap->va_mask & AT_MODE) == 0) {
1754 1770                          vap->va_mode = 0700;    /* default: owner rwx only */
1755 1771                          vap->va_mask |= AT_MODE;
1756 1772                  }
1757 1773  
1758 1774                  /*
1759 1775                   * symlink names must be treated as data
1760 1776                   */
1761 1777                  lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1762 1778                      &llen, NULL);
1763 1779  
1764 1780                  if (lnm == NULL) {
1765 1781                          *cs->statusp = resp->status = NFS4ERR_INVAL;
1766 1782                          if (name != nm)
1767 1783                                  kmem_free(name, MAXPATHLEN + 1);
1768 1784                          kmem_free(nm, len);
1769 1785                          nfs4_ntov_table_free(&ntov, &sarg);
1770 1786                          resp->attrset = 0;
1771 1787                          goto out;
1772 1788                  }
1773 1789  
1774 1790                  if (llen > MAXPATHLEN) {
1775 1791                          *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1776 1792                          if (name != nm)
1777 1793                                  kmem_free(name, MAXPATHLEN + 1);
1778 1794                          kmem_free(nm, len);
1779 1795                          kmem_free(lnm, llen);
1780 1796                          nfs4_ntov_table_free(&ntov, &sarg);
1781 1797                          resp->attrset = 0;
1782 1798                          goto out;
1783 1799                  }
1784 1800  
1785 1801                  lname = nfscmd_convname(ca, cs->exi, lnm,
1786 1802                      NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1787 1803  
1788 1804                  if (lname == NULL) {
1789 1805                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1790 1806                          if (name != nm)
1791 1807                                  kmem_free(name, MAXPATHLEN + 1);
1792 1808                          kmem_free(nm, len);
1793 1809                          kmem_free(lnm, llen);
1794 1810                          nfs4_ntov_table_free(&ntov, &sarg);
1795 1811                          resp->attrset = 0;
1796 1812                          goto out;
1797 1813                  }
1798 1814  
1799 1815                  error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1800 1816                  if (lname != lnm)
1801 1817                          kmem_free(lname, MAXPATHLEN + 1);
1802 1818                  kmem_free(lnm, llen);
1803 1819                  if (error)
1804 1820                          break;
1805 1821  
1806 1822                  /*
1807 1823                   * Get the initial "after" sequence number, if it fails,
1808 1824                   * set to zero
1809 1825                   */
1810 1826                  iva.va_mask = AT_SEQ;
1811 1827                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1812 1828                          iva.va_seq = 0;
1813 1829  
1814 1830                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1815 1831                      NULL, NULL, NULL);
1816 1832                  if (error)
1817 1833                          break;
1818 1834  
1819 1835                  /*
1820 1836                   * va_seq is not safe over VOP calls, check it again
1821 1837                   * if it has changed zero out iva to force atomic = FALSE.
1822 1838                   */
1823 1839                  iva2.va_mask = AT_SEQ;
1824 1840                  if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1825 1841                      iva2.va_seq != iva.va_seq)
1826 1842                          iva.va_seq = 0;
1827 1843                  break;
1828 1844          default:
1829 1845                  /*
1830 1846                   * probably a special file.
1831 1847                   */
1832 1848                  if ((vap->va_mask & AT_MODE) == 0) {
1833 1849                          vap->va_mode = 0600;    /* default: owner rw only */
1834 1850                          vap->va_mask |= AT_MODE;
1835 1851                  }
1836 1852                  syncval = FNODSYNC;
1837 1853                  /*
1838 1854                   * We know this will only generate one VOP call
1839 1855                   */
1840 1856                  vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1841 1857  
1842 1858                  if (vp == NULL) {
1843 1859                          if (name != nm)
1844 1860                                  kmem_free(name, MAXPATHLEN + 1);
1845 1861                          kmem_free(nm, len);
1846 1862                          nfs4_ntov_table_free(&ntov, &sarg);
1847 1863                          resp->attrset = 0;
1848 1864                          goto out;
1849 1865                  }
1850 1866  
1851 1867                  /*
1852 1868                   * Get the initial "after" sequence number, if it fails,
1853 1869                   * set to zero
1854 1870                   */
1855 1871                  iva.va_mask = AT_SEQ;
1856 1872                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1857 1873                          iva.va_seq = 0;
1858 1874  
1859 1875                  break;
1860 1876          }
1861 1877          if (name != nm)
1862 1878                  kmem_free(name, MAXPATHLEN + 1);
1863 1879          kmem_free(nm, len);
1864 1880  
1865 1881          if (error) {
1866 1882                  *cs->statusp = resp->status = puterrno4(error);
1867 1883          }
1868 1884  
1869 1885          /*
1870 1886           * Force modified data and metadata out to stable storage.
1871 1887           */
1872 1888          (void) VOP_FSYNC(dvp, 0, cr, NULL);
1873 1889  
1874 1890          if (resp->status != NFS4_OK) {
1875 1891                  if (vp != NULL)
1876 1892                          VN_RELE(vp);
1877 1893                  nfs4_ntov_table_free(&ntov, &sarg);
1878 1894                  resp->attrset = 0;
1879 1895                  goto out;
1880 1896          }
1881 1897  
1882 1898          /*
1883 1899           * Finish setup of cinfo response, "before" value already set.
1884 1900           * Get "after" change value, if it fails, simply return the
1885 1901           * before value.
1886 1902           */
1887 1903          ava.va_mask = AT_CTIME|AT_SEQ;
1888 1904          if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1889 1905                  ava.va_ctime = bva.va_ctime;
1890 1906                  ava.va_seq = 0;
1891 1907          }
1892 1908          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1893 1909  
1894 1910          /*
1895 1911           * True verification that object was created with correct
1896 1912           * attrs is impossible.  The attrs could have been changed
1897 1913           * immediately after object creation.  If attributes did
1898 1914           * not verify, the only recourse for the server is to
1899 1915           * destroy the object.  Maybe if some attrs (like gid)
1900 1916           * are set incorrectly, the object should be destroyed;
1901 1917           * however, seems bad as a default policy.  Do we really
1902 1918           * want to destroy an object over one of the times not
1903 1919           * verifying correctly?  For these reasons, the server
1904 1920           * currently sets bits in attrset for createattrs
1905 1921           * that were set; however, no verification is done.
1906 1922           *
1907 1923           * vmask_to_nmask accounts for vattr bits set on create
1908 1924           *      [do_rfs4_set_attrs() only sets resp bits for
1909 1925           *       non-vattr/vfs bits.]
1910 1926           * Mask off any bits set by default so as not to return
1911 1927           * more attrset bits than were requested in createattrs
1912 1928           */
1913 1929          nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1914 1930          resp->attrset &= args->createattrs.attrmask;
1915 1931          nfs4_ntov_table_free(&ntov, &sarg);
1916 1932  
1917 1933          error = makefh4(&cs->fh, vp, cs->exi);
1918 1934          if (error) {
1919 1935                  *cs->statusp = resp->status = puterrno4(error);
1920 1936          }
1921 1937  
1922 1938          /*
1923 1939           * The cinfo.atomic = TRUE only if we got no errors, we have
1924 1940           * non-zero va_seq's, and it has incremented by exactly one
1925 1941           * during the creation and it didn't change during the VOP_LOOKUP
1926 1942           * or VOP_FSYNC.
1927 1943           */
1928 1944          if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1929 1945              iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1930 1946                  resp->cinfo.atomic = TRUE;
1931 1947          else
1932 1948                  resp->cinfo.atomic = FALSE;
1933 1949  
1934 1950          /*
1935 1951           * Force modified metadata out to stable storage.
1936 1952           *
1937 1953           * if a underlying vp exists, pass it to VOP_FSYNC
1938 1954           */
1939 1955          if (VOP_REALVP(vp, &realvp, NULL) == 0)
1940 1956                  (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1941 1957          else
1942 1958                  (void) VOP_FSYNC(vp, syncval, cr, NULL);
1943 1959  
1944 1960          if (resp->status != NFS4_OK) {
1945 1961                  VN_RELE(vp);
1946 1962                  goto out;
1947 1963          }
1948 1964          if (cs->vp)
1949 1965                  VN_RELE(cs->vp);
1950 1966  
1951 1967          cs->vp = vp;
1952 1968          *cs->statusp = resp->status = NFS4_OK;
1953 1969  out:
1954 1970          DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1955 1971              CREATE4res *, resp);
1956 1972  }
1957 1973  
1958 1974  /*ARGSUSED*/
1959 1975  static void
1960 1976  rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1961 1977      struct compound_state *cs)
1962 1978  {
1963 1979          DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1964 1980              DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1965 1981  
1966 1982          rfs4_op_inval(argop, resop, req, cs);
1967 1983  
1968 1984          DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1969 1985              DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1970 1986  }
1971 1987  
1972 1988  /*ARGSUSED*/
1973 1989  static void
1974 1990  rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1975 1991      struct compound_state *cs)
1976 1992  {
1977 1993          DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1978 1994          DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1979 1995          rfs4_deleg_state_t *dsp;
1980 1996          nfsstat4 status;
1981 1997  
1982 1998          DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1983 1999              DELEGRETURN4args *, args);
1984 2000  
1985 2001          status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1986 2002          resp->status = *cs->statusp = status;
1987 2003          if (status != NFS4_OK)
1988 2004                  goto out;
1989 2005  
1990 2006          /* Ensure specified filehandle matches */
1991 2007          if (cs->vp != dsp->rds_finfo->rf_vp) {
1992 2008                  resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1993 2009          } else
1994 2010                  rfs4_return_deleg(dsp, FALSE);
1995 2011  
1996 2012          rfs4_update_lease(dsp->rds_client);
1997 2013  
1998 2014          rfs4_deleg_state_rele(dsp);
1999 2015  out:
2000 2016          DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2001 2017              DELEGRETURN4res *, resp);
2002 2018  }
2003 2019  
2004 2020  /*
2005 2021   * Check to see if a given "flavor" is an explicitly shared flavor.
2006 2022   * The assumption of this routine is the "flavor" is already a valid
2007 2023   * flavor in the secinfo list of "exi".
2008 2024   *
2009 2025   *      e.g.
2010 2026   *              # share -o sec=flavor1 /export
2011 2027   *              # share -o sec=flavor2 /export/home
2012 2028   *
2013 2029   *              flavor2 is not an explicitly shared flavor for /export,
2014 2030   *              however it is in the secinfo list for /export thru the
2015 2031   *              server namespace setup.
2016 2032   */
2017 2033  int
2018 2034  is_exported_sec(int flavor, struct exportinfo *exi)
2019 2035  {
2020 2036          int     i;
2021 2037          struct secinfo *sp;
2022 2038  
2023 2039          sp = exi->exi_export.ex_secinfo;
2024 2040          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2025 2041                  if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2026 2042                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2027 2043                          return (SEC_REF_EXPORTED(&sp[i]));
2028 2044                  }
2029 2045          }
2030 2046  
2031 2047          /* Should not reach this point based on the assumption */
2032 2048          return (0);
2033 2049  }
2034 2050  
2035 2051  /*
2036 2052   * Check if the security flavor used in the request matches what is
2037 2053   * required at the export point or at the root pseudo node (exi_root).
2038 2054   *
2039 2055   * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2040 2056   *
2041 2057   */
2042 2058  static int
2043 2059  secinfo_match_or_authnone(struct compound_state *cs)
2044 2060  {
2045 2061          int     i;
2046 2062          struct secinfo *sp;
2047 2063  
2048 2064          /*
2049 2065           * Check cs->nfsflavor (from the request) against
2050 2066           * the current export data in cs->exi.
2051 2067           */
2052 2068          sp = cs->exi->exi_export.ex_secinfo;
2053 2069          for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2054 2070                  if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2055 2071                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2056 2072                          return (1);
2057 2073          }
2058 2074  
2059 2075          return (0);
2060 2076  }
2061 2077  
2062 2078  /*
2063 2079   * Check the access authority for the client and return the correct error.
2064 2080   */
2065 2081  nfsstat4
2066 2082  call_checkauth4(struct compound_state *cs, struct svc_req *req)
2067 2083  {
2068 2084          int     authres;
2069 2085  
2070 2086          /*
2071 2087           * First, check if the security flavor used in the request
2072 2088           * are among the flavors set in the server namespace.
2073 2089           */
2074 2090          if (!secinfo_match_or_authnone(cs)) {
2075 2091                  *cs->statusp = NFS4ERR_WRONGSEC;
2076 2092                  return (*cs->statusp);
2077 2093          }
2078 2094  
2079 2095          authres = checkauth4(cs, req);
2080 2096  
2081 2097          if (authres > 0) {
2082 2098                  *cs->statusp = NFS4_OK;
2083 2099                  if (! (cs->access & CS_ACCESS_LIMITED))
2084 2100                          cs->access = CS_ACCESS_OK;
2085 2101          } else if (authres == 0) {
2086 2102                  *cs->statusp = NFS4ERR_ACCESS;
2087 2103          } else if (authres == -2) {
2088 2104                  *cs->statusp = NFS4ERR_WRONGSEC;
2089 2105          } else {
2090 2106                  *cs->statusp = NFS4ERR_DELAY;
2091 2107          }
2092 2108          return (*cs->statusp);
2093 2109  }
2094 2110  
2095 2111  /*
2096 2112   * bitmap4_to_attrmask is called by getattr and readdir.
2097 2113   * It sets up the vattr mask and determines whether vfsstat call is needed
2098 2114   * based on the input bitmap.
2099 2115   * Returns nfsv4 status.
2100 2116   */
2101 2117  static nfsstat4
2102 2118  bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2103 2119  {
2104 2120          int i;
2105 2121          uint_t  va_mask;
2106 2122          struct statvfs64 *sbp = sargp->sbp;
2107 2123  
2108 2124          sargp->sbp = NULL;
2109 2125          sargp->flag = 0;
2110 2126          sargp->rdattr_error = NFS4_OK;
2111 2127          sargp->mntdfid_set = FALSE;
2112 2128          if (sargp->cs->vp)
2113 2129                  sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2114 2130                      FH4_ATTRDIR | FH4_NAMEDATTR);
2115 2131          else
2116 2132                  sargp->xattr = 0;
2117 2133  
2118 2134          /*
2119 2135           * Set rdattr_error_req to true if return error per
2120 2136           * failed entry rather than fail the readdir.
2121 2137           */
2122 2138          if (breq & FATTR4_RDATTR_ERROR_MASK)
2123 2139                  sargp->rdattr_error_req = 1;
2124 2140          else
2125 2141                  sargp->rdattr_error_req = 0;
2126 2142  
2127 2143          /*
2128 2144           * generate the va_mask
2129 2145           * Handle the easy cases first
2130 2146           */
2131 2147          switch (breq) {
2132 2148          case NFS4_NTOV_ATTR_MASK:
2133 2149                  sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2134 2150                  return (NFS4_OK);
2135 2151  
2136 2152          case NFS4_FS_ATTR_MASK:
2137 2153                  sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2138 2154                  sargp->sbp = sbp;
2139 2155                  return (NFS4_OK);
2140 2156  
2141 2157          case NFS4_NTOV_ATTR_CACHE_MASK:
2142 2158                  sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2143 2159                  return (NFS4_OK);
2144 2160  
2145 2161          case FATTR4_LEASE_TIME_MASK:
2146 2162                  sargp->vap->va_mask = 0;
2147 2163                  return (NFS4_OK);
2148 2164  
2149 2165          default:
2150 2166                  va_mask = 0;
2151 2167                  for (i = 0; i < nfs4_ntov_map_size; i++) {
2152 2168                          if ((breq & nfs4_ntov_map[i].fbit) &&
2153 2169                              nfs4_ntov_map[i].vbit)
2154 2170                                  va_mask |= nfs4_ntov_map[i].vbit;
2155 2171                  }
2156 2172  
2157 2173                  /*
2158 2174                   * Check is vfsstat is needed
2159 2175                   */
2160 2176                  if (breq & NFS4_FS_ATTR_MASK)
2161 2177                          sargp->sbp = sbp;
2162 2178  
2163 2179                  sargp->vap->va_mask = va_mask;
2164 2180                  return (NFS4_OK);
2165 2181          }
2166 2182          /* NOTREACHED */
2167 2183  }
2168 2184  
2169 2185  /*
2170 2186   * bitmap4_get_sysattrs is called by getattr and readdir.
2171 2187   * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2172 2188   * Returns nfsv4 status.
2173 2189   */
2174 2190  static nfsstat4
2175 2191  bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2176 2192  {
2177 2193          int error;
2178 2194          struct compound_state *cs = sargp->cs;
2179 2195          vnode_t *vp = cs->vp;
2180 2196  
2181 2197          if (sargp->sbp != NULL) {
2182 2198                  if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2183 2199                          sargp->sbp = NULL;      /* to identify error */
2184 2200                          return (puterrno4(error));
2185 2201                  }
2186 2202          }
2187 2203  
2188 2204          return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2189 2205  }
2190 2206  
2191 2207  static void
2192 2208  nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2193 2209  {
2194 2210          ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2195 2211              KM_SLEEP);
2196 2212          ntovp->attrcnt = 0;
2197 2213          ntovp->vfsstat = FALSE;
2198 2214  }
2199 2215  
2200 2216  static void
2201 2217  nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2202 2218      struct nfs4_svgetit_arg *sargp)
2203 2219  {
2204 2220          int i;
2205 2221          union nfs4_attr_u *na;
2206 2222          uint8_t *amap;
2207 2223  
2208 2224          /*
2209 2225           * XXX Should do the same checks for whether the bit is set
2210 2226           */
2211 2227          for (i = 0, na = ntovp->na, amap = ntovp->amap;
2212 2228              i < ntovp->attrcnt; i++, na++, amap++) {
2213 2229                  (void) (*nfs4_ntov_map[*amap].sv_getit)(
2214 2230                      NFS4ATTR_FREEIT, sargp, na);
2215 2231          }
2216 2232          if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2217 2233                  /*
2218 2234                   * xdr_free for getattr will be done later
2219 2235                   */
2220 2236                  for (i = 0, na = ntovp->na, amap = ntovp->amap;
2221 2237                      i < ntovp->attrcnt; i++, na++, amap++) {
2222 2238                          xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2223 2239                  }
2224 2240          }
2225 2241          kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2226 2242  }
2227 2243  
2228 2244  /*
2229 2245   * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2230 2246   */
2231 2247  static nfsstat4
2232 2248  do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2233 2249      struct nfs4_svgetit_arg *sargp)
2234 2250  {
2235 2251          int error = 0;
2236 2252          int i, k;
2237 2253          struct nfs4_ntov_table ntov;
2238 2254          XDR xdr;
2239 2255          ulong_t xdr_size;
2240 2256          char *xdr_attrs;
2241 2257          nfsstat4 status = NFS4_OK;
2242 2258          nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2243 2259          union nfs4_attr_u *na;
2244 2260          uint8_t *amap;
2245 2261  
2246 2262          sargp->op = NFS4ATTR_GETIT;
2247 2263          sargp->flag = 0;
2248 2264  
2249 2265          fattrp->attrmask = 0;
2250 2266          /* if no bits requested, then return empty fattr4 */
2251 2267          if (breq == 0) {
2252 2268                  fattrp->attrlist4_len = 0;
2253 2269                  fattrp->attrlist4 = NULL;
2254 2270                  return (NFS4_OK);
2255 2271          }
2256 2272  
2257 2273          /*
2258 2274           * return NFS4ERR_INVAL when client requests write-only attrs
2259 2275           */
2260 2276          if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2261 2277                  return (NFS4ERR_INVAL);
2262 2278  
2263 2279          nfs4_ntov_table_init(&ntov);
2264 2280          na = ntov.na;
2265 2281          amap = ntov.amap;
2266 2282  
2267 2283          /*
2268 2284           * Now loop to get or verify the attrs
2269 2285           */
2270 2286          for (i = 0; i < nfs4_ntov_map_size; i++) {
2271 2287                  if (breq & nfs4_ntov_map[i].fbit) {
2272 2288                          if ((*nfs4_ntov_map[i].sv_getit)(
2273 2289                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2274 2290  
2275 2291                                  error = (*nfs4_ntov_map[i].sv_getit)(
2276 2292                                      NFS4ATTR_GETIT, sargp, na);
2277 2293  
2278 2294                                  /*
2279 2295                                   * Possible error values:
2280 2296                                   * >0 if sv_getit failed to
2281 2297                                   * get the attr; 0 if succeeded;
2282 2298                                   * <0 if rdattr_error and the
2283 2299                                   * attribute cannot be returned.
2284 2300                                   */
2285 2301                                  if (error && !(sargp->rdattr_error_req))
2286 2302                                          goto done;
2287 2303                                  /*
2288 2304                                   * If error then just for entry
2289 2305                                   */
2290 2306                                  if (error == 0) {
2291 2307                                          fattrp->attrmask |=
2292 2308                                              nfs4_ntov_map[i].fbit;
2293 2309                                          *amap++ =
2294 2310                                              (uint8_t)nfs4_ntov_map[i].nval;
2295 2311                                          na++;
2296 2312                                          (ntov.attrcnt)++;
2297 2313                                  } else if ((error > 0) &&
2298 2314                                      (sargp->rdattr_error == NFS4_OK)) {
2299 2315                                          sargp->rdattr_error = puterrno4(error);
2300 2316                                  }
2301 2317                                  error = 0;
2302 2318                          }
2303 2319                  }
2304 2320          }
2305 2321  
2306 2322          /*
2307 2323           * If rdattr_error was set after the return value for it was assigned,
2308 2324           * update it.
2309 2325           */
2310 2326          if (prev_rdattr_error != sargp->rdattr_error) {
2311 2327                  na = ntov.na;
2312 2328                  amap = ntov.amap;
2313 2329                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2314 2330                          k = *amap;
2315 2331                          if (k < FATTR4_RDATTR_ERROR) {
2316 2332                                  continue;
2317 2333                          }
2318 2334                          if ((k == FATTR4_RDATTR_ERROR) &&
2319 2335                              ((*nfs4_ntov_map[k].sv_getit)(
2320 2336                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2321 2337  
2322 2338                                  (void) (*nfs4_ntov_map[k].sv_getit)(
2323 2339                                      NFS4ATTR_GETIT, sargp, na);
2324 2340                          }
2325 2341                          break;
2326 2342                  }
2327 2343          }
2328 2344  
2329 2345          xdr_size = 0;
2330 2346          na = ntov.na;
2331 2347          amap = ntov.amap;
2332 2348          for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2333 2349                  xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2334 2350          }
2335 2351  
2336 2352          fattrp->attrlist4_len = xdr_size;
2337 2353          if (xdr_size) {
2338 2354                  /* freed by rfs4_op_getattr_free() */
2339 2355                  fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2340 2356  
2341 2357                  xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2342 2358  
2343 2359                  na = ntov.na;
2344 2360                  amap = ntov.amap;
2345 2361                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2346 2362                          if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2347 2363                                  DTRACE_PROBE1(nfss__e__getattr4_encfail,
2348 2364                                      int, *amap);
2349 2365                                  status = NFS4ERR_SERVERFAULT;
2350 2366                                  break;
2351 2367                          }
2352 2368                  }
2353 2369                  /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2354 2370          } else {
2355 2371                  fattrp->attrlist4 = NULL;
2356 2372          }
2357 2373  done:
2358 2374  
2359 2375          nfs4_ntov_table_free(&ntov, sargp);
2360 2376  
2361 2377          if (error != 0)
2362 2378                  status = puterrno4(error);
2363 2379  
2364 2380          return (status);
2365 2381  }
2366 2382  
2367 2383  /* ARGSUSED */
2368 2384  static void
2369 2385  rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2370 2386      struct compound_state *cs)
2371 2387  {
2372 2388          GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2373 2389          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2374 2390          struct nfs4_svgetit_arg sarg;
2375 2391          struct statvfs64 sb;
2376 2392          nfsstat4 status;
2377 2393  
2378 2394          DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2379 2395              GETATTR4args *, args);
2380 2396  
2381 2397          if (cs->vp == NULL) {
2382 2398                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2383 2399                  goto out;
2384 2400          }
2385 2401  
2386 2402          if (cs->access == CS_ACCESS_DENIED) {
2387 2403                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2388 2404                  goto out;
2389 2405          }
2390 2406  
2391 2407          sarg.sbp = &sb;
2392 2408          sarg.cs = cs;
2393 2409          sarg.is_referral = B_FALSE;
2394 2410  
2395 2411          status = bitmap4_to_attrmask(args->attr_request, &sarg);
2396 2412          if (status == NFS4_OK) {
2397 2413  
2398 2414                  status = bitmap4_get_sysattrs(&sarg);
2399 2415                  if (status == NFS4_OK) {
2400 2416  
2401 2417                          /* Is this a referral? */
2402 2418                          if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2403 2419                                  /* Older V4 Solaris client sees a link */
2404 2420                                  if (client_is_downrev(req))
2405 2421                                          sarg.vap->va_type = VLNK;
2406 2422                                  else
2407 2423                                          sarg.is_referral = B_TRUE;
2408 2424                          }
2409 2425  
2410 2426                          status = do_rfs4_op_getattr(args->attr_request,
2411 2427                              &resp->obj_attributes, &sarg);
2412 2428                  }
2413 2429          }
2414 2430          *cs->statusp = resp->status = status;
2415 2431  out:
2416 2432          DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2417 2433              GETATTR4res *, resp);
2418 2434  }
2419 2435  
2420 2436  static void
2421 2437  rfs4_op_getattr_free(nfs_resop4 *resop)
2422 2438  {
2423 2439          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2424 2440  
2425 2441          nfs4_fattr4_free(&resp->obj_attributes);
2426 2442  }
2427 2443  
2428 2444  /* ARGSUSED */
2429 2445  static void
2430 2446  rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2431 2447      struct compound_state *cs)
2432 2448  {
2433 2449          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2434 2450  
2435 2451          DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2436 2452  
2437 2453          if (cs->vp == NULL) {
2438 2454                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2439 2455                  goto out;
2440 2456          }
2441 2457          if (cs->access == CS_ACCESS_DENIED) {
2442 2458                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2443 2459                  goto out;
2444 2460          }
2445 2461  
2446 2462          /* check for reparse point at the share point */
2447 2463          if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2448 2464                  /* it's all bad */
2449 2465                  cs->exi->exi_moved = 1;
2450 2466                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2451 2467                  DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2452 2468                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2453 2469                  return;
2454 2470          }
2455 2471  
2456 2472          /* check for reparse point at vp */
2457 2473          if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2458 2474                  /* it's not all bad */
2459 2475                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2460 2476                  DTRACE_PROBE2(nfs4serv__func__referral__moved,
2461 2477                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2462 2478                  return;
2463 2479          }
2464 2480  
2465 2481          resp->object.nfs_fh4_val =
2466 2482              kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2467 2483          nfs_fh4_copy(&cs->fh, &resp->object);
2468 2484          *cs->statusp = resp->status = NFS4_OK;
2469 2485  out:
2470 2486          DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2471 2487              GETFH4res *, resp);
2472 2488  }
2473 2489  
2474 2490  static void
2475 2491  rfs4_op_getfh_free(nfs_resop4 *resop)
2476 2492  {
2477 2493          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2478 2494  
2479 2495          if (resp->status == NFS4_OK &&
2480 2496              resp->object.nfs_fh4_val != NULL) {
2481 2497                  kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2482 2498                  resp->object.nfs_fh4_val = NULL;
2483 2499                  resp->object.nfs_fh4_len = 0;
2484 2500          }
2485 2501  }
2486 2502  
2487 2503  /*
2488 2504   * illegal: args: void
2489 2505   *          res : status (NFS4ERR_OP_ILLEGAL)
2490 2506   */
2491 2507  /* ARGSUSED */
2492 2508  static void
2493 2509  rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2494 2510      struct svc_req *req, struct compound_state *cs)
2495 2511  {
2496 2512          ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2497 2513  
2498 2514          resop->resop = OP_ILLEGAL;
2499 2515          *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2500 2516  }
2501 2517  
2502 2518  /*
2503 2519   * link: args: SAVED_FH: file, CURRENT_FH: target directory
2504 2520   *       res: status. If success - CURRENT_FH unchanged, return change_info
2505 2521   */
2506 2522  /* ARGSUSED */
2507 2523  static void
2508 2524  rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2509 2525      struct compound_state *cs)
2510 2526  {
2511 2527          LINK4args *args = &argop->nfs_argop4_u.oplink;
2512 2528          LINK4res *resp = &resop->nfs_resop4_u.oplink;
2513 2529          int error;
2514 2530          vnode_t *vp;
2515 2531          vnode_t *dvp;
2516 2532          struct vattr bdva, idva, adva;
2517 2533          char *nm;
2518 2534          uint_t  len;
2519 2535          struct sockaddr *ca;
2520 2536          char *name = NULL;
2521 2537          nfsstat4 status;
2522 2538  
2523 2539          DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2524 2540              LINK4args *, args);
2525 2541  
2526 2542          /* SAVED_FH: source object */
2527 2543          vp = cs->saved_vp;
2528 2544          if (vp == NULL) {
2529 2545                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2530 2546                  goto out;
2531 2547          }
2532 2548  
2533 2549          /* CURRENT_FH: target directory */
2534 2550          dvp = cs->vp;
2535 2551          if (dvp == NULL) {
2536 2552                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2537 2553                  goto out;
2538 2554          }
2539 2555  
2540 2556          /*
2541 2557           * If there is a non-shared filesystem mounted on this vnode,
2542 2558           * do not allow to link any file in this directory.
2543 2559           */
2544 2560          if (vn_ismntpt(dvp)) {
2545 2561                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2546 2562                  goto out;
2547 2563          }
2548 2564  
2549 2565          if (cs->access == CS_ACCESS_DENIED) {
2550 2566                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2551 2567                  goto out;
2552 2568          }
2553 2569  
2554 2570          /* Check source object's type validity */
2555 2571          if (vp->v_type == VDIR) {
2556 2572                  *cs->statusp = resp->status = NFS4ERR_ISDIR;
2557 2573                  goto out;
2558 2574          }
2559 2575  
2560 2576          /* Check target directory's type */
2561 2577          if (dvp->v_type != VDIR) {
2562 2578                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2563 2579                  goto out;
2564 2580          }
2565 2581  
2566 2582          if (cs->saved_exi != cs->exi) {
2567 2583                  *cs->statusp = resp->status = NFS4ERR_XDEV;
2568 2584                  goto out;
2569 2585          }
2570 2586  
2571 2587          status = utf8_dir_verify(&args->newname);
2572 2588          if (status != NFS4_OK) {
2573 2589                  *cs->statusp = resp->status = status;
2574 2590                  goto out;
2575 2591          }
2576 2592  
2577 2593          nm = utf8_to_fn(&args->newname, &len, NULL);
2578 2594          if (nm == NULL) {
2579 2595                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2580 2596                  goto out;
2581 2597          }
2582 2598  
2583 2599          if (len > MAXNAMELEN) {
2584 2600                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2585 2601                  kmem_free(nm, len);
2586 2602                  goto out;
2587 2603          }
2588 2604  
2589 2605          if (rdonly4(req, cs)) {
2590 2606                  *cs->statusp = resp->status = NFS4ERR_ROFS;
2591 2607                  kmem_free(nm, len);
2592 2608                  goto out;
2593 2609          }
2594 2610  
2595 2611          /* Get "before" change value */
2596 2612          bdva.va_mask = AT_CTIME|AT_SEQ;
2597 2613          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2598 2614          if (error) {
2599 2615                  *cs->statusp = resp->status = puterrno4(error);
2600 2616                  kmem_free(nm, len);
2601 2617                  goto out;
2602 2618          }
2603 2619  
2604 2620          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2605 2621          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2606 2622              MAXPATHLEN  + 1);
2607 2623  
2608 2624          if (name == NULL) {
2609 2625                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2610 2626                  kmem_free(nm, len);
2611 2627                  goto out;
2612 2628          }
2613 2629  
2614 2630          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2615 2631  
2616 2632          error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2617 2633  
2618 2634          if (nm != name)
2619 2635                  kmem_free(name, MAXPATHLEN + 1);
2620 2636          kmem_free(nm, len);
2621 2637  
2622 2638          /*
2623 2639           * Get the initial "after" sequence number, if it fails, set to zero
2624 2640           */
2625 2641          idva.va_mask = AT_SEQ;
2626 2642          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2627 2643                  idva.va_seq = 0;
2628 2644  
2629 2645          /*
2630 2646           * Force modified data and metadata out to stable storage.
2631 2647           */
2632 2648          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2633 2649          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2634 2650  
2635 2651          if (error) {
2636 2652                  *cs->statusp = resp->status = puterrno4(error);
2637 2653                  goto out;
2638 2654          }
2639 2655  
2640 2656          /*
2641 2657           * Get "after" change value, if it fails, simply return the
2642 2658           * before value.
2643 2659           */
2644 2660          adva.va_mask = AT_CTIME|AT_SEQ;
2645 2661          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2646 2662                  adva.va_ctime = bdva.va_ctime;
2647 2663                  adva.va_seq = 0;
2648 2664          }
2649 2665  
2650 2666          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2651 2667  
2652 2668          /*
2653 2669           * The cinfo.atomic = TRUE only if we have
2654 2670           * non-zero va_seq's, and it has incremented by exactly one
2655 2671           * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2656 2672           */
2657 2673          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2658 2674              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2659 2675                  resp->cinfo.atomic = TRUE;
2660 2676          else
2661 2677                  resp->cinfo.atomic = FALSE;
2662 2678  
2663 2679          *cs->statusp = resp->status = NFS4_OK;
2664 2680  out:
2665 2681          DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2666 2682              LINK4res *, resp);
2667 2683  }
2668 2684  
2669 2685  /*
2670 2686   * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2671 2687   */
2672 2688  
2673 2689  /* ARGSUSED */
2674 2690  static nfsstat4
2675 2691  do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2676 2692  {
2677 2693          int error;
2678 2694          int different_export = 0;
2679 2695          vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2680 2696          struct exportinfo *exi = NULL, *pre_exi = NULL;
2681 2697          nfsstat4 stat;
2682 2698          fid_t fid;
2683 2699          int attrdir, dotdot, walk;
2684 2700          bool_t is_newvp = FALSE;
2685 2701  
2686 2702          if (cs->vp->v_flag & V_XATTRDIR) {
2687 2703                  attrdir = 1;
2688 2704                  ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2689 2705          } else {
2690 2706                  attrdir = 0;
2691 2707                  ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2692 2708          }
2693 2709  
2694 2710          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2695 2711  
2696 2712          /*
2697 2713           * If dotdotting, then need to check whether it's
2698 2714           * above the root of a filesystem, or above an
2699 2715           * export point.
2700 2716           */
2701 2717          if (dotdot) {
2702 2718  
2703 2719                  /*
2704 2720                   * If dotdotting at the root of a filesystem, then
2705 2721                   * need to traverse back to the mounted-on filesystem
2706 2722                   * and do the dotdot lookup there.
2707 2723                   */
2708 2724                  if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2709 2725  
2710 2726                          /*
2711 2727                           * If at the system root, then can
2712 2728                           * go up no further.
2713 2729                           */
2714 2730                          if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2715 2731                                  return (puterrno4(ENOENT));
2716 2732  
2717 2733                          /*
2718 2734                           * Traverse back to the mounted-on filesystem
2719 2735                           */
2720 2736                          cs->vp = untraverse(cs->vp);
2721 2737  
2722 2738                          /*
2723 2739                           * Set the different_export flag so we remember
2724 2740                           * to pick up a new exportinfo entry for
2725 2741                           * this new filesystem.
2726 2742                           */
2727 2743                          different_export = 1;
2728 2744                  } else {
2729 2745  
2730 2746                          /*
2731 2747                           * If dotdotting above an export point then set
2732 2748                           * the different_export to get new export info.
2733 2749                           */
2734 2750                          different_export = nfs_exported(cs->exi, cs->vp);
2735 2751                  }
2736 2752          }
2737 2753  
2738 2754          error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2739 2755              NULL, NULL, NULL);
2740 2756          if (error)
2741 2757                  return (puterrno4(error));
2742 2758  
2743 2759          /*
2744 2760           * If the vnode is in a pseudo filesystem, check whether it is visible.
2745 2761           *
2746 2762           * XXX if the vnode is a symlink and it is not visible in
2747 2763           * a pseudo filesystem, return ENOENT (not following symlink).
2748 2764           * V4 client can not mount such symlink. This is a regression
2749 2765           * from V2/V3.
2750 2766           *
2751 2767           * In the same exported filesystem, if the security flavor used
2752 2768           * is not an explicitly shared flavor, limit the view to the visible
2753 2769           * list entries only. This is not a WRONGSEC case because it's already
2754 2770           * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2755 2771           */
2756 2772          if (!different_export &&
2757 2773              (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2758 2774              cs->access & CS_ACCESS_LIMITED)) {
2759 2775                  if (! nfs_visible(cs->exi, vp, &different_export)) {
2760 2776                          VN_RELE(vp);
2761 2777                          return (puterrno4(ENOENT));
2762 2778                  }
2763 2779          }
2764 2780  
2765 2781          /*
2766 2782           * If it's a mountpoint, then traverse it.
2767 2783           */
2768 2784          if (vn_ismntpt(vp)) {
2769 2785                  pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2770 2786                  pre_tvp = vp;           /* save pre-traversed vnode     */
2771 2787  
2772 2788                  /*
2773 2789                   * hold pre_tvp to counteract rele by traverse.  We will
2774 2790                   * need pre_tvp below if checkexport4 fails
2775 2791                   */
2776 2792                  VN_HOLD(pre_tvp);
2777 2793                  if ((error = traverse(&vp)) != 0) {
2778 2794                          VN_RELE(vp);
2779 2795                          VN_RELE(pre_tvp);
2780 2796                          return (puterrno4(error));
2781 2797                  }
2782 2798                  different_export = 1;
2783 2799          } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2784 2800                  /*
2785 2801                   * The vfsp comparison is to handle the case where
2786 2802                   * a LOFS mount is shared.  lo_lookup traverses mount points,
2787 2803                   * and NFS is unaware of local fs transistions because
2788 2804                   * v_vfsmountedhere isn't set.  For this special LOFS case,
2789 2805                   * the dir and the obj returned by lookup will have different
2790 2806                   * vfs ptrs.
2791 2807                   */
2792 2808                  different_export = 1;
2793 2809          }
2794 2810  
2795 2811          if (different_export) {
2796 2812  
2797 2813                  bzero(&fid, sizeof (fid));
2798 2814                  fid.fid_len = MAXFIDSZ;
2799 2815                  error = vop_fid_pseudo(vp, &fid);
2800 2816                  if (error) {
2801 2817                          VN_RELE(vp);
2802 2818                          if (pre_tvp)
2803 2819                                  VN_RELE(pre_tvp);
2804 2820                          return (puterrno4(error));
2805 2821                  }
2806 2822  
2807 2823                  if (dotdot)
2808 2824                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2809 2825                  else
2810 2826                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2811 2827  
2812 2828                  if (exi == NULL) {
2813 2829                          if (pre_tvp) {
2814 2830                                  /*
2815 2831                                   * If this vnode is a mounted-on vnode,
2816 2832                                   * but the mounted-on file system is not
2817 2833                                   * exported, send back the filehandle for
2818 2834                                   * the mounted-on vnode, not the root of
2819 2835                                   * the mounted-on file system.
2820 2836                                   */
2821 2837                                  VN_RELE(vp);
2822 2838                                  vp = pre_tvp;
2823 2839                                  exi = pre_exi;
2824 2840                          } else {
2825 2841                                  VN_RELE(vp);
2826 2842                                  return (puterrno4(EACCES));
2827 2843                          }
2828 2844                  } else if (pre_tvp) {
2829 2845                          /* we're done with pre_tvp now. release extra hold */
2830 2846                          VN_RELE(pre_tvp);
2831 2847                  }
2832 2848  
2833 2849                  cs->exi = exi;
2834 2850  
2835 2851                  /*
2836 2852                   * Now we do a checkauth4. The reason is that
2837 2853                   * this client/user may not have access to the new
2838 2854                   * exported file system, and if they do,
2839 2855                   * the client/user may be mapped to a different uid.
2840 2856                   *
2841 2857                   * We start with a new cr, because the checkauth4 done
2842 2858                   * in the PUT*FH operation over wrote the cred's uid,
2843 2859                   * gid, etc, and we want the real thing before calling
2844 2860                   * checkauth4()
2845 2861                   */
2846 2862                  crfree(cs->cr);
2847 2863                  cs->cr = crdup(cs->basecr);
2848 2864  
2849 2865                  oldvp = cs->vp;
2850 2866                  cs->vp = vp;
2851 2867                  is_newvp = TRUE;
2852 2868  
2853 2869                  stat = call_checkauth4(cs, req);
2854 2870                  if (stat != NFS4_OK) {
2855 2871                          VN_RELE(cs->vp);
2856 2872                          cs->vp = oldvp;
2857 2873                          return (stat);
2858 2874                  }
2859 2875          }
2860 2876  
2861 2877          /*
2862 2878           * After various NFS checks, do a label check on the path
2863 2879           * component. The label on this path should either be the
2864 2880           * global zone's label or a zone's label. We are only
2865 2881           * interested in the zone's label because exported files
2866 2882           * in global zone is accessible (though read-only) to
2867 2883           * clients. The exportability/visibility check is already
2868 2884           * done before reaching this code.
2869 2885           */
2870 2886          if (is_system_labeled()) {
2871 2887                  bslabel_t *clabel;
2872 2888  
2873 2889                  ASSERT(req->rq_label != NULL);
2874 2890                  clabel = req->rq_label;
2875 2891                  DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2876 2892                      "got client label from request(1)", struct svc_req *, req);
2877 2893  
2878 2894                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2879 2895                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2880 2896                              cs->exi)) {
2881 2897                                  error = EACCES;
2882 2898                                  goto err_out;
2883 2899                          }
2884 2900                  } else {
2885 2901                          /*
2886 2902                           * We grant access to admin_low label clients
2887 2903                           * only if the client is trusted, i.e. also
2888 2904                           * running Solaris Trusted Extension.
2889 2905                           */
2890 2906                          struct sockaddr *ca;
2891 2907                          int             addr_type;
2892 2908                          void            *ipaddr;
2893 2909                          tsol_tpc_t      *tp;
2894 2910  
2895 2911                          ca = (struct sockaddr *)svc_getrpccaller(
2896 2912                              req->rq_xprt)->buf;
2897 2913                          if (ca->sa_family == AF_INET) {
2898 2914                                  addr_type = IPV4_VERSION;
2899 2915                                  ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2900 2916                          } else if (ca->sa_family == AF_INET6) {
2901 2917                                  addr_type = IPV6_VERSION;
2902 2918                                  ipaddr = &((struct sockaddr_in6 *)
2903 2919                                      ca)->sin6_addr;
2904 2920                          }
2905 2921                          tp = find_tpc(ipaddr, addr_type, B_FALSE);
2906 2922                          if (tp == NULL || tp->tpc_tp.tp_doi !=
2907 2923                              l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2908 2924                              SUN_CIPSO) {
2909 2925                                  if (tp != NULL)
2910 2926                                          TPC_RELE(tp);
2911 2927                                  error = EACCES;
2912 2928                                  goto err_out;
2913 2929                          }
2914 2930                          TPC_RELE(tp);
2915 2931                  }
2916 2932          }
2917 2933  
2918 2934          error = makefh4(&cs->fh, vp, cs->exi);
2919 2935  
2920 2936  err_out:
2921 2937          if (error) {
2922 2938                  if (is_newvp) {
2923 2939                          VN_RELE(cs->vp);
2924 2940                          cs->vp = oldvp;
2925 2941                  } else
2926 2942                          VN_RELE(vp);
2927 2943                  return (puterrno4(error));
2928 2944          }
2929 2945  
2930 2946          if (!is_newvp) {
2931 2947                  if (cs->vp)
2932 2948                          VN_RELE(cs->vp);
2933 2949                  cs->vp = vp;
2934 2950          } else if (oldvp)
2935 2951                  VN_RELE(oldvp);
2936 2952  
2937 2953          /*
2938 2954           * if did lookup on attrdir and didn't lookup .., set named
2939 2955           * attr fh flag
2940 2956           */
2941 2957          if (attrdir && ! dotdot)
2942 2958                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2943 2959  
2944 2960          /* Assume false for now, open proc will set this */
2945 2961          cs->mandlock = FALSE;
2946 2962  
2947 2963          return (NFS4_OK);
2948 2964  }
2949 2965  
2950 2966  /* ARGSUSED */
2951 2967  static void
2952 2968  rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2953 2969      struct compound_state *cs)
2954 2970  {
2955 2971          LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2956 2972          LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2957 2973          char *nm;
2958 2974          uint_t len;
2959 2975          struct sockaddr *ca;
2960 2976          char *name = NULL;
2961 2977          nfsstat4 status;
2962 2978  
2963 2979          DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2964 2980              LOOKUP4args *, args);
2965 2981  
2966 2982          if (cs->vp == NULL) {
2967 2983                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2968 2984                  goto out;
2969 2985          }
2970 2986  
2971 2987          if (cs->vp->v_type == VLNK) {
2972 2988                  *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2973 2989                  goto out;
2974 2990          }
2975 2991  
2976 2992          if (cs->vp->v_type != VDIR) {
2977 2993                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2978 2994                  goto out;
2979 2995          }
2980 2996  
2981 2997          status = utf8_dir_verify(&args->objname);
2982 2998          if (status != NFS4_OK) {
2983 2999                  *cs->statusp = resp->status = status;
2984 3000                  goto out;
2985 3001          }
2986 3002  
2987 3003          nm = utf8_to_str(&args->objname, &len, NULL);
2988 3004          if (nm == NULL) {
2989 3005                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2990 3006                  goto out;
2991 3007          }
2992 3008  
2993 3009          if (len > MAXNAMELEN) {
2994 3010                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2995 3011                  kmem_free(nm, len);
2996 3012                  goto out;
2997 3013          }
2998 3014  
2999 3015          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3000 3016          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3001 3017              MAXPATHLEN  + 1);
3002 3018  
3003 3019          if (name == NULL) {
3004 3020                  *cs->statusp = resp->status = NFS4ERR_INVAL;
3005 3021                  kmem_free(nm, len);
3006 3022                  goto out;
3007 3023          }
3008 3024  
3009 3025          *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3010 3026  
3011 3027          if (name != nm)
3012 3028                  kmem_free(name, MAXPATHLEN + 1);
3013 3029          kmem_free(nm, len);
3014 3030  
3015 3031  out:
3016 3032          DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3017 3033              LOOKUP4res *, resp);
3018 3034  }
3019 3035  
3020 3036  /* ARGSUSED */
3021 3037  static void
3022 3038  rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3023 3039      struct compound_state *cs)
3024 3040  {
3025 3041          LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3026 3042  
3027 3043          DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3028 3044  
3029 3045          if (cs->vp == NULL) {
3030 3046                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3031 3047                  goto out;
3032 3048          }
3033 3049  
3034 3050          if (cs->vp->v_type != VDIR) {
3035 3051                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3036 3052                  goto out;
3037 3053          }
3038 3054  
3039 3055          *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3040 3056  
3041 3057          /*
3042 3058           * From NFSV4 Specification, LOOKUPP should not check for
3043 3059           * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3044 3060           */
3045 3061          if (resp->status == NFS4ERR_WRONGSEC) {
3046 3062                  *cs->statusp = resp->status = NFS4_OK;
3047 3063          }
3048 3064  
3049 3065  out:
3050 3066          DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3051 3067              LOOKUPP4res *, resp);
3052 3068  }
3053 3069  
3054 3070  
3055 3071  /*ARGSUSED2*/
3056 3072  static void
3057 3073  rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3058 3074      struct compound_state *cs)
3059 3075  {
3060 3076          OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3061 3077          OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3062 3078          vnode_t         *avp = NULL;
3063 3079          int             lookup_flags = LOOKUP_XATTR, error;
3064 3080          int             exp_ro = 0;
3065 3081  
3066 3082          DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3067 3083              OPENATTR4args *, args);
3068 3084  
3069 3085          if (cs->vp == NULL) {
3070 3086                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3071 3087                  goto out;
3072 3088          }
3073 3089  
3074 3090          if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3075 3091              !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3076 3092                  *cs->statusp = resp->status = puterrno4(ENOTSUP);
3077 3093                  goto out;
3078 3094          }
3079 3095  
3080 3096          /*
3081 3097           * If file system supports passing ACE mask to VOP_ACCESS then
3082 3098           * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3083 3099           */
3084 3100  
3085 3101          if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3086 3102                  error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3087 3103                      V_ACE_MASK, cs->cr, NULL);
3088 3104          else
3089 3105                  error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3090 3106                      (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3091 3107                      (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3092 3108  
3093 3109          if (error) {
3094 3110                  *cs->statusp = resp->status = puterrno4(EACCES);
3095 3111                  goto out;
3096 3112          }
3097 3113  
3098 3114          /*
3099 3115           * The CREATE_XATTR_DIR VOP flag cannot be specified if
3100 3116           * the file system is exported read-only -- regardless of
3101 3117           * createdir flag.  Otherwise the attrdir would be created
3102 3118           * (assuming server fs isn't mounted readonly locally).  If
3103 3119           * VOP_LOOKUP returns ENOENT in this case, the error will
3104 3120           * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3105 3121           * because specfs has no VOP_LOOKUP op, so the macro would
3106 3122           * return ENOSYS.  EINVAL is returned by all (current)
3107 3123           * Solaris file system implementations when any of their
3108 3124           * restrictions are violated (xattr(dir) can't have xattrdir).
3109 3125           * Returning NOTSUPP is more appropriate in this case
3110 3126           * because the object will never be able to have an attrdir.
3111 3127           */
3112 3128          if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3113 3129                  lookup_flags |= CREATE_XATTR_DIR;
3114 3130  
3115 3131          error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3116 3132              NULL, NULL, NULL);
3117 3133  
3118 3134          if (error) {
3119 3135                  if (error == ENOENT && args->createdir && exp_ro)
3120 3136                          *cs->statusp = resp->status = puterrno4(EROFS);
3121 3137                  else if (error == EINVAL || error == ENOSYS)
3122 3138                          *cs->statusp = resp->status = puterrno4(ENOTSUP);
3123 3139                  else
3124 3140                          *cs->statusp = resp->status = puterrno4(error);
3125 3141                  goto out;
3126 3142          }
3127 3143  
3128 3144          ASSERT(avp->v_flag & V_XATTRDIR);
3129 3145  
3130 3146          error = makefh4(&cs->fh, avp, cs->exi);
3131 3147  
3132 3148          if (error) {
3133 3149                  VN_RELE(avp);
3134 3150                  *cs->statusp = resp->status = puterrno4(error);
3135 3151                  goto out;
3136 3152          }
3137 3153  
3138 3154          VN_RELE(cs->vp);
3139 3155          cs->vp = avp;
3140 3156  
3141 3157          /*
3142 3158           * There is no requirement for an attrdir fh flag
3143 3159           * because the attrdir has a vnode flag to distinguish
3144 3160           * it from regular (non-xattr) directories.  The
3145 3161           * FH4_ATTRDIR flag is set for future sanity checks.
3146 3162           */
3147 3163          set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3148 3164          *cs->statusp = resp->status = NFS4_OK;
3149 3165  
3150 3166  out:
3151 3167          DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3152 3168              OPENATTR4res *, resp);
3153 3169  }
3154 3170  
3155 3171  static int
3156 3172  do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3157 3173      caller_context_t *ct)
3158 3174  {
3159 3175          int error;
3160 3176          int i;
3161 3177          clock_t delaytime;
3162 3178  
3163 3179          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3164 3180  
3165 3181          /*
3166 3182           * Don't block on mandatory locks. If this routine returns
3167 3183           * EAGAIN, the caller should return NFS4ERR_LOCKED.
3168 3184           */
3169 3185          uio->uio_fmode = FNONBLOCK;
3170 3186  
3171 3187          for (i = 0; i < rfs4_maxlock_tries; i++) {
3172 3188  
3173 3189  
3174 3190                  if (direction == FREAD) {
3175 3191                          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3176 3192                          error = VOP_READ(vp, uio, ioflag, cred, ct);
3177 3193                          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3178 3194                  } else {
3179 3195                          (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3180 3196                          error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3181 3197                          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3182 3198                  }
3183 3199  
3184 3200                  if (error != EAGAIN)
3185 3201                          break;
3186 3202  
3187 3203                  if (i < rfs4_maxlock_tries - 1) {
3188 3204                          delay(delaytime);
3189 3205                          delaytime *= 2;
3190 3206                  }
3191 3207          }
3192 3208  
3193 3209          return (error);
3194 3210  }
3195 3211  
3196 3212  /* ARGSUSED */
           /*
            * NFSv4 READ operation handler.
            *
            * Reads up to args->count bytes starting at args->offset from the
            * vnode of the current filehandle (cs->vp) and fills in the READ4res
            * (status, eof, data_len, data_val, mblk, wlist).  The resulting
            * status is stored in both *cs->statusp and resp->status.
            *
            * The data is returned in one of three ways:
            *  - into the client's RDMA write chunk list when args->wlist is set;
            *  - via zero-copy "loaned" buffers when nfs_loaned_buffers is set,
            *    RDMA is not in use and VOP_REQZCBUF() succeeds;
            *  - otherwise into an mblk obtained from rfs_read_alloc().
            *
            * NOTE(review): the start probe below names its second argument type
            * as "READ4args" although a pointer is passed; the other probes in
            * this file use a pointer type -- confirm whether this is intended.
            */
3197 3213  static void
3198 3214  rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3199 3215      struct compound_state *cs)
3200 3216  {
3201 3217          READ4args *args = &argop->nfs_argop4_u.opread;
3202 3218          READ4res *resp = &resop->nfs_resop4_u.opread;
3203 3219          int error;
3204 3220          int verror;
3205 3221          vnode_t *vp;
3206 3222          struct vattr va;
3207 3223          struct iovec iov, *iovp = NULL;
3208 3224          int iovcnt;
3209 3225          struct uio uio;
3210 3226          u_offset_t offset;
3211 3227          bool_t *deleg = &cs->deleg;
3212 3228          nfsstat4 stat;
3213 3229          int in_crit = 0;
3214 3230          mblk_t *mp = NULL;
3215 3231          int alloc_err = 0;
3216 3232          int rdma_used = 0;
3217 3233          int loaned_buffers;
3218 3234          caller_context_t ct;
3219 3235          struct uio *uiop;
3220 3236  
3221 3237          DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3222 3238              READ4args, args);
3223 3239  
3224 3240          vp = cs->vp;
3225 3241          if (vp == NULL) {
3226 3242                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3227 3243                  goto out;
3228 3244          }
3229 3245          if (cs->access == CS_ACCESS_DENIED) {
3230 3246                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3231 3247                  goto out;
3232 3248          }
3233 3249  
                   /*
                    * Validate the stateid for read access.  ct is passed here
                    * uninitialized and then handed to every later VOP call, so it
                    * is presumably filled in by rfs4_check_stateid() -- confirm.
                    */
3234 3250          if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3235 3251              deleg, TRUE, &ct)) != NFS4_OK) {
3236 3252                  *cs->statusp = resp->status = stat;
3237 3253                  goto out;
3238 3254          }
3239 3255  
3240 3256          /*
3241 3257           * Enter the critical region before calling VOP_RWLOCK
3242 3258           * to avoid a deadlock with write requests.
3243 3259           */
3244 3260          if (nbl_need_check(vp)) {
3245 3261                  nbl_start_crit(vp, RW_READER);
                           /* in_crit makes the out: path leave the critical region */
3246 3262                  in_crit = 1;
3247 3263                  if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3248 3264                      &ct)) {
3249 3265                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
3250 3266                          goto out;
3251 3267                  }
3252 3268          }
3253 3269  
                   /*
                    * A write chunk list means the data goes back via RDMA; the
                    * list must be able to hold the requested byte count.
                    */
3254 3270          if (args->wlist) {
3255 3271                  if (args->count > clist_len(args->wlist)) {
3256 3272                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3257 3273                          goto out;
3258 3274                  }
3259 3275                  rdma_used = 1;
3260 3276          }
3261 3277  
3262 3278          /* use loaned buffers for TCP */
3263 3279          loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3264 3280  
3265 3281          va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3266 3282          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3267 3283  
3268 3284          /*
3269 3285           * If we can't get the attributes, then we can't do the
3270 3286           * right access checking.  So, we'll fail the request.
3271 3287           */
3272 3288          if (verror) {
3273 3289                  *cs->statusp = resp->status = puterrno4(verror);
3274 3290                  goto out;
3275 3291          }
3276 3292  
3277 3293          if (vp->v_type != VREG) {
3278 3294                  *cs->statusp = resp->status =
3279 3295                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3280 3296                  goto out;
3281 3297          }
3282 3298  
                   /*
                    * The file's owner is not subject to an access check here.
                    * Anyone else needs either read or execute permission; if both
                    * checks fail, the error from the VEXEC check is reported.
                    */
3283 3299          if (crgetuid(cs->cr) != va.va_uid &&
3284 3300              (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3285 3301              (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3286 3302                  *cs->statusp = resp->status = puterrno4(error);
3287 3303                  goto out;
3288 3304          }
3289 3305  
3290 3306          if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3291 3307                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3292 3308                  goto out;
3293 3309          }
3294 3310  
                   /*
                    * A read starting at or beyond the file size succeeds with no
                    * data and eof set.
                    * NOTE(review): the status is assigned twice in this branch; the
                    * second assignment looks redundant.
                    */
3295 3311          offset = args->offset;
3296 3312          if (offset >= va.va_size) {
3297 3313                  *cs->statusp = resp->status = NFS4_OK;
3298 3314                  resp->eof = TRUE;
3299 3315                  resp->data_len = 0;
3300 3316                  resp->data_val = NULL;
3301 3317                  resp->mblk = NULL;
3302 3318                  /* RDMA */
3303 3319                  resp->wlist = args->wlist;
3304 3320                  resp->wlist_len = resp->data_len;
3305 3321                  *cs->statusp = resp->status = NFS4_OK;
3306 3322                  if (resp->wlist)
3307 3323                          clist_zero_len(resp->wlist);
3308 3324                  goto out;
3309 3325          }
3310 3326  
                   /* A zero-length read succeeds with no data and eof clear. */
3311 3327          if (args->count == 0) {
3312 3328                  *cs->statusp = resp->status = NFS4_OK;
3313 3329                  resp->eof = FALSE;
3314 3330                  resp->data_len = 0;
3315 3331                  resp->data_val = NULL;
3316 3332                  resp->mblk = NULL;
3317 3333                  /* RDMA */
3318 3334                  resp->wlist = args->wlist;
3319 3335                  resp->wlist_len = resp->data_len;
3320 3336                  if (resp->wlist)
3321 3337                          clist_zero_len(resp->wlist);
3322 3338                  goto out;
3323 3339          }
3324 3340  
3325 3341          /*
3326 3342           * Do not allocate memory more than maximum allowed
3327 3343           * transfer size
3328 3344           */
3329 3345          if (args->count > rfs4_tsize(req))
3330 3346                  args->count = rfs4_tsize(req);
3331 3347  
                   /*
                    * Try the zero-copy path first.  On success the read is done
                    * through the xuio and the regular uio set-up below is skipped;
                    * on failure the xuio is freed and the regular path is used.
                    */
3332 3348          if (loaned_buffers) {
3333 3349                  uiop = (uio_t *)rfs_setup_xuio(vp);
3334 3350                  ASSERT(uiop != NULL);
3335 3351                  uiop->uio_segflg = UIO_SYSSPACE;
3336 3352                  uiop->uio_loffset = args->offset;
3337 3353                  uiop->uio_resid = args->count;
3338 3354  
3339 3355                  /* Jump to do the read if successful */
3340 3356                  if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3341 3357                          /*
3342 3358                           * Need to hold the vnode until after VOP_RETZCBUF()
3343 3359                           * is called.
3344 3360                           */
3345 3361                          VN_HOLD(vp);
3346 3362                          goto doio_read;
3347 3363                  }
3348 3364  
3349 3365                  DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3350 3366                      uiop->uio_loffset, int, uiop->uio_resid);
3351 3367  
3352 3368                  uiop->uio_extflg = 0;
3353 3369  
3354 3370                  /* failure to setup for zero copy */
3355 3371                  rfs_free_xuio((void *)uiop);
3356 3372                  loaned_buffers = 0;
3357 3373          }
3358 3374  
3359 3375          /*
3360 3376           * If returning data via RDMA Write, then grab the chunk list. If we
3361 3377           * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3362 3378           */
3363 3379          if (rdma_used) {
3364 3380                  mp = NULL;
                           /* the return value of rdma_get_wchunk() is ignored */
3365 3381                  (void) rdma_get_wchunk(req, &iov, args->wlist);
3366 3382                  uio.uio_iov = &iov;
3367 3383                  uio.uio_iovcnt = 1;
3368 3384          } else {
3369 3385                  /*
3370 3386                   * mp will contain the data to be sent out in the read reply.
3371 3387                   * It will be freed after the reply has been sent.
3372 3388                   */
3373 3389                  mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3374 3390                  ASSERT(mp != NULL);
                           /*
                            * NOTE(review): alloc_err is initialized to 0 and never
                            * assigned in this function, so this assertion cannot fail.
                            */
3375 3391                  ASSERT(alloc_err == 0);
3376 3392                  uio.uio_iov = iovp;
3377 3393                  uio.uio_iovcnt = iovcnt;
3378 3394          }
3379 3395  
3380 3396          uio.uio_segflg = UIO_SYSSPACE;
3381 3397          uio.uio_extflg = UIO_COPY_CACHED;
3382 3398          uio.uio_loffset = args->offset;
3383 3399          uio.uio_resid = args->count;
3384 3400          uiop = &uio;
3385 3401  
3386 3402  doio_read:
3387 3403          error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3388 3404  
                   /*
                    * Fetch the size again after the I/O; it is used for the eof
                    * computation below.  A failure here (verror) only affects eof.
                    */
3389 3405          va.va_mask = AT_SIZE;
3390 3406          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3391 3407  
                   /*
                    * NOTE(review): on the loaned-buffer path mp is still NULL here,
                    * and nothing in this function releases the xuio or the extra
                    * VN_HOLD() taken above when the read fails -- confirm that they
                    * are released elsewhere.
                    */
3392 3408          if (error) {
3393 3409                  if (mp)
3394 3410                          freemsg(mp);
3395 3411                  *cs->statusp = resp->status = puterrno4(error);
3396 3412                  goto out;
3397 3413          }
3398 3414  
3399 3415          /* make mblk using zc buffers */
3400 3416          if (loaned_buffers) {
3401 3417                  mp = uio_to_mblk(uiop);
3402 3418                  ASSERT(mp != NULL);
3403 3419          }
3404 3420  
3405 3421          *cs->statusp = resp->status = NFS4_OK;
3406 3422  
                   /* bytes actually read = bytes requested - bytes left over */
3407 3423          ASSERT(uiop->uio_resid >= 0);
3408 3424          resp->data_len = args->count - uiop->uio_resid;
3409 3425          if (mp) {
3410 3426                  resp->data_val = (char *)mp->b_datap->db_base;
3411 3427                  rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3412 3428          } else {
                           /* RDMA case: the data was read into the write chunk */
3413 3429                  resp->data_val = (caddr_t)iov.iov_base;
3414 3430          }
3415 3431  
3416 3432          resp->mblk = mp;
3417 3433  
                   /*
                    * eof is reported only when the post-read getattr succeeded and
                    * the read ended exactly at the file size.
                    */
3418 3434          if (!verror && offset + resp->data_len == va.va_size)
3419 3435                  resp->eof = TRUE;
3420 3436          else
3421 3437                  resp->eof = FALSE;
3422 3438  
3423 3439          if (rdma_used) {
3424 3440                  if (!rdma_setup_read_data4(args, resp)) {
3425 3441                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3426 3442                  }
3427 3443          } else {
3428 3444                  resp->wlist = NULL;
3429 3445          }
3430 3446  
3431 3447  out:
3432 3448          if (in_crit)
3433 3449                  nbl_end_crit(vp);
3434 3450  
                   /*
                    * iovp is non-NULL only if rfs_read_alloc() stored an iovec
                    * array in it; that array holds iovcnt entries.
                    */
3435 3451          if (iovp != NULL)
3436 3452                  kmem_free(iovp, iovcnt * sizeof (struct iovec));
3437 3453  
3438 3454          DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3439 3455              READ4res *, resp);
3440 3456  }
3441 3457  
3442 3458  static void
3443 3459  rfs4_op_read_free(nfs_resop4 *resop)
3444 3460  {
3445 3461          READ4res        *resp = &resop->nfs_resop4_u.opread;
3446 3462  
3447 3463          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3448 3464                  freemsg(resp->mblk);
3449 3465                  resp->mblk = NULL;
3450 3466                  resp->data_val = NULL;
3451 3467                  resp->data_len = 0;
3452 3468          }
3453 3469  }
3454 3470  
3455 3471  static void
3456 3472  rfs4_op_readdir_free(nfs_resop4 * resop)
3457 3473  {
3458 3474          READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3459 3475  
3460 3476          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3461 3477                  freeb(resp->mblk);
3462 3478                  resp->mblk = NULL;
3463 3479                  resp->data_len = 0;
3464 3480          }
3465 3481  }
3466 3482  
3467 3483  
3468 3484  /* ARGSUSED */
3469 3485  static void
3470 3486  rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3471 3487      struct compound_state *cs)
3472 3488  {
3473 3489          PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3474 3490          int             error;
3475 3491          vnode_t         *vp;
3476 3492          struct exportinfo *exi, *sav_exi;
3477 3493          nfs_fh4_fmt_t   *fh_fmtp;
3478 3494          nfs_export_t *ne = nfs_get_export();
3479 3495  
3480 3496          DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3481 3497  
3482 3498          if (cs->vp) {
3483 3499                  VN_RELE(cs->vp);
3484 3500                  cs->vp = NULL;
3485 3501          }
3486 3502  
3487 3503          if (cs->cr)
3488 3504                  crfree(cs->cr);
3489 3505  
3490 3506          cs->cr = crdup(cs->basecr);
3491 3507  
3492 3508          vp = ne->exi_public->exi_vp;
3493 3509          if (vp == NULL) {
3494 3510                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3495 3511                  goto out;
3496 3512          }
3497 3513  
3498 3514          error = makefh4(&cs->fh, vp, ne->exi_public);
3499 3515          if (error != 0) {
3500 3516                  *cs->statusp = resp->status = puterrno4(error);
3501 3517                  goto out;
3502 3518          }
3503 3519          sav_exi = cs->exi;
3504 3520          if (ne->exi_public == ne->exi_root) {
3505 3521                  /*
3506 3522                   * No filesystem is actually shared public, so we default
3507 3523                   * to exi_root. In this case, we must check whether root
3508 3524                   * is exported.
3509 3525                   */
3510 3526                  fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3511 3527  
3512 3528                  /*
3513 3529                   * if root filesystem is exported, the exportinfo struct that we
3514 3530                   * should use is what checkexport4 returns, because root_exi is
3515 3531                   * actually a mostly empty struct.
3516 3532                   */
3517 3533                  exi = checkexport4(&fh_fmtp->fh4_fsid,
3518 3534                      (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3519 3535                  cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3520 3536          } else {
3521 3537                  /*
3522 3538                   * it's a properly shared filesystem
3523 3539                   */
3524 3540                  cs->exi = ne->exi_public;
3525 3541          }
3526 3542  
3527 3543          if (is_system_labeled()) {
3528 3544                  bslabel_t *clabel;
3529 3545  
3530 3546                  ASSERT(req->rq_label != NULL);
3531 3547                  clabel = req->rq_label;
3532 3548                  DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3533 3549                      "got client label from request(1)",
3534 3550                      struct svc_req *, req);
3535 3551                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3536 3552                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3537 3553                              cs->exi)) {
3538 3554                                  *cs->statusp = resp->status =
3539 3555                                      NFS4ERR_SERVERFAULT;
3540 3556                                  goto out;
3541 3557                          }
3542 3558                  }
3543 3559          }
3544 3560  
3545 3561          VN_HOLD(vp);
3546 3562          cs->vp = vp;
3547 3563  
3548 3564          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3549 3565                  VN_RELE(cs->vp);
3550 3566                  cs->vp = NULL;
3551 3567                  cs->exi = sav_exi;
3552 3568                  goto out;
3553 3569          }
3554 3570  
3555 3571          *cs->statusp = resp->status = NFS4_OK;
3556 3572  out:
3557 3573          DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3558 3574              PUTPUBFH4res *, resp);
3559 3575  }
3560 3576  
3561 3577  /*
3562 3578   * XXX - issue with put*fh operations. Suppose /export/home is exported.
3563 3579   * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3564 3580   * or joe have restrictive search permissions, then we shouldn't let
3565 3581   * the client get a file handle. This is easy to enforce. However, we
3566 3582   * don't know what security flavor should be used until we resolve the
3567 3583   * path name. Another complication is uid mapping. If root is
3568 3584   * the user, then it will be mapped to the anonymous user by default,
3569 3585   * but we won't know that till we've resolved the path name. And we won't
3570 3586   * know what the anonymous user is.
3571 3587   * Luckily, SECINFO is specified to take a full filename.
3572 3588   * So what we will have to do in rfs4_op_lookup is check that the flavor of
3573 3589   * the target object matches that of the request, and if root was the
3574 3590   * caller, check for the root= and anon= options, and if necessary,
3575 3591   * repeat the lookup using the right cred_t. But that's not done yet.
3576 3592   */
3577 3593  /* ARGSUSED */
3578 3594  static void
3579 3595  rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3580 3596      struct compound_state *cs)
3581 3597  {
3582 3598          PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3583 3599          PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3584 3600          nfs_fh4_fmt_t *fh_fmtp;
3585 3601  
3586 3602          DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3587 3603              PUTFH4args *, args);
3588 3604  
3589 3605          if (cs->vp) {
3590 3606                  VN_RELE(cs->vp);
3591 3607                  cs->vp = NULL;
3592 3608          }
3593 3609  
3594 3610          if (cs->cr) {
3595 3611                  crfree(cs->cr);
3596 3612                  cs->cr = NULL;
3597 3613          }
3598 3614  
3599 3615  
3600 3616          if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3601 3617                  *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3602 3618                  goto out;
3603 3619          }
3604 3620  
3605 3621          fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3606 3622          cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3607 3623              NULL);
3608 3624  
3609 3625          if (cs->exi == NULL) {
3610 3626                  *cs->statusp = resp->status = NFS4ERR_STALE;
3611 3627                  goto out;
3612 3628          }
3613 3629  
3614 3630          cs->cr = crdup(cs->basecr);
3615 3631  
3616 3632          ASSERT(cs->cr != NULL);
3617 3633  
3618 3634          if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3619 3635                  *cs->statusp = resp->status;
3620 3636                  goto out;
3621 3637          }
3622 3638  
3623 3639          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3624 3640                  VN_RELE(cs->vp);
3625 3641                  cs->vp = NULL;
3626 3642                  goto out;
3627 3643          }
3628 3644  
3629 3645          nfs_fh4_copy(&args->object, &cs->fh);
3630 3646          *cs->statusp = resp->status = NFS4_OK;
3631 3647          cs->deleg = FALSE;
3632 3648  
3633 3649  out:
3634 3650          DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3635 3651              PUTFH4res *, resp);
3636 3652  }
3637 3653  
3638 3654  /* ARGSUSED */
3639 3655  static void
3640 3656  rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3641 3657      struct compound_state *cs)
3642 3658  {
3643 3659          PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3644 3660          int error;
3645 3661          fid_t fid;
3646 3662          struct exportinfo *exi, *sav_exi;
3647 3663  
3648 3664          DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3649 3665  
3650 3666          if (cs->vp) {
3651 3667                  VN_RELE(cs->vp);
3652 3668                  cs->vp = NULL;
3653 3669          }
3654 3670  
3655 3671          if (cs->cr)
3656 3672                  crfree(cs->cr);
3657 3673  
3658 3674          cs->cr = crdup(cs->basecr);
3659 3675  
3660 3676          /*
3661 3677           * Using rootdir, the system root vnode,
3662 3678           * get its fid.
3663 3679           */
3664 3680          bzero(&fid, sizeof (fid));
3665 3681          fid.fid_len = MAXFIDSZ;
3666 3682          error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3667 3683          if (error != 0) {
3668 3684                  *cs->statusp = resp->status = puterrno4(error);
3669 3685                  goto out;
3670 3686          }
3671 3687  
3672 3688          /*
3673 3689           * Then use the root fsid & fid it to find out if it's exported
3674 3690           *
3675 3691           * If the server root isn't exported directly, then
3676 3692           * it should at least be a pseudo export based on
3677 3693           * one or more exports further down in the server's
3678 3694           * file tree.
3679 3695           */
3680 3696          exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3681 3697          if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3682 3698                  NFS4_DEBUG(rfs4_debug,
3683 3699                      (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3684 3700                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3685 3701                  goto out;
3686 3702          }
3687 3703  
3688 3704          /*
3689 3705           * Now make a filehandle based on the root
3690 3706           * export and root vnode.
3691 3707           */
3692 3708          error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3693 3709          if (error != 0) {
3694 3710                  *cs->statusp = resp->status = puterrno4(error);
3695 3711                  goto out;
3696 3712          }
3697 3713  
3698 3714          sav_exi = cs->exi;
3699 3715          cs->exi = exi;
3700 3716  
3701 3717          VN_HOLD(ZONE_ROOTVP());
3702 3718          cs->vp = ZONE_ROOTVP();
3703 3719  
3704 3720          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3705 3721                  VN_RELE(cs->vp);
3706 3722                  cs->vp = NULL;
3707 3723                  cs->exi = sav_exi;
3708 3724                  goto out;
3709 3725          }
3710 3726  
3711 3727          *cs->statusp = resp->status = NFS4_OK;
3712 3728          cs->deleg = FALSE;
3713 3729  out:
3714 3730          DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3715 3731              PUTROOTFH4res *, resp);
3716 3732  }
3717 3733  
3718 3734  /*
3719 3735   * readlink: args: CURRENT_FH.
3720 3736   *      res: status. If success - CURRENT_FH unchanged, return linktext.
3721 3737   */
3722 3738  
3723 3739  /* ARGSUSED */
3724 3740  static void
3725 3741  rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3726 3742      struct compound_state *cs)
3727 3743  {
3728 3744          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3729 3745          int error;
3730 3746          vnode_t *vp;
3731 3747          struct iovec iov;
3732 3748          struct vattr va;
3733 3749          struct uio uio;
3734 3750          char *data;
3735 3751          struct sockaddr *ca;
3736 3752          char *name = NULL;
3737 3753          int is_referral;
3738 3754  
3739 3755          DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3740 3756  
3741 3757          /* CURRENT_FH: directory */
3742 3758          vp = cs->vp;
3743 3759          if (vp == NULL) {
3744 3760                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3745 3761                  goto out;
3746 3762          }
3747 3763  
3748 3764          if (cs->access == CS_ACCESS_DENIED) {
3749 3765                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3750 3766                  goto out;
3751 3767          }
3752 3768  
3753 3769          /* Is it a referral? */
3754 3770          if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3755 3771  
3756 3772                  is_referral = 1;
3757 3773  
3758 3774          } else {
3759 3775  
3760 3776                  is_referral = 0;
3761 3777  
3762 3778                  if (vp->v_type == VDIR) {
3763 3779                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
3764 3780                          goto out;
3765 3781                  }
3766 3782  
3767 3783                  if (vp->v_type != VLNK) {
3768 3784                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3769 3785                          goto out;
3770 3786                  }
3771 3787  
3772 3788          }
3773 3789  
3774 3790          va.va_mask = AT_MODE;
3775 3791          error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3776 3792          if (error) {
3777 3793                  *cs->statusp = resp->status = puterrno4(error);
3778 3794                  goto out;
3779 3795          }
3780 3796  
3781 3797          if (MANDLOCK(vp, va.va_mode)) {
3782 3798                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3783 3799                  goto out;
3784 3800          }
3785 3801  
3786 3802          data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3787 3803  
3788 3804          if (is_referral) {
3789 3805                  char *s;
3790 3806                  size_t strsz;
3791 3807  
3792 3808                  /* Get an artificial symlink based on a referral */
3793 3809                  s = build_symlink(vp, cs->cr, &strsz);
3794 3810                  global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3795 3811                  DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3796 3812                      vnode_t *, vp, char *, s);
3797 3813                  if (s == NULL)
3798 3814                          error = EINVAL;
3799 3815                  else {
3800 3816                          error = 0;
3801 3817                          (void) strlcpy(data, s, MAXPATHLEN + 1);
3802 3818                          kmem_free(s, strsz);
3803 3819                  }
3804 3820  
3805 3821          } else {
3806 3822  
3807 3823                  iov.iov_base = data;
3808 3824                  iov.iov_len = MAXPATHLEN;
3809 3825                  uio.uio_iov = &iov;
3810 3826                  uio.uio_iovcnt = 1;
3811 3827                  uio.uio_segflg = UIO_SYSSPACE;
3812 3828                  uio.uio_extflg = UIO_COPY_CACHED;
3813 3829                  uio.uio_loffset = 0;
3814 3830                  uio.uio_resid = MAXPATHLEN;
3815 3831  
3816 3832                  error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3817 3833  
3818 3834                  if (!error)
3819 3835                          *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3820 3836          }
3821 3837  
3822 3838          if (error) {
3823 3839                  kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3824 3840                  *cs->statusp = resp->status = puterrno4(error);
3825 3841                  goto out;
3826 3842          }
3827 3843  
3828 3844          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3829 3845          name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3830 3846              MAXPATHLEN  + 1);
3831 3847  
3832 3848          if (name == NULL) {
3833 3849                  /*
3834 3850                   * Even though the conversion failed, we return
3835 3851                   * something. We just don't translate it.
3836 3852                   */
3837 3853                  name = data;
3838 3854          }
3839 3855  
3840 3856          /*
3841 3857           * treat link name as data
3842 3858           */
3843 3859          (void) str_to_utf8(name, (utf8string *)&resp->link);
3844 3860  
3845 3861          if (name != data)
3846 3862                  kmem_free(name, MAXPATHLEN + 1);
3847 3863          kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3848 3864          *cs->statusp = resp->status = NFS4_OK;
3849 3865  
3850 3866  out:
3851 3867          DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3852 3868              READLINK4res *, resp);
3853 3869  }
3854 3870  
3855 3871  static void
3856 3872  rfs4_op_readlink_free(nfs_resop4 *resop)
3857 3873  {
3858 3874          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3859 3875          utf8string *symlink = (utf8string *)&resp->link;
3860 3876  
3861 3877          if (symlink->utf8string_val) {
3862 3878                  UTF8STRING_FREE(*symlink)
3863 3879          }
3864 3880  }
3865 3881  
3866 3882  /*
3867 3883   * release_lockowner:
3868 3884   *      Release any state associated with the supplied
3869 3885   *      lockowner. Note if any lo_state is holding locks we will not
3870 3886   *      rele that lo_state and thus the lockowner will not be destroyed.
3871 3887   *      A client using lock after the lock owner stateid has been released
3872 3888   *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3873 3889   *      to reissue the lock with new_lock_owner set to TRUE.
3874 3890   *      args: lock_owner
3875 3891   *      res:  status
3876 3892   */
3877 3893  /* ARGSUSED */
3878 3894  static void
3879 3895  rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3880 3896      struct svc_req *req, struct compound_state *cs)
3881 3897  {
3882 3898          RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3883 3899          RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3884 3900          rfs4_lockowner_t *lo;
3885 3901          rfs4_openowner_t *oo;
3886 3902          rfs4_state_t *sp;
3887 3903          rfs4_lo_state_t *lsp;
3888 3904          rfs4_client_t *cp;
3889 3905          bool_t create = FALSE;
3890 3906          locklist_t *llist;
3891 3907          sysid_t sysid;
3892 3908  
3893 3909          DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3894 3910              cs, RELEASE_LOCKOWNER4args *, ap);
3895 3911  
3896 3912          /* Make sure there is a clientid around for this request */
3897 3913          cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3898 3914  
3899 3915          if (cp == NULL) {
3900 3916                  *cs->statusp = resp->status =
3901 3917                      rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3902 3918                  goto out;
3903 3919          }
3904 3920          rfs4_client_rele(cp);
3905 3921  
3906 3922          lo = rfs4_findlockowner(&ap->lock_owner, &create);
3907 3923          if (lo == NULL) {
3908 3924                  *cs->statusp = resp->status = NFS4_OK;
3909 3925                  goto out;
3910 3926          }
3911 3927          ASSERT(lo->rl_client != NULL);
3912 3928  
3913 3929          /*
3914 3930           * Check for EXPIRED client. If so will reap state with in a lease
3915 3931           * period or on next set_clientid_confirm step
3916 3932           */
3917 3933          if (rfs4_lease_expired(lo->rl_client)) {
3918 3934                  rfs4_lockowner_rele(lo);
3919 3935                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3920 3936                  goto out;
3921 3937          }
3922 3938  
3923 3939          /*
3924 3940           * If no sysid has been assigned, then no locks exist; just return.
3925 3941           */
3926 3942          rfs4_dbe_lock(lo->rl_client->rc_dbe);
3927 3943          if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3928 3944                  rfs4_lockowner_rele(lo);
3929 3945                  rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3930 3946                  goto out;
3931 3947          }
3932 3948  
3933 3949          sysid = lo->rl_client->rc_sysidt;
3934 3950          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3935 3951  
3936 3952          /*
3937 3953           * Mark the lockowner invalid.
3938 3954           */
3939 3955          rfs4_dbe_hide(lo->rl_dbe);
3940 3956  
3941 3957          /*
3942 3958           * sysid-pid pair should now not be used since the lockowner is
3943 3959           * invalid. If the client were to instantiate the lockowner again
3944 3960           * it would be assigned a new pid. Thus we can get the list of
3945 3961           * current locks.
3946 3962           */
3947 3963  
3948 3964          llist = flk_get_active_locks(sysid, lo->rl_pid);
3949 3965          /* If we are still holding locks fail */
3950 3966          if (llist != NULL) {
3951 3967  
3952 3968                  *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3953 3969  
3954 3970                  flk_free_locklist(llist);
3955 3971                  /*
3956 3972                   * We need to unhide the lockowner so the client can
3957 3973                   * try it again. The bad thing here is if the client
3958 3974                   * has a logic error that took it here in the first place
3959 3975                   * they probably have lost accounting of the locks that it
3960 3976                   * is holding. So we may have dangling state until the
3961 3977                   * open owner state is reaped via close. One scenario
3962 3978                   * that could possibly occur is that the client has
3963 3979                   * sent the unlock request(s) in separate threads
3964 3980                   * and has not waited for the replies before sending the
3965 3981                   * RELEASE_LOCKOWNER request. Presumably, it would expect
3966 3982                   * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3967 3983                   * reissuing the request.
3968 3984                   */
3969 3985                  rfs4_dbe_unhide(lo->rl_dbe);
3970 3986                  rfs4_lockowner_rele(lo);
3971 3987                  goto out;
3972 3988          }
3973 3989  
3974 3990          /*
3975 3991           * For the corresponding client we need to check each open
3976 3992           * owner for any opens that have lockowner state associated
3977 3993           * with this lockowner.
3978 3994           */
3979 3995  
3980 3996          rfs4_dbe_lock(lo->rl_client->rc_dbe);
3981 3997          for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3982 3998              oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3983 3999  
3984 4000                  rfs4_dbe_lock(oo->ro_dbe);
3985 4001                  for (sp = list_head(&oo->ro_statelist); sp != NULL;
3986 4002                      sp = list_next(&oo->ro_statelist, sp)) {
3987 4003  
3988 4004                          rfs4_dbe_lock(sp->rs_dbe);
3989 4005                          for (lsp = list_head(&sp->rs_lostatelist);
3990 4006                              lsp != NULL;
3991 4007                              lsp = list_next(&sp->rs_lostatelist, lsp)) {
3992 4008                                  if (lsp->rls_locker == lo) {
3993 4009                                          rfs4_dbe_lock(lsp->rls_dbe);
3994 4010                                          rfs4_dbe_invalidate(lsp->rls_dbe);
3995 4011                                          rfs4_dbe_unlock(lsp->rls_dbe);
3996 4012                                  }
3997 4013                          }
3998 4014                          rfs4_dbe_unlock(sp->rs_dbe);
3999 4015                  }
4000 4016                  rfs4_dbe_unlock(oo->ro_dbe);
4001 4017          }
4002 4018          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4003 4019  
4004 4020          rfs4_lockowner_rele(lo);
4005 4021  
4006 4022          *cs->statusp = resp->status = NFS4_OK;
4007 4023  
4008 4024  out:
4009 4025          DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4010 4026              cs, RELEASE_LOCKOWNER4res *, resp);
4011 4027  }
4012 4028  
4013 4029  /*
4014 4030   * short utility function to lookup a file and recall the delegation
4015 4031   */
4016 4032  static rfs4_file_t *
4017 4033  rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4018 4034      int *lkup_error, cred_t *cr)
4019 4035  {
4020 4036          vnode_t *vp;
4021 4037          rfs4_file_t *fp = NULL;
4022 4038          bool_t fcreate = FALSE;
4023 4039          int error;
4024 4040  
4025 4041          if (vpp)
4026 4042                  *vpp = NULL;
4027 4043  
4028 4044          if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4029 4045              NULL)) == 0) {
4030 4046                  if (vp->v_type == VREG)
4031 4047                          fp = rfs4_findfile(vp, NULL, &fcreate);
4032 4048                  if (vpp)
4033 4049                          *vpp = vp;
4034 4050                  else
4035 4051                          VN_RELE(vp);
4036 4052          }
4037 4053  
4038 4054          if (lkup_error)
4039 4055                  *lkup_error = error;
4040 4056  
4041 4057          return (fp);
4042 4058  }
4043 4059  
4044 4060  /*
4045 4061   * remove: args: CURRENT_FH: directory; name.
4046 4062   *      res: status. If success - CURRENT_FH unchanged, return change_info
4047 4063   *              for directory.
4048 4064   */
4049 4065  /* ARGSUSED */
4050 4066  static void
4051 4067  rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4052 4068      struct compound_state *cs)
4053 4069  {
4054 4070          REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4055 4071          REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4056 4072          int error;
4057 4073          vnode_t *dvp, *vp;
4058 4074          struct vattr bdva, idva, adva;
4059 4075          char *nm;
4060 4076          uint_t len;
4061 4077          rfs4_file_t *fp;
4062 4078          int in_crit = 0;
4063 4079          bslabel_t *clabel;
4064 4080          struct sockaddr *ca;
4065 4081          char *name = NULL;
4066 4082          nfsstat4 status;
4067 4083  
4068 4084          DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4069 4085              REMOVE4args *, args);
4070 4086  
4071 4087          /* CURRENT_FH: directory */
4072 4088          dvp = cs->vp;
4073 4089          if (dvp == NULL) {
4074 4090                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4075 4091                  goto out;
4076 4092          }
4077 4093  
4078 4094          if (cs->access == CS_ACCESS_DENIED) {
4079 4095                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4080 4096                  goto out;
4081 4097          }
4082 4098  
4083 4099          /*
4084 4100           * If there is an unshared filesystem mounted on this vnode,
4085 4101           * Do not allow to remove anything in this directory.
4086 4102           */
4087 4103          if (vn_ismntpt(dvp)) {
4088 4104                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4089 4105                  goto out;
4090 4106          }
4091 4107  
4092 4108          if (dvp->v_type != VDIR) {
4093 4109                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4094 4110                  goto out;
4095 4111          }
4096 4112  
4097 4113          status = utf8_dir_verify(&args->target);
4098 4114          if (status != NFS4_OK) {
4099 4115                  *cs->statusp = resp->status = status;
4100 4116                  goto out;
4101 4117          }
4102 4118  
4103 4119          /*
4104 4120           * Lookup the file so that we can check if it's a directory
4105 4121           */
4106 4122          nm = utf8_to_fn(&args->target, &len, NULL);
4107 4123          if (nm == NULL) {
4108 4124                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4109 4125                  goto out;
4110 4126          }
4111 4127  
4112 4128          if (len > MAXNAMELEN) {
4113 4129                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4114 4130                  kmem_free(nm, len);
4115 4131                  goto out;
4116 4132          }
4117 4133  
4118 4134          if (rdonly4(req, cs)) {
4119 4135                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4120 4136                  kmem_free(nm, len);
4121 4137                  goto out;
4122 4138          }
4123 4139  
4124 4140          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4125 4141          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4126 4142              MAXPATHLEN  + 1);
4127 4143  
4128 4144          if (name == NULL) {
4129 4145                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4130 4146                  kmem_free(nm, len);
4131 4147                  goto out;
4132 4148          }
4133 4149  
4134 4150          /*
4135 4151           * Lookup the file to determine type and while we are see if
4136 4152           * there is a file struct around and check for delegation.
4137 4153           * We don't need to acquire va_seq before this lookup, if
4138 4154           * it causes an update, cinfo.before will not match, which will
4139 4155           * trigger a cache flush even if atomic is TRUE.
4140 4156           */
4141 4157          if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4142 4158                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4143 4159                      NULL)) {
4144 4160                          VN_RELE(vp);
4145 4161                          rfs4_file_rele(fp);
4146 4162                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4147 4163                          if (nm != name)
4148 4164                                  kmem_free(name, MAXPATHLEN + 1);
4149 4165                          kmem_free(nm, len);
4150 4166                          goto out;
4151 4167                  }
4152 4168          }
4153 4169  
4154 4170          /* Didn't find anything to remove */
4155 4171          if (vp == NULL) {
4156 4172                  *cs->statusp = resp->status = error;
4157 4173                  if (nm != name)
4158 4174                          kmem_free(name, MAXPATHLEN + 1);
4159 4175                  kmem_free(nm, len);
4160 4176                  goto out;
4161 4177          }
4162 4178  
4163 4179          if (nbl_need_check(vp)) {
4164 4180                  nbl_start_crit(vp, RW_READER);
4165 4181                  in_crit = 1;
4166 4182                  if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4167 4183                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4168 4184                          if (nm != name)
4169 4185                                  kmem_free(name, MAXPATHLEN + 1);
4170 4186                          kmem_free(nm, len);
4171 4187                          nbl_end_crit(vp);
4172 4188                          VN_RELE(vp);
4173 4189                          if (fp) {
4174 4190                                  rfs4_clear_dont_grant(fp);
4175 4191                                  rfs4_file_rele(fp);
4176 4192                          }
4177 4193                          goto out;
4178 4194                  }
4179 4195          }
4180 4196  
4181 4197          /* check label before allowing removal */
4182 4198          if (is_system_labeled()) {
4183 4199                  ASSERT(req->rq_label != NULL);
4184 4200                  clabel = req->rq_label;
4185 4201                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4186 4202                      "got client label from request(1)",
4187 4203                      struct svc_req *, req);
4188 4204                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4189 4205                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4190 4206                              cs->exi)) {
4191 4207                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4192 4208                                  if (name != nm)
4193 4209                                          kmem_free(name, MAXPATHLEN + 1);
4194 4210                                  kmem_free(nm, len);
4195 4211                                  if (in_crit)
4196 4212                                          nbl_end_crit(vp);
4197 4213                                  VN_RELE(vp);
4198 4214                                  if (fp) {
4199 4215                                          rfs4_clear_dont_grant(fp);
4200 4216                                          rfs4_file_rele(fp);
4201 4217                                  }
4202 4218                                  goto out;
4203 4219                          }
4204 4220                  }
4205 4221          }
4206 4222  
4207 4223          /* Get dir "before" change value */
4208 4224          bdva.va_mask = AT_CTIME|AT_SEQ;
4209 4225          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4210 4226          if (error) {
4211 4227                  *cs->statusp = resp->status = puterrno4(error);
4212 4228                  if (nm != name)
4213 4229                          kmem_free(name, MAXPATHLEN + 1);
4214 4230                  kmem_free(nm, len);
4215 4231                  if (in_crit)
4216 4232                          nbl_end_crit(vp);
4217 4233                  VN_RELE(vp);
4218 4234                  if (fp) {
4219 4235                          rfs4_clear_dont_grant(fp);
4220 4236                          rfs4_file_rele(fp);
4221 4237                  }
4222 4238                  goto out;
4223 4239          }
4224 4240          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4225 4241  
4226 4242          /* Actually do the REMOVE operation */
4227 4243          if (vp->v_type == VDIR) {
4228 4244                  /*
4229 4245                   * Can't remove a directory that has a mounted-on filesystem.
4230 4246                   */
4231 4247                  if (vn_ismntpt(vp)) {
4232 4248                          error = EACCES;
4233 4249                  } else {
4234 4250                          /*
4235 4251                           * System V defines rmdir to return EEXIST,
4236 4252                           * not ENOTEMPTY, if the directory is not
4237 4253                           * empty.  A System V NFS server needs to map
4238 4254                           * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4239 4255                           * transmit over the wire.
4240 4256                           */
4241 4257                          if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4242 4258                              NULL, 0)) == EEXIST)
4243 4259                                  error = ENOTEMPTY;
4244 4260                  }
4245 4261          } else {
4246 4262                  if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4247 4263                      fp != NULL) {
4248 4264                          struct vattr va;
4249 4265                          vnode_t *tvp;
4250 4266  
4251 4267                          rfs4_dbe_lock(fp->rf_dbe);
4252 4268                          tvp = fp->rf_vp;
4253 4269                          if (tvp)
4254 4270                                  VN_HOLD(tvp);
4255 4271                          rfs4_dbe_unlock(fp->rf_dbe);
4256 4272  
4257 4273                          if (tvp) {
4258 4274                                  /*
4259 4275                                   * This is va_seq safe because we are not
4260 4276                                   * manipulating dvp.
4261 4277                                   */
4262 4278                                  va.va_mask = AT_NLINK;
4263 4279                                  if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4264 4280                                      va.va_nlink == 0) {
4265 4281                                          /* Remove state on file remove */
4266 4282                                          if (in_crit) {
4267 4283                                                  nbl_end_crit(vp);
4268 4284                                                  in_crit = 0;
4269 4285                                          }
4270 4286                                          rfs4_close_all_state(fp);
4271 4287                                  }
4272 4288                                  VN_RELE(tvp);
4273 4289                          }
4274 4290                  }
4275 4291          }
4276 4292  
4277 4293          if (in_crit)
4278 4294                  nbl_end_crit(vp);
4279 4295          VN_RELE(vp);
4280 4296  
4281 4297          if (fp) {
4282 4298                  rfs4_clear_dont_grant(fp);
4283 4299                  rfs4_file_rele(fp);
4284 4300          }
4285 4301          if (nm != name)
4286 4302                  kmem_free(name, MAXPATHLEN + 1);
4287 4303          kmem_free(nm, len);
4288 4304  
4289 4305          if (error) {
4290 4306                  *cs->statusp = resp->status = puterrno4(error);
4291 4307                  goto out;
4292 4308          }
4293 4309  
4294 4310          /*
4295 4311           * Get the initial "after" sequence number, if it fails, set to zero
4296 4312           */
4297 4313          idva.va_mask = AT_SEQ;
4298 4314          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4299 4315                  idva.va_seq = 0;
4300 4316  
4301 4317          /*
4302 4318           * Force modified data and metadata out to stable storage.
4303 4319           */
4304 4320          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4305 4321  
4306 4322          /*
4307 4323           * Get "after" change value, if it fails, simply return the
4308 4324           * before value.
4309 4325           */
4310 4326          adva.va_mask = AT_CTIME|AT_SEQ;
4311 4327          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4312 4328                  adva.va_ctime = bdva.va_ctime;
4313 4329                  adva.va_seq = 0;
4314 4330          }
4315 4331  
4316 4332          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4317 4333  
4318 4334          /*
4319 4335           * The cinfo.atomic = TRUE only if we have
4320 4336           * non-zero va_seq's, and it has incremented by exactly one
4321 4337           * during the VOP_REMOVE/RMDIR and it didn't change during
4322 4338           * the VOP_FSYNC.
4323 4339           */
4324 4340          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4325 4341              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4326 4342                  resp->cinfo.atomic = TRUE;
4327 4343          else
4328 4344                  resp->cinfo.atomic = FALSE;
4329 4345  
4330 4346          *cs->statusp = resp->status = NFS4_OK;
4331 4347  
4332 4348  out:
4333 4349          DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4334 4350              REMOVE4res *, resp);
4335 4351  }
4336 4352  
4337 4353  /*
4338 4354   * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4339 4355   *              oldname and newname.
4340 4356   *      res: status. If success - CURRENT_FH unchanged, return change_info
4341 4357   *              for both from and target directories.
4342 4358   */
4343 4359  /* ARGSUSED */
4344 4360  static void
4345 4361  rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4346 4362      struct compound_state *cs)
4347 4363  {
4348 4364          RENAME4args *args = &argop->nfs_argop4_u.oprename;
4349 4365          RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4350 4366          int error;
4351 4367          vnode_t *odvp;
4352 4368          vnode_t *ndvp;
4353 4369          vnode_t *srcvp, *targvp, *tvp;
4354 4370          struct vattr obdva, oidva, oadva;
4355 4371          struct vattr nbdva, nidva, nadva;
4356 4372          char *onm, *nnm;
4357 4373          uint_t olen, nlen;
4358 4374          rfs4_file_t *fp, *sfp;
4359 4375          int in_crit_src, in_crit_targ;
4360 4376          int fp_rele_grant_hold, sfp_rele_grant_hold;
4361 4377          int unlinked;
4362 4378          bslabel_t *clabel;
4363 4379          struct sockaddr *ca;
4364 4380          char *converted_onm = NULL;
4365 4381          char *converted_nnm = NULL;
4366 4382          nfsstat4 status;
4367 4383  
4368 4384          DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4369 4385              RENAME4args *, args);
4370 4386  
4371 4387          fp = sfp = NULL;
4372 4388          srcvp = targvp = tvp = NULL;
4373 4389          in_crit_src = in_crit_targ = 0;
4374 4390          fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4375 4391          unlinked = 0;
4376 4392  
4377 4393          /* CURRENT_FH: target directory */
4378 4394          ndvp = cs->vp;
4379 4395          if (ndvp == NULL) {
4380 4396                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4381 4397                  goto out;
4382 4398          }
4383 4399  
4384 4400          /* SAVED_FH: from directory */
4385 4401          odvp = cs->saved_vp;
4386 4402          if (odvp == NULL) {
4387 4403                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4388 4404                  goto out;
4389 4405          }
4390 4406  
4391 4407          if (cs->access == CS_ACCESS_DENIED) {
4392 4408                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4393 4409                  goto out;
4394 4410          }
4395 4411  
4396 4412          /*
4397 4413           * If there is an unshared filesystem mounted on this vnode,
4398 4414           * do not allow to rename objects in this directory.
4399 4415           */
4400 4416          if (vn_ismntpt(odvp)) {
4401 4417                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4402 4418                  goto out;
4403 4419          }
4404 4420  
4405 4421          /*
4406 4422           * If there is an unshared filesystem mounted on this vnode,
4407 4423           * do not allow to rename to this directory.
4408 4424           */
4409 4425          if (vn_ismntpt(ndvp)) {
4410 4426                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4411 4427                  goto out;
4412 4428          }
4413 4429  
4414 4430          if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4415 4431                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4416 4432                  goto out;
4417 4433          }
4418 4434  
4419 4435          if (cs->saved_exi != cs->exi) {
4420 4436                  *cs->statusp = resp->status = NFS4ERR_XDEV;
4421 4437                  goto out;
4422 4438          }
4423 4439  
4424 4440          status = utf8_dir_verify(&args->oldname);
4425 4441          if (status != NFS4_OK) {
4426 4442                  *cs->statusp = resp->status = status;
4427 4443                  goto out;
4428 4444          }
4429 4445  
4430 4446          status = utf8_dir_verify(&args->newname);
4431 4447          if (status != NFS4_OK) {
4432 4448                  *cs->statusp = resp->status = status;
4433 4449                  goto out;
4434 4450          }
4435 4451  
4436 4452          onm = utf8_to_fn(&args->oldname, &olen, NULL);
4437 4453          if (onm == NULL) {
4438 4454                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4439 4455                  goto out;
4440 4456          }
4441 4457          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4442 4458          nlen = MAXPATHLEN + 1;
4443 4459          converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4444 4460              nlen);
4445 4461  
4446 4462          if (converted_onm == NULL) {
4447 4463                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4448 4464                  kmem_free(onm, olen);
4449 4465                  goto out;
4450 4466          }
4451 4467  
4452 4468          nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4453 4469          if (nnm == NULL) {
4454 4470                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4455 4471                  if (onm != converted_onm)
4456 4472                          kmem_free(converted_onm, MAXPATHLEN + 1);
4457 4473                  kmem_free(onm, olen);
4458 4474                  goto out;
4459 4475          }
4460 4476          converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4461 4477              MAXPATHLEN  + 1);
4462 4478  
4463 4479          if (converted_nnm == NULL) {
4464 4480                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4465 4481                  kmem_free(nnm, nlen);
4466 4482                  nnm = NULL;
4467 4483                  if (onm != converted_onm)
4468 4484                          kmem_free(converted_onm, MAXPATHLEN + 1);
4469 4485                  kmem_free(onm, olen);
4470 4486                  goto out;
4471 4487          }
4472 4488  
4473 4489  
4474 4490          if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4475 4491                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4476 4492                  kmem_free(onm, olen);
4477 4493                  kmem_free(nnm, nlen);
4478 4494                  goto out;
4479 4495          }
4480 4496  
4481 4497  
4482 4498          if (rdonly4(req, cs)) {
4483 4499                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4484 4500                  if (onm != converted_onm)
4485 4501                          kmem_free(converted_onm, MAXPATHLEN + 1);
4486 4502                  kmem_free(onm, olen);
4487 4503                  if (nnm != converted_nnm)
4488 4504                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4489 4505                  kmem_free(nnm, nlen);
4490 4506                  goto out;
4491 4507          }
4492 4508  
4493 4509          /* check label of the target dir */
4494 4510          if (is_system_labeled()) {
4495 4511                  ASSERT(req->rq_label != NULL);
4496 4512                  clabel = req->rq_label;
4497 4513                  DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4498 4514                      "got client label from request(1)",
4499 4515                      struct svc_req *, req);
4500 4516                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4501 4517                          if (!do_rfs_label_check(clabel, ndvp,
4502 4518                              EQUALITY_CHECK, cs->exi)) {
4503 4519                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4504 4520                                  goto err_out;
4505 4521                          }
4506 4522                  }
4507 4523          }
4508 4524  
4509 4525          /*
4510 4526           * Is the source a file and have a delegation?
4511 4527           * We don't need to acquire va_seq before these lookups, if
4512 4528           * it causes an update, cinfo.before will not match, which will
4513 4529           * trigger a cache flush even if atomic is TRUE.
4514 4530           */
4515 4531          if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4516 4532              &error, cs->cr)) {
4517 4533                  if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4518 4534                      NULL)) {
4519 4535                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4520 4536                          goto err_out;
4521 4537                  }
4522 4538          }
4523 4539  
4524 4540          if (srcvp == NULL) {
4525 4541                  *cs->statusp = resp->status = puterrno4(error);
4526 4542                  if (onm != converted_onm)
4527 4543                          kmem_free(converted_onm, MAXPATHLEN + 1);
4528 4544                  kmem_free(onm, olen);
4529 4545                  if (nnm != converted_nnm)
4530 4546                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4531 4547                  kmem_free(nnm, nlen);
4532 4548                  goto out;
4533 4549          }
4534 4550  
4535 4551          sfp_rele_grant_hold = 1;
4536 4552  
4537 4553          /* Does the destination exist and a file and have a delegation? */
4538 4554          if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4539 4555              NULL, cs->cr)) {
4540 4556                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4541 4557                      NULL)) {
4542 4558                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4543 4559                          goto err_out;
4544 4560                  }
4545 4561          }
4546 4562          fp_rele_grant_hold = 1;
4547 4563  
4548 4564          /* Check for NBMAND lock on both source and target */
4549 4565          if (nbl_need_check(srcvp)) {
4550 4566                  nbl_start_crit(srcvp, RW_READER);
4551 4567                  in_crit_src = 1;
4552 4568                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4553 4569                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4554 4570                          goto err_out;
4555 4571                  }
4556 4572          }
4557 4573  
4558 4574          if (targvp && nbl_need_check(targvp)) {
4559 4575                  nbl_start_crit(targvp, RW_READER);
4560 4576                  in_crit_targ = 1;
4561 4577                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4562 4578                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4563 4579                          goto err_out;
4564 4580                  }
4565 4581          }
4566 4582  
4567 4583          /* Get source "before" change value */
4568 4584          obdva.va_mask = AT_CTIME|AT_SEQ;
4569 4585          error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4570 4586          if (!error) {
4571 4587                  nbdva.va_mask = AT_CTIME|AT_SEQ;
4572 4588                  error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4573 4589          }
4574 4590          if (error) {
4575 4591                  *cs->statusp = resp->status = puterrno4(error);
4576 4592                  goto err_out;
4577 4593          }
4578 4594  
4579 4595          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4580 4596          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4581 4597  
4582 4598          error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4583 4599              NULL, 0);
4584 4600  
4585 4601          /*
4586 4602           * If target existed and was unlinked by VOP_RENAME, state will need
4587 4603           * closed. To avoid deadlock, rfs4_close_all_state will be done after
4588 4604           * any necessary nbl_end_crit on srcvp and tgtvp.
4589 4605           */
4590 4606          if (error == 0 && fp != NULL) {
4591 4607                  rfs4_dbe_lock(fp->rf_dbe);
4592 4608                  tvp = fp->rf_vp;
4593 4609                  if (tvp)
4594 4610                          VN_HOLD(tvp);
4595 4611                  rfs4_dbe_unlock(fp->rf_dbe);
4596 4612  
4597 4613                  if (tvp) {
4598 4614                          struct vattr va;
4599 4615                          va.va_mask = AT_NLINK;
4600 4616  
4601 4617                          if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4602 4618                              va.va_nlink == 0) {
4603 4619                                  unlinked = 1;
4604 4620  
4605 4621                                  /* DEBUG data */
4606 4622                                  if ((srcvp == targvp) || (tvp != targvp)) {
4607 4623                                          cmn_err(CE_WARN, "rfs4_op_rename: "
4608 4624                                              "srcvp %p, targvp: %p, tvp: %p",
4609 4625                                              (void *)srcvp, (void *)targvp,
4610 4626                                              (void *)tvp);
4611 4627                                  }
4612 4628                          } else {
4613 4629                                  VN_RELE(tvp);
4614 4630                          }
4615 4631                  }
4616 4632          }
4617 4633          if (error == 0)
4618 4634                  vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4619 4635  
4620 4636          if (in_crit_src)
4621 4637                  nbl_end_crit(srcvp);
4622 4638          if (srcvp)
4623 4639                  VN_RELE(srcvp);
4624 4640          if (in_crit_targ)
4625 4641                  nbl_end_crit(targvp);
4626 4642          if (targvp)
4627 4643                  VN_RELE(targvp);
4628 4644  
4629 4645          if (unlinked) {
4630 4646                  ASSERT(fp != NULL);
4631 4647                  ASSERT(tvp != NULL);
4632 4648  
4633 4649                  /* DEBUG data */
4634 4650                  if (RW_READ_HELD(&tvp->v_nbllock)) {
4635 4651                          cmn_err(CE_WARN, "rfs4_op_rename: "
4636 4652                              "RW_READ_HELD(%p)", (void *)tvp);
4637 4653                  }
4638 4654  
4639 4655                  /* The file is gone and so should the state */
4640 4656                  rfs4_close_all_state(fp);
4641 4657                  VN_RELE(tvp);
4642 4658          }
4643 4659  
4644 4660          if (sfp) {
4645 4661                  rfs4_clear_dont_grant(sfp);
4646 4662                  rfs4_file_rele(sfp);
4647 4663          }
4648 4664          if (fp) {
4649 4665                  rfs4_clear_dont_grant(fp);
4650 4666                  rfs4_file_rele(fp);
4651 4667          }
4652 4668  
4653 4669          if (converted_onm != onm)
4654 4670                  kmem_free(converted_onm, MAXPATHLEN + 1);
4655 4671          kmem_free(onm, olen);
4656 4672          if (converted_nnm != nnm)
4657 4673                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4658 4674          kmem_free(nnm, nlen);
4659 4675  
4660 4676          /*
4661 4677           * Get the initial "after" sequence number, if it fails, set to zero
4662 4678           */
4663 4679          oidva.va_mask = AT_SEQ;
4664 4680          if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4665 4681                  oidva.va_seq = 0;
4666 4682  
4667 4683          nidva.va_mask = AT_SEQ;
4668 4684          if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4669 4685                  nidva.va_seq = 0;
4670 4686  
4671 4687          /*
4672 4688           * Force modified data and metadata out to stable storage.
4673 4689           */
4674 4690          (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4675 4691          (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4676 4692  
4677 4693          if (error) {
4678 4694                  *cs->statusp = resp->status = puterrno4(error);
4679 4695                  goto out;
4680 4696          }
4681 4697  
4682 4698          /*
4683 4699           * Get "after" change values, if it fails, simply return the
4684 4700           * before value.
4685 4701           */
4686 4702          oadva.va_mask = AT_CTIME|AT_SEQ;
4687 4703          if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4688 4704                  oadva.va_ctime = obdva.va_ctime;
4689 4705                  oadva.va_seq = 0;
4690 4706          }
4691 4707  
4692 4708          nadva.va_mask = AT_CTIME|AT_SEQ;
4693 4709          if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4694 4710                  nadva.va_ctime = nbdva.va_ctime;
4695 4711                  nadva.va_seq = 0;
4696 4712          }
4697 4713  
4698 4714          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4699 4715          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4700 4716  
4701 4717          /*
4702 4718           * The cinfo.atomic = TRUE only if we have
4703 4719           * non-zero va_seq's, and it has incremented by exactly one
4704 4720           * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4705 4721           */
4706 4722          if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4707 4723              oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4708 4724                  resp->source_cinfo.atomic = TRUE;
4709 4725          else
4710 4726                  resp->source_cinfo.atomic = FALSE;
4711 4727  
4712 4728          if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4713 4729              nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4714 4730                  resp->target_cinfo.atomic = TRUE;
4715 4731          else
4716 4732                  resp->target_cinfo.atomic = FALSE;
4717 4733  
4718 4734  #ifdef  VOLATILE_FH_TEST
4719 4735          {
4720 4736          extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4721 4737  
4722 4738          /*
4723 4739           * Add the renamed file handle to the volatile rename list
4724 4740           */
4725 4741          if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4726 4742                  /* file handles may expire on rename */
4727 4743                  vnode_t *vp;
4728 4744  
4729 4745                  nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4730 4746                  /*
4731 4747                   * Already know that nnm will be a valid string
4732 4748                   */
4733 4749                  error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4734 4750                      NULL, NULL, NULL);
4735 4751                  kmem_free(nnm, nlen);
4736 4752                  if (!error) {
4737 4753                          add_volrnm_fh(cs->exi, vp);
4738 4754                          VN_RELE(vp);
4739 4755                  }
4740 4756          }
4741 4757          }
4742 4758  #endif  /* VOLATILE_FH_TEST */
4743 4759  
4744 4760          *cs->statusp = resp->status = NFS4_OK;
4745 4761  out:
4746 4762          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4747 4763              RENAME4res *, resp);
4748 4764          return;
4749 4765  
4750 4766  err_out:
4751 4767          if (onm != converted_onm)
4752 4768                  kmem_free(converted_onm, MAXPATHLEN + 1);
4753 4769          if (onm != NULL)
4754 4770                  kmem_free(onm, olen);
4755 4771          if (nnm != converted_nnm)
4756 4772                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4757 4773          if (nnm != NULL)
4758 4774                  kmem_free(nnm, nlen);
4759 4775  
4760 4776          if (in_crit_src) nbl_end_crit(srcvp);
4761 4777          if (in_crit_targ) nbl_end_crit(targvp);
4762 4778          if (targvp) VN_RELE(targvp);
4763 4779          if (srcvp) VN_RELE(srcvp);
4764 4780          if (sfp) {
4765 4781                  if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4766 4782                  rfs4_file_rele(sfp);
4767 4783          }
4768 4784          if (fp) {
4769 4785                  if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4770 4786                  rfs4_file_rele(fp);
4771 4787          }
4772 4788  
4773 4789          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4774 4790              RENAME4res *, resp);
4775 4791  }
4776 4792  
4777 4793  /* ARGSUSED */
4778 4794  static void
4779 4795  rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4780 4796      struct compound_state *cs)
4781 4797  {
4782 4798          RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4783 4799          RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4784 4800          rfs4_client_t *cp;
4785 4801  
4786 4802          DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4787 4803              RENEW4args *, args);
4788 4804  
4789 4805          if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4790 4806                  *cs->statusp = resp->status =
4791 4807                      rfs4_check_clientid(&args->clientid, 0);
4792 4808                  goto out;
4793 4809          }
4794 4810  
4795 4811          if (rfs4_lease_expired(cp)) {
4796 4812                  rfs4_client_rele(cp);
4797 4813                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4798 4814                  goto out;
4799 4815          }
4800 4816  
4801 4817          rfs4_update_lease(cp);
4802 4818  
4803 4819          mutex_enter(cp->rc_cbinfo.cb_lock);
4804 4820          if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4805 4821                  cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4806 4822                  *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4807 4823          } else {
4808 4824                  *cs->statusp = resp->status = NFS4_OK;
4809 4825          }
4810 4826          mutex_exit(cp->rc_cbinfo.cb_lock);
4811 4827  
4812 4828          rfs4_client_rele(cp);
4813 4829  
4814 4830  out:
4815 4831          DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4816 4832              RENEW4res *, resp);
4817 4833  }
4818 4834  
4819 4835  /* ARGSUSED */
4820 4836  static void
4821 4837  rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4822 4838      struct compound_state *cs)
4823 4839  {
4824 4840          RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4825 4841  
4826 4842          DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4827 4843  
4828 4844          /* No need to check cs->access - we are not accessing any object */
4829 4845          if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4830 4846                  *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4831 4847                  goto out;
4832 4848          }
4833 4849          if (cs->vp != NULL) {
4834 4850                  VN_RELE(cs->vp);
4835 4851          }
4836 4852          cs->vp = cs->saved_vp;
4837 4853          cs->saved_vp = NULL;
4838 4854          cs->exi = cs->saved_exi;
4839 4855          nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4840 4856          *cs->statusp = resp->status = NFS4_OK;
4841 4857          cs->deleg = FALSE;
4842 4858  
4843 4859  out:
4844 4860          DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4845 4861              RESTOREFH4res *, resp);
4846 4862  }
4847 4863  
4848 4864  /* ARGSUSED */
4849 4865  static void
4850 4866  rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4851 4867      struct compound_state *cs)
4852 4868  {
4853 4869          SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4854 4870  
4855 4871          DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4856 4872  
4857 4873          /* No need to check cs->access - we are not accessing any object */
4858 4874          if (cs->vp == NULL) {
4859 4875                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4860 4876                  goto out;
4861 4877          }
4862 4878          if (cs->saved_vp != NULL) {
4863 4879                  VN_RELE(cs->saved_vp);
4864 4880          }
4865 4881          cs->saved_vp = cs->vp;
4866 4882          VN_HOLD(cs->saved_vp);
4867 4883          cs->saved_exi = cs->exi;
4868 4884          /*
4869 4885           * since SAVEFH is fairly rare, don't alloc space for its fh
4870 4886           * unless necessary.
4871 4887           */
4872 4888          if (cs->saved_fh.nfs_fh4_val == NULL) {
4873 4889                  cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4874 4890          }
4875 4891          nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4876 4892          *cs->statusp = resp->status = NFS4_OK;
4877 4893  
4878 4894  out:
4879 4895          DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4880 4896              SAVEFH4res *, resp);
4881 4897  }
4882 4898  
4883 4899  /*
4884 4900   * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4885 4901   * return the bitmap of attrs that were set successfully. It is also
4886 4902   * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4887 4903   * always be called only after rfs4_do_set_attrs().
4888 4904   *
4889 4905   * Verify that the attributes are same as the expected ones. sargp->vap
4890 4906   * and sargp->sbp contain the input attributes as translated from fattr4.
4891 4907   *
4892 4908   * This function verifies only the attrs that correspond to a vattr or
4893 4909   * vfsstat struct. That is because of the extra step needed to get the
4894 4910   * corresponding system structs. Other attributes have already been set or
4895 4911   * verified by do_rfs4_set_attrs.
4896 4912   *
4897 4913   * Return 0 if all attrs match, -1 if some don't, error if error processing.
4898 4914   */
4899 4915  static int
4900 4916  rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4901 4917      bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4902 4918  {
4903 4919          int error, ret_error = 0;
4904 4920          int i, k;
4905 4921          uint_t sva_mask = sargp->vap->va_mask;
4906 4922          uint_t vbit;
4907 4923          union nfs4_attr_u *na;
4908 4924          uint8_t *amap;
4909 4925          bool_t getsb = ntovp->vfsstat;
4910 4926  
4911 4927          if (sva_mask != 0) {
4912 4928                  /*
4913 4929                   * Okay to overwrite sargp->vap because we verify based
4914 4930                   * on the incoming values.
4915 4931                   */
4916 4932                  ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4917 4933                      sargp->cs->cr, NULL);
4918 4934                  if (ret_error) {
4919 4935                          if (resp == NULL)
4920 4936                                  return (ret_error);
4921 4937                          /*
4922 4938                           * Must return bitmap of successful attrs
4923 4939                           */
4924 4940                          sva_mask = 0;   /* to prevent checking vap later */
4925 4941                  } else {
4926 4942                          /*
4927 4943                           * Some file systems clobber va_mask. it is probably
4928 4944                           * wrong of them to do so, nonethless we practice
4929 4945                           * defensive coding.
4930 4946                           * See bug id 4276830.
4931 4947                           */
4932 4948                          sargp->vap->va_mask = sva_mask;
4933 4949                  }
4934 4950          }
4935 4951  
4936 4952          if (getsb) {
4937 4953                  /*
4938 4954                   * Now get the superblock and loop on the bitmap, as there is
4939 4955                   * no simple way of translating from superblock to bitmap4.
4940 4956                   */
4941 4957                  ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4942 4958                  if (ret_error) {
4943 4959                          if (resp == NULL)
4944 4960                                  goto errout;
4945 4961                          getsb = FALSE;
4946 4962                  }
4947 4963          }
4948 4964  
4949 4965          /*
4950 4966           * Now loop and verify each attribute which getattr returned
4951 4967           * whether it's the same as the input.
4952 4968           */
4953 4969          if (resp == NULL && !getsb && (sva_mask == 0))
4954 4970                  goto errout;
4955 4971  
4956 4972          na = ntovp->na;
4957 4973          amap = ntovp->amap;
4958 4974          k = 0;
4959 4975          for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4960 4976                  k = *amap;
4961 4977                  ASSERT(nfs4_ntov_map[k].nval == k);
4962 4978                  vbit = nfs4_ntov_map[k].vbit;
4963 4979  
4964 4980                  /*
4965 4981                   * If vattr attribute but VOP_GETATTR failed, or it's
4966 4982                   * superblock attribute but VFS_STATVFS failed, skip
4967 4983                   */
4968 4984                  if (vbit) {
4969 4985                          if ((vbit & sva_mask) == 0)
4970 4986                                  continue;
4971 4987                  } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4972 4988                          continue;
4973 4989                  }
4974 4990                  error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4975 4991                  if (resp != NULL) {
4976 4992                          if (error)
4977 4993                                  ret_error = -1; /* not all match */
4978 4994                          else    /* update response bitmap */
4979 4995                                  *resp |= nfs4_ntov_map[k].fbit;
4980 4996                          continue;
4981 4997                  }
4982 4998                  if (error) {
4983 4999                          ret_error = -1; /* not all match */
4984 5000                          break;
4985 5001                  }
4986 5002          }
4987 5003  errout:
4988 5004          return (ret_error);
4989 5005  }
4990 5006  
4991 5007  /*
4992 5008   * Decode the attribute to be set/verified. If the attr requires a sys op
4993 5009   * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4994 5010   * call the sv_getit function for it, because the sys op hasn't yet been done.
4995 5011   * Return 0 for success, error code if failed.
4996 5012   *
4997 5013   * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4998 5014   */
4999 5015  static int
5000 5016  decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5001 5017      int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5002 5018  {
5003 5019          int error = 0;
5004 5020          bool_t set_later;
5005 5021  
5006 5022          sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5007 5023  
5008 5024          if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5009 5025                  set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5010 5026                  /*
5011 5027                   * don't verify yet if a vattr or sb dependent attr,
5012 5028                   * because we don't have their sys values yet.
5013 5029                   * Will be done later.
5014 5030                   */
5015 5031                  if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5016 5032                          /*
5017 5033                           * ACLs are a special case, since setting the MODE
5018 5034                           * conflicts with setting the ACL.  We delay setting
5019 5035                           * the ACL until all other attributes have been set.
5020 5036                           * The ACL gets set in do_rfs4_op_setattr().
5021 5037                           */
5022 5038                          if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5023 5039                                  error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5024 5040                                      sargp, nap);
5025 5041                                  if (error) {
5026 5042                                          xdr_free(nfs4_ntov_map[k].xfunc,
5027 5043                                              (caddr_t)nap);
5028 5044                                  }
5029 5045                          }
5030 5046                  }
5031 5047          } else {
5032 5048  #ifdef  DEBUG
5033 5049                  cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5034 5050                      "decoding attribute %d\n", k);
5035 5051  #endif
5036 5052                  error = EINVAL;
5037 5053          }
5038 5054          if (!error && resp_bval && !set_later) {
5039 5055                  *resp_bval |= nfs4_ntov_map[k].fbit;
5040 5056          }
5041 5057  
5042 5058          return (error);
5043 5059  }
5044 5060  
5045 5061  /*
5046 5062   * Set vattr based on incoming fattr4 attrs - used by setattr.
5047 5063   * Set response mask. Ignore any values that are not writable vattr attrs.
5048 5064   */
5049 5065  static nfsstat4
5050 5066  do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5051 5067      struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5052 5068      nfs4_attr_cmd_t cmd)
5053 5069  {
5054 5070          int error = 0;
5055 5071          int i;
5056 5072          char *attrs = fattrp->attrlist4;
5057 5073          uint32_t attrslen = fattrp->attrlist4_len;
5058 5074          XDR xdr;
5059 5075          nfsstat4 status = NFS4_OK;
5060 5076          vnode_t *vp = cs->vp;
5061 5077          union nfs4_attr_u *na;
5062 5078          uint8_t *amap;
5063 5079  
5064 5080  #ifndef lint
5065 5081          /*
5066 5082           * Make sure that maximum attribute number can be expressed as an
5067 5083           * 8 bit quantity.
5068 5084           */
5069 5085          ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5070 5086  #endif
5071 5087  
5072 5088          if (vp == NULL) {
5073 5089                  if (resp)
5074 5090                          *resp = 0;
5075 5091                  return (NFS4ERR_NOFILEHANDLE);
5076 5092          }
5077 5093          if (cs->access == CS_ACCESS_DENIED) {
5078 5094                  if (resp)
5079 5095                          *resp = 0;
5080 5096                  return (NFS4ERR_ACCESS);
5081 5097          }
5082 5098  
5083 5099          sargp->op = cmd;
5084 5100          sargp->cs = cs;
5085 5101          sargp->flag = 0;        /* may be set later */
5086 5102          sargp->vap->va_mask = 0;
5087 5103          sargp->rdattr_error = NFS4_OK;
5088 5104          sargp->rdattr_error_req = FALSE;
5089 5105          /* sargp->sbp is set by the caller */
5090 5106  
5091 5107          xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5092 5108  
5093 5109          na = ntovp->na;
5094 5110          amap = ntovp->amap;
5095 5111  
5096 5112          /*
5097 5113           * The following loop iterates on the nfs4_ntov_map checking
5098 5114           * if the fbit is set in the requested bitmap.
5099 5115           * If set then we process the arguments using the
5100 5116           * rfs4_fattr4 conversion functions to populate the setattr
5101 5117           * vattr and va_mask. Any settable attrs that are not using vattr
5102 5118           * will be set in this loop.
5103 5119           */
5104 5120          for (i = 0; i < nfs4_ntov_map_size; i++) {
5105 5121                  if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5106 5122                          continue;
5107 5123                  }
5108 5124                  /*
5109 5125                   * If setattr, must be a writable attr.
5110 5126                   * If verify/nverify, must be a readable attr.
5111 5127                   */
5112 5128                  if ((error = (*nfs4_ntov_map[i].sv_getit)(
5113 5129                      NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5114 5130                          /*
5115 5131                           * Client tries to set/verify an
5116 5132                           * unsupported attribute, tries to set
5117 5133                           * a read only attr or verify a write
5118 5134                           * only one - error!
5119 5135                           */
5120 5136                          break;
5121 5137                  }
5122 5138                  /*
5123 5139                   * Decode the attribute to set/verify
5124 5140                   */
5125 5141                  error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5126 5142                      &xdr, resp ? resp : NULL, na);
5127 5143                  if (error)
5128 5144                          break;
5129 5145                  *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5130 5146                  na++;
5131 5147                  (ntovp->attrcnt)++;
5132 5148                  if (nfs4_ntov_map[i].vfsstat)
5133 5149                          ntovp->vfsstat = TRUE;
5134 5150          }
5135 5151  
5136 5152          if (error != 0)
5137 5153                  status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5138 5154                      puterrno4(error));
5139 5155          /* xdrmem_destroy(&xdrs); */    /* NO-OP */
5140 5156          return (status);
5141 5157  }
5142 5158  
5143 5159  static nfsstat4
5144 5160  do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5145 5161      stateid4 *stateid)
5146 5162  {
5147 5163          int error = 0;
5148 5164          struct nfs4_svgetit_arg sarg;
5149 5165          bool_t trunc;
5150 5166  
5151 5167          nfsstat4 status = NFS4_OK;
5152 5168          cred_t *cr = cs->cr;
5153 5169          vnode_t *vp = cs->vp;
5154 5170          struct nfs4_ntov_table ntov;
5155 5171          struct statvfs64 sb;
5156 5172          struct vattr bva;
5157 5173          struct flock64 bf;
5158 5174          int in_crit = 0;
5159 5175          uint_t saved_mask = 0;
5160 5176          caller_context_t ct;
5161 5177  
5162 5178          *resp = 0;
5163 5179          sarg.sbp = &sb;
5164 5180          sarg.is_referral = B_FALSE;
5165 5181          nfs4_ntov_table_init(&ntov);
5166 5182          status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5167 5183              NFS4ATTR_SETIT);
5168 5184          if (status != NFS4_OK) {
5169 5185                  /*
5170 5186                   * failed set attrs
5171 5187                   */
5172 5188                  goto done;
5173 5189          }
5174 5190          if ((sarg.vap->va_mask == 0) &&
5175 5191              (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5176 5192                  /*
5177 5193                   * no further work to be done
5178 5194                   */
5179 5195                  goto done;
5180 5196          }
5181 5197  
5182 5198          /*
5183 5199           * If we got a request to set the ACL and the MODE, only
5184 5200           * allow changing VSUID, VSGID, and VSVTX.  Attempting
5185 5201           * to change any other bits, along with setting an ACL,
5186 5202           * gives NFS4ERR_INVAL.
5187 5203           */
5188 5204          if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5189 5205              (fattrp->attrmask & FATTR4_MODE_MASK)) {
5190 5206                  vattr_t va;
5191 5207  
5192 5208                  va.va_mask = AT_MODE;
5193 5209                  error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5194 5210                  if (error) {
5195 5211                          status = puterrno4(error);
5196 5212                          goto done;
5197 5213                  }
5198 5214                  if ((sarg.vap->va_mode ^ va.va_mode) &
5199 5215                      ~(VSUID | VSGID | VSVTX)) {
5200 5216                          status = NFS4ERR_INVAL;
5201 5217                          goto done;
5202 5218                  }
5203 5219          }
5204 5220  
5205 5221          /* Check stateid only if size has been set */
5206 5222          if (sarg.vap->va_mask & AT_SIZE) {
5207 5223                  trunc = (sarg.vap->va_size == 0);
5208 5224                  status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5209 5225                      trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5210 5226                  if (status != NFS4_OK)
5211 5227                          goto done;
5212 5228          } else {
5213 5229                  ct.cc_sysid = 0;
5214 5230                  ct.cc_pid = 0;
5215 5231                  ct.cc_caller_id = nfs4_srv_caller_id;
5216 5232                  ct.cc_flags = CC_DONTBLOCK;
5217 5233          }
5218 5234  
5219 5235          /* XXX start of possible race with delegations */
5220 5236  
5221 5237          /*
5222 5238           * We need to specially handle size changes because it is
5223 5239           * possible for the client to create a file with read-only
5224 5240           * modes, but with the file opened for writing. If the client
5225 5241           * then tries to set the file size, e.g. ftruncate(3C),
5226 5242           * fcntl(F_FREESP), the normal access checking done in
5227 5243           * VOP_SETATTR would prevent the client from doing it even though
5228 5244           * it should be allowed to do so.  To get around this, we do the
5229 5245           * access checking for ourselves and use VOP_SPACE which doesn't
5230 5246           * do the access checking.
5231 5247           * Also the client should not be allowed to change the file
5232 5248           * size if there is a conflicting non-blocking mandatory lock in
5233 5249           * the region of the change.
5234 5250           */
5235 5251          if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5236 5252                  u_offset_t offset;
5237 5253                  ssize_t length;
5238 5254  
5239 5255                  /*
5240 5256                   * ufs_setattr clears AT_SIZE from vap->va_mask, but
5241 5257                   * before returning, sarg.vap->va_mask is used to
5242 5258                   * generate the setattr reply bitmap.  We also clear
5243 5259                   * AT_SIZE below before calling VOP_SPACE.  For both
5244 5260                   * of these cases, the va_mask needs to be saved here
5245 5261                   * and restored after calling VOP_SETATTR.
5246 5262                   */
5247 5263                  saved_mask = sarg.vap->va_mask;
5248 5264  
5249 5265                  /*
5250 5266                   * Check any possible conflict due to NBMAND locks.
5251 5267                   * Get into critical region before VOP_GETATTR, so the
5252 5268                   * size attribute is valid when checking conflicts.
5253 5269                   */
5254 5270                  if (nbl_need_check(vp)) {
5255 5271                          nbl_start_crit(vp, RW_READER);
5256 5272                          in_crit = 1;
5257 5273                  }
5258 5274  
5259 5275                  bva.va_mask = AT_UID|AT_SIZE;
5260 5276                  if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5261 5277                          status = puterrno4(error);
5262 5278                          goto done;
5263 5279                  }
5264 5280  
5265 5281                  if (in_crit) {
5266 5282                          if (sarg.vap->va_size < bva.va_size) {
5267 5283                                  offset = sarg.vap->va_size;
5268 5284                                  length = bva.va_size - sarg.vap->va_size;
5269 5285                          } else {
5270 5286                                  offset = bva.va_size;
5271 5287                                  length = sarg.vap->va_size - bva.va_size;
5272 5288                          }
5273 5289                          if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5274 5290                              &ct)) {
5275 5291                                  status = NFS4ERR_LOCKED;
5276 5292                                  goto done;
5277 5293                          }
5278 5294                  }
5279 5295  
5280 5296                  if (crgetuid(cr) == bva.va_uid) {
5281 5297                          sarg.vap->va_mask &= ~AT_SIZE;
5282 5298                          bf.l_type = F_WRLCK;
5283 5299                          bf.l_whence = 0;
5284 5300                          bf.l_start = (off64_t)sarg.vap->va_size;
5285 5301                          bf.l_len = 0;
5286 5302                          bf.l_sysid = 0;
5287 5303                          bf.l_pid = 0;
5288 5304                          error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5289 5305                              (offset_t)sarg.vap->va_size, cr, &ct);
5290 5306                  }
5291 5307          }
5292 5308  
5293 5309          if (!error && sarg.vap->va_mask != 0)
5294 5310                  error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5295 5311  
5296 5312          /* restore va_mask -- ufs_setattr clears AT_SIZE */
5297 5313          if (saved_mask & AT_SIZE)
5298 5314                  sarg.vap->va_mask |= AT_SIZE;
5299 5315  
5300 5316          /*
5301 5317           * If an ACL was being set, it has been delayed until now,
5302 5318           * in order to set the mode (via the VOP_SETATTR() above) first.
5303 5319           */
5304 5320          if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5305 5321                  int i;
5306 5322  
5307 5323                  for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5308 5324                          if (ntov.amap[i] == FATTR4_ACL)
5309 5325                                  break;
5310 5326                  if (i < NFS4_MAXNUM_ATTRS) {
5311 5327                          error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5312 5328                              NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5313 5329                          if (error == 0) {
5314 5330                                  *resp |= FATTR4_ACL_MASK;
5315 5331                          } else if (error == ENOTSUP) {
5316 5332                                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5317 5333                                  status = NFS4ERR_ATTRNOTSUPP;
5318 5334                                  goto done;
5319 5335                          }
5320 5336                  } else {
5321 5337                          NFS4_DEBUG(rfs4_debug,
5322 5338                              (CE_NOTE, "do_rfs4_op_setattr: "
5323 5339                              "unable to find ACL in fattr4"));
5324 5340                          error = EINVAL;
5325 5341                  }
5326 5342          }
5327 5343  
5328 5344          if (error) {
5329 5345                  /* check if a monitor detected a delegation conflict */
5330 5346                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5331 5347                          status = NFS4ERR_DELAY;
5332 5348                  else
5333 5349                          status = puterrno4(error);
5334 5350  
5335 5351                  /*
5336 5352                   * Set the response bitmap when setattr failed.
5337 5353                   * If VOP_SETATTR partially succeeded, test by doing a
5338 5354                   * VOP_GETATTR on the object and comparing the data
5339 5355                   * to the setattr arguments.
5340 5356                   */
5341 5357                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5342 5358          } else {
5343 5359                  /*
5344 5360                   * Force modified metadata out to stable storage.
5345 5361                   */
5346 5362                  (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5347 5363                  /*
5348 5364                   * Set response bitmap
5349 5365                   */
5350 5366                  nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5351 5367          }
5352 5368  
5353 5369  /* Return early and already have a NFSv4 error */
5354 5370  done:
5355 5371          /*
5356 5372           * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5357 5373           * conversion sets both readable and writeable NFS4 attrs
5358 5374           * for AT_MTIME and AT_ATIME.  The line below masks out
5359 5375           * unrequested attrs from the setattr result bitmap.  This
5360 5376           * is placed after the done: label to catch the ATTRNOTSUP
5361 5377           * case.
5362 5378           */
5363 5379          *resp &= fattrp->attrmask;
5364 5380  
5365 5381          if (in_crit)
5366 5382                  nbl_end_crit(vp);
5367 5383  
5368 5384          nfs4_ntov_table_free(&ntov, &sarg);
5369 5385  
5370 5386          return (status);
5371 5387  }
5372 5388  
5373 5389  /* ARGSUSED */
5374 5390  static void
5375 5391  rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5376 5392      struct compound_state *cs)
5377 5393  {
5378 5394          SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5379 5395          SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5380 5396          bslabel_t *clabel;
5381 5397  
5382 5398          DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5383 5399              SETATTR4args *, args);
5384 5400  
5385 5401          if (cs->vp == NULL) {
5386 5402                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5387 5403                  goto out;
5388 5404          }
5389 5405  
5390 5406          /*
5391 5407           * If there is an unshared filesystem mounted on this vnode,
5392 5408           * do not allow to setattr on this vnode.
5393 5409           */
5394 5410          if (vn_ismntpt(cs->vp)) {
5395 5411                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5396 5412                  goto out;
5397 5413          }
5398 5414  
5399 5415          resp->attrsset = 0;
5400 5416  
5401 5417          if (rdonly4(req, cs)) {
5402 5418                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5403 5419                  goto out;
5404 5420          }
5405 5421  
5406 5422          /* check label before setting attributes */
5407 5423          if (is_system_labeled()) {
5408 5424                  ASSERT(req->rq_label != NULL);
5409 5425                  clabel = req->rq_label;
5410 5426                  DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5411 5427                      "got client label from request(1)",
5412 5428                      struct svc_req *, req);
5413 5429                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
5414 5430                          if (!do_rfs_label_check(clabel, cs->vp,
5415 5431                              EQUALITY_CHECK, cs->exi)) {
5416 5432                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5417 5433                                  goto out;
5418 5434                          }
5419 5435                  }
5420 5436          }
5421 5437  
5422 5438          *cs->statusp = resp->status =
5423 5439              do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5424 5440              &args->stateid);
5425 5441  
5426 5442  out:
5427 5443          DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5428 5444              SETATTR4res *, resp);
5429 5445  }
5430 5446  
5431 5447  /* ARGSUSED */
5432 5448  static void
5433 5449  rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5434 5450      struct compound_state *cs)
5435 5451  {
5436 5452          /*
5437 5453           * verify and nverify are exactly the same, except that nverify
5438 5454           * succeeds when some argument changed, and verify succeeds when
5439 5455           * when none changed.
5440 5456           */
5441 5457  
5442 5458          VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5443 5459          VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5444 5460  
5445 5461          int error;
5446 5462          struct nfs4_svgetit_arg sarg;
5447 5463          struct statvfs64 sb;
5448 5464          struct nfs4_ntov_table ntov;
5449 5465  
5450 5466          DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5451 5467              VERIFY4args *, args);
5452 5468  
5453 5469          if (cs->vp == NULL) {
5454 5470                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5455 5471                  goto out;
5456 5472          }
5457 5473  
5458 5474          sarg.sbp = &sb;
5459 5475          sarg.is_referral = B_FALSE;
5460 5476          nfs4_ntov_table_init(&ntov);
5461 5477          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5462 5478              &sarg, &ntov, NFS4ATTR_VERIT);
5463 5479          if (resp->status != NFS4_OK) {
5464 5480                  /*
5465 5481                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5466 5482                   * so could return -1 for "no match".
5467 5483                   */
5468 5484                  if (resp->status == -1)
5469 5485                          resp->status = NFS4ERR_NOT_SAME;
5470 5486                  goto done;
5471 5487          }
5472 5488          error = rfs4_verify_attr(&sarg, NULL, &ntov);
5473 5489          switch (error) {
5474 5490          case 0:
5475 5491                  resp->status = NFS4_OK;
5476 5492                  break;
5477 5493          case -1:
5478 5494                  resp->status = NFS4ERR_NOT_SAME;
5479 5495                  break;
5480 5496          default:
5481 5497                  resp->status = puterrno4(error);
5482 5498                  break;
5483 5499          }
5484 5500  done:
5485 5501          *cs->statusp = resp->status;
5486 5502          nfs4_ntov_table_free(&ntov, &sarg);
5487 5503  out:
5488 5504          DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5489 5505              VERIFY4res *, resp);
5490 5506  }
5491 5507  
5492 5508  /* ARGSUSED */
5493 5509  static void
5494 5510  rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5495 5511      struct compound_state *cs)
5496 5512  {
5497 5513          /*
5498 5514           * verify and nverify are exactly the same, except that nverify
5499 5515           * succeeds when some argument changed, and verify succeeds when
5500 5516           * when none changed.
5501 5517           */
5502 5518  
5503 5519          NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5504 5520          NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5505 5521  
5506 5522          int error;
5507 5523          struct nfs4_svgetit_arg sarg;
5508 5524          struct statvfs64 sb;
5509 5525          struct nfs4_ntov_table ntov;
5510 5526  
5511 5527          DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5512 5528              NVERIFY4args *, args);
5513 5529  
5514 5530          if (cs->vp == NULL) {
5515 5531                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5516 5532                  DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5517 5533                      NVERIFY4res *, resp);
5518 5534                  return;
5519 5535          }
5520 5536          sarg.sbp = &sb;
5521 5537          sarg.is_referral = B_FALSE;
5522 5538          nfs4_ntov_table_init(&ntov);
5523 5539          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5524 5540              &sarg, &ntov, NFS4ATTR_VERIT);
5525 5541          if (resp->status != NFS4_OK) {
5526 5542                  /*
5527 5543                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5528 5544                   * so could return -1 for "no match".
5529 5545                   */
5530 5546                  if (resp->status == -1)
5531 5547                          resp->status = NFS4_OK;
5532 5548                  goto done;
5533 5549          }
5534 5550          error = rfs4_verify_attr(&sarg, NULL, &ntov);
5535 5551          switch (error) {
5536 5552          case 0:
5537 5553                  resp->status = NFS4ERR_SAME;
5538 5554                  break;
5539 5555          case -1:
5540 5556                  resp->status = NFS4_OK;
5541 5557                  break;
5542 5558          default:
5543 5559                  resp->status = puterrno4(error);
5544 5560                  break;
5545 5561          }
5546 5562  done:
5547 5563          *cs->statusp = resp->status;
5548 5564          nfs4_ntov_table_free(&ntov, &sarg);
5549 5565  
5550 5566          DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5551 5567              NVERIFY4res *, resp);
5552 5568  }
5553 5569  
5554 5570  /*
5555 5571   * XXX - This should live in an NFS header file.
5556 5572   */
5557 5573  #define MAX_IOVECS      12
5558 5574  
5559 5575  /* ARGSUSED */
5560 5576  static void
5561 5577  rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5562 5578      struct compound_state *cs)
5563 5579  {
5564 5580          WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5565 5581          WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5566 5582          int error;
5567 5583          vnode_t *vp;
5568 5584          struct vattr bva;
5569 5585          u_offset_t rlimit;
5570 5586          struct uio uio;
5571 5587          struct iovec iov[MAX_IOVECS];
5572 5588          struct iovec *iovp;
5573 5589          int iovcnt;
5574 5590          int ioflag;
5575 5591          cred_t *savecred, *cr;
5576 5592          bool_t *deleg = &cs->deleg;
5577 5593          nfsstat4 stat;
5578 5594          int in_crit = 0;
5579 5595          caller_context_t ct;
5580 5596          nfs4_srv_t *nsrv4;
5581 5597  
5582 5598          DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5583 5599              WRITE4args *, args);
5584 5600  
5585 5601          vp = cs->vp;
5586 5602          if (vp == NULL) {
5587 5603                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5588 5604                  goto out;
5589 5605          }
5590 5606          if (cs->access == CS_ACCESS_DENIED) {
5591 5607                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5592 5608                  goto out;
5593 5609          }
5594 5610  
5595 5611          cr = cs->cr;
5596 5612  
5597 5613          if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5598 5614              deleg, TRUE, &ct)) != NFS4_OK) {
5599 5615                  *cs->statusp = resp->status = stat;
5600 5616                  goto out;
5601 5617          }
5602 5618  
5603 5619          /*
5604 5620           * We have to enter the critical region before calling VOP_RWLOCK
5605 5621           * to avoid a deadlock with ufs.
5606 5622           */
5607 5623          if (nbl_need_check(vp)) {
5608 5624                  nbl_start_crit(vp, RW_READER);
5609 5625                  in_crit = 1;
5610 5626                  if (nbl_conflict(vp, NBL_WRITE,
5611 5627                      args->offset, args->data_len, 0, &ct)) {
5612 5628                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
5613 5629                          goto out;
5614 5630                  }
5615 5631          }
5616 5632  
5617 5633          bva.va_mask = AT_MODE | AT_UID;
5618 5634          error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5619 5635  
5620 5636          /*
5621 5637           * If we can't get the attributes, then we can't do the
5622 5638           * right access checking.  So, we'll fail the request.
5623 5639           */
5624 5640          if (error) {
5625 5641                  *cs->statusp = resp->status = puterrno4(error);
5626 5642                  goto out;
5627 5643          }
5628 5644  
5629 5645          if (rdonly4(req, cs)) {
5630 5646                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5631 5647                  goto out;
5632 5648          }
5633 5649  
5634 5650          if (vp->v_type != VREG) {
5635 5651                  *cs->statusp = resp->status =
5636 5652                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5637 5653                  goto out;
5638 5654          }
5639 5655  
5640 5656          if (crgetuid(cr) != bva.va_uid &&
5641 5657              (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5642 5658                  *cs->statusp = resp->status = puterrno4(error);
5643 5659                  goto out;
5644 5660          }
5645 5661  
5646 5662          if (MANDLOCK(vp, bva.va_mode)) {
5647 5663                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5648 5664                  goto out;
5649 5665          }
5650 5666  
5651 5667          nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5652 5668          if (args->data_len == 0) {
5653 5669                  *cs->statusp = resp->status = NFS4_OK;
5654 5670                  resp->count = 0;
5655 5671                  resp->committed = args->stable;
5656 5672                  resp->writeverf = nsrv4->write4verf;
5657 5673                  goto out;
5658 5674          }
5659 5675  
5660 5676          if (args->mblk != NULL) {
5661 5677                  mblk_t *m;
5662 5678                  uint_t bytes, round_len;
5663 5679  
5664 5680                  iovcnt = 0;
5665 5681                  bytes = 0;
5666 5682                  round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5667 5683                  for (m = args->mblk;
5668 5684                      m != NULL && bytes < round_len;
5669 5685                      m = m->b_cont) {
5670 5686                          iovcnt++;
5671 5687                          bytes += MBLKL(m);
5672 5688                  }
5673 5689  #ifdef DEBUG
5674 5690                  /* should have ended on an mblk boundary */
5675 5691                  if (bytes != round_len) {
5676 5692                          printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5677 5693                              bytes, round_len, args->data_len);
5678 5694                          printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5679 5695                              (void *)args->mblk, (void *)m);
5680 5696                          ASSERT(bytes == round_len);
5681 5697                  }
5682 5698  #endif
5683 5699                  if (iovcnt <= MAX_IOVECS) {
5684 5700                          iovp = iov;
5685 5701                  } else {
5686 5702                          iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5687 5703                  }
5688 5704                  mblk_to_iov(args->mblk, iovcnt, iovp);
5689 5705          } else if (args->rlist != NULL) {
5690 5706                  iovcnt = 1;
5691 5707                  iovp = iov;
5692 5708                  iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5693 5709                  iovp->iov_len = args->data_len;
5694 5710          } else {
5695 5711                  iovcnt = 1;
5696 5712                  iovp = iov;
5697 5713                  iovp->iov_base = args->data_val;
5698 5714                  iovp->iov_len = args->data_len;
5699 5715          }
5700 5716  
5701 5717          uio.uio_iov = iovp;
5702 5718          uio.uio_iovcnt = iovcnt;
5703 5719  
5704 5720          uio.uio_segflg = UIO_SYSSPACE;
5705 5721          uio.uio_extflg = UIO_COPY_DEFAULT;
5706 5722          uio.uio_loffset = args->offset;
5707 5723          uio.uio_resid = args->data_len;
5708 5724          uio.uio_llimit = curproc->p_fsz_ctl;
5709 5725          rlimit = uio.uio_llimit - args->offset;
5710 5726          if (rlimit < (u_offset_t)uio.uio_resid)
5711 5727                  uio.uio_resid = (int)rlimit;
5712 5728  
5713 5729          if (args->stable == UNSTABLE4)
5714 5730                  ioflag = 0;
5715 5731          else if (args->stable == FILE_SYNC4)
5716 5732                  ioflag = FSYNC;
5717 5733          else if (args->stable == DATA_SYNC4)
5718 5734                  ioflag = FDSYNC;
5719 5735          else {
5720 5736                  if (iovp != iov)
5721 5737                          kmem_free(iovp, sizeof (*iovp) * iovcnt);
5722 5738                  *cs->statusp = resp->status = NFS4ERR_INVAL;
5723 5739                  goto out;
5724 5740          }
5725 5741  
5726 5742          /*
5727 5743           * We're changing creds because VM may fault and we need
5728 5744           * the cred of the current thread to be used if quota
5729 5745           * checking is enabled.
5730 5746           */
5731 5747          savecred = curthread->t_cred;
5732 5748          curthread->t_cred = cr;
5733 5749          error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5734 5750          curthread->t_cred = savecred;
5735 5751  
5736 5752          if (iovp != iov)
5737 5753                  kmem_free(iovp, sizeof (*iovp) * iovcnt);
5738 5754  
5739 5755          if (error) {
5740 5756                  *cs->statusp = resp->status = puterrno4(error);
5741 5757                  goto out;
5742 5758          }
5743 5759  
5744 5760          *cs->statusp = resp->status = NFS4_OK;
5745 5761          resp->count = args->data_len - uio.uio_resid;
5746 5762  
5747 5763          if (ioflag == 0)
5748 5764                  resp->committed = UNSTABLE4;
5749 5765          else
5750 5766                  resp->committed = FILE_SYNC4;
5751 5767  
5752 5768          resp->writeverf = nsrv4->write4verf;
5753 5769  
5754 5770  out:
5755 5771          if (in_crit)
5756 5772                  nbl_end_crit(vp);
5757 5773  
5758 5774          DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5759 5775              WRITE4res *, resp);
5760 5776  }
5761 5777  
5762 5778  
5763 5779  /* XXX put in a header file */
5764 5780  extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5765 5781  
           /*
            * Process one NFSv4 COMPOUND request.
            *
            * The request tag is copied into the reply, a minor version other than
            * NFS4_MINORVERSION is rejected with NFS4ERR_MINOR_VERS_MISMATCH, and
            * credentials are obtained through sec_svc_getcred().  Each operation in
            * args->array is then dispatched through rfsv4disptab[] while
            * exported_lock is held as reader.  Dispatching stops after the first
            * operation that leaves a status other than NFS4_OK (or at an opcode
            * outside the dispatch table), and in that case the result array is
            * shrunk to the operations actually executed.
            *
            * exi and cr are asserted to be NULL once the early-return checks have
            * passed; the cred used for the compound is allocated here.  If rv is
            * non-NULL it is set to 0 on entry and to 1 when sec_svc_getcred() fails
            * and svcerr_badcred() has been called.
            */
5766 5782  void
5767 5783  rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5768 5784      struct svc_req *req, cred_t *cr, int *rv)
5769 5785  {
5770 5786          uint_t i;
5771 5787          struct compound_state cs;
5772 5788          nfs4_srv_t *nsrv4;
5773 5789          nfs_export_t *ne = nfs_get_export();
5774 5790  
5775 5791          if (rv != NULL)
5776 5792                  *rv = 0;
5777 5793          rfs4_init_compound_state(&cs);
5778 5794          /*
5779 5795           * Form a reply tag by copying over the request tag.
5780 5796           */
5781 5797          resp->tag.utf8string_val =
5782 5798              kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5783 5799          resp->tag.utf8string_len = args->tag.utf8string_len;
5784 5800          bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5785 5801              resp->tag.utf8string_len);
5786 5802  
5787 5803          cs.statusp = &resp->status;
5788 5804          cs.req = req;
5789 5805          resp->array = NULL;
5790 5806          resp->array_len = 0;
5791 5807  
5792 5808          /*
5793 5809           * XXX for now, minorversion should be zero
5794 5810           */
5795 5811          if (args->minorversion != NFS4_MINORVERSION) {
5796 5812                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5797 5813                      &cs, COMPOUND4args *, args);
5798 5814                  resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5799 5815                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5800 5816                      &cs, COMPOUND4res *, resp);
5801 5817                  return;
5802 5818          }
5803 5819  
                   /* No operations to run: the reply carries only the tag. */
5804 5820          if (args->array_len == 0) {
5805 5821                  resp->status = NFS4_OK;
5806 5822                  return;
5807 5823          }
5808 5824  
5809 5825          ASSERT(exi == NULL);
5810 5826          ASSERT(cr == NULL);
5811 5827  
5812 5828          cr = crget();
5813 5829          ASSERT(cr != NULL);
5814 5830  
5815 5831          if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5816 5832                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5817 5833                      &cs, COMPOUND4args *, args);
5818 5834                  crfree(cr);
5819 5835                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5820 5836                      &cs, COMPOUND4res *, resp);
5821 5837                  svcerr_badcred(req->rq_xprt);
5822 5838                  if (rv != NULL)
5823 5839                          *rv = 1;
5824 5840                  return;
5825 5841          }
                   /* One zeroed result slot per requested operation. */
5826 5842          resp->array_len = args->array_len;
5827 5843          resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5828 5844              KM_SLEEP);
5829 5845  
5830 5846          cs.basecr = cr;
5831 5847          nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5832 5848  
5833 5849          DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5834 5850              COMPOUND4args *, args);
5835 5851  
5836 5852          /*
5837 5853           * For now, NFS4 compound processing must be protected by
5838 5854           * exported_lock because it can access more than one exportinfo
5839 5855           * per compound and share/unshare can now change multiple
5840 5856           * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5841 5857           * per proc (excluding public exinfo), and exi_count design
5842 5858           * is sufficient to protect concurrent execution of NFS2/3
5843 5859           * ops along with unexport.  This lock will be removed as
5844 5860           * part of the NFSv4 phase 2 namespace redesign work.
5845 5861           */
5846 5862          rw_enter(&ne->exported_lock, RW_READER);
5847 5863  
5848 5864          /*
5849 5865           * If this is the first compound we've seen, we need to start all
5850 5866           * new instances' grace periods.
5851 5867           */
5852 5868          if (nsrv4->seen_first_compound == 0) {
5853 5869                  rfs4_grace_start_new(nsrv4);
5854 5870                  /*
5855 5871                   * This must be set after rfs4_grace_start_new(), otherwise
5856 5872                   * another thread could proceed past here before the former
5857 5873                   * is finished.
5858 5874                   */
5859 5875                  nsrv4->seen_first_compound = 1;
5860 5876          }
5861 5877  
5862 5878          for (i = 0; i < args->array_len && cs.cont; i++) {
5863 5879                  nfs_argop4 *argop;
5864 5880                  nfs_resop4 *resop;
5865 5881                  uint_t op;
5866 5882  
5867 5883                  argop = &args->array[i];
5868 5884                  resop = &resp->array[i];
5869 5885                  resop->resop = argop->argop;
5870 5886                  op = (uint_t)resop->resop;
5871 5887  
5872 5888                  if (op < rfsv4disp_cnt) {
5873 5889                          /*
5874 5890                           * Count the individual ops here; NULL and COMPOUND
5875 5891                           * are counted in common_dispatch()
5876 5892                           */
5877 5893                          rfsproccnt_v4_ptr[op].value.ui64++;
5878 5894  
5879 5895                          NFS4_DEBUG(rfs4_debug > 1,
5880 5896                              (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5881 5897                          (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5882 5898                          NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5883 5899                              rfs4_op_string[op], *cs.statusp));
5884 5900                          if (*cs.statusp != NFS4_OK)
5885 5901                                  cs.cont = FALSE;
5886 5902                  } else {
5887 5903                          /*
5888 5904                           * This is effectively dead code since XDR code
5889 5905                           * will have already returned BADXDR if op doesn't
5890 5906                           * decode to legal value.  This only done for a
5891 5907                           * day when XDR code doesn't verify v4 opcodes.
5892 5908                           */
5893 5909                          op = OP_ILLEGAL;
5894 5910                          rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5895 5911  
5896 5912                          rfs4_op_illegal(argop, resop, req, &cs);
5897 5913                          cs.cont = FALSE;
5898 5914                  }
5899 5915  
5900 5916                  /*
5901 5917                   * If not at last op, and if we are to stop, then
5902 5918                   * compact the results array.
5903 5919                   */
5904 5920                  if ((i + 1) < args->array_len && !cs.cont) {
5905 5921                          nfs_resop4 *new_res = kmem_alloc(
5906 5922                              (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5907 5923                          bcopy(resp->array,
5908 5924                              new_res, (i+1) * sizeof (nfs_resop4));
5909 5925                          kmem_free(resp->array,
5910 5926                              args->array_len * sizeof (nfs_resop4));
5911 5927  
5912 5928                          resp->array_len =  i + 1;
5913 5929                          resp->array = new_res;
5914 5930                  }
5915 5931          }
5916 5932  
5917 5933          rw_exit(&ne->exported_lock);
5918 5934  
5919 5935          /*
5920 5936           * clear exportinfo and vnode fields from compound_state before dtrace
5921 5937           * probe, to avoid tracing residual values for path and share path.
5922 5938           */
5923 5939          if (cs.vp)
5924 5940                  VN_RELE(cs.vp);
5925 5941          if (cs.saved_vp)
5926 5942                  VN_RELE(cs.saved_vp);
5927 5943          cs.exi = cs.saved_exi = NULL;
5928 5944          cs.vp = cs.saved_vp = NULL;
5929 5945  
5930 5946          DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5931 5947              COMPOUND4res *, resp);
5932 5948  
5933 5949          if (cs.saved_fh.nfs_fh4_val)
5934 5950                  kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5935 5951  
5936 5952          if (cs.basecr)
5937 5953                  crfree(cs.basecr);
5938 5954          if (cs.cr)
5939 5955                  crfree(cs.cr);
5940 5956          /*
5941 5957           * done with this compound request, free the label
5942 5958           */
5943 5959  
5944 5960          if (req->rq_label != NULL) {
5945 5961                  kmem_free(req->rq_label, sizeof (bslabel_t));
5946 5962                  req->rq_label = NULL;
5947 5963          }
5948 5964  }
5949 5965  
5950 5966  /*
5951 5967   * XXX because of what appears to be duplicate calls to rfs4_compound_free
5952 5968   * XXX zero out the tag and array values. Need to investigate why the
5953 5969   * XXX calls occur, but at least prevent the panic for now.
5954 5970   */
5955 5971  void
5956 5972  rfs4_compound_free(COMPOUND4res *resp)
5957 5973  {
5958 5974          uint_t i;
5959 5975  
5960 5976          if (resp->tag.utf8string_val) {
5961 5977                  UTF8STRING_FREE(resp->tag)
5962 5978          }
5963 5979  
5964 5980          for (i = 0; i < resp->array_len; i++) {
5965 5981                  nfs_resop4 *resop;
5966 5982                  uint_t op;
5967 5983  
5968 5984                  resop = &resp->array[i];
5969 5985                  op = (uint_t)resop->resop;
5970 5986                  if (op < rfsv4disp_cnt) {
5971 5987                          (*rfsv4disptab[op].dis_resfree)(resop);
5972 5988                  }
5973 5989          }
5974 5990          if (resp->array != NULL) {
5975 5991                  kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5976 5992          }
5977 5993  }
5978 5994  
5979 5995  /*
5980 5996   * Process the value of the compound request rpc flags, as a bit-AND
5981 5997   * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5982 5998   */
5983 5999  void
5984 6000  rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5985 6001  {
5986 6002          int i;
5987 6003          int flag = RPC_ALL;
5988 6004  
5989 6005          for (i = 0; flag && i < args->array_len; i++) {
5990 6006                  uint_t op;
5991 6007  
5992 6008                  op = (uint_t)args->array[i].argop;
5993 6009  
5994 6010                  if (op < rfsv4disp_cnt)
5995 6011                          flag &= rfsv4disptab[op].dis_flags;
5996 6012                  else
5997 6013                          flag = 0;
5998 6014          }
5999 6015          *flagp = flag;
6000 6016  }
6001 6017  
6002 6018  nfsstat4
6003 6019  rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6004 6020  {
6005 6021          nfsstat4 e;
6006 6022  
6007 6023          rfs4_dbe_lock(cp->rc_dbe);
6008 6024  
6009 6025          if (cp->rc_sysidt != LM_NOSYSID) {
6010 6026                  *sp = cp->rc_sysidt;
6011 6027                  e = NFS4_OK;
6012 6028  
6013 6029          } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6014 6030                  *sp = cp->rc_sysidt;
6015 6031                  e = NFS4_OK;
6016 6032  
6017 6033                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6018 6034                      "rfs4_client_sysid: allocated 0x%x\n", *sp));
6019 6035          } else
6020 6036                  e = NFS4ERR_DELAY;
6021 6037  
6022 6038          rfs4_dbe_unlock(cp->rc_dbe);
6023 6039          return (e);
6024 6040  }
6025 6041  
6026 6042  #if defined(DEBUG) && ! defined(lint)
6027 6043  static void lock_print(char *str, int operation, struct flock64 *flk)
6028 6044  {
6029 6045          char *op, *type;
6030 6046  
6031 6047          switch (operation) {
6032 6048          case F_GETLK: op = "F_GETLK";
6033 6049                  break;
6034 6050          case F_SETLK: op = "F_SETLK";
6035 6051                  break;
6036 6052          case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
6037 6053                  break;
6038 6054          default: op = "F_UNKNOWN";
6039 6055                  break;
6040 6056          }
6041 6057          switch (flk->l_type) {
6042 6058          case F_UNLCK: type = "F_UNLCK";
6043 6059                  break;
6044 6060          case F_RDLCK: type = "F_RDLCK";
6045 6061                  break;
6046 6062          case F_WRLCK: type = "F_WRLCK";
6047 6063                  break;
6048 6064          default: type = "F_UNKNOWN";
6049 6065                  break;
6050 6066          }
6051 6067  
6052 6068          ASSERT(flk->l_whence == 0);
6053 6069          cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
6054 6070              str, op, type, (longlong_t)flk->l_start,
6055 6071              flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
6056 6072  }
6057 6073  
6058 6074  #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
6059 6075  #else
6060 6076  #define LOCK_PRINT(d, s, t, f)
6061 6077  #endif
6062 6078  
6063 6079  /*ARGSUSED*/
6064 6080  static bool_t
6065 6081  creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6066 6082  {
6067 6083          return (TRUE);
6068 6084  }
6069 6085  
6070 6086  /*
6071 6087   * Look up the pathname using the vp in cs as the directory vnode.
6072 6088   * cs->vp will be the vnode for the file on success
6073 6089   */
6074 6090  
6075 6091  static nfsstat4
6076 6092  rfs4_lookup(component4 *component, struct svc_req *req,
6077 6093      struct compound_state *cs)
6078 6094  {
6079 6095          char *nm;
6080 6096          uint32_t len;
6081 6097          nfsstat4 status;
6082 6098          struct sockaddr *ca;
6083 6099          char *name;
6084 6100  
6085 6101          if (cs->vp == NULL) {
6086 6102                  return (NFS4ERR_NOFILEHANDLE);
6087 6103          }
6088 6104          if (cs->vp->v_type != VDIR) {
6089 6105                  return (NFS4ERR_NOTDIR);
6090 6106          }
6091 6107  
6092 6108          status = utf8_dir_verify(component);
6093 6109          if (status != NFS4_OK)
6094 6110                  return (status);
6095 6111  
6096 6112          nm = utf8_to_fn(component, &len, NULL);
6097 6113          if (nm == NULL) {
6098 6114                  return (NFS4ERR_INVAL);
6099 6115          }
6100 6116  
6101 6117          if (len > MAXNAMELEN) {
6102 6118                  kmem_free(nm, len);
6103 6119                  return (NFS4ERR_NAMETOOLONG);
6104 6120          }
6105 6121  
6106 6122          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6107 6123          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6108 6124              MAXPATHLEN + 1);
6109 6125  
6110 6126          if (name == NULL) {
6111 6127                  kmem_free(nm, len);
6112 6128                  return (NFS4ERR_INVAL);
6113 6129          }
6114 6130  
6115 6131          status = do_rfs4_op_lookup(name, req, cs);
6116 6132  
6117 6133          if (name != nm)
6118 6134                  kmem_free(name, MAXPATHLEN + 1);
6119 6135  
6120 6136          kmem_free(nm, len);
6121 6137  
6122 6138          return (status);
6123 6139  }
6124 6140  
6125 6141  static nfsstat4
6126 6142  rfs4_lookupfile(component4 *component, struct svc_req *req,
6127 6143      struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6128 6144  {
6129 6145          nfsstat4 status;
6130 6146          vnode_t *dvp = cs->vp;
6131 6147          vattr_t bva, ava, fva;
6132 6148          int error;
6133 6149  
6134 6150          /* Get "before" change value */
6135 6151          bva.va_mask = AT_CTIME|AT_SEQ;
6136 6152          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6137 6153          if (error)
6138 6154                  return (puterrno4(error));
6139 6155  
6140 6156          /* rfs4_lookup may VN_RELE directory */
6141 6157          VN_HOLD(dvp);
6142 6158  
6143 6159          status = rfs4_lookup(component, req, cs);
6144 6160          if (status != NFS4_OK) {
6145 6161                  VN_RELE(dvp);
6146 6162                  return (status);
6147 6163          }
6148 6164  
6149 6165          /*
6150 6166           * Get "after" change value, if it fails, simply return the
6151 6167           * before value.
6152 6168           */
6153 6169          ava.va_mask = AT_CTIME|AT_SEQ;
6154 6170          if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6155 6171                  ava.va_ctime = bva.va_ctime;
6156 6172                  ava.va_seq = 0;
6157 6173          }
6158 6174          VN_RELE(dvp);
6159 6175  
6160 6176          /*
6161 6177           * Validate the file is a file
6162 6178           */
6163 6179          fva.va_mask = AT_TYPE|AT_MODE;
6164 6180          error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6165 6181          if (error)
6166 6182                  return (puterrno4(error));
6167 6183  
6168 6184          if (fva.va_type != VREG) {
6169 6185                  if (fva.va_type == VDIR)
6170 6186                          return (NFS4ERR_ISDIR);
6171 6187                  if (fva.va_type == VLNK)
6172 6188                          return (NFS4ERR_SYMLINK);
6173 6189                  return (NFS4ERR_INVAL);
6174 6190          }
6175 6191  
6176 6192          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6177 6193          NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6178 6194  
6179 6195          /*
6180 6196           * It is undefined if VOP_LOOKUP will change va_seq, so
6181 6197           * cinfo.atomic = TRUE only if we have
6182 6198           * non-zero va_seq's, and they have not changed.
6183 6199           */
6184 6200          if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6185 6201                  cinfo->atomic = TRUE;
6186 6202          else
6187 6203                  cinfo->atomic = FALSE;
6188 6204  
6189 6205          /* Check for mandatory locking */
6190 6206          cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6191 6207          return (check_open_access(access, cs, req));
6192 6208  }
6193 6209  
6194 6210  static nfsstat4
6195 6211  create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6196 6212      cred_t *cr, vnode_t **vpp, bool_t *created)
6197 6213  {
6198 6214          int error;
6199 6215          nfsstat4 status = NFS4_OK;
6200 6216          vattr_t va;
6201 6217  
6202 6218  tryagain:
6203 6219  
6204 6220          /*
6205 6221           * The file open mode used is VWRITE.  If the client needs
6206 6222           * some other semantic, then it should do the access checking
6207 6223           * itself.  It would have been nice to have the file open mode
6208 6224           * passed as part of the arguments.
6209 6225           */
6210 6226  
6211 6227          *created = TRUE;
6212 6228          error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6213 6229  
6214 6230          if (error) {
6215 6231                  *created = FALSE;
6216 6232  
6217 6233                  /*
6218 6234                   * If we got something other than file already exists
6219 6235                   * then just return this error.  Otherwise, we got
6220 6236                   * EEXIST.  If we were doing a GUARDED create, then
6221 6237                   * just return this error.  Otherwise, we need to
6222 6238                   * make sure that this wasn't a duplicate of an
6223 6239                   * exclusive create request.
6224 6240                   *
6225 6241                   * The assumption is made that a non-exclusive create
6226 6242                   * request will never return EEXIST.
6227 6243                   */
6228 6244  
6229 6245                  if (error != EEXIST || mode == GUARDED4) {
6230 6246                          status = puterrno4(error);
6231 6247                          return (status);
6232 6248                  }
6233 6249                  error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6234 6250                      NULL, NULL, NULL);
6235 6251  
6236 6252                  if (error) {
6237 6253                          /*
6238 6254                           * We couldn't find the file that we thought that
6239 6255                           * we just created.  So, we'll just try creating
6240 6256                           * it again.
6241 6257                           */
6242 6258                          if (error == ENOENT)
6243 6259                                  goto tryagain;
6244 6260  
6245 6261                          status = puterrno4(error);
6246 6262                          return (status);
6247 6263                  }
6248 6264  
6249 6265                  if (mode == UNCHECKED4) {
6250 6266                          /* existing object must be regular file */
6251 6267                          if ((*vpp)->v_type != VREG) {
6252 6268                                  if ((*vpp)->v_type == VDIR)
6253 6269                                          status = NFS4ERR_ISDIR;
6254 6270                                  else if ((*vpp)->v_type == VLNK)
6255 6271                                          status = NFS4ERR_SYMLINK;
6256 6272                                  else
6257 6273                                          status = NFS4ERR_INVAL;
6258 6274                                  VN_RELE(*vpp);
6259 6275                                  return (status);
6260 6276                          }
6261 6277  
6262 6278                          return (NFS4_OK);
6263 6279                  }
6264 6280  
6265 6281                  /* Check for duplicate request */
6266 6282                  va.va_mask = AT_MTIME;
6267 6283                  error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6268 6284                  if (!error) {
6269 6285                          /* We found the file */
6270 6286                          const timestruc_t *mtime = &vap->va_mtime;
6271 6287  
6272 6288                          if (va.va_mtime.tv_sec != mtime->tv_sec ||
6273 6289                              va.va_mtime.tv_nsec != mtime->tv_nsec) {
6274 6290                                  /* but its not our creation */
6275 6291                                  VN_RELE(*vpp);
6276 6292                                  return (NFS4ERR_EXIST);
6277 6293                          }
6278 6294                          *created = TRUE; /* retrans of create == created */
6279 6295                          return (NFS4_OK);
6280 6296                  }
6281 6297                  VN_RELE(*vpp);
6282 6298                  return (NFS4ERR_EXIST);
6283 6299          }
6284 6300  
6285 6301          return (NFS4_OK);
6286 6302  }
6287 6303  
6288 6304  static nfsstat4
6289 6305  check_open_access(uint32_t access, struct compound_state *cs,
6290 6306      struct svc_req *req)
6291 6307  {
6292 6308          int error;
6293 6309          vnode_t *vp;
6294 6310          bool_t readonly;
6295 6311          cred_t *cr = cs->cr;
6296 6312  
6297 6313          /* For now we don't allow mandatory locking as per V2/V3 */
6298 6314          if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6299 6315                  return (NFS4ERR_ACCESS);
6300 6316          }
6301 6317  
6302 6318          vp = cs->vp;
6303 6319          ASSERT(cr != NULL && vp->v_type == VREG);
6304 6320  
6305 6321          /*
6306 6322           * If the file system is exported read only and we are trying
6307 6323           * to open for write, then return NFS4ERR_ROFS
6308 6324           */
6309 6325  
6310 6326          readonly = rdonly4(req, cs);
6311 6327  
6312 6328          if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6313 6329                  return (NFS4ERR_ROFS);
6314 6330  
6315 6331          if (access & OPEN4_SHARE_ACCESS_READ) {
6316 6332                  if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6317 6333                      (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6318 6334                          return (NFS4ERR_ACCESS);
6319 6335                  }
6320 6336          }
6321 6337  
6322 6338          if (access & OPEN4_SHARE_ACCESS_WRITE) {
6323 6339                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6324 6340                  if (error)
6325 6341                          return (NFS4ERR_ACCESS);
6326 6342          }
6327 6343  
6328 6344          return (NFS4_OK);
6329 6345  }
6330 6346  
6331 6347  static nfsstat4
6332 6348  rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6333 6349      change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6334 6350  {
6335 6351          struct nfs4_svgetit_arg sarg;
6336 6352          struct nfs4_ntov_table ntov;
6337 6353  
6338 6354          bool_t ntov_table_init = FALSE;
6339 6355          struct statvfs64 sb;
6340 6356          nfsstat4 status;
6341 6357          vnode_t *vp;
6342 6358          vattr_t bva, ava, iva, cva, *vap;
6343 6359          vnode_t *dvp;
6344 6360          timespec32_t *mtime;
6345 6361          char *nm = NULL;
6346 6362          uint_t buflen;
6347 6363          bool_t created;
6348 6364          bool_t setsize = FALSE;
6349 6365          len_t reqsize;
6350 6366          int error;
6351 6367          bool_t trunc;
6352 6368          caller_context_t ct;
6353 6369          component4 *component;
6354 6370          bslabel_t *clabel;
6355 6371          struct sockaddr *ca;
6356 6372          char *name = NULL;
6357 6373  
6358 6374          sarg.sbp = &sb;
6359 6375          sarg.is_referral = B_FALSE;
6360 6376  
6361 6377          dvp = cs->vp;
6362 6378  
6363 6379          /* Check if the file system is read only */
6364 6380          if (rdonly4(req, cs))
6365 6381                  return (NFS4ERR_ROFS);
6366 6382  
6367 6383          /* check the label of including directory */
6368 6384          if (is_system_labeled()) {
6369 6385                  ASSERT(req->rq_label != NULL);
6370 6386                  clabel = req->rq_label;
6371 6387                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6372 6388                      "got client label from request(1)",
6373 6389                      struct svc_req *, req);
6374 6390                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
6375 6391                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6376 6392                              cs->exi)) {
6377 6393                                  return (NFS4ERR_ACCESS);
6378 6394                          }
6379 6395                  }
6380 6396          }
6381 6397  
6382 6398          /*
6383 6399           * Get the last component of path name in nm. cs will reference
6384 6400           * the including directory on success.
6385 6401           */
6386 6402          component = &args->open_claim4_u.file;
6387 6403          status = utf8_dir_verify(component);
6388 6404          if (status != NFS4_OK)
6389 6405                  return (status);
6390 6406  
6391 6407          nm = utf8_to_fn(component, &buflen, NULL);
6392 6408  
6393 6409          if (nm == NULL)
6394 6410                  return (NFS4ERR_RESOURCE);
6395 6411  
6396 6412          if (buflen > MAXNAMELEN) {
6397 6413                  kmem_free(nm, buflen);
6398 6414                  return (NFS4ERR_NAMETOOLONG);
6399 6415          }
6400 6416  
6401 6417          bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6402 6418          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6403 6419          if (error) {
6404 6420                  kmem_free(nm, buflen);
6405 6421                  return (puterrno4(error));
6406 6422          }
6407 6423  
6408 6424          if (bva.va_type != VDIR) {
6409 6425                  kmem_free(nm, buflen);
6410 6426                  return (NFS4ERR_NOTDIR);
6411 6427          }
6412 6428  
6413 6429          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6414 6430  
6415 6431          switch (args->mode) {
6416 6432          case GUARDED4:
6417 6433                  /*FALLTHROUGH*/
6418 6434          case UNCHECKED4:
6419 6435                  nfs4_ntov_table_init(&ntov);
6420 6436                  ntov_table_init = TRUE;
6421 6437  
6422 6438                  *attrset = 0;
6423 6439                  status = do_rfs4_set_attrs(attrset,
6424 6440                      &args->createhow4_u.createattrs,
6425 6441                      cs, &sarg, &ntov, NFS4ATTR_SETIT);
6426 6442  
6427 6443                  if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6428 6444                      sarg.vap->va_type != VREG) {
6429 6445                          if (sarg.vap->va_type == VDIR)
6430 6446                                  status = NFS4ERR_ISDIR;
6431 6447                          else if (sarg.vap->va_type == VLNK)
6432 6448                                  status = NFS4ERR_SYMLINK;
6433 6449                          else
6434 6450                                  status = NFS4ERR_INVAL;
6435 6451                  }
6436 6452  
6437 6453                  if (status != NFS4_OK) {
6438 6454                          kmem_free(nm, buflen);
6439 6455                          nfs4_ntov_table_free(&ntov, &sarg);
6440 6456                          *attrset = 0;
6441 6457                          return (status);
6442 6458                  }
6443 6459  
6444 6460                  vap = sarg.vap;
6445 6461                  vap->va_type = VREG;
6446 6462                  vap->va_mask |= AT_TYPE;
6447 6463  
6448 6464                  if ((vap->va_mask & AT_MODE) == 0) {
6449 6465                          vap->va_mask |= AT_MODE;
6450 6466                          vap->va_mode = (mode_t)0600;
6451 6467                  }
6452 6468  
6453 6469                  if (vap->va_mask & AT_SIZE) {
6454 6470  
6455 6471                          /* Disallow create with a non-zero size */
6456 6472  
6457 6473                          if ((reqsize = sarg.vap->va_size) != 0) {
6458 6474                                  kmem_free(nm, buflen);
6459 6475                                  nfs4_ntov_table_free(&ntov, &sarg);
6460 6476                                  *attrset = 0;
6461 6477                                  return (NFS4ERR_INVAL);
6462 6478                          }
6463 6479                          setsize = TRUE;
6464 6480                  }
6465 6481                  break;
6466 6482  
6467 6483          case EXCLUSIVE4:
6468 6484                  /* prohibit EXCL create of named attributes */
6469 6485                  if (dvp->v_flag & V_XATTRDIR) {
6470 6486                          kmem_free(nm, buflen);
6471 6487                          *attrset = 0;
6472 6488                          return (NFS4ERR_INVAL);
6473 6489                  }
6474 6490  
6475 6491                  cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6476 6492                  cva.va_type = VREG;
6477 6493                  /*
6478 6494                   * Ensure no time overflows. Assumes underlying
6479 6495                   * filesystem supports at least 32 bits.
6480 6496                   * Truncate nsec to usec resolution to allow valid
6481 6497                   * compares even if the underlying filesystem truncates.
6482 6498                   */
6483 6499                  mtime = (timespec32_t *)&args->createhow4_u.createverf;
6484 6500                  cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6485 6501                  cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6486 6502                  cva.va_mode = (mode_t)0;
6487 6503                  vap = &cva;
6488 6504  
6489 6505                  /*
6490 6506                   * For EXCL create, attrset is set to the server attr
6491 6507                   * used to cache the client's verifier.
6492 6508                   */
6493 6509                  *attrset = FATTR4_TIME_MODIFY_MASK;
6494 6510                  break;
6495 6511          }
6496 6512  
6497 6513          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6498 6514          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6499 6515              MAXPATHLEN  + 1);
6500 6516  
6501 6517          if (name == NULL) {
6502 6518                  kmem_free(nm, buflen);
6503 6519                  return (NFS4ERR_SERVERFAULT);
6504 6520          }
6505 6521  
6506 6522          status = create_vnode(dvp, name, vap, args->mode,
6507 6523              cs->cr, &vp, &created);
6508 6524          if (nm != name)
6509 6525                  kmem_free(name, MAXPATHLEN + 1);
6510 6526          kmem_free(nm, buflen);
6511 6527  
6512 6528          if (status != NFS4_OK) {
6513 6529                  if (ntov_table_init)
6514 6530                          nfs4_ntov_table_free(&ntov, &sarg);
6515 6531                  *attrset = 0;
6516 6532                  return (status);
6517 6533          }
6518 6534  
6519 6535          trunc = (setsize && !created);
6520 6536  
6521 6537          if (args->mode != EXCLUSIVE4) {
6522 6538                  bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6523 6539  
6524 6540                  /*
6525 6541                   * True verification that object was created with correct
6526 6542                   * attrs is impossible.  The attrs could have been changed
6527 6543                   * immediately after object creation.  If attributes did
6528 6544                   * not verify, the only recourse for the server is to
6529 6545                   * destroy the object.  Maybe if some attrs (like gid)
6530 6546                   * are set incorrectly, the object should be destroyed;
6531 6547                   * however, seems bad as a default policy.  Do we really
6532 6548                   * want to destroy an object over one of the times not
6533 6549                   * verifying correctly?  For these reasons, the server
6534 6550                   * currently sets bits in attrset for createattrs
6535 6551                   * that were set; however, no verification is done.
6536 6552                   *
6537 6553                   * vmask_to_nmask accounts for vattr bits set on create
6538 6554                   *      [do_rfs4_set_attrs() only sets resp bits for
6539 6555                   *       non-vattr/vfs bits.]
6540 6556                   * Mask off any bits we set by default so as not to return
6541 6557                   * more attrset bits than were requested in createattrs
6542 6558                   */
6543 6559                  if (created) {
6544 6560                          nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6545 6561                          *attrset &= createmask;
6546 6562                  } else {
6547 6563                          /*
6548 6564                           * We did not create the vnode (we tried but it
6549 6565                           * already existed).  In this case, the only createattr
6550 6566                           * that the spec allows the server to set is size,
6551 6567                           * and even then, it can only be set if it is 0.
6552 6568                           */
6553 6569                          *attrset = 0;
6554 6570                          if (trunc)
6555 6571                                  *attrset = FATTR4_SIZE_MASK;
6556 6572                  }
6557 6573          }
6558 6574          if (ntov_table_init)
6559 6575                  nfs4_ntov_table_free(&ntov, &sarg);
6560 6576  
6561 6577          /*
6562 6578           * Get the initial "after" sequence number, if it fails,
6563 6579           * set to zero, time to before.
6564 6580           */
6565 6581          iva.va_mask = AT_CTIME|AT_SEQ;
6566 6582          if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6567 6583                  iva.va_seq = 0;
6568 6584                  iva.va_ctime = bva.va_ctime;
6569 6585          }
6570 6586  
6571 6587          /*
6572 6588           * create_vnode attempts to create the file exclusive,
6573 6589           * if it already exists the VOP_CREATE will fail and
6574 6590           * may not increase va_seq. It is atomic if
6575 6591           * we haven't changed the directory, but if it has changed
6576 6592           * we don't know what changed it.
6577 6593           */
6578 6594          if (!created) {
6579 6595                  if (bva.va_seq && iva.va_seq &&
6580 6596                      bva.va_seq == iva.va_seq)
6581 6597                          cinfo->atomic = TRUE;
6582 6598                  else
6583 6599                          cinfo->atomic = FALSE;
6584 6600                  NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6585 6601          } else {
6586 6602                  /*
6587 6603                   * The entry was created, we need to sync the
6588 6604                   * directory metadata.
6589 6605                   */
6590 6606                  (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6591 6607  
6592 6608                  /*
6593 6609                   * Get "after" change value, if it fails, simply return the
6594 6610                   * before value.
6595 6611                   */
6596 6612                  ava.va_mask = AT_CTIME|AT_SEQ;
6597 6613                  if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6598 6614                          ava.va_ctime = bva.va_ctime;
6599 6615                          ava.va_seq = 0;
6600 6616                  }
6601 6617  
6602 6618                  NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6603 6619  
6604 6620                  /*
6605 6621                   * The cinfo->atomic = TRUE only if we have
6606 6622                   * non-zero va_seq's, and it has incremented by exactly one
6607 6623                   * during the create_vnode and it didn't
6608 6624                   * change during the VOP_FSYNC.
6609 6625                   */
6610 6626                  if (bva.va_seq && iva.va_seq && ava.va_seq &&
6611 6627                      iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6612 6628                          cinfo->atomic = TRUE;
6613 6629                  else
6614 6630                          cinfo->atomic = FALSE;
6615 6631          }
6616 6632  
6617 6633          /* Check for mandatory locking and that the size gets set. */
6618 6634          cva.va_mask = AT_MODE;
6619 6635          if (setsize)
6620 6636                  cva.va_mask |= AT_SIZE;
6621 6637  
6622 6638          /* Assume the worst */
6623 6639          cs->mandlock = TRUE;
6624 6640  
6625 6641          if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6626 6642                  cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6627 6643  
6628 6644                  /*
6629 6645                   * Truncate the file if necessary; this would be
6630 6646                   * the case for create over an existing file.
6631 6647                   */
6632 6648  
6633 6649                  if (trunc) {
6634 6650                          int in_crit = 0;
6635 6651                          rfs4_file_t *fp;
6636 6652                          nfs4_srv_t *nsrv4;
6637 6653                          bool_t create = FALSE;
6638 6654  
6639 6655                          /*
6640 6656                           * We are writing over an existing file.
6641 6657                           * Check to see if we need to recall a delegation.
6642 6658                           */
6643 6659                          nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6644 6660                          rfs4_hold_deleg_policy(nsrv4);
6645 6661                          if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6646 6662                                  if (rfs4_check_delegated_byfp(FWRITE, fp,
6647 6663                                      (reqsize == 0), FALSE, FALSE, &clientid)) {
6648 6664                                          rfs4_file_rele(fp);
6649 6665                                          rfs4_rele_deleg_policy(nsrv4);
6650 6666                                          VN_RELE(vp);
6651 6667                                          *attrset = 0;
6652 6668                                          return (NFS4ERR_DELAY);
6653 6669                                  }
6654 6670                                  rfs4_file_rele(fp);
6655 6671                          }
6656 6672                          rfs4_rele_deleg_policy(nsrv4);
6657 6673  
6658 6674                          if (nbl_need_check(vp)) {
6659 6675                                  in_crit = 1;
6660 6676  
6661 6677                                  ASSERT(reqsize == 0);
6662 6678  
6663 6679                                  nbl_start_crit(vp, RW_READER);
6664 6680                                  if (nbl_conflict(vp, NBL_WRITE, 0,
6665 6681                                      cva.va_size, 0, NULL)) {
6666 6682                                          in_crit = 0;
6667 6683                                          nbl_end_crit(vp);
6668 6684                                          VN_RELE(vp);
6669 6685                                          *attrset = 0;
6670 6686                                          return (NFS4ERR_ACCESS);
6671 6687                                  }
6672 6688                          }
6673 6689                          ct.cc_sysid = 0;
6674 6690                          ct.cc_pid = 0;
6675 6691                          ct.cc_caller_id = nfs4_srv_caller_id;
6676 6692                          ct.cc_flags = CC_DONTBLOCK;
6677 6693  
6678 6694                          cva.va_mask = AT_SIZE;
6679 6695                          cva.va_size = reqsize;
6680 6696                          (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6681 6697                          if (in_crit)
6682 6698                                  nbl_end_crit(vp);
6683 6699                  }
6684 6700          }
6685 6701  
6686 6702          error = makefh4(&cs->fh, vp, cs->exi);
6687 6703  
6688 6704          /*
6689 6705           * Force modified data and metadata out to stable storage.
6690 6706           */
6691 6707          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6692 6708  
6693 6709          if (error) {
6694 6710                  VN_RELE(vp);
6695 6711                  *attrset = 0;
6696 6712                  return (puterrno4(error));
6697 6713          }
6698 6714  
6699 6715          /* if parent dir is attrdir, set namedattr fh flag */
6700 6716          if (dvp->v_flag & V_XATTRDIR)
6701 6717                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6702 6718  
6703 6719          if (cs->vp)
6704 6720                  VN_RELE(cs->vp);
6705 6721  
6706 6722          cs->vp = vp;
6707 6723  
6708 6724          /*
6709 6725           * if we did not create the file, we will need to check
6710 6726           * the access bits on the file
6711 6727           */
6712 6728  
6713 6729          if (!created) {
6714 6730                  if (setsize)
6715 6731                          args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6716 6732                  status = check_open_access(args->share_access, cs, req);
6717 6733                  if (status != NFS4_OK)
6718 6734                          *attrset = 0;
6719 6735          }
6720 6736          return (status);
6721 6737  }
6722 6738  
6723 6739  /*ARGSUSED*/
6724 6740  static void
6725 6741  rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6726 6742      rfs4_openowner_t *oo, delegreq_t deleg,
6727 6743      uint32_t access, uint32_t deny,
6728 6744      OPEN4res *resp, int deleg_cur)
6729 6745  {
6730 6746          /* XXX Currently not using req  */
6731 6747          rfs4_state_t *sp;
6732 6748          rfs4_file_t *fp;
6733 6749          bool_t screate = TRUE;
6734 6750          bool_t fcreate = TRUE;
6735 6751          uint32_t open_a, share_a;
6736 6752          uint32_t open_d, share_d;
6737 6753          rfs4_deleg_state_t *dsp;
6738 6754          sysid_t sysid;
6739 6755          nfsstat4 status;
6740 6756          caller_context_t ct;
6741 6757          int fflags = 0;
6742 6758          int recall = 0;
6743 6759          int err;
6744 6760          int first_open;
6745 6761  
6746 6762          /* get the file struct and hold a lock on it during initial open */
6747 6763          fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6748 6764          if (fp == NULL) {
6749 6765                  resp->status = NFS4ERR_RESOURCE;
6750 6766                  DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6751 6767                  return;
6752 6768          }
6753 6769  
6754 6770          sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6755 6771          if (sp == NULL) {
6756 6772                  resp->status = NFS4ERR_RESOURCE;
6757 6773                  DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6758 6774                  /* No need to keep any reference */
6759 6775                  rw_exit(&fp->rf_file_rwlock);
6760 6776                  rfs4_file_rele(fp);
6761 6777                  return;
6762 6778          }
6763 6779  
6764 6780          /* try to get the sysid before continuing */
6765 6781          if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6766 6782                  resp->status = status;
6767 6783                  rfs4_file_rele(fp);
6768 6784                  /* Not a fully formed open; "close" it */
6769 6785                  if (screate == TRUE)
6770 6786                          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6771 6787                  rfs4_state_rele(sp);
6772 6788                  return;
6773 6789          }
6774 6790  
6775 6791          /* Calculate the fflags for this OPEN. */
6776 6792          if (access & OPEN4_SHARE_ACCESS_READ)
6777 6793                  fflags |= FREAD;
6778 6794          if (access & OPEN4_SHARE_ACCESS_WRITE)
6779 6795                  fflags |= FWRITE;
6780 6796  
6781 6797          rfs4_dbe_lock(sp->rs_dbe);
6782 6798  
6783 6799          /*
6784 6800           * Calculate the new deny and access mode that this open is adding to
6785 6801           * the file for this open owner;
6786 6802           */
6787 6803          open_d = (deny & ~sp->rs_open_deny);
6788 6804          open_a = (access & ~sp->rs_open_access);
6789 6805  
6790 6806          /*
6791 6807           * Calculate the new share access and share deny modes that this open
6792 6808           * is adding to the file for this open owner;
6793 6809           */
6794 6810          share_a = (access & ~sp->rs_share_access);
6795 6811          share_d = (deny & ~sp->rs_share_deny);
6796 6812  
6797 6813          first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6798 6814  
6799 6815          /*
6800 6816           * Check to see the client has already sent an open for this
6801 6817           * open owner on this file with the same share/deny modes.
6802 6818           * If so, we don't need to check for a conflict and we don't
6803 6819           * need to add another shrlock.  If not, then we need to
6804 6820           * check for conflicts in deny and access before checking for
6805 6821           * conflicts in delegation.  We don't want to recall a
6806 6822           * delegation based on an open that will eventually fail based
6807 6823           * on shares modes.
6808 6824           */
6809 6825  
6810 6826          if (share_a || share_d) {
6811 6827                  if ((err = rfs4_share(sp, access, deny)) != 0) {
6812 6828                          rfs4_dbe_unlock(sp->rs_dbe);
6813 6829                          resp->status = err;
6814 6830  
6815 6831                          rfs4_file_rele(fp);
6816 6832                          /* Not a fully formed open; "close" it */
6817 6833                          if (screate == TRUE)
6818 6834                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6819 6835                          rfs4_state_rele(sp);
6820 6836                          return;
6821 6837                  }
6822 6838          }
6823 6839  
6824 6840          rfs4_dbe_lock(fp->rf_dbe);
6825 6841  
6826 6842          /*
6827 6843           * Check to see if this file is delegated and if so, if a
6828 6844           * recall needs to be done.
6829 6845           */
6830 6846          if (rfs4_check_recall(sp, access)) {
6831 6847                  rfs4_dbe_unlock(fp->rf_dbe);
6832 6848                  rfs4_dbe_unlock(sp->rs_dbe);
6833 6849                  rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6834 6850                  delay(NFS4_DELEGATION_CONFLICT_DELAY);
6835 6851                  rfs4_dbe_lock(sp->rs_dbe);
6836 6852  
6837 6853                  /* if state closed while lock was dropped */
6838 6854                  if (sp->rs_closed) {
6839 6855                          if (share_a || share_d)
6840 6856                                  (void) rfs4_unshare(sp);
6841 6857                          rfs4_dbe_unlock(sp->rs_dbe);
6842 6858                          rfs4_file_rele(fp);
6843 6859                          /* Not a fully formed open; "close" it */
6844 6860                          if (screate == TRUE)
6845 6861                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6846 6862                          rfs4_state_rele(sp);
6847 6863                          resp->status = NFS4ERR_OLD_STATEID;
6848 6864                          return;
6849 6865                  }
6850 6866  
6851 6867                  rfs4_dbe_lock(fp->rf_dbe);
6852 6868                  /* Let's see if the delegation was returned */
6853 6869                  if (rfs4_check_recall(sp, access)) {
6854 6870                          rfs4_dbe_unlock(fp->rf_dbe);
6855 6871                          if (share_a || share_d)
6856 6872                                  (void) rfs4_unshare(sp);
6857 6873                          rfs4_dbe_unlock(sp->rs_dbe);
6858 6874                          rfs4_file_rele(fp);
6859 6875                          rfs4_update_lease(sp->rs_owner->ro_client);
6860 6876  
6861 6877                          /* Not a fully formed open; "close" it */
6862 6878                          if (screate == TRUE)
6863 6879                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6864 6880                          rfs4_state_rele(sp);
6865 6881                          resp->status = NFS4ERR_DELAY;
6866 6882                          return;
6867 6883                  }
6868 6884          }
6869 6885          /*
6870 6886           * the share check passed and any delegation conflict has been
6871 6887           * taken care of, now call vop_open.
6872 6888           * if this is the first open then call vop_open with fflags.
6873 6889           * if not, call vn_open_upgrade with just the upgrade flags.
6874 6890           *
6875 6891           * if the file has been opened already, it will have the current
6876 6892           * access mode in the state struct.  if it has no share access, then
6877 6893           * this is a new open.
6878 6894           *
6879 6895           * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6880 6896           * call VOP_OPEN(), just do the open upgrade.
6881 6897           */
6882 6898          if (first_open && !deleg_cur) {
6883 6899                  ct.cc_sysid = sysid;
6884 6900                  ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6885 6901                  ct.cc_caller_id = nfs4_srv_caller_id;
6886 6902                  ct.cc_flags = CC_DONTBLOCK;
6887 6903                  err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6888 6904                  if (err) {
6889 6905                          rfs4_dbe_unlock(fp->rf_dbe);
6890 6906                          if (share_a || share_d)
6891 6907                                  (void) rfs4_unshare(sp);
6892 6908                          rfs4_dbe_unlock(sp->rs_dbe);
6893 6909                          rfs4_file_rele(fp);
6894 6910  
6895 6911                          /* Not a fully formed open; "close" it */
6896 6912                          if (screate == TRUE)
6897 6913                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6898 6914                          rfs4_state_rele(sp);
6899 6915                          /* check if a monitor detected a delegation conflict */
6900 6916                          if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6901 6917                                  resp->status = NFS4ERR_DELAY;
6902 6918                          else
6903 6919                                  resp->status = NFS4ERR_SERVERFAULT;
6904 6920                          return;
6905 6921                  }
6906 6922          } else { /* open upgrade */
6907 6923                  /*
6908 6924                   * calculate the fflags for the new mode that is being added
6909 6925                   * by this upgrade.
6910 6926                   */
6911 6927                  fflags = 0;
6912 6928                  if (open_a & OPEN4_SHARE_ACCESS_READ)
6913 6929                          fflags |= FREAD;
6914 6930                  if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6915 6931                          fflags |= FWRITE;
6916 6932                  vn_open_upgrade(cs->vp, fflags);
6917 6933          }
6918 6934          sp->rs_open_access |= access;
6919 6935          sp->rs_open_deny |= deny;
6920 6936  
6921 6937          if (open_d & OPEN4_SHARE_DENY_READ)
6922 6938                  fp->rf_deny_read++;
6923 6939          if (open_d & OPEN4_SHARE_DENY_WRITE)
6924 6940                  fp->rf_deny_write++;
6925 6941          fp->rf_share_deny |= deny;
6926 6942  
6927 6943          if (open_a & OPEN4_SHARE_ACCESS_READ)
6928 6944                  fp->rf_access_read++;
6929 6945          if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6930 6946                  fp->rf_access_write++;
6931 6947          fp->rf_share_access |= access;
6932 6948  
6933 6949          /*
6934 6950           * Check for delegation here. if the deleg argument is not
6935 6951           * DELEG_ANY, then this is a reclaim from a client and
6936 6952           * we must honor the delegation requested. If necessary we can
6937 6953           * set the recall flag.
6938 6954           */
6939 6955  
6940 6956          dsp = rfs4_grant_delegation(deleg, sp, &recall);
6941 6957  
6942 6958          cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6943 6959  
6944 6960          next_stateid(&sp->rs_stateid);
6945 6961  
6946 6962          resp->stateid = sp->rs_stateid.stateid;
6947 6963  
6948 6964          rfs4_dbe_unlock(fp->rf_dbe);
6949 6965          rfs4_dbe_unlock(sp->rs_dbe);
6950 6966  
6951 6967          if (dsp) {
6952 6968                  rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6953 6969                  rfs4_deleg_state_rele(dsp);
6954 6970          }
6955 6971  
6956 6972          rfs4_file_rele(fp);
6957 6973          rfs4_state_rele(sp);
6958 6974  
6959 6975          resp->status = NFS4_OK;
6960 6976  }
6961 6977  
6962 6978  /*ARGSUSED*/
6963 6979  static void
6964 6980  rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6965 6981      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6966 6982  {
6967 6983          change_info4 *cinfo = &resp->cinfo;
6968 6984          bitmap4 *attrset = &resp->attrset;
6969 6985  
6970 6986          if (args->opentype == OPEN4_NOCREATE)
6971 6987                  resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6972 6988                      req, cs, args->share_access, cinfo);
6973 6989          else {
6974 6990                  /* inhibit delegation grants during exclusive create */
6975 6991  
6976 6992                  if (args->mode == EXCLUSIVE4)
6977 6993                          rfs4_disable_delegation();
6978 6994  
6979 6995                  resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6980 6996                      oo->ro_client->rc_clientid);
6981 6997          }
6982 6998  
6983 6999          if (resp->status == NFS4_OK) {
6984 7000  
6985 7001                  /* cs->vp cs->fh now reference the desired file */
6986 7002  
6987 7003                  rfs4_do_open(cs, req, oo,
6988 7004                      oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6989 7005                      args->share_access, args->share_deny, resp, 0);
6990 7006  
6991 7007                  /*
6992 7008                   * If rfs4_createfile set attrset, we must
6993 7009                   * clear this attrset before the response is copied.
6994 7010                   */
6995 7011                  if (resp->status != NFS4_OK && resp->attrset) {
6996 7012                          resp->attrset = 0;
6997 7013                  }
6998 7014          }
6999 7015          else
7000 7016                  *cs->statusp = resp->status;
7001 7017  
7002 7018          if (args->mode == EXCLUSIVE4)
7003 7019                  rfs4_enable_delegation();
7004 7020  }
7005 7021  
7006 7022  /*ARGSUSED*/
7007 7023  static void
7008 7024  rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7009 7025      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7010 7026  {
7011 7027          change_info4 *cinfo = &resp->cinfo;
7012 7028          vattr_t va;
7013 7029          vtype_t v_type = cs->vp->v_type;
7014 7030          int error = 0;
7015 7031  
7016 7032          /* Verify that we have a regular file */
7017 7033          if (v_type != VREG) {
7018 7034                  if (v_type == VDIR)
7019 7035                          resp->status = NFS4ERR_ISDIR;
7020 7036                  else if (v_type == VLNK)
7021 7037                          resp->status = NFS4ERR_SYMLINK;
7022 7038                  else
7023 7039                          resp->status = NFS4ERR_INVAL;
7024 7040                  return;
7025 7041          }
7026 7042  
7027 7043          va.va_mask = AT_MODE|AT_UID;
7028 7044          error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7029 7045          if (error) {
7030 7046                  resp->status = puterrno4(error);
7031 7047                  return;
7032 7048          }
7033 7049  
7034 7050          cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7035 7051  
7036 7052          /*
7037 7053           * Check if we have access to the file, Note the the file
7038 7054           * could have originally been open UNCHECKED or GUARDED
7039 7055           * with mode bits that will now fail, but there is nothing
7040 7056           * we can really do about that except in the case that the
7041 7057           * owner of the file is the one requesting the open.
7042 7058           */
7043 7059          if (crgetuid(cs->cr) != va.va_uid) {
7044 7060                  resp->status = check_open_access(args->share_access, cs, req);
7045 7061                  if (resp->status != NFS4_OK) {
7046 7062                          return;
7047 7063                  }
7048 7064          }
7049 7065  
7050 7066          /*
7051 7067           * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7052 7068           */
7053 7069          cinfo->before = 0;
7054 7070          cinfo->after = 0;
7055 7071          cinfo->atomic = FALSE;
7056 7072  
7057 7073          rfs4_do_open(cs, req, oo,
7058 7074              NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7059 7075              args->share_access, args->share_deny, resp, 0);
7060 7076  }
7061 7077  
7062 7078  static void
7063 7079  rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7064 7080      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7065 7081  {
7066 7082          int error;
7067 7083          nfsstat4 status;
7068 7084          stateid4 stateid =
7069 7085              args->open_claim4_u.delegate_cur_info.delegate_stateid;
7070 7086          rfs4_deleg_state_t *dsp;
7071 7087  
7072 7088          /*
7073 7089           * Find the state info from the stateid and confirm that the
7074 7090           * file is delegated.  If the state openowner is the same as
7075 7091           * the supplied openowner we're done. If not, get the file
7076 7092           * info from the found state info. Use that file info to
7077 7093           * create the state for this lock owner. Note solaris doen't
7078 7094           * really need the pathname to find the file. We may want to
7079 7095           * lookup the pathname and make sure that the vp exist and
7080 7096           * matches the vp in the file structure. However it is
7081 7097           * possible that the pathname nolonger exists (local process
7082 7098           * unlinks the file), so this may not be that useful.
7083 7099           */
7084 7100  
7085 7101          status = rfs4_get_deleg_state(&stateid, &dsp);
7086 7102          if (status != NFS4_OK) {
7087 7103                  resp->status = status;
7088 7104                  return;
7089 7105          }
7090 7106  
7091 7107          ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7092 7108  
7093 7109          /*
7094 7110           * New lock owner, create state. Since this was probably called
7095 7111           * in response to a CB_RECALL we set deleg to DELEG_NONE
7096 7112           */
7097 7113  
7098 7114          ASSERT(cs->vp != NULL);
7099 7115          VN_RELE(cs->vp);
7100 7116          VN_HOLD(dsp->rds_finfo->rf_vp);
7101 7117          cs->vp = dsp->rds_finfo->rf_vp;
7102 7118  
7103 7119          if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7104 7120                  rfs4_deleg_state_rele(dsp);
7105 7121                  *cs->statusp = resp->status = puterrno4(error);
7106 7122                  return;
7107 7123          }
7108 7124  
7109 7125          /* Mark progress for delegation returns */
7110 7126          dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7111 7127          rfs4_deleg_state_rele(dsp);
7112 7128          rfs4_do_open(cs, req, oo, DELEG_NONE,
7113 7129              args->share_access, args->share_deny, resp, 1);
7114 7130  }
7115 7131  
7116 7132  /*ARGSUSED*/
7117 7133  static void
7118 7134  rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7119 7135      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7120 7136  {
7121 7137          /*
7122 7138           * Lookup the pathname, it must already exist since this file
7123 7139           * was delegated.
7124 7140           *
7125 7141           * Find the file and state info for this vp and open owner pair.
7126 7142           *      check that they are in fact delegated.
7127 7143           *      check that the state access and deny modes are the same.
7128 7144           *
7129 7145           * Return the delgation possibly seting the recall flag.
7130 7146           */
7131 7147          rfs4_file_t *fp;
7132 7148          rfs4_state_t *sp;
7133 7149          bool_t create = FALSE;
7134 7150          bool_t dcreate = FALSE;
7135 7151          rfs4_deleg_state_t *dsp;
7136 7152          nfsace4 *ace;
7137 7153  
7138 7154          /* Note we ignore oflags */
7139 7155          resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7140 7156              req, cs, args->share_access, &resp->cinfo);
7141 7157  
7142 7158          if (resp->status != NFS4_OK) {
7143 7159                  return;
7144 7160          }
7145 7161  
7146 7162          /* get the file struct and hold a lock on it during initial open */
7147 7163          fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7148 7164          if (fp == NULL) {
7149 7165                  resp->status = NFS4ERR_RESOURCE;
7150 7166                  DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7151 7167                  return;
7152 7168          }
7153 7169  
7154 7170          sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7155 7171          if (sp == NULL) {
7156 7172                  resp->status = NFS4ERR_SERVERFAULT;
7157 7173                  DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7158 7174                  rw_exit(&fp->rf_file_rwlock);
7159 7175                  rfs4_file_rele(fp);
7160 7176                  return;
7161 7177          }
7162 7178  
7163 7179          rfs4_dbe_lock(sp->rs_dbe);
7164 7180          rfs4_dbe_lock(fp->rf_dbe);
7165 7181          if (args->share_access != sp->rs_share_access ||
7166 7182              args->share_deny != sp->rs_share_deny ||
7167 7183              sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7168 7184                  NFS4_DEBUG(rfs4_debug,
7169 7185                      (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7170 7186                  rfs4_dbe_unlock(fp->rf_dbe);
7171 7187                  rfs4_dbe_unlock(sp->rs_dbe);
7172 7188                  rfs4_file_rele(fp);
7173 7189                  rfs4_state_rele(sp);
7174 7190                  resp->status = NFS4ERR_SERVERFAULT;
7175 7191                  return;
7176 7192          }
7177 7193          rfs4_dbe_unlock(fp->rf_dbe);
7178 7194          rfs4_dbe_unlock(sp->rs_dbe);
7179 7195  
7180 7196          dsp = rfs4_finddeleg(sp, &dcreate);
7181 7197          if (dsp == NULL) {
7182 7198                  rfs4_state_rele(sp);
7183 7199                  rfs4_file_rele(fp);
7184 7200                  resp->status = NFS4ERR_SERVERFAULT;
7185 7201                  return;
7186 7202          }
7187 7203  
7188 7204          next_stateid(&sp->rs_stateid);
7189 7205  
7190 7206          resp->stateid = sp->rs_stateid.stateid;
7191 7207  
7192 7208          resp->delegation.delegation_type = dsp->rds_dtype;
7193 7209  
7194 7210          if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7195 7211                  open_read_delegation4 *rv =
7196 7212                      &resp->delegation.open_delegation4_u.read;
7197 7213  
7198 7214                  rv->stateid = dsp->rds_delegid.stateid;
7199 7215                  rv->recall = FALSE; /* no policy in place to set to TRUE */
7200 7216                  ace = &rv->permissions;
7201 7217          } else {
7202 7218                  open_write_delegation4 *rv =
7203 7219                      &resp->delegation.open_delegation4_u.write;
7204 7220  
7205 7221                  rv->stateid = dsp->rds_delegid.stateid;
7206 7222                  rv->recall = FALSE;  /* no policy in place to set to TRUE */
7207 7223                  ace = &rv->permissions;
7208 7224                  rv->space_limit.limitby = NFS_LIMIT_SIZE;
7209 7225                  rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7210 7226          }
7211 7227  
7212 7228          /* XXX For now */
7213 7229          ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7214 7230          ace->flag = 0;
7215 7231          ace->access_mask = 0;
7216 7232          ace->who.utf8string_len = 0;
7217 7233          ace->who.utf8string_val = 0;
7218 7234  
7219 7235          rfs4_deleg_state_rele(dsp);
7220 7236          rfs4_state_rele(sp);
7221 7237          rfs4_file_rele(fp);
7222 7238  }
7223 7239  
/*
 * Result of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same operation as last time */
	NFS4_CHKSEQ_BAD		/* any other seqid; request is out of sequence */
} rfs4_chkseq_t;
7229 7245  
7230 7246  /*
7231 7247   * Generic function for sequence number checks.
7232 7248   */
7233 7249  static rfs4_chkseq_t
7234 7250  rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7235 7251      seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7236 7252  {
7237 7253          /* Same sequence ids and matching operations? */
7238 7254          if (seqid == rqst_seq && resop->resop == lastop->resop) {
7239 7255                  if (copyres == TRUE) {
7240 7256                          rfs4_free_reply(resop);
7241 7257                          rfs4_copy_reply(resop, lastop);
7242 7258                  }
7243 7259                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7244 7260                      "Replayed SEQID %d\n", seqid));
7245 7261                  return (NFS4_CHKSEQ_REPLAY);
7246 7262          }
7247 7263  
7248 7264          /* If the incoming sequence is not the next expected then it is bad */
7249 7265          if (rqst_seq != seqid + 1) {
7250 7266                  if (rqst_seq == seqid) {
7251 7267                          NFS4_DEBUG(rfs4_debug,
7252 7268                              (CE_NOTE, "BAD SEQID: Replayed sequence id "
7253 7269                              "but last op was %d current op is %d\n",
7254 7270                              lastop->resop, resop->resop));
7255 7271                          return (NFS4_CHKSEQ_BAD);
7256 7272                  }
7257 7273                  NFS4_DEBUG(rfs4_debug,
7258 7274                      (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7259 7275                      rqst_seq, seqid));
7260 7276                  return (NFS4_CHKSEQ_BAD);
7261 7277          }
7262 7278  
7263 7279          /* Everything okay -- next expected */
7264 7280          return (NFS4_CHKSEQ_OKAY);
7265 7281  }
7266 7282  
7267 7283  
7268 7284  static rfs4_chkseq_t
7269 7285  rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7270 7286  {
7271 7287          rfs4_chkseq_t rc;
7272 7288  
7273 7289          rfs4_dbe_lock(op->ro_dbe);
7274 7290          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7275 7291              TRUE);
7276 7292          rfs4_dbe_unlock(op->ro_dbe);
7277 7293  
7278 7294          if (rc == NFS4_CHKSEQ_OKAY)
7279 7295                  rfs4_update_lease(op->ro_client);
7280 7296  
7281 7297          return (rc);
7282 7298  }
7283 7299  
7284 7300  static rfs4_chkseq_t
7285 7301  rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7286 7302  {
7287 7303          rfs4_chkseq_t rc;
7288 7304  
7289 7305          rfs4_dbe_lock(op->ro_dbe);
7290 7306          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7291 7307              olo_seqid, resop, FALSE);
7292 7308          rfs4_dbe_unlock(op->ro_dbe);
7293 7309  
7294 7310          return (rc);
7295 7311  }
7296 7312  
7297 7313  static rfs4_chkseq_t
7298 7314  rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7299 7315  {
7300 7316          rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7301 7317  
7302 7318          rfs4_dbe_lock(lsp->rls_dbe);
7303 7319          if (!lsp->rls_skip_seqid_check)
7304 7320                  rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7305 7321                      resop, TRUE);
7306 7322          rfs4_dbe_unlock(lsp->rls_dbe);
7307 7323  
7308 7324          return (rc);
7309 7325  }
7310 7326  
/*
 * OPEN operation handler.
 *
 * Outline of the processing done here:
 *  - fail with NFS4ERR_NOFILEHANDLE when there is no current vnode;
 *  - look up the client named in the open owner and fail with
 *    NFS4ERR_EXPIRED if its lease has expired;
 *  - find or create the open owner and enter its ro_sw so the owner's
 *    sequence space is held for the rest of the operation;
 *  - for a pre-existing owner, check the request's seqid (bad, replay, or
 *    next expected), restarting from "retry" when an unconfirmed owner has
 *    to be discarded first;
 *  - apply the grace-period rules for the claim type, then validate the
 *    mount-point and share_access/share_deny arguments;
 *  - dispatch on the claim type to the matching rfs4_do_open*() routine.
 *
 * At "out" the client reference is dropped and, unless the status is one
 * of the errors listed in the switch, the lease is updated, the reply is
 * saved for replay detection (or, on a replay, the original filehandle and
 * vnode are restored) and the open owner's sequence id and confirmation
 * flags are maintained.  "finish" publishes the status to *cs->statusp and
 * releases the open owner; "end" only fires the done probe.
 */
7311 7327  static void
7312 7328  rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7313 7329      struct svc_req *req, struct compound_state *cs)
7314 7330  {
7315 7331          OPEN4args *args = &argop->nfs_argop4_u.opopen;
7316 7332          OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7317 7333          open_owner4 *owner = &args->owner;
7318 7334          open_claim_type4 claim = args->claim;
7319 7335          rfs4_client_t *cp;
7320 7336          rfs4_openowner_t *oo;
7321 7337          bool_t create;
7322 7338          bool_t replay = FALSE;
7323 7339          int can_reclaim;
7324 7340  
7325 7341          DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7326 7342              OPEN4args *, args);
7327 7343  
7328 7344          if (cs->vp == NULL) {
7329 7345                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7330 7346                  goto end;
7331 7347          }
7332 7348  
7333 7349          /*
7334 7350           * Need to check clientid and lease expiration first based on
7335 7351           * error ordering and incrementing sequence id.
7336 7352           */
7337 7353          cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7338 7354          if (cp == NULL) {
7339 7355                  *cs->statusp = resp->status =
7340 7356                      rfs4_check_clientid(&owner->clientid, 0);
7341 7357                  goto end;
7342 7358          }
7343 7359  
7344 7360          if (rfs4_lease_expired(cp)) {
7345 7361                  rfs4_client_close(cp);
7346 7362                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7347 7363                  goto end;
7348 7364          }
7349 7365          can_reclaim = cp->rc_can_reclaim;
7350 7366  
7351 7367          /*
7352 7368           * Find the open_owner for use from this point forward.  Take
7353 7369           * care in updating the sequence id based on the type of error
7354 7370           * being returned.
7355 7371           */
7356 7372  retry:
7357 7373          create = TRUE;
7358 7374          oo = rfs4_findopenowner(owner, &create, args->seqid);
7359 7375          if (oo == NULL) {
7360 7376                  *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7361 7377                  rfs4_client_rele(cp);
7362 7378                  goto end;
7363 7379          }
7364 7380  
7365 7381          /* Hold off access to the sequence space while the open is done */
7366 7382          rfs4_sw_enter(&oo->ro_sw);
7367 7383  
7368 7384          /*
7369 7385           * If the open_owner existed before at the server, then check
7370 7386           * the sequence id.
7371 7387           */
7372 7388          if (!create && !oo->ro_postpone_confirm) {
7373 7389                  switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7374 7390                  case NFS4_CHKSEQ_BAD:
7375 7391                          if ((args->seqid > oo->ro_open_seqid) &&
7376 7392                              oo->ro_need_confirm) {
7377 7393                                  rfs4_free_opens(oo, TRUE, FALSE);
7378 7394                                  rfs4_sw_exit(&oo->ro_sw);
7379 7395                                  rfs4_openowner_rele(oo);
7380 7396                                  goto retry;
7381 7397                          }
7382 7398                          resp->status = NFS4ERR_BAD_SEQID;
7383 7399                          goto out;
7384 7400                  case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7385 7401                          replay = TRUE;
7386 7402                          goto out;
7387 7403                  default:
7388 7404                          break;
7389 7405                  }
7390 7406  
7391 7407                  /*
7392 7408                   * Sequence was ok and open owner exists
7393 7409                   * check to see if we have yet to see an
7394 7410                   * open_confirm.
7395 7411                   */
7396 7412                  if (oo->ro_need_confirm) {
7397 7413                          rfs4_free_opens(oo, TRUE, FALSE);
7398 7414                          rfs4_sw_exit(&oo->ro_sw);
7399 7415                          rfs4_openowner_rele(oo);
7400 7416                          goto retry;
7401 7417                  }
7402 7418          }
7403 7419          /* Grace only applies to regular-type OPENs */
7404 7420          if (rfs4_clnt_in_grace(cp) &&
7405 7421              (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7406 7422                  *cs->statusp = resp->status = NFS4ERR_GRACE;
7407 7423                  goto out;
7408 7424          }
7409 7425  
7410 7426          /*
7411 7427           * If previous state at the server existed then can_reclaim
7412 7428           * will be set. If not reply NFS4ERR_NO_GRACE to the
7413 7429           * client.
7414 7430           */
7415 7431          if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7416 7432                  *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7417 7433                  goto out;
7418 7434          }
7419 7435  
7420 7436  
7421 7437          /*
7422 7438           * Reject the open if the client has missed the grace period
7423 7439           */
7424 7440          if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7425 7441                  *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7426 7442                  goto out;
7427 7443          }
7428 7444  
7429 7445          /* Couple of up-front bookkeeping items */
7430 7446          if (oo->ro_need_confirm) {
7431 7447                  /*
7432 7448                   * If this is a reclaim OPEN then we should not ask
7433 7449                   * for a confirmation of the open_owner per the
7434 7450                   * protocol specification.
7435 7451                   */
7436 7452                  if (claim == CLAIM_PREVIOUS)
7437 7453                          oo->ro_need_confirm = FALSE;
7438 7454                  else
7439 7455                          resp->rflags |= OPEN4_RESULT_CONFIRM;
7440 7456          }
7441 7457          resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7442 7458  
7443 7459          /*
7444 7460           * If there is an unshared filesystem mounted on this vnode,
7445 7461           * do not allow to open/create in this directory.
7446 7462           */
7447 7463          if (vn_ismntpt(cs->vp)) {
7448 7464                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
7449 7465                  goto out;
7450 7466          }
7451 7467  
7452 7468          /*
7453 7469           * access must be READ, WRITE, or BOTH.  No access is invalid.
7454 7470           * deny can be READ, WRITE, BOTH, or NONE.
7455 7471           * bits not defined for access/deny are invalid.
7456 7472           */
7457 7473          if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7458 7474              (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7459 7475              (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7460 7476                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7461 7477                  goto out;
7462 7478          }
7463 7479  
7464 7480  
7465 7481          /*
7466 7482           * make sure attrset is zero before response is built.
7467 7483           */
7468 7484          resp->attrset = 0;
7469 7485  
7470 7486          switch (claim) {
7471 7487          case CLAIM_NULL:
7472 7488                  rfs4_do_opennull(cs, req, args, oo, resp);
7473 7489                  break;
7474 7490          case CLAIM_PREVIOUS:
7475 7491                  rfs4_do_openprev(cs, req, args, oo, resp);
7476 7492                  break;
7477 7493          case CLAIM_DELEGATE_CUR:
7478 7494                  rfs4_do_opendelcur(cs, req, args, oo, resp);
7479 7495                  break;
7480 7496          case CLAIM_DELEGATE_PREV:
7481 7497                  rfs4_do_opendelprev(cs, req, args, oo, resp);
7482 7498                  break;
7483 7499          default:
7484 7500                  resp->status = NFS4ERR_INVAL;
7485 7501                  break;
7486 7502          }
7487 7503  
7488 7504  out:
7489 7505          rfs4_client_rele(cp);
7490 7506  
7491 7507          /* Catch sequence id handling here to make it a little easier */
7492 7508          switch (resp->status) {
7493 7509          case NFS4ERR_BADXDR:
7494 7510          case NFS4ERR_BAD_SEQID:
7495 7511          case NFS4ERR_BAD_STATEID:
7496 7512          case NFS4ERR_NOFILEHANDLE:
7497 7513          case NFS4ERR_RESOURCE:
7498 7514          case NFS4ERR_STALE_CLIENTID:
7499 7515          case NFS4ERR_STALE_STATEID:
7500 7516                  /*
7501 7517                   * The protocol states that if any of these errors are
7502 7518                   * being returned, the sequence id should not be
7503 7519                   * incremented.  Any other return requires an
7504 7520                   * increment.
7505 7521                   */
7506 7522                  break;
7507 7523          default:
7508 7524                  /* Always update the lease in this case */
7509 7525                  rfs4_update_lease(oo->ro_client);
7510 7526  
7511 7527                  /* Regular response - copy the result */
7512 7528                  if (!replay)
7513 7529                          rfs4_update_open_resp(oo, resop, &cs->fh);
7514 7530  
7515 7531                  /*
7516 7532                   * REPLAY case: Only if the previous response was OK
7517 7533                   * do we copy the filehandle.  If not OK, no
7518 7534                   * filehandle to copy.
7519 7535                   */
7520 7536                  if (replay == TRUE &&
7521 7537                      resp->status == NFS4_OK &&
7522 7538                      oo->ro_reply_fh.nfs_fh4_val) {
7523 7539                          /*
7524 7540                           * If this is a replay, we must restore the
7525 7541                           * current filehandle/vp to that of what was
7526 7542                           * returned originally.  Try our best to do
7527 7543                           * it.
7528 7544                           */
7529 7545                          nfs_fh4_fmt_t *fh_fmtp =
7530 7546                              (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7531 7547  
7532 7548                          cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7533 7549                              (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7534 7550  
7535 7551                          if (cs->exi == NULL) {
7536 7552                                  resp->status = NFS4ERR_STALE;
7537 7553                                  goto finish;
7538 7554                          }
7539 7555  
7540 7556                          VN_RELE(cs->vp);
7541 7557  
7542 7558                          cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7543 7559                              &resp->status);
7544 7560  
7545 7561                          if (cs->vp == NULL)
7546 7562                                  goto finish;
7547 7563  
7548 7564                          nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7549 7565                  }
7550 7566  
7551 7567                  /*
7552 7568                   * If this was a replay, no need to update the
7553 7569                   * sequence id. If the open_owner was not created on
7554 7570                   * this pass, then update.  The first use of an
7555 7571                   * open_owner will not bump the sequence id.
7556 7572                   */
7557 7573                  if (replay == FALSE && !create)
7558 7574                          rfs4_update_open_sequence(oo);
7559 7575                  /*
7560 7576                   * If the client is receiving an error and the
7561 7577                   * open_owner needs to be confirmed, there is no way
7562 7578                   * to notify the client of this fact ignoring the fact
7563 7579                   * that the server has no method of returning a
7564 7580                   * stateid to confirm.  Therefore, the server needs to
7565 7581                   * mark this open_owner in a way as to avoid the
7566 7582                   * sequence id checking the next time the client uses
7567 7583                   * this open_owner.
7568 7584                   */
7569 7585                  if (resp->status != NFS4_OK && oo->ro_need_confirm)
7570 7586                          oo->ro_postpone_confirm = TRUE;
7571 7587                  /*
7572 7588                   * If OK response then clear the postpone flag and
7573 7589                   * reset the sequence id to keep in sync with the
7574 7590                   * client.
7575 7591                   */
7576 7592                  if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7577 7593                          oo->ro_postpone_confirm = FALSE;
7578 7594                          oo->ro_open_seqid = args->seqid;
7579 7595                  }
7580 7596                  break;
7581 7597          }
7582 7598  
7583 7599  finish:
7584 7600          *cs->statusp = resp->status;
7585 7601  
7586 7602          rfs4_sw_exit(&oo->ro_sw);
7587 7603          rfs4_openowner_rele(oo);
7588 7604  
7589 7605  end:
7590 7606          DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7591 7607              OPEN4res *, resp);
7592 7608  }
7593 7609  
7594 7610  /*ARGSUSED*/
7595 7611  void
7596 7612  rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7597 7613      struct svc_req *req, struct compound_state *cs)
7598 7614  {
7599 7615          OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7600 7616          OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7601 7617          rfs4_state_t *sp;
7602 7618          nfsstat4 status;
7603 7619  
7604 7620          DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7605 7621              OPEN_CONFIRM4args *, args);
7606 7622  
7607 7623          if (cs->vp == NULL) {
7608 7624                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7609 7625                  goto out;
7610 7626          }
7611 7627  
7612 7628          if (cs->vp->v_type != VREG) {
7613 7629                  *cs->statusp = resp->status =
7614 7630                      cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7615 7631                  return;
7616 7632          }
7617 7633  
7618 7634          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7619 7635          if (status != NFS4_OK) {
7620 7636                  *cs->statusp = resp->status = status;
7621 7637                  goto out;
7622 7638          }
7623 7639  
7624 7640          /* Ensure specified filehandle matches */
7625 7641          if (cs->vp != sp->rs_finfo->rf_vp) {
7626 7642                  rfs4_state_rele(sp);
7627 7643                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7628 7644                  goto out;
7629 7645          }
7630 7646  
7631 7647          /* hold off other access to open_owner while we tinker */
7632 7648          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7633 7649  
7634 7650          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7635 7651          case NFS4_CHECK_STATEID_OKAY:
7636 7652                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7637 7653                      resop) != 0) {
7638 7654                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7639 7655                          break;
7640 7656                  }
7641 7657                  /*
7642 7658                   * If it is the appropriate stateid and determined to
7643 7659                   * be "OKAY" then this means that the stateid does not
7644 7660                   * need to be confirmed and the client is in error for
7645 7661                   * sending an OPEN_CONFIRM.
7646 7662                   */
7647 7663                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7648 7664                  break;
7649 7665          case NFS4_CHECK_STATEID_OLD:
7650 7666                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7651 7667                  break;
7652 7668          case NFS4_CHECK_STATEID_BAD:
7653 7669                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7654 7670                  break;
7655 7671          case NFS4_CHECK_STATEID_EXPIRED:
7656 7672                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7657 7673                  break;
7658 7674          case NFS4_CHECK_STATEID_CLOSED:
7659 7675                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7660 7676                  break;
7661 7677          case NFS4_CHECK_STATEID_REPLAY:
7662 7678                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7663 7679                      resop)) {
7664 7680                  case NFS4_CHKSEQ_OKAY:
7665 7681                          /*
7666 7682                           * This is replayed stateid; if seqid matches
7667 7683                           * next expected, then client is using wrong seqid.
7668 7684                           */
7669 7685                          /* fall through */
7670 7686                  case NFS4_CHKSEQ_BAD:
7671 7687                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7672 7688                          break;
7673 7689                  case NFS4_CHKSEQ_REPLAY:
7674 7690                          /*
7675 7691                           * Note this case is the duplicate case so
7676 7692                           * resp->status is already set.
7677 7693                           */
7678 7694                          *cs->statusp = resp->status;
7679 7695                          rfs4_update_lease(sp->rs_owner->ro_client);
7680 7696                          break;
7681 7697                  }
7682 7698                  break;
7683 7699          case NFS4_CHECK_STATEID_UNCONFIRMED:
7684 7700                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7685 7701                      resop) != NFS4_CHKSEQ_OKAY) {
7686 7702                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7687 7703                          break;
7688 7704                  }
7689 7705                  *cs->statusp = resp->status = NFS4_OK;
7690 7706  
7691 7707                  next_stateid(&sp->rs_stateid);
7692 7708                  resp->open_stateid = sp->rs_stateid.stateid;
7693 7709                  sp->rs_owner->ro_need_confirm = FALSE;
7694 7710                  rfs4_update_lease(sp->rs_owner->ro_client);
7695 7711                  rfs4_update_open_sequence(sp->rs_owner);
7696 7712                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7697 7713                  break;
7698 7714          default:
7699 7715                  ASSERT(FALSE);
7700 7716                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7701 7717                  break;
7702 7718          }
7703 7719          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7704 7720          rfs4_state_rele(sp);
7705 7721  
7706 7722  out:
7707 7723          DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7708 7724              OPEN_CONFIRM4res *, resp);
7709 7725  }
7710 7726  
7711 7727  /*ARGSUSED*/
7712 7728  void
7713 7729  rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7714 7730      struct svc_req *req, struct compound_state *cs)
7715 7731  {
7716 7732          OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7717 7733          OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7718 7734          uint32_t access = args->share_access;
7719 7735          uint32_t deny = args->share_deny;
7720 7736          nfsstat4 status;
7721 7737          rfs4_state_t *sp;
7722 7738          rfs4_file_t *fp;
7723 7739          int fflags = 0;
7724 7740  
7725 7741          DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7726 7742              OPEN_DOWNGRADE4args *, args);
7727 7743  
7728 7744          if (cs->vp == NULL) {
7729 7745                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7730 7746                  goto out;
7731 7747          }
7732 7748  
7733 7749          if (cs->vp->v_type != VREG) {
7734 7750                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7735 7751                  return;
7736 7752          }
7737 7753  
7738 7754          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7739 7755          if (status != NFS4_OK) {
7740 7756                  *cs->statusp = resp->status = status;
7741 7757                  goto out;
7742 7758          }
7743 7759  
7744 7760          /* Ensure specified filehandle matches */
7745 7761          if (cs->vp != sp->rs_finfo->rf_vp) {
7746 7762                  rfs4_state_rele(sp);
7747 7763                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7748 7764                  goto out;
7749 7765          }
7750 7766  
7751 7767          /* hold off other access to open_owner while we tinker */
7752 7768          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7753 7769  
7754 7770          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7755 7771          case NFS4_CHECK_STATEID_OKAY:
7756 7772                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7757 7773                      resop) != NFS4_CHKSEQ_OKAY) {
7758 7774                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7759 7775                          goto end;
7760 7776                  }
7761 7777                  break;
7762 7778          case NFS4_CHECK_STATEID_OLD:
7763 7779                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7764 7780                  goto end;
7765 7781          case NFS4_CHECK_STATEID_BAD:
7766 7782                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7767 7783                  goto end;
7768 7784          case NFS4_CHECK_STATEID_EXPIRED:
7769 7785                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7770 7786                  goto end;
7771 7787          case NFS4_CHECK_STATEID_CLOSED:
7772 7788                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7773 7789                  goto end;
7774 7790          case NFS4_CHECK_STATEID_UNCONFIRMED:
7775 7791                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7776 7792                  goto end;
7777 7793          case NFS4_CHECK_STATEID_REPLAY:
7778 7794                  /* Check the sequence id for the open owner */
7779 7795                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7780 7796                      resop)) {
7781 7797                  case NFS4_CHKSEQ_OKAY:
7782 7798                          /*
7783 7799                           * This is replayed stateid; if seqid matches
7784 7800                           * next expected, then client is using wrong seqid.
7785 7801                           */
7786 7802                          /* fall through */
7787 7803                  case NFS4_CHKSEQ_BAD:
7788 7804                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7789 7805                          goto end;
7790 7806                  case NFS4_CHKSEQ_REPLAY:
7791 7807                          /*
7792 7808                           * Note this case is the duplicate case so
7793 7809                           * resp->status is already set.
7794 7810                           */
7795 7811                          *cs->statusp = resp->status;
7796 7812                          rfs4_update_lease(sp->rs_owner->ro_client);
7797 7813                          goto end;
7798 7814                  }
7799 7815                  break;
7800 7816          default:
7801 7817                  ASSERT(FALSE);
7802 7818                  break;
7803 7819          }
7804 7820  
7805 7821          rfs4_dbe_lock(sp->rs_dbe);
7806 7822          /*
7807 7823           * Check that the new access modes and deny modes are valid.
7808 7824           * Check that no invalid bits are set.
7809 7825           */
7810 7826          if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7811 7827              (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7812 7828                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7813 7829                  rfs4_update_open_sequence(sp->rs_owner);
7814 7830                  rfs4_dbe_unlock(sp->rs_dbe);
7815 7831                  goto end;
7816 7832          }
7817 7833  
7818 7834          /*
7819 7835           * The new modes must be a subset of the current modes and
7820 7836           * the access must specify at least one mode. To test that
7821 7837           * the new mode is a subset of the current modes we bitwise
7822 7838           * AND them together and check that the result equals the new
7823 7839           * mode. For example:
7824 7840           * New mode, access == R and current mode, sp->rs_open_access  == RW
7825 7841           * access & sp->rs_open_access == R == access, so the new access mode
7826 7842           * is valid. Consider access == RW, sp->rs_open_access = R
7827 7843           * access & sp->rs_open_access == R != access, so the new access mode
7828 7844           * is invalid.
7829 7845           */
7830 7846          if ((access & sp->rs_open_access) != access ||
7831 7847              (deny & sp->rs_open_deny) != deny ||
7832 7848              (access &
7833 7849              (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7834 7850                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7835 7851                  rfs4_update_open_sequence(sp->rs_owner);
7836 7852                  rfs4_dbe_unlock(sp->rs_dbe);
7837 7853                  goto end;
7838 7854          }
7839 7855  
7840 7856          /*
7841 7857           * Release any share locks associated with this stateID.
7842 7858           * Strictly speaking, this violates the spec because the
7843 7859           * spec effectively requires that open downgrade be atomic.
7844 7860           * At present, fs_shrlock does not have this capability.
7845 7861           */
7846 7862          (void) rfs4_unshare(sp);
7847 7863  
7848 7864          status = rfs4_share(sp, access, deny);
7849 7865          if (status != NFS4_OK) {
7850 7866                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7851 7867                  rfs4_update_open_sequence(sp->rs_owner);
7852 7868                  rfs4_dbe_unlock(sp->rs_dbe);
7853 7869                  goto end;
7854 7870          }
7855 7871  
7856 7872          fp = sp->rs_finfo;
7857 7873          rfs4_dbe_lock(fp->rf_dbe);
7858 7874  
7859 7875          /*
7860 7876           * If the current mode has deny read and the new mode
7861 7877           * does not, decrement the number of deny read mode bits
7862 7878           * and if it goes to zero turn off the deny read bit
7863 7879           * on the file.
7864 7880           */
7865 7881          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7866 7882              (deny & OPEN4_SHARE_DENY_READ) == 0) {
7867 7883                  fp->rf_deny_read--;
7868 7884                  if (fp->rf_deny_read == 0)
7869 7885                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7870 7886          }
7871 7887  
7872 7888          /*
7873 7889           * If the current mode has deny write and the new mode
7874 7890           * does not, decrement the number of deny write mode bits
7875 7891           * and if it goes to zero turn off the deny write bit
7876 7892           * on the file.
7877 7893           */
7878 7894          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7879 7895              (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7880 7896                  fp->rf_deny_write--;
7881 7897                  if (fp->rf_deny_write == 0)
7882 7898                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7883 7899          }
7884 7900  
7885 7901          /*
7886 7902           * If the current mode has access read and the new mode
7887 7903           * does not, decrement the number of access read mode bits
7888 7904           * and if it goes to zero turn off the access read bit
7889 7905           * on the file.  set fflags to FREAD for the call to
7890 7906           * vn_open_downgrade().
7891 7907           */
7892 7908          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7893 7909              (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7894 7910                  fp->rf_access_read--;
7895 7911                  if (fp->rf_access_read == 0)
7896 7912                          fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7897 7913                  fflags |= FREAD;
7898 7914          }
7899 7915  
7900 7916          /*
7901 7917           * If the current mode has access write and the new mode
7902 7918           * does not, decrement the number of access write mode bits
7903 7919           * and if it goes to zero turn off the access write bit
7904 7920           * on the file.  set fflags to FWRITE for the call to
7905 7921           * vn_open_downgrade().
7906 7922           */
7907 7923          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7908 7924              (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7909 7925                  fp->rf_access_write--;
7910 7926                  if (fp->rf_access_write == 0)
7911 7927                          fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7912 7928                  fflags |= FWRITE;
7913 7929          }
7914 7930  
7915 7931          /* Check that the file is still accessible */
7916 7932          ASSERT(fp->rf_share_access);
7917 7933  
7918 7934          rfs4_dbe_unlock(fp->rf_dbe);
7919 7935  
7920 7936          /* now set the new open access and deny modes */
7921 7937          sp->rs_open_access = access;
7922 7938          sp->rs_open_deny = deny;
7923 7939  
7924 7940          /*
7925 7941           * we successfully downgraded the share lock, now we need to downgrade
7926 7942           * the open. it is possible that the downgrade was only for a deny
7927 7943           * mode and we have nothing else to do.
7928 7944           */
7929 7945          if ((fflags & (FREAD|FWRITE)) != 0)
7930 7946                  vn_open_downgrade(cs->vp, fflags);
7931 7947  
7932 7948          /* Update the stateid */
7933 7949          next_stateid(&sp->rs_stateid);
7934 7950          resp->open_stateid = sp->rs_stateid.stateid;
7935 7951  
7936 7952          rfs4_dbe_unlock(sp->rs_dbe);
7937 7953  
7938 7954          *cs->statusp = resp->status = NFS4_OK;
7939 7955          /* Update the lease */
7940 7956          rfs4_update_lease(sp->rs_owner->ro_client);
7941 7957          /* And the sequence */
7942 7958          rfs4_update_open_sequence(sp->rs_owner);
7943 7959          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7944 7960  
7945 7961  end:
7946 7962          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7947 7963          rfs4_state_rele(sp);
7948 7964  out:
7949 7965          DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7950 7966              OPEN_DOWNGRADE4res *, resp);
7951 7967  }
7952 7968  
/*
 * Search the first n bytes of the buffer s1 (which need not be
 * NUL-terminated) for the NUL-terminated string s2.  Returns a pointer
 * to the first byte of the first match, or NULL if s2 does not occur
 * entirely within those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	size_t patlen = strlen(s2);
	size_t remain;
	char *cur;

	/* Slide a patlen-sized window across the buffer. */
	for (cur = (char *)s1, remain = n; remain >= patlen; cur++, remain--) {
		if (bcmp(cur, s2, patlen) == 0)
			return (cur);
	}

	return (NULL);
}
7968 7984  
7969 7985  /*
7970 7986   * The logic behind this function is detailed in the NFSv4 RFC in the
7971 7987   * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7972 7988   * that section for explicit guidance to server behavior for
7973 7989   * SETCLIENTID.
7974 7990   */
7975 7991  void
7976 7992  rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7977 7993      struct svc_req *req, struct compound_state *cs)
7978 7994  {
7979 7995          SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7980 7996          SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7981 7997          rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7982 7998          rfs4_clntip_t *ci;
7983 7999          bool_t create;
7984 8000          char *addr, *netid;
7985 8001          int len;
7986 8002  
7987 8003          DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7988 8004              SETCLIENTID4args *, args);
7989 8005  retry:
7990 8006          newcp = cp_confirmed = cp_unconfirmed = NULL;
7991 8007  
7992 8008          /*
7993 8009           * Save the caller's IP address
7994 8010           */
7995 8011          args->client.cl_addr =
7996 8012              (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7997 8013  
7998 8014          /*
7999 8015           * Record if it is a Solaris client that cannot handle referrals.
8000 8016           */
8001 8017          if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8002 8018              !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8003 8019                  /* Add a "yes, it's downrev" record */
8004 8020                  create = TRUE;
8005 8021                  ci = rfs4_find_clntip(args->client.cl_addr, &create);
8006 8022                  ASSERT(ci != NULL);
8007 8023                  rfs4_dbe_rele(ci->ri_dbe);
8008 8024          } else {
8009 8025                  /* Remove any previous record */
8010 8026                  rfs4_invalidate_clntip(args->client.cl_addr);
8011 8027          }
8012 8028  
8013 8029          /*
8014 8030           * In search of an EXISTING client matching the incoming
8015 8031           * request to establish a new client identifier at the server
8016 8032           */
8017 8033          create = TRUE;
8018 8034          cp = rfs4_findclient(&args->client, &create, NULL);
8019 8035  
8020 8036          /* Should never happen */
8021 8037          ASSERT(cp != NULL);
8022 8038  
8023 8039          if (cp == NULL) {
8024 8040                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8025 8041                  goto out;
8026 8042          }
8027 8043  
8028 8044          /*
8029 8045           * Easiest case. Client identifier is newly created and is
8030 8046           * unconfirmed.  Also note that for this case, no other
8031 8047           * entries exist for the client identifier.  Nothing else to
8032 8048           * check.  Just setup the response and respond.
8033 8049           */
8034 8050          if (create) {
8035 8051                  *cs->statusp = res->status = NFS4_OK;
8036 8052                  res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8037 8053                  res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8038 8054                      cp->rc_confirm_verf;
8039 8055                  /* Setup callback information; CB_NULL confirmation later */
8040 8056                  rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8041 8057  
8042 8058                  rfs4_client_rele(cp);
8043 8059                  goto out;
8044 8060          }
8045 8061  
8046 8062          /*
8047 8063           * An existing, confirmed client may exist but it may not have
8048 8064           * been active for at least one lease period.  If so, then
8049 8065           * "close" the client and create a new client identifier
8050 8066           */
8051 8067          if (rfs4_lease_expired(cp)) {
8052 8068                  rfs4_client_close(cp);
8053 8069                  goto retry;
8054 8070          }
8055 8071  
8056 8072          if (cp->rc_need_confirm == TRUE)
8057 8073                  cp_unconfirmed = cp;
8058 8074          else
8059 8075                  cp_confirmed = cp;
8060 8076  
8061 8077          cp = NULL;
8062 8078  
8063 8079          /*
8064 8080           * We have a confirmed client, now check for an
8065 8081           * unconfimred entry
8066 8082           */
8067 8083          if (cp_confirmed) {
8068 8084                  /* If creds don't match then client identifier is inuse */
8069 8085                  if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8070 8086                          rfs4_cbinfo_t *cbp;
8071 8087                          /*
8072 8088                           * Some one else has established this client
8073 8089                           * id. Try and say * who they are. We will use
8074 8090                           * the call back address supplied by * the
8075 8091                           * first client.
8076 8092                           */
8077 8093                          *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8078 8094  
8079 8095                          addr = netid = NULL;
8080 8096  
8081 8097                          cbp = &cp_confirmed->rc_cbinfo;
8082 8098                          if (cbp->cb_callback.cb_location.r_addr &&
8083 8099                              cbp->cb_callback.cb_location.r_netid) {
8084 8100                                  cb_client4 *cbcp = &cbp->cb_callback;
8085 8101  
8086 8102                                  len = strlen(cbcp->cb_location.r_addr)+1;
8087 8103                                  addr = kmem_alloc(len, KM_SLEEP);
8088 8104                                  bcopy(cbcp->cb_location.r_addr, addr, len);
8089 8105                                  len = strlen(cbcp->cb_location.r_netid)+1;
8090 8106                                  netid = kmem_alloc(len, KM_SLEEP);
8091 8107                                  bcopy(cbcp->cb_location.r_netid, netid, len);
8092 8108                          }
8093 8109  
8094 8110                          res->SETCLIENTID4res_u.client_using.r_addr = addr;
8095 8111                          res->SETCLIENTID4res_u.client_using.r_netid = netid;
8096 8112  
8097 8113                          rfs4_client_rele(cp_confirmed);
8098 8114                  }
8099 8115  
8100 8116                  /*
8101 8117                   * Confirmed, creds match, and verifier matches; must
8102 8118                   * be an update of the callback info
8103 8119                   */
8104 8120                  if (cp_confirmed->rc_nfs_client.verifier ==
8105 8121                      args->client.verifier) {
8106 8122                          /* Setup callback information */
8107 8123                          rfs4_client_setcb(cp_confirmed, &args->callback,
8108 8124                              args->callback_ident);
8109 8125  
8110 8126                          /* everything okay -- move ahead */
8111 8127                          *cs->statusp = res->status = NFS4_OK;
8112 8128                          res->SETCLIENTID4res_u.resok4.clientid =
8113 8129                              cp_confirmed->rc_clientid;
8114 8130  
8115 8131                          /* update the confirm_verifier and return it */
8116 8132                          rfs4_client_scv_next(cp_confirmed);
8117 8133                          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8118 8134                              cp_confirmed->rc_confirm_verf;
8119 8135  
8120 8136                          rfs4_client_rele(cp_confirmed);
8121 8137                          goto out;
8122 8138                  }
8123 8139  
8124 8140                  /*
8125 8141                   * Creds match but the verifier doesn't.  Must search
8126 8142                   * for an unconfirmed client that would be replaced by
8127 8143                   * this request.
8128 8144                   */
8129 8145                  create = FALSE;
8130 8146                  cp_unconfirmed = rfs4_findclient(&args->client, &create,
8131 8147                      cp_confirmed);
8132 8148          }
8133 8149  
8134 8150          /*
8135 8151           * At this point, we have taken care of the brand new client
8136 8152           * struct, INUSE case, update of an existing, and confirmed
8137 8153           * client struct.
8138 8154           */
8139 8155  
8140 8156          /*
8141 8157           * check to see if things have changed while we originally
8142 8158           * picked up the client struct.  If they have, then return and
8143 8159           * retry the processing of this SETCLIENTID request.
8144 8160           */
8145 8161          if (cp_unconfirmed) {
8146 8162                  rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8147 8163                  if (!cp_unconfirmed->rc_need_confirm) {
8148 8164                          rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8149 8165                          rfs4_client_rele(cp_unconfirmed);
8150 8166                          if (cp_confirmed)
8151 8167                                  rfs4_client_rele(cp_confirmed);
8152 8168                          goto retry;
8153 8169                  }
8154 8170                  /* do away with the old unconfirmed one */
8155 8171                  rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8156 8172                  rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8157 8173                  rfs4_client_rele(cp_unconfirmed);
8158 8174                  cp_unconfirmed = NULL;
8159 8175          }
8160 8176  
8161 8177          /*
8162 8178           * This search will temporarily hide the confirmed client
8163 8179           * struct while a new client struct is created as the
8164 8180           * unconfirmed one.
8165 8181           */
8166 8182          create = TRUE;
8167 8183          newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8168 8184  
8169 8185          ASSERT(newcp != NULL);
8170 8186  
8171 8187          if (newcp == NULL) {
8172 8188                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8173 8189                  rfs4_client_rele(cp_confirmed);
8174 8190                  goto out;
8175 8191          }
8176 8192  
8177 8193          /*
8178 8194           * If one was not created, then a similar request must be in
8179 8195           * process so release and start over with this one
8180 8196           */
8181 8197          if (create != TRUE) {
8182 8198                  rfs4_client_rele(newcp);
8183 8199                  if (cp_confirmed)
8184 8200                          rfs4_client_rele(cp_confirmed);
8185 8201                  goto retry;
8186 8202          }
8187 8203  
8188 8204          *cs->statusp = res->status = NFS4_OK;
8189 8205          res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8190 8206          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8191 8207              newcp->rc_confirm_verf;
8192 8208          /* Setup callback information; CB_NULL confirmation later */
8193 8209          rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8194 8210  
8195 8211          newcp->rc_cp_confirmed = cp_confirmed;
8196 8212  
8197 8213          rfs4_client_rele(newcp);
8198 8214  
8199 8215  out:
8200 8216          DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8201 8217              SETCLIENTID4res *, res);
8202 8218  }
8203 8219  
8204 8220  /*ARGSUSED*/
8205 8221  void
8206 8222  rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8207 8223      struct svc_req *req, struct compound_state *cs)
8208 8224  {
8209 8225          SETCLIENTID_CONFIRM4args *args =
8210 8226              &argop->nfs_argop4_u.opsetclientid_confirm;
8211 8227          SETCLIENTID_CONFIRM4res *res =
8212 8228              &resop->nfs_resop4_u.opsetclientid_confirm;
8213 8229          rfs4_client_t *cp, *cptoclose = NULL;
8214 8230          nfs4_srv_t *nsrv4;
8215 8231  
8216 8232          DTRACE_NFSV4_2(op__setclientid__confirm__start,
8217 8233              struct compound_state *, cs,
8218 8234              SETCLIENTID_CONFIRM4args *, args);
8219 8235  
8220 8236          nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8221 8237          *cs->statusp = res->status = NFS4_OK;
8222 8238  
8223 8239          cp = rfs4_findclient_by_id(args->clientid, TRUE);
8224 8240  
8225 8241          if (cp == NULL) {
8226 8242                  *cs->statusp = res->status =
8227 8243                      rfs4_check_clientid(&args->clientid, 1);
8228 8244                  goto out;
8229 8245          }
8230 8246  
8231 8247          if (!creds_ok(cp, req, cs)) {
8232 8248                  *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8233 8249                  rfs4_client_rele(cp);
8234 8250                  goto out;
8235 8251          }
8236 8252  
8237 8253          /* If the verifier doesn't match, the record doesn't match */
8238 8254          if (cp->rc_confirm_verf != args->setclientid_confirm) {
8239 8255                  *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8240 8256                  rfs4_client_rele(cp);
8241 8257                  goto out;
8242 8258          }
8243 8259  
8244 8260          rfs4_dbe_lock(cp->rc_dbe);
8245 8261          cp->rc_need_confirm = FALSE;
8246 8262          if (cp->rc_cp_confirmed) {
8247 8263                  cptoclose = cp->rc_cp_confirmed;
8248 8264                  cptoclose->rc_ss_remove = 1;
8249 8265                  cp->rc_cp_confirmed = NULL;
8250 8266          }
8251 8267  
8252 8268          /*
8253 8269           * Update the client's associated server instance, if it's changed
8254 8270           * since the client was created.
8255 8271           */
8256 8272          if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8257 8273                  rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8258 8274  
8259 8275          /*
8260 8276           * Record clientid in stable storage.
8261 8277           * Must be done after server instance has been assigned.
8262 8278           */
8263 8279          rfs4_ss_clid(nsrv4, cp);
8264 8280  
8265 8281          rfs4_dbe_unlock(cp->rc_dbe);
8266 8282  
8267 8283          if (cptoclose)
8268 8284                  /* don't need to rele, client_close does it */
8269 8285                  rfs4_client_close(cptoclose);
8270 8286  
8271 8287          /* If needed, initiate CB_NULL call for callback path */
8272 8288          rfs4_deleg_cb_check(cp);
8273 8289          rfs4_update_lease(cp);
8274 8290  
8275 8291          /*
8276 8292           * Check to see if client can perform reclaims
8277 8293           */
8278 8294          rfs4_ss_chkclid(nsrv4, cp);
8279 8295  
8280 8296          rfs4_client_rele(cp);
8281 8297  
8282 8298  out:
8283 8299          DTRACE_NFSV4_2(op__setclientid__confirm__done,
8284 8300              struct compound_state *, cs,
8285 8301              SETCLIENTID_CONFIRM4 *, res);
8286 8302  }
8287 8303  
8288 8304  
8289 8305  /*ARGSUSED*/
8290 8306  void
8291 8307  rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8292 8308      struct svc_req *req, struct compound_state *cs)
8293 8309  {
8294 8310          CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8295 8311          CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8296 8312          rfs4_state_t *sp;
8297 8313          nfsstat4 status;
8298 8314  
8299 8315          DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8300 8316              CLOSE4args *, args);
8301 8317  
8302 8318          if (cs->vp == NULL) {
8303 8319                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8304 8320                  goto out;
8305 8321          }
8306 8322  
8307 8323          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8308 8324          if (status != NFS4_OK) {
8309 8325                  *cs->statusp = resp->status = status;
8310 8326                  goto out;
8311 8327          }
8312 8328  
8313 8329          /* Ensure specified filehandle matches */
8314 8330          if (cs->vp != sp->rs_finfo->rf_vp) {
8315 8331                  rfs4_state_rele(sp);
8316 8332                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8317 8333                  goto out;
8318 8334          }
8319 8335  
8320 8336          /* hold off other access to open_owner while we tinker */
8321 8337          rfs4_sw_enter(&sp->rs_owner->ro_sw);
8322 8338  
8323 8339          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8324 8340          case NFS4_CHECK_STATEID_OKAY:
8325 8341                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8326 8342                      resop) != NFS4_CHKSEQ_OKAY) {
8327 8343                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8328 8344                          goto end;
8329 8345                  }
8330 8346                  break;
8331 8347          case NFS4_CHECK_STATEID_OLD:
8332 8348                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8333 8349                  goto end;
8334 8350          case NFS4_CHECK_STATEID_BAD:
8335 8351                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8336 8352                  goto end;
8337 8353          case NFS4_CHECK_STATEID_EXPIRED:
8338 8354                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8339 8355                  goto end;
8340 8356          case NFS4_CHECK_STATEID_CLOSED:
8341 8357                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8342 8358                  goto end;
8343 8359          case NFS4_CHECK_STATEID_UNCONFIRMED:
8344 8360                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8345 8361                  goto end;
8346 8362          case NFS4_CHECK_STATEID_REPLAY:
8347 8363                  /* Check the sequence id for the open owner */
8348 8364                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8349 8365                      resop)) {
8350 8366                  case NFS4_CHKSEQ_OKAY:
8351 8367                          /*
8352 8368                           * This is replayed stateid; if seqid matches
8353 8369                           * next expected, then client is using wrong seqid.
8354 8370                           */
8355 8371                          /* FALL THROUGH */
8356 8372                  case NFS4_CHKSEQ_BAD:
8357 8373                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8358 8374                          goto end;
8359 8375                  case NFS4_CHKSEQ_REPLAY:
8360 8376                          /*
8361 8377                           * Note this case is the duplicate case so
8362 8378                           * resp->status is already set.
8363 8379                           */
8364 8380                          *cs->statusp = resp->status;
8365 8381                          rfs4_update_lease(sp->rs_owner->ro_client);
8366 8382                          goto end;
8367 8383                  }
8368 8384                  break;
8369 8385          default:
8370 8386                  ASSERT(FALSE);
8371 8387                  break;
8372 8388          }
8373 8389  
8374 8390          rfs4_dbe_lock(sp->rs_dbe);
8375 8391  
8376 8392          /* Update the stateid. */
8377 8393          next_stateid(&sp->rs_stateid);
8378 8394          resp->open_stateid = sp->rs_stateid.stateid;
8379 8395  
8380 8396          rfs4_dbe_unlock(sp->rs_dbe);
8381 8397  
8382 8398          rfs4_update_lease(sp->rs_owner->ro_client);
8383 8399          rfs4_update_open_sequence(sp->rs_owner);
8384 8400          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8385 8401  
8386 8402          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8387 8403  
8388 8404          *cs->statusp = resp->status = status;
8389 8405  
8390 8406  end:
8391 8407          rfs4_sw_exit(&sp->rs_owner->ro_sw);
8392 8408          rfs4_state_rele(sp);
8393 8409  out:
8394 8410          DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8395 8411              CLOSE4res *, resp);
8396 8412  }
8397 8413  
8398 8414  /*
8399 8415   * Manage the counts on the file struct and close all file locks
8400 8416   */
8401 8417  /*ARGSUSED*/
8402 8418  void
8403 8419  rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8404 8420      bool_t close_of_client)
8405 8421  {
8406 8422          rfs4_file_t *fp = sp->rs_finfo;
8407 8423          rfs4_lo_state_t *lsp;
8408 8424          int fflags = 0;
8409 8425  
8410 8426          /*
8411 8427           * If this call is part of the larger closing down of client
8412 8428           * state then it is just easier to release all locks
8413 8429           * associated with this client instead of going through each
8414 8430           * individual file and cleaning locks there.
8415 8431           */
8416 8432          if (close_of_client) {
8417 8433                  if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8418 8434                      !list_is_empty(&sp->rs_lostatelist) &&
8419 8435                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8420 8436                          /* Is the PxFS kernel module loaded? */
8421 8437                          if (lm_remove_file_locks != NULL) {
8422 8438                                  int new_sysid;
8423 8439  
8424 8440                                  /* Encode the cluster nodeid in new sysid */
8425 8441                                  new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8426 8442                                  lm_set_nlmid_flk(&new_sysid);
8427 8443  
8428 8444                                  /*
8429 8445                                   * This PxFS routine removes file locks for a
8430 8446                                   * client over all nodes of a cluster.
8431 8447                                   */
8432 8448                                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8433 8449                                      "lm_remove_file_locks(sysid=0x%x)\n",
8434 8450                                      new_sysid));
8435 8451                                  (*lm_remove_file_locks)(new_sysid);
8436 8452                          } else {
8437 8453                                  struct flock64 flk;
8438 8454  
8439 8455                                  /* Release all locks for this client */
8440 8456                                  flk.l_type = F_UNLKSYS;
8441 8457                                  flk.l_whence = 0;
8442 8458                                  flk.l_start = 0;
8443 8459                                  flk.l_len = 0;
8444 8460                                  flk.l_sysid =
8445 8461                                      sp->rs_owner->ro_client->rc_sysidt;
8446 8462                                  flk.l_pid = 0;
8447 8463                                  (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8448 8464                                      &flk, F_REMOTELOCK | FREAD | FWRITE,
8449 8465                                      (u_offset_t)0, NULL, CRED(), NULL);
8450 8466                          }
8451 8467  
8452 8468                          sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8453 8469                  }
8454 8470          }
8455 8471  
8456 8472          /*
8457 8473           * Release all locks on this file by this lock owner or at
8458 8474           * least mark the locks as having been released
8459 8475           */
8460 8476          for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8461 8477              lsp = list_next(&sp->rs_lostatelist, lsp)) {
8462 8478                  lsp->rls_locks_cleaned = TRUE;
8463 8479  
8464 8480                  /* Was this already taken care of above? */
8465 8481                  if (!close_of_client &&
8466 8482                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8467 8483                          (void) cleanlocks(sp->rs_finfo->rf_vp,
8468 8484                              lsp->rls_locker->rl_pid,
8469 8485                              lsp->rls_locker->rl_client->rc_sysidt);
8470 8486          }
8471 8487  
8472 8488          /*
8473 8489           * Release any shrlocks associated with this open state ID.
8474 8490           * This must be done before the rfs4_state gets marked closed.
8475 8491           */
8476 8492          if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8477 8493                  (void) rfs4_unshare(sp);
8478 8494  
8479 8495          if (sp->rs_open_access) {
8480 8496                  rfs4_dbe_lock(fp->rf_dbe);
8481 8497  
8482 8498                  /*
8483 8499                   * Decrement the count for each access and deny bit that this
8484 8500                   * state has contributed to the file.
8485 8501                   * If the file counts go to zero
8486 8502                   * clear the appropriate bit in the appropriate mask.
8487 8503                   */
8488 8504                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8489 8505                          fp->rf_access_read--;
8490 8506                          fflags |= FREAD;
8491 8507                          if (fp->rf_access_read == 0)
8492 8508                                  fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8493 8509                  }
8494 8510                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8495 8511                          fp->rf_access_write--;
8496 8512                          fflags |= FWRITE;
8497 8513                          if (fp->rf_access_write == 0)
8498 8514                                  fp->rf_share_access &=
8499 8515                                      ~OPEN4_SHARE_ACCESS_WRITE;
8500 8516                  }
8501 8517                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8502 8518                          fp->rf_deny_read--;
8503 8519                          if (fp->rf_deny_read == 0)
8504 8520                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8505 8521                  }
8506 8522                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8507 8523                          fp->rf_deny_write--;
8508 8524                          if (fp->rf_deny_write == 0)
8509 8525                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8510 8526                  }
8511 8527  
8512 8528                  (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8513 8529  
8514 8530                  rfs4_dbe_unlock(fp->rf_dbe);
8515 8531  
8516 8532                  sp->rs_open_access = 0;
8517 8533                  sp->rs_open_deny = 0;
8518 8534          }
8519 8535  }
8520 8536  
8521 8537  /*
8522 8538   * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8523 8539   */
8524 8540  static nfsstat4
8525 8541  lock_denied(LOCK4denied *dp, struct flock64 *flk)
8526 8542  {
8527 8543          rfs4_lockowner_t *lo;
8528 8544          rfs4_client_t *cp;
8529 8545          uint32_t len;
8530 8546  
8531 8547          lo = rfs4_findlockowner_by_pid(flk->l_pid);
8532 8548          if (lo != NULL) {
8533 8549                  cp = lo->rl_client;
8534 8550                  if (rfs4_lease_expired(cp)) {
8535 8551                          rfs4_lockowner_rele(lo);
8536 8552                          rfs4_dbe_hold(cp->rc_dbe);
8537 8553                          rfs4_client_close(cp);
8538 8554                          return (NFS4ERR_EXPIRED);
8539 8555                  }
8540 8556                  dp->owner.clientid = lo->rl_owner.clientid;
8541 8557                  len = lo->rl_owner.owner_len;
8542 8558                  dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8543 8559                  bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8544 8560                  dp->owner.owner_len = len;
8545 8561                  rfs4_lockowner_rele(lo);
8546 8562                  goto finish;
8547 8563          }
8548 8564  
8549 8565          /*
8550 8566           * Its not a NFS4 lock. We take advantage that the upper 32 bits
8551 8567           * of the client id contain the boot time for a NFS4 lock. So we
8552 8568           * fabricate and identity by setting clientid to the sysid, and
8553 8569           * the lock owner to the pid.
8554 8570           */
8555 8571          dp->owner.clientid = flk->l_sysid;
8556 8572          len = sizeof (pid_t);
8557 8573          dp->owner.owner_len = len;
8558 8574          dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8559 8575          bcopy(&flk->l_pid, dp->owner.owner_val, len);
8560 8576  finish:
8561 8577          dp->offset = flk->l_start;
8562 8578          dp->length = flk->l_len;
8563 8579  
8564 8580          if (flk->l_type == F_RDLCK)
8565 8581                  dp->locktype = READ_LT;
8566 8582          else if (flk->l_type == F_WRLCK)
8567 8583                  dp->locktype = WRITE_LT;
8568 8584          else
8569 8585                  return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8570 8586  
8571 8587          return (NFS4_OK);
8572 8588  }
8573 8589  
8574 8590  /*
8575 8591   * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8576 8592   * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8577 8593   * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8578 8594   * for that (obviously); they are sending the LOCK requests with some delays
8579 8595   * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8580 8596   * locking and delay implementation at the client side.
8581 8597   *
8582 8598   * To make the life of the clients easier, the NFSv4.0 server tries to do some
8583 8599   * fast retries on its own (the for loop below) in a hope the lock will be
8584 8600   * available soon.  And if not, the client won't need to resend the LOCK
8585 8601   * requests so fast to check the lock availability.  This basically saves some
8586 8602   * network traffic and tries to make sure the client gets the lock ASAP.
8587 8603   */
8588 8604  static int
8589 8605  setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8590 8606  {
8591 8607          int error;
8592 8608          struct flock64 flk;
8593 8609          int i;
8594 8610          clock_t delaytime;
8595 8611          int cmd;
8596 8612          int spin_cnt = 0;
8597 8613  
8598 8614          cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8599 8615  retry:
8600 8616          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8601 8617  
8602 8618          for (i = 0; i < rfs4_maxlock_tries; i++) {
8603 8619                  LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8604 8620                  error = VOP_FRLOCK(vp, cmd,
8605 8621                      flock, flag, (u_offset_t)0, NULL, cred, NULL);
8606 8622  
8607 8623                  if (error != EAGAIN && error != EACCES)
8608 8624                          break;
8609 8625  
8610 8626                  if (i < rfs4_maxlock_tries - 1) {
8611 8627                          delay(delaytime);
8612 8628                          delaytime *= 2;
8613 8629                  }
8614 8630          }
8615 8631  
8616 8632          if (error == EAGAIN || error == EACCES) {
8617 8633                  /* Get the owner of the lock */
8618 8634                  flk = *flock;
8619 8635                  LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8620 8636                  if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8621 8637                      NULL) == 0) {
8622 8638                          /*
8623 8639                           * There's a race inherent in the current VOP_FRLOCK
8624 8640                           * design where:
8625 8641                           * a: "other guy" takes a lock that conflicts with a
8626 8642                           * lock we want
8627 8643                           * b: we attempt to take our lock (non-blocking) and
8628 8644                           * the attempt fails.
8629 8645                           * c: "other guy" releases the conflicting lock
8630 8646                           * d: we ask what lock conflicts with the lock we want,
8631 8647                           * getting F_UNLCK (no lock blocks us)
8632 8648                           *
8633 8649                           * If we retry the non-blocking lock attempt in this
8634 8650                           * case (restart at step 'b') there's some possibility
8635 8651                           * that many such attempts might fail.  However a test
8636 8652                           * designed to actually provoke this race shows that
8637 8653                           * the vast majority of cases require no retry, and
8638 8654                           * only a few took as many as three retries.  Here's
8639 8655                           * the test outcome:
8640 8656                           *
8641 8657                           *         number of retries    how many times we needed
8642 8658                           *                              that many retries
8643 8659                           *         0                    79461
8644 8660                           *         1                      862
8645 8661                           *         2                       49
8646 8662                           *         3                        5
8647 8663                           *
8648 8664                           * Given those empirical results, we arbitrarily limit
8649 8665                           * the retry count to ten.
8650 8666                           *
8651 8667                           * If we actually make to ten retries and give up,
8652 8668                           * nothing catastrophic happens, but we're unable to
8653 8669                           * return the information about the conflicting lock to
8654 8670                           * the NFS client.  That's an acceptable trade off vs.
8655 8671                           * letting this retry loop run forever.
8656 8672                           */
8657 8673                          if (flk.l_type == F_UNLCK) {
8658 8674                                  if (spin_cnt++ < 10) {
8659 8675                                          /* No longer locked, retry */
8660 8676                                          goto retry;
8661 8677                                  }
8662 8678                          } else {
8663 8679                                  *flock = flk;
8664 8680                                  LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8665 8681                                      F_GETLK, &flk);
8666 8682                          }
8667 8683                  }
8668 8684          }
8669 8685  
8670 8686          return (error);
8671 8687  }
8672 8688  
8673 8689  /*ARGSUSED*/
8674 8690  static nfsstat4
8675 8691  rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8676 8692      offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8677 8693  {
8678 8694          nfsstat4 status;
8679 8695          rfs4_lockowner_t *lo = lsp->rls_locker;
8680 8696          rfs4_state_t *sp = lsp->rls_state;
8681 8697          struct flock64 flock;
8682 8698          int16_t ltype;
8683 8699          int flag;
8684 8700          int error;
8685 8701          sysid_t sysid;
8686 8702          LOCK4res *lres;
8687 8703          vnode_t *vp;
8688 8704  
8689 8705          if (rfs4_lease_expired(lo->rl_client)) {
8690 8706                  return (NFS4ERR_EXPIRED);
8691 8707          }
8692 8708  
8693 8709          if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8694 8710                  return (status);
8695 8711  
8696 8712          /* Check for zero length. To lock to end of file use all ones for V4 */
8697 8713          if (length == 0)
8698 8714                  return (NFS4ERR_INVAL);
8699 8715          else if (length == (length4)(~0))
8700 8716                  length = 0;             /* Posix to end of file  */
8701 8717  
8702 8718  retry:
8703 8719          rfs4_dbe_lock(sp->rs_dbe);
8704 8720          if (sp->rs_closed == TRUE) {
8705 8721                  rfs4_dbe_unlock(sp->rs_dbe);
8706 8722                  return (NFS4ERR_OLD_STATEID);
8707 8723          }
8708 8724  
8709 8725          if (resop->resop != OP_LOCKU) {
8710 8726                  switch (locktype) {
8711 8727                  case READ_LT:
8712 8728                  case READW_LT:
8713 8729                          if ((sp->rs_share_access
8714 8730                              & OPEN4_SHARE_ACCESS_READ) == 0) {
8715 8731                                  rfs4_dbe_unlock(sp->rs_dbe);
8716 8732  
8717 8733                                  return (NFS4ERR_OPENMODE);
8718 8734                          }
8719 8735                          ltype = F_RDLCK;
8720 8736                          break;
8721 8737                  case WRITE_LT:
8722 8738                  case WRITEW_LT:
8723 8739                          if ((sp->rs_share_access
8724 8740                              & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8725 8741                                  rfs4_dbe_unlock(sp->rs_dbe);
8726 8742  
8727 8743                                  return (NFS4ERR_OPENMODE);
8728 8744                          }
8729 8745                          ltype = F_WRLCK;
8730 8746                          break;
8731 8747                  }
8732 8748          } else
8733 8749                  ltype = F_UNLCK;
8734 8750  
8735 8751          flock.l_type = ltype;
8736 8752          flock.l_whence = 0;             /* SEEK_SET */
8737 8753          flock.l_start = offset;
8738 8754          flock.l_len = length;
8739 8755          flock.l_sysid = sysid;
8740 8756          flock.l_pid = lsp->rls_locker->rl_pid;
8741 8757  
8742 8758          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8743 8759          if (flock.l_len < 0 || flock.l_start < 0) {
8744 8760                  rfs4_dbe_unlock(sp->rs_dbe);
8745 8761                  return (NFS4ERR_INVAL);
8746 8762          }
8747 8763  
8748 8764          /*
8749 8765           * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8750 8766           * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8751 8767           */
8752 8768          flag = (int)sp->rs_share_access | F_REMOTELOCK;
8753 8769  
8754 8770          vp = sp->rs_finfo->rf_vp;
8755 8771          VN_HOLD(vp);
8756 8772  
8757 8773          /*
8758 8774           * We need to unlock sp before we call the underlying filesystem to
8759 8775           * acquire the file lock.
8760 8776           */
8761 8777          rfs4_dbe_unlock(sp->rs_dbe);
8762 8778  
8763 8779          error = setlock(vp, &flock, flag, cred);
8764 8780  
8765 8781          /*
8766 8782           * Make sure the file is still open.  In a case the file was closed in
8767 8783           * the meantime, clean the lock we acquired using the setlock() call
8768 8784           * above, and return the appropriate error.
8769 8785           */
8770 8786          rfs4_dbe_lock(sp->rs_dbe);
8771 8787          if (sp->rs_closed == TRUE) {
8772 8788                  cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8773 8789                  rfs4_dbe_unlock(sp->rs_dbe);
8774 8790  
8775 8791                  VN_RELE(vp);
8776 8792  
8777 8793                  return (NFS4ERR_OLD_STATEID);
8778 8794          }
8779 8795          rfs4_dbe_unlock(sp->rs_dbe);
8780 8796  
8781 8797          VN_RELE(vp);
8782 8798  
8783 8799          if (error == 0) {
8784 8800                  rfs4_dbe_lock(lsp->rls_dbe);
8785 8801                  next_stateid(&lsp->rls_lockid);
8786 8802                  rfs4_dbe_unlock(lsp->rls_dbe);
8787 8803          }
8788 8804  
8789 8805          /*
8790 8806           * N.B. We map error values to nfsv4 errors. This is differrent
8791 8807           * than puterrno4 routine.
8792 8808           */
8793 8809          switch (error) {
8794 8810          case 0:
8795 8811                  status = NFS4_OK;
8796 8812                  break;
8797 8813          case EAGAIN:
8798 8814          case EACCES:            /* Old value */
8799 8815                  /* Can only get here if op is OP_LOCK */
8800 8816                  ASSERT(resop->resop == OP_LOCK);
8801 8817                  lres = &resop->nfs_resop4_u.oplock;
8802 8818                  status = NFS4ERR_DENIED;
8803 8819                  if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8804 8820                      == NFS4ERR_EXPIRED)
8805 8821                          goto retry;
8806 8822                  break;
8807 8823          case ENOLCK:
8808 8824                  status = NFS4ERR_DELAY;
8809 8825                  break;
8810 8826          case EOVERFLOW:
8811 8827                  status = NFS4ERR_INVAL;
8812 8828                  break;
8813 8829          case EINVAL:
8814 8830                  status = NFS4ERR_NOTSUPP;
8815 8831                  break;
8816 8832          default:
8817 8833                  status = NFS4ERR_SERVERFAULT;
8818 8834                  break;
8819 8835          }
8820 8836  
8821 8837          return (status);
8822 8838  }
8823 8839  
8824 8840  /*ARGSUSED*/
8825 8841  void
8826 8842  rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8827 8843      struct svc_req *req, struct compound_state *cs)
8828 8844  {
8829 8845          LOCK4args *args = &argop->nfs_argop4_u.oplock;
8830 8846          LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8831 8847          nfsstat4 status;
8832 8848          stateid4 *stateid;
8833 8849          rfs4_lockowner_t *lo;
8834 8850          rfs4_client_t *cp;
8835 8851          rfs4_state_t *sp = NULL;
8836 8852          rfs4_lo_state_t *lsp = NULL;
8837 8853          bool_t ls_sw_held = FALSE;
8838 8854          bool_t create = TRUE;
8839 8855          bool_t lcreate = TRUE;
8840 8856          bool_t dup_lock = FALSE;
8841 8857          int rc;
8842 8858  
8843 8859          DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8844 8860              LOCK4args *, args);
8845 8861  
8846 8862          if (cs->vp == NULL) {
8847 8863                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8848 8864                  DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8849 8865                      cs, LOCK4res *, resp);
8850 8866                  return;
8851 8867          }
8852 8868  
8853 8869          if (args->locker.new_lock_owner) {
8854 8870                  /* Create a new lockowner for this instance */
8855 8871                  open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8856 8872  
8857 8873                  NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8858 8874  
8859 8875                  stateid = &olo->open_stateid;
8860 8876                  status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8861 8877                  if (status != NFS4_OK) {
8862 8878                          NFS4_DEBUG(rfs4_debug,
8863 8879                              (CE_NOTE, "Get state failed in lock %d", status));
8864 8880                          *cs->statusp = resp->status = status;
8865 8881                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8866 8882                              cs, LOCK4res *, resp);
8867 8883                          return;
8868 8884                  }
8869 8885  
8870 8886                  /* Ensure specified filehandle matches */
8871 8887                  if (cs->vp != sp->rs_finfo->rf_vp) {
8872 8888                          rfs4_state_rele(sp);
8873 8889                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8874 8890                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8875 8891                              cs, LOCK4res *, resp);
8876 8892                          return;
8877 8893                  }
8878 8894  
8879 8895                  /* hold off other access to open_owner while we tinker */
8880 8896                  rfs4_sw_enter(&sp->rs_owner->ro_sw);
8881 8897  
8882 8898                  switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8883 8899                  case NFS4_CHECK_STATEID_OLD:
8884 8900                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8885 8901                          goto end;
8886 8902                  case NFS4_CHECK_STATEID_BAD:
8887 8903                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8888 8904                          goto end;
8889 8905                  case NFS4_CHECK_STATEID_EXPIRED:
8890 8906                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8891 8907                          goto end;
8892 8908                  case NFS4_CHECK_STATEID_UNCONFIRMED:
8893 8909                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8894 8910                          goto end;
8895 8911                  case NFS4_CHECK_STATEID_CLOSED:
8896 8912                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8897 8913                          goto end;
8898 8914                  case NFS4_CHECK_STATEID_OKAY:
8899 8915                  case NFS4_CHECK_STATEID_REPLAY:
8900 8916                          switch (rfs4_check_olo_seqid(olo->open_seqid,
8901 8917                              sp->rs_owner, resop)) {
8902 8918                          case NFS4_CHKSEQ_OKAY:
8903 8919                                  if (rc == NFS4_CHECK_STATEID_OKAY)
8904 8920                                          break;
8905 8921                                  /*
8906 8922                                   * This is replayed stateid; if seqid
8907 8923                                   * matches next expected, then client
8908 8924                                   * is using wrong seqid.
8909 8925                                   */
8910 8926                                  /* FALLTHROUGH */
8911 8927                          case NFS4_CHKSEQ_BAD:
8912 8928                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8913 8929                                  goto end;
8914 8930                          case NFS4_CHKSEQ_REPLAY:
8915 8931                                  /* This is a duplicate LOCK request */
8916 8932                                  dup_lock = TRUE;
8917 8933  
8918 8934                                  /*
8919 8935                                   * For a duplicate we do not want to
8920 8936                                   * create a new lockowner as it should
8921 8937                                   * already exist.
8922 8938                                   * Turn off the lockowner create flag.
8923 8939                                   */
8924 8940                                  lcreate = FALSE;
8925 8941                          }
8926 8942                          break;
8927 8943                  }
8928 8944  
8929 8945                  lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8930 8946                  if (lo == NULL) {
8931 8947                          NFS4_DEBUG(rfs4_debug,
8932 8948                              (CE_NOTE, "rfs4_op_lock: no lock owner"));
8933 8949                          *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8934 8950                          goto end;
8935 8951                  }
8936 8952  
8937 8953                  lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8938 8954                  if (lsp == NULL) {
8939 8955                          rfs4_update_lease(sp->rs_owner->ro_client);
8940 8956                          /*
8941 8957                           * Only update the open_seqid if this is not
8942 8958                           * a duplicate request
8943 8959                           */
8944 8960                          if (dup_lock == FALSE) {
8945 8961                                  rfs4_update_open_sequence(sp->rs_owner);
8946 8962                          }
8947 8963  
8948 8964                          NFS4_DEBUG(rfs4_debug,
8949 8965                              (CE_NOTE, "rfs4_op_lock: no state"));
8950 8966                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8951 8967                          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8952 8968                          rfs4_lockowner_rele(lo);
8953 8969                          goto end;
8954 8970                  }
8955 8971  
8956 8972                  /*
8957 8973                   * This is the new_lock_owner branch and the client is
8958 8974                   * supposed to be associating a new lock_owner with
8959 8975                   * the open file at this point.  If we find that a
8960 8976                   * lock_owner/state association already exists and a
8961 8977                   * successful LOCK request was returned to the client,
8962 8978                   * an error is returned to the client since this is
8963 8979                   * not appropriate.  The client should be using the
8964 8980                   * existing lock_owner branch.
8965 8981                   */
8966 8982                  if (dup_lock == FALSE && create == FALSE) {
8967 8983                          if (lsp->rls_lock_completed == TRUE) {
8968 8984                                  *cs->statusp =
8969 8985                                      resp->status = NFS4ERR_BAD_SEQID;
8970 8986                                  rfs4_lockowner_rele(lo);
8971 8987                                  goto end;
8972 8988                          }
8973 8989                  }
8974 8990  
8975 8991                  rfs4_update_lease(sp->rs_owner->ro_client);
8976 8992  
8977 8993                  /*
8978 8994                   * Only update the open_seqid if this is not
8979 8995                   * a duplicate request
8980 8996                   */
8981 8997                  if (dup_lock == FALSE) {
8982 8998                          rfs4_update_open_sequence(sp->rs_owner);
8983 8999                  }
8984 9000  
8985 9001                  /*
8986 9002                   * If this is a duplicate lock request, just copy the
8987 9003                   * previously saved reply and return.
8988 9004                   */
8989 9005                  if (dup_lock == TRUE) {
8990 9006                          /* verify that lock_seqid's match */
8991 9007                          if (lsp->rls_seqid != olo->lock_seqid) {
8992 9008                                  NFS4_DEBUG(rfs4_debug,
8993 9009                                      (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8994 9010                                      "lsp->seqid=%d old->seqid=%d",
8995 9011                                      lsp->rls_seqid, olo->lock_seqid));
8996 9012                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8997 9013                          } else {
8998 9014                                  rfs4_copy_reply(resop, &lsp->rls_reply);
8999 9015                                  /*
9000 9016                                   * Make sure to copy the just
9001 9017                                   * retrieved reply status into the
9002 9018                                   * overall compound status
9003 9019                                   */
9004 9020                                  *cs->statusp = resp->status;
9005 9021                          }
9006 9022                          rfs4_lockowner_rele(lo);
9007 9023                          goto end;
9008 9024                  }
9009 9025  
9010 9026                  rfs4_dbe_lock(lsp->rls_dbe);
9011 9027  
9012 9028                  /* Make sure to update the lock sequence id */
9013 9029                  lsp->rls_seqid = olo->lock_seqid;
9014 9030  
9015 9031                  NFS4_DEBUG(rfs4_debug,
9016 9032                      (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9017 9033  
9018 9034                  /*
9019 9035                   * This is used to signify the newly created lockowner
9020 9036                   * stateid and its sequence number.  The checks for
9021 9037                   * sequence number and increment don't occur on the
9022 9038                   * very first lock request for a lockowner.
9023 9039                   */
9024 9040                  lsp->rls_skip_seqid_check = TRUE;
9025 9041  
9026 9042                  /* hold off other access to lsp while we tinker */
9027 9043                  rfs4_sw_enter(&lsp->rls_sw);
9028 9044                  ls_sw_held = TRUE;
9029 9045  
9030 9046                  rfs4_dbe_unlock(lsp->rls_dbe);
9031 9047  
9032 9048                  rfs4_lockowner_rele(lo);
9033 9049          } else {
9034 9050                  stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9035 9051                  /* get lsp and hold the lock on the underlying file struct */
9036 9052                  if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9037 9053                      != NFS4_OK) {
9038 9054                          *cs->statusp = resp->status = status;
9039 9055                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9040 9056                              cs, LOCK4res *, resp);
9041 9057                          return;
9042 9058                  }
9043 9059                  create = FALSE; /* We didn't create lsp */
9044 9060  
9045 9061                  /* Ensure specified filehandle matches */
9046 9062                  if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9047 9063                          rfs4_lo_state_rele(lsp, TRUE);
9048 9064                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9049 9065                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9050 9066                              cs, LOCK4res *, resp);
9051 9067                          return;
9052 9068                  }
9053 9069  
9054 9070                  /* hold off other access to lsp while we tinker */
9055 9071                  rfs4_sw_enter(&lsp->rls_sw);
9056 9072                  ls_sw_held = TRUE;
9057 9073  
9058 9074                  switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9059 9075                  /*
9060 9076                   * The stateid looks like it was okay (expected to be
9061 9077                   * the next one)
9062 9078                   */
9063 9079                  case NFS4_CHECK_STATEID_OKAY:
9064 9080                          /*
9065 9081                           * The sequence id is now checked.  Determine
9066 9082                           * if this is a replay or if it is in the
9067 9083                           * expected (next) sequence.  In the case of a
9068 9084                           * replay, there are two replay conditions
9069 9085                           * that may occur.  The first is the normal
9070 9086                           * condition where a LOCK is done with a
9071 9087                           * NFS4_OK response and the stateid is
9072 9088                           * updated.  That case is handled below when
9073 9089                           * the stateid is identified as a REPLAY.  The
9074 9090                           * second is the case where an error is
9075 9091                           * returned, like NFS4ERR_DENIED, and the
9076 9092                           * sequence number is updated but the stateid
9077 9093                           * is not updated.  This second case is dealt
9078 9094                           * with here.  So it may seem odd that the
9079 9095                           * stateid is okay but the sequence id is a
9080 9096                           * replay but it is okay.
9081 9097                           */
9082 9098                          switch (rfs4_check_lock_seqid(
9083 9099                              args->locker.locker4_u.lock_owner.lock_seqid,
9084 9100                              lsp, resop)) {
9085 9101                          case NFS4_CHKSEQ_REPLAY:
9086 9102                                  if (resp->status != NFS4_OK) {
9087 9103                                          /*
9088 9104                                           * Here is our replay and need
9089 9105                                           * to verify that the last
9090 9106                                           * response was an error.
9091 9107                                           */
9092 9108                                          *cs->statusp = resp->status;
9093 9109                                          goto end;
9094 9110                                  }
9095 9111                                  /*
9096 9112                                   * This is done since the sequence id
9097 9113                                   * looked like a replay but it didn't
9098 9114                                   * pass our check so a BAD_SEQID is
9099 9115                                   * returned as a result.
9100 9116                                   */
9101 9117                                  /*FALLTHROUGH*/
9102 9118                          case NFS4_CHKSEQ_BAD:
9103 9119                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9104 9120                                  goto end;
9105 9121                          case NFS4_CHKSEQ_OKAY:
9106 9122                                  /* Everything looks okay move ahead */
9107 9123                                  break;
9108 9124                          }
9109 9125                          break;
9110 9126                  case NFS4_CHECK_STATEID_OLD:
9111 9127                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9112 9128                          goto end;
9113 9129                  case NFS4_CHECK_STATEID_BAD:
9114 9130                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9115 9131                          goto end;
9116 9132                  case NFS4_CHECK_STATEID_EXPIRED:
9117 9133                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9118 9134                          goto end;
9119 9135                  case NFS4_CHECK_STATEID_CLOSED:
9120 9136                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9121 9137                          goto end;
9122 9138                  case NFS4_CHECK_STATEID_REPLAY:
9123 9139                          switch (rfs4_check_lock_seqid(
9124 9140                              args->locker.locker4_u.lock_owner.lock_seqid,
9125 9141                              lsp, resop)) {
9126 9142                          case NFS4_CHKSEQ_OKAY:
9127 9143                                  /*
9128 9144                                   * This is a replayed stateid; if
9129 9145                                   * seqid matches the next expected,
9130 9146                                   * then client is using wrong seqid.
9131 9147                                   */
9132 9148                          case NFS4_CHKSEQ_BAD:
9133 9149                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9134 9150                                  goto end;
9135 9151                          case NFS4_CHKSEQ_REPLAY:
9136 9152                                  rfs4_update_lease(lsp->rls_locker->rl_client);
9137 9153                                  *cs->statusp = status = resp->status;
9138 9154                                  goto end;
9139 9155                          }
9140 9156                          break;
9141 9157                  default:
9142 9158                          ASSERT(FALSE);
9143 9159                          break;
9144 9160                  }
9145 9161  
9146 9162                  rfs4_update_lock_sequence(lsp);
9147 9163                  rfs4_update_lease(lsp->rls_locker->rl_client);
9148 9164          }
9149 9165  
9150 9166          /*
9151 9167           * NFS4 only allows locking on regular files, so
9152 9168           * verify type of object.
9153 9169           */
9154 9170          if (cs->vp->v_type != VREG) {
9155 9171                  if (cs->vp->v_type == VDIR)
9156 9172                          status = NFS4ERR_ISDIR;
9157 9173                  else
9158 9174                          status = NFS4ERR_INVAL;
9159 9175                  goto out;
9160 9176          }
9161 9177  
9162 9178          cp = lsp->rls_state->rs_owner->ro_client;
9163 9179  
9164 9180          if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9165 9181                  status = NFS4ERR_GRACE;
9166 9182                  goto out;
9167 9183          }
9168 9184  
9169 9185          if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9170 9186                  status = NFS4ERR_NO_GRACE;
9171 9187                  goto out;
9172 9188          }
9173 9189  
9174 9190          if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9175 9191                  status = NFS4ERR_NO_GRACE;
9176 9192                  goto out;
9177 9193          }
9178 9194  
9179 9195          if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9180 9196                  cs->deleg = TRUE;
9181 9197  
9182 9198          status = rfs4_do_lock(lsp, args->locktype,
9183 9199              args->offset, args->length, cs->cr, resop);
9184 9200  
9185 9201  out:
9186 9202          lsp->rls_skip_seqid_check = FALSE;
9187 9203  
9188 9204          *cs->statusp = resp->status = status;
9189 9205  
9190 9206          if (status == NFS4_OK) {
9191 9207                  resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9192 9208                  lsp->rls_lock_completed = TRUE;
9193 9209          }
9194 9210          /*
9195 9211           * Only update the "OPEN" response here if this was a new
9196 9212           * lock_owner
9197 9213           */
9198 9214          if (sp)
9199 9215                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9200 9216  
9201 9217          rfs4_update_lock_resp(lsp, resop);
9202 9218  
9203 9219  end:
9204 9220          if (lsp) {
9205 9221                  if (ls_sw_held)
9206 9222                          rfs4_sw_exit(&lsp->rls_sw);
9207 9223                  /*
9208 9224                   * If an sp obtained, then the lsp does not represent
9209 9225                   * a lock on the file struct.
9210 9226                   */
9211 9227                  if (sp != NULL)
9212 9228                          rfs4_lo_state_rele(lsp, FALSE);
9213 9229                  else
9214 9230                          rfs4_lo_state_rele(lsp, TRUE);
9215 9231          }
9216 9232          if (sp) {
9217 9233                  rfs4_sw_exit(&sp->rs_owner->ro_sw);
9218 9234                  rfs4_state_rele(sp);
9219 9235          }
9220 9236  
9221 9237          DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9222 9238              LOCK4res *, resp);
9223 9239  }
9224 9240  
9225 9241  /*
            * Free function for LOCK/LOCKT results.
            *
            * Only a result whose status is NFS4ERR_DENIED carries anything that is
            * freed here: the owner_val buffer (owner_len bytes) inside its
            * LOCK4denied.  Any other op type or status leaves dp NULL and nothing
            * is freed.
            */
9226 9242  static void
9227 9243  lock_denied_free(nfs_resop4 *resop)
9228 9244  {
9229 9245          LOCK4denied *dp = NULL;
9230 9246  
                   /* LOCK and LOCKT keep their LOCK4denied at different union paths */
9231 9247          switch (resop->resop) {
9232 9248          case OP_LOCK:
9233 9249                  if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9234 9250                          dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9235 9251                  break;
9236 9252          case OP_LOCKT:
9237 9253                  if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9238 9254                          dp = &resop->nfs_resop4_u.oplockt.denied;
9239 9255                  break;
9240 9256          default:
9241 9257                  break;
9242 9258          }
9243 9259  
                   /* dp is non-NULL only for a denied LOCK/LOCKT result */
9244 9260          if (dp)
9245 9261                  kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9246 9262  }
9247 9263  
           /*
            * NFSv4 LOCKU operation handler.
            *
            * Looks up the lock-owner state named by args->lock_stateid, checks it
            * against the current filehandle and the supplied seqid, and then passes
            * the request to rfs4_do_lock().  The resulting status is stored in both
            * resp->status and *cs->statusp; on NFS4_OK the lock stateid held in the
            * lock state is copied to resp->lock_stateid.
            *
            * Exit paths:
            *   - the three early returns (no current filehandle, stateid lookup
            *     failure, filehandle mismatch) happen before rls_sw is entered;
            *   - "end" is used when the status has already been stored by the
            *     jumping code; it skips the status store and the
            *     rfs4_update_lock_resp() call and only drops rls_sw and the
            *     lock state reference;
            *   - "out" stores `status`, calls rfs4_update_lock_resp() and then
            *     falls through into "end".
            */
9248 9264  /*ARGSUSED*/
9249 9265  void
9250 9266  rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9251 9267      struct svc_req *req, struct compound_state *cs)
9252 9268  {
9253 9269          LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9254 9270          LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9255 9271          nfsstat4 status;
9256 9272          stateid4 *stateid = &args->lock_stateid;
9257 9273          rfs4_lo_state_t *lsp;
9258 9274  
9259 9275          DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9260 9276              LOCKU4args *, args);
9261 9277  
                   /* LOCKU needs a current filehandle */
9262 9278          if (cs->vp == NULL) {
9263 9279                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9264 9280                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9265 9281                      LOCKU4res *, resp);
9266 9282                  return;
9267 9283          }
9268 9284  
                   /*
                    * Get lsp; every later exit releases it with
                    * rfs4_lo_state_rele(lsp, TRUE), matching the TRUE passed here.
                    */
9269 9285          if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9270 9286                  *cs->statusp = resp->status = status;
9271 9287                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9272 9288                      LOCKU4res *, resp);
9273 9289                  return;
9274 9290          }
9275 9291  
9276 9292          /* Ensure specified filehandle matches */
9277 9293          if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9278 9294                  rfs4_lo_state_rele(lsp, TRUE);
9279 9295                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9280 9296                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9281 9297                      LOCKU4res *, resp);
9282 9298                  return;
9283 9299          }
9284 9300  
9285 9301          /* hold off other access to lsp while we tinker */
9286 9302          rfs4_sw_enter(&lsp->rls_sw);
9287 9303  
                   /*
                    * From here on rls_sw is held, so failures must leave through
                    * "end" (or "out") rather than returning directly.
                    */
9288 9304          switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9289 9305          case NFS4_CHECK_STATEID_OKAY:
                           /* with a current stateid, only the next seqid is accepted */
9290 9306                  if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9291 9307                      != NFS4_CHKSEQ_OKAY) {
9292 9308                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9293 9309                          goto end;
9294 9310                  }
9295 9311                  break;
9296 9312          case NFS4_CHECK_STATEID_OLD:
9297 9313                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9298 9314                  goto end;
9299 9315          case NFS4_CHECK_STATEID_BAD:
9300 9316                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9301 9317                  goto end;
9302 9318          case NFS4_CHECK_STATEID_EXPIRED:
9303 9319                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9304 9320                  goto end;
9305 9321          case NFS4_CHECK_STATEID_CLOSED:
                           /* a closed stateid is reported the same way as an old one */
9306 9322                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9307 9323                  goto end;
9308 9324          case NFS4_CHECK_STATEID_REPLAY:
9309 9325                  switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9310 9326                  case NFS4_CHKSEQ_OKAY:
9311 9327                                  /*
9312 9328                                   * This is a replayed stateid; if
9313 9329                                   * seqid matches the next expected,
9314 9330                                   * then client is using wrong seqid.
9315 9331                                   */
9316 9332                  case NFS4_CHKSEQ_BAD:
9317 9333                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9318 9334                          goto end;
9319 9335                  case NFS4_CHKSEQ_REPLAY:
                                   /*
                                    * resp->status is read here, not computed.
                                    * NOTE(review): presumably rfs4_check_lock_seqid()
                                    * has placed the saved reply in resop for a
                                    * replay -- confirm against that routine.
                                    */
9320 9336                          rfs4_update_lease(lsp->rls_locker->rl_client);
9321 9337                          *cs->statusp = status = resp->status;
9322 9338                          goto end;
9323 9339                  }
9324 9340                  break;
9325 9341          default:
9326 9342                  ASSERT(FALSE);
9327 9343                  break;
9328 9344          }
9329 9345  
                   /* stateid and seqid accepted: advance the sequence, renew the lease */
9330 9346          rfs4_update_lock_sequence(lsp);
9331 9347          rfs4_update_lease(lsp->rls_locker->rl_client);
9332 9348  
9333 9349          /*
9334 9350           * NFS4 only allows locking on regular files, so
9335 9351           * verify type of object.
9336 9352           */
9337 9353          if (cs->vp->v_type != VREG) {
9338 9354                  if (cs->vp->v_type == VDIR)
9339 9355                          status = NFS4ERR_ISDIR;
9340 9356                  else
9341 9357                          status = NFS4ERR_INVAL;
9342 9358                  goto out;
9343 9359          }
9344 9360  
                   /* no unlocks are processed while the owning client is in grace */
9345 9361          if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9346 9362                  status = NFS4ERR_GRACE;
9347 9363                  goto out;
9348 9364          }
9349 9365  
9350 9366          status = rfs4_do_lock(lsp, args->locktype,
9351 9367              args->offset, args->length, cs->cr, resop);
9352 9368  
9353 9369  out:
                   /* publish the final status to both the op result and the compound */
9354 9370          *cs->statusp = resp->status = status;
9355 9371  
9356 9372          if (status == NFS4_OK)
9357 9373                  resp->lock_stateid = lsp->rls_lockid.stateid;
9358 9374  
9359 9375          rfs4_update_lock_resp(lsp, resop);
9360 9376  
9361 9377  end:
                   /* undo rfs4_sw_enter() and the hold taken by rfs4_get_lo_state() */
9362 9378          rfs4_sw_exit(&lsp->rls_sw);
9363 9379          rfs4_lo_state_rele(lsp, TRUE);
9364 9380  
9365 9381          DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9366 9382              LOCKU4res *, resp);
9367 9383  }
9368 9384  
9369 9385  /*
9370 9386   * LOCKT is a best effort routine, the client can not be guaranteed that
9371 9387   * the status return is still in effect by the time the reply is received.
9372 9388   * There are numerous race conditions in this routine, but we are not required
9373 9389   * and can not be accurate.
            *
            * The test itself is a VOP_FRLOCK(F_GETLK) on the current filehandle
            * using the requested range and lock type.  resp->status ends up as
            * NFS4_OK when F_GETLK reports F_UNLCK, NFS4ERR_DENIED (with
            * resp->denied filled in by lock_denied()) when it reports a lock, or
            * an error.
            *
            * Exit paths:
            *   - "out" is reached directly by the early failures; each of them has
            *     already stored the status in both resp->status and *cs->statusp
            *     and is taken before rfs4_findlockowner() is called;
            *   - "err" releases the lockowner (if one was found), copies
            *     resp->status into *cs->statusp and falls through into "out".
9374 9390   */
9375 9391  /*ARGSUSED*/
9376 9392  void
9377 9393  rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9378 9394      struct svc_req *req, struct compound_state *cs)
9379 9395  {
9380 9396          LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9381 9397          LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9382 9398          rfs4_lockowner_t *lo;
9383 9399          rfs4_client_t *cp;
9384 9400          bool_t create = FALSE;
9385 9401          struct flock64 flk;
9386 9402          int error;
9387 9403          int flag = FREAD | FWRITE;
9388 9404          int ltype;
9389 9405          length4 posix_length;
9390 9406          sysid_t sysid;
9391 9407          pid_t pid;
9392 9408  
9393 9409          DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9394 9410              LOCKT4args *, args);
9395 9411  
9396 9412          if (cs->vp == NULL) {
9397 9413                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9398 9414                  goto out;
9399 9415          }
9400 9416  
9401 9417          /*
9402 9418           * NFS4 only allows locking on regular files, so
9403 9419           * verify type of object.
9404 9420           */
9405 9421          if (cs->vp->v_type != VREG) {
9406 9422                  if (cs->vp->v_type == VDIR)
9407 9423                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
9408 9424                  else
9409 9425                          *cs->statusp = resp->status =  NFS4ERR_INVAL;
9410 9426                  goto out;
9411 9427          }
9412 9428  
9413 9429          /*
9414 9430           * Check out the clientid to ensure the server knows about it
9415 9431           * so that we correctly inform the client of a server reboot.
9416 9432           */
9417 9433          if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9418 9434              == NULL) {
9419 9435                  *cs->statusp = resp->status =
9420 9436                      rfs4_check_clientid(&args->owner.clientid, 0);
9421 9437                  goto out;
9422 9438          }
9423 9439          if (rfs4_lease_expired(cp)) {
                           /*
                            * NOTE(review): no rfs4_client_rele() on this path;
                            * presumably rfs4_client_close() consumes the
                            * reference -- confirm.
                            */
9424 9440                  rfs4_client_close(cp);
9425 9441                  /*
9426 9442                   * Protocol doesn't allow returning NFS4ERR_STALE as
9427 9443                   * other operations do on this check so STALE_CLIENTID
9428 9444                   * is returned instead
9429 9445                   */
9430 9446                  *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9431 9447                  goto out;
9432 9448          }
9433 9449  
9434 9450          if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9435 9451                  *cs->statusp = resp->status = NFS4ERR_GRACE;
9436 9452                  rfs4_client_rele(cp);
9437 9453                  goto out;
9438 9454          }
                   /* cp was only needed for the checks above */
9439 9455          rfs4_client_rele(cp);
9440 9456  
9441 9457          resp->status = NFS4_OK;
9442 9458  
                   /*
                    * Map the NFSv4 lock type to a POSIX one; the blocking variants
                    * map to the same type as the non-blocking ones.
                    *
                    * NOTE(review): there is no default case, so a locktype outside
                    * these four values leaves ltype unset when it is copied into
                    * flk.l_type below -- confirm the request decoder rejects
                    * other values.
                    */
9443 9459          switch (args->locktype) {
9444 9460          case READ_LT:
9445 9461          case READW_LT:
9446 9462                  ltype = F_RDLCK;
9447 9463                  break;
9448 9464          case WRITE_LT:
9449 9465          case WRITEW_LT:
9450 9466                  ltype = F_WRLCK;
9451 9467                  break;
9452 9468          }
9453 9469  
9454 9470          posix_length = args->length;
9455 9471          /* Check for zero length. To lock to end of file use all ones for V4 */
9456 9472          if (posix_length == 0) {
9457 9473                  *cs->statusp = resp->status = NFS4ERR_INVAL;
9458 9474                  goto out;
9459 9475          } else if (posix_length == (length4)(~0)) {
9460 9476                  posix_length = 0;       /* Posix to end of file  */
9461 9477          }
9462 9478  
9463 9479          /* Find or create a lockowner */
                   /* create is FALSE on entry; lo may come back NULL (handled below) */
9464 9480          lo = rfs4_findlockowner(&args->owner, &create);
9465 9481  
9466 9482          if (lo) {
                           /* known lockowner: test with its own pid and client sysid */
9467 9483                  pid = lo->rl_pid;
9468 9484                  if ((resp->status =
9469 9485                      rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9470 9486                          goto err;
9471 9487          } else {
                           /* unknown lockowner: use pid 0 and the lockt_sysid value */
9472 9488                  pid = 0;
9473 9489                  sysid = lockt_sysid;
9474 9490          }
9475 9491  retry:
                   /*
                    * flk is refilled on every pass: it is read back after the
                    * VOP_FRLOCK() call (l_type, and lock_denied() is handed &flk).
                    */
9476 9492          flk.l_type = ltype;
9477 9493          flk.l_whence = 0;               /* SEEK_SET */
9478 9494          flk.l_start = args->offset;
9479 9495          flk.l_len = posix_length;
9480 9496          flk.l_sysid = sysid;
9481 9497          flk.l_pid = pid;
9482 9498          flag |= F_REMOTELOCK;
9483 9499  
9484 9500          LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9485 9501  
9486 9502          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9487 9503          if (flk.l_len < 0 || flk.l_start < 0) {
9488 9504                  resp->status = NFS4ERR_INVAL;
9489 9505                  goto err;
9490 9506          }
9491 9507          error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9492 9508              NULL, cs->cr, NULL);
9493 9509  
9494 9510          /*
9495 9511           * N.B. We map error values to nfsv4 errors. This is different
9496 9512           * than puterrno4 routine.
9497 9513           */
9498 9514          switch (error) {
9499 9515          case 0:
9500 9516                  if (flk.l_type == F_UNLCK)
9501 9517                          resp->status = NFS4_OK;
9502 9518                  else {
                                   /*
                                    * NOTE(review): NFS4ERR_EXPIRED from
                                    * lock_denied() restarts the test; presumably
                                    * the conflicting lock's owner is no longer
                                    * valid -- confirm against lock_denied().
                                    */
9503 9519                          if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9504 9520                                  goto retry;
9505 9521                          resp->status = NFS4ERR_DENIED;
9506 9522                  }
9507 9523                  break;
9508 9524          case EOVERFLOW:
9509 9525                  resp->status = NFS4ERR_INVAL;
9510 9526                  break;
9511 9527          case EINVAL:
9512 9528                  resp->status = NFS4ERR_NOTSUPP;
9513 9529                  break;
9514 9530          default:
9515 9531                  cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9516 9532                      error);
9517 9533                  resp->status = NFS4ERR_SERVERFAULT;
9518 9534                  break;
9519 9535          }
9520 9536  
9521 9537  err:
                   /* lo is NULL when no matching lockowner was found */
9522 9538          if (lo)
9523 9539                  rfs4_lockowner_rele(lo);
9524 9540          *cs->statusp = resp->status;
9525 9541  out:
9526 9542          DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9527 9543              LOCKT4res *, resp);
9528 9544  }
9529 9545  
           /*
            * Place a share reservation on the file behind open state `sp`.
            *
            * `access` and `deny` are NFSv4 OPEN4_SHARE_ACCESS_* / OPEN4_SHARE_DENY_*
            * bits; they are translated to the F_RDACC/F_WRACC and F_RDDNY/F_WRDNY
            * bits of a struct shrlock and applied with VOP_SHRLOCK().
            *
            * The caller must hold the lock on sp->rs_dbe (asserted), and `access`
            * must contain at least one of the READ/WRITE bits (asserted).
            *
            * Returns 0 on success, after OR-ing `access` and `deny` into
            * sp->rs_share_access and sp->rs_share_deny.  Otherwise returns an
            * NFSv4 status: NFS4ERR_OLD_STATEID if the state is already closed,
            * NFS4ERR_SHARE_DENIED if VOP_SHRLOCK() fails with EAGAIN, or
            * puterrno4() of any other errno.
            */
9530 9546  int
9531 9547  rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9532 9548  {
9533 9549          int err;
9534 9550          int cmd;
9535 9551          vnode_t *vp;
9536 9552          struct shrlock shr;
9537 9553          struct shr_locowner shr_loco;
9538 9554          int fflags = 0;
9539 9555  
9540 9556          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9541 9557          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9542 9558  
9543 9559          if (sp->rs_closed)
9544 9560                  return (NFS4ERR_OLD_STATEID);
9545 9561  
9546 9562          vp = sp->rs_finfo->rf_vp;
9547 9563          ASSERT(vp);
9548 9564  
9549 9565          shr.s_access = shr.s_deny = 0;
9550 9566  
                   /* each access bit sets both the open-mode flag and the share bit */
9551 9567          if (access & OPEN4_SHARE_ACCESS_READ) {
9552 9568                  fflags |= FREAD;
9553 9569                  shr.s_access |= F_RDACC;
9554 9570          }
9555 9571          if (access & OPEN4_SHARE_ACCESS_WRITE) {
9556 9572                  fflags |= FWRITE;
9557 9573                  shr.s_access |= F_WRACC;
9558 9574          }
9559 9575          ASSERT(shr.s_access);
9560 9576  
9561 9577          if (deny & OPEN4_SHARE_DENY_READ)
9562 9578                  shr.s_deny |= F_RDDNY;
9563 9579          if (deny & OPEN4_SHARE_DENY_WRITE)
9564 9580                  shr.s_deny |= F_WRDNY;
9565 9581  
                   /*
                    * The reservation is identified by the open owner's database
                    * entry id (as the pid) and the client's sysid.
                    */
9566 9582          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9567 9583          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9568 9584          shr_loco.sl_pid = shr.s_pid;
9569 9585          shr_loco.sl_id = shr.s_sysid;
9570 9586          shr.s_owner = (caddr_t)&shr_loco;
9571 9587          shr.s_own_len = sizeof (shr_loco);
9572 9588  
                   /* use the NBMAND variant when nbl_need_check() says so for vp */
9573 9589          cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9574 9590  
9575 9591          err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9576 9592          if (err != 0) {
                           /* EAGAIN gets a dedicated status; the rest go via puterrno4 */
9577 9593                  if (err == EAGAIN)
9578 9594                          err = NFS4ERR_SHARE_DENIED;
9579 9595                  else
9580 9596                          err = puterrno4(err);
9581 9597                  return (err);
9582 9598          }
9583 9599  
                   /* record what is now reserved; bits accumulate across calls */
9584 9600          sp->rs_share_access |= access;
9585 9601          sp->rs_share_deny |= deny;
9586 9602  
9587 9603          return (0);
9588 9604  }
9589 9605  
           /*
            * Remove the share reservation held for open state `sp`.
            *
            * The caller must hold the lock on sp->rs_dbe (asserted).  Nothing is
            * done, and 0 is returned, if the state is already closed or no share
            * access is recorded in sp->rs_share_access.
            *
            * Returns 0 on success, after clearing sp->rs_share_access and
            * sp->rs_share_deny, or puterrno4() of the VOP_SHRLOCK(F_UNSHARE)
            * errno, in which case the recorded bits are left unchanged.
            */
9590 9606  int
9591 9607  rfs4_unshare(rfs4_state_t *sp)
9592 9608  {
9593 9609          int err;
9594 9610          struct shrlock shr;
9595 9611          struct shr_locowner shr_loco;
9596 9612  
9597 9613          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9598 9614  
9599 9615          if (sp->rs_closed || sp->rs_share_access == 0)
9600 9616                  return (0);
9601 9617  
9602 9618          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9603 9619          ASSERT(sp->rs_finfo->rf_vp);
9604 9620  
                   /*
                    * Access/deny are left at zero for F_UNSHARE; the owner
                    * identity is built the same way as in rfs4_share().
                    */
9605 9621          shr.s_access = shr.s_deny = 0;
9606 9622          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9607 9623          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9608 9624          shr_loco.sl_pid = shr.s_pid;
9609 9625          shr_loco.sl_id = shr.s_sysid;
9610 9626          shr.s_owner = (caddr_t)&shr_loco;
9611 9627          shr.s_own_len = sizeof (shr_loco);
9612 9628  
9613 9629          err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9614 9630              NULL);
9615 9631          if (err != 0) {
9616 9632                  err = puterrno4(err);
9617 9633                  return (err);
9618 9634          }
9619 9635  
                   /* the reservation is gone; forget the recorded bits */
9620 9636          sp->rs_share_access = 0;
9621 9637          sp->rs_share_deny = 0;
9622 9638  
9623 9639          return (0);
9624 9640  
9625 9641  }
9626 9642  
           /*
            * Set up the chunk list in args->wlist for rok->data_len bytes of READ
            * data by calling rdma_setup_read_chunks().
            *
            * Returns FALSE if rdma_setup_read_chunks() fails (rok is left
            * untouched).  Otherwise stores the computed length in rok->wlist_len
            * and args->wlist in rok->wlist, and returns TRUE.
            */
9627 9643  static int
9628 9644  rdma_setup_read_data4(READ4args *args, READ4res *rok)
9629 9645  {
9630 9646          struct clist    *wcl;
9631 9647          count4          count = rok->data_len;
9632 9648          int             wlist_len;
9633 9649  
9634 9650          wcl = args->wlist;
9635 9651          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9636 9652                  return (FALSE);
9637 9653          }
                   /* wcl was passed by value above, so this re-read is the same pointer */
9638 9654          wcl = args->wlist;
9639 9655          rok->wlist_len = wlist_len;
9640 9656          rok->wlist = wcl;
9641 9657          return (TRUE);
9642 9658  }
9643 9659  
9644 9660  /*
            * Tunable to disable server referrals.  When non-zero,
            * vn_is_nfs_reparse() returns B_FALSE without inspecting the vnode.
            */
9645 9661  int rfs4_no_referrals = 0;
9646 9662  
9647 9663  /*
9648 9664   * Find an NFS record in reparse point data.
9649 9665   * Returns 0 for success and <0 or an errno value on failure.
            *
            * The reparse data of `vp` is parsed into a fresh nvlist and the first
            * pair whose name begins with "NFS" (case-insensitive, first three
            * characters only) is selected.
            *
            * On success *nvlp is the nvlist, *svcp the selected pair's name and
            * *datap its string value; the caller must release the list with
            * reparse_free().  NOTE(review): *svcp and *datap presumably point into
            * the nvlist and so stay valid only until it is freed -- confirm.
            *
            * On failure the nvlist is freed here and the outputs are not written:
            *   -1      reparse_init() failed
            *   (err)   non-zero result of reparse_vnode_parse()
            *   -2      a pair with a NULL name was encountered
            *   -3      no "NFS" pair found, or its value is not readable as a string
9650 9666   */
9651 9667  int
9652 9668  vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9653 9669  {
9654 9670          int err;
9655 9671          char *stype, *val;
9656 9672          nvlist_t *nvl;
9657 9673          nvpair_t *curr;
9658 9674  
9659 9675          if ((nvl = reparse_init()) == NULL)
9660 9676                  return (-1);
9661 9677  
9662 9678          if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9663 9679                  reparse_free(nvl);
9664 9680                  return (err);
9665 9681          }
9666 9682  
                   /* walk the pairs; leaving via break keeps curr/stype on the match */
9667 9683          curr = NULL;
9668 9684          while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9669 9685                  if ((stype = nvpair_name(curr)) == NULL) {
9670 9686                          reparse_free(nvl);
9671 9687                          return (-2);
9672 9688                  }
9673 9689                  if (strncasecmp(stype, "NFS", 3) == 0)
9674 9690                          break;
9675 9691          }
9676 9692  
                   /* curr == NULL means the list was exhausted without a match */
9677 9693          if ((curr == NULL) ||
9678 9694              (nvpair_value_string(curr, &val))) {
9679 9695                  reparse_free(nvl);
9680 9696                  return (-3);
9681 9697          }
9682 9698          *nvlp = nvl;
9683 9699          *svcp = stype;
9684 9700          *datap = val;
9685 9701          return (0);
9686 9702  }
9687 9703  
           /*
            * Report whether `vp` is a reparse point that carries an NFS record.
            *
            * Returns B_FALSE when referrals are disabled through rfs4_no_referrals,
            * when vn_is_reparse() says vp is not a reparse point, or when
            * vn_find_nfs_record() fails; B_TRUE otherwise.  The record itself is
            * not returned: the nvlist is freed before returning.
            */
9688 9704  int
9689 9705  vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9690 9706  {
9691 9707          nvlist_t *nvl;
9692 9708          char *s, *d;
9693 9709  
9694 9710          if (rfs4_no_referrals != 0)
9695 9711                  return (B_FALSE);
9696 9712  
9697 9713          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9698 9714                  return (B_FALSE);
9699 9715  
                   /* s and d are required outputs of the lookup but are not used here */
9700 9716          if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9701 9717                  return (B_FALSE);
9702 9718  
9703 9719          reparse_free(nvl);
9704 9720  
9705 9721          return (B_TRUE);
9706 9722  }
9707 9723  
9708 9724  /*
9709 9725   * There is a user-level copy of this routine in ref_subr.c.
9710 9726   * Changes should be kept in sync.
            *
            * Split `path` into its '/'-separated components.  Empty components
            * (leading, trailing or repeated separators) are skipped.
            *
            * With comp4 == NULL the components are only counted.  With a non-NULL
            * comp4, each component is additionally converted with str_to_utf8()
            * into comp4[i]; the caller must supply an array large enough for the
            * count returned by a prior counting pass.
            *
            * Returns the number of components, or 0 when path is NULL.
            *
            * NOTE(review): slen is not checked against sizeof (buf) before the
            * bcopy(), so a single component of MAXNAMELEN bytes or more would
            * overrun buf -- confirm that callers bound the component length.
9711 9727   */
9712 9728  static int
9713 9729  nfs4_create_components(char *path, component4 *comp4)
9714 9730  {
9715 9731          int slen, plen, ncomp;
9716 9732          char *ori_path, *nxtc, buf[MAXNAMELEN];
9717 9733  
9718 9734          if (path == NULL)
9719 9735                  return (0);
9720 9736  
9721 9737          plen = strlen(path) + 1;        /* include the terminator */
9722 9738          ori_path = path;
9723 9739          ncomp = 0;
9724 9740  
                   /*
                    * From here on `path` tracks the start of the current component
                    * and nxtc is the scan position.
                    */
9725 9741          /* count number of components in the path */
9726 9742          for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9727 9743                  if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9728 9744                          if ((slen = nxtc - path) == 0) {
                                           /*
                                            * Empty component.  Note that this
                                            * `continue` also bypasses the
                                            * terminator test at the bottom of the
                                            * loop, so a '\n' that directly follows
                                            * a separator does not stop the scan.
                                            */
9729 9745                                  path = nxtc + 1;
9730 9746                                  continue;
9731 9747                          }
9732 9748  
9733 9749                          if (comp4 != NULL) {
                                           /* NUL-terminated copy for str_to_utf8() */
9734 9750                                  bcopy(path, buf, slen);
9735 9751                                  buf[slen] = '\0';
9736 9752                                  (void) str_to_utf8(buf, &comp4[ncomp]);
9737 9753                          }
9738 9754  
9739 9755                          ncomp++;        /* 1 valid component */
9740 9756                          path = nxtc + 1;
9741 9757                  }
                           /* end of string or newline ends the scan */
9742 9758                  if (*nxtc == '\0' || *nxtc == '\n')
9743 9759                          break;
9744 9760          }
9745 9761  
9746 9762          return (ncomp);
9747 9763  }
9748 9764  
9749 9765  /*
9750 9766   * There is a user-level copy of this routine in ref_subr.c.
9751 9767   * Changes should be kept in sync.
            *
            * Build a pathname4 from the string `path`.
            *
            * Returns the number of components stored in `pathname`.  Returns 0
            * without touching anything when pathname is NULL; returns 0 with
            * pathname set to { NULL, 0 } when path is NULL or contains no
            * components.
            *
            * The component array is allocated here with kmem_zalloc(KM_SLEEP) and
            * handed to the caller through pathname->pathname4_val.
            * NOTE(review): the caller is presumably responsible for freeing it
            * and the per-component data -- confirm.
9752 9768   */
9753 9769  static int
9754 9770  make_pathname4(char *path, pathname4 *pathname)
9755 9771  {
9756 9772          int ncomp;
9757 9773          component4 *comp4;
9758 9774  
9759 9775          if (pathname == NULL)
9760 9776                  return (0);
9761 9777  
9762 9778          if (path == NULL) {
9763 9779                  pathname->pathname4_val = NULL;
9764 9780                  pathname->pathname4_len = 0;
9765 9781                  return (0);
9766 9782          }
9767 9783  
                   /* first pass: comp4 == NULL means "count only" */
9768 9784          /* count number of components to alloc buffer */
9769 9785          if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9770 9786                  pathname->pathname4_val = NULL;
9771 9787                  pathname->pathname4_len = 0;
9772 9788                  return (0);
9773 9789          }
9774 9790          comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9775 9791  
                   /* second pass over the same string fills the array */
9776 9792          /* copy components into allocated buffer */
9777 9793          ncomp = nfs4_create_components(path, comp4);
9778 9794  
9779 9795          pathname->pathname4_val = comp4;
9780 9796          pathname->pathname4_len = ncomp;
9781 9797  
9782 9798          return (ncomp);
9783 9799  }
9784 9800  
9785 9801  #define xdr_fs_locations4 xdr_fattr4_fs_locations
9786 9802  
9787 9803  fs_locations4 *
9788 9804  fetch_referral(vnode_t *vp, cred_t *cr)
9789 9805  {
9790 9806          nvlist_t *nvl;
9791 9807          char *stype, *sdata;
9792 9808          fs_locations4 *result;
9793 9809          char buf[1024];
9794 9810          size_t bufsize;
9795 9811          XDR xdr;
9796 9812          int err;
9797 9813  
9798 9814          /*
9799 9815           * Check attrs to ensure it's a reparse point
9800 9816           */
9801 9817          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9802 9818                  return (NULL);
9803 9819  
9804 9820          /*
9805 9821           * Look for an NFS record and get the type and data
9806 9822           */
9807 9823          if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9808 9824                  return (NULL);
9809 9825  
9810 9826          /*
9811 9827           * With the type and data, upcall to get the referral
9812 9828           */
9813 9829          bufsize = sizeof (buf);
9814 9830          bzero(buf, sizeof (buf));
9815 9831          err = reparse_kderef((const char *)stype, (const char *)sdata,
9816 9832              buf, &bufsize);
9817 9833          reparse_free(nvl);
9818 9834  
9819 9835          DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9820 9836              char *, stype, char *, sdata, char *, buf, int, err);
9821 9837          if (err) {
9822 9838                  cmn_err(CE_NOTE,
9823 9839                      "reparsed daemon not running: unable to get referral (%d)",
9824 9840                      err);
9825 9841                  return (NULL);
9826 9842          }
9827 9843  
9828 9844          /*
9829 9845           * We get an XDR'ed record back from the kderef call
9830 9846           */
9831 9847          xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9832 9848          result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9833 9849          err = xdr_fs_locations4(&xdr, result);
9834 9850          XDR_DESTROY(&xdr);
9835 9851          if (err != TRUE) {
9836 9852                  DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9837 9853                      int, err);
9838 9854                  return (NULL);
9839 9855          }
9840 9856  
9841 9857          /*
9842 9858           * Look at path to recover fs_root, ignoring the leading '/'
9843 9859           */
9844 9860          (void) make_pathname4(vp->v_path, &result->fs_root);
9845 9861  
9846 9862          return (result);
9847 9863  }
9848 9864  
9849 9865  char *
9850 9866  build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9851 9867  {
9852 9868          fs_locations4 *fsl;
9853 9869          fs_location4 *fs;
9854 9870          char *server, *path, *symbuf;
9855 9871          static char *prefix = "/net/";
9856 9872          int i, size, npaths;
9857 9873          uint_t len;
9858 9874  
9859 9875          /* Get the referral */
9860 9876          if ((fsl = fetch_referral(vp, cr)) == NULL)
9861 9877                  return (NULL);
9862 9878  
9863 9879          /* Deal with only the first location and first server */
9864 9880          fs = &fsl->locations_val[0];
9865 9881          server = utf8_to_str(&fs->server_val[0], &len, NULL);
9866 9882          if (server == NULL) {
9867 9883                  rfs4_free_fs_locations4(fsl);
9868 9884                  kmem_free(fsl, sizeof (fs_locations4));
9869 9885                  return (NULL);
9870 9886          }
9871 9887  
9872 9888          /* Figure out size for "/net/" + host + /path/path/path + NULL */
9873 9889          size = strlen(prefix) + len;
9874 9890          for (i = 0; i < fs->rootpath.pathname4_len; i++)
9875 9891                  size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9876 9892  
9877 9893          /* Allocate the symlink buffer and fill it */
9878 9894          symbuf = kmem_zalloc(size, KM_SLEEP);
9879 9895          (void) strcat(symbuf, prefix);
9880 9896          (void) strcat(symbuf, server);
9881 9897          kmem_free(server, len);
9882 9898  
9883 9899          npaths = 0;
9884 9900          for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9885 9901                  path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9886 9902                  if (path == NULL)
9887 9903                          continue;
9888 9904                  (void) strcat(symbuf, "/");
9889 9905                  (void) strcat(symbuf, path);
9890 9906                  npaths++;
9891 9907                  kmem_free(path, len);
9892 9908          }
9893 9909  
9894 9910          rfs4_free_fs_locations4(fsl);
9895 9911          kmem_free(fsl, sizeof (fs_locations4));
9896 9912  
9897 9913          if (strsz != NULL)
9898 9914                  *strsz = size;
9899 9915          return (symbuf);
9900 9916  }
9901 9917  
9902 9918  /*
9903 9919   * Check to see if we have a downrev Solaris client, so that we
9904 9920   * can send it a symlink instead of a referral.
9905 9921   */
9906 9922  int
9907 9923  client_is_downrev(struct svc_req *req)
9908 9924  {
9909 9925          struct sockaddr *ca;
9910 9926          rfs4_clntip_t *ci;
9911 9927          bool_t create = FALSE;
9912 9928          int is_downrev;
9913 9929  
9914 9930          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9915 9931          ASSERT(ca);
9916 9932          ci = rfs4_find_clntip(ca, &create);
9917 9933          if (ci == NULL)
9918 9934                  return (0);
9919 9935          is_downrev = ci->ri_no_referrals;
9920 9936          rfs4_dbe_rele(ci->ri_dbe);
9921 9937          return (is_downrev);
9922 9938  }
9923 9939  
9924 9940  /*
9925 9941   * Do the main work of handling HA-NFSv4 Resource Group failover on
9926 9942   * Sun Cluster.
9927 9943   * We need to detect whether any RG admin paths have been added or removed,
9928 9944   * and adjust resources accordingly.
9929 9945   * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9930 9946   * order to scale, the list and array of paths need to be held in more
9931 9947   * suitable data structures.
9932 9948   */
9933 9949  static void
9934 9950  hanfsv4_failover(nfs4_srv_t *nsrv4)
9935 9951  {
9936 9952          int i, start_grace, numadded_paths = 0;
9937 9953          char **added_paths = NULL;
9938 9954          rfs4_dss_path_t *dss_path;
9939 9955  
9940 9956          /*
9941 9957           * Note: currently, dss_pathlist cannot be NULL, since
9942 9958           * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9943 9959           * make the latter dynamically specified too, the following will
9944 9960           * need to be adjusted.
9945 9961           */
9946 9962  
9947 9963          /*
9948 9964           * First, look for removed paths: RGs that have been failed-over
9949 9965           * away from this node.
9950 9966           * Walk the "currently-serving" dss_pathlist and, for each
9951 9967           * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9952 9968           * from nfsd. If not, that RG path has been removed.
9953 9969           *
9954 9970           * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9955 9971           * any duplicates.
9956 9972           */
9957 9973          dss_path = nsrv4->dss_pathlist;
9958 9974          do {
9959 9975                  int found = 0;
9960 9976                  char *path = dss_path->path;
9961 9977  
9962 9978                  /* used only for non-HA so may not be removed */
9963 9979                  if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9964 9980                          dss_path = dss_path->next;
9965 9981                          continue;
9966 9982                  }
9967 9983  
9968 9984                  for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9969 9985                          int cmpret;
9970 9986                          char *newpath = rfs4_dss_newpaths[i];
9971 9987  
9972 9988                          /*
9973 9989                           * Since nfsd has sorted rfs4_dss_newpaths for us,
9974 9990                           * once the return from strcmp is negative we know
9975 9991                           * we've passed the point where "path" should be,
9976 9992                           * and can stop searching: "path" has been removed.
9977 9993                           */
9978 9994                          cmpret = strcmp(path, newpath);
9979 9995                          if (cmpret < 0)
9980 9996                                  break;
9981 9997                          if (cmpret == 0) {
9982 9998                                  found = 1;
9983 9999                                  break;
9984 10000                          }
9985 10001                  }
9986 10002  
9987 10003                  if (found == 0) {
9988 10004                          unsigned index = dss_path->index;
9989 10005                          rfs4_servinst_t *sip = dss_path->sip;
9990 10006                          rfs4_dss_path_t *path_next = dss_path->next;
9991 10007  
9992 10008                          /*
9993 10009                           * This path has been removed.
9994 10010                           * We must clear out the servinst reference to
9995 10011                           * it, since it's now owned by another
9996 10012                           * node: we should not attempt to touch it.
9997 10013                           */
9998 10014                          ASSERT(dss_path == sip->dss_paths[index]);
9999 10015                          sip->dss_paths[index] = NULL;
10000 10016  
10001 10017                          /* remove from "currently-serving" list, and destroy */
10002 10018                          remque(dss_path);
10003 10019                          /* allow for NUL */
10004 10020                          kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10005 10021                          kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10006 10022  
10007 10023                          dss_path = path_next;
10008 10024                  } else {
10009 10025                          /* path was found; not removed */
10010 10026                          dss_path = dss_path->next;
10011 10027                  }
10012 10028          } while (dss_path != nsrv4->dss_pathlist);
10013 10029  
10014 10030          /*
10015 10031           * Now, look for added paths: RGs that have been failed-over
10016 10032           * to this node.
10017 10033           * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10018 10034           * for each path, check if it is on the "currently-serving"
10019 10035           * dss_pathlist. If not, that RG path has been added.
10020 10036           *
10021 10037           * Note: we don't do duplicate detection here; nfsd does that for us.
10022 10038           *
10023 10039           * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10024 10040           * an upper bound for the size needed for added_paths[numadded_paths].
10025 10041           */
10026 10042  
10027 10043          /* probably more space than we need, but guaranteed to be enough */
10028 10044          if (rfs4_dss_numnewpaths > 0) {
10029 10045                  size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10030 10046                  added_paths = kmem_zalloc(sz, KM_SLEEP);
10031 10047          }
10032 10048  
10033 10049          /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10034 10050          for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10035 10051                  int found = 0;
10036 10052                  char *newpath = rfs4_dss_newpaths[i];
10037 10053  
10038 10054                  dss_path = nsrv4->dss_pathlist;
10039 10055                  do {
10040 10056                          char *path = dss_path->path;
10041 10057  
10042 10058                          /* used only for non-HA */
10043 10059                          if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10044 10060                                  dss_path = dss_path->next;
10045 10061                                  continue;
10046 10062                          }
10047 10063  
10048 10064                          if (strncmp(path, newpath, strlen(path)) == 0) {
10049 10065                                  found = 1;
10050 10066                                  break;
10051 10067                          }
10052 10068  
10053 10069                          dss_path = dss_path->next;
10054 10070                  } while (dss_path != nsrv4->dss_pathlist);
10055 10071  
10056 10072                  if (found == 0) {
10057 10073                          added_paths[numadded_paths] = newpath;
10058 10074                          numadded_paths++;
10059 10075                  }
10060 10076          }
10061 10077  
10062 10078          /* did we find any added paths? */
10063 10079          if (numadded_paths > 0) {
10064 10080  
10065 10081                  /* create a new server instance, and start its grace period */
10066 10082                  start_grace = 1;
10067 10083                  /* CSTYLED */
10068 10084                  rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10069 10085  
10070 10086                  /* read in the stable storage state from these paths */
10071 10087                  rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10072 10088  
10073 10089                  /*
10074 10090                   * Multiple failovers during a grace period will cause
10075 10091                   * clients of the same resource group to be partitioned
10076 10092                   * into different server instances, with different
10077 10093                   * grace periods.  Since clients of the same resource
10078 10094                   * group must be subject to the same grace period,
10079 10095                   * we need to reset all currently active grace periods.
10080 10096                   */
10081 10097                  rfs4_grace_reset_all(nsrv4);
10082 10098          }
10083 10099  
10084 10100          if (rfs4_dss_numnewpaths > 0)
10085 10101                  kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10086 10102  }
  
    | 
      ↓ open down ↓ | 
    9217 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX