Print this page
    
Hyperaggressive asserts pt 2/N
Be far more judicious in the use of curzone-using macros.
(Merge and extra asserts by danmcd.)
dss_paths[] entries need cleanup too
Try to remove assumption that zone's root vnode is marked VROOT
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28   28   *      All Rights Reserved
  29   29   */
  30   30  
  31   31  /*
  32   32   * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33   33   * Copyright 2019 Nexenta Systems, Inc.
  34   34   * Copyright 2019 Nexenta by DDN, Inc.
  35   35   */
  36   36  
  37   37  #include <sys/param.h>
  38   38  #include <sys/types.h>
  39   39  #include <sys/systm.h>
  40   40  #include <sys/cred.h>
  41   41  #include <sys/buf.h>
  42   42  #include <sys/vfs.h>
  43   43  #include <sys/vfs_opreg.h>
  44   44  #include <sys/vnode.h>
  45   45  #include <sys/uio.h>
  46   46  #include <sys/errno.h>
  47   47  #include <sys/sysmacros.h>
  48   48  #include <sys/statvfs.h>
  49   49  #include <sys/kmem.h>
  50   50  #include <sys/dirent.h>
  51   51  #include <sys/cmn_err.h>
  52   52  #include <sys/debug.h>
  53   53  #include <sys/systeminfo.h>
  54   54  #include <sys/flock.h>
  55   55  #include <sys/pathname.h>
  56   56  #include <sys/nbmlock.h>
  57   57  #include <sys/share.h>
  58   58  #include <sys/atomic.h>
  59   59  #include <sys/policy.h>
  60   60  #include <sys/fem.h>
  61   61  #include <sys/sdt.h>
  62   62  #include <sys/ddi.h>
  63   63  #include <sys/zone.h>
  64   64  
  65   65  #include <fs/fs_reparse.h>
  66   66  
  67   67  #include <rpc/types.h>
  68   68  #include <rpc/auth.h>
  69   69  #include <rpc/rpcsec_gss.h>
  70   70  #include <rpc/svc.h>
  71   71  
  72   72  #include <nfs/nfs.h>
  73   73  #include <nfs/nfssys.h>
  74   74  #include <nfs/export.h>
  75   75  #include <nfs/nfs_cmd.h>
  76   76  #include <nfs/lm.h>
  77   77  #include <nfs/nfs4.h>
  78   78  #include <nfs/nfs4_drc.h>
  79   79  
  80   80  #include <sys/strsubr.h>
  81   81  #include <sys/strsun.h>
  82   82  
  83   83  #include <inet/common.h>
  84   84  #include <inet/ip.h>
  85   85  #include <inet/ip6.h>
  86   86  
  87   87  #include <sys/tsol/label.h>
  88   88  #include <sys/tsol/tndb.h>
  89   89  
  90   90  #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  91   91  static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  92   92  #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  93   93  static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  94   94  extern struct svc_ops rdma_svc_ops;
  95   95  extern int nfs_loaned_buffers;
  96   96  /* End of Tunables */
  97   97  
  98   98  static int rdma_setup_read_data4(READ4args *, READ4res *);
  99   99  
 100  100  /*
 101  101   * Used to bump the stateid4.seqid value and show changes in the stateid
 102  102   */
 103  103  #define next_stateid(sp) (++(sp)->bits.chgseq)
 104  104  
 105  105  /*
 106  106   * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  107   *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  108   *      maxcount that isn't large enough to hold the smallest possible
 109  109   *      XDR encoded dirent.
 110  110   *
 111  111   *          sizeof cookie (8 bytes) +
 112  112   *          sizeof name_len (4 bytes) +
 113  113   *          sizeof smallest (padded) name (4 bytes) +
 114  114   *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  115   *          sizeof attrlist4_len (4 bytes) +
 116  116   *          sizeof next boolean (4 bytes)
 117  117   *
 118  118   * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  119   * the smallest possible entry4 (assumes no attrs requested).
 120  120   *      sizeof nfsstat4 (4 bytes) +
 121  121   *      sizeof verifier4 (8 bytes) +
 122  122   *      sizeof entry4list bool (4 bytes) +
 123  123   *      sizeof entry4   (36 bytes) +
 124  124   *      sizeof eof bool  (4 bytes)
 125  125   *
 126  126   * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  127   *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  128   *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  129   *      required for a given name length.  MAXNAMELEN is the maximum
 130  130   *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  131   *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  132   *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  133   *      to hold ., .., and the largest possible solaris dirent64.
 134  134   */
 135  135  #define RFS4_MINLEN_ENTRY4 36
 136  136  #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137  137  #define RFS4_MINLEN_RDDIR_BUF \
 138  138          (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139  139  
 140  140  /*
 141  141   * It would be better to pad to 4 bytes since that's what XDR would do,
 142  142   * but the dirents UFS gives us are already padded to 8, so just take
 143  143   * what we're given.  Dircount is only a hint anyway.  Currently the
 144  144   * solaris kernel is ASCII only, so there's no point in calling the
 145  145   * UTF8 functions.
 146  146   *
  147  147   * dirent64: name padded to provide 8 byte struct alignment
 148  148   *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  149   *
 150  150   * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  151   *
 152  152   */
 153  153  #define DIRENT64_TO_DIRCOUNT(dp) \
 154  154          (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155  155  
 156  156  
 157  157  static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 158  158  
 159  159  u_longlong_t    nfs4_srv_caller_id;
 160  160  uint_t          nfs4_srv_vkey = 0;
 161  161  
 162  162  void    rfs4_init_compound_state(struct compound_state *);
 163  163  
 164  164  static void     nullfree(caddr_t);
 165  165  static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166  166                      struct compound_state *);
 167  167  static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168  168                      struct compound_state *);
 169  169  static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170  170                      struct compound_state *);
 171  171  static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172  172                      struct compound_state *);
 173  173  static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 174  174                      struct compound_state *);
 175  175  static void     rfs4_op_create_free(nfs_resop4 *resop);
 176  176  static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 177  177                      struct svc_req *, struct compound_state *);
 178  178  static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 179  179                      struct svc_req *, struct compound_state *);
 180  180  static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181  181                      struct compound_state *);
 182  182  static void     rfs4_op_getattr_free(nfs_resop4 *);
 183  183  static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 184  184                      struct compound_state *);
 185  185  static void     rfs4_op_getfh_free(nfs_resop4 *);
 186  186  static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187  187                      struct compound_state *);
 188  188  static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189  189                      struct compound_state *);
 190  190  static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 191  191                      struct compound_state *);
 192  192  static void     lock_denied_free(nfs_resop4 *);
 193  193  static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194  194                      struct compound_state *);
 195  195  static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196  196                      struct compound_state *);
 197  197  static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198  198                      struct compound_state *);
 199  199  static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 200  200                      struct compound_state *);
 201  201  static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 202  202                      struct svc_req *req, struct compound_state *cs);
 203  203  static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204  204                      struct compound_state *);
 205  205  static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 206  206                      struct compound_state *);
 207  207  static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 208  208                      struct svc_req *, struct compound_state *);
 209  209  static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 210  210                      struct svc_req *, struct compound_state *);
 211  211  static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212  212                      struct compound_state *);
 213  213  static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214  214                      struct compound_state *);
 215  215  static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216  216                      struct compound_state *);
 217  217  static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 218  218                      struct compound_state *);
 219  219  static void     rfs4_op_read_free(nfs_resop4 *);
 220  220  static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 221  221  static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 222  222                      struct compound_state *);
 223  223  static void     rfs4_op_readlink_free(nfs_resop4 *);
 224  224  static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 225  225                      struct svc_req *, struct compound_state *);
 226  226  static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227  227                      struct compound_state *);
 228  228  static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229  229                      struct compound_state *);
 230  230  static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231  231                      struct compound_state *);
 232  232  static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233  233                      struct compound_state *);
 234  234  static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235  235                      struct compound_state *);
 236  236  static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237  237                      struct compound_state *);
 238  238  static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239  239                      struct compound_state *);
 240  240  static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 241  241                      struct compound_state *);
 242  242  static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 243  243                      struct svc_req *, struct compound_state *);
 244  244  static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 245  245                      struct svc_req *req, struct compound_state *);
 246  246  static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 247  247                      struct compound_state *);
 248  248  static void     rfs4_op_secinfo_free(nfs_resop4 *);
 249  249  
 250  250  static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 251  251                      struct svc_req *);
 252  252  nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 253  253  void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 254  254  
 255  255  
 256  256  /*
 257  257   * translation table for attrs
 258  258   */
 259  259  struct nfs4_ntov_table {
 260  260          union nfs4_attr_u *na;
 261  261          uint8_t amap[NFS4_MAXNUM_ATTRS];
 262  262          int attrcnt;
 263  263          bool_t vfsstat;
 264  264  };
 265  265  
 266  266  static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 267  267  static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 268  268                      struct nfs4_svgetit_arg *sargp);
 269  269  
 270  270  static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 271  271                      struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 272  272                      struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 273  273  
 274  274  static void     hanfsv4_failover(nfs4_srv_t *);
 275  275  
 276  276  fem_t           *deleg_rdops;
 277  277  fem_t           *deleg_wrops;
 278  278  
 279  279  /*
 280  280   * NFS4 op dispatch table
 281  281   */
 282  282  
 283  283  struct rfsv4disp {
 284  284          void    (*dis_proc)();          /* proc to call */
 285  285          void    (*dis_resfree)();       /* frees space allocated by proc */
 286  286          int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 287  287  };
 288  288  
 289  289  static struct rfsv4disp rfsv4disptab[] = {
 290  290          /*
 291  291           * NFS VERSION 4
 292  292           */
 293  293  
 294  294          /* RFS_NULL = 0 */
 295  295          {rfs4_op_illegal, nullfree, 0},
 296  296  
 297  297          /* UNUSED = 1 */
 298  298          {rfs4_op_illegal, nullfree, 0},
 299  299  
 300  300          /* UNUSED = 2 */
 301  301          {rfs4_op_illegal, nullfree, 0},
 302  302  
 303  303          /* OP_ACCESS = 3 */
 304  304          {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 305  305  
 306  306          /* OP_CLOSE = 4 */
 307  307          {rfs4_op_close, nullfree, 0},
 308  308  
 309  309          /* OP_COMMIT = 5 */
 310  310          {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 311  311  
 312  312          /* OP_CREATE = 6 */
 313  313          {rfs4_op_create, nullfree, 0},
 314  314  
 315  315          /* OP_DELEGPURGE = 7 */
 316  316          {rfs4_op_delegpurge, nullfree, 0},
 317  317  
 318  318          /* OP_DELEGRETURN = 8 */
 319  319          {rfs4_op_delegreturn, nullfree, 0},
 320  320  
 321  321          /* OP_GETATTR = 9 */
 322  322          {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 323  323  
 324  324          /* OP_GETFH = 10 */
 325  325          {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 326  326  
 327  327          /* OP_LINK = 11 */
 328  328          {rfs4_op_link, nullfree, 0},
 329  329  
 330  330          /* OP_LOCK = 12 */
 331  331          {rfs4_op_lock, lock_denied_free, 0},
 332  332  
 333  333          /* OP_LOCKT = 13 */
 334  334          {rfs4_op_lockt, lock_denied_free, 0},
 335  335  
 336  336          /* OP_LOCKU = 14 */
 337  337          {rfs4_op_locku, nullfree, 0},
 338  338  
 339  339          /* OP_LOOKUP = 15 */
 340  340          {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 341  341  
 342  342          /* OP_LOOKUPP = 16 */
 343  343          {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 344  344  
 345  345          /* OP_NVERIFY = 17 */
 346  346          {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 347  347  
 348  348          /* OP_OPEN = 18 */
 349  349          {rfs4_op_open, rfs4_free_reply, 0},
 350  350  
 351  351          /* OP_OPENATTR = 19 */
 352  352          {rfs4_op_openattr, nullfree, 0},
 353  353  
 354  354          /* OP_OPEN_CONFIRM = 20 */
 355  355          {rfs4_op_open_confirm, nullfree, 0},
 356  356  
 357  357          /* OP_OPEN_DOWNGRADE = 21 */
 358  358          {rfs4_op_open_downgrade, nullfree, 0},
 359  359  
 360  360          /* OP_OPEN_PUTFH = 22 */
 361  361          {rfs4_op_putfh, nullfree, RPC_ALL},
 362  362  
 363  363          /* OP_PUTPUBFH = 23 */
 364  364          {rfs4_op_putpubfh, nullfree, RPC_ALL},
 365  365  
 366  366          /* OP_PUTROOTFH = 24 */
 367  367          {rfs4_op_putrootfh, nullfree, RPC_ALL},
 368  368  
 369  369          /* OP_READ = 25 */
 370  370          {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 371  371  
 372  372          /* OP_READDIR = 26 */
 373  373          {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 374  374  
 375  375          /* OP_READLINK = 27 */
 376  376          {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 377  377  
 378  378          /* OP_REMOVE = 28 */
 379  379          {rfs4_op_remove, nullfree, 0},
 380  380  
 381  381          /* OP_RENAME = 29 */
 382  382          {rfs4_op_rename, nullfree, 0},
 383  383  
 384  384          /* OP_RENEW = 30 */
 385  385          {rfs4_op_renew, nullfree, 0},
 386  386  
 387  387          /* OP_RESTOREFH = 31 */
 388  388          {rfs4_op_restorefh, nullfree, RPC_ALL},
 389  389  
 390  390          /* OP_SAVEFH = 32 */
 391  391          {rfs4_op_savefh, nullfree, RPC_ALL},
 392  392  
 393  393          /* OP_SECINFO = 33 */
 394  394          {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 395  395  
 396  396          /* OP_SETATTR = 34 */
 397  397          {rfs4_op_setattr, nullfree, 0},
 398  398  
 399  399          /* OP_SETCLIENTID = 35 */
 400  400          {rfs4_op_setclientid, nullfree, 0},
 401  401  
 402  402          /* OP_SETCLIENTID_CONFIRM = 36 */
 403  403          {rfs4_op_setclientid_confirm, nullfree, 0},
 404  404  
 405  405          /* OP_VERIFY = 37 */
 406  406          {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 407  407  
 408  408          /* OP_WRITE = 38 */
 409  409          {rfs4_op_write, nullfree, 0},
 410  410  
 411  411          /* OP_RELEASE_LOCKOWNER = 39 */
 412  412          {rfs4_op_release_lockowner, nullfree, 0},
 413  413  };
 414  414  
 415  415  static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 416  416  
 417  417  #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 418  418  
 419  419  #ifdef DEBUG
 420  420  
 421  421  int             rfs4_fillone_debug = 0;
 422  422  int             rfs4_no_stub_access = 1;
 423  423  int             rfs4_rddir_debug = 0;
 424  424  
 425  425  static char    *rfs4_op_string[] = {
 426  426          "rfs4_op_null",
 427  427          "rfs4_op_1 unused",
 428  428          "rfs4_op_2 unused",
 429  429          "rfs4_op_access",
 430  430          "rfs4_op_close",
 431  431          "rfs4_op_commit",
 432  432          "rfs4_op_create",
 433  433          "rfs4_op_delegpurge",
 434  434          "rfs4_op_delegreturn",
 435  435          "rfs4_op_getattr",
 436  436          "rfs4_op_getfh",
 437  437          "rfs4_op_link",
 438  438          "rfs4_op_lock",
 439  439          "rfs4_op_lockt",
 440  440          "rfs4_op_locku",
 441  441          "rfs4_op_lookup",
 442  442          "rfs4_op_lookupp",
 443  443          "rfs4_op_nverify",
 444  444          "rfs4_op_open",
 445  445          "rfs4_op_openattr",
 446  446          "rfs4_op_open_confirm",
 447  447          "rfs4_op_open_downgrade",
 448  448          "rfs4_op_putfh",
 449  449          "rfs4_op_putpubfh",
 450  450          "rfs4_op_putrootfh",
 451  451          "rfs4_op_read",
 452  452          "rfs4_op_readdir",
 453  453          "rfs4_op_readlink",
 454  454          "rfs4_op_remove",
 455  455          "rfs4_op_rename",
 456  456          "rfs4_op_renew",
 457  457          "rfs4_op_restorefh",
 458  458          "rfs4_op_savefh",
 459  459          "rfs4_op_secinfo",
 460  460          "rfs4_op_setattr",
 461  461          "rfs4_op_setclientid",
 462  462          "rfs4_op_setclient_confirm",
 463  463          "rfs4_op_verify",
 464  464          "rfs4_op_write",
 465  465          "rfs4_op_release_lockowner",
 466  466          "rfs4_op_illegal"
 467  467  };
 468  468  #endif
 469  469  
 470  470  void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 471  471  
 472  472  extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 473  473  
 474  474  extern void     rfs4_free_fs_locations4(fs_locations4 *);
 475  475  
 476  476  #ifdef  nextdp
 477  477  #undef nextdp
 478  478  #endif
 479  479  #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 480  480  
 481  481  static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 482  482          VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 483  483          VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 484  484          VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 485  485          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 486  486          VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 487  487          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 488  488          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 489  489          NULL,                   NULL
 490  490  };
 491  491  static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 492  492          VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 493  493          VOPNAME_READ,           { .femop_read = deleg_wr_read },
 494  494          VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 495  495          VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 496  496          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 497  497          VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 498  498          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 499  499          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 500  500          NULL,                   NULL
 501  501  };
 502  502  
 503  503  nfs4_srv_t *
 504  504  nfs4_get_srv(void)
 505  505  {
 506  506          nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
 507  507          nfs4_srv_t *srv = ng->nfs4_srv;
 508  508          ASSERT(srv != NULL);
 509  509          return (srv);
 510  510  }
 511  511  
 512  512  void
 513  513  rfs4_srv_zone_init(nfs_globals_t *ng)
 514  514  {
 515  515          nfs4_srv_t *nsrv4;
 516  516          timespec32_t verf;
 517  517  
 518  518          nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 519  519  
 520  520          /*
 521  521           * The following algorithm attempts to find a unique verifier
 522  522           * to be used as the write verifier returned from the server
 523  523           * to the client.  It is important that this verifier change
 524  524           * whenever the server reboots.  Of secondary importance, it
 525  525           * is important for the verifier to be unique between two
 526  526           * different servers.
 527  527           *
 528  528           * Thus, an attempt is made to use the system hostid and the
 529  529           * current time in seconds when the nfssrv kernel module is
 530  530           * loaded.  It is assumed that an NFS server will not be able
 531  531           * to boot and then to reboot in less than a second.  If the
 532  532           * hostid has not been set, then the current high resolution
 533  533           * time is used.  This will ensure different verifiers each
 534  534           * time the server reboots and minimize the chances that two
 535  535           * different servers will have the same verifier.
 536  536           * XXX - this is broken on LP64 kernels.
 537  537           */
 538  538          verf.tv_sec = (time_t)zone_get_hostid(NULL);
 539  539          if (verf.tv_sec != 0) {
 540  540                  verf.tv_nsec = gethrestime_sec();
 541  541          } else {
 542  542                  timespec_t tverf;
 543  543  
 544  544                  gethrestime(&tverf);
 545  545                  verf.tv_sec = (time_t)tverf.tv_sec;
 546  546                  verf.tv_nsec = tverf.tv_nsec;
 547  547          }
 548  548          nsrv4->write4verf = *(uint64_t *)&verf;
 549  549  
 550  550          /* Used to manage create/destroy of server state */
 551  551          nsrv4->nfs4_server_state = NULL;
 552  552          nsrv4->nfs4_cur_servinst = NULL;
 553  553          nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 554  554          mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 555  555          mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 556  556          mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 557  557          rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 558  558  
 559  559          ng->nfs4_srv = nsrv4;
 560  560  }
 561  561  
 562  562  void
 563  563  rfs4_srv_zone_fini(nfs_globals_t *ng)
 564  564  {
 565  565          nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 566  566  
 567  567          ng->nfs4_srv = NULL;
 568  568  
 569  569          mutex_destroy(&nsrv4->deleg_lock);
 570  570          mutex_destroy(&nsrv4->state_lock);
 571  571          mutex_destroy(&nsrv4->servinst_lock);
 572  572          rw_destroy(&nsrv4->deleg_policy_lock);
 573  573  
 574  574          kmem_free(nsrv4, sizeof (*nsrv4));
 575  575  }
 576  576  
 577  577  void
 578  578  rfs4_srvrinit(void)
 579  579  {
 580  580          extern void rfs4_attr_init();
 581  581  
 582  582          rfs4_attr_init();
 583  583  
 584  584          if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 585  585                  rfs4_disable_delegation();
 586  586          } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 587  587              &deleg_wrops) != 0) {
 588  588                  rfs4_disable_delegation();
 589  589                  fem_free(deleg_rdops);
 590  590          }
 591  591  
 592  592          nfs4_srv_caller_id = fs_new_caller_id();
 593  593          lockt_sysid = lm_alloc_sysidt();
 594  594          vsd_create(&nfs4_srv_vkey, NULL);
 595  595          rfs4_state_g_init();
 596  596  }
 597  597  
 598  598  void
 599  599  rfs4_srvrfini(void)
 600  600  {
 601  601          if (lockt_sysid != LM_NOSYSID) {
 602  602                  lm_free_sysidt(lockt_sysid);
 603  603                  lockt_sysid = LM_NOSYSID;
 604  604          }
 605  605  
 606  606          rfs4_state_g_fini();
 607  607  
 608  608          fem_free(deleg_rdops);
 609  609          fem_free(deleg_wrops);
 610  610  }
 611  611  
 612  612  void
 613  613  rfs4_do_server_start(int server_upordown,
 614  614      int srv_delegation, int cluster_booted)
 615  615  {
 616  616          nfs4_srv_t *nsrv4 = nfs4_get_srv();
 617  617  
 618  618          /* Is this a warm start? */
 619  619          if (server_upordown == NFS_SERVER_QUIESCED) {
 620  620                  cmn_err(CE_NOTE, "nfs4_srv: "
 621  621                      "server was previously quiesced; "
 622  622                      "existing NFSv4 state will be re-used");
 623  623  
 624  624                  /*
 625  625                   * HA-NFSv4: this is also the signal
 626  626                   * that a Resource Group failover has
 627  627                   * occurred.
 628  628                   */
 629  629                  if (cluster_booted)
 630  630                          hanfsv4_failover(nsrv4);
 631  631          } else {
 632  632                  /* Cold start */
 633  633                  nsrv4->rfs4_start_time = 0;
 634  634                  rfs4_state_zone_init(nsrv4);
 635  635                  nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 636  636                      nfs4_drc_hash);
 637  637  
 638  638                  /*
 639  639                   * The nfsd service was started with the -s option
 640  640                   * we need to pull in any state from the paths indicated.
 641  641                   */
 642  642                  if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 643  643                          /* read in the stable storage state from these paths */
 644  644                          rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 645  645                              rfs4_dss_newpaths);
 646  646                  }
 647  647          }
 648  648  
 649  649          /* Check if delegation is to be enabled */
 650  650          if (srv_delegation != FALSE)
 651  651                  rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 652  652  }
 653  653  
 654  654  void
 655  655  rfs4_init_compound_state(struct compound_state *cs)
 656  656  {
 657  657          bzero(cs, sizeof (*cs));
 658  658          cs->cont = TRUE;
 659  659          cs->access = CS_ACCESS_DENIED;
 660  660          cs->deleg = FALSE;
 661  661          cs->mandlock = FALSE;
 662  662          cs->fh.nfs_fh4_val = cs->fhbuf;
 663  663  }
 664  664  
 665  665  void
 666  666  rfs4_grace_start(rfs4_servinst_t *sip)
 667  667  {
 668  668          rw_enter(&sip->rwlock, RW_WRITER);
 669  669          sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 670  670          sip->grace_period = rfs4_grace_period;
 671  671          rw_exit(&sip->rwlock);
 672  672  }
 673  673  
 674  674  /*
 675  675   * returns true if the instance's grace period has never been started
 676  676   */
 677  677  int
 678  678  rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 679  679  {
 680  680          time_t start_time;
 681  681  
 682  682          rw_enter(&sip->rwlock, RW_READER);
 683  683          start_time = sip->start_time;
 684  684          rw_exit(&sip->rwlock);
 685  685  
 686  686          return (start_time == 0);
 687  687  }
 688  688  
 689  689  /*
 690  690   * Indicates if server instance is within the
 691  691   * grace period.
 692  692   */
 693  693  int
 694  694  rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 695  695  {
 696  696          time_t grace_expiry;
 697  697  
 698  698          rw_enter(&sip->rwlock, RW_READER);
 699  699          grace_expiry = sip->start_time + sip->grace_period;
 700  700          rw_exit(&sip->rwlock);
 701  701  
 702  702          return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 703  703  }
 704  704  
 705  705  int
 706  706  rfs4_clnt_in_grace(rfs4_client_t *cp)
 707  707  {
 708  708          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 709  709  
 710  710          return (rfs4_servinst_in_grace(cp->rc_server_instance));
 711  711  }
 712  712  
 713  713  /*
 714  714   * reset all currently active grace periods
 715  715   */
 716  716  void
 717  717  rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 718  718  {
 719  719          rfs4_servinst_t *sip;
 720  720  
 721  721          mutex_enter(&nsrv4->servinst_lock);
 722  722          for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 723  723                  if (rfs4_servinst_in_grace(sip))
 724  724                          rfs4_grace_start(sip);
 725  725          mutex_exit(&nsrv4->servinst_lock);
 726  726  }
 727  727  
 728  728  /*
 729  729   * start any new instances' grace periods
 730  730   */
 731  731  void
 732  732  rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 733  733  {
 734  734          rfs4_servinst_t *sip;
 735  735  
 736  736          mutex_enter(&nsrv4->servinst_lock);
 737  737          for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 738  738                  if (rfs4_servinst_grace_new(sip))
 739  739                          rfs4_grace_start(sip);
 740  740          mutex_exit(&nsrv4->servinst_lock);
 741  741  }
 742  742  
 743  743  static rfs4_dss_path_t *
 744  744  rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 745  745      char *path, unsigned index)
 746  746  {
 747  747          size_t len;
 748  748          rfs4_dss_path_t *dss_path;
 749  749  
 750  750          dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 751  751  
 752  752          /*
 753  753           * Take a copy of the string, since the original may be overwritten.
 754  754           * Sadly, no strdup() in the kernel.
 755  755           */
 756  756          /* allow for NUL */
 757  757          len = strlen(path) + 1;
 758  758          dss_path->path = kmem_alloc(len, KM_SLEEP);
 759  759          (void) strlcpy(dss_path->path, path, len);
 760  760  
 761  761          /* associate with servinst */
 762  762          dss_path->sip = sip;
 763  763          dss_path->index = index;
 764  764  
 765  765          /*
 766  766           * Add to list of served paths.
 767  767           * No locking required, as we're only ever called at startup.
 768  768           */
 769  769          if (nsrv4->dss_pathlist == NULL) {
 770  770                  /* this is the first dss_path_t */
 771  771  
 772  772                  /* needed for insque/remque */
 773  773                  dss_path->next = dss_path->prev = dss_path;
 774  774  
 775  775                  nsrv4->dss_pathlist = dss_path;
 776  776          } else {
 777  777                  insque(dss_path, nsrv4->dss_pathlist);
 778  778          }
 779  779  
 780  780          return (dss_path);
 781  781  }
 782  782  
 783  783  /*
 784  784   * Create a new server instance, and make it the currently active instance.
 785  785   * Note that starting the grace period too early will reduce the clients'
 786  786   * recovery window.
 787  787   */
 788  788  void
 789  789  rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 790  790      int dss_npaths, char **dss_paths)
 791  791  {
 792  792          unsigned i;
 793  793          rfs4_servinst_t *sip;
 794  794          rfs4_oldstate_t *oldstate;
 795  795  
 796  796          sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 797  797          rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 798  798  
 799  799          sip->start_time = (time_t)0;
 800  800          sip->grace_period = (time_t)0;
 801  801          sip->next = NULL;
 802  802          sip->prev = NULL;
 803  803  
 804  804          rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 805  805          /*
 806  806           * This initial dummy entry is required to setup for insque/remque.
 807  807           * It must be skipped over whenever the list is traversed.
 808  808           */
 809  809          oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 810  810          /* insque/remque require initial list entry to be self-terminated */
  
    | 
      ↓ open down ↓ | 
    810 lines elided | 
    
      ↑ open up ↑ | 
  
 811  811          oldstate->next = oldstate;
 812  812          oldstate->prev = oldstate;
 813  813          sip->oldstate = oldstate;
 814  814  
 815  815  
 816  816          sip->dss_npaths = dss_npaths;
 817  817          sip->dss_paths = kmem_alloc(dss_npaths *
 818  818              sizeof (rfs4_dss_path_t *), KM_SLEEP);
 819  819  
 820  820          for (i = 0; i < dss_npaths; i++) {
 821      -                /* CSTYLED */
 822      -                sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
      821 +                sip->dss_paths[i] =
      822 +                    rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 823  823          }
 824  824  
 825  825          mutex_enter(&nsrv4->servinst_lock);
 826  826          if (nsrv4->nfs4_cur_servinst != NULL) {
 827  827                  /* add to linked list */
 828  828                  sip->prev = nsrv4->nfs4_cur_servinst;
 829  829                  nsrv4->nfs4_cur_servinst->next = sip;
 830  830          }
 831  831          if (start_grace)
 832  832                  rfs4_grace_start(sip);
 833  833          /* make the new instance "current" */
 834  834          nsrv4->nfs4_cur_servinst = sip;
 835  835  
 836  836          mutex_exit(&nsrv4->servinst_lock);
 837  837  }
 838  838  
 839  839  /*
 840  840   * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 841  841   * all instances directly.
 842  842   */
 843  843  void
 844  844  rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 845  845  {
 846  846          rfs4_servinst_t *sip, *prev, *current;
 847  847  #ifdef DEBUG
 848  848          int n = 0;
 849  849  #endif
  
    | 
      ↓ open down ↓ | 
    17 lines elided | 
    
      ↑ open up ↑ | 
  
 850  850  
 851  851          mutex_enter(&nsrv4->servinst_lock);
 852  852          ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 853  853          current = nsrv4->nfs4_cur_servinst;
 854  854          nsrv4->nfs4_cur_servinst = NULL;
 855  855          for (sip = current; sip != NULL; sip = prev) {
 856  856                  prev = sip->prev;
 857  857                  rw_destroy(&sip->rwlock);
 858  858                  if (sip->oldstate)
 859  859                          kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 860      -                if (sip->dss_paths)
      860 +                if (sip->dss_paths) {
      861 +                        int i = sip->dss_npaths;
      862 +
      863 +                        while (i > 0) {
      864 +                                i--;
      865 +                                if (sip->dss_paths[i] != NULL) {
      866 +                                        char *path = sip->dss_paths[i]->path;
      867 +
      868 +                                        if (path != NULL) {
      869 +                                                kmem_free(path,
      870 +                                                    strlen(path) + 1);
      871 +                                        }
      872 +                                        kmem_free(sip->dss_paths[i],
      873 +                                            sizeof (rfs4_dss_path_t));
      874 +                                }
      875 +                        }
 861  876                          kmem_free(sip->dss_paths,
 862  877                              sip->dss_npaths * sizeof (rfs4_dss_path_t *));
      878 +                }
 863  879                  kmem_free(sip, sizeof (rfs4_servinst_t));
 864  880  #ifdef DEBUG
 865  881                  n++;
 866  882  #endif
 867  883          }
 868  884          mutex_exit(&nsrv4->servinst_lock);
 869  885  }
 870  886  
 871  887  /*
 872  888   * Assign the current server instance to a client_t.
 873  889   * Should be called with cp->rc_dbe held.
 874  890   */
 875  891  void
 876  892  rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 877  893      rfs4_servinst_t *sip)
 878  894  {
 879  895          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 880  896  
 881  897          /*
 882  898           * The lock ensures that if the current instance is in the process
 883  899           * of changing, we will see the new one.
 884  900           */
 885  901          mutex_enter(&nsrv4->servinst_lock);
 886  902          cp->rc_server_instance = sip;
 887  903          mutex_exit(&nsrv4->servinst_lock);
 888  904  }
 889  905  
 890  906  rfs4_servinst_t *
 891  907  rfs4_servinst(rfs4_client_t *cp)
 892  908  {
 893  909          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 894  910  
 895  911          return (cp->rc_server_instance);
 896  912  }
 897  913  
 898  914  /* ARGSUSED */
 899  915  static void
 900  916  nullfree(caddr_t resop)
 901  917  {
 902  918  }
 903  919  
 904  920  /*
 905  921   * This is a fall-through for invalid or not implemented (yet) ops
 906  922   */
 907  923  /* ARGSUSED */
 908  924  static void
 909  925  rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 910  926      struct compound_state *cs)
 911  927  {
 912  928          *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 913  929  }
 914  930  
 915  931  /*
 916  932   * Check if the security flavor, nfsnum, is in the flavor_list.
 917  933   */
 918  934  bool_t
 919  935  in_flavor_list(int nfsnum, int *flavor_list, int count)
 920  936  {
 921  937          int i;
 922  938  
 923  939          for (i = 0; i < count; i++) {
 924  940                  if (nfsnum == flavor_list[i])
 925  941                          return (TRUE);
 926  942          }
 927  943          return (FALSE);
 928  944  }
 929  945  
  
    | 
      ↓ open down ↓ | 
    57 lines elided | 
    
      ↑ open up ↑ | 
  
 930  946  /*
 931  947   * Used by rfs4_op_secinfo to get the security information from the
 932  948   * export structure associated with the component.
 933  949   */
 934  950  /* ARGSUSED */
 935  951  static nfsstat4
 936  952  do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 937  953  {
 938  954          int error, different_export = 0;
 939  955          vnode_t *dvp, *vp;
 940      -        struct exportinfo *exi = NULL;
      956 +        struct exportinfo *exi;
 941  957          fid_t fid;
 942  958          uint_t count, i;
 943  959          secinfo4 *resok_val;
 944  960          struct secinfo *secp;
 945  961          seconfig_t *si;
 946  962          bool_t did_traverse = FALSE;
 947  963          int dotdot, walk;
 948  964          nfs_export_t *ne = nfs_get_export();
 949  965  
 950  966          dvp = cs->vp;
      967 +        exi = cs->exi;
      968 +        ASSERT(exi != NULL);
 951  969          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 952  970  
 953  971          /*
 954  972           * If dotdotting, then need to check whether it's above the
 955  973           * root of a filesystem, or above an export point.
 956  974           */
 957  975          if (dotdot) {
 958      -
      976 +                ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
 959  977                  /*
 960  978                   * If dotdotting at the root of a filesystem, then
 961  979                   * need to traverse back to the mounted-on filesystem
 962  980                   * and do the dotdot lookup there.
 963  981                   */
 964      -                if (cs->vp->v_flag & VROOT) {
      982 +                if ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp)) {
 965  983  
 966  984                          /*
 967  985                           * If at the system root, then can
 968  986                           * go up no further.
 969  987                           */
 970  988                          if (VN_CMP(dvp, ZONE_ROOTVP()))
 971  989                                  return (puterrno4(ENOENT));
 972  990  
 973  991                          /*
 974  992                           * Traverse back to the mounted-on filesystem
 975  993                           */
 976      -                        dvp = untraverse(cs->vp);
      994 +                        dvp = untraverse(dvp);
 977  995  
 978  996                          /*
 979  997                           * Set the different_export flag so we remember
 980  998                           * to pick up a new exportinfo entry for
 981  999                           * this new filesystem.
 982 1000                           */
 983 1001                          different_export = 1;
 984 1002                  } else {
 985 1003  
 986 1004                          /*
 987 1005                           * If dotdotting above an export point then set
 988 1006                           * the different_export to get new export info.
 989 1007                           */
 990      -                        different_export = nfs_exported(cs->exi, cs->vp);
     1008 +                        different_export = nfs_exported(exi, dvp);
 991 1009                  }
 992 1010          }
 993 1011  
 994 1012          /*
 995 1013           * Get the vnode for the component "nm".
 996 1014           */
 997 1015          error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 998 1016              NULL, NULL, NULL);
 999 1017          if (error)
1000 1018                  return (puterrno4(error));
1001 1019  
1002 1020          /*
1003 1021           * If the vnode is in a pseudo filesystem, or if the security flavor
1004 1022           * used in the request is valid but not an explicitly shared flavor,
1005 1023           * or the access bit indicates that this is a limited access,
1006 1024           * check whether this vnode is visible.
1007 1025           */
1008 1026          if (!different_export &&
1009      -            (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
     1027 +            (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1010 1028              cs->access & CS_ACCESS_LIMITED)) {
1011      -                if (! nfs_visible(cs->exi, vp, &different_export)) {
     1029 +                if (! nfs_visible(exi, vp, &different_export)) {
1012 1030                          VN_RELE(vp);
1013 1031                          return (puterrno4(ENOENT));
1014 1032                  }
1015 1033          }
1016 1034  
1017 1035          /*
1018 1036           * If it's a mountpoint, then traverse it.
1019 1037           */
1020 1038          if (vn_ismntpt(vp)) {
1021 1039                  if ((error = traverse(&vp)) != 0) {
1022 1040                          VN_RELE(vp);
1023 1041                          return (puterrno4(error));
1024 1042                  }
1025 1043                  /* remember that we had to traverse mountpoint */
1026 1044                  did_traverse = TRUE;
1027 1045                  different_export = 1;
1028 1046          } else if (vp->v_vfsp != dvp->v_vfsp) {
1029 1047                  /*
1030 1048                   * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1031 1049                   * then vp is probably an LOFS object.  We don't need the
1032 1050                   * realvp, we just need to know that we might have crossed
1033 1051                   * a server fs boundary and need to call checkexport4.
1034 1052                   * (LOFS lookup hides server fs mountpoints, and actually calls
1035 1053                   * traverse)
1036 1054                   */
1037 1055                  different_export = 1;
1038 1056          }
1039 1057  
1040 1058          /*
1041 1059           * Get the export information for it.
1042 1060           */
  
    | 
      ↓ open down ↓ | 
    21 lines elided | 
    
      ↑ open up ↑ | 
  
1043 1061          if (different_export) {
1044 1062  
1045 1063                  bzero(&fid, sizeof (fid));
1046 1064                  fid.fid_len = MAXFIDSZ;
1047 1065                  error = vop_fid_pseudo(vp, &fid);
1048 1066                  if (error) {
1049 1067                          VN_RELE(vp);
1050 1068                          return (puterrno4(error));
1051 1069                  }
1052 1070  
     1071 +                /* We'll need to reassign "exi". */
1053 1072                  if (dotdot)
1054 1073                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1055 1074                  else
1056 1075                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1057 1076  
1058 1077                  if (exi == NULL) {
1059 1078                          if (did_traverse == TRUE) {
1060 1079                                  /*
1061 1080                                   * If this vnode is a mounted-on vnode,
1062 1081                                   * but the mounted-on file system is not
1063 1082                                   * exported, send back the secinfo for
1064 1083                                   * the exported node that the mounted-on
1065 1084                                   * vnode lives in.
1066 1085                                   */
1067 1086                                  exi = cs->exi;
1068 1087                          } else {
1069 1088                                  VN_RELE(vp);
1070 1089                                  return (puterrno4(EACCES));
1071 1090                          }
1072 1091                  }
1073      -        } else {
1074      -                exi = cs->exi;
1075 1092          }
1076 1093          ASSERT(exi != NULL);
1077 1094  
1078 1095  
1079 1096          /*
1080 1097           * Create the secinfo result based on the security information
1081 1098           * from the exportinfo structure (exi).
1082 1099           *
1083 1100           * Return all flavors for a pseudo node.
1084 1101           * For a real export node, return the flavor that the client
1085 1102           * has access with.
1086 1103           */
1087 1104          ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1088 1105          if (PSEUDO(exi)) {
1089 1106                  count = exi->exi_export.ex_seccnt; /* total sec count */
1090 1107                  resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1091 1108                  secp = exi->exi_export.ex_secinfo;
1092 1109  
1093 1110                  for (i = 0; i < count; i++) {
1094 1111                          si = &secp[i].s_secinfo;
1095 1112                          resok_val[i].flavor = si->sc_rpcnum;
1096 1113                          if (resok_val[i].flavor == RPCSEC_GSS) {
1097 1114                                  rpcsec_gss_info *info;
1098 1115  
1099 1116                                  info = &resok_val[i].flavor_info;
1100 1117                                  info->qop = si->sc_qop;
1101 1118                                  info->service = (rpc_gss_svc_t)si->sc_service;
1102 1119  
1103 1120                                  /* get oid opaque data */
1104 1121                                  info->oid.sec_oid4_len =
1105 1122                                      si->sc_gss_mech_type->length;
1106 1123                                  info->oid.sec_oid4_val = kmem_alloc(
1107 1124                                      si->sc_gss_mech_type->length, KM_SLEEP);
1108 1125                                  bcopy(
1109 1126                                      si->sc_gss_mech_type->elements,
1110 1127                                      info->oid.sec_oid4_val,
1111 1128                                      info->oid.sec_oid4_len);
1112 1129                          }
1113 1130                  }
1114 1131                  resp->SECINFO4resok_len = count;
1115 1132                  resp->SECINFO4resok_val = resok_val;
1116 1133          } else {
1117 1134                  int ret_cnt = 0, k = 0;
1118 1135                  int *flavor_list;
1119 1136  
1120 1137                  count = exi->exi_export.ex_seccnt; /* total sec count */
1121 1138                  secp = exi->exi_export.ex_secinfo;
1122 1139  
1123 1140                  flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1124 1141                  /* find out which flavors to return */
1125 1142                  for (i = 0; i < count; i ++) {
1126 1143                          int access, flavor, perm;
1127 1144  
1128 1145                          flavor = secp[i].s_secinfo.sc_nfsnum;
1129 1146                          perm = secp[i].s_flags;
1130 1147  
1131 1148                          access = nfsauth4_secinfo_access(exi, cs->req,
1132 1149                              flavor, perm, cs->basecr);
1133 1150  
1134 1151                          if (! (access & NFSAUTH_DENIED) &&
1135 1152                              ! (access & NFSAUTH_WRONGSEC)) {
1136 1153                                  flavor_list[ret_cnt] = flavor;
1137 1154                                  ret_cnt++;
1138 1155                          }
1139 1156                  }
1140 1157  
1141 1158                  /* Create the returning SECINFO value */
1142 1159                  resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1143 1160  
1144 1161                  for (i = 0; i < count; i++) {
1145 1162                          /*
1146 1163                           * If the flavor is in the flavor list,
1147 1164                           * fill in resok_val.
1148 1165                           */
1149 1166                          si = &secp[i].s_secinfo;
1150 1167                          if (in_flavor_list(si->sc_nfsnum,
1151 1168                              flavor_list, ret_cnt)) {
1152 1169                                  resok_val[k].flavor = si->sc_rpcnum;
1153 1170                                  if (resok_val[k].flavor == RPCSEC_GSS) {
1154 1171                                          rpcsec_gss_info *info;
1155 1172  
1156 1173                                          info = &resok_val[k].flavor_info;
1157 1174                                          info->qop = si->sc_qop;
1158 1175                                          info->service = (rpc_gss_svc_t)
1159 1176                                              si->sc_service;
1160 1177  
1161 1178                                          /* get oid opaque data */
1162 1179                                          info->oid.sec_oid4_len =
1163 1180                                              si->sc_gss_mech_type->length;
1164 1181                                          info->oid.sec_oid4_val = kmem_alloc(
1165 1182                                              si->sc_gss_mech_type->length,
1166 1183                                              KM_SLEEP);
1167 1184                                          bcopy(si->sc_gss_mech_type->elements,
1168 1185                                              info->oid.sec_oid4_val,
1169 1186                                              info->oid.sec_oid4_len);
1170 1187                                  }
1171 1188                                  k++;
1172 1189                          }
1173 1190                          if (k >= ret_cnt)
1174 1191                                  break;
1175 1192                  }
1176 1193                  resp->SECINFO4resok_len = ret_cnt;
1177 1194                  resp->SECINFO4resok_val = resok_val;
1178 1195                  kmem_free(flavor_list, count * sizeof (int));
1179 1196          }
1180 1197  
1181 1198          VN_RELE(vp);
1182 1199          return (NFS4_OK);
1183 1200  }
1184 1201  
1185 1202  /*
1186 1203   * SECINFO (Operation 33): Obtain required security information on
1187 1204   * the component name in the format of (security-mechanism-oid, qop, service)
1188 1205   * triplets.
1189 1206   */
1190 1207  /* ARGSUSED */
1191 1208  static void
1192 1209  rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1193 1210      struct compound_state *cs)
1194 1211  {
1195 1212          SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1196 1213          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1197 1214          utf8string *utfnm = &args->name;
1198 1215          uint_t len;
1199 1216          char *nm;
1200 1217          struct sockaddr *ca;
1201 1218          char *name = NULL;
1202 1219          nfsstat4 status = NFS4_OK;
1203 1220  
1204 1221          DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1205 1222              SECINFO4args *, args);
1206 1223  
1207 1224          /*
1208 1225           * Current file handle (cfh) should have been set before getting
1209 1226           * into this function. If not, return error.
1210 1227           */
1211 1228          if (cs->vp == NULL) {
1212 1229                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1213 1230                  goto out;
1214 1231          }
1215 1232  
1216 1233          if (cs->vp->v_type != VDIR) {
1217 1234                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1218 1235                  goto out;
1219 1236          }
1220 1237  
1221 1238          /*
1222 1239           * Verify the component name. If failed, error out, but
1223 1240           * do not error out if the component name is a "..".
1224 1241           * SECINFO will return its parent's secinfo data for SECINFO "..".
1225 1242           */
1226 1243          status = utf8_dir_verify(utfnm);
1227 1244          if (status != NFS4_OK) {
1228 1245                  if (utfnm->utf8string_len != 2 ||
1229 1246                      utfnm->utf8string_val[0] != '.' ||
1230 1247                      utfnm->utf8string_val[1] != '.') {
1231 1248                          *cs->statusp = resp->status = status;
1232 1249                          goto out;
1233 1250                  }
1234 1251          }
1235 1252  
1236 1253          nm = utf8_to_str(utfnm, &len, NULL);
1237 1254          if (nm == NULL) {
1238 1255                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1239 1256                  goto out;
1240 1257          }
1241 1258  
1242 1259          if (len > MAXNAMELEN) {
1243 1260                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1244 1261                  kmem_free(nm, len);
1245 1262                  goto out;
1246 1263          }
1247 1264  
1248 1265          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1249 1266          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1250 1267              MAXPATHLEN  + 1);
1251 1268  
1252 1269          if (name == NULL) {
1253 1270                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1254 1271                  kmem_free(nm, len);
1255 1272                  goto out;
1256 1273          }
1257 1274  
1258 1275  
1259 1276          *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1260 1277  
1261 1278          if (name != nm)
1262 1279                  kmem_free(name, MAXPATHLEN + 1);
1263 1280          kmem_free(nm, len);
1264 1281  
1265 1282  out:
1266 1283          DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1267 1284              SECINFO4res *, resp);
1268 1285  }
1269 1286  
1270 1287  /*
1271 1288   * Free SECINFO result.
1272 1289   */
1273 1290  /* ARGSUSED */
1274 1291  static void
1275 1292  rfs4_op_secinfo_free(nfs_resop4 *resop)
1276 1293  {
1277 1294          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1278 1295          int count, i;
1279 1296          secinfo4 *resok_val;
1280 1297  
1281 1298          /* If this is not an Ok result, nothing to free. */
1282 1299          if (resp->status != NFS4_OK) {
1283 1300                  return;
1284 1301          }
1285 1302  
1286 1303          count = resp->SECINFO4resok_len;
1287 1304          resok_val = resp->SECINFO4resok_val;
1288 1305  
1289 1306          for (i = 0; i < count; i++) {
1290 1307                  if (resok_val[i].flavor == RPCSEC_GSS) {
1291 1308                          rpcsec_gss_info *info;
1292 1309  
1293 1310                          info = &resok_val[i].flavor_info;
1294 1311                          kmem_free(info->oid.sec_oid4_val,
1295 1312                              info->oid.sec_oid4_len);
1296 1313                  }
1297 1314          }
1298 1315          kmem_free(resok_val, count * sizeof (secinfo4));
1299 1316          resp->SECINFO4resok_len = 0;
1300 1317          resp->SECINFO4resok_val = NULL;
1301 1318  }
1302 1319  
1303 1320  /* ARGSUSED */
1304 1321  static void
1305 1322  rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1306 1323      struct compound_state *cs)
1307 1324  {
1308 1325          ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1309 1326          ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1310 1327          int error;
1311 1328          vnode_t *vp;
1312 1329          struct vattr va;
1313 1330          int checkwriteperm;
1314 1331          cred_t *cr = cs->cr;
1315 1332          bslabel_t *clabel, *slabel;
1316 1333          ts_label_t *tslabel;
1317 1334          boolean_t admin_low_client;
1318 1335  
1319 1336          DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1320 1337              ACCESS4args *, args);
1321 1338  
1322 1339  #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1323 1340          if (cs->access == CS_ACCESS_DENIED) {
1324 1341                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1325 1342                  goto out;
1326 1343          }
1327 1344  #endif
1328 1345          if (cs->vp == NULL) {
1329 1346                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1330 1347                  goto out;
1331 1348          }
1332 1349  
1333 1350          ASSERT(cr != NULL);
1334 1351  
1335 1352          vp = cs->vp;
1336 1353  
1337 1354          /*
1338 1355           * If the file system is exported read only, it is not appropriate
1339 1356           * to check write permissions for regular files and directories.
1340 1357           * Special files are interpreted by the client, so the underlying
1341 1358           * permissions are sent back to the client for interpretation.
1342 1359           */
1343 1360          if (rdonly4(req, cs) &&
1344 1361              (vp->v_type == VREG || vp->v_type == VDIR))
1345 1362                  checkwriteperm = 0;
1346 1363          else
1347 1364                  checkwriteperm = 1;
1348 1365  
1349 1366          /*
1350 1367           * XXX
1351 1368           * We need the mode so that we can correctly determine access
1352 1369           * permissions relative to a mandatory lock file.  Access to
1353 1370           * mandatory lock files is denied on the server, so it might
1354 1371           * as well be reflected to the client during the open.
1355 1372           */
1356 1373          va.va_mask = AT_MODE;
1357 1374          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1358 1375          if (error) {
1359 1376                  *cs->statusp = resp->status = puterrno4(error);
1360 1377                  goto out;
1361 1378          }
1362 1379          resp->access = 0;
1363 1380          resp->supported = 0;
1364 1381  
1365 1382          if (is_system_labeled()) {
1366 1383                  ASSERT(req->rq_label != NULL);
1367 1384                  clabel = req->rq_label;
1368 1385                  DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1369 1386                      "got client label from request(1)",
1370 1387                      struct svc_req *, req);
1371 1388                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
1372 1389                          if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1373 1390                                  *cs->statusp = resp->status = puterrno4(EACCES);
1374 1391                                  goto out;
1375 1392                          }
1376 1393                          slabel = label2bslabel(tslabel);
1377 1394                          DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1378 1395                              char *, "got server label(1) for vp(2)",
1379 1396                              bslabel_t *, slabel, vnode_t *, vp);
1380 1397  
1381 1398                          admin_low_client = B_FALSE;
1382 1399                  } else
1383 1400                          admin_low_client = B_TRUE;
1384 1401          }
1385 1402  
1386 1403          if (args->access & ACCESS4_READ) {
1387 1404                  error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1388 1405                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1389 1406                      (!is_system_labeled() || admin_low_client ||
1390 1407                      bldominates(clabel, slabel)))
1391 1408                          resp->access |= ACCESS4_READ;
1392 1409                  resp->supported |= ACCESS4_READ;
1393 1410          }
1394 1411          if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1395 1412                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1396 1413                  if (!error && (!is_system_labeled() || admin_low_client ||
1397 1414                      bldominates(clabel, slabel)))
1398 1415                          resp->access |= ACCESS4_LOOKUP;
1399 1416                  resp->supported |= ACCESS4_LOOKUP;
1400 1417          }
1401 1418          if (checkwriteperm &&
1402 1419              (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1403 1420                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1404 1421                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1405 1422                      (!is_system_labeled() || admin_low_client ||
1406 1423                      blequal(clabel, slabel)))
1407 1424                          resp->access |=
1408 1425                              (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1409 1426                  resp->supported |=
1410 1427                      resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1411 1428          }
1412 1429  
1413 1430          if (checkwriteperm &&
1414 1431              (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1415 1432                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1416 1433                  if (!error && (!is_system_labeled() || admin_low_client ||
1417 1434                      blequal(clabel, slabel)))
1418 1435                          resp->access |= ACCESS4_DELETE;
1419 1436                  resp->supported |= ACCESS4_DELETE;
1420 1437          }
1421 1438          if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1422 1439                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1423 1440                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1424 1441                      (!is_system_labeled() || admin_low_client ||
1425 1442                      bldominates(clabel, slabel)))
1426 1443                          resp->access |= ACCESS4_EXECUTE;
1427 1444                  resp->supported |= ACCESS4_EXECUTE;
1428 1445          }
1429 1446  
1430 1447          if (is_system_labeled() && !admin_low_client)
1431 1448                  label_rele(tslabel);
1432 1449  
1433 1450          *cs->statusp = resp->status = NFS4_OK;
1434 1451  out:
1435 1452          DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1436 1453              ACCESS4res *, resp);
1437 1454  }
1438 1455  
1439 1456  /* ARGSUSED */
1440 1457  static void
1441 1458  rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1442 1459      struct compound_state *cs)
1443 1460  {
1444 1461          COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1445 1462          COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1446 1463          int error;
1447 1464          vnode_t *vp = cs->vp;
1448 1465          cred_t *cr = cs->cr;
1449 1466          vattr_t va;
1450 1467          nfs4_srv_t *nsrv4;
1451 1468  
1452 1469          DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1453 1470              COMMIT4args *, args);
1454 1471  
1455 1472          if (vp == NULL) {
1456 1473                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1457 1474                  goto out;
1458 1475          }
1459 1476          if (cs->access == CS_ACCESS_DENIED) {
1460 1477                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1461 1478                  goto out;
1462 1479          }
1463 1480  
1464 1481          if (args->offset + args->count < args->offset) {
1465 1482                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1466 1483                  goto out;
1467 1484          }
1468 1485  
1469 1486          va.va_mask = AT_UID;
1470 1487          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1471 1488  
1472 1489          /*
1473 1490           * If we can't get the attributes, then we can't do the
1474 1491           * right access checking.  So, we'll fail the request.
1475 1492           */
1476 1493          if (error) {
1477 1494                  *cs->statusp = resp->status = puterrno4(error);
1478 1495                  goto out;
1479 1496          }
1480 1497          if (rdonly4(req, cs)) {
1481 1498                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1482 1499                  goto out;
1483 1500          }
1484 1501  
1485 1502          if (vp->v_type != VREG) {
1486 1503                  if (vp->v_type == VDIR)
1487 1504                          resp->status = NFS4ERR_ISDIR;
1488 1505                  else
1489 1506                          resp->status = NFS4ERR_INVAL;
1490 1507                  *cs->statusp = resp->status;
1491 1508                  goto out;
1492 1509          }
1493 1510  
1494 1511          if (crgetuid(cr) != va.va_uid &&
1495 1512              (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1496 1513                  *cs->statusp = resp->status = puterrno4(error);
1497 1514                  goto out;
1498 1515          }
1499 1516  
1500 1517          error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1501 1518  
1502 1519          if (error) {
1503 1520                  *cs->statusp = resp->status = puterrno4(error);
1504 1521                  goto out;
1505 1522          }
1506 1523  
1507 1524          nsrv4 = nfs4_get_srv();
1508 1525          *cs->statusp = resp->status = NFS4_OK;
1509 1526          resp->writeverf = nsrv4->write4verf;
1510 1527  out:
1511 1528          DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1512 1529              COMMIT4res *, resp);
1513 1530  }
1514 1531  
1515 1532  /*
1516 1533   * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1517 1534   * was completed. It does the nfsv4 create for special files.
1518 1535   */
1519 1536  /* ARGSUSED */
1520 1537  static vnode_t *
1521 1538  do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1522 1539      struct compound_state *cs, vattr_t *vap, char *nm)
1523 1540  {
1524 1541          int error;
1525 1542          cred_t *cr = cs->cr;
1526 1543          vnode_t *dvp = cs->vp;
1527 1544          vnode_t *vp = NULL;
1528 1545          int mode;
1529 1546          enum vcexcl excl;
1530 1547  
1531 1548          switch (args->type) {
1532 1549          case NF4CHR:
1533 1550          case NF4BLK:
1534 1551                  if (secpolicy_sys_devices(cr) != 0) {
1535 1552                          *cs->statusp = resp->status = NFS4ERR_PERM;
1536 1553                          return (NULL);
1537 1554                  }
1538 1555                  if (args->type == NF4CHR)
1539 1556                          vap->va_type = VCHR;
1540 1557                  else
1541 1558                          vap->va_type = VBLK;
1542 1559                  vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1543 1560                      args->ftype4_u.devdata.specdata2);
1544 1561                  vap->va_mask |= AT_RDEV;
1545 1562                  break;
1546 1563          case NF4SOCK:
1547 1564                  vap->va_type = VSOCK;
1548 1565                  break;
1549 1566          case NF4FIFO:
1550 1567                  vap->va_type = VFIFO;
1551 1568                  break;
1552 1569          default:
1553 1570                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1554 1571                  return (NULL);
1555 1572          }
1556 1573  
1557 1574          /*
1558 1575           * Must specify the mode.
1559 1576           */
1560 1577          if (!(vap->va_mask & AT_MODE)) {
1561 1578                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1562 1579                  return (NULL);
1563 1580          }
1564 1581  
1565 1582          excl = EXCL;
1566 1583  
1567 1584          mode = 0;
1568 1585  
1569 1586          error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1570 1587          if (error) {
1571 1588                  *cs->statusp = resp->status = puterrno4(error);
1572 1589                  return (NULL);
1573 1590          }
1574 1591          return (vp);
1575 1592  }
1576 1593  
1577 1594  /*
1578 1595   * nfsv4 create is used to create non-regular files. For regular files,
1579 1596   * use nfsv4 open.
1580 1597   */
1581 1598  /* ARGSUSED */
1582 1599  static void
1583 1600  rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1584 1601      struct compound_state *cs)
1585 1602  {
1586 1603          CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1587 1604          CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1588 1605          int error;
1589 1606          struct vattr bva, iva, iva2, ava, *vap;
1590 1607          cred_t *cr = cs->cr;
1591 1608          vnode_t *dvp = cs->vp;
1592 1609          vnode_t *vp = NULL;
1593 1610          vnode_t *realvp;
1594 1611          char *nm, *lnm;
1595 1612          uint_t len, llen;
1596 1613          int syncval = 0;
1597 1614          struct nfs4_svgetit_arg sarg;
1598 1615          struct nfs4_ntov_table ntov;
1599 1616          struct statvfs64 sb;
1600 1617          nfsstat4 status;
1601 1618          struct sockaddr *ca;
1602 1619          char *name = NULL;
1603 1620          char *lname = NULL;
1604 1621  
1605 1622          DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1606 1623              CREATE4args *, args);
1607 1624  
1608 1625          resp->attrset = 0;
1609 1626  
1610 1627          if (dvp == NULL) {
1611 1628                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1612 1629                  goto out;
1613 1630          }
1614 1631  
1615 1632          /*
1616 1633           * If there is an unshared filesystem mounted on this vnode,
1617 1634           * do not allow to create an object in this directory.
1618 1635           */
1619 1636          if (vn_ismntpt(dvp)) {
1620 1637                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1621 1638                  goto out;
1622 1639          }
1623 1640  
1624 1641          /* Verify that type is correct */
1625 1642          switch (args->type) {
1626 1643          case NF4LNK:
1627 1644          case NF4BLK:
1628 1645          case NF4CHR:
1629 1646          case NF4SOCK:
1630 1647          case NF4FIFO:
1631 1648          case NF4DIR:
1632 1649                  break;
1633 1650          default:
1634 1651                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1635 1652                  goto out;
1636 1653          };
1637 1654  
1638 1655          if (cs->access == CS_ACCESS_DENIED) {
1639 1656                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1640 1657                  goto out;
1641 1658          }
1642 1659          if (dvp->v_type != VDIR) {
1643 1660                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1644 1661                  goto out;
1645 1662          }
1646 1663          status = utf8_dir_verify(&args->objname);
1647 1664          if (status != NFS4_OK) {
1648 1665                  *cs->statusp = resp->status = status;
1649 1666                  goto out;
1650 1667          }
1651 1668  
1652 1669          if (rdonly4(req, cs)) {
1653 1670                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1654 1671                  goto out;
1655 1672          }
1656 1673  
1657 1674          /*
1658 1675           * Name of newly created object
1659 1676           */
1660 1677          nm = utf8_to_fn(&args->objname, &len, NULL);
1661 1678          if (nm == NULL) {
1662 1679                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1663 1680                  goto out;
1664 1681          }
1665 1682  
1666 1683          if (len > MAXNAMELEN) {
1667 1684                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1668 1685                  kmem_free(nm, len);
1669 1686                  goto out;
1670 1687          }
1671 1688  
1672 1689          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1673 1690          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1674 1691              MAXPATHLEN  + 1);
1675 1692  
1676 1693          if (name == NULL) {
1677 1694                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1678 1695                  kmem_free(nm, len);
1679 1696                  goto out;
1680 1697          }
1681 1698  
1682 1699          resp->attrset = 0;
1683 1700  
1684 1701          sarg.sbp = &sb;
1685 1702          sarg.is_referral = B_FALSE;
1686 1703          nfs4_ntov_table_init(&ntov);
1687 1704  
1688 1705          status = do_rfs4_set_attrs(&resp->attrset,
1689 1706              &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1690 1707  
1691 1708          if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1692 1709                  status = NFS4ERR_INVAL;
1693 1710  
1694 1711          if (status != NFS4_OK) {
1695 1712                  *cs->statusp = resp->status = status;
1696 1713                  if (name != nm)
1697 1714                          kmem_free(name, MAXPATHLEN + 1);
1698 1715                  kmem_free(nm, len);
1699 1716                  nfs4_ntov_table_free(&ntov, &sarg);
1700 1717                  resp->attrset = 0;
1701 1718                  goto out;
1702 1719          }
1703 1720  
1704 1721          /* Get "before" change value */
1705 1722          bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1706 1723          error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1707 1724          if (error) {
1708 1725                  *cs->statusp = resp->status = puterrno4(error);
1709 1726                  if (name != nm)
1710 1727                          kmem_free(name, MAXPATHLEN + 1);
1711 1728                  kmem_free(nm, len);
1712 1729                  nfs4_ntov_table_free(&ntov, &sarg);
1713 1730                  resp->attrset = 0;
1714 1731                  goto out;
1715 1732          }
1716 1733          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1717 1734  
1718 1735          vap = sarg.vap;
1719 1736  
1720 1737          /*
1721 1738           * Set the default initial values for attributes when the parent
1722 1739           * directory does not have the VSUID/VSGID bit set and they have
1723 1740           * not been specified in createattrs.
1724 1741           */
1725 1742          if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1726 1743                  vap->va_uid = crgetuid(cr);
1727 1744                  vap->va_mask |= AT_UID;
1728 1745          }
1729 1746          if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1730 1747                  vap->va_gid = crgetgid(cr);
1731 1748                  vap->va_mask |= AT_GID;
1732 1749          }
1733 1750  
1734 1751          vap->va_mask |= AT_TYPE;
1735 1752          switch (args->type) {
1736 1753          case NF4DIR:
1737 1754                  vap->va_type = VDIR;
1738 1755                  if ((vap->va_mask & AT_MODE) == 0) {
1739 1756                          vap->va_mode = 0700;    /* default: owner rwx only */
1740 1757                          vap->va_mask |= AT_MODE;
1741 1758                  }
1742 1759                  error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1743 1760                  if (error)
1744 1761                          break;
1745 1762  
1746 1763                  /*
1747 1764                   * Get the initial "after" sequence number, if it fails,
1748 1765                   * set to zero
1749 1766                   */
1750 1767                  iva.va_mask = AT_SEQ;
1751 1768                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1752 1769                          iva.va_seq = 0;
1753 1770                  break;
1754 1771          case NF4LNK:
1755 1772                  vap->va_type = VLNK;
1756 1773                  if ((vap->va_mask & AT_MODE) == 0) {
1757 1774                          vap->va_mode = 0700;    /* default: owner rwx only */
1758 1775                          vap->va_mask |= AT_MODE;
1759 1776                  }
1760 1777  
1761 1778                  /*
1762 1779                   * symlink names must be treated as data
1763 1780                   */
1764 1781                  lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1765 1782                      &llen, NULL);
1766 1783  
1767 1784                  if (lnm == NULL) {
1768 1785                          *cs->statusp = resp->status = NFS4ERR_INVAL;
1769 1786                          if (name != nm)
1770 1787                                  kmem_free(name, MAXPATHLEN + 1);
1771 1788                          kmem_free(nm, len);
1772 1789                          nfs4_ntov_table_free(&ntov, &sarg);
1773 1790                          resp->attrset = 0;
1774 1791                          goto out;
1775 1792                  }
1776 1793  
1777 1794                  if (llen > MAXPATHLEN) {
1778 1795                          *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1779 1796                          if (name != nm)
1780 1797                                  kmem_free(name, MAXPATHLEN + 1);
1781 1798                          kmem_free(nm, len);
1782 1799                          kmem_free(lnm, llen);
1783 1800                          nfs4_ntov_table_free(&ntov, &sarg);
1784 1801                          resp->attrset = 0;
1785 1802                          goto out;
1786 1803                  }
1787 1804  
1788 1805                  lname = nfscmd_convname(ca, cs->exi, lnm,
1789 1806                      NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1790 1807  
1791 1808                  if (lname == NULL) {
1792 1809                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1793 1810                          if (name != nm)
1794 1811                                  kmem_free(name, MAXPATHLEN + 1);
1795 1812                          kmem_free(nm, len);
1796 1813                          kmem_free(lnm, llen);
1797 1814                          nfs4_ntov_table_free(&ntov, &sarg);
1798 1815                          resp->attrset = 0;
1799 1816                          goto out;
1800 1817                  }
1801 1818  
1802 1819                  error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1803 1820                  if (lname != lnm)
1804 1821                          kmem_free(lname, MAXPATHLEN + 1);
1805 1822                  kmem_free(lnm, llen);
1806 1823                  if (error)
1807 1824                          break;
1808 1825  
1809 1826                  /*
1810 1827                   * Get the initial "after" sequence number, if it fails,
1811 1828                   * set to zero
1812 1829                   */
1813 1830                  iva.va_mask = AT_SEQ;
1814 1831                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1815 1832                          iva.va_seq = 0;
1816 1833  
1817 1834                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1818 1835                      NULL, NULL, NULL);
1819 1836                  if (error)
1820 1837                          break;
1821 1838  
1822 1839                  /*
1823 1840                   * va_seq is not safe over VOP calls, check it again
1824 1841                   * if it has changed zero out iva to force atomic = FALSE.
1825 1842                   */
1826 1843                  iva2.va_mask = AT_SEQ;
1827 1844                  if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1828 1845                      iva2.va_seq != iva.va_seq)
1829 1846                          iva.va_seq = 0;
1830 1847                  break;
1831 1848          default:
1832 1849                  /*
1833 1850                   * probably a special file.
1834 1851                   */
1835 1852                  if ((vap->va_mask & AT_MODE) == 0) {
1836 1853                          vap->va_mode = 0600;    /* default: owner rw only */
1837 1854                          vap->va_mask |= AT_MODE;
1838 1855                  }
1839 1856                  syncval = FNODSYNC;
1840 1857                  /*
1841 1858                   * We know this will only generate one VOP call
1842 1859                   */
1843 1860                  vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1844 1861  
1845 1862                  if (vp == NULL) {
1846 1863                          if (name != nm)
1847 1864                                  kmem_free(name, MAXPATHLEN + 1);
1848 1865                          kmem_free(nm, len);
1849 1866                          nfs4_ntov_table_free(&ntov, &sarg);
1850 1867                          resp->attrset = 0;
1851 1868                          goto out;
1852 1869                  }
1853 1870  
1854 1871                  /*
1855 1872                   * Get the initial "after" sequence number, if it fails,
1856 1873                   * set to zero
1857 1874                   */
1858 1875                  iva.va_mask = AT_SEQ;
1859 1876                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1860 1877                          iva.va_seq = 0;
1861 1878  
1862 1879                  break;
1863 1880          }
1864 1881          if (name != nm)
1865 1882                  kmem_free(name, MAXPATHLEN + 1);
1866 1883          kmem_free(nm, len);
1867 1884  
1868 1885          if (error) {
1869 1886                  *cs->statusp = resp->status = puterrno4(error);
1870 1887          }
1871 1888  
1872 1889          /*
1873 1890           * Force modified data and metadata out to stable storage.
1874 1891           */
1875 1892          (void) VOP_FSYNC(dvp, 0, cr, NULL);
1876 1893  
1877 1894          if (resp->status != NFS4_OK) {
1878 1895                  if (vp != NULL)
1879 1896                          VN_RELE(vp);
1880 1897                  nfs4_ntov_table_free(&ntov, &sarg);
1881 1898                  resp->attrset = 0;
1882 1899                  goto out;
1883 1900          }
1884 1901  
1885 1902          /*
1886 1903           * Finish setup of cinfo response, "before" value already set.
1887 1904           * Get "after" change value, if it fails, simply return the
1888 1905           * before value.
1889 1906           */
1890 1907          ava.va_mask = AT_CTIME|AT_SEQ;
1891 1908          if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1892 1909                  ava.va_ctime = bva.va_ctime;
1893 1910                  ava.va_seq = 0;
1894 1911          }
1895 1912          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1896 1913  
1897 1914          /*
1898 1915           * True verification that object was created with correct
1899 1916           * attrs is impossible.  The attrs could have been changed
1900 1917           * immediately after object creation.  If attributes did
1901 1918           * not verify, the only recourse for the server is to
1902 1919           * destroy the object.  Maybe if some attrs (like gid)
1903 1920           * are set incorrectly, the object should be destroyed;
1904 1921           * however, seems bad as a default policy.  Do we really
1905 1922           * want to destroy an object over one of the times not
1906 1923           * verifying correctly?  For these reasons, the server
1907 1924           * currently sets bits in attrset for createattrs
1908 1925           * that were set; however, no verification is done.
1909 1926           *
1910 1927           * vmask_to_nmask accounts for vattr bits set on create
1911 1928           *      [do_rfs4_set_attrs() only sets resp bits for
1912 1929           *       non-vattr/vfs bits.]
1913 1930           * Mask off any bits set by default so as not to return
1914 1931           * more attrset bits than were requested in createattrs
1915 1932           */
1916 1933          nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1917 1934          resp->attrset &= args->createattrs.attrmask;
1918 1935          nfs4_ntov_table_free(&ntov, &sarg);
1919 1936  
1920 1937          error = makefh4(&cs->fh, vp, cs->exi);
1921 1938          if (error) {
1922 1939                  *cs->statusp = resp->status = puterrno4(error);
1923 1940          }
1924 1941  
1925 1942          /*
1926 1943           * The cinfo.atomic = TRUE only if we got no errors, we have
1927 1944           * non-zero va_seq's, and it has incremented by exactly one
1928 1945           * during the creation and it didn't change during the VOP_LOOKUP
1929 1946           * or VOP_FSYNC.
1930 1947           */
1931 1948          if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1932 1949              iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1933 1950                  resp->cinfo.atomic = TRUE;
1934 1951          else
1935 1952                  resp->cinfo.atomic = FALSE;
1936 1953  
1937 1954          /*
1938 1955           * Force modified metadata out to stable storage.
1939 1956           *
1940 1957           * if a underlying vp exists, pass it to VOP_FSYNC
1941 1958           */
1942 1959          if (VOP_REALVP(vp, &realvp, NULL) == 0)
1943 1960                  (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1944 1961          else
1945 1962                  (void) VOP_FSYNC(vp, syncval, cr, NULL);
1946 1963  
1947 1964          if (resp->status != NFS4_OK) {
1948 1965                  VN_RELE(vp);
1949 1966                  goto out;
1950 1967          }
1951 1968          if (cs->vp)
1952 1969                  VN_RELE(cs->vp);
1953 1970  
1954 1971          cs->vp = vp;
1955 1972          *cs->statusp = resp->status = NFS4_OK;
1956 1973  out:
1957 1974          DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1958 1975              CREATE4res *, resp);
1959 1976  }
1960 1977  
1961 1978  /*ARGSUSED*/
1962 1979  static void
1963 1980  rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1964 1981      struct compound_state *cs)
1965 1982  {
1966 1983          DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1967 1984              DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1968 1985  
1969 1986          rfs4_op_inval(argop, resop, req, cs);
1970 1987  
1971 1988          DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1972 1989              DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1973 1990  }
1974 1991  
1975 1992  /*ARGSUSED*/
1976 1993  static void
1977 1994  rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1978 1995      struct compound_state *cs)
1979 1996  {
1980 1997          DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1981 1998          DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1982 1999          rfs4_deleg_state_t *dsp;
1983 2000          nfsstat4 status;
1984 2001  
1985 2002          DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1986 2003              DELEGRETURN4args *, args);
1987 2004  
1988 2005          status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1989 2006          resp->status = *cs->statusp = status;
1990 2007          if (status != NFS4_OK)
1991 2008                  goto out;
1992 2009  
1993 2010          /* Ensure specified filehandle matches */
1994 2011          if (cs->vp != dsp->rds_finfo->rf_vp) {
1995 2012                  resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1996 2013          } else
1997 2014                  rfs4_return_deleg(dsp, FALSE);
1998 2015  
1999 2016          rfs4_update_lease(dsp->rds_client);
2000 2017  
2001 2018          rfs4_deleg_state_rele(dsp);
2002 2019  out:
2003 2020          DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2004 2021              DELEGRETURN4res *, resp);
2005 2022  }
2006 2023  
2007 2024  /*
2008 2025   * Check to see if a given "flavor" is an explicitly shared flavor.
2009 2026   * The assumption of this routine is the "flavor" is already a valid
2010 2027   * flavor in the secinfo list of "exi".
2011 2028   *
2012 2029   *      e.g.
2013 2030   *              # share -o sec=flavor1 /export
2014 2031   *              # share -o sec=flavor2 /export/home
2015 2032   *
2016 2033   *              flavor2 is not an explicitly shared flavor for /export,
2017 2034   *              however it is in the secinfo list for /export thru the
2018 2035   *              server namespace setup.
2019 2036   */
2020 2037  int
2021 2038  is_exported_sec(int flavor, struct exportinfo *exi)
2022 2039  {
2023 2040          int     i;
2024 2041          struct secinfo *sp;
2025 2042  
2026 2043          sp = exi->exi_export.ex_secinfo;
2027 2044          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2028 2045                  if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2029 2046                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2030 2047                          return (SEC_REF_EXPORTED(&sp[i]));
2031 2048                  }
2032 2049          }
2033 2050  
2034 2051          /* Should not reach this point based on the assumption */
2035 2052          return (0);
2036 2053  }
2037 2054  
2038 2055  /*
2039 2056   * Check if the security flavor used in the request matches what is
2040 2057   * required at the export point or at the root pseudo node (exi_root).
2041 2058   *
2042 2059   * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2043 2060   *
2044 2061   */
2045 2062  static int
2046 2063  secinfo_match_or_authnone(struct compound_state *cs)
2047 2064  {
2048 2065          int     i;
2049 2066          struct secinfo *sp;
2050 2067  
2051 2068          /*
2052 2069           * Check cs->nfsflavor (from the request) against
2053 2070           * the current export data in cs->exi.
2054 2071           */
2055 2072          sp = cs->exi->exi_export.ex_secinfo;
2056 2073          for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2057 2074                  if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2058 2075                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2059 2076                          return (1);
2060 2077          }
2061 2078  
2062 2079          return (0);
2063 2080  }
2064 2081  
2065 2082  /*
2066 2083   * Check the access authority for the client and return the correct error.
2067 2084   */
2068 2085  nfsstat4
2069 2086  call_checkauth4(struct compound_state *cs, struct svc_req *req)
2070 2087  {
2071 2088          int     authres;
2072 2089  
2073 2090          /*
2074 2091           * First, check if the security flavor used in the request
2075 2092           * are among the flavors set in the server namespace.
2076 2093           */
2077 2094          if (!secinfo_match_or_authnone(cs)) {
2078 2095                  *cs->statusp = NFS4ERR_WRONGSEC;
2079 2096                  return (*cs->statusp);
2080 2097          }
2081 2098  
2082 2099          authres = checkauth4(cs, req);
2083 2100  
2084 2101          if (authres > 0) {
2085 2102                  *cs->statusp = NFS4_OK;
2086 2103                  if (! (cs->access & CS_ACCESS_LIMITED))
2087 2104                          cs->access = CS_ACCESS_OK;
2088 2105          } else if (authres == 0) {
2089 2106                  *cs->statusp = NFS4ERR_ACCESS;
2090 2107          } else if (authres == -2) {
2091 2108                  *cs->statusp = NFS4ERR_WRONGSEC;
2092 2109          } else {
2093 2110                  *cs->statusp = NFS4ERR_DELAY;
2094 2111          }
2095 2112          return (*cs->statusp);
2096 2113  }
2097 2114  
2098 2115  /*
2099 2116   * bitmap4_to_attrmask is called by getattr and readdir.
2100 2117   * It sets up the vattr mask and determines whether vfsstat call is needed
2101 2118   * based on the input bitmap.
2102 2119   * Returns nfsv4 status.
2103 2120   */
2104 2121  static nfsstat4
2105 2122  bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2106 2123  {
2107 2124          int i;
2108 2125          uint_t  va_mask;
2109 2126          struct statvfs64 *sbp = sargp->sbp;
2110 2127  
2111 2128          sargp->sbp = NULL;
2112 2129          sargp->flag = 0;
2113 2130          sargp->rdattr_error = NFS4_OK;
2114 2131          sargp->mntdfid_set = FALSE;
2115 2132          if (sargp->cs->vp)
2116 2133                  sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2117 2134                      FH4_ATTRDIR | FH4_NAMEDATTR);
2118 2135          else
2119 2136                  sargp->xattr = 0;
2120 2137  
2121 2138          /*
2122 2139           * Set rdattr_error_req to true if return error per
2123 2140           * failed entry rather than fail the readdir.
2124 2141           */
2125 2142          if (breq & FATTR4_RDATTR_ERROR_MASK)
2126 2143                  sargp->rdattr_error_req = 1;
2127 2144          else
2128 2145                  sargp->rdattr_error_req = 0;
2129 2146  
2130 2147          /*
2131 2148           * generate the va_mask
2132 2149           * Handle the easy cases first
2133 2150           */
2134 2151          switch (breq) {
2135 2152          case NFS4_NTOV_ATTR_MASK:
2136 2153                  sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2137 2154                  return (NFS4_OK);
2138 2155  
2139 2156          case NFS4_FS_ATTR_MASK:
2140 2157                  sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2141 2158                  sargp->sbp = sbp;
2142 2159                  return (NFS4_OK);
2143 2160  
2144 2161          case NFS4_NTOV_ATTR_CACHE_MASK:
2145 2162                  sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2146 2163                  return (NFS4_OK);
2147 2164  
2148 2165          case FATTR4_LEASE_TIME_MASK:
2149 2166                  sargp->vap->va_mask = 0;
2150 2167                  return (NFS4_OK);
2151 2168  
2152 2169          default:
2153 2170                  va_mask = 0;
2154 2171                  for (i = 0; i < nfs4_ntov_map_size; i++) {
2155 2172                          if ((breq & nfs4_ntov_map[i].fbit) &&
2156 2173                              nfs4_ntov_map[i].vbit)
2157 2174                                  va_mask |= nfs4_ntov_map[i].vbit;
2158 2175                  }
2159 2176  
2160 2177                  /*
2161 2178                   * Check is vfsstat is needed
2162 2179                   */
2163 2180                  if (breq & NFS4_FS_ATTR_MASK)
2164 2181                          sargp->sbp = sbp;
2165 2182  
2166 2183                  sargp->vap->va_mask = va_mask;
2167 2184                  return (NFS4_OK);
2168 2185          }
2169 2186          /* NOTREACHED */
2170 2187  }
2171 2188  
2172 2189  /*
2173 2190   * bitmap4_get_sysattrs is called by getattr and readdir.
2174 2191   * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2175 2192   * Returns nfsv4 status.
2176 2193   */
2177 2194  static nfsstat4
2178 2195  bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2179 2196  {
2180 2197          int error;
2181 2198          struct compound_state *cs = sargp->cs;
2182 2199          vnode_t *vp = cs->vp;
2183 2200  
2184 2201          if (sargp->sbp != NULL) {
2185 2202                  if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2186 2203                          sargp->sbp = NULL;      /* to identify error */
2187 2204                          return (puterrno4(error));
2188 2205                  }
2189 2206          }
2190 2207  
2191 2208          return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2192 2209  }
2193 2210  
2194 2211  static void
2195 2212  nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2196 2213  {
2197 2214          ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2198 2215              KM_SLEEP);
2199 2216          ntovp->attrcnt = 0;
2200 2217          ntovp->vfsstat = FALSE;
2201 2218  }
2202 2219  
2203 2220  static void
2204 2221  nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2205 2222      struct nfs4_svgetit_arg *sargp)
2206 2223  {
2207 2224          int i;
2208 2225          union nfs4_attr_u *na;
2209 2226          uint8_t *amap;
2210 2227  
2211 2228          /*
2212 2229           * XXX Should do the same checks for whether the bit is set
2213 2230           */
2214 2231          for (i = 0, na = ntovp->na, amap = ntovp->amap;
2215 2232              i < ntovp->attrcnt; i++, na++, amap++) {
2216 2233                  (void) (*nfs4_ntov_map[*amap].sv_getit)(
2217 2234                      NFS4ATTR_FREEIT, sargp, na);
2218 2235          }
2219 2236          if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2220 2237                  /*
2221 2238                   * xdr_free for getattr will be done later
2222 2239                   */
2223 2240                  for (i = 0, na = ntovp->na, amap = ntovp->amap;
2224 2241                      i < ntovp->attrcnt; i++, na++, amap++) {
2225 2242                          xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2226 2243                  }
2227 2244          }
2228 2245          kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2229 2246  }
2230 2247  
2231 2248  /*
2232 2249   * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2233 2250   */
2234 2251  static nfsstat4
2235 2252  do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2236 2253      struct nfs4_svgetit_arg *sargp)
2237 2254  {
2238 2255          int error = 0;
2239 2256          int i, k;
2240 2257          struct nfs4_ntov_table ntov;
2241 2258          XDR xdr;
2242 2259          ulong_t xdr_size;
2243 2260          char *xdr_attrs;
2244 2261          nfsstat4 status = NFS4_OK;
2245 2262          nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2246 2263          union nfs4_attr_u *na;
2247 2264          uint8_t *amap;
2248 2265  
2249 2266          sargp->op = NFS4ATTR_GETIT;
2250 2267          sargp->flag = 0;
2251 2268  
2252 2269          fattrp->attrmask = 0;
2253 2270          /* if no bits requested, then return empty fattr4 */
2254 2271          if (breq == 0) {
2255 2272                  fattrp->attrlist4_len = 0;
2256 2273                  fattrp->attrlist4 = NULL;
2257 2274                  return (NFS4_OK);
2258 2275          }
2259 2276  
2260 2277          /*
2261 2278           * return NFS4ERR_INVAL when client requests write-only attrs
2262 2279           */
2263 2280          if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2264 2281                  return (NFS4ERR_INVAL);
2265 2282  
2266 2283          nfs4_ntov_table_init(&ntov);
2267 2284          na = ntov.na;
2268 2285          amap = ntov.amap;
2269 2286  
2270 2287          /*
2271 2288           * Now loop to get or verify the attrs
2272 2289           */
2273 2290          for (i = 0; i < nfs4_ntov_map_size; i++) {
2274 2291                  if (breq & nfs4_ntov_map[i].fbit) {
2275 2292                          if ((*nfs4_ntov_map[i].sv_getit)(
2276 2293                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2277 2294  
2278 2295                                  error = (*nfs4_ntov_map[i].sv_getit)(
2279 2296                                      NFS4ATTR_GETIT, sargp, na);
2280 2297  
2281 2298                                  /*
2282 2299                                   * Possible error values:
2283 2300                                   * >0 if sv_getit failed to
2284 2301                                   * get the attr; 0 if succeeded;
2285 2302                                   * <0 if rdattr_error and the
2286 2303                                   * attribute cannot be returned.
2287 2304                                   */
2288 2305                                  if (error && !(sargp->rdattr_error_req))
2289 2306                                          goto done;
2290 2307                                  /*
2291 2308                                   * If error then just for entry
2292 2309                                   */
2293 2310                                  if (error == 0) {
2294 2311                                          fattrp->attrmask |=
2295 2312                                              nfs4_ntov_map[i].fbit;
2296 2313                                          *amap++ =
2297 2314                                              (uint8_t)nfs4_ntov_map[i].nval;
2298 2315                                          na++;
2299 2316                                          (ntov.attrcnt)++;
2300 2317                                  } else if ((error > 0) &&
2301 2318                                      (sargp->rdattr_error == NFS4_OK)) {
2302 2319                                          sargp->rdattr_error = puterrno4(error);
2303 2320                                  }
2304 2321                                  error = 0;
2305 2322                          }
2306 2323                  }
2307 2324          }
2308 2325  
2309 2326          /*
2310 2327           * If rdattr_error was set after the return value for it was assigned,
2311 2328           * update it.
2312 2329           */
2313 2330          if (prev_rdattr_error != sargp->rdattr_error) {
2314 2331                  na = ntov.na;
2315 2332                  amap = ntov.amap;
2316 2333                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2317 2334                          k = *amap;
2318 2335                          if (k < FATTR4_RDATTR_ERROR) {
2319 2336                                  continue;
2320 2337                          }
2321 2338                          if ((k == FATTR4_RDATTR_ERROR) &&
2322 2339                              ((*nfs4_ntov_map[k].sv_getit)(
2323 2340                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2324 2341  
2325 2342                                  (void) (*nfs4_ntov_map[k].sv_getit)(
2326 2343                                      NFS4ATTR_GETIT, sargp, na);
2327 2344                          }
2328 2345                          break;
2329 2346                  }
2330 2347          }
2331 2348  
2332 2349          xdr_size = 0;
2333 2350          na = ntov.na;
2334 2351          amap = ntov.amap;
2335 2352          for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2336 2353                  xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2337 2354          }
2338 2355  
2339 2356          fattrp->attrlist4_len = xdr_size;
2340 2357          if (xdr_size) {
2341 2358                  /* freed by rfs4_op_getattr_free() */
2342 2359                  fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2343 2360  
2344 2361                  xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2345 2362  
2346 2363                  na = ntov.na;
2347 2364                  amap = ntov.amap;
2348 2365                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2349 2366                          if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2350 2367                                  DTRACE_PROBE1(nfss__e__getattr4_encfail,
2351 2368                                      int, *amap);
2352 2369                                  status = NFS4ERR_SERVERFAULT;
2353 2370                                  break;
2354 2371                          }
2355 2372                  }
2356 2373                  /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2357 2374          } else {
2358 2375                  fattrp->attrlist4 = NULL;
2359 2376          }
2360 2377  done:
2361 2378  
2362 2379          nfs4_ntov_table_free(&ntov, sargp);
2363 2380  
2364 2381          if (error != 0)
2365 2382                  status = puterrno4(error);
2366 2383  
2367 2384          return (status);
2368 2385  }
2369 2386  
2370 2387  /* ARGSUSED */
2371 2388  static void
2372 2389  rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2373 2390      struct compound_state *cs)
2374 2391  {
2375 2392          GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2376 2393          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2377 2394          struct nfs4_svgetit_arg sarg;
2378 2395          struct statvfs64 sb;
2379 2396          nfsstat4 status;
2380 2397  
2381 2398          DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2382 2399              GETATTR4args *, args);
2383 2400  
2384 2401          if (cs->vp == NULL) {
2385 2402                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2386 2403                  goto out;
2387 2404          }
2388 2405  
2389 2406          if (cs->access == CS_ACCESS_DENIED) {
2390 2407                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2391 2408                  goto out;
2392 2409          }
2393 2410  
2394 2411          sarg.sbp = &sb;
2395 2412          sarg.cs = cs;
2396 2413          sarg.is_referral = B_FALSE;
2397 2414  
2398 2415          status = bitmap4_to_attrmask(args->attr_request, &sarg);
2399 2416          if (status == NFS4_OK) {
2400 2417  
2401 2418                  status = bitmap4_get_sysattrs(&sarg);
2402 2419                  if (status == NFS4_OK) {
2403 2420  
2404 2421                          /* Is this a referral? */
2405 2422                          if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2406 2423                                  /* Older V4 Solaris client sees a link */
2407 2424                                  if (client_is_downrev(req))
2408 2425                                          sarg.vap->va_type = VLNK;
2409 2426                                  else
2410 2427                                          sarg.is_referral = B_TRUE;
2411 2428                          }
2412 2429  
2413 2430                          status = do_rfs4_op_getattr(args->attr_request,
2414 2431                              &resp->obj_attributes, &sarg);
2415 2432                  }
2416 2433          }
2417 2434          *cs->statusp = resp->status = status;
2418 2435  out:
2419 2436          DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2420 2437              GETATTR4res *, resp);
2421 2438  }
2422 2439  
2423 2440  static void
2424 2441  rfs4_op_getattr_free(nfs_resop4 *resop)
2425 2442  {
2426 2443          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2427 2444  
2428 2445          nfs4_fattr4_free(&resp->obj_attributes);
2429 2446  }
2430 2447  
2431 2448  /* ARGSUSED */
2432 2449  static void
2433 2450  rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2434 2451      struct compound_state *cs)
2435 2452  {
2436 2453          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2437 2454  
2438 2455          DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2439 2456  
2440 2457          if (cs->vp == NULL) {
2441 2458                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2442 2459                  goto out;
2443 2460          }
2444 2461          if (cs->access == CS_ACCESS_DENIED) {
2445 2462                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2446 2463                  goto out;
2447 2464          }
2448 2465  
2449 2466          /* check for reparse point at the share point */
2450 2467          if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2451 2468                  /* it's all bad */
2452 2469                  cs->exi->exi_moved = 1;
2453 2470                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2454 2471                  DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2455 2472                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2456 2473                  return;
2457 2474          }
2458 2475  
2459 2476          /* check for reparse point at vp */
2460 2477          if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2461 2478                  /* it's not all bad */
2462 2479                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2463 2480                  DTRACE_PROBE2(nfs4serv__func__referral__moved,
2464 2481                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2465 2482                  return;
2466 2483          }
2467 2484  
2468 2485          resp->object.nfs_fh4_val =
2469 2486              kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2470 2487          nfs_fh4_copy(&cs->fh, &resp->object);
2471 2488          *cs->statusp = resp->status = NFS4_OK;
2472 2489  out:
2473 2490          DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2474 2491              GETFH4res *, resp);
2475 2492  }
2476 2493  
2477 2494  static void
2478 2495  rfs4_op_getfh_free(nfs_resop4 *resop)
2479 2496  {
2480 2497          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2481 2498  
2482 2499          if (resp->status == NFS4_OK &&
2483 2500              resp->object.nfs_fh4_val != NULL) {
2484 2501                  kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2485 2502                  resp->object.nfs_fh4_val = NULL;
2486 2503                  resp->object.nfs_fh4_len = 0;
2487 2504          }
2488 2505  }
2489 2506  
2490 2507  /*
2491 2508   * illegal: args: void
2492 2509   *          res : status (NFS4ERR_OP_ILLEGAL)
2493 2510   */
2494 2511  /* ARGSUSED */
2495 2512  static void
2496 2513  rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2497 2514      struct svc_req *req, struct compound_state *cs)
2498 2515  {
2499 2516          ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2500 2517  
2501 2518          resop->resop = OP_ILLEGAL;
2502 2519          *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2503 2520  }
2504 2521  
2505 2522  /*
2506 2523   * link: args: SAVED_FH: file, CURRENT_FH: target directory
2507 2524   *       res: status. If success - CURRENT_FH unchanged, return change_info
2508 2525   */
2509 2526  /* ARGSUSED */
2510 2527  static void
2511 2528  rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2512 2529      struct compound_state *cs)
2513 2530  {
2514 2531          LINK4args *args = &argop->nfs_argop4_u.oplink;
2515 2532          LINK4res *resp = &resop->nfs_resop4_u.oplink;
2516 2533          int error;
2517 2534          vnode_t *vp;
2518 2535          vnode_t *dvp;
2519 2536          struct vattr bdva, idva, adva;
2520 2537          char *nm;
2521 2538          uint_t  len;
2522 2539          struct sockaddr *ca;
2523 2540          char *name = NULL;
2524 2541          nfsstat4 status;
2525 2542  
2526 2543          DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2527 2544              LINK4args *, args);
2528 2545  
2529 2546          /* SAVED_FH: source object */
2530 2547          vp = cs->saved_vp;
2531 2548          if (vp == NULL) {
2532 2549                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2533 2550                  goto out;
2534 2551          }
2535 2552  
2536 2553          /* CURRENT_FH: target directory */
2537 2554          dvp = cs->vp;
2538 2555          if (dvp == NULL) {
2539 2556                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2540 2557                  goto out;
2541 2558          }
2542 2559  
2543 2560          /*
2544 2561           * If there is a non-shared filesystem mounted on this vnode,
2545 2562           * do not allow to link any file in this directory.
2546 2563           */
2547 2564          if (vn_ismntpt(dvp)) {
2548 2565                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2549 2566                  goto out;
2550 2567          }
2551 2568  
2552 2569          if (cs->access == CS_ACCESS_DENIED) {
2553 2570                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2554 2571                  goto out;
2555 2572          }
2556 2573  
2557 2574          /* Check source object's type validity */
2558 2575          if (vp->v_type == VDIR) {
2559 2576                  *cs->statusp = resp->status = NFS4ERR_ISDIR;
2560 2577                  goto out;
2561 2578          }
2562 2579  
2563 2580          /* Check target directory's type */
2564 2581          if (dvp->v_type != VDIR) {
2565 2582                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2566 2583                  goto out;
2567 2584          }
2568 2585  
2569 2586          if (cs->saved_exi != cs->exi) {
2570 2587                  *cs->statusp = resp->status = NFS4ERR_XDEV;
2571 2588                  goto out;
2572 2589          }
2573 2590  
2574 2591          status = utf8_dir_verify(&args->newname);
2575 2592          if (status != NFS4_OK) {
2576 2593                  *cs->statusp = resp->status = status;
2577 2594                  goto out;
2578 2595          }
2579 2596  
2580 2597          nm = utf8_to_fn(&args->newname, &len, NULL);
2581 2598          if (nm == NULL) {
2582 2599                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2583 2600                  goto out;
2584 2601          }
2585 2602  
2586 2603          if (len > MAXNAMELEN) {
2587 2604                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2588 2605                  kmem_free(nm, len);
2589 2606                  goto out;
2590 2607          }
2591 2608  
2592 2609          if (rdonly4(req, cs)) {
2593 2610                  *cs->statusp = resp->status = NFS4ERR_ROFS;
2594 2611                  kmem_free(nm, len);
2595 2612                  goto out;
2596 2613          }
2597 2614  
2598 2615          /* Get "before" change value */
2599 2616          bdva.va_mask = AT_CTIME|AT_SEQ;
2600 2617          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2601 2618          if (error) {
2602 2619                  *cs->statusp = resp->status = puterrno4(error);
2603 2620                  kmem_free(nm, len);
2604 2621                  goto out;
2605 2622          }
2606 2623  
2607 2624          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2608 2625          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2609 2626              MAXPATHLEN  + 1);
2610 2627  
2611 2628          if (name == NULL) {
2612 2629                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2613 2630                  kmem_free(nm, len);
2614 2631                  goto out;
2615 2632          }
2616 2633  
2617 2634          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2618 2635  
2619 2636          error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2620 2637  
2621 2638          if (nm != name)
2622 2639                  kmem_free(name, MAXPATHLEN + 1);
2623 2640          kmem_free(nm, len);
2624 2641  
2625 2642          /*
2626 2643           * Get the initial "after" sequence number, if it fails, set to zero
2627 2644           */
2628 2645          idva.va_mask = AT_SEQ;
2629 2646          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2630 2647                  idva.va_seq = 0;
2631 2648  
2632 2649          /*
2633 2650           * Force modified data and metadata out to stable storage.
2634 2651           */
2635 2652          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2636 2653          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2637 2654  
2638 2655          if (error) {
2639 2656                  *cs->statusp = resp->status = puterrno4(error);
2640 2657                  goto out;
2641 2658          }
2642 2659  
2643 2660          /*
2644 2661           * Get "after" change value, if it fails, simply return the
2645 2662           * before value.
2646 2663           */
2647 2664          adva.va_mask = AT_CTIME|AT_SEQ;
2648 2665          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2649 2666                  adva.va_ctime = bdva.va_ctime;
2650 2667                  adva.va_seq = 0;
2651 2668          }
2652 2669  
2653 2670          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2654 2671  
2655 2672          /*
2656 2673           * The cinfo.atomic = TRUE only if we have
2657 2674           * non-zero va_seq's, and it has incremented by exactly one
2658 2675           * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2659 2676           */
2660 2677          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2661 2678              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2662 2679                  resp->cinfo.atomic = TRUE;
2663 2680          else
2664 2681                  resp->cinfo.atomic = FALSE;
2665 2682  
2666 2683          *cs->statusp = resp->status = NFS4_OK;
2667 2684  out:
2668 2685          DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2669 2686              LINK4res *, resp);
2670 2687  }
2671 2688  
2672 2689  /*
2673 2690   * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2674 2691   */
2675 2692  
2676 2693  /* ARGSUSED */
2677 2694  static nfsstat4
2678 2695  do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2679 2696  {
2680 2697          int error;
2681 2698          int different_export = 0;
2682 2699          vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2683 2700          struct exportinfo *exi = NULL, *pre_exi = NULL;
2684 2701          nfsstat4 stat;
2685 2702          fid_t fid;
2686 2703          int attrdir, dotdot, walk;
2687 2704          bool_t is_newvp = FALSE;
2688 2705  
2689 2706          if (cs->vp->v_flag & V_XATTRDIR) {
2690 2707                  attrdir = 1;
2691 2708                  ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2692 2709          } else {
2693 2710                  attrdir = 0;
2694 2711                  ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
  
    | 
      ↓ open down ↓ | 
    1610 lines elided | 
    
      ↑ open up ↑ | 
  
2695 2712          }
2696 2713  
2697 2714          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2698 2715  
2699 2716          /*
2700 2717           * If dotdotting, then need to check whether it's
2701 2718           * above the root of a filesystem, or above an
2702 2719           * export point.
2703 2720           */
2704 2721          if (dotdot) {
2705      -
     2722 +                ASSERT(cs->exi != NULL);
     2723 +                ASSERT3U(cs->exi->exi_zoneid, ==, curzone->zone_id);
2706 2724                  /*
2707 2725                   * If dotdotting at the root of a filesystem, then
2708 2726                   * need to traverse back to the mounted-on filesystem
2709 2727                   * and do the dotdot lookup there.
2710 2728                   */
2711      -                if (cs->vp->v_flag & VROOT) {
     2729 +                if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2712 2730  
2713 2731                          /*
2714 2732                           * If at the system root, then can
2715 2733                           * go up no further.
2716 2734                           */
2717 2735                          if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2718 2736                                  return (puterrno4(ENOENT));
2719 2737  
2720 2738                          /*
2721 2739                           * Traverse back to the mounted-on filesystem
2722 2740                           */
2723 2741                          cs->vp = untraverse(cs->vp);
2724 2742  
2725 2743                          /*
2726 2744                           * Set the different_export flag so we remember
2727 2745                           * to pick up a new exportinfo entry for
2728 2746                           * this new filesystem.
2729 2747                           */
2730 2748                          different_export = 1;
2731 2749                  } else {
2732 2750  
2733 2751                          /*
2734 2752                           * If dotdotting above an export point then set
2735 2753                           * the different_export to get new export info.
2736 2754                           */
2737 2755                          different_export = nfs_exported(cs->exi, cs->vp);
2738 2756                  }
2739 2757          }
2740 2758  
2741 2759          error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2742 2760              NULL, NULL, NULL);
2743 2761          if (error)
2744 2762                  return (puterrno4(error));
2745 2763  
2746 2764          /*
2747 2765           * If the vnode is in a pseudo filesystem, check whether it is visible.
2748 2766           *
2749 2767           * XXX if the vnode is a symlink and it is not visible in
2750 2768           * a pseudo filesystem, return ENOENT (not following symlink).
2751 2769           * V4 client can not mount such symlink. This is a regression
2752 2770           * from V2/V3.
2753 2771           *
2754 2772           * In the same exported filesystem, if the security flavor used
2755 2773           * is not an explicitly shared flavor, limit the view to the visible
2756 2774           * list entries only. This is not a WRONGSEC case because it's already
2757 2775           * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2758 2776           */
2759 2777          if (!different_export &&
2760 2778              (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2761 2779              cs->access & CS_ACCESS_LIMITED)) {
2762 2780                  if (! nfs_visible(cs->exi, vp, &different_export)) {
2763 2781                          VN_RELE(vp);
2764 2782                          return (puterrno4(ENOENT));
2765 2783                  }
2766 2784          }
2767 2785  
2768 2786          /*
2769 2787           * If it's a mountpoint, then traverse it.
2770 2788           */
2771 2789          if (vn_ismntpt(vp)) {
2772 2790                  pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2773 2791                  pre_tvp = vp;           /* save pre-traversed vnode     */
2774 2792  
2775 2793                  /*
2776 2794                   * hold pre_tvp to counteract rele by traverse.  We will
2777 2795                   * need pre_tvp below if checkexport4 fails
2778 2796                   */
2779 2797                  VN_HOLD(pre_tvp);
2780 2798                  if ((error = traverse(&vp)) != 0) {
2781 2799                          VN_RELE(vp);
2782 2800                          VN_RELE(pre_tvp);
2783 2801                          return (puterrno4(error));
2784 2802                  }
2785 2803                  different_export = 1;
2786 2804          } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2787 2805                  /*
2788 2806                   * The vfsp comparison is to handle the case where
2789 2807                   * a LOFS mount is shared.  lo_lookup traverses mount points,
2790 2808                   * and NFS is unaware of local fs transistions because
2791 2809                   * v_vfsmountedhere isn't set.  For this special LOFS case,
2792 2810                   * the dir and the obj returned by lookup will have different
2793 2811                   * vfs ptrs.
2794 2812                   */
2795 2813                  different_export = 1;
2796 2814          }
2797 2815  
2798 2816          if (different_export) {
2799 2817  
2800 2818                  bzero(&fid, sizeof (fid));
2801 2819                  fid.fid_len = MAXFIDSZ;
2802 2820                  error = vop_fid_pseudo(vp, &fid);
2803 2821                  if (error) {
2804 2822                          VN_RELE(vp);
2805 2823                          if (pre_tvp)
2806 2824                                  VN_RELE(pre_tvp);
2807 2825                          return (puterrno4(error));
2808 2826                  }
2809 2827  
2810 2828                  if (dotdot)
2811 2829                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2812 2830                  else
2813 2831                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2814 2832  
2815 2833                  if (exi == NULL) {
2816 2834                          if (pre_tvp) {
2817 2835                                  /*
2818 2836                                   * If this vnode is a mounted-on vnode,
2819 2837                                   * but the mounted-on file system is not
2820 2838                                   * exported, send back the filehandle for
2821 2839                                   * the mounted-on vnode, not the root of
2822 2840                                   * the mounted-on file system.
2823 2841                                   */
2824 2842                                  VN_RELE(vp);
2825 2843                                  vp = pre_tvp;
2826 2844                                  exi = pre_exi;
2827 2845                          } else {
2828 2846                                  VN_RELE(vp);
2829 2847                                  return (puterrno4(EACCES));
2830 2848                          }
2831 2849                  } else if (pre_tvp) {
2832 2850                          /* we're done with pre_tvp now. release extra hold */
2833 2851                          VN_RELE(pre_tvp);
2834 2852                  }
2835 2853  
2836 2854                  cs->exi = exi;
2837 2855  
2838 2856                  /*
2839 2857                   * Now we do a checkauth4. The reason is that
2840 2858                   * this client/user may not have access to the new
2841 2859                   * exported file system, and if they do,
2842 2860                   * the client/user may be mapped to a different uid.
2843 2861                   *
2844 2862                   * We start with a new cr, because the checkauth4 done
2845 2863                   * in the PUT*FH operation overwrote the cred's uid,
2846 2864                   * gid, etc, and we want the real thing before calling
2847 2865                   * checkauth4()
2848 2866                   */
2849 2867                  crfree(cs->cr);
2850 2868                  cs->cr = crdup(cs->basecr);
2851 2869  
2852 2870                  oldvp = cs->vp;
2853 2871                  cs->vp = vp;
2854 2872                  is_newvp = TRUE;
2855 2873  
2856 2874                  stat = call_checkauth4(cs, req);
2857 2875                  if (stat != NFS4_OK) {
2858 2876                          VN_RELE(cs->vp);
2859 2877                          cs->vp = oldvp;
2860 2878                          return (stat);
2861 2879                  }
2862 2880          }
2863 2881  
2864 2882          /*
2865 2883           * After various NFS checks, do a label check on the path
2866 2884           * component. The label on this path should either be the
2867 2885           * global zone's label or a zone's label. We are only
2868 2886           * interested in the zone's label because exported files
2869 2887           * in global zone is accessible (though read-only) to
2870 2888           * clients. The exportability/visibility check is already
2871 2889           * done before reaching this code.
2872 2890           */
2873 2891          if (is_system_labeled()) {
2874 2892                  bslabel_t *clabel;
2875 2893  
2876 2894                  ASSERT(req->rq_label != NULL);
2877 2895                  clabel = req->rq_label;
2878 2896                  DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2879 2897                      "got client label from request(1)", struct svc_req *, req);
2880 2898  
2881 2899                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2882 2900                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2883 2901                              cs->exi)) {
2884 2902                                  error = EACCES;
2885 2903                                  goto err_out;
2886 2904                          }
2887 2905                  } else {
2888 2906                          /*
2889 2907                           * We grant access to admin_low label clients
2890 2908                           * only if the client is trusted, i.e. also
2891 2909                           * running Solaris Trusted Extension.
2892 2910                           */
2893 2911                          struct sockaddr *ca;
2894 2912                          int             addr_type;
2895 2913                          void            *ipaddr;
2896 2914                          tsol_tpc_t      *tp;
2897 2915  
2898 2916                          ca = (struct sockaddr *)svc_getrpccaller(
2899 2917                              req->rq_xprt)->buf;
2900 2918                          if (ca->sa_family == AF_INET) {
2901 2919                                  addr_type = IPV4_VERSION;
2902 2920                                  ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2903 2921                          } else if (ca->sa_family == AF_INET6) {
2904 2922                                  addr_type = IPV6_VERSION;
2905 2923                                  ipaddr = &((struct sockaddr_in6 *)
2906 2924                                      ca)->sin6_addr;
2907 2925                          }
2908 2926                          tp = find_tpc(ipaddr, addr_type, B_FALSE);
2909 2927                          if (tp == NULL || tp->tpc_tp.tp_doi !=
2910 2928                              l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2911 2929                              SUN_CIPSO) {
2912 2930                                  if (tp != NULL)
2913 2931                                          TPC_RELE(tp);
2914 2932                                  error = EACCES;
2915 2933                                  goto err_out;
2916 2934                          }
2917 2935                          TPC_RELE(tp);
2918 2936                  }
2919 2937          }
2920 2938  
2921 2939          error = makefh4(&cs->fh, vp, cs->exi);
2922 2940  
2923 2941  err_out:
2924 2942          if (error) {
2925 2943                  if (is_newvp) {
2926 2944                          VN_RELE(cs->vp);
2927 2945                          cs->vp = oldvp;
2928 2946                  } else
2929 2947                          VN_RELE(vp);
2930 2948                  return (puterrno4(error));
2931 2949          }
2932 2950  
2933 2951          if (!is_newvp) {
2934 2952                  if (cs->vp)
2935 2953                          VN_RELE(cs->vp);
2936 2954                  cs->vp = vp;
2937 2955          } else if (oldvp)
2938 2956                  VN_RELE(oldvp);
2939 2957  
2940 2958          /*
2941 2959           * if did lookup on attrdir and didn't lookup .., set named
2942 2960           * attr fh flag
2943 2961           */
2944 2962          if (attrdir && ! dotdot)
2945 2963                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2946 2964  
2947 2965          /* Assume false for now, open proc will set this */
2948 2966          cs->mandlock = FALSE;
2949 2967  
2950 2968          return (NFS4_OK);
2951 2969  }
2952 2970  
           /*
            * rfs4_op_lookup: handler for the LOOKUP operation.
            *
            * Validates the current vnode (cs->vp) and the component name carried in
            * args->objname, converts the name with nfscmd_convname(), and passes it
            * to do_rfs4_op_lookup().  The outcome is stored in both resp->status and
            * *cs->statusp.
            *
            * Statuses produced directly by this function:
            *	NFS4ERR_NOFILEHANDLE	cs->vp is NULL
            *	NFS4ERR_SYMLINK		cs->vp is a VLNK
            *	NFS4ERR_NOTDIR		cs->vp is neither VLNK nor VDIR
            *	NFS4ERR_INVAL		utf8_to_str() or nfscmd_convname() returned
            *				NULL
            *	NFS4ERR_NAMETOOLONG	converted length exceeds MAXNAMELEN
            * Any non-OK status from utf8_dir_verify() is returned unchanged.
            */
2953 2971  /* ARGSUSED */
2954 2972  static void
2955 2973  rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2956 2974      struct compound_state *cs)
2957 2975  {
2958 2976          LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2959 2977          LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2960 2978          char *nm;
2961 2979          uint_t len;
2962 2980          struct sockaddr *ca;
2963 2981          char *name = NULL;
2964 2982          nfsstat4 status;
2965 2983  
2966 2984          DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2967 2985              LOOKUP4args *, args);
2968 2986  
2969 2987          if (cs->vp == NULL) {
2970 2988                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2971 2989                  goto out;
2972 2990          }
2973 2991  
                   /* A symlink gets its own status, checked before the VDIR test. */
2974 2992          if (cs->vp->v_type == VLNK) {
2975 2993                  *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2976 2994                  goto out;
2977 2995          }
2978 2996  
2979 2997          if (cs->vp->v_type != VDIR) {
2980 2998                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2981 2999                  goto out;
2982 3000          }
2983 3001  
2984 3002          status = utf8_dir_verify(&args->objname);
2985 3003          if (status != NFS4_OK) {
2986 3004                  *cs->statusp = resp->status = status;
2987 3005                  goto out;
2988 3006          }
2989 3007  
                   /*
                    * nm is a buffer returned by utf8_to_str(); len is filled in by
                    * that call and is the size used for every kmem_free(nm, len) below.
                    */
2990 3008          nm = utf8_to_str(&args->objname, &len, NULL);
2991 3009          if (nm == NULL) {
2992 3010                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2993 3011                  goto out;
2994 3012          }
2995 3013  
2996 3014          if (len > MAXNAMELEN) {
2997 3015                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2998 3016                  kmem_free(nm, len);
2999 3017                  goto out;
3000 3018          }
3001 3019  
                   /* Convert the inbound name using the caller's address and export. */
3002 3020          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3003 3021          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3004 3022              MAXPATHLEN  + 1);
3005 3023  
3006 3024          if (name == NULL) {
3007 3025                  *cs->statusp = resp->status = NFS4ERR_INVAL;
3008 3026                  kmem_free(nm, len);
3009 3027                  goto out;
3010 3028          }
3011 3029  
3012 3030          *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3013 3031  
                   /*
                    * name may be the same pointer as nm; it is freed separately (with
                    * the MAXPATHLEN + 1 size passed to nfscmd_convname()) only when it
                    * is a distinct buffer.
                    */
3014 3032          if (name != nm)
3015 3033                  kmem_free(name, MAXPATHLEN + 1);
3016 3034          kmem_free(nm, len);
3017 3035  
3018 3036  out:
3019 3037          DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3020 3038              LOOKUP4res *, resp);
3021 3039  }
3022 3040  
           /*
            * rfs4_op_lookupp: handler for the LOOKUPP operation.
            *
            * Requires a current vnode (cs->vp) that is a directory, then performs
            * the lookup of ".." through do_rfs4_op_lookup().  The outcome is stored
            * in both resp->status and *cs->statusp.  The args parameter is not
            * referenced by this function.
            *
            * Statuses produced directly by this function:
            *	NFS4ERR_NOFILEHANDLE	cs->vp is NULL
            *	NFS4ERR_NOTDIR		cs->vp is not a VDIR
            * An NFS4ERR_WRONGSEC result from the lookup is replaced by NFS4_OK.
            */
3023 3041  /* ARGSUSED */
3024 3042  static void
3025 3043  rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3026 3044      struct compound_state *cs)
3027 3045  {
3028 3046          LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3029 3047  
3030 3048          DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3031 3049  
3032 3050          if (cs->vp == NULL) {
3033 3051                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3034 3052                  goto out;
3035 3053          }
3036 3054  
3037 3055          if (cs->vp->v_type != VDIR) {
3038 3056                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3039 3057                  goto out;
3040 3058          }
3041 3059  
3042 3060          *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3043 3061  
3044 3062          /*
3045 3063           * From NFSV4 Specification, LOOKUPP should not check for
3046 3064           * NFS4ERR_WRONGSEC. Return NFS4_OK instead.
3047 3065           */
3048 3066          if (resp->status == NFS4ERR_WRONGSEC) {
3049 3067                  *cs->statusp = resp->status = NFS4_OK;
3050 3068          }
3051 3069  
3052 3070  out:
3053 3071          DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3054 3072              LOOKUPP4res *, resp);
3055 3073  }
3056 3074  
3057 3075  
           /*
            * rfs4_op_openattr: handler for the OPENATTR operation.
            *
            * Looks up the extended attribute directory of the current vnode
            * (cs->vp) with VOP_LOOKUP(..., LOOKUP_XATTR ...), optionally asking for
            * it to be created when args->createdir is set and rdonly4() reports the
            * export is not read-only.  On success the attribute directory vnode
            * replaces cs->vp (the old vnode is released), cs->fh is rebuilt for it
            * and flagged FH4_ATTRDIR.  The outcome is stored in both resp->status
            * and *cs->statusp.
            *
            * Failure statuses set here:
            *	NFS4ERR_NOFILEHANDLE	cs->vp is NULL
            *	puterrno4(ENOTSUP)	the vfs has neither VFS_XATTR nor
            *				VFSFT_SYSATTR_VIEWS, or VOP_LOOKUP returned
            *				EINVAL/ENOSYS
            *	puterrno4(EACCES)	the access check failed
            *	puterrno4(EROFS)	ENOENT from VOP_LOOKUP while createdir was
            *				requested on a read-only export
            *	puterrno4(error)	any other VOP_LOOKUP or makefh4() error
            */
3058 3076  /*ARGSUSED2*/
3059 3077  static void
3060 3078  rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3061 3079      struct compound_state *cs)
3062 3080  {
3063 3081          OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3064 3082          OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3065 3083          vnode_t         *avp = NULL;
3066 3084          int             lookup_flags = LOOKUP_XATTR, error;
3067 3085          int             exp_ro = 0;
3068 3086  
3069 3087          DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3070 3088              OPENATTR4args *, args);
3071 3089  
3072 3090          if (cs->vp == NULL) {
3073 3091                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3074 3092                  goto out;
3075 3093          }
3076 3094  
                   /* Reject only when the vfs offers neither of the two features. */
3077 3095          if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3078 3096              !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3079 3097                  *cs->statusp = resp->status = puterrno4(ENOTSUP);
3080 3098                  goto out;
3081 3099          }
3082 3100  
3083 3101          /*
3084 3102           * If file system supports passing ACE mask to VOP_ACCESS then
3085 3103           * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3086 3104           */
3087 3105  
                   /*
                    * In the legacy branch error is a boolean: it is non-zero only when
                    * VREAD, VWRITE and VEXEC are all denied, i.e. any one of the three
                    * being granted is enough to proceed.
                    */
3088 3106          if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3089 3107                  error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3090 3108                      V_ACE_MASK, cs->cr, NULL);
3091 3109          else
3092 3110                  error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3093 3111                      (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3094 3112                      (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3095 3113  
                   /* Whatever VOP_ACCESS returned, the reply is built from EACCES. */
3096 3114          if (error) {
3097 3115                  *cs->statusp = resp->status = puterrno4(EACCES);
3098 3116                  goto out;
3099 3117          }
3100 3118  
3101 3119          /*
3102 3120           * The CREATE_XATTR_DIR VOP flag cannot be specified if
3103 3121           * the file system is exported read-only -- regardless of
3104 3122           * createdir flag.  Otherwise the attrdir would be created
3105 3123           * (assuming server fs isn't mounted readonly locally).  If
3106 3124           * VOP_LOOKUP returns ENOENT in this case, the error will
3107 3125           * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3108 3126           * because specfs has no VOP_LOOKUP op, so the macro would
3109 3127           * return ENOSYS.  EINVAL is returned by all (current)
3110 3128           * Solaris file system implementations when any of their
3111 3129           * restrictions are violated (xattr(dir) can't have xattrdir).
3112 3130           * Returning NOTSUPP is more appropriate in this case
3113 3131           * because the object will never be able to have an attrdir.
3114 3132           */
                   /*
                    * rdonly4() is only evaluated when createdir is set (short-circuit),
                    * so exp_ro keeps its initial 0 otherwise.
                    */
3115 3133          if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3116 3134                  lookup_flags |= CREATE_XATTR_DIR;
3117 3135  
3118 3136          error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3119 3137              NULL, NULL, NULL);
3120 3138  
3121 3139          if (error) {
3122 3140                  if (error == ENOENT && args->createdir && exp_ro)
3123 3141                          *cs->statusp = resp->status = puterrno4(EROFS);
3124 3142                  else if (error == EINVAL || error == ENOSYS)
3125 3143                          *cs->statusp = resp->status = puterrno4(ENOTSUP);
3126 3144                  else
3127 3145                          *cs->statusp = resp->status = puterrno4(error);
3128 3146                  goto out;
3129 3147          }
3130 3148  
3131 3149          ASSERT(avp->v_flag & V_XATTRDIR);
3132 3150  
3133 3151          error = makefh4(&cs->fh, avp, cs->exi);
3134 3152  
                   /* avp was returned by VOP_LOOKUP; release it if it cannot be used. */
3135 3153          if (error) {
3136 3154                  VN_RELE(avp);
3137 3155                  *cs->statusp = resp->status = puterrno4(error);
3138 3156                  goto out;
3139 3157          }
3140 3158  
                   /* The attribute directory becomes the current vnode. */
3141 3159          VN_RELE(cs->vp);
3142 3160          cs->vp = avp;
3143 3161  
3144 3162          /*
3145 3163           * There is no requirement for an attrdir fh flag
3146 3164           * because the attrdir has a vnode flag to distinguish
3147 3165           * it from regular (non-xattr) directories.  The
3148 3166           * FH4_ATTRDIR flag is set for future sanity checks.
3149 3167           */
3150 3168          set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3151 3169          *cs->statusp = resp->status = NFS4_OK;
3152 3170  
3153 3171  out:
3154 3172          DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3155 3173              OPENATTR4res *, resp);
3156 3174  }
3157 3175  
           /*
            * do_io: issue one read or write on vp, retrying while the VOP reports
            * EAGAIN.
            *
            * direction	FREAD selects VOP_READ under a reader rwlock
            *		(V_WRITELOCK_FALSE); any other value selects VOP_WRITE
            *		under a writer rwlock (V_WRITELOCK_TRUE).
            * vp		vnode to operate on.
            * uio		I/O description; uio_fmode is overwritten with FNONBLOCK.
            * ioflag	passed through to VOP_READ/VOP_WRITE.
            * cred		credentials passed through to VOP_READ/VOP_WRITE.
            * ct		caller context passed to every VOP call.
            *
            * At most rfs4_maxlock_tries attempts are made.  Between attempts the
            * thread sleeps for delaytime ticks, which starts at
            * MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay) and doubles after each sleep; no
            * sleep follows the final attempt.
            *
            * Returns the error from the last attempt, or EAGAIN if no attempt was
            * made because rfs4_maxlock_tries is not positive.
            */
3158 3176  static int
3159 3177  do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3160 3178      caller_context_t *ct)
3161 3179  {
                   /*
                    * Start from EAGAIN so the return value is well defined even when
                    * the retry loop below runs zero times (rfs4_maxlock_tries <= 0);
                    * previously an uninitialized value was returned in that case.
                    */
3162 3180          int error = EAGAIN;
3163 3181          int i;
3164 3182          clock_t delaytime;
3165 3183  
3166 3184          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3167 3185  
3168 3186          /*
3169 3187           * Don't block on mandatory locks. If this routine returns
3170 3188           * EAGAIN, the caller should return NFS4ERR_LOCKED.
3171 3189           */
3172 3190          uio->uio_fmode = FNONBLOCK;
3173 3191  
3174 3192          for (i = 0; i < rfs4_maxlock_tries; i++) {
3175 3193  
3176 3194  
3177 3195                  if (direction == FREAD) {
3178 3196                          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3179 3197                          error = VOP_READ(vp, uio, ioflag, cred, ct);
3180 3198                          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3181 3199                  } else {
3182 3200                          (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3183 3201                          error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3184 3202                          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3185 3203                  }
3186 3204  
                           /* Anything other than EAGAIN (including success) is final. */
3187 3205                  if (error != EAGAIN)
3188 3206                          break;
3189 3207  
                           /* Back off before the next attempt, but not after the last. */
3190 3208                  if (i < rfs4_maxlock_tries - 1) {
3191 3209                          delay(delaytime);
3192 3210                          delaytime *= 2;
3193 3211                  }
3194 3212          }
3195 3213  
3196 3214          return (error);
3197 3215  }
3198 3216  
           /*
            * rfs4_op_read: handler for the READ operation.
            *
            * Reads up to args->count bytes starting at args->offset from the current
            * vnode (cs->vp) and describes the result in resp (data_len, data_val,
            * mblk, eof, wlist, wlist_len).  The outcome is stored in both
            * resp->status and *cs->statusp.
            *
            * Three buffer strategies appear below:
            *   - rdma_used: the client supplied args->wlist; the iovec is obtained
            *     from rdma_get_wchunk() and no mblk is allocated.
            *   - loaned_buffers: nfs_loaned_buffers is set and RDMA is not in use;
            *     an xuio from rfs_setup_xuio() is offered to VOP_REQZCBUF() and, if
            *     accepted, converted to an mblk with uio_to_mblk() after the read.
            *   - otherwise: rfs_read_alloc() provides an mblk plus an iovec array
            *     (iovp/iovcnt) that is freed at the out label.
            *
            * Early exits with NFS4_OK and no data: offset at or beyond the file size
            * (eof TRUE), and a zero-length request (eof FALSE).
            */
3199 3217  /* ARGSUSED */
3200 3218  static void
3201 3219  rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3202 3220      struct compound_state *cs)
3203 3221  {
3204 3222          READ4args *args = &argop->nfs_argop4_u.opread;
3205 3223          READ4res *resp = &resop->nfs_resop4_u.opread;
3206 3224          int error;
3207 3225          int verror;
3208 3226          vnode_t *vp;
3209 3227          struct vattr va;
3210 3228          struct iovec iov, *iovp = NULL;
3211 3229          int iovcnt;
3212 3230          struct uio uio;
3213 3231          u_offset_t offset;
3214 3232          bool_t *deleg = &cs->deleg;
3215 3233          nfsstat4 stat;
3216 3234          int in_crit = 0;
3217 3235          mblk_t *mp = NULL;
3218 3236          int alloc_err = 0;
3219 3237          int rdma_used = 0;
3220 3238          int loaned_buffers;
3221 3239          caller_context_t ct;
3222 3240          struct uio *uiop;
3223 3241  
                   /*
                    * NOTE(review): the type string here is "READ4args" while args is a
                    * READ4args pointer and the done probe uses "READ4res *" -- confirm
                    * whether the missing '*' is intended.
                    */
3224 3242          DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3225 3243              READ4args, args);
3226 3244  
3227 3245          vp = cs->vp;
3228 3246          if (vp == NULL) {
3229 3247                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3230 3248                  goto out;
3231 3249          }
3232 3250          if (cs->access == CS_ACCESS_DENIED) {
3233 3251                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3234 3252                  goto out;
3235 3253          }
3236 3254  
                   /* ct is passed by address here and reused for every VOP call below. */
3237 3255          if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3238 3256              deleg, TRUE, &ct)) != NFS4_OK) {
3239 3257                  *cs->statusp = resp->status = stat;
3240 3258                  goto out;
3241 3259          }
3242 3260  
3243 3261          /*
3244 3262           * Enter the critical region before calling VOP_RWLOCK
3245 3263           * to avoid a deadlock with write requests.
3246 3264           */
                   /* in_crit records that nbl_end_crit() must run at the out label. */
3247 3265          if (nbl_need_check(vp)) {
3248 3266                  nbl_start_crit(vp, RW_READER);
3249 3267                  in_crit = 1;
3250 3268                  if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3251 3269                      &ct)) {
3252 3270                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
3253 3271                          goto out;
3254 3272                  }
3255 3273          }
3256 3274  
                   /*
                    * A write chunk list selects the RDMA path; it must be able to hold
                    * the requested byte count.
                    */
3257 3275          if (args->wlist) {
3258 3276                  if (args->count > clist_len(args->wlist)) {
3259 3277                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3260 3278                          goto out;
3261 3279                  }
3262 3280                  rdma_used = 1;
3263 3281          }
3264 3282  
3265 3283          /* use loaned buffers for TCP */
3266 3284          loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3267 3285  
3268 3286          va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3269 3287          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3270 3288  
3271 3289          /*
3272 3290           * If we can't get the attributes, then we can't do the
3273 3291           * right access checking.  So, we'll fail the request.
3274 3292           */
3275 3293          if (verror) {
3276 3294                  *cs->statusp = resp->status = puterrno4(verror);
3277 3295                  goto out;
3278 3296          }
3279 3297  
3280 3298          if (vp->v_type != VREG) {
3281 3299                  *cs->statusp = resp->status =
3282 3300                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3283 3301                  goto out;
3284 3302          }
3285 3303  
                   /*
                    * The access checks are skipped when the caller's uid equals the
                    * file's owner uid.  Otherwise the request fails only if both VREAD
                    * and VEXEC are denied; the error reported is the VEXEC one.
                    */
3286 3304          if (crgetuid(cs->cr) != va.va_uid &&
3287 3305              (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3288 3306              (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3289 3307                  *cs->statusp = resp->status = puterrno4(error);
3290 3308                  goto out;
3291 3309          }
3292 3310  
3293 3311          if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3294 3312                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3295 3313                  goto out;
3296 3314          }
3297 3315  
                   /* Read at or past the size from VOP_GETATTR: succeed, no data, eof. */
3298 3316          offset = args->offset;
3299 3317          if (offset >= va.va_size) {
3300 3318                  *cs->statusp = resp->status = NFS4_OK;
3301 3319                  resp->eof = TRUE;
3302 3320                  resp->data_len = 0;
3303 3321                  resp->data_val = NULL;
3304 3322                  resp->mblk = NULL;
3305 3323                  /* RDMA */
3306 3324                  resp->wlist = args->wlist;
3307 3325                  resp->wlist_len = resp->data_len;
                           /* This repeats the status assignment made above. */
3308 3326                  *cs->statusp = resp->status = NFS4_OK;
3309 3327                  if (resp->wlist)
3310 3328                          clist_zero_len(resp->wlist);
3311 3329                  goto out;
3312 3330          }
3313 3331  
                   /* Zero-length read: succeed with no data and eof FALSE. */
3314 3332          if (args->count == 0) {
3315 3333                  *cs->statusp = resp->status = NFS4_OK;
3316 3334                  resp->eof = FALSE;
3317 3335                  resp->data_len = 0;
3318 3336                  resp->data_val = NULL;
3319 3337                  resp->mblk = NULL;
3320 3338                  /* RDMA */
3321 3339                  resp->wlist = args->wlist;
3322 3340                  resp->wlist_len = resp->data_len;
3323 3341                  if (resp->wlist)
3324 3342                          clist_zero_len(resp->wlist);
3325 3343                  goto out;
3326 3344          }
3327 3345  
3328 3346          /*
3329 3347           * Do not allocate memory more than maximum allowed
3330 3348           * transfer size
3331 3349           */
                   /* Note that this clamps the count in the caller's args in place. */
3332 3350          if (args->count > rfs4_tsize(req))
3333 3351                  args->count = rfs4_tsize(req);
3334 3352  
3335 3353          if (loaned_buffers) {
3336 3354                  uiop = (uio_t *)rfs_setup_xuio(vp);
3337 3355                  ASSERT(uiop != NULL);
3338 3356                  uiop->uio_segflg = UIO_SYSSPACE;
3339 3357                  uiop->uio_loffset = args->offset;
3340 3358                  uiop->uio_resid = args->count;
3341 3359  
3342 3360                  /* Jump to do the read if successful */
3343 3361                  if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3344 3362                          /*
3345 3363                           * Need to hold the vnode until after VOP_RETZCBUF()
3346 3364                           * is called.
3347 3365                           */
3348 3366                          VN_HOLD(vp);
3349 3367                          goto doio_read;
3350 3368                  }
3351 3369  
3352 3370                  DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3353 3371                      uiop->uio_loffset, int, uiop->uio_resid);
3354 3372  
3355 3373                  uiop->uio_extflg = 0;
3356 3374  
3357 3375                  /* failure to setup for zero copy */
                           /* Fall through to the ordinary buffer setup below. */
3358 3376                  rfs_free_xuio((void *)uiop);
3359 3377                  loaned_buffers = 0;
3360 3378          }
3361 3379  
3362 3380          /*
3363 3381           * If returning data via RDMA Write, then grab the chunk list. If we
3364 3382           * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3365 3383           */
3366 3384          if (rdma_used) {
3367 3385                  mp = NULL;
3368 3386                  (void) rdma_get_wchunk(req, &iov, args->wlist);
3369 3387                  uio.uio_iov = &iov;
3370 3388                  uio.uio_iovcnt = 1;
3371 3389          } else {
3372 3390                  /*
3373 3391                   * mp will contain the data to be sent out in the read reply.
3374 3392                   * It will be freed after the reply has been sent.
3375 3393                   */
3376 3394                  mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3377 3395                  ASSERT(mp != NULL);
                           /* alloc_err is never assigned after its initialization to 0. */
3378 3396                  ASSERT(alloc_err == 0);
3379 3397                  uio.uio_iov = iovp;
3380 3398                  uio.uio_iovcnt = iovcnt;
3381 3399          }
3382 3400  
3383 3401          uio.uio_segflg = UIO_SYSSPACE;
3384 3402          uio.uio_extflg = UIO_COPY_CACHED;
3385 3403          uio.uio_loffset = args->offset;
3386 3404          uio.uio_resid = args->count;
3387 3405          uiop = &uio;
3388 3406  
                   /*
                    * Reached either by falling through with uiop pointing at the local
                    * uio, or by the goto above with uiop pointing at the loaned xuio.
                    */
3389 3407  doio_read:
3390 3408          error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3391 3409  
                   /* Re-read the size after the I/O; it is used for the eof test below. */
3392 3410          va.va_mask = AT_SIZE;
3393 3411          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3394 3412  
                   /*
                    * NOTE(review): when this is reached via the loaned-buffer path, mp
                    * is still NULL, so nothing is freed here; the xuio obtained from
                    * rfs_setup_xuio() and the extra VN_HOLD(vp) are not released in this
                    * function on that path -- confirm they are cleaned up elsewhere.
                    */
3395 3413          if (error) {
3396 3414                  if (mp)
3397 3415                          freemsg(mp);
3398 3416                  *cs->statusp = resp->status = puterrno4(error);
3399 3417                  goto out;
3400 3418          }
3401 3419  
3402 3420          /* make mblk using zc buffers */
3403 3421          if (loaned_buffers) {
3404 3422                  mp = uio_to_mblk(uiop);
3405 3423                  ASSERT(mp != NULL);
3406 3424          }
3407 3425  
3408 3426          *cs->statusp = resp->status = NFS4_OK;
3409 3427  
                   /* Bytes transferred = requested count minus what is left in the uio. */
3410 3428          ASSERT(uiop->uio_resid >= 0);
3411 3429          resp->data_len = args->count - uiop->uio_resid;
3412 3430          if (mp) {
3413 3431                  resp->data_val = (char *)mp->b_datap->db_base;
3414 3432                  rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3415 3433          } else {
                           /* No mblk: this is the RDMA case, data sits in the chunk iovec. */
3416 3434                  resp->data_val = (caddr_t)iov.iov_base;
3417 3435          }
3418 3436  
3419 3437          resp->mblk = mp;
3420 3438  
                   /*
                    * eof is reported only when the post-read VOP_GETATTR succeeded and
                    * the read ended exactly at the reported file size.
                    */
3421 3439          if (!verror && offset + resp->data_len == va.va_size)
3422 3440                  resp->eof = TRUE;
3423 3441          else
3424 3442                  resp->eof = FALSE;
3425 3443  
3426 3444          if (rdma_used) {
3427 3445                  if (!rdma_setup_read_data4(args, resp)) {
3428 3446                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3429 3447                  }
3430 3448          } else {
3431 3449                  resp->wlist = NULL;
3432 3450          }
3433 3451  
3434 3452  out:
3435 3453          if (in_crit)
3436 3454                  nbl_end_crit(vp);
3437 3455  
                   /* iovp is non-NULL only if rfs_read_alloc() filled it in. */
3438 3456          if (iovp != NULL)
3439 3457                  kmem_free(iovp, iovcnt * sizeof (struct iovec));
3440 3458  
3441 3459          DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3442 3460              READ4res *, resp);
3443 3461  }
3444 3462  
           /*
            * rfs4_op_read_free: release the data attached to a READ result.
            *
            * Acts only when the result status is NFS4_OK and an mblk is attached:
            * the whole message is freed with freemsg() and the mblk, data_val and
            * data_len fields are cleared.  In every other case the result is left
            * untouched.
            */
3445 3463  static void
3446 3464  rfs4_op_read_free(nfs_resop4 *resop)
3447 3465  {
3448 3466          READ4res        *resp = &resop->nfs_resop4_u.opread;
3449 3467  
3450 3468          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3451 3469                  freemsg(resp->mblk);
3452 3470                  resp->mblk = NULL;
3453 3471                  resp->data_val = NULL;
3454 3472                  resp->data_len = 0;
3455 3473          }
3456 3474  }
3457 3475  
           /*
            * rfs4_op_readdir_free: release the data attached to a READDIR result.
            *
            * Acts only when the result status is NFS4_OK and an mblk is attached:
            * that block is freed with freeb() (not freemsg(), as the READ
            * counterpart uses) and the mblk and data_len fields are cleared.
            */
3458 3476  static void
3459 3477  rfs4_op_readdir_free(nfs_resop4 * resop)
3460 3478  {
3461 3479          READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3462 3480  
3463 3481          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3464 3482                  freeb(resp->mblk);
3465 3483                  resp->mblk = NULL;
3466 3484                  resp->data_len = 0;
3467 3485          }
3468 3486  }
3469 3487  
3470 3488  
           /*
            * rfs4_op_putpubfh: handler for the PUTPUBFH operation.
            *
            * Drops any current vnode, replaces cs->cr with a fresh copy of
            * cs->basecr, and then makes the public export's vnode
            * (ne->exi_public->exi_vp) the current vnode: cs->fh is built with
            * makefh4(), cs->exi is selected, an optional label check is done, a
            * hold is taken on the vnode, and call_checkauth4() is run.  The args
            * parameter is not referenced by this function.
            *
            * Failure statuses set here:
            *	NFS4ERR_SERVERFAULT	the public export has no vnode, or the label
            *				check failed
            *	puterrno4(error)	makefh4() failed
            *	(call_checkauth4 value)	authorization check failed; the vnode hold is
            *				dropped and cs->exi is restored
            */
3471 3489  /* ARGSUSED */
3472 3490  static void
3473 3491  rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3474 3492      struct compound_state *cs)
3475 3493  {
3476 3494          PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3477 3495          int             error;
3478 3496          vnode_t         *vp;
3479 3497          struct exportinfo *exi, *sav_exi;
3480 3498          nfs_fh4_fmt_t   *fh_fmtp;
3481 3499          nfs_export_t *ne = nfs_get_export();
3482 3500  
3483 3501          DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3484 3502  
                   /* The previous current vnode is dropped before anything can fail. */
3485 3503          if (cs->vp) {
3486 3504                  VN_RELE(cs->vp);
3487 3505                  cs->vp = NULL;
3488 3506          }
3489 3507  
                   /* Start from a fresh copy of the base credential. */
3490 3508          if (cs->cr)
3491 3509                  crfree(cs->cr);
3492 3510  
3493 3511          cs->cr = crdup(cs->basecr);
3494 3512  
3495 3513          vp = ne->exi_public->exi_vp;
3496 3514          if (vp == NULL) {
3497 3515                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3498 3516                  goto out;
3499 3517          }
3500 3518  
3501 3519          error = makefh4(&cs->fh, vp, ne->exi_public);
3502 3520          if (error != 0) {
3503 3521                  *cs->statusp = resp->status = puterrno4(error);
3504 3522                  goto out;
3505 3523          }
                   /* Remember the old export so it can be restored if auth fails. */
3506 3524          sav_exi = cs->exi;
3507 3525          if (ne->exi_public == ne->exi_root) {
3508 3526                  /*
3509 3527                   * No filesystem is actually shared public, so we default
3510 3528                   * to exi_root. In this case, we must check whether root
3511 3529                   * is exported.
3512 3530                   */
3513 3531                  fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3514 3532  
3515 3533                  /*
3516 3534                   * if root filesystem is exported, the exportinfo struct that we
3517 3535                   * should use is what checkexport4 returns, because root_exi is
3518 3536                   * actually a mostly empty struct.
3519 3537                   */
3520 3538                  exi = checkexport4(&fh_fmtp->fh4_fsid,
3521 3539                      (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3522 3540                  cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3523 3541          } else {
3524 3542                  /*
3525 3543                   * it's a properly shared filesystem
3526 3544                   */
3527 3545                  cs->exi = ne->exi_public;
3528 3546          }
3529 3547  
                   /*
                    * On labeled systems, a client whose label is not equal to the
                    * admin_low label must pass a DOMINANCE_CHECK against the vnode.
                    * NOTE(review): on failure cs->exi has already been replaced above
                    * and is not restored from sav_exi, unlike the call_checkauth4()
                    * failure path below -- confirm this is intended.
                    */
3530 3548          if (is_system_labeled()) {
3531 3549                  bslabel_t *clabel;
3532 3550  
3533 3551                  ASSERT(req->rq_label != NULL);
3534 3552                  clabel = req->rq_label;
3535 3553                  DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3536 3554                      "got client label from request(1)",
3537 3555                      struct svc_req *, req);
3538 3556                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3539 3557                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3540 3558                              cs->exi)) {
3541 3559                                  *cs->statusp = resp->status =
3542 3560                                      NFS4ERR_SERVERFAULT;
3543 3561                                  goto out;
3544 3562                          }
3545 3563                  }
3546 3564          }
3547 3565  
                   /* Take the hold that cs->vp will own from here on. */
3548 3566          VN_HOLD(vp);
3549 3567          cs->vp = vp;
3550 3568  
                   /*
                    * NOTE(review): only resp->status is assigned on this failure path;
                    * presumably call_checkauth4() stores the same value in *cs->statusp
                    * itself -- confirm against its definition.
                    */
3551 3569          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3552 3570                  VN_RELE(cs->vp);
3553 3571                  cs->vp = NULL;
3554 3572                  cs->exi = sav_exi;
3555 3573                  goto out;
3556 3574          }
3557 3575  
3558 3576          *cs->statusp = resp->status = NFS4_OK;
3559 3577  out:
3560 3578          DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3561 3579              PUTPUBFH4res *, resp);
3562 3580  }
3563 3581  
3564 3582  /*
3565 3583   * XXX - issue with put*fh operations. Suppose /export/home is exported.
3566 3584   * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3567 3585   * or joe have restrictive search permissions, then we shouldn't let
3568 3586   * the client get a file handle. This is easy to enforce. However, we
3569 3587   * don't know what security flavor should be used until we resolve the
3570 3588   * path name. Another complication is uid mapping. If root is
3571 3589   * the user, then it will be mapped to the anonymous user by default,
3572 3590   * but we won't know that till we've resolved the path name. And we won't
3573 3591   * know what the anonymous user is.
3574 3592   * Luckily, SECINFO is specified to take a full filename.
3575 3593   * So what we will have to do in rfs4_op_lookup is check that the flavor of
3576 3594   * the target object matches that of the request, and if root was the
3577 3595   * caller, check for the root= and anon= options, and if necessary,
3578 3596   * repeat the lookup using the right cred_t. But that's not done yet.
3579 3597   */
3580 3598  /* ARGSUSED */
3581 3599  static void
3582 3600  rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3583 3601      struct compound_state *cs)
3584 3602  {
3585 3603          PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3586 3604          PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3587 3605          nfs_fh4_fmt_t *fh_fmtp;
3588 3606  
3589 3607          DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3590 3608              PUTFH4args *, args);
3591 3609  
3592 3610          if (cs->vp) {
3593 3611                  VN_RELE(cs->vp);
3594 3612                  cs->vp = NULL;
3595 3613          }
3596 3614  
3597 3615          if (cs->cr) {
3598 3616                  crfree(cs->cr);
3599 3617                  cs->cr = NULL;
3600 3618          }
3601 3619  
3602 3620  
3603 3621          if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3604 3622                  *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3605 3623                  goto out;
3606 3624          }
3607 3625  
3608 3626          fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3609 3627          cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3610 3628              NULL);
3611 3629  
3612 3630          if (cs->exi == NULL) {
3613 3631                  *cs->statusp = resp->status = NFS4ERR_STALE;
3614 3632                  goto out;
3615 3633          }
3616 3634  
3617 3635          cs->cr = crdup(cs->basecr);
3618 3636  
3619 3637          ASSERT(cs->cr != NULL);
3620 3638  
3621 3639          if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3622 3640                  *cs->statusp = resp->status;
3623 3641                  goto out;
3624 3642          }
3625 3643  
3626 3644          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3627 3645                  VN_RELE(cs->vp);
3628 3646                  cs->vp = NULL;
3629 3647                  goto out;
3630 3648          }
3631 3649  
3632 3650          nfs_fh4_copy(&args->object, &cs->fh);
3633 3651          *cs->statusp = resp->status = NFS4_OK;
3634 3652          cs->deleg = FALSE;
3635 3653  
3636 3654  out:
3637 3655          DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3638 3656              PUTFH4res *, resp);
3639 3657  }
3640 3658  
3641 3659  /* ARGSUSED */
3642 3660  static void
3643 3661  rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3644 3662      struct compound_state *cs)
3645 3663  {
3646 3664          PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3647 3665          int error;
3648 3666          fid_t fid;
3649 3667          struct exportinfo *exi, *sav_exi;
3650 3668  
3651 3669          DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3652 3670  
3653 3671          if (cs->vp) {
3654 3672                  VN_RELE(cs->vp);
3655 3673                  cs->vp = NULL;
3656 3674          }
3657 3675  
3658 3676          if (cs->cr)
3659 3677                  crfree(cs->cr);
3660 3678  
3661 3679          cs->cr = crdup(cs->basecr);
3662 3680  
3663 3681          /*
3664 3682           * Using rootdir, the system root vnode,
3665 3683           * get its fid.
3666 3684           */
3667 3685          bzero(&fid, sizeof (fid));
3668 3686          fid.fid_len = MAXFIDSZ;
3669 3687          error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3670 3688          if (error != 0) {
3671 3689                  *cs->statusp = resp->status = puterrno4(error);
3672 3690                  goto out;
3673 3691          }
3674 3692  
3675 3693          /*
3676 3694           * Then use the root fsid & fid it to find out if it's exported
3677 3695           *
3678 3696           * If the server root isn't exported directly, then
3679 3697           * it should at least be a pseudo export based on
3680 3698           * one or more exports further down in the server's
3681 3699           * file tree.
3682 3700           */
3683 3701          exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3684 3702          if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3685 3703                  NFS4_DEBUG(rfs4_debug,
3686 3704                      (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3687 3705                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3688 3706                  goto out;
3689 3707          }
3690 3708  
3691 3709          /*
3692 3710           * Now make a filehandle based on the root
3693 3711           * export and root vnode.
3694 3712           */
3695 3713          error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3696 3714          if (error != 0) {
3697 3715                  *cs->statusp = resp->status = puterrno4(error);
3698 3716                  goto out;
3699 3717          }
3700 3718  
3701 3719          sav_exi = cs->exi;
3702 3720          cs->exi = exi;
3703 3721  
3704 3722          VN_HOLD(ZONE_ROOTVP());
3705 3723          cs->vp = ZONE_ROOTVP();
3706 3724  
3707 3725          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3708 3726                  VN_RELE(cs->vp);
3709 3727                  cs->vp = NULL;
3710 3728                  cs->exi = sav_exi;
3711 3729                  goto out;
3712 3730          }
3713 3731  
3714 3732          *cs->statusp = resp->status = NFS4_OK;
3715 3733          cs->deleg = FALSE;
3716 3734  out:
3717 3735          DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3718 3736              PUTROOTFH4res *, resp);
3719 3737  }
3720 3738  
3721 3739  /*
3722 3740   * readlink: args: CURRENT_FH.
3723 3741   *      res: status. If success - CURRENT_FH unchanged, return linktext.
3724 3742   */
3725 3743  
3726 3744  /* ARGSUSED */
3727 3745  static void
3728 3746  rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3729 3747      struct compound_state *cs)
3730 3748  {
3731 3749          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3732 3750          int error;
3733 3751          vnode_t *vp;
3734 3752          struct iovec iov;
3735 3753          struct vattr va;
3736 3754          struct uio uio;
3737 3755          char *data;
3738 3756          struct sockaddr *ca;
3739 3757          char *name = NULL;
3740 3758          int is_referral;
3741 3759  
3742 3760          DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3743 3761  
3744 3762          /* CURRENT_FH: directory */
3745 3763          vp = cs->vp;
3746 3764          if (vp == NULL) {
3747 3765                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3748 3766                  goto out;
3749 3767          }
3750 3768  
3751 3769          if (cs->access == CS_ACCESS_DENIED) {
3752 3770                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3753 3771                  goto out;
3754 3772          }
3755 3773  
3756 3774          /* Is it a referral? */
3757 3775          if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3758 3776  
3759 3777                  is_referral = 1;
3760 3778  
3761 3779          } else {
3762 3780  
3763 3781                  is_referral = 0;
3764 3782  
3765 3783                  if (vp->v_type == VDIR) {
3766 3784                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
3767 3785                          goto out;
3768 3786                  }
3769 3787  
3770 3788                  if (vp->v_type != VLNK) {
3771 3789                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3772 3790                          goto out;
3773 3791                  }
3774 3792  
3775 3793          }
3776 3794  
3777 3795          va.va_mask = AT_MODE;
3778 3796          error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3779 3797          if (error) {
3780 3798                  *cs->statusp = resp->status = puterrno4(error);
3781 3799                  goto out;
3782 3800          }
3783 3801  
3784 3802          if (MANDLOCK(vp, va.va_mode)) {
3785 3803                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3786 3804                  goto out;
3787 3805          }
3788 3806  
3789 3807          data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3790 3808  
3791 3809          if (is_referral) {
3792 3810                  char *s;
3793 3811                  size_t strsz;
3794 3812  
3795 3813                  /* Get an artificial symlink based on a referral */
3796 3814                  s = build_symlink(vp, cs->cr, &strsz);
3797 3815                  global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3798 3816                  DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3799 3817                      vnode_t *, vp, char *, s);
3800 3818                  if (s == NULL)
3801 3819                          error = EINVAL;
3802 3820                  else {
3803 3821                          error = 0;
3804 3822                          (void) strlcpy(data, s, MAXPATHLEN + 1);
3805 3823                          kmem_free(s, strsz);
3806 3824                  }
3807 3825  
3808 3826          } else {
3809 3827  
3810 3828                  iov.iov_base = data;
3811 3829                  iov.iov_len = MAXPATHLEN;
3812 3830                  uio.uio_iov = &iov;
3813 3831                  uio.uio_iovcnt = 1;
3814 3832                  uio.uio_segflg = UIO_SYSSPACE;
3815 3833                  uio.uio_extflg = UIO_COPY_CACHED;
3816 3834                  uio.uio_loffset = 0;
3817 3835                  uio.uio_resid = MAXPATHLEN;
3818 3836  
3819 3837                  error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3820 3838  
3821 3839                  if (!error)
3822 3840                          *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3823 3841          }
3824 3842  
3825 3843          if (error) {
3826 3844                  kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3827 3845                  *cs->statusp = resp->status = puterrno4(error);
3828 3846                  goto out;
3829 3847          }
3830 3848  
3831 3849          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3832 3850          name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3833 3851              MAXPATHLEN  + 1);
3834 3852  
3835 3853          if (name == NULL) {
3836 3854                  /*
3837 3855                   * Even though the conversion failed, we return
3838 3856                   * something. We just don't translate it.
3839 3857                   */
3840 3858                  name = data;
3841 3859          }
3842 3860  
3843 3861          /*
3844 3862           * treat link name as data
3845 3863           */
3846 3864          (void) str_to_utf8(name, (utf8string *)&resp->link);
3847 3865  
3848 3866          if (name != data)
3849 3867                  kmem_free(name, MAXPATHLEN + 1);
3850 3868          kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3851 3869          *cs->statusp = resp->status = NFS4_OK;
3852 3870  
3853 3871  out:
3854 3872          DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3855 3873              READLINK4res *, resp);
3856 3874  }
3857 3875  
3858 3876  static void
3859 3877  rfs4_op_readlink_free(nfs_resop4 *resop)
3860 3878  {
3861 3879          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3862 3880          utf8string *symlink = (utf8string *)&resp->link;
3863 3881  
3864 3882          if (symlink->utf8string_val) {
3865 3883                  UTF8STRING_FREE(*symlink)
3866 3884          }
3867 3885  }
3868 3886  
3869 3887  /*
3870 3888   * release_lockowner:
3871 3889   *      Release any state associated with the supplied
3872 3890   *      lockowner. Note if any lo_state is holding locks we will not
3873 3891   *      rele that lo_state and thus the lockowner will not be destroyed.
3874 3892   *      A client using lock after the lock owner stateid has been released
3875 3893   *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3876 3894   *      to reissue the lock with new_lock_owner set to TRUE.
3877 3895   *      args: lock_owner
3878 3896   *      res:  status
3879 3897   */
3880 3898  /* ARGSUSED */
3881 3899  static void
3882 3900  rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3883 3901      struct svc_req *req, struct compound_state *cs)
3884 3902  {
3885 3903          RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3886 3904          RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3887 3905          rfs4_lockowner_t *lo;
3888 3906          rfs4_openowner_t *oo;
3889 3907          rfs4_state_t *sp;
3890 3908          rfs4_lo_state_t *lsp;
3891 3909          rfs4_client_t *cp;
3892 3910          bool_t create = FALSE;
3893 3911          locklist_t *llist;
3894 3912          sysid_t sysid;
3895 3913  
3896 3914          DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3897 3915              cs, RELEASE_LOCKOWNER4args *, ap);
3898 3916  
3899 3917          /* Make sure there is a clientid around for this request */
3900 3918          cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3901 3919  
3902 3920          if (cp == NULL) {
3903 3921                  *cs->statusp = resp->status =
3904 3922                      rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3905 3923                  goto out;
3906 3924          }
3907 3925          rfs4_client_rele(cp);
3908 3926  
3909 3927          lo = rfs4_findlockowner(&ap->lock_owner, &create);
3910 3928          if (lo == NULL) {
3911 3929                  *cs->statusp = resp->status = NFS4_OK;
3912 3930                  goto out;
3913 3931          }
3914 3932          ASSERT(lo->rl_client != NULL);
3915 3933  
3916 3934          /*
3917 3935           * Check for EXPIRED client. If so will reap state with in a lease
3918 3936           * period or on next set_clientid_confirm step
3919 3937           */
3920 3938          if (rfs4_lease_expired(lo->rl_client)) {
3921 3939                  rfs4_lockowner_rele(lo);
3922 3940                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3923 3941                  goto out;
3924 3942          }
3925 3943  
3926 3944          /*
3927 3945           * If no sysid has been assigned, then no locks exist; just return.
3928 3946           */
3929 3947          rfs4_dbe_lock(lo->rl_client->rc_dbe);
3930 3948          if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3931 3949                  rfs4_lockowner_rele(lo);
3932 3950                  rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3933 3951                  goto out;
3934 3952          }
3935 3953  
3936 3954          sysid = lo->rl_client->rc_sysidt;
3937 3955          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3938 3956  
3939 3957          /*
3940 3958           * Mark the lockowner invalid.
3941 3959           */
3942 3960          rfs4_dbe_hide(lo->rl_dbe);
3943 3961  
3944 3962          /*
3945 3963           * sysid-pid pair should now not be used since the lockowner is
3946 3964           * invalid. If the client were to instantiate the lockowner again
3947 3965           * it would be assigned a new pid. Thus we can get the list of
3948 3966           * current locks.
3949 3967           */
3950 3968  
3951 3969          llist = flk_get_active_locks(sysid, lo->rl_pid);
3952 3970          /* If we are still holding locks fail */
3953 3971          if (llist != NULL) {
3954 3972  
3955 3973                  *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3956 3974  
3957 3975                  flk_free_locklist(llist);
3958 3976                  /*
3959 3977                   * We need to unhide the lockowner so the client can
3960 3978                   * try it again. The bad thing here is if the client
3961 3979                   * has a logic error that took it here in the first place
3962 3980                   * they probably have lost accounting of the locks that it
3963 3981                   * is holding. So we may have dangling state until the
3964 3982                   * open owner state is reaped via close. One scenario
3965 3983                   * that could possibly occur is that the client has
3966 3984                   * sent the unlock request(s) in separate threads
3967 3985                   * and has not waited for the replies before sending the
3968 3986                   * RELEASE_LOCKOWNER request. Presumably, it would expect
3969 3987                   * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3970 3988                   * reissuing the request.
3971 3989                   */
3972 3990                  rfs4_dbe_unhide(lo->rl_dbe);
3973 3991                  rfs4_lockowner_rele(lo);
3974 3992                  goto out;
3975 3993          }
3976 3994  
3977 3995          /*
3978 3996           * For the corresponding client we need to check each open
3979 3997           * owner for any opens that have lockowner state associated
3980 3998           * with this lockowner.
3981 3999           */
3982 4000  
3983 4001          rfs4_dbe_lock(lo->rl_client->rc_dbe);
3984 4002          for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3985 4003              oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3986 4004  
3987 4005                  rfs4_dbe_lock(oo->ro_dbe);
3988 4006                  for (sp = list_head(&oo->ro_statelist); sp != NULL;
3989 4007                      sp = list_next(&oo->ro_statelist, sp)) {
3990 4008  
3991 4009                          rfs4_dbe_lock(sp->rs_dbe);
3992 4010                          for (lsp = list_head(&sp->rs_lostatelist);
3993 4011                              lsp != NULL;
3994 4012                              lsp = list_next(&sp->rs_lostatelist, lsp)) {
3995 4013                                  if (lsp->rls_locker == lo) {
3996 4014                                          rfs4_dbe_lock(lsp->rls_dbe);
3997 4015                                          rfs4_dbe_invalidate(lsp->rls_dbe);
3998 4016                                          rfs4_dbe_unlock(lsp->rls_dbe);
3999 4017                                  }
4000 4018                          }
4001 4019                          rfs4_dbe_unlock(sp->rs_dbe);
4002 4020                  }
4003 4021                  rfs4_dbe_unlock(oo->ro_dbe);
4004 4022          }
4005 4023          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4006 4024  
4007 4025          rfs4_lockowner_rele(lo);
4008 4026  
4009 4027          *cs->statusp = resp->status = NFS4_OK;
4010 4028  
4011 4029  out:
4012 4030          DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4013 4031              cs, RELEASE_LOCKOWNER4res *, resp);
4014 4032  }
4015 4033  
4016 4034  /*
4017 4035   * short utility function to lookup a file and recall the delegation
4018 4036   */
4019 4037  static rfs4_file_t *
4020 4038  rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4021 4039      int *lkup_error, cred_t *cr)
4022 4040  {
4023 4041          vnode_t *vp;
4024 4042          rfs4_file_t *fp = NULL;
4025 4043          bool_t fcreate = FALSE;
4026 4044          int error;
4027 4045  
4028 4046          if (vpp)
4029 4047                  *vpp = NULL;
4030 4048  
4031 4049          if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4032 4050              NULL)) == 0) {
4033 4051                  if (vp->v_type == VREG)
4034 4052                          fp = rfs4_findfile(vp, NULL, &fcreate);
4035 4053                  if (vpp)
4036 4054                          *vpp = vp;
4037 4055                  else
4038 4056                          VN_RELE(vp);
4039 4057          }
4040 4058  
4041 4059          if (lkup_error)
4042 4060                  *lkup_error = error;
4043 4061  
4044 4062          return (fp);
4045 4063  }
4046 4064  
4047 4065  /*
4048 4066   * remove: args: CURRENT_FH: directory; name.
4049 4067   *      res: status. If success - CURRENT_FH unchanged, return change_info
4050 4068   *              for directory.
4051 4069   */
4052 4070  /* ARGSUSED */
4053 4071  static void
4054 4072  rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4055 4073      struct compound_state *cs)
4056 4074  {
4057 4075          REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4058 4076          REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4059 4077          int error;
4060 4078          vnode_t *dvp, *vp;
4061 4079          struct vattr bdva, idva, adva;
4062 4080          char *nm;
4063 4081          uint_t len;
4064 4082          rfs4_file_t *fp;
4065 4083          int in_crit = 0;
4066 4084          bslabel_t *clabel;
4067 4085          struct sockaddr *ca;
4068 4086          char *name = NULL;
4069 4087          nfsstat4 status;
4070 4088  
4071 4089          DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4072 4090              REMOVE4args *, args);
4073 4091  
4074 4092          /* CURRENT_FH: directory */
4075 4093          dvp = cs->vp;
4076 4094          if (dvp == NULL) {
4077 4095                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4078 4096                  goto out;
4079 4097          }
4080 4098  
4081 4099          if (cs->access == CS_ACCESS_DENIED) {
4082 4100                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4083 4101                  goto out;
4084 4102          }
4085 4103  
4086 4104          /*
4087 4105           * If there is an unshared filesystem mounted on this vnode,
4088 4106           * Do not allow to remove anything in this directory.
4089 4107           */
4090 4108          if (vn_ismntpt(dvp)) {
4091 4109                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4092 4110                  goto out;
4093 4111          }
4094 4112  
4095 4113          if (dvp->v_type != VDIR) {
4096 4114                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4097 4115                  goto out;
4098 4116          }
4099 4117  
4100 4118          status = utf8_dir_verify(&args->target);
4101 4119          if (status != NFS4_OK) {
4102 4120                  *cs->statusp = resp->status = status;
4103 4121                  goto out;
4104 4122          }
4105 4123  
4106 4124          /*
4107 4125           * Lookup the file so that we can check if it's a directory
4108 4126           */
4109 4127          nm = utf8_to_fn(&args->target, &len, NULL);
4110 4128          if (nm == NULL) {
4111 4129                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4112 4130                  goto out;
4113 4131          }
4114 4132  
4115 4133          if (len > MAXNAMELEN) {
4116 4134                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4117 4135                  kmem_free(nm, len);
4118 4136                  goto out;
4119 4137          }
4120 4138  
4121 4139          if (rdonly4(req, cs)) {
4122 4140                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4123 4141                  kmem_free(nm, len);
4124 4142                  goto out;
4125 4143          }
4126 4144  
4127 4145          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4128 4146          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4129 4147              MAXPATHLEN  + 1);
4130 4148  
4131 4149          if (name == NULL) {
4132 4150                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4133 4151                  kmem_free(nm, len);
4134 4152                  goto out;
4135 4153          }
4136 4154  
4137 4155          /*
4138 4156           * Lookup the file to determine type and while we are see if
4139 4157           * there is a file struct around and check for delegation.
4140 4158           * We don't need to acquire va_seq before this lookup, if
4141 4159           * it causes an update, cinfo.before will not match, which will
4142 4160           * trigger a cache flush even if atomic is TRUE.
4143 4161           */
4144 4162          if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4145 4163                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4146 4164                      NULL)) {
4147 4165                          VN_RELE(vp);
4148 4166                          rfs4_file_rele(fp);
4149 4167                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4150 4168                          if (nm != name)
4151 4169                                  kmem_free(name, MAXPATHLEN + 1);
4152 4170                          kmem_free(nm, len);
4153 4171                          goto out;
4154 4172                  }
4155 4173          }
4156 4174  
4157 4175          /* Didn't find anything to remove */
4158 4176          if (vp == NULL) {
4159 4177                  *cs->statusp = resp->status = error;
4160 4178                  if (nm != name)
4161 4179                          kmem_free(name, MAXPATHLEN + 1);
4162 4180                  kmem_free(nm, len);
4163 4181                  goto out;
4164 4182          }
4165 4183  
4166 4184          if (nbl_need_check(vp)) {
4167 4185                  nbl_start_crit(vp, RW_READER);
4168 4186                  in_crit = 1;
4169 4187                  if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4170 4188                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4171 4189                          if (nm != name)
4172 4190                                  kmem_free(name, MAXPATHLEN + 1);
4173 4191                          kmem_free(nm, len);
4174 4192                          nbl_end_crit(vp);
4175 4193                          VN_RELE(vp);
4176 4194                          if (fp) {
4177 4195                                  rfs4_clear_dont_grant(fp);
4178 4196                                  rfs4_file_rele(fp);
4179 4197                          }
4180 4198                          goto out;
4181 4199                  }
4182 4200          }
4183 4201  
4184 4202          /* check label before allowing removal */
4185 4203          if (is_system_labeled()) {
4186 4204                  ASSERT(req->rq_label != NULL);
4187 4205                  clabel = req->rq_label;
4188 4206                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4189 4207                      "got client label from request(1)",
4190 4208                      struct svc_req *, req);
4191 4209                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4192 4210                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4193 4211                              cs->exi)) {
4194 4212                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4195 4213                                  if (name != nm)
4196 4214                                          kmem_free(name, MAXPATHLEN + 1);
4197 4215                                  kmem_free(nm, len);
4198 4216                                  if (in_crit)
4199 4217                                          nbl_end_crit(vp);
4200 4218                                  VN_RELE(vp);
4201 4219                                  if (fp) {
4202 4220                                          rfs4_clear_dont_grant(fp);
4203 4221                                          rfs4_file_rele(fp);
4204 4222                                  }
4205 4223                                  goto out;
4206 4224                          }
4207 4225                  }
4208 4226          }
4209 4227  
4210 4228          /* Get dir "before" change value */
4211 4229          bdva.va_mask = AT_CTIME|AT_SEQ;
4212 4230          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4213 4231          if (error) {
4214 4232                  *cs->statusp = resp->status = puterrno4(error);
4215 4233                  if (nm != name)
4216 4234                          kmem_free(name, MAXPATHLEN + 1);
4217 4235                  kmem_free(nm, len);
4218 4236                  if (in_crit)
4219 4237                          nbl_end_crit(vp);
4220 4238                  VN_RELE(vp);
4221 4239                  if (fp) {
4222 4240                          rfs4_clear_dont_grant(fp);
4223 4241                          rfs4_file_rele(fp);
4224 4242                  }
4225 4243                  goto out;
4226 4244          }
4227 4245          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4228 4246  
4229 4247          /* Actually do the REMOVE operation */
4230 4248          if (vp->v_type == VDIR) {
4231 4249                  /*
4232 4250                   * Can't remove a directory that has a mounted-on filesystem.
4233 4251                   */
4234 4252                  if (vn_ismntpt(vp)) {
4235 4253                          error = EACCES;
4236 4254                  } else {
4237 4255                          /*
4238 4256                           * System V defines rmdir to return EEXIST,
4239 4257                           * not ENOTEMPTY, if the directory is not
4240 4258                           * empty.  A System V NFS server needs to map
4241 4259                           * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4242 4260                           * transmit over the wire.
4243 4261                           */
4244 4262                          if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4245 4263                              NULL, 0)) == EEXIST)
4246 4264                                  error = ENOTEMPTY;
4247 4265                  }
4248 4266          } else {
4249 4267                  if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4250 4268                      fp != NULL) {
4251 4269                          struct vattr va;
4252 4270                          vnode_t *tvp;
4253 4271  
4254 4272                          rfs4_dbe_lock(fp->rf_dbe);
4255 4273                          tvp = fp->rf_vp;
4256 4274                          if (tvp)
4257 4275                                  VN_HOLD(tvp);
4258 4276                          rfs4_dbe_unlock(fp->rf_dbe);
4259 4277  
4260 4278                          if (tvp) {
4261 4279                                  /*
4262 4280                                   * This is va_seq safe because we are not
4263 4281                                   * manipulating dvp.
4264 4282                                   */
4265 4283                                  va.va_mask = AT_NLINK;
4266 4284                                  if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4267 4285                                      va.va_nlink == 0) {
4268 4286                                          /* Remove state on file remove */
4269 4287                                          if (in_crit) {
4270 4288                                                  nbl_end_crit(vp);
4271 4289                                                  in_crit = 0;
4272 4290                                          }
4273 4291                                          rfs4_close_all_state(fp);
4274 4292                                  }
4275 4293                                  VN_RELE(tvp);
4276 4294                          }
4277 4295                  }
4278 4296          }
4279 4297  
4280 4298          if (in_crit)
4281 4299                  nbl_end_crit(vp);
4282 4300          VN_RELE(vp);
4283 4301  
4284 4302          if (fp) {
4285 4303                  rfs4_clear_dont_grant(fp);
4286 4304                  rfs4_file_rele(fp);
4287 4305          }
4288 4306          if (nm != name)
4289 4307                  kmem_free(name, MAXPATHLEN + 1);
4290 4308          kmem_free(nm, len);
4291 4309  
4292 4310          if (error) {
4293 4311                  *cs->statusp = resp->status = puterrno4(error);
4294 4312                  goto out;
4295 4313          }
4296 4314  
4297 4315          /*
4298 4316           * Get the initial "after" sequence number, if it fails, set to zero
4299 4317           */
4300 4318          idva.va_mask = AT_SEQ;
4301 4319          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4302 4320                  idva.va_seq = 0;
4303 4321  
4304 4322          /*
4305 4323           * Force modified data and metadata out to stable storage.
4306 4324           */
4307 4325          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4308 4326  
4309 4327          /*
4310 4328           * Get "after" change value, if it fails, simply return the
4311 4329           * before value.
4312 4330           */
4313 4331          adva.va_mask = AT_CTIME|AT_SEQ;
4314 4332          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4315 4333                  adva.va_ctime = bdva.va_ctime;
4316 4334                  adva.va_seq = 0;
4317 4335          }
4318 4336  
4319 4337          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4320 4338  
4321 4339          /*
4322 4340           * The cinfo.atomic = TRUE only if we have
4323 4341           * non-zero va_seq's, and it has incremented by exactly one
4324 4342           * during the VOP_REMOVE/RMDIR and it didn't change during
4325 4343           * the VOP_FSYNC.
4326 4344           */
4327 4345          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4328 4346              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4329 4347                  resp->cinfo.atomic = TRUE;
4330 4348          else
4331 4349                  resp->cinfo.atomic = FALSE;
4332 4350  
4333 4351          *cs->statusp = resp->status = NFS4_OK;
4334 4352  
4335 4353  out:
4336 4354          DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4337 4355              REMOVE4res *, resp);
4338 4356  }
4339 4357  
4340 4358  /*
4341 4359   * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4342 4360   *              oldname and newname.
4343 4361   *      res: status. If success - CURRENT_FH unchanged, return change_info
4344 4362   *              for both from and target directories.
4345 4363   */
4346 4364  /* ARGSUSED */
4347 4365  static void
4348 4366  rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4349 4367      struct compound_state *cs)
4350 4368  {
4351 4369          RENAME4args *args = &argop->nfs_argop4_u.oprename;
4352 4370          RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4353 4371          int error;
4354 4372          vnode_t *odvp;
4355 4373          vnode_t *ndvp;
4356 4374          vnode_t *srcvp, *targvp, *tvp;
4357 4375          struct vattr obdva, oidva, oadva;
4358 4376          struct vattr nbdva, nidva, nadva;
4359 4377          char *onm, *nnm;
4360 4378          uint_t olen, nlen;
4361 4379          rfs4_file_t *fp, *sfp;
4362 4380          int in_crit_src, in_crit_targ;
4363 4381          int fp_rele_grant_hold, sfp_rele_grant_hold;
4364 4382          int unlinked;
4365 4383          bslabel_t *clabel;
4366 4384          struct sockaddr *ca;
4367 4385          char *converted_onm = NULL;
4368 4386          char *converted_nnm = NULL;
4369 4387          nfsstat4 status;
4370 4388  
4371 4389          DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4372 4390              RENAME4args *, args);
4373 4391  
4374 4392          fp = sfp = NULL;
4375 4393          srcvp = targvp = tvp = NULL;
4376 4394          in_crit_src = in_crit_targ = 0;
4377 4395          fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4378 4396          unlinked = 0;
4379 4397  
4380 4398          /* CURRENT_FH: target directory */
4381 4399          ndvp = cs->vp;
4382 4400          if (ndvp == NULL) {
4383 4401                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4384 4402                  goto out;
4385 4403          }
4386 4404  
4387 4405          /* SAVED_FH: from directory */
4388 4406          odvp = cs->saved_vp;
4389 4407          if (odvp == NULL) {
4390 4408                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4391 4409                  goto out;
4392 4410          }
4393 4411  
4394 4412          if (cs->access == CS_ACCESS_DENIED) {
4395 4413                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4396 4414                  goto out;
4397 4415          }
4398 4416  
4399 4417          /*
4400 4418           * If there is an unshared filesystem mounted on this vnode,
4401 4419           * do not allow to rename objects in this directory.
4402 4420           */
4403 4421          if (vn_ismntpt(odvp)) {
4404 4422                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4405 4423                  goto out;
4406 4424          }
4407 4425  
4408 4426          /*
4409 4427           * If there is an unshared filesystem mounted on this vnode,
4410 4428           * do not allow to rename to this directory.
4411 4429           */
4412 4430          if (vn_ismntpt(ndvp)) {
4413 4431                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4414 4432                  goto out;
4415 4433          }
4416 4434  
4417 4435          if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4418 4436                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4419 4437                  goto out;
4420 4438          }
4421 4439  
4422 4440          if (cs->saved_exi != cs->exi) {
4423 4441                  *cs->statusp = resp->status = NFS4ERR_XDEV;
4424 4442                  goto out;
4425 4443          }
4426 4444  
4427 4445          status = utf8_dir_verify(&args->oldname);
4428 4446          if (status != NFS4_OK) {
4429 4447                  *cs->statusp = resp->status = status;
4430 4448                  goto out;
4431 4449          }
4432 4450  
4433 4451          status = utf8_dir_verify(&args->newname);
4434 4452          if (status != NFS4_OK) {
4435 4453                  *cs->statusp = resp->status = status;
4436 4454                  goto out;
4437 4455          }
4438 4456  
4439 4457          onm = utf8_to_fn(&args->oldname, &olen, NULL);
4440 4458          if (onm == NULL) {
4441 4459                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4442 4460                  goto out;
4443 4461          }
4444 4462          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4445 4463          nlen = MAXPATHLEN + 1;
4446 4464          converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4447 4465              nlen);
4448 4466  
4449 4467          if (converted_onm == NULL) {
4450 4468                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4451 4469                  kmem_free(onm, olen);
4452 4470                  goto out;
4453 4471          }
4454 4472  
4455 4473          nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4456 4474          if (nnm == NULL) {
4457 4475                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4458 4476                  if (onm != converted_onm)
4459 4477                          kmem_free(converted_onm, MAXPATHLEN + 1);
4460 4478                  kmem_free(onm, olen);
4461 4479                  goto out;
4462 4480          }
4463 4481          converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4464 4482              MAXPATHLEN  + 1);
4465 4483  
4466 4484          if (converted_nnm == NULL) {
4467 4485                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4468 4486                  kmem_free(nnm, nlen);
4469 4487                  nnm = NULL;
4470 4488                  if (onm != converted_onm)
4471 4489                          kmem_free(converted_onm, MAXPATHLEN + 1);
4472 4490                  kmem_free(onm, olen);
4473 4491                  goto out;
4474 4492          }
4475 4493  
4476 4494  
4477 4495          if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4478 4496                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4479 4497                  kmem_free(onm, olen);
4480 4498                  kmem_free(nnm, nlen);
4481 4499                  goto out;
4482 4500          }
4483 4501  
4484 4502  
4485 4503          if (rdonly4(req, cs)) {
4486 4504                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4487 4505                  if (onm != converted_onm)
4488 4506                          kmem_free(converted_onm, MAXPATHLEN + 1);
4489 4507                  kmem_free(onm, olen);
4490 4508                  if (nnm != converted_nnm)
4491 4509                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4492 4510                  kmem_free(nnm, nlen);
4493 4511                  goto out;
4494 4512          }
4495 4513  
4496 4514          /* check label of the target dir */
4497 4515          if (is_system_labeled()) {
4498 4516                  ASSERT(req->rq_label != NULL);
4499 4517                  clabel = req->rq_label;
4500 4518                  DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4501 4519                      "got client label from request(1)",
4502 4520                      struct svc_req *, req);
4503 4521                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4504 4522                          if (!do_rfs_label_check(clabel, ndvp,
4505 4523                              EQUALITY_CHECK, cs->exi)) {
4506 4524                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4507 4525                                  goto err_out;
4508 4526                          }
4509 4527                  }
4510 4528          }
4511 4529  
4512 4530          /*
4513 4531           * Is the source a file and have a delegation?
4514 4532           * We don't need to acquire va_seq before these lookups, if
4515 4533           * it causes an update, cinfo.before will not match, which will
4516 4534           * trigger a cache flush even if atomic is TRUE.
4517 4535           */
4518 4536          if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4519 4537              &error, cs->cr)) {
4520 4538                  if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4521 4539                      NULL)) {
4522 4540                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4523 4541                          goto err_out;
4524 4542                  }
4525 4543          }
4526 4544  
4527 4545          if (srcvp == NULL) {
4528 4546                  *cs->statusp = resp->status = puterrno4(error);
4529 4547                  if (onm != converted_onm)
4530 4548                          kmem_free(converted_onm, MAXPATHLEN + 1);
4531 4549                  kmem_free(onm, olen);
4532 4550                  if (nnm != converted_nnm)
4533 4551                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4534 4552                  kmem_free(nnm, nlen);
4535 4553                  goto out;
4536 4554          }
4537 4555  
4538 4556          sfp_rele_grant_hold = 1;
4539 4557  
4540 4558          /* Does the destination exist and a file and have a delegation? */
4541 4559          if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4542 4560              NULL, cs->cr)) {
4543 4561                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4544 4562                      NULL)) {
4545 4563                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4546 4564                          goto err_out;
4547 4565                  }
4548 4566          }
4549 4567          fp_rele_grant_hold = 1;
4550 4568  
4551 4569          /* Check for NBMAND lock on both source and target */
4552 4570          if (nbl_need_check(srcvp)) {
4553 4571                  nbl_start_crit(srcvp, RW_READER);
4554 4572                  in_crit_src = 1;
4555 4573                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4556 4574                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4557 4575                          goto err_out;
4558 4576                  }
4559 4577          }
4560 4578  
4561 4579          if (targvp && nbl_need_check(targvp)) {
4562 4580                  nbl_start_crit(targvp, RW_READER);
4563 4581                  in_crit_targ = 1;
4564 4582                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4565 4583                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4566 4584                          goto err_out;
4567 4585                  }
4568 4586          }
4569 4587  
4570 4588          /* Get source "before" change value */
4571 4589          obdva.va_mask = AT_CTIME|AT_SEQ;
4572 4590          error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4573 4591          if (!error) {
4574 4592                  nbdva.va_mask = AT_CTIME|AT_SEQ;
4575 4593                  error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4576 4594          }
4577 4595          if (error) {
4578 4596                  *cs->statusp = resp->status = puterrno4(error);
4579 4597                  goto err_out;
4580 4598          }
4581 4599  
4582 4600          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4583 4601          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4584 4602  
4585 4603          error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4586 4604              NULL, 0);
4587 4605  
4588 4606          /*
4589 4607           * If target existed and was unlinked by VOP_RENAME, state will need
4590 4608           * closed. To avoid deadlock, rfs4_close_all_state will be done after
4591 4609           * any necessary nbl_end_crit on srcvp and tgtvp.
4592 4610           */
4593 4611          if (error == 0 && fp != NULL) {
4594 4612                  rfs4_dbe_lock(fp->rf_dbe);
4595 4613                  tvp = fp->rf_vp;
4596 4614                  if (tvp)
4597 4615                          VN_HOLD(tvp);
4598 4616                  rfs4_dbe_unlock(fp->rf_dbe);
4599 4617  
4600 4618                  if (tvp) {
4601 4619                          struct vattr va;
4602 4620                          va.va_mask = AT_NLINK;
4603 4621  
4604 4622                          if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4605 4623                              va.va_nlink == 0) {
4606 4624                                  unlinked = 1;
4607 4625  
4608 4626                                  /* DEBUG data */
4609 4627                                  if ((srcvp == targvp) || (tvp != targvp)) {
4610 4628                                          cmn_err(CE_WARN, "rfs4_op_rename: "
4611 4629                                              "srcvp %p, targvp: %p, tvp: %p",
4612 4630                                              (void *)srcvp, (void *)targvp,
4613 4631                                              (void *)tvp);
4614 4632                                  }
4615 4633                          } else {
4616 4634                                  VN_RELE(tvp);
4617 4635                          }
4618 4636                  }
4619 4637          }
4620 4638          if (error == 0)
4621 4639                  vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4622 4640  
4623 4641          if (in_crit_src)
4624 4642                  nbl_end_crit(srcvp);
4625 4643          if (srcvp)
4626 4644                  VN_RELE(srcvp);
4627 4645          if (in_crit_targ)
4628 4646                  nbl_end_crit(targvp);
4629 4647          if (targvp)
4630 4648                  VN_RELE(targvp);
4631 4649  
4632 4650          if (unlinked) {
4633 4651                  ASSERT(fp != NULL);
4634 4652                  ASSERT(tvp != NULL);
4635 4653  
4636 4654                  /* DEBUG data */
4637 4655                  if (RW_READ_HELD(&tvp->v_nbllock)) {
4638 4656                          cmn_err(CE_WARN, "rfs4_op_rename: "
4639 4657                              "RW_READ_HELD(%p)", (void *)tvp);
4640 4658                  }
4641 4659  
4642 4660                  /* The file is gone and so should the state */
4643 4661                  rfs4_close_all_state(fp);
4644 4662                  VN_RELE(tvp);
4645 4663          }
4646 4664  
4647 4665          if (sfp) {
4648 4666                  rfs4_clear_dont_grant(sfp);
4649 4667                  rfs4_file_rele(sfp);
4650 4668          }
4651 4669          if (fp) {
4652 4670                  rfs4_clear_dont_grant(fp);
4653 4671                  rfs4_file_rele(fp);
4654 4672          }
4655 4673  
4656 4674          if (converted_onm != onm)
4657 4675                  kmem_free(converted_onm, MAXPATHLEN + 1);
4658 4676          kmem_free(onm, olen);
4659 4677          if (converted_nnm != nnm)
4660 4678                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4661 4679          kmem_free(nnm, nlen);
4662 4680  
4663 4681          /*
4664 4682           * Get the initial "after" sequence number, if it fails, set to zero
4665 4683           */
4666 4684          oidva.va_mask = AT_SEQ;
4667 4685          if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4668 4686                  oidva.va_seq = 0;
4669 4687  
4670 4688          nidva.va_mask = AT_SEQ;
4671 4689          if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4672 4690                  nidva.va_seq = 0;
4673 4691  
4674 4692          /*
4675 4693           * Force modified data and metadata out to stable storage.
4676 4694           */
4677 4695          (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4678 4696          (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4679 4697  
4680 4698          if (error) {
4681 4699                  *cs->statusp = resp->status = puterrno4(error);
4682 4700                  goto out;
4683 4701          }
4684 4702  
4685 4703          /*
4686 4704           * Get "after" change values, if it fails, simply return the
4687 4705           * before value.
4688 4706           */
4689 4707          oadva.va_mask = AT_CTIME|AT_SEQ;
4690 4708          if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4691 4709                  oadva.va_ctime = obdva.va_ctime;
4692 4710                  oadva.va_seq = 0;
4693 4711          }
4694 4712  
4695 4713          nadva.va_mask = AT_CTIME|AT_SEQ;
4696 4714          if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4697 4715                  nadva.va_ctime = nbdva.va_ctime;
4698 4716                  nadva.va_seq = 0;
4699 4717          }
4700 4718  
4701 4719          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4702 4720          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4703 4721  
4704 4722          /*
4705 4723           * The cinfo.atomic = TRUE only if we have
4706 4724           * non-zero va_seq's, and it has incremented by exactly one
4707 4725           * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4708 4726           */
4709 4727          if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4710 4728              oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4711 4729                  resp->source_cinfo.atomic = TRUE;
4712 4730          else
4713 4731                  resp->source_cinfo.atomic = FALSE;
4714 4732  
4715 4733          if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4716 4734              nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4717 4735                  resp->target_cinfo.atomic = TRUE;
4718 4736          else
4719 4737                  resp->target_cinfo.atomic = FALSE;
4720 4738  
4721 4739  #ifdef  VOLATILE_FH_TEST
4722 4740          {
4723 4741          extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4724 4742  
4725 4743          /*
4726 4744           * Add the renamed file handle to the volatile rename list
4727 4745           */
4728 4746          if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4729 4747                  /* file handles may expire on rename */
4730 4748                  vnode_t *vp;
4731 4749  
4732 4750                  nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4733 4751                  /*
4734 4752                   * Already know that nnm will be a valid string
4735 4753                   */
4736 4754                  error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4737 4755                      NULL, NULL, NULL);
4738 4756                  kmem_free(nnm, nlen);
4739 4757                  if (!error) {
4740 4758                          add_volrnm_fh(cs->exi, vp);
4741 4759                          VN_RELE(vp);
4742 4760                  }
4743 4761          }
4744 4762          }
4745 4763  #endif  /* VOLATILE_FH_TEST */
4746 4764  
4747 4765          *cs->statusp = resp->status = NFS4_OK;
4748 4766  out:
4749 4767          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4750 4768              RENAME4res *, resp);
4751 4769          return;
4752 4770  
4753 4771  err_out:
4754 4772          if (onm != converted_onm)
4755 4773                  kmem_free(converted_onm, MAXPATHLEN + 1);
4756 4774          if (onm != NULL)
4757 4775                  kmem_free(onm, olen);
4758 4776          if (nnm != converted_nnm)
4759 4777                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4760 4778          if (nnm != NULL)
4761 4779                  kmem_free(nnm, nlen);
4762 4780  
4763 4781          if (in_crit_src) nbl_end_crit(srcvp);
4764 4782          if (in_crit_targ) nbl_end_crit(targvp);
4765 4783          if (targvp) VN_RELE(targvp);
4766 4784          if (srcvp) VN_RELE(srcvp);
4767 4785          if (sfp) {
4768 4786                  if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4769 4787                  rfs4_file_rele(sfp);
4770 4788          }
4771 4789          if (fp) {
4772 4790                  if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4773 4791                  rfs4_file_rele(fp);
4774 4792          }
4775 4793  
4776 4794          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4777 4795              RENAME4res *, resp);
4778 4796  }
4779 4797  
4780 4798  /* ARGSUSED */
4781 4799  static void
4782 4800  rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4783 4801      struct compound_state *cs)
4784 4802  {
4785 4803          RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4786 4804          RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4787 4805          rfs4_client_t *cp;
4788 4806  
4789 4807          DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4790 4808              RENEW4args *, args);
4791 4809  
4792 4810          if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4793 4811                  *cs->statusp = resp->status =
4794 4812                      rfs4_check_clientid(&args->clientid, 0);
4795 4813                  goto out;
4796 4814          }
4797 4815  
4798 4816          if (rfs4_lease_expired(cp)) {
4799 4817                  rfs4_client_rele(cp);
4800 4818                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4801 4819                  goto out;
4802 4820          }
4803 4821  
4804 4822          rfs4_update_lease(cp);
4805 4823  
4806 4824          mutex_enter(cp->rc_cbinfo.cb_lock);
4807 4825          if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4808 4826                  cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4809 4827                  *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4810 4828          } else {
4811 4829                  *cs->statusp = resp->status = NFS4_OK;
4812 4830          }
4813 4831          mutex_exit(cp->rc_cbinfo.cb_lock);
4814 4832  
4815 4833          rfs4_client_rele(cp);
4816 4834  
4817 4835  out:
4818 4836          DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4819 4837              RENEW4res *, resp);
4820 4838  }
4821 4839  
4822 4840  /* ARGSUSED */
4823 4841  static void
4824 4842  rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4825 4843      struct compound_state *cs)
4826 4844  {
4827 4845          RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4828 4846  
4829 4847          DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4830 4848  
4831 4849          /* No need to check cs->access - we are not accessing any object */
4832 4850          if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4833 4851                  *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4834 4852                  goto out;
4835 4853          }
4836 4854          if (cs->vp != NULL) {
4837 4855                  VN_RELE(cs->vp);
4838 4856          }
4839 4857          cs->vp = cs->saved_vp;
4840 4858          cs->saved_vp = NULL;
4841 4859          cs->exi = cs->saved_exi;
4842 4860          nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4843 4861          *cs->statusp = resp->status = NFS4_OK;
4844 4862          cs->deleg = FALSE;
4845 4863  
4846 4864  out:
4847 4865          DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4848 4866              RESTOREFH4res *, resp);
4849 4867  }
4850 4868  
4851 4869  /* ARGSUSED */
4852 4870  static void
4853 4871  rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4854 4872      struct compound_state *cs)
4855 4873  {
4856 4874          SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4857 4875  
4858 4876          DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4859 4877  
4860 4878          /* No need to check cs->access - we are not accessing any object */
4861 4879          if (cs->vp == NULL) {
4862 4880                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4863 4881                  goto out;
4864 4882          }
4865 4883          if (cs->saved_vp != NULL) {
4866 4884                  VN_RELE(cs->saved_vp);
4867 4885          }
4868 4886          cs->saved_vp = cs->vp;
4869 4887          VN_HOLD(cs->saved_vp);
4870 4888          cs->saved_exi = cs->exi;
4871 4889          /*
4872 4890           * since SAVEFH is fairly rare, don't alloc space for its fh
4873 4891           * unless necessary.
4874 4892           */
4875 4893          if (cs->saved_fh.nfs_fh4_val == NULL) {
4876 4894                  cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4877 4895          }
4878 4896          nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4879 4897          *cs->statusp = resp->status = NFS4_OK;
4880 4898  
4881 4899  out:
4882 4900          DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4883 4901              SAVEFH4res *, resp);
4884 4902  }
4885 4903  
4886 4904  /*
4887 4905   * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4888 4906   * return the bitmap of attrs that were set successfully. It is also
4889 4907   * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4890 4908   * always be called only after do_rfs4_set_attrs().
4891 4909   *
4892 4910   * Verify that the attributes are same as the expected ones. sargp->vap
4893 4911   * and sargp->sbp contain the input attributes as translated from fattr4.
4894 4912   *
4895 4913   * This function verifies only the attrs that correspond to a vattr or
4896 4914   * vfsstat struct. That is because of the extra step needed to get the
4897 4915   * corresponding system structs. Other attributes have already been set or
4898 4916   * verified by do_rfs4_set_attrs.
4899 4917   *
4900 4918   * Return 0 if all attrs match, -1 if some don't, error if error processing.
4901 4919   */
4902 4920  static int
4903 4921  rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4904 4922      bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4905 4923  {
4906 4924          int error, ret_error = 0;
4907 4925          int i, k;
4908 4926          uint_t sva_mask = sargp->vap->va_mask;
4909 4927          uint_t vbit;
4910 4928          union nfs4_attr_u *na;
4911 4929          uint8_t *amap;
4912 4930          bool_t getsb = ntovp->vfsstat;
4913 4931  
4914 4932          if (sva_mask != 0) {
4915 4933                  /*
4916 4934                   * Okay to overwrite sargp->vap because we verify based
4917 4935                   * on the incoming values.
4918 4936                   */
4919 4937                  ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4920 4938                      sargp->cs->cr, NULL);
4921 4939                  if (ret_error) {
4922 4940                          if (resp == NULL)
4923 4941                                  return (ret_error);
4924 4942                          /*
4925 4943                           * Must return bitmap of successful attrs
4926 4944                           */
4927 4945                          sva_mask = 0;   /* to prevent checking vap later */
4928 4946                  } else {
4929 4947                          /*
4930 4948                           * Some file systems clobber va_mask. it is probably
4931 4949                   * wrong of them to do so, nonetheless we practice
4932 4950                           * defensive coding.
4933 4951                           * See bug id 4276830.
4934 4952                           */
4935 4953                          sargp->vap->va_mask = sva_mask;
4936 4954                  }
4937 4955          }
4938 4956  
4939 4957          if (getsb) {
4940 4958                  /*
4941 4959                   * Now get the superblock and loop on the bitmap, as there is
4942 4960                   * no simple way of translating from superblock to bitmap4.
4943 4961                   */
4944 4962                  ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4945 4963                  if (ret_error) {
4946 4964                          if (resp == NULL)
4947 4965                                  goto errout;
4948 4966                          getsb = FALSE;
4949 4967                  }
4950 4968          }
4951 4969  
4952 4970          /*
4953 4971           * Now loop and verify each attribute which getattr returned
4954 4972           * whether it's the same as the input.
4955 4973           */
4956 4974          if (resp == NULL && !getsb && (sva_mask == 0))
4957 4975                  goto errout;
4958 4976  
4959 4977          na = ntovp->na;
4960 4978          amap = ntovp->amap;
4961 4979          k = 0;
4962 4980          for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4963 4981                  k = *amap;
4964 4982                  ASSERT(nfs4_ntov_map[k].nval == k);
4965 4983                  vbit = nfs4_ntov_map[k].vbit;
4966 4984  
4967 4985                  /*
4968 4986                   * If vattr attribute but VOP_GETATTR failed, or it's
4969 4987                   * superblock attribute but VFS_STATVFS failed, skip
4970 4988                   */
4971 4989                  if (vbit) {
4972 4990                          if ((vbit & sva_mask) == 0)
4973 4991                                  continue;
4974 4992                  } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4975 4993                          continue;
4976 4994                  }
4977 4995                  error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4978 4996                  if (resp != NULL) {
4979 4997                          if (error)
4980 4998                                  ret_error = -1; /* not all match */
4981 4999                          else    /* update response bitmap */
4982 5000                                  *resp |= nfs4_ntov_map[k].fbit;
4983 5001                          continue;
4984 5002                  }
4985 5003                  if (error) {
4986 5004                          ret_error = -1; /* not all match */
4987 5005                          break;
4988 5006                  }
4989 5007          }
4990 5008  errout:
4991 5009          return (ret_error);
4992 5010  }
4993 5011  
4994 5012  /*
4995 5013   * Decode the attribute to be set/verified. If the attr requires a sys op
4996 5014   * (VOP_GETATTR, VFS_STATVFS), and the request is to verify, then don't
4997 5015   * call the sv_getit function for it, because the sys op hasn't yet been done.
4998 5016   * Return 0 for success, error code if failed.
4999 5017   *
            * k indexes nfs4_ntov_map[]; that entry's xfunc decodes the value from
            * xdrp into *nap.  If resp_bval is non-NULL, the attribute's fbit is
            * OR'ed into it only when no error occurred and the attribute was not
            * deferred (i.e. it has neither a vbit nor the vfsstat flag).
            *
5000 5018   * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
            * The one exception visible below: if sv_getit fails, the decoded arg
            * is released right away with xdr_free().
5001 5019   */
5002 5020  static int
5003 5021  decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5004 5022      int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5005 5023  {
5006 5024          int error = 0;
5007 5025          bool_t set_later;
5008 5026  
                   /*
                    * The vattr bit (if any) is recorded before decoding, so it
                    * stays set in va_mask even when the decode below fails.
                    */
5009 5027          sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5010 5028  
5011 5029          if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5012 5030                  set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5013 5031                  /*
5014 5032                   * don't verify yet if a vattr or sb dependent attr,
5015 5033                   * because we don't have their sys values yet.
5016 5034                   * Will be done later.
5017 5035                   */
5018 5036                  if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5019 5037                          /*
5020 5038                           * ACLs are a special case, since setting the MODE
5021 5039                           * conflicts with setting the ACL.  We delay setting
5022 5040                           * the ACL until all other attributes have been set.
5023 5041                           * The ACL gets set in do_rfs4_op_setattr().
5024 5042                           */
5025 5043                          if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5026 5044                                  error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5027 5045                                      sargp, nap);
5028 5046                                  if (error) {
5029 5047                                          xdr_free(nfs4_ntov_map[k].xfunc,
5030 5048                                              (caddr_t)nap);
5031 5049                                  }
5032 5050                          }
5033 5051                  }
5034 5052          } else {
5035 5053  #ifdef  DEBUG
5036 5054                  cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5037 5055                      "decoding attribute %d\n", k);
5038 5056  #endif
5039 5057                  error = EINVAL;
5040 5058          }
                   /*
                    * set_later is assigned only on the successful-decode path.
                    * When the decode failed, error is EINVAL, so the !error test
                    * short-circuits before set_later is read.
                    */
5041 5059          if (!error && resp_bval && !set_later) {
5042 5060                  *resp_bval |= nfs4_ntov_map[k].fbit;
5043 5061          }
5044 5062  
5045 5063          return (error);
5046 5064  }
5047 5065  
5048 5066  /*
5049 5067   * Set vattr based on incoming fattr4 attrs - used by setattr.
5050 5068   * Set response mask. Ignore any values that are not writable vattr attrs.
            *
            * Also called by the verify/nverify handlers with cmd == NFS4ATTR_VERIT
            * and resp == NULL, in which case it only decodes the attrs to compare.
            *
            * Each attribute that decodes successfully is appended to ntovp (its
            * nval in ntovp->amap, its value in ntovp->na, and ntovp->attrcnt is
            * bumped); ntovp->vfsstat is set to TRUE if any such attribute has the
            * vfsstat flag.  Processing stops at the first failure, leaving the
            * attributes decoded so far in ntovp.
            *
            * Returns NFS4_OK on success; NFS4ERR_NOFILEHANDLE if cs->vp is NULL;
            * NFS4ERR_ACCESS if cs->access is CS_ACCESS_DENIED; NFS4ERR_ATTRNOTSUPP
            * if the failure was ENOTSUP; otherwise puterrno4() of the error.
5051 5069   */
5052 5070  static nfsstat4
5053 5071  do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5054 5072      struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5055 5073      nfs4_attr_cmd_t cmd)
5056 5074  {
5057 5075          int error = 0;
5058 5076          int i;
5059 5077          char *attrs = fattrp->attrlist4;
5060 5078          uint32_t attrslen = fattrp->attrlist4_len;
5061 5079          XDR xdr;
5062 5080          nfsstat4 status = NFS4_OK;
5063 5081          vnode_t *vp = cs->vp;
5064 5082          union nfs4_attr_u *na;
5065 5083          uint8_t *amap;
5066 5084  
5067 5085  #ifndef lint
5068 5086          /*
5069 5087           * Make sure that maximum attribute number can be expressed as an
5070 5088           * 8 bit quantity.
5071 5089           */
5072 5090          ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5073 5091  #endif
5074 5092  
5075 5093          if (vp == NULL) {
5076 5094                  if (resp)
5077 5095                          *resp = 0;
5078 5096                  return (NFS4ERR_NOFILEHANDLE);
5079 5097          }
5080 5098          if (cs->access == CS_ACCESS_DENIED) {
5081 5099                  if (resp)
5082 5100                          *resp = 0;
5083 5101                  return (NFS4ERR_ACCESS);
5084 5102          }
5085 5103  
                   /* Reset the per-request state carried in sargp. */
5086 5104          sargp->op = cmd;
5087 5105          sargp->cs = cs;
5088 5106          sargp->flag = 0;        /* may be set later */
5089 5107          sargp->vap->va_mask = 0;
5090 5108          sargp->rdattr_error = NFS4_OK;
5091 5109          sargp->rdattr_error_req = FALSE;
5092 5110          /* sargp->sbp is set by the caller */
5093 5111  
                   /* Decode directly out of the client-supplied attrlist4 buffer. */
5094 5112          xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5095 5113  
5096 5114          na = ntovp->na;
5097 5115          amap = ntovp->amap;
5098 5116  
5099 5117          /*
5100 5118           * The following loop iterates on the nfs4_ntov_map checking
5101 5119           * if the fbit is set in the requested bitmap.
5102 5120           * If set then we process the arguments using the
5103 5121           * rfs4_fattr4 conversion functions to populate the setattr
5104 5122           * vattr and va_mask. Any settable attrs that are not using vattr
5105 5123           * will be set in this loop.
5106 5124           */
5107 5125          for (i = 0; i < nfs4_ntov_map_size; i++) {
5108 5126                  if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5109 5127                          continue;
5110 5128                  }
5111 5129                  /*
5112 5130                   * If setattr, must be a writable attr.
5113 5131                   * If verify/nverify, must be a readable attr.
5114 5132                   */
5115 5133                  if ((error = (*nfs4_ntov_map[i].sv_getit)(
5116 5134                      NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5117 5135                          /*
5118 5136                           * Client tries to set/verify an
5119 5137                           * unsupported attribute, tries to set
5120 5138                           * a read only attr or verify a write
5121 5139                           * only one - error!
5122 5140                           */
5123 5141                          break;
5124 5142                  }
5125 5143                  /*
5126 5144                   * Decode the attribute to set/verify
                            * (the "resp ? resp : NULL" argument is simply resp).
5127 5145                   */
5128 5146                  error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5129 5147                      &xdr, resp ? resp : NULL, na);
5130 5148                  if (error)
5131 5149                          break;
                           /* Record the decoded attr; only successes are counted. */
5132 5150                  *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5133 5151                  na++;
5134 5152                  (ntovp->attrcnt)++;
5135 5153                  if (nfs4_ntov_map[i].vfsstat)
5136 5154                          ntovp->vfsstat = TRUE;
5137 5155          }
5138 5156  
5139 5157          if (error != 0)
5140 5158                  status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5141 5159                      puterrno4(error));
5142 5160          /* xdrmem_destroy(&xdrs); */    /* NO-OP */
5143 5161          return (status);
5144 5162  }
5145 5163  
5146 5164  static nfsstat4
5147 5165  do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5148 5166      stateid4 *stateid)
5149 5167  {
                   /*
                    * Apply the attributes in fattrp to the current filehandle
                    * (cs->vp).  The work is done in stages: decode the attrs via
                    * do_rfs4_set_attrs(), validate a combined ACL+MODE request,
                    * check the stateid when the size is being changed, change
                    * the size, set the remaining vattr attrs with VOP_SETATTR,
                    * and finally set the ACL.  On return *resp holds the bitmap
                    * of attrs that were set, restricted to fattrp->attrmask.
                    * Every exit goes through the done: label, which leaves the
                    * NBMAND critical region (if entered) and frees ntov.
                    */
5150 5168          int error = 0;
5151 5169          struct nfs4_svgetit_arg sarg;
5152 5170          bool_t trunc;
5153 5171  
5154 5172          nfsstat4 status = NFS4_OK;
5155 5173          cred_t *cr = cs->cr;
5156 5174          vnode_t *vp = cs->vp;
5157 5175          struct nfs4_ntov_table ntov;
5158 5176          struct statvfs64 sb;
5159 5177          struct vattr bva;
5160 5178          struct flock64 bf;
5161 5179          int in_crit = 0;
5162 5180          uint_t saved_mask = 0;
5163 5181          caller_context_t ct;
5164 5182  
5165 5183          *resp = 0;
5166 5184          sarg.sbp = &sb;
5167 5185          sarg.is_referral = B_FALSE;
5168 5186          nfs4_ntov_table_init(&ntov);
5169 5187          status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5170 5188              NFS4ATTR_SETIT);
5171 5189          if (status != NFS4_OK) {
5172 5190                  /*
5173 5191                   * failed set attrs
5174 5192                   */
5175 5193                  goto done;
5176 5194          }
5177 5195          if ((sarg.vap->va_mask == 0) &&
5178 5196              (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5179 5197                  /*
5180 5198                   * no further work to be done
5181 5199                   */
5182 5200                  goto done;
5183 5201          }
5184 5202  
5185 5203          /*
5186 5204           * If we got a request to set the ACL and the MODE, only
5187 5205           * allow changing VSUID, VSGID, and VSVTX.  Attempting
5188 5206           * to change any other bits, along with setting an ACL,
5189 5207           * gives NFS4ERR_INVAL.
5190 5208           */
5191 5209          if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5192 5210              (fattrp->attrmask & FATTR4_MODE_MASK)) {
5193 5211                  vattr_t va;
5194 5212  
5195 5213                  va.va_mask = AT_MODE;
5196 5214                  error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5197 5215                  if (error) {
5198 5216                          status = puterrno4(error);
5199 5217                          goto done;
5200 5218                  }
                           /* XOR yields the mode bits that the request would change. */
5201 5219                  if ((sarg.vap->va_mode ^ va.va_mode) &
5202 5220                      ~(VSUID | VSGID | VSVTX)) {
5203 5221                          status = NFS4ERR_INVAL;
5204 5222                          goto done;
5205 5223                  }
5206 5224          }
5207 5225  
5208 5226          /* Check stateid only if size has been set */
5209 5227          if (sarg.vap->va_mask & AT_SIZE) {
5210 5228                  trunc = (sarg.vap->va_size == 0);
5211 5229                  status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5212 5230                      trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5213 5231                  if (status != NFS4_OK)
5214 5232                          goto done;
5215 5233          } else {
                           /*
                            * rfs4_check_stateid() was not called with &ct on this
                            * path, so the caller context is filled in here.
                            */
5216 5234                  ct.cc_sysid = 0;
5217 5235                  ct.cc_pid = 0;
5218 5236                  ct.cc_caller_id = nfs4_srv_caller_id;
5219 5237                  ct.cc_flags = CC_DONTBLOCK;
5220 5238          }
5221 5239  
5222 5240          /* XXX start of possible race with delegations */
5223 5241  
5224 5242          /*
5225 5243           * We need to specially handle size changes because it is
5226 5244           * possible for the client to create a file with read-only
5227 5245           * modes, but with the file opened for writing. If the client
5228 5246           * then tries to set the file size, e.g. ftruncate(3C),
5229 5247           * fcntl(F_FREESP), the normal access checking done in
5230 5248           * VOP_SETATTR would prevent the client from doing it even though
5231 5249           * it should be allowed to do so.  To get around this, we do the
5232 5250           * access checking for ourselves and use VOP_SPACE which doesn't
5233 5251           * do the access checking.
5234 5252           * Also the client should not be allowed to change the file
5235 5253           * size if there is a conflicting non-blocking mandatory lock in
5236 5254           * the region of the change.
5237 5255           */
5238 5256          if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5239 5257                  u_offset_t offset;
5240 5258                  ssize_t length;
5241 5259  
5242 5260                  /*
5243 5261                   * ufs_setattr clears AT_SIZE from vap->va_mask, but
5244 5262                   * before returning, sarg.vap->va_mask is used to
5245 5263                   * generate the setattr reply bitmap.  We also clear
5246 5264                   * AT_SIZE below before calling VOP_SPACE.  For both
5247 5265                   * of these cases, the va_mask needs to be saved here
5248 5266                   * and restored after calling VOP_SETATTR.
5249 5267                   */
5250 5268                  saved_mask = sarg.vap->va_mask;
5251 5269  
5252 5270                  /*
5253 5271                   * Check any possible conflict due to NBMAND locks.
5254 5272                   * Get into critical region before VOP_GETATTR, so the
5255 5273                   * size attribute is valid when checking conflicts.
5256 5274                   */
5257 5275                  if (nbl_need_check(vp)) {
5258 5276                          nbl_start_crit(vp, RW_READER);
5259 5277                          in_crit = 1;
5260 5278                  }
5261 5279  
5262 5280                  bva.va_mask = AT_UID|AT_SIZE;
5263 5281                  if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5264 5282                          status = puterrno4(error);
5265 5283                          goto done;
5266 5284                  }
5267 5285  
5268 5286                  if (in_crit) {
                                   /*
                                    * The region to check is the span between the
                                    * current size and the requested size, whichever
                                    * direction the file is being resized.
                                    */
5269 5287                          if (sarg.vap->va_size < bva.va_size) {
5270 5288                                  offset = sarg.vap->va_size;
5271 5289                                  length = bva.va_size - sarg.vap->va_size;
5272 5290                          } else {
5273 5291                                  offset = bva.va_size;
5274 5292                                  length = sarg.vap->va_size - bva.va_size;
5275 5293                          }
5276 5294                          if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5277 5295                              &ct)) {
5278 5296                                  status = NFS4ERR_LOCKED;
5279 5297                                  goto done;
5280 5298                          }
5281 5299                  }
5282 5300  
                           /*
                            * Only when the caller owns the file is the size changed
                            * through VOP_SPACE; AT_SIZE is then dropped from va_mask
                            * so VOP_SETATTR below does not set it again.  Otherwise
                            * AT_SIZE stays in the mask and VOP_SETATTR handles it.
                            */
5283 5301                  if (crgetuid(cr) == bva.va_uid) {
5284 5302                          sarg.vap->va_mask &= ~AT_SIZE;
5285 5303                          bf.l_type = F_WRLCK;
5286 5304                          bf.l_whence = 0;
5287 5305                          bf.l_start = (off64_t)sarg.vap->va_size;
5288 5306                          bf.l_len = 0;
5289 5307                          bf.l_sysid = 0;
5290 5308                          bf.l_pid = 0;
5291 5309                          error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5292 5310                              (offset_t)sarg.vap->va_size, cr, &ct);
5293 5311                  }
5294 5312          }
5295 5313  
                   /* Skipped if VOP_SPACE failed or nothing is left in va_mask. */
5296 5314          if (!error && sarg.vap->va_mask != 0)
5297 5315                  error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5298 5316  
5299 5317          /* restore va_mask -- ufs_setattr clears AT_SIZE */
5300 5318          if (saved_mask & AT_SIZE)
5301 5319                  sarg.vap->va_mask |= AT_SIZE;
5302 5320  
5303 5321          /*
5304 5322           * If an ACL was being set, it has been delayed until now,
5305 5323           * in order to set the mode (via the VOP_SETATTR() above) first.
5306 5324           */
5307 5325          if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5308 5326                  int i;
5309 5327  
                           /*
                            * Find the slot that recorded the decoded ACL.
                            * NOTE(review): the scan is bounded by NFS4_MAXNUM_ATTRS
                            * rather than ntov.attrcnt - confirm that entries past
                            * attrcnt cannot be mistaken for a match.
                            */
5310 5328                  for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5311 5329                          if (ntov.amap[i] == FATTR4_ACL)
5312 5330                                  break;
5313 5331                  if (i < NFS4_MAXNUM_ATTRS) {
5314 5332                          error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5315 5333                              NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5316 5334                          if (error == 0) {
5317 5335                                  *resp |= FATTR4_ACL_MASK;
5318 5336                          } else if (error == ENOTSUP) {
5319 5337                                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5320 5338                                  status = NFS4ERR_ATTRNOTSUPP;
5321 5339                                  goto done;
5322 5340                          }
                                   /* any other error is handled by the check below */
5323 5341                  } else {
5324 5342                          NFS4_DEBUG(rfs4_debug,
5325 5343                              (CE_NOTE, "do_rfs4_op_setattr: "
5326 5344                              "unable to find ACL in fattr4"));
5327 5345                          error = EINVAL;
5328 5346                  }
5329 5347          }
5330 5348  
5331 5349          if (error) {
5332 5350                  /* check if a monitor detected a delegation conflict */
5333 5351                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5334 5352                          status = NFS4ERR_DELAY;
5335 5353                  else
5336 5354                          status = puterrno4(error);
5337 5355  
5338 5356                  /*
5339 5357                   * Set the response bitmap when setattr failed.
5340 5358                   * If VOP_SETATTR partially succeeded, test by doing a
5341 5359                   * VOP_GETATTR on the object and comparing the data
5342 5360                   * to the setattr arguments.
5343 5361                   */
5344 5362                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5345 5363          } else {
5346 5364                  /*
5347 5365                   * Force modified metadata out to stable storage.
5348 5366                   */
5349 5367                  (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5350 5368                  /*
5351 5369                   * Set response bitmap
5352 5370                   */
5353 5371                  nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5354 5372          }
5355 5373  
5356 5374  /* Return early and already have a NFSv4 error */
5357 5375  done:
5358 5376          /*
5359 5377           * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5360 5378           * conversion sets both readable and writeable NFS4 attrs
5361 5379           * for AT_MTIME and AT_ATIME.  The line below masks out
5362 5380           * unrequested attrs from the setattr result bitmap.  This
5363 5381           * is placed after the done: label to catch the ATTRNOTSUP
5364 5382           * case.
5365 5383           */
5366 5384          *resp &= fattrp->attrmask;
5367 5385  
5368 5386          if (in_crit)
5369 5387                  nbl_end_crit(vp);
5370 5388  
5371 5389          nfs4_ntov_table_free(&ntov, &sarg);
5372 5390  
5373 5391          return (status);
5374 5392  }
5375 5393  
5376 5394  /* ARGSUSED */
5377 5395  static void
5378 5396  rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5379 5397      struct compound_state *cs)
5380 5398  {
                   /*
                    * SETATTR operation handler.  Runs a series of pre-checks (no
                    * current filehandle, vnode is a mount point, rdonly4(), and on
                    * labeled systems the client label check) and, if they pass,
                    * hands the request to do_rfs4_op_setattr().  The resulting
                    * status is stored in both resp->status and *cs->statusp.
                    */
5381 5399          SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5382 5400          SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5383 5401          bslabel_t *clabel;
5384 5402  
5385 5403          DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5386 5404              SETATTR4args *, args);
5387 5405  
5388 5406          if (cs->vp == NULL) {
5389 5407                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5390 5408                  goto out;
5391 5409          }
5392 5410  
5393 5411          /*
5394 5412           * If there is an unshared filesystem mounted on this vnode,
5395 5413           * do not allow to setattr on this vnode.
5396 5414           */
5397 5415          if (vn_ismntpt(cs->vp)) {
5398 5416                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5399 5417                  goto out;
5400 5418          }
5401 5419  
                   /*
                    * NOTE(review): the two early exits above leave resp->attrsset
                    * untouched; presumably the result is pre-zeroed by the
                    * caller - confirm.
                    */
5402 5420          resp->attrsset = 0;
5403 5421  
5404 5422          if (rdonly4(req, cs)) {
5405 5423                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5406 5424                  goto out;
5407 5425          }
5408 5426  
5409 5427          /* check label before setting attributes */
5410 5428          if (is_system_labeled()) {
5411 5429                  ASSERT(req->rq_label != NULL);
5412 5430                  clabel = req->rq_label;
5413 5431                  DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5414 5432                      "got client label from request(1)",
5415 5433                      struct svc_req *, req);
                           /* the equality check is skipped for an admin_low client label */
5416 5434                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
5417 5435                          if (!do_rfs_label_check(clabel, cs->vp,
5418 5436                              EQUALITY_CHECK, cs->exi)) {
5419 5437                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5420 5438                                  goto out;
5421 5439                          }
5422 5440                  }
5423 5441          }
5424 5442  
5425 5443          *cs->statusp = resp->status =
5426 5444              do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5427 5445              &args->stateid);
5428 5446  
5429 5447  out:
5430 5448          DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5431 5449              SETATTR4res *, resp);
5432 5450  }
5433 5451  
5434 5452  /* ARGSUSED */
5435 5453  static void
5436 5454  rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5437 5455      struct compound_state *cs)
5438 5456  {
5439 5457          /*
5440 5458           * verify and nverify are exactly the same, except that nverify
5441 5459           * succeeds when some argument changed, and verify succeeds when
5442 5460           * none changed.
                    *
                    * The client-supplied attrs are first decoded with
                    * do_rfs4_set_attrs(NFS4ATTR_VERIT) and then compared against
                    * the object by rfs4_verify_attr().  The final status is
                    * stored in both resp->status and *cs->statusp.
5443 5461           */
5444 5462  
5445 5463          VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5446 5464          VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5447 5465  
5448 5466          int error;
5449 5467          struct nfs4_svgetit_arg sarg;
5450 5468          struct statvfs64 sb;
5451 5469          struct nfs4_ntov_table ntov;
5452 5470  
5453 5471          DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5454 5472              VERIFY4args *, args);
5455 5473  
5456 5474          if (cs->vp == NULL) {
5457 5475                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                           /* ntov has not been initialized yet, so skip the free at done: */
5458 5476                  goto out;
5459 5477          }
5460 5478  
5461 5479          sarg.sbp = &sb;
5462 5480          sarg.is_referral = B_FALSE;
5463 5481          nfs4_ntov_table_init(&ntov);
5464 5482          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5465 5483              &sarg, &ntov, NFS4ATTR_VERIT);
5466 5484          if (resp->status != NFS4_OK) {
5467 5485                  /*
5468 5486                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5469 5487                   * so could return -1 for "no match".
5470 5488                   */
5471 5489                  if (resp->status == -1)
5472 5490                          resp->status = NFS4ERR_NOT_SAME;
5473 5491                  goto done;
5474 5492          }
5475 5493          error = rfs4_verify_attr(&sarg, NULL, &ntov);
                   /* 0: all attrs match; -1: at least one differs; else an errno */
5476 5494          switch (error) {
5477 5495          case 0:
5478 5496                  resp->status = NFS4_OK;
5479 5497                  break;
5480 5498          case -1:
5481 5499                  resp->status = NFS4ERR_NOT_SAME;
5482 5500                  break;
5483 5501          default:
5484 5502                  resp->status = puterrno4(error);
5485 5503                  break;
5486 5504          }
5487 5505  done:
5488 5506          *cs->statusp = resp->status;
5489 5507          nfs4_ntov_table_free(&ntov, &sarg);
5490 5508  out:
5491 5509          DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5492 5510              VERIFY4res *, resp);
5493 5511  }
5494 5512  
5495 5513  /* ARGSUSED */
5496 5514  static void
5497 5515  rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5498 5516      struct compound_state *cs)
5499 5517  {
5500 5518          /*
5501 5519           * verify and nverify are exactly the same, except that nverify
5502 5520           * succeeds when some argument changed, and verify succeeds when
5503 5521           * none changed.
                    *
                    * The client-supplied attrs are first decoded with
                    * do_rfs4_set_attrs(NFS4ATTR_VERIT) and then compared against
                    * the object by rfs4_verify_attr().  The final status is
                    * stored in both resp->status and *cs->statusp.
5504 5522           */
5505 5523  
5506 5524          NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5507 5525          NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5508 5526  
5509 5527          int error;
5510 5528          struct nfs4_svgetit_arg sarg;
5511 5529          struct statvfs64 sb;
5512 5530          struct nfs4_ntov_table ntov;
5513 5531  
5514 5532          DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5515 5533              NVERIFY4args *, args);
5516 5534  
5517 5535          if (cs->vp == NULL) {
5518 5536                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                           /* ntov is not set up yet: fire the done probe and return */
5519 5537                  DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5520 5538                      NVERIFY4res *, resp);
5521 5539                  return;
5522 5540          }
5523 5541          sarg.sbp = &sb;
5524 5542          sarg.is_referral = B_FALSE;
5525 5543          nfs4_ntov_table_init(&ntov);
5526 5544          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5527 5545              &sarg, &ntov, NFS4ATTR_VERIT);
5528 5546          if (resp->status != NFS4_OK) {
5529 5547                  /*
5530 5548                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5531 5549                   * so could return -1 for "no match".
5532 5550                   */
5533 5551                  if (resp->status == -1)
5534 5552                          resp->status = NFS4_OK;
5535 5553                  goto done;
5536 5554          }
5537 5555          error = rfs4_verify_attr(&sarg, NULL, &ntov);
                   /*
                    * Inverse of verify: a full match (0) fails with NFS4ERR_SAME,
                    * while a mismatch (-1) is success.
                    */
5538 5556          switch (error) {
5539 5557          case 0:
5540 5558                  resp->status = NFS4ERR_SAME;
5541 5559                  break;
5542 5560          case -1:
5543 5561                  resp->status = NFS4_OK;
5544 5562                  break;
5545 5563          default:
5546 5564                  resp->status = puterrno4(error);
5547 5565                  break;
5548 5566          }
5549 5567  done:
5550 5568          *cs->statusp = resp->status;
5551 5569          nfs4_ntov_table_free(&ntov, &sarg);
5552 5570  
5553 5571          DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5554 5572              NVERIFY4res *, resp);
5555 5573  }
5556 5574  
5557 5575  /*
5558 5576   * XXX - This should live in an NFS header file.
            *
            * Size of the on-stack iovec array used by rfs4_op_write(); a write
            * whose mblk chain needs more entries than this allocates its iovec
            * array with kmem_alloc() instead.
5559 5577   */
5560 5578  #define MAX_IOVECS      12
5561 5579  
5562 5580  /* ARGSUSED */
5563 5581  static void
5564 5582  rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5565 5583      struct compound_state *cs)
5566 5584  {
5567 5585          WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5568 5586          WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5569 5587          int error;
5570 5588          vnode_t *vp;
5571 5589          struct vattr bva;
5572 5590          u_offset_t rlimit;
5573 5591          struct uio uio;
5574 5592          struct iovec iov[MAX_IOVECS];
5575 5593          struct iovec *iovp;
5576 5594          int iovcnt;
5577 5595          int ioflag;
5578 5596          cred_t *savecred, *cr;
5579 5597          bool_t *deleg = &cs->deleg;
5580 5598          nfsstat4 stat;
5581 5599          int in_crit = 0;
5582 5600          caller_context_t ct;
5583 5601          nfs4_srv_t *nsrv4;
5584 5602  
5585 5603          DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5586 5604              WRITE4args *, args);
5587 5605  
5588 5606          vp = cs->vp;
5589 5607          if (vp == NULL) {
5590 5608                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5591 5609                  goto out;
5592 5610          }
5593 5611          if (cs->access == CS_ACCESS_DENIED) {
5594 5612                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5595 5613                  goto out;
5596 5614          }
5597 5615  
5598 5616          cr = cs->cr;
5599 5617  
5600 5618          if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5601 5619              deleg, TRUE, &ct)) != NFS4_OK) {
5602 5620                  *cs->statusp = resp->status = stat;
5603 5621                  goto out;
5604 5622          }
5605 5623  
5606 5624          /*
5607 5625           * We have to enter the critical region before calling VOP_RWLOCK
5608 5626           * to avoid a deadlock with ufs.
5609 5627           */
5610 5628          if (nbl_need_check(vp)) {
5611 5629                  nbl_start_crit(vp, RW_READER);
5612 5630                  in_crit = 1;
5613 5631                  if (nbl_conflict(vp, NBL_WRITE,
5614 5632                      args->offset, args->data_len, 0, &ct)) {
5615 5633                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
5616 5634                          goto out;
5617 5635                  }
5618 5636          }
5619 5637  
5620 5638          bva.va_mask = AT_MODE | AT_UID;
5621 5639          error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5622 5640  
5623 5641          /*
5624 5642           * If we can't get the attributes, then we can't do the
5625 5643           * right access checking.  So, we'll fail the request.
5626 5644           */
5627 5645          if (error) {
5628 5646                  *cs->statusp = resp->status = puterrno4(error);
5629 5647                  goto out;
5630 5648          }
5631 5649  
5632 5650          if (rdonly4(req, cs)) {
5633 5651                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5634 5652                  goto out;
5635 5653          }
5636 5654  
5637 5655          if (vp->v_type != VREG) {
5638 5656                  *cs->statusp = resp->status =
5639 5657                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5640 5658                  goto out;
5641 5659          }
5642 5660  
5643 5661          if (crgetuid(cr) != bva.va_uid &&
5644 5662              (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5645 5663                  *cs->statusp = resp->status = puterrno4(error);
5646 5664                  goto out;
5647 5665          }
5648 5666  
5649 5667          if (MANDLOCK(vp, bva.va_mode)) {
5650 5668                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5651 5669                  goto out;
5652 5670          }
5653 5671  
5654 5672          nsrv4 = nfs4_get_srv();
5655 5673          if (args->data_len == 0) {
5656 5674                  *cs->statusp = resp->status = NFS4_OK;
5657 5675                  resp->count = 0;
5658 5676                  resp->committed = args->stable;
5659 5677                  resp->writeverf = nsrv4->write4verf;
5660 5678                  goto out;
5661 5679          }
5662 5680  
5663 5681          if (args->mblk != NULL) {
5664 5682                  mblk_t *m;
5665 5683                  uint_t bytes, round_len;
5666 5684  
5667 5685                  iovcnt = 0;
5668 5686                  bytes = 0;
5669 5687                  round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5670 5688                  for (m = args->mblk;
5671 5689                      m != NULL && bytes < round_len;
5672 5690                      m = m->b_cont) {
5673 5691                          iovcnt++;
5674 5692                          bytes += MBLKL(m);
5675 5693                  }
5676 5694  #ifdef DEBUG
5677 5695                  /* should have ended on an mblk boundary */
5678 5696                  if (bytes != round_len) {
5679 5697                          printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5680 5698                              bytes, round_len, args->data_len);
5681 5699                          printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5682 5700                              (void *)args->mblk, (void *)m);
5683 5701                          ASSERT(bytes == round_len);
5684 5702                  }
5685 5703  #endif
5686 5704                  if (iovcnt <= MAX_IOVECS) {
5687 5705                          iovp = iov;
5688 5706                  } else {
5689 5707                          iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5690 5708                  }
5691 5709                  mblk_to_iov(args->mblk, iovcnt, iovp);
5692 5710          } else if (args->rlist != NULL) {
5693 5711                  iovcnt = 1;
5694 5712                  iovp = iov;
5695 5713                  iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5696 5714                  iovp->iov_len = args->data_len;
5697 5715          } else {
5698 5716                  iovcnt = 1;
5699 5717                  iovp = iov;
5700 5718                  iovp->iov_base = args->data_val;
5701 5719                  iovp->iov_len = args->data_len;
5702 5720          }
5703 5721  
5704 5722          uio.uio_iov = iovp;
5705 5723          uio.uio_iovcnt = iovcnt;
5706 5724  
5707 5725          uio.uio_segflg = UIO_SYSSPACE;
5708 5726          uio.uio_extflg = UIO_COPY_DEFAULT;
5709 5727          uio.uio_loffset = args->offset;
5710 5728          uio.uio_resid = args->data_len;
5711 5729          uio.uio_llimit = curproc->p_fsz_ctl;
5712 5730          rlimit = uio.uio_llimit - args->offset;
5713 5731          if (rlimit < (u_offset_t)uio.uio_resid)
5714 5732                  uio.uio_resid = (int)rlimit;
5715 5733  
5716 5734          if (args->stable == UNSTABLE4)
5717 5735                  ioflag = 0;
5718 5736          else if (args->stable == FILE_SYNC4)
5719 5737                  ioflag = FSYNC;
5720 5738          else if (args->stable == DATA_SYNC4)
5721 5739                  ioflag = FDSYNC;
5722 5740          else {
5723 5741                  if (iovp != iov)
5724 5742                          kmem_free(iovp, sizeof (*iovp) * iovcnt);
5725 5743                  *cs->statusp = resp->status = NFS4ERR_INVAL;
5726 5744                  goto out;
5727 5745          }
5728 5746  
5729 5747          /*
5730 5748           * We're changing creds because VM may fault and we need
5731 5749           * the cred of the current thread to be used if quota
5732 5750           * checking is enabled.
5733 5751           */
5734 5752          savecred = curthread->t_cred;
5735 5753          curthread->t_cred = cr;
5736 5754          error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5737 5755          curthread->t_cred = savecred;
5738 5756  
5739 5757          if (iovp != iov)
5740 5758                  kmem_free(iovp, sizeof (*iovp) * iovcnt);
5741 5759  
5742 5760          if (error) {
5743 5761                  *cs->statusp = resp->status = puterrno4(error);
5744 5762                  goto out;
5745 5763          }
5746 5764  
5747 5765          *cs->statusp = resp->status = NFS4_OK;
5748 5766          resp->count = args->data_len - uio.uio_resid;
5749 5767  
5750 5768          if (ioflag == 0)
5751 5769                  resp->committed = UNSTABLE4;
5752 5770          else
5753 5771                  resp->committed = FILE_SYNC4;
5754 5772  
5755 5773          resp->writeverf = nsrv4->write4verf;
5756 5774  
5757 5775  out:
5758 5776          if (in_crit)
5759 5777                  nbl_end_crit(vp);
5760 5778  
5761 5779          DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5762 5780              WRITE4res *, resp);
5763 5781  }
5764 5782  
5765 5783  
5766 5784  /* XXX put in a header file */
5767 5785  extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5768 5786  
5769 5787  void
5770 5788  rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5771 5789      struct svc_req *req, cred_t *cr, int *rv)
5772 5790  {
                   /*
                    * Execute a COMPOUND request: each entry of args->array is handed
                    * to its rfsv4disptab handler in order, with the result written to
                    * the matching slot of resp->array.  Processing stops at the first
                    * op whose status is not NFS4_OK (or at an out-of-range opcode),
                    * and resp->array is then shrunk to the ops actually attempted.
                    *
                    * exi and cr are ASSERTed to be NULL; the credential is allocated
                    * here with crget() and released through cs.basecr.  When rv is
                    * non-NULL, *rv is 0 unless sec_svc_getcred() fails, in which case
                    * it is set to 1 after svcerr_badcred() has been called.
                    */
5773 5791          uint_t i;
5774 5792          struct compound_state cs;
5775 5793          nfs4_srv_t *nsrv4;
5776 5794          nfs_export_t *ne = nfs_get_export();
5777 5795  
5778 5796          if (rv != NULL)
5779 5797                  *rv = 0;
5780 5798          rfs4_init_compound_state(&cs);
5781 5799          /*
5782 5800           * Form a reply tag by copying over the request tag.
5783 5801           */
5784 5802          resp->tag.utf8string_val =
5785 5803              kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5786 5804          resp->tag.utf8string_len = args->tag.utf8string_len;
5787 5805          bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5788 5806              resp->tag.utf8string_len);
5789 5807  
5790 5808          cs.statusp = &resp->status;
5791 5809          cs.req = req;
5792 5810          resp->array = NULL;
5793 5811          resp->array_len = 0;
5794 5812  
5795 5813          /*
5796 5814           * XXX for now, minorversion should be zero
5797 5815           */
5798 5816          if (args->minorversion != NFS4_MINORVERSION) {
5799 5817                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5800 5818                      &cs, COMPOUND4args *, args);
5801 5819                  resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5802 5820                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5803 5821                      &cs, COMPOUND4res *, resp);
5804 5822                  return;
5805 5823          }
5806 5824  
                   /*
                    * An empty compound succeeds immediately.  Note that neither
                    * compound probe fires on this path.
                    */
5807 5825          if (args->array_len == 0) {
5808 5826                  resp->status = NFS4_OK;
5809 5827                  return;
5810 5828          }
5811 5829  
5812 5830          ASSERT(exi == NULL);
5813 5831          ASSERT(cr == NULL);
5814 5832  
5815 5833          cr = crget();
5816 5834          ASSERT(cr != NULL);
5817 5835  
                   /*
                    * A zero return from sec_svc_getcred() is handled as a credential
                    * failure: drop the cred, report badcred and flag it through *rv.
                    */
5818 5836          if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5819 5837                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5820 5838                      &cs, COMPOUND4args *, args);
5821 5839                  crfree(cr);
5822 5840                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5823 5841                      &cs, COMPOUND4res *, resp);
5824 5842                  svcerr_badcred(req->rq_xprt);
5825 5843                  if (rv != NULL)
5826 5844                          *rv = 1;
5827 5845                  return;
5828 5846          }
5829 5847          resp->array_len = args->array_len;
5830 5848          resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5831 5849              KM_SLEEP);
5832 5850  
                   /* The crget() cred is owned by cs.basecr from here on. */
5833 5851          cs.basecr = cr;
5834 5852          nsrv4 = nfs4_get_srv();
5835 5853  
5836 5854          DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5837 5855              COMPOUND4args *, args);
5838 5856  
5839 5857          /*
5840 5858           * For now, NFS4 compound processing must be protected by
5841 5859           * exported_lock because it can access more than one exportinfo
5842 5860           * per compound and share/unshare can now change multiple
5843 5861           * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5844 5862           * per proc (excluding public exinfo), and exi_count design
5845 5863           * is sufficient to protect concurrent execution of NFS2/3
5846 5864           * ops along with unexport.  This lock will be removed as
5847 5865           * part of the NFSv4 phase 2 namespace redesign work.
5848 5866           */
5849 5867          rw_enter(&ne->exported_lock, RW_READER);
5850 5868  
5851 5869          /*
5852 5870           * If this is the first compound we've seen, we need to start all
5853 5871           * new instances' grace periods.
5854 5872           */
5855 5873          if (nsrv4->seen_first_compound == 0) {
5856 5874                  rfs4_grace_start_new(nsrv4);
5857 5875                  /*
5858 5876                   * This must be set after rfs4_grace_start_new(), otherwise
5859 5877                   * another thread could proceed past here before the former
5860 5878                   * is finished.
5861 5879                   */
5862 5880                  nsrv4->seen_first_compound = 1;
5863 5881          }
5864 5882  
5865 5883          for (i = 0; i < args->array_len && cs.cont; i++) {
5866 5884                  nfs_argop4 *argop;
5867 5885                  nfs_resop4 *resop;
5868 5886                  uint_t op;
5869 5887  
5870 5888                  argop = &args->array[i];
5871 5889                  resop = &resp->array[i];
5872 5890                  resop->resop = argop->argop;
5873 5891                  op = (uint_t)resop->resop;
5874 5892  
5875 5893                  if (op < rfsv4disp_cnt) {
5876 5894                          /*
5877 5895                           * Count the individual ops here; NULL and COMPOUND
5878 5896                           * are counted in common_dispatch()
5879 5897                           */
5880 5898                          rfsproccnt_v4_ptr[op].value.ui64++;
5881 5899  
5882 5900                          NFS4_DEBUG(rfs4_debug > 1,
5883 5901                              (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5884 5902                          (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5885 5903                          NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5886 5904                              rfs4_op_string[op], *cs.statusp));
5887 5905                          if (*cs.statusp != NFS4_OK)
5888 5906                                  cs.cont = FALSE;
5889 5907                  } else {
5890 5908                          /*
5891 5909                           * This is effectively dead code since XDR code
5892 5910                           * will have already returned BADXDR if op doesn't
5893 5911                           * decode to legal value.  This only done for a
5894 5912                           * day when XDR code doesn't verify v4 opcodes.
5895 5913                           */
5896 5914                          op = OP_ILLEGAL;
5897 5915                          rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5898 5916  
5899 5917                          rfs4_op_illegal(argop, resop, req, &cs);
5900 5918                          cs.cont = FALSE;
5901 5919                  }
5902 5920  
5903 5921                  /*
5904 5922                   * If not at last op, and if we are to stop, then
5905 5923                   * compact the results array.
5906 5924                   */
5907 5925                  if ((i + 1) < args->array_len && !cs.cont) {
5908 5926                          nfs_resop4 *new_res = kmem_alloc(
5909 5927                              (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5910 5928                          bcopy(resp->array,
5911 5929                              new_res, (i+1) * sizeof (nfs_resop4));
                                   /*
                                    * The old array is freed with the size it was
                                    * allocated at (args->array_len entries), not
                                    * the compacted length.
                                    */
5912 5930                          kmem_free(resp->array,
5913 5931                              args->array_len * sizeof (nfs_resop4));
5914 5932  
5915 5933                          resp->array_len =  i + 1;
5916 5934                          resp->array = new_res;
5917 5935                  }
5918 5936          }
5919 5937  
5920 5938          rw_exit(&ne->exported_lock);
5921 5939  
5922 5940          /*
5923 5941           * clear exportinfo and vnode fields from compound_state before dtrace
5924 5942           * probe, to avoid tracing residual values for path and share path.
5925 5943           */
5926 5944          if (cs.vp)
5927 5945                  VN_RELE(cs.vp);
5928 5946          if (cs.saved_vp)
5929 5947                  VN_RELE(cs.saved_vp);
5930 5948          cs.exi = cs.saved_exi = NULL;
5931 5949          cs.vp = cs.saved_vp = NULL;
5932 5950  
5933 5951          DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5934 5952              COMPOUND4res *, resp);
5935 5953  
5936 5954          if (cs.saved_fh.nfs_fh4_val)
5937 5955                  kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5938 5956  
5939 5957          if (cs.basecr)
5940 5958                  crfree(cs.basecr);
5941 5959          if (cs.cr)
5942 5960                  crfree(cs.cr);
5943 5961          /*
5944 5962           * done with this compound request, free the label
5945 5963           */
5946 5964  
5947 5965          if (req->rq_label != NULL) {
5948 5966                  kmem_free(req->rq_label, sizeof (bslabel_t));
5949 5967                  req->rq_label = NULL;
5950 5968          }
5951 5969  }
5952 5970  
5953 5971  /*
5954 5972   * XXX because of what appears to be duplicate calls to rfs4_compound_free
5955 5973   * XXX zero out the tag and array values. Need to investigate why the
5956 5974   * XXX calls occur, but at least prevent the panic for now.
            *
            * Releases what hangs off a COMPOUND4res: the reply tag, the per-op
            * results (through each op's dis_resfree handler; slots whose opcode
            * is outside rfsv4disptab are skipped) and finally the result array.
            *
            * After the array is freed, resp->array and resp->array_len are reset
            * so that a repeated call finds nothing to walk and nothing to free.
            * NOTE(review): the tag is released with UTF8STRING_FREE, which is
            * defined elsewhere; presumably it clears the tag fields -- confirm.
5957 5975   */
5958 5976  void
5959 5977  rfs4_compound_free(COMPOUND4res *resp)
5960 5978  {
5961 5979          uint_t i;
5962 5980  
5963 5981          if (resp->tag.utf8string_val) {
5964 5982                  UTF8STRING_FREE(resp->tag)
5965 5983          }
5966 5984  
5967 5985          for (i = 0; i < resp->array_len; i++) {
5968 5986                  nfs_resop4 *resop;
5969 5987                  uint_t op;
5970 5988  
5971 5989                  resop = &resp->array[i];
5972 5990                  op = (uint_t)resop->resop;
5973 5991                  if (op < rfsv4disp_cnt) {
5974 5992                          (*rfsv4disptab[op].dis_resfree)(resop);
5975 5993                  }
5976 5994          }
5977 5995          if (resp->array != NULL) {
5978 5996                  kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
                           /*
                            * Leave no dangling pointer or stale count behind; a
                            * duplicate call must neither revisit the freed
                            * results nor free the array a second time.
                            */
                           resp->array = NULL;
                           resp->array_len = 0;
5979 5997          }
5980 5998  }
5981 5999  
5982 6000  /*
5983 6001   * Process the value of the compound request rpc flags, as a bit-AND
5984 6002   * of the individual per-op flags (idempotent, allowork, publicfh_ok)
            *
            * *flagp starts from RPC_ALL and is masked with dis_flags of every op
            * in args->array; an opcode outside rfsv4disptab clears all flags.
            * The scan stops as soon as no flag is left set, and a compound with
            * no ops yields RPC_ALL.
5985 6003   */
5986 6004  void
5987 6005  rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5988 6006  {
                   /*
                    * Unsigned index, as used by the other loops over array_len
                    * in this file; avoids a signed/unsigned comparison.
                    */
5989 6007          uint_t i;
5990 6008          int flag = RPC_ALL;
5991 6009  
5992 6010          for (i = 0; flag && i < args->array_len; i++) {
5993 6011                  uint_t op;
5994 6012  
5995 6013                  op = (uint_t)args->array[i].argop;
5996 6014  
5997 6015                  if (op < rfsv4disp_cnt)
5998 6016                          flag &= rfsv4disptab[op].dis_flags;
5999 6017                  else
6000 6018                          flag = 0;
6001 6019          }
6002 6020          *flagp = flag;
6003 6021  }
6004 6022  
6005 6023  nfsstat4
6006 6024  rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6007 6025  {
                   /*
                    * Hand back the sysid recorded for client cp in *sp, allocating
                    * one with lm_alloc_sysidt() the first time it is asked for.
                    * The lookup and the allocation are both done while holding the
                    * client's dbe lock.
                    *
                    * Returns NFS4_OK with *sp filled in, or NFS4ERR_DELAY (leaving
                    * *sp untouched) when the allocation yields LM_NOSYSID.
                    */
6008 6026          nfsstat4 e;
6009 6027  
6010 6028          rfs4_dbe_lock(cp->rc_dbe);
6011 6029  
6012 6030          if (cp->rc_sysidt != LM_NOSYSID) {
6013 6031                  *sp = cp->rc_sysidt;
6014 6032                  e = NFS4_OK;
6015 6033  
                   /* No sysid yet: allocate one and remember it in the client. */
6016 6034          } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6017 6035                  *sp = cp->rc_sysidt;
6018 6036                  e = NFS4_OK;
6019 6037  
6020 6038                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6021 6039                      "rfs4_client_sysid: allocated 0x%x\n", *sp));
6022 6040          } else
6023 6041                  e = NFS4ERR_DELAY;
6024 6042  
6025 6043          rfs4_dbe_unlock(cp->rc_dbe);
6026 6044          return (e);
6027 6045  }
6028 6046  
6029 6047  #if defined(DEBUG) && ! defined(lint)
6030 6048  static void lock_print(char *str, int operation, struct flock64 *flk)
6031 6049  {
6032 6050          char *op, *type;
6033 6051  
6034 6052          switch (operation) {
6035 6053          case F_GETLK: op = "F_GETLK";
6036 6054                  break;
6037 6055          case F_SETLK: op = "F_SETLK";
6038 6056                  break;
6039 6057          case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
6040 6058                  break;
6041 6059          default: op = "F_UNKNOWN";
6042 6060                  break;
6043 6061          }
6044 6062          switch (flk->l_type) {
6045 6063          case F_UNLCK: type = "F_UNLCK";
6046 6064                  break;
6047 6065          case F_RDLCK: type = "F_RDLCK";
6048 6066                  break;
6049 6067          case F_WRLCK: type = "F_WRLCK";
6050 6068                  break;
6051 6069          default: type = "F_UNKNOWN";
6052 6070                  break;
6053 6071          }
6054 6072  
6055 6073          ASSERT(flk->l_whence == 0);
6056 6074          cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
6057 6075              str, op, type, (longlong_t)flk->l_start,
6058 6076              flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
6059 6077  }
6060 6078  
6061 6079  #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
6062 6080  #else
6063 6081  #define LOCK_PRINT(d, s, t, f)
6064 6082  #endif
6065 6083  
6066 6084  /*ARGSUSED*/
6067 6085  static bool_t
6068 6086  creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6069 6087  {
                   /*
                    * Credential check stub: none of the arguments is examined and
                    * every caller is told TRUE.
                    */
6070 6088          return (TRUE);
6071 6089  }
6072 6090  
6073 6091  /*
6074 6092   * Look up the pathname using the vp in cs as the directory vnode.
6075 6093   * cs->vp will be the vnode for the file on success
            *
            * Returns:
            *   NFS4ERR_NOFILEHANDLE  cs->vp is NULL
            *   NFS4ERR_NOTDIR        cs->vp is not a directory
            *   NFS4ERR_INVAL         the component could not be converted to a
            *                         file name, or nfscmd_convname() returned NULL
            *   NFS4ERR_NAMETOOLONG   the converted length exceeds MAXNAMELEN
            *   otherwise the status of utf8_dir_verify() (if not NFS4_OK) or of
            *   do_rfs4_op_lookup().
6076 6094   */
6077 6095  
6078 6096  static nfsstat4
6079 6097  rfs4_lookup(component4 *component, struct svc_req *req,
6080 6098      struct compound_state *cs)
6081 6099  {
6082 6100          char *nm;
6083 6101          uint32_t len;
6084 6102          nfsstat4 status;
6085 6103          struct sockaddr *ca;
6086 6104          char *name;
6087 6105  
6088 6106          if (cs->vp == NULL) {
6089 6107                  return (NFS4ERR_NOFILEHANDLE);
6090 6108          }
6091 6109          if (cs->vp->v_type != VDIR) {
6092 6110                  return (NFS4ERR_NOTDIR);
6093 6111          }
6094 6112  
6095 6113          status = utf8_dir_verify(component);
6096 6114          if (status != NFS4_OK)
6097 6115                  return (status);
6098 6116  
                   /* nm is a len-byte buffer that must be kmem_free'd on every path. */
6099 6117          nm = utf8_to_fn(component, &len, NULL);
6100 6118          if (nm == NULL) {
6101 6119                  return (NFS4ERR_INVAL);
6102 6120          }
6103 6121  
6104 6122          if (len > MAXNAMELEN) {
6105 6123                  kmem_free(nm, len);
6106 6124                  return (NFS4ERR_NAMETOOLONG);
6107 6125          }
6108 6126  
6109 6127          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6110 6128          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6111 6129              MAXPATHLEN + 1);
6112 6130  
6113 6131          if (name == NULL) {
6114 6132                  kmem_free(nm, len);
6115 6133                  return (NFS4ERR_INVAL);
6116 6134          }
6117 6135  
6118 6136          status = do_rfs4_op_lookup(name, req, cs);
6119 6137  
                   /*
                    * The converted name may be nm itself; only a distinct buffer
                    * (MAXPATHLEN + 1 bytes) is freed separately.
                    */
6120 6138          if (name != nm)
6121 6139                  kmem_free(name, MAXPATHLEN + 1);
6122 6140  
6123 6141          kmem_free(nm, len);
6124 6142  
6125 6143          return (status);
6126 6144  }
6127 6145  
6128 6146  static nfsstat4
6129 6147  rfs4_lookupfile(component4 *component, struct svc_req *req,
6130 6148      struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6131 6149  {
                   /*
                    * Look up component in the directory cs->vp (via rfs4_lookup())
                    * and require the result to be a regular file.
                    *
                    * On the regular-file path, cinfo receives the directory's ctime
                    * sampled before and after the lookup, cs->mandlock is set from
                    * the file's mode, and the return value is whatever
                    * check_open_access() reports for the requested access bits.
                    * Every earlier return leaves cinfo untouched.
                    *
                    * Errors: puterrno4() of a failed VOP_GETATTR, the rfs4_lookup()
                    * status, or NFS4ERR_ISDIR / NFS4ERR_SYMLINK / NFS4ERR_INVAL when
                    * the object found is not a regular file.
                    */
6132 6150          nfsstat4 status;
6133 6151          vnode_t *dvp = cs->vp;
6134 6152          vattr_t bva, ava, fva;
6135 6153          int error;
6136 6154  
6137 6155          /* Get "before" change value */
6138 6156          bva.va_mask = AT_CTIME|AT_SEQ;
6139 6157          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6140 6158          if (error)
6141 6159                  return (puterrno4(error));
6142 6160  
6143 6161          /* rfs4_lookup may VN_RELE directory */
6144 6162          VN_HOLD(dvp);
6145 6163  
6146 6164          status = rfs4_lookup(component, req, cs);
6147 6165          if (status != NFS4_OK) {
6148 6166                  VN_RELE(dvp);
6149 6167                  return (status);
6150 6168          }
6151 6169  
6152 6170          /*
6153 6171           * Get "after" change value, if it fails, simply return the
6154 6172           * before value.
6155 6173           */
6156 6174          ava.va_mask = AT_CTIME|AT_SEQ;
6157 6175          if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6158 6176                  ava.va_ctime = bva.va_ctime;
                           /* A zero va_seq forces cinfo->atomic to FALSE below. */
6159 6177                  ava.va_seq = 0;
6160 6178          }
6161 6179          VN_RELE(dvp);
6162 6180  
6163 6181          /*
6164 6182           * Validate the file is a file
6165 6183           */
6166 6184          fva.va_mask = AT_TYPE|AT_MODE;
6167 6185          error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6168 6186          if (error)
6169 6187                  return (puterrno4(error));
6170 6188  
6171 6189          if (fva.va_type != VREG) {
6172 6190                  if (fva.va_type == VDIR)
6173 6191                          return (NFS4ERR_ISDIR);
6174 6192                  if (fva.va_type == VLNK)
6175 6193                          return (NFS4ERR_SYMLINK);
6176 6194                  return (NFS4ERR_INVAL);
6177 6195          }
6178 6196  
6179 6197          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6180 6198          NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6181 6199  
6182 6200          /*
6183 6201           * It is undefined if VOP_LOOKUP will change va_seq, so
6184 6202           * cinfo.atomic = TRUE only if we have
6185 6203           * non-zero va_seq's, and they have not changed.
6186 6204           */
6187 6205          if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6188 6206                  cinfo->atomic = TRUE;
6189 6207          else
6190 6208                  cinfo->atomic = FALSE;
6191 6209  
6192 6210          /* Check for mandatory locking */
6193 6211          cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6194 6212          return (check_open_access(access, cs, req));
6195 6213  }
6196 6214  
6197 6215  static nfsstat4
6198 6216  create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6199 6217      cred_t *cr, vnode_t **vpp, bool_t *created)
6200 6218  {
                   /*
                    * Create nm in directory dvp with an exclusive VOP_CREATE and
                    * return the resulting vnode in *vpp.
                    *
                    * *created is TRUE when VOP_CREATE succeeded, or when an existing
                    * file is judged to be a retransmission of this same create (its
                    * mtime equals vap->va_mtime); it is FALSE on every other return
                    * that follows a failed VOP_CREATE.
                    *
                    * When VOP_CREATE fails with EEXIST and mode is not GUARDED4, the
                    * existing object is looked up instead:
                    *   - UNCHECKED4: an existing regular file is accepted
                    *     (NFS4_OK); other types give NFS4ERR_ISDIR,
                    *     NFS4ERR_SYMLINK or NFS4ERR_INVAL.
                    *   - any other mode: NFS4_OK only if the mtime matches,
                    *     otherwise NFS4ERR_EXIST.
                    * On the error returns that follow a successful lookup the vnode
                    * in *vpp has already been released with VN_RELE.
                    */
6201 6219          int error;
6202 6220          nfsstat4 status = NFS4_OK;
6203 6221          vattr_t va;
6204 6222  
6205 6223  tryagain:
6206 6224  
6207 6225          /*
6208 6226           * The file open mode used is VWRITE.  If the client needs
6209 6227           * some other semantic, then it should do the access checking
6210 6228           * itself.  It would have been nice to have the file open mode
6211 6229           * passed as part of the arguments.
6212 6230           */
6213 6231  
6214 6232          *created = TRUE;
6215 6233          error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6216 6234  
6217 6235          if (error) {
6218 6236                  *created = FALSE;
6219 6237  
6220 6238                  /*
6221 6239                   * If we got something other than file already exists
6222 6240                   * then just return this error.  Otherwise, we got
6223 6241                   * EEXIST.  If we were doing a GUARDED create, then
6224 6242                   * just return this error.  Otherwise, we need to
6225 6243                   * make sure that this wasn't a duplicate of an
6226 6244                   * exclusive create request.
6227 6245                   *
6228 6246                   * The assumption is made that a non-exclusive create
6229 6247                   * request will never return EEXIST.
6230 6248                   */
6231 6249  
6232 6250                  if (error != EEXIST || mode == GUARDED4) {
6233 6251                          status = puterrno4(error);
6234 6252                          return (status);
6235 6253                  }
6236 6254                  error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6237 6255                      NULL, NULL, NULL);
6238 6256  
6239 6257                  if (error) {
6240 6258                          /*
6241 6259                           * We couldn't find the file that we thought that
6242 6260                           * we just created.  So, we'll just try creating
6243 6261                           * it again.
6244 6262                           */
6245 6263                          if (error == ENOENT)
6246 6264                                  goto tryagain;
6247 6265  
6248 6266                          status = puterrno4(error);
6249 6267                          return (status);
6250 6268                  }
6251 6269  
6252 6270                  if (mode == UNCHECKED4) {
6253 6271                          /* existing object must be regular file */
6254 6272                          if ((*vpp)->v_type != VREG) {
6255 6273                                  if ((*vpp)->v_type == VDIR)
6256 6274                                          status = NFS4ERR_ISDIR;
6257 6275                                  else if ((*vpp)->v_type == VLNK)
6258 6276                                          status = NFS4ERR_SYMLINK;
6259 6277                                  else
6260 6278                                          status = NFS4ERR_INVAL;
6261 6279                                  VN_RELE(*vpp);
6262 6280                                  return (status);
6263 6281                          }
6264 6282  
6265 6283                          return (NFS4_OK);
6266 6284                  }
6267 6285  
6268 6286                  /* Check for duplicate request */
6269 6287                  va.va_mask = AT_MTIME;
6270 6288                  error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6271 6289                  if (!error) {
6272 6290                          /* We found the file */
6273 6291                          const timestruc_t *mtime = &vap->va_mtime;
6274 6292  
6275 6293                          if (va.va_mtime.tv_sec != mtime->tv_sec ||
6276 6294                              va.va_mtime.tv_nsec != mtime->tv_nsec) {
6277 6295                                  /* but its not our creation */
6278 6296                                  VN_RELE(*vpp);
6279 6297                                  return (NFS4ERR_EXIST);
6280 6298                          }
6281 6299                          *created = TRUE; /* retrans of create == created */
6282 6300                          return (NFS4_OK);
6283 6301                  }
                           /* Could not read the mtime: treat it as somebody else's file. */
6284 6302                  VN_RELE(*vpp);
6285 6303                  return (NFS4ERR_EXIST);
6286 6304          }
6287 6305  
6288 6306          return (NFS4_OK);
6289 6307  }
6290 6308  
6291 6309  static nfsstat4
6292 6310  check_open_access(uint32_t access, struct compound_state *cs,
6293 6311      struct svc_req *req)
6294 6312  {
                   /*
                    * Decide whether cs->cr may open the regular file cs->vp with the
                    * OPEN4_SHARE_ACCESS_* bits given in access.
                    *
                    * Returns:
                    *   NFS4ERR_ACCESS  cs->access is CS_ACCESS_DENIED, cs->mandlock is
                    *                   set, read was requested but both the VREAD
                    *                   and VEXEC checks fail, or write was requested
                    *                   and the VWRITE check fails
                    *   NFS4ERR_ROFS    write was requested and rdonly4() is true
                    *   NFS4_OK         otherwise
                    *
                    * VOP_ACCESS failures are always reported as NFS4ERR_ACCESS; the
                    * underlying errno is not translated.
                    */
6295 6313          int error;
6296 6314          vnode_t *vp;
6297 6315          bool_t readonly;
6298 6316          cred_t *cr = cs->cr;
6299 6317  
6300 6318          /* For now we don't allow mandatory locking as per V2/V3 */
6301 6319          if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6302 6320                  return (NFS4ERR_ACCESS);
6303 6321          }
6304 6322  
6305 6323          vp = cs->vp;
6306 6324          ASSERT(cr != NULL && vp->v_type == VREG);
6307 6325  
6308 6326          /*
6309 6327           * If the file system is exported read only and we are trying
6310 6328           * to open for write, then return NFS4ERR_ROFS
6311 6329           */
6312 6330  
6313 6331          readonly = rdonly4(req, cs);
6314 6332  
6315 6333          if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6316 6334                  return (NFS4ERR_ROFS);
6317 6335  
                   /* Read access is granted if either VREAD or VEXEC is permitted. */
6318 6336          if (access & OPEN4_SHARE_ACCESS_READ) {
6319 6337                  if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6320 6338                      (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6321 6339                          return (NFS4ERR_ACCESS);
6322 6340                  }
6323 6341          }
6324 6342  
6325 6343          if (access & OPEN4_SHARE_ACCESS_WRITE) {
6326 6344                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6327 6345                  if (error)
6328 6346                          return (NFS4ERR_ACCESS);
6329 6347          }
6330 6348  
6331 6349          return (NFS4_OK);
6332 6350  }
6333 6351  
6334 6352  static nfsstat4
6335 6353  rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6336 6354      change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6337 6355  {
6338 6356          struct nfs4_svgetit_arg sarg;
6339 6357          struct nfs4_ntov_table ntov;
6340 6358  
6341 6359          bool_t ntov_table_init = FALSE;
6342 6360          struct statvfs64 sb;
6343 6361          nfsstat4 status;
6344 6362          vnode_t *vp;
6345 6363          vattr_t bva, ava, iva, cva, *vap;
6346 6364          vnode_t *dvp;
6347 6365          timespec32_t *mtime;
6348 6366          char *nm = NULL;
6349 6367          uint_t buflen;
6350 6368          bool_t created;
6351 6369          bool_t setsize = FALSE;
6352 6370          len_t reqsize;
6353 6371          int error;
6354 6372          bool_t trunc;
6355 6373          caller_context_t ct;
6356 6374          component4 *component;
6357 6375          bslabel_t *clabel;
6358 6376          struct sockaddr *ca;
6359 6377          char *name = NULL;
6360 6378  
6361 6379          sarg.sbp = &sb;
6362 6380          sarg.is_referral = B_FALSE;
6363 6381  
6364 6382          dvp = cs->vp;
6365 6383  
6366 6384          /* Check if the file system is read only */
6367 6385          if (rdonly4(req, cs))
6368 6386                  return (NFS4ERR_ROFS);
6369 6387  
6370 6388          /* check the label of including directory */
6371 6389          if (is_system_labeled()) {
6372 6390                  ASSERT(req->rq_label != NULL);
6373 6391                  clabel = req->rq_label;
6374 6392                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6375 6393                      "got client label from request(1)",
6376 6394                      struct svc_req *, req);
6377 6395                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
6378 6396                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6379 6397                              cs->exi)) {
6380 6398                                  return (NFS4ERR_ACCESS);
6381 6399                          }
6382 6400                  }
6383 6401          }
6384 6402  
6385 6403          /*
6386 6404           * Get the last component of path name in nm. cs will reference
6387 6405           * the including directory on success.
6388 6406           */
6389 6407          component = &args->open_claim4_u.file;
6390 6408          status = utf8_dir_verify(component);
6391 6409          if (status != NFS4_OK)
6392 6410                  return (status);
6393 6411  
6394 6412          nm = utf8_to_fn(component, &buflen, NULL);
6395 6413  
6396 6414          if (nm == NULL)
6397 6415                  return (NFS4ERR_RESOURCE);
6398 6416  
6399 6417          if (buflen > MAXNAMELEN) {
6400 6418                  kmem_free(nm, buflen);
6401 6419                  return (NFS4ERR_NAMETOOLONG);
6402 6420          }
6403 6421  
6404 6422          bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6405 6423          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6406 6424          if (error) {
6407 6425                  kmem_free(nm, buflen);
6408 6426                  return (puterrno4(error));
6409 6427          }
6410 6428  
6411 6429          if (bva.va_type != VDIR) {
6412 6430                  kmem_free(nm, buflen);
6413 6431                  return (NFS4ERR_NOTDIR);
6414 6432          }
6415 6433  
6416 6434          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6417 6435  
6418 6436          switch (args->mode) {
6419 6437          case GUARDED4:
6420 6438                  /*FALLTHROUGH*/
6421 6439          case UNCHECKED4:
6422 6440                  nfs4_ntov_table_init(&ntov);
6423 6441                  ntov_table_init = TRUE;
6424 6442  
6425 6443                  *attrset = 0;
6426 6444                  status = do_rfs4_set_attrs(attrset,
6427 6445                      &args->createhow4_u.createattrs,
6428 6446                      cs, &sarg, &ntov, NFS4ATTR_SETIT);
6429 6447  
6430 6448                  if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6431 6449                      sarg.vap->va_type != VREG) {
6432 6450                          if (sarg.vap->va_type == VDIR)
6433 6451                                  status = NFS4ERR_ISDIR;
6434 6452                          else if (sarg.vap->va_type == VLNK)
6435 6453                                  status = NFS4ERR_SYMLINK;
6436 6454                          else
6437 6455                                  status = NFS4ERR_INVAL;
6438 6456                  }
6439 6457  
6440 6458                  if (status != NFS4_OK) {
6441 6459                          kmem_free(nm, buflen);
6442 6460                          nfs4_ntov_table_free(&ntov, &sarg);
6443 6461                          *attrset = 0;
6444 6462                          return (status);
6445 6463                  }
6446 6464  
6447 6465                  vap = sarg.vap;
6448 6466                  vap->va_type = VREG;
6449 6467                  vap->va_mask |= AT_TYPE;
6450 6468  
6451 6469                  if ((vap->va_mask & AT_MODE) == 0) {
6452 6470                          vap->va_mask |= AT_MODE;
6453 6471                          vap->va_mode = (mode_t)0600;
6454 6472                  }
6455 6473  
6456 6474                  if (vap->va_mask & AT_SIZE) {
6457 6475  
6458 6476                          /* Disallow create with a non-zero size */
6459 6477  
6460 6478                          if ((reqsize = sarg.vap->va_size) != 0) {
6461 6479                                  kmem_free(nm, buflen);
6462 6480                                  nfs4_ntov_table_free(&ntov, &sarg);
6463 6481                                  *attrset = 0;
6464 6482                                  return (NFS4ERR_INVAL);
6465 6483                          }
6466 6484                          setsize = TRUE;
6467 6485                  }
6468 6486                  break;
6469 6487  
6470 6488          case EXCLUSIVE4:
6471 6489                  /* prohibit EXCL create of named attributes */
6472 6490                  if (dvp->v_flag & V_XATTRDIR) {
6473 6491                          kmem_free(nm, buflen);
6474 6492                          *attrset = 0;
6475 6493                          return (NFS4ERR_INVAL);
6476 6494                  }
6477 6495  
6478 6496                  cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6479 6497                  cva.va_type = VREG;
6480 6498                  /*
6481 6499                   * Ensure no time overflows. Assumes underlying
6482 6500                   * filesystem supports at least 32 bits.
6483 6501                   * Truncate nsec to usec resolution to allow valid
6484 6502                   * compares even if the underlying filesystem truncates.
6485 6503                   */
6486 6504                  mtime = (timespec32_t *)&args->createhow4_u.createverf;
6487 6505                  cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6488 6506                  cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6489 6507                  cva.va_mode = (mode_t)0;
6490 6508                  vap = &cva;
6491 6509  
6492 6510                  /*
6493 6511                   * For EXCL create, attrset is set to the server attr
6494 6512                   * used to cache the client's verifier.
6495 6513                   */
6496 6514                  *attrset = FATTR4_TIME_MODIFY_MASK;
6497 6515                  break;
6498 6516          }
6499 6517  
6500 6518          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6501 6519          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6502 6520              MAXPATHLEN  + 1);
6503 6521  
6504 6522          if (name == NULL) {
6505 6523                  kmem_free(nm, buflen);
6506 6524                  return (NFS4ERR_SERVERFAULT);
6507 6525          }
6508 6526  
6509 6527          status = create_vnode(dvp, name, vap, args->mode,
6510 6528              cs->cr, &vp, &created);
6511 6529          if (nm != name)
6512 6530                  kmem_free(name, MAXPATHLEN + 1);
6513 6531          kmem_free(nm, buflen);
6514 6532  
6515 6533          if (status != NFS4_OK) {
6516 6534                  if (ntov_table_init)
6517 6535                          nfs4_ntov_table_free(&ntov, &sarg);
6518 6536                  *attrset = 0;
6519 6537                  return (status);
6520 6538          }
6521 6539  
6522 6540          trunc = (setsize && !created);
6523 6541  
6524 6542          if (args->mode != EXCLUSIVE4) {
6525 6543                  bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6526 6544  
6527 6545                  /*
6528 6546                   * True verification that object was created with correct
6529 6547                   * attrs is impossible.  The attrs could have been changed
6530 6548                   * immediately after object creation.  If attributes did
6531 6549                   * not verify, the only recourse for the server is to
6532 6550                   * destroy the object.  Maybe if some attrs (like gid)
6533 6551                   * are set incorrectly, the object should be destroyed;
6534 6552                   * however, seems bad as a default policy.  Do we really
6535 6553                   * want to destroy an object over one of the times not
6536 6554                   * verifying correctly?  For these reasons, the server
6537 6555                   * currently sets bits in attrset for createattrs
6538 6556                   * that were set; however, no verification is done.
6539 6557                   *
6540 6558                   * vmask_to_nmask accounts for vattr bits set on create
6541 6559                   *      [do_rfs4_set_attrs() only sets resp bits for
6542 6560                   *       non-vattr/vfs bits.]
6543 6561                   * Mask off any bits we set by default so as not to return
6544 6562                   * more attrset bits than were requested in createattrs
6545 6563                   */
6546 6564                  if (created) {
6547 6565                          nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6548 6566                          *attrset &= createmask;
6549 6567                  } else {
6550 6568                          /*
6551 6569                           * We did not create the vnode (we tried but it
6552 6570                           * already existed).  In this case, the only createattr
6553 6571                           * that the spec allows the server to set is size,
6554 6572                           * and even then, it can only be set if it is 0.
6555 6573                           */
6556 6574                          *attrset = 0;
6557 6575                          if (trunc)
6558 6576                                  *attrset = FATTR4_SIZE_MASK;
6559 6577                  }
6560 6578          }
6561 6579          if (ntov_table_init)
6562 6580                  nfs4_ntov_table_free(&ntov, &sarg);
6563 6581  
6564 6582          /*
6565 6583           * Get the initial "after" sequence number, if it fails,
6566 6584           * set to zero, time to before.
6567 6585           */
6568 6586          iva.va_mask = AT_CTIME|AT_SEQ;
6569 6587          if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6570 6588                  iva.va_seq = 0;
6571 6589                  iva.va_ctime = bva.va_ctime;
6572 6590          }
6573 6591  
6574 6592          /*
6575 6593           * create_vnode attempts to create the file exclusive,
6576 6594           * if it already exists the VOP_CREATE will fail and
6577 6595           * may not increase va_seq. It is atomic if
6578 6596           * we haven't changed the directory, but if it has changed
6579 6597           * we don't know what changed it.
6580 6598           */
6581 6599          if (!created) {
6582 6600                  if (bva.va_seq && iva.va_seq &&
6583 6601                      bva.va_seq == iva.va_seq)
6584 6602                          cinfo->atomic = TRUE;
6585 6603                  else
6586 6604                          cinfo->atomic = FALSE;
6587 6605                  NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6588 6606          } else {
6589 6607                  /*
6590 6608                   * The entry was created, we need to sync the
6591 6609                   * directory metadata.
6592 6610                   */
6593 6611                  (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6594 6612  
6595 6613                  /*
6596 6614                   * Get "after" change value, if it fails, simply return the
6597 6615                   * before value.
6598 6616                   */
6599 6617                  ava.va_mask = AT_CTIME|AT_SEQ;
6600 6618                  if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6601 6619                          ava.va_ctime = bva.va_ctime;
6602 6620                          ava.va_seq = 0;
6603 6621                  }
6604 6622  
6605 6623                  NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6606 6624  
6607 6625                  /*
6608 6626                   * The cinfo->atomic = TRUE only if we have
6609 6627                   * non-zero va_seq's, and it has incremented by exactly one
6610 6628                   * during the create_vnode and it didn't
6611 6629                   * change during the VOP_FSYNC.
6612 6630                   */
6613 6631                  if (bva.va_seq && iva.va_seq && ava.va_seq &&
6614 6632                      iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6615 6633                          cinfo->atomic = TRUE;
6616 6634                  else
6617 6635                          cinfo->atomic = FALSE;
6618 6636          }
6619 6637  
6620 6638          /* Check for mandatory locking and that the size gets set. */
6621 6639          cva.va_mask = AT_MODE;
6622 6640          if (setsize)
6623 6641                  cva.va_mask |= AT_SIZE;
6624 6642  
6625 6643          /* Assume the worst */
6626 6644          cs->mandlock = TRUE;
6627 6645  
6628 6646          if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6629 6647                  cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6630 6648  
6631 6649                  /*
6632 6650                   * Truncate the file if necessary; this would be
6633 6651                   * the case for create over an existing file.
6634 6652                   */
6635 6653  
6636 6654                  if (trunc) {
6637 6655                          int in_crit = 0;
6638 6656                          rfs4_file_t *fp;
6639 6657                          nfs4_srv_t *nsrv4;
6640 6658                          bool_t create = FALSE;
6641 6659  
6642 6660                          /*
6643 6661                           * We are writing over an existing file.
6644 6662                           * Check to see if we need to recall a delegation.
6645 6663                           */
6646 6664                          nsrv4 = nfs4_get_srv();
6647 6665                          rfs4_hold_deleg_policy(nsrv4);
6648 6666                          if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6649 6667                                  if (rfs4_check_delegated_byfp(FWRITE, fp,
6650 6668                                      (reqsize == 0), FALSE, FALSE, &clientid)) {
6651 6669                                          rfs4_file_rele(fp);
6652 6670                                          rfs4_rele_deleg_policy(nsrv4);
6653 6671                                          VN_RELE(vp);
6654 6672                                          *attrset = 0;
6655 6673                                          return (NFS4ERR_DELAY);
6656 6674                                  }
6657 6675                                  rfs4_file_rele(fp);
6658 6676                          }
6659 6677                          rfs4_rele_deleg_policy(nsrv4);
6660 6678  
6661 6679                          if (nbl_need_check(vp)) {
6662 6680                                  in_crit = 1;
6663 6681  
6664 6682                                  ASSERT(reqsize == 0);
6665 6683  
6666 6684                                  nbl_start_crit(vp, RW_READER);
6667 6685                                  if (nbl_conflict(vp, NBL_WRITE, 0,
6668 6686                                      cva.va_size, 0, NULL)) {
6669 6687                                          in_crit = 0;
6670 6688                                          nbl_end_crit(vp);
6671 6689                                          VN_RELE(vp);
6672 6690                                          *attrset = 0;
6673 6691                                          return (NFS4ERR_ACCESS);
6674 6692                                  }
6675 6693                          }
6676 6694                          ct.cc_sysid = 0;
6677 6695                          ct.cc_pid = 0;
6678 6696                          ct.cc_caller_id = nfs4_srv_caller_id;
6679 6697                          ct.cc_flags = CC_DONTBLOCK;
6680 6698  
6681 6699                          cva.va_mask = AT_SIZE;
6682 6700                          cva.va_size = reqsize;
6683 6701                          (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6684 6702                          if (in_crit)
6685 6703                                  nbl_end_crit(vp);
6686 6704                  }
6687 6705          }
6688 6706  
6689 6707          error = makefh4(&cs->fh, vp, cs->exi);
6690 6708  
6691 6709          /*
6692 6710           * Force modified data and metadata out to stable storage.
6693 6711           */
6694 6712          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6695 6713  
6696 6714          if (error) {
6697 6715                  VN_RELE(vp);
6698 6716                  *attrset = 0;
6699 6717                  return (puterrno4(error));
6700 6718          }
6701 6719  
6702 6720          /* if parent dir is attrdir, set namedattr fh flag */
6703 6721          if (dvp->v_flag & V_XATTRDIR)
6704 6722                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6705 6723  
6706 6724          if (cs->vp)
6707 6725                  VN_RELE(cs->vp);
6708 6726  
6709 6727          cs->vp = vp;
6710 6728  
6711 6729          /*
6712 6730           * if we did not create the file, we will need to check
6713 6731           * the access bits on the file
6714 6732           */
6715 6733  
6716 6734          if (!created) {
6717 6735                  if (setsize)
6718 6736                          args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6719 6737                  status = check_open_access(args->share_access, cs, req);
6720 6738                  if (status != NFS4_OK)
6721 6739                          *attrset = 0;
6722 6740          }
6723 6741          return (status);
6724 6742  }
6725 6743  
6726 6744  /*ARGSUSED*/
           /*
            * Common back end of the OPEN claim handlers: by the time this runs,
            * cs->vp and cs->fh already name the file being opened.
            *
            * The routine looks up the rfs4_file_t for cs->vp and the rfs4_state_t
            * for the (oo, file) pair, applies the requested share reservation via
            * rfs4_share(), recalls a conflicting delegation if rfs4_check_recall()
            * reports one, and then either calls VOP_OPEN() (first open and not
            * deleg_cur) or vn_open_upgrade().  On success it folds access/deny
            * into the state and the per-file share bookkeeping, asks
            * rfs4_grant_delegation() for a delegation, advances sp->rs_stateid
            * with next_stateid() and copies it into resp->stateid.
            *
            *   cs        - compound state; cs->vp, cs->fh and cs->cr are read and
            *               cs->deleg is written.
            *   req       - currently unused.
            *   oo        - open owner issuing the OPEN.
            *   deleg     - delegation request handed to rfs4_grant_delegation().
            *   access    - OPEN4_SHARE_ACCESS_* bits requested.
            *   deny      - OPEN4_SHARE_DENY_* bits requested.
            *   resp      - result; status, stateid and (when a delegation state
            *               is returned) delegation are filled in.
            *   deleg_cur - non-zero skips VOP_OPEN() and takes the upgrade path
            *               even on a first open.
            *
            * resp->status is NFS4_OK on success.  The failures set here are
            * NFS4ERR_RESOURCE (file or state lookup returned NULL), the status
            * from rfs4_client_sysid(), the value returned by rfs4_share(),
            * NFS4ERR_OLD_STATEID (state closed while the locks were dropped for
            * a recall), NFS4ERR_DELAY (recall still needed after the delay, or
            * VOP_OPEN() failed with EAGAIN and CC_WOULDBLOCK set) and
            * NFS4ERR_SERVERFAULT (any other VOP_OPEN() failure).  Every failure
            * path after the state lookup calls rfs4_state_close() only when
            * screate is still TRUE (presumably meaning this call created the
            * state) and then drops the state reference.
            */
6727 6745  static void
6728 6746  rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6729 6747      rfs4_openowner_t *oo, delegreq_t deleg,
6730 6748      uint32_t access, uint32_t deny,
6731 6749      OPEN4res *resp, int deleg_cur)
6732 6750  {
6733 6751          /* XXX Currently not using req  */
6734 6752          rfs4_state_t *sp;
6735 6753          rfs4_file_t *fp;
6736 6754          bool_t screate = TRUE;
6737 6755          bool_t fcreate = TRUE;
6738 6756          uint32_t open_a, share_a;
6739 6757          uint32_t open_d, share_d;
6740 6758          rfs4_deleg_state_t *dsp;
6741 6759          sysid_t sysid;
6742 6760          nfsstat4 status;
6743 6761          caller_context_t ct;
6744 6762          int fflags = 0;
6745 6763          int recall = 0;
6746 6764          int err;
6747 6765          int first_open;
6748 6766  
6749 6767          /* get the file struct and hold a lock on it during initial open */
6750 6768          fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6751 6769          if (fp == NULL) {
6752 6770                  resp->status = NFS4ERR_RESOURCE;
6753 6771                  DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6754 6772                  return;
6755 6773          }
6756 6774  
6757 6775          sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6758 6776          if (sp == NULL) {
6759 6777                  resp->status = NFS4ERR_RESOURCE;
6760 6778                  DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6761 6779                  /* No need to keep any reference */
6762 6780                  rw_exit(&fp->rf_file_rwlock);
6763 6781                  rfs4_file_rele(fp);
6764 6782                  return;
6765 6783          }
6766 6784  
                   /*
                    * NOTE(review): from here on no path in this function releases
                    * fp->rf_file_rwlock; presumably rfs4_findstate_by_owner_file()
                    * drops it on success - confirm.
                    */
6767 6785          /* try to get the sysid before continuing */
6768 6786          if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6769 6787                  resp->status = status;
6770 6788                  rfs4_file_rele(fp);
6771 6789                  /* Not a fully formed open; "close" it */
6772 6790                  if (screate == TRUE)
6773 6791                          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6774 6792                  rfs4_state_rele(sp);
6775 6793                  return;
6776 6794          }
6777 6795  
6778 6796          /* Calculate the fflags for this OPEN. */
6779 6797          if (access & OPEN4_SHARE_ACCESS_READ)
6780 6798                  fflags |= FREAD;
6781 6799          if (access & OPEN4_SHARE_ACCESS_WRITE)
6782 6800                  fflags |= FWRITE;
6783 6801  
6784 6802          rfs4_dbe_lock(sp->rs_dbe);
6785 6803  
6786 6804          /*
6787 6805           * Calculate the new deny and access mode that this open is adding to
6788 6806           * the file for this open owner;
6789 6807           */
6790 6808          open_d = (deny & ~sp->rs_open_deny);
6791 6809          open_a = (access & ~sp->rs_open_access);
6792 6810  
6793 6811          /*
6794 6812           * Calculate the new share access and share deny modes that this open
6795 6813           * is adding to the file for this open owner;
6796 6814           */
6797 6815          share_a = (access & ~sp->rs_share_access);
6798 6816          share_d = (deny & ~sp->rs_share_deny);
6799 6817  
                   /*
                    * first_open: none of the OPEN4_SHARE_ACCESS_BOTH bits are
                    * recorded in this state's rs_open_access yet.
                    */
6800 6818          first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6801 6819  
6802 6820          /*
6803 6821           * Check to see the client has already sent an open for this
6804 6822           * open owner on this file with the same share/deny modes.
6805 6823           * If so, we don't need to check for a conflict and we don't
6806 6824           * need to add another shrlock.  If not, then we need to
6807 6825           * check for conflicts in deny and access before checking for
6808 6826           * conflicts in delegation.  We don't want to recall a
6809 6827           * delegation based on an open that will eventually fail based
6810 6828           * on shares modes.
6811 6829           */
6812 6830  
6813 6831          if (share_a || share_d) {
6814 6832                  if ((err = rfs4_share(sp, access, deny)) != 0) {
6815 6833                          rfs4_dbe_unlock(sp->rs_dbe);
6816 6834                          resp->status = err;
6817 6835  
6818 6836                          rfs4_file_rele(fp);
6819 6837                          /* Not a fully formed open; "close" it */
6820 6838                          if (screate == TRUE)
6821 6839                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6822 6840                          rfs4_state_rele(sp);
6823 6841                          return;
6824 6842                  }
6825 6843          }
6826 6844  
6827 6845          rfs4_dbe_lock(fp->rf_dbe);
6828 6846  
6829 6847          /*
6830 6848           * Check to see if this file is delegated and if so, if a
6831 6849           * recall needs to be done.
6832 6850           */
6833 6851          if (rfs4_check_recall(sp, access)) {
                           /*
                            * Drop both dbe locks (file first, then state) before
                            * issuing the recall and sleeping; the state lock is
                            * retaken afterwards and the state is re-validated.
                            */
6834 6852                  rfs4_dbe_unlock(fp->rf_dbe);
6835 6853                  rfs4_dbe_unlock(sp->rs_dbe);
6836 6854                  rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6837 6855                  delay(NFS4_DELEGATION_CONFLICT_DELAY);
6838 6856                  rfs4_dbe_lock(sp->rs_dbe);
6839 6857  
6840 6858                  /* if state closed while lock was dropped */
6841 6859                  if (sp->rs_closed) {
6842 6860                          if (share_a || share_d)
6843 6861                                  (void) rfs4_unshare(sp);
6844 6862                          rfs4_dbe_unlock(sp->rs_dbe);
6845 6863                          rfs4_file_rele(fp);
6846 6864                          /* Not a fully formed open; "close" it */
6847 6865                          if (screate == TRUE)
6848 6866                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6849 6867                          rfs4_state_rele(sp);
6850 6868                          resp->status = NFS4ERR_OLD_STATEID;
6851 6869                          return;
6852 6870                  }
6853 6871  
6854 6872                  rfs4_dbe_lock(fp->rf_dbe);
6855 6873                  /* Let's see if the delegation was returned */
6856 6874                  if (rfs4_check_recall(sp, access)) {
6857 6875                          rfs4_dbe_unlock(fp->rf_dbe);
6858 6876                          if (share_a || share_d)
6859 6877                                  (void) rfs4_unshare(sp);
6860 6878                          rfs4_dbe_unlock(sp->rs_dbe);
6861 6879                          rfs4_file_rele(fp);
6862 6880                          rfs4_update_lease(sp->rs_owner->ro_client);
6863 6881  
6864 6882                          /* Not a fully formed open; "close" it */
6865 6883                          if (screate == TRUE)
6866 6884                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6867 6885                          rfs4_state_rele(sp);
6868 6886                          resp->status = NFS4ERR_DELAY;
6869 6887                          return;
6870 6888                  }
6871 6889          }
6872 6890          /*
6873 6891           * the share check passed and any delegation conflict has been
6874 6892           * taken care of, now call vop_open.
6875 6893           * if this is the first open then call vop_open with fflags.
6876 6894           * if not, call vn_open_upgrade with just the upgrade flags.
6877 6895           *
6878 6896           * if the file has been opened already, it will have the current
6879 6897           * access mode in the state struct.  if it has no share access, then
6880 6898           * this is a new open.
6881 6899           *
6882 6900           * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6883 6901           * call VOP_OPEN(), just do the open upgrade.
6884 6902           */
6885 6903          if (first_open && !deleg_cur) {
6886 6904                  ct.cc_sysid = sysid;
6887 6905                  ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6888 6906                  ct.cc_caller_id = nfs4_srv_caller_id;
6889 6907                  ct.cc_flags = CC_DONTBLOCK;
6890 6908                  err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6891 6909                  if (err) {
6892 6910                          rfs4_dbe_unlock(fp->rf_dbe);
6893 6911                          if (share_a || share_d)
6894 6912                                  (void) rfs4_unshare(sp);
6895 6913                          rfs4_dbe_unlock(sp->rs_dbe);
6896 6914                          rfs4_file_rele(fp);
6897 6915  
6898 6916                          /* Not a fully formed open; "close" it */
6899 6917                          if (screate == TRUE)
6900 6918                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6901 6919                          rfs4_state_rele(sp);
6902 6920                          /* check if a monitor detected a delegation conflict */
6903 6921                          if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6904 6922                                  resp->status = NFS4ERR_DELAY;
6905 6923                          else
6906 6924                                  resp->status = NFS4ERR_SERVERFAULT;
6907 6925                          return;
6908 6926                  }
6909 6927          } else { /* open upgrade */
6910 6928                  /*
6911 6929                   * calculate the fflags for the new mode that is being added
6912 6930                   * by this upgrade.
6913 6931                   */
6914 6932                  fflags = 0;
6915 6933                  if (open_a & OPEN4_SHARE_ACCESS_READ)
6916 6934                          fflags |= FREAD;
6917 6935                  if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6918 6936                          fflags |= FWRITE;
6919 6937                  vn_open_upgrade(cs->vp, fflags);
6920 6938          }
                   /*
                    * Record the open.  The per-file counters are bumped only for
                    * bits that are new to this state (open_d / open_a, computed
                    * above), while the aggregate masks take the full request.
                    */
6921 6939          sp->rs_open_access |= access;
6922 6940          sp->rs_open_deny |= deny;
6923 6941  
6924 6942          if (open_d & OPEN4_SHARE_DENY_READ)
6925 6943                  fp->rf_deny_read++;
6926 6944          if (open_d & OPEN4_SHARE_DENY_WRITE)
6927 6945                  fp->rf_deny_write++;
6928 6946          fp->rf_share_deny |= deny;
6929 6947  
6930 6948          if (open_a & OPEN4_SHARE_ACCESS_READ)
6931 6949                  fp->rf_access_read++;
6932 6950          if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6933 6951                  fp->rf_access_write++;
6934 6952          fp->rf_share_access |= access;
6935 6953  
6936 6954          /*
6937 6955           * Check for delegation here. if the deleg argument is not
6938 6956           * DELEG_ANY, then this is a reclaim from a client and
6939 6957           * we must honor the delegation requested. If necessary we can
6940 6958           * set the recall flag.
6941 6959           */
6942 6960  
6943 6961          dsp = rfs4_grant_delegation(deleg, sp, &recall);
6944 6962  
                   /* Remember in the compound state whether the file is write-delegated. */
6945 6963          cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6946 6964  
6947 6965          next_stateid(&sp->rs_stateid);
6948 6966  
6949 6967          resp->stateid = sp->rs_stateid.stateid;
6950 6968  
6951 6969          rfs4_dbe_unlock(fp->rf_dbe);
6952 6970          rfs4_dbe_unlock(sp->rs_dbe);
6953 6971  
                   /* The delegation reply is built after both dbe locks are dropped. */
6954 6972          if (dsp) {
6955 6973                  rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6956 6974                  rfs4_deleg_state_rele(dsp);
6957 6975          }
6958 6976  
6959 6977          rfs4_file_rele(fp);
6960 6978          rfs4_state_rele(sp);
6961 6979  
6962 6980          resp->status = NFS4_OK;
6963 6981  }
6964 6982  
6965 6983  /*ARGSUSED*/
6966 6984  static void
6967 6985  rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6968 6986      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6969 6987  {
6970 6988          change_info4 *cinfo = &resp->cinfo;
6971 6989          bitmap4 *attrset = &resp->attrset;
6972 6990  
6973 6991          if (args->opentype == OPEN4_NOCREATE)
6974 6992                  resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6975 6993                      req, cs, args->share_access, cinfo);
6976 6994          else {
6977 6995                  /* inhibit delegation grants during exclusive create */
6978 6996  
6979 6997                  if (args->mode == EXCLUSIVE4)
6980 6998                          rfs4_disable_delegation();
6981 6999  
6982 7000                  resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6983 7001                      oo->ro_client->rc_clientid);
6984 7002          }
6985 7003  
6986 7004          if (resp->status == NFS4_OK) {
6987 7005  
6988 7006                  /* cs->vp cs->fh now reference the desired file */
6989 7007  
6990 7008                  rfs4_do_open(cs, req, oo,
6991 7009                      oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6992 7010                      args->share_access, args->share_deny, resp, 0);
6993 7011  
6994 7012                  /*
6995 7013                   * If rfs4_createfile set attrset, we must
6996 7014                   * clear this attrset before the response is copied.
6997 7015                   */
6998 7016                  if (resp->status != NFS4_OK && resp->attrset) {
6999 7017                          resp->attrset = 0;
7000 7018                  }
7001 7019          }
7002 7020          else
7003 7021                  *cs->statusp = resp->status;
7004 7022  
7005 7023          if (args->mode == EXCLUSIVE4)
7006 7024                  rfs4_enable_delegation();
7007 7025  }
7008 7026  
7009 7027  /*ARGSUSED*/
7010 7028  static void
7011 7029  rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7012 7030      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7013 7031  {
7014 7032          change_info4 *cinfo = &resp->cinfo;
7015 7033          vattr_t va;
7016 7034          vtype_t v_type = cs->vp->v_type;
7017 7035          int error = 0;
7018 7036  
7019 7037          /* Verify that we have a regular file */
7020 7038          if (v_type != VREG) {
7021 7039                  if (v_type == VDIR)
7022 7040                          resp->status = NFS4ERR_ISDIR;
7023 7041                  else if (v_type == VLNK)
7024 7042                          resp->status = NFS4ERR_SYMLINK;
7025 7043                  else
7026 7044                          resp->status = NFS4ERR_INVAL;
7027 7045                  return;
7028 7046          }
7029 7047  
7030 7048          va.va_mask = AT_MODE|AT_UID;
7031 7049          error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7032 7050          if (error) {
7033 7051                  resp->status = puterrno4(error);
7034 7052                  return;
7035 7053          }
7036 7054  
7037 7055          cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7038 7056  
7039 7057          /*
7040 7058           * Check if we have access to the file, Note the the file
7041 7059           * could have originally been open UNCHECKED or GUARDED
7042 7060           * with mode bits that will now fail, but there is nothing
7043 7061           * we can really do about that except in the case that the
7044 7062           * owner of the file is the one requesting the open.
7045 7063           */
7046 7064          if (crgetuid(cs->cr) != va.va_uid) {
7047 7065                  resp->status = check_open_access(args->share_access, cs, req);
7048 7066                  if (resp->status != NFS4_OK) {
7049 7067                          return;
7050 7068                  }
7051 7069          }
7052 7070  
7053 7071          /*
7054 7072           * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7055 7073           */
7056 7074          cinfo->before = 0;
7057 7075          cinfo->after = 0;
7058 7076          cinfo->atomic = FALSE;
7059 7077  
7060 7078          rfs4_do_open(cs, req, oo,
7061 7079              NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7062 7080              args->share_access, args->share_deny, resp, 0);
7063 7081  }
7064 7082  
7065 7083  static void
7066 7084  rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7067 7085      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7068 7086  {
7069 7087          int error;
7070 7088          nfsstat4 status;
7071 7089          stateid4 stateid =
7072 7090              args->open_claim4_u.delegate_cur_info.delegate_stateid;
7073 7091          rfs4_deleg_state_t *dsp;
7074 7092  
7075 7093          /*
7076 7094           * Find the state info from the stateid and confirm that the
7077 7095           * file is delegated.  If the state openowner is the same as
7078 7096           * the supplied openowner we're done. If not, get the file
7079 7097           * info from the found state info. Use that file info to
7080 7098           * create the state for this lock owner. Note solaris doen't
7081 7099           * really need the pathname to find the file. We may want to
7082 7100           * lookup the pathname and make sure that the vp exist and
7083 7101           * matches the vp in the file structure. However it is
7084 7102           * possible that the pathname nolonger exists (local process
7085 7103           * unlinks the file), so this may not be that useful.
7086 7104           */
7087 7105  
7088 7106          status = rfs4_get_deleg_state(&stateid, &dsp);
7089 7107          if (status != NFS4_OK) {
7090 7108                  resp->status = status;
7091 7109                  return;
7092 7110          }
7093 7111  
7094 7112          ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7095 7113  
7096 7114          /*
7097 7115           * New lock owner, create state. Since this was probably called
7098 7116           * in response to a CB_RECALL we set deleg to DELEG_NONE
7099 7117           */
7100 7118  
7101 7119          ASSERT(cs->vp != NULL);
7102 7120          VN_RELE(cs->vp);
7103 7121          VN_HOLD(dsp->rds_finfo->rf_vp);
7104 7122          cs->vp = dsp->rds_finfo->rf_vp;
7105 7123  
7106 7124          if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7107 7125                  rfs4_deleg_state_rele(dsp);
7108 7126                  *cs->statusp = resp->status = puterrno4(error);
7109 7127                  return;
7110 7128          }
7111 7129  
7112 7130          /* Mark progress for delegation returns */
7113 7131          dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7114 7132          rfs4_deleg_state_rele(dsp);
7115 7133          rfs4_do_open(cs, req, oo, DELEG_NONE,
7116 7134              args->share_access, args->share_deny, resp, 1);
7117 7135  }
7118 7136  
7119 7137  /*ARGSUSED*/
7120 7138  static void
7121 7139  rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7122 7140      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7123 7141  {
7124 7142          /*
7125 7143           * Lookup the pathname, it must already exist since this file
7126 7144           * was delegated.
7127 7145           *
7128 7146           * Find the file and state info for this vp and open owner pair.
7129 7147           *      check that they are in fact delegated.
7130 7148           *      check that the state access and deny modes are the same.
7131 7149           *
7132 7150           * Return the delgation possibly seting the recall flag.
7133 7151           */
7134 7152          rfs4_file_t *fp;
7135 7153          rfs4_state_t *sp;
7136 7154          bool_t create = FALSE;
7137 7155          bool_t dcreate = FALSE;
7138 7156          rfs4_deleg_state_t *dsp;
7139 7157          nfsace4 *ace;
7140 7158  
7141 7159          /* Note we ignore oflags */
7142 7160          resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7143 7161              req, cs, args->share_access, &resp->cinfo);
7144 7162  
7145 7163          if (resp->status != NFS4_OK) {
7146 7164                  return;
7147 7165          }
7148 7166  
7149 7167          /* get the file struct and hold a lock on it during initial open */
7150 7168          fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7151 7169          if (fp == NULL) {
7152 7170                  resp->status = NFS4ERR_RESOURCE;
7153 7171                  DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7154 7172                  return;
7155 7173          }
7156 7174  
7157 7175          sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7158 7176          if (sp == NULL) {
7159 7177                  resp->status = NFS4ERR_SERVERFAULT;
7160 7178                  DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7161 7179                  rw_exit(&fp->rf_file_rwlock);
7162 7180                  rfs4_file_rele(fp);
7163 7181                  return;
7164 7182          }
7165 7183  
7166 7184          rfs4_dbe_lock(sp->rs_dbe);
7167 7185          rfs4_dbe_lock(fp->rf_dbe);
7168 7186          if (args->share_access != sp->rs_share_access ||
7169 7187              args->share_deny != sp->rs_share_deny ||
7170 7188              sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7171 7189                  NFS4_DEBUG(rfs4_debug,
7172 7190                      (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7173 7191                  rfs4_dbe_unlock(fp->rf_dbe);
7174 7192                  rfs4_dbe_unlock(sp->rs_dbe);
7175 7193                  rfs4_file_rele(fp);
7176 7194                  rfs4_state_rele(sp);
7177 7195                  resp->status = NFS4ERR_SERVERFAULT;
7178 7196                  return;
7179 7197          }
7180 7198          rfs4_dbe_unlock(fp->rf_dbe);
7181 7199          rfs4_dbe_unlock(sp->rs_dbe);
7182 7200  
7183 7201          dsp = rfs4_finddeleg(sp, &dcreate);
7184 7202          if (dsp == NULL) {
7185 7203                  rfs4_state_rele(sp);
7186 7204                  rfs4_file_rele(fp);
7187 7205                  resp->status = NFS4ERR_SERVERFAULT;
7188 7206                  return;
7189 7207          }
7190 7208  
7191 7209          next_stateid(&sp->rs_stateid);
7192 7210  
7193 7211          resp->stateid = sp->rs_stateid.stateid;
7194 7212  
7195 7213          resp->delegation.delegation_type = dsp->rds_dtype;
7196 7214  
7197 7215          if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7198 7216                  open_read_delegation4 *rv =
7199 7217                      &resp->delegation.open_delegation4_u.read;
7200 7218  
7201 7219                  rv->stateid = dsp->rds_delegid.stateid;
7202 7220                  rv->recall = FALSE; /* no policy in place to set to TRUE */
7203 7221                  ace = &rv->permissions;
7204 7222          } else {
7205 7223                  open_write_delegation4 *rv =
7206 7224                      &resp->delegation.open_delegation4_u.write;
7207 7225  
7208 7226                  rv->stateid = dsp->rds_delegid.stateid;
7209 7227                  rv->recall = FALSE;  /* no policy in place to set to TRUE */
7210 7228                  ace = &rv->permissions;
7211 7229                  rv->space_limit.limitby = NFS_LIMIT_SIZE;
7212 7230                  rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7213 7231          }
7214 7232  
7215 7233          /* XXX For now */
7216 7234          ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7217 7235          ace->flag = 0;
7218 7236          ace->access_mask = 0;
7219 7237          ace->who.utf8string_len = 0;
7220 7238          ace->who.utf8string_val = 0;
7221 7239  
7222 7240          rfs4_deleg_state_rele(dsp);
7223 7241          rfs4_state_rele(sp);
7224 7242          rfs4_file_rele(fp);
7225 7243  }
7226 7244  
/*
 * Result of a sequence id check, as returned by rfs4_check_seqid() and
 * the owner-specific wrappers built on it.
 */
typedef enum {
        NFS4_CHKSEQ_OKAY = 0,   /* request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY = 1, /* same seqid and same op as the last reply */
        NFS4_CHKSEQ_BAD = 2     /* any other seqid, or same seqid/other op */
} rfs4_chkseq_t;
7232 7250  
7233 7251  /*
7234 7252   * Generic function for sequence number checks.
7235 7253   */
7236 7254  static rfs4_chkseq_t
7237 7255  rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7238 7256      seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7239 7257  {
7240 7258          /* Same sequence ids and matching operations? */
7241 7259          if (seqid == rqst_seq && resop->resop == lastop->resop) {
7242 7260                  if (copyres == TRUE) {
7243 7261                          rfs4_free_reply(resop);
7244 7262                          rfs4_copy_reply(resop, lastop);
7245 7263                  }
7246 7264                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7247 7265                      "Replayed SEQID %d\n", seqid));
7248 7266                  return (NFS4_CHKSEQ_REPLAY);
7249 7267          }
7250 7268  
7251 7269          /* If the incoming sequence is not the next expected then it is bad */
7252 7270          if (rqst_seq != seqid + 1) {
7253 7271                  if (rqst_seq == seqid) {
7254 7272                          NFS4_DEBUG(rfs4_debug,
7255 7273                              (CE_NOTE, "BAD SEQID: Replayed sequence id "
7256 7274                              "but last op was %d current op is %d\n",
7257 7275                              lastop->resop, resop->resop));
7258 7276                          return (NFS4_CHKSEQ_BAD);
7259 7277                  }
7260 7278                  NFS4_DEBUG(rfs4_debug,
7261 7279                      (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7262 7280                      rqst_seq, seqid));
7263 7281                  return (NFS4_CHKSEQ_BAD);
7264 7282          }
7265 7283  
7266 7284          /* Everything okay -- next expected */
7267 7285          return (NFS4_CHKSEQ_OKAY);
7268 7286  }
7269 7287  
7270 7288  
7271 7289  static rfs4_chkseq_t
7272 7290  rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7273 7291  {
7274 7292          rfs4_chkseq_t rc;
7275 7293  
7276 7294          rfs4_dbe_lock(op->ro_dbe);
7277 7295          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7278 7296              TRUE);
7279 7297          rfs4_dbe_unlock(op->ro_dbe);
7280 7298  
7281 7299          if (rc == NFS4_CHKSEQ_OKAY)
7282 7300                  rfs4_update_lease(op->ro_client);
7283 7301  
7284 7302          return (rc);
7285 7303  }
7286 7304  
7287 7305  static rfs4_chkseq_t
7288 7306  rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7289 7307  {
7290 7308          rfs4_chkseq_t rc;
7291 7309  
7292 7310          rfs4_dbe_lock(op->ro_dbe);
7293 7311          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7294 7312              olo_seqid, resop, FALSE);
7295 7313          rfs4_dbe_unlock(op->ro_dbe);
7296 7314  
7297 7315          return (rc);
7298 7316  }
7299 7317  
7300 7318  static rfs4_chkseq_t
7301 7319  rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7302 7320  {
7303 7321          rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7304 7322  
7305 7323          rfs4_dbe_lock(lsp->rls_dbe);
7306 7324          if (!lsp->rls_skip_seqid_check)
7307 7325                  rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7308 7326                      resop, TRUE);
7309 7327          rfs4_dbe_unlock(lsp->rls_dbe);
7310 7328  
7311 7329          return (rc);
7312 7330  }
7313 7331  
/*
 * OPEN operation.
 *
 * Outline of the processing below:
 *   1. Require a current filehandle (cs->vp).
 *   2. Look up the client from the open owner's clientid and reject the
 *      request if the client is unknown or its lease has expired.
 *   3. Find or create the open owner and enter its sequence window
 *      (ro_sw) so the seqid cannot change while the open is in progress.
 *   4. For a pre-existing open owner, check the seqid: a bad seqid fails
 *      the request, a replay returns the saved reply, and an owner still
 *      awaiting confirmation has its opens freed and is looked up again.
 *   5. Apply the grace-period rules for the claim type, validate the
 *      share access/deny bits, and dispatch on the claim type.
 *   6. At "out", decide from resp->status whether the seqid is bumped
 *      and the reply saved; on a replay of a successful OPEN, restore
 *      the current filehandle/vnode from the saved reply.
 *
 * The result is reported through resp->status and *cs->statusp.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
        OPEN4args *args = &argop->nfs_argop4_u.opopen;
        OPEN4res *resp = &resop->nfs_resop4_u.opopen;
        open_owner4 *owner = &args->owner;
        open_claim_type4 claim = args->claim;
        rfs4_client_t *cp;
        rfs4_openowner_t *oo;
        bool_t create;
        bool_t replay = FALSE;
        int can_reclaim;

        DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
            OPEN4args *, args);

        if (cs->vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                goto end;
        }

        /*
         * Need to check clientid and lease expiration first based on
         * error ordering and incrementing sequence id.
         */
        cp = rfs4_findclient_by_id(owner->clientid, FALSE);
        if (cp == NULL) {
                *cs->statusp = resp->status =
                    rfs4_check_clientid(&owner->clientid, 0);
                goto end;
        }

        if (rfs4_lease_expired(cp)) {
                rfs4_client_close(cp);
                *cs->statusp = resp->status = NFS4ERR_EXPIRED;
                goto end;
        }
        can_reclaim = cp->rc_can_reclaim;

        /*
         * Find the open_owner for use from this point forward.  Take
         * care in updating the sequence id based on the type of error
         * being returned.
         */
retry:
        /* create is an in/out flag, so it is reset on every retry */
        create = TRUE;
        oo = rfs4_findopenowner(owner, &create, args->seqid);
        if (oo == NULL) {
                *cs->statusp = resp->status = NFS4ERR_RESOURCE;
                rfs4_client_rele(cp);
                goto end;
        }

        /* Hold off access to the sequence space while the open is done */
        rfs4_sw_enter(&oo->ro_sw);

        /*
         * If the open_owner existed before at the server, then check
         * the sequence id.
         */
        if (!create && !oo->ro_postpone_confirm) {
                switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
                case NFS4_CHKSEQ_BAD:
                        /*
                         * A larger seqid on an owner that was never
                         * confirmed: free its opens and look it up again.
                         */
                        if ((args->seqid > oo->ro_open_seqid) &&
                            oo->ro_need_confirm) {
                                rfs4_free_opens(oo, TRUE, FALSE);
                                rfs4_sw_exit(&oo->ro_sw);
                                rfs4_openowner_rele(oo);
                                goto retry;
                        }
                        resp->status = NFS4ERR_BAD_SEQID;
                        goto out;
                case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
                        replay = TRUE;
                        goto out;
                default:
                        break;
                }

                /*
                 * Sequence was ok and open owner exists
                 * check to see if we have yet to see an
                 * open_confirm.
                 */
                if (oo->ro_need_confirm) {
                        rfs4_free_opens(oo, TRUE, FALSE);
                        rfs4_sw_exit(&oo->ro_sw);
                        rfs4_openowner_rele(oo);
                        goto retry;
                }
        }
        /* Grace only applies to regular-type OPENs */
        if (rfs4_clnt_in_grace(cp) &&
            (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
                *cs->statusp = resp->status = NFS4ERR_GRACE;
                goto out;
        }

        /*
         * If previous state at the server existed then can_reclaim
         * will be set. If not reply NFS4ERR_NO_GRACE to the
         * client.
         */
        if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }


        /*
         * Reject the open if the client has missed the grace period
         */
        if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }

        /* Couple of up-front bookkeeping items */
        if (oo->ro_need_confirm) {
                /*
                 * If this is a reclaim OPEN then we should not ask
                 * for a confirmation of the open_owner per the
                 * protocol specification.
                 */
                if (claim == CLAIM_PREVIOUS)
                        oo->ro_need_confirm = FALSE;
                else
                        resp->rflags |= OPEN4_RESULT_CONFIRM;
        }
        resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

        /*
         * If there is an unshared filesystem mounted on this vnode,
         * do not allow to open/create in this directory.
         */
        if (vn_ismntpt(cs->vp)) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        /*
         * access must READ, WRITE, or BOTH.  No access is invalid.
         * deny can be READ, WRITE, BOTH, or NONE.
         * bits not defined for access/deny are invalid.
         */
        if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
                *cs->statusp = resp->status = NFS4ERR_INVAL;
                goto out;
        }


        /*
         * make sure attrset is zero before response is built.
         */
        resp->attrset = 0;

        /* Each claim type has its own handler; all report via resp->status */
        switch (claim) {
        case CLAIM_NULL:
                rfs4_do_opennull(cs, req, args, oo, resp);
                break;
        case CLAIM_PREVIOUS:
                rfs4_do_openprev(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_CUR:
                rfs4_do_opendelcur(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_PREV:
                rfs4_do_opendelprev(cs, req, args, oo, resp);
                break;
        default:
                resp->status = NFS4ERR_INVAL;
                break;
        }

out:
        /* The client is only needed up to here; oo is released at "finish" */
        rfs4_client_rele(cp);

        /* Catch sequence id handling here to make it a little easier */
        switch (resp->status) {
        case NFS4ERR_BADXDR:
        case NFS4ERR_BAD_SEQID:
        case NFS4ERR_BAD_STATEID:
        case NFS4ERR_NOFILEHANDLE:
        case NFS4ERR_RESOURCE:
        case NFS4ERR_STALE_CLIENTID:
        case NFS4ERR_STALE_STATEID:
                /*
                 * The protocol states that if any of these errors are
                 * being returned, the sequence id should not be
                 * incremented.  Any other return requires an
                 * increment.
                 */
                break;
        default:
                /* Always update the lease in this case */
                rfs4_update_lease(oo->ro_client);

                /* Regular response - copy the result */
                if (!replay)
                        rfs4_update_open_resp(oo, resop, &cs->fh);

                /*
                 * REPLAY case: Only if the previous response was OK
                 * do we copy the filehandle.  If not OK, no
                 * filehandle to copy.
                 */
                if (replay == TRUE &&
                    resp->status == NFS4_OK &&
                    oo->ro_reply_fh.nfs_fh4_val) {
                        /*
                         * If this is a replay, we must restore the
                         * current filehandle/vp to that of what was
                         * returned originally.  Try our best to do
                         * it.
                         */
                        nfs_fh4_fmt_t *fh_fmtp =
                            (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

                        cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
                            (fid_t *)&fh_fmtp->fh4_xlen, NULL);

                        if (cs->exi == NULL) {
                                resp->status = NFS4ERR_STALE;
                                goto finish;
                        }

                        VN_RELE(cs->vp);

                        /* nfs4_fhtovp() stores its status in resp->status */
                        cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
                            &resp->status);

                        if (cs->vp == NULL)
                                goto finish;

                        nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
                }

                /*
                 * If this was a replay, no need to update the
                 * sequence id. If the open_owner was not created on
                 * this pass, then update.  The first use of an
                 * open_owner will not bump the sequence id.
                 */
                if (replay == FALSE && !create)
                        rfs4_update_open_sequence(oo);
                /*
                 * If the client is receiving an error and the
                 * open_owner needs to be confirmed, there is no way
                 * to notify the client of this fact ignoring the fact
                 * that the server has no method of returning a
                 * stateid to confirm.  Therefore, the server needs to
                 * mark this open_owner in a way as to avoid the
                 * sequence id checking the next time the client uses
                 * this open_owner.
                 */
                if (resp->status != NFS4_OK && oo->ro_need_confirm)
                        oo->ro_postpone_confirm = TRUE;
                /*
                 * If OK response then clear the postpone flag and
                 * reset the sequence id to keep in sync with the
                 * client.
                 */
                if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
                        oo->ro_postpone_confirm = FALSE;
                        oo->ro_open_seqid = args->seqid;
                }
                break;
        }

finish:
        *cs->statusp = resp->status;

        rfs4_sw_exit(&oo->ro_sw);
        rfs4_openowner_rele(oo);

end:
        /* Paths that never obtained an open owner jump straight here */
        DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
            OPEN4res *, resp);
}
7596 7614  
7597 7615  /*ARGSUSED*/
7598 7616  void
7599 7617  rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7600 7618      struct svc_req *req, struct compound_state *cs)
7601 7619  {
7602 7620          OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7603 7621          OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7604 7622          rfs4_state_t *sp;
7605 7623          nfsstat4 status;
7606 7624  
7607 7625          DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7608 7626              OPEN_CONFIRM4args *, args);
7609 7627  
7610 7628          if (cs->vp == NULL) {
7611 7629                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7612 7630                  goto out;
7613 7631          }
7614 7632  
7615 7633          if (cs->vp->v_type != VREG) {
7616 7634                  *cs->statusp = resp->status =
7617 7635                      cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7618 7636                  return;
7619 7637          }
7620 7638  
7621 7639          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7622 7640          if (status != NFS4_OK) {
7623 7641                  *cs->statusp = resp->status = status;
7624 7642                  goto out;
7625 7643          }
7626 7644  
7627 7645          /* Ensure specified filehandle matches */
7628 7646          if (cs->vp != sp->rs_finfo->rf_vp) {
7629 7647                  rfs4_state_rele(sp);
7630 7648                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7631 7649                  goto out;
7632 7650          }
7633 7651  
7634 7652          /* hold off other access to open_owner while we tinker */
7635 7653          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7636 7654  
7637 7655          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7638 7656          case NFS4_CHECK_STATEID_OKAY:
7639 7657                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7640 7658                      resop) != 0) {
7641 7659                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7642 7660                          break;
7643 7661                  }
7644 7662                  /*
7645 7663                   * If it is the appropriate stateid and determined to
7646 7664                   * be "OKAY" then this means that the stateid does not
7647 7665                   * need to be confirmed and the client is in error for
7648 7666                   * sending an OPEN_CONFIRM.
7649 7667                   */
7650 7668                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7651 7669                  break;
7652 7670          case NFS4_CHECK_STATEID_OLD:
7653 7671                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7654 7672                  break;
7655 7673          case NFS4_CHECK_STATEID_BAD:
7656 7674                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7657 7675                  break;
7658 7676          case NFS4_CHECK_STATEID_EXPIRED:
7659 7677                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7660 7678                  break;
7661 7679          case NFS4_CHECK_STATEID_CLOSED:
7662 7680                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7663 7681                  break;
7664 7682          case NFS4_CHECK_STATEID_REPLAY:
7665 7683                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7666 7684                      resop)) {
7667 7685                  case NFS4_CHKSEQ_OKAY:
7668 7686                          /*
7669 7687                           * This is replayed stateid; if seqid matches
7670 7688                           * next expected, then client is using wrong seqid.
7671 7689                           */
7672 7690                          /* fall through */
7673 7691                  case NFS4_CHKSEQ_BAD:
7674 7692                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7675 7693                          break;
7676 7694                  case NFS4_CHKSEQ_REPLAY:
7677 7695                          /*
7678 7696                           * Note this case is the duplicate case so
7679 7697                           * resp->status is already set.
7680 7698                           */
7681 7699                          *cs->statusp = resp->status;
7682 7700                          rfs4_update_lease(sp->rs_owner->ro_client);
7683 7701                          break;
7684 7702                  }
7685 7703                  break;
7686 7704          case NFS4_CHECK_STATEID_UNCONFIRMED:
7687 7705                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7688 7706                      resop) != NFS4_CHKSEQ_OKAY) {
7689 7707                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7690 7708                          break;
7691 7709                  }
7692 7710                  *cs->statusp = resp->status = NFS4_OK;
7693 7711  
7694 7712                  next_stateid(&sp->rs_stateid);
7695 7713                  resp->open_stateid = sp->rs_stateid.stateid;
7696 7714                  sp->rs_owner->ro_need_confirm = FALSE;
7697 7715                  rfs4_update_lease(sp->rs_owner->ro_client);
7698 7716                  rfs4_update_open_sequence(sp->rs_owner);
7699 7717                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7700 7718                  break;
7701 7719          default:
7702 7720                  ASSERT(FALSE);
7703 7721                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7704 7722                  break;
7705 7723          }
7706 7724          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7707 7725          rfs4_state_rele(sp);
7708 7726  
7709 7727  out:
7710 7728          DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7711 7729              OPEN_CONFIRM4res *, resp);
7712 7730  }
7713 7731  
7714 7732  /*ARGSUSED*/
7715 7733  void
7716 7734  rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7717 7735      struct svc_req *req, struct compound_state *cs)
7718 7736  {
7719 7737          OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7720 7738          OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7721 7739          uint32_t access = args->share_access;
7722 7740          uint32_t deny = args->share_deny;
7723 7741          nfsstat4 status;
7724 7742          rfs4_state_t *sp;
7725 7743          rfs4_file_t *fp;
7726 7744          int fflags = 0;
7727 7745  
7728 7746          DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7729 7747              OPEN_DOWNGRADE4args *, args);
7730 7748  
7731 7749          if (cs->vp == NULL) {
7732 7750                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7733 7751                  goto out;
7734 7752          }
7735 7753  
7736 7754          if (cs->vp->v_type != VREG) {
7737 7755                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7738 7756                  return;
7739 7757          }
7740 7758  
7741 7759          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7742 7760          if (status != NFS4_OK) {
7743 7761                  *cs->statusp = resp->status = status;
7744 7762                  goto out;
7745 7763          }
7746 7764  
7747 7765          /* Ensure specified filehandle matches */
7748 7766          if (cs->vp != sp->rs_finfo->rf_vp) {
7749 7767                  rfs4_state_rele(sp);
7750 7768                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7751 7769                  goto out;
7752 7770          }
7753 7771  
7754 7772          /* hold off other access to open_owner while we tinker */
7755 7773          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7756 7774  
7757 7775          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7758 7776          case NFS4_CHECK_STATEID_OKAY:
7759 7777                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7760 7778                      resop) != NFS4_CHKSEQ_OKAY) {
7761 7779                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7762 7780                          goto end;
7763 7781                  }
7764 7782                  break;
7765 7783          case NFS4_CHECK_STATEID_OLD:
7766 7784                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7767 7785                  goto end;
7768 7786          case NFS4_CHECK_STATEID_BAD:
7769 7787                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7770 7788                  goto end;
7771 7789          case NFS4_CHECK_STATEID_EXPIRED:
7772 7790                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7773 7791                  goto end;
7774 7792          case NFS4_CHECK_STATEID_CLOSED:
7775 7793                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7776 7794                  goto end;
7777 7795          case NFS4_CHECK_STATEID_UNCONFIRMED:
7778 7796                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7779 7797                  goto end;
7780 7798          case NFS4_CHECK_STATEID_REPLAY:
7781 7799                  /* Check the sequence id for the open owner */
7782 7800                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7783 7801                      resop)) {
7784 7802                  case NFS4_CHKSEQ_OKAY:
7785 7803                          /*
7786 7804                           * This is replayed stateid; if seqid matches
7787 7805                           * next expected, then client is using wrong seqid.
7788 7806                           */
7789 7807                          /* fall through */
7790 7808                  case NFS4_CHKSEQ_BAD:
7791 7809                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7792 7810                          goto end;
7793 7811                  case NFS4_CHKSEQ_REPLAY:
7794 7812                          /*
7795 7813                           * Note this case is the duplicate case so
7796 7814                           * resp->status is already set.
7797 7815                           */
7798 7816                          *cs->statusp = resp->status;
7799 7817                          rfs4_update_lease(sp->rs_owner->ro_client);
7800 7818                          goto end;
7801 7819                  }
7802 7820                  break;
7803 7821          default:
7804 7822                  ASSERT(FALSE);
7805 7823                  break;
7806 7824          }
7807 7825  
7808 7826          rfs4_dbe_lock(sp->rs_dbe);
7809 7827          /*
7810 7828           * Check that the new access modes and deny modes are valid.
7811 7829           * Check that no invalid bits are set.
7812 7830           */
7813 7831          if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7814 7832              (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7815 7833                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7816 7834                  rfs4_update_open_sequence(sp->rs_owner);
7817 7835                  rfs4_dbe_unlock(sp->rs_dbe);
7818 7836                  goto end;
7819 7837          }
7820 7838  
7821 7839          /*
7822 7840           * The new modes must be a subset of the current modes and
7823 7841           * the access must specify at least one mode. To test that
7824 7842           * the new mode is a subset of the current modes we bitwise
7825 7843           * AND them together and check that the result equals the new
7826 7844           * mode. For example:
7827 7845           * New mode, access == R and current mode, sp->rs_open_access  == RW
7828 7846           * access & sp->rs_open_access == R == access, so the new access mode
7829 7847           * is valid. Consider access == RW, sp->rs_open_access = R
7830 7848           * access & sp->rs_open_access == R != access, so the new access mode
7831 7849           * is invalid.
7832 7850           */
7833 7851          if ((access & sp->rs_open_access) != access ||
7834 7852              (deny & sp->rs_open_deny) != deny ||
7835 7853              (access &
7836 7854              (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7837 7855                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7838 7856                  rfs4_update_open_sequence(sp->rs_owner);
7839 7857                  rfs4_dbe_unlock(sp->rs_dbe);
7840 7858                  goto end;
7841 7859          }
7842 7860  
7843 7861          /*
7844 7862           * Release any share locks associated with this stateID.
7845 7863           * Strictly speaking, this violates the spec because the
7846 7864           * spec effectively requires that open downgrade be atomic.
7847 7865           * At present, fs_shrlock does not have this capability.
7848 7866           */
7849 7867          (void) rfs4_unshare(sp);
7850 7868  
7851 7869          status = rfs4_share(sp, access, deny);
7852 7870          if (status != NFS4_OK) {
7853 7871                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7854 7872                  rfs4_update_open_sequence(sp->rs_owner);
7855 7873                  rfs4_dbe_unlock(sp->rs_dbe);
7856 7874                  goto end;
7857 7875          }
7858 7876  
7859 7877          fp = sp->rs_finfo;
7860 7878          rfs4_dbe_lock(fp->rf_dbe);
7861 7879  
7862 7880          /*
7863 7881           * If the current mode has deny read and the new mode
7864 7882           * does not, decrement the number of deny read mode bits
7865 7883           * and if it goes to zero turn off the deny read bit
7866 7884           * on the file.
7867 7885           */
7868 7886          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7869 7887              (deny & OPEN4_SHARE_DENY_READ) == 0) {
7870 7888                  fp->rf_deny_read--;
7871 7889                  if (fp->rf_deny_read == 0)
7872 7890                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7873 7891          }
7874 7892  
7875 7893          /*
7876 7894           * If the current mode has deny write and the new mode
7877 7895           * does not, decrement the number of deny write mode bits
7878 7896           * and if it goes to zero turn off the deny write bit
7879 7897           * on the file.
7880 7898           */
7881 7899          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7882 7900              (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7883 7901                  fp->rf_deny_write--;
7884 7902                  if (fp->rf_deny_write == 0)
7885 7903                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7886 7904          }
7887 7905  
7888 7906          /*
7889 7907           * If the current mode has access read and the new mode
7890 7908           * does not, decrement the number of access read mode bits
7891 7909           * and if it goes to zero turn off the access read bit
7892 7910           * on the file.  set fflags to FREAD for the call to
7893 7911           * vn_open_downgrade().
7894 7912           */
7895 7913          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7896 7914              (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7897 7915                  fp->rf_access_read--;
7898 7916                  if (fp->rf_access_read == 0)
7899 7917                          fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7900 7918                  fflags |= FREAD;
7901 7919          }
7902 7920  
7903 7921          /*
7904 7922           * If the current mode has access write and the new mode
7905 7923           * does not, decrement the number of access write mode bits
7906 7924           * and if it goes to zero turn off the access write bit
7907 7925           * on the file.  set fflags to FWRITE for the call to
7908 7926           * vn_open_downgrade().
7909 7927           */
7910 7928          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7911 7929              (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7912 7930                  fp->rf_access_write--;
7913 7931                  if (fp->rf_access_write == 0)
7914 7932                          fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7915 7933                  fflags |= FWRITE;
7916 7934          }
7917 7935  
7918 7936          /* Check that the file is still accessible */
7919 7937          ASSERT(fp->rf_share_access);
7920 7938  
7921 7939          rfs4_dbe_unlock(fp->rf_dbe);
7922 7940  
7923 7941          /* now set the new open access and deny modes */
7924 7942          sp->rs_open_access = access;
7925 7943          sp->rs_open_deny = deny;
7926 7944  
7927 7945          /*
7928 7946           * we successfully downgraded the share lock, now we need to downgrade
7929 7947           * the open. it is possible that the downgrade was only for a deny
7930 7948           * mode and we have nothing else to do.
7931 7949           */
7932 7950          if ((fflags & (FREAD|FWRITE)) != 0)
7933 7951                  vn_open_downgrade(cs->vp, fflags);
7934 7952  
7935 7953          /* Update the stateid */
7936 7954          next_stateid(&sp->rs_stateid);
7937 7955          resp->open_stateid = sp->rs_stateid.stateid;
7938 7956  
7939 7957          rfs4_dbe_unlock(sp->rs_dbe);
7940 7958  
7941 7959          *cs->statusp = resp->status = NFS4_OK;
7942 7960          /* Update the lease */
7943 7961          rfs4_update_lease(sp->rs_owner->ro_client);
7944 7962          /* And the sequence */
7945 7963          rfs4_update_open_sequence(sp->rs_owner);
7946 7964          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7947 7965  
7948 7966  end:
7949 7967          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7950 7968          rfs4_state_rele(sp);
7951 7969  out:
7952 7970          DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7953 7971              OPEN_DOWNGRADE4res *, resp);
7954 7972  }
7955 7973  
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * buffer s1 (which need not be NUL-terminated).  Returns a pointer to
 * the first match, or NULL when s2 does not occur within those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t needle_len = strlen(s2);
	char *cur = (char *)s1;
	size_t remaining;

	/* Stop once fewer bytes remain than the needle needs. */
	for (remaining = n; remaining >= needle_len; remaining--, cur++) {
		if (bcmp(cur, s2, needle_len) == 0)
			return (cur);
	}

	return (NULL);
}
7971 7989  
7972 7990  /*
7973 7991   * The logic behind this function is detailed in the NFSv4 RFC in the
7974 7992   * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7975 7993   * that section for explicit guidance to server behavior for
7976 7994   * SETCLIENTID.
7977 7995   */
7978 7996  void
7979 7997  rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7980 7998      struct svc_req *req, struct compound_state *cs)
7981 7999  {
7982 8000          SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7983 8001          SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7984 8002          rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7985 8003          rfs4_clntip_t *ci;
7986 8004          bool_t create;
7987 8005          char *addr, *netid;
7988 8006          int len;
7989 8007  
7990 8008          DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7991 8009              SETCLIENTID4args *, args);
7992 8010  retry:
7993 8011          newcp = cp_confirmed = cp_unconfirmed = NULL;
7994 8012  
7995 8013          /*
7996 8014           * Save the caller's IP address
7997 8015           */
7998 8016          args->client.cl_addr =
7999 8017              (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8000 8018  
8001 8019          /*
8002 8020           * Record if it is a Solaris client that cannot handle referrals.
8003 8021           */
8004 8022          if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8005 8023              !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8006 8024                  /* Add a "yes, it's downrev" record */
8007 8025                  create = TRUE;
8008 8026                  ci = rfs4_find_clntip(args->client.cl_addr, &create);
8009 8027                  ASSERT(ci != NULL);
8010 8028                  rfs4_dbe_rele(ci->ri_dbe);
8011 8029          } else {
8012 8030                  /* Remove any previous record */
8013 8031                  rfs4_invalidate_clntip(args->client.cl_addr);
8014 8032          }
8015 8033  
8016 8034          /*
8017 8035           * In search of an EXISTING client matching the incoming
8018 8036           * request to establish a new client identifier at the server
8019 8037           */
8020 8038          create = TRUE;
8021 8039          cp = rfs4_findclient(&args->client, &create, NULL);
8022 8040  
8023 8041          /* Should never happen */
8024 8042          ASSERT(cp != NULL);
8025 8043  
8026 8044          if (cp == NULL) {
8027 8045                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8028 8046                  goto out;
8029 8047          }
8030 8048  
8031 8049          /*
8032 8050           * Easiest case. Client identifier is newly created and is
8033 8051           * unconfirmed.  Also note that for this case, no other
8034 8052           * entries exist for the client identifier.  Nothing else to
8035 8053           * check.  Just setup the response and respond.
8036 8054           */
8037 8055          if (create) {
8038 8056                  *cs->statusp = res->status = NFS4_OK;
8039 8057                  res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8040 8058                  res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8041 8059                      cp->rc_confirm_verf;
8042 8060                  /* Setup callback information; CB_NULL confirmation later */
8043 8061                  rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8044 8062  
8045 8063                  rfs4_client_rele(cp);
8046 8064                  goto out;
8047 8065          }
8048 8066  
8049 8067          /*
8050 8068           * An existing, confirmed client may exist but it may not have
8051 8069           * been active for at least one lease period.  If so, then
8052 8070           * "close" the client and create a new client identifier
8053 8071           */
8054 8072          if (rfs4_lease_expired(cp)) {
8055 8073                  rfs4_client_close(cp);
8056 8074                  goto retry;
8057 8075          }
8058 8076  
8059 8077          if (cp->rc_need_confirm == TRUE)
8060 8078                  cp_unconfirmed = cp;
8061 8079          else
8062 8080                  cp_confirmed = cp;
8063 8081  
8064 8082          cp = NULL;
8065 8083  
8066 8084          /*
8067 8085           * We have a confirmed client, now check for an
8068 8086           * unconfimred entry
8069 8087           */
8070 8088          if (cp_confirmed) {
8071 8089                  /* If creds don't match then client identifier is inuse */
8072 8090                  if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8073 8091                          rfs4_cbinfo_t *cbp;
8074 8092                          /*
8075 8093                           * Some one else has established this client
8076 8094                           * id. Try and say * who they are. We will use
8077 8095                           * the call back address supplied by * the
8078 8096                           * first client.
8079 8097                           */
8080 8098                          *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8081 8099  
8082 8100                          addr = netid = NULL;
8083 8101  
8084 8102                          cbp = &cp_confirmed->rc_cbinfo;
8085 8103                          if (cbp->cb_callback.cb_location.r_addr &&
8086 8104                              cbp->cb_callback.cb_location.r_netid) {
8087 8105                                  cb_client4 *cbcp = &cbp->cb_callback;
8088 8106  
8089 8107                                  len = strlen(cbcp->cb_location.r_addr)+1;
8090 8108                                  addr = kmem_alloc(len, KM_SLEEP);
8091 8109                                  bcopy(cbcp->cb_location.r_addr, addr, len);
8092 8110                                  len = strlen(cbcp->cb_location.r_netid)+1;
8093 8111                                  netid = kmem_alloc(len, KM_SLEEP);
8094 8112                                  bcopy(cbcp->cb_location.r_netid, netid, len);
8095 8113                          }
8096 8114  
8097 8115                          res->SETCLIENTID4res_u.client_using.r_addr = addr;
8098 8116                          res->SETCLIENTID4res_u.client_using.r_netid = netid;
8099 8117  
8100 8118                          rfs4_client_rele(cp_confirmed);
8101 8119                  }
8102 8120  
8103 8121                  /*
8104 8122                   * Confirmed, creds match, and verifier matches; must
8105 8123                   * be an update of the callback info
8106 8124                   */
8107 8125                  if (cp_confirmed->rc_nfs_client.verifier ==
8108 8126                      args->client.verifier) {
8109 8127                          /* Setup callback information */
8110 8128                          rfs4_client_setcb(cp_confirmed, &args->callback,
8111 8129                              args->callback_ident);
8112 8130  
8113 8131                          /* everything okay -- move ahead */
8114 8132                          *cs->statusp = res->status = NFS4_OK;
8115 8133                          res->SETCLIENTID4res_u.resok4.clientid =
8116 8134                              cp_confirmed->rc_clientid;
8117 8135  
8118 8136                          /* update the confirm_verifier and return it */
8119 8137                          rfs4_client_scv_next(cp_confirmed);
8120 8138                          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8121 8139                              cp_confirmed->rc_confirm_verf;
8122 8140  
8123 8141                          rfs4_client_rele(cp_confirmed);
8124 8142                          goto out;
8125 8143                  }
8126 8144  
8127 8145                  /*
8128 8146                   * Creds match but the verifier doesn't.  Must search
8129 8147                   * for an unconfirmed client that would be replaced by
8130 8148                   * this request.
8131 8149                   */
8132 8150                  create = FALSE;
8133 8151                  cp_unconfirmed = rfs4_findclient(&args->client, &create,
8134 8152                      cp_confirmed);
8135 8153          }
8136 8154  
8137 8155          /*
8138 8156           * At this point, we have taken care of the brand new client
8139 8157           * struct, INUSE case, update of an existing, and confirmed
8140 8158           * client struct.
8141 8159           */
8142 8160  
8143 8161          /*
8144 8162           * check to see if things have changed while we originally
8145 8163           * picked up the client struct.  If they have, then return and
8146 8164           * retry the processing of this SETCLIENTID request.
8147 8165           */
8148 8166          if (cp_unconfirmed) {
8149 8167                  rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8150 8168                  if (!cp_unconfirmed->rc_need_confirm) {
8151 8169                          rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8152 8170                          rfs4_client_rele(cp_unconfirmed);
8153 8171                          if (cp_confirmed)
8154 8172                                  rfs4_client_rele(cp_confirmed);
8155 8173                          goto retry;
8156 8174                  }
8157 8175                  /* do away with the old unconfirmed one */
8158 8176                  rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8159 8177                  rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8160 8178                  rfs4_client_rele(cp_unconfirmed);
8161 8179                  cp_unconfirmed = NULL;
8162 8180          }
8163 8181  
8164 8182          /*
8165 8183           * This search will temporarily hide the confirmed client
8166 8184           * struct while a new client struct is created as the
8167 8185           * unconfirmed one.
8168 8186           */
8169 8187          create = TRUE;
8170 8188          newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8171 8189  
8172 8190          ASSERT(newcp != NULL);
8173 8191  
8174 8192          if (newcp == NULL) {
8175 8193                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8176 8194                  rfs4_client_rele(cp_confirmed);
8177 8195                  goto out;
8178 8196          }
8179 8197  
8180 8198          /*
8181 8199           * If one was not created, then a similar request must be in
8182 8200           * process so release and start over with this one
8183 8201           */
8184 8202          if (create != TRUE) {
8185 8203                  rfs4_client_rele(newcp);
8186 8204                  if (cp_confirmed)
8187 8205                          rfs4_client_rele(cp_confirmed);
8188 8206                  goto retry;
8189 8207          }
8190 8208  
8191 8209          *cs->statusp = res->status = NFS4_OK;
8192 8210          res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8193 8211          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8194 8212              newcp->rc_confirm_verf;
8195 8213          /* Setup callback information; CB_NULL confirmation later */
8196 8214          rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8197 8215  
8198 8216          newcp->rc_cp_confirmed = cp_confirmed;
8199 8217  
8200 8218          rfs4_client_rele(newcp);
8201 8219  
8202 8220  out:
8203 8221          DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8204 8222              SETCLIENTID4res *, res);
8205 8223  }
8206 8224  
8207 8225  /*ARGSUSED*/
8208 8226  void
8209 8227  rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8210 8228      struct svc_req *req, struct compound_state *cs)
8211 8229  {
8212 8230          SETCLIENTID_CONFIRM4args *args =
8213 8231              &argop->nfs_argop4_u.opsetclientid_confirm;
8214 8232          SETCLIENTID_CONFIRM4res *res =
8215 8233              &resop->nfs_resop4_u.opsetclientid_confirm;
8216 8234          rfs4_client_t *cp, *cptoclose = NULL;
8217 8235          nfs4_srv_t *nsrv4;
8218 8236  
8219 8237          DTRACE_NFSV4_2(op__setclientid__confirm__start,
8220 8238              struct compound_state *, cs,
8221 8239              SETCLIENTID_CONFIRM4args *, args);
8222 8240  
8223 8241          nsrv4 = nfs4_get_srv();
8224 8242          *cs->statusp = res->status = NFS4_OK;
8225 8243  
8226 8244          cp = rfs4_findclient_by_id(args->clientid, TRUE);
8227 8245  
8228 8246          if (cp == NULL) {
8229 8247                  *cs->statusp = res->status =
8230 8248                      rfs4_check_clientid(&args->clientid, 1);
8231 8249                  goto out;
8232 8250          }
8233 8251  
8234 8252          if (!creds_ok(cp, req, cs)) {
8235 8253                  *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8236 8254                  rfs4_client_rele(cp);
8237 8255                  goto out;
8238 8256          }
8239 8257  
8240 8258          /* If the verifier doesn't match, the record doesn't match */
8241 8259          if (cp->rc_confirm_verf != args->setclientid_confirm) {
8242 8260                  *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8243 8261                  rfs4_client_rele(cp);
8244 8262                  goto out;
8245 8263          }
8246 8264  
8247 8265          rfs4_dbe_lock(cp->rc_dbe);
8248 8266          cp->rc_need_confirm = FALSE;
8249 8267          if (cp->rc_cp_confirmed) {
8250 8268                  cptoclose = cp->rc_cp_confirmed;
8251 8269                  cptoclose->rc_ss_remove = 1;
8252 8270                  cp->rc_cp_confirmed = NULL;
8253 8271          }
8254 8272  
8255 8273          /*
8256 8274           * Update the client's associated server instance, if it's changed
8257 8275           * since the client was created.
8258 8276           */
8259 8277          if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8260 8278                  rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8261 8279  
8262 8280          /*
8263 8281           * Record clientid in stable storage.
8264 8282           * Must be done after server instance has been assigned.
8265 8283           */
8266 8284          rfs4_ss_clid(nsrv4, cp);
8267 8285  
8268 8286          rfs4_dbe_unlock(cp->rc_dbe);
8269 8287  
8270 8288          if (cptoclose)
8271 8289                  /* don't need to rele, client_close does it */
8272 8290                  rfs4_client_close(cptoclose);
8273 8291  
8274 8292          /* If needed, initiate CB_NULL call for callback path */
8275 8293          rfs4_deleg_cb_check(cp);
8276 8294          rfs4_update_lease(cp);
8277 8295  
8278 8296          /*
8279 8297           * Check to see if client can perform reclaims
8280 8298           */
8281 8299          rfs4_ss_chkclid(nsrv4, cp);
8282 8300  
8283 8301          rfs4_client_rele(cp);
8284 8302  
8285 8303  out:
8286 8304          DTRACE_NFSV4_2(op__setclientid__confirm__done,
8287 8305              struct compound_state *, cs,
8288 8306              SETCLIENTID_CONFIRM4 *, res);
8289 8307  }
8290 8308  
8291 8309  
8292 8310  /*ARGSUSED*/
8293 8311  void
8294 8312  rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8295 8313      struct svc_req *req, struct compound_state *cs)
8296 8314  {
8297 8315          CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8298 8316          CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8299 8317          rfs4_state_t *sp;
8300 8318          nfsstat4 status;
8301 8319  
8302 8320          DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8303 8321              CLOSE4args *, args);
8304 8322  
8305 8323          if (cs->vp == NULL) {
8306 8324                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8307 8325                  goto out;
8308 8326          }
8309 8327  
8310 8328          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8311 8329          if (status != NFS4_OK) {
8312 8330                  *cs->statusp = resp->status = status;
8313 8331                  goto out;
8314 8332          }
8315 8333  
8316 8334          /* Ensure specified filehandle matches */
8317 8335          if (cs->vp != sp->rs_finfo->rf_vp) {
8318 8336                  rfs4_state_rele(sp);
8319 8337                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8320 8338                  goto out;
8321 8339          }
8322 8340  
8323 8341          /* hold off other access to open_owner while we tinker */
8324 8342          rfs4_sw_enter(&sp->rs_owner->ro_sw);
8325 8343  
8326 8344          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8327 8345          case NFS4_CHECK_STATEID_OKAY:
8328 8346                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8329 8347                      resop) != NFS4_CHKSEQ_OKAY) {
8330 8348                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8331 8349                          goto end;
8332 8350                  }
8333 8351                  break;
8334 8352          case NFS4_CHECK_STATEID_OLD:
8335 8353                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8336 8354                  goto end;
8337 8355          case NFS4_CHECK_STATEID_BAD:
8338 8356                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8339 8357                  goto end;
8340 8358          case NFS4_CHECK_STATEID_EXPIRED:
8341 8359                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8342 8360                  goto end;
8343 8361          case NFS4_CHECK_STATEID_CLOSED:
8344 8362                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8345 8363                  goto end;
8346 8364          case NFS4_CHECK_STATEID_UNCONFIRMED:
8347 8365                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8348 8366                  goto end;
8349 8367          case NFS4_CHECK_STATEID_REPLAY:
8350 8368                  /* Check the sequence id for the open owner */
8351 8369                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8352 8370                      resop)) {
8353 8371                  case NFS4_CHKSEQ_OKAY:
8354 8372                          /*
8355 8373                           * This is replayed stateid; if seqid matches
8356 8374                           * next expected, then client is using wrong seqid.
8357 8375                           */
8358 8376                          /* FALL THROUGH */
8359 8377                  case NFS4_CHKSEQ_BAD:
8360 8378                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8361 8379                          goto end;
8362 8380                  case NFS4_CHKSEQ_REPLAY:
8363 8381                          /*
8364 8382                           * Note this case is the duplicate case so
8365 8383                           * resp->status is already set.
8366 8384                           */
8367 8385                          *cs->statusp = resp->status;
8368 8386                          rfs4_update_lease(sp->rs_owner->ro_client);
8369 8387                          goto end;
8370 8388                  }
8371 8389                  break;
8372 8390          default:
8373 8391                  ASSERT(FALSE);
8374 8392                  break;
8375 8393          }
8376 8394  
8377 8395          rfs4_dbe_lock(sp->rs_dbe);
8378 8396  
8379 8397          /* Update the stateid. */
8380 8398          next_stateid(&sp->rs_stateid);
8381 8399          resp->open_stateid = sp->rs_stateid.stateid;
8382 8400  
8383 8401          rfs4_dbe_unlock(sp->rs_dbe);
8384 8402  
8385 8403          rfs4_update_lease(sp->rs_owner->ro_client);
8386 8404          rfs4_update_open_sequence(sp->rs_owner);
8387 8405          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8388 8406  
8389 8407          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8390 8408  
8391 8409          *cs->statusp = resp->status = status;
8392 8410  
8393 8411  end:
8394 8412          rfs4_sw_exit(&sp->rs_owner->ro_sw);
8395 8413          rfs4_state_rele(sp);
8396 8414  out:
8397 8415          DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8398 8416              CLOSE4res *, resp);
8399 8417  }
8400 8418  
8401 8419  /*
8402 8420   * Manage the counts on the file struct and close all file locks
8403 8421   */
8404 8422  /*ARGSUSED*/
8405 8423  void
8406 8424  rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8407 8425      bool_t close_of_client)
8408 8426  {
8409 8427          rfs4_file_t *fp = sp->rs_finfo;
8410 8428          rfs4_lo_state_t *lsp;
8411 8429          int fflags = 0;
8412 8430  
8413 8431          /*
8414 8432           * If this call is part of the larger closing down of client
8415 8433           * state then it is just easier to release all locks
8416 8434           * associated with this client instead of going through each
8417 8435           * individual file and cleaning locks there.
8418 8436           */
8419 8437          if (close_of_client) {
8420 8438                  if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8421 8439                      !list_is_empty(&sp->rs_lostatelist) &&
8422 8440                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8423 8441                          /* Is the PxFS kernel module loaded? */
8424 8442                          if (lm_remove_file_locks != NULL) {
8425 8443                                  int new_sysid;
8426 8444  
8427 8445                                  /* Encode the cluster nodeid in new sysid */
8428 8446                                  new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8429 8447                                  lm_set_nlmid_flk(&new_sysid);
8430 8448  
8431 8449                                  /*
8432 8450                                   * This PxFS routine removes file locks for a
8433 8451                                   * client over all nodes of a cluster.
8434 8452                                   */
8435 8453                                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8436 8454                                      "lm_remove_file_locks(sysid=0x%x)\n",
8437 8455                                      new_sysid));
8438 8456                                  (*lm_remove_file_locks)(new_sysid);
8439 8457                          } else {
8440 8458                                  struct flock64 flk;
8441 8459  
8442 8460                                  /* Release all locks for this client */
8443 8461                                  flk.l_type = F_UNLKSYS;
8444 8462                                  flk.l_whence = 0;
8445 8463                                  flk.l_start = 0;
8446 8464                                  flk.l_len = 0;
8447 8465                                  flk.l_sysid =
8448 8466                                      sp->rs_owner->ro_client->rc_sysidt;
8449 8467                                  flk.l_pid = 0;
8450 8468                                  (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8451 8469                                      &flk, F_REMOTELOCK | FREAD | FWRITE,
8452 8470                                      (u_offset_t)0, NULL, CRED(), NULL);
8453 8471                          }
8454 8472  
8455 8473                          sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8456 8474                  }
8457 8475          }
8458 8476  
8459 8477          /*
8460 8478           * Release all locks on this file by this lock owner or at
8461 8479           * least mark the locks as having been released
8462 8480           */
8463 8481          for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8464 8482              lsp = list_next(&sp->rs_lostatelist, lsp)) {
8465 8483                  lsp->rls_locks_cleaned = TRUE;
8466 8484  
8467 8485                  /* Was this already taken care of above? */
8468 8486                  if (!close_of_client &&
8469 8487                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8470 8488                          (void) cleanlocks(sp->rs_finfo->rf_vp,
8471 8489                              lsp->rls_locker->rl_pid,
8472 8490                              lsp->rls_locker->rl_client->rc_sysidt);
8473 8491          }
8474 8492  
8475 8493          /*
8476 8494           * Release any shrlocks associated with this open state ID.
8477 8495           * This must be done before the rfs4_state gets marked closed.
8478 8496           */
8479 8497          if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8480 8498                  (void) rfs4_unshare(sp);
8481 8499  
8482 8500          if (sp->rs_open_access) {
8483 8501                  rfs4_dbe_lock(fp->rf_dbe);
8484 8502  
8485 8503                  /*
8486 8504                   * Decrement the count for each access and deny bit that this
8487 8505                   * state has contributed to the file.
8488 8506                   * If the file counts go to zero
8489 8507                   * clear the appropriate bit in the appropriate mask.
8490 8508                   */
8491 8509                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8492 8510                          fp->rf_access_read--;
8493 8511                          fflags |= FREAD;
8494 8512                          if (fp->rf_access_read == 0)
8495 8513                                  fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8496 8514                  }
8497 8515                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8498 8516                          fp->rf_access_write--;
8499 8517                          fflags |= FWRITE;
8500 8518                          if (fp->rf_access_write == 0)
8501 8519                                  fp->rf_share_access &=
8502 8520                                      ~OPEN4_SHARE_ACCESS_WRITE;
8503 8521                  }
8504 8522                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8505 8523                          fp->rf_deny_read--;
8506 8524                          if (fp->rf_deny_read == 0)
8507 8525                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8508 8526                  }
8509 8527                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8510 8528                          fp->rf_deny_write--;
8511 8529                          if (fp->rf_deny_write == 0)
8512 8530                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8513 8531                  }
8514 8532  
8515 8533                  (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8516 8534  
8517 8535                  rfs4_dbe_unlock(fp->rf_dbe);
8518 8536  
8519 8537                  sp->rs_open_access = 0;
8520 8538                  sp->rs_open_deny = 0;
8521 8539          }
8522 8540  }
8523 8541  
8524 8542  /*
8525 8543   * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8526 8544   */
8527 8545  static nfsstat4
8528 8546  lock_denied(LOCK4denied *dp, struct flock64 *flk)
8529 8547  {
8530 8548          rfs4_lockowner_t *lo;
8531 8549          rfs4_client_t *cp;
8532 8550          uint32_t len;
8533 8551  
8534 8552          lo = rfs4_findlockowner_by_pid(flk->l_pid);
8535 8553          if (lo != NULL) {
8536 8554                  cp = lo->rl_client;
8537 8555                  if (rfs4_lease_expired(cp)) {
8538 8556                          rfs4_lockowner_rele(lo);
8539 8557                          rfs4_dbe_hold(cp->rc_dbe);
8540 8558                          rfs4_client_close(cp);
8541 8559                          return (NFS4ERR_EXPIRED);
8542 8560                  }
8543 8561                  dp->owner.clientid = lo->rl_owner.clientid;
8544 8562                  len = lo->rl_owner.owner_len;
8545 8563                  dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8546 8564                  bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8547 8565                  dp->owner.owner_len = len;
8548 8566                  rfs4_lockowner_rele(lo);
8549 8567                  goto finish;
8550 8568          }
8551 8569  
8552 8570          /*
8553 8571           * Its not a NFS4 lock. We take advantage that the upper 32 bits
8554 8572           * of the client id contain the boot time for a NFS4 lock. So we
8555 8573           * fabricate and identity by setting clientid to the sysid, and
8556 8574           * the lock owner to the pid.
8557 8575           */
8558 8576          dp->owner.clientid = flk->l_sysid;
8559 8577          len = sizeof (pid_t);
8560 8578          dp->owner.owner_len = len;
8561 8579          dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8562 8580          bcopy(&flk->l_pid, dp->owner.owner_val, len);
8563 8581  finish:
8564 8582          dp->offset = flk->l_start;
8565 8583          dp->length = flk->l_len;
8566 8584  
8567 8585          if (flk->l_type == F_RDLCK)
8568 8586                  dp->locktype = READ_LT;
8569 8587          else if (flk->l_type == F_WRLCK)
8570 8588                  dp->locktype = WRITE_LT;
8571 8589          else
8572 8590                  return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8573 8591  
8574 8592          return (NFS4_OK);
8575 8593  }
8576 8594  
8577 8595  /*
8578 8596   * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8579 8597   * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8580 8598   * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8581 8599   * for that (obviously); they are sending the LOCK requests with some delays
8582 8600   * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8583 8601   * locking and delay implementation at the client side.
8584 8602   *
8585 8603   * To make the life of the clients easier, the NFSv4.0 server tries to do some
8586 8604   * fast retries on its own (the for loop below) in a hope the lock will be
8587 8605   * available soon.  And if not, the client won't need to resend the LOCK
8588 8606   * requests so fast to check the lock availability.  This basically saves some
8589 8607   * network traffic and tries to make sure the client gets the lock ASAP.
8590 8608   */
8591 8609  static int
8592 8610  setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8593 8611  {
8594 8612          int error;
8595 8613          struct flock64 flk;
8596 8614          int i;
8597 8615          clock_t delaytime;
8598 8616          int cmd;
8599 8617          int spin_cnt = 0;
8600 8618  
8601 8619          cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8602 8620  retry:
8603 8621          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8604 8622  
8605 8623          for (i = 0; i < rfs4_maxlock_tries; i++) {
8606 8624                  LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8607 8625                  error = VOP_FRLOCK(vp, cmd,
8608 8626                      flock, flag, (u_offset_t)0, NULL, cred, NULL);
8609 8627  
8610 8628                  if (error != EAGAIN && error != EACCES)
8611 8629                          break;
8612 8630  
8613 8631                  if (i < rfs4_maxlock_tries - 1) {
8614 8632                          delay(delaytime);
8615 8633                          delaytime *= 2;
8616 8634                  }
8617 8635          }
8618 8636  
8619 8637          if (error == EAGAIN || error == EACCES) {
8620 8638                  /* Get the owner of the lock */
8621 8639                  flk = *flock;
8622 8640                  LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8623 8641                  if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8624 8642                      NULL) == 0) {
8625 8643                          /*
8626 8644                           * There's a race inherent in the current VOP_FRLOCK
8627 8645                           * design where:
8628 8646                           * a: "other guy" takes a lock that conflicts with a
8629 8647                           * lock we want
8630 8648                           * b: we attempt to take our lock (non-blocking) and
8631 8649                           * the attempt fails.
8632 8650                           * c: "other guy" releases the conflicting lock
8633 8651                           * d: we ask what lock conflicts with the lock we want,
8634 8652                           * getting F_UNLCK (no lock blocks us)
8635 8653                           *
8636 8654                           * If we retry the non-blocking lock attempt in this
8637 8655                           * case (restart at step 'b') there's some possibility
8638 8656                           * that many such attempts might fail.  However a test
8639 8657                           * designed to actually provoke this race shows that
8640 8658                           * the vast majority of cases require no retry, and
8641 8659                           * only a few took as many as three retries.  Here's
8642 8660                           * the test outcome:
8643 8661                           *
8644 8662                           *         number of retries    how many times we needed
8645 8663                           *                              that many retries
8646 8664                           *         0                    79461
8647 8665                           *         1                      862
8648 8666                           *         2                       49
8649 8667                           *         3                        5
8650 8668                           *
8651 8669                           * Given those empirical results, we arbitrarily limit
8652 8670                           * the retry count to ten.
8653 8671                           *
8654 8672                           * If we actually make to ten retries and give up,
8655 8673                           * nothing catastrophic happens, but we're unable to
8656 8674                           * return the information about the conflicting lock to
8657 8675                           * the NFS client.  That's an acceptable trade off vs.
8658 8676                           * letting this retry loop run forever.
8659 8677                           */
8660 8678                          if (flk.l_type == F_UNLCK) {
8661 8679                                  if (spin_cnt++ < 10) {
8662 8680                                          /* No longer locked, retry */
8663 8681                                          goto retry;
8664 8682                                  }
8665 8683                          } else {
8666 8684                                  *flock = flk;
8667 8685                                  LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8668 8686                                      F_GETLK, &flk);
8669 8687                          }
8670 8688                  }
8671 8689          }
8672 8690  
8673 8691          return (error);
8674 8692  }
8675 8693  
8676 8694  /*ARGSUSED*/
8677 8695  static nfsstat4
8678 8696  rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8679 8697      offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8680 8698  {
8681 8699          nfsstat4 status;
8682 8700          rfs4_lockowner_t *lo = lsp->rls_locker;
8683 8701          rfs4_state_t *sp = lsp->rls_state;
8684 8702          struct flock64 flock;
8685 8703          int16_t ltype;
8686 8704          int flag;
8687 8705          int error;
8688 8706          sysid_t sysid;
8689 8707          LOCK4res *lres;
8690 8708          vnode_t *vp;
8691 8709  
8692 8710          if (rfs4_lease_expired(lo->rl_client)) {
8693 8711                  return (NFS4ERR_EXPIRED);
8694 8712          }
8695 8713  
8696 8714          if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8697 8715                  return (status);
8698 8716  
8699 8717          /* Check for zero length. To lock to end of file use all ones for V4 */
8700 8718          if (length == 0)
8701 8719                  return (NFS4ERR_INVAL);
8702 8720          else if (length == (length4)(~0))
8703 8721                  length = 0;             /* Posix to end of file  */
8704 8722  
8705 8723  retry:
8706 8724          rfs4_dbe_lock(sp->rs_dbe);
8707 8725          if (sp->rs_closed == TRUE) {
8708 8726                  rfs4_dbe_unlock(sp->rs_dbe);
8709 8727                  return (NFS4ERR_OLD_STATEID);
8710 8728          }
8711 8729  
8712 8730          if (resop->resop != OP_LOCKU) {
8713 8731                  switch (locktype) {
8714 8732                  case READ_LT:
8715 8733                  case READW_LT:
8716 8734                          if ((sp->rs_share_access
8717 8735                              & OPEN4_SHARE_ACCESS_READ) == 0) {
8718 8736                                  rfs4_dbe_unlock(sp->rs_dbe);
8719 8737  
8720 8738                                  return (NFS4ERR_OPENMODE);
8721 8739                          }
8722 8740                          ltype = F_RDLCK;
8723 8741                          break;
8724 8742                  case WRITE_LT:
8725 8743                  case WRITEW_LT:
8726 8744                          if ((sp->rs_share_access
8727 8745                              & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8728 8746                                  rfs4_dbe_unlock(sp->rs_dbe);
8729 8747  
8730 8748                                  return (NFS4ERR_OPENMODE);
8731 8749                          }
8732 8750                          ltype = F_WRLCK;
8733 8751                          break;
8734 8752                  }
8735 8753          } else
8736 8754                  ltype = F_UNLCK;
8737 8755  
8738 8756          flock.l_type = ltype;
8739 8757          flock.l_whence = 0;             /* SEEK_SET */
8740 8758          flock.l_start = offset;
8741 8759          flock.l_len = length;
8742 8760          flock.l_sysid = sysid;
8743 8761          flock.l_pid = lsp->rls_locker->rl_pid;
8744 8762  
8745 8763          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8746 8764          if (flock.l_len < 0 || flock.l_start < 0) {
8747 8765                  rfs4_dbe_unlock(sp->rs_dbe);
8748 8766                  return (NFS4ERR_INVAL);
8749 8767          }
8750 8768  
8751 8769          /*
8752 8770           * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8753 8771           * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8754 8772           */
8755 8773          flag = (int)sp->rs_share_access | F_REMOTELOCK;
8756 8774  
8757 8775          vp = sp->rs_finfo->rf_vp;
8758 8776          VN_HOLD(vp);
8759 8777  
8760 8778          /*
8761 8779           * We need to unlock sp before we call the underlying filesystem to
8762 8780           * acquire the file lock.
8763 8781           */
8764 8782          rfs4_dbe_unlock(sp->rs_dbe);
8765 8783  
8766 8784          error = setlock(vp, &flock, flag, cred);
8767 8785  
8768 8786          /*
8769 8787           * Make sure the file is still open.  In a case the file was closed in
8770 8788           * the meantime, clean the lock we acquired using the setlock() call
8771 8789           * above, and return the appropriate error.
8772 8790           */
8773 8791          rfs4_dbe_lock(sp->rs_dbe);
8774 8792          if (sp->rs_closed == TRUE) {
8775 8793                  cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8776 8794                  rfs4_dbe_unlock(sp->rs_dbe);
8777 8795  
8778 8796                  VN_RELE(vp);
8779 8797  
8780 8798                  return (NFS4ERR_OLD_STATEID);
8781 8799          }
8782 8800          rfs4_dbe_unlock(sp->rs_dbe);
8783 8801  
8784 8802          VN_RELE(vp);
8785 8803  
8786 8804          if (error == 0) {
8787 8805                  rfs4_dbe_lock(lsp->rls_dbe);
8788 8806                  next_stateid(&lsp->rls_lockid);
8789 8807                  rfs4_dbe_unlock(lsp->rls_dbe);
8790 8808          }
8791 8809  
8792 8810          /*
8793 8811           * N.B. We map error values to nfsv4 errors. This is differrent
8794 8812           * than puterrno4 routine.
8795 8813           */
8796 8814          switch (error) {
8797 8815          case 0:
8798 8816                  status = NFS4_OK;
8799 8817                  break;
8800 8818          case EAGAIN:
8801 8819          case EACCES:            /* Old value */
8802 8820                  /* Can only get here if op is OP_LOCK */
8803 8821                  ASSERT(resop->resop == OP_LOCK);
8804 8822                  lres = &resop->nfs_resop4_u.oplock;
8805 8823                  status = NFS4ERR_DENIED;
8806 8824                  if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8807 8825                      == NFS4ERR_EXPIRED)
8808 8826                          goto retry;
8809 8827                  break;
8810 8828          case ENOLCK:
8811 8829                  status = NFS4ERR_DELAY;
8812 8830                  break;
8813 8831          case EOVERFLOW:
8814 8832                  status = NFS4ERR_INVAL;
8815 8833                  break;
8816 8834          case EINVAL:
8817 8835                  status = NFS4ERR_NOTSUPP;
8818 8836                  break;
8819 8837          default:
8820 8838                  status = NFS4ERR_SERVERFAULT;
8821 8839                  break;
8822 8840          }
8823 8841  
8824 8842          return (status);
8825 8843  }
8826 8844  
8827 8845  /*ARGSUSED*/
8828 8846  void
8829 8847  rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8830 8848      struct svc_req *req, struct compound_state *cs)
8831 8849  {
8832 8850          LOCK4args *args = &argop->nfs_argop4_u.oplock;
8833 8851          LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8834 8852          nfsstat4 status;
8835 8853          stateid4 *stateid;
8836 8854          rfs4_lockowner_t *lo;
8837 8855          rfs4_client_t *cp;
8838 8856          rfs4_state_t *sp = NULL;
8839 8857          rfs4_lo_state_t *lsp = NULL;
8840 8858          bool_t ls_sw_held = FALSE;
8841 8859          bool_t create = TRUE;
8842 8860          bool_t lcreate = TRUE;
8843 8861          bool_t dup_lock = FALSE;
8844 8862          int rc;
8845 8863  
8846 8864          DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8847 8865              LOCK4args *, args);
8848 8866  
8849 8867          if (cs->vp == NULL) {
8850 8868                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8851 8869                  DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8852 8870                      cs, LOCK4res *, resp);
8853 8871                  return;
8854 8872          }
8855 8873  
8856 8874          if (args->locker.new_lock_owner) {
8857 8875                  /* Create a new lockowner for this instance */
8858 8876                  open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8859 8877  
8860 8878                  NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8861 8879  
8862 8880                  stateid = &olo->open_stateid;
8863 8881                  status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8864 8882                  if (status != NFS4_OK) {
8865 8883                          NFS4_DEBUG(rfs4_debug,
8866 8884                              (CE_NOTE, "Get state failed in lock %d", status));
8867 8885                          *cs->statusp = resp->status = status;
8868 8886                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8869 8887                              cs, LOCK4res *, resp);
8870 8888                          return;
8871 8889                  }
8872 8890  
8873 8891                  /* Ensure specified filehandle matches */
8874 8892                  if (cs->vp != sp->rs_finfo->rf_vp) {
8875 8893                          rfs4_state_rele(sp);
8876 8894                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8877 8895                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8878 8896                              cs, LOCK4res *, resp);
8879 8897                          return;
8880 8898                  }
8881 8899  
8882 8900                  /* hold off other access to open_owner while we tinker */
8883 8901                  rfs4_sw_enter(&sp->rs_owner->ro_sw);
8884 8902  
8885 8903                  switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8886 8904                  case NFS4_CHECK_STATEID_OLD:
8887 8905                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8888 8906                          goto end;
8889 8907                  case NFS4_CHECK_STATEID_BAD:
8890 8908                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8891 8909                          goto end;
8892 8910                  case NFS4_CHECK_STATEID_EXPIRED:
8893 8911                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8894 8912                          goto end;
8895 8913                  case NFS4_CHECK_STATEID_UNCONFIRMED:
8896 8914                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8897 8915                          goto end;
8898 8916                  case NFS4_CHECK_STATEID_CLOSED:
8899 8917                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8900 8918                          goto end;
8901 8919                  case NFS4_CHECK_STATEID_OKAY:
8902 8920                  case NFS4_CHECK_STATEID_REPLAY:
8903 8921                          switch (rfs4_check_olo_seqid(olo->open_seqid,
8904 8922                              sp->rs_owner, resop)) {
8905 8923                          case NFS4_CHKSEQ_OKAY:
8906 8924                                  if (rc == NFS4_CHECK_STATEID_OKAY)
8907 8925                                          break;
8908 8926                                  /*
8909 8927                                   * This is replayed stateid; if seqid
8910 8928                                   * matches next expected, then client
8911 8929                                   * is using wrong seqid.
8912 8930                                   */
8913 8931                                  /* FALLTHROUGH */
8914 8932                          case NFS4_CHKSEQ_BAD:
8915 8933                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8916 8934                                  goto end;
8917 8935                          case NFS4_CHKSEQ_REPLAY:
8918 8936                                  /* This is a duplicate LOCK request */
8919 8937                                  dup_lock = TRUE;
8920 8938  
8921 8939                                  /*
8922 8940                                   * For a duplicate we do not want to
8923 8941                                   * create a new lockowner as it should
8924 8942                                   * already exist.
8925 8943                                   * Turn off the lockowner create flag.
8926 8944                                   */
8927 8945                                  lcreate = FALSE;
8928 8946                          }
8929 8947                          break;
8930 8948                  }
8931 8949  
8932 8950                  lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8933 8951                  if (lo == NULL) {
8934 8952                          NFS4_DEBUG(rfs4_debug,
8935 8953                              (CE_NOTE, "rfs4_op_lock: no lock owner"));
8936 8954                          *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8937 8955                          goto end;
8938 8956                  }
8939 8957  
8940 8958                  lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8941 8959                  if (lsp == NULL) {
8942 8960                          rfs4_update_lease(sp->rs_owner->ro_client);
8943 8961                          /*
8944 8962                           * Only update the open_seqid if this is not
8945 8963                           * a duplicate request
8946 8964                           */
8947 8965                          if (dup_lock == FALSE) {
8948 8966                                  rfs4_update_open_sequence(sp->rs_owner);
8949 8967                          }
8950 8968  
8951 8969                          NFS4_DEBUG(rfs4_debug,
8952 8970                              (CE_NOTE, "rfs4_op_lock: no state"));
8953 8971                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8954 8972                          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8955 8973                          rfs4_lockowner_rele(lo);
8956 8974                          goto end;
8957 8975                  }
8958 8976  
8959 8977                  /*
8960 8978                   * This is the new_lock_owner branch and the client is
8961 8979                   * supposed to be associating a new lock_owner with
8962 8980                   * the open file at this point.  If we find that a
8963 8981                   * lock_owner/state association already exists and a
8964 8982                   * successful LOCK request was returned to the client,
8965 8983                   * an error is returned to the client since this is
8966 8984                   * not appropriate.  The client should be using the
8967 8985                   * existing lock_owner branch.
8968 8986                   */
8969 8987                  if (dup_lock == FALSE && create == FALSE) {
8970 8988                          if (lsp->rls_lock_completed == TRUE) {
8971 8989                                  *cs->statusp =
8972 8990                                      resp->status = NFS4ERR_BAD_SEQID;
8973 8991                                  rfs4_lockowner_rele(lo);
8974 8992                                  goto end;
8975 8993                          }
8976 8994                  }
8977 8995  
8978 8996                  rfs4_update_lease(sp->rs_owner->ro_client);
8979 8997  
8980 8998                  /*
8981 8999                   * Only update the open_seqid if this is not
8982 9000                   * a duplicate request
8983 9001                   */
8984 9002                  if (dup_lock == FALSE) {
8985 9003                          rfs4_update_open_sequence(sp->rs_owner);
8986 9004                  }
8987 9005  
8988 9006                  /*
8989 9007                   * If this is a duplicate lock request, just copy the
8990 9008                   * previously saved reply and return.
8991 9009                   */
8992 9010                  if (dup_lock == TRUE) {
8993 9011                          /* verify that lock_seqid's match */
8994 9012                          if (lsp->rls_seqid != olo->lock_seqid) {
8995 9013                                  NFS4_DEBUG(rfs4_debug,
8996 9014                                      (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8997 9015                                      "lsp->seqid=%d old->seqid=%d",
8998 9016                                      lsp->rls_seqid, olo->lock_seqid));
8999 9017                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9000 9018                          } else {
9001 9019                                  rfs4_copy_reply(resop, &lsp->rls_reply);
9002 9020                                  /*
9003 9021                                   * Make sure to copy the just
9004 9022                                   * retrieved reply status into the
9005 9023                                   * overall compound status
9006 9024                                   */
9007 9025                                  *cs->statusp = resp->status;
9008 9026                          }
9009 9027                          rfs4_lockowner_rele(lo);
9010 9028                          goto end;
9011 9029                  }
9012 9030  
9013 9031                  rfs4_dbe_lock(lsp->rls_dbe);
9014 9032  
9015 9033                  /* Make sure to update the lock sequence id */
9016 9034                  lsp->rls_seqid = olo->lock_seqid;
9017 9035  
9018 9036                  NFS4_DEBUG(rfs4_debug,
9019 9037                      (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9020 9038  
9021 9039                  /*
9022 9040                   * This is used to signify the newly created lockowner
9023 9041                   * stateid and its sequence number.  The checks for
9024 9042                   * sequence number and increment don't occur on the
9025 9043                   * very first lock request for a lockowner.
9026 9044                   */
9027 9045                  lsp->rls_skip_seqid_check = TRUE;
9028 9046  
9029 9047                  /* hold off other access to lsp while we tinker */
9030 9048                  rfs4_sw_enter(&lsp->rls_sw);
9031 9049                  ls_sw_held = TRUE;
9032 9050  
9033 9051                  rfs4_dbe_unlock(lsp->rls_dbe);
9034 9052  
9035 9053                  rfs4_lockowner_rele(lo);
9036 9054          } else {
9037 9055                  stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9038 9056                  /* get lsp and hold the lock on the underlying file struct */
9039 9057                  if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9040 9058                      != NFS4_OK) {
9041 9059                          *cs->statusp = resp->status = status;
9042 9060                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9043 9061                              cs, LOCK4res *, resp);
9044 9062                          return;
9045 9063                  }
9046 9064                  create = FALSE; /* We didn't create lsp */
9047 9065  
9048 9066                  /* Ensure specified filehandle matches */
9049 9067                  if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9050 9068                          rfs4_lo_state_rele(lsp, TRUE);
9051 9069                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9052 9070                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9053 9071                              cs, LOCK4res *, resp);
9054 9072                          return;
9055 9073                  }
9056 9074  
9057 9075                  /* hold off other access to lsp while we tinker */
9058 9076                  rfs4_sw_enter(&lsp->rls_sw);
9059 9077                  ls_sw_held = TRUE;
9060 9078  
9061 9079                  switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9062 9080                  /*
9063 9081                   * The stateid looks like it was okay (expected to be
9064 9082                   * the next one)
9065 9083                   */
9066 9084                  case NFS4_CHECK_STATEID_OKAY:
9067 9085                          /*
9068 9086                           * The sequence id is now checked.  Determine
9069 9087                           * if this is a replay or if it is in the
9070 9088                           * expected (next) sequence.  In the case of a
9071 9089                           * replay, there are two replay conditions
9072 9090                           * that may occur.  The first is the normal
9073 9091                           * condition where a LOCK is done with a
9074 9092                           * NFS4_OK response and the stateid is
9075 9093                           * updated.  That case is handled below when
9076 9094                           * the stateid is identified as a REPLAY.  The
9077 9095                           * second is the case where an error is
9078 9096                           * returned, like NFS4ERR_DENIED, and the
9079 9097                           * sequence number is updated but the stateid
9080 9098                           * is not updated.  This second case is dealt
9081 9099                           * with here.  So it may seem odd that the
9082 9100                           * stateid is okay but the sequence id is a
9083 9101                           * replay but it is okay.
9084 9102                           */
9085 9103                          switch (rfs4_check_lock_seqid(
9086 9104                              args->locker.locker4_u.lock_owner.lock_seqid,
9087 9105                              lsp, resop)) {
9088 9106                          case NFS4_CHKSEQ_REPLAY:
9089 9107                                  if (resp->status != NFS4_OK) {
9090 9108                                          /*
9091 9109                                           * Here is our replay and need
9092 9110                                           * to verify that the last
9093 9111                                           * response was an error.
9094 9112                                           */
9095 9113                                          *cs->statusp = resp->status;
9096 9114                                          goto end;
9097 9115                                  }
9098 9116                                  /*
9099 9117                                   * This is done since the sequence id
9100 9118                                   * looked like a replay but it didn't
9101 9119                                   * pass our check so a BAD_SEQID is
9102 9120                                   * returned as a result.
9103 9121                                   */
9104 9122                                  /*FALLTHROUGH*/
9105 9123                          case NFS4_CHKSEQ_BAD:
9106 9124                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9107 9125                                  goto end;
9108 9126                          case NFS4_CHKSEQ_OKAY:
9109 9127                                  /* Everything looks okay move ahead */
9110 9128                                  break;
9111 9129                          }
9112 9130                          break;
9113 9131                  case NFS4_CHECK_STATEID_OLD:
9114 9132                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9115 9133                          goto end;
9116 9134                  case NFS4_CHECK_STATEID_BAD:
9117 9135                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9118 9136                          goto end;
9119 9137                  case NFS4_CHECK_STATEID_EXPIRED:
9120 9138                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9121 9139                          goto end;
9122 9140                  case NFS4_CHECK_STATEID_CLOSED:
9123 9141                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9124 9142                          goto end;
9125 9143                  case NFS4_CHECK_STATEID_REPLAY:
9126 9144                          switch (rfs4_check_lock_seqid(
9127 9145                              args->locker.locker4_u.lock_owner.lock_seqid,
9128 9146                              lsp, resop)) {
9129 9147                          case NFS4_CHKSEQ_OKAY:
9130 9148                                  /*
9131 9149                                   * This is a replayed stateid; if
9132 9150                                   * seqid matches the next expected,
9133 9151                                   * then client is using wrong seqid.
9134 9152                                   */
9135 9153                          case NFS4_CHKSEQ_BAD:
9136 9154                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9137 9155                                  goto end;
9138 9156                          case NFS4_CHKSEQ_REPLAY:
9139 9157                                  rfs4_update_lease(lsp->rls_locker->rl_client);
9140 9158                                  *cs->statusp = status = resp->status;
9141 9159                                  goto end;
9142 9160                          }
9143 9161                          break;
9144 9162                  default:
9145 9163                          ASSERT(FALSE);
9146 9164                          break;
9147 9165                  }
9148 9166  
9149 9167                  rfs4_update_lock_sequence(lsp);
9150 9168                  rfs4_update_lease(lsp->rls_locker->rl_client);
9151 9169          }
9152 9170  
9153 9171          /*
9154 9172           * NFS4 only allows locking on regular files, so
9155 9173           * verify type of object.
9156 9174           */
9157 9175          if (cs->vp->v_type != VREG) {
9158 9176                  if (cs->vp->v_type == VDIR)
9159 9177                          status = NFS4ERR_ISDIR;
9160 9178                  else
9161 9179                          status = NFS4ERR_INVAL;
9162 9180                  goto out;
9163 9181          }
9164 9182  
9165 9183          cp = lsp->rls_state->rs_owner->ro_client;
9166 9184  
9167 9185          if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9168 9186                  status = NFS4ERR_GRACE;
9169 9187                  goto out;
9170 9188          }
9171 9189  
9172 9190          if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9173 9191                  status = NFS4ERR_NO_GRACE;
9174 9192                  goto out;
9175 9193          }
9176 9194  
9177 9195          if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9178 9196                  status = NFS4ERR_NO_GRACE;
9179 9197                  goto out;
9180 9198          }
9181 9199  
9182 9200          if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9183 9201                  cs->deleg = TRUE;
9184 9202  
9185 9203          status = rfs4_do_lock(lsp, args->locktype,
9186 9204              args->offset, args->length, cs->cr, resop);
9187 9205  
9188 9206  out:
9189 9207          lsp->rls_skip_seqid_check = FALSE;
9190 9208  
9191 9209          *cs->statusp = resp->status = status;
9192 9210  
9193 9211          if (status == NFS4_OK) {
9194 9212                  resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9195 9213                  lsp->rls_lock_completed = TRUE;
9196 9214          }
9197 9215          /*
9198 9216           * Only update the "OPEN" response here if this was a new
9199 9217           * lock_owner
9200 9218           */
9201 9219          if (sp)
9202 9220                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9203 9221  
9204 9222          rfs4_update_lock_resp(lsp, resop);
9205 9223  
9206 9224  end:
9207 9225          if (lsp) {
9208 9226                  if (ls_sw_held)
9209 9227                          rfs4_sw_exit(&lsp->rls_sw);
9210 9228                  /*
9211 9229                   * If an sp obtained, then the lsp does not represent
9212 9230                   * a lock on the file struct.
9213 9231                   */
9214 9232                  if (sp != NULL)
9215 9233                          rfs4_lo_state_rele(lsp, FALSE);
9216 9234                  else
9217 9235                          rfs4_lo_state_rele(lsp, TRUE);
9218 9236          }
9219 9237          if (sp) {
9220 9238                  rfs4_sw_exit(&sp->rs_owner->ro_sw);
9221 9239                  rfs4_state_rele(sp);
9222 9240          }
9223 9241  
9224 9242          DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9225 9243              LOCK4res *, resp);
9226 9244  }
9227 9245  
9228 9246  /* free function for LOCK/LOCKT */
9229 9247  static void
9230 9248  lock_denied_free(nfs_resop4 *resop)
9231 9249  {
9232 9250          LOCK4denied *dp = NULL;
9233 9251  
9234 9252          switch (resop->resop) {
9235 9253          case OP_LOCK:
9236 9254                  if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9237 9255                          dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9238 9256                  break;
9239 9257          case OP_LOCKT:
9240 9258                  if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9241 9259                          dp = &resop->nfs_resop4_u.oplockt.denied;
9242 9260                  break;
9243 9261          default:
9244 9262                  break;
9245 9263          }
9246 9264  
9247 9265          if (dp)
9248 9266                  kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9249 9267  }
9250 9268  
9251 9269  /*ARGSUSED*/
9252 9270  void
9253 9271  rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9254 9272      struct svc_req *req, struct compound_state *cs)
9255 9273  {
9256 9274          LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9257 9275          LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9258 9276          nfsstat4 status;
9259 9277          stateid4 *stateid = &args->lock_stateid;
9260 9278          rfs4_lo_state_t *lsp;
9261 9279  
9262 9280          DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9263 9281              LOCKU4args *, args);
9264 9282  
9265 9283          if (cs->vp == NULL) {
9266 9284                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9267 9285                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9268 9286                      LOCKU4res *, resp);
9269 9287                  return;
9270 9288          }
9271 9289  
9272 9290          if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9273 9291                  *cs->statusp = resp->status = status;
9274 9292                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9275 9293                      LOCKU4res *, resp);
9276 9294                  return;
9277 9295          }
9278 9296  
9279 9297          /* Ensure specified filehandle matches */
9280 9298          if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9281 9299                  rfs4_lo_state_rele(lsp, TRUE);
9282 9300                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9283 9301                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9284 9302                      LOCKU4res *, resp);
9285 9303                  return;
9286 9304          }
9287 9305  
9288 9306          /* hold off other access to lsp while we tinker */
9289 9307          rfs4_sw_enter(&lsp->rls_sw);
9290 9308  
9291 9309          switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9292 9310          case NFS4_CHECK_STATEID_OKAY:
9293 9311                  if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9294 9312                      != NFS4_CHKSEQ_OKAY) {
9295 9313                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9296 9314                          goto end;
9297 9315                  }
9298 9316                  break;
9299 9317          case NFS4_CHECK_STATEID_OLD:
9300 9318                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9301 9319                  goto end;
9302 9320          case NFS4_CHECK_STATEID_BAD:
9303 9321                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9304 9322                  goto end;
9305 9323          case NFS4_CHECK_STATEID_EXPIRED:
9306 9324                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9307 9325                  goto end;
9308 9326          case NFS4_CHECK_STATEID_CLOSED:
9309 9327                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9310 9328                  goto end;
9311 9329          case NFS4_CHECK_STATEID_REPLAY:
9312 9330                  switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9313 9331                  case NFS4_CHKSEQ_OKAY:
9314 9332                                  /*
9315 9333                                   * This is a replayed stateid; if
9316 9334                                   * seqid matches the next expected,
9317 9335                                   * then client is using wrong seqid.
9318 9336                                   */
9319 9337                  case NFS4_CHKSEQ_BAD:
9320 9338                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9321 9339                          goto end;
9322 9340                  case NFS4_CHKSEQ_REPLAY:
9323 9341                          rfs4_update_lease(lsp->rls_locker->rl_client);
9324 9342                          *cs->statusp = status = resp->status;
9325 9343                          goto end;
9326 9344                  }
9327 9345                  break;
9328 9346          default:
9329 9347                  ASSERT(FALSE);
9330 9348                  break;
9331 9349          }
9332 9350  
9333 9351          rfs4_update_lock_sequence(lsp);
9334 9352          rfs4_update_lease(lsp->rls_locker->rl_client);
9335 9353  
9336 9354          /*
9337 9355           * NFS4 only allows locking on regular files, so
9338 9356           * verify type of object.
9339 9357           */
9340 9358          if (cs->vp->v_type != VREG) {
9341 9359                  if (cs->vp->v_type == VDIR)
9342 9360                          status = NFS4ERR_ISDIR;
9343 9361                  else
9344 9362                          status = NFS4ERR_INVAL;
9345 9363                  goto out;
9346 9364          }
9347 9365  
9348 9366          if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9349 9367                  status = NFS4ERR_GRACE;
9350 9368                  goto out;
9351 9369          }
9352 9370  
9353 9371          status = rfs4_do_lock(lsp, args->locktype,
9354 9372              args->offset, args->length, cs->cr, resop);
9355 9373  
9356 9374  out:
9357 9375          *cs->statusp = resp->status = status;
9358 9376  
9359 9377          if (status == NFS4_OK)
9360 9378                  resp->lock_stateid = lsp->rls_lockid.stateid;
9361 9379  
9362 9380          rfs4_update_lock_resp(lsp, resop);
9363 9381  
9364 9382  end:
9365 9383          rfs4_sw_exit(&lsp->rls_sw);
9366 9384          rfs4_lo_state_rele(lsp, TRUE);
9367 9385  
9368 9386          DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9369 9387              LOCKU4res *, resp);
9370 9388  }
9371 9389  
9372 9390  /*
9373 9391   * LOCKT is a best effort routine, the client can not be guaranteed that
9374 9392   * the status return is still in effect by the time the reply is received.
9375 9393   * They are numerous race conditions in this routine, but we are not required
9376 9394   * and can not be accurate.
9377 9395   */
9378 9396  /*ARGSUSED*/
9379 9397  void
9380 9398  rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9381 9399      struct svc_req *req, struct compound_state *cs)
9382 9400  {
9383 9401          LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9384 9402          LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9385 9403          rfs4_lockowner_t *lo;
9386 9404          rfs4_client_t *cp;
9387 9405          bool_t create = FALSE;
9388 9406          struct flock64 flk;
9389 9407          int error;
9390 9408          int flag = FREAD | FWRITE;
9391 9409          int ltype;
9392 9410          length4 posix_length;
9393 9411          sysid_t sysid;
9394 9412          pid_t pid;
9395 9413  
9396 9414          DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9397 9415              LOCKT4args *, args);
9398 9416  
9399 9417          if (cs->vp == NULL) {
9400 9418                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9401 9419                  goto out;
9402 9420          }
9403 9421  
9404 9422          /*
9405 9423           * NFS4 only allows locking on regular files, so
9406 9424           * verify type of object.
9407 9425           */
9408 9426          if (cs->vp->v_type != VREG) {
9409 9427                  if (cs->vp->v_type == VDIR)
9410 9428                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
9411 9429                  else
9412 9430                          *cs->statusp = resp->status =  NFS4ERR_INVAL;
9413 9431                  goto out;
9414 9432          }
9415 9433  
9416 9434          /*
9417 9435           * Check out the clientid to ensure the server knows about it
9418 9436           * so that we correctly inform the client of a server reboot.
9419 9437           */
9420 9438          if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9421 9439              == NULL) {
9422 9440                  *cs->statusp = resp->status =
9423 9441                      rfs4_check_clientid(&args->owner.clientid, 0);
9424 9442                  goto out;
9425 9443          }
9426 9444          if (rfs4_lease_expired(cp)) {
9427 9445                  rfs4_client_close(cp);
9428 9446                  /*
9429 9447                   * Protocol doesn't allow returning NFS4ERR_STALE as
9430 9448                   * other operations do on this check so STALE_CLIENTID
9431 9449                   * is returned instead
9432 9450                   */
9433 9451                  *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9434 9452                  goto out;
9435 9453          }
9436 9454  
9437 9455          if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9438 9456                  *cs->statusp = resp->status = NFS4ERR_GRACE;
9439 9457                  rfs4_client_rele(cp);
9440 9458                  goto out;
9441 9459          }
9442 9460          rfs4_client_rele(cp);
9443 9461  
9444 9462          resp->status = NFS4_OK;
9445 9463  
9446 9464          switch (args->locktype) {
9447 9465          case READ_LT:
9448 9466          case READW_LT:
9449 9467                  ltype = F_RDLCK;
9450 9468                  break;
9451 9469          case WRITE_LT:
9452 9470          case WRITEW_LT:
9453 9471                  ltype = F_WRLCK;
9454 9472                  break;
9455 9473          }
9456 9474  
9457 9475          posix_length = args->length;
9458 9476          /* Check for zero length. To lock to end of file use all ones for V4 */
9459 9477          if (posix_length == 0) {
9460 9478                  *cs->statusp = resp->status = NFS4ERR_INVAL;
9461 9479                  goto out;
9462 9480          } else if (posix_length == (length4)(~0)) {
9463 9481                  posix_length = 0;       /* Posix to end of file  */
9464 9482          }
9465 9483  
9466 9484          /* Find or create a lockowner */
9467 9485          lo = rfs4_findlockowner(&args->owner, &create);
9468 9486  
9469 9487          if (lo) {
9470 9488                  pid = lo->rl_pid;
9471 9489                  if ((resp->status =
9472 9490                      rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9473 9491                          goto err;
9474 9492          } else {
9475 9493                  pid = 0;
9476 9494                  sysid = lockt_sysid;
9477 9495          }
9478 9496  retry:
9479 9497          flk.l_type = ltype;
9480 9498          flk.l_whence = 0;               /* SEEK_SET */
9481 9499          flk.l_start = args->offset;
9482 9500          flk.l_len = posix_length;
9483 9501          flk.l_sysid = sysid;
9484 9502          flk.l_pid = pid;
9485 9503          flag |= F_REMOTELOCK;
9486 9504  
9487 9505          LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9488 9506  
9489 9507          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9490 9508          if (flk.l_len < 0 || flk.l_start < 0) {
9491 9509                  resp->status = NFS4ERR_INVAL;
9492 9510                  goto err;
9493 9511          }
9494 9512          error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9495 9513              NULL, cs->cr, NULL);
9496 9514  
9497 9515          /*
9498 9516           * N.B. We map error values to nfsv4 errors. This is differrent
9499 9517           * than puterrno4 routine.
9500 9518           */
9501 9519          switch (error) {
9502 9520          case 0:
9503 9521                  if (flk.l_type == F_UNLCK)
9504 9522                          resp->status = NFS4_OK;
9505 9523                  else {
9506 9524                          if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9507 9525                                  goto retry;
9508 9526                          resp->status = NFS4ERR_DENIED;
9509 9527                  }
9510 9528                  break;
9511 9529          case EOVERFLOW:
9512 9530                  resp->status = NFS4ERR_INVAL;
9513 9531                  break;
9514 9532          case EINVAL:
9515 9533                  resp->status = NFS4ERR_NOTSUPP;
9516 9534                  break;
9517 9535          default:
9518 9536                  cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9519 9537                      error);
9520 9538                  resp->status = NFS4ERR_SERVERFAULT;
9521 9539                  break;
9522 9540          }
9523 9541  
9524 9542  err:
9525 9543          if (lo)
9526 9544                  rfs4_lockowner_rele(lo);
9527 9545          *cs->statusp = resp->status;
9528 9546  out:
9529 9547          DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9530 9548              LOCKT4res *, resp);
9531 9549  }
9532 9550  
9533 9551  int
9534 9552  rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9535 9553  {
9536 9554          int err;
9537 9555          int cmd;
9538 9556          vnode_t *vp;
9539 9557          struct shrlock shr;
9540 9558          struct shr_locowner shr_loco;
9541 9559          int fflags = 0;
9542 9560  
9543 9561          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9544 9562          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9545 9563  
9546 9564          if (sp->rs_closed)
9547 9565                  return (NFS4ERR_OLD_STATEID);
9548 9566  
9549 9567          vp = sp->rs_finfo->rf_vp;
9550 9568          ASSERT(vp);
9551 9569  
9552 9570          shr.s_access = shr.s_deny = 0;
9553 9571  
9554 9572          if (access & OPEN4_SHARE_ACCESS_READ) {
9555 9573                  fflags |= FREAD;
9556 9574                  shr.s_access |= F_RDACC;
9557 9575          }
9558 9576          if (access & OPEN4_SHARE_ACCESS_WRITE) {
9559 9577                  fflags |= FWRITE;
9560 9578                  shr.s_access |= F_WRACC;
9561 9579          }
9562 9580          ASSERT(shr.s_access);
9563 9581  
9564 9582          if (deny & OPEN4_SHARE_DENY_READ)
9565 9583                  shr.s_deny |= F_RDDNY;
9566 9584          if (deny & OPEN4_SHARE_DENY_WRITE)
9567 9585                  shr.s_deny |= F_WRDNY;
9568 9586  
9569 9587          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9570 9588          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9571 9589          shr_loco.sl_pid = shr.s_pid;
9572 9590          shr_loco.sl_id = shr.s_sysid;
9573 9591          shr.s_owner = (caddr_t)&shr_loco;
9574 9592          shr.s_own_len = sizeof (shr_loco);
9575 9593  
9576 9594          cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9577 9595  
9578 9596          err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9579 9597          if (err != 0) {
9580 9598                  if (err == EAGAIN)
9581 9599                          err = NFS4ERR_SHARE_DENIED;
9582 9600                  else
9583 9601                          err = puterrno4(err);
9584 9602                  return (err);
9585 9603          }
9586 9604  
9587 9605          sp->rs_share_access |= access;
9588 9606          sp->rs_share_deny |= deny;
9589 9607  
9590 9608          return (0);
9591 9609  }
9592 9610  
9593 9611  int
9594 9612  rfs4_unshare(rfs4_state_t *sp)
9595 9613  {
9596 9614          int err;
9597 9615          struct shrlock shr;
9598 9616          struct shr_locowner shr_loco;
9599 9617  
9600 9618          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9601 9619  
9602 9620          if (sp->rs_closed || sp->rs_share_access == 0)
9603 9621                  return (0);
9604 9622  
9605 9623          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9606 9624          ASSERT(sp->rs_finfo->rf_vp);
9607 9625  
9608 9626          shr.s_access = shr.s_deny = 0;
9609 9627          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9610 9628          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9611 9629          shr_loco.sl_pid = shr.s_pid;
9612 9630          shr_loco.sl_id = shr.s_sysid;
9613 9631          shr.s_owner = (caddr_t)&shr_loco;
9614 9632          shr.s_own_len = sizeof (shr_loco);
9615 9633  
9616 9634          err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9617 9635              NULL);
9618 9636          if (err != 0) {
9619 9637                  err = puterrno4(err);
9620 9638                  return (err);
9621 9639          }
9622 9640  
9623 9641          sp->rs_share_access = 0;
9624 9642          sp->rs_share_deny = 0;
9625 9643  
9626 9644          return (0);
9627 9645  
9628 9646  }
9629 9647  
9630 9648  static int
9631 9649  rdma_setup_read_data4(READ4args *args, READ4res *rok)
9632 9650  {
9633 9651          struct clist    *wcl;
9634 9652          count4          count = rok->data_len;
9635 9653          int             wlist_len;
9636 9654  
9637 9655          wcl = args->wlist;
9638 9656          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9639 9657                  return (FALSE);
9640 9658          }
9641 9659          wcl = args->wlist;
9642 9660          rok->wlist_len = wlist_len;
9643 9661          rok->wlist = wcl;
9644 9662          return (TRUE);
9645 9663  }
9646 9664  
9647 9665  /* tunable to disable server referrals */
9648 9666  int rfs4_no_referrals = 0;
9649 9667  
9650 9668  /*
9651 9669   * Find an NFS record in reparse point data.
9652 9670   * Returns 0 for success and <0 or an errno value on failure.
9653 9671   */
9654 9672  int
9655 9673  vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9656 9674  {
9657 9675          int err;
9658 9676          char *stype, *val;
9659 9677          nvlist_t *nvl;
9660 9678          nvpair_t *curr;
9661 9679  
9662 9680          if ((nvl = reparse_init()) == NULL)
9663 9681                  return (-1);
9664 9682  
9665 9683          if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9666 9684                  reparse_free(nvl);
9667 9685                  return (err);
9668 9686          }
9669 9687  
9670 9688          curr = NULL;
9671 9689          while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9672 9690                  if ((stype = nvpair_name(curr)) == NULL) {
9673 9691                          reparse_free(nvl);
9674 9692                          return (-2);
9675 9693                  }
9676 9694                  if (strncasecmp(stype, "NFS", 3) == 0)
9677 9695                          break;
9678 9696          }
9679 9697  
9680 9698          if ((curr == NULL) ||
9681 9699              (nvpair_value_string(curr, &val))) {
9682 9700                  reparse_free(nvl);
9683 9701                  return (-3);
9684 9702          }
9685 9703          *nvlp = nvl;
9686 9704          *svcp = stype;
9687 9705          *datap = val;
9688 9706          return (0);
9689 9707  }
9690 9708  
9691 9709  int
9692 9710  vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9693 9711  {
9694 9712          nvlist_t *nvl;
9695 9713          char *s, *d;
9696 9714  
9697 9715          if (rfs4_no_referrals != 0)
9698 9716                  return (B_FALSE);
9699 9717  
9700 9718          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9701 9719                  return (B_FALSE);
9702 9720  
9703 9721          if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9704 9722                  return (B_FALSE);
9705 9723  
9706 9724          reparse_free(nvl);
9707 9725  
9708 9726          return (B_TRUE);
9709 9727  }
9710 9728  
9711 9729  /*
9712 9730   * There is a user-level copy of this routine in ref_subr.c.
9713 9731   * Changes should be kept in sync.
9714 9732   */
9715 9733  static int
9716 9734  nfs4_create_components(char *path, component4 *comp4)
9717 9735  {
9718 9736          int slen, plen, ncomp;
9719 9737          char *ori_path, *nxtc, buf[MAXNAMELEN];
9720 9738  
9721 9739          if (path == NULL)
9722 9740                  return (0);
9723 9741  
9724 9742          plen = strlen(path) + 1;        /* include the terminator */
9725 9743          ori_path = path;
9726 9744          ncomp = 0;
9727 9745  
9728 9746          /* count number of components in the path */
9729 9747          for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9730 9748                  if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9731 9749                          if ((slen = nxtc - path) == 0) {
9732 9750                                  path = nxtc + 1;
9733 9751                                  continue;
9734 9752                          }
9735 9753  
9736 9754                          if (comp4 != NULL) {
9737 9755                                  bcopy(path, buf, slen);
9738 9756                                  buf[slen] = '\0';
9739 9757                                  (void) str_to_utf8(buf, &comp4[ncomp]);
9740 9758                          }
9741 9759  
9742 9760                          ncomp++;        /* 1 valid component */
9743 9761                          path = nxtc + 1;
9744 9762                  }
9745 9763                  if (*nxtc == '\0' || *nxtc == '\n')
9746 9764                          break;
9747 9765          }
9748 9766  
9749 9767          return (ncomp);
9750 9768  }
9751 9769  
9752 9770  /*
9753 9771   * There is a user-level copy of this routine in ref_subr.c.
9754 9772   * Changes should be kept in sync.
9755 9773   */
9756 9774  static int
9757 9775  make_pathname4(char *path, pathname4 *pathname)
9758 9776  {
9759 9777          int ncomp;
9760 9778          component4 *comp4;
9761 9779  
9762 9780          if (pathname == NULL)
9763 9781                  return (0);
9764 9782  
9765 9783          if (path == NULL) {
9766 9784                  pathname->pathname4_val = NULL;
9767 9785                  pathname->pathname4_len = 0;
9768 9786                  return (0);
9769 9787          }
9770 9788  
9771 9789          /* count number of components to alloc buffer */
9772 9790          if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9773 9791                  pathname->pathname4_val = NULL;
9774 9792                  pathname->pathname4_len = 0;
9775 9793                  return (0);
9776 9794          }
9777 9795          comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9778 9796  
9779 9797          /* copy components into allocated buffer */
9780 9798          ncomp = nfs4_create_components(path, comp4);
9781 9799  
9782 9800          pathname->pathname4_val = comp4;
9783 9801          pathname->pathname4_len = ncomp;
9784 9802  
9785 9803          return (ncomp);
9786 9804  }
9787 9805  
9788 9806  #define xdr_fs_locations4 xdr_fattr4_fs_locations
9789 9807  
9790 9808  fs_locations4 *
9791 9809  fetch_referral(vnode_t *vp, cred_t *cr)
9792 9810  {
9793 9811          nvlist_t *nvl;
9794 9812          char *stype, *sdata;
9795 9813          fs_locations4 *result;
9796 9814          char buf[1024];
9797 9815          size_t bufsize;
9798 9816          XDR xdr;
9799 9817          int err;
9800 9818  
9801 9819          /*
9802 9820           * Check attrs to ensure it's a reparse point
9803 9821           */
9804 9822          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9805 9823                  return (NULL);
9806 9824  
9807 9825          /*
9808 9826           * Look for an NFS record and get the type and data
9809 9827           */
9810 9828          if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9811 9829                  return (NULL);
9812 9830  
9813 9831          /*
9814 9832           * With the type and data, upcall to get the referral
9815 9833           */
9816 9834          bufsize = sizeof (buf);
9817 9835          bzero(buf, sizeof (buf));
9818 9836          err = reparse_kderef((const char *)stype, (const char *)sdata,
9819 9837              buf, &bufsize);
9820 9838          reparse_free(nvl);
9821 9839  
9822 9840          DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9823 9841              char *, stype, char *, sdata, char *, buf, int, err);
9824 9842          if (err) {
9825 9843                  cmn_err(CE_NOTE,
9826 9844                      "reparsed daemon not running: unable to get referral (%d)",
9827 9845                      err);
9828 9846                  return (NULL);
9829 9847          }
9830 9848  
9831 9849          /*
9832 9850           * We get an XDR'ed record back from the kderef call
9833 9851           */
9834 9852          xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9835 9853          result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9836 9854          err = xdr_fs_locations4(&xdr, result);
9837 9855          XDR_DESTROY(&xdr);
9838 9856          if (err != TRUE) {
9839 9857                  DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9840 9858                      int, err);
9841 9859                  return (NULL);
9842 9860          }
9843 9861  
9844 9862          /*
9845 9863           * Look at path to recover fs_root, ignoring the leading '/'
9846 9864           */
9847 9865          (void) make_pathname4(vp->v_path, &result->fs_root);
9848 9866  
9849 9867          return (result);
9850 9868  }
9851 9869  
9852 9870  char *
9853 9871  build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9854 9872  {
9855 9873          fs_locations4 *fsl;
9856 9874          fs_location4 *fs;
9857 9875          char *server, *path, *symbuf;
9858 9876          static char *prefix = "/net/";
9859 9877          int i, size, npaths;
9860 9878          uint_t len;
9861 9879  
9862 9880          /* Get the referral */
9863 9881          if ((fsl = fetch_referral(vp, cr)) == NULL)
9864 9882                  return (NULL);
9865 9883  
9866 9884          /* Deal with only the first location and first server */
9867 9885          fs = &fsl->locations_val[0];
9868 9886          server = utf8_to_str(&fs->server_val[0], &len, NULL);
9869 9887          if (server == NULL) {
9870 9888                  rfs4_free_fs_locations4(fsl);
9871 9889                  kmem_free(fsl, sizeof (fs_locations4));
9872 9890                  return (NULL);
9873 9891          }
9874 9892  
9875 9893          /* Figure out size for "/net/" + host + /path/path/path + NULL */
9876 9894          size = strlen(prefix) + len;
9877 9895          for (i = 0; i < fs->rootpath.pathname4_len; i++)
9878 9896                  size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9879 9897  
9880 9898          /* Allocate the symlink buffer and fill it */
9881 9899          symbuf = kmem_zalloc(size, KM_SLEEP);
9882 9900          (void) strcat(symbuf, prefix);
9883 9901          (void) strcat(symbuf, server);
9884 9902          kmem_free(server, len);
9885 9903  
9886 9904          npaths = 0;
9887 9905          for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9888 9906                  path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9889 9907                  if (path == NULL)
9890 9908                          continue;
9891 9909                  (void) strcat(symbuf, "/");
9892 9910                  (void) strcat(symbuf, path);
9893 9911                  npaths++;
9894 9912                  kmem_free(path, len);
9895 9913          }
9896 9914  
9897 9915          rfs4_free_fs_locations4(fsl);
9898 9916          kmem_free(fsl, sizeof (fs_locations4));
9899 9917  
9900 9918          if (strsz != NULL)
9901 9919                  *strsz = size;
9902 9920          return (symbuf);
9903 9921  }
9904 9922  
9905 9923  /*
9906 9924   * Check to see if we have a downrev Solaris client, so that we
9907 9925   * can send it a symlink instead of a referral.
9908 9926   */
9909 9927  int
9910 9928  client_is_downrev(struct svc_req *req)
9911 9929  {
9912 9930          struct sockaddr *ca;
9913 9931          rfs4_clntip_t *ci;
9914 9932          bool_t create = FALSE;
9915 9933          int is_downrev;
9916 9934  
9917 9935          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9918 9936          ASSERT(ca);
9919 9937          ci = rfs4_find_clntip(ca, &create);
9920 9938          if (ci == NULL)
9921 9939                  return (0);
9922 9940          is_downrev = ci->ri_no_referrals;
9923 9941          rfs4_dbe_rele(ci->ri_dbe);
9924 9942          return (is_downrev);
9925 9943  }
9926 9944  
9927 9945  /*
9928 9946   * Do the main work of handling HA-NFSv4 Resource Group failover on
9929 9947   * Sun Cluster.
9930 9948   * We need to detect whether any RG admin paths have been added or removed,
9931 9949   * and adjust resources accordingly.
9932 9950   * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9933 9951   * order to scale, the list and array of paths need to be held in more
9934 9952   * suitable data structures.
9935 9953   */
9936 9954  static void
9937 9955  hanfsv4_failover(nfs4_srv_t *nsrv4)
9938 9956  {
9939 9957          int i, start_grace, numadded_paths = 0;
9940 9958          char **added_paths = NULL;
9941 9959          rfs4_dss_path_t *dss_path;
9942 9960  
9943 9961          /*
9944 9962           * Note: currently, dss_pathlist cannot be NULL, since
9945 9963           * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9946 9964           * make the latter dynamically specified too, the following will
9947 9965           * need to be adjusted.
9948 9966           */
9949 9967  
9950 9968          /*
9951 9969           * First, look for removed paths: RGs that have been failed-over
9952 9970           * away from this node.
9953 9971           * Walk the "currently-serving" dss_pathlist and, for each
9954 9972           * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9955 9973           * from nfsd. If not, that RG path has been removed.
9956 9974           *
9957 9975           * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9958 9976           * any duplicates.
9959 9977           */
9960 9978          dss_path = nsrv4->dss_pathlist;
9961 9979          do {
9962 9980                  int found = 0;
9963 9981                  char *path = dss_path->path;
9964 9982  
9965 9983                  /* used only for non-HA so may not be removed */
9966 9984                  if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9967 9985                          dss_path = dss_path->next;
9968 9986                          continue;
9969 9987                  }
9970 9988  
9971 9989                  for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9972 9990                          int cmpret;
9973 9991                          char *newpath = rfs4_dss_newpaths[i];
9974 9992  
9975 9993                          /*
9976 9994                           * Since nfsd has sorted rfs4_dss_newpaths for us,
9977 9995                           * once the return from strcmp is negative we know
9978 9996                           * we've passed the point where "path" should be,
9979 9997                           * and can stop searching: "path" has been removed.
9980 9998                           */
9981 9999                          cmpret = strcmp(path, newpath);
9982 10000                          if (cmpret < 0)
9983 10001                                  break;
9984 10002                          if (cmpret == 0) {
9985 10003                                  found = 1;
9986 10004                                  break;
9987 10005                          }
9988 10006                  }
9989 10007  
9990 10008                  if (found == 0) {
9991 10009                          unsigned index = dss_path->index;
9992 10010                          rfs4_servinst_t *sip = dss_path->sip;
9993 10011                          rfs4_dss_path_t *path_next = dss_path->next;
9994 10012  
9995 10013                          /*
9996 10014                           * This path has been removed.
9997 10015                           * We must clear out the servinst reference to
9998 10016                           * it, since it's now owned by another
9999 10017                           * node: we should not attempt to touch it.
10000 10018                           */
10001 10019                          ASSERT(dss_path == sip->dss_paths[index]);
10002 10020                          sip->dss_paths[index] = NULL;
10003 10021  
10004 10022                          /* remove from "currently-serving" list, and destroy */
10005 10023                          remque(dss_path);
10006 10024                          /* allow for NUL */
10007 10025                          kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10008 10026                          kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10009 10027  
10010 10028                          dss_path = path_next;
10011 10029                  } else {
10012 10030                          /* path was found; not removed */
10013 10031                          dss_path = dss_path->next;
10014 10032                  }
10015 10033          } while (dss_path != nsrv4->dss_pathlist);
10016 10034  
10017 10035          /*
10018 10036           * Now, look for added paths: RGs that have been failed-over
10019 10037           * to this node.
10020 10038           * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10021 10039           * for each path, check if it is on the "currently-serving"
10022 10040           * dss_pathlist. If not, that RG path has been added.
10023 10041           *
10024 10042           * Note: we don't do duplicate detection here; nfsd does that for us.
10025 10043           *
10026 10044           * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10027 10045           * an upper bound for the size needed for added_paths[numadded_paths].
10028 10046           */
10029 10047  
10030 10048          /* probably more space than we need, but guaranteed to be enough */
10031 10049          if (rfs4_dss_numnewpaths > 0) {
10032 10050                  size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10033 10051                  added_paths = kmem_zalloc(sz, KM_SLEEP);
10034 10052          }
10035 10053  
10036 10054          /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10037 10055          for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10038 10056                  int found = 0;
10039 10057                  char *newpath = rfs4_dss_newpaths[i];
10040 10058  
10041 10059                  dss_path = nsrv4->dss_pathlist;
10042 10060                  do {
10043 10061                          char *path = dss_path->path;
10044 10062  
10045 10063                          /* used only for non-HA */
10046 10064                          if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10047 10065                                  dss_path = dss_path->next;
10048 10066                                  continue;
10049 10067                          }
10050 10068  
10051 10069                          if (strncmp(path, newpath, strlen(path)) == 0) {
10052 10070                                  found = 1;
10053 10071                                  break;
10054 10072                          }
10055 10073  
10056 10074                          dss_path = dss_path->next;
10057 10075                  } while (dss_path != nsrv4->dss_pathlist);
10058 10076  
10059 10077                  if (found == 0) {
10060 10078                          added_paths[numadded_paths] = newpath;
10061 10079                          numadded_paths++;
10062 10080                  }
10063 10081          }
10064 10082  
10065 10083          /* did we find any added paths? */
10066 10084          if (numadded_paths > 0) {
10067 10085  
10068 10086                  /* create a new server instance, and start its grace period */
10069 10087                  start_grace = 1;
10070 10088                  /* CSTYLED */
10071 10089                  rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10072 10090  
10073 10091                  /* read in the stable storage state from these paths */
10074 10092                  rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10075 10093  
10076 10094                  /*
10077 10095                   * Multiple failovers during a grace period will cause
10078 10096                   * clients of the same resource group to be partitioned
10079 10097                   * into different server instances, with different
10080 10098                   * grace periods.  Since clients of the same resource
10081 10099                   * group must be subject to the same grace period,
10082 10100                   * we need to reset all currently active grace periods.
10083 10101                   */
10084 10102                  rfs4_grace_reset_all(nsrv4);
10085 10103          }
10086 10104  
10087 10105          if (rfs4_dss_numnewpaths > 0)
10088 10106                  kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10089 10107  }
  
    | 
      ↓ open down ↓ | 
    7368 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX