Print this page
    
Send nfs_export_t to untraverse()
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28   28   *      All Rights Reserved
  29   29   */
  30   30  
  31   31  /*
  32   32   * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33   33   * Copyright 2019 Nexenta Systems, Inc.
  34   34   * Copyright 2019 Nexenta by DDN, Inc.
  35   35   */
  36   36  
  37   37  #include <sys/param.h>
  38   38  #include <sys/types.h>
  39   39  #include <sys/systm.h>
  40   40  #include <sys/cred.h>
  41   41  #include <sys/buf.h>
  42   42  #include <sys/vfs.h>
  43   43  #include <sys/vfs_opreg.h>
  44   44  #include <sys/vnode.h>
  45   45  #include <sys/uio.h>
  46   46  #include <sys/errno.h>
  47   47  #include <sys/sysmacros.h>
  48   48  #include <sys/statvfs.h>
  49   49  #include <sys/kmem.h>
  50   50  #include <sys/dirent.h>
  51   51  #include <sys/cmn_err.h>
  52   52  #include <sys/debug.h>
  53   53  #include <sys/systeminfo.h>
  54   54  #include <sys/flock.h>
  55   55  #include <sys/pathname.h>
  56   56  #include <sys/nbmlock.h>
  57   57  #include <sys/share.h>
  58   58  #include <sys/atomic.h>
  59   59  #include <sys/policy.h>
  60   60  #include <sys/fem.h>
  61   61  #include <sys/sdt.h>
  62   62  #include <sys/ddi.h>
  63   63  #include <sys/zone.h>
  64   64  
  65   65  #include <fs/fs_reparse.h>
  66   66  
  67   67  #include <rpc/types.h>
  68   68  #include <rpc/auth.h>
  69   69  #include <rpc/rpcsec_gss.h>
  70   70  #include <rpc/svc.h>
  71   71  
  72   72  #include <nfs/nfs.h>
  73   73  #include <nfs/nfssys.h>
  74   74  #include <nfs/export.h>
  75   75  #include <nfs/nfs_cmd.h>
  76   76  #include <nfs/lm.h>
  77   77  #include <nfs/nfs4.h>
  78   78  #include <nfs/nfs4_drc.h>
  79   79  
  80   80  #include <sys/strsubr.h>
  81   81  #include <sys/strsun.h>
  82   82  
  83   83  #include <inet/common.h>
  84   84  #include <inet/ip.h>
  85   85  #include <inet/ip6.h>
  86   86  
  87   87  #include <sys/tsol/label.h>
  88   88  #include <sys/tsol/tndb.h>
  89   89  
  90   90  #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  91   91  static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  92   92  #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  93   93  static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  94   94  extern struct svc_ops rdma_svc_ops;
  95   95  extern int nfs_loaned_buffers;
  96   96  /* End of Tunables */
  97   97  
  98   98  static int rdma_setup_read_data4(READ4args *, READ4res *);
  99   99  
 100  100  /*
 101  101   * Used to bump the stateid4.seqid value and show changes in the stateid
 102  102   */
 103  103  #define next_stateid(sp) (++(sp)->bits.chgseq)
 104  104  
 105  105  /*
 106  106   * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  107   *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  108   *      maxcount that isn't large enough to hold the smallest possible
 109  109   *      XDR encoded dirent.
 110  110   *
 111  111   *          sizeof cookie (8 bytes) +
 112  112   *          sizeof name_len (4 bytes) +
 113  113   *          sizeof smallest (padded) name (4 bytes) +
 114  114   *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  115   *          sizeof attrlist4_len (4 bytes) +
 116  116   *          sizeof next boolean (4 bytes)
 117  117   *
 118  118   * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  119   * the smallest possible entry4 (assumes no attrs requested).
 120  120   *      sizeof nfsstat4 (4 bytes) +
 121  121   *      sizeof verifier4 (8 bytes) +
 122  122   *      sizeof entry4list bool (4 bytes) +
 123  123   *      sizeof entry4   (36 bytes) +
 124  124   *      sizeof eof bool  (4 bytes)
 125  125   *
 126  126   * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  127   *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  128   *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  129   *      required for a given name length.  MAXNAMELEN is the maximum
 130  130   *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  131   *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  132   *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  133   *      to hold ., .., and the largest possible solaris dirent64.
 134  134   */
 135  135  #define RFS4_MINLEN_ENTRY4 36
 136  136  #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137  137  #define RFS4_MINLEN_RDDIR_BUF \
 138  138          (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139  139  
 140  140  /*
 141  141   * It would be better to pad to 4 bytes since that's what XDR would do,
 142  142   * but the dirents UFS gives us are already padded to 8, so just take
 143  143   * what we're given.  Dircount is only a hint anyway.  Currently the
 144  144   * solaris kernel is ASCII only, so there's no point in calling the
 145  145   * UTF8 functions.
 146  146   *
 147  147   * dirent64: name padded to provide 8 byte struct alignment
 148  148   *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  149   *
 150  150   * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  151   *
 152  152   */
 153  153  #define DIRENT64_TO_DIRCOUNT(dp) \
 154  154          (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155  155  
 156  156  
 157  157  static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 158  158  
 159  159  u_longlong_t    nfs4_srv_caller_id;
 160  160  uint_t          nfs4_srv_vkey = 0;
 161  161  
 162  162  void    rfs4_init_compound_state(struct compound_state *);
 163  163  
 164  164  static void     nullfree(caddr_t);
 165  165  static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166  166                      struct compound_state *);
 167  167  static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168  168                      struct compound_state *);
 169  169  static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170  170                      struct compound_state *);
 171  171  static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172  172                      struct compound_state *);
 173  173  static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 174  174                      struct compound_state *);
 175  175  static void     rfs4_op_create_free(nfs_resop4 *resop);
 176  176  static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 177  177                      struct svc_req *, struct compound_state *);
 178  178  static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 179  179                      struct svc_req *, struct compound_state *);
 180  180  static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181  181                      struct compound_state *);
 182  182  static void     rfs4_op_getattr_free(nfs_resop4 *);
 183  183  static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 184  184                      struct compound_state *);
 185  185  static void     rfs4_op_getfh_free(nfs_resop4 *);
 186  186  static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187  187                      struct compound_state *);
 188  188  static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189  189                      struct compound_state *);
 190  190  static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 191  191                      struct compound_state *);
 192  192  static void     lock_denied_free(nfs_resop4 *);
 193  193  static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194  194                      struct compound_state *);
 195  195  static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196  196                      struct compound_state *);
 197  197  static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198  198                      struct compound_state *);
 199  199  static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 200  200                      struct compound_state *);
 201  201  static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 202  202                      struct svc_req *req, struct compound_state *cs);
 203  203  static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204  204                      struct compound_state *);
 205  205  static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 206  206                      struct compound_state *);
 207  207  static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 208  208                      struct svc_req *, struct compound_state *);
 209  209  static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 210  210                      struct svc_req *, struct compound_state *);
 211  211  static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212  212                      struct compound_state *);
 213  213  static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214  214                      struct compound_state *);
 215  215  static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216  216                      struct compound_state *);
 217  217  static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 218  218                      struct compound_state *);
 219  219  static void     rfs4_op_read_free(nfs_resop4 *);
 220  220  static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 221  221  static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 222  222                      struct compound_state *);
 223  223  static void     rfs4_op_readlink_free(nfs_resop4 *);
 224  224  static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 225  225                      struct svc_req *, struct compound_state *);
 226  226  static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227  227                      struct compound_state *);
 228  228  static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229  229                      struct compound_state *);
 230  230  static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231  231                      struct compound_state *);
 232  232  static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233  233                      struct compound_state *);
 234  234  static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235  235                      struct compound_state *);
 236  236  static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237  237                      struct compound_state *);
 238  238  static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239  239                      struct compound_state *);
 240  240  static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 241  241                      struct compound_state *);
 242  242  static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 243  243                      struct svc_req *, struct compound_state *);
 244  244  static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 245  245                      struct svc_req *req, struct compound_state *);
 246  246  static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 247  247                      struct compound_state *);
 248  248  static void     rfs4_op_secinfo_free(nfs_resop4 *);
 249  249  
 250  250  static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 251  251                      struct svc_req *);
 252  252  nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 253  253  void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 254  254  
 255  255  
 256  256  /*
 257  257   * translation table for attrs
 258  258   */
 259  259  struct nfs4_ntov_table {
                   /*
                    * Passed by pointer to nfs4_ntov_table_init(),
                    * nfs4_ntov_table_free() and do_rfs4_set_attrs().
                    * NOTE(review): the per-field notes below are inferred from
                    * names and types only -- confirm against those functions.
                    */
 260  260          union nfs4_attr_u *na;          /* presumably per-attr values */
 261  261          uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably attr index map */
 262  262          int attrcnt;                    /* presumably entries in use */
 263  263          bool_t vfsstat;                 /* presumably a vfs-stat flag */
 264  264  };
 265  265  
 266  266  static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 267  267  static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 268  268                      struct nfs4_svgetit_arg *sargp);
 269  269  
 270  270  static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 271  271                      struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 272  272                      struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 273  273  
 274  274  static void     hanfsv4_failover(nfs4_srv_t *);
 275  275  
 276  276  fem_t           *deleg_rdops;
 277  277  fem_t           *deleg_wrops;
 278  278  
 279  279  /*
 280  280   * NFS4 op dispatch table
 281  281   */
 282  282  
 283  283  struct rfsv4disp {
                   /*
                    * One dispatch entry per NFSv4 operation.  Both function
                    * pointers are declared without a prototype; the handlers this
                    * file stores in dis_proc are declared as taking (nfs_argop4 *,
                    * nfs_resop4 *, struct svc_req *, struct compound_state *).
                    */
 284  284          void    (*dis_proc)();          /* proc to call */
 285  285          void    (*dis_resfree)();       /* frees space allocated by proc */
 286  286          int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 287  287  };
 288  288  
 289  289  static struct rfsv4disp rfsv4disptab[] = {
 290  290          /*
 291  291           * NFS VERSION 4
 292  292           */
 293  293  
 294  294          /* RFS_NULL = 0 */
 295  295          {rfs4_op_illegal, nullfree, 0},
 296  296  
 297  297          /* UNUSED = 1 */
 298  298          {rfs4_op_illegal, nullfree, 0},
 299  299  
 300  300          /* UNUSED = 2 */
 301  301          {rfs4_op_illegal, nullfree, 0},
 302  302  
 303  303          /* OP_ACCESS = 3 */
 304  304          {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 305  305  
 306  306          /* OP_CLOSE = 4 */
 307  307          {rfs4_op_close, nullfree, 0},
 308  308  
 309  309          /* OP_COMMIT = 5 */
 310  310          {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 311  311  
 312  312          /* OP_CREATE = 6 */
 313  313          {rfs4_op_create, nullfree, 0},
 314  314  
 315  315          /* OP_DELEGPURGE = 7 */
 316  316          {rfs4_op_delegpurge, nullfree, 0},
 317  317  
 318  318          /* OP_DELEGRETURN = 8 */
 319  319          {rfs4_op_delegreturn, nullfree, 0},
 320  320  
 321  321          /* OP_GETATTR = 9 */
 322  322          {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 323  323  
 324  324          /* OP_GETFH = 10 */
 325  325          {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 326  326  
 327  327          /* OP_LINK = 11 */
 328  328          {rfs4_op_link, nullfree, 0},
 329  329  
 330  330          /* OP_LOCK = 12 */
 331  331          {rfs4_op_lock, lock_denied_free, 0},
 332  332  
 333  333          /* OP_LOCKT = 13 */
 334  334          {rfs4_op_lockt, lock_denied_free, 0},
 335  335  
 336  336          /* OP_LOCKU = 14 */
 337  337          {rfs4_op_locku, nullfree, 0},
 338  338  
 339  339          /* OP_LOOKUP = 15 */
 340  340          {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 341  341  
 342  342          /* OP_LOOKUPP = 16 */
 343  343          {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 344  344  
 345  345          /* OP_NVERIFY = 17 */
 346  346          {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 347  347  
 348  348          /* OP_OPEN = 18 */
 349  349          {rfs4_op_open, rfs4_free_reply, 0},
 350  350  
 351  351          /* OP_OPENATTR = 19 */
 352  352          {rfs4_op_openattr, nullfree, 0},
 353  353  
 354  354          /* OP_OPEN_CONFIRM = 20 */
 355  355          {rfs4_op_open_confirm, nullfree, 0},
 356  356  
 357  357          /* OP_OPEN_DOWNGRADE = 21 */
 358  358          {rfs4_op_open_downgrade, nullfree, 0},
 359  359  
 360  360          /* OP_PUTFH = 22 */
 361  361          {rfs4_op_putfh, nullfree, RPC_ALL},
 362  362  
 363  363          /* OP_PUTPUBFH = 23 */
 364  364          {rfs4_op_putpubfh, nullfree, RPC_ALL},
 365  365  
 366  366          /* OP_PUTROOTFH = 24 */
 367  367          {rfs4_op_putrootfh, nullfree, RPC_ALL},
 368  368  
 369  369          /* OP_READ = 25 */
 370  370          {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 371  371  
 372  372          /* OP_READDIR = 26 */
 373  373          {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 374  374  
 375  375          /* OP_READLINK = 27 */
 376  376          {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 377  377  
 378  378          /* OP_REMOVE = 28 */
 379  379          {rfs4_op_remove, nullfree, 0},
 380  380  
 381  381          /* OP_RENAME = 29 */
 382  382          {rfs4_op_rename, nullfree, 0},
 383  383  
 384  384          /* OP_RENEW = 30 */
 385  385          {rfs4_op_renew, nullfree, 0},
 386  386  
 387  387          /* OP_RESTOREFH = 31 */
 388  388          {rfs4_op_restorefh, nullfree, RPC_ALL},
 389  389  
 390  390          /* OP_SAVEFH = 32 */
 391  391          {rfs4_op_savefh, nullfree, RPC_ALL},
 392  392  
 393  393          /* OP_SECINFO = 33 */
 394  394          {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 395  395  
 396  396          /* OP_SETATTR = 34 */
 397  397          {rfs4_op_setattr, nullfree, 0},
 398  398  
 399  399          /* OP_SETCLIENTID = 35 */
 400  400          {rfs4_op_setclientid, nullfree, 0},
 401  401  
 402  402          /* OP_SETCLIENTID_CONFIRM = 36 */
 403  403          {rfs4_op_setclientid_confirm, nullfree, 0},
 404  404  
 405  405          /* OP_VERIFY = 37 */
 406  406          {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 407  407  
 408  408          /* OP_WRITE = 38 */
 409  409          {rfs4_op_write, nullfree, 0},
 410  410  
 411  411          /* OP_RELEASE_LOCKOWNER = 39 */
 412  412          {rfs4_op_release_lockowner, nullfree, 0},
 413  413  };
 414  414  
 415  415  static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 416  416  
 417  417  #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 418  418  
 419  419  #ifdef DEBUG
 420  420  
 421  421  int             rfs4_fillone_debug = 0;
 422  422  int             rfs4_no_stub_access = 1;
 423  423  int             rfs4_rddir_debug = 0;
 424  424  
 425  425  static char    *rfs4_op_string[] = {
                   /*
                    * DEBUG-build names for the operations, listed in the same order
                    * as the rfsv4disptab[] slots (index 0 .. 39), followed by one
                    * extra entry at index 40 (== OP_ILLEGAL_IDX) for the illegal op.
                    */
 426  426          "rfs4_op_null",
 427  427          "rfs4_op_1 unused",
 428  428          "rfs4_op_2 unused",
 429  429          "rfs4_op_access",
 430  430          "rfs4_op_close",
 431  431          "rfs4_op_commit",
 432  432          "rfs4_op_create",
 433  433          "rfs4_op_delegpurge",
 434  434          "rfs4_op_delegreturn",
 435  435          "rfs4_op_getattr",
 436  436          "rfs4_op_getfh",
 437  437          "rfs4_op_link",
 438  438          "rfs4_op_lock",
 439  439          "rfs4_op_lockt",
 440  440          "rfs4_op_locku",
 441  441          "rfs4_op_lookup",
 442  442          "rfs4_op_lookupp",
 443  443          "rfs4_op_nverify",
 444  444          "rfs4_op_open",
 445  445          "rfs4_op_openattr",
 446  446          "rfs4_op_open_confirm",
 447  447          "rfs4_op_open_downgrade",
 448  448          "rfs4_op_putfh",
 449  449          "rfs4_op_putpubfh",
 450  450          "rfs4_op_putrootfh",
 451  451          "rfs4_op_read",
 452  452          "rfs4_op_readdir",
 453  453          "rfs4_op_readlink",
 454  454          "rfs4_op_remove",
 455  455          "rfs4_op_rename",
 456  456          "rfs4_op_renew",
 457  457          "rfs4_op_restorefh",
 458  458          "rfs4_op_savefh",
 459  459          "rfs4_op_secinfo",
 460  460          "rfs4_op_setattr",
 461  461          "rfs4_op_setclientid",
                   /*
                    * NOTE(review): the handler in slot 36 is
                    * rfs4_op_setclientid_confirm; the string below omits the "id".
                    */
 462  462          "rfs4_op_setclient_confirm",
 463  463          "rfs4_op_verify",
 464  464          "rfs4_op_write",
 465  465          "rfs4_op_release_lockowner",
 466  466          "rfs4_op_illegal"
 467  467  };
 468  468  #endif
 469  469  
 470  470  void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 471  471  
 472  472  extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 473  473  
 474  474  extern void     rfs4_free_fs_locations4(fs_locations4 *);
 475  475  
 476  476  #ifdef  nextdp
 477  477  #undef nextdp
 478  478  #endif
 479  479  #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 480  480  
 481  481  static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
                   /*
                    * Vnode-operation name / handler pairs that rfs4_srvrinit() hands
                    * to fem_create() as "deleg_rdops".  The trailing NULL, NULL pair
                    * presumably terminates the list -- confirm against fem_create().
                    */
 482  482          VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 483  483          VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 484  484          VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 485  485          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 486  486          VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 487  487          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 488  488          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 489  489          NULL,                   NULL
 490  490  };
 491  491  static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
                   /*
                    * Vnode-operation name / handler pairs that rfs4_srvrinit() hands
                    * to fem_create() as "deleg_wrops".  Unlike the read-delegation
                    * template this one also lists VOPNAME_READ.  The trailing
                    * NULL, NULL pair presumably terminates the list -- confirm.
                    */
 492  492          VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 493  493          VOPNAME_READ,           { .femop_read = deleg_wr_read },
 494  494          VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 495  495          VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 496  496          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 497  497          VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 498  498          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 499  499          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 500  500          NULL,                   NULL
 501  501  };
 502  502  
 503  503  nfs4_srv_t *
 504  504  nfs4_get_srv(void)
 505  505  {
                   /*
                    * Return the nfs4_srv_t stored in the NFS server globals that
                    * zone_getspecific() yields for the current zone (curzone) under
                    * nfssrv_zone_key.
                    *
                    * NOTE(review): ng is dereferenced without a NULL check and the
                    * result is only checked by the ASSERT below -- presumably callers
                    * run only after rfs4_srv_zone_init() has set ng->nfs4_srv; confirm.
                    */
 506  506          nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
 507  507          nfs4_srv_t *srv = ng->nfs4_srv;
 508  508          ASSERT(srv != NULL);
 509  509          return (srv);
 510  510  }
 511  511  
 512  512  void
 513  513  rfs4_srv_zone_init(nfs_globals_t *ng)
 514  514  {
                   /*
                    * Allocate a zeroed nfs4_srv_t, seed its write verifier, set the
                    * default delegation policy (SRV_NEVER_DELEGATE), initialize its
                    * locks, and store it in ng->nfs4_srv.
                    */
 515  515          nfs4_srv_t *nsrv4;
 516  516          timespec32_t verf;
 517  517  
 518  518          nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 519  519  
 520  520          /*
 521  521           * The following algorithm attempts to find a unique verifier
 522  522           * to be used as the write verifier returned from the server
 523  523           * to the client.  It is important that this verifier change
 524  524           * whenever the server reboots.  Of secondary importance, it
 525  525           * is important for the verifier to be unique between two
 526  526           * different servers.
 527  527           *
 528  528           * Thus, an attempt is made to use the system hostid and the
 529  529           * current time in seconds when the nfssrv kernel module is
 530  530           * loaded.  It is assumed that an NFS server will not be able
 531  531           * to boot and then to reboot in less than a second.  If the
 532  532           * hostid has not been set, then the current high resolution
 533  533           * time is used.  This will ensure different verifiers each
 534  534           * time the server reboots and minimize the chances that two
 535  535           * different servers will have the same verifier.
 536  536           * XXX - this is broken on LP64 kernels.
 537  537           */
 538  538          verf.tv_sec = (time_t)zone_get_hostid(NULL);
 539  539          if (verf.tv_sec != 0) {
 540  540                  verf.tv_nsec = gethrestime_sec();
 541  541          } else {
 542  542                  timespec_t tverf;
 543  543  
 544  544                  gethrestime(&tverf);
 545  545                  verf.tv_sec = (time_t)tverf.tv_sec;
 546  546                  verf.tv_nsec = tverf.tv_nsec;
 547  547          }
                   /*
                    * The verifier is the raw bytes of verf read back as a uint64_t.
                    * NOTE(review): this is a pointer-cast type pun and assumes
                    * timespec32_t is exactly 8 bytes and suitably aligned -- confirm.
                    */
 548  548          nsrv4->write4verf = *(uint64_t *)&verf;
 549  549  
 550  550          /* Used to manage create/destroy of server state */
 551  551          nsrv4->nfs4_server_state = NULL;
 552  552          nsrv4->nfs4_cur_servinst = NULL;
 553  553          nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 554  554          mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 555  555          mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 556  556          mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 557  557          rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 558  558  
 559  559          ng->nfs4_srv = nsrv4;
 560  560  }
 561  561  
 562  562  void
 563  563  rfs4_srv_zone_fini(nfs_globals_t *ng)
 564  564  {
                   /*
                    * Undo rfs4_srv_zone_init(): detach the nfs4_srv_t from ng, destroy
                    * the four locks that were initialized there, and free the
                    * structure.  ng->nfs4_srv is cleared before the teardown starts.
                    */
 565  565          nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 566  566  
 567  567          ng->nfs4_srv = NULL;
 568  568  
 569  569          mutex_destroy(&nsrv4->deleg_lock);
 570  570          mutex_destroy(&nsrv4->state_lock);
 571  571          mutex_destroy(&nsrv4->servinst_lock);
 572  572          rw_destroy(&nsrv4->deleg_policy_lock);
 573  573  
 574  574          kmem_free(nsrv4, sizeof (*nsrv4));
 575  575  }
 576  576  
 577  577  void
 578  578  rfs4_srvrinit(void)
 579  579  {
                   /*
                    * Global (not per-zone) NFSv4 server setup: initialize the
                    * attribute code, create the two delegation FEM templates,
                    * allocate the caller id and the LOCKT sysid, create the vsd key,
                    * and run rfs4_state_g_init().
                    */
 580  580          extern void rfs4_attr_init();
 581  581  
 582  582          rfs4_attr_init();
 583  583  
                   /*
                    * If either template cannot be created, delegation is disabled.
                    * NOTE(review): when only the second fem_create() fails,
                    * deleg_rdops is freed here but not set to NULL, and when the
                    * first fails deleg_wrops is never created; rfs4_srvrfini() calls
                    * fem_free() on both unconditionally -- confirm this cannot
                    * double-free or free an unset pointer.
                    */
 584  584          if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 585  585                  rfs4_disable_delegation();
 586  586          } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 587  587              &deleg_wrops) != 0) {
 588  588                  rfs4_disable_delegation();
 589  589                  fem_free(deleg_rdops);
 590  590          }
 591  591  
 592  592          nfs4_srv_caller_id = fs_new_caller_id();
 593  593          lockt_sysid = lm_alloc_sysidt();
 594  594          vsd_create(&nfs4_srv_vkey, NULL);
 595  595          rfs4_state_g_init();
 596  596  }
 597  597  
 598  598  void
 599  599  rfs4_srvrfini(void)
 600  600  {
                   /*
                    * Global teardown counterpart of rfs4_srvrinit(): release the
                    * LOCKT sysid (if one is held), run rfs4_state_g_fini(), and free
                    * both delegation FEM templates.
                    */
 601  601          if (lockt_sysid != LM_NOSYSID) {
 602  602                  lm_free_sysidt(lockt_sysid);
 603  603                  lockt_sysid = LM_NOSYSID;
 604  604          }
 605  605  
 606  606          rfs4_state_g_fini();
 607  607  
                   /*
                    * NOTE(review): both templates are freed unconditionally, yet
                    * rfs4_srvrinit() can leave deleg_rdops already freed or
                    * deleg_wrops never created when fem_create() fails -- confirm
                    * fem_free() tolerates that, or guard these calls.
                    */
 608  608          fem_free(deleg_rdops);
 609  609          fem_free(deleg_wrops);
 610  610  }
 611  611  
 612  612  void
 613  613  rfs4_do_server_start(int server_upordown,
 614  614      int srv_delegation, int cluster_booted)
 615  615  {
                   /*
                    * Bring up the current zone's NFSv4 server state.
                    *
                    * server_upordown: NFS_SERVER_QUIESCED selects the warm-start path
                    *     (existing state is kept); any other value is a cold start.
                    * srv_delegation: anything other than FALSE switches the
                    *     delegation policy to SRV_NORMAL_DELEGATE.
                    * cluster_booted: on a warm start, non-zero triggers
                    *     hanfsv4_failover().
                    */
 616  616          nfs4_srv_t *nsrv4 = nfs4_get_srv();
 617  617  
 618  618          /* Is this a warm start? */
 619  619          if (server_upordown == NFS_SERVER_QUIESCED) {
 620  620                  cmn_err(CE_NOTE, "nfs4_srv: "
 621  621                      "server was previously quiesced; "
 622  622                      "existing NFSv4 state will be re-used");
 623  623  
 624  624                  /*
 625  625                   * HA-NFSv4: this is also the signal
 626  626                   * that a Resource Group failover has
 627  627                   * occurred.
 628  628                   */
 629  629                  if (cluster_booted)
 630  630                          hanfsv4_failover(nsrv4);
 631  631          } else {
 632  632                  /* Cold start */
 633  633                  nsrv4->rfs4_start_time = 0;
 634  634                  rfs4_state_zone_init(nsrv4);
 635  635                  nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 636  636                      nfs4_drc_hash);
 637  637  
 638  638                  /*
 639  639                   * If the nfsd service was started with the -s option,
 640  640                   * we need to pull in any state from the paths indicated.
                            * This is only done in the global zone.
 641  641                   */
 642  642                  if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 643  643                          /* read in the stable storage state from these paths */
 644  644                          rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 645  645                              rfs4_dss_newpaths);
 646  646                  }
 647  647          }
 648  648  
 649  649          /* Check if delegation is to be enabled */
 650  650          if (srv_delegation != FALSE)
 651  651                  rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 652  652  }
 653  653  
 654  654  void
 655  655  rfs4_init_compound_state(struct compound_state *cs)
 656  656  {
                   /*
                    * Reset *cs to its starting values: everything zeroed, then
                    * cont = TRUE, access = CS_ACCESS_DENIED, and the filehandle
                    * value pointer aimed at the state's own embedded fhbuf.
                    */
 657  657          bzero(cs, sizeof (*cs));
 658  658          cs->cont = TRUE;
 659  659          cs->access = CS_ACCESS_DENIED;
                   /* Explicit even though bzero() above has already cleared *cs. */
 660  660          cs->deleg = FALSE;
 661  661          cs->mandlock = FALSE;
 662  662          cs->fh.nfs_fh4_val = cs->fhbuf;
 663  663  }
 664  664  
 665  665  void
 666  666  rfs4_grace_start(rfs4_servinst_t *sip)
 667  667  {
                   /*
                    * (Re)start the grace period of server instance sip: under the
                    * instance rwlock held as writer, record the current lbolt time
                    * in seconds as start_time and load grace_period from the
                    * rfs4_grace_period setting.
                    */
 668  668          rw_enter(&sip->rwlock, RW_WRITER);
 669  669          sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 670  670          sip->grace_period = rfs4_grace_period;
 671  671          rw_exit(&sip->rwlock);
 672  672  }
 673  673  
 674  674  /*
 675  675   * returns true if the instance's grace period has never been started
            * (that is, sip->start_time, sampled with the instance rwlock held as
            * reader, is still 0)
 676  676   */
 677  677  int
 678  678  rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 679  679  {
 680  680          time_t start_time;
 681  681  
 682  682          rw_enter(&sip->rwlock, RW_READER);
 683  683          start_time = sip->start_time;
 684  684          rw_exit(&sip->rwlock);
 685  685  
 686  686          return (start_time == 0);
 687  687  }
 688  688  
 689  689  /*
 690  690   * Indicates if server instance is within the
 691  691   * grace period.
            *
            * The expiry time (start_time + grace_period) is computed with the
            * instance rwlock held as reader; the result is non-zero while the
            * current lbolt time in seconds is still before that expiry.
 692  692   */
 693  693  int
 694  694  rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 695  695  {
 696  696          time_t grace_expiry;
 697  697  
 698  698          rw_enter(&sip->rwlock, RW_READER);
 699  699          grace_expiry = sip->start_time + sip->grace_period;
 700  700          rw_exit(&sip->rwlock);
 701  701  
 702  702          return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 703  703  }
 704  704  
 705  705  int
 706  706  rfs4_clnt_in_grace(rfs4_client_t *cp)
 707  707  {
 708  708          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 709  709  
 710  710          return (rfs4_servinst_in_grace(cp->rc_server_instance));
 711  711  }
 712  712  
 713  713  /*
 714  714   * reset all currently active grace periods
 715  715   */
 716  716  void
 717  717  rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 718  718  {
 719  719          rfs4_servinst_t *sip;
 720  720  
 721  721          mutex_enter(&nsrv4->servinst_lock);
 722  722          for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 723  723                  if (rfs4_servinst_in_grace(sip))
 724  724                          rfs4_grace_start(sip);
 725  725          mutex_exit(&nsrv4->servinst_lock);
 726  726  }
 727  727  
 728  728  /*
 729  729   * start any new instances' grace periods
 730  730   */
 731  731  void
 732  732  rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 733  733  {
 734  734          rfs4_servinst_t *sip;
 735  735  
 736  736          mutex_enter(&nsrv4->servinst_lock);
 737  737          for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 738  738                  if (rfs4_servinst_grace_new(sip))
 739  739                          rfs4_grace_start(sip);
 740  740          mutex_exit(&nsrv4->servinst_lock);
 741  741  }
 742  742  
 743  743  static rfs4_dss_path_t *
 744  744  rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 745  745      char *path, unsigned index)
 746  746  {
 747  747          size_t len;
 748  748          rfs4_dss_path_t *dss_path;
 749  749  
 750  750          dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 751  751  
 752  752          /*
 753  753           * Take a copy of the string, since the original may be overwritten.
 754  754           * Sadly, no strdup() in the kernel.
 755  755           */
 756  756          /* allow for NUL */
 757  757          len = strlen(path) + 1;
 758  758          dss_path->path = kmem_alloc(len, KM_SLEEP);
 759  759          (void) strlcpy(dss_path->path, path, len);
 760  760  
 761  761          /* associate with servinst */
 762  762          dss_path->sip = sip;
 763  763          dss_path->index = index;
 764  764  
 765  765          /*
 766  766           * Add to list of served paths.
 767  767           * No locking required, as we're only ever called at startup.
 768  768           */
 769  769          if (nsrv4->dss_pathlist == NULL) {
 770  770                  /* this is the first dss_path_t */
 771  771  
 772  772                  /* needed for insque/remque */
 773  773                  dss_path->next = dss_path->prev = dss_path;
 774  774  
 775  775                  nsrv4->dss_pathlist = dss_path;
 776  776          } else {
 777  777                  insque(dss_path, nsrv4->dss_pathlist);
 778  778          }
 779  779  
 780  780          return (dss_path);
 781  781  }
 782  782  
 783  783  /*
 784  784   * Create a new server instance, and make it the currently active instance.
 785  785   * Note that starting the grace period too early will reduce the clients'
 786  786   * recovery window.
 787  787   */
 788  788  void
 789  789  rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 790  790      int dss_npaths, char **dss_paths)
 791  791  {
 792  792          unsigned i;
 793  793          rfs4_servinst_t *sip;
 794  794          rfs4_oldstate_t *oldstate;
 795  795  
 796  796          sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 797  797          rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 798  798  
 799  799          sip->start_time = (time_t)0;
 800  800          sip->grace_period = (time_t)0;
 801  801          sip->next = NULL;
 802  802          sip->prev = NULL;
 803  803  
 804  804          rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 805  805          /*
 806  806           * This initial dummy entry is required to setup for insque/remque.
 807  807           * It must be skipped over whenever the list is traversed.
 808  808           */
 809  809          oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 810  810          /* insque/remque require initial list entry to be self-terminated */
 811  811          oldstate->next = oldstate;
 812  812          oldstate->prev = oldstate;
 813  813          sip->oldstate = oldstate;
 814  814  
 815  815  
 816  816          sip->dss_npaths = dss_npaths;
 817  817          sip->dss_paths = kmem_alloc(dss_npaths *
 818  818              sizeof (rfs4_dss_path_t *), KM_SLEEP);
 819  819  
 820  820          for (i = 0; i < dss_npaths; i++) {
 821  821                  sip->dss_paths[i] =
 822  822                      rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 823  823          }
 824  824  
 825  825          mutex_enter(&nsrv4->servinst_lock);
 826  826          if (nsrv4->nfs4_cur_servinst != NULL) {
 827  827                  /* add to linked list */
 828  828                  sip->prev = nsrv4->nfs4_cur_servinst;
 829  829                  nsrv4->nfs4_cur_servinst->next = sip;
 830  830          }
 831  831          if (start_grace)
 832  832                  rfs4_grace_start(sip);
 833  833          /* make the new instance "current" */
 834  834          nsrv4->nfs4_cur_servinst = sip;
 835  835  
 836  836          mutex_exit(&nsrv4->servinst_lock);
 837  837  }
 838  838  
 839  839  /*
 840  840   * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 841  841   * all instances directly.
 842  842   */
 843  843  void
 844  844  rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 845  845  {
 846  846          rfs4_servinst_t *sip, *prev, *current;
 847  847  #ifdef DEBUG
 848  848          int n = 0;
 849  849  #endif
 850  850  
 851  851          mutex_enter(&nsrv4->servinst_lock);
 852  852          ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 853  853          current = nsrv4->nfs4_cur_servinst;
 854  854          nsrv4->nfs4_cur_servinst = NULL;
 855  855          for (sip = current; sip != NULL; sip = prev) {
 856  856                  prev = sip->prev;
 857  857                  rw_destroy(&sip->rwlock);
 858  858                  if (sip->oldstate)
 859  859                          kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 860  860                  if (sip->dss_paths) {
 861  861                          int i = sip->dss_npaths;
 862  862  
 863  863                          while (i > 0) {
 864  864                                  i--;
 865  865                                  if (sip->dss_paths[i] != NULL) {
 866  866                                          char *path = sip->dss_paths[i]->path;
 867  867  
 868  868                                          if (path != NULL) {
 869  869                                                  kmem_free(path,
 870  870                                                      strlen(path) + 1);
 871  871                                          }
 872  872                                          kmem_free(sip->dss_paths[i],
 873  873                                              sizeof (rfs4_dss_path_t));
 874  874                                  }
 875  875                          }
 876  876                          kmem_free(sip->dss_paths,
 877  877                              sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 878  878                  }
 879  879                  kmem_free(sip, sizeof (rfs4_servinst_t));
 880  880  #ifdef DEBUG
 881  881                  n++;
 882  882  #endif
 883  883          }
 884  884          mutex_exit(&nsrv4->servinst_lock);
 885  885  }
 886  886  
 887  887  /*
 888  888   * Assign the current server instance to a client_t.
 889  889   * Should be called with cp->rc_dbe held.
 890  890   */
 891  891  void
 892  892  rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 893  893      rfs4_servinst_t *sip)
 894  894  {
 895  895          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 896  896  
 897  897          /*
 898  898           * The lock ensures that if the current instance is in the process
 899  899           * of changing, we will see the new one.
 900  900           */
 901  901          mutex_enter(&nsrv4->servinst_lock);
 902  902          cp->rc_server_instance = sip;
 903  903          mutex_exit(&nsrv4->servinst_lock);
 904  904  }
 905  905  
 906  906  rfs4_servinst_t *
 907  907  rfs4_servinst(rfs4_client_t *cp)
 908  908  {
 909  909          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 910  910  
 911  911          return (cp->rc_server_instance);
 912  912  }
 913  913  
 914  914  /* ARGSUSED */
 915  915  static void
 916  916  nullfree(caddr_t resop)
 917  917  {
 918  918  }
 919  919  
 920  920  /*
 921  921   * This is a fall-through for invalid or not implemented (yet) ops
 922  922   */
 923  923  /* ARGSUSED */
 924  924  static void
 925  925  rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 926  926      struct compound_state *cs)
 927  927  {
 928  928          *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 929  929  }
 930  930  
 931  931  /*
 932  932   * Check if the security flavor, nfsnum, is in the flavor_list.
 933  933   */
 934  934  bool_t
 935  935  in_flavor_list(int nfsnum, int *flavor_list, int count)
 936  936  {
 937  937          int i;
 938  938  
 939  939          for (i = 0; i < count; i++) {
 940  940                  if (nfsnum == flavor_list[i])
 941  941                          return (TRUE);
 942  942          }
 943  943          return (FALSE);
 944  944  }
 945  945  
 946  946  /*
 947  947   * Used by rfs4_op_secinfo to get the security information from the
 948  948   * export structure associated with the component.
 949  949   */
 950  950  /* ARGSUSED */
 951  951  static nfsstat4
 952  952  do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 953  953  {
 954  954          int error, different_export = 0;
 955  955          vnode_t *dvp, *vp;
 956  956          struct exportinfo *exi;
 957  957          fid_t fid;
 958  958          uint_t count, i;
 959  959          secinfo4 *resok_val;
 960  960          struct secinfo *secp;
 961  961          seconfig_t *si;
 962  962          bool_t did_traverse = FALSE;
 963  963          int dotdot, walk;
 964  964          nfs_export_t *ne = nfs_get_export();
 965  965  
 966  966          dvp = cs->vp;
 967  967          exi = cs->exi;
 968  968          ASSERT(exi != NULL);
 969  969          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 970  970  
 971  971          /*
 972  972           * If dotdotting, then need to check whether it's above the
 973  973           * root of a filesystem, or above an export point.
 974  974           */
 975  975          if (dotdot) {
 976  976                  ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
 977  977                  /*
 978  978                   * If dotdotting at the root of a filesystem, then
 979  979                   * need to traverse back to the mounted-on filesystem
 980  980                   * and do the dotdot lookup there.
 981  981                   */
 982  982                  if ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp)) {
 983  983  
  
    | 
      ↓ open down ↓ | 
    983 lines elided | 
    
      ↑ open up ↑ | 
  
 984  984                          /*
 985  985                           * If at the system root, then can
 986  986                           * go up no further.
 987  987                           */
 988  988                          if (VN_CMP(dvp, ZONE_ROOTVP()))
 989  989                                  return (puterrno4(ENOENT));
 990  990  
 991  991                          /*
 992  992                           * Traverse back to the mounted-on filesystem
 993  993                           */
 994      -                        dvp = untraverse(dvp);
      994 +                        dvp = untraverse(ne, dvp);
 995  995  
 996  996                          /*
 997  997                           * Set the different_export flag so we remember
 998  998                           * to pick up a new exportinfo entry for
 999  999                           * this new filesystem.
1000 1000                           */
1001 1001                          different_export = 1;
1002 1002                  } else {
1003 1003  
1004 1004                          /*
1005 1005                           * If dotdotting above an export point then set
1006 1006                           * the different_export to get new export info.
1007 1007                           */
1008 1008                          different_export = nfs_exported(exi, dvp);
1009 1009                  }
1010 1010          }
1011 1011  
1012 1012          /*
1013 1013           * Get the vnode for the component "nm".
1014 1014           */
1015 1015          error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1016 1016              NULL, NULL, NULL);
1017 1017          if (error)
1018 1018                  return (puterrno4(error));
1019 1019  
1020 1020          /*
1021 1021           * If the vnode is in a pseudo filesystem, or if the security flavor
1022 1022           * used in the request is valid but not an explicitly shared flavor,
1023 1023           * or the access bit indicates that this is a limited access,
1024 1024           * check whether this vnode is visible.
1025 1025           */
1026 1026          if (!different_export &&
1027 1027              (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1028 1028              cs->access & CS_ACCESS_LIMITED)) {
1029 1029                  if (! nfs_visible(exi, vp, &different_export)) {
1030 1030                          VN_RELE(vp);
1031 1031                          return (puterrno4(ENOENT));
1032 1032                  }
1033 1033          }
1034 1034  
1035 1035          /*
1036 1036           * If it's a mountpoint, then traverse it.
1037 1037           */
1038 1038          if (vn_ismntpt(vp)) {
1039 1039                  if ((error = traverse(&vp)) != 0) {
1040 1040                          VN_RELE(vp);
1041 1041                          return (puterrno4(error));
1042 1042                  }
1043 1043                  /* remember that we had to traverse mountpoint */
1044 1044                  did_traverse = TRUE;
1045 1045                  different_export = 1;
1046 1046          } else if (vp->v_vfsp != dvp->v_vfsp) {
1047 1047                  /*
1048 1048                   * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1049 1049                   * then vp is probably an LOFS object.  We don't need the
1050 1050                   * realvp, we just need to know that we might have crossed
1051 1051                   * a server fs boundary and need to call checkexport4.
1052 1052                   * (LOFS lookup hides server fs mountpoints, and actually calls
1053 1053                   * traverse)
1054 1054                   */
1055 1055                  different_export = 1;
1056 1056          }
1057 1057  
1058 1058          /*
1059 1059           * Get the export information for it.
1060 1060           */
1061 1061          if (different_export) {
1062 1062  
1063 1063                  bzero(&fid, sizeof (fid));
1064 1064                  fid.fid_len = MAXFIDSZ;
1065 1065                  error = vop_fid_pseudo(vp, &fid);
1066 1066                  if (error) {
1067 1067                          VN_RELE(vp);
1068 1068                          return (puterrno4(error));
1069 1069                  }
1070 1070  
1071 1071                  /* We'll need to reassign "exi". */
1072 1072                  if (dotdot)
1073 1073                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1074 1074                  else
1075 1075                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1076 1076  
1077 1077                  if (exi == NULL) {
1078 1078                          if (did_traverse == TRUE) {
1079 1079                                  /*
1080 1080                                   * If this vnode is a mounted-on vnode,
1081 1081                                   * but the mounted-on file system is not
1082 1082                                   * exported, send back the secinfo for
1083 1083                                   * the exported node that the mounted-on
1084 1084                                   * vnode lives in.
1085 1085                                   */
1086 1086                                  exi = cs->exi;
1087 1087                          } else {
1088 1088                                  VN_RELE(vp);
1089 1089                                  return (puterrno4(EACCES));
1090 1090                          }
1091 1091                  }
1092 1092          }
1093 1093          ASSERT(exi != NULL);
1094 1094  
1095 1095  
1096 1096          /*
1097 1097           * Create the secinfo result based on the security information
1098 1098           * from the exportinfo structure (exi).
1099 1099           *
1100 1100           * Return all flavors for a pseudo node.
1101 1101           * For a real export node, return the flavor that the client
1102 1102           * has access with.
1103 1103           */
1104 1104          ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1105 1105          if (PSEUDO(exi)) {
1106 1106                  count = exi->exi_export.ex_seccnt; /* total sec count */
1107 1107                  resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1108 1108                  secp = exi->exi_export.ex_secinfo;
1109 1109  
1110 1110                  for (i = 0; i < count; i++) {
1111 1111                          si = &secp[i].s_secinfo;
1112 1112                          resok_val[i].flavor = si->sc_rpcnum;
1113 1113                          if (resok_val[i].flavor == RPCSEC_GSS) {
1114 1114                                  rpcsec_gss_info *info;
1115 1115  
1116 1116                                  info = &resok_val[i].flavor_info;
1117 1117                                  info->qop = si->sc_qop;
1118 1118                                  info->service = (rpc_gss_svc_t)si->sc_service;
1119 1119  
1120 1120                                  /* get oid opaque data */
1121 1121                                  info->oid.sec_oid4_len =
1122 1122                                      si->sc_gss_mech_type->length;
1123 1123                                  info->oid.sec_oid4_val = kmem_alloc(
1124 1124                                      si->sc_gss_mech_type->length, KM_SLEEP);
1125 1125                                  bcopy(
1126 1126                                      si->sc_gss_mech_type->elements,
1127 1127                                      info->oid.sec_oid4_val,
1128 1128                                      info->oid.sec_oid4_len);
1129 1129                          }
1130 1130                  }
1131 1131                  resp->SECINFO4resok_len = count;
1132 1132                  resp->SECINFO4resok_val = resok_val;
1133 1133          } else {
1134 1134                  int ret_cnt = 0, k = 0;
1135 1135                  int *flavor_list;
1136 1136  
1137 1137                  count = exi->exi_export.ex_seccnt; /* total sec count */
1138 1138                  secp = exi->exi_export.ex_secinfo;
1139 1139  
1140 1140                  flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1141 1141                  /* find out which flavors to return */
1142 1142                  for (i = 0; i < count; i ++) {
1143 1143                          int access, flavor, perm;
1144 1144  
1145 1145                          flavor = secp[i].s_secinfo.sc_nfsnum;
1146 1146                          perm = secp[i].s_flags;
1147 1147  
1148 1148                          access = nfsauth4_secinfo_access(exi, cs->req,
1149 1149                              flavor, perm, cs->basecr);
1150 1150  
1151 1151                          if (! (access & NFSAUTH_DENIED) &&
1152 1152                              ! (access & NFSAUTH_WRONGSEC)) {
1153 1153                                  flavor_list[ret_cnt] = flavor;
1154 1154                                  ret_cnt++;
1155 1155                          }
1156 1156                  }
1157 1157  
1158 1158                  /* Create the returning SECINFO value */
1159 1159                  resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1160 1160  
1161 1161                  for (i = 0; i < count; i++) {
1162 1162                          /*
1163 1163                           * If the flavor is in the flavor list,
1164 1164                           * fill in resok_val.
1165 1165                           */
1166 1166                          si = &secp[i].s_secinfo;
1167 1167                          if (in_flavor_list(si->sc_nfsnum,
1168 1168                              flavor_list, ret_cnt)) {
1169 1169                                  resok_val[k].flavor = si->sc_rpcnum;
1170 1170                                  if (resok_val[k].flavor == RPCSEC_GSS) {
1171 1171                                          rpcsec_gss_info *info;
1172 1172  
1173 1173                                          info = &resok_val[k].flavor_info;
1174 1174                                          info->qop = si->sc_qop;
1175 1175                                          info->service = (rpc_gss_svc_t)
1176 1176                                              si->sc_service;
1177 1177  
1178 1178                                          /* get oid opaque data */
1179 1179                                          info->oid.sec_oid4_len =
1180 1180                                              si->sc_gss_mech_type->length;
1181 1181                                          info->oid.sec_oid4_val = kmem_alloc(
1182 1182                                              si->sc_gss_mech_type->length,
1183 1183                                              KM_SLEEP);
1184 1184                                          bcopy(si->sc_gss_mech_type->elements,
1185 1185                                              info->oid.sec_oid4_val,
1186 1186                                              info->oid.sec_oid4_len);
1187 1187                                  }
1188 1188                                  k++;
1189 1189                          }
1190 1190                          if (k >= ret_cnt)
1191 1191                                  break;
1192 1192                  }
1193 1193                  resp->SECINFO4resok_len = ret_cnt;
1194 1194                  resp->SECINFO4resok_val = resok_val;
1195 1195                  kmem_free(flavor_list, count * sizeof (int));
1196 1196          }
1197 1197  
1198 1198          VN_RELE(vp);
1199 1199          return (NFS4_OK);
1200 1200  }
1201 1201  
1202 1202  /*
1203 1203   * SECINFO (Operation 33): Obtain required security information on
1204 1204   * the component name in the format of (security-mechanism-oid, qop, service)
1205 1205   * triplets.
1206 1206   */
1207 1207  /* ARGSUSED */
1208 1208  static void
1209 1209  rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1210 1210      struct compound_state *cs)
1211 1211  {
1212 1212          SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1213 1213          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1214 1214          utf8string *utfnm = &args->name;
1215 1215          uint_t len;
1216 1216          char *nm;
1217 1217          struct sockaddr *ca;
1218 1218          char *name = NULL;
1219 1219          nfsstat4 status = NFS4_OK;
1220 1220  
1221 1221          DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1222 1222              SECINFO4args *, args);
1223 1223  
1224 1224          /*
1225 1225           * Current file handle (cfh) should have been set before getting
1226 1226           * into this function. If not, return error.
1227 1227           */
1228 1228          if (cs->vp == NULL) {
1229 1229                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1230 1230                  goto out;
1231 1231          }
1232 1232  
1233 1233          if (cs->vp->v_type != VDIR) {
1234 1234                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1235 1235                  goto out;
1236 1236          }
1237 1237  
1238 1238          /*
1239 1239           * Verify the component name. If failed, error out, but
1240 1240           * do not error out if the component name is a "..".
1241 1241           * SECINFO will return its parents secinfo data for SECINFO "..".
1242 1242           */
1243 1243          status = utf8_dir_verify(utfnm);
1244 1244          if (status != NFS4_OK) {
1245 1245                  if (utfnm->utf8string_len != 2 ||
1246 1246                      utfnm->utf8string_val[0] != '.' ||
1247 1247                      utfnm->utf8string_val[1] != '.') {
1248 1248                          *cs->statusp = resp->status = status;
1249 1249                          goto out;
1250 1250                  }
1251 1251          }
1252 1252  
1253 1253          nm = utf8_to_str(utfnm, &len, NULL);
1254 1254          if (nm == NULL) {
1255 1255                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1256 1256                  goto out;
1257 1257          }
1258 1258  
1259 1259          if (len > MAXNAMELEN) {
1260 1260                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1261 1261                  kmem_free(nm, len);
1262 1262                  goto out;
1263 1263          }
1264 1264  
1265 1265          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1266 1266          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1267 1267              MAXPATHLEN  + 1);
1268 1268  
1269 1269          if (name == NULL) {
1270 1270                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1271 1271                  kmem_free(nm, len);
1272 1272                  goto out;
1273 1273          }
1274 1274  
1275 1275  
1276 1276          *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1277 1277  
1278 1278          if (name != nm)
1279 1279                  kmem_free(name, MAXPATHLEN + 1);
1280 1280          kmem_free(nm, len);
1281 1281  
1282 1282  out:
1283 1283          DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1284 1284              SECINFO4res *, resp);
1285 1285  }
1286 1286  
1287 1287  /*
1288 1288   * Free SECINFO result.
1289 1289   */
1290 1290  /* ARGSUSED */
1291 1291  static void
1292 1292  rfs4_op_secinfo_free(nfs_resop4 *resop)
1293 1293  {
1294 1294          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1295 1295          int count, i;
1296 1296          secinfo4 *resok_val;
1297 1297  
1298 1298          /* If this is not an Ok result, nothing to free. */
1299 1299          if (resp->status != NFS4_OK) {
1300 1300                  return;
1301 1301          }
1302 1302  
1303 1303          count = resp->SECINFO4resok_len;
1304 1304          resok_val = resp->SECINFO4resok_val;
1305 1305  
1306 1306          for (i = 0; i < count; i++) {
1307 1307                  if (resok_val[i].flavor == RPCSEC_GSS) {
1308 1308                          rpcsec_gss_info *info;
1309 1309  
1310 1310                          info = &resok_val[i].flavor_info;
1311 1311                          kmem_free(info->oid.sec_oid4_val,
1312 1312                              info->oid.sec_oid4_len);
1313 1313                  }
1314 1314          }
1315 1315          kmem_free(resok_val, count * sizeof (secinfo4));
1316 1316          resp->SECINFO4resok_len = 0;
1317 1317          resp->SECINFO4resok_val = NULL;
1318 1318  }
1319 1319  
1320 1320  /* ARGSUSED */
1321 1321  static void
1322 1322  rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1323 1323      struct compound_state *cs)
1324 1324  {
1325 1325          ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1326 1326          ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1327 1327          int error;
1328 1328          vnode_t *vp;
1329 1329          struct vattr va;
1330 1330          int checkwriteperm;
1331 1331          cred_t *cr = cs->cr;
1332 1332          bslabel_t *clabel, *slabel;
1333 1333          ts_label_t *tslabel;
1334 1334          boolean_t admin_low_client;
1335 1335  
1336 1336          DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1337 1337              ACCESS4args *, args);
1338 1338  
1339 1339  #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1340 1340          if (cs->access == CS_ACCESS_DENIED) {
1341 1341                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1342 1342                  goto out;
1343 1343          }
1344 1344  #endif
1345 1345          if (cs->vp == NULL) {
1346 1346                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1347 1347                  goto out;
1348 1348          }
1349 1349  
1350 1350          ASSERT(cr != NULL);
1351 1351  
1352 1352          vp = cs->vp;
1353 1353  
1354 1354          /*
1355 1355           * If the file system is exported read only, it is not appropriate
1356 1356           * to check write permissions for regular files and directories.
1357 1357           * Special files are interpreted by the client, so the underlying
1358 1358           * permissions are sent back to the client for interpretation.
1359 1359           */
1360 1360          if (rdonly4(req, cs) &&
1361 1361              (vp->v_type == VREG || vp->v_type == VDIR))
1362 1362                  checkwriteperm = 0;
1363 1363          else
1364 1364                  checkwriteperm = 1;
1365 1365  
1366 1366          /*
1367 1367           * XXX
1368 1368           * We need the mode so that we can correctly determine access
1369 1369           * permissions relative to a mandatory lock file.  Access to
1370 1370           * mandatory lock files is denied on the server, so it might
1371 1371           * as well be reflected to the server during the open.
1372 1372           */
1373 1373          va.va_mask = AT_MODE;
1374 1374          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1375 1375          if (error) {
1376 1376                  *cs->statusp = resp->status = puterrno4(error);
1377 1377                  goto out;
1378 1378          }
1379 1379          resp->access = 0;
1380 1380          resp->supported = 0;
1381 1381  
1382 1382          if (is_system_labeled()) {
1383 1383                  ASSERT(req->rq_label != NULL);
1384 1384                  clabel = req->rq_label;
1385 1385                  DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1386 1386                      "got client label from request(1)",
1387 1387                      struct svc_req *, req);
1388 1388                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
1389 1389                          if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1390 1390                                  *cs->statusp = resp->status = puterrno4(EACCES);
1391 1391                                  goto out;
1392 1392                          }
1393 1393                          slabel = label2bslabel(tslabel);
1394 1394                          DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1395 1395                              char *, "got server label(1) for vp(2)",
1396 1396                              bslabel_t *, slabel, vnode_t *, vp);
1397 1397  
1398 1398                          admin_low_client = B_FALSE;
1399 1399                  } else
1400 1400                          admin_low_client = B_TRUE;
1401 1401          }
1402 1402  
1403 1403          if (args->access & ACCESS4_READ) {
1404 1404                  error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1405 1405                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1406 1406                      (!is_system_labeled() || admin_low_client ||
1407 1407                      bldominates(clabel, slabel)))
1408 1408                          resp->access |= ACCESS4_READ;
1409 1409                  resp->supported |= ACCESS4_READ;
1410 1410          }
1411 1411          if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1412 1412                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1413 1413                  if (!error && (!is_system_labeled() || admin_low_client ||
1414 1414                      bldominates(clabel, slabel)))
1415 1415                          resp->access |= ACCESS4_LOOKUP;
1416 1416                  resp->supported |= ACCESS4_LOOKUP;
1417 1417          }
1418 1418          if (checkwriteperm &&
1419 1419              (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1420 1420                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1421 1421                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1422 1422                      (!is_system_labeled() || admin_low_client ||
1423 1423                      blequal(clabel, slabel)))
1424 1424                          resp->access |=
1425 1425                              (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1426 1426                  resp->supported |=
1427 1427                      resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1428 1428          }
1429 1429  
1430 1430          if (checkwriteperm &&
1431 1431              (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1432 1432                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1433 1433                  if (!error && (!is_system_labeled() || admin_low_client ||
1434 1434                      blequal(clabel, slabel)))
1435 1435                          resp->access |= ACCESS4_DELETE;
1436 1436                  resp->supported |= ACCESS4_DELETE;
1437 1437          }
1438 1438          if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1439 1439                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1440 1440                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1441 1441                      (!is_system_labeled() || admin_low_client ||
1442 1442                      bldominates(clabel, slabel)))
1443 1443                          resp->access |= ACCESS4_EXECUTE;
1444 1444                  resp->supported |= ACCESS4_EXECUTE;
1445 1445          }
1446 1446  
1447 1447          if (is_system_labeled() && !admin_low_client)
1448 1448                  label_rele(tslabel);
1449 1449  
1450 1450          *cs->statusp = resp->status = NFS4_OK;
1451 1451  out:
1452 1452          DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1453 1453              ACCESS4res *, resp);
1454 1454  }
1455 1455  
1456 1456  /* ARGSUSED */
1457 1457  static void
1458 1458  rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1459 1459      struct compound_state *cs)
1460 1460  {
1461 1461          COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1462 1462          COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1463 1463          int error;
1464 1464          vnode_t *vp = cs->vp;
1465 1465          cred_t *cr = cs->cr;
1466 1466          vattr_t va;
1467 1467          nfs4_srv_t *nsrv4;
1468 1468  
1469 1469          DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1470 1470              COMMIT4args *, args);
1471 1471  
1472 1472          if (vp == NULL) {
1473 1473                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1474 1474                  goto out;
1475 1475          }
1476 1476          if (cs->access == CS_ACCESS_DENIED) {
1477 1477                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1478 1478                  goto out;
1479 1479          }
1480 1480  
1481 1481          if (args->offset + args->count < args->offset) {
1482 1482                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1483 1483                  goto out;
1484 1484          }
1485 1485  
1486 1486          va.va_mask = AT_UID;
1487 1487          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1488 1488  
1489 1489          /*
1490 1490           * If we can't get the attributes, then we can't do the
1491 1491           * right access checking.  So, we'll fail the request.
1492 1492           */
1493 1493          if (error) {
1494 1494                  *cs->statusp = resp->status = puterrno4(error);
1495 1495                  goto out;
1496 1496          }
1497 1497          if (rdonly4(req, cs)) {
1498 1498                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1499 1499                  goto out;
1500 1500          }
1501 1501  
1502 1502          if (vp->v_type != VREG) {
1503 1503                  if (vp->v_type == VDIR)
1504 1504                          resp->status = NFS4ERR_ISDIR;
1505 1505                  else
1506 1506                          resp->status = NFS4ERR_INVAL;
1507 1507                  *cs->statusp = resp->status;
1508 1508                  goto out;
1509 1509          }
1510 1510  
1511 1511          if (crgetuid(cr) != va.va_uid &&
1512 1512              (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1513 1513                  *cs->statusp = resp->status = puterrno4(error);
1514 1514                  goto out;
1515 1515          }
1516 1516  
1517 1517          error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1518 1518  
1519 1519          if (error) {
1520 1520                  *cs->statusp = resp->status = puterrno4(error);
1521 1521                  goto out;
1522 1522          }
1523 1523  
1524 1524          nsrv4 = nfs4_get_srv();
1525 1525          *cs->statusp = resp->status = NFS4_OK;
1526 1526          resp->writeverf = nsrv4->write4verf;
1527 1527  out:
1528 1528          DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1529 1529              COMMIT4res *, resp);
1530 1530  }
1531 1531  
1532 1532  /*
1533 1533   * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1534 1534   * was completed. It does the nfsv4 create for special files.
1535 1535   */
1536 1536  /* ARGSUSED */
1537 1537  static vnode_t *
1538 1538  do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1539 1539      struct compound_state *cs, vattr_t *vap, char *nm)
1540 1540  {
1541 1541          int error;
1542 1542          cred_t *cr = cs->cr;
1543 1543          vnode_t *dvp = cs->vp;
1544 1544          vnode_t *vp = NULL;
1545 1545          int mode;
1546 1546          enum vcexcl excl;
1547 1547  
1548 1548          switch (args->type) {
1549 1549          case NF4CHR:
1550 1550          case NF4BLK:
1551 1551                  if (secpolicy_sys_devices(cr) != 0) {
1552 1552                          *cs->statusp = resp->status = NFS4ERR_PERM;
1553 1553                          return (NULL);
1554 1554                  }
1555 1555                  if (args->type == NF4CHR)
1556 1556                          vap->va_type = VCHR;
1557 1557                  else
1558 1558                          vap->va_type = VBLK;
1559 1559                  vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1560 1560                      args->ftype4_u.devdata.specdata2);
1561 1561                  vap->va_mask |= AT_RDEV;
1562 1562                  break;
1563 1563          case NF4SOCK:
1564 1564                  vap->va_type = VSOCK;
1565 1565                  break;
1566 1566          case NF4FIFO:
1567 1567                  vap->va_type = VFIFO;
1568 1568                  break;
1569 1569          default:
1570 1570                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1571 1571                  return (NULL);
1572 1572          }
1573 1573  
1574 1574          /*
1575 1575           * Must specify the mode.
1576 1576           */
1577 1577          if (!(vap->va_mask & AT_MODE)) {
1578 1578                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1579 1579                  return (NULL);
1580 1580          }
1581 1581  
1582 1582          excl = EXCL;
1583 1583  
1584 1584          mode = 0;
1585 1585  
1586 1586          error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1587 1587          if (error) {
1588 1588                  *cs->statusp = resp->status = puterrno4(error);
1589 1589                  return (NULL);
1590 1590          }
1591 1591          return (vp);
1592 1592  }
1593 1593  
1594 1594  /*
1595 1595   * nfsv4 create is used to create non-regular files. For regular files,
1596 1596   * use nfsv4 open.
            *
            * The object is created in the directory of the current filehandle
            * (cs->vp).  Directories go through VOP_MKDIR(), symbolic links through
            * VOP_SYMLINK() followed by VOP_LOOKUP(), and every other accepted type
            * through do_rfs4_op_mknod().  On success makefh4() is asked to build
            * cs->fh for the new object, cs->vp is switched to it, and resp->cinfo
            * describes the change to the parent directory.  The outcome is
            * reported in both resp->status and *cs->statusp.
1597 1597   */
1598 1598  /* ARGSUSED */
1599 1599  static void
1600 1600  rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1601 1601      struct compound_state *cs)
1602 1602  {
1603 1603          CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1604 1604          CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1605 1605          int error;
1606 1606          struct vattr bva, iva, iva2, ava, *vap;
1607 1607          cred_t *cr = cs->cr;
1608 1608          vnode_t *dvp = cs->vp;
1609 1609          vnode_t *vp = NULL;
1610 1610          vnode_t *realvp;
1611 1611          char *nm, *lnm;
1612 1612          uint_t len, llen;
1613 1613          int syncval = 0;
1614 1614          struct nfs4_svgetit_arg sarg;
1615 1615          struct nfs4_ntov_table ntov;
1616 1616          struct statvfs64 sb;
1617 1617          nfsstat4 status;
1618 1618          struct sockaddr *ca;
1619 1619          char *name = NULL;
1620 1620          char *lname = NULL;
1621 1621  
1622 1622          DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1623 1623              CREATE4args *, args);
1624 1624  
1625 1625          resp->attrset = 0;
1626 1626  
1627 1627          if (dvp == NULL) {
1628 1628                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1629 1629                  goto out;
1630 1630          }
1631 1631  
1632 1632          /*
1633 1633           * If there is an unshared filesystem mounted on this vnode,
1634 1634           * do not allow to create an object in this directory.
1635 1635           */
1636 1636          if (vn_ismntpt(dvp)) {
1637 1637                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1638 1638                  goto out;
1639 1639          }
1640 1640  
1641 1641          /* Verify that type is correct */
1642 1642          switch (args->type) {
1643 1643          case NF4LNK:
1644 1644          case NF4BLK:
1645 1645          case NF4CHR:
1646 1646          case NF4SOCK:
1647 1647          case NF4FIFO:
1648 1648          case NF4DIR:
1649 1649                  break;
1650 1650          default:
1651 1651                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1652 1652                  goto out;
1653 1653          };
1654 1654  
1655 1655          if (cs->access == CS_ACCESS_DENIED) {
1656 1656                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1657 1657                  goto out;
1658 1658          }
1659 1659          if (dvp->v_type != VDIR) {
1660 1660                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1661 1661                  goto out;
1662 1662          }
1663 1663          status = utf8_dir_verify(&args->objname);
1664 1664          if (status != NFS4_OK) {
1665 1665                  *cs->statusp = resp->status = status;
1666 1666                  goto out;
1667 1667          }
1668 1668  
1669 1669          if (rdonly4(req, cs)) {
1670 1670                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1671 1671                  goto out;
1672 1672          }
1673 1673  
1674 1674          /*
1675 1675           * Name of newly created object
1676 1676           */
1677 1677          nm = utf8_to_fn(&args->objname, &len, NULL);
1678 1678          if (nm == NULL) {
1679 1679                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1680 1680                  goto out;
1681 1681          }
1682 1682  
1683 1683          if (len > MAXNAMELEN) {
1684 1684                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1685 1685                  kmem_free(nm, len);
1686 1686                  goto out;
1687 1687          }
1688 1688  
                   /*
                    * Convert the name for this client/export.  The cleanup code
                    * below compares name against nm before freeing it, i.e. it
                    * allows for nfscmd_convname() handing back nm itself.
                    */
1689 1689          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1690 1690          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1691 1691              MAXPATHLEN  + 1);
1692 1692  
1693 1693          if (name == NULL) {
1694 1694                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1695 1695                  kmem_free(nm, len);
1696 1696                  goto out;
1697 1697          }
1698 1698  
1699 1699          resp->attrset = 0;
1700 1700  
1701 1701          sarg.sbp = &sb;
1702 1702          sarg.is_referral = B_FALSE;
1703 1703          nfs4_ntov_table_init(&ntov);
1704 1704  
1705 1705          status = do_rfs4_set_attrs(&resp->attrset,
1706 1706              &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1707 1707  
                   /* createattrs yielded no vattr bits to set: treat as invalid */
1708 1708          if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1709 1709                  status = NFS4ERR_INVAL;
1710 1710  
1711 1711          if (status != NFS4_OK) {
1712 1712                  *cs->statusp = resp->status = status;
1713 1713                  if (name != nm)
1714 1714                          kmem_free(name, MAXPATHLEN + 1);
1715 1715                  kmem_free(nm, len);
1716 1716                  nfs4_ntov_table_free(&ntov, &sarg);
1717 1717                  resp->attrset = 0;
1718 1718                  goto out;
1719 1719          }
1720 1720  
1721 1721          /* Get "before" change value */
1722 1722          bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1723 1723          error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1724 1724          if (error) {
1725 1725                  *cs->statusp = resp->status = puterrno4(error);
1726 1726                  if (name != nm)
1727 1727                          kmem_free(name, MAXPATHLEN + 1);
1728 1728                  kmem_free(nm, len);
1729 1729                  nfs4_ntov_table_free(&ntov, &sarg);
1730 1730                  resp->attrset = 0;
1731 1731                  goto out;
1732 1732          }
1733 1733          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1734 1734  
1735 1735          vap = sarg.vap;
1736 1736  
1737 1737          /*
1738 1738           * Set the default initial values for attributes when the parent
1739 1739           * directory does not have the VSUID/VSGID bit set and they have
1740 1740           * not been specified in createattrs.
1741 1741           */
1742 1742          if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1743 1743                  vap->va_uid = crgetuid(cr);
1744 1744                  vap->va_mask |= AT_UID;
1745 1745          }
1746 1746          if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1747 1747                  vap->va_gid = crgetgid(cr);
1748 1748                  vap->va_mask |= AT_GID;
1749 1749          }
1750 1750  
1751 1751          vap->va_mask |= AT_TYPE;
1752 1752          switch (args->type) {
1753 1753          case NF4DIR:
1754 1754                  vap->va_type = VDIR;
1755 1755                  if ((vap->va_mask & AT_MODE) == 0) {
1756 1756                          vap->va_mode = 0700;    /* default: owner rwx only */
1757 1757                          vap->va_mask |= AT_MODE;
1758 1758                  }
1759 1759                  error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1760 1760                  if (error)
1761 1761                          break;
1762 1762  
1763 1763                  /*
1764 1764                   * Get the initial "after" sequence number, if it fails,
1765 1765                   * set to zero
1766 1766                   */
1767 1767                  iva.va_mask = AT_SEQ;
1768 1768                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1769 1769                          iva.va_seq = 0;
1770 1770                  break;
1771 1771          case NF4LNK:
1772 1772                  vap->va_type = VLNK;
1773 1773                  if ((vap->va_mask & AT_MODE) == 0) {
1774 1774                          vap->va_mode = 0700;    /* default: owner rwx only */
1775 1775                          vap->va_mask |= AT_MODE;
1776 1776                  }
1777 1777  
1778 1778                  /*
1779 1779                   * symlink names must be treated as data
1780 1780                   */
1781 1781                  lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1782 1782                      &llen, NULL);
1783 1783  
1784 1784                  if (lnm == NULL) {
1785 1785                          *cs->statusp = resp->status = NFS4ERR_INVAL;
1786 1786                          if (name != nm)
1787 1787                                  kmem_free(name, MAXPATHLEN + 1);
1788 1788                          kmem_free(nm, len);
1789 1789                          nfs4_ntov_table_free(&ntov, &sarg);
1790 1790                          resp->attrset = 0;
1791 1791                          goto out;
1792 1792                  }
1793 1793  
1794 1794                  if (llen > MAXPATHLEN) {
1795 1795                          *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1796 1796                          if (name != nm)
1797 1797                                  kmem_free(name, MAXPATHLEN + 1);
1798 1798                          kmem_free(nm, len);
1799 1799                          kmem_free(lnm, llen);
1800 1800                          nfs4_ntov_table_free(&ntov, &sarg);
1801 1801                          resp->attrset = 0;
1802 1802                          goto out;
1803 1803                  }
1804 1804  
1805 1805                  lname = nfscmd_convname(ca, cs->exi, lnm,
1806 1806                      NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1807 1807  
1808 1808                  if (lname == NULL) {
1809 1809                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1810 1810                          if (name != nm)
1811 1811                                  kmem_free(name, MAXPATHLEN + 1);
1812 1812                          kmem_free(nm, len);
1813 1813                          kmem_free(lnm, llen);
1814 1814                          nfs4_ntov_table_free(&ntov, &sarg);
1815 1815                          resp->attrset = 0;
1816 1816                          goto out;
1817 1817                  }
1818 1818  
1819 1819                  error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1820 1820                  if (lname != lnm)
1821 1821                          kmem_free(lname, MAXPATHLEN + 1);
1822 1822                  kmem_free(lnm, llen);
1823 1823                  if (error)
1824 1824                          break;
1825 1825  
1826 1826                  /*
1827 1827                   * Get the initial "after" sequence number, if it fails,
1828 1828                   * set to zero
1829 1829                   */
1830 1830                  iva.va_mask = AT_SEQ;
1831 1831                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1832 1832                          iva.va_seq = 0;
1833 1833  
1834 1834                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1835 1835                      NULL, NULL, NULL);
1836 1836                  if (error)
1837 1837                          break;
1838 1838  
1839 1839                  /*
1840 1840                   * va_seq is not safe over VOP calls, check it again
1841 1841                   * if it has changed zero out iva to force atomic = FALSE.
1842 1842                   */
1843 1843                  iva2.va_mask = AT_SEQ;
1844 1844                  if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1845 1845                      iva2.va_seq != iva.va_seq)
1846 1846                          iva.va_seq = 0;
1847 1847                  break;
1848 1848          default:
1849 1849                  /*
1850 1850                   * probably a special file.
1851 1851                   */
1852 1852                  if ((vap->va_mask & AT_MODE) == 0) {
1853 1853                          vap->va_mode = 0600;    /* default: owner rw only */
1854 1854                          vap->va_mask |= AT_MODE;
1855 1855                  }
1856 1856                  syncval = FNODSYNC;
1857 1857                  /*
1858 1858                   * We know this will only generate one VOP call
1859 1859                   */
1860 1860                  vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1861 1861  
1862 1862                  if (vp == NULL) {
1863 1863                          if (name != nm)
1864 1864                                  kmem_free(name, MAXPATHLEN + 1);
1865 1865                          kmem_free(nm, len);
1866 1866                          nfs4_ntov_table_free(&ntov, &sarg);
1867 1867                          resp->attrset = 0;
1868 1868                          goto out;
1869 1869                  }
1870 1870  
1871 1871                  /*
1872 1872                   * Get the initial "after" sequence number, if it fails,
1873 1873                   * set to zero
1874 1874                   */
1875 1875                  iva.va_mask = AT_SEQ;
1876 1876                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1877 1877                          iva.va_seq = 0;
1878 1878  
1879 1879                  break;
1880 1880          }
1881 1881          if (name != nm)
1882 1882                  kmem_free(name, MAXPATHLEN + 1);
1883 1883          kmem_free(nm, len);
1884 1884  
1885 1885          if (error) {
1886 1886                  *cs->statusp = resp->status = puterrno4(error);
1887 1887          }
1888 1888  
1889 1889          /*
1890 1890           * Force modified data and metadata out to stable storage.
1891 1891           */
1892 1892          (void) VOP_FSYNC(dvp, 0, cr, NULL);
1893 1893  
                   /*
                    * NOTE(review): on the success paths nothing above has written
                    * resp->status, so this test relies on the status having started
                    * out as NFS4_OK -- confirm the caller zeroes the result.
                    */
1894 1894          if (resp->status != NFS4_OK) {
1895 1895                  if (vp != NULL)
1896 1896                          VN_RELE(vp);
1897 1897                  nfs4_ntov_table_free(&ntov, &sarg);
1898 1898                  resp->attrset = 0;
1899 1899                  goto out;
1900 1900          }
1901 1901  
1902 1902          /*
1903 1903           * Finish setup of cinfo response, "before" value already set.
1904 1904           * Get "after" change value, if it fails, simply return the
1905 1905           * before value.
1906 1906           */
1907 1907          ava.va_mask = AT_CTIME|AT_SEQ;
1908 1908          if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1909 1909                  ava.va_ctime = bva.va_ctime;
1910 1910                  ava.va_seq = 0;
1911 1911          }
1912 1912          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1913 1913  
1914 1914          /*
1915 1915           * True verification that object was created with correct
1916 1916           * attrs is impossible.  The attrs could have been changed
1917 1917           * immediately after object creation.  If attributes did
1918 1918           * not verify, the only recourse for the server is to
1919 1919           * destroy the object.  Maybe if some attrs (like gid)
1920 1920           * are set incorrectly, the object should be destroyed;
1921 1921           * however, seems bad as a default policy.  Do we really
1922 1922           * want to destroy an object over one of the times not
1923 1923           * verifying correctly?  For these reasons, the server
1924 1924           * currently sets bits in attrset for createattrs
1925 1925           * that were set; however, no verification is done.
1926 1926           *
1927 1927           * vmask_to_nmask accounts for vattr bits set on create
1928 1928           *      [do_rfs4_set_attrs() only sets resp bits for
1929 1929           *       non-vattr/vfs bits.]
1930 1930           * Mask off any bits set by default so as not to return
1931 1931           * more attrset bits than were requested in createattrs
1932 1932           */
1933 1933          nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1934 1934          resp->attrset &= args->createattrs.attrmask;
1935 1935          nfs4_ntov_table_free(&ntov, &sarg);
1936 1936  
1937 1937          error = makefh4(&cs->fh, vp, cs->exi);
1938 1938          if (error) {
1939 1939                  *cs->statusp = resp->status = puterrno4(error);
1940 1940          }
1941 1941  
1942 1942          /*
1943 1943           * The cinfo.atomic = TRUE only if we got no errors, we have
1944 1944           * non-zero va_seq's, and it has incremented by exactly one
1945 1945           * during the creation and it didn't change during the VOP_LOOKUP
1946 1946           * or VOP_FSYNC.
1947 1947           */
1948 1948          if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1949 1949              iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1950 1950                  resp->cinfo.atomic = TRUE;
1951 1951          else
1952 1952                  resp->cinfo.atomic = FALSE;
1953 1953  
1954 1954          /*
1955 1955           * Force modified metadata out to stable storage.
1956 1956           *
1957 1957           * If an underlying vp exists, pass it to VOP_FSYNC.
1958 1958           */
1959 1959          if (VOP_REALVP(vp, &realvp, NULL) == 0)
1960 1960                  (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1961 1961          else
1962 1962                  (void) VOP_FSYNC(vp, syncval, cr, NULL);
1963 1963  
                   /* makefh4() failed above: drop the new vnode, leave cs->vp alone */
1964 1964          if (resp->status != NFS4_OK) {
1965 1965                  VN_RELE(vp);
1966 1966                  goto out;
1967 1967          }
1968 1968          if (cs->vp)
1969 1969                  VN_RELE(cs->vp);
1970 1970  
1971 1971          cs->vp = vp;
1972 1972          *cs->statusp = resp->status = NFS4_OK;
1973 1973  out:
1974 1974          DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1975 1975              CREATE4res *, resp);
1976 1976  }
1977 1977  
1978 1978  /*ARGSUSED*/
           /*
            * DELEGPURGE entry point.  Apart from firing the start/done DTrace
            * probes, the whole operation is handed to rfs4_op_inval(), which is
            * not shown here (presumably it rejects the operation -- confirm).
            */
1979 1979  static void
1980 1980  rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1981 1981      struct compound_state *cs)
1982 1982  {
1983 1983          DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1984 1984              DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1985 1985  
                   /* All result fields are filled in by the callee. */
1986 1986          rfs4_op_inval(argop, resop, req, cs);
1987 1987  
1988 1988          DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1989 1989              DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1990 1990  }
1991 1991  
1992 1992  /*ARGSUSED*/
1993 1993  static void
1994 1994  rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1995 1995      struct compound_state *cs)
1996 1996  {
1997 1997          DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1998 1998          DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1999 1999          rfs4_deleg_state_t *dsp;
2000 2000          nfsstat4 status;
2001 2001  
2002 2002          DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2003 2003              DELEGRETURN4args *, args);
2004 2004  
2005 2005          status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2006 2006          resp->status = *cs->statusp = status;
2007 2007          if (status != NFS4_OK)
2008 2008                  goto out;
2009 2009  
2010 2010          /* Ensure specified filehandle matches */
2011 2011          if (cs->vp != dsp->rds_finfo->rf_vp) {
2012 2012                  resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2013 2013          } else
2014 2014                  rfs4_return_deleg(dsp, FALSE);
2015 2015  
2016 2016          rfs4_update_lease(dsp->rds_client);
2017 2017  
2018 2018          rfs4_deleg_state_rele(dsp);
2019 2019  out:
2020 2020          DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2021 2021              DELEGRETURN4res *, resp);
2022 2022  }
2023 2023  
2024 2024  /*
2025 2025   * Check to see if a given "flavor" is an explicitly shared flavor.
2026 2026   * The assumption of this routine is the "flavor" is already a valid
2027 2027   * flavor in the secinfo list of "exi".
2028 2028   *
2029 2029   *      e.g.
2030 2030   *              # share -o sec=flavor1 /export
2031 2031   *              # share -o sec=flavor2 /export/home
2032 2032   *
2033 2033   *              flavor2 is not an explicitly shared flavor for /export,
2034 2034   *              however it is in the secinfo list for /export thru the
2035 2035   *              server namespace setup.
2036 2036   */
2037 2037  int
2038 2038  is_exported_sec(int flavor, struct exportinfo *exi)
2039 2039  {
2040 2040          int     i;
2041 2041          struct secinfo *sp;
2042 2042  
2043 2043          sp = exi->exi_export.ex_secinfo;
2044 2044          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2045 2045                  if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2046 2046                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2047 2047                          return (SEC_REF_EXPORTED(&sp[i]));
2048 2048                  }
2049 2049          }
2050 2050  
2051 2051          /* Should not reach this point based on the assumption */
2052 2052          return (0);
2053 2053  }
2054 2054  
2055 2055  /*
2056 2056   * Check if the security flavor used in the request matches what is
2057 2057   * required at the export point or at the root pseudo node (exi_root).
2058 2058   *
2059 2059   * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2060 2060   *
2061 2061   */
2062 2062  static int
2063 2063  secinfo_match_or_authnone(struct compound_state *cs)
2064 2064  {
2065 2065          int     i;
2066 2066          struct secinfo *sp;
2067 2067  
2068 2068          /*
2069 2069           * Check cs->nfsflavor (from the request) against
2070 2070           * the current export data in cs->exi.
2071 2071           */
2072 2072          sp = cs->exi->exi_export.ex_secinfo;
2073 2073          for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2074 2074                  if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2075 2075                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2076 2076                          return (1);
2077 2077          }
2078 2078  
2079 2079          return (0);
2080 2080  }
2081 2081  
2082 2082  /*
2083 2083   * Check the access authority for the client and return the correct error.
2084 2084   */
2085 2085  nfsstat4
2086 2086  call_checkauth4(struct compound_state *cs, struct svc_req *req)
2087 2087  {
2088 2088          int     authres;
2089 2089  
2090 2090          /*
2091 2091           * First, check if the security flavor used in the request
2092 2092           * are among the flavors set in the server namespace.
2093 2093           */
2094 2094          if (!secinfo_match_or_authnone(cs)) {
2095 2095                  *cs->statusp = NFS4ERR_WRONGSEC;
2096 2096                  return (*cs->statusp);
2097 2097          }
2098 2098  
2099 2099          authres = checkauth4(cs, req);
2100 2100  
2101 2101          if (authres > 0) {
2102 2102                  *cs->statusp = NFS4_OK;
2103 2103                  if (! (cs->access & CS_ACCESS_LIMITED))
2104 2104                          cs->access = CS_ACCESS_OK;
2105 2105          } else if (authres == 0) {
2106 2106                  *cs->statusp = NFS4ERR_ACCESS;
2107 2107          } else if (authres == -2) {
2108 2108                  *cs->statusp = NFS4ERR_WRONGSEC;
2109 2109          } else {
2110 2110                  *cs->statusp = NFS4ERR_DELAY;
2111 2111          }
2112 2112          return (*cs->statusp);
2113 2113  }
2114 2114  
2115 2115  /*
2116 2116   * bitmap4_to_attrmask is called by getattr and readdir.
2117 2117   * It sets up the vattr mask and determines whether vfsstat call is needed
2118 2118   * based on the input bitmap.
2119 2119   * Returns nfsv4 status.
2120 2120   */
2121 2121  static nfsstat4
2122 2122  bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2123 2123  {
2124 2124          int i;
2125 2125          uint_t  va_mask;
2126 2126          struct statvfs64 *sbp = sargp->sbp;
2127 2127  
2128 2128          sargp->sbp = NULL;
2129 2129          sargp->flag = 0;
2130 2130          sargp->rdattr_error = NFS4_OK;
2131 2131          sargp->mntdfid_set = FALSE;
2132 2132          if (sargp->cs->vp)
2133 2133                  sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2134 2134                      FH4_ATTRDIR | FH4_NAMEDATTR);
2135 2135          else
2136 2136                  sargp->xattr = 0;
2137 2137  
2138 2138          /*
2139 2139           * Set rdattr_error_req to true if errors are to be returned per
2140 2140           * failed entry rather than failing the readdir.
2141 2141           */
2142 2142          if (breq & FATTR4_RDATTR_ERROR_MASK)
2143 2143                  sargp->rdattr_error_req = 1;
2144 2144          else
2145 2145                  sargp->rdattr_error_req = 0;
2146 2146  
2147 2147          /*
2148 2148           * generate the va_mask
2149 2149           * Handle the easy cases first
2150 2150           */
2151 2151          switch (breq) {
2152 2152          case NFS4_NTOV_ATTR_MASK:
2153 2153                  sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2154 2154                  return (NFS4_OK);
2155 2155  
2156 2156          case NFS4_FS_ATTR_MASK:
2157 2157                  sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2158 2158                  sargp->sbp = sbp;
2159 2159                  return (NFS4_OK);
2160 2160  
2161 2161          case NFS4_NTOV_ATTR_CACHE_MASK:
2162 2162                  sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2163 2163                  return (NFS4_OK);
2164 2164  
2165 2165          case FATTR4_LEASE_TIME_MASK:
2166 2166                  sargp->vap->va_mask = 0;
2167 2167                  return (NFS4_OK);
2168 2168  
2169 2169          default:
2170 2170                  va_mask = 0;
2171 2171                  for (i = 0; i < nfs4_ntov_map_size; i++) {
2172 2172                          if ((breq & nfs4_ntov_map[i].fbit) &&
2173 2173                              nfs4_ntov_map[i].vbit)
2174 2174                                  va_mask |= nfs4_ntov_map[i].vbit;
2175 2175                  }
2176 2176  
2177 2177                  /*
2178 2178                   * Check if vfsstat is needed
2179 2179                   */
2180 2180                  if (breq & NFS4_FS_ATTR_MASK)
2181 2181                          sargp->sbp = sbp;
2182 2182  
2183 2183                  sargp->vap->va_mask = va_mask;
2184 2184                  return (NFS4_OK);
2185 2185          }
2186 2186          /* NOTREACHED */
2187 2187  }
2188 2188  
2189 2189  /*
2190 2190   * bitmap4_get_sysattrs is called by getattr and readdir.
2191 2191   * It calls both VOP_GETATTR and VFS_STATVFS to get the attrs.
2192 2192   * Returns nfsv4 status.
2193 2193   */
2194 2194  static nfsstat4
2195 2195  bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2196 2196  {
2197 2197          int error;
2198 2198          struct compound_state *cs = sargp->cs;
2199 2199          vnode_t *vp = cs->vp;
2200 2200  
2201 2201          if (sargp->sbp != NULL) {
2202 2202                  if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2203 2203                          sargp->sbp = NULL;      /* to identify error */
2204 2204                          return (puterrno4(error));
2205 2205                  }
2206 2206          }
2207 2207  
2208 2208          return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2209 2209  }
2210 2210  
2211 2211  static void
2212 2212  nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2213 2213  {
2214 2214          ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2215 2215              KM_SLEEP);
2216 2216          ntovp->attrcnt = 0;
2217 2217          ntovp->vfsstat = FALSE;
2218 2218  }
2219 2219  
2220 2220  static void
2221 2221  nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2222 2222      struct nfs4_svgetit_arg *sargp)
2223 2223  {
2224 2224          int i;
2225 2225          union nfs4_attr_u *na;
2226 2226          uint8_t *amap;
2227 2227  
2228 2228          /*
2229 2229           * XXX Should do the same checks for whether the bit is set
2230 2230           */
2231 2231          for (i = 0, na = ntovp->na, amap = ntovp->amap;
2232 2232              i < ntovp->attrcnt; i++, na++, amap++) {
2233 2233                  (void) (*nfs4_ntov_map[*amap].sv_getit)(
2234 2234                      NFS4ATTR_FREEIT, sargp, na);
2235 2235          }
2236 2236          if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2237 2237                  /*
2238 2238                   * xdr_free for getattr will be done later
2239 2239                   */
2240 2240                  for (i = 0, na = ntovp->na, amap = ntovp->amap;
2241 2241                      i < ntovp->attrcnt; i++, na++, amap++) {
2242 2242                          xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2243 2243                  }
2244 2244          }
2245 2245          kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2246 2246  }
2247 2247  
2248 2248  /*
2249 2249   * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2250 2250   */
2251 2251  static nfsstat4
2252 2252  do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2253 2253      struct nfs4_svgetit_arg *sargp)
2254 2254  {
2255 2255          int error = 0;
2256 2256          int i, k;
2257 2257          struct nfs4_ntov_table ntov;
2258 2258          XDR xdr;
2259 2259          ulong_t xdr_size;
2260 2260          char *xdr_attrs;
2261 2261          nfsstat4 status = NFS4_OK;
2262 2262          nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2263 2263          union nfs4_attr_u *na;
2264 2264          uint8_t *amap;
2265 2265  
2266 2266          sargp->op = NFS4ATTR_GETIT;
2267 2267          sargp->flag = 0;
2268 2268  
2269 2269          fattrp->attrmask = 0;
2270 2270          /* if no bits requested, then return empty fattr4 */
2271 2271          if (breq == 0) {
2272 2272                  fattrp->attrlist4_len = 0;
2273 2273                  fattrp->attrlist4 = NULL;
2274 2274                  return (NFS4_OK);
2275 2275          }
2276 2276  
2277 2277          /*
2278 2278           * return NFS4ERR_INVAL when client requests write-only attrs
2279 2279           */
2280 2280          if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2281 2281                  return (NFS4ERR_INVAL);
2282 2282  
2283 2283          nfs4_ntov_table_init(&ntov);
2284 2284          na = ntov.na;
2285 2285          amap = ntov.amap;
2286 2286  
2287 2287          /*
2288 2288           * Now loop to get or verify the attrs
2289 2289           */
2290 2290          for (i = 0; i < nfs4_ntov_map_size; i++) {
2291 2291                  if (breq & nfs4_ntov_map[i].fbit) {
2292 2292                          if ((*nfs4_ntov_map[i].sv_getit)(
2293 2293                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2294 2294  
2295 2295                                  error = (*nfs4_ntov_map[i].sv_getit)(
2296 2296                                      NFS4ATTR_GETIT, sargp, na);
2297 2297  
2298 2298                                  /*
2299 2299                                   * Possible error values:
2300 2300                                   * >0 if sv_getit failed to
2301 2301                                   * get the attr; 0 if succeeded;
2302 2302                                   * <0 if rdattr_error and the
2303 2303                                   * attribute cannot be returned.
2304 2304                                   */
2305 2305                                  if (error && !(sargp->rdattr_error_req))
2306 2306                                          goto done;
2307 2307                                  /*
2308 2308                                   * Any error here is for this entry only
2309 2309                                   */
2310 2310                                  if (error == 0) {
2311 2311                                          fattrp->attrmask |=
2312 2312                                              nfs4_ntov_map[i].fbit;
2313 2313                                          *amap++ =
2314 2314                                              (uint8_t)nfs4_ntov_map[i].nval;
2315 2315                                          na++;
2316 2316                                          (ntov.attrcnt)++;
2317 2317                                  } else if ((error > 0) &&
2318 2318                                      (sargp->rdattr_error == NFS4_OK)) {
2319 2319                                          sargp->rdattr_error = puterrno4(error);
2320 2320                                  }
2321 2321                                  error = 0;
2322 2322                          }
2323 2323                  }
2324 2324          }
2325 2325  
2326 2326          /*
2327 2327           * If rdattr_error was set after the return value for it was assigned,
2328 2328           * update it.
2329 2329           */
2330 2330          if (prev_rdattr_error != sargp->rdattr_error) {
2331 2331                  na = ntov.na;
2332 2332                  amap = ntov.amap;
2333 2333                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2334 2334                          k = *amap;
2335 2335                          if (k < FATTR4_RDATTR_ERROR) {
2336 2336                                  continue;
2337 2337                          }
2338 2338                          if ((k == FATTR4_RDATTR_ERROR) &&
2339 2339                              ((*nfs4_ntov_map[k].sv_getit)(
2340 2340                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2341 2341  
2342 2342                                  (void) (*nfs4_ntov_map[k].sv_getit)(
2343 2343                                      NFS4ATTR_GETIT, sargp, na);
2344 2344                          }
2345 2345                          break;
2346 2346                  }
2347 2347          }
2348 2348  
2349 2349          xdr_size = 0;
2350 2350          na = ntov.na;
2351 2351          amap = ntov.amap;
2352 2352          for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2353 2353                  xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2354 2354          }
2355 2355  
2356 2356          fattrp->attrlist4_len = xdr_size;
2357 2357          if (xdr_size) {
2358 2358                  /* freed by rfs4_op_getattr_free() */
2359 2359                  fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2360 2360  
2361 2361                  xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2362 2362  
2363 2363                  na = ntov.na;
2364 2364                  amap = ntov.amap;
2365 2365                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2366 2366                          if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2367 2367                                  DTRACE_PROBE1(nfss__e__getattr4_encfail,
2368 2368                                      int, *amap);
2369 2369                                  status = NFS4ERR_SERVERFAULT;
2370 2370                                  break;
2371 2371                          }
2372 2372                  }
2373 2373                  /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2374 2374          } else {
2375 2375                  fattrp->attrlist4 = NULL;
2376 2376          }
2377 2377  done:
2378 2378  
2379 2379          nfs4_ntov_table_free(&ntov, sargp);
2380 2380  
2381 2381          if (error != 0)
2382 2382                  status = puterrno4(error);
2383 2383  
2384 2384          return (status);
2385 2385  }
2386 2386  
2387 2387  /* ARGSUSED */
2388 2388  static void
2389 2389  rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2390 2390      struct compound_state *cs)
2391 2391  {
2392 2392          GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2393 2393          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2394 2394          struct nfs4_svgetit_arg sarg;
2395 2395          struct statvfs64 sb;
2396 2396          nfsstat4 status;
2397 2397  
2398 2398          DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2399 2399              GETATTR4args *, args);
2400 2400  
2401 2401          if (cs->vp == NULL) {
2402 2402                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2403 2403                  goto out;
2404 2404          }
2405 2405  
2406 2406          if (cs->access == CS_ACCESS_DENIED) {
2407 2407                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2408 2408                  goto out;
2409 2409          }
2410 2410  
2411 2411          sarg.sbp = &sb;
2412 2412          sarg.cs = cs;
2413 2413          sarg.is_referral = B_FALSE;
2414 2414  
2415 2415          status = bitmap4_to_attrmask(args->attr_request, &sarg);
2416 2416          if (status == NFS4_OK) {
2417 2417  
2418 2418                  status = bitmap4_get_sysattrs(&sarg);
2419 2419                  if (status == NFS4_OK) {
2420 2420  
2421 2421                          /* Is this a referral? */
2422 2422                          if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2423 2423                                  /* Older V4 Solaris client sees a link */
2424 2424                                  if (client_is_downrev(req))
2425 2425                                          sarg.vap->va_type = VLNK;
2426 2426                                  else
2427 2427                                          sarg.is_referral = B_TRUE;
2428 2428                          }
2429 2429  
2430 2430                          status = do_rfs4_op_getattr(args->attr_request,
2431 2431                              &resp->obj_attributes, &sarg);
2432 2432                  }
2433 2433          }
2434 2434          *cs->statusp = resp->status = status;
2435 2435  out:
2436 2436          DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2437 2437              GETATTR4res *, resp);
2438 2438  }
2439 2439  
2440 2440  static void
2441 2441  rfs4_op_getattr_free(nfs_resop4 *resop)
2442 2442  {
2443 2443          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2444 2444  
2445 2445          nfs4_fattr4_free(&resp->obj_attributes);
2446 2446  }
2447 2447  
2448 2448  /* ARGSUSED */
2449 2449  static void
2450 2450  rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2451 2451      struct compound_state *cs)
2452 2452  {
2453 2453          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2454 2454  
2455 2455          DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2456 2456  
2457 2457          if (cs->vp == NULL) {
2458 2458                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2459 2459                  goto out;
2460 2460          }
2461 2461          if (cs->access == CS_ACCESS_DENIED) {
2462 2462                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2463 2463                  goto out;
2464 2464          }
2465 2465  
2466 2466          /* check for reparse point at the share point */
2467 2467          if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2468 2468                  /* it's all bad */
2469 2469                  cs->exi->exi_moved = 1;
2470 2470                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2471 2471                  DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2472 2472                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2473 2473                  return;
2474 2474          }
2475 2475  
2476 2476          /* check for reparse point at vp */
2477 2477          if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2478 2478                  /* it's not all bad */
2479 2479                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2480 2480                  DTRACE_PROBE2(nfs4serv__func__referral__moved,
2481 2481                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2482 2482                  return;
2483 2483          }
2484 2484  
2485 2485          resp->object.nfs_fh4_val =
2486 2486              kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2487 2487          nfs_fh4_copy(&cs->fh, &resp->object);
2488 2488          *cs->statusp = resp->status = NFS4_OK;
2489 2489  out:
2490 2490          DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2491 2491              GETFH4res *, resp);
2492 2492  }
2493 2493  
2494 2494  static void
2495 2495  rfs4_op_getfh_free(nfs_resop4 *resop)
2496 2496  {
2497 2497          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2498 2498  
2499 2499          if (resp->status == NFS4_OK &&
2500 2500              resp->object.nfs_fh4_val != NULL) {
2501 2501                  kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2502 2502                  resp->object.nfs_fh4_val = NULL;
2503 2503                  resp->object.nfs_fh4_len = 0;
2504 2504          }
2505 2505  }
2506 2506  
2507 2507  /*
2508 2508   * illegal: args: void
2509 2509   *          res : status (NFS4ERR_OP_ILLEGAL)
2510 2510   */
2511 2511  /* ARGSUSED */
2512 2512  static void
2513 2513  rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2514 2514      struct svc_req *req, struct compound_state *cs)
2515 2515  {
2516 2516          ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2517 2517  
2518 2518          resop->resop = OP_ILLEGAL;
2519 2519          *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2520 2520  }
2521 2521  
2522 2522  /*
2523 2523   * link: args: SAVED_FH: file, CURRENT_FH: target directory
2524 2524   *       res: status. If success - CURRENT_FH unchanged, return change_info
2525 2525   */
2526 2526  /* ARGSUSED */
2527 2527  static void
2528 2528  rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2529 2529      struct compound_state *cs)
2530 2530  {
2531 2531          LINK4args *args = &argop->nfs_argop4_u.oplink;
2532 2532          LINK4res *resp = &resop->nfs_resop4_u.oplink;
2533 2533          int error;
2534 2534          vnode_t *vp;
2535 2535          vnode_t *dvp;
2536 2536          struct vattr bdva, idva, adva;
2537 2537          char *nm;
2538 2538          uint_t  len;
2539 2539          struct sockaddr *ca;
2540 2540          char *name = NULL;
2541 2541          nfsstat4 status;
2542 2542  
2543 2543          DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2544 2544              LINK4args *, args);
2545 2545  
2546 2546          /* SAVED_FH: source object */
2547 2547          vp = cs->saved_vp;
2548 2548          if (vp == NULL) {
2549 2549                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2550 2550                  goto out;
2551 2551          }
2552 2552  
2553 2553          /* CURRENT_FH: target directory */
2554 2554          dvp = cs->vp;
2555 2555          if (dvp == NULL) {
2556 2556                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2557 2557                  goto out;
2558 2558          }
2559 2559  
2560 2560          /*
2561 2561           * If there is a non-shared filesystem mounted on this vnode,
2562 2562           * do not allow linking any file in this directory.
2563 2563           */
2564 2564          if (vn_ismntpt(dvp)) {
2565 2565                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2566 2566                  goto out;
2567 2567          }
2568 2568  
2569 2569          if (cs->access == CS_ACCESS_DENIED) {
2570 2570                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2571 2571                  goto out;
2572 2572          }
2573 2573  
2574 2574          /* Check source object's type validity */
2575 2575          if (vp->v_type == VDIR) {
2576 2576                  *cs->statusp = resp->status = NFS4ERR_ISDIR;
2577 2577                  goto out;
2578 2578          }
2579 2579  
2580 2580          /* Check target directory's type */
2581 2581          if (dvp->v_type != VDIR) {
2582 2582                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2583 2583                  goto out;
2584 2584          }
2585 2585  
2586 2586          if (cs->saved_exi != cs->exi) {
2587 2587                  *cs->statusp = resp->status = NFS4ERR_XDEV;
2588 2588                  goto out;
2589 2589          }
2590 2590  
2591 2591          status = utf8_dir_verify(&args->newname);
2592 2592          if (status != NFS4_OK) {
2593 2593                  *cs->statusp = resp->status = status;
2594 2594                  goto out;
2595 2595          }
2596 2596  
2597 2597          nm = utf8_to_fn(&args->newname, &len, NULL);
2598 2598          if (nm == NULL) {
2599 2599                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2600 2600                  goto out;
2601 2601          }
2602 2602  
2603 2603          if (len > MAXNAMELEN) {
2604 2604                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2605 2605                  kmem_free(nm, len);
2606 2606                  goto out;
2607 2607          }
2608 2608  
2609 2609          if (rdonly4(req, cs)) {
2610 2610                  *cs->statusp = resp->status = NFS4ERR_ROFS;
2611 2611                  kmem_free(nm, len);
2612 2612                  goto out;
2613 2613          }
2614 2614  
2615 2615          /* Get "before" change value */
2616 2616          bdva.va_mask = AT_CTIME|AT_SEQ;
2617 2617          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2618 2618          if (error) {
2619 2619                  *cs->statusp = resp->status = puterrno4(error);
2620 2620                  kmem_free(nm, len);
2621 2621                  goto out;
2622 2622          }
2623 2623  
2624 2624          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2625 2625          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2626 2626              MAXPATHLEN  + 1);
2627 2627  
2628 2628          if (name == NULL) {
2629 2629                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2630 2630                  kmem_free(nm, len);
2631 2631                  goto out;
2632 2632          }
2633 2633  
2634 2634          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2635 2635  
2636 2636          error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2637 2637  
2638 2638          if (nm != name)
2639 2639                  kmem_free(name, MAXPATHLEN + 1);
2640 2640          kmem_free(nm, len);
2641 2641  
2642 2642          /*
2643 2643           * Get the initial "after" sequence number, if it fails, set to zero
2644 2644           */
2645 2645          idva.va_mask = AT_SEQ;
2646 2646          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2647 2647                  idva.va_seq = 0;
2648 2648  
2649 2649          /*
2650 2650           * Force modified data and metadata out to stable storage.
2651 2651           */
2652 2652          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2653 2653          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2654 2654  
2655 2655          if (error) {
2656 2656                  *cs->statusp = resp->status = puterrno4(error);
2657 2657                  goto out;
2658 2658          }
2659 2659  
2660 2660          /*
2661 2661           * Get "after" change value, if it fails, simply return the
2662 2662           * before value.
2663 2663           */
2664 2664          adva.va_mask = AT_CTIME|AT_SEQ;
2665 2665          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2666 2666                  adva.va_ctime = bdva.va_ctime;
2667 2667                  adva.va_seq = 0;
2668 2668          }
2669 2669  
2670 2670          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2671 2671  
2672 2672          /*
2673 2673           * The cinfo.atomic = TRUE only if we have
2674 2674           * non-zero va_seq's, and it has incremented by exactly one
2675 2675           * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2676 2676           */
2677 2677          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2678 2678              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2679 2679                  resp->cinfo.atomic = TRUE;
2680 2680          else
2681 2681                  resp->cinfo.atomic = FALSE;
2682 2682  
2683 2683          *cs->statusp = resp->status = NFS4_OK;
2684 2684  out:
2685 2685          DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2686 2686              LINK4res *, resp);
2687 2687  }
2688 2688  
2689 2689  /*
2690 2690   * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2691 2691   */
2692 2692  
2693 2693  /* ARGSUSED */
2694 2694  static nfsstat4
2695 2695  do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2696 2696  {
2697 2697          int error;
2698 2698          int different_export = 0;
2699 2699          vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2700 2700          struct exportinfo *exi = NULL, *pre_exi = NULL;
2701 2701          nfsstat4 stat;
2702 2702          fid_t fid;
2703 2703          int attrdir, dotdot, walk;
2704 2704          bool_t is_newvp = FALSE;
2705 2705  
2706 2706          if (cs->vp->v_flag & V_XATTRDIR) {
2707 2707                  attrdir = 1;
2708 2708                  ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2709 2709          } else {
2710 2710                  attrdir = 0;
2711 2711                  ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2712 2712          }
2713 2713  
2714 2714          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2715 2715  
2716 2716          /*
2717 2717           * If dotdotting, then need to check whether it's
2718 2718           * above the root of a filesystem, or above an
2719 2719           * export point.
2720 2720           */
2721 2721          if (dotdot) {
2722 2722                  ASSERT(cs->exi != NULL);
2723 2723                  ASSERT3U(cs->exi->exi_zoneid, ==, curzone->zone_id);
2724 2724                  /*
  
    | 
      ↓ open down ↓ | 
    1720 lines elided | 
    
      ↑ open up ↑ | 
  
2725 2725                   * If dotdotting at the root of a filesystem, then
2726 2726                   * need to traverse back to the mounted-on filesystem
2727 2727                   * and do the dotdot lookup there.
2728 2728                   */
2729 2729                  if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2730 2730  
2731 2731                          /*
2732 2732                           * If at the system root, then can
2733 2733                           * go up no further.
2734 2734                           */
2735      -                        if (VN_CMP(cs->vp, ZONE_ROOTVP()))
     2735 +                        if (VN_IS_CURZONEROOT(cs->vp))
2736 2736                                  return (puterrno4(ENOENT));
2737 2737  
2738 2738                          /*
2739 2739                           * Traverse back to the mounted-on filesystem
2740 2740                           */
2741      -                        cs->vp = untraverse(cs->vp);
     2741 +                        cs->vp = untraverse(cs->exi->exi_ne, cs->vp);
2742 2742  
2743 2743                          /*
2744 2744                           * Set the different_export flag so we remember
2745 2745                           * to pick up a new exportinfo entry for
2746 2746                           * this new filesystem.
2747 2747                           */
2748 2748                          different_export = 1;
2749 2749                  } else {
2750 2750  
2751 2751                          /*
2752 2752                           * If dotdotting above an export point then set
2753 2753                           * the different_export to get new export info.
2754 2754                           */
2755 2755                          different_export = nfs_exported(cs->exi, cs->vp);
2756 2756                  }
2757 2757          }
2758 2758  
2759 2759          error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2760 2760              NULL, NULL, NULL);
2761 2761          if (error)
2762 2762                  return (puterrno4(error));
2763 2763  
2764 2764          /*
2765 2765           * If the vnode is in a pseudo filesystem, check whether it is visible.
2766 2766           *
2767 2767           * XXX if the vnode is a symlink and it is not visible in
2768 2768           * a pseudo filesystem, return ENOENT (not following symlink).
2769 2769           * V4 client can not mount such symlink. This is a regression
2770 2770           * from V2/V3.
2771 2771           *
2772 2772           * In the same exported filesystem, if the security flavor used
2773 2773           * is not an explicitly shared flavor, limit the view to the visible
2774 2774           * list entries only. This is not a WRONGSEC case because it's already
2775 2775           * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2776 2776           */
2777 2777          if (!different_export &&
2778 2778              (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2779 2779              cs->access & CS_ACCESS_LIMITED)) {
2780 2780                  if (! nfs_visible(cs->exi, vp, &different_export)) {
2781 2781                          VN_RELE(vp);
2782 2782                          return (puterrno4(ENOENT));
2783 2783                  }
2784 2784          }
2785 2785  
2786 2786          /*
2787 2787           * If it's a mountpoint, then traverse it.
2788 2788           */
2789 2789          if (vn_ismntpt(vp)) {
2790 2790                  pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2791 2791                  pre_tvp = vp;           /* save pre-traversed vnode     */
2792 2792  
2793 2793                  /*
2794 2794                   * hold pre_tvp to counteract rele by traverse.  We will
2795 2795                   * need pre_tvp below if checkexport4 fails
2796 2796                   */
2797 2797                  VN_HOLD(pre_tvp);
2798 2798                  if ((error = traverse(&vp)) != 0) {
2799 2799                          VN_RELE(vp);
2800 2800                          VN_RELE(pre_tvp);
2801 2801                          return (puterrno4(error));
2802 2802                  }
2803 2803                  different_export = 1;
2804 2804          } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2805 2805                  /*
2806 2806                   * The vfsp comparison is to handle the case where
2807 2807                   * a LOFS mount is shared.  lo_lookup traverses mount points,
2808 2808                   * and NFS is unaware of local fs transitions because
2809 2809                   * v_vfsmountedhere isn't set.  For this special LOFS case,
2810 2810                   * the dir and the obj returned by lookup will have different
2811 2811                   * vfs ptrs.
2812 2812                   */
2813 2813                  different_export = 1;
2814 2814          }
2815 2815  
2816 2816          if (different_export) {
2817 2817  
2818 2818                  bzero(&fid, sizeof (fid));
2819 2819                  fid.fid_len = MAXFIDSZ;
2820 2820                  error = vop_fid_pseudo(vp, &fid);
2821 2821                  if (error) {
2822 2822                          VN_RELE(vp);
2823 2823                          if (pre_tvp)
2824 2824                                  VN_RELE(pre_tvp);
2825 2825                          return (puterrno4(error));
2826 2826                  }
2827 2827  
2828 2828                  if (dotdot)
2829 2829                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2830 2830                  else
2831 2831                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2832 2832  
2833 2833                  if (exi == NULL) {
2834 2834                          if (pre_tvp) {
2835 2835                                  /*
2836 2836                                   * If this vnode is a mounted-on vnode,
2837 2837                                   * but the mounted-on file system is not
2838 2838                                   * exported, send back the filehandle for
2839 2839                                   * the mounted-on vnode, not the root of
2840 2840                                   * the mounted-on file system.
2841 2841                                   */
2842 2842                                  VN_RELE(vp);
2843 2843                                  vp = pre_tvp;
2844 2844                                  exi = pre_exi;
2845 2845                          } else {
2846 2846                                  VN_RELE(vp);
2847 2847                                  return (puterrno4(EACCES));
2848 2848                          }
2849 2849                  } else if (pre_tvp) {
2850 2850                          /* we're done with pre_tvp now. release extra hold */
2851 2851                          VN_RELE(pre_tvp);
2852 2852                  }
2853 2853  
2854 2854                  cs->exi = exi;
2855 2855  
2856 2856                  /*
2857 2857                   * Now we do a checkauth4. The reason is that
2858 2858                   * this client/user may not have access to the new
2859 2859                   * exported file system, and if they do,
2860 2860                   * the client/user may be mapped to a different uid.
2861 2861                   *
2862 2862                   * We start with a new cr, because the checkauth4 done
2863 2863                   * in the PUT*FH operation over wrote the cred's uid,
2864 2864                   * gid, etc, and we want the real thing before calling
2865 2865                   * checkauth4()
2866 2866                   */
2867 2867                  crfree(cs->cr);
2868 2868                  cs->cr = crdup(cs->basecr);
2869 2869  
2870 2870                  oldvp = cs->vp;
2871 2871                  cs->vp = vp;
2872 2872                  is_newvp = TRUE;
2873 2873  
2874 2874                  stat = call_checkauth4(cs, req);
2875 2875                  if (stat != NFS4_OK) {
2876 2876                          VN_RELE(cs->vp);
2877 2877                          cs->vp = oldvp;
2878 2878                          return (stat);
2879 2879                  }
2880 2880          }
2881 2881  
2882 2882          /*
2883 2883           * After various NFS checks, do a label check on the path
2884 2884           * component. The label on this path should either be the
2885 2885           * global zone's label or a zone's label. We are only
2886 2886           * interested in the zone's label because exported files
2887 2887           * in global zone is accessible (though read-only) to
2888 2888           * clients. The exportability/visibility check is already
2889 2889           * done before reaching this code.
2890 2890           */
2891 2891          if (is_system_labeled()) {
2892 2892                  bslabel_t *clabel;
2893 2893  
2894 2894                  ASSERT(req->rq_label != NULL);
2895 2895                  clabel = req->rq_label;
2896 2896                  DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2897 2897                      "got client label from request(1)", struct svc_req *, req);
2898 2898  
2899 2899                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2900 2900                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2901 2901                              cs->exi)) {
2902 2902                                  error = EACCES;
2903 2903                                  goto err_out;
2904 2904                          }
2905 2905                  } else {
2906 2906                          /*
2907 2907                           * We grant access to admin_low label clients
2908 2908                           * only if the client is trusted, i.e. also
2909 2909                           * running Solaris Trusted Extension.
2910 2910                           */
2911 2911                          struct sockaddr *ca;
2912 2912                          int             addr_type;
2913 2913                          void            *ipaddr;
2914 2914                          tsol_tpc_t      *tp;
2915 2915  
2916 2916                          ca = (struct sockaddr *)svc_getrpccaller(
2917 2917                              req->rq_xprt)->buf;
2918 2918                          if (ca->sa_family == AF_INET) {
2919 2919                                  addr_type = IPV4_VERSION;
2920 2920                                  ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2921 2921                          } else if (ca->sa_family == AF_INET6) {
2922 2922                                  addr_type = IPV6_VERSION;
2923 2923                                  ipaddr = &((struct sockaddr_in6 *)
2924 2924                                      ca)->sin6_addr;
2925 2925                          }
2926 2926                          tp = find_tpc(ipaddr, addr_type, B_FALSE);
2927 2927                          if (tp == NULL || tp->tpc_tp.tp_doi !=
2928 2928                              l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2929 2929                              SUN_CIPSO) {
2930 2930                                  if (tp != NULL)
2931 2931                                          TPC_RELE(tp);
2932 2932                                  error = EACCES;
2933 2933                                  goto err_out;
2934 2934                          }
2935 2935                          TPC_RELE(tp);
2936 2936                  }
2937 2937          }
2938 2938  
2939 2939          error = makefh4(&cs->fh, vp, cs->exi);
2940 2940  
2941 2941  err_out:
2942 2942          if (error) {
2943 2943                  if (is_newvp) {
2944 2944                          VN_RELE(cs->vp);
2945 2945                          cs->vp = oldvp;
2946 2946                  } else
2947 2947                          VN_RELE(vp);
2948 2948                  return (puterrno4(error));
2949 2949          }
2950 2950  
2951 2951          if (!is_newvp) {
2952 2952                  if (cs->vp)
2953 2953                          VN_RELE(cs->vp);
2954 2954                  cs->vp = vp;
2955 2955          } else if (oldvp)
2956 2956                  VN_RELE(oldvp);
2957 2957  
2958 2958          /*
2959 2959           * if did lookup on attrdir and didn't lookup .., set named
2960 2960           * attr fh flag
2961 2961           */
2962 2962          if (attrdir && ! dotdot)
2963 2963                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2964 2964  
2965 2965          /* Assume false for now, open proc will set this */
2966 2966          cs->mandlock = FALSE;
2967 2967  
2968 2968          return (NFS4_OK);
2969 2969  }
2970 2970  
2971 2971  /* ARGSUSED */
2972 2972  static void
2973 2973  rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2974 2974      struct compound_state *cs)
2975 2975  {
2976 2976          LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2977 2977          LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2978 2978          char *nm;
2979 2979          uint_t len;
2980 2980          struct sockaddr *ca;
2981 2981          char *name = NULL;
2982 2982          nfsstat4 status;
2983 2983  
2984 2984          DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2985 2985              LOOKUP4args *, args);
2986 2986  
2987 2987          if (cs->vp == NULL) {
2988 2988                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2989 2989                  goto out;
2990 2990          }
2991 2991  
2992 2992          if (cs->vp->v_type == VLNK) {
2993 2993                  *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2994 2994                  goto out;
2995 2995          }
2996 2996  
2997 2997          if (cs->vp->v_type != VDIR) {
2998 2998                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2999 2999                  goto out;
3000 3000          }
3001 3001  
3002 3002          status = utf8_dir_verify(&args->objname);
3003 3003          if (status != NFS4_OK) {
3004 3004                  *cs->statusp = resp->status = status;
3005 3005                  goto out;
3006 3006          }
3007 3007  
3008 3008          nm = utf8_to_str(&args->objname, &len, NULL);
3009 3009          if (nm == NULL) {
3010 3010                  *cs->statusp = resp->status = NFS4ERR_INVAL;
3011 3011                  goto out;
3012 3012          }
3013 3013  
3014 3014          if (len > MAXNAMELEN) {
3015 3015                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3016 3016                  kmem_free(nm, len);
3017 3017                  goto out;
3018 3018          }
3019 3019  
3020 3020          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3021 3021          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3022 3022              MAXPATHLEN  + 1);
3023 3023  
3024 3024          if (name == NULL) {
3025 3025                  *cs->statusp = resp->status = NFS4ERR_INVAL;
3026 3026                  kmem_free(nm, len);
3027 3027                  goto out;
3028 3028          }
3029 3029  
3030 3030          *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3031 3031  
3032 3032          if (name != nm)
3033 3033                  kmem_free(name, MAXPATHLEN + 1);
3034 3034          kmem_free(nm, len);
3035 3035  
3036 3036  out:
3037 3037          DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3038 3038              LOOKUP4res *, resp);
3039 3039  }
3040 3040  
3041 3041  /* ARGSUSED */
3042 3042  static void
3043 3043  rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3044 3044      struct compound_state *cs)
3045 3045  {
3046 3046          LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3047 3047  
3048 3048          DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3049 3049  
3050 3050          if (cs->vp == NULL) {
3051 3051                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3052 3052                  goto out;
3053 3053          }
3054 3054  
3055 3055          if (cs->vp->v_type != VDIR) {
3056 3056                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3057 3057                  goto out;
3058 3058          }
3059 3059  
3060 3060          *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3061 3061  
3062 3062          /*
3063 3063           * From NFSV4 Specification, LOOKUPP should not check for
3064 3064           * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3065 3065           */
3066 3066          if (resp->status == NFS4ERR_WRONGSEC) {
3067 3067                  *cs->statusp = resp->status = NFS4_OK;
3068 3068          }
3069 3069  
3070 3070  out:
3071 3071          DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3072 3072              LOOKUPP4res *, resp);
3073 3073  }
3074 3074  
3075 3075  
3076 3076  /*ARGSUSED2*/
3077 3077  static void
3078 3078  rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3079 3079      struct compound_state *cs)
3080 3080  {
3081 3081          OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3082 3082          OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3083 3083          vnode_t         *avp = NULL;
3084 3084          int             lookup_flags = LOOKUP_XATTR, error;
3085 3085          int             exp_ro = 0;
3086 3086  
3087 3087          DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3088 3088              OPENATTR4args *, args);
3089 3089  
3090 3090          if (cs->vp == NULL) {
3091 3091                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3092 3092                  goto out;
3093 3093          }
3094 3094  
3095 3095          if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3096 3096              !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3097 3097                  *cs->statusp = resp->status = puterrno4(ENOTSUP);
3098 3098                  goto out;
3099 3099          }
3100 3100  
3101 3101          /*
3102 3102           * If file system supports passing ACE mask to VOP_ACCESS then
3103 3103           * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3104 3104           */
3105 3105  
3106 3106          if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3107 3107                  error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3108 3108                      V_ACE_MASK, cs->cr, NULL);
3109 3109          else
3110 3110                  error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3111 3111                      (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3112 3112                      (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3113 3113  
3114 3114          if (error) {
3115 3115                  *cs->statusp = resp->status = puterrno4(EACCES);
3116 3116                  goto out;
3117 3117          }
3118 3118  
3119 3119          /*
3120 3120           * The CREATE_XATTR_DIR VOP flag cannot be specified if
3121 3121           * the file system is exported read-only -- regardless of
3122 3122           * createdir flag.  Otherwise the attrdir would be created
3123 3123           * (assuming server fs isn't mounted readonly locally).  If
3124 3124           * VOP_LOOKUP returns ENOENT in this case, the error will
3125 3125           * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3126 3126           * because specfs has no VOP_LOOKUP op, so the macro would
3127 3127           * return ENOSYS.  EINVAL is returned by all (current)
3128 3128           * Solaris file system implementations when any of their
3129 3129           * restrictions are violated (xattr(dir) can't have xattrdir).
3130 3130           * Returning NOTSUPP is more appropriate in this case
3131 3131           * because the object will never be able to have an attrdir.
3132 3132           */
3133 3133          if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3134 3134                  lookup_flags |= CREATE_XATTR_DIR;
3135 3135  
3136 3136          error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3137 3137              NULL, NULL, NULL);
3138 3138  
3139 3139          if (error) {
3140 3140                  if (error == ENOENT && args->createdir && exp_ro)
3141 3141                          *cs->statusp = resp->status = puterrno4(EROFS);
3142 3142                  else if (error == EINVAL || error == ENOSYS)
3143 3143                          *cs->statusp = resp->status = puterrno4(ENOTSUP);
3144 3144                  else
3145 3145                          *cs->statusp = resp->status = puterrno4(error);
3146 3146                  goto out;
3147 3147          }
3148 3148  
3149 3149          ASSERT(avp->v_flag & V_XATTRDIR);
3150 3150  
3151 3151          error = makefh4(&cs->fh, avp, cs->exi);
3152 3152  
3153 3153          if (error) {
3154 3154                  VN_RELE(avp);
3155 3155                  *cs->statusp = resp->status = puterrno4(error);
3156 3156                  goto out;
3157 3157          }
3158 3158  
3159 3159          VN_RELE(cs->vp);
3160 3160          cs->vp = avp;
3161 3161  
3162 3162          /*
3163 3163           * There is no requirement for an attrdir fh flag
3164 3164           * because the attrdir has a vnode flag to distinguish
3165 3165           * it from regular (non-xattr) directories.  The
3166 3166           * FH4_ATTRDIR flag is set for future sanity checks.
3167 3167           */
3168 3168          set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3169 3169          *cs->statusp = resp->status = NFS4_OK;
3170 3170  
3171 3171  out:
3172 3172          DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3173 3173              OPENATTR4res *, resp);
3174 3174  }
3175 3175  
3176 3176  static int
3177 3177  do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3178 3178      caller_context_t *ct)
3179 3179  {
3180 3180          int error;
3181 3181          int i;
3182 3182          clock_t delaytime;
3183 3183  
3184 3184          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3185 3185  
3186 3186          /*
3187 3187           * Don't block on mandatory locks. If this routine returns
3188 3188           * EAGAIN, the caller should return NFS4ERR_LOCKED.
3189 3189           */
3190 3190          uio->uio_fmode = FNONBLOCK;
3191 3191  
3192 3192          for (i = 0; i < rfs4_maxlock_tries; i++) {
3193 3193  
3194 3194  
3195 3195                  if (direction == FREAD) {
3196 3196                          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3197 3197                          error = VOP_READ(vp, uio, ioflag, cred, ct);
3198 3198                          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3199 3199                  } else {
3200 3200                          (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3201 3201                          error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3202 3202                          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3203 3203                  }
3204 3204  
3205 3205                  if (error != EAGAIN)
3206 3206                          break;
3207 3207  
3208 3208                  if (i < rfs4_maxlock_tries - 1) {
3209 3209                          delay(delaytime);
3210 3210                          delaytime *= 2;
3211 3211                  }
3212 3212          }
3213 3213  
3214 3214          return (error);
3215 3215  }
3216 3216  
3217 3217  /* ARGSUSED */
           /*
            * READ: read up to args->count bytes starting at args->offset from the
            * current filehandle's vnode (cs->vp).
            *
            * On success resp carries the data length, a data pointer, the mblk
            * holding the data (NULL when the data is returned through an RDMA
            * write chunk list), the eof flag and the wlist.  The status is stored
            * in both *cs->statusp and resp->status.
            *
            * Three buffer strategies are visible below: zero-copy "loaned"
            * buffers obtained with VOP_REQZCBUF (only when nfs_loaned_buffers is
            * set and RDMA is not in use), an mblk from rfs_read_alloc(), or the
            * client's RDMA write chunk.
            */
3218 3218  static void
3219 3219  rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3220 3220      struct compound_state *cs)
3221 3221  {
3222 3222          READ4args *args = &argop->nfs_argop4_u.opread;
3223 3223          READ4res *resp = &resop->nfs_resop4_u.opread;
3224 3224          int error;
3225 3225          int verror;
3226 3226          vnode_t *vp;
3227 3227          struct vattr va;
3228 3228          struct iovec iov, *iovp = NULL;
3229 3229          int iovcnt;
3230 3230          struct uio uio;
3231 3231          u_offset_t offset;
3232 3232          bool_t *deleg = &cs->deleg;
3233 3233          nfsstat4 stat;
3234 3234          int in_crit = 0;
3235 3235          mblk_t *mp = NULL;
3236 3236          int alloc_err = 0;
3237 3237          int rdma_used = 0;
3238 3238          int loaned_buffers;
3239 3239          caller_context_t ct;
3240 3240          struct uio *uiop;
3241 3241  
3242 3242          DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3243 3243              READ4args, args);
3244 3244  
3245 3245          vp = cs->vp;
3246 3246          if (vp == NULL) {
3247 3247                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3248 3248                  goto out;
3249 3249          }
3250 3250          if (cs->access == CS_ACCESS_DENIED) {
3251 3251                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3252 3252                  goto out;
3253 3253          }
3254 3254  
                   /*
                    * ct is passed here uninitialized and then used by every VOP
                    * call below; presumably rfs4_check_stateid() fills it in --
                    * TODO confirm.
                    */
3255 3255          if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3256 3256              deleg, TRUE, &ct)) != NFS4_OK) {
3257 3257                  *cs->statusp = resp->status = stat;
3258 3258                  goto out;
3259 3259          }
3260 3260  
3261 3261          /*
3262 3262           * Enter the critical region before calling VOP_RWLOCK
3263 3263           * to avoid a deadlock with write requests.
3264 3264           */
3265 3265          if (nbl_need_check(vp)) {
3266 3266                  nbl_start_crit(vp, RW_READER);
3267 3267                  in_crit = 1;
3268 3268                  if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3269 3269                      &ct)) {
3270 3270                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
3271 3271                          goto out;
3272 3272                  }
3273 3273          }
3274 3274  
                   /*
                    * A write chunk list in the arguments means the data goes back
                    * via RDMA; the list must be able to hold the requested count.
                    */
3275 3275          if (args->wlist) {
3276 3276                  if (args->count > clist_len(args->wlist)) {
3277 3277                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3278 3278                          goto out;
3279 3279                  }
3280 3280                  rdma_used = 1;
3281 3281          }
3282 3282  
3283 3283          /* use loaned buffers for TCP */
3284 3284          loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3285 3285  
3286 3286          va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3287 3287          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3288 3288  
3289 3289          /*
3290 3290           * If we can't get the attributes, then we can't do the
3291 3291           * right access checking.  So, we'll fail the request.
3292 3292           */
3293 3293          if (verror) {
3294 3294                  *cs->statusp = resp->status = puterrno4(verror);
3295 3295                  goto out;
3296 3296          }
3297 3297  
3298 3298          if (vp->v_type != VREG) {
3299 3299                  *cs->statusp = resp->status =
3300 3300                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3301 3301                  goto out;
3302 3302          }
3303 3303  
                   /*
                    * The file's owner skips the access check.  Anyone else is
                    * refused only if both the VREAD and the VEXEC checks fail; the
                    * error reported is the one from the VEXEC check.
                    */
3304 3304          if (crgetuid(cs->cr) != va.va_uid &&
3305 3305              (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3306 3306              (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3307 3307                  *cs->statusp = resp->status = puterrno4(error);
3308 3308                  goto out;
3309 3309          }
3310 3310  
3311 3311          if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3312 3312                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3313 3313                  goto out;
3314 3314          }
3315 3315  
                   /* Reading at or past end of file: no data, eof set. */
3316 3316          offset = args->offset;
3317 3317          if (offset >= va.va_size) {
3318 3318                  *cs->statusp = resp->status = NFS4_OK;
3319 3319                  resp->eof = TRUE;
3320 3320                  resp->data_len = 0;
3321 3321                  resp->data_val = NULL;
3322 3322                  resp->mblk = NULL;
3323 3323                  /* RDMA */
3324 3324                  resp->wlist = args->wlist;
3325 3325                  resp->wlist_len = resp->data_len;
3326 3326                  *cs->statusp = resp->status = NFS4_OK;
3327 3327                  if (resp->wlist)
3328 3328                          clist_zero_len(resp->wlist);
3329 3329                  goto out;
3330 3330          }
3331 3331  
                   /* Zero-length read: succeed with no data; eof stays FALSE. */
3332 3332          if (args->count == 0) {
3333 3333                  *cs->statusp = resp->status = NFS4_OK;
3334 3334                  resp->eof = FALSE;
3335 3335                  resp->data_len = 0;
3336 3336                  resp->data_val = NULL;
3337 3337                  resp->mblk = NULL;
3338 3338                  /* RDMA */
3339 3339                  resp->wlist = args->wlist;
3340 3340                  resp->wlist_len = resp->data_len;
3341 3341                  if (resp->wlist)
3342 3342                          clist_zero_len(resp->wlist);
3343 3343                  goto out;
3344 3344          }
3345 3345  
3346 3346          /*
3347 3347           * Do not allocate memory more than maximum allowed
3348 3348           * transfer size
3349 3349           */
3350 3350          if (args->count > rfs4_tsize(req))
3351 3351                  args->count = rfs4_tsize(req);
3352 3352  
                   /*
                    * Zero-copy attempt.  When VOP_REQZCBUF() returns 0 the code
                    * jumps straight to doio_read with uiop pointing at the xuio,
                    * bypassing the uio/mblk setup below.  On failure the xuio is
                    * freed and the ordinary path is used instead.
                    */
3353 3353          if (loaned_buffers) {
3354 3354                  uiop = (uio_t *)rfs_setup_xuio(vp);
3355 3355                  ASSERT(uiop != NULL);
3356 3356                  uiop->uio_segflg = UIO_SYSSPACE;
3357 3357                  uiop->uio_loffset = args->offset;
3358 3358                  uiop->uio_resid = args->count;
3359 3359  
3360 3360                  /* Jump to do the read if successful */
3361 3361                  if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3362 3362                          /*
3363 3363                           * Need to hold the vnode until after VOP_RETZCBUF()
3364 3364                           * is called.
3365 3365                           */
3366 3366                          VN_HOLD(vp);
3367 3367                          goto doio_read;
3368 3368                  }
3369 3369  
3370 3370                  DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3371 3371                      uiop->uio_loffset, int, uiop->uio_resid);
3372 3372  
3373 3373                  uiop->uio_extflg = 0;
3374 3374  
3375 3375                  /* failure to setup for zero copy */
3376 3376                  rfs_free_xuio((void *)uiop);
3377 3377                  loaned_buffers = 0;
3378 3378          }
3379 3379  
3380 3380          /*
3381 3381           * If returning data via RDMA Write, then grab the chunk list. If we
3382 3382           * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3383 3383           */
3384 3384          if (rdma_used) {
3385 3385                  mp = NULL;
3386 3386                  (void) rdma_get_wchunk(req, &iov, args->wlist);
3387 3387                  uio.uio_iov = &iov;
3388 3388                  uio.uio_iovcnt = 1;
3389 3389          } else {
3390 3390                  /*
3391 3391                   * mp will contain the data to be sent out in the read reply.
3392 3392                   * It will be freed after the reply has been sent.
3393 3393                   */
3394 3394                  mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3395 3395                  ASSERT(mp != NULL);
                           /*
                            * alloc_err is never assigned after its initialization
                            * to 0 in this function, so this assertion cannot fire.
                            */
3396 3396                  ASSERT(alloc_err == 0);
3397 3397                  uio.uio_iov = iovp;
3398 3398                  uio.uio_iovcnt = iovcnt;
3399 3399          }
3400 3400  
3401 3401          uio.uio_segflg = UIO_SYSSPACE;
3402 3402          uio.uio_extflg = UIO_COPY_CACHED;
3403 3403          uio.uio_loffset = args->offset;
3404 3404          uio.uio_resid = args->count;
3405 3405          uiop = &uio;
3406 3406  
3407 3407  doio_read:
3408 3408          error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3409 3409  
                   /*
                    * Fetch the size again after the read so eof can be computed.
                    * A failure here does not fail the request; it only causes eof
                    * to be reported as FALSE below.
                    */
3410 3410          va.va_mask = AT_SIZE;
3411 3411          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3412 3412  
                   /*
                    * NOTE(review): on the loaned-buffer path mp is still NULL
                    * here, and nothing visible in this function releases the xuio
                    * or the extra VN_HOLD taken above when the read fails --
                    * confirm whether that cleanup happens elsewhere.
                    */
3413 3413          if (error) {
3414 3414                  if (mp)
3415 3415                          freemsg(mp);
3416 3416                  *cs->statusp = resp->status = puterrno4(error);
3417 3417                  goto out;
3418 3418          }
3419 3419  
3420 3420          /* make mblk using zc buffers */
3421 3421          if (loaned_buffers) {
3422 3422                  mp = uio_to_mblk(uiop);
3423 3423                  ASSERT(mp != NULL);
3424 3424          }
3425 3425  
3426 3426          *cs->statusp = resp->status = NFS4_OK;
3427 3427  
                   /* Bytes actually read = requested count minus what is left. */
3428 3428          ASSERT(uiop->uio_resid >= 0);
3429 3429          resp->data_len = args->count - uiop->uio_resid;
3430 3430          if (mp) {
3431 3431                  resp->data_val = (char *)mp->b_datap->db_base;
3432 3432                  rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3433 3433          } else {
                           /* No mblk: the data sits in the RDMA write chunk. */
3434 3434                  resp->data_val = (caddr_t)iov.iov_base;
3435 3435          }
3436 3436  
3437 3437          resp->mblk = mp;
3438 3438  
                   /* eof only when the read ended exactly at the current size. */
3439 3439          if (!verror && offset + resp->data_len == va.va_size)
3440 3440                  resp->eof = TRUE;
3441 3441          else
3442 3442                  resp->eof = FALSE;
3443 3443  
3444 3444          if (rdma_used) {
3445 3445                  if (!rdma_setup_read_data4(args, resp)) {
3446 3446                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3447 3447                  }
3448 3448          } else {
3449 3449                  resp->wlist = NULL;
3450 3450          }
3451 3451  
3452 3452  out:
3453 3453          if (in_crit)
3454 3454                  nbl_end_crit(vp);
3455 3455  
                   /*
                    * iovp is non-NULL only if rfs_read_alloc() set it above;
                    * presumably it is an array of iovcnt iovecs -- verify against
                    * rfs_read_alloc().
                    */
3456 3456          if (iovp != NULL)
3457 3457                  kmem_free(iovp, iovcnt * sizeof (struct iovec));
3458 3458  
3459 3459          DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3460 3460              READ4res *, resp);
3461 3461  }
3462 3462  
3463 3463  static void
3464 3464  rfs4_op_read_free(nfs_resop4 *resop)
3465 3465  {
3466 3466          READ4res        *resp = &resop->nfs_resop4_u.opread;
3467 3467  
3468 3468          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3469 3469                  freemsg(resp->mblk);
3470 3470                  resp->mblk = NULL;
3471 3471                  resp->data_val = NULL;
3472 3472                  resp->data_len = 0;
3473 3473          }
3474 3474  }
3475 3475  
3476 3476  static void
3477 3477  rfs4_op_readdir_free(nfs_resop4 * resop)
3478 3478  {
3479 3479          READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3480 3480  
3481 3481          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3482 3482                  freeb(resp->mblk);
3483 3483                  resp->mblk = NULL;
3484 3484                  resp->data_len = 0;
3485 3485          }
3486 3486  }
3487 3487  
3488 3488  
3489 3489  /* ARGSUSED */
3490 3490  static void
3491 3491  rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3492 3492      struct compound_state *cs)
3493 3493  {
3494 3494          PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3495 3495          int             error;
3496 3496          vnode_t         *vp;
3497 3497          struct exportinfo *exi, *sav_exi;
3498 3498          nfs_fh4_fmt_t   *fh_fmtp;
3499 3499          nfs_export_t *ne = nfs_get_export();
3500 3500  
3501 3501          DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3502 3502  
3503 3503          if (cs->vp) {
3504 3504                  VN_RELE(cs->vp);
3505 3505                  cs->vp = NULL;
3506 3506          }
3507 3507  
3508 3508          if (cs->cr)
3509 3509                  crfree(cs->cr);
3510 3510  
3511 3511          cs->cr = crdup(cs->basecr);
3512 3512  
3513 3513          vp = ne->exi_public->exi_vp;
3514 3514          if (vp == NULL) {
3515 3515                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3516 3516                  goto out;
3517 3517          }
3518 3518  
3519 3519          error = makefh4(&cs->fh, vp, ne->exi_public);
3520 3520          if (error != 0) {
3521 3521                  *cs->statusp = resp->status = puterrno4(error);
3522 3522                  goto out;
3523 3523          }
3524 3524          sav_exi = cs->exi;
3525 3525          if (ne->exi_public == ne->exi_root) {
3526 3526                  /*
3527 3527                   * No filesystem is actually shared public, so we default
3528 3528                   * to exi_root. In this case, we must check whether root
3529 3529                   * is exported.
3530 3530                   */
3531 3531                  fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3532 3532  
3533 3533                  /*
3534 3534                   * if root filesystem is exported, the exportinfo struct that we
3535 3535                   * should use is what checkexport4 returns, because root_exi is
3536 3536                   * actually a mostly empty struct.
3537 3537                   */
3538 3538                  exi = checkexport4(&fh_fmtp->fh4_fsid,
3539 3539                      (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3540 3540                  cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3541 3541          } else {
3542 3542                  /*
3543 3543                   * it's a properly shared filesystem
3544 3544                   */
3545 3545                  cs->exi = ne->exi_public;
3546 3546          }
3547 3547  
3548 3548          if (is_system_labeled()) {
3549 3549                  bslabel_t *clabel;
3550 3550  
3551 3551                  ASSERT(req->rq_label != NULL);
3552 3552                  clabel = req->rq_label;
3553 3553                  DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3554 3554                      "got client label from request(1)",
3555 3555                      struct svc_req *, req);
3556 3556                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3557 3557                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3558 3558                              cs->exi)) {
3559 3559                                  *cs->statusp = resp->status =
3560 3560                                      NFS4ERR_SERVERFAULT;
3561 3561                                  goto out;
3562 3562                          }
3563 3563                  }
3564 3564          }
3565 3565  
3566 3566          VN_HOLD(vp);
3567 3567          cs->vp = vp;
3568 3568  
3569 3569          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3570 3570                  VN_RELE(cs->vp);
3571 3571                  cs->vp = NULL;
3572 3572                  cs->exi = sav_exi;
3573 3573                  goto out;
3574 3574          }
3575 3575  
3576 3576          *cs->statusp = resp->status = NFS4_OK;
3577 3577  out:
3578 3578          DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3579 3579              PUTPUBFH4res *, resp);
3580 3580  }
3581 3581  
/*
 * XXX - issue with put*fh operations. Suppose /export/home is exported.
 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
 * or joe have restrictive search permissions, then we shouldn't let
 * the client get a file handle. This is easy to enforce. However, we
 * don't know what security flavor should be used until we resolve the
 * path name. Another complication is uid mapping. If root is
 * the user, then it will be mapped to the anonymous user by default,
 * but we won't know that till we've resolved the path name. And we won't
 * know what the anonymous user is.
 * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor of
 * the target object matches that of the request, and if root was the
 * caller, check for the root= and anon= options, and if necessary,
 * repeat the lookup using the right cred_t. But that's not done yet.
 */
3598 3598  /* ARGSUSED */
3599 3599  static void
3600 3600  rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3601 3601      struct compound_state *cs)
3602 3602  {
3603 3603          PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3604 3604          PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3605 3605          nfs_fh4_fmt_t *fh_fmtp;
3606 3606  
3607 3607          DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3608 3608              PUTFH4args *, args);
3609 3609  
3610 3610          if (cs->vp) {
3611 3611                  VN_RELE(cs->vp);
3612 3612                  cs->vp = NULL;
3613 3613          }
3614 3614  
3615 3615          if (cs->cr) {
3616 3616                  crfree(cs->cr);
3617 3617                  cs->cr = NULL;
3618 3618          }
3619 3619  
3620 3620  
3621 3621          if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3622 3622                  *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3623 3623                  goto out;
3624 3624          }
3625 3625  
3626 3626          fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3627 3627          cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3628 3628              NULL);
3629 3629  
3630 3630          if (cs->exi == NULL) {
3631 3631                  *cs->statusp = resp->status = NFS4ERR_STALE;
3632 3632                  goto out;
3633 3633          }
3634 3634  
3635 3635          cs->cr = crdup(cs->basecr);
3636 3636  
3637 3637          ASSERT(cs->cr != NULL);
3638 3638  
3639 3639          if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3640 3640                  *cs->statusp = resp->status;
3641 3641                  goto out;
3642 3642          }
3643 3643  
3644 3644          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3645 3645                  VN_RELE(cs->vp);
3646 3646                  cs->vp = NULL;
3647 3647                  goto out;
3648 3648          }
3649 3649  
3650 3650          nfs_fh4_copy(&args->object, &cs->fh);
3651 3651          *cs->statusp = resp->status = NFS4_OK;
3652 3652          cs->deleg = FALSE;
3653 3653  
3654 3654  out:
3655 3655          DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3656 3656              PUTFH4res *, resp);
3657 3657  }
3658 3658  
3659 3659  /* ARGSUSED */
3660 3660  static void
3661 3661  rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3662 3662      struct compound_state *cs)
3663 3663  {
3664 3664          PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3665 3665          int error;
3666 3666          fid_t fid;
3667 3667          struct exportinfo *exi, *sav_exi;
3668 3668  
3669 3669          DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3670 3670  
3671 3671          if (cs->vp) {
3672 3672                  VN_RELE(cs->vp);
3673 3673                  cs->vp = NULL;
3674 3674          }
3675 3675  
3676 3676          if (cs->cr)
3677 3677                  crfree(cs->cr);
3678 3678  
3679 3679          cs->cr = crdup(cs->basecr);
3680 3680  
3681 3681          /*
3682 3682           * Using rootdir, the system root vnode,
3683 3683           * get its fid.
3684 3684           */
3685 3685          bzero(&fid, sizeof (fid));
3686 3686          fid.fid_len = MAXFIDSZ;
3687 3687          error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3688 3688          if (error != 0) {
3689 3689                  *cs->statusp = resp->status = puterrno4(error);
3690 3690                  goto out;
3691 3691          }
3692 3692  
3693 3693          /*
3694 3694           * Then use the root fsid & fid it to find out if it's exported
3695 3695           *
3696 3696           * If the server root isn't exported directly, then
3697 3697           * it should at least be a pseudo export based on
3698 3698           * one or more exports further down in the server's
3699 3699           * file tree.
3700 3700           */
3701 3701          exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3702 3702          if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3703 3703                  NFS4_DEBUG(rfs4_debug,
3704 3704                      (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3705 3705                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3706 3706                  goto out;
3707 3707          }
3708 3708  
3709 3709          /*
3710 3710           * Now make a filehandle based on the root
3711 3711           * export and root vnode.
3712 3712           */
3713 3713          error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3714 3714          if (error != 0) {
3715 3715                  *cs->statusp = resp->status = puterrno4(error);
3716 3716                  goto out;
3717 3717          }
3718 3718  
3719 3719          sav_exi = cs->exi;
3720 3720          cs->exi = exi;
3721 3721  
3722 3722          VN_HOLD(ZONE_ROOTVP());
3723 3723          cs->vp = ZONE_ROOTVP();
3724 3724  
3725 3725          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3726 3726                  VN_RELE(cs->vp);
3727 3727                  cs->vp = NULL;
3728 3728                  cs->exi = sav_exi;
3729 3729                  goto out;
3730 3730          }
3731 3731  
3732 3732          *cs->statusp = resp->status = NFS4_OK;
3733 3733          cs->deleg = FALSE;
3734 3734  out:
3735 3735          DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3736 3736              PUTROOTFH4res *, resp);
3737 3737  }
3738 3738  
3739 3739  /*
3740 3740   * readlink: args: CURRENT_FH.
3741 3741   *      res: status. If success - CURRENT_FH unchanged, return linktext.
3742 3742   */
3743 3743  
3744 3744  /* ARGSUSED */
3745 3745  static void
3746 3746  rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3747 3747      struct compound_state *cs)
3748 3748  {
3749 3749          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3750 3750          int error;
3751 3751          vnode_t *vp;
3752 3752          struct iovec iov;
3753 3753          struct vattr va;
3754 3754          struct uio uio;
3755 3755          char *data;
3756 3756          struct sockaddr *ca;
3757 3757          char *name = NULL;
3758 3758          int is_referral;
3759 3759  
3760 3760          DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3761 3761  
3762 3762          /* CURRENT_FH: directory */
3763 3763          vp = cs->vp;
3764 3764          if (vp == NULL) {
3765 3765                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3766 3766                  goto out;
3767 3767          }
3768 3768  
3769 3769          if (cs->access == CS_ACCESS_DENIED) {
3770 3770                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3771 3771                  goto out;
3772 3772          }
3773 3773  
3774 3774          /* Is it a referral? */
3775 3775          if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3776 3776  
3777 3777                  is_referral = 1;
3778 3778  
3779 3779          } else {
3780 3780  
3781 3781                  is_referral = 0;
3782 3782  
3783 3783                  if (vp->v_type == VDIR) {
3784 3784                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
3785 3785                          goto out;
3786 3786                  }
3787 3787  
3788 3788                  if (vp->v_type != VLNK) {
3789 3789                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3790 3790                          goto out;
3791 3791                  }
3792 3792  
3793 3793          }
3794 3794  
3795 3795          va.va_mask = AT_MODE;
3796 3796          error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3797 3797          if (error) {
3798 3798                  *cs->statusp = resp->status = puterrno4(error);
3799 3799                  goto out;
3800 3800          }
3801 3801  
3802 3802          if (MANDLOCK(vp, va.va_mode)) {
3803 3803                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3804 3804                  goto out;
3805 3805          }
3806 3806  
3807 3807          data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3808 3808  
3809 3809          if (is_referral) {
3810 3810                  char *s;
3811 3811                  size_t strsz;
3812 3812  
3813 3813                  /* Get an artificial symlink based on a referral */
3814 3814                  s = build_symlink(vp, cs->cr, &strsz);
3815 3815                  global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3816 3816                  DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3817 3817                      vnode_t *, vp, char *, s);
3818 3818                  if (s == NULL)
3819 3819                          error = EINVAL;
3820 3820                  else {
3821 3821                          error = 0;
3822 3822                          (void) strlcpy(data, s, MAXPATHLEN + 1);
3823 3823                          kmem_free(s, strsz);
3824 3824                  }
3825 3825  
3826 3826          } else {
3827 3827  
3828 3828                  iov.iov_base = data;
3829 3829                  iov.iov_len = MAXPATHLEN;
3830 3830                  uio.uio_iov = &iov;
3831 3831                  uio.uio_iovcnt = 1;
3832 3832                  uio.uio_segflg = UIO_SYSSPACE;
3833 3833                  uio.uio_extflg = UIO_COPY_CACHED;
3834 3834                  uio.uio_loffset = 0;
3835 3835                  uio.uio_resid = MAXPATHLEN;
3836 3836  
3837 3837                  error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3838 3838  
3839 3839                  if (!error)
3840 3840                          *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3841 3841          }
3842 3842  
3843 3843          if (error) {
3844 3844                  kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3845 3845                  *cs->statusp = resp->status = puterrno4(error);
3846 3846                  goto out;
3847 3847          }
3848 3848  
3849 3849          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3850 3850          name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3851 3851              MAXPATHLEN  + 1);
3852 3852  
3853 3853          if (name == NULL) {
3854 3854                  /*
3855 3855                   * Even though the conversion failed, we return
3856 3856                   * something. We just don't translate it.
3857 3857                   */
3858 3858                  name = data;
3859 3859          }
3860 3860  
3861 3861          /*
3862 3862           * treat link name as data
3863 3863           */
3864 3864          (void) str_to_utf8(name, (utf8string *)&resp->link);
3865 3865  
3866 3866          if (name != data)
3867 3867                  kmem_free(name, MAXPATHLEN + 1);
3868 3868          kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3869 3869          *cs->statusp = resp->status = NFS4_OK;
3870 3870  
3871 3871  out:
3872 3872          DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3873 3873              READLINK4res *, resp);
3874 3874  }
3875 3875  
3876 3876  static void
3877 3877  rfs4_op_readlink_free(nfs_resop4 *resop)
3878 3878  {
3879 3879          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3880 3880          utf8string *symlink = (utf8string *)&resp->link;
3881 3881  
3882 3882          if (symlink->utf8string_val) {
3883 3883                  UTF8STRING_FREE(*symlink)
3884 3884          }
3885 3885  }
3886 3886  
3887 3887  /*
3888 3888   * release_lockowner:
3889 3889   *      Release any state associated with the supplied
3890 3890   *      lockowner. Note if any lo_state is holding locks we will not
3891 3891   *      rele that lo_state and thus the lockowner will not be destroyed.
3892 3892   *      A client using lock after the lock owner stateid has been released
3893 3893   *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3894 3894   *      to reissue the lock with new_lock_owner set to TRUE.
3895 3895   *      args: lock_owner
3896 3896   *      res:  status
3897 3897   */
3898 3898  /* ARGSUSED */
3899 3899  static void
3900 3900  rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3901 3901      struct svc_req *req, struct compound_state *cs)
3902 3902  {
3903 3903          RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3904 3904          RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3905 3905          rfs4_lockowner_t *lo;
3906 3906          rfs4_openowner_t *oo;
3907 3907          rfs4_state_t *sp;
3908 3908          rfs4_lo_state_t *lsp;
3909 3909          rfs4_client_t *cp;
3910 3910          bool_t create = FALSE;
3911 3911          locklist_t *llist;
3912 3912          sysid_t sysid;
3913 3913  
3914 3914          DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3915 3915              cs, RELEASE_LOCKOWNER4args *, ap);
3916 3916  
3917 3917          /* Make sure there is a clientid around for this request */
3918 3918          cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3919 3919  
3920 3920          if (cp == NULL) {
3921 3921                  *cs->statusp = resp->status =
3922 3922                      rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3923 3923                  goto out;
3924 3924          }
3925 3925          rfs4_client_rele(cp);
3926 3926  
3927 3927          lo = rfs4_findlockowner(&ap->lock_owner, &create);
3928 3928          if (lo == NULL) {
3929 3929                  *cs->statusp = resp->status = NFS4_OK;
3930 3930                  goto out;
3931 3931          }
3932 3932          ASSERT(lo->rl_client != NULL);
3933 3933  
3934 3934          /*
3935 3935           * Check for EXPIRED client. If so will reap state with in a lease
3936 3936           * period or on next set_clientid_confirm step
3937 3937           */
3938 3938          if (rfs4_lease_expired(lo->rl_client)) {
3939 3939                  rfs4_lockowner_rele(lo);
3940 3940                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3941 3941                  goto out;
3942 3942          }
3943 3943  
3944 3944          /*
3945 3945           * If no sysid has been assigned, then no locks exist; just return.
3946 3946           */
3947 3947          rfs4_dbe_lock(lo->rl_client->rc_dbe);
3948 3948          if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3949 3949                  rfs4_lockowner_rele(lo);
3950 3950                  rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3951 3951                  goto out;
3952 3952          }
3953 3953  
3954 3954          sysid = lo->rl_client->rc_sysidt;
3955 3955          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3956 3956  
3957 3957          /*
3958 3958           * Mark the lockowner invalid.
3959 3959           */
3960 3960          rfs4_dbe_hide(lo->rl_dbe);
3961 3961  
3962 3962          /*
3963 3963           * sysid-pid pair should now not be used since the lockowner is
3964 3964           * invalid. If the client were to instantiate the lockowner again
3965 3965           * it would be assigned a new pid. Thus we can get the list of
3966 3966           * current locks.
3967 3967           */
3968 3968  
3969 3969          llist = flk_get_active_locks(sysid, lo->rl_pid);
3970 3970          /* If we are still holding locks fail */
3971 3971          if (llist != NULL) {
3972 3972  
3973 3973                  *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3974 3974  
3975 3975                  flk_free_locklist(llist);
3976 3976                  /*
3977 3977                   * We need to unhide the lockowner so the client can
3978 3978                   * try it again. The bad thing here is if the client
3979 3979                   * has a logic error that took it here in the first place
3980 3980                   * they probably have lost accounting of the locks that it
3981 3981                   * is holding. So we may have dangling state until the
3982 3982                   * open owner state is reaped via close. One scenario
3983 3983                   * that could possibly occur is that the client has
3984 3984                   * sent the unlock request(s) in separate threads
3985 3985                   * and has not waited for the replies before sending the
3986 3986                   * RELEASE_LOCKOWNER request. Presumably, it would expect
3987 3987                   * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3988 3988                   * reissuing the request.
3989 3989                   */
3990 3990                  rfs4_dbe_unhide(lo->rl_dbe);
3991 3991                  rfs4_lockowner_rele(lo);
3992 3992                  goto out;
3993 3993          }
3994 3994  
3995 3995          /*
3996 3996           * For the corresponding client we need to check each open
3997 3997           * owner for any opens that have lockowner state associated
3998 3998           * with this lockowner.
3999 3999           */
4000 4000  
4001 4001          rfs4_dbe_lock(lo->rl_client->rc_dbe);
4002 4002          for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4003 4003              oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4004 4004  
4005 4005                  rfs4_dbe_lock(oo->ro_dbe);
4006 4006                  for (sp = list_head(&oo->ro_statelist); sp != NULL;
4007 4007                      sp = list_next(&oo->ro_statelist, sp)) {
4008 4008  
4009 4009                          rfs4_dbe_lock(sp->rs_dbe);
4010 4010                          for (lsp = list_head(&sp->rs_lostatelist);
4011 4011                              lsp != NULL;
4012 4012                              lsp = list_next(&sp->rs_lostatelist, lsp)) {
4013 4013                                  if (lsp->rls_locker == lo) {
4014 4014                                          rfs4_dbe_lock(lsp->rls_dbe);
4015 4015                                          rfs4_dbe_invalidate(lsp->rls_dbe);
4016 4016                                          rfs4_dbe_unlock(lsp->rls_dbe);
4017 4017                                  }
4018 4018                          }
4019 4019                          rfs4_dbe_unlock(sp->rs_dbe);
4020 4020                  }
4021 4021                  rfs4_dbe_unlock(oo->ro_dbe);
4022 4022          }
4023 4023          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4024 4024  
4025 4025          rfs4_lockowner_rele(lo);
4026 4026  
4027 4027          *cs->statusp = resp->status = NFS4_OK;
4028 4028  
4029 4029  out:
4030 4030          DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4031 4031              cs, RELEASE_LOCKOWNER4res *, resp);
4032 4032  }
4033 4033  
4034 4034  /*
4035 4035   * short utility function to lookup a file and recall the delegation
4036 4036   */
4037 4037  static rfs4_file_t *
4038 4038  rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4039 4039      int *lkup_error, cred_t *cr)
4040 4040  {
4041 4041          vnode_t *vp;
4042 4042          rfs4_file_t *fp = NULL;
4043 4043          bool_t fcreate = FALSE;
4044 4044          int error;
4045 4045  
4046 4046          if (vpp)
4047 4047                  *vpp = NULL;
4048 4048  
4049 4049          if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4050 4050              NULL)) == 0) {
4051 4051                  if (vp->v_type == VREG)
4052 4052                          fp = rfs4_findfile(vp, NULL, &fcreate);
4053 4053                  if (vpp)
4054 4054                          *vpp = vp;
4055 4055                  else
4056 4056                          VN_RELE(vp);
4057 4057          }
4058 4058  
4059 4059          if (lkup_error)
4060 4060                  *lkup_error = error;
4061 4061  
4062 4062          return (fp);
4063 4063  }
4064 4064  
4065 4065  /*
4066 4066   * remove: args: CURRENT_FH: directory; name.
4067 4067   *      res: status. If success - CURRENT_FH unchanged, return change_info
4068 4068   *              for directory.
4069 4069   */
4070 4070  /* ARGSUSED */
4071 4071  static void
4072 4072  rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4073 4073      struct compound_state *cs)
4074 4074  {
4075 4075          REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4076 4076          REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4077 4077          int error;
4078 4078          vnode_t *dvp, *vp;
4079 4079          struct vattr bdva, idva, adva;
4080 4080          char *nm;
4081 4081          uint_t len;
4082 4082          rfs4_file_t *fp;
4083 4083          int in_crit = 0;
4084 4084          bslabel_t *clabel;
4085 4085          struct sockaddr *ca;
4086 4086          char *name = NULL;
4087 4087          nfsstat4 status;
4088 4088  
4089 4089          DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4090 4090              REMOVE4args *, args);
4091 4091  
4092 4092          /* CURRENT_FH: directory */
4093 4093          dvp = cs->vp;
4094 4094          if (dvp == NULL) {
4095 4095                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4096 4096                  goto out;
4097 4097          }
4098 4098  
4099 4099          if (cs->access == CS_ACCESS_DENIED) {
4100 4100                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4101 4101                  goto out;
4102 4102          }
4103 4103  
4104 4104          /*
4105 4105           * If there is an unshared filesystem mounted on this vnode,
4106 4106           * Do not allow to remove anything in this directory.
4107 4107           */
4108 4108          if (vn_ismntpt(dvp)) {
4109 4109                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4110 4110                  goto out;
4111 4111          }
4112 4112  
4113 4113          if (dvp->v_type != VDIR) {
4114 4114                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4115 4115                  goto out;
4116 4116          }
4117 4117  
4118 4118          status = utf8_dir_verify(&args->target);
4119 4119          if (status != NFS4_OK) {
4120 4120                  *cs->statusp = resp->status = status;
4121 4121                  goto out;
4122 4122          }
4123 4123  
4124 4124          /*
4125 4125           * Lookup the file so that we can check if it's a directory
4126 4126           */
4127 4127          nm = utf8_to_fn(&args->target, &len, NULL);
4128 4128          if (nm == NULL) {
4129 4129                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4130 4130                  goto out;
4131 4131          }
4132 4132  
4133 4133          if (len > MAXNAMELEN) {
4134 4134                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4135 4135                  kmem_free(nm, len);
4136 4136                  goto out;
4137 4137          }
4138 4138  
4139 4139          if (rdonly4(req, cs)) {
4140 4140                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4141 4141                  kmem_free(nm, len);
4142 4142                  goto out;
4143 4143          }
4144 4144  
4145 4145          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4146 4146          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4147 4147              MAXPATHLEN  + 1);
4148 4148  
4149 4149          if (name == NULL) {
4150 4150                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4151 4151                  kmem_free(nm, len);
4152 4152                  goto out;
4153 4153          }
4154 4154  
4155 4155          /*
4156 4156           * Lookup the file to determine type and while we are see if
4157 4157           * there is a file struct around and check for delegation.
4158 4158           * We don't need to acquire va_seq before this lookup, if
4159 4159           * it causes an update, cinfo.before will not match, which will
4160 4160           * trigger a cache flush even if atomic is TRUE.
4161 4161           */
4162 4162          if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4163 4163                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4164 4164                      NULL)) {
4165 4165                          VN_RELE(vp);
4166 4166                          rfs4_file_rele(fp);
4167 4167                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4168 4168                          if (nm != name)
4169 4169                                  kmem_free(name, MAXPATHLEN + 1);
4170 4170                          kmem_free(nm, len);
4171 4171                          goto out;
4172 4172                  }
4173 4173          }
4174 4174  
4175 4175          /* Didn't find anything to remove */
4176 4176          if (vp == NULL) {
4177 4177                  *cs->statusp = resp->status = error;
4178 4178                  if (nm != name)
4179 4179                          kmem_free(name, MAXPATHLEN + 1);
4180 4180                  kmem_free(nm, len);
4181 4181                  goto out;
4182 4182          }
4183 4183  
4184 4184          if (nbl_need_check(vp)) {
4185 4185                  nbl_start_crit(vp, RW_READER);
4186 4186                  in_crit = 1;
4187 4187                  if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4188 4188                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4189 4189                          if (nm != name)
4190 4190                                  kmem_free(name, MAXPATHLEN + 1);
4191 4191                          kmem_free(nm, len);
4192 4192                          nbl_end_crit(vp);
4193 4193                          VN_RELE(vp);
4194 4194                          if (fp) {
4195 4195                                  rfs4_clear_dont_grant(fp);
4196 4196                                  rfs4_file_rele(fp);
4197 4197                          }
4198 4198                          goto out;
4199 4199                  }
4200 4200          }
4201 4201  
4202 4202          /* check label before allowing removal */
4203 4203          if (is_system_labeled()) {
4204 4204                  ASSERT(req->rq_label != NULL);
4205 4205                  clabel = req->rq_label;
4206 4206                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4207 4207                      "got client label from request(1)",
4208 4208                      struct svc_req *, req);
4209 4209                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4210 4210                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4211 4211                              cs->exi)) {
4212 4212                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4213 4213                                  if (name != nm)
4214 4214                                          kmem_free(name, MAXPATHLEN + 1);
4215 4215                                  kmem_free(nm, len);
4216 4216                                  if (in_crit)
4217 4217                                          nbl_end_crit(vp);
4218 4218                                  VN_RELE(vp);
4219 4219                                  if (fp) {
4220 4220                                          rfs4_clear_dont_grant(fp);
4221 4221                                          rfs4_file_rele(fp);
4222 4222                                  }
4223 4223                                  goto out;
4224 4224                          }
4225 4225                  }
4226 4226          }
4227 4227  
4228 4228          /* Get dir "before" change value */
4229 4229          bdva.va_mask = AT_CTIME|AT_SEQ;
4230 4230          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4231 4231          if (error) {
4232 4232                  *cs->statusp = resp->status = puterrno4(error);
4233 4233                  if (nm != name)
4234 4234                          kmem_free(name, MAXPATHLEN + 1);
4235 4235                  kmem_free(nm, len);
4236 4236                  if (in_crit)
4237 4237                          nbl_end_crit(vp);
4238 4238                  VN_RELE(vp);
4239 4239                  if (fp) {
4240 4240                          rfs4_clear_dont_grant(fp);
4241 4241                          rfs4_file_rele(fp);
4242 4242                  }
4243 4243                  goto out;
4244 4244          }
4245 4245          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4246 4246  
4247 4247          /* Actually do the REMOVE operation */
4248 4248          if (vp->v_type == VDIR) {
4249 4249                  /*
4250 4250                   * Can't remove a directory that has a mounted-on filesystem.
4251 4251                   */
4252 4252                  if (vn_ismntpt(vp)) {
4253 4253                          error = EACCES;
4254 4254                  } else {
4255 4255                          /*
4256 4256                           * System V defines rmdir to return EEXIST,
4257 4257                           * not ENOTEMPTY, if the directory is not
4258 4258                           * empty.  A System V NFS server needs to map
4259 4259                           * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4260 4260                           * transmit over the wire.
4261 4261                           */
4262 4262                          if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4263 4263                              NULL, 0)) == EEXIST)
4264 4264                                  error = ENOTEMPTY;
4265 4265                  }
4266 4266          } else {
4267 4267                  if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4268 4268                      fp != NULL) {
4269 4269                          struct vattr va;
4270 4270                          vnode_t *tvp;
4271 4271  
4272 4272                          rfs4_dbe_lock(fp->rf_dbe);
4273 4273                          tvp = fp->rf_vp;
4274 4274                          if (tvp)
4275 4275                                  VN_HOLD(tvp);
4276 4276                          rfs4_dbe_unlock(fp->rf_dbe);
4277 4277  
4278 4278                          if (tvp) {
4279 4279                                  /*
4280 4280                                   * This is va_seq safe because we are not
4281 4281                                   * manipulating dvp.
4282 4282                                   */
4283 4283                                  va.va_mask = AT_NLINK;
4284 4284                                  if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4285 4285                                      va.va_nlink == 0) {
4286 4286                                          /* Remove state on file remove */
4287 4287                                          if (in_crit) {
4288 4288                                                  nbl_end_crit(vp);
4289 4289                                                  in_crit = 0;
4290 4290                                          }
4291 4291                                          rfs4_close_all_state(fp);
4292 4292                                  }
4293 4293                                  VN_RELE(tvp);
4294 4294                          }
4295 4295                  }
4296 4296          }
4297 4297  
4298 4298          if (in_crit)
4299 4299                  nbl_end_crit(vp);
4300 4300          VN_RELE(vp);
4301 4301  
4302 4302          if (fp) {
4303 4303                  rfs4_clear_dont_grant(fp);
4304 4304                  rfs4_file_rele(fp);
4305 4305          }
4306 4306          if (nm != name)
4307 4307                  kmem_free(name, MAXPATHLEN + 1);
4308 4308          kmem_free(nm, len);
4309 4309  
4310 4310          if (error) {
4311 4311                  *cs->statusp = resp->status = puterrno4(error);
4312 4312                  goto out;
4313 4313          }
4314 4314  
4315 4315          /*
4316 4316           * Get the initial "after" sequence number, if it fails, set to zero
4317 4317           */
4318 4318          idva.va_mask = AT_SEQ;
4319 4319          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4320 4320                  idva.va_seq = 0;
4321 4321  
4322 4322          /*
4323 4323           * Force modified data and metadata out to stable storage.
4324 4324           */
4325 4325          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4326 4326  
4327 4327          /*
4328 4328           * Get "after" change value, if it fails, simply return the
4329 4329           * before value.
4330 4330           */
4331 4331          adva.va_mask = AT_CTIME|AT_SEQ;
4332 4332          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4333 4333                  adva.va_ctime = bdva.va_ctime;
4334 4334                  adva.va_seq = 0;
4335 4335          }
4336 4336  
4337 4337          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4338 4338  
4339 4339          /*
4340 4340           * The cinfo.atomic = TRUE only if we have
4341 4341           * non-zero va_seq's, and it has incremented by exactly one
4342 4342           * during the VOP_REMOVE/RMDIR and it didn't change during
4343 4343           * the VOP_FSYNC.
4344 4344           */
4345 4345          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4346 4346              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4347 4347                  resp->cinfo.atomic = TRUE;
4348 4348          else
4349 4349                  resp->cinfo.atomic = FALSE;
4350 4350  
4351 4351          *cs->statusp = resp->status = NFS4_OK;
4352 4352  
4353 4353  out:
4354 4354          DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4355 4355              REMOVE4res *, resp);
4356 4356  }
4357 4357  
4358 4358  /*
4359 4359   * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4360 4360   *              oldname and newname.
4361 4361   *      res: status. If success - CURRENT_FH unchanged, return change_info
4362 4362   *              for both from and target directories.
4363 4363   */
4364 4364  /* ARGSUSED */
4365 4365  static void
4366 4366  rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4367 4367      struct compound_state *cs)
4368 4368  {
4369 4369          RENAME4args *args = &argop->nfs_argop4_u.oprename;
4370 4370          RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4371 4371          int error;
4372 4372          vnode_t *odvp;
4373 4373          vnode_t *ndvp;
4374 4374          vnode_t *srcvp, *targvp, *tvp;
4375 4375          struct vattr obdva, oidva, oadva;
4376 4376          struct vattr nbdva, nidva, nadva;
4377 4377          char *onm, *nnm;
4378 4378          uint_t olen, nlen;
4379 4379          rfs4_file_t *fp, *sfp;
4380 4380          int in_crit_src, in_crit_targ;
4381 4381          int fp_rele_grant_hold, sfp_rele_grant_hold;
4382 4382          int unlinked;
4383 4383          bslabel_t *clabel;
4384 4384          struct sockaddr *ca;
4385 4385          char *converted_onm = NULL;
4386 4386          char *converted_nnm = NULL;
4387 4387          nfsstat4 status;
4388 4388  
4389 4389          DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4390 4390              RENAME4args *, args);
4391 4391  
4392 4392          fp = sfp = NULL;
4393 4393          srcvp = targvp = tvp = NULL;
4394 4394          in_crit_src = in_crit_targ = 0;
4395 4395          fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4396 4396          unlinked = 0;
4397 4397  
4398 4398          /* CURRENT_FH: target directory */
4399 4399          ndvp = cs->vp;
4400 4400          if (ndvp == NULL) {
4401 4401                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4402 4402                  goto out;
4403 4403          }
4404 4404  
4405 4405          /* SAVED_FH: from directory */
4406 4406          odvp = cs->saved_vp;
4407 4407          if (odvp == NULL) {
4408 4408                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4409 4409                  goto out;
4410 4410          }
4411 4411  
4412 4412          if (cs->access == CS_ACCESS_DENIED) {
4413 4413                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4414 4414                  goto out;
4415 4415          }
4416 4416  
4417 4417          /*
4418 4418           * If there is an unshared filesystem mounted on this vnode,
4419 4419           * do not allow to rename objects in this directory.
4420 4420           */
4421 4421          if (vn_ismntpt(odvp)) {
4422 4422                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4423 4423                  goto out;
4424 4424          }
4425 4425  
4426 4426          /*
4427 4427           * If there is an unshared filesystem mounted on this vnode,
4428 4428           * do not allow to rename to this directory.
4429 4429           */
4430 4430          if (vn_ismntpt(ndvp)) {
4431 4431                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4432 4432                  goto out;
4433 4433          }
4434 4434  
4435 4435          if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4436 4436                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4437 4437                  goto out;
4438 4438          }
4439 4439  
4440 4440          if (cs->saved_exi != cs->exi) {
4441 4441                  *cs->statusp = resp->status = NFS4ERR_XDEV;
4442 4442                  goto out;
4443 4443          }
4444 4444  
4445 4445          status = utf8_dir_verify(&args->oldname);
4446 4446          if (status != NFS4_OK) {
4447 4447                  *cs->statusp = resp->status = status;
4448 4448                  goto out;
4449 4449          }
4450 4450  
4451 4451          status = utf8_dir_verify(&args->newname);
4452 4452          if (status != NFS4_OK) {
4453 4453                  *cs->statusp = resp->status = status;
4454 4454                  goto out;
4455 4455          }
4456 4456  
4457 4457          onm = utf8_to_fn(&args->oldname, &olen, NULL);
4458 4458          if (onm == NULL) {
4459 4459                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4460 4460                  goto out;
4461 4461          }
4462 4462          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4463 4463          nlen = MAXPATHLEN + 1;
4464 4464          converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4465 4465              nlen);
4466 4466  
4467 4467          if (converted_onm == NULL) {
4468 4468                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4469 4469                  kmem_free(onm, olen);
4470 4470                  goto out;
4471 4471          }
4472 4472  
4473 4473          nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4474 4474          if (nnm == NULL) {
4475 4475                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4476 4476                  if (onm != converted_onm)
4477 4477                          kmem_free(converted_onm, MAXPATHLEN + 1);
4478 4478                  kmem_free(onm, olen);
4479 4479                  goto out;
4480 4480          }
4481 4481          converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4482 4482              MAXPATHLEN  + 1);
4483 4483  
4484 4484          if (converted_nnm == NULL) {
4485 4485                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4486 4486                  kmem_free(nnm, nlen);
4487 4487                  nnm = NULL;
4488 4488                  if (onm != converted_onm)
4489 4489                          kmem_free(converted_onm, MAXPATHLEN + 1);
4490 4490                  kmem_free(onm, olen);
4491 4491                  goto out;
4492 4492          }
4493 4493  
4494 4494  
4495 4495          if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4496 4496                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4497 4497                  kmem_free(onm, olen);
4498 4498                  kmem_free(nnm, nlen);
4499 4499                  goto out;
4500 4500          }
4501 4501  
4502 4502  
4503 4503          if (rdonly4(req, cs)) {
4504 4504                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4505 4505                  if (onm != converted_onm)
4506 4506                          kmem_free(converted_onm, MAXPATHLEN + 1);
4507 4507                  kmem_free(onm, olen);
4508 4508                  if (nnm != converted_nnm)
4509 4509                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4510 4510                  kmem_free(nnm, nlen);
4511 4511                  goto out;
4512 4512          }
4513 4513  
4514 4514          /* check label of the target dir */
4515 4515          if (is_system_labeled()) {
4516 4516                  ASSERT(req->rq_label != NULL);
4517 4517                  clabel = req->rq_label;
4518 4518                  DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4519 4519                      "got client label from request(1)",
4520 4520                      struct svc_req *, req);
4521 4521                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4522 4522                          if (!do_rfs_label_check(clabel, ndvp,
4523 4523                              EQUALITY_CHECK, cs->exi)) {
4524 4524                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4525 4525                                  goto err_out;
4526 4526                          }
4527 4527                  }
4528 4528          }
4529 4529  
4530 4530          /*
4531 4531           * Is the source a file and have a delegation?
4532 4532           * We don't need to acquire va_seq before these lookups, if
4533 4533           * it causes an update, cinfo.before will not match, which will
4534 4534           * trigger a cache flush even if atomic is TRUE.
4535 4535           */
4536 4536          if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4537 4537              &error, cs->cr)) {
4538 4538                  if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4539 4539                      NULL)) {
4540 4540                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4541 4541                          goto err_out;
4542 4542                  }
4543 4543          }
4544 4544  
4545 4545          if (srcvp == NULL) {
4546 4546                  *cs->statusp = resp->status = puterrno4(error);
4547 4547                  if (onm != converted_onm)
4548 4548                          kmem_free(converted_onm, MAXPATHLEN + 1);
4549 4549                  kmem_free(onm, olen);
4550 4550                  if (nnm != converted_nnm)
4551 4551                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4552 4552                  kmem_free(nnm, nlen);
4553 4553                  goto out;
4554 4554          }
4555 4555  
4556 4556          sfp_rele_grant_hold = 1;
4557 4557  
4558 4558          /* Does the destination exist and a file and have a delegation? */
4559 4559          if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4560 4560              NULL, cs->cr)) {
4561 4561                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4562 4562                      NULL)) {
4563 4563                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4564 4564                          goto err_out;
4565 4565                  }
4566 4566          }
4567 4567          fp_rele_grant_hold = 1;
4568 4568  
4569 4569          /* Check for NBMAND lock on both source and target */
4570 4570          if (nbl_need_check(srcvp)) {
4571 4571                  nbl_start_crit(srcvp, RW_READER);
4572 4572                  in_crit_src = 1;
4573 4573                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4574 4574                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4575 4575                          goto err_out;
4576 4576                  }
4577 4577          }
4578 4578  
4579 4579          if (targvp && nbl_need_check(targvp)) {
4580 4580                  nbl_start_crit(targvp, RW_READER);
4581 4581                  in_crit_targ = 1;
4582 4582                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4583 4583                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4584 4584                          goto err_out;
4585 4585                  }
4586 4586          }
4587 4587  
4588 4588          /* Get source "before" change value */
4589 4589          obdva.va_mask = AT_CTIME|AT_SEQ;
4590 4590          error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4591 4591          if (!error) {
4592 4592                  nbdva.va_mask = AT_CTIME|AT_SEQ;
4593 4593                  error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4594 4594          }
4595 4595          if (error) {
4596 4596                  *cs->statusp = resp->status = puterrno4(error);
4597 4597                  goto err_out;
4598 4598          }
4599 4599  
4600 4600          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4601 4601          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4602 4602  
4603 4603          error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4604 4604              NULL, 0);
4605 4605  
4606 4606          /*
4607 4607           * If target existed and was unlinked by VOP_RENAME, state will need
4608 4608           * closed. To avoid deadlock, rfs4_close_all_state will be done after
4609 4609           * any necessary nbl_end_crit on srcvp and tgtvp.
4610 4610           */
4611 4611          if (error == 0 && fp != NULL) {
4612 4612                  rfs4_dbe_lock(fp->rf_dbe);
4613 4613                  tvp = fp->rf_vp;
4614 4614                  if (tvp)
4615 4615                          VN_HOLD(tvp);
4616 4616                  rfs4_dbe_unlock(fp->rf_dbe);
4617 4617  
4618 4618                  if (tvp) {
4619 4619                          struct vattr va;
4620 4620                          va.va_mask = AT_NLINK;
4621 4621  
4622 4622                          if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4623 4623                              va.va_nlink == 0) {
4624 4624                                  unlinked = 1;
4625 4625  
4626 4626                                  /* DEBUG data */
4627 4627                                  if ((srcvp == targvp) || (tvp != targvp)) {
4628 4628                                          cmn_err(CE_WARN, "rfs4_op_rename: "
4629 4629                                              "srcvp %p, targvp: %p, tvp: %p",
4630 4630                                              (void *)srcvp, (void *)targvp,
4631 4631                                              (void *)tvp);
4632 4632                                  }
4633 4633                          } else {
4634 4634                                  VN_RELE(tvp);
4635 4635                          }
4636 4636                  }
4637 4637          }
4638 4638          if (error == 0)
4639 4639                  vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4640 4640  
4641 4641          if (in_crit_src)
4642 4642                  nbl_end_crit(srcvp);
4643 4643          if (srcvp)
4644 4644                  VN_RELE(srcvp);
4645 4645          if (in_crit_targ)
4646 4646                  nbl_end_crit(targvp);
4647 4647          if (targvp)
4648 4648                  VN_RELE(targvp);
4649 4649  
4650 4650          if (unlinked) {
4651 4651                  ASSERT(fp != NULL);
4652 4652                  ASSERT(tvp != NULL);
4653 4653  
4654 4654                  /* DEBUG data */
4655 4655                  if (RW_READ_HELD(&tvp->v_nbllock)) {
4656 4656                          cmn_err(CE_WARN, "rfs4_op_rename: "
4657 4657                              "RW_READ_HELD(%p)", (void *)tvp);
4658 4658                  }
4659 4659  
4660 4660                  /* The file is gone and so should the state */
4661 4661                  rfs4_close_all_state(fp);
4662 4662                  VN_RELE(tvp);
4663 4663          }
4664 4664  
4665 4665          if (sfp) {
4666 4666                  rfs4_clear_dont_grant(sfp);
4667 4667                  rfs4_file_rele(sfp);
4668 4668          }
4669 4669          if (fp) {
4670 4670                  rfs4_clear_dont_grant(fp);
4671 4671                  rfs4_file_rele(fp);
4672 4672          }
4673 4673  
4674 4674          if (converted_onm != onm)
4675 4675                  kmem_free(converted_onm, MAXPATHLEN + 1);
4676 4676          kmem_free(onm, olen);
4677 4677          if (converted_nnm != nnm)
4678 4678                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4679 4679          kmem_free(nnm, nlen);
4680 4680  
4681 4681          /*
4682 4682           * Get the initial "after" sequence number, if it fails, set to zero
4683 4683           */
4684 4684          oidva.va_mask = AT_SEQ;
4685 4685          if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4686 4686                  oidva.va_seq = 0;
4687 4687  
4688 4688          nidva.va_mask = AT_SEQ;
4689 4689          if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4690 4690                  nidva.va_seq = 0;
4691 4691  
4692 4692          /*
4693 4693           * Force modified data and metadata out to stable storage.
4694 4694           */
4695 4695          (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4696 4696          (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4697 4697  
4698 4698          if (error) {
4699 4699                  *cs->statusp = resp->status = puterrno4(error);
4700 4700                  goto out;
4701 4701          }
4702 4702  
4703 4703          /*
4704 4704           * Get "after" change values, if it fails, simply return the
4705 4705           * before value.
4706 4706           */
4707 4707          oadva.va_mask = AT_CTIME|AT_SEQ;
4708 4708          if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4709 4709                  oadva.va_ctime = obdva.va_ctime;
4710 4710                  oadva.va_seq = 0;
4711 4711          }
4712 4712  
4713 4713          nadva.va_mask = AT_CTIME|AT_SEQ;
4714 4714          if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4715 4715                  nadva.va_ctime = nbdva.va_ctime;
4716 4716                  nadva.va_seq = 0;
4717 4717          }
4718 4718  
4719 4719          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4720 4720          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4721 4721  
4722 4722          /*
4723 4723           * The cinfo.atomic = TRUE only if we have
4724 4724           * non-zero va_seq's, and it has incremented by exactly one
4725 4725           * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4726 4726           */
4727 4727          if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4728 4728              oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4729 4729                  resp->source_cinfo.atomic = TRUE;
4730 4730          else
4731 4731                  resp->source_cinfo.atomic = FALSE;
4732 4732  
4733 4733          if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4734 4734              nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4735 4735                  resp->target_cinfo.atomic = TRUE;
4736 4736          else
4737 4737                  resp->target_cinfo.atomic = FALSE;
4738 4738  
4739 4739  #ifdef  VOLATILE_FH_TEST
4740 4740          {
4741 4741          extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4742 4742  
4743 4743          /*
4744 4744           * Add the renamed file handle to the volatile rename list
4745 4745           */
4746 4746          if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4747 4747                  /* file handles may expire on rename */
4748 4748                  vnode_t *vp;
4749 4749  
4750 4750                  nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4751 4751                  /*
4752 4752                   * Already know that nnm will be a valid string
4753 4753                   */
4754 4754                  error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4755 4755                      NULL, NULL, NULL);
4756 4756                  kmem_free(nnm, nlen);
4757 4757                  if (!error) {
4758 4758                          add_volrnm_fh(cs->exi, vp);
4759 4759                          VN_RELE(vp);
4760 4760                  }
4761 4761          }
4762 4762          }
4763 4763  #endif  /* VOLATILE_FH_TEST */
4764 4764  
4765 4765          *cs->statusp = resp->status = NFS4_OK;
4766 4766  out:
4767 4767          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4768 4768              RENAME4res *, resp);
4769 4769          return;
4770 4770  
4771 4771  err_out:
4772 4772          if (onm != converted_onm)
4773 4773                  kmem_free(converted_onm, MAXPATHLEN + 1);
4774 4774          if (onm != NULL)
4775 4775                  kmem_free(onm, olen);
4776 4776          if (nnm != converted_nnm)
4777 4777                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4778 4778          if (nnm != NULL)
4779 4779                  kmem_free(nnm, nlen);
4780 4780  
4781 4781          if (in_crit_src) nbl_end_crit(srcvp);
4782 4782          if (in_crit_targ) nbl_end_crit(targvp);
4783 4783          if (targvp) VN_RELE(targvp);
4784 4784          if (srcvp) VN_RELE(srcvp);
4785 4785          if (sfp) {
4786 4786                  if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4787 4787                  rfs4_file_rele(sfp);
4788 4788          }
4789 4789          if (fp) {
4790 4790                  if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4791 4791                  rfs4_file_rele(fp);
4792 4792          }
4793 4793  
4794 4794          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4795 4795              RENAME4res *, resp);
4796 4796  }
4797 4797  
4798 4798  /* ARGSUSED */
4799 4799  static void
4800 4800  rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4801 4801      struct compound_state *cs)
4802 4802  {
4803 4803          RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4804 4804          RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4805 4805          rfs4_client_t *cp;
4806 4806  
4807 4807          DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4808 4808              RENEW4args *, args);
4809 4809  
4810 4810          if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4811 4811                  *cs->statusp = resp->status =
4812 4812                      rfs4_check_clientid(&args->clientid, 0);
4813 4813                  goto out;
4814 4814          }
4815 4815  
4816 4816          if (rfs4_lease_expired(cp)) {
4817 4817                  rfs4_client_rele(cp);
4818 4818                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4819 4819                  goto out;
4820 4820          }
4821 4821  
4822 4822          rfs4_update_lease(cp);
4823 4823  
4824 4824          mutex_enter(cp->rc_cbinfo.cb_lock);
4825 4825          if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4826 4826                  cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4827 4827                  *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4828 4828          } else {
4829 4829                  *cs->statusp = resp->status = NFS4_OK;
4830 4830          }
4831 4831          mutex_exit(cp->rc_cbinfo.cb_lock);
4832 4832  
4833 4833          rfs4_client_rele(cp);
4834 4834  
4835 4835  out:
4836 4836          DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4837 4837              RENEW4res *, resp);
4838 4838  }
4839 4839  
4840 4840  /* ARGSUSED */
4841 4841  static void
4842 4842  rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4843 4843      struct compound_state *cs)
4844 4844  {
4845 4845          RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4846 4846  
4847 4847          DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4848 4848  
4849 4849          /* No need to check cs->access - we are not accessing any object */
4850 4850          if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4851 4851                  *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4852 4852                  goto out;
4853 4853          }
4854 4854          if (cs->vp != NULL) {
4855 4855                  VN_RELE(cs->vp);
4856 4856          }
4857 4857          cs->vp = cs->saved_vp;
4858 4858          cs->saved_vp = NULL;
4859 4859          cs->exi = cs->saved_exi;
4860 4860          nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4861 4861          *cs->statusp = resp->status = NFS4_OK;
4862 4862          cs->deleg = FALSE;
4863 4863  
4864 4864  out:
4865 4865          DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4866 4866              RESTOREFH4res *, resp);
4867 4867  }
4868 4868  
4869 4869  /* ARGSUSED */
4870 4870  static void
4871 4871  rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4872 4872      struct compound_state *cs)
4873 4873  {
4874 4874          SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4875 4875  
4876 4876          DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4877 4877  
4878 4878          /* No need to check cs->access - we are not accessing any object */
4879 4879          if (cs->vp == NULL) {
4880 4880                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4881 4881                  goto out;
4882 4882          }
4883 4883          if (cs->saved_vp != NULL) {
4884 4884                  VN_RELE(cs->saved_vp);
4885 4885          }
4886 4886          cs->saved_vp = cs->vp;
4887 4887          VN_HOLD(cs->saved_vp);
4888 4888          cs->saved_exi = cs->exi;
4889 4889          /*
4890 4890           * since SAVEFH is fairly rare, don't alloc space for its fh
4891 4891           * unless necessary.
4892 4892           */
4893 4893          if (cs->saved_fh.nfs_fh4_val == NULL) {
4894 4894                  cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4895 4895          }
4896 4896          nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4897 4897          *cs->statusp = resp->status = NFS4_OK;
4898 4898  
4899 4899  out:
4900 4900          DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4901 4901              SAVEFH4res *, resp);
4902 4902  }
4903 4903  
4904 4904  /*
4905 4905   * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4906 4906   * return the bitmap of attrs that were set successfully. It is also
4907 4907   * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4908 4908   * always be called only after rfs4_do_set_attrs().
4909 4909   *
4910 4910   * Verify that the attributes are same as the expected ones. sargp->vap
4911 4911   * and sargp->sbp contain the input attributes as translated from fattr4.
4912 4912   *
4913 4913   * This function verifies only the attrs that correspond to a vattr or
4914 4914   * vfsstat struct. That is because of the extra step needed to get the
4915 4915   * corresponding system structs. Other attributes have already been set or
4916 4916   * verified by do_rfs4_set_attrs.
4917 4917   *
4918 4918   * Return 0 if all attrs match, -1 if some don't, error if error processing.
4919 4919   */
4920 4920  static int
4921 4921  rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4922 4922      bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4923 4923  {
4924 4924          int error, ret_error = 0;
4925 4925          int i, k;
4926 4926          uint_t sva_mask = sargp->vap->va_mask;
4927 4927          uint_t vbit;
4928 4928          union nfs4_attr_u *na;
4929 4929          uint8_t *amap;
4930 4930          bool_t getsb = ntovp->vfsstat;
4931 4931  
4932 4932          if (sva_mask != 0) {
4933 4933                  /*
4934 4934                   * Okay to overwrite sargp->vap because we verify based
4935 4935                   * on the incoming values.
4936 4936                   */
4937 4937                  ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4938 4938                      sargp->cs->cr, NULL);
4939 4939                  if (ret_error) {
4940 4940                          if (resp == NULL)
4941 4941                                  return (ret_error);
4942 4942                          /*
4943 4943                           * Must return bitmap of successful attrs
4944 4944                           */
4945 4945                          sva_mask = 0;   /* to prevent checking vap later */
4946 4946                  } else {
4947 4947                          /*
4948 4948                           * Some file systems clobber va_mask. it is probably
4949 4949                           * wrong of them to do so, nonethless we practice
4950 4950                           * defensive coding.
4951 4951                           * See bug id 4276830.
4952 4952                           */
4953 4953                          sargp->vap->va_mask = sva_mask;
4954 4954                  }
4955 4955          }
4956 4956  
4957 4957          if (getsb) {
4958 4958                  /*
4959 4959                   * Now get the superblock and loop on the bitmap, as there is
4960 4960                   * no simple way of translating from superblock to bitmap4.
4961 4961                   */
4962 4962                  ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4963 4963                  if (ret_error) {
4964 4964                          if (resp == NULL)
4965 4965                                  goto errout;
4966 4966                          getsb = FALSE;
4967 4967                  }
4968 4968          }
4969 4969  
4970 4970          /*
4971 4971           * Now loop and verify each attribute which getattr returned
4972 4972           * whether it's the same as the input.
4973 4973           */
4974 4974          if (resp == NULL && !getsb && (sva_mask == 0))
4975 4975                  goto errout;
4976 4976  
4977 4977          na = ntovp->na;
4978 4978          amap = ntovp->amap;
4979 4979          k = 0;
4980 4980          for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4981 4981                  k = *amap;
4982 4982                  ASSERT(nfs4_ntov_map[k].nval == k);
4983 4983                  vbit = nfs4_ntov_map[k].vbit;
4984 4984  
4985 4985                  /*
4986 4986                   * If vattr attribute but VOP_GETATTR failed, or it's
4987 4987                   * superblock attribute but VFS_STATVFS failed, skip
4988 4988                   */
4989 4989                  if (vbit) {
4990 4990                          if ((vbit & sva_mask) == 0)
4991 4991                                  continue;
4992 4992                  } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4993 4993                          continue;
4994 4994                  }
4995 4995                  error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4996 4996                  if (resp != NULL) {
4997 4997                          if (error)
4998 4998                                  ret_error = -1; /* not all match */
4999 4999                          else    /* update response bitmap */
5000 5000                                  *resp |= nfs4_ntov_map[k].fbit;
5001 5001                          continue;
5002 5002                  }
5003 5003                  if (error) {
5004 5004                          ret_error = -1; /* not all match */
5005 5005                          break;
5006 5006                  }
5007 5007          }
5008 5008  errout:
5009 5009          return (ret_error);
5010 5010  }
5011 5011  
5012 5012  /*
5013 5013   * Decode the attribute to be set/verified. If the attr requires a sys op
5014 5014   * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5015 5015   * call the sv_getit function for it, because the sys op hasn't yet been done.
5016 5016   * Return 0 for success, error code if failed.
5017 5017   *
5018 5018   * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5019 5019   */
5020 5020  static int
5021 5021  decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5022 5022      int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5023 5023  {
5024 5024          int error = 0;
5025 5025          bool_t set_later;
5026 5026  
5027 5027          sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5028 5028  
5029 5029          if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5030 5030                  set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5031 5031                  /*
5032 5032                   * don't verify yet if a vattr or sb dependent attr,
5033 5033                   * because we don't have their sys values yet.
5034 5034                   * Will be done later.
5035 5035                   */
5036 5036                  if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5037 5037                          /*
5038 5038                           * ACLs are a special case, since setting the MODE
5039 5039                           * conflicts with setting the ACL.  We delay setting
5040 5040                           * the ACL until all other attributes have been set.
5041 5041                           * The ACL gets set in do_rfs4_op_setattr().
5042 5042                           */
5043 5043                          if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5044 5044                                  error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5045 5045                                      sargp, nap);
5046 5046                                  if (error) {
5047 5047                                          xdr_free(nfs4_ntov_map[k].xfunc,
5048 5048                                              (caddr_t)nap);
5049 5049                                  }
5050 5050                          }
5051 5051                  }
5052 5052          } else {
5053 5053  #ifdef  DEBUG
5054 5054                  cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5055 5055                      "decoding attribute %d\n", k);
5056 5056  #endif
5057 5057                  error = EINVAL;
5058 5058          }
5059 5059          if (!error && resp_bval && !set_later) {
5060 5060                  *resp_bval |= nfs4_ntov_map[k].fbit;
5061 5061          }
5062 5062  
5063 5063          return (error);
5064 5064  }
5065 5065  
5066 5066  /*
5067 5067   * Set vattr based on incoming fattr4 attrs - used by setattr.
5068 5068   * Set response mask. Ignore any values that are not writable vattr attrs.
5069 5069   */
5070 5070  static nfsstat4
5071 5071  do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5072 5072      struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5073 5073      nfs4_attr_cmd_t cmd)
5074 5074  {
5075 5075          int error = 0;
5076 5076          int i;
5077 5077          char *attrs = fattrp->attrlist4;
5078 5078          uint32_t attrslen = fattrp->attrlist4_len;
5079 5079          XDR xdr;
5080 5080          nfsstat4 status = NFS4_OK;
5081 5081          vnode_t *vp = cs->vp;
5082 5082          union nfs4_attr_u *na;
5083 5083          uint8_t *amap;
5084 5084  
5085 5085  #ifndef lint
5086 5086          /*
5087 5087           * Make sure that maximum attribute number can be expressed as an
5088 5088           * 8 bit quantity.
5089 5089           */
5090 5090          ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5091 5091  #endif
5092 5092  
5093 5093          if (vp == NULL) {
5094 5094                  if (resp)
5095 5095                          *resp = 0;
5096 5096                  return (NFS4ERR_NOFILEHANDLE);
5097 5097          }
5098 5098          if (cs->access == CS_ACCESS_DENIED) {
5099 5099                  if (resp)
5100 5100                          *resp = 0;
5101 5101                  return (NFS4ERR_ACCESS);
5102 5102          }
5103 5103  
5104 5104          sargp->op = cmd;
5105 5105          sargp->cs = cs;
5106 5106          sargp->flag = 0;        /* may be set later */
5107 5107          sargp->vap->va_mask = 0;
5108 5108          sargp->rdattr_error = NFS4_OK;
5109 5109          sargp->rdattr_error_req = FALSE;
5110 5110          /* sargp->sbp is set by the caller */
5111 5111  
5112 5112          xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5113 5113  
5114 5114          na = ntovp->na;
5115 5115          amap = ntovp->amap;
5116 5116  
5117 5117          /*
5118 5118           * The following loop iterates on the nfs4_ntov_map checking
5119 5119           * if the fbit is set in the requested bitmap.
5120 5120           * If set then we process the arguments using the
5121 5121           * rfs4_fattr4 conversion functions to populate the setattr
5122 5122           * vattr and va_mask. Any settable attrs that are not using vattr
5123 5123           * will be set in this loop.
5124 5124           */
5125 5125          for (i = 0; i < nfs4_ntov_map_size; i++) {
5126 5126                  if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5127 5127                          continue;
5128 5128                  }
5129 5129                  /*
5130 5130                   * If setattr, must be a writable attr.
5131 5131                   * If verify/nverify, must be a readable attr.
5132 5132                   */
5133 5133                  if ((error = (*nfs4_ntov_map[i].sv_getit)(
5134 5134                      NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5135 5135                          /*
5136 5136                           * Client tries to set/verify an
5137 5137                           * unsupported attribute, tries to set
5138 5138                           * a read only attr or verify a write
5139 5139                           * only one - error!
5140 5140                           */
5141 5141                          break;
5142 5142                  }
5143 5143                  /*
5144 5144                   * Decode the attribute to set/verify
5145 5145                   */
5146 5146                  error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5147 5147                      &xdr, resp ? resp : NULL, na);
5148 5148                  if (error)
5149 5149                          break;
5150 5150                  *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5151 5151                  na++;
5152 5152                  (ntovp->attrcnt)++;
5153 5153                  if (nfs4_ntov_map[i].vfsstat)
5154 5154                          ntovp->vfsstat = TRUE;
5155 5155          }
5156 5156  
5157 5157          if (error != 0)
5158 5158                  status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5159 5159                      puterrno4(error));
5160 5160          /* xdrmem_destroy(&xdrs); */    /* NO-OP */
5161 5161          return (status);
5162 5162  }
5163 5163  
5164 5164  static nfsstat4
5165 5165  do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5166 5166      stateid4 *stateid)
5167 5167  {
5168 5168          int error = 0;
5169 5169          struct nfs4_svgetit_arg sarg;
5170 5170          bool_t trunc;
5171 5171  
5172 5172          nfsstat4 status = NFS4_OK;
5173 5173          cred_t *cr = cs->cr;
5174 5174          vnode_t *vp = cs->vp;
5175 5175          struct nfs4_ntov_table ntov;
5176 5176          struct statvfs64 sb;
5177 5177          struct vattr bva;
5178 5178          struct flock64 bf;
5179 5179          int in_crit = 0;
5180 5180          uint_t saved_mask = 0;
5181 5181          caller_context_t ct;
5182 5182  
5183 5183          *resp = 0;
5184 5184          sarg.sbp = &sb;
5185 5185          sarg.is_referral = B_FALSE;
5186 5186          nfs4_ntov_table_init(&ntov);
5187 5187          status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5188 5188              NFS4ATTR_SETIT);
5189 5189          if (status != NFS4_OK) {
5190 5190                  /*
5191 5191                   * failed set attrs
5192 5192                   */
5193 5193                  goto done;
5194 5194          }
5195 5195          if ((sarg.vap->va_mask == 0) &&
5196 5196              (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5197 5197                  /*
5198 5198                   * no further work to be done
5199 5199                   */
5200 5200                  goto done;
5201 5201          }
5202 5202  
5203 5203          /*
5204 5204           * If we got a request to set the ACL and the MODE, only
5205 5205           * allow changing VSUID, VSGID, and VSVTX.  Attempting
5206 5206           * to change any other bits, along with setting an ACL,
5207 5207           * gives NFS4ERR_INVAL.
5208 5208           */
5209 5209          if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5210 5210              (fattrp->attrmask & FATTR4_MODE_MASK)) {
5211 5211                  vattr_t va;
5212 5212  
5213 5213                  va.va_mask = AT_MODE;
5214 5214                  error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5215 5215                  if (error) {
5216 5216                          status = puterrno4(error);
5217 5217                          goto done;
5218 5218                  }
5219 5219                  if ((sarg.vap->va_mode ^ va.va_mode) &
5220 5220                      ~(VSUID | VSGID | VSVTX)) {
5221 5221                          status = NFS4ERR_INVAL;
5222 5222                          goto done;
5223 5223                  }
5224 5224          }
5225 5225  
5226 5226          /* Check stateid only if size has been set */
5227 5227          if (sarg.vap->va_mask & AT_SIZE) {
5228 5228                  trunc = (sarg.vap->va_size == 0);
5229 5229                  status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5230 5230                      trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5231 5231                  if (status != NFS4_OK)
5232 5232                          goto done;
5233 5233          } else {
5234 5234                  ct.cc_sysid = 0;
5235 5235                  ct.cc_pid = 0;
5236 5236                  ct.cc_caller_id = nfs4_srv_caller_id;
5237 5237                  ct.cc_flags = CC_DONTBLOCK;
5238 5238          }
5239 5239  
5240 5240          /* XXX start of possible race with delegations */
5241 5241  
5242 5242          /*
5243 5243           * We need to specially handle size changes because it is
5244 5244           * possible for the client to create a file with read-only
5245 5245           * modes, but with the file opened for writing. If the client
5246 5246           * then tries to set the file size, e.g. ftruncate(3C),
5247 5247           * fcntl(F_FREESP), the normal access checking done in
5248 5248           * VOP_SETATTR would prevent the client from doing it even though
5249 5249           * it should be allowed to do so.  To get around this, we do the
5250 5250           * access checking for ourselves and use VOP_SPACE which doesn't
5251 5251           * do the access checking.
5252 5252           * Also the client should not be allowed to change the file
5253 5253           * size if there is a conflicting non-blocking mandatory lock in
5254 5254           * the region of the change.
5255 5255           */
5256 5256          if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5257 5257                  u_offset_t offset;
5258 5258                  ssize_t length;
5259 5259  
5260 5260                  /*
5261 5261                   * ufs_setattr clears AT_SIZE from vap->va_mask, but
5262 5262                   * before returning, sarg.vap->va_mask is used to
5263 5263                   * generate the setattr reply bitmap.  We also clear
5264 5264                   * AT_SIZE below before calling VOP_SPACE.  For both
5265 5265                   * of these cases, the va_mask needs to be saved here
5266 5266                   * and restored after calling VOP_SETATTR.
5267 5267                   */
5268 5268                  saved_mask = sarg.vap->va_mask;
5269 5269  
5270 5270                  /*
5271 5271                   * Check any possible conflict due to NBMAND locks.
5272 5272                   * Get into critical region before VOP_GETATTR, so the
5273 5273                   * size attribute is valid when checking conflicts.
5274 5274                   */
5275 5275                  if (nbl_need_check(vp)) {
5276 5276                          nbl_start_crit(vp, RW_READER);
5277 5277                          in_crit = 1;
5278 5278                  }
5279 5279  
5280 5280                  bva.va_mask = AT_UID|AT_SIZE;
5281 5281                  if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5282 5282                          status = puterrno4(error);
5283 5283                          goto done;
5284 5284                  }
5285 5285  
5286 5286                  if (in_crit) {
5287 5287                          if (sarg.vap->va_size < bva.va_size) {
5288 5288                                  offset = sarg.vap->va_size;
5289 5289                                  length = bva.va_size - sarg.vap->va_size;
5290 5290                          } else {
5291 5291                                  offset = bva.va_size;
5292 5292                                  length = sarg.vap->va_size - bva.va_size;
5293 5293                          }
5294 5294                          if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5295 5295                              &ct)) {
5296 5296                                  status = NFS4ERR_LOCKED;
5297 5297                                  goto done;
5298 5298                          }
5299 5299                  }
5300 5300  
5301 5301                  if (crgetuid(cr) == bva.va_uid) {
5302 5302                          sarg.vap->va_mask &= ~AT_SIZE;
5303 5303                          bf.l_type = F_WRLCK;
5304 5304                          bf.l_whence = 0;
5305 5305                          bf.l_start = (off64_t)sarg.vap->va_size;
5306 5306                          bf.l_len = 0;
5307 5307                          bf.l_sysid = 0;
5308 5308                          bf.l_pid = 0;
5309 5309                          error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5310 5310                              (offset_t)sarg.vap->va_size, cr, &ct);
5311 5311                  }
5312 5312          }
5313 5313  
5314 5314          if (!error && sarg.vap->va_mask != 0)
5315 5315                  error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5316 5316  
5317 5317          /* restore va_mask -- ufs_setattr clears AT_SIZE */
5318 5318          if (saved_mask & AT_SIZE)
5319 5319                  sarg.vap->va_mask |= AT_SIZE;
5320 5320  
5321 5321          /*
5322 5322           * If an ACL was being set, it has been delayed until now,
5323 5323           * in order to set the mode (via the VOP_SETATTR() above) first.
5324 5324           */
5325 5325          if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5326 5326                  int i;
5327 5327  
5328 5328                  for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5329 5329                          if (ntov.amap[i] == FATTR4_ACL)
5330 5330                                  break;
5331 5331                  if (i < NFS4_MAXNUM_ATTRS) {
5332 5332                          error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5333 5333                              NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5334 5334                          if (error == 0) {
5335 5335                                  *resp |= FATTR4_ACL_MASK;
5336 5336                          } else if (error == ENOTSUP) {
5337 5337                                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5338 5338                                  status = NFS4ERR_ATTRNOTSUPP;
5339 5339                                  goto done;
5340 5340                          }
5341 5341                  } else {
5342 5342                          NFS4_DEBUG(rfs4_debug,
5343 5343                              (CE_NOTE, "do_rfs4_op_setattr: "
5344 5344                              "unable to find ACL in fattr4"));
5345 5345                          error = EINVAL;
5346 5346                  }
5347 5347          }
5348 5348  
5349 5349          if (error) {
5350 5350                  /* check if a monitor detected a delegation conflict */
5351 5351                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5352 5352                          status = NFS4ERR_DELAY;
5353 5353                  else
5354 5354                          status = puterrno4(error);
5355 5355  
5356 5356                  /*
5357 5357                   * Set the response bitmap when setattr failed.
5358 5358                   * If VOP_SETATTR partially succeeded, test by doing a
5359 5359                   * VOP_GETATTR on the object and comparing the data
5360 5360                   * to the setattr arguments.
5361 5361                   */
5362 5362                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5363 5363          } else {
5364 5364                  /*
5365 5365                   * Force modified metadata out to stable storage.
5366 5366                   */
5367 5367                  (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5368 5368                  /*
5369 5369                   * Set response bitmap
5370 5370                   */
5371 5371                  nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5372 5372          }
5373 5373  
5374 5374  /* Return early and already have a NFSv4 error */
5375 5375  done:
5376 5376          /*
5377 5377           * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5378 5378           * conversion sets both readable and writeable NFS4 attrs
5379 5379           * for AT_MTIME and AT_ATIME.  The line below masks out
5380 5380           * unrequested attrs from the setattr result bitmap.  This
5381 5381           * is placed after the done: label to catch the ATTRNOTSUP
5382 5382           * case.
5383 5383           */
5384 5384          *resp &= fattrp->attrmask;
5385 5385  
5386 5386          if (in_crit)
5387 5387                  nbl_end_crit(vp);
5388 5388  
5389 5389          nfs4_ntov_table_free(&ntov, &sarg);
5390 5390  
5391 5391          return (status);
5392 5392  }
5393 5393  
5394 5394  /* ARGSUSED */
5395 5395  static void
5396 5396  rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5397 5397      struct compound_state *cs)
5398 5398  {
5399 5399          SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5400 5400          SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5401 5401          bslabel_t *clabel;
5402 5402  
5403 5403          DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5404 5404              SETATTR4args *, args);
5405 5405  
5406 5406          if (cs->vp == NULL) {
5407 5407                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5408 5408                  goto out;
5409 5409          }
5410 5410  
5411 5411          /*
5412 5412           * If there is an unshared filesystem mounted on this vnode,
5413 5413           * do not allow to setattr on this vnode.
5414 5414           */
5415 5415          if (vn_ismntpt(cs->vp)) {
5416 5416                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5417 5417                  goto out;
5418 5418          }
5419 5419  
5420 5420          resp->attrsset = 0;
5421 5421  
5422 5422          if (rdonly4(req, cs)) {
5423 5423                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5424 5424                  goto out;
5425 5425          }
5426 5426  
5427 5427          /* check label before setting attributes */
5428 5428          if (is_system_labeled()) {
5429 5429                  ASSERT(req->rq_label != NULL);
5430 5430                  clabel = req->rq_label;
5431 5431                  DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5432 5432                      "got client label from request(1)",
5433 5433                      struct svc_req *, req);
5434 5434                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
5435 5435                          if (!do_rfs_label_check(clabel, cs->vp,
5436 5436                              EQUALITY_CHECK, cs->exi)) {
5437 5437                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5438 5438                                  goto out;
5439 5439                          }
5440 5440                  }
5441 5441          }
5442 5442  
5443 5443          *cs->statusp = resp->status =
5444 5444              do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5445 5445              &args->stateid);
5446 5446  
5447 5447  out:
5448 5448          DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5449 5449              SETATTR4res *, resp);
5450 5450  }
5451 5451  
5452 5452  /* ARGSUSED */
5453 5453  static void
5454 5454  rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5455 5455      struct compound_state *cs)
5456 5456  {
5457 5457          /*
5458 5458           * verify and nverify are exactly the same, except that nverify
5459 5459           * succeeds when some argument changed, and verify succeeds when
5460 5460           * when none changed.
5461 5461           */
5462 5462  
5463 5463          VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5464 5464          VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5465 5465  
5466 5466          int error;
5467 5467          struct nfs4_svgetit_arg sarg;
5468 5468          struct statvfs64 sb;
5469 5469          struct nfs4_ntov_table ntov;
5470 5470  
5471 5471          DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5472 5472              VERIFY4args *, args);
5473 5473  
5474 5474          if (cs->vp == NULL) {
5475 5475                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5476 5476                  goto out;
5477 5477          }
5478 5478  
5479 5479          sarg.sbp = &sb;
5480 5480          sarg.is_referral = B_FALSE;
5481 5481          nfs4_ntov_table_init(&ntov);
5482 5482          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5483 5483              &sarg, &ntov, NFS4ATTR_VERIT);
5484 5484          if (resp->status != NFS4_OK) {
5485 5485                  /*
5486 5486                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5487 5487                   * so could return -1 for "no match".
5488 5488                   */
5489 5489                  if (resp->status == -1)
5490 5490                          resp->status = NFS4ERR_NOT_SAME;
5491 5491                  goto done;
5492 5492          }
5493 5493          error = rfs4_verify_attr(&sarg, NULL, &ntov);
5494 5494          switch (error) {
5495 5495          case 0:
5496 5496                  resp->status = NFS4_OK;
5497 5497                  break;
5498 5498          case -1:
5499 5499                  resp->status = NFS4ERR_NOT_SAME;
5500 5500                  break;
5501 5501          default:
5502 5502                  resp->status = puterrno4(error);
5503 5503                  break;
5504 5504          }
5505 5505  done:
5506 5506          *cs->statusp = resp->status;
5507 5507          nfs4_ntov_table_free(&ntov, &sarg);
5508 5508  out:
5509 5509          DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5510 5510              VERIFY4res *, resp);
5511 5511  }
5512 5512  
5513 5513  /* ARGSUSED */
5514 5514  static void
5515 5515  rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5516 5516      struct compound_state *cs)
5517 5517  {
5518 5518          /*
5519 5519           * verify and nverify are exactly the same, except that nverify
5520 5520           * succeeds when some argument changed, and verify succeeds when
5521 5521           * when none changed.
5522 5522           */
5523 5523  
5524 5524          NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5525 5525          NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5526 5526  
5527 5527          int error;
5528 5528          struct nfs4_svgetit_arg sarg;
5529 5529          struct statvfs64 sb;
5530 5530          struct nfs4_ntov_table ntov;
5531 5531  
5532 5532          DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5533 5533              NVERIFY4args *, args);
5534 5534  
5535 5535          if (cs->vp == NULL) {
5536 5536                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5537 5537                  DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5538 5538                      NVERIFY4res *, resp);
5539 5539                  return;
5540 5540          }
5541 5541          sarg.sbp = &sb;
5542 5542          sarg.is_referral = B_FALSE;
5543 5543          nfs4_ntov_table_init(&ntov);
5544 5544          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5545 5545              &sarg, &ntov, NFS4ATTR_VERIT);
5546 5546          if (resp->status != NFS4_OK) {
5547 5547                  /*
5548 5548                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5549 5549                   * so could return -1 for "no match".
5550 5550                   */
5551 5551                  if (resp->status == -1)
5552 5552                          resp->status = NFS4_OK;
5553 5553                  goto done;
5554 5554          }
5555 5555          error = rfs4_verify_attr(&sarg, NULL, &ntov);
5556 5556          switch (error) {
5557 5557          case 0:
5558 5558                  resp->status = NFS4ERR_SAME;
5559 5559                  break;
5560 5560          case -1:
5561 5561                  resp->status = NFS4_OK;
5562 5562                  break;
5563 5563          default:
5564 5564                  resp->status = puterrno4(error);
5565 5565                  break;
5566 5566          }
5567 5567  done:
5568 5568          *cs->statusp = resp->status;
5569 5569          nfs4_ntov_table_free(&ntov, &sarg);
5570 5570  
5571 5571          DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5572 5572              NVERIFY4res *, resp);
5573 5573  }
5574 5574  
5575 5575  /*
5576 5576   * XXX - This should live in an NFS header file.
5577 5577   */
5578 5578  #define MAX_IOVECS      12
5579 5579  
5580 5580  /* ARGSUSED */
5581 5581  static void
5582 5582  rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5583 5583      struct compound_state *cs)
5584 5584  {
5585 5585          WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5586 5586          WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5587 5587          int error;
5588 5588          vnode_t *vp;
5589 5589          struct vattr bva;
5590 5590          u_offset_t rlimit;
5591 5591          struct uio uio;
5592 5592          struct iovec iov[MAX_IOVECS];
5593 5593          struct iovec *iovp;
5594 5594          int iovcnt;
5595 5595          int ioflag;
5596 5596          cred_t *savecred, *cr;
5597 5597          bool_t *deleg = &cs->deleg;
5598 5598          nfsstat4 stat;
5599 5599          int in_crit = 0;
5600 5600          caller_context_t ct;
5601 5601          nfs4_srv_t *nsrv4;
5602 5602  
5603 5603          DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5604 5604              WRITE4args *, args);
5605 5605  
5606 5606          vp = cs->vp;
5607 5607          if (vp == NULL) {
5608 5608                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5609 5609                  goto out;
5610 5610          }
5611 5611          if (cs->access == CS_ACCESS_DENIED) {
5612 5612                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5613 5613                  goto out;
5614 5614          }
5615 5615  
5616 5616          cr = cs->cr;
5617 5617  
5618 5618          if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5619 5619              deleg, TRUE, &ct)) != NFS4_OK) {
5620 5620                  *cs->statusp = resp->status = stat;
5621 5621                  goto out;
5622 5622          }
5623 5623  
5624 5624          /*
5625 5625           * We have to enter the critical region before calling VOP_RWLOCK
5626 5626           * to avoid a deadlock with ufs.
5627 5627           */
5628 5628          if (nbl_need_check(vp)) {
5629 5629                  nbl_start_crit(vp, RW_READER);
5630 5630                  in_crit = 1;
5631 5631                  if (nbl_conflict(vp, NBL_WRITE,
5632 5632                      args->offset, args->data_len, 0, &ct)) {
5633 5633                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
5634 5634                          goto out;
5635 5635                  }
5636 5636          }
5637 5637  
5638 5638          bva.va_mask = AT_MODE | AT_UID;
5639 5639          error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5640 5640  
5641 5641          /*
5642 5642           * If we can't get the attributes, then we can't do the
5643 5643           * right access checking.  So, we'll fail the request.
5644 5644           */
5645 5645          if (error) {
5646 5646                  *cs->statusp = resp->status = puterrno4(error);
5647 5647                  goto out;
5648 5648          }
5649 5649  
5650 5650          if (rdonly4(req, cs)) {
5651 5651                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5652 5652                  goto out;
5653 5653          }
5654 5654  
5655 5655          if (vp->v_type != VREG) {
5656 5656                  *cs->statusp = resp->status =
5657 5657                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5658 5658                  goto out;
5659 5659          }
5660 5660  
5661 5661          if (crgetuid(cr) != bva.va_uid &&
5662 5662              (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5663 5663                  *cs->statusp = resp->status = puterrno4(error);
5664 5664                  goto out;
5665 5665          }
5666 5666  
5667 5667          if (MANDLOCK(vp, bva.va_mode)) {
5668 5668                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5669 5669                  goto out;
5670 5670          }
5671 5671  
5672 5672          nsrv4 = nfs4_get_srv();
5673 5673          if (args->data_len == 0) {
5674 5674                  *cs->statusp = resp->status = NFS4_OK;
5675 5675                  resp->count = 0;
5676 5676                  resp->committed = args->stable;
5677 5677                  resp->writeverf = nsrv4->write4verf;
5678 5678                  goto out;
5679 5679          }
5680 5680  
5681 5681          if (args->mblk != NULL) {
5682 5682                  mblk_t *m;
5683 5683                  uint_t bytes, round_len;
5684 5684  
5685 5685                  iovcnt = 0;
5686 5686                  bytes = 0;
5687 5687                  round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5688 5688                  for (m = args->mblk;
5689 5689                      m != NULL && bytes < round_len;
5690 5690                      m = m->b_cont) {
5691 5691                          iovcnt++;
5692 5692                          bytes += MBLKL(m);
5693 5693                  }
5694 5694  #ifdef DEBUG
5695 5695                  /* should have ended on an mblk boundary */
5696 5696                  if (bytes != round_len) {
5697 5697                          printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5698 5698                              bytes, round_len, args->data_len);
5699 5699                          printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5700 5700                              (void *)args->mblk, (void *)m);
5701 5701                          ASSERT(bytes == round_len);
5702 5702                  }
5703 5703  #endif
5704 5704                  if (iovcnt <= MAX_IOVECS) {
5705 5705                          iovp = iov;
5706 5706                  } else {
5707 5707                          iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5708 5708                  }
5709 5709                  mblk_to_iov(args->mblk, iovcnt, iovp);
5710 5710          } else if (args->rlist != NULL) {
5711 5711                  iovcnt = 1;
5712 5712                  iovp = iov;
5713 5713                  iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5714 5714                  iovp->iov_len = args->data_len;
5715 5715          } else {
5716 5716                  iovcnt = 1;
5717 5717                  iovp = iov;
5718 5718                  iovp->iov_base = args->data_val;
5719 5719                  iovp->iov_len = args->data_len;
5720 5720          }
5721 5721  
5722 5722          uio.uio_iov = iovp;
5723 5723          uio.uio_iovcnt = iovcnt;
5724 5724  
5725 5725          uio.uio_segflg = UIO_SYSSPACE;
5726 5726          uio.uio_extflg = UIO_COPY_DEFAULT;
5727 5727          uio.uio_loffset = args->offset;
5728 5728          uio.uio_resid = args->data_len;
5729 5729          uio.uio_llimit = curproc->p_fsz_ctl;
5730 5730          rlimit = uio.uio_llimit - args->offset;
5731 5731          if (rlimit < (u_offset_t)uio.uio_resid)
5732 5732                  uio.uio_resid = (int)rlimit;
5733 5733  
5734 5734          if (args->stable == UNSTABLE4)
5735 5735                  ioflag = 0;
5736 5736          else if (args->stable == FILE_SYNC4)
5737 5737                  ioflag = FSYNC;
5738 5738          else if (args->stable == DATA_SYNC4)
5739 5739                  ioflag = FDSYNC;
5740 5740          else {
5741 5741                  if (iovp != iov)
5742 5742                          kmem_free(iovp, sizeof (*iovp) * iovcnt);
5743 5743                  *cs->statusp = resp->status = NFS4ERR_INVAL;
5744 5744                  goto out;
5745 5745          }
5746 5746  
5747 5747          /*
5748 5748           * We're changing creds because VM may fault and we need
5749 5749           * the cred of the current thread to be used if quota
5750 5750           * checking is enabled.
5751 5751           */
5752 5752          savecred = curthread->t_cred;
5753 5753          curthread->t_cred = cr;
5754 5754          error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5755 5755          curthread->t_cred = savecred;
5756 5756  
5757 5757          if (iovp != iov)
5758 5758                  kmem_free(iovp, sizeof (*iovp) * iovcnt);
5759 5759  
5760 5760          if (error) {
5761 5761                  *cs->statusp = resp->status = puterrno4(error);
5762 5762                  goto out;
5763 5763          }
5764 5764  
5765 5765          *cs->statusp = resp->status = NFS4_OK;
5766 5766          resp->count = args->data_len - uio.uio_resid;
5767 5767  
5768 5768          if (ioflag == 0)
5769 5769                  resp->committed = UNSTABLE4;
5770 5770          else
5771 5771                  resp->committed = FILE_SYNC4;
5772 5772  
5773 5773          resp->writeverf = nsrv4->write4verf;
5774 5774  
5775 5775  out:
5776 5776          if (in_crit)
5777 5777                  nbl_end_crit(vp);
5778 5778  
5779 5779          DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5780 5780              WRITE4res *, resp);
5781 5781  }
5782 5782  
5783 5783  
5784 5784  /* XXX put in a header file */
5785 5785  extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5786 5786  
5787 5787  void
5788 5788  rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5789 5789      struct svc_req *req, cred_t *cr, int *rv)
5790 5790  {
5791 5791          uint_t i;
5792 5792          struct compound_state cs;
5793 5793          nfs4_srv_t *nsrv4;
5794 5794          nfs_export_t *ne = nfs_get_export();
5795 5795  
5796 5796          if (rv != NULL)
5797 5797                  *rv = 0;
5798 5798          rfs4_init_compound_state(&cs);
5799 5799          /*
5800 5800           * Form a reply tag by copying over the reqeuest tag.
5801 5801           */
5802 5802          resp->tag.utf8string_val =
5803 5803              kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5804 5804          resp->tag.utf8string_len = args->tag.utf8string_len;
5805 5805          bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5806 5806              resp->tag.utf8string_len);
5807 5807  
5808 5808          cs.statusp = &resp->status;
5809 5809          cs.req = req;
5810 5810          resp->array = NULL;
5811 5811          resp->array_len = 0;
5812 5812  
5813 5813          /*
5814 5814           * XXX for now, minorversion should be zero
5815 5815           */
5816 5816          if (args->minorversion != NFS4_MINORVERSION) {
5817 5817                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5818 5818                      &cs, COMPOUND4args *, args);
5819 5819                  resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5820 5820                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5821 5821                      &cs, COMPOUND4res *, resp);
5822 5822                  return;
5823 5823          }
5824 5824  
5825 5825          if (args->array_len == 0) {
5826 5826                  resp->status = NFS4_OK;
5827 5827                  return;
5828 5828          }
5829 5829  
5830 5830          ASSERT(exi == NULL);
5831 5831          ASSERT(cr == NULL);
5832 5832  
5833 5833          cr = crget();
5834 5834          ASSERT(cr != NULL);
5835 5835  
5836 5836          if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5837 5837                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5838 5838                      &cs, COMPOUND4args *, args);
5839 5839                  crfree(cr);
5840 5840                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5841 5841                      &cs, COMPOUND4res *, resp);
5842 5842                  svcerr_badcred(req->rq_xprt);
5843 5843                  if (rv != NULL)
5844 5844                          *rv = 1;
5845 5845                  return;
5846 5846          }
5847 5847          resp->array_len = args->array_len;
5848 5848          resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5849 5849              KM_SLEEP);
5850 5850  
5851 5851          cs.basecr = cr;
5852 5852          nsrv4 = nfs4_get_srv();
5853 5853  
5854 5854          DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5855 5855              COMPOUND4args *, args);
5856 5856  
5857 5857          /*
5858 5858           * For now, NFS4 compound processing must be protected by
5859 5859           * exported_lock because it can access more than one exportinfo
5860 5860           * per compound and share/unshare can now change multiple
5861 5861           * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5862 5862           * per proc (excluding public exinfo), and exi_count design
5863 5863           * is sufficient to protect concurrent execution of NFS2/3
5864 5864           * ops along with unexport.  This lock will be removed as
5865 5865           * part of the NFSv4 phase 2 namespace redesign work.
5866 5866           */
5867 5867          rw_enter(&ne->exported_lock, RW_READER);
5868 5868  
5869 5869          /*
5870 5870           * If this is the first compound we've seen, we need to start all
5871 5871           * new instances' grace periods.
5872 5872           */
5873 5873          if (nsrv4->seen_first_compound == 0) {
5874 5874                  rfs4_grace_start_new(nsrv4);
5875 5875                  /*
5876 5876                   * This must be set after rfs4_grace_start_new(), otherwise
5877 5877                   * another thread could proceed past here before the former
5878 5878                   * is finished.
5879 5879                   */
5880 5880                  nsrv4->seen_first_compound = 1;
5881 5881          }
5882 5882  
5883 5883          for (i = 0; i < args->array_len && cs.cont; i++) {
5884 5884                  nfs_argop4 *argop;
5885 5885                  nfs_resop4 *resop;
5886 5886                  uint_t op;
5887 5887  
5888 5888                  argop = &args->array[i];
5889 5889                  resop = &resp->array[i];
5890 5890                  resop->resop = argop->argop;
5891 5891                  op = (uint_t)resop->resop;
5892 5892  
5893 5893                  if (op < rfsv4disp_cnt) {
5894 5894                          /*
5895 5895                           * Count the individual ops here; NULL and COMPOUND
5896 5896                           * are counted in common_dispatch()
5897 5897                           */
5898 5898                          rfsproccnt_v4_ptr[op].value.ui64++;
5899 5899  
5900 5900                          NFS4_DEBUG(rfs4_debug > 1,
5901 5901                              (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5902 5902                          (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5903 5903                          NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5904 5904                              rfs4_op_string[op], *cs.statusp));
5905 5905                          if (*cs.statusp != NFS4_OK)
5906 5906                                  cs.cont = FALSE;
5907 5907                  } else {
5908 5908                          /*
5909 5909                           * This is effectively dead code since XDR code
5910 5910                           * will have already returned BADXDR if op doesn't
5911 5911                           * decode to legal value.  This only done for a
5912 5912                           * day when XDR code doesn't verify v4 opcodes.
5913 5913                           */
5914 5914                          op = OP_ILLEGAL;
5915 5915                          rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5916 5916  
5917 5917                          rfs4_op_illegal(argop, resop, req, &cs);
5918 5918                          cs.cont = FALSE;
5919 5919                  }
5920 5920  
5921 5921                  /*
5922 5922                   * If not at last op, and if we are to stop, then
5923 5923                   * compact the results array.
5924 5924                   */
5925 5925                  if ((i + 1) < args->array_len && !cs.cont) {
5926 5926                          nfs_resop4 *new_res = kmem_alloc(
5927 5927                              (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5928 5928                          bcopy(resp->array,
5929 5929                              new_res, (i+1) * sizeof (nfs_resop4));
5930 5930                          kmem_free(resp->array,
5931 5931                              args->array_len * sizeof (nfs_resop4));
5932 5932  
5933 5933                          resp->array_len =  i + 1;
5934 5934                          resp->array = new_res;
5935 5935                  }
5936 5936          }
5937 5937  
5938 5938          rw_exit(&ne->exported_lock);
5939 5939  
5940 5940          /*
5941 5941           * clear exportinfo and vnode fields from compound_state before dtrace
5942 5942           * probe, to avoid tracing residual values for path and share path.
5943 5943           */
5944 5944          if (cs.vp)
5945 5945                  VN_RELE(cs.vp);
5946 5946          if (cs.saved_vp)
5947 5947                  VN_RELE(cs.saved_vp);
5948 5948          cs.exi = cs.saved_exi = NULL;
5949 5949          cs.vp = cs.saved_vp = NULL;
5950 5950  
5951 5951          DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5952 5952              COMPOUND4res *, resp);
5953 5953  
5954 5954          if (cs.saved_fh.nfs_fh4_val)
5955 5955                  kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5956 5956  
5957 5957          if (cs.basecr)
5958 5958                  crfree(cs.basecr);
5959 5959          if (cs.cr)
5960 5960                  crfree(cs.cr);
5961 5961          /*
5962 5962           * done with this compound request, free the label
5963 5963           */
5964 5964  
5965 5965          if (req->rq_label != NULL) {
5966 5966                  kmem_free(req->rq_label, sizeof (bslabel_t));
5967 5967                  req->rq_label = NULL;
5968 5968          }
5969 5969  }
5970 5970  
5971 5971  /*
5972 5972   * XXX because of what appears to be duplicate calls to rfs4_compound_free
5973 5973   * XXX zero out the tag and array values. Need to investigate why the
5974 5974   * XXX calls occur, but at least prevent the panic for now.
5975 5975   */
5976 5976  void
5977 5977  rfs4_compound_free(COMPOUND4res *resp)
5978 5978  {
5979 5979          uint_t i;
5980 5980  
5981 5981          if (resp->tag.utf8string_val) {
5982 5982                  UTF8STRING_FREE(resp->tag)
5983 5983          }
5984 5984  
5985 5985          for (i = 0; i < resp->array_len; i++) {
5986 5986                  nfs_resop4 *resop;
5987 5987                  uint_t op;
5988 5988  
5989 5989                  resop = &resp->array[i];
5990 5990                  op = (uint_t)resop->resop;
5991 5991                  if (op < rfsv4disp_cnt) {
5992 5992                          (*rfsv4disptab[op].dis_resfree)(resop);
5993 5993                  }
5994 5994          }
5995 5995          if (resp->array != NULL) {
5996 5996                  kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5997 5997          }
5998 5998  }
5999 5999  
6000 6000  /*
6001 6001   * Process the value of the compound request rpc flags, as a bit-AND
6002 6002   * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6003 6003   */
6004 6004  void
6005 6005  rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6006 6006  {
6007 6007          int i;
6008 6008          int flag = RPC_ALL;
6009 6009  
6010 6010          for (i = 0; flag && i < args->array_len; i++) {
6011 6011                  uint_t op;
6012 6012  
6013 6013                  op = (uint_t)args->array[i].argop;
6014 6014  
6015 6015                  if (op < rfsv4disp_cnt)
6016 6016                          flag &= rfsv4disptab[op].dis_flags;
6017 6017                  else
6018 6018                          flag = 0;
6019 6019          }
6020 6020          *flagp = flag;
6021 6021  }
6022 6022  
6023 6023  nfsstat4
6024 6024  rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6025 6025  {
6026 6026          nfsstat4 e;
6027 6027  
6028 6028          rfs4_dbe_lock(cp->rc_dbe);
6029 6029  
6030 6030          if (cp->rc_sysidt != LM_NOSYSID) {
6031 6031                  *sp = cp->rc_sysidt;
6032 6032                  e = NFS4_OK;
6033 6033  
6034 6034          } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6035 6035                  *sp = cp->rc_sysidt;
6036 6036                  e = NFS4_OK;
6037 6037  
6038 6038                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6039 6039                      "rfs4_client_sysid: allocated 0x%x\n", *sp));
6040 6040          } else
6041 6041                  e = NFS4ERR_DELAY;
6042 6042  
6043 6043          rfs4_dbe_unlock(cp->rc_dbe);
6044 6044          return (e);
6045 6045  }
6046 6046  
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: log one CE_NOTE line describing a lock request --
 * the caller-supplied label, the fcntl operation, the lock type, and the
 * start, length and pid taken from flk.  A zero l_len is printed as ~0LL.
 */
static void lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op, *type;
        longlong_t len;

        if (operation == F_GETLK)
                op = "F_GETLK";
        else if (operation == F_SETLK)
                op = "F_SETLK";
        else if (operation == F_SETLK_NBMAND)
                op = "F_SETLK_NBMAND";
        else
                op = "F_UNKNOWN";

        if (flk->l_type == F_UNLCK)
                type = "F_UNLCK";
        else if (flk->l_type == F_RDLCK)
                type = "F_RDLCK";
        else if (flk->l_type == F_WRLCK)
                type = "F_WRLCK";
        else
                type = "F_UNKNOWN";

        ASSERT(flk->l_whence == 0);

        len = ~0LL;
        if (flk->l_len)
                len = (longlong_t)flk->l_len;

        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start, len, flk->l_pid);
}

/* Call lock_print() only when the debug selector d is non-zero. */
#define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Non-debug (or lint) builds: LOCK_PRINT expands to nothing. */
#define LOCK_PRINT(d, s, t, f)
#endif
6083 6083  
6084 6084  /*ARGSUSED*/
6085 6085  static bool_t
6086 6086  creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6087 6087  {
6088 6088          return (TRUE);
6089 6089  }
6090 6090  
6091 6091  /*
6092 6092   * Look up the pathname using the vp in cs as the directory vnode.
6093 6093   * cs->vp will be the vnode for the file on success
6094 6094   */
6095 6095  
6096 6096  static nfsstat4
6097 6097  rfs4_lookup(component4 *component, struct svc_req *req,
6098 6098      struct compound_state *cs)
6099 6099  {
6100 6100          char *nm;
6101 6101          uint32_t len;
6102 6102          nfsstat4 status;
6103 6103          struct sockaddr *ca;
6104 6104          char *name;
6105 6105  
6106 6106          if (cs->vp == NULL) {
6107 6107                  return (NFS4ERR_NOFILEHANDLE);
6108 6108          }
6109 6109          if (cs->vp->v_type != VDIR) {
6110 6110                  return (NFS4ERR_NOTDIR);
6111 6111          }
6112 6112  
6113 6113          status = utf8_dir_verify(component);
6114 6114          if (status != NFS4_OK)
6115 6115                  return (status);
6116 6116  
6117 6117          nm = utf8_to_fn(component, &len, NULL);
6118 6118          if (nm == NULL) {
6119 6119                  return (NFS4ERR_INVAL);
6120 6120          }
6121 6121  
6122 6122          if (len > MAXNAMELEN) {
6123 6123                  kmem_free(nm, len);
6124 6124                  return (NFS4ERR_NAMETOOLONG);
6125 6125          }
6126 6126  
6127 6127          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6128 6128          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6129 6129              MAXPATHLEN + 1);
6130 6130  
6131 6131          if (name == NULL) {
6132 6132                  kmem_free(nm, len);
6133 6133                  return (NFS4ERR_INVAL);
6134 6134          }
6135 6135  
6136 6136          status = do_rfs4_op_lookup(name, req, cs);
6137 6137  
6138 6138          if (name != nm)
6139 6139                  kmem_free(name, MAXPATHLEN + 1);
6140 6140  
6141 6141          kmem_free(nm, len);
6142 6142  
6143 6143          return (status);
6144 6144  }
6145 6145  
6146 6146  static nfsstat4
6147 6147  rfs4_lookupfile(component4 *component, struct svc_req *req,
6148 6148      struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6149 6149  {
6150 6150          nfsstat4 status;
6151 6151          vnode_t *dvp = cs->vp;
6152 6152          vattr_t bva, ava, fva;
6153 6153          int error;
6154 6154  
6155 6155          /* Get "before" change value */
6156 6156          bva.va_mask = AT_CTIME|AT_SEQ;
6157 6157          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6158 6158          if (error)
6159 6159                  return (puterrno4(error));
6160 6160  
6161 6161          /* rfs4_lookup may VN_RELE directory */
6162 6162          VN_HOLD(dvp);
6163 6163  
6164 6164          status = rfs4_lookup(component, req, cs);
6165 6165          if (status != NFS4_OK) {
6166 6166                  VN_RELE(dvp);
6167 6167                  return (status);
6168 6168          }
6169 6169  
6170 6170          /*
6171 6171           * Get "after" change value, if it fails, simply return the
6172 6172           * before value.
6173 6173           */
6174 6174          ava.va_mask = AT_CTIME|AT_SEQ;
6175 6175          if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6176 6176                  ava.va_ctime = bva.va_ctime;
6177 6177                  ava.va_seq = 0;
6178 6178          }
6179 6179          VN_RELE(dvp);
6180 6180  
6181 6181          /*
6182 6182           * Validate the file is a file
6183 6183           */
6184 6184          fva.va_mask = AT_TYPE|AT_MODE;
6185 6185          error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6186 6186          if (error)
6187 6187                  return (puterrno4(error));
6188 6188  
6189 6189          if (fva.va_type != VREG) {
6190 6190                  if (fva.va_type == VDIR)
6191 6191                          return (NFS4ERR_ISDIR);
6192 6192                  if (fva.va_type == VLNK)
6193 6193                          return (NFS4ERR_SYMLINK);
6194 6194                  return (NFS4ERR_INVAL);
6195 6195          }
6196 6196  
6197 6197          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6198 6198          NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6199 6199  
6200 6200          /*
6201 6201           * It is undefined if VOP_LOOKUP will change va_seq, so
6202 6202           * cinfo.atomic = TRUE only if we have
6203 6203           * non-zero va_seq's, and they have not changed.
6204 6204           */
6205 6205          if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6206 6206                  cinfo->atomic = TRUE;
6207 6207          else
6208 6208                  cinfo->atomic = FALSE;
6209 6209  
6210 6210          /* Check for mandatory locking */
6211 6211          cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6212 6212          return (check_open_access(access, cs, req));
6213 6213  }
6214 6214  
6215 6215  static nfsstat4
6216 6216  create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6217 6217      cred_t *cr, vnode_t **vpp, bool_t *created)
6218 6218  {
6219 6219          int error;
6220 6220          nfsstat4 status = NFS4_OK;
6221 6221          vattr_t va;
6222 6222  
6223 6223  tryagain:
6224 6224  
6225 6225          /*
6226 6226           * The file open mode used is VWRITE.  If the client needs
6227 6227           * some other semantic, then it should do the access checking
6228 6228           * itself.  It would have been nice to have the file open mode
6229 6229           * passed as part of the arguments.
6230 6230           */
6231 6231  
6232 6232          *created = TRUE;
6233 6233          error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6234 6234  
6235 6235          if (error) {
6236 6236                  *created = FALSE;
6237 6237  
6238 6238                  /*
6239 6239                   * If we got something other than file already exists
6240 6240                   * then just return this error.  Otherwise, we got
6241 6241                   * EEXIST.  If we were doing a GUARDED create, then
6242 6242                   * just return this error.  Otherwise, we need to
6243 6243                   * make sure that this wasn't a duplicate of an
6244 6244                   * exclusive create request.
6245 6245                   *
6246 6246                   * The assumption is made that a non-exclusive create
6247 6247                   * request will never return EEXIST.
6248 6248                   */
6249 6249  
6250 6250                  if (error != EEXIST || mode == GUARDED4) {
6251 6251                          status = puterrno4(error);
6252 6252                          return (status);
6253 6253                  }
6254 6254                  error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6255 6255                      NULL, NULL, NULL);
6256 6256  
6257 6257                  if (error) {
6258 6258                          /*
6259 6259                           * We couldn't find the file that we thought that
6260 6260                           * we just created.  So, we'll just try creating
6261 6261                           * it again.
6262 6262                           */
6263 6263                          if (error == ENOENT)
6264 6264                                  goto tryagain;
6265 6265  
6266 6266                          status = puterrno4(error);
6267 6267                          return (status);
6268 6268                  }
6269 6269  
6270 6270                  if (mode == UNCHECKED4) {
6271 6271                          /* existing object must be regular file */
6272 6272                          if ((*vpp)->v_type != VREG) {
6273 6273                                  if ((*vpp)->v_type == VDIR)
6274 6274                                          status = NFS4ERR_ISDIR;
6275 6275                                  else if ((*vpp)->v_type == VLNK)
6276 6276                                          status = NFS4ERR_SYMLINK;
6277 6277                                  else
6278 6278                                          status = NFS4ERR_INVAL;
6279 6279                                  VN_RELE(*vpp);
6280 6280                                  return (status);
6281 6281                          }
6282 6282  
6283 6283                          return (NFS4_OK);
6284 6284                  }
6285 6285  
6286 6286                  /* Check for duplicate request */
6287 6287                  va.va_mask = AT_MTIME;
6288 6288                  error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6289 6289                  if (!error) {
6290 6290                          /* We found the file */
6291 6291                          const timestruc_t *mtime = &vap->va_mtime;
6292 6292  
6293 6293                          if (va.va_mtime.tv_sec != mtime->tv_sec ||
6294 6294                              va.va_mtime.tv_nsec != mtime->tv_nsec) {
6295 6295                                  /* but its not our creation */
6296 6296                                  VN_RELE(*vpp);
6297 6297                                  return (NFS4ERR_EXIST);
6298 6298                          }
6299 6299                          *created = TRUE; /* retrans of create == created */
6300 6300                          return (NFS4_OK);
6301 6301                  }
6302 6302                  VN_RELE(*vpp);
6303 6303                  return (NFS4ERR_EXIST);
6304 6304          }
6305 6305  
6306 6306          return (NFS4_OK);
6307 6307  }
6308 6308  
6309 6309  static nfsstat4
6310 6310  check_open_access(uint32_t access, struct compound_state *cs,
6311 6311      struct svc_req *req)
6312 6312  {
6313 6313          int error;
6314 6314          vnode_t *vp;
6315 6315          bool_t readonly;
6316 6316          cred_t *cr = cs->cr;
6317 6317  
6318 6318          /* For now we don't allow mandatory locking as per V2/V3 */
6319 6319          if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6320 6320                  return (NFS4ERR_ACCESS);
6321 6321          }
6322 6322  
6323 6323          vp = cs->vp;
6324 6324          ASSERT(cr != NULL && vp->v_type == VREG);
6325 6325  
6326 6326          /*
6327 6327           * If the file system is exported read only and we are trying
6328 6328           * to open for write, then return NFS4ERR_ROFS
6329 6329           */
6330 6330  
6331 6331          readonly = rdonly4(req, cs);
6332 6332  
6333 6333          if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6334 6334                  return (NFS4ERR_ROFS);
6335 6335  
6336 6336          if (access & OPEN4_SHARE_ACCESS_READ) {
6337 6337                  if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6338 6338                      (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6339 6339                          return (NFS4ERR_ACCESS);
6340 6340                  }
6341 6341          }
6342 6342  
6343 6343          if (access & OPEN4_SHARE_ACCESS_WRITE) {
6344 6344                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6345 6345                  if (error)
6346 6346                          return (NFS4ERR_ACCESS);
6347 6347          }
6348 6348  
6349 6349          return (NFS4_OK);
6350 6350  }
6351 6351  
6352 6352  static nfsstat4
6353 6353  rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6354 6354      change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6355 6355  {
6356 6356          struct nfs4_svgetit_arg sarg;
6357 6357          struct nfs4_ntov_table ntov;
6358 6358  
6359 6359          bool_t ntov_table_init = FALSE;
6360 6360          struct statvfs64 sb;
6361 6361          nfsstat4 status;
6362 6362          vnode_t *vp;
6363 6363          vattr_t bva, ava, iva, cva, *vap;
6364 6364          vnode_t *dvp;
6365 6365          timespec32_t *mtime;
6366 6366          char *nm = NULL;
6367 6367          uint_t buflen;
6368 6368          bool_t created;
6369 6369          bool_t setsize = FALSE;
6370 6370          len_t reqsize;
6371 6371          int error;
6372 6372          bool_t trunc;
6373 6373          caller_context_t ct;
6374 6374          component4 *component;
6375 6375          bslabel_t *clabel;
6376 6376          struct sockaddr *ca;
6377 6377          char *name = NULL;
6378 6378  
6379 6379          sarg.sbp = &sb;
6380 6380          sarg.is_referral = B_FALSE;
6381 6381  
6382 6382          dvp = cs->vp;
6383 6383  
6384 6384          /* Check if the file system is read only */
6385 6385          if (rdonly4(req, cs))
6386 6386                  return (NFS4ERR_ROFS);
6387 6387  
6388 6388          /* check the label of including directory */
6389 6389          if (is_system_labeled()) {
6390 6390                  ASSERT(req->rq_label != NULL);
6391 6391                  clabel = req->rq_label;
6392 6392                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6393 6393                      "got client label from request(1)",
6394 6394                      struct svc_req *, req);
6395 6395                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
6396 6396                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6397 6397                              cs->exi)) {
6398 6398                                  return (NFS4ERR_ACCESS);
6399 6399                          }
6400 6400                  }
6401 6401          }
6402 6402  
6403 6403          /*
6404 6404           * Get the last component of path name in nm. cs will reference
6405 6405           * the including directory on success.
6406 6406           */
6407 6407          component = &args->open_claim4_u.file;
6408 6408          status = utf8_dir_verify(component);
6409 6409          if (status != NFS4_OK)
6410 6410                  return (status);
6411 6411  
6412 6412          nm = utf8_to_fn(component, &buflen, NULL);
6413 6413  
6414 6414          if (nm == NULL)
6415 6415                  return (NFS4ERR_RESOURCE);
6416 6416  
6417 6417          if (buflen > MAXNAMELEN) {
6418 6418                  kmem_free(nm, buflen);
6419 6419                  return (NFS4ERR_NAMETOOLONG);
6420 6420          }
6421 6421  
6422 6422          bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6423 6423          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6424 6424          if (error) {
6425 6425                  kmem_free(nm, buflen);
6426 6426                  return (puterrno4(error));
6427 6427          }
6428 6428  
6429 6429          if (bva.va_type != VDIR) {
6430 6430                  kmem_free(nm, buflen);
6431 6431                  return (NFS4ERR_NOTDIR);
6432 6432          }
6433 6433  
6434 6434          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6435 6435  
6436 6436          switch (args->mode) {
6437 6437          case GUARDED4:
6438 6438                  /*FALLTHROUGH*/
6439 6439          case UNCHECKED4:
6440 6440                  nfs4_ntov_table_init(&ntov);
6441 6441                  ntov_table_init = TRUE;
6442 6442  
6443 6443                  *attrset = 0;
6444 6444                  status = do_rfs4_set_attrs(attrset,
6445 6445                      &args->createhow4_u.createattrs,
6446 6446                      cs, &sarg, &ntov, NFS4ATTR_SETIT);
6447 6447  
6448 6448                  if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6449 6449                      sarg.vap->va_type != VREG) {
6450 6450                          if (sarg.vap->va_type == VDIR)
6451 6451                                  status = NFS4ERR_ISDIR;
6452 6452                          else if (sarg.vap->va_type == VLNK)
6453 6453                                  status = NFS4ERR_SYMLINK;
6454 6454                          else
6455 6455                                  status = NFS4ERR_INVAL;
6456 6456                  }
6457 6457  
6458 6458                  if (status != NFS4_OK) {
6459 6459                          kmem_free(nm, buflen);
6460 6460                          nfs4_ntov_table_free(&ntov, &sarg);
6461 6461                          *attrset = 0;
6462 6462                          return (status);
6463 6463                  }
6464 6464  
6465 6465                  vap = sarg.vap;
6466 6466                  vap->va_type = VREG;
6467 6467                  vap->va_mask |= AT_TYPE;
6468 6468  
6469 6469                  if ((vap->va_mask & AT_MODE) == 0) {
6470 6470                          vap->va_mask |= AT_MODE;
6471 6471                          vap->va_mode = (mode_t)0600;
6472 6472                  }
6473 6473  
6474 6474                  if (vap->va_mask & AT_SIZE) {
6475 6475  
6476 6476                          /* Disallow create with a non-zero size */
6477 6477  
6478 6478                          if ((reqsize = sarg.vap->va_size) != 0) {
6479 6479                                  kmem_free(nm, buflen);
6480 6480                                  nfs4_ntov_table_free(&ntov, &sarg);
6481 6481                                  *attrset = 0;
6482 6482                                  return (NFS4ERR_INVAL);
6483 6483                          }
6484 6484                          setsize = TRUE;
6485 6485                  }
6486 6486                  break;
6487 6487  
6488 6488          case EXCLUSIVE4:
6489 6489                  /* prohibit EXCL create of named attributes */
6490 6490                  if (dvp->v_flag & V_XATTRDIR) {
6491 6491                          kmem_free(nm, buflen);
6492 6492                          *attrset = 0;
6493 6493                          return (NFS4ERR_INVAL);
6494 6494                  }
6495 6495  
6496 6496                  cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6497 6497                  cva.va_type = VREG;
6498 6498                  /*
6499 6499                   * Ensure no time overflows. Assumes underlying
6500 6500                   * filesystem supports at least 32 bits.
6501 6501                   * Truncate nsec to usec resolution to allow valid
6502 6502                   * compares even if the underlying filesystem truncates.
6503 6503                   */
6504 6504                  mtime = (timespec32_t *)&args->createhow4_u.createverf;
6505 6505                  cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6506 6506                  cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6507 6507                  cva.va_mode = (mode_t)0;
6508 6508                  vap = &cva;
6509 6509  
6510 6510                  /*
6511 6511                   * For EXCL create, attrset is set to the server attr
6512 6512                   * used to cache the client's verifier.
6513 6513                   */
6514 6514                  *attrset = FATTR4_TIME_MODIFY_MASK;
6515 6515                  break;
6516 6516          }
6517 6517  
6518 6518          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6519 6519          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6520 6520              MAXPATHLEN  + 1);
6521 6521  
6522 6522          if (name == NULL) {
6523 6523                  kmem_free(nm, buflen);
6524 6524                  return (NFS4ERR_SERVERFAULT);
6525 6525          }
6526 6526  
6527 6527          status = create_vnode(dvp, name, vap, args->mode,
6528 6528              cs->cr, &vp, &created);
6529 6529          if (nm != name)
6530 6530                  kmem_free(name, MAXPATHLEN + 1);
6531 6531          kmem_free(nm, buflen);
6532 6532  
6533 6533          if (status != NFS4_OK) {
6534 6534                  if (ntov_table_init)
6535 6535                          nfs4_ntov_table_free(&ntov, &sarg);
6536 6536                  *attrset = 0;
6537 6537                  return (status);
6538 6538          }
6539 6539  
6540 6540          trunc = (setsize && !created);
6541 6541  
6542 6542          if (args->mode != EXCLUSIVE4) {
6543 6543                  bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6544 6544  
6545 6545                  /*
6546 6546                   * True verification that object was created with correct
6547 6547                   * attrs is impossible.  The attrs could have been changed
6548 6548                   * immediately after object creation.  If attributes did
6549 6549                   * not verify, the only recourse for the server is to
6550 6550                   * destroy the object.  Maybe if some attrs (like gid)
6551 6551                   * are set incorrectly, the object should be destroyed;
6552 6552                   * however, seems bad as a default policy.  Do we really
6553 6553                   * want to destroy an object over one of the times not
6554 6554                   * verifying correctly?  For these reasons, the server
6555 6555                   * currently sets bits in attrset for createattrs
6556 6556                   * that were set; however, no verification is done.
6557 6557                   *
6558 6558                   * vmask_to_nmask accounts for vattr bits set on create
6559 6559                   *      [do_rfs4_set_attrs() only sets resp bits for
6560 6560                   *       non-vattr/vfs bits.]
6561 6561                   * Mask off any bits we set by default so as not to return
6562 6562                   * more attrset bits than were requested in createattrs
6563 6563                   */
6564 6564                  if (created) {
6565 6565                          nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6566 6566                          *attrset &= createmask;
6567 6567                  } else {
6568 6568                          /*
6569 6569                           * We did not create the vnode (we tried but it
6570 6570                           * already existed).  In this case, the only createattr
6571 6571                           * that the spec allows the server to set is size,
6572 6572                           * and even then, it can only be set if it is 0.
6573 6573                           */
6574 6574                          *attrset = 0;
6575 6575                          if (trunc)
6576 6576                                  *attrset = FATTR4_SIZE_MASK;
6577 6577                  }
6578 6578          }
6579 6579          if (ntov_table_init)
6580 6580                  nfs4_ntov_table_free(&ntov, &sarg);
6581 6581  
6582 6582          /*
6583 6583           * Get the initial "after" sequence number, if it fails,
6584 6584           * set to zero, time to before.
6585 6585           */
6586 6586          iva.va_mask = AT_CTIME|AT_SEQ;
6587 6587          if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6588 6588                  iva.va_seq = 0;
6589 6589                  iva.va_ctime = bva.va_ctime;
6590 6590          }
6591 6591  
6592 6592          /*
6593 6593           * create_vnode attempts to create the file exclusive,
6594 6594           * if it already exists the VOP_CREATE will fail and
6595 6595           * may not increase va_seq. It is atomic if
6596 6596           * we haven't changed the directory, but if it has changed
6597 6597           * we don't know what changed it.
6598 6598           */
6599 6599          if (!created) {
6600 6600                  if (bva.va_seq && iva.va_seq &&
6601 6601                      bva.va_seq == iva.va_seq)
6602 6602                          cinfo->atomic = TRUE;
6603 6603                  else
6604 6604                          cinfo->atomic = FALSE;
6605 6605                  NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6606 6606          } else {
6607 6607                  /*
6608 6608                   * The entry was created, we need to sync the
6609 6609                   * directory metadata.
6610 6610                   */
6611 6611                  (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6612 6612  
6613 6613                  /*
6614 6614                   * Get "after" change value, if it fails, simply return the
6615 6615                   * before value.
6616 6616                   */
6617 6617                  ava.va_mask = AT_CTIME|AT_SEQ;
6618 6618                  if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6619 6619                          ava.va_ctime = bva.va_ctime;
6620 6620                          ava.va_seq = 0;
6621 6621                  }
6622 6622  
6623 6623                  NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6624 6624  
6625 6625                  /*
6626 6626                   * The cinfo->atomic = TRUE only if we have
6627 6627                   * non-zero va_seq's, and it has incremented by exactly one
6628 6628                   * during the create_vnode and it didn't
6629 6629                   * change during the VOP_FSYNC.
6630 6630                   */
6631 6631                  if (bva.va_seq && iva.va_seq && ava.va_seq &&
6632 6632                      iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6633 6633                          cinfo->atomic = TRUE;
6634 6634                  else
6635 6635                          cinfo->atomic = FALSE;
6636 6636          }
6637 6637  
6638 6638          /* Check for mandatory locking and that the size gets set. */
6639 6639          cva.va_mask = AT_MODE;
6640 6640          if (setsize)
6641 6641                  cva.va_mask |= AT_SIZE;
6642 6642  
6643 6643          /* Assume the worst */
6644 6644          cs->mandlock = TRUE;
6645 6645  
6646 6646          if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6647 6647                  cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6648 6648  
6649 6649                  /*
6650 6650                   * Truncate the file if necessary; this would be
6651 6651                   * the case for create over an existing file.
6652 6652                   */
6653 6653  
6654 6654                  if (trunc) {
6655 6655                          int in_crit = 0;
6656 6656                          rfs4_file_t *fp;
6657 6657                          nfs4_srv_t *nsrv4;
6658 6658                          bool_t create = FALSE;
6659 6659  
6660 6660                          /*
6661 6661                           * We are writing over an existing file.
6662 6662                           * Check to see if we need to recall a delegation.
6663 6663                           */
6664 6664                          nsrv4 = nfs4_get_srv();
6665 6665                          rfs4_hold_deleg_policy(nsrv4);
6666 6666                          if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6667 6667                                  if (rfs4_check_delegated_byfp(FWRITE, fp,
6668 6668                                      (reqsize == 0), FALSE, FALSE, &clientid)) {
6669 6669                                          rfs4_file_rele(fp);
6670 6670                                          rfs4_rele_deleg_policy(nsrv4);
6671 6671                                          VN_RELE(vp);
6672 6672                                          *attrset = 0;
6673 6673                                          return (NFS4ERR_DELAY);
6674 6674                                  }
6675 6675                                  rfs4_file_rele(fp);
6676 6676                          }
6677 6677                          rfs4_rele_deleg_policy(nsrv4);
6678 6678  
6679 6679                          if (nbl_need_check(vp)) {
6680 6680                                  in_crit = 1;
6681 6681  
6682 6682                                  ASSERT(reqsize == 0);
6683 6683  
6684 6684                                  nbl_start_crit(vp, RW_READER);
6685 6685                                  if (nbl_conflict(vp, NBL_WRITE, 0,
6686 6686                                      cva.va_size, 0, NULL)) {
6687 6687                                          in_crit = 0;
6688 6688                                          nbl_end_crit(vp);
6689 6689                                          VN_RELE(vp);
6690 6690                                          *attrset = 0;
6691 6691                                          return (NFS4ERR_ACCESS);
6692 6692                                  }
6693 6693                          }
6694 6694                          ct.cc_sysid = 0;
6695 6695                          ct.cc_pid = 0;
6696 6696                          ct.cc_caller_id = nfs4_srv_caller_id;
6697 6697                          ct.cc_flags = CC_DONTBLOCK;
6698 6698  
6699 6699                          cva.va_mask = AT_SIZE;
6700 6700                          cva.va_size = reqsize;
6701 6701                          (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6702 6702                          if (in_crit)
6703 6703                                  nbl_end_crit(vp);
6704 6704                  }
6705 6705          }
6706 6706  
6707 6707          error = makefh4(&cs->fh, vp, cs->exi);
6708 6708  
6709 6709          /*
6710 6710           * Force modified data and metadata out to stable storage.
6711 6711           */
6712 6712          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6713 6713  
6714 6714          if (error) {
6715 6715                  VN_RELE(vp);
6716 6716                  *attrset = 0;
6717 6717                  return (puterrno4(error));
6718 6718          }
6719 6719  
6720 6720          /* if parent dir is attrdir, set namedattr fh flag */
6721 6721          if (dvp->v_flag & V_XATTRDIR)
6722 6722                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6723 6723  
6724 6724          if (cs->vp)
6725 6725                  VN_RELE(cs->vp);
6726 6726  
6727 6727          cs->vp = vp;
6728 6728  
6729 6729          /*
6730 6730           * if we did not create the file, we will need to check
6731 6731           * the access bits on the file
6732 6732           */
6733 6733  
6734 6734          if (!created) {
6735 6735                  if (setsize)
6736 6736                          args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6737 6737                  status = check_open_access(args->share_access, cs, req);
6738 6738                  if (status != NFS4_OK)
6739 6739                          *attrset = 0;
6740 6740          }
6741 6741          return (status);
6742 6742  }
6743 6743  
6744 6744  /*ARGSUSED*/
           /*
            * Establish (or upgrade) the open state for the file that cs->vp and
            * cs->fh reference, on behalf of open owner "oo".  The outcome is
            * reported only through resp->status.
            *
            * In order, the function:
            *  - finds the rfs4_file_t for cs->vp and the rfs4_state_t for the
            *    (oo, file) pair;
            *  - obtains the client's sysid;
            *  - calls rfs4_share() when this open adds share access/deny bits
            *    that the state does not already have;
            *  - if rfs4_check_recall() reports a conflict, issues a recall,
            *    delays, and rechecks once (NFS4ERR_DELAY if the conflict is
            *    still there, NFS4ERR_OLD_STATEID if the state was closed while
            *    the locks were dropped);
            *  - calls VOP_OPEN() for a first open, or vn_open_upgrade() for an
            *    upgrade or whenever deleg_cur is set;
            *  - updates the access/deny bookkeeping on the state and the file,
            *    asks rfs4_grant_delegation() for a delegation, advances the
            *    stateid and fills in resp->stateid and resp->delegation.
            *
            * Every failure path after the state has been found closes the state
            * again when screate is still TRUE, and releases the file and state
            * references.
            *
            * Arguments:
            *      cs         compound state; cs->vp, cs->fh and cs->cr are used,
            *                 cs->deleg is set
            *      req        unused
            *      oo         open owner performing the open
            *      deleg      delegation request handed to rfs4_grant_delegation()
            *      access     requested OPEN4_SHARE_ACCESS_* bits
            *      deny       requested OPEN4_SHARE_DENY_* bits
            *      resp       OPEN result; status, stateid and delegation are set
            *      deleg_cur  non-zero to skip VOP_OPEN() and only do the upgrade
            */
6745 6745  static void
6746 6746  rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6747 6747      rfs4_openowner_t *oo, delegreq_t deleg,
6748 6748      uint32_t access, uint32_t deny,
6749 6749      OPEN4res *resp, int deleg_cur)
6750 6750  {
6751 6751          /* XXX Currently not using req  */
6752 6752          rfs4_state_t *sp;
6753 6753          rfs4_file_t *fp;
6754 6754          bool_t screate = TRUE;
6755 6755          bool_t fcreate = TRUE;
6756 6756          uint32_t open_a, share_a;
6757 6757          uint32_t open_d, share_d;
6758 6758          rfs4_deleg_state_t *dsp;
6759 6759          sysid_t sysid;
6760 6760          nfsstat4 status;
6761 6761          caller_context_t ct;
6762 6762          int fflags = 0;
6763 6763          int recall = 0;
6764 6764          int err;
6765 6765          int first_open;
6766 6766  
                   /*
                    * fcreate and screate are passed by address to the two lookups
                    * below.
                    * NOTE(review): presumably TRUE on entry permits creation and the
                    * value on return tells whether an entry was created -- confirm
                    * against rfs4_findfile_withlock()/rfs4_findstate_by_owner_file().
                    */
6767 6767          /* get the file struct and hold a lock on it during initial open */
6768 6768          fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6769 6769          if (fp == NULL) {
6770 6770                  resp->status = NFS4ERR_RESOURCE;
6771 6771                  DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6772 6772                  return;
6773 6773          }
6774 6774  
6775 6775          sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6776 6776          if (sp == NULL) {
6777 6777                  resp->status = NFS4ERR_RESOURCE;
6778 6778                  DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6779 6779                  /* No need to keep any reference */
6780 6780                  rw_exit(&fp->rf_file_rwlock);
6781 6781                  rfs4_file_rele(fp);
6782 6782                  return;
6783 6783          }
6784 6784  
                   /*
                    * NOTE(review): rf_file_rwlock is released explicitly only on the
                    * failure path above; no later path in this function drops it.
                    * Presumably rfs4_findstate_by_owner_file() releases it when it
                    * succeeds -- confirm.
                    */
6785 6785          /* try to get the sysid before continuing */
6786 6786          if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6787 6787                  resp->status = status;
6788 6788                  rfs4_file_rele(fp);
6789 6789                  /* Not a fully formed open; "close" it */
6790 6790                  if (screate == TRUE)
6791 6791                          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6792 6792                  rfs4_state_rele(sp);
6793 6793                  return;
6794 6794          }
6795 6795  
6796 6796          /* Calculate the fflags for this OPEN. */
6797 6797          if (access & OPEN4_SHARE_ACCESS_READ)
6798 6798                  fflags |= FREAD;
6799 6799          if (access & OPEN4_SHARE_ACCESS_WRITE)
6800 6800                  fflags |= FWRITE;
6801 6801  
6802 6802          rfs4_dbe_lock(sp->rs_dbe);
6803 6803  
6804 6804          /*
6805 6805           * Calculate the new deny and access mode that this open is adding to
6806 6806           * the file for this open owner;
6807 6807           */
6808 6808          open_d = (deny & ~sp->rs_open_deny);
6809 6809          open_a = (access & ~sp->rs_open_access);
6810 6810  
6811 6811          /*
6812 6812           * Calculate the new share access and share deny modes that this open
6813 6813           * is adding to the file for this open owner;
6814 6814           */
6815 6815          share_a = (access & ~sp->rs_share_access);
6816 6816          share_d = (deny & ~sp->rs_share_deny);
6817 6817  
                   /* No access bits recorded on this state yet: treat as first open */
6818 6818          first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6819 6819  
6820 6820          /*
6821 6821           * Check to see the client has already sent an open for this
6822 6822           * open owner on this file with the same share/deny modes.
6823 6823           * If so, we don't need to check for a conflict and we don't
6824 6824           * need to add another shrlock.  If not, then we need to
6825 6825           * check for conflicts in deny and access before checking for
6826 6826           * conflicts in delegation.  We don't want to recall a
6827 6827           * delegation based on an open that will eventually fail based
6828 6828           * on shares modes.
6829 6829           */
6830 6830  
6831 6831          if (share_a || share_d) {
6832 6832                  if ((err = rfs4_share(sp, access, deny)) != 0) {
6833 6833                          rfs4_dbe_unlock(sp->rs_dbe);
                                   /* rfs4_share()'s result is used as the NFSv4 status */
6834 6834                          resp->status = err;
6835 6835  
6836 6836                          rfs4_file_rele(fp);
6837 6837                          /* Not a fully formed open; "close" it */
6838 6838                          if (screate == TRUE)
6839 6839                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6840 6840                          rfs4_state_rele(sp);
6841 6841                          return;
6842 6842                  }
6843 6843          }
6844 6844  
                   /* Lock order used throughout: state dbe first, then file dbe */
6845 6845          rfs4_dbe_lock(fp->rf_dbe);
6846 6846  
6847 6847          /*
6848 6848           * Check to see if this file is delegated and if so, if a
6849 6849           * recall needs to be done.
6850 6850           */
6851 6851          if (rfs4_check_recall(sp, access)) {
                           /* Drop both locks across the recall and the delay */
6852 6852                  rfs4_dbe_unlock(fp->rf_dbe);
6853 6853                  rfs4_dbe_unlock(sp->rs_dbe);
6854 6854                  rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6855 6855                  delay(NFS4_DELEGATION_CONFLICT_DELAY);
6856 6856                  rfs4_dbe_lock(sp->rs_dbe);
6857 6857  
6858 6858                  /* if state closed while lock was dropped */
6859 6859                  if (sp->rs_closed) {
                                   /* Undo the share added above, if one was added */
6860 6860                          if (share_a || share_d)
6861 6861                                  (void) rfs4_unshare(sp);
6862 6862                          rfs4_dbe_unlock(sp->rs_dbe);
6863 6863                          rfs4_file_rele(fp);
6864 6864                          /* Not a fully formed open; "close" it */
6865 6865                          if (screate == TRUE)
6866 6866                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6867 6867                          rfs4_state_rele(sp);
6868 6868                          resp->status = NFS4ERR_OLD_STATEID;
6869 6869                          return;
6870 6870                  }
6871 6871  
6872 6872                  rfs4_dbe_lock(fp->rf_dbe);
6873 6873                  /* Let's see if the delegation was returned */
6874 6874                  if (rfs4_check_recall(sp, access)) {
                                   /*
                                    * Still in conflict after one delay: unwind, renew
                                    * the client's lease and have the client retry.
                                    */
6875 6875                          rfs4_dbe_unlock(fp->rf_dbe);
6876 6876                          if (share_a || share_d)
6877 6877                                  (void) rfs4_unshare(sp);
6878 6878                          rfs4_dbe_unlock(sp->rs_dbe);
6879 6879                          rfs4_file_rele(fp);
6880 6880                          rfs4_update_lease(sp->rs_owner->ro_client);
6881 6881  
6882 6882                          /* Not a fully formed open; "close" it */
6883 6883                          if (screate == TRUE)
6884 6884                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6885 6885                          rfs4_state_rele(sp);
6886 6886                          resp->status = NFS4ERR_DELAY;
6887 6887                          return;
6888 6888                  }
6889 6889          }
6890 6890          /*
6891 6891           * the share check passed and any delegation conflict has been
6892 6892           * taken care of, now call vop_open.
6893 6893           * if this is the first open then call vop_open with fflags.
6894 6894           * if not, call vn_open_upgrade with just the upgrade flags.
6895 6895           *
6896 6896           * if the file has been opened already, it will have the current
6897 6897           * access mode in the state struct.  if it has no share access, then
6898 6898           * this is a new open.
6899 6899           *
6900 6900           * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6901 6901           * call VOP_OPEN(), just do the open upgrade.
6902 6902           */
6903 6903          if (first_open && !deleg_cur) {
                           /*
                            * Identify the caller to the file system by client sysid and
                            * open owner id; CC_DONTBLOCK asks for an error instead of
                            * blocking (see the CC_WOULDBLOCK test below).
                            */
6904 6904                  ct.cc_sysid = sysid;
6905 6905                  ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6906 6906                  ct.cc_caller_id = nfs4_srv_caller_id;
6907 6907                  ct.cc_flags = CC_DONTBLOCK;
6908 6908                  err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6909 6909                  if (err) {
6910 6910                          rfs4_dbe_unlock(fp->rf_dbe);
6911 6911                          if (share_a || share_d)
6912 6912                                  (void) rfs4_unshare(sp);
6913 6913                          rfs4_dbe_unlock(sp->rs_dbe);
6914 6914                          rfs4_file_rele(fp);
6915 6915  
6916 6916                          /* Not a fully formed open; "close" it */
6917 6917                          if (screate == TRUE)
6918 6918                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6919 6919                          rfs4_state_rele(sp);
6920 6920                          /* check if a monitor detected a delegation conflict */
6921 6921                          if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6922 6922                                  resp->status = NFS4ERR_DELAY;
6923 6923                          else
6924 6924                                  resp->status = NFS4ERR_SERVERFAULT;
6925 6925                          return;
6926 6926                  }
6927 6927          } else { /* open upgrade */
6928 6928                  /*
6929 6929                   * calculate the fflags for the new mode that is being added
6930 6930                   * by this upgrade.
6931 6931                   */
6932 6932                  fflags = 0;
6933 6933                  if (open_a & OPEN4_SHARE_ACCESS_READ)
6934 6934                          fflags |= FREAD;
6935 6935                  if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6936 6936                          fflags |= FWRITE;
6937 6937                  vn_open_upgrade(cs->vp, fflags);
6938 6938          }
                   /*
                    * Record the open.  The per-file counters are bumped only for
                    * bits that are new to this state (open_a/open_d), while the
                    * cumulative masks take the whole request.
                    */
6939 6939          sp->rs_open_access |= access;
6940 6940          sp->rs_open_deny |= deny;
6941 6941  
6942 6942          if (open_d & OPEN4_SHARE_DENY_READ)
6943 6943                  fp->rf_deny_read++;
6944 6944          if (open_d & OPEN4_SHARE_DENY_WRITE)
6945 6945                  fp->rf_deny_write++;
6946 6946          fp->rf_share_deny |= deny;
6947 6947  
6948 6948          if (open_a & OPEN4_SHARE_ACCESS_READ)
6949 6949                  fp->rf_access_read++;
6950 6950          if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6951 6951                  fp->rf_access_write++;
6952 6952          fp->rf_share_access |= access;
6953 6953  
6954 6954          /*
6955 6955           * Check for delegation here. if the deleg argument is not
6956 6956           * DELEG_ANY, then this is a reclaim from a client and
6957 6957           * we must honor the delegation requested. If necessary we can
6958 6958           * set the recall flag.
6959 6959           */
6960 6960  
6961 6961          dsp = rfs4_grant_delegation(deleg, sp, &recall);
6962 6962  
                   /* Remember in the compound state whether a write delegation is out */
6963 6963          cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6964 6964  
6965 6965          next_stateid(&sp->rs_stateid);
6966 6966  
6967 6967          resp->stateid = sp->rs_stateid.stateid;
6968 6968  
6969 6969          rfs4_dbe_unlock(fp->rf_dbe);
6970 6970          rfs4_dbe_unlock(sp->rs_dbe);
6971 6971  
                   /* A delegation state was returned: describe it in the reply */
6972 6972          if (dsp) {
6973 6973                  rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6974 6974                  rfs4_deleg_state_rele(dsp);
6975 6975          }
6976 6976  
6977 6977          rfs4_file_rele(fp);
6978 6978          rfs4_state_rele(sp);
6979 6979  
6980 6980          resp->status = NFS4_OK;
6981 6981  }
6982 6982  
6983 6983  /*ARGSUSED*/
           /*
            * Open the file named by args->open_claim4_u.file.
            * NOTE(review): the function name suggests this is the CLAIM_NULL
            * flavour of OPEN -- confirm against the caller.
            *
            * With OPEN4_NOCREATE the target is found with rfs4_lookupfile();
            * otherwise rfs4_createfile() is used, with delegation grants disabled
            * around an EXCLUSIVE4 create.  When that step succeeds, rfs4_do_open()
            * performs the open, requesting no delegation (DELEG_NONE) while
            * oo->ro_need_confirm is set and DELEG_ANY otherwise.
            *
            * The result is left in resp->status.  A lookup/create failure is also
            * copied to *cs->statusp; a failure inside rfs4_do_open() is not.
            *
            * Arguments:
            *      cs      compound state, handed to the lookup/create/open helpers
            *      req     RPC request, passed through to the helpers
            *      args    OPEN arguments (opentype, mode, share_access, share_deny)
            *      oo      open owner performing the open
            *      resp    OPEN result; cinfo, attrset and status are filled in
            */
6984 6984  static void
6985 6985  rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6986 6986      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6987 6987  {
6988 6988          change_info4 *cinfo = &resp->cinfo;
6989 6989          bitmap4 *attrset = &resp->attrset;
6990 6990  
6991 6991          if (args->opentype == OPEN4_NOCREATE)
6992 6992                  resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6993 6993                      req, cs, args->share_access, cinfo);
6994 6994          else {
6995 6995                  /* inhibit delegation grants during exclusive create */
6996 6996  
6997 6997                  if (args->mode == EXCLUSIVE4)
6998 6998                          rfs4_disable_delegation();
6999 6999  
7000 7000                  resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7001 7001                      oo->ro_client->rc_clientid);
7002 7002          }
7003 7003  
7004 7004          if (resp->status == NFS4_OK) {
7005 7005  
7006 7006                  /* cs->vp cs->fh now reference the desired file */
7007 7007  
7008 7008                  rfs4_do_open(cs, req, oo,
7009 7009                      oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7010 7010                      args->share_access, args->share_deny, resp, 0);
7011 7011  
7012 7012                  /*
7013 7013                   * If rfs4_createfile set attrset, we must
7014 7014                   * clear this attrset before the response is copied.
7015 7015                   */
7016 7016                  if (resp->status != NFS4_OK && resp->attrset) {
7017 7017                          resp->attrset = 0;
7018 7018                  }
7019 7019          }
7020 7020          else
7021 7021                  *cs->statusp = resp->status;
7022 7022  
                   /*
                    * NOTE(review): this re-enable is keyed on args->mode alone, while
                    * the matching disable above is reached only when opentype is not
                    * OPEN4_NOCREATE.  Confirm that mode can never be EXCLUSIVE4 on the
                    * OPEN4_NOCREATE path, otherwise the two calls are unbalanced.
                    */
7023 7023          if (args->mode == EXCLUSIVE4)
7024 7024                  rfs4_enable_delegation();
7025 7025  }
7026 7026  
7027 7027  /*ARGSUSED*/
           /*
            * Open the file that cs->vp already references; the comment on cinfo
            * below identifies this as the CLAIM_PREVIOUS case.
            *
            * The vnode must be a regular file (NFS4ERR_ISDIR, NFS4ERR_SYMLINK or
            * NFS4ERR_INVAL otherwise).  Its mode and owner are fetched to set
            * cs->mandlock and to decide whether check_open_access() is needed:
            * the access check is skipped when the caller owns the file.  cinfo is
            * zeroed and rfs4_do_open() then performs the open, using the delegation
            * type the client supplied in args->open_claim4_u.delegate_type.
            *
            * The result is left in resp->status; *cs->statusp is not touched here.
            *
            * Arguments:
            *      cs      compound state; cs->vp and cs->cr are used, cs->mandlock
            *              is set
            *      req     RPC request, passed to check_open_access()/rfs4_do_open()
            *      args    OPEN arguments (share_access, share_deny, delegate_type)
            *      oo      open owner performing the open
            *      resp    OPEN result; cinfo and status are filled in
            */
7028 7028  static void
7029 7029  rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7030 7030      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7031 7031  {
7032 7032          change_info4 *cinfo = &resp->cinfo;
7033 7033          vattr_t va;
7034 7034          vtype_t v_type = cs->vp->v_type;
7035 7035          int error = 0;
7036 7036  
7037 7037          /* Verify that we have a regular file */
7038 7038          if (v_type != VREG) {
7039 7039                  if (v_type == VDIR)
7040 7040                          resp->status = NFS4ERR_ISDIR;
7041 7041                  else if (v_type == VLNK)
7042 7042                          resp->status = NFS4ERR_SYMLINK;
7043 7043                  else
7044 7044                          resp->status = NFS4ERR_INVAL;
7045 7045                  return;
7046 7046          }
7047 7047  
                   /* Mode feeds MANDLOCK(); uid feeds the ownership test below */
7048 7048          va.va_mask = AT_MODE|AT_UID;
7049 7049          error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7050 7050          if (error) {
7051 7051                  resp->status = puterrno4(error);
7052 7052                  return;
7053 7053          }
7054 7054  
7055 7055          cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7056 7056  
7057 7057          /*
7058 7058           * Check if we have access to the file, Note that the file
7059 7059           * could have originally been open UNCHECKED or GUARDED
7060 7060           * with mode bits that will now fail, but there is nothing
7061 7061           * we can really do about that except in the case that the
7062 7062           * owner of the file is the one requesting the open.
7063 7063           */
7064 7064          if (crgetuid(cs->cr) != va.va_uid) {
7065 7065                  resp->status = check_open_access(args->share_access, cs, req);
7066 7066                  if (resp->status != NFS4_OK) {
7067 7067                          return;
7068 7068                  }
7069 7069          }
7070 7070  
7071 7071          /*
7072 7072           * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7073 7073           */
7074 7074          cinfo->before = 0;
7075 7075          cinfo->after = 0;
7076 7076          cinfo->atomic = FALSE;
7077 7077  
                   /* Last argument (deleg_cur) is 0 */
7078 7078          rfs4_do_open(cs, req, oo,
7079 7079              NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7080 7080              args->share_access, args->share_deny, resp, 0);
7081 7081  }
7082 7082  
7083 7083  static void
7084 7084  rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7085 7085      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7086 7086  {
7087 7087          int error;
7088 7088          nfsstat4 status;
7089 7089          stateid4 stateid =
7090 7090              args->open_claim4_u.delegate_cur_info.delegate_stateid;
7091 7091          rfs4_deleg_state_t *dsp;
7092 7092  
7093 7093          /*
7094 7094           * Find the state info from the stateid and confirm that the
7095 7095           * file is delegated.  If the state openowner is the same as
7096 7096           * the supplied openowner we're done. If not, get the file
7097 7097           * info from the found state info. Use that file info to
7098 7098           * create the state for this lock owner. Note solaris doen't
7099 7099           * really need the pathname to find the file. We may want to
7100 7100           * lookup the pathname and make sure that the vp exist and
7101 7101           * matches the vp in the file structure. However it is
7102 7102           * possible that the pathname nolonger exists (local process
7103 7103           * unlinks the file), so this may not be that useful.
7104 7104           */
7105 7105  
7106 7106          status = rfs4_get_deleg_state(&stateid, &dsp);
7107 7107          if (status != NFS4_OK) {
7108 7108                  resp->status = status;
7109 7109                  return;
7110 7110          }
7111 7111  
7112 7112          ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7113 7113  
7114 7114          /*
7115 7115           * New lock owner, create state. Since this was probably called
7116 7116           * in response to a CB_RECALL we set deleg to DELEG_NONE
7117 7117           */
7118 7118  
7119 7119          ASSERT(cs->vp != NULL);
7120 7120          VN_RELE(cs->vp);
7121 7121          VN_HOLD(dsp->rds_finfo->rf_vp);
7122 7122          cs->vp = dsp->rds_finfo->rf_vp;
7123 7123  
7124 7124          if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7125 7125                  rfs4_deleg_state_rele(dsp);
7126 7126                  *cs->statusp = resp->status = puterrno4(error);
7127 7127                  return;
7128 7128          }
7129 7129  
7130 7130          /* Mark progress for delegation returns */
7131 7131          dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7132 7132          rfs4_deleg_state_rele(dsp);
7133 7133          rfs4_do_open(cs, req, oo, DELEG_NONE,
7134 7134              args->share_access, args->share_deny, resp, 1);
7135 7135  }
7136 7136  
7137 7137  /*ARGSUSED*/
7138 7138  static void
7139 7139  rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7140 7140      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7141 7141  {
7142 7142          /*
7143 7143           * Lookup the pathname, it must already exist since this file
7144 7144           * was delegated.
7145 7145           *
7146 7146           * Find the file and state info for this vp and open owner pair.
7147 7147           *      check that they are in fact delegated.
7148 7148           *      check that the state access and deny modes are the same.
7149 7149           *
7150 7150           * Return the delgation possibly seting the recall flag.
7151 7151           */
7152 7152          rfs4_file_t *fp;
7153 7153          rfs4_state_t *sp;
7154 7154          bool_t create = FALSE;
7155 7155          bool_t dcreate = FALSE;
7156 7156          rfs4_deleg_state_t *dsp;
7157 7157          nfsace4 *ace;
7158 7158  
7159 7159          /* Note we ignore oflags */
7160 7160          resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7161 7161              req, cs, args->share_access, &resp->cinfo);
7162 7162  
7163 7163          if (resp->status != NFS4_OK) {
7164 7164                  return;
7165 7165          }
7166 7166  
7167 7167          /* get the file struct and hold a lock on it during initial open */
7168 7168          fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7169 7169          if (fp == NULL) {
7170 7170                  resp->status = NFS4ERR_RESOURCE;
7171 7171                  DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7172 7172                  return;
7173 7173          }
7174 7174  
7175 7175          sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7176 7176          if (sp == NULL) {
7177 7177                  resp->status = NFS4ERR_SERVERFAULT;
7178 7178                  DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7179 7179                  rw_exit(&fp->rf_file_rwlock);
7180 7180                  rfs4_file_rele(fp);
7181 7181                  return;
7182 7182          }
7183 7183  
7184 7184          rfs4_dbe_lock(sp->rs_dbe);
7185 7185          rfs4_dbe_lock(fp->rf_dbe);
7186 7186          if (args->share_access != sp->rs_share_access ||
7187 7187              args->share_deny != sp->rs_share_deny ||
7188 7188              sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7189 7189                  NFS4_DEBUG(rfs4_debug,
7190 7190                      (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7191 7191                  rfs4_dbe_unlock(fp->rf_dbe);
7192 7192                  rfs4_dbe_unlock(sp->rs_dbe);
7193 7193                  rfs4_file_rele(fp);
7194 7194                  rfs4_state_rele(sp);
7195 7195                  resp->status = NFS4ERR_SERVERFAULT;
7196 7196                  return;
7197 7197          }
7198 7198          rfs4_dbe_unlock(fp->rf_dbe);
7199 7199          rfs4_dbe_unlock(sp->rs_dbe);
7200 7200  
7201 7201          dsp = rfs4_finddeleg(sp, &dcreate);
7202 7202          if (dsp == NULL) {
7203 7203                  rfs4_state_rele(sp);
7204 7204                  rfs4_file_rele(fp);
7205 7205                  resp->status = NFS4ERR_SERVERFAULT;
7206 7206                  return;
7207 7207          }
7208 7208  
7209 7209          next_stateid(&sp->rs_stateid);
7210 7210  
7211 7211          resp->stateid = sp->rs_stateid.stateid;
7212 7212  
7213 7213          resp->delegation.delegation_type = dsp->rds_dtype;
7214 7214  
7215 7215          if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7216 7216                  open_read_delegation4 *rv =
7217 7217                      &resp->delegation.open_delegation4_u.read;
7218 7218  
7219 7219                  rv->stateid = dsp->rds_delegid.stateid;
7220 7220                  rv->recall = FALSE; /* no policy in place to set to TRUE */
7221 7221                  ace = &rv->permissions;
7222 7222          } else {
7223 7223                  open_write_delegation4 *rv =
7224 7224                      &resp->delegation.open_delegation4_u.write;
7225 7225  
7226 7226                  rv->stateid = dsp->rds_delegid.stateid;
7227 7227                  rv->recall = FALSE;  /* no policy in place to set to TRUE */
7228 7228                  ace = &rv->permissions;
7229 7229                  rv->space_limit.limitby = NFS_LIMIT_SIZE;
7230 7230                  rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7231 7231          }
7232 7232  
7233 7233          /* XXX For now */
7234 7234          ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7235 7235          ace->flag = 0;
7236 7236          ace->access_mask = 0;
7237 7237          ace->who.utf8string_len = 0;
7238 7238          ace->who.utf8string_val = 0;
7239 7239  
7240 7240          rfs4_deleg_state_rele(dsp);
7241 7241          rfs4_state_rele(sp);
7242 7242          rfs4_file_rele(fp);
7243 7243  }
7244 7244  
/*
 * Verdict of a sequence id check.  The numeric values are 0, 1 and 2 in
 * declaration order.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same op as the last one */
	NFS4_CHKSEQ_BAD		/* anything else */
} rfs4_chkseq_t;
7250 7250  
7251 7251  /*
7252 7252   * Generic function for sequence number checks.
7253 7253   */
7254 7254  static rfs4_chkseq_t
7255 7255  rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7256 7256      seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7257 7257  {
7258 7258          /* Same sequence ids and matching operations? */
7259 7259          if (seqid == rqst_seq && resop->resop == lastop->resop) {
7260 7260                  if (copyres == TRUE) {
7261 7261                          rfs4_free_reply(resop);
7262 7262                          rfs4_copy_reply(resop, lastop);
7263 7263                  }
7264 7264                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7265 7265                      "Replayed SEQID %d\n", seqid));
7266 7266                  return (NFS4_CHKSEQ_REPLAY);
7267 7267          }
7268 7268  
7269 7269          /* If the incoming sequence is not the next expected then it is bad */
7270 7270          if (rqst_seq != seqid + 1) {
7271 7271                  if (rqst_seq == seqid) {
7272 7272                          NFS4_DEBUG(rfs4_debug,
7273 7273                              (CE_NOTE, "BAD SEQID: Replayed sequence id "
7274 7274                              "but last op was %d current op is %d\n",
7275 7275                              lastop->resop, resop->resop));
7276 7276                          return (NFS4_CHKSEQ_BAD);
7277 7277                  }
7278 7278                  NFS4_DEBUG(rfs4_debug,
7279 7279                      (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7280 7280                      rqst_seq, seqid));
7281 7281                  return (NFS4_CHKSEQ_BAD);
7282 7282          }
7283 7283  
7284 7284          /* Everything okay -- next expected */
7285 7285          return (NFS4_CHKSEQ_OKAY);
7286 7286  }
7287 7287  
7288 7288  
7289 7289  static rfs4_chkseq_t
7290 7290  rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7291 7291  {
7292 7292          rfs4_chkseq_t rc;
7293 7293  
7294 7294          rfs4_dbe_lock(op->ro_dbe);
7295 7295          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7296 7296              TRUE);
7297 7297          rfs4_dbe_unlock(op->ro_dbe);
7298 7298  
7299 7299          if (rc == NFS4_CHKSEQ_OKAY)
7300 7300                  rfs4_update_lease(op->ro_client);
7301 7301  
7302 7302          return (rc);
7303 7303  }
7304 7304  
7305 7305  static rfs4_chkseq_t
7306 7306  rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7307 7307  {
7308 7308          rfs4_chkseq_t rc;
7309 7309  
7310 7310          rfs4_dbe_lock(op->ro_dbe);
7311 7311          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7312 7312              olo_seqid, resop, FALSE);
7313 7313          rfs4_dbe_unlock(op->ro_dbe);
7314 7314  
7315 7315          return (rc);
7316 7316  }
7317 7317  
7318 7318  static rfs4_chkseq_t
7319 7319  rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7320 7320  {
7321 7321          rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7322 7322  
7323 7323          rfs4_dbe_lock(lsp->rls_dbe);
7324 7324          if (!lsp->rls_skip_seqid_check)
7325 7325                  rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7326 7326                      resop, TRUE);
7327 7327          rfs4_dbe_unlock(lsp->rls_dbe);
7328 7328  
7329 7329          return (rc);
7330 7330  }
7331 7331  
7332 7332  static void
7333 7333  rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7334 7334      struct svc_req *req, struct compound_state *cs)
7335 7335  {
7336 7336          OPEN4args *args = &argop->nfs_argop4_u.opopen;
7337 7337          OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7338 7338          open_owner4 *owner = &args->owner;
7339 7339          open_claim_type4 claim = args->claim;
7340 7340          rfs4_client_t *cp;
7341 7341          rfs4_openowner_t *oo;
7342 7342          bool_t create;
7343 7343          bool_t replay = FALSE;
7344 7344          int can_reclaim;
7345 7345  
7346 7346          DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7347 7347              OPEN4args *, args);
7348 7348  
7349 7349          if (cs->vp == NULL) {
7350 7350                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7351 7351                  goto end;
7352 7352          }
7353 7353  
7354 7354          /*
7355 7355           * Need to check clientid and lease expiration first based on
7356 7356           * error ordering and incrementing sequence id.
7357 7357           */
7358 7358          cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7359 7359          if (cp == NULL) {
7360 7360                  *cs->statusp = resp->status =
7361 7361                      rfs4_check_clientid(&owner->clientid, 0);
7362 7362                  goto end;
7363 7363          }
7364 7364  
7365 7365          if (rfs4_lease_expired(cp)) {
7366 7366                  rfs4_client_close(cp);
7367 7367                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7368 7368                  goto end;
7369 7369          }
7370 7370          can_reclaim = cp->rc_can_reclaim;
7371 7371  
7372 7372          /*
7373 7373           * Find the open_owner for use from this point forward.  Take
7374 7374           * care in updating the sequence id based on the type of error
7375 7375           * being returned.
7376 7376           */
7377 7377  retry:
7378 7378          create = TRUE;
7379 7379          oo = rfs4_findopenowner(owner, &create, args->seqid);
7380 7380          if (oo == NULL) {
7381 7381                  *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7382 7382                  rfs4_client_rele(cp);
7383 7383                  goto end;
7384 7384          }
7385 7385  
7386 7386          /* Hold off access to the sequence space while the open is done */
7387 7387          rfs4_sw_enter(&oo->ro_sw);
7388 7388  
7389 7389          /*
7390 7390           * If the open_owner existed before at the server, then check
7391 7391           * the sequence id.
7392 7392           */
7393 7393          if (!create && !oo->ro_postpone_confirm) {
7394 7394                  switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7395 7395                  case NFS4_CHKSEQ_BAD:
7396 7396                          if ((args->seqid > oo->ro_open_seqid) &&
7397 7397                              oo->ro_need_confirm) {
7398 7398                                  rfs4_free_opens(oo, TRUE, FALSE);
7399 7399                                  rfs4_sw_exit(&oo->ro_sw);
7400 7400                                  rfs4_openowner_rele(oo);
7401 7401                                  goto retry;
7402 7402                          }
7403 7403                          resp->status = NFS4ERR_BAD_SEQID;
7404 7404                          goto out;
7405 7405                  case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7406 7406                          replay = TRUE;
7407 7407                          goto out;
7408 7408                  default:
7409 7409                          break;
7410 7410                  }
7411 7411  
7412 7412                  /*
7413 7413                   * Sequence was ok and open owner exists
7414 7414                   * check to see if we have yet to see an
7415 7415                   * open_confirm.
7416 7416                   */
7417 7417                  if (oo->ro_need_confirm) {
7418 7418                          rfs4_free_opens(oo, TRUE, FALSE);
7419 7419                          rfs4_sw_exit(&oo->ro_sw);
7420 7420                          rfs4_openowner_rele(oo);
7421 7421                          goto retry;
7422 7422                  }
7423 7423          }
7424 7424          /* Grace only applies to regular-type OPENs */
7425 7425          if (rfs4_clnt_in_grace(cp) &&
7426 7426              (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7427 7427                  *cs->statusp = resp->status = NFS4ERR_GRACE;
7428 7428                  goto out;
7429 7429          }
7430 7430  
7431 7431          /*
7432 7432           * If previous state at the server existed then can_reclaim
7433 7433           * will be set. If not reply NFS4ERR_NO_GRACE to the
7434 7434           * client.
7435 7435           */
7436 7436          if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7437 7437                  *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7438 7438                  goto out;
7439 7439          }
7440 7440  
7441 7441  
7442 7442          /*
7443 7443           * Reject the open if the client has missed the grace period
7444 7444           */
7445 7445          if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7446 7446                  *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7447 7447                  goto out;
7448 7448          }
7449 7449  
7450 7450          /* Couple of up-front bookkeeping items */
7451 7451          if (oo->ro_need_confirm) {
7452 7452                  /*
7453 7453                   * If this is a reclaim OPEN then we should not ask
7454 7454                   * for a confirmation of the open_owner per the
7455 7455                   * protocol specification.
7456 7456                   */
7457 7457                  if (claim == CLAIM_PREVIOUS)
7458 7458                          oo->ro_need_confirm = FALSE;
7459 7459                  else
7460 7460                          resp->rflags |= OPEN4_RESULT_CONFIRM;
7461 7461          }
7462 7462          resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7463 7463  
7464 7464          /*
7465 7465           * If there is an unshared filesystem mounted on this vnode,
7466 7466           * do not allow to open/create in this directory.
7467 7467           */
7468 7468          if (vn_ismntpt(cs->vp)) {
7469 7469                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
7470 7470                  goto out;
7471 7471          }
7472 7472  
7473 7473          /*
7474 7474           * access must READ, WRITE, or BOTH.  No access is invalid.
7475 7475           * deny can be READ, WRITE, BOTH, or NONE.
7476 7476           * bits not defined for access/deny are invalid.
7477 7477           */
7478 7478          if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7479 7479              (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7480 7480              (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7481 7481                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7482 7482                  goto out;
7483 7483          }
7484 7484  
7485 7485  
7486 7486          /*
7487 7487           * make sure attrset is zero before response is built.
7488 7488           */
7489 7489          resp->attrset = 0;
7490 7490  
7491 7491          switch (claim) {
7492 7492          case CLAIM_NULL:
7493 7493                  rfs4_do_opennull(cs, req, args, oo, resp);
7494 7494                  break;
7495 7495          case CLAIM_PREVIOUS:
7496 7496                  rfs4_do_openprev(cs, req, args, oo, resp);
7497 7497                  break;
7498 7498          case CLAIM_DELEGATE_CUR:
7499 7499                  rfs4_do_opendelcur(cs, req, args, oo, resp);
7500 7500                  break;
7501 7501          case CLAIM_DELEGATE_PREV:
7502 7502                  rfs4_do_opendelprev(cs, req, args, oo, resp);
7503 7503                  break;
7504 7504          default:
7505 7505                  resp->status = NFS4ERR_INVAL;
7506 7506                  break;
7507 7507          }
7508 7508  
7509 7509  out:
7510 7510          rfs4_client_rele(cp);
7511 7511  
7512 7512          /* Catch sequence id handling here to make it a little easier */
7513 7513          switch (resp->status) {
7514 7514          case NFS4ERR_BADXDR:
7515 7515          case NFS4ERR_BAD_SEQID:
7516 7516          case NFS4ERR_BAD_STATEID:
7517 7517          case NFS4ERR_NOFILEHANDLE:
7518 7518          case NFS4ERR_RESOURCE:
7519 7519          case NFS4ERR_STALE_CLIENTID:
7520 7520          case NFS4ERR_STALE_STATEID:
7521 7521                  /*
7522 7522                   * The protocol states that if any of these errors are
7523 7523                   * being returned, the sequence id should not be
7524 7524                   * incremented.  Any other return requires an
7525 7525                   * increment.
7526 7526                   */
7527 7527                  break;
7528 7528          default:
7529 7529                  /* Always update the lease in this case */
7530 7530                  rfs4_update_lease(oo->ro_client);
7531 7531  
7532 7532                  /* Regular response - copy the result */
7533 7533                  if (!replay)
7534 7534                          rfs4_update_open_resp(oo, resop, &cs->fh);
7535 7535  
7536 7536                  /*
7537 7537                   * REPLAY case: Only if the previous response was OK
7538 7538                   * do we copy the filehandle.  If not OK, no
7539 7539                   * filehandle to copy.
7540 7540                   */
7541 7541                  if (replay == TRUE &&
7542 7542                      resp->status == NFS4_OK &&
7543 7543                      oo->ro_reply_fh.nfs_fh4_val) {
7544 7544                          /*
7545 7545                           * If this is a replay, we must restore the
7546 7546                           * current filehandle/vp to that of what was
7547 7547                           * returned originally.  Try our best to do
7548 7548                           * it.
7549 7549                           */
7550 7550                          nfs_fh4_fmt_t *fh_fmtp =
7551 7551                              (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7552 7552  
7553 7553                          cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7554 7554                              (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7555 7555  
7556 7556                          if (cs->exi == NULL) {
7557 7557                                  resp->status = NFS4ERR_STALE;
7558 7558                                  goto finish;
7559 7559                          }
7560 7560  
7561 7561                          VN_RELE(cs->vp);
7562 7562  
7563 7563                          cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7564 7564                              &resp->status);
7565 7565  
7566 7566                          if (cs->vp == NULL)
7567 7567                                  goto finish;
7568 7568  
7569 7569                          nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7570 7570                  }
7571 7571  
7572 7572                  /*
7573 7573                   * If this was a replay, no need to update the
7574 7574                   * sequence id. If the open_owner was not created on
7575 7575                   * this pass, then update.  The first use of an
7576 7576                   * open_owner will not bump the sequence id.
7577 7577                   */
7578 7578                  if (replay == FALSE && !create)
7579 7579                          rfs4_update_open_sequence(oo);
7580 7580                  /*
7581 7581                   * If the client is receiving an error and the
7582 7582                   * open_owner needs to be confirmed, there is no way
7583 7583                   * to notify the client of this fact ignoring the fact
7584 7584                   * that the server has no method of returning a
7585 7585                   * stateid to confirm.  Therefore, the server needs to
7586 7586                   * mark this open_owner in a way as to avoid the
7587 7587                   * sequence id checking the next time the client uses
7588 7588                   * this open_owner.
7589 7589                   */
7590 7590                  if (resp->status != NFS4_OK && oo->ro_need_confirm)
7591 7591                          oo->ro_postpone_confirm = TRUE;
7592 7592                  /*
7593 7593                   * If OK response then clear the postpone flag and
7594 7594                   * reset the sequence id to keep in sync with the
7595 7595                   * client.
7596 7596                   */
7597 7597                  if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7598 7598                          oo->ro_postpone_confirm = FALSE;
7599 7599                          oo->ro_open_seqid = args->seqid;
7600 7600                  }
7601 7601                  break;
7602 7602          }
7603 7603  
7604 7604  finish:
7605 7605          *cs->statusp = resp->status;
7606 7606  
7607 7607          rfs4_sw_exit(&oo->ro_sw);
7608 7608          rfs4_openowner_rele(oo);
7609 7609  
7610 7610  end:
7611 7611          DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7612 7612              OPEN4res *, resp);
7613 7613  }
7614 7614  
7615 7615  /*ARGSUSED*/
7616 7616  void
7617 7617  rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7618 7618      struct svc_req *req, struct compound_state *cs)
7619 7619  {
7620 7620          OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7621 7621          OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7622 7622          rfs4_state_t *sp;
7623 7623          nfsstat4 status;
7624 7624  
7625 7625          DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7626 7626              OPEN_CONFIRM4args *, args);
7627 7627  
7628 7628          if (cs->vp == NULL) {
7629 7629                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7630 7630                  goto out;
7631 7631          }
7632 7632  
7633 7633          if (cs->vp->v_type != VREG) {
7634 7634                  *cs->statusp = resp->status =
7635 7635                      cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7636 7636                  return;
7637 7637          }
7638 7638  
7639 7639          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7640 7640          if (status != NFS4_OK) {
7641 7641                  *cs->statusp = resp->status = status;
7642 7642                  goto out;
7643 7643          }
7644 7644  
7645 7645          /* Ensure specified filehandle matches */
7646 7646          if (cs->vp != sp->rs_finfo->rf_vp) {
7647 7647                  rfs4_state_rele(sp);
7648 7648                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7649 7649                  goto out;
7650 7650          }
7651 7651  
7652 7652          /* hold off other access to open_owner while we tinker */
7653 7653          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7654 7654  
7655 7655          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7656 7656          case NFS4_CHECK_STATEID_OKAY:
7657 7657                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7658 7658                      resop) != 0) {
7659 7659                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7660 7660                          break;
7661 7661                  }
7662 7662                  /*
7663 7663                   * If it is the appropriate stateid and determined to
7664 7664                   * be "OKAY" then this means that the stateid does not
7665 7665                   * need to be confirmed and the client is in error for
7666 7666                   * sending an OPEN_CONFIRM.
7667 7667                   */
7668 7668                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7669 7669                  break;
7670 7670          case NFS4_CHECK_STATEID_OLD:
7671 7671                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7672 7672                  break;
7673 7673          case NFS4_CHECK_STATEID_BAD:
7674 7674                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7675 7675                  break;
7676 7676          case NFS4_CHECK_STATEID_EXPIRED:
7677 7677                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7678 7678                  break;
7679 7679          case NFS4_CHECK_STATEID_CLOSED:
7680 7680                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7681 7681                  break;
7682 7682          case NFS4_CHECK_STATEID_REPLAY:
7683 7683                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7684 7684                      resop)) {
7685 7685                  case NFS4_CHKSEQ_OKAY:
7686 7686                          /*
7687 7687                           * This is replayed stateid; if seqid matches
7688 7688                           * next expected, then client is using wrong seqid.
7689 7689                           */
7690 7690                          /* fall through */
7691 7691                  case NFS4_CHKSEQ_BAD:
7692 7692                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7693 7693                          break;
7694 7694                  case NFS4_CHKSEQ_REPLAY:
7695 7695                          /*
7696 7696                           * Note this case is the duplicate case so
7697 7697                           * resp->status is already set.
7698 7698                           */
7699 7699                          *cs->statusp = resp->status;
7700 7700                          rfs4_update_lease(sp->rs_owner->ro_client);
7701 7701                          break;
7702 7702                  }
7703 7703                  break;
7704 7704          case NFS4_CHECK_STATEID_UNCONFIRMED:
7705 7705                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7706 7706                      resop) != NFS4_CHKSEQ_OKAY) {
7707 7707                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7708 7708                          break;
7709 7709                  }
7710 7710                  *cs->statusp = resp->status = NFS4_OK;
7711 7711  
7712 7712                  next_stateid(&sp->rs_stateid);
7713 7713                  resp->open_stateid = sp->rs_stateid.stateid;
7714 7714                  sp->rs_owner->ro_need_confirm = FALSE;
7715 7715                  rfs4_update_lease(sp->rs_owner->ro_client);
7716 7716                  rfs4_update_open_sequence(sp->rs_owner);
7717 7717                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7718 7718                  break;
7719 7719          default:
7720 7720                  ASSERT(FALSE);
7721 7721                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7722 7722                  break;
7723 7723          }
7724 7724          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7725 7725          rfs4_state_rele(sp);
7726 7726  
7727 7727  out:
7728 7728          DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7729 7729              OPEN_CONFIRM4res *, resp);
7730 7730  }
7731 7731  
7732 7732  /*ARGSUSED*/
7733 7733  void
7734 7734  rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7735 7735      struct svc_req *req, struct compound_state *cs)
7736 7736  {
7737 7737          OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7738 7738          OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7739 7739          uint32_t access = args->share_access;
7740 7740          uint32_t deny = args->share_deny;
7741 7741          nfsstat4 status;
7742 7742          rfs4_state_t *sp;
7743 7743          rfs4_file_t *fp;
7744 7744          int fflags = 0;
7745 7745  
7746 7746          DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7747 7747              OPEN_DOWNGRADE4args *, args);
7748 7748  
7749 7749          if (cs->vp == NULL) {
7750 7750                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7751 7751                  goto out;
7752 7752          }
7753 7753  
7754 7754          if (cs->vp->v_type != VREG) {
7755 7755                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7756 7756                  return;
7757 7757          }
7758 7758  
7759 7759          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7760 7760          if (status != NFS4_OK) {
7761 7761                  *cs->statusp = resp->status = status;
7762 7762                  goto out;
7763 7763          }
7764 7764  
7765 7765          /* Ensure specified filehandle matches */
7766 7766          if (cs->vp != sp->rs_finfo->rf_vp) {
7767 7767                  rfs4_state_rele(sp);
7768 7768                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7769 7769                  goto out;
7770 7770          }
7771 7771  
7772 7772          /* hold off other access to open_owner while we tinker */
7773 7773          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7774 7774  
7775 7775          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7776 7776          case NFS4_CHECK_STATEID_OKAY:
7777 7777                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7778 7778                      resop) != NFS4_CHKSEQ_OKAY) {
7779 7779                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7780 7780                          goto end;
7781 7781                  }
7782 7782                  break;
7783 7783          case NFS4_CHECK_STATEID_OLD:
7784 7784                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7785 7785                  goto end;
7786 7786          case NFS4_CHECK_STATEID_BAD:
7787 7787                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7788 7788                  goto end;
7789 7789          case NFS4_CHECK_STATEID_EXPIRED:
7790 7790                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7791 7791                  goto end;
7792 7792          case NFS4_CHECK_STATEID_CLOSED:
7793 7793                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7794 7794                  goto end;
7795 7795          case NFS4_CHECK_STATEID_UNCONFIRMED:
7796 7796                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7797 7797                  goto end;
7798 7798          case NFS4_CHECK_STATEID_REPLAY:
7799 7799                  /* Check the sequence id for the open owner */
7800 7800                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7801 7801                      resop)) {
7802 7802                  case NFS4_CHKSEQ_OKAY:
7803 7803                          /*
7804 7804                           * This is replayed stateid; if seqid matches
7805 7805                           * next expected, then client is using wrong seqid.
7806 7806                           */
7807 7807                          /* fall through */
7808 7808                  case NFS4_CHKSEQ_BAD:
7809 7809                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7810 7810                          goto end;
7811 7811                  case NFS4_CHKSEQ_REPLAY:
7812 7812                          /*
7813 7813                           * Note this case is the duplicate case so
7814 7814                           * resp->status is already set.
7815 7815                           */
7816 7816                          *cs->statusp = resp->status;
7817 7817                          rfs4_update_lease(sp->rs_owner->ro_client);
7818 7818                          goto end;
7819 7819                  }
7820 7820                  break;
7821 7821          default:
7822 7822                  ASSERT(FALSE);
7823 7823                  break;
7824 7824          }
7825 7825  
7826 7826          rfs4_dbe_lock(sp->rs_dbe);
7827 7827          /*
7828 7828           * Check that the new access modes and deny modes are valid.
7829 7829           * Check that no invalid bits are set.
7830 7830           */
7831 7831          if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7832 7832              (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7833 7833                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7834 7834                  rfs4_update_open_sequence(sp->rs_owner);
7835 7835                  rfs4_dbe_unlock(sp->rs_dbe);
7836 7836                  goto end;
7837 7837          }
7838 7838  
7839 7839          /*
7840 7840           * The new modes must be a subset of the current modes and
7841 7841           * the access must specify at least one mode. To test that
7842 7842           * the new mode is a subset of the current modes we bitwise
7843 7843           * AND them together and check that the result equals the new
7844 7844           * mode. For example:
7845 7845           * New mode, access == R and current mode, sp->rs_open_access  == RW
7846 7846           * access & sp->rs_open_access == R == access, so the new access mode
7847 7847           * is valid. Consider access == RW, sp->rs_open_access = R
7848 7848           * access & sp->rs_open_access == R != access, so the new access mode
7849 7849           * is invalid.
7850 7850           */
7851 7851          if ((access & sp->rs_open_access) != access ||
7852 7852              (deny & sp->rs_open_deny) != deny ||
7853 7853              (access &
7854 7854              (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7855 7855                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7856 7856                  rfs4_update_open_sequence(sp->rs_owner);
7857 7857                  rfs4_dbe_unlock(sp->rs_dbe);
7858 7858                  goto end;
7859 7859          }
7860 7860  
7861 7861          /*
7862 7862           * Release any share locks associated with this stateID.
7863 7863           * Strictly speaking, this violates the spec because the
7864 7864           * spec effectively requires that open downgrade be atomic.
7865 7865           * At present, fs_shrlock does not have this capability.
7866 7866           */
7867 7867          (void) rfs4_unshare(sp);
7868 7868  
7869 7869          status = rfs4_share(sp, access, deny);
7870 7870          if (status != NFS4_OK) {
7871 7871                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7872 7872                  rfs4_update_open_sequence(sp->rs_owner);
7873 7873                  rfs4_dbe_unlock(sp->rs_dbe);
7874 7874                  goto end;
7875 7875          }
7876 7876  
7877 7877          fp = sp->rs_finfo;
7878 7878          rfs4_dbe_lock(fp->rf_dbe);
7879 7879  
7880 7880          /*
7881 7881           * If the current mode has deny read and the new mode
7882 7882           * does not, decrement the number of deny read mode bits
7883 7883           * and if it goes to zero turn off the deny read bit
7884 7884           * on the file.
7885 7885           */
7886 7886          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7887 7887              (deny & OPEN4_SHARE_DENY_READ) == 0) {
7888 7888                  fp->rf_deny_read--;
7889 7889                  if (fp->rf_deny_read == 0)
7890 7890                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7891 7891          }
7892 7892  
7893 7893          /*
7894 7894           * If the current mode has deny write and the new mode
7895 7895           * does not, decrement the number of deny write mode bits
7896 7896           * and if it goes to zero turn off the deny write bit
7897 7897           * on the file.
7898 7898           */
7899 7899          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7900 7900              (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7901 7901                  fp->rf_deny_write--;
7902 7902                  if (fp->rf_deny_write == 0)
7903 7903                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7904 7904          }
7905 7905  
7906 7906          /*
7907 7907           * If the current mode has access read and the new mode
7908 7908           * does not, decrement the number of access read mode bits
7909 7909           * and if it goes to zero turn off the access read bit
7910 7910           * on the file.  set fflags to FREAD for the call to
7911 7911           * vn_open_downgrade().
7912 7912           */
7913 7913          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7914 7914              (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7915 7915                  fp->rf_access_read--;
7916 7916                  if (fp->rf_access_read == 0)
7917 7917                          fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7918 7918                  fflags |= FREAD;
7919 7919          }
7920 7920  
7921 7921          /*
7922 7922           * If the current mode has access write and the new mode
7923 7923           * does not, decrement the number of access write mode bits
7924 7924           * and if it goes to zero turn off the access write bit
7925 7925           * on the file.  set fflags to FWRITE for the call to
7926 7926           * vn_open_downgrade().
7927 7927           */
7928 7928          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7929 7929              (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7930 7930                  fp->rf_access_write--;
7931 7931                  if (fp->rf_access_write == 0)
7932 7932                          fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7933 7933                  fflags |= FWRITE;
7934 7934          }
7935 7935  
7936 7936          /* Check that the file is still accessible */
7937 7937          ASSERT(fp->rf_share_access);
7938 7938  
7939 7939          rfs4_dbe_unlock(fp->rf_dbe);
7940 7940  
7941 7941          /* now set the new open access and deny modes */
7942 7942          sp->rs_open_access = access;
7943 7943          sp->rs_open_deny = deny;
7944 7944  
7945 7945          /*
7946 7946           * we successfully downgraded the share lock, now we need to downgrade
7947 7947           * the open. it is possible that the downgrade was only for a deny
7948 7948           * mode and we have nothing else to do.
7949 7949           */
7950 7950          if ((fflags & (FREAD|FWRITE)) != 0)
7951 7951                  vn_open_downgrade(cs->vp, fflags);
7952 7952  
7953 7953          /* Update the stateid */
7954 7954          next_stateid(&sp->rs_stateid);
7955 7955          resp->open_stateid = sp->rs_stateid.stateid;
7956 7956  
7957 7957          rfs4_dbe_unlock(sp->rs_dbe);
7958 7958  
7959 7959          *cs->statusp = resp->status = NFS4_OK;
7960 7960          /* Update the lease */
7961 7961          rfs4_update_lease(sp->rs_owner->ro_client);
7962 7962          /* And the sequence */
7963 7963          rfs4_update_open_sequence(sp->rs_owner);
7964 7964          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7965 7965  
7966 7966  end:
7967 7967          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7968 7968          rfs4_state_rele(sp);
7969 7969  out:
7970 7970          DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7971 7971              OPEN_DOWNGRADE4res *, resp);
7972 7972  }
7973 7973  
/*
 * Locate the first occurrence of the NUL-terminated string s2 within the
 * first n bytes of the buffer s1.  The buffer is treated as raw bytes, so
 * it does not need to be NUL-terminated and may contain embedded NULs.
 *
 * Returns a pointer to the start of the match inside s1, or NULL when
 * s2 does not occur entirely within those n bytes.  An empty s2 matches
 * at the start of the buffer.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	size_t needle_len = strlen(s2);
	char *cursor = (char *)s1;
	size_t remaining;

	/* Stop once fewer bytes remain than the needle needs */
	for (remaining = n; remaining >= needle_len; remaining--, cursor++) {
		if (bcmp(cursor, s2, needle_len) == 0)
			return (cursor);
	}

	return (NULL);
}
7989 7989  
7990 7990  /*
7991 7991   * The logic behind this function is detailed in the NFSv4 RFC in the
7992 7992   * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7993 7993   * that section for explicit guidance to server behavior for
7994 7994   * SETCLIENTID.
7995 7995   */
7996 7996  void
7997 7997  rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7998 7998      struct svc_req *req, struct compound_state *cs)
7999 7999  {
8000 8000          SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8001 8001          SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8002 8002          rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8003 8003          rfs4_clntip_t *ci;
8004 8004          bool_t create;
8005 8005          char *addr, *netid;
8006 8006          int len;
8007 8007  
8008 8008          DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8009 8009              SETCLIENTID4args *, args);
8010 8010  retry:
8011 8011          newcp = cp_confirmed = cp_unconfirmed = NULL;
8012 8012  
8013 8013          /*
8014 8014           * Save the caller's IP address
8015 8015           */
8016 8016          args->client.cl_addr =
8017 8017              (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8018 8018  
8019 8019          /*
8020 8020           * Record if it is a Solaris client that cannot handle referrals.
8021 8021           */
8022 8022          if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8023 8023              !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8024 8024                  /* Add a "yes, it's downrev" record */
8025 8025                  create = TRUE;
8026 8026                  ci = rfs4_find_clntip(args->client.cl_addr, &create);
8027 8027                  ASSERT(ci != NULL);
8028 8028                  rfs4_dbe_rele(ci->ri_dbe);
8029 8029          } else {
8030 8030                  /* Remove any previous record */
8031 8031                  rfs4_invalidate_clntip(args->client.cl_addr);
8032 8032          }
8033 8033  
8034 8034          /*
8035 8035           * In search of an EXISTING client matching the incoming
8036 8036           * request to establish a new client identifier at the server
8037 8037           */
8038 8038          create = TRUE;
8039 8039          cp = rfs4_findclient(&args->client, &create, NULL);
8040 8040  
8041 8041          /* Should never happen */
8042 8042          ASSERT(cp != NULL);
8043 8043  
8044 8044          if (cp == NULL) {
8045 8045                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8046 8046                  goto out;
8047 8047          }
8048 8048  
8049 8049          /*
8050 8050           * Easiest case. Client identifier is newly created and is
8051 8051           * unconfirmed.  Also note that for this case, no other
8052 8052           * entries exist for the client identifier.  Nothing else to
8053 8053           * check.  Just setup the response and respond.
8054 8054           */
8055 8055          if (create) {
8056 8056                  *cs->statusp = res->status = NFS4_OK;
8057 8057                  res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8058 8058                  res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8059 8059                      cp->rc_confirm_verf;
8060 8060                  /* Setup callback information; CB_NULL confirmation later */
8061 8061                  rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8062 8062  
8063 8063                  rfs4_client_rele(cp);
8064 8064                  goto out;
8065 8065          }
8066 8066  
8067 8067          /*
8068 8068           * An existing, confirmed client may exist but it may not have
8069 8069           * been active for at least one lease period.  If so, then
8070 8070           * "close" the client and create a new client identifier
8071 8071           */
8072 8072          if (rfs4_lease_expired(cp)) {
8073 8073                  rfs4_client_close(cp);
8074 8074                  goto retry;
8075 8075          }
8076 8076  
8077 8077          if (cp->rc_need_confirm == TRUE)
8078 8078                  cp_unconfirmed = cp;
8079 8079          else
8080 8080                  cp_confirmed = cp;
8081 8081  
8082 8082          cp = NULL;
8083 8083  
8084 8084          /*
8085 8085           * We have a confirmed client, now check for an
8086 8086           * unconfimred entry
8087 8087           */
8088 8088          if (cp_confirmed) {
8089 8089                  /* If creds don't match then client identifier is inuse */
8090 8090                  if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8091 8091                          rfs4_cbinfo_t *cbp;
8092 8092                          /*
8093 8093                           * Some one else has established this client
8094 8094                           * id. Try and say * who they are. We will use
8095 8095                           * the call back address supplied by * the
8096 8096                           * first client.
8097 8097                           */
8098 8098                          *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8099 8099  
8100 8100                          addr = netid = NULL;
8101 8101  
8102 8102                          cbp = &cp_confirmed->rc_cbinfo;
8103 8103                          if (cbp->cb_callback.cb_location.r_addr &&
8104 8104                              cbp->cb_callback.cb_location.r_netid) {
8105 8105                                  cb_client4 *cbcp = &cbp->cb_callback;
8106 8106  
8107 8107                                  len = strlen(cbcp->cb_location.r_addr)+1;
8108 8108                                  addr = kmem_alloc(len, KM_SLEEP);
8109 8109                                  bcopy(cbcp->cb_location.r_addr, addr, len);
8110 8110                                  len = strlen(cbcp->cb_location.r_netid)+1;
8111 8111                                  netid = kmem_alloc(len, KM_SLEEP);
8112 8112                                  bcopy(cbcp->cb_location.r_netid, netid, len);
8113 8113                          }
8114 8114  
8115 8115                          res->SETCLIENTID4res_u.client_using.r_addr = addr;
8116 8116                          res->SETCLIENTID4res_u.client_using.r_netid = netid;
8117 8117  
8118 8118                          rfs4_client_rele(cp_confirmed);
8119 8119                  }
8120 8120  
8121 8121                  /*
8122 8122                   * Confirmed, creds match, and verifier matches; must
8123 8123                   * be an update of the callback info
8124 8124                   */
8125 8125                  if (cp_confirmed->rc_nfs_client.verifier ==
8126 8126                      args->client.verifier) {
8127 8127                          /* Setup callback information */
8128 8128                          rfs4_client_setcb(cp_confirmed, &args->callback,
8129 8129                              args->callback_ident);
8130 8130  
8131 8131                          /* everything okay -- move ahead */
8132 8132                          *cs->statusp = res->status = NFS4_OK;
8133 8133                          res->SETCLIENTID4res_u.resok4.clientid =
8134 8134                              cp_confirmed->rc_clientid;
8135 8135  
8136 8136                          /* update the confirm_verifier and return it */
8137 8137                          rfs4_client_scv_next(cp_confirmed);
8138 8138                          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8139 8139                              cp_confirmed->rc_confirm_verf;
8140 8140  
8141 8141                          rfs4_client_rele(cp_confirmed);
8142 8142                          goto out;
8143 8143                  }
8144 8144  
8145 8145                  /*
8146 8146                   * Creds match but the verifier doesn't.  Must search
8147 8147                   * for an unconfirmed client that would be replaced by
8148 8148                   * this request.
8149 8149                   */
8150 8150                  create = FALSE;
8151 8151                  cp_unconfirmed = rfs4_findclient(&args->client, &create,
8152 8152                      cp_confirmed);
8153 8153          }
8154 8154  
8155 8155          /*
8156 8156           * At this point, we have taken care of the brand new client
8157 8157           * struct, INUSE case, update of an existing, and confirmed
8158 8158           * client struct.
8159 8159           */
8160 8160  
8161 8161          /*
8162 8162           * check to see if things have changed while we originally
8163 8163           * picked up the client struct.  If they have, then return and
8164 8164           * retry the processing of this SETCLIENTID request.
8165 8165           */
8166 8166          if (cp_unconfirmed) {
8167 8167                  rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8168 8168                  if (!cp_unconfirmed->rc_need_confirm) {
8169 8169                          rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8170 8170                          rfs4_client_rele(cp_unconfirmed);
8171 8171                          if (cp_confirmed)
8172 8172                                  rfs4_client_rele(cp_confirmed);
8173 8173                          goto retry;
8174 8174                  }
8175 8175                  /* do away with the old unconfirmed one */
8176 8176                  rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8177 8177                  rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8178 8178                  rfs4_client_rele(cp_unconfirmed);
8179 8179                  cp_unconfirmed = NULL;
8180 8180          }
8181 8181  
8182 8182          /*
8183 8183           * This search will temporarily hide the confirmed client
8184 8184           * struct while a new client struct is created as the
8185 8185           * unconfirmed one.
8186 8186           */
8187 8187          create = TRUE;
8188 8188          newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8189 8189  
8190 8190          ASSERT(newcp != NULL);
8191 8191  
8192 8192          if (newcp == NULL) {
8193 8193                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8194 8194                  rfs4_client_rele(cp_confirmed);
8195 8195                  goto out;
8196 8196          }
8197 8197  
8198 8198          /*
8199 8199           * If one was not created, then a similar request must be in
8200 8200           * process so release and start over with this one
8201 8201           */
8202 8202          if (create != TRUE) {
8203 8203                  rfs4_client_rele(newcp);
8204 8204                  if (cp_confirmed)
8205 8205                          rfs4_client_rele(cp_confirmed);
8206 8206                  goto retry;
8207 8207          }
8208 8208  
8209 8209          *cs->statusp = res->status = NFS4_OK;
8210 8210          res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8211 8211          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8212 8212              newcp->rc_confirm_verf;
8213 8213          /* Setup callback information; CB_NULL confirmation later */
8214 8214          rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8215 8215  
8216 8216          newcp->rc_cp_confirmed = cp_confirmed;
8217 8217  
8218 8218          rfs4_client_rele(newcp);
8219 8219  
8220 8220  out:
8221 8221          DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8222 8222              SETCLIENTID4res *, res);
8223 8223  }
8224 8224  
8225 8225  /*ARGSUSED*/
8226 8226  void
8227 8227  rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8228 8228      struct svc_req *req, struct compound_state *cs)
8229 8229  {
8230 8230          SETCLIENTID_CONFIRM4args *args =
8231 8231              &argop->nfs_argop4_u.opsetclientid_confirm;
8232 8232          SETCLIENTID_CONFIRM4res *res =
8233 8233              &resop->nfs_resop4_u.opsetclientid_confirm;
8234 8234          rfs4_client_t *cp, *cptoclose = NULL;
8235 8235          nfs4_srv_t *nsrv4;
8236 8236  
8237 8237          DTRACE_NFSV4_2(op__setclientid__confirm__start,
8238 8238              struct compound_state *, cs,
8239 8239              SETCLIENTID_CONFIRM4args *, args);
8240 8240  
8241 8241          nsrv4 = nfs4_get_srv();
8242 8242          *cs->statusp = res->status = NFS4_OK;
8243 8243  
8244 8244          cp = rfs4_findclient_by_id(args->clientid, TRUE);
8245 8245  
8246 8246          if (cp == NULL) {
8247 8247                  *cs->statusp = res->status =
8248 8248                      rfs4_check_clientid(&args->clientid, 1);
8249 8249                  goto out;
8250 8250          }
8251 8251  
8252 8252          if (!creds_ok(cp, req, cs)) {
8253 8253                  *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8254 8254                  rfs4_client_rele(cp);
8255 8255                  goto out;
8256 8256          }
8257 8257  
8258 8258          /* If the verifier doesn't match, the record doesn't match */
8259 8259          if (cp->rc_confirm_verf != args->setclientid_confirm) {
8260 8260                  *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8261 8261                  rfs4_client_rele(cp);
8262 8262                  goto out;
8263 8263          }
8264 8264  
8265 8265          rfs4_dbe_lock(cp->rc_dbe);
8266 8266          cp->rc_need_confirm = FALSE;
8267 8267          if (cp->rc_cp_confirmed) {
8268 8268                  cptoclose = cp->rc_cp_confirmed;
8269 8269                  cptoclose->rc_ss_remove = 1;
8270 8270                  cp->rc_cp_confirmed = NULL;
8271 8271          }
8272 8272  
8273 8273          /*
8274 8274           * Update the client's associated server instance, if it's changed
8275 8275           * since the client was created.
8276 8276           */
8277 8277          if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8278 8278                  rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8279 8279  
8280 8280          /*
8281 8281           * Record clientid in stable storage.
8282 8282           * Must be done after server instance has been assigned.
8283 8283           */
8284 8284          rfs4_ss_clid(nsrv4, cp);
8285 8285  
8286 8286          rfs4_dbe_unlock(cp->rc_dbe);
8287 8287  
8288 8288          if (cptoclose)
8289 8289                  /* don't need to rele, client_close does it */
8290 8290                  rfs4_client_close(cptoclose);
8291 8291  
8292 8292          /* If needed, initiate CB_NULL call for callback path */
8293 8293          rfs4_deleg_cb_check(cp);
8294 8294          rfs4_update_lease(cp);
8295 8295  
8296 8296          /*
8297 8297           * Check to see if client can perform reclaims
8298 8298           */
8299 8299          rfs4_ss_chkclid(nsrv4, cp);
8300 8300  
8301 8301          rfs4_client_rele(cp);
8302 8302  
8303 8303  out:
8304 8304          DTRACE_NFSV4_2(op__setclientid__confirm__done,
8305 8305              struct compound_state *, cs,
8306 8306              SETCLIENTID_CONFIRM4 *, res);
8307 8307  }
8308 8308  
8309 8309  
8310 8310  /*ARGSUSED*/
8311 8311  void
8312 8312  rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8313 8313      struct svc_req *req, struct compound_state *cs)
8314 8314  {
8315 8315          CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8316 8316          CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8317 8317          rfs4_state_t *sp;
8318 8318          nfsstat4 status;
8319 8319  
8320 8320          DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8321 8321              CLOSE4args *, args);
8322 8322  
8323 8323          if (cs->vp == NULL) {
8324 8324                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8325 8325                  goto out;
8326 8326          }
8327 8327  
8328 8328          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8329 8329          if (status != NFS4_OK) {
8330 8330                  *cs->statusp = resp->status = status;
8331 8331                  goto out;
8332 8332          }
8333 8333  
8334 8334          /* Ensure specified filehandle matches */
8335 8335          if (cs->vp != sp->rs_finfo->rf_vp) {
8336 8336                  rfs4_state_rele(sp);
8337 8337                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8338 8338                  goto out;
8339 8339          }
8340 8340  
8341 8341          /* hold off other access to open_owner while we tinker */
8342 8342          rfs4_sw_enter(&sp->rs_owner->ro_sw);
8343 8343  
8344 8344          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8345 8345          case NFS4_CHECK_STATEID_OKAY:
8346 8346                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8347 8347                      resop) != NFS4_CHKSEQ_OKAY) {
8348 8348                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8349 8349                          goto end;
8350 8350                  }
8351 8351                  break;
8352 8352          case NFS4_CHECK_STATEID_OLD:
8353 8353                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8354 8354                  goto end;
8355 8355          case NFS4_CHECK_STATEID_BAD:
8356 8356                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8357 8357                  goto end;
8358 8358          case NFS4_CHECK_STATEID_EXPIRED:
8359 8359                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8360 8360                  goto end;
8361 8361          case NFS4_CHECK_STATEID_CLOSED:
8362 8362                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8363 8363                  goto end;
8364 8364          case NFS4_CHECK_STATEID_UNCONFIRMED:
8365 8365                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8366 8366                  goto end;
8367 8367          case NFS4_CHECK_STATEID_REPLAY:
8368 8368                  /* Check the sequence id for the open owner */
8369 8369                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8370 8370                      resop)) {
8371 8371                  case NFS4_CHKSEQ_OKAY:
8372 8372                          /*
8373 8373                           * This is replayed stateid; if seqid matches
8374 8374                           * next expected, then client is using wrong seqid.
8375 8375                           */
8376 8376                          /* FALL THROUGH */
8377 8377                  case NFS4_CHKSEQ_BAD:
8378 8378                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8379 8379                          goto end;
8380 8380                  case NFS4_CHKSEQ_REPLAY:
8381 8381                          /*
8382 8382                           * Note this case is the duplicate case so
8383 8383                           * resp->status is already set.
8384 8384                           */
8385 8385                          *cs->statusp = resp->status;
8386 8386                          rfs4_update_lease(sp->rs_owner->ro_client);
8387 8387                          goto end;
8388 8388                  }
8389 8389                  break;
8390 8390          default:
8391 8391                  ASSERT(FALSE);
8392 8392                  break;
8393 8393          }
8394 8394  
8395 8395          rfs4_dbe_lock(sp->rs_dbe);
8396 8396  
8397 8397          /* Update the stateid. */
8398 8398          next_stateid(&sp->rs_stateid);
8399 8399          resp->open_stateid = sp->rs_stateid.stateid;
8400 8400  
8401 8401          rfs4_dbe_unlock(sp->rs_dbe);
8402 8402  
8403 8403          rfs4_update_lease(sp->rs_owner->ro_client);
8404 8404          rfs4_update_open_sequence(sp->rs_owner);
8405 8405          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8406 8406  
8407 8407          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8408 8408  
8409 8409          *cs->statusp = resp->status = status;
8410 8410  
8411 8411  end:
8412 8412          rfs4_sw_exit(&sp->rs_owner->ro_sw);
8413 8413          rfs4_state_rele(sp);
8414 8414  out:
8415 8415          DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8416 8416              CLOSE4res *, resp);
8417 8417  }
8418 8418  
8419 8419  /*
8420 8420   * Manage the counts on the file struct and close all file locks
            *
            * sp is the open state being torn down, cr is the credential handed
            * to VOP_CLOSE(), and close_of_client is set when the caller is
            * closing down the whole client rather than a single open.
            *
            * The work is done in four steps:
            *  1. On a client-wide close, drop every lock held under the
            *     client's sysid in one shot.  rc_unlksys_completed records
            *     that this was done so it happens at most once per client.
            *  2. Mark every lock state on rs_lostatelist as cleaned and, when
            *     this is not a client-wide close, call cleanlocks() for each
            *     lock owner.
            *  3. Release share reservations via rfs4_unshare() when the client
            *     has a sysid.
            *  4. Back this state's access/deny contribution out of the
            *     rfs4_file_t counters and VOP_CLOSE() the vnode.
8421 8421   */
8422 8422  /*ARGSUSED*/
8423 8423  void
8424 8424  rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8425 8425      bool_t close_of_client)
8426 8426  {
8427 8427          rfs4_file_t *fp = sp->rs_finfo;
8428 8428          rfs4_lo_state_t *lsp;
                   /* Collects FREAD/FWRITE for the VOP_CLOSE() call below */
8429 8429          int fflags = 0;
8430 8430  
8431 8431          /*
8432 8432           * If this call is part of the larger closing down of client
8433 8433           * state then it is just easier to release all locks
8434 8434           * associated with this client instead of going through each
8435 8435           * individual file and cleaning locks there.
8436 8436           */
8437 8437          if (close_of_client) {
8438 8438                  if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8439 8439                      !list_is_empty(&sp->rs_lostatelist) &&
8440 8440                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8441 8441                          /* Is the PxFS kernel module loaded? */
8442 8442                          if (lm_remove_file_locks != NULL) {
8443 8443                                  int new_sysid;
8444 8444  
8445 8445                                  /* Encode the cluster nodeid in new sysid */
8446 8446                                  new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8447 8447                                  lm_set_nlmid_flk(&new_sysid);
8448 8448  
8449 8449                                  /*
8450 8450                                   * This PxFS routine removes file locks for a
8451 8451                                   * client over all nodes of a cluster.
8452 8452                                   */
8453 8453                                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8454 8454                                      "lm_remove_file_locks(sysid=0x%x)\n",
8455 8455                                      new_sysid));
8456 8456                                  (*lm_remove_file_locks)(new_sysid);
8457 8457                          } else {
8458 8458                                  struct flock64 flk;
8459 8459  
8460 8460                                  /* Release all locks for this client */
8461 8461                                  flk.l_type = F_UNLKSYS;
8462 8462                                  flk.l_whence = 0;
8463 8463                                  flk.l_start = 0;
8464 8464                                  flk.l_len = 0;
8465 8465                                  flk.l_sysid =
8466 8466                                      sp->rs_owner->ro_client->rc_sysidt;
8467 8467                                  flk.l_pid = 0;
                                           /*
                                            * Note this call passes CRED(), not
                                            * the caller-supplied cr.
                                            */
8468 8468                                  (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8469 8469                                      &flk, F_REMOTELOCK | FREAD | FWRITE,
8470 8470                                      (u_offset_t)0, NULL, CRED(), NULL);
8471 8471                          }
8472 8472  
                                   /*
                                    * Later calls for the same client skip the
                                    * client-wide unlock above.
                                    */
8473 8473                          sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8474 8474                  }
8475 8475          }
8476 8476  
8477 8477          /*
8478 8478           * Release all locks on this file by this lock owner or at
8479 8479           * least mark the locks as having been released
8480 8480           */
8481 8481          for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8482 8482              lsp = list_next(&sp->rs_lostatelist, lsp)) {
8483 8483                  lsp->rls_locks_cleaned = TRUE;
8484 8484  
8485 8485                  /* Was this already taken care of above? */
8486 8486                  if (!close_of_client &&
8487 8487                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8488 8488                          (void) cleanlocks(sp->rs_finfo->rf_vp,
8489 8489                              lsp->rls_locker->rl_pid,
8490 8490                              lsp->rls_locker->rl_client->rc_sysidt);
8491 8491          }
8492 8492  
8493 8493          /*
8494 8494           * Release any shrlocks associated with this open state ID.
8495 8495           * This must be done before the rfs4_state gets marked closed.
8496 8496           */
8497 8497          if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8498 8498                  (void) rfs4_unshare(sp);
8499 8499  
                   /* Nothing to back out if this state holds no open access */
8500 8500          if (sp->rs_open_access) {
8501 8501                  rfs4_dbe_lock(fp->rf_dbe);
8502 8502  
8503 8503                  /*
8504 8504                   * Decrement the count for each access and deny bit that this
8505 8505                   * state has contributed to the file.
8506 8506                   * If the file counts go to zero
8507 8507                   * clear the appropriate bit in the appropriate mask.
8508 8508                   */
8509 8509                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8510 8510                          fp->rf_access_read--;
8511 8511                          fflags |= FREAD;
8512 8512                          if (fp->rf_access_read == 0)
8513 8513                                  fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8514 8514                  }
8515 8515                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8516 8516                          fp->rf_access_write--;
8517 8517                          fflags |= FWRITE;
8518 8518                          if (fp->rf_access_write == 0)
8519 8519                                  fp->rf_share_access &=
8520 8520                                      ~OPEN4_SHARE_ACCESS_WRITE;
8521 8521                  }
8522 8522                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8523 8523                          fp->rf_deny_read--;
8524 8524                          if (fp->rf_deny_read == 0)
8525 8525                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8526 8526                  }
8527 8527                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8528 8528                          fp->rf_deny_write--;
8529 8529                          if (fp->rf_deny_write == 0)
8530 8530                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8531 8531                  }
8532 8532  
                           /* Close with the access modes this state had open */
8533 8533                  (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8534 8534  
8535 8535                  rfs4_dbe_unlock(fp->rf_dbe);
8536 8536  
                           /*
                            * With both masks cleared, a repeated call for this
                            * state skips the count adjustments above.
                            */
8537 8537                  sp->rs_open_access = 0;
8538 8538                  sp->rs_open_deny = 0;
8539 8539          }
8540 8540  }
8541 8541  
8542 8542  /*
8543 8543   * lock_denied: Fill in a LOCK4denied structure given an flock64 structure.
            *
            * dp receives the owner, offset, length and lock type of the
            * conflicting lock described by flk.  The owner is looked up by
            * flk->l_pid; when no NFSv4 lock owner matches, an owner is
            * fabricated from l_sysid and l_pid.
            *
            * Returns:
            *   NFS4_OK           dp was filled in completely.
            *   NFS4ERR_EXPIRED   the matching lock owner's client lease has
            *                     expired; the client is closed here and dp is
            *                     left untouched.
            *   NFS4ERR_INVAL     flk->l_type is neither F_RDLCK nor F_WRLCK.
            *
            * dp->owner.owner_val is allocated here with kmem_alloc().
            * NOTE(review): it is already allocated when NFS4ERR_INVAL is
            * returned; confirm the caller frees it on that path as well.
8544 8544   */
8545 8545  static nfsstat4
8546 8546  lock_denied(LOCK4denied *dp, struct flock64 *flk)
8547 8547  {
8548 8548          rfs4_lockowner_t *lo;
8549 8549          rfs4_client_t *cp;
8550 8550          uint32_t len;
8551 8551  
8552 8552          lo = rfs4_findlockowner_by_pid(flk->l_pid);
8553 8553          if (lo != NULL) {
8554 8554                  cp = lo->rl_client;
8555 8555                  if (rfs4_lease_expired(cp)) {
8556 8556                          rfs4_lockowner_rele(lo);
                                   /*
                                    * NOTE(review): the hold taken here is
                                    * presumably consumed by rfs4_client_close();
                                    * confirm against that routine.
                                    */
8557 8557                          rfs4_dbe_hold(cp->rc_dbe);
8558 8558                          rfs4_client_close(cp);
8559 8559                          return (NFS4ERR_EXPIRED);
8560 8560                  }
                           /* Copy the owner out before dropping our reference */
8561 8561                  dp->owner.clientid = lo->rl_owner.clientid;
8562 8562                  len = lo->rl_owner.owner_len;
8563 8563                  dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8564 8564                  bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8565 8565                  dp->owner.owner_len = len;
8566 8566                  rfs4_lockowner_rele(lo);
8567 8567                  goto finish;
8568 8568          }
8569 8569  
8570 8570          /*
8571 8571           * It's not an NFS4 lock. We take advantage that the upper 32 bits
8572 8572           * of the client id contain the boot time for a NFS4 lock. So we
8573 8573           * fabricate an identity by setting clientid to the sysid, and
8574 8574           * the lock owner to the pid.
8575 8575           */
8576 8576          dp->owner.clientid = flk->l_sysid;
8577 8577          len = sizeof (pid_t);
8578 8578          dp->owner.owner_len = len;
8579 8579          dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8580 8580          bcopy(&flk->l_pid, dp->owner.owner_val, len);
8581 8581  finish:
8582 8582          dp->offset = flk->l_start;
8583 8583          dp->length = flk->l_len;
8584 8584  
8585 8585          if (flk->l_type == F_RDLCK)
8586 8586                  dp->locktype = READ_LT;
8587 8587          else if (flk->l_type == F_WRLCK)
8588 8588                  dp->locktype = WRITE_LT;
8589 8589          else
8590 8590                  return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8591 8591  
8592 8592          return (NFS4_OK);
8593 8593  }
8594 8594  
8595 8595  /*
8596 8596   * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8597 8597   * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8598 8598   * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8599 8599   * for that (obviously); they are sending the LOCK requests with some delays
8600 8600   * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8601 8601   * locking and delay implementation at the client side.
8602 8602   *
8603 8603   * To make the life of the clients easier, the NFSv4.0 server tries to do some
8604 8604   * fast retries on its own (the for loop below) in a hope the lock will be
8605 8605   * available soon.  And if not, the client won't need to resend the LOCK
8606 8606   * requests so fast to check the lock availability.  This basically saves some
8607 8607   * network traffic and tries to make sure the client gets the lock ASAP.
            *
            * vp is the vnode to lock, flock describes the requested lock, and
            * flag and cred are passed through to VOP_FRLOCK().
            *
            * Returns the error from the last lock attempt.  When that error is
            * EAGAIN or EACCES and F_GETLK reports a conflicting lock, *flock is
            * overwritten with the description of that conflicting lock.
            *
            * NOTE(review): if rfs4_maxlock_tries were ever <= 0 the for loop
            * would not run and 'error' would be read uninitialized; confirm the
            * tunable is always positive.
8608 8608   */
8609 8609  static int
8610 8610  setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8611 8611  {
8612 8612          int error;
8613 8613          struct flock64 flk;
8614 8614          int i;
8615 8615          clock_t delaytime;
8616 8616          int cmd;
                   /* Counts restarts caused by the F_GETLK race described below */
8617 8617          int spin_cnt = 0;
8618 8618  
8619 8619          cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8620 8620  retry:
                   /* Each pass through 'retry' starts again from the base delay */
8621 8621          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8622 8622  
8623 8623          for (i = 0; i < rfs4_maxlock_tries; i++) {
8624 8624                  LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8625 8625                  error = VOP_FRLOCK(vp, cmd,
8626 8626                      flock, flag, (u_offset_t)0, NULL, cred, NULL);
8627 8627  
                           /* Only a lock conflict is worth retrying */
8628 8628                  if (error != EAGAIN && error != EACCES)
8629 8629                          break;
8630 8630  
                           /* Back off, doubling the delay; no sleep after last try */
8631 8631                  if (i < rfs4_maxlock_tries - 1) {
8632 8632                          delay(delaytime);
8633 8633                          delaytime *= 2;
8634 8634                  }
8635 8635          }
8636 8636  
8637 8637          if (error == EAGAIN || error == EACCES) {
8638 8638                  /* Get the owner of the lock */
8639 8639                  flk = *flock;
8640 8640                  LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8641 8641                  if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8642 8642                      NULL) == 0) {
8643 8643                          /*
8644 8644                           * There's a race inherent in the current VOP_FRLOCK
8645 8645                           * design where:
8646 8646                           * a: "other guy" takes a lock that conflicts with a
8647 8647                           * lock we want
8648 8648                           * b: we attempt to take our lock (non-blocking) and
8649 8649                           * the attempt fails.
8650 8650                           * c: "other guy" releases the conflicting lock
8651 8651                           * d: we ask what lock conflicts with the lock we want,
8652 8652                           * getting F_UNLCK (no lock blocks us)
8653 8653                           *
8654 8654                           * If we retry the non-blocking lock attempt in this
8655 8655                           * case (restart at step 'b') there's some possibility
8656 8656                           * that many such attempts might fail.  However a test
8657 8657                           * designed to actually provoke this race shows that
8658 8658                           * the vast majority of cases require no retry, and
8659 8659                           * only a few took as many as three retries.  Here's
8660 8660                           * the test outcome:
8661 8661                           *
8662 8662                           *         number of retries    how many times we needed
8663 8663                           *                              that many retries
8664 8664                           *         0                    79461
8665 8665                           *         1                      862
8666 8666                           *         2                       49
8667 8667                           *         3                        5
8668 8668                           *
8669 8669                           * Given those empirical results, we arbitrarily limit
8670 8670                           * the retry count to ten.
8671 8671                           *
8672 8672                           * If we actually make it to ten retries and give up,
8673 8673                           * nothing catastrophic happens, but we're unable to
8674 8674                           * return the information about the conflicting lock to
8675 8675                           * the NFS client.  That's an acceptable trade off vs.
8676 8676                           * letting this retry loop run forever.
8677 8677                           */
8678 8678                          if (flk.l_type == F_UNLCK) {
8679 8679                                  if (spin_cnt++ < 10) {
8680 8680                                          /* No longer locked, retry */
8681 8681                                          goto retry;
8682 8682                                  }
8683 8683                          } else {
                                           /* Hand the conflicting lock to the caller */
8684 8684                                  *flock = flk;
8685 8685                                  LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8686 8686                                      F_GETLK, &flk);
8687 8687                          }
8688 8688                  }
8689 8689          }
8690 8690  
8691 8691          return (error);
8692 8692  }
8693 8693  
           /*
            * rfs4_do_lock: set or clear a byte-range lock on behalf of a lock owner.
            *
            * lsp       lock-owner state; supplies the lock owner (rls_locker) and
            *           the open state (rls_state) whose file is locked.
            * locktype  NFSv4 lock type; consulted only when resop->resop is not
            *           OP_LOCKU.  Read types need OPEN4_SHARE_ACCESS_READ and
            *           write types need OPEN4_SHARE_ACCESS_WRITE in the open
            *           state's rs_share_access.
            * offset    start of the range.
            * length    length of the range; zero is rejected and all ones means
            *           "to end of file".
            * cred      credential passed to setlock().
            * resop     selects lock vs. unlock (OP_LOCKU) and receives the
            *           LOCK4denied description when the lock is denied.
            *
            * Returns NFS4_OK on success, otherwise one of:
            *   NFS4ERR_EXPIRED      the lock owner's client lease has expired
            *   NFS4ERR_INVAL        zero length, unknown locktype, a range that
            *                        does not fit in off64_t, or EOVERFLOW
            *   NFS4ERR_OLD_STATEID  the open state is (or became) closed
            *   NFS4ERR_OPENMODE     the open lacks the access the lock type needs
            *   NFS4ERR_DENIED       a conflicting lock exists
            *   NFS4ERR_DELAY        ENOLCK from the file system
            *   NFS4ERR_NOTSUPP      EINVAL from the file system
            *   NFS4ERR_SERVERFAULT  any other error
            * A failure from rfs4_client_sysid() is returned unchanged.
            */
8694 8694  /*ARGSUSED*/
8695 8695  static nfsstat4
8696 8696  rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8697 8697      offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8698 8698  {
8699 8699          nfsstat4 status;
8700 8700          rfs4_lockowner_t *lo = lsp->rls_locker;
8701 8701          rfs4_state_t *sp = lsp->rls_state;
8702 8702          struct flock64 flock;
8703 8703          int16_t ltype;
8704 8704          int flag;
8705 8705          int error;
8706 8706          sysid_t sysid;
8707 8707          LOCK4res *lres;
8708 8708          vnode_t *vp;
8709 8709  
8710 8710          if (rfs4_lease_expired(lo->rl_client)) {
8711 8711                  return (NFS4ERR_EXPIRED);
8712 8712          }
8713 8713  
8714 8714          if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8715 8715                  return (status);
8716 8716  
8717 8717          /* Check for zero length. To lock to end of file use all ones for V4 */
8718 8718          if (length == 0)
8719 8719                  return (NFS4ERR_INVAL);
8720 8720          else if (length == (length4)(~0))
8721 8721                  length = 0;             /* Posix to end of file  */
8722 8722  
8723 8723  retry:
8724 8724          rfs4_dbe_lock(sp->rs_dbe);
8725 8725          if (sp->rs_closed == TRUE) {
8726 8726                  rfs4_dbe_unlock(sp->rs_dbe);
8727 8727                  return (NFS4ERR_OLD_STATEID);
8728 8728          }
8729 8729  
8730 8730          if (resop->resop != OP_LOCKU) {
8731 8731                  switch (locktype) {
8732 8732                  case READ_LT:
8733 8733                  case READW_LT:
8734 8734                          if ((sp->rs_share_access
8735 8735                              & OPEN4_SHARE_ACCESS_READ) == 0) {
8736 8736                                  rfs4_dbe_unlock(sp->rs_dbe);
8737 8737  
8738 8738                                  return (NFS4ERR_OPENMODE);
8739 8739                          }
8740 8740                          ltype = F_RDLCK;
8741 8741                          break;
8742 8742                  case WRITE_LT:
8743 8743                  case WRITEW_LT:
8744 8744                          if ((sp->rs_share_access
8745 8745                              & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8746 8746                                  rfs4_dbe_unlock(sp->rs_dbe);
8747 8747  
8748 8748                                  return (NFS4ERR_OPENMODE);
8749 8749                          }
8750 8750                          ltype = F_WRLCK;
8751 8751                          break;
                           default:
                                   /*
                                    * Unknown lock type.  Reject it here; otherwise
                                    * ltype would be used uninitialized below.
                                    */
                                   rfs4_dbe_unlock(sp->rs_dbe);

                                   return (NFS4ERR_INVAL);
8752 8752                  }
8753 8753          } else
8754 8754                  ltype = F_UNLCK;
8755 8755  
8756 8756          flock.l_type = ltype;
8757 8757          flock.l_whence = 0;             /* SEEK_SET */
8758 8758          flock.l_start = offset;
8759 8759          flock.l_len = length;
8760 8760          flock.l_sysid = sysid;
8761 8761          flock.l_pid = lsp->rls_locker->rl_pid;
8762 8762  
8763 8763          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8764 8764          if (flock.l_len < 0 || flock.l_start < 0) {
8765 8765                  rfs4_dbe_unlock(sp->rs_dbe);
8766 8766                  return (NFS4ERR_INVAL);
8767 8767          }
8768 8768  
8769 8769          /*
8770 8770           * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8771 8771           * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8772 8772           */
8773 8773          flag = (int)sp->rs_share_access | F_REMOTELOCK;
8774 8774  
                   /* Hold the vnode so it stays valid once sp is unlocked */
8775 8775          vp = sp->rs_finfo->rf_vp;
8776 8776          VN_HOLD(vp);
8777 8777  
8778 8778          /*
8779 8779           * We need to unlock sp before we call the underlying filesystem to
8780 8780           * acquire the file lock.
8781 8781           */
8782 8782          rfs4_dbe_unlock(sp->rs_dbe);
8783 8783  
8784 8784          error = setlock(vp, &flock, flag, cred);
8785 8785  
8786 8786          /*
8787 8787           * Make sure the file is still open.  In a case the file was closed in
8788 8788           * the meantime, clean the lock we acquired using the setlock() call
8789 8789           * above, and return the appropriate error.
8790 8790           */
8791 8791          rfs4_dbe_lock(sp->rs_dbe);
8792 8792          if (sp->rs_closed == TRUE) {
8793 8793                  cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8794 8794                  rfs4_dbe_unlock(sp->rs_dbe);
8795 8795  
8796 8796                  VN_RELE(vp);
8797 8797  
8798 8798                  return (NFS4ERR_OLD_STATEID);
8799 8799          }
8800 8800          rfs4_dbe_unlock(sp->rs_dbe);
8801 8801  
8802 8802          VN_RELE(vp);
8803 8803  
                   /* A successful lock or unlock advances the lock stateid */
8804 8804          if (error == 0) {
8805 8805                  rfs4_dbe_lock(lsp->rls_dbe);
8806 8806                  next_stateid(&lsp->rls_lockid);
8807 8807                  rfs4_dbe_unlock(lsp->rls_dbe);
8808 8808          }
8809 8809  
8810 8810          /*
8811 8811           * N.B. We map error values to nfsv4 errors. This is different
8812 8812           * than puterrno4 routine.
8813 8813           */
8814 8814          switch (error) {
8815 8815          case 0:
8816 8816                  status = NFS4_OK;
8817 8817                  break;
8818 8818          case EAGAIN:
8819 8819          case EACCES:            /* Old value */
8820 8820                  /* Can only get here if op is OP_LOCK */
8821 8821                  ASSERT(resop->resop == OP_LOCK);
8822 8822                  lres = &resop->nfs_resop4_u.oplock;
8823 8823                  status = NFS4ERR_DENIED;
                           /*
                            * lock_denied() closes an expired conflicting client;
                            * try the lock again in that case.
                            */
8824 8824                  if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8825 8825                      == NFS4ERR_EXPIRED)
8826 8826                          goto retry;
8827 8827                  break;
8828 8828          case ENOLCK:
8829 8829                  status = NFS4ERR_DELAY;
8830 8830                  break;
8831 8831          case EOVERFLOW:
8832 8832                  status = NFS4ERR_INVAL;
8833 8833                  break;
8834 8834          case EINVAL:
8835 8835                  status = NFS4ERR_NOTSUPP;
8836 8836                  break;
8837 8837          default:
8838 8838                  status = NFS4ERR_SERVERFAULT;
8839 8839                  break;
8840 8840          }
8841 8841  
8842 8842          return (status);
8843 8843  }
8844 8844  
8845 8845  /*ARGSUSED*/
8846 8846  void
8847 8847  rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8848 8848      struct svc_req *req, struct compound_state *cs)
8849 8849  {
8850 8850          LOCK4args *args = &argop->nfs_argop4_u.oplock;
8851 8851          LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8852 8852          nfsstat4 status;
8853 8853          stateid4 *stateid;
8854 8854          rfs4_lockowner_t *lo;
8855 8855          rfs4_client_t *cp;
8856 8856          rfs4_state_t *sp = NULL;
8857 8857          rfs4_lo_state_t *lsp = NULL;
8858 8858          bool_t ls_sw_held = FALSE;
8859 8859          bool_t create = TRUE;
8860 8860          bool_t lcreate = TRUE;
8861 8861          bool_t dup_lock = FALSE;
8862 8862          int rc;
8863 8863  
8864 8864          DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8865 8865              LOCK4args *, args);
8866 8866  
8867 8867          if (cs->vp == NULL) {
8868 8868                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8869 8869                  DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8870 8870                      cs, LOCK4res *, resp);
8871 8871                  return;
8872 8872          }
8873 8873  
8874 8874          if (args->locker.new_lock_owner) {
8875 8875                  /* Create a new lockowner for this instance */
8876 8876                  open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8877 8877  
8878 8878                  NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8879 8879  
8880 8880                  stateid = &olo->open_stateid;
8881 8881                  status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8882 8882                  if (status != NFS4_OK) {
8883 8883                          NFS4_DEBUG(rfs4_debug,
8884 8884                              (CE_NOTE, "Get state failed in lock %d", status));
8885 8885                          *cs->statusp = resp->status = status;
8886 8886                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8887 8887                              cs, LOCK4res *, resp);
8888 8888                          return;
8889 8889                  }
8890 8890  
8891 8891                  /* Ensure specified filehandle matches */
8892 8892                  if (cs->vp != sp->rs_finfo->rf_vp) {
8893 8893                          rfs4_state_rele(sp);
8894 8894                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8895 8895                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8896 8896                              cs, LOCK4res *, resp);
8897 8897                          return;
8898 8898                  }
8899 8899  
8900 8900                  /* hold off other access to open_owner while we tinker */
8901 8901                  rfs4_sw_enter(&sp->rs_owner->ro_sw);
8902 8902  
8903 8903                  switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8904 8904                  case NFS4_CHECK_STATEID_OLD:
8905 8905                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8906 8906                          goto end;
8907 8907                  case NFS4_CHECK_STATEID_BAD:
8908 8908                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8909 8909                          goto end;
8910 8910                  case NFS4_CHECK_STATEID_EXPIRED:
8911 8911                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8912 8912                          goto end;
8913 8913                  case NFS4_CHECK_STATEID_UNCONFIRMED:
8914 8914                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8915 8915                          goto end;
8916 8916                  case NFS4_CHECK_STATEID_CLOSED:
8917 8917                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8918 8918                          goto end;
8919 8919                  case NFS4_CHECK_STATEID_OKAY:
8920 8920                  case NFS4_CHECK_STATEID_REPLAY:
8921 8921                          switch (rfs4_check_olo_seqid(olo->open_seqid,
8922 8922                              sp->rs_owner, resop)) {
8923 8923                          case NFS4_CHKSEQ_OKAY:
8924 8924                                  if (rc == NFS4_CHECK_STATEID_OKAY)
8925 8925                                          break;
8926 8926                                  /*
8927 8927                                   * This is replayed stateid; if seqid
8928 8928                                   * matches next expected, then client
8929 8929                                   * is using wrong seqid.
8930 8930                                   */
8931 8931                                  /* FALLTHROUGH */
8932 8932                          case NFS4_CHKSEQ_BAD:
8933 8933                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8934 8934                                  goto end;
8935 8935                          case NFS4_CHKSEQ_REPLAY:
8936 8936                                  /* This is a duplicate LOCK request */
8937 8937                                  dup_lock = TRUE;
8938 8938  
8939 8939                                  /*
8940 8940                                   * For a duplicate we do not want to
8941 8941                                   * create a new lockowner as it should
8942 8942                                   * already exist.
8943 8943                                   * Turn off the lockowner create flag.
8944 8944                                   */
8945 8945                                  lcreate = FALSE;
8946 8946                          }
8947 8947                          break;
8948 8948                  }
8949 8949  
8950 8950                  lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8951 8951                  if (lo == NULL) {
8952 8952                          NFS4_DEBUG(rfs4_debug,
8953 8953                              (CE_NOTE, "rfs4_op_lock: no lock owner"));
8954 8954                          *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8955 8955                          goto end;
8956 8956                  }
8957 8957  
8958 8958                  lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8959 8959                  if (lsp == NULL) {
8960 8960                          rfs4_update_lease(sp->rs_owner->ro_client);
8961 8961                          /*
8962 8962                           * Only update the open_seqid if this is not
8963 8963                           * a duplicate request
8964 8964                           */
8965 8965                          if (dup_lock == FALSE) {
8966 8966                                  rfs4_update_open_sequence(sp->rs_owner);
8967 8967                          }
8968 8968  
8969 8969                          NFS4_DEBUG(rfs4_debug,
8970 8970                              (CE_NOTE, "rfs4_op_lock: no state"));
8971 8971                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8972 8972                          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8973 8973                          rfs4_lockowner_rele(lo);
8974 8974                          goto end;
8975 8975                  }
8976 8976  
8977 8977                  /*
8978 8978                   * This is the new_lock_owner branch and the client is
8979 8979                   * supposed to be associating a new lock_owner with
8980 8980                   * the open file at this point.  If we find that a
8981 8981                   * lock_owner/state association already exists and a
8982 8982                   * successful LOCK request was returned to the client,
8983 8983                   * an error is returned to the client since this is
8984 8984                   * not appropriate.  The client should be using the
8985 8985                   * existing lock_owner branch.
8986 8986                   */
8987 8987                  if (dup_lock == FALSE && create == FALSE) {
8988 8988                          if (lsp->rls_lock_completed == TRUE) {
8989 8989                                  *cs->statusp =
8990 8990                                      resp->status = NFS4ERR_BAD_SEQID;
8991 8991                                  rfs4_lockowner_rele(lo);
8992 8992                                  goto end;
8993 8993                          }
8994 8994                  }
8995 8995  
8996 8996                  rfs4_update_lease(sp->rs_owner->ro_client);
8997 8997  
8998 8998                  /*
8999 8999                   * Only update the open_seqid if this is not
9000 9000                   * a duplicate request
9001 9001                   */
9002 9002                  if (dup_lock == FALSE) {
9003 9003                          rfs4_update_open_sequence(sp->rs_owner);
9004 9004                  }
9005 9005  
9006 9006                  /*
9007 9007                   * If this is a duplicate lock request, just copy the
9008 9008                   * previously saved reply and return.
9009 9009                   */
9010 9010                  if (dup_lock == TRUE) {
9011 9011                          /* verify that lock_seqid's match */
9012 9012                          if (lsp->rls_seqid != olo->lock_seqid) {
9013 9013                                  NFS4_DEBUG(rfs4_debug,
9014 9014                                      (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9015 9015                                      "lsp->seqid=%d old->seqid=%d",
9016 9016                                      lsp->rls_seqid, olo->lock_seqid));
9017 9017                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9018 9018                          } else {
9019 9019                                  rfs4_copy_reply(resop, &lsp->rls_reply);
9020 9020                                  /*
9021 9021                                   * Make sure to copy the just
9022 9022                                   * retrieved reply status into the
9023 9023                                   * overall compound status
9024 9024                                   */
9025 9025                                  *cs->statusp = resp->status;
9026 9026                          }
9027 9027                          rfs4_lockowner_rele(lo);
9028 9028                          goto end;
9029 9029                  }
9030 9030  
9031 9031                  rfs4_dbe_lock(lsp->rls_dbe);
9032 9032  
9033 9033                  /* Make sure to update the lock sequence id */
9034 9034                  lsp->rls_seqid = olo->lock_seqid;
9035 9035  
9036 9036                  NFS4_DEBUG(rfs4_debug,
9037 9037                      (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9038 9038  
9039 9039                  /*
9040 9040                   * This is used to signify the newly created lockowner
9041 9041                   * stateid and its sequence number.  The checks for
9042 9042                   * sequence number and increment don't occur on the
9043 9043                   * very first lock request for a lockowner.
9044 9044                   */
9045 9045                  lsp->rls_skip_seqid_check = TRUE;
9046 9046  
9047 9047                  /* hold off other access to lsp while we tinker */
9048 9048                  rfs4_sw_enter(&lsp->rls_sw);
9049 9049                  ls_sw_held = TRUE;
9050 9050  
9051 9051                  rfs4_dbe_unlock(lsp->rls_dbe);
9052 9052  
9053 9053                  rfs4_lockowner_rele(lo);
9054 9054          } else {
9055 9055                  stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9056 9056                  /* get lsp and hold the lock on the underlying file struct */
9057 9057                  if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9058 9058                      != NFS4_OK) {
9059 9059                          *cs->statusp = resp->status = status;
9060 9060                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9061 9061                              cs, LOCK4res *, resp);
9062 9062                          return;
9063 9063                  }
9064 9064                  create = FALSE; /* We didn't create lsp */
9065 9065  
9066 9066                  /* Ensure specified filehandle matches */
9067 9067                  if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9068 9068                          rfs4_lo_state_rele(lsp, TRUE);
9069 9069                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9070 9070                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9071 9071                              cs, LOCK4res *, resp);
9072 9072                          return;
9073 9073                  }
9074 9074  
9075 9075                  /* hold off other access to lsp while we tinker */
9076 9076                  rfs4_sw_enter(&lsp->rls_sw);
9077 9077                  ls_sw_held = TRUE;
9078 9078  
9079 9079                  switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9080 9080                  /*
9081 9081                   * The stateid looks like it was okay (expected to be
9082 9082                   * the next one)
9083 9083                   */
9084 9084                  case NFS4_CHECK_STATEID_OKAY:
9085 9085                          /*
9086 9086                           * The sequence id is now checked.  Determine
9087 9087                           * if this is a replay or if it is in the
9088 9088                           * expected (next) sequence.  In the case of a
9089 9089                           * replay, there are two replay conditions
9090 9090                           * that may occur.  The first is the normal
9091 9091                           * condition where a LOCK is done with a
9092 9092                           * NFS4_OK response and the stateid is
9093 9093                           * updated.  That case is handled below when
9094 9094                           * the stateid is identified as a REPLAY.  The
9095 9095                           * second is the case where an error is
9096 9096                           * returned, like NFS4ERR_DENIED, and the
9097 9097                           * sequence number is updated but the stateid
9098 9098                           * is not updated.  This second case is dealt
9099 9099                           * with here.  So it may seem odd that the
9100 9100                           * stateid is okay but the sequence id is a
9101 9101                           * replay but it is okay.
9102 9102                           */
9103 9103                          switch (rfs4_check_lock_seqid(
9104 9104                              args->locker.locker4_u.lock_owner.lock_seqid,
9105 9105                              lsp, resop)) {
9106 9106                          case NFS4_CHKSEQ_REPLAY:
9107 9107                                  if (resp->status != NFS4_OK) {
9108 9108                                          /*
9109 9109                                           * Here is our replay and need
9110 9110                                           * to verify that the last
9111 9111                                           * response was an error.
9112 9112                                           */
9113 9113                                          *cs->statusp = resp->status;
9114 9114                                          goto end;
9115 9115                                  }
9116 9116                                  /*
9117 9117                                   * This is done since the sequence id
9118 9118                                   * looked like a replay but it didn't
9119 9119                                   * pass our check so a BAD_SEQID is
9120 9120                                   * returned as a result.
9121 9121                                   */
9122 9122                                  /*FALLTHROUGH*/
9123 9123                          case NFS4_CHKSEQ_BAD:
9124 9124                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9125 9125                                  goto end;
9126 9126                          case NFS4_CHKSEQ_OKAY:
9127 9127                                  /* Everything looks okay move ahead */
9128 9128                                  break;
9129 9129                          }
9130 9130                          break;
9131 9131                  case NFS4_CHECK_STATEID_OLD:
9132 9132                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9133 9133                          goto end;
9134 9134                  case NFS4_CHECK_STATEID_BAD:
9135 9135                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9136 9136                          goto end;
9137 9137                  case NFS4_CHECK_STATEID_EXPIRED:
9138 9138                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9139 9139                          goto end;
9140 9140                  case NFS4_CHECK_STATEID_CLOSED:
9141 9141                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9142 9142                          goto end;
9143 9143                  case NFS4_CHECK_STATEID_REPLAY:
9144 9144                          switch (rfs4_check_lock_seqid(
9145 9145                              args->locker.locker4_u.lock_owner.lock_seqid,
9146 9146                              lsp, resop)) {
9147 9147                          case NFS4_CHKSEQ_OKAY:
9148 9148                                  /*
9149 9149                                   * This is a replayed stateid; if
9150 9150                                   * seqid matches the next expected,
9151 9151                                   * then client is using wrong seqid.
9152 9152                                   */
9153 9153                          case NFS4_CHKSEQ_BAD:
9154 9154                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9155 9155                                  goto end;
9156 9156                          case NFS4_CHKSEQ_REPLAY:
9157 9157                                  rfs4_update_lease(lsp->rls_locker->rl_client);
9158 9158                                  *cs->statusp = status = resp->status;
9159 9159                                  goto end;
9160 9160                          }
9161 9161                          break;
9162 9162                  default:
9163 9163                          ASSERT(FALSE);
9164 9164                          break;
9165 9165                  }
9166 9166  
9167 9167                  rfs4_update_lock_sequence(lsp);
9168 9168                  rfs4_update_lease(lsp->rls_locker->rl_client);
9169 9169          }
9170 9170  
9171 9171          /*
9172 9172           * NFS4 only allows locking on regular files, so
9173 9173           * verify type of object.
9174 9174           */
9175 9175          if (cs->vp->v_type != VREG) {
9176 9176                  if (cs->vp->v_type == VDIR)
9177 9177                          status = NFS4ERR_ISDIR;
9178 9178                  else
9179 9179                          status = NFS4ERR_INVAL;
9180 9180                  goto out;
9181 9181          }
9182 9182  
9183 9183          cp = lsp->rls_state->rs_owner->ro_client;
9184 9184  
9185 9185          if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9186 9186                  status = NFS4ERR_GRACE;
9187 9187                  goto out;
9188 9188          }
9189 9189  
9190 9190          if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9191 9191                  status = NFS4ERR_NO_GRACE;
9192 9192                  goto out;
9193 9193          }
9194 9194  
9195 9195          if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9196 9196                  status = NFS4ERR_NO_GRACE;
9197 9197                  goto out;
9198 9198          }
9199 9199  
9200 9200          if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9201 9201                  cs->deleg = TRUE;
9202 9202  
9203 9203          status = rfs4_do_lock(lsp, args->locktype,
9204 9204              args->offset, args->length, cs->cr, resop);
9205 9205  
9206 9206  out:
9207 9207          lsp->rls_skip_seqid_check = FALSE;
9208 9208  
9209 9209          *cs->statusp = resp->status = status;
9210 9210  
9211 9211          if (status == NFS4_OK) {
9212 9212                  resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9213 9213                  lsp->rls_lock_completed = TRUE;
9214 9214          }
9215 9215          /*
9216 9216           * Only update the "OPEN" response here if this was a new
9217 9217           * lock_owner
9218 9218           */
9219 9219          if (sp)
9220 9220                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9221 9221  
9222 9222          rfs4_update_lock_resp(lsp, resop);
9223 9223  
9224 9224  end:
9225 9225          if (lsp) {
9226 9226                  if (ls_sw_held)
9227 9227                          rfs4_sw_exit(&lsp->rls_sw);
9228 9228                  /*
9229 9229                   * If an sp obtained, then the lsp does not represent
9230 9230                   * a lock on the file struct.
9231 9231                   */
9232 9232                  if (sp != NULL)
9233 9233                          rfs4_lo_state_rele(lsp, FALSE);
9234 9234                  else
9235 9235                          rfs4_lo_state_rele(lsp, TRUE);
9236 9236          }
9237 9237          if (sp) {
9238 9238                  rfs4_sw_exit(&sp->rs_owner->ro_sw);
9239 9239                  rfs4_state_rele(sp);
9240 9240          }
9241 9241  
9242 9242          DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9243 9243              LOCK4res *, resp);
9244 9244  }
9245 9245  
9246 9246  /* free function for LOCK/LOCKT */
9247 9247  static void
9248 9248  lock_denied_free(nfs_resop4 *resop)
9249 9249  {
9250 9250          LOCK4denied *dp = NULL;
9251 9251  
9252 9252          switch (resop->resop) {
9253 9253          case OP_LOCK:
9254 9254                  if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9255 9255                          dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9256 9256                  break;
9257 9257          case OP_LOCKT:
9258 9258                  if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9259 9259                          dp = &resop->nfs_resop4_u.oplockt.denied;
9260 9260                  break;
9261 9261          default:
9262 9262                  break;
9263 9263          }
9264 9264  
9265 9265          if (dp)
9266 9266                  kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9267 9267  }
9268 9268  
9269 9269  /*ARGSUSED*/
9270 9270  void
9271 9271  rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9272 9272      struct svc_req *req, struct compound_state *cs)
9273 9273  {
9274 9274          LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9275 9275          LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9276 9276          nfsstat4 status;
9277 9277          stateid4 *stateid = &args->lock_stateid;
9278 9278          rfs4_lo_state_t *lsp;
9279 9279  
9280 9280          DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9281 9281              LOCKU4args *, args);
9282 9282  
9283 9283          if (cs->vp == NULL) {
9284 9284                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9285 9285                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9286 9286                      LOCKU4res *, resp);
9287 9287                  return;
9288 9288          }
9289 9289  
9290 9290          if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9291 9291                  *cs->statusp = resp->status = status;
9292 9292                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9293 9293                      LOCKU4res *, resp);
9294 9294                  return;
9295 9295          }
9296 9296  
9297 9297          /* Ensure specified filehandle matches */
9298 9298          if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9299 9299                  rfs4_lo_state_rele(lsp, TRUE);
9300 9300                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9301 9301                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9302 9302                      LOCKU4res *, resp);
9303 9303                  return;
9304 9304          }
9305 9305  
9306 9306          /* hold off other access to lsp while we tinker */
9307 9307          rfs4_sw_enter(&lsp->rls_sw);
9308 9308  
9309 9309          switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9310 9310          case NFS4_CHECK_STATEID_OKAY:
9311 9311                  if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9312 9312                      != NFS4_CHKSEQ_OKAY) {
9313 9313                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9314 9314                          goto end;
9315 9315                  }
9316 9316                  break;
9317 9317          case NFS4_CHECK_STATEID_OLD:
9318 9318                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9319 9319                  goto end;
9320 9320          case NFS4_CHECK_STATEID_BAD:
9321 9321                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9322 9322                  goto end;
9323 9323          case NFS4_CHECK_STATEID_EXPIRED:
9324 9324                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9325 9325                  goto end;
9326 9326          case NFS4_CHECK_STATEID_CLOSED:
9327 9327                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9328 9328                  goto end;
9329 9329          case NFS4_CHECK_STATEID_REPLAY:
9330 9330                  switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9331 9331                  case NFS4_CHKSEQ_OKAY:
9332 9332                                  /*
9333 9333                                   * This is a replayed stateid; if
9334 9334                                   * seqid matches the next expected,
9335 9335                                   * then client is using wrong seqid.
9336 9336                                   */
9337 9337                  case NFS4_CHKSEQ_BAD:
9338 9338                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9339 9339                          goto end;
9340 9340                  case NFS4_CHKSEQ_REPLAY:
9341 9341                          rfs4_update_lease(lsp->rls_locker->rl_client);
9342 9342                          *cs->statusp = status = resp->status;
9343 9343                          goto end;
9344 9344                  }
9345 9345                  break;
9346 9346          default:
9347 9347                  ASSERT(FALSE);
9348 9348                  break;
9349 9349          }
9350 9350  
9351 9351          rfs4_update_lock_sequence(lsp);
9352 9352          rfs4_update_lease(lsp->rls_locker->rl_client);
9353 9353  
9354 9354          /*
9355 9355           * NFS4 only allows locking on regular files, so
9356 9356           * verify type of object.
9357 9357           */
9358 9358          if (cs->vp->v_type != VREG) {
9359 9359                  if (cs->vp->v_type == VDIR)
9360 9360                          status = NFS4ERR_ISDIR;
9361 9361                  else
9362 9362                          status = NFS4ERR_INVAL;
9363 9363                  goto out;
9364 9364          }
9365 9365  
9366 9366          if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9367 9367                  status = NFS4ERR_GRACE;
9368 9368                  goto out;
9369 9369          }
9370 9370  
9371 9371          status = rfs4_do_lock(lsp, args->locktype,
9372 9372              args->offset, args->length, cs->cr, resop);
9373 9373  
9374 9374  out:
9375 9375          *cs->statusp = resp->status = status;
9376 9376  
9377 9377          if (status == NFS4_OK)
9378 9378                  resp->lock_stateid = lsp->rls_lockid.stateid;
9379 9379  
9380 9380          rfs4_update_lock_resp(lsp, resop);
9381 9381  
9382 9382  end:
9383 9383          rfs4_sw_exit(&lsp->rls_sw);
9384 9384          rfs4_lo_state_rele(lsp, TRUE);
9385 9385  
9386 9386          DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9387 9387              LOCKU4res *, resp);
9388 9388  }
9389 9389  
9390 9390  /*
9391 9391   * LOCKT is a best effort routine, the client can not be guaranteed that
9392 9392   * the status return is still in effect by the time the reply is received.
9393 9393   * They are numerous race conditions in this routine, but we are not required
9394 9394   * and can not be accurate.
9395 9395   */
9396 9396  /*ARGSUSED*/
9397 9397  void
9398 9398  rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9399 9399      struct svc_req *req, struct compound_state *cs)
9400 9400  {
9401 9401          LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9402 9402          LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9403 9403          rfs4_lockowner_t *lo;
9404 9404          rfs4_client_t *cp;
9405 9405          bool_t create = FALSE;
9406 9406          struct flock64 flk;
9407 9407          int error;
9408 9408          int flag = FREAD | FWRITE;
9409 9409          int ltype;
9410 9410          length4 posix_length;
9411 9411          sysid_t sysid;
9412 9412          pid_t pid;
9413 9413  
9414 9414          DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9415 9415              LOCKT4args *, args);
9416 9416  
9417 9417          if (cs->vp == NULL) {
9418 9418                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9419 9419                  goto out;
9420 9420          }
9421 9421  
9422 9422          /*
9423 9423           * NFS4 only allows locking on regular files, so
9424 9424           * verify type of object.
9425 9425           */
9426 9426          if (cs->vp->v_type != VREG) {
9427 9427                  if (cs->vp->v_type == VDIR)
9428 9428                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
9429 9429                  else
9430 9430                          *cs->statusp = resp->status =  NFS4ERR_INVAL;
9431 9431                  goto out;
9432 9432          }
9433 9433  
9434 9434          /*
9435 9435           * Check out the clientid to ensure the server knows about it
9436 9436           * so that we correctly inform the client of a server reboot.
9437 9437           */
9438 9438          if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9439 9439              == NULL) {
9440 9440                  *cs->statusp = resp->status =
9441 9441                      rfs4_check_clientid(&args->owner.clientid, 0);
9442 9442                  goto out;
9443 9443          }
9444 9444          if (rfs4_lease_expired(cp)) {
9445 9445                  rfs4_client_close(cp);
9446 9446                  /*
9447 9447                   * Protocol doesn't allow returning NFS4ERR_STALE as
9448 9448                   * other operations do on this check so STALE_CLIENTID
9449 9449                   * is returned instead
9450 9450                   */
9451 9451                  *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9452 9452                  goto out;
9453 9453          }
9454 9454  
9455 9455          if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9456 9456                  *cs->statusp = resp->status = NFS4ERR_GRACE;
9457 9457                  rfs4_client_rele(cp);
9458 9458                  goto out;
9459 9459          }
9460 9460          rfs4_client_rele(cp);
9461 9461  
9462 9462          resp->status = NFS4_OK;
9463 9463  
9464 9464          switch (args->locktype) {
9465 9465          case READ_LT:
9466 9466          case READW_LT:
9467 9467                  ltype = F_RDLCK;
9468 9468                  break;
9469 9469          case WRITE_LT:
9470 9470          case WRITEW_LT:
9471 9471                  ltype = F_WRLCK;
9472 9472                  break;
9473 9473          }
9474 9474  
9475 9475          posix_length = args->length;
9476 9476          /* Check for zero length. To lock to end of file use all ones for V4 */
9477 9477          if (posix_length == 0) {
9478 9478                  *cs->statusp = resp->status = NFS4ERR_INVAL;
9479 9479                  goto out;
9480 9480          } else if (posix_length == (length4)(~0)) {
9481 9481                  posix_length = 0;       /* Posix to end of file  */
9482 9482          }
9483 9483  
9484 9484          /* Find or create a lockowner */
9485 9485          lo = rfs4_findlockowner(&args->owner, &create);
9486 9486  
9487 9487          if (lo) {
9488 9488                  pid = lo->rl_pid;
9489 9489                  if ((resp->status =
9490 9490                      rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9491 9491                          goto err;
9492 9492          } else {
9493 9493                  pid = 0;
9494 9494                  sysid = lockt_sysid;
9495 9495          }
9496 9496  retry:
9497 9497          flk.l_type = ltype;
9498 9498          flk.l_whence = 0;               /* SEEK_SET */
9499 9499          flk.l_start = args->offset;
9500 9500          flk.l_len = posix_length;
9501 9501          flk.l_sysid = sysid;
9502 9502          flk.l_pid = pid;
9503 9503          flag |= F_REMOTELOCK;
9504 9504  
9505 9505          LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9506 9506  
9507 9507          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9508 9508          if (flk.l_len < 0 || flk.l_start < 0) {
9509 9509                  resp->status = NFS4ERR_INVAL;
9510 9510                  goto err;
9511 9511          }
9512 9512          error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9513 9513              NULL, cs->cr, NULL);
9514 9514  
9515 9515          /*
9516 9516           * N.B. We map error values to nfsv4 errors. This is differrent
9517 9517           * than puterrno4 routine.
9518 9518           */
9519 9519          switch (error) {
9520 9520          case 0:
9521 9521                  if (flk.l_type == F_UNLCK)
9522 9522                          resp->status = NFS4_OK;
9523 9523                  else {
9524 9524                          if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9525 9525                                  goto retry;
9526 9526                          resp->status = NFS4ERR_DENIED;
9527 9527                  }
9528 9528                  break;
9529 9529          case EOVERFLOW:
9530 9530                  resp->status = NFS4ERR_INVAL;
9531 9531                  break;
9532 9532          case EINVAL:
9533 9533                  resp->status = NFS4ERR_NOTSUPP;
9534 9534                  break;
9535 9535          default:
9536 9536                  cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9537 9537                      error);
9538 9538                  resp->status = NFS4ERR_SERVERFAULT;
9539 9539                  break;
9540 9540          }
9541 9541  
9542 9542  err:
9543 9543          if (lo)
9544 9544                  rfs4_lockowner_rele(lo);
9545 9545          *cs->statusp = resp->status;
9546 9546  out:
9547 9547          DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9548 9548              LOCKT4res *, resp);
9549 9549  }
9550 9550  
9551 9551  int
9552 9552  rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9553 9553  {
9554 9554          int err;
9555 9555          int cmd;
9556 9556          vnode_t *vp;
9557 9557          struct shrlock shr;
9558 9558          struct shr_locowner shr_loco;
9559 9559          int fflags = 0;
9560 9560  
9561 9561          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9562 9562          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9563 9563  
9564 9564          if (sp->rs_closed)
9565 9565                  return (NFS4ERR_OLD_STATEID);
9566 9566  
9567 9567          vp = sp->rs_finfo->rf_vp;
9568 9568          ASSERT(vp);
9569 9569  
9570 9570          shr.s_access = shr.s_deny = 0;
9571 9571  
9572 9572          if (access & OPEN4_SHARE_ACCESS_READ) {
9573 9573                  fflags |= FREAD;
9574 9574                  shr.s_access |= F_RDACC;
9575 9575          }
9576 9576          if (access & OPEN4_SHARE_ACCESS_WRITE) {
9577 9577                  fflags |= FWRITE;
9578 9578                  shr.s_access |= F_WRACC;
9579 9579          }
9580 9580          ASSERT(shr.s_access);
9581 9581  
9582 9582          if (deny & OPEN4_SHARE_DENY_READ)
9583 9583                  shr.s_deny |= F_RDDNY;
9584 9584          if (deny & OPEN4_SHARE_DENY_WRITE)
9585 9585                  shr.s_deny |= F_WRDNY;
9586 9586  
9587 9587          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9588 9588          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9589 9589          shr_loco.sl_pid = shr.s_pid;
9590 9590          shr_loco.sl_id = shr.s_sysid;
9591 9591          shr.s_owner = (caddr_t)&shr_loco;
9592 9592          shr.s_own_len = sizeof (shr_loco);
9593 9593  
9594 9594          cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9595 9595  
9596 9596          err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9597 9597          if (err != 0) {
9598 9598                  if (err == EAGAIN)
9599 9599                          err = NFS4ERR_SHARE_DENIED;
9600 9600                  else
9601 9601                          err = puterrno4(err);
9602 9602                  return (err);
9603 9603          }
9604 9604  
9605 9605          sp->rs_share_access |= access;
9606 9606          sp->rs_share_deny |= deny;
9607 9607  
9608 9608          return (0);
9609 9609  }
9610 9610  
9611 9611  int
9612 9612  rfs4_unshare(rfs4_state_t *sp)
9613 9613  {
9614 9614          int err;
9615 9615          struct shrlock shr;
9616 9616          struct shr_locowner shr_loco;
9617 9617  
9618 9618          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9619 9619  
9620 9620          if (sp->rs_closed || sp->rs_share_access == 0)
9621 9621                  return (0);
9622 9622  
9623 9623          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9624 9624          ASSERT(sp->rs_finfo->rf_vp);
9625 9625  
9626 9626          shr.s_access = shr.s_deny = 0;
9627 9627          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9628 9628          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9629 9629          shr_loco.sl_pid = shr.s_pid;
9630 9630          shr_loco.sl_id = shr.s_sysid;
9631 9631          shr.s_owner = (caddr_t)&shr_loco;
9632 9632          shr.s_own_len = sizeof (shr_loco);
9633 9633  
9634 9634          err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9635 9635              NULL);
9636 9636          if (err != 0) {
9637 9637                  err = puterrno4(err);
9638 9638                  return (err);
9639 9639          }
9640 9640  
9641 9641          sp->rs_share_access = 0;
9642 9642          sp->rs_share_deny = 0;
9643 9643  
9644 9644          return (0);
9645 9645  
9646 9646  }
9647 9647  
9648 9648  static int
9649 9649  rdma_setup_read_data4(READ4args *args, READ4res *rok)
9650 9650  {
9651 9651          struct clist    *wcl;
9652 9652          count4          count = rok->data_len;
9653 9653          int             wlist_len;
9654 9654  
9655 9655          wcl = args->wlist;
9656 9656          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9657 9657                  return (FALSE);
9658 9658          }
9659 9659          wcl = args->wlist;
9660 9660          rok->wlist_len = wlist_len;
9661 9661          rok->wlist = wcl;
9662 9662          return (TRUE);
9663 9663  }
9664 9664  
/*
 * Tunable to disable server referrals.  When set to a non-zero value,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9667 9667  
9668 9668  /*
9669 9669   * Find an NFS record in reparse point data.
9670 9670   * Returns 0 for success and <0 or an errno value on failure.
9671 9671   */
9672 9672  int
9673 9673  vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9674 9674  {
9675 9675          int err;
9676 9676          char *stype, *val;
9677 9677          nvlist_t *nvl;
9678 9678          nvpair_t *curr;
9679 9679  
9680 9680          if ((nvl = reparse_init()) == NULL)
9681 9681                  return (-1);
9682 9682  
9683 9683          if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9684 9684                  reparse_free(nvl);
9685 9685                  return (err);
9686 9686          }
9687 9687  
9688 9688          curr = NULL;
9689 9689          while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9690 9690                  if ((stype = nvpair_name(curr)) == NULL) {
9691 9691                          reparse_free(nvl);
9692 9692                          return (-2);
9693 9693                  }
9694 9694                  if (strncasecmp(stype, "NFS", 3) == 0)
9695 9695                          break;
9696 9696          }
9697 9697  
9698 9698          if ((curr == NULL) ||
9699 9699              (nvpair_value_string(curr, &val))) {
9700 9700                  reparse_free(nvl);
9701 9701                  return (-3);
9702 9702          }
9703 9703          *nvlp = nvl;
9704 9704          *svcp = stype;
9705 9705          *datap = val;
9706 9706          return (0);
9707 9707  }
9708 9708  
9709 9709  int
9710 9710  vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9711 9711  {
9712 9712          nvlist_t *nvl;
9713 9713          char *s, *d;
9714 9714  
9715 9715          if (rfs4_no_referrals != 0)
9716 9716                  return (B_FALSE);
9717 9717  
9718 9718          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9719 9719                  return (B_FALSE);
9720 9720  
9721 9721          if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9722 9722                  return (B_FALSE);
9723 9723  
9724 9724          reparse_free(nvl);
9725 9725  
9726 9726          return (B_TRUE);
9727 9727  }
9728 9728  
9729 9729  /*
9730 9730   * There is a user-level copy of this routine in ref_subr.c.
9731 9731   * Changes should be kept in sync.
9732 9732   */
9733 9733  static int
9734 9734  nfs4_create_components(char *path, component4 *comp4)
9735 9735  {
9736 9736          int slen, plen, ncomp;
9737 9737          char *ori_path, *nxtc, buf[MAXNAMELEN];
9738 9738  
9739 9739          if (path == NULL)
9740 9740                  return (0);
9741 9741  
9742 9742          plen = strlen(path) + 1;        /* include the terminator */
9743 9743          ori_path = path;
9744 9744          ncomp = 0;
9745 9745  
9746 9746          /* count number of components in the path */
9747 9747          for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9748 9748                  if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9749 9749                          if ((slen = nxtc - path) == 0) {
9750 9750                                  path = nxtc + 1;
9751 9751                                  continue;
9752 9752                          }
9753 9753  
9754 9754                          if (comp4 != NULL) {
9755 9755                                  bcopy(path, buf, slen);
9756 9756                                  buf[slen] = '\0';
9757 9757                                  (void) str_to_utf8(buf, &comp4[ncomp]);
9758 9758                          }
9759 9759  
9760 9760                          ncomp++;        /* 1 valid component */
9761 9761                          path = nxtc + 1;
9762 9762                  }
9763 9763                  if (*nxtc == '\0' || *nxtc == '\n')
9764 9764                          break;
9765 9765          }
9766 9766  
9767 9767          return (ncomp);
9768 9768  }
9769 9769  
9770 9770  /*
9771 9771   * There is a user-level copy of this routine in ref_subr.c.
9772 9772   * Changes should be kept in sync.
9773 9773   */
9774 9774  static int
9775 9775  make_pathname4(char *path, pathname4 *pathname)
9776 9776  {
9777 9777          int ncomp;
9778 9778          component4 *comp4;
9779 9779  
9780 9780          if (pathname == NULL)
9781 9781                  return (0);
9782 9782  
9783 9783          if (path == NULL) {
9784 9784                  pathname->pathname4_val = NULL;
9785 9785                  pathname->pathname4_len = 0;
9786 9786                  return (0);
9787 9787          }
9788 9788  
9789 9789          /* count number of components to alloc buffer */
9790 9790          if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9791 9791                  pathname->pathname4_val = NULL;
9792 9792                  pathname->pathname4_len = 0;
9793 9793                  return (0);
9794 9794          }
9795 9795          comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9796 9796  
9797 9797          /* copy components into allocated buffer */
9798 9798          ncomp = nfs4_create_components(path, comp4);
9799 9799  
9800 9800          pathname->pathname4_val = comp4;
9801 9801          pathname->pathname4_len = ncomp;
9802 9802  
9803 9803          return (ncomp);
9804 9804  }
9805 9805  
9806 9806  #define xdr_fs_locations4 xdr_fattr4_fs_locations
9807 9807  
9808 9808  fs_locations4 *
9809 9809  fetch_referral(vnode_t *vp, cred_t *cr)
9810 9810  {
9811 9811          nvlist_t *nvl;
9812 9812          char *stype, *sdata;
9813 9813          fs_locations4 *result;
9814 9814          char buf[1024];
9815 9815          size_t bufsize;
9816 9816          XDR xdr;
9817 9817          int err;
9818 9818  
9819 9819          /*
9820 9820           * Check attrs to ensure it's a reparse point
9821 9821           */
9822 9822          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9823 9823                  return (NULL);
9824 9824  
9825 9825          /*
9826 9826           * Look for an NFS record and get the type and data
9827 9827           */
9828 9828          if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9829 9829                  return (NULL);
9830 9830  
9831 9831          /*
9832 9832           * With the type and data, upcall to get the referral
9833 9833           */
9834 9834          bufsize = sizeof (buf);
9835 9835          bzero(buf, sizeof (buf));
9836 9836          err = reparse_kderef((const char *)stype, (const char *)sdata,
9837 9837              buf, &bufsize);
9838 9838          reparse_free(nvl);
9839 9839  
9840 9840          DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9841 9841              char *, stype, char *, sdata, char *, buf, int, err);
9842 9842          if (err) {
9843 9843                  cmn_err(CE_NOTE,
9844 9844                      "reparsed daemon not running: unable to get referral (%d)",
9845 9845                      err);
9846 9846                  return (NULL);
9847 9847          }
9848 9848  
9849 9849          /*
9850 9850           * We get an XDR'ed record back from the kderef call
9851 9851           */
9852 9852          xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9853 9853          result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9854 9854          err = xdr_fs_locations4(&xdr, result);
9855 9855          XDR_DESTROY(&xdr);
9856 9856          if (err != TRUE) {
9857 9857                  DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9858 9858                      int, err);
9859 9859                  return (NULL);
9860 9860          }
9861 9861  
9862 9862          /*
9863 9863           * Look at path to recover fs_root, ignoring the leading '/'
9864 9864           */
9865 9865          (void) make_pathname4(vp->v_path, &result->fs_root);
9866 9866  
9867 9867          return (result);
9868 9868  }
9869 9869  
9870 9870  char *
9871 9871  build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9872 9872  {
9873 9873          fs_locations4 *fsl;
9874 9874          fs_location4 *fs;
9875 9875          char *server, *path, *symbuf;
9876 9876          static char *prefix = "/net/";
9877 9877          int i, size, npaths;
9878 9878          uint_t len;
9879 9879  
9880 9880          /* Get the referral */
9881 9881          if ((fsl = fetch_referral(vp, cr)) == NULL)
9882 9882                  return (NULL);
9883 9883  
9884 9884          /* Deal with only the first location and first server */
9885 9885          fs = &fsl->locations_val[0];
9886 9886          server = utf8_to_str(&fs->server_val[0], &len, NULL);
9887 9887          if (server == NULL) {
9888 9888                  rfs4_free_fs_locations4(fsl);
9889 9889                  kmem_free(fsl, sizeof (fs_locations4));
9890 9890                  return (NULL);
9891 9891          }
9892 9892  
9893 9893          /* Figure out size for "/net/" + host + /path/path/path + NULL */
9894 9894          size = strlen(prefix) + len;
9895 9895          for (i = 0; i < fs->rootpath.pathname4_len; i++)
9896 9896                  size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9897 9897  
9898 9898          /* Allocate the symlink buffer and fill it */
9899 9899          symbuf = kmem_zalloc(size, KM_SLEEP);
9900 9900          (void) strcat(symbuf, prefix);
9901 9901          (void) strcat(symbuf, server);
9902 9902          kmem_free(server, len);
9903 9903  
9904 9904          npaths = 0;
9905 9905          for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9906 9906                  path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9907 9907                  if (path == NULL)
9908 9908                          continue;
9909 9909                  (void) strcat(symbuf, "/");
9910 9910                  (void) strcat(symbuf, path);
9911 9911                  npaths++;
9912 9912                  kmem_free(path, len);
9913 9913          }
9914 9914  
9915 9915          rfs4_free_fs_locations4(fsl);
9916 9916          kmem_free(fsl, sizeof (fs_locations4));
9917 9917  
9918 9918          if (strsz != NULL)
9919 9919                  *strsz = size;
9920 9920          return (symbuf);
9921 9921  }
9922 9922  
9923 9923  /*
9924 9924   * Check to see if we have a downrev Solaris client, so that we
9925 9925   * can send it a symlink instead of a referral.
9926 9926   */
9927 9927  int
9928 9928  client_is_downrev(struct svc_req *req)
9929 9929  {
9930 9930          struct sockaddr *ca;
9931 9931          rfs4_clntip_t *ci;
9932 9932          bool_t create = FALSE;
9933 9933          int is_downrev;
9934 9934  
9935 9935          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9936 9936          ASSERT(ca);
9937 9937          ci = rfs4_find_clntip(ca, &create);
9938 9938          if (ci == NULL)
9939 9939                  return (0);
9940 9940          is_downrev = ci->ri_no_referrals;
9941 9941          rfs4_dbe_rele(ci->ri_dbe);
9942 9942          return (is_downrev);
9943 9943  }
9944 9944  
9945 9945  /*
9946 9946   * Do the main work of handling HA-NFSv4 Resource Group failover on
9947 9947   * Sun Cluster.
9948 9948   * We need to detect whether any RG admin paths have been added or removed,
9949 9949   * and adjust resources accordingly.
9950 9950   * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9951 9951   * order to scale, the list and array of paths need to be held in more
9952 9952   * suitable data structures.
9953 9953   */
9954 9954  static void
9955 9955  hanfsv4_failover(nfs4_srv_t *nsrv4)
9956 9956  {
9957 9957          int i, start_grace, numadded_paths = 0;
9958 9958          char **added_paths = NULL;
9959 9959          rfs4_dss_path_t *dss_path;
9960 9960  
9961 9961          /*
9962 9962           * Note: currently, dss_pathlist cannot be NULL, since
9963 9963           * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9964 9964           * make the latter dynamically specified too, the following will
9965 9965           * need to be adjusted.
9966 9966           */
9967 9967  
9968 9968          /*
9969 9969           * First, look for removed paths: RGs that have been failed-over
9970 9970           * away from this node.
9971 9971           * Walk the "currently-serving" dss_pathlist and, for each
9972 9972           * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9973 9973           * from nfsd. If not, that RG path has been removed.
9974 9974           *
9975 9975           * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9976 9976           * any duplicates.
9977 9977           */
9978 9978          dss_path = nsrv4->dss_pathlist;
9979 9979          do {
9980 9980                  int found = 0;
9981 9981                  char *path = dss_path->path;
9982 9982  
9983 9983                  /* used only for non-HA so may not be removed */
9984 9984                  if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9985 9985                          dss_path = dss_path->next;
9986 9986                          continue;
9987 9987                  }
9988 9988  
9989 9989                  for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9990 9990                          int cmpret;
9991 9991                          char *newpath = rfs4_dss_newpaths[i];
9992 9992  
9993 9993                          /*
9994 9994                           * Since nfsd has sorted rfs4_dss_newpaths for us,
9995 9995                           * once the return from strcmp is negative we know
9996 9996                           * we've passed the point where "path" should be,
9997 9997                           * and can stop searching: "path" has been removed.
9998 9998                           */
9999 9999                          cmpret = strcmp(path, newpath);
10000 10000                          if (cmpret < 0)
10001 10001                                  break;
10002 10002                          if (cmpret == 0) {
10003 10003                                  found = 1;
10004 10004                                  break;
10005 10005                          }
10006 10006                  }
10007 10007  
10008 10008                  if (found == 0) {
10009 10009                          unsigned index = dss_path->index;
10010 10010                          rfs4_servinst_t *sip = dss_path->sip;
10011 10011                          rfs4_dss_path_t *path_next = dss_path->next;
10012 10012  
10013 10013                          /*
10014 10014                           * This path has been removed.
10015 10015                           * We must clear out the servinst reference to
10016 10016                           * it, since it's now owned by another
10017 10017                           * node: we should not attempt to touch it.
10018 10018                           */
10019 10019                          ASSERT(dss_path == sip->dss_paths[index]);
10020 10020                          sip->dss_paths[index] = NULL;
10021 10021  
10022 10022                          /* remove from "currently-serving" list, and destroy */
10023 10023                          remque(dss_path);
10024 10024                          /* allow for NUL */
10025 10025                          kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10026 10026                          kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10027 10027  
10028 10028                          dss_path = path_next;
10029 10029                  } else {
10030 10030                          /* path was found; not removed */
10031 10031                          dss_path = dss_path->next;
10032 10032                  }
10033 10033          } while (dss_path != nsrv4->dss_pathlist);
10034 10034  
10035 10035          /*
10036 10036           * Now, look for added paths: RGs that have been failed-over
10037 10037           * to this node.
10038 10038           * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10039 10039           * for each path, check if it is on the "currently-serving"
10040 10040           * dss_pathlist. If not, that RG path has been added.
10041 10041           *
10042 10042           * Note: we don't do duplicate detection here; nfsd does that for us.
10043 10043           *
10044 10044           * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10045 10045           * an upper bound for the size needed for added_paths[numadded_paths].
10046 10046           */
10047 10047  
10048 10048          /* probably more space than we need, but guaranteed to be enough */
10049 10049          if (rfs4_dss_numnewpaths > 0) {
10050 10050                  size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10051 10051                  added_paths = kmem_zalloc(sz, KM_SLEEP);
10052 10052          }
10053 10053  
10054 10054          /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10055 10055          for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10056 10056                  int found = 0;
10057 10057                  char *newpath = rfs4_dss_newpaths[i];
10058 10058  
10059 10059                  dss_path = nsrv4->dss_pathlist;
10060 10060                  do {
10061 10061                          char *path = dss_path->path;
10062 10062  
10063 10063                          /* used only for non-HA */
10064 10064                          if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10065 10065                                  dss_path = dss_path->next;
10066 10066                                  continue;
10067 10067                          }
10068 10068  
10069 10069                          if (strncmp(path, newpath, strlen(path)) == 0) {
10070 10070                                  found = 1;
10071 10071                                  break;
10072 10072                          }
10073 10073  
10074 10074                          dss_path = dss_path->next;
10075 10075                  } while (dss_path != nsrv4->dss_pathlist);
10076 10076  
10077 10077                  if (found == 0) {
10078 10078                          added_paths[numadded_paths] = newpath;
10079 10079                          numadded_paths++;
10080 10080                  }
10081 10081          }
10082 10082  
10083 10083          /* did we find any added paths? */
10084 10084          if (numadded_paths > 0) {
10085 10085  
10086 10086                  /* create a new server instance, and start its grace period */
10087 10087                  start_grace = 1;
10088 10088                  /* CSTYLED */
10089 10089                  rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10090 10090  
10091 10091                  /* read in the stable storage state from these paths */
10092 10092                  rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10093 10093  
10094 10094                  /*
10095 10095                   * Multiple failovers during a grace period will cause
10096 10096                   * clients of the same resource group to be partitioned
10097 10097                   * into different server instances, with different
10098 10098                   * grace periods.  Since clients of the same resource
10099 10099                   * group must be subject to the same grace period,
10100 10100                   * we need to reset all currently active grace periods.
10101 10101                   */
10102 10102                  rfs4_grace_reset_all(nsrv4);
10103 10103          }
10104 10104  
10105 10105          if (rfs4_dss_numnewpaths > 0)
10106 10106                  kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10107 10107  }
  
    | 
      ↓ open down ↓ | 
    7356 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX