Print this page
    
Fix NFS design problems re. multiple zone keys
Make NFS server zone-specific data all have the same lifetime
Fix rfs4_clean_state_exi
Fix exi_cache_reclaim
Fix mistakes in zone keys work
More fixes re. exi_zoneid and exi_tree
(danmcd -> Keep some ASSERT()s around for readability.)
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28   28   *      All Rights Reserved
  29   29   */
  30   30  
  31   31  /*
  32   32   * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  33   33   * Copyright 2019 Nexenta Systems, Inc.
  34   34   * Copyright 2019 Nexenta by DDN, Inc.
  35   35   */
  36   36  
  37   37  #include <sys/param.h>
  38   38  #include <sys/types.h>
  39   39  #include <sys/systm.h>
  40   40  #include <sys/cred.h>
  41   41  #include <sys/buf.h>
  42   42  #include <sys/vfs.h>
  43   43  #include <sys/vfs_opreg.h>
  44   44  #include <sys/vnode.h>
  45   45  #include <sys/uio.h>
  46   46  #include <sys/errno.h>
  47   47  #include <sys/sysmacros.h>
  48   48  #include <sys/statvfs.h>
  49   49  #include <sys/kmem.h>
  50   50  #include <sys/dirent.h>
  51   51  #include <sys/cmn_err.h>
  52   52  #include <sys/debug.h>
  53   53  #include <sys/systeminfo.h>
  54   54  #include <sys/flock.h>
  55   55  #include <sys/pathname.h>
  56   56  #include <sys/nbmlock.h>
  57   57  #include <sys/share.h>
  58   58  #include <sys/atomic.h>
  59   59  #include <sys/policy.h>
  60   60  #include <sys/fem.h>
  61   61  #include <sys/sdt.h>
  62   62  #include <sys/ddi.h>
  63   63  #include <sys/zone.h>
  64   64  
  65   65  #include <fs/fs_reparse.h>
  66   66  
  67   67  #include <rpc/types.h>
  68   68  #include <rpc/auth.h>
  69   69  #include <rpc/rpcsec_gss.h>
  70   70  #include <rpc/svc.h>
  71   71  
  72   72  #include <nfs/nfs.h>
  73   73  #include <nfs/nfssys.h>
  74   74  #include <nfs/export.h>
  75   75  #include <nfs/nfs_cmd.h>
  76   76  #include <nfs/lm.h>
  77   77  #include <nfs/nfs4.h>
  78   78  #include <nfs/nfs4_drc.h>
  79   79  
  80   80  #include <sys/strsubr.h>
  81   81  #include <sys/strsun.h>
  82   82  
  83   83  #include <inet/common.h>
  84   84  #include <inet/ip.h>
  85   85  #include <inet/ip6.h>
  86   86  
  87   87  #include <sys/tsol/label.h>
  88   88  #include <sys/tsol/tndb.h>
  89   89  
  90   90  #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  91   91  static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  92   92  #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  93   93  static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  94   94  extern struct svc_ops rdma_svc_ops;
  95   95  extern int nfs_loaned_buffers;
  96   96  /* End of Tunables */
  97   97  
  98   98  static int rdma_setup_read_data4(READ4args *, READ4res *);
  99   99  
 100  100  /*
 101  101   * Used to bump the stateid4.seqid value and show changes in the stateid
 102  102   */
 103  103  #define next_stateid(sp) (++(sp)->bits.chgseq)
 104  104  
 105  105  /*
 106  106   * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 107  107   *      This is used to return NFS4ERR_TOOSMALL when clients specify
 108  108   *      maxcount that isn't large enough to hold the smallest possible
 109  109   *      XDR encoded dirent.
 110  110   *
 111  111   *          sizeof cookie (8 bytes) +
 112  112   *          sizeof name_len (4 bytes) +
 113  113   *          sizeof smallest (padded) name (4 bytes) +
 114  114   *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 115  115   *          sizeof attrlist4_len (4 bytes) +
 116  116   *          sizeof next boolean (4 bytes)
 117  117   *
 118  118   * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 119  119   * the smallest possible entry4 (assumes no attrs requested).
 120  120   *      sizeof nfsstat4 (4 bytes) +
 121  121   *      sizeof verifier4 (8 bytes) +
 122  122   *      sizeof entry4list bool (4 bytes) +
 123  123   *      sizeof entry4   (36 bytes) +
 124  124   *      sizeof eof bool  (4 bytes)
 125  125   *
 126  126   * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 127  127   *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 128  128   *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 129  129   *      required for a given name length.  MAXNAMELEN is the maximum
 130  130   *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 131  131   *      macros are to allow for . and .. entries -- just a minor tweak to try
 132  132   *      and guarantee that buffer we give to VOP_READDIR will be large enough
 133  133   *      to hold ., .., and the largest possible solaris dirent64.
 134  134   */
 135  135  #define RFS4_MINLEN_ENTRY4 36
 136  136  #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 137  137  #define RFS4_MINLEN_RDDIR_BUF \
 138  138          (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 139  139  
 140  140  /*
 141  141   * It would be better to pad to 4 bytes since that's what XDR would do,
 142  142   * but the dirents UFS gives us are already padded to 8, so just take
 143  143   * what we're given.  Dircount is only a hint anyway.  Currently the
 144  144   * solaris kernel is ASCII only, so there's no point in calling the
 145  145   * UTF8 functions.
  
    | 
      ↓ open down ↓ | 
    145 lines elided | 
    
      ↑ open up ↑ | 
  
 146  146   *
 147  147   * dirent64: name padded to provide 8 byte struct alignment
 148  148   *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 149  149   *
 150  150   * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 151  151   *
 152  152   */
 153  153  #define DIRENT64_TO_DIRCOUNT(dp) \
 154  154          (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 155  155  
 156      -zone_key_t      rfs4_zone_key;
 157  156  
 158  157  static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 159  158  
 160  159  u_longlong_t    nfs4_srv_caller_id;
 161  160  uint_t          nfs4_srv_vkey = 0;
 162  161  
 163  162  void    rfs4_init_compound_state(struct compound_state *);
 164  163  
 165  164  static void     nullfree(caddr_t);
 166  165  static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 167  166                      struct compound_state *);
 168  167  static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 169  168                      struct compound_state *);
 170  169  static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 171  170                      struct compound_state *);
 172  171  static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 173  172                      struct compound_state *);
 174  173  static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 175  174                      struct compound_state *);
 176  175  static void     rfs4_op_create_free(nfs_resop4 *resop);
 177  176  static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 178  177                      struct svc_req *, struct compound_state *);
 179  178  static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 180  179                      struct svc_req *, struct compound_state *);
 181  180  static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 182  181                      struct compound_state *);
 183  182  static void     rfs4_op_getattr_free(nfs_resop4 *);
 184  183  static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185  184                      struct compound_state *);
 186  185  static void     rfs4_op_getfh_free(nfs_resop4 *);
 187  186  static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 188  187                      struct compound_state *);
 189  188  static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 190  189                      struct compound_state *);
 191  190  static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192  191                      struct compound_state *);
 193  192  static void     lock_denied_free(nfs_resop4 *);
 194  193  static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 195  194                      struct compound_state *);
 196  195  static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 197  196                      struct compound_state *);
 198  197  static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 199  198                      struct compound_state *);
 200  199  static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 201  200                      struct compound_state *);
 202  201  static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 203  202                      struct svc_req *req, struct compound_state *cs);
 204  203  static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 205  204                      struct compound_state *);
 206  205  static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 207  206                      struct compound_state *);
 208  207  static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 209  208                      struct svc_req *, struct compound_state *);
 210  209  static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 211  210                      struct svc_req *, struct compound_state *);
 212  211  static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 213  212                      struct compound_state *);
 214  213  static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 215  214                      struct compound_state *);
 216  215  static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 217  216                      struct compound_state *);
 218  217  static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 219  218                      struct compound_state *);
 220  219  static void     rfs4_op_read_free(nfs_resop4 *);
 221  220  static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 222  221  static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 223  222                      struct compound_state *);
 224  223  static void     rfs4_op_readlink_free(nfs_resop4 *);
 225  224  static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 226  225                      struct svc_req *, struct compound_state *);
 227  226  static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 228  227                      struct compound_state *);
 229  228  static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 230  229                      struct compound_state *);
 231  230  static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 232  231                      struct compound_state *);
 233  232  static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 234  233                      struct compound_state *);
 235  234  static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 236  235                      struct compound_state *);
 237  236  static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 238  237                      struct compound_state *);
 239  238  static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 240  239                      struct compound_state *);
 241  240  static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 242  241                      struct compound_state *);
 243  242  static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 244  243                      struct svc_req *, struct compound_state *);
 245  244  static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 246  245                      struct svc_req *req, struct compound_state *);
 247  246  static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 248  247                      struct compound_state *);
 249  248  static void     rfs4_op_secinfo_free(nfs_resop4 *);
 250  249  
 251  250  static nfsstat4 check_open_access(uint32_t, struct compound_state *,
 252  251                      struct svc_req *);
 253  252  nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 254  253  void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 255  254  
 256  255  
 257  256  /*
 258  257   * translation table for attrs
 259  258   */
 260  259  struct nfs4_ntov_table {
 261  260          union nfs4_attr_u *na;
 262  261          uint8_t amap[NFS4_MAXNUM_ATTRS];
 263  262          int attrcnt;
 264  263          bool_t vfsstat;
 265  264  };
 266  265  
 267  266  static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 268  267  static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 269  268                      struct nfs4_svgetit_arg *sargp);
 270  269  
 271  270  static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 272  271                      struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 273  272                      struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 274  273  
 275  274  static void     hanfsv4_failover(nfs4_srv_t *);
 276  275  
 277  276  fem_t           *deleg_rdops;
 278  277  fem_t           *deleg_wrops;
 279  278  
 280  279  /*
 281  280   * NFS4 op dispatch table
 282  281   */
 283  282  
 284  283  struct rfsv4disp {
 285  284          void    (*dis_proc)();          /* proc to call */
 286  285          void    (*dis_resfree)();       /* frees space allocated by proc */
 287  286          int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 288  287  };
 289  288  
 290  289  static struct rfsv4disp rfsv4disptab[] = {
 291  290          /*
 292  291           * NFS VERSION 4
 293  292           */
 294  293  
 295  294          /* RFS_NULL = 0 */
 296  295          {rfs4_op_illegal, nullfree, 0},
 297  296  
 298  297          /* UNUSED = 1 */
 299  298          {rfs4_op_illegal, nullfree, 0},
 300  299  
 301  300          /* UNUSED = 2 */
 302  301          {rfs4_op_illegal, nullfree, 0},
 303  302  
 304  303          /* OP_ACCESS = 3 */
 305  304          {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 306  305  
 307  306          /* OP_CLOSE = 4 */
 308  307          {rfs4_op_close, nullfree, 0},
 309  308  
 310  309          /* OP_COMMIT = 5 */
 311  310          {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 312  311  
 313  312          /* OP_CREATE = 6 */
 314  313          {rfs4_op_create, nullfree, 0},
 315  314  
 316  315          /* OP_DELEGPURGE = 7 */
 317  316          {rfs4_op_delegpurge, nullfree, 0},
 318  317  
 319  318          /* OP_DELEGRETURN = 8 */
 320  319          {rfs4_op_delegreturn, nullfree, 0},
 321  320  
 322  321          /* OP_GETATTR = 9 */
 323  322          {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 324  323  
 325  324          /* OP_GETFH = 10 */
 326  325          {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 327  326  
 328  327          /* OP_LINK = 11 */
 329  328          {rfs4_op_link, nullfree, 0},
 330  329  
 331  330          /* OP_LOCK = 12 */
 332  331          {rfs4_op_lock, lock_denied_free, 0},
 333  332  
 334  333          /* OP_LOCKT = 13 */
 335  334          {rfs4_op_lockt, lock_denied_free, 0},
 336  335  
 337  336          /* OP_LOCKU = 14 */
 338  337          {rfs4_op_locku, nullfree, 0},
 339  338  
 340  339          /* OP_LOOKUP = 15 */
 341  340          {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 342  341  
 343  342          /* OP_LOOKUPP = 16 */
 344  343          {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 345  344  
 346  345          /* OP_NVERIFY = 17 */
 347  346          {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 348  347  
 349  348          /* OP_OPEN = 18 */
 350  349          {rfs4_op_open, rfs4_free_reply, 0},
 351  350  
 352  351          /* OP_OPENATTR = 19 */
 353  352          {rfs4_op_openattr, nullfree, 0},
 354  353  
 355  354          /* OP_OPEN_CONFIRM = 20 */
 356  355          {rfs4_op_open_confirm, nullfree, 0},
 357  356  
 358  357          /* OP_OPEN_DOWNGRADE = 21 */
 359  358          {rfs4_op_open_downgrade, nullfree, 0},
 360  359  
 361  360          /* OP_PUTFH = 22 */
 362  361          {rfs4_op_putfh, nullfree, RPC_ALL},
 363  362  
 364  363          /* OP_PUTPUBFH = 23 */
 365  364          {rfs4_op_putpubfh, nullfree, RPC_ALL},
 366  365  
 367  366          /* OP_PUTROOTFH = 24 */
 368  367          {rfs4_op_putrootfh, nullfree, RPC_ALL},
 369  368  
 370  369          /* OP_READ = 25 */
 371  370          {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 372  371  
 373  372          /* OP_READDIR = 26 */
 374  373          {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 375  374  
 376  375          /* OP_READLINK = 27 */
 377  376          {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 378  377  
 379  378          /* OP_REMOVE = 28 */
 380  379          {rfs4_op_remove, nullfree, 0},
 381  380  
 382  381          /* OP_RENAME = 29 */
 383  382          {rfs4_op_rename, nullfree, 0},
 384  383  
 385  384          /* OP_RENEW = 30 */
 386  385          {rfs4_op_renew, nullfree, 0},
 387  386  
 388  387          /* OP_RESTOREFH = 31 */
 389  388          {rfs4_op_restorefh, nullfree, RPC_ALL},
 390  389  
 391  390          /* OP_SAVEFH = 32 */
 392  391          {rfs4_op_savefh, nullfree, RPC_ALL},
 393  392  
 394  393          /* OP_SECINFO = 33 */
 395  394          {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 396  395  
 397  396          /* OP_SETATTR = 34 */
 398  397          {rfs4_op_setattr, nullfree, 0},
 399  398  
 400  399          /* OP_SETCLIENTID = 35 */
 401  400          {rfs4_op_setclientid, nullfree, 0},
 402  401  
 403  402          /* OP_SETCLIENTID_CONFIRM = 36 */
 404  403          {rfs4_op_setclientid_confirm, nullfree, 0},
 405  404  
 406  405          /* OP_VERIFY = 37 */
 407  406          {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 408  407  
 409  408          /* OP_WRITE = 38 */
 410  409          {rfs4_op_write, nullfree, 0},
 411  410  
 412  411          /* OP_RELEASE_LOCKOWNER = 39 */
 413  412          {rfs4_op_release_lockowner, nullfree, 0},
 414  413  };
 415  414  
 416  415  static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 417  416  
 418  417  #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 419  418  
 420  419  #ifdef DEBUG
 421  420  
 422  421  int             rfs4_fillone_debug = 0;
 423  422  int             rfs4_no_stub_access = 1;
 424  423  int             rfs4_rddir_debug = 0;
 425  424  
 426  425  static char    *rfs4_op_string[] = {
 427  426          "rfs4_op_null",
 428  427          "rfs4_op_1 unused",
 429  428          "rfs4_op_2 unused",
 430  429          "rfs4_op_access",
 431  430          "rfs4_op_close",
 432  431          "rfs4_op_commit",
 433  432          "rfs4_op_create",
 434  433          "rfs4_op_delegpurge",
 435  434          "rfs4_op_delegreturn",
 436  435          "rfs4_op_getattr",
 437  436          "rfs4_op_getfh",
 438  437          "rfs4_op_link",
 439  438          "rfs4_op_lock",
 440  439          "rfs4_op_lockt",
 441  440          "rfs4_op_locku",
 442  441          "rfs4_op_lookup",
 443  442          "rfs4_op_lookupp",
 444  443          "rfs4_op_nverify",
 445  444          "rfs4_op_open",
 446  445          "rfs4_op_openattr",
 447  446          "rfs4_op_open_confirm",
 448  447          "rfs4_op_open_downgrade",
 449  448          "rfs4_op_putfh",
 450  449          "rfs4_op_putpubfh",
 451  450          "rfs4_op_putrootfh",
 452  451          "rfs4_op_read",
 453  452          "rfs4_op_readdir",
 454  453          "rfs4_op_readlink",
 455  454          "rfs4_op_remove",
 456  455          "rfs4_op_rename",
 457  456          "rfs4_op_renew",
 458  457          "rfs4_op_restorefh",
 459  458          "rfs4_op_savefh",
 460  459          "rfs4_op_secinfo",
 461  460          "rfs4_op_setattr",
 462  461          "rfs4_op_setclientid",
 463  462          "rfs4_op_setclient_confirm",
 464  463          "rfs4_op_verify",
 465  464          "rfs4_op_write",
 466  465          "rfs4_op_release_lockowner",
 467  466          "rfs4_op_illegal"
 468  467  };
 469  468  #endif
 470  469  
 471  470  void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 472  471  
 473  472  extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 474  473  
 475  474  extern void     rfs4_free_fs_locations4(fs_locations4 *);
 476  475  
 477  476  #ifdef  nextdp
 478  477  #undef nextdp
 479  478  #endif
 480  479  #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 481  480  
 482  481  static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 483  482          VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 484  483          VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 485  484          VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 486  485          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 487  486          VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 488  487          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 489  488          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 490  489          NULL,                   NULL
 491  490  };
 492  491  static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 493  492          VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
  
    | 
      ↓ open down ↓ | 
    327 lines elided | 
    
      ↑ open up ↑ | 
  
 494  493          VOPNAME_READ,           { .femop_read = deleg_wr_read },
 495  494          VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 496  495          VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 497  496          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 498  497          VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 499  498          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 500  499          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 501  500          NULL,                   NULL
 502  501  };
 503  502  
 504      -/* ARGSUSED */
 505      -static void *
 506      -rfs4_zone_init(zoneid_t zoneid)
      503 +nfs4_srv_t *
      504 +nfs4_get_srv(void)
 507  505  {
      506 +        nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
      507 +        nfs4_srv_t *srv = ng->nfs4_srv;
      508 +        ASSERT(srv != NULL);
      509 +        return (srv);
      510 +}
      511 +
      512 +void
      513 +rfs4_srv_zone_init(nfs_globals_t *ng)
      514 +{
 508  515          nfs4_srv_t *nsrv4;
 509  516          timespec32_t verf;
 510  517  
 511  518          nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
 512  519  
 513  520          /*
 514  521           * The following algorithm attempts to find a unique verifier
 515  522           * to be used as the write verifier returned from the server
 516  523           * to the client.  It is important that this verifier change
 517  524           * whenever the server reboots.  Of secondary importance, it
 518  525           * is important for the verifier to be unique between two
 519  526           * different servers.
 520  527           *
 521  528           * Thus, an attempt is made to use the system hostid and the
 522  529           * current time in seconds when the nfssrv kernel module is
 523  530           * loaded.  It is assumed that an NFS server will not be able
 524  531           * to boot and then to reboot in less than a second.  If the
 525  532           * hostid has not been set, then the current high resolution
 526  533           * time is used.  This will ensure different verifiers each
 527  534           * time the server reboots and minimize the chances that two
 528  535           * different servers will have the same verifier.
 529  536           * XXX - this is broken on LP64 kernels.
 530  537           */
 531  538          verf.tv_sec = (time_t)zone_get_hostid(NULL);
 532  539          if (verf.tv_sec != 0) {
 533  540                  verf.tv_nsec = gethrestime_sec();
 534  541          } else {
 535  542                  timespec_t tverf;
 536  543  
 537  544                  gethrestime(&tverf);
 538  545                  verf.tv_sec = (time_t)tverf.tv_sec;
 539  546                  verf.tv_nsec = tverf.tv_nsec;
 540  547          }
 541  548          nsrv4->write4verf = *(uint64_t *)&verf;
  
    | 
      ↓ open down ↓ | 
    24 lines elided | 
    
      ↑ open up ↑ | 
  
 542  549  
 543  550          /* Used to manage create/destroy of server state */
 544  551          nsrv4->nfs4_server_state = NULL;
 545  552          nsrv4->nfs4_cur_servinst = NULL;
 546  553          nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
 547  554          mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 548  555          mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
 549  556          mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 550  557          rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 551  558  
 552      -        return (nsrv4);
      559 +        ng->nfs4_srv = nsrv4;
 553  560  }
 554  561  
 555      -/* ARGSUSED */
 556      -static void
 557      -rfs4_zone_fini(zoneid_t zoneid, void *data)
      562 +void
      563 +rfs4_srv_zone_fini(nfs_globals_t *ng)
 558  564  {
 559      -        nfs4_srv_t *nsrv4 = data;
      565 +        nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 560  566  
      567 +        ng->nfs4_srv = NULL;
      568 +
 561  569          mutex_destroy(&nsrv4->deleg_lock);
 562  570          mutex_destroy(&nsrv4->state_lock);
 563  571          mutex_destroy(&nsrv4->servinst_lock);
 564  572          rw_destroy(&nsrv4->deleg_policy_lock);
 565  573  
 566  574          kmem_free(nsrv4, sizeof (*nsrv4));
 567  575  }
 568  576  
 569  577  void
 570  578  rfs4_srvrinit(void)
 571  579  {
 572  580          extern void rfs4_attr_init();
 573  581  
 574      -        zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
 575      -
 576  582          rfs4_attr_init();
 577  583  
 578      -
 579  584          if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 580  585                  rfs4_disable_delegation();
 581  586          } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 582  587              &deleg_wrops) != 0) {
 583  588                  rfs4_disable_delegation();
 584  589                  fem_free(deleg_rdops);
 585  590          }
 586  591  
 587  592          nfs4_srv_caller_id = fs_new_caller_id();
 588  593          lockt_sysid = lm_alloc_sysidt();
 589  594          vsd_create(&nfs4_srv_vkey, NULL);
 590  595          rfs4_state_g_init();
 591  596  }
 592  597  
 593  598  void
 594  599  rfs4_srvrfini(void)
  
    | 
      ↓ open down ↓ | 
    6 lines elided | 
    
      ↑ open up ↑ | 
  
 595  600  {
 596  601          if (lockt_sysid != LM_NOSYSID) {
 597  602                  lm_free_sysidt(lockt_sysid);
 598  603                  lockt_sysid = LM_NOSYSID;
 599  604          }
 600  605  
 601  606          rfs4_state_g_fini();
 602  607  
 603  608          fem_free(deleg_rdops);
 604  609          fem_free(deleg_wrops);
 605      -
 606      -        (void) zone_key_delete(rfs4_zone_key);
 607  610  }
 608  611  
 609  612  void
 610  613  rfs4_do_server_start(int server_upordown,
 611  614      int srv_delegation, int cluster_booted)
 612  615  {
 613      -        nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
      616 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
 614  617  
 615  618          /* Is this a warm start? */
 616  619          if (server_upordown == NFS_SERVER_QUIESCED) {
 617  620                  cmn_err(CE_NOTE, "nfs4_srv: "
 618  621                      "server was previously quiesced; "
 619  622                      "existing NFSv4 state will be re-used");
 620  623  
 621  624                  /*
 622  625                   * HA-NFSv4: this is also the signal
 623  626                   * that a Resource Group failover has
 624  627                   * occurred.
 625  628                   */
 626  629                  if (cluster_booted)
 627  630                          hanfsv4_failover(nsrv4);
 628  631          } else {
 629  632                  /* Cold start */
 630  633                  nsrv4->rfs4_start_time = 0;
 631  634                  rfs4_state_zone_init(nsrv4);
 632  635                  nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 633  636                      nfs4_drc_hash);
 634  637  
 635  638                  /*
 636  639                   * The nfsd service was started with the -s option
 637  640                   * we need to pull in any state from the paths indicated.
 638  641                   */
 639  642                  if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
 640  643                          /* read in the stable storage state from these paths */
 641  644                          rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
 642  645                              rfs4_dss_newpaths);
 643  646                  }
 644  647          }
 645  648  
 646  649          /* Check if delegation is to be enabled */
 647  650          if (srv_delegation != FALSE)
 648  651                  rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
 649  652  }
 650  653  
 651  654  void
 652  655  rfs4_init_compound_state(struct compound_state *cs)
 653  656  {
 654  657          bzero(cs, sizeof (*cs));
 655  658          cs->cont = TRUE;
 656  659          cs->access = CS_ACCESS_DENIED;
 657  660          cs->deleg = FALSE;
 658  661          cs->mandlock = FALSE;
 659  662          cs->fh.nfs_fh4_val = cs->fhbuf;
 660  663  }
 661  664  
 662  665  void
 663  666  rfs4_grace_start(rfs4_servinst_t *sip)
 664  667  {
 665  668          rw_enter(&sip->rwlock, RW_WRITER);
 666  669          sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 667  670          sip->grace_period = rfs4_grace_period;
 668  671          rw_exit(&sip->rwlock);
 669  672  }
 670  673  
 671  674  /*
 672  675   * returns true if the instance's grace period has never been started
 673  676   */
 674  677  int
 675  678  rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 676  679  {
 677  680          time_t start_time;
 678  681  
 679  682          rw_enter(&sip->rwlock, RW_READER);
 680  683          start_time = sip->start_time;
 681  684          rw_exit(&sip->rwlock);
 682  685  
 683  686          return (start_time == 0);
 684  687  }
 685  688  
 686  689  /*
 687  690   * Indicates if server instance is within the
 688  691   * grace period.
 689  692   */
 690  693  int
 691  694  rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 692  695  {
 693  696          time_t grace_expiry;
 694  697  
 695  698          rw_enter(&sip->rwlock, RW_READER);
 696  699          grace_expiry = sip->start_time + sip->grace_period;
 697  700          rw_exit(&sip->rwlock);
 698  701  
 699  702          return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 700  703  }
 701  704  
 702  705  int
 703  706  rfs4_clnt_in_grace(rfs4_client_t *cp)
 704  707  {
 705  708          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 706  709  
 707  710          return (rfs4_servinst_in_grace(cp->rc_server_instance));
 708  711  }
 709  712  
 710  713  /*
 711  714   * reset all currently active grace periods
 712  715   */
 713  716  void
 714  717  rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 715  718  {
 716  719          rfs4_servinst_t *sip;
 717  720  
 718  721          mutex_enter(&nsrv4->servinst_lock);
 719  722          for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 720  723                  if (rfs4_servinst_in_grace(sip))
 721  724                          rfs4_grace_start(sip);
 722  725          mutex_exit(&nsrv4->servinst_lock);
 723  726  }
 724  727  
 725  728  /*
 726  729   * start any new instances' grace periods
 727  730   */
 728  731  void
 729  732  rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 730  733  {
 731  734          rfs4_servinst_t *sip;
 732  735  
 733  736          mutex_enter(&nsrv4->servinst_lock);
 734  737          for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 735  738                  if (rfs4_servinst_grace_new(sip))
 736  739                          rfs4_grace_start(sip);
 737  740          mutex_exit(&nsrv4->servinst_lock);
 738  741  }
 739  742  
 740  743  static rfs4_dss_path_t *
 741  744  rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
 742  745      char *path, unsigned index)
 743  746  {
 744  747          size_t len;
 745  748          rfs4_dss_path_t *dss_path;
 746  749  
 747  750          dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 748  751  
 749  752          /*
 750  753           * Take a copy of the string, since the original may be overwritten.
 751  754           * Sadly, no strdup() in the kernel.
 752  755           */
 753  756          /* allow for NUL */
 754  757          len = strlen(path) + 1;
 755  758          dss_path->path = kmem_alloc(len, KM_SLEEP);
 756  759          (void) strlcpy(dss_path->path, path, len);
 757  760  
 758  761          /* associate with servinst */
 759  762          dss_path->sip = sip;
 760  763          dss_path->index = index;
 761  764  
 762  765          /*
 763  766           * Add to list of served paths.
 764  767           * No locking required, as we're only ever called at startup.
 765  768           */
 766  769          if (nsrv4->dss_pathlist == NULL) {
 767  770                  /* this is the first dss_path_t */
 768  771  
 769  772                  /* needed for insque/remque */
 770  773                  dss_path->next = dss_path->prev = dss_path;
 771  774  
 772  775                  nsrv4->dss_pathlist = dss_path;
 773  776          } else {
 774  777                  insque(dss_path, nsrv4->dss_pathlist);
 775  778          }
 776  779  
 777  780          return (dss_path);
 778  781  }
 779  782  
 780  783  /*
 781  784   * Create a new server instance, and make it the currently active instance.
 782  785   * Note that starting the grace period too early will reduce the clients'
 783  786   * recovery window.
 784  787   */
 785  788  void
 786  789  rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
 787  790      int dss_npaths, char **dss_paths)
 788  791  {
 789  792          unsigned i;
 790  793          rfs4_servinst_t *sip;
 791  794          rfs4_oldstate_t *oldstate;
 792  795  
 793  796          sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 794  797          rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 795  798  
 796  799          sip->start_time = (time_t)0;
 797  800          sip->grace_period = (time_t)0;
 798  801          sip->next = NULL;
 799  802          sip->prev = NULL;
 800  803  
 801  804          rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 802  805          /*
 803  806           * This initial dummy entry is required to setup for insque/remque.
 804  807           * It must be skipped over whenever the list is traversed.
 805  808           */
 806  809          oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 807  810          /* insque/remque require initial list entry to be self-terminated */
 808  811          oldstate->next = oldstate;
 809  812          oldstate->prev = oldstate;
 810  813          sip->oldstate = oldstate;
 811  814  
 812  815  
 813  816          sip->dss_npaths = dss_npaths;
 814  817          sip->dss_paths = kmem_alloc(dss_npaths *
 815  818              sizeof (rfs4_dss_path_t *), KM_SLEEP);
 816  819  
 817  820          for (i = 0; i < dss_npaths; i++) {
 818  821                  sip->dss_paths[i] =
 819  822                      rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 820  823          }
 821  824  
 822  825          mutex_enter(&nsrv4->servinst_lock);
 823  826          if (nsrv4->nfs4_cur_servinst != NULL) {
 824  827                  /* add to linked list */
 825  828                  sip->prev = nsrv4->nfs4_cur_servinst;
 826  829                  nsrv4->nfs4_cur_servinst->next = sip;
 827  830          }
 828  831          if (start_grace)
 829  832                  rfs4_grace_start(sip);
 830  833          /* make the new instance "current" */
 831  834          nsrv4->nfs4_cur_servinst = sip;
 832  835  
 833  836          mutex_exit(&nsrv4->servinst_lock);
 834  837  }
 835  838  
 836  839  /*
 837  840   * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 838  841   * all instances directly.
 839  842   */
 840  843  void
 841  844  rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 842  845  {
 843  846          rfs4_servinst_t *sip, *prev, *current;
 844  847  #ifdef DEBUG
 845  848          int n = 0;
 846  849  #endif
 847  850  
 848  851          mutex_enter(&nsrv4->servinst_lock);
 849  852          ASSERT(nsrv4->nfs4_cur_servinst != NULL);
 850  853          current = nsrv4->nfs4_cur_servinst;
 851  854          nsrv4->nfs4_cur_servinst = NULL;
 852  855          for (sip = current; sip != NULL; sip = prev) {
 853  856                  prev = sip->prev;
 854  857                  rw_destroy(&sip->rwlock);
 855  858                  if (sip->oldstate)
 856  859                          kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 857  860                  if (sip->dss_paths) {
 858  861                          int i = sip->dss_npaths;
 859  862  
 860  863                          while (i > 0) {
 861  864                                  i--;
 862  865                                  if (sip->dss_paths[i] != NULL) {
 863  866                                          char *path = sip->dss_paths[i]->path;
 864  867  
 865  868                                          if (path != NULL) {
 866  869                                                  kmem_free(path,
 867  870                                                      strlen(path) + 1);
 868  871                                          }
 869  872                                          kmem_free(sip->dss_paths[i],
 870  873                                              sizeof (rfs4_dss_path_t));
 871  874                                  }
 872  875                          }
 873  876                          kmem_free(sip->dss_paths,
 874  877                              sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 875  878                  }
 876  879                  kmem_free(sip, sizeof (rfs4_servinst_t));
 877  880  #ifdef DEBUG
 878  881                  n++;
 879  882  #endif
 880  883          }
 881  884          mutex_exit(&nsrv4->servinst_lock);
 882  885  }
 883  886  
 884  887  /*
 885  888   * Assign the current server instance to a client_t.
 886  889   * Should be called with cp->rc_dbe held.
 887  890   */
 888  891  void
 889  892  rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
 890  893      rfs4_servinst_t *sip)
 891  894  {
 892  895          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 893  896  
 894  897          /*
 895  898           * The lock ensures that if the current instance is in the process
 896  899           * of changing, we will see the new one.
 897  900           */
 898  901          mutex_enter(&nsrv4->servinst_lock);
 899  902          cp->rc_server_instance = sip;
 900  903          mutex_exit(&nsrv4->servinst_lock);
 901  904  }
 902  905  
 903  906  rfs4_servinst_t *
 904  907  rfs4_servinst(rfs4_client_t *cp)
 905  908  {
 906  909          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 907  910  
 908  911          return (cp->rc_server_instance);
 909  912  }
 910  913  
 911  914  /* ARGSUSED */
 912  915  static void
 913  916  nullfree(caddr_t resop)
 914  917  {
 915  918  }
 916  919  
 917  920  /*
 918  921   * This is a fall-through for invalid or not implemented (yet) ops
 919  922   */
 920  923  /* ARGSUSED */
 921  924  static void
 922  925  rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 923  926      struct compound_state *cs)
 924  927  {
 925  928          *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 926  929  }
 927  930  
 928  931  /*
 929  932   * Check if the security flavor, nfsnum, is in the flavor_list.
 930  933   */
 931  934  bool_t
 932  935  in_flavor_list(int nfsnum, int *flavor_list, int count)
 933  936  {
 934  937          int i;
 935  938  
 936  939          for (i = 0; i < count; i++) {
 937  940                  if (nfsnum == flavor_list[i])
 938  941                          return (TRUE);
 939  942          }
 940  943          return (FALSE);
 941  944  }
 942  945  
 943  946  /*
 944  947   * Used by rfs4_op_secinfo to get the security information from the
 945  948   * export structure associated with the component.
 946  949   */
 947  950  /* ARGSUSED */
 948  951  static nfsstat4
 949  952  do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 950  953  {
 951  954          int error, different_export = 0;
 952  955          vnode_t *dvp, *vp;
 953  956          struct exportinfo *exi = NULL;
 954  957          fid_t fid;
 955  958          uint_t count, i;
 956  959          secinfo4 *resok_val;
 957  960          struct secinfo *secp;
 958  961          seconfig_t *si;
 959  962          bool_t did_traverse = FALSE;
 960  963          int dotdot, walk;
 961  964          nfs_export_t *ne = nfs_get_export();
 962  965  
 963  966          dvp = cs->vp;
 964  967          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 965  968  
 966  969          /*
 967  970           * If dotdotting, then need to check whether it's above the
 968  971           * root of a filesystem, or above an export point.
 969  972           */
 970  973          if (dotdot) {
 971  974  
 972  975                  /*
 973  976                   * If dotdotting at the root of a filesystem, then
 974  977                   * need to traverse back to the mounted-on filesystem
 975  978                   * and do the dotdot lookup there.
 976  979                   */
 977  980                  if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
 978  981  
 979  982                          /*
 980  983                           * If at the system root, then can
 981  984                           * go up no further.
 982  985                           */
 983  986                          if (VN_CMP(dvp, ZONE_ROOTVP()))
 984  987                                  return (puterrno4(ENOENT));
 985  988  
 986  989                          /*
 987  990                           * Traverse back to the mounted-on filesystem
 988  991                           */
 989  992                          dvp = untraverse(cs->vp);
 990  993  
 991  994                          /*
 992  995                           * Set the different_export flag so we remember
 993  996                           * to pick up a new exportinfo entry for
 994  997                           * this new filesystem.
 995  998                           */
 996  999                          different_export = 1;
 997 1000                  } else {
 998 1001  
 999 1002                          /*
1000 1003                           * If dotdotting above an export point then set
1001 1004                           * the different_export to get new export info.
1002 1005                           */
1003 1006                          different_export = nfs_exported(cs->exi, cs->vp);
1004 1007                  }
1005 1008          }
1006 1009  
1007 1010          /*
1008 1011           * Get the vnode for the component "nm".
1009 1012           */
1010 1013          error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1011 1014              NULL, NULL, NULL);
1012 1015          if (error)
1013 1016                  return (puterrno4(error));
1014 1017  
1015 1018          /*
1016 1019           * If the vnode is in a pseudo filesystem, or if the security flavor
1017 1020           * used in the request is valid but not an explicitly shared flavor,
1018 1021           * or the access bit indicates that this is a limited access,
1019 1022           * check whether this vnode is visible.
1020 1023           */
1021 1024          if (!different_export &&
1022 1025              (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1023 1026              cs->access & CS_ACCESS_LIMITED)) {
1024 1027                  if (! nfs_visible(cs->exi, vp, &different_export)) {
1025 1028                          VN_RELE(vp);
1026 1029                          return (puterrno4(ENOENT));
1027 1030                  }
1028 1031          }
1029 1032  
1030 1033          /*
1031 1034           * If it's a mountpoint, then traverse it.
1032 1035           */
1033 1036          if (vn_ismntpt(vp)) {
1034 1037                  if ((error = traverse(&vp)) != 0) {
1035 1038                          VN_RELE(vp);
1036 1039                          return (puterrno4(error));
1037 1040                  }
1038 1041                  /* remember that we had to traverse mountpoint */
1039 1042                  did_traverse = TRUE;
1040 1043                  different_export = 1;
1041 1044          } else if (vp->v_vfsp != dvp->v_vfsp) {
1042 1045                  /*
1043 1046                   * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1044 1047                   * then vp is probably an LOFS object.  We don't need the
1045 1048                   * realvp, we just need to know that we might have crossed
1046 1049                   * a server fs boundary and need to call checkexport4.
1047 1050                   * (LOFS lookup hides server fs mountpoints, and actually calls
1048 1051                   * traverse)
1049 1052                   */
1050 1053                  different_export = 1;
1051 1054          }
1052 1055  
1053 1056          /*
1054 1057           * Get the export information for it.
1055 1058           */
1056 1059          if (different_export) {
1057 1060  
1058 1061                  bzero(&fid, sizeof (fid));
1059 1062                  fid.fid_len = MAXFIDSZ;
1060 1063                  error = vop_fid_pseudo(vp, &fid);
1061 1064                  if (error) {
1062 1065                          VN_RELE(vp);
1063 1066                          return (puterrno4(error));
1064 1067                  }
1065 1068  
1066 1069                  if (dotdot)
1067 1070                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1068 1071                  else
1069 1072                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1070 1073  
1071 1074                  if (exi == NULL) {
1072 1075                          if (did_traverse == TRUE) {
1073 1076                                  /*
1074 1077                                   * If this vnode is a mounted-on vnode,
1075 1078                                   * but the mounted-on file system is not
1076 1079                                   * exported, send back the secinfo for
1077 1080                                   * the exported node that the mounted-on
1078 1081                                   * vnode lives in.
1079 1082                                   */
1080 1083                                  exi = cs->exi;
1081 1084                          } else {
1082 1085                                  VN_RELE(vp);
1083 1086                                  return (puterrno4(EACCES));
1084 1087                          }
1085 1088                  }
1086 1089          } else {
1087 1090                  exi = cs->exi;
1088 1091          }
1089 1092          ASSERT(exi != NULL);
1090 1093  
1091 1094  
1092 1095          /*
1093 1096           * Create the secinfo result based on the security information
1094 1097           * from the exportinfo structure (exi).
1095 1098           *
1096 1099           * Return all flavors for a pseudo node.
1097 1100           * For a real export node, return the flavor that the client
1098 1101           * has access with.
1099 1102           */
1100 1103          ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1101 1104          if (PSEUDO(exi)) {
1102 1105                  count = exi->exi_export.ex_seccnt; /* total sec count */
1103 1106                  resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1104 1107                  secp = exi->exi_export.ex_secinfo;
1105 1108  
1106 1109                  for (i = 0; i < count; i++) {
1107 1110                          si = &secp[i].s_secinfo;
1108 1111                          resok_val[i].flavor = si->sc_rpcnum;
1109 1112                          if (resok_val[i].flavor == RPCSEC_GSS) {
1110 1113                                  rpcsec_gss_info *info;
1111 1114  
1112 1115                                  info = &resok_val[i].flavor_info;
1113 1116                                  info->qop = si->sc_qop;
1114 1117                                  info->service = (rpc_gss_svc_t)si->sc_service;
1115 1118  
1116 1119                                  /* get oid opaque data */
1117 1120                                  info->oid.sec_oid4_len =
1118 1121                                      si->sc_gss_mech_type->length;
1119 1122                                  info->oid.sec_oid4_val = kmem_alloc(
1120 1123                                      si->sc_gss_mech_type->length, KM_SLEEP);
1121 1124                                  bcopy(
1122 1125                                      si->sc_gss_mech_type->elements,
1123 1126                                      info->oid.sec_oid4_val,
1124 1127                                      info->oid.sec_oid4_len);
1125 1128                          }
1126 1129                  }
1127 1130                  resp->SECINFO4resok_len = count;
1128 1131                  resp->SECINFO4resok_val = resok_val;
1129 1132          } else {
1130 1133                  int ret_cnt = 0, k = 0;
1131 1134                  int *flavor_list;
1132 1135  
1133 1136                  count = exi->exi_export.ex_seccnt; /* total sec count */
1134 1137                  secp = exi->exi_export.ex_secinfo;
1135 1138  
1136 1139                  flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1137 1140                  /* find out which flavors to return */
1138 1141                  for (i = 0; i < count; i ++) {
1139 1142                          int access, flavor, perm;
1140 1143  
1141 1144                          flavor = secp[i].s_secinfo.sc_nfsnum;
1142 1145                          perm = secp[i].s_flags;
1143 1146  
1144 1147                          access = nfsauth4_secinfo_access(exi, cs->req,
1145 1148                              flavor, perm, cs->basecr);
1146 1149  
1147 1150                          if (! (access & NFSAUTH_DENIED) &&
1148 1151                              ! (access & NFSAUTH_WRONGSEC)) {
1149 1152                                  flavor_list[ret_cnt] = flavor;
1150 1153                                  ret_cnt++;
1151 1154                          }
1152 1155                  }
1153 1156  
1154 1157                  /* Create the returning SECINFO value */
1155 1158                  resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1156 1159  
1157 1160                  for (i = 0; i < count; i++) {
1158 1161                          /*
1159 1162                           * If the flavor is in the flavor list,
1160 1163                           * fill in resok_val.
1161 1164                           */
1162 1165                          si = &secp[i].s_secinfo;
1163 1166                          if (in_flavor_list(si->sc_nfsnum,
1164 1167                              flavor_list, ret_cnt)) {
1165 1168                                  resok_val[k].flavor = si->sc_rpcnum;
1166 1169                                  if (resok_val[k].flavor == RPCSEC_GSS) {
1167 1170                                          rpcsec_gss_info *info;
1168 1171  
1169 1172                                          info = &resok_val[k].flavor_info;
1170 1173                                          info->qop = si->sc_qop;
1171 1174                                          info->service = (rpc_gss_svc_t)
1172 1175                                              si->sc_service;
1173 1176  
1174 1177                                          /* get oid opaque data */
1175 1178                                          info->oid.sec_oid4_len =
1176 1179                                              si->sc_gss_mech_type->length;
1177 1180                                          info->oid.sec_oid4_val = kmem_alloc(
1178 1181                                              si->sc_gss_mech_type->length,
1179 1182                                              KM_SLEEP);
1180 1183                                          bcopy(si->sc_gss_mech_type->elements,
1181 1184                                              info->oid.sec_oid4_val,
1182 1185                                              info->oid.sec_oid4_len);
1183 1186                                  }
1184 1187                                  k++;
1185 1188                          }
1186 1189                          if (k >= ret_cnt)
1187 1190                                  break;
1188 1191                  }
1189 1192                  resp->SECINFO4resok_len = ret_cnt;
1190 1193                  resp->SECINFO4resok_val = resok_val;
1191 1194                  kmem_free(flavor_list, count * sizeof (int));
1192 1195          }
1193 1196  
1194 1197          VN_RELE(vp);
1195 1198          return (NFS4_OK);
1196 1199  }
1197 1200  
1198 1201  /*
1199 1202   * SECINFO (Operation 33): Obtain required security information on
1200 1203   * the component name in the format of (security-mechanism-oid, qop, service)
1201 1204   * triplets.
1202 1205   */
1203 1206  /* ARGSUSED */
1204 1207  static void
1205 1208  rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1206 1209      struct compound_state *cs)
1207 1210  {
1208 1211          SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1209 1212          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1210 1213          utf8string *utfnm = &args->name;
1211 1214          uint_t len;
1212 1215          char *nm;
1213 1216          struct sockaddr *ca;
1214 1217          char *name = NULL;
1215 1218          nfsstat4 status = NFS4_OK;
1216 1219  
1217 1220          DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1218 1221              SECINFO4args *, args);
1219 1222  
1220 1223          /*
1221 1224           * Current file handle (cfh) should have been set before getting
1222 1225           * into this function. If not, return error.
1223 1226           */
1224 1227          if (cs->vp == NULL) {
1225 1228                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1226 1229                  goto out;
1227 1230          }
1228 1231  
1229 1232          if (cs->vp->v_type != VDIR) {
1230 1233                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1231 1234                  goto out;
1232 1235          }
1233 1236  
1234 1237          /*
1235 1238           * Verify the component name. If failed, error out, but
1236 1239           * do not error out if the component name is a "..".
1237 1240           * SECINFO will return its parents secinfo data for SECINFO "..".
1238 1241           */
1239 1242          status = utf8_dir_verify(utfnm);
1240 1243          if (status != NFS4_OK) {
1241 1244                  if (utfnm->utf8string_len != 2 ||
1242 1245                      utfnm->utf8string_val[0] != '.' ||
1243 1246                      utfnm->utf8string_val[1] != '.') {
1244 1247                          *cs->statusp = resp->status = status;
1245 1248                          goto out;
1246 1249                  }
1247 1250          }
1248 1251  
1249 1252          nm = utf8_to_str(utfnm, &len, NULL);
1250 1253          if (nm == NULL) {
1251 1254                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1252 1255                  goto out;
1253 1256          }
1254 1257  
1255 1258          if (len > MAXNAMELEN) {
1256 1259                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1257 1260                  kmem_free(nm, len);
1258 1261                  goto out;
1259 1262          }
1260 1263  
1261 1264          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1262 1265          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1263 1266              MAXPATHLEN  + 1);
1264 1267  
1265 1268          if (name == NULL) {
1266 1269                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1267 1270                  kmem_free(nm, len);
1268 1271                  goto out;
1269 1272          }
1270 1273  
1271 1274  
1272 1275          *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1273 1276  
1274 1277          if (name != nm)
1275 1278                  kmem_free(name, MAXPATHLEN + 1);
1276 1279          kmem_free(nm, len);
1277 1280  
1278 1281  out:
1279 1282          DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1280 1283              SECINFO4res *, resp);
1281 1284  }
1282 1285  
1283 1286  /*
1284 1287   * Free SECINFO result.
1285 1288   */
1286 1289  /* ARGSUSED */
1287 1290  static void
1288 1291  rfs4_op_secinfo_free(nfs_resop4 *resop)
1289 1292  {
1290 1293          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1291 1294          int count, i;
1292 1295          secinfo4 *resok_val;
1293 1296  
1294 1297          /* If this is not an Ok result, nothing to free. */
1295 1298          if (resp->status != NFS4_OK) {
1296 1299                  return;
1297 1300          }
1298 1301  
1299 1302          count = resp->SECINFO4resok_len;
1300 1303          resok_val = resp->SECINFO4resok_val;
1301 1304  
1302 1305          for (i = 0; i < count; i++) {
1303 1306                  if (resok_val[i].flavor == RPCSEC_GSS) {
1304 1307                          rpcsec_gss_info *info;
1305 1308  
1306 1309                          info = &resok_val[i].flavor_info;
1307 1310                          kmem_free(info->oid.sec_oid4_val,
1308 1311                              info->oid.sec_oid4_len);
1309 1312                  }
1310 1313          }
1311 1314          kmem_free(resok_val, count * sizeof (secinfo4));
1312 1315          resp->SECINFO4resok_len = 0;
1313 1316          resp->SECINFO4resok_val = NULL;
1314 1317  }
1315 1318  
1316 1319  /* ARGSUSED */
1317 1320  static void
1318 1321  rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1319 1322      struct compound_state *cs)
1320 1323  {
1321 1324          ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1322 1325          ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1323 1326          int error;
1324 1327          vnode_t *vp;
1325 1328          struct vattr va;
1326 1329          int checkwriteperm;
1327 1330          cred_t *cr = cs->cr;
1328 1331          bslabel_t *clabel, *slabel;
1329 1332          ts_label_t *tslabel;
1330 1333          boolean_t admin_low_client;
1331 1334  
1332 1335          DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1333 1336              ACCESS4args *, args);
1334 1337  
1335 1338  #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1336 1339          if (cs->access == CS_ACCESS_DENIED) {
1337 1340                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1338 1341                  goto out;
1339 1342          }
1340 1343  #endif
1341 1344          if (cs->vp == NULL) {
1342 1345                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1343 1346                  goto out;
1344 1347          }
1345 1348  
1346 1349          ASSERT(cr != NULL);
1347 1350  
1348 1351          vp = cs->vp;
1349 1352  
1350 1353          /*
1351 1354           * If the file system is exported read only, it is not appropriate
1352 1355           * to check write permissions for regular files and directories.
1353 1356           * Special files are interpreted by the client, so the underlying
1354 1357           * permissions are sent back to the client for interpretation.
1355 1358           */
1356 1359          if (rdonly4(req, cs) &&
1357 1360              (vp->v_type == VREG || vp->v_type == VDIR))
1358 1361                  checkwriteperm = 0;
1359 1362          else
1360 1363                  checkwriteperm = 1;
1361 1364  
1362 1365          /*
1363 1366           * XXX
1364 1367           * We need the mode so that we can correctly determine access
1365 1368           * permissions relative to a mandatory lock file.  Access to
1366 1369           * mandatory lock files is denied on the server, so it might
1367 1370           * as well be reflected to the server during the open.
1368 1371           */
1369 1372          va.va_mask = AT_MODE;
1370 1373          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1371 1374          if (error) {
1372 1375                  *cs->statusp = resp->status = puterrno4(error);
1373 1376                  goto out;
1374 1377          }
1375 1378          resp->access = 0;
1376 1379          resp->supported = 0;
1377 1380  
1378 1381          if (is_system_labeled()) {
1379 1382                  ASSERT(req->rq_label != NULL);
1380 1383                  clabel = req->rq_label;
1381 1384                  DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1382 1385                      "got client label from request(1)",
1383 1386                      struct svc_req *, req);
1384 1387                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
1385 1388                          if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1386 1389                                  *cs->statusp = resp->status = puterrno4(EACCES);
1387 1390                                  goto out;
1388 1391                          }
1389 1392                          slabel = label2bslabel(tslabel);
1390 1393                          DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1391 1394                              char *, "got server label(1) for vp(2)",
1392 1395                              bslabel_t *, slabel, vnode_t *, vp);
1393 1396  
1394 1397                          admin_low_client = B_FALSE;
1395 1398                  } else
1396 1399                          admin_low_client = B_TRUE;
1397 1400          }
1398 1401  
1399 1402          if (args->access & ACCESS4_READ) {
1400 1403                  error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1401 1404                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1402 1405                      (!is_system_labeled() || admin_low_client ||
1403 1406                      bldominates(clabel, slabel)))
1404 1407                          resp->access |= ACCESS4_READ;
1405 1408                  resp->supported |= ACCESS4_READ;
1406 1409          }
1407 1410          if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1408 1411                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1409 1412                  if (!error && (!is_system_labeled() || admin_low_client ||
1410 1413                      bldominates(clabel, slabel)))
1411 1414                          resp->access |= ACCESS4_LOOKUP;
1412 1415                  resp->supported |= ACCESS4_LOOKUP;
1413 1416          }
1414 1417          if (checkwriteperm &&
1415 1418              (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1416 1419                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1417 1420                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1418 1421                      (!is_system_labeled() || admin_low_client ||
1419 1422                      blequal(clabel, slabel)))
1420 1423                          resp->access |=
1421 1424                              (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1422 1425                  resp->supported |=
1423 1426                      resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1424 1427          }
1425 1428  
1426 1429          if (checkwriteperm &&
1427 1430              (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1428 1431                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1429 1432                  if (!error && (!is_system_labeled() || admin_low_client ||
1430 1433                      blequal(clabel, slabel)))
1431 1434                          resp->access |= ACCESS4_DELETE;
1432 1435                  resp->supported |= ACCESS4_DELETE;
1433 1436          }
1434 1437          if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1435 1438                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1436 1439                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1437 1440                      (!is_system_labeled() || admin_low_client ||
1438 1441                      bldominates(clabel, slabel)))
1439 1442                          resp->access |= ACCESS4_EXECUTE;
1440 1443                  resp->supported |= ACCESS4_EXECUTE;
1441 1444          }
1442 1445  
1443 1446          if (is_system_labeled() && !admin_low_client)
1444 1447                  label_rele(tslabel);
1445 1448  
1446 1449          *cs->statusp = resp->status = NFS4_OK;
1447 1450  out:
1448 1451          DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1449 1452              ACCESS4res *, resp);
1450 1453  }
1451 1454  
1452 1455  /* ARGSUSED */
1453 1456  static void
1454 1457  rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1455 1458      struct compound_state *cs)
1456 1459  {
1457 1460          COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1458 1461          COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1459 1462          int error;
1460 1463          vnode_t *vp = cs->vp;
1461 1464          cred_t *cr = cs->cr;
1462 1465          vattr_t va;
1463 1466          nfs4_srv_t *nsrv4;
1464 1467  
1465 1468          DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1466 1469              COMMIT4args *, args);
1467 1470  
1468 1471          if (vp == NULL) {
1469 1472                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1470 1473                  goto out;
1471 1474          }
1472 1475          if (cs->access == CS_ACCESS_DENIED) {
1473 1476                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1474 1477                  goto out;
1475 1478          }
1476 1479  
1477 1480          if (args->offset + args->count < args->offset) {
1478 1481                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1479 1482                  goto out;
1480 1483          }
1481 1484  
1482 1485          va.va_mask = AT_UID;
1483 1486          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1484 1487  
1485 1488          /*
1486 1489           * If we can't get the attributes, then we can't do the
1487 1490           * right access checking.  So, we'll fail the request.
1488 1491           */
1489 1492          if (error) {
1490 1493                  *cs->statusp = resp->status = puterrno4(error);
1491 1494                  goto out;
1492 1495          }
1493 1496          if (rdonly4(req, cs)) {
1494 1497                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1495 1498                  goto out;
1496 1499          }
1497 1500  
1498 1501          if (vp->v_type != VREG) {
1499 1502                  if (vp->v_type == VDIR)
1500 1503                          resp->status = NFS4ERR_ISDIR;
1501 1504                  else
1502 1505                          resp->status = NFS4ERR_INVAL;
1503 1506                  *cs->statusp = resp->status;
1504 1507                  goto out;
1505 1508          }
1506 1509  
1507 1510          if (crgetuid(cr) != va.va_uid &&
1508 1511              (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1509 1512                  *cs->statusp = resp->status = puterrno4(error);
  
    | 
      ↓ open down ↓ | 
    886 lines elided | 
    
      ↑ open up ↑ | 
  
1510 1513                  goto out;
1511 1514          }
1512 1515  
1513 1516          error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1514 1517  
1515 1518          if (error) {
1516 1519                  *cs->statusp = resp->status = puterrno4(error);
1517 1520                  goto out;
1518 1521          }
1519 1522  
1520      -        nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     1523 +        nsrv4 = nfs4_get_srv();
1521 1524          *cs->statusp = resp->status = NFS4_OK;
1522 1525          resp->writeverf = nsrv4->write4verf;
1523 1526  out:
1524 1527          DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1525 1528              COMMIT4res *, resp);
1526 1529  }
1527 1530  
1528 1531  /*
1529 1532   * do_rfs4_op_mknod is called from rfs4_op_create after all initial
1530 1533   * verification was completed. It does the nfsv4 create for special files.
1531 1534   */
1532 1535  /* ARGSUSED */
1533 1536  static vnode_t *
1534 1537  do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1535 1538      struct compound_state *cs, vattr_t *vap, char *nm)
1536 1539  {
1537 1540          int error;
1538 1541          cred_t *cr = cs->cr;
1539 1542          vnode_t *dvp = cs->vp;
1540 1543          vnode_t *vp = NULL;
1541 1544          int mode;
1542 1545          enum vcexcl excl;
1543 1546  
1544 1547          switch (args->type) {
1545 1548          case NF4CHR:
1546 1549          case NF4BLK:
1547 1550                  if (secpolicy_sys_devices(cr) != 0) {
1548 1551                          *cs->statusp = resp->status = NFS4ERR_PERM;
1549 1552                          return (NULL);
1550 1553                  }
1551 1554                  if (args->type == NF4CHR)
1552 1555                          vap->va_type = VCHR;
1553 1556                  else
1554 1557                          vap->va_type = VBLK;
1555 1558                  vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1556 1559                      args->ftype4_u.devdata.specdata2);
1557 1560                  vap->va_mask |= AT_RDEV;
1558 1561                  break;
1559 1562          case NF4SOCK:
1560 1563                  vap->va_type = VSOCK;
1561 1564                  break;
1562 1565          case NF4FIFO:
1563 1566                  vap->va_type = VFIFO;
1564 1567                  break;
1565 1568          default:
1566 1569                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1567 1570                  return (NULL);
1568 1571          }
1569 1572  
1570 1573          /*
1571 1574           * Must specify the mode.
1572 1575           */
1573 1576          if (!(vap->va_mask & AT_MODE)) {
1574 1577                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1575 1578                  return (NULL);
1576 1579          }
1577 1580  
1578 1581          excl = EXCL;
1579 1582  
1580 1583          mode = 0;
1581 1584  
1582 1585          error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1583 1586          if (error) {
1584 1587                  *cs->statusp = resp->status = puterrno4(error);
1585 1588                  return (NULL);
1586 1589          }
1587 1590          return (vp);
1588 1591  }
1589 1592  
1590 1593  /*
1591 1594   * nfsv4 create is used to create non-regular files. For regular files,
1592 1595   * use nfsv4 open.
1593 1596   */
1594 1597  /* ARGSUSED */
1595 1598  static void
1596 1599  rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1597 1600      struct compound_state *cs)
1598 1601  {
1599 1602          CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1600 1603          CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1601 1604          int error;
1602 1605          struct vattr bva, iva, iva2, ava, *vap;
1603 1606          cred_t *cr = cs->cr;
1604 1607          vnode_t *dvp = cs->vp;
1605 1608          vnode_t *vp = NULL;
1606 1609          vnode_t *realvp;
1607 1610          char *nm, *lnm;
1608 1611          uint_t len, llen;
1609 1612          int syncval = 0;
1610 1613          struct nfs4_svgetit_arg sarg;
1611 1614          struct nfs4_ntov_table ntov;
1612 1615          struct statvfs64 sb;
1613 1616          nfsstat4 status;
1614 1617          struct sockaddr *ca;
1615 1618          char *name = NULL;
1616 1619          char *lname = NULL;
1617 1620  
1618 1621          DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1619 1622              CREATE4args *, args);
1620 1623  
1621 1624          resp->attrset = 0;
1622 1625  
1623 1626          if (dvp == NULL) {
1624 1627                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1625 1628                  goto out;
1626 1629          }
1627 1630  
1628 1631          /*
1629 1632           * If there is an unshared filesystem mounted on this vnode,
1630 1633           * do not allow an object to be created in this directory.
1631 1634           */
1632 1635          if (vn_ismntpt(dvp)) {
1633 1636                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1634 1637                  goto out;
1635 1638          }
1636 1639  
1637 1640          /* Verify that type is correct */
1638 1641          switch (args->type) {
1639 1642          case NF4LNK:
1640 1643          case NF4BLK:
1641 1644          case NF4CHR:
1642 1645          case NF4SOCK:
1643 1646          case NF4FIFO:
1644 1647          case NF4DIR:
1645 1648                  break;
1646 1649          default:
1647 1650                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1648 1651                  goto out;
1649 1652          };
1650 1653  
1651 1654          if (cs->access == CS_ACCESS_DENIED) {
1652 1655                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1653 1656                  goto out;
1654 1657          }
1655 1658          if (dvp->v_type != VDIR) {
1656 1659                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1657 1660                  goto out;
1658 1661          }
1659 1662          status = utf8_dir_verify(&args->objname);
1660 1663          if (status != NFS4_OK) {
1661 1664                  *cs->statusp = resp->status = status;
1662 1665                  goto out;
1663 1666          }
1664 1667  
1665 1668          if (rdonly4(req, cs)) {
1666 1669                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1667 1670                  goto out;
1668 1671          }
1669 1672  
1670 1673          /*
1671 1674           * Name of newly created object
1672 1675           */
1673 1676          nm = utf8_to_fn(&args->objname, &len, NULL);
1674 1677          if (nm == NULL) {
1675 1678                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1676 1679                  goto out;
1677 1680          }
1678 1681  
1679 1682          if (len > MAXNAMELEN) {
1680 1683                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1681 1684                  kmem_free(nm, len);
1682 1685                  goto out;
1683 1686          }
1684 1687  
1685 1688          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1686 1689          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1687 1690              MAXPATHLEN  + 1);
1688 1691  
1689 1692          if (name == NULL) {
1690 1693                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1691 1694                  kmem_free(nm, len);
1692 1695                  goto out;
1693 1696          }
1694 1697  
1695 1698          resp->attrset = 0;
1696 1699  
1697 1700          sarg.sbp = &sb;
1698 1701          sarg.is_referral = B_FALSE;
1699 1702          nfs4_ntov_table_init(&ntov);
1700 1703  
1701 1704          status = do_rfs4_set_attrs(&resp->attrset,
1702 1705              &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1703 1706  
1704 1707          if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1705 1708                  status = NFS4ERR_INVAL;
1706 1709  
1707 1710          if (status != NFS4_OK) {
1708 1711                  *cs->statusp = resp->status = status;
1709 1712                  if (name != nm)
1710 1713                          kmem_free(name, MAXPATHLEN + 1);
1711 1714                  kmem_free(nm, len);
1712 1715                  nfs4_ntov_table_free(&ntov, &sarg);
1713 1716                  resp->attrset = 0;
1714 1717                  goto out;
1715 1718          }
1716 1719  
1717 1720          /* Get "before" change value */
1718 1721          bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1719 1722          error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1720 1723          if (error) {
1721 1724                  *cs->statusp = resp->status = puterrno4(error);
1722 1725                  if (name != nm)
1723 1726                          kmem_free(name, MAXPATHLEN + 1);
1724 1727                  kmem_free(nm, len);
1725 1728                  nfs4_ntov_table_free(&ntov, &sarg);
1726 1729                  resp->attrset = 0;
1727 1730                  goto out;
1728 1731          }
1729 1732          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1730 1733  
1731 1734          vap = sarg.vap;
1732 1735  
1733 1736          /*
1734 1737           * Set the default initial values for attributes when the parent
1735 1738           * directory does not have the VSUID/VSGID bit set and they have
1736 1739           * not been specified in createattrs.
1737 1740           */
1738 1741          if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1739 1742                  vap->va_uid = crgetuid(cr);
1740 1743                  vap->va_mask |= AT_UID;
1741 1744          }
1742 1745          if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1743 1746                  vap->va_gid = crgetgid(cr);
1744 1747                  vap->va_mask |= AT_GID;
1745 1748          }
1746 1749  
1747 1750          vap->va_mask |= AT_TYPE;
1748 1751          switch (args->type) {
1749 1752          case NF4DIR:
1750 1753                  vap->va_type = VDIR;
1751 1754                  if ((vap->va_mask & AT_MODE) == 0) {
1752 1755                          vap->va_mode = 0700;    /* default: owner rwx only */
1753 1756                          vap->va_mask |= AT_MODE;
1754 1757                  }
1755 1758                  error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1756 1759                  if (error)
1757 1760                          break;
1758 1761  
1759 1762                  /*
1760 1763                   * Get the initial "after" sequence number, if it fails,
1761 1764                   * set to zero
1762 1765                   */
1763 1766                  iva.va_mask = AT_SEQ;
1764 1767                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1765 1768                          iva.va_seq = 0;
1766 1769                  break;
1767 1770          case NF4LNK:
1768 1771                  vap->va_type = VLNK;
1769 1772                  if ((vap->va_mask & AT_MODE) == 0) {
1770 1773                          vap->va_mode = 0700;    /* default: owner rwx only */
1771 1774                          vap->va_mask |= AT_MODE;
1772 1775                  }
1773 1776  
1774 1777                  /*
1775 1778                   * symlink names must be treated as data
1776 1779                   */
1777 1780                  lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1778 1781                      &llen, NULL);
1779 1782  
1780 1783                  if (lnm == NULL) {
1781 1784                          *cs->statusp = resp->status = NFS4ERR_INVAL;
1782 1785                          if (name != nm)
1783 1786                                  kmem_free(name, MAXPATHLEN + 1);
1784 1787                          kmem_free(nm, len);
1785 1788                          nfs4_ntov_table_free(&ntov, &sarg);
1786 1789                          resp->attrset = 0;
1787 1790                          goto out;
1788 1791                  }
1789 1792  
1790 1793                  if (llen > MAXPATHLEN) {
1791 1794                          *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1792 1795                          if (name != nm)
1793 1796                                  kmem_free(name, MAXPATHLEN + 1);
1794 1797                          kmem_free(nm, len);
1795 1798                          kmem_free(lnm, llen);
1796 1799                          nfs4_ntov_table_free(&ntov, &sarg);
1797 1800                          resp->attrset = 0;
1798 1801                          goto out;
1799 1802                  }
1800 1803  
1801 1804                  lname = nfscmd_convname(ca, cs->exi, lnm,
1802 1805                      NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1803 1806  
1804 1807                  if (lname == NULL) {
1805 1808                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1806 1809                          if (name != nm)
1807 1810                                  kmem_free(name, MAXPATHLEN + 1);
1808 1811                          kmem_free(nm, len);
1809 1812                          kmem_free(lnm, llen);
1810 1813                          nfs4_ntov_table_free(&ntov, &sarg);
1811 1814                          resp->attrset = 0;
1812 1815                          goto out;
1813 1816                  }
1814 1817  
1815 1818                  error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1816 1819                  if (lname != lnm)
1817 1820                          kmem_free(lname, MAXPATHLEN + 1);
1818 1821                  kmem_free(lnm, llen);
1819 1822                  if (error)
1820 1823                          break;
1821 1824  
1822 1825                  /*
1823 1826                   * Get the initial "after" sequence number, if it fails,
1824 1827                   * set to zero
1825 1828                   */
1826 1829                  iva.va_mask = AT_SEQ;
1827 1830                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1828 1831                          iva.va_seq = 0;
1829 1832  
1830 1833                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1831 1834                      NULL, NULL, NULL);
1832 1835                  if (error)
1833 1836                          break;
1834 1837  
1835 1838                  /*
1836 1839                   * va_seq is not safe over VOP calls; check it again and, if
1837 1840                   * it has changed, zero out iva.va_seq to force atomic = FALSE.
1838 1841                   */
1839 1842                  iva2.va_mask = AT_SEQ;
1840 1843                  if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1841 1844                      iva2.va_seq != iva.va_seq)
1842 1845                          iva.va_seq = 0;
1843 1846                  break;
1844 1847          default:
1845 1848                  /*
1846 1849                   * probably a special file.
1847 1850                   */
1848 1851                  if ((vap->va_mask & AT_MODE) == 0) {
1849 1852                          vap->va_mode = 0600;    /* default: owner rw only */
1850 1853                          vap->va_mask |= AT_MODE;
1851 1854                  }
1852 1855                  syncval = FNODSYNC;
1853 1856                  /*
1854 1857                   * We know this will only generate one VOP call
1855 1858                   */
1856 1859                  vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1857 1860  
1858 1861                  if (vp == NULL) {
1859 1862                          if (name != nm)
1860 1863                                  kmem_free(name, MAXPATHLEN + 1);
1861 1864                          kmem_free(nm, len);
1862 1865                          nfs4_ntov_table_free(&ntov, &sarg);
1863 1866                          resp->attrset = 0;
1864 1867                          goto out;
1865 1868                  }
1866 1869  
1867 1870                  /*
1868 1871                   * Get the initial "after" sequence number, if it fails,
1869 1872                   * set to zero
1870 1873                   */
1871 1874                  iva.va_mask = AT_SEQ;
1872 1875                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1873 1876                          iva.va_seq = 0;
1874 1877  
1875 1878                  break;
1876 1879          }
1877 1880          if (name != nm)
1878 1881                  kmem_free(name, MAXPATHLEN + 1);
1879 1882          kmem_free(nm, len);
1880 1883  
1881 1884          if (error) {
1882 1885                  *cs->statusp = resp->status = puterrno4(error);
1883 1886          }
1884 1887  
1885 1888          /*
1886 1889           * Force modified data and metadata out to stable storage.
1887 1890           */
1888 1891          (void) VOP_FSYNC(dvp, 0, cr, NULL);
1889 1892  
1890 1893          if (resp->status != NFS4_OK) {
1891 1894                  if (vp != NULL)
1892 1895                          VN_RELE(vp);
1893 1896                  nfs4_ntov_table_free(&ntov, &sarg);
1894 1897                  resp->attrset = 0;
1895 1898                  goto out;
1896 1899          }
1897 1900  
1898 1901          /*
1899 1902           * Finish setup of cinfo response, "before" value already set.
1900 1903           * Get "after" change value, if it fails, simply return the
1901 1904           * before value.
1902 1905           */
1903 1906          ava.va_mask = AT_CTIME|AT_SEQ;
1904 1907          if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1905 1908                  ava.va_ctime = bva.va_ctime;
1906 1909                  ava.va_seq = 0;
1907 1910          }
1908 1911          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1909 1912  
1910 1913          /*
1911 1914           * True verification that object was created with correct
1912 1915           * attrs is impossible.  The attrs could have been changed
1913 1916           * immediately after object creation.  If attributes did
1914 1917           * not verify, the only recourse for the server is to
1915 1918           * destroy the object.  Maybe if some attrs (like gid)
1916 1919           * are set incorrectly, the object should be destroyed;
1917 1920           * however, seems bad as a default policy.  Do we really
1918 1921           * want to destroy an object over one of the times not
1919 1922           * verifying correctly?  For these reasons, the server
1920 1923           * currently sets bits in attrset for createattrs
1921 1924           * that were set; however, no verification is done.
1922 1925           *
1923 1926           * vmask_to_nmask accounts for vattr bits set on create
1924 1927           *      [do_rfs4_set_attrs() only sets resp bits for
1925 1928           *       non-vattr/vfs bits.]
1926 1929           * Mask off any bits set by default so as not to return
1927 1930           * more attrset bits than were requested in createattrs
1928 1931           */
1929 1932          nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1930 1933          resp->attrset &= args->createattrs.attrmask;
1931 1934          nfs4_ntov_table_free(&ntov, &sarg);
1932 1935  
1933 1936          error = makefh4(&cs->fh, vp, cs->exi);
1934 1937          if (error) {
1935 1938                  *cs->statusp = resp->status = puterrno4(error);
1936 1939          }
1937 1940  
1938 1941          /*
1939 1942           * The cinfo.atomic = TRUE only if we got no errors, we have
1940 1943           * non-zero va_seq's, and it has incremented by exactly one
1941 1944           * during the creation and it didn't change during the VOP_LOOKUP
1942 1945           * or VOP_FSYNC.
1943 1946           */
1944 1947          if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1945 1948              iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1946 1949                  resp->cinfo.atomic = TRUE;
1947 1950          else
1948 1951                  resp->cinfo.atomic = FALSE;
1949 1952  
1950 1953          /*
1951 1954           * Force modified metadata out to stable storage.
1952 1955           *
1953 1956           * if an underlying vp exists, pass it to VOP_FSYNC
1954 1957           */
1955 1958          if (VOP_REALVP(vp, &realvp, NULL) == 0)
1956 1959                  (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1957 1960          else
1958 1961                  (void) VOP_FSYNC(vp, syncval, cr, NULL);
1959 1962  
1960 1963          if (resp->status != NFS4_OK) {
1961 1964                  VN_RELE(vp);
1962 1965                  goto out;
1963 1966          }
1964 1967          if (cs->vp)
1965 1968                  VN_RELE(cs->vp);
1966 1969  
1967 1970          cs->vp = vp;
1968 1971          *cs->statusp = resp->status = NFS4_OK;
1969 1972  out:
1970 1973          DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1971 1974              CREATE4res *, resp);
1972 1975  }
1973 1976  
1974 1977  /*ARGSUSED*/
1975 1978  static void
1976 1979  rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1977 1980      struct compound_state *cs)
1978 1981  {
1979 1982          DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1980 1983              DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1981 1984  
1982 1985          rfs4_op_inval(argop, resop, req, cs);
1983 1986  
1984 1987          DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1985 1988              DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1986 1989  }
1987 1990  
1988 1991  /*ARGSUSED*/
1989 1992  static void
1990 1993  rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1991 1994      struct compound_state *cs)
1992 1995  {
1993 1996          DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1994 1997          DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1995 1998          rfs4_deleg_state_t *dsp;
1996 1999          nfsstat4 status;
1997 2000  
1998 2001          DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1999 2002              DELEGRETURN4args *, args);
2000 2003  
2001 2004          status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2002 2005          resp->status = *cs->statusp = status;
2003 2006          if (status != NFS4_OK)
2004 2007                  goto out;
2005 2008  
2006 2009          /* Ensure specified filehandle matches */
2007 2010          if (cs->vp != dsp->rds_finfo->rf_vp) {
2008 2011                  resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2009 2012          } else
2010 2013                  rfs4_return_deleg(dsp, FALSE);
2011 2014  
2012 2015          rfs4_update_lease(dsp->rds_client);
2013 2016  
2014 2017          rfs4_deleg_state_rele(dsp);
2015 2018  out:
2016 2019          DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2017 2020              DELEGRETURN4res *, resp);
2018 2021  }
2019 2022  
2020 2023  /*
2021 2024   * Check to see if a given "flavor" is an explicitly shared flavor.
2022 2025   * The assumption of this routine is the "flavor" is already a valid
2023 2026   * flavor in the secinfo list of "exi".
2024 2027   *
2025 2028   *      e.g.
2026 2029   *              # share -o sec=flavor1 /export
2027 2030   *              # share -o sec=flavor2 /export/home
2028 2031   *
2029 2032   *              flavor2 is not an explicitly shared flavor for /export,
2030 2033   *              however it is in the secinfo list for /export thru the
2031 2034   *              server namespace setup.
2032 2035   */
2033 2036  int
2034 2037  is_exported_sec(int flavor, struct exportinfo *exi)
2035 2038  {
2036 2039          int     i;
2037 2040          struct secinfo *sp;
2038 2041  
2039 2042          sp = exi->exi_export.ex_secinfo;
2040 2043          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2041 2044                  if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2042 2045                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2043 2046                          return (SEC_REF_EXPORTED(&sp[i]));
2044 2047                  }
2045 2048          }
2046 2049  
2047 2050          /* Should not reach this point based on the assumption */
2048 2051          return (0);
2049 2052  }
2050 2053  
2051 2054  /*
2052 2055   * Check if the security flavor used in the request matches what is
2053 2056   * required at the export point or at the root pseudo node (exi_root).
2054 2057   *
2055 2058   * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2056 2059   *
2057 2060   */
2058 2061  static int
2059 2062  secinfo_match_or_authnone(struct compound_state *cs)
2060 2063  {
2061 2064          int     i;
2062 2065          struct secinfo *sp;
2063 2066  
2064 2067          /*
2065 2068           * Check cs->nfsflavor (from the request) against
2066 2069           * the current export data in cs->exi.
2067 2070           */
2068 2071          sp = cs->exi->exi_export.ex_secinfo;
2069 2072          for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2070 2073                  if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2071 2074                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2072 2075                          return (1);
2073 2076          }
2074 2077  
2075 2078          return (0);
2076 2079  }
2077 2080  
2078 2081  /*
2079 2082   * Check the access authority for the client and return the correct error.
2080 2083   */
2081 2084  nfsstat4
2082 2085  call_checkauth4(struct compound_state *cs, struct svc_req *req)
2083 2086  {
2084 2087          int     authres;
2085 2088  
2086 2089          /*
2087 2090           * First, check if the security flavor used in the request
2088 2091           * is among the flavors set in the server namespace.
2089 2092           */
2090 2093          if (!secinfo_match_or_authnone(cs)) {
2091 2094                  *cs->statusp = NFS4ERR_WRONGSEC;
2092 2095                  return (*cs->statusp);
2093 2096          }
2094 2097  
2095 2098          authres = checkauth4(cs, req);
2096 2099  
2097 2100          if (authres > 0) {
2098 2101                  *cs->statusp = NFS4_OK;
2099 2102                  if (! (cs->access & CS_ACCESS_LIMITED))
2100 2103                          cs->access = CS_ACCESS_OK;
2101 2104          } else if (authres == 0) {
2102 2105                  *cs->statusp = NFS4ERR_ACCESS;
2103 2106          } else if (authres == -2) {
2104 2107                  *cs->statusp = NFS4ERR_WRONGSEC;
2105 2108          } else {
2106 2109                  *cs->statusp = NFS4ERR_DELAY;
2107 2110          }
2108 2111          return (*cs->statusp);
2109 2112  }
2110 2113  
2111 2114  /*
2112 2115   * bitmap4_to_attrmask is called by getattr and readdir.
2113 2116   * It sets up the vattr mask and determines whether vfsstat call is needed
2114 2117   * based on the input bitmap.
2115 2118   * Returns nfsv4 status.
2116 2119   */
2117 2120  static nfsstat4
2118 2121  bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2119 2122  {
2120 2123          int i;
2121 2124          uint_t  va_mask;
2122 2125          struct statvfs64 *sbp = sargp->sbp;
2123 2126  
2124 2127          sargp->sbp = NULL;
2125 2128          sargp->flag = 0;
2126 2129          sargp->rdattr_error = NFS4_OK;
2127 2130          sargp->mntdfid_set = FALSE;
2128 2131          if (sargp->cs->vp)
2129 2132                  sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2130 2133                      FH4_ATTRDIR | FH4_NAMEDATTR);
2131 2134          else
2132 2135                  sargp->xattr = 0;
2133 2136  
2134 2137          /*
2135 2138           * Set rdattr_error_req to true if return error per
2136 2139           * failed entry rather than fail the readdir.
2137 2140           */
2138 2141          if (breq & FATTR4_RDATTR_ERROR_MASK)
2139 2142                  sargp->rdattr_error_req = 1;
2140 2143          else
2141 2144                  sargp->rdattr_error_req = 0;
2142 2145  
2143 2146          /*
2144 2147           * generate the va_mask
2145 2148           * Handle the easy cases first
2146 2149           */
2147 2150          switch (breq) {
2148 2151          case NFS4_NTOV_ATTR_MASK:
2149 2152                  sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2150 2153                  return (NFS4_OK);
2151 2154  
2152 2155          case NFS4_FS_ATTR_MASK:
2153 2156                  sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2154 2157                  sargp->sbp = sbp;
2155 2158                  return (NFS4_OK);
2156 2159  
2157 2160          case NFS4_NTOV_ATTR_CACHE_MASK:
2158 2161                  sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2159 2162                  return (NFS4_OK);
2160 2163  
2161 2164          case FATTR4_LEASE_TIME_MASK:
2162 2165                  sargp->vap->va_mask = 0;
2163 2166                  return (NFS4_OK);
2164 2167  
2165 2168          default:
2166 2169                  va_mask = 0;
2167 2170                  for (i = 0; i < nfs4_ntov_map_size; i++) {
2168 2171                          if ((breq & nfs4_ntov_map[i].fbit) &&
2169 2172                              nfs4_ntov_map[i].vbit)
2170 2173                                  va_mask |= nfs4_ntov_map[i].vbit;
2171 2174                  }
2172 2175  
2173 2176                  /*
2174 2177                   * Check is vfsstat is needed
2175 2178                   */
2176 2179                  if (breq & NFS4_FS_ATTR_MASK)
2177 2180                          sargp->sbp = sbp;
2178 2181  
2179 2182                  sargp->vap->va_mask = va_mask;
2180 2183                  return (NFS4_OK);
2181 2184          }
2182 2185          /* NOTREACHED */
2183 2186  }
2184 2187  
2185 2188  /*
2186 2189   * bitmap4_get_sysattrs is called by getattr and readdir.
2187 2190   * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2188 2191   * Returns nfsv4 status.
2189 2192   */
2190 2193  static nfsstat4
2191 2194  bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2192 2195  {
2193 2196          int error;
2194 2197          struct compound_state *cs = sargp->cs;
2195 2198          vnode_t *vp = cs->vp;
2196 2199  
2197 2200          if (sargp->sbp != NULL) {
2198 2201                  if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2199 2202                          sargp->sbp = NULL;      /* to identify error */
2200 2203                          return (puterrno4(error));
2201 2204                  }
2202 2205          }
2203 2206  
2204 2207          return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2205 2208  }
2206 2209  
2207 2210  static void
2208 2211  nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2209 2212  {
2210 2213          ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2211 2214              KM_SLEEP);
2212 2215          ntovp->attrcnt = 0;
2213 2216          ntovp->vfsstat = FALSE;
2214 2217  }
2215 2218  
2216 2219  static void
2217 2220  nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2218 2221      struct nfs4_svgetit_arg *sargp)
2219 2222  {
2220 2223          int i;
2221 2224          union nfs4_attr_u *na;
2222 2225          uint8_t *amap;
2223 2226  
2224 2227          /*
2225 2228           * XXX Should do the same checks for whether the bit is set
2226 2229           */
2227 2230          for (i = 0, na = ntovp->na, amap = ntovp->amap;
2228 2231              i < ntovp->attrcnt; i++, na++, amap++) {
2229 2232                  (void) (*nfs4_ntov_map[*amap].sv_getit)(
2230 2233                      NFS4ATTR_FREEIT, sargp, na);
2231 2234          }
2232 2235          if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2233 2236                  /*
2234 2237                   * xdr_free for getattr will be done later
2235 2238                   */
2236 2239                  for (i = 0, na = ntovp->na, amap = ntovp->amap;
2237 2240                      i < ntovp->attrcnt; i++, na++, amap++) {
2238 2241                          xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2239 2242                  }
2240 2243          }
2241 2244          kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2242 2245  }
2243 2246  
2244 2247  /*
2245 2248   * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2246 2249   */
2247 2250  static nfsstat4
2248 2251  do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2249 2252      struct nfs4_svgetit_arg *sargp)
2250 2253  {
2251 2254          int error = 0;
2252 2255          int i, k;
2253 2256          struct nfs4_ntov_table ntov;
2254 2257          XDR xdr;
2255 2258          ulong_t xdr_size;
2256 2259          char *xdr_attrs;
2257 2260          nfsstat4 status = NFS4_OK;
2258 2261          nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2259 2262          union nfs4_attr_u *na;
2260 2263          uint8_t *amap;
2261 2264  
2262 2265          sargp->op = NFS4ATTR_GETIT;
2263 2266          sargp->flag = 0;
2264 2267  
2265 2268          fattrp->attrmask = 0;
2266 2269          /* if no bits requested, then return empty fattr4 */
2267 2270          if (breq == 0) {
2268 2271                  fattrp->attrlist4_len = 0;
2269 2272                  fattrp->attrlist4 = NULL;
2270 2273                  return (NFS4_OK);
2271 2274          }
2272 2275  
2273 2276          /*
2274 2277           * return NFS4ERR_INVAL when client requests write-only attrs
2275 2278           */
2276 2279          if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2277 2280                  return (NFS4ERR_INVAL);
2278 2281  
2279 2282          nfs4_ntov_table_init(&ntov);
2280 2283          na = ntov.na;
2281 2284          amap = ntov.amap;
2282 2285  
2283 2286          /*
2284 2287           * Now loop to get or verify the attrs
2285 2288           */
2286 2289          for (i = 0; i < nfs4_ntov_map_size; i++) {
2287 2290                  if (breq & nfs4_ntov_map[i].fbit) {
2288 2291                          if ((*nfs4_ntov_map[i].sv_getit)(
2289 2292                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2290 2293  
2291 2294                                  error = (*nfs4_ntov_map[i].sv_getit)(
2292 2295                                      NFS4ATTR_GETIT, sargp, na);
2293 2296  
2294 2297                                  /*
2295 2298                                   * Possible error values:
2296 2299                                   * >0 if sv_getit failed to
2297 2300                                   * get the attr; 0 if succeeded;
2298 2301                                   * <0 if rdattr_error and the
2299 2302                                   * attribute cannot be returned.
2300 2303                                   */
2301 2304                                  if (error && !(sargp->rdattr_error_req))
2302 2305                                          goto done;
2303 2306                                  /*
2304 2307                                   * If error then just for entry
2305 2308                                   */
2306 2309                                  if (error == 0) {
2307 2310                                          fattrp->attrmask |=
2308 2311                                              nfs4_ntov_map[i].fbit;
2309 2312                                          *amap++ =
2310 2313                                              (uint8_t)nfs4_ntov_map[i].nval;
2311 2314                                          na++;
2312 2315                                          (ntov.attrcnt)++;
2313 2316                                  } else if ((error > 0) &&
2314 2317                                      (sargp->rdattr_error == NFS4_OK)) {
2315 2318                                          sargp->rdattr_error = puterrno4(error);
2316 2319                                  }
2317 2320                                  error = 0;
2318 2321                          }
2319 2322                  }
2320 2323          }
2321 2324  
2322 2325          /*
2323 2326           * If rdattr_error was set after the return value for it was assigned,
2324 2327           * update it.
2325 2328           */
2326 2329          if (prev_rdattr_error != sargp->rdattr_error) {
2327 2330                  na = ntov.na;
2328 2331                  amap = ntov.amap;
2329 2332                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2330 2333                          k = *amap;
2331 2334                          if (k < FATTR4_RDATTR_ERROR) {
2332 2335                                  continue;
2333 2336                          }
2334 2337                          if ((k == FATTR4_RDATTR_ERROR) &&
2335 2338                              ((*nfs4_ntov_map[k].sv_getit)(
2336 2339                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2337 2340  
2338 2341                                  (void) (*nfs4_ntov_map[k].sv_getit)(
2339 2342                                      NFS4ATTR_GETIT, sargp, na);
2340 2343                          }
2341 2344                          break;
2342 2345                  }
2343 2346          }
2344 2347  
2345 2348          xdr_size = 0;
2346 2349          na = ntov.na;
2347 2350          amap = ntov.amap;
2348 2351          for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2349 2352                  xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2350 2353          }
2351 2354  
2352 2355          fattrp->attrlist4_len = xdr_size;
2353 2356          if (xdr_size) {
2354 2357                  /* freed by rfs4_op_getattr_free() */
2355 2358                  fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2356 2359  
2357 2360                  xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2358 2361  
2359 2362                  na = ntov.na;
2360 2363                  amap = ntov.amap;
2361 2364                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2362 2365                          if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2363 2366                                  DTRACE_PROBE1(nfss__e__getattr4_encfail,
2364 2367                                      int, *amap);
2365 2368                                  status = NFS4ERR_SERVERFAULT;
2366 2369                                  break;
2367 2370                          }
2368 2371                  }
2369 2372                  /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2370 2373          } else {
2371 2374                  fattrp->attrlist4 = NULL;
2372 2375          }
2373 2376  done:
2374 2377  
2375 2378          nfs4_ntov_table_free(&ntov, sargp);
2376 2379  
2377 2380          if (error != 0)
2378 2381                  status = puterrno4(error);
2379 2382  
2380 2383          return (status);
2381 2384  }
2382 2385  
2383 2386  /* ARGSUSED */
2384 2387  static void
2385 2388  rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2386 2389      struct compound_state *cs)
2387 2390  {
2388 2391          GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2389 2392          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2390 2393          struct nfs4_svgetit_arg sarg;
2391 2394          struct statvfs64 sb;
2392 2395          nfsstat4 status;
2393 2396  
2394 2397          DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2395 2398              GETATTR4args *, args);
2396 2399  
2397 2400          if (cs->vp == NULL) {
2398 2401                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2399 2402                  goto out;
2400 2403          }
2401 2404  
2402 2405          if (cs->access == CS_ACCESS_DENIED) {
2403 2406                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2404 2407                  goto out;
2405 2408          }
2406 2409  
2407 2410          sarg.sbp = &sb;
2408 2411          sarg.cs = cs;
2409 2412          sarg.is_referral = B_FALSE;
2410 2413  
2411 2414          status = bitmap4_to_attrmask(args->attr_request, &sarg);
2412 2415          if (status == NFS4_OK) {
2413 2416  
2414 2417                  status = bitmap4_get_sysattrs(&sarg);
2415 2418                  if (status == NFS4_OK) {
2416 2419  
2417 2420                          /* Is this a referral? */
2418 2421                          if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2419 2422                                  /* Older V4 Solaris client sees a link */
2420 2423                                  if (client_is_downrev(req))
2421 2424                                          sarg.vap->va_type = VLNK;
2422 2425                                  else
2423 2426                                          sarg.is_referral = B_TRUE;
2424 2427                          }
2425 2428  
2426 2429                          status = do_rfs4_op_getattr(args->attr_request,
2427 2430                              &resp->obj_attributes, &sarg);
2428 2431                  }
2429 2432          }
2430 2433          *cs->statusp = resp->status = status;
2431 2434  out:
2432 2435          DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2433 2436              GETATTR4res *, resp);
2434 2437  }
2435 2438  
2436 2439  static void
2437 2440  rfs4_op_getattr_free(nfs_resop4 *resop)
2438 2441  {
2439 2442          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2440 2443  
2441 2444          nfs4_fattr4_free(&resp->obj_attributes);
2442 2445  }
2443 2446  
2444 2447  /* ARGSUSED */
2445 2448  static void
2446 2449  rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2447 2450      struct compound_state *cs)
2448 2451  {
2449 2452          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2450 2453  
2451 2454          DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2452 2455  
2453 2456          if (cs->vp == NULL) {
2454 2457                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2455 2458                  goto out;
2456 2459          }
2457 2460          if (cs->access == CS_ACCESS_DENIED) {
2458 2461                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2459 2462                  goto out;
2460 2463          }
2461 2464  
2462 2465          /* check for reparse point at the share point */
2463 2466          if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2464 2467                  /* it's all bad */
2465 2468                  cs->exi->exi_moved = 1;
2466 2469                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2467 2470                  DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2468 2471                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2469 2472                  return;
2470 2473          }
2471 2474  
2472 2475          /* check for reparse point at vp */
2473 2476          if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2474 2477                  /* it's not all bad */
2475 2478                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2476 2479                  DTRACE_PROBE2(nfs4serv__func__referral__moved,
2477 2480                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2478 2481                  return;
2479 2482          }
2480 2483  
2481 2484          resp->object.nfs_fh4_val =
2482 2485              kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2483 2486          nfs_fh4_copy(&cs->fh, &resp->object);
2484 2487          *cs->statusp = resp->status = NFS4_OK;
2485 2488  out:
2486 2489          DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2487 2490              GETFH4res *, resp);
2488 2491  }
2489 2492  
2490 2493  static void
2491 2494  rfs4_op_getfh_free(nfs_resop4 *resop)
2492 2495  {
2493 2496          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2494 2497  
2495 2498          if (resp->status == NFS4_OK &&
2496 2499              resp->object.nfs_fh4_val != NULL) {
2497 2500                  kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2498 2501                  resp->object.nfs_fh4_val = NULL;
2499 2502                  resp->object.nfs_fh4_len = 0;
2500 2503          }
2501 2504  }
2502 2505  
2503 2506  /*
2504 2507   * illegal: args: void
2505 2508   *          res : status (NFS4ERR_OP_ILLEGAL)
2506 2509   */
2507 2510  /* ARGSUSED */
2508 2511  static void
2509 2512  rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2510 2513      struct svc_req *req, struct compound_state *cs)
2511 2514  {
2512 2515          ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2513 2516  
2514 2517          resop->resop = OP_ILLEGAL;
2515 2518          *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2516 2519  }
2517 2520  
2518 2521  /*
2519 2522   * link: args: SAVED_FH: file, CURRENT_FH: target directory
2520 2523   *       res: status. If success - CURRENT_FH unchanged, return change_info
2521 2524   */
2522 2525  /* ARGSUSED */
2523 2526  static void
2524 2527  rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2525 2528      struct compound_state *cs)
2526 2529  {
2527 2530          LINK4args *args = &argop->nfs_argop4_u.oplink;
2528 2531          LINK4res *resp = &resop->nfs_resop4_u.oplink;
2529 2532          int error;
2530 2533          vnode_t *vp;
2531 2534          vnode_t *dvp;
2532 2535          struct vattr bdva, idva, adva;
2533 2536          char *nm;
2534 2537          uint_t  len;
2535 2538          struct sockaddr *ca;
2536 2539          char *name = NULL;
2537 2540          nfsstat4 status;
2538 2541  
2539 2542          DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2540 2543              LINK4args *, args);
2541 2544  
2542 2545          /* SAVED_FH: source object */
2543 2546          vp = cs->saved_vp;
2544 2547          if (vp == NULL) {
2545 2548                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2546 2549                  goto out;
2547 2550          }
2548 2551  
2549 2552          /* CURRENT_FH: target directory */
2550 2553          dvp = cs->vp;
2551 2554          if (dvp == NULL) {
2552 2555                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2553 2556                  goto out;
2554 2557          }
2555 2558  
2556 2559          /*
2557 2560           * If there is a non-shared filesystem mounted on this vnode,
2558 2561           * do not allow to link any file in this directory.
2559 2562           */
2560 2563          if (vn_ismntpt(dvp)) {
2561 2564                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2562 2565                  goto out;
2563 2566          }
2564 2567  
2565 2568          if (cs->access == CS_ACCESS_DENIED) {
2566 2569                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2567 2570                  goto out;
2568 2571          }
2569 2572  
2570 2573          /* Check source object's type validity */
2571 2574          if (vp->v_type == VDIR) {
2572 2575                  *cs->statusp = resp->status = NFS4ERR_ISDIR;
2573 2576                  goto out;
2574 2577          }
2575 2578  
2576 2579          /* Check target directory's type */
2577 2580          if (dvp->v_type != VDIR) {
2578 2581                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2579 2582                  goto out;
2580 2583          }
2581 2584  
2582 2585          if (cs->saved_exi != cs->exi) {
2583 2586                  *cs->statusp = resp->status = NFS4ERR_XDEV;
2584 2587                  goto out;
2585 2588          }
2586 2589  
2587 2590          status = utf8_dir_verify(&args->newname);
2588 2591          if (status != NFS4_OK) {
2589 2592                  *cs->statusp = resp->status = status;
2590 2593                  goto out;
2591 2594          }
2592 2595  
2593 2596          nm = utf8_to_fn(&args->newname, &len, NULL);
2594 2597          if (nm == NULL) {
2595 2598                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2596 2599                  goto out;
2597 2600          }
2598 2601  
2599 2602          if (len > MAXNAMELEN) {
2600 2603                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2601 2604                  kmem_free(nm, len);
2602 2605                  goto out;
2603 2606          }
2604 2607  
2605 2608          if (rdonly4(req, cs)) {
2606 2609                  *cs->statusp = resp->status = NFS4ERR_ROFS;
2607 2610                  kmem_free(nm, len);
2608 2611                  goto out;
2609 2612          }
2610 2613  
2611 2614          /* Get "before" change value */
2612 2615          bdva.va_mask = AT_CTIME|AT_SEQ;
2613 2616          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2614 2617          if (error) {
2615 2618                  *cs->statusp = resp->status = puterrno4(error);
2616 2619                  kmem_free(nm, len);
2617 2620                  goto out;
2618 2621          }
2619 2622  
2620 2623          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2621 2624          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2622 2625              MAXPATHLEN  + 1);
2623 2626  
2624 2627          if (name == NULL) {
2625 2628                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2626 2629                  kmem_free(nm, len);
2627 2630                  goto out;
2628 2631          }
2629 2632  
2630 2633          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2631 2634  
2632 2635          error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2633 2636  
2634 2637          if (nm != name)
2635 2638                  kmem_free(name, MAXPATHLEN + 1);
2636 2639          kmem_free(nm, len);
2637 2640  
2638 2641          /*
2639 2642           * Get the initial "after" sequence number, if it fails, set to zero
2640 2643           */
2641 2644          idva.va_mask = AT_SEQ;
2642 2645          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2643 2646                  idva.va_seq = 0;
2644 2647  
2645 2648          /*
2646 2649           * Force modified data and metadata out to stable storage.
2647 2650           */
2648 2651          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2649 2652          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2650 2653  
2651 2654          if (error) {
2652 2655                  *cs->statusp = resp->status = puterrno4(error);
2653 2656                  goto out;
2654 2657          }
2655 2658  
2656 2659          /*
2657 2660           * Get "after" change value, if it fails, simply return the
2658 2661           * before value.
2659 2662           */
2660 2663          adva.va_mask = AT_CTIME|AT_SEQ;
2661 2664          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2662 2665                  adva.va_ctime = bdva.va_ctime;
2663 2666                  adva.va_seq = 0;
2664 2667          }
2665 2668  
2666 2669          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2667 2670  
2668 2671          /*
2669 2672           * The cinfo.atomic = TRUE only if we have
2670 2673           * non-zero va_seq's, and it has incremented by exactly one
2671 2674           * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2672 2675           */
2673 2676          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2674 2677              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2675 2678                  resp->cinfo.atomic = TRUE;
2676 2679          else
2677 2680                  resp->cinfo.atomic = FALSE;
2678 2681  
2679 2682          *cs->statusp = resp->status = NFS4_OK;
2680 2683  out:
2681 2684          DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2682 2685              LINK4res *, resp);
2683 2686  }
2684 2687  
2685 2688  /*
2686 2689   * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2687 2690   */
2688 2691  
2689 2692  /* ARGSUSED */
2690 2693  static nfsstat4
2691 2694  do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2692 2695  {
2693 2696          int error;
2694 2697          int different_export = 0;
2695 2698          vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2696 2699          struct exportinfo *exi = NULL, *pre_exi = NULL;
2697 2700          nfsstat4 stat;
2698 2701          fid_t fid;
2699 2702          int attrdir, dotdot, walk;
2700 2703          bool_t is_newvp = FALSE;
2701 2704  
2702 2705          if (cs->vp->v_flag & V_XATTRDIR) {
2703 2706                  attrdir = 1;
2704 2707                  ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2705 2708          } else {
2706 2709                  attrdir = 0;
2707 2710                  ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2708 2711          }
2709 2712  
2710 2713          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2711 2714  
2712 2715          /*
2713 2716           * If dotdotting, then need to check whether it's
2714 2717           * above the root of a filesystem, or above an
2715 2718           * export point.
2716 2719           */
2717 2720          if (dotdot) {
2718 2721  
2719 2722                  /*
2720 2723                   * If dotdotting at the root of a filesystem, then
2721 2724                   * need to traverse back to the mounted-on filesystem
2722 2725                   * and do the dotdot lookup there.
2723 2726                   */
2724 2727                  if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2725 2728  
2726 2729                          /*
2727 2730                           * If at the system root, then can
2728 2731                           * go up no further.
2729 2732                           */
2730 2733                          if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2731 2734                                  return (puterrno4(ENOENT));
2732 2735  
2733 2736                          /*
2734 2737                           * Traverse back to the mounted-on filesystem
2735 2738                           */
2736 2739                          cs->vp = untraverse(cs->vp);
2737 2740  
2738 2741                          /*
2739 2742                           * Set the different_export flag so we remember
2740 2743                           * to pick up a new exportinfo entry for
2741 2744                           * this new filesystem.
2742 2745                           */
2743 2746                          different_export = 1;
2744 2747                  } else {
2745 2748  
2746 2749                          /*
2747 2750                           * If dotdotting above an export point then set
2748 2751                           * the different_export to get new export info.
2749 2752                           */
2750 2753                          different_export = nfs_exported(cs->exi, cs->vp);
2751 2754                  }
2752 2755          }
2753 2756  
2754 2757          error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2755 2758              NULL, NULL, NULL);
2756 2759          if (error)
2757 2760                  return (puterrno4(error));
2758 2761  
2759 2762          /*
2760 2763           * If the vnode is in a pseudo filesystem, check whether it is visible.
2761 2764           *
2762 2765           * XXX if the vnode is a symlink and it is not visible in
2763 2766           * a pseudo filesystem, return ENOENT (not following symlink).
2764 2767           * V4 client can not mount such symlink. This is a regression
2765 2768           * from V2/V3.
2766 2769           *
2767 2770           * In the same exported filesystem, if the security flavor used
2768 2771           * is not an explicitly shared flavor, limit the view to the visible
2769 2772           * list entries only. This is not a WRONGSEC case because it's already
2770 2773           * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2771 2774           */
2772 2775          if (!different_export &&
2773 2776              (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2774 2777              cs->access & CS_ACCESS_LIMITED)) {
2775 2778                  if (! nfs_visible(cs->exi, vp, &different_export)) {
2776 2779                          VN_RELE(vp);
2777 2780                          return (puterrno4(ENOENT));
2778 2781                  }
2779 2782          }
2780 2783  
2781 2784          /*
2782 2785           * If it's a mountpoint, then traverse it.
2783 2786           */
2784 2787          if (vn_ismntpt(vp)) {
2785 2788                  pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2786 2789                  pre_tvp = vp;           /* save pre-traversed vnode     */
2787 2790  
2788 2791                  /*
2789 2792                   * hold pre_tvp to counteract rele by traverse.  We will
2790 2793                   * need pre_tvp below if checkexport4 fails
2791 2794                   */
2792 2795                  VN_HOLD(pre_tvp);
2793 2796                  if ((error = traverse(&vp)) != 0) {
2794 2797                          VN_RELE(vp);
2795 2798                          VN_RELE(pre_tvp);
2796 2799                          return (puterrno4(error));
2797 2800                  }
2798 2801                  different_export = 1;
2799 2802          } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2800 2803                  /*
2801 2804                   * The vfsp comparison is to handle the case where
2802 2805                   * a LOFS mount is shared.  lo_lookup traverses mount points,
2803 2806                   * and NFS is unaware of local fs transitions because
2804 2807                   * v_vfsmountedhere isn't set.  For this special LOFS case,
2805 2808                   * the dir and the obj returned by lookup will have different
2806 2809                   * vfs ptrs.
2807 2810                   */
2808 2811                  different_export = 1;
2809 2812          }
2810 2813  
2811 2814          if (different_export) {
2812 2815  
2813 2816                  bzero(&fid, sizeof (fid));
2814 2817                  fid.fid_len = MAXFIDSZ;
2815 2818                  error = vop_fid_pseudo(vp, &fid);
2816 2819                  if (error) {
2817 2820                          VN_RELE(vp);
2818 2821                          if (pre_tvp)
2819 2822                                  VN_RELE(pre_tvp);
2820 2823                          return (puterrno4(error));
2821 2824                  }
2822 2825  
2823 2826                  if (dotdot)
2824 2827                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2825 2828                  else
2826 2829                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2827 2830  
2828 2831                  if (exi == NULL) {
2829 2832                          if (pre_tvp) {
2830 2833                                  /*
2831 2834                                   * If this vnode is a mounted-on vnode,
2832 2835                                   * but the mounted-on file system is not
2833 2836                                   * exported, send back the filehandle for
2834 2837                                   * the mounted-on vnode, not the root of
2835 2838                                   * the mounted-on file system.
2836 2839                                   */
2837 2840                                  VN_RELE(vp);
2838 2841                                  vp = pre_tvp;
2839 2842                                  exi = pre_exi;
2840 2843                          } else {
2841 2844                                  VN_RELE(vp);
2842 2845                                  return (puterrno4(EACCES));
2843 2846                          }
2844 2847                  } else if (pre_tvp) {
2845 2848                          /* we're done with pre_tvp now. release extra hold */
2846 2849                          VN_RELE(pre_tvp);
2847 2850                  }
2848 2851  
2849 2852                  cs->exi = exi;
2850 2853  
2851 2854                  /*
2852 2855                   * Now we do a checkauth4. The reason is that
2853 2856                   * this client/user may not have access to the new
2854 2857                   * exported file system, and if they do,
2855 2858                   * the client/user may be mapped to a different uid.
2856 2859                   *
2857 2860                   * We start with a new cr, because the checkauth4 done
2858 2861                   * in the PUT*FH operation over wrote the cred's uid,
2859 2862                   * gid, etc, and we want the real thing before calling
2860 2863                   * checkauth4()
2861 2864                   */
2862 2865                  crfree(cs->cr);
2863 2866                  cs->cr = crdup(cs->basecr);
2864 2867  
2865 2868                  oldvp = cs->vp;
2866 2869                  cs->vp = vp;
2867 2870                  is_newvp = TRUE;
2868 2871  
2869 2872                  stat = call_checkauth4(cs, req);
2870 2873                  if (stat != NFS4_OK) {
2871 2874                          VN_RELE(cs->vp);
2872 2875                          cs->vp = oldvp;
2873 2876                          return (stat);
2874 2877                  }
2875 2878          }
2876 2879  
2877 2880          /*
2878 2881           * After various NFS checks, do a label check on the path
2879 2882           * component. The label on this path should either be the
2880 2883           * global zone's label or a zone's label. We are only
2881 2884           * interested in the zone's label because exported files
2882 2885           * in global zone is accessible (though read-only) to
2883 2886           * clients. The exportability/visibility check is already
2884 2887           * done before reaching this code.
2885 2888           */
2886 2889          if (is_system_labeled()) {
2887 2890                  bslabel_t *clabel;
2888 2891  
2889 2892                  ASSERT(req->rq_label != NULL);
2890 2893                  clabel = req->rq_label;
2891 2894                  DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2892 2895                      "got client label from request(1)", struct svc_req *, req);
2893 2896  
2894 2897                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2895 2898                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2896 2899                              cs->exi)) {
2897 2900                                  error = EACCES;
2898 2901                                  goto err_out;
2899 2902                          }
2900 2903                  } else {
2901 2904                          /*
2902 2905                           * We grant access to admin_low label clients
2903 2906                           * only if the client is trusted, i.e. also
2904 2907                           * running Solaris Trusted Extension.
2905 2908                           */
2906 2909                          struct sockaddr *ca;
2907 2910                          int             addr_type;
2908 2911                          void            *ipaddr;
2909 2912                          tsol_tpc_t      *tp;
2910 2913  
2911 2914                          ca = (struct sockaddr *)svc_getrpccaller(
2912 2915                              req->rq_xprt)->buf;
2913 2916                          if (ca->sa_family == AF_INET) {
2914 2917                                  addr_type = IPV4_VERSION;
2915 2918                                  ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2916 2919                          } else if (ca->sa_family == AF_INET6) {
2917 2920                                  addr_type = IPV6_VERSION;
2918 2921                                  ipaddr = &((struct sockaddr_in6 *)
2919 2922                                      ca)->sin6_addr;
2920 2923                          }
2921 2924                          tp = find_tpc(ipaddr, addr_type, B_FALSE);
2922 2925                          if (tp == NULL || tp->tpc_tp.tp_doi !=
2923 2926                              l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2924 2927                              SUN_CIPSO) {
2925 2928                                  if (tp != NULL)
2926 2929                                          TPC_RELE(tp);
2927 2930                                  error = EACCES;
2928 2931                                  goto err_out;
2929 2932                          }
2930 2933                          TPC_RELE(tp);
2931 2934                  }
2932 2935          }
2933 2936  
2934 2937          error = makefh4(&cs->fh, vp, cs->exi);
2935 2938  
2936 2939  err_out:
2937 2940          if (error) {
2938 2941                  if (is_newvp) {
2939 2942                          VN_RELE(cs->vp);
2940 2943                          cs->vp = oldvp;
2941 2944                  } else
2942 2945                          VN_RELE(vp);
2943 2946                  return (puterrno4(error));
2944 2947          }
2945 2948  
2946 2949          if (!is_newvp) {
2947 2950                  if (cs->vp)
2948 2951                          VN_RELE(cs->vp);
2949 2952                  cs->vp = vp;
2950 2953          } else if (oldvp)
2951 2954                  VN_RELE(oldvp);
2952 2955  
2953 2956          /*
2954 2957           * if did lookup on attrdir and didn't lookup .., set named
2955 2958           * attr fh flag
2956 2959           */
2957 2960          if (attrdir && ! dotdot)
2958 2961                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2959 2962  
2960 2963          /* Assume false for now, open proc will set this */
2961 2964          cs->mandlock = FALSE;
2962 2965  
2963 2966          return (NFS4_OK);
2964 2967  }
2965 2968  
2966 2969  /* ARGSUSED */
2967 2970  static void
2968 2971  rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2969 2972      struct compound_state *cs)
2970 2973  {
2971 2974          LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2972 2975          LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2973 2976          char *nm;
2974 2977          uint_t len;
2975 2978          struct sockaddr *ca;
2976 2979          char *name = NULL;
2977 2980          nfsstat4 status;
2978 2981  
2979 2982          DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2980 2983              LOOKUP4args *, args);
2981 2984  
2982 2985          if (cs->vp == NULL) {
2983 2986                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2984 2987                  goto out;
2985 2988          }
2986 2989  
2987 2990          if (cs->vp->v_type == VLNK) {
2988 2991                  *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2989 2992                  goto out;
2990 2993          }
2991 2994  
2992 2995          if (cs->vp->v_type != VDIR) {
2993 2996                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2994 2997                  goto out;
2995 2998          }
2996 2999  
2997 3000          status = utf8_dir_verify(&args->objname);
2998 3001          if (status != NFS4_OK) {
2999 3002                  *cs->statusp = resp->status = status;
3000 3003                  goto out;
3001 3004          }
3002 3005  
3003 3006          nm = utf8_to_str(&args->objname, &len, NULL);
3004 3007          if (nm == NULL) {
3005 3008                  *cs->statusp = resp->status = NFS4ERR_INVAL;
3006 3009                  goto out;
3007 3010          }
3008 3011  
3009 3012          if (len > MAXNAMELEN) {
3010 3013                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3011 3014                  kmem_free(nm, len);
3012 3015                  goto out;
3013 3016          }
3014 3017  
3015 3018          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3016 3019          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3017 3020              MAXPATHLEN  + 1);
3018 3021  
3019 3022          if (name == NULL) {
3020 3023                  *cs->statusp = resp->status = NFS4ERR_INVAL;
3021 3024                  kmem_free(nm, len);
3022 3025                  goto out;
3023 3026          }
3024 3027  
3025 3028          *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3026 3029  
3027 3030          if (name != nm)
3028 3031                  kmem_free(name, MAXPATHLEN + 1);
3029 3032          kmem_free(nm, len);
3030 3033  
3031 3034  out:
3032 3035          DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3033 3036              LOOKUP4res *, resp);
3034 3037  }
3035 3038  
3036 3039  /* ARGSUSED */
3037 3040  static void
3038 3041  rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3039 3042      struct compound_state *cs)
3040 3043  {
3041 3044          LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3042 3045  
3043 3046          DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3044 3047  
3045 3048          if (cs->vp == NULL) {
3046 3049                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3047 3050                  goto out;
3048 3051          }
3049 3052  
3050 3053          if (cs->vp->v_type != VDIR) {
3051 3054                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3052 3055                  goto out;
3053 3056          }
3054 3057  
3055 3058          *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3056 3059  
3057 3060          /*
3058 3061           * From NFSV4 Specification, LOOKUPP should not check for
3059 3062           * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3060 3063           */
3061 3064          if (resp->status == NFS4ERR_WRONGSEC) {
3062 3065                  *cs->statusp = resp->status = NFS4_OK;
3063 3066          }
3064 3067  
3065 3068  out:
3066 3069          DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3067 3070              LOOKUPP4res *, resp);
3068 3071  }
3069 3072  
3070 3073  
3071 3074  /*ARGSUSED2*/
3072 3075  static void
3073 3076  rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3074 3077      struct compound_state *cs)
3075 3078  {
3076 3079          OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3077 3080          OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3078 3081          vnode_t         *avp = NULL;
3079 3082          int             lookup_flags = LOOKUP_XATTR, error;
3080 3083          int             exp_ro = 0;
3081 3084  
3082 3085          DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3083 3086              OPENATTR4args *, args);
3084 3087  
3085 3088          if (cs->vp == NULL) {
3086 3089                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3087 3090                  goto out;
3088 3091          }
3089 3092  
3090 3093          if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3091 3094              !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3092 3095                  *cs->statusp = resp->status = puterrno4(ENOTSUP);
3093 3096                  goto out;
3094 3097          }
3095 3098  
3096 3099          /*
3097 3100           * If file system supports passing ACE mask to VOP_ACCESS then
3098 3101           * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3099 3102           */
3100 3103  
3101 3104          if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3102 3105                  error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3103 3106                      V_ACE_MASK, cs->cr, NULL);
3104 3107          else
3105 3108                  error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3106 3109                      (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3107 3110                      (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3108 3111  
3109 3112          if (error) {
3110 3113                  *cs->statusp = resp->status = puterrno4(EACCES);
3111 3114                  goto out;
3112 3115          }
3113 3116  
3114 3117          /*
3115 3118           * The CREATE_XATTR_DIR VOP flag cannot be specified if
3116 3119           * the file system is exported read-only -- regardless of
3117 3120           * createdir flag.  Otherwise the attrdir would be created
3118 3121           * (assuming server fs isn't mounted readonly locally).  If
3119 3122           * VOP_LOOKUP returns ENOENT in this case, the error will
3120 3123           * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3121 3124           * because specfs has no VOP_LOOKUP op, so the macro would
3122 3125           * return ENOSYS.  EINVAL is returned by all (current)
3123 3126           * Solaris file system implementations when any of their
3124 3127           * restrictions are violated (xattr(dir) can't have xattrdir).
3125 3128           * Returning NOTSUPP is more appropriate in this case
3126 3129           * because the object will never be able to have an attrdir.
3127 3130           */
3128 3131          if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3129 3132                  lookup_flags |= CREATE_XATTR_DIR;
3130 3133  
3131 3134          error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3132 3135              NULL, NULL, NULL);
3133 3136  
3134 3137          if (error) {
3135 3138                  if (error == ENOENT && args->createdir && exp_ro)
3136 3139                          *cs->statusp = resp->status = puterrno4(EROFS);
3137 3140                  else if (error == EINVAL || error == ENOSYS)
3138 3141                          *cs->statusp = resp->status = puterrno4(ENOTSUP);
3139 3142                  else
3140 3143                          *cs->statusp = resp->status = puterrno4(error);
3141 3144                  goto out;
3142 3145          }
3143 3146  
3144 3147          ASSERT(avp->v_flag & V_XATTRDIR);
3145 3148  
3146 3149          error = makefh4(&cs->fh, avp, cs->exi);
3147 3150  
3148 3151          if (error) {
3149 3152                  VN_RELE(avp);
3150 3153                  *cs->statusp = resp->status = puterrno4(error);
3151 3154                  goto out;
3152 3155          }
3153 3156  
3154 3157          VN_RELE(cs->vp);
3155 3158          cs->vp = avp;
3156 3159  
3157 3160          /*
3158 3161           * There is no requirement for an attrdir fh flag
3159 3162           * because the attrdir has a vnode flag to distinguish
3160 3163           * it from regular (non-xattr) directories.  The
3161 3164           * FH4_ATTRDIR flag is set for future sanity checks.
3162 3165           */
3163 3166          set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3164 3167          *cs->statusp = resp->status = NFS4_OK;
3165 3168  
3166 3169  out:
3167 3170          DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3168 3171              OPENATTR4res *, resp);
3169 3172  }
3170 3173  
3171 3174  static int
3172 3175  do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3173 3176      caller_context_t *ct)
3174 3177  {
3175 3178          int error;
3176 3179          int i;
3177 3180          clock_t delaytime;
3178 3181  
3179 3182          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3180 3183  
3181 3184          /*
3182 3185           * Don't block on mandatory locks. If this routine returns
3183 3186           * EAGAIN, the caller should return NFS4ERR_LOCKED.
3184 3187           */
3185 3188          uio->uio_fmode = FNONBLOCK;
3186 3189  
3187 3190          for (i = 0; i < rfs4_maxlock_tries; i++) {
3188 3191  
3189 3192  
3190 3193                  if (direction == FREAD) {
3191 3194                          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3192 3195                          error = VOP_READ(vp, uio, ioflag, cred, ct);
3193 3196                          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3194 3197                  } else {
3195 3198                          (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3196 3199                          error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3197 3200                          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3198 3201                  }
3199 3202  
3200 3203                  if (error != EAGAIN)
3201 3204                          break;
3202 3205  
3203 3206                  if (i < rfs4_maxlock_tries - 1) {
3204 3207                          delay(delaytime);
3205 3208                          delaytime *= 2;
3206 3209                  }
3207 3210          }
3208 3211  
3209 3212          return (error);
3210 3213  }
3211 3214  
3212 3215  /* ARGSUSED */
3213 3216  static void
3214 3217  rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3215 3218      struct compound_state *cs)
3216 3219  {
3217 3220          READ4args *args = &argop->nfs_argop4_u.opread;
3218 3221          READ4res *resp = &resop->nfs_resop4_u.opread;
3219 3222          int error;
3220 3223          int verror;
3221 3224          vnode_t *vp;
3222 3225          struct vattr va;
3223 3226          struct iovec iov, *iovp = NULL;
3224 3227          int iovcnt;
3225 3228          struct uio uio;
3226 3229          u_offset_t offset;
3227 3230          bool_t *deleg = &cs->deleg;
3228 3231          nfsstat4 stat;
3229 3232          int in_crit = 0;
3230 3233          mblk_t *mp = NULL;
3231 3234          int alloc_err = 0;
3232 3235          int rdma_used = 0;
3233 3236          int loaned_buffers;
3234 3237          caller_context_t ct;
3235 3238          struct uio *uiop;
3236 3239  
3237 3240          DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3238 3241              READ4args, args);
3239 3242  
3240 3243          vp = cs->vp;
3241 3244          if (vp == NULL) {
3242 3245                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3243 3246                  goto out;
3244 3247          }
3245 3248          if (cs->access == CS_ACCESS_DENIED) {
3246 3249                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3247 3250                  goto out;
3248 3251          }
3249 3252  
3250 3253          if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3251 3254              deleg, TRUE, &ct)) != NFS4_OK) {
3252 3255                  *cs->statusp = resp->status = stat;
3253 3256                  goto out;
3254 3257          }
3255 3258  
3256 3259          /*
3257 3260           * Enter the critical region before calling VOP_RWLOCK
3258 3261           * to avoid a deadlock with write requests.
3259 3262           */
3260 3263          if (nbl_need_check(vp)) {
3261 3264                  nbl_start_crit(vp, RW_READER);
3262 3265                  in_crit = 1;
3263 3266                  if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3264 3267                      &ct)) {
3265 3268                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
3266 3269                          goto out;
3267 3270                  }
3268 3271          }
3269 3272  
3270 3273          if (args->wlist) {
3271 3274                  if (args->count > clist_len(args->wlist)) {
3272 3275                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3273 3276                          goto out;
3274 3277                  }
3275 3278                  rdma_used = 1;
3276 3279          }
3277 3280  
3278 3281          /* use loaned buffers for TCP */
3279 3282          loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3280 3283  
3281 3284          va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3282 3285          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3283 3286  
3284 3287          /*
3285 3288           * If we can't get the attributes, then we can't do the
3286 3289           * right access checking.  So, we'll fail the request.
3287 3290           */
3288 3291          if (verror) {
3289 3292                  *cs->statusp = resp->status = puterrno4(verror);
3290 3293                  goto out;
3291 3294          }
3292 3295  
3293 3296          if (vp->v_type != VREG) {
3294 3297                  *cs->statusp = resp->status =
3295 3298                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3296 3299                  goto out;
3297 3300          }
3298 3301  
3299 3302          if (crgetuid(cs->cr) != va.va_uid &&
3300 3303              (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3301 3304              (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3302 3305                  *cs->statusp = resp->status = puterrno4(error);
3303 3306                  goto out;
3304 3307          }
3305 3308  
3306 3309          if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3307 3310                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3308 3311                  goto out;
3309 3312          }
3310 3313  
3311 3314          offset = args->offset;
3312 3315          if (offset >= va.va_size) {
3313 3316                  *cs->statusp = resp->status = NFS4_OK;
3314 3317                  resp->eof = TRUE;
3315 3318                  resp->data_len = 0;
3316 3319                  resp->data_val = NULL;
3317 3320                  resp->mblk = NULL;
3318 3321                  /* RDMA */
3319 3322                  resp->wlist = args->wlist;
3320 3323                  resp->wlist_len = resp->data_len;
3321 3324                  *cs->statusp = resp->status = NFS4_OK;
3322 3325                  if (resp->wlist)
3323 3326                          clist_zero_len(resp->wlist);
3324 3327                  goto out;
3325 3328          }
3326 3329  
3327 3330          if (args->count == 0) {
3328 3331                  *cs->statusp = resp->status = NFS4_OK;
3329 3332                  resp->eof = FALSE;
3330 3333                  resp->data_len = 0;
3331 3334                  resp->data_val = NULL;
3332 3335                  resp->mblk = NULL;
3333 3336                  /* RDMA */
3334 3337                  resp->wlist = args->wlist;
3335 3338                  resp->wlist_len = resp->data_len;
3336 3339                  if (resp->wlist)
3337 3340                          clist_zero_len(resp->wlist);
3338 3341                  goto out;
3339 3342          }
3340 3343  
3341 3344          /*
3342 3345           * Do not allocate memory more than maximum allowed
3343 3346           * transfer size
3344 3347           */
3345 3348          if (args->count > rfs4_tsize(req))
3346 3349                  args->count = rfs4_tsize(req);
3347 3350  
3348 3351          if (loaned_buffers) {
3349 3352                  uiop = (uio_t *)rfs_setup_xuio(vp);
3350 3353                  ASSERT(uiop != NULL);
3351 3354                  uiop->uio_segflg = UIO_SYSSPACE;
3352 3355                  uiop->uio_loffset = args->offset;
3353 3356                  uiop->uio_resid = args->count;
3354 3357  
3355 3358                  /* Jump to do the read if successful */
3356 3359                  if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3357 3360                          /*
3358 3361                           * Need to hold the vnode until after VOP_RETZCBUF()
3359 3362                           * is called.
3360 3363                           */
3361 3364                          VN_HOLD(vp);
3362 3365                          goto doio_read;
3363 3366                  }
3364 3367  
3365 3368                  DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3366 3369                      uiop->uio_loffset, int, uiop->uio_resid);
3367 3370  
3368 3371                  uiop->uio_extflg = 0;
3369 3372  
3370 3373                  /* failure to setup for zero copy */
3371 3374                  rfs_free_xuio((void *)uiop);
3372 3375                  loaned_buffers = 0;
3373 3376          }
3374 3377  
3375 3378          /*
3376 3379           * If returning data via RDMA Write, then grab the chunk list. If we
3377 3380           * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3378 3381           */
3379 3382          if (rdma_used) {
3380 3383                  mp = NULL;
3381 3384                  (void) rdma_get_wchunk(req, &iov, args->wlist);
3382 3385                  uio.uio_iov = &iov;
3383 3386                  uio.uio_iovcnt = 1;
3384 3387          } else {
3385 3388                  /*
3386 3389                   * mp will contain the data to be sent out in the read reply.
3387 3390                   * It will be freed after the reply has been sent.
3388 3391                   */
3389 3392                  mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3390 3393                  ASSERT(mp != NULL);
3391 3394                  ASSERT(alloc_err == 0);
3392 3395                  uio.uio_iov = iovp;
3393 3396                  uio.uio_iovcnt = iovcnt;
3394 3397          }
3395 3398  
3396 3399          uio.uio_segflg = UIO_SYSSPACE;
3397 3400          uio.uio_extflg = UIO_COPY_CACHED;
3398 3401          uio.uio_loffset = args->offset;
3399 3402          uio.uio_resid = args->count;
3400 3403          uiop = &uio;
3401 3404  
3402 3405  doio_read:
3403 3406          error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3404 3407  
3405 3408          va.va_mask = AT_SIZE;
3406 3409          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3407 3410  
3408 3411          if (error) {
3409 3412                  if (mp)
3410 3413                          freemsg(mp);
3411 3414                  *cs->statusp = resp->status = puterrno4(error);
3412 3415                  goto out;
3413 3416          }
3414 3417  
3415 3418          /* make mblk using zc buffers */
3416 3419          if (loaned_buffers) {
3417 3420                  mp = uio_to_mblk(uiop);
3418 3421                  ASSERT(mp != NULL);
3419 3422          }
3420 3423  
3421 3424          *cs->statusp = resp->status = NFS4_OK;
3422 3425  
3423 3426          ASSERT(uiop->uio_resid >= 0);
3424 3427          resp->data_len = args->count - uiop->uio_resid;
3425 3428          if (mp) {
3426 3429                  resp->data_val = (char *)mp->b_datap->db_base;
3427 3430                  rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3428 3431          } else {
3429 3432                  resp->data_val = (caddr_t)iov.iov_base;
3430 3433          }
3431 3434  
3432 3435          resp->mblk = mp;
3433 3436  
3434 3437          if (!verror && offset + resp->data_len == va.va_size)
3435 3438                  resp->eof = TRUE;
3436 3439          else
3437 3440                  resp->eof = FALSE;
3438 3441  
3439 3442          if (rdma_used) {
3440 3443                  if (!rdma_setup_read_data4(args, resp)) {
3441 3444                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3442 3445                  }
3443 3446          } else {
3444 3447                  resp->wlist = NULL;
3445 3448          }
3446 3449  
3447 3450  out:
3448 3451          if (in_crit)
3449 3452                  nbl_end_crit(vp);
3450 3453  
3451 3454          if (iovp != NULL)
3452 3455                  kmem_free(iovp, iovcnt * sizeof (struct iovec));
3453 3456  
3454 3457          DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3455 3458              READ4res *, resp);
3456 3459  }
3457 3460  
3458 3461  static void
3459 3462  rfs4_op_read_free(nfs_resop4 *resop)
3460 3463  {
3461 3464          READ4res        *resp = &resop->nfs_resop4_u.opread;
3462 3465  
3463 3466          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3464 3467                  freemsg(resp->mblk);
3465 3468                  resp->mblk = NULL;
3466 3469                  resp->data_val = NULL;
3467 3470                  resp->data_len = 0;
3468 3471          }
3469 3472  }
3470 3473  
3471 3474  static void
3472 3475  rfs4_op_readdir_free(nfs_resop4 * resop)
3473 3476  {
3474 3477          READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3475 3478  
3476 3479          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3477 3480                  freeb(resp->mblk);
3478 3481                  resp->mblk = NULL;
3479 3482                  resp->data_len = 0;
3480 3483          }
3481 3484  }
3482 3485  
3483 3486  
3484 3487  /* ARGSUSED */
3485 3488  static void
3486 3489  rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3487 3490      struct compound_state *cs)
3488 3491  {
3489 3492          PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3490 3493          int             error;
3491 3494          vnode_t         *vp;
3492 3495          struct exportinfo *exi, *sav_exi;
3493 3496          nfs_fh4_fmt_t   *fh_fmtp;
3494 3497          nfs_export_t *ne = nfs_get_export();
3495 3498  
3496 3499          DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3497 3500  
3498 3501          if (cs->vp) {
3499 3502                  VN_RELE(cs->vp);
3500 3503                  cs->vp = NULL;
3501 3504          }
3502 3505  
3503 3506          if (cs->cr)
3504 3507                  crfree(cs->cr);
3505 3508  
3506 3509          cs->cr = crdup(cs->basecr);
3507 3510  
3508 3511          vp = ne->exi_public->exi_vp;
3509 3512          if (vp == NULL) {
3510 3513                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3511 3514                  goto out;
3512 3515          }
3513 3516  
3514 3517          error = makefh4(&cs->fh, vp, ne->exi_public);
3515 3518          if (error != 0) {
3516 3519                  *cs->statusp = resp->status = puterrno4(error);
3517 3520                  goto out;
3518 3521          }
3519 3522          sav_exi = cs->exi;
3520 3523          if (ne->exi_public == ne->exi_root) {
3521 3524                  /*
3522 3525                   * No filesystem is actually shared public, so we default
3523 3526                   * to exi_root. In this case, we must check whether root
3524 3527                   * is exported.
3525 3528                   */
3526 3529                  fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3527 3530  
3528 3531                  /*
3529 3532                   * if root filesystem is exported, the exportinfo struct that we
3530 3533                   * should use is what checkexport4 returns, because root_exi is
3531 3534                   * actually a mostly empty struct.
3532 3535                   */
3533 3536                  exi = checkexport4(&fh_fmtp->fh4_fsid,
3534 3537                      (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3535 3538                  cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3536 3539          } else {
3537 3540                  /*
3538 3541                   * it's a properly shared filesystem
3539 3542                   */
3540 3543                  cs->exi = ne->exi_public;
3541 3544          }
3542 3545  
3543 3546          if (is_system_labeled()) {
3544 3547                  bslabel_t *clabel;
3545 3548  
3546 3549                  ASSERT(req->rq_label != NULL);
3547 3550                  clabel = req->rq_label;
3548 3551                  DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3549 3552                      "got client label from request(1)",
3550 3553                      struct svc_req *, req);
3551 3554                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3552 3555                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3553 3556                              cs->exi)) {
3554 3557                                  *cs->statusp = resp->status =
3555 3558                                      NFS4ERR_SERVERFAULT;
3556 3559                                  goto out;
3557 3560                          }
3558 3561                  }
3559 3562          }
3560 3563  
3561 3564          VN_HOLD(vp);
3562 3565          cs->vp = vp;
3563 3566  
3564 3567          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3565 3568                  VN_RELE(cs->vp);
3566 3569                  cs->vp = NULL;
3567 3570                  cs->exi = sav_exi;
3568 3571                  goto out;
3569 3572          }
3570 3573  
3571 3574          *cs->statusp = resp->status = NFS4_OK;
3572 3575  out:
3573 3576          DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3574 3577              PUTPUBFH4res *, resp);
3575 3578  }
3576 3579  
3577 3580  /*
3578 3581   * XXX - issue with put*fh operations. Suppose /export/home is exported.
3579 3582   * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3580 3583   * or joe have restrictive search permissions, then we shouldn't let
3581 3584   * the client get a file handle. This is easy to enforce. However, we
3582 3585   * don't know what security flavor should be used until we resolve the
3583 3586   * path name. Another complication is uid mapping. If root is
3584 3587   * the user, then it will be mapped to the anonymous user by default,
3585 3588   * but we won't know that till we've resolved the path name. And we won't
3586 3589   * know what the anonymous user is.
3587 3590   * Luckily, SECINFO is specified to take a full filename.
3588 3591   * So what we will have to do in rfs4_op_lookup is check that the flavor of
3589 3592   * the target object matches that of the request, and if root was the
3590 3593   * caller, check for the root= and anon= options, and if necessary,
3591 3594   * repeat the lookup using the right cred_t. But that's not done yet.
3592 3595   */
3593 3596  /* ARGSUSED */
3594 3597  static void
3595 3598  rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3596 3599      struct compound_state *cs)
3597 3600  {
3598 3601          PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3599 3602          PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3600 3603          nfs_fh4_fmt_t *fh_fmtp;
3601 3604  
3602 3605          DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3603 3606              PUTFH4args *, args);
3604 3607  
3605 3608          if (cs->vp) {
3606 3609                  VN_RELE(cs->vp);
3607 3610                  cs->vp = NULL;
3608 3611          }
3609 3612  
3610 3613          if (cs->cr) {
3611 3614                  crfree(cs->cr);
3612 3615                  cs->cr = NULL;
3613 3616          }
3614 3617  
3615 3618  
3616 3619          if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3617 3620                  *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3618 3621                  goto out;
3619 3622          }
3620 3623  
3621 3624          fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3622 3625          cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3623 3626              NULL);
3624 3627  
3625 3628          if (cs->exi == NULL) {
3626 3629                  *cs->statusp = resp->status = NFS4ERR_STALE;
3627 3630                  goto out;
3628 3631          }
3629 3632  
3630 3633          cs->cr = crdup(cs->basecr);
3631 3634  
3632 3635          ASSERT(cs->cr != NULL);
3633 3636  
3634 3637          if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3635 3638                  *cs->statusp = resp->status;
3636 3639                  goto out;
3637 3640          }
3638 3641  
3639 3642          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3640 3643                  VN_RELE(cs->vp);
3641 3644                  cs->vp = NULL;
3642 3645                  goto out;
3643 3646          }
3644 3647  
3645 3648          nfs_fh4_copy(&args->object, &cs->fh);
3646 3649          *cs->statusp = resp->status = NFS4_OK;
3647 3650          cs->deleg = FALSE;
3648 3651  
3649 3652  out:
3650 3653          DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3651 3654              PUTFH4res *, resp);
3652 3655  }
3653 3656  
3654 3657  /* ARGSUSED */
3655 3658  static void
3656 3659  rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3657 3660      struct compound_state *cs)
3658 3661  {
3659 3662          PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3660 3663          int error;
3661 3664          fid_t fid;
3662 3665          struct exportinfo *exi, *sav_exi;
3663 3666  
3664 3667          DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3665 3668  
3666 3669          if (cs->vp) {
3667 3670                  VN_RELE(cs->vp);
3668 3671                  cs->vp = NULL;
3669 3672          }
3670 3673  
3671 3674          if (cs->cr)
3672 3675                  crfree(cs->cr);
3673 3676  
3674 3677          cs->cr = crdup(cs->basecr);
3675 3678  
3676 3679          /*
3677 3680           * Using rootdir, the system root vnode,
3678 3681           * get its fid.
3679 3682           */
3680 3683          bzero(&fid, sizeof (fid));
3681 3684          fid.fid_len = MAXFIDSZ;
3682 3685          error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3683 3686          if (error != 0) {
3684 3687                  *cs->statusp = resp->status = puterrno4(error);
3685 3688                  goto out;
3686 3689          }
3687 3690  
3688 3691          /*
3689 3692           * Then use the root fsid & fid it to find out if it's exported
3690 3693           *
3691 3694           * If the server root isn't exported directly, then
3692 3695           * it should at least be a pseudo export based on
3693 3696           * one or more exports further down in the server's
3694 3697           * file tree.
3695 3698           */
3696 3699          exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3697 3700          if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3698 3701                  NFS4_DEBUG(rfs4_debug,
3699 3702                      (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3700 3703                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3701 3704                  goto out;
3702 3705          }
3703 3706  
3704 3707          /*
3705 3708           * Now make a filehandle based on the root
3706 3709           * export and root vnode.
3707 3710           */
3708 3711          error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3709 3712          if (error != 0) {
3710 3713                  *cs->statusp = resp->status = puterrno4(error);
3711 3714                  goto out;
3712 3715          }
3713 3716  
3714 3717          sav_exi = cs->exi;
3715 3718          cs->exi = exi;
3716 3719  
3717 3720          VN_HOLD(ZONE_ROOTVP());
3718 3721          cs->vp = ZONE_ROOTVP();
3719 3722  
3720 3723          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3721 3724                  VN_RELE(cs->vp);
3722 3725                  cs->vp = NULL;
3723 3726                  cs->exi = sav_exi;
3724 3727                  goto out;
3725 3728          }
3726 3729  
3727 3730          *cs->statusp = resp->status = NFS4_OK;
3728 3731          cs->deleg = FALSE;
3729 3732  out:
3730 3733          DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3731 3734              PUTROOTFH4res *, resp);
3732 3735  }
3733 3736  
3734 3737  /*
3735 3738   * readlink: args: CURRENT_FH.
3736 3739   *      res: status. If success - CURRENT_FH unchanged, return linktext.
3737 3740   */
3738 3741  
3739 3742  /* ARGSUSED */
3740 3743  static void
3741 3744  rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3742 3745      struct compound_state *cs)
3743 3746  {
3744 3747          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3745 3748          int error;
3746 3749          vnode_t *vp;
3747 3750          struct iovec iov;
3748 3751          struct vattr va;
3749 3752          struct uio uio;
3750 3753          char *data;
3751 3754          struct sockaddr *ca;
3752 3755          char *name = NULL;
3753 3756          int is_referral;
3754 3757  
3755 3758          DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3756 3759  
3757 3760          /* CURRENT_FH: directory */
3758 3761          vp = cs->vp;
3759 3762          if (vp == NULL) {
3760 3763                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3761 3764                  goto out;
3762 3765          }
3763 3766  
3764 3767          if (cs->access == CS_ACCESS_DENIED) {
3765 3768                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3766 3769                  goto out;
3767 3770          }
3768 3771  
3769 3772          /* Is it a referral? */
3770 3773          if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3771 3774  
3772 3775                  is_referral = 1;
3773 3776  
3774 3777          } else {
3775 3778  
3776 3779                  is_referral = 0;
3777 3780  
3778 3781                  if (vp->v_type == VDIR) {
3779 3782                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
3780 3783                          goto out;
3781 3784                  }
3782 3785  
3783 3786                  if (vp->v_type != VLNK) {
3784 3787                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3785 3788                          goto out;
3786 3789                  }
3787 3790  
3788 3791          }
3789 3792  
3790 3793          va.va_mask = AT_MODE;
3791 3794          error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3792 3795          if (error) {
3793 3796                  *cs->statusp = resp->status = puterrno4(error);
3794 3797                  goto out;
3795 3798          }
3796 3799  
3797 3800          if (MANDLOCK(vp, va.va_mode)) {
3798 3801                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3799 3802                  goto out;
3800 3803          }
3801 3804  
3802 3805          data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3803 3806  
3804 3807          if (is_referral) {
3805 3808                  char *s;
3806 3809                  size_t strsz;
3807 3810  
3808 3811                  /* Get an artificial symlink based on a referral */
3809 3812                  s = build_symlink(vp, cs->cr, &strsz);
3810 3813                  global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3811 3814                  DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3812 3815                      vnode_t *, vp, char *, s);
3813 3816                  if (s == NULL)
3814 3817                          error = EINVAL;
3815 3818                  else {
3816 3819                          error = 0;
3817 3820                          (void) strlcpy(data, s, MAXPATHLEN + 1);
3818 3821                          kmem_free(s, strsz);
3819 3822                  }
3820 3823  
3821 3824          } else {
3822 3825  
3823 3826                  iov.iov_base = data;
3824 3827                  iov.iov_len = MAXPATHLEN;
3825 3828                  uio.uio_iov = &iov;
3826 3829                  uio.uio_iovcnt = 1;
3827 3830                  uio.uio_segflg = UIO_SYSSPACE;
3828 3831                  uio.uio_extflg = UIO_COPY_CACHED;
3829 3832                  uio.uio_loffset = 0;
3830 3833                  uio.uio_resid = MAXPATHLEN;
3831 3834  
3832 3835                  error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3833 3836  
3834 3837                  if (!error)
3835 3838                          *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3836 3839          }
3837 3840  
3838 3841          if (error) {
3839 3842                  kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3840 3843                  *cs->statusp = resp->status = puterrno4(error);
3841 3844                  goto out;
3842 3845          }
3843 3846  
3844 3847          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3845 3848          name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3846 3849              MAXPATHLEN  + 1);
3847 3850  
3848 3851          if (name == NULL) {
3849 3852                  /*
3850 3853                   * Even though the conversion failed, we return
3851 3854                   * something. We just don't translate it.
3852 3855                   */
3853 3856                  name = data;
3854 3857          }
3855 3858  
3856 3859          /*
3857 3860           * treat link name as data
3858 3861           */
3859 3862          (void) str_to_utf8(name, (utf8string *)&resp->link);
3860 3863  
3861 3864          if (name != data)
3862 3865                  kmem_free(name, MAXPATHLEN + 1);
3863 3866          kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3864 3867          *cs->statusp = resp->status = NFS4_OK;
3865 3868  
3866 3869  out:
3867 3870          DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3868 3871              READLINK4res *, resp);
3869 3872  }
3870 3873  
3871 3874  static void
3872 3875  rfs4_op_readlink_free(nfs_resop4 *resop)
3873 3876  {
3874 3877          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3875 3878          utf8string *symlink = (utf8string *)&resp->link;
3876 3879  
3877 3880          if (symlink->utf8string_val) {
3878 3881                  UTF8STRING_FREE(*symlink)
3879 3882          }
3880 3883  }
3881 3884  
3882 3885  /*
3883 3886   * release_lockowner:
3884 3887   *      Release any state associated with the supplied
3885 3888   *      lockowner. Note if any lo_state is holding locks we will not
3886 3889   *      rele that lo_state and thus the lockowner will not be destroyed.
3887 3890   *      A client using lock after the lock owner stateid has been released
3888 3891   *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3889 3892   *      to reissue the lock with new_lock_owner set to TRUE.
3890 3893   *      args: lock_owner
3891 3894   *      res:  status
3892 3895   */
3893 3896  /* ARGSUSED */
3894 3897  static void
3895 3898  rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3896 3899      struct svc_req *req, struct compound_state *cs)
3897 3900  {
3898 3901          RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3899 3902          RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3900 3903          rfs4_lockowner_t *lo;
3901 3904          rfs4_openowner_t *oo;
3902 3905          rfs4_state_t *sp;
3903 3906          rfs4_lo_state_t *lsp;
3904 3907          rfs4_client_t *cp;
3905 3908          bool_t create = FALSE;
3906 3909          locklist_t *llist;
3907 3910          sysid_t sysid;
3908 3911  
3909 3912          DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3910 3913              cs, RELEASE_LOCKOWNER4args *, ap);
3911 3914  
3912 3915          /* Make sure there is a clientid around for this request */
3913 3916          cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3914 3917  
3915 3918          if (cp == NULL) {
3916 3919                  *cs->statusp = resp->status =
3917 3920                      rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3918 3921                  goto out;
3919 3922          }
3920 3923          rfs4_client_rele(cp);
3921 3924  
3922 3925          lo = rfs4_findlockowner(&ap->lock_owner, &create);
3923 3926          if (lo == NULL) {
3924 3927                  *cs->statusp = resp->status = NFS4_OK;
3925 3928                  goto out;
3926 3929          }
3927 3930          ASSERT(lo->rl_client != NULL);
3928 3931  
3929 3932          /*
3930 3933           * Check for EXPIRED client. If so will reap state with in a lease
3931 3934           * period or on next set_clientid_confirm step
3932 3935           */
3933 3936          if (rfs4_lease_expired(lo->rl_client)) {
3934 3937                  rfs4_lockowner_rele(lo);
3935 3938                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3936 3939                  goto out;
3937 3940          }
3938 3941  
3939 3942          /*
3940 3943           * If no sysid has been assigned, then no locks exist; just return.
3941 3944           */
3942 3945          rfs4_dbe_lock(lo->rl_client->rc_dbe);
3943 3946          if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3944 3947                  rfs4_lockowner_rele(lo);
3945 3948                  rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3946 3949                  goto out;
3947 3950          }
3948 3951  
3949 3952          sysid = lo->rl_client->rc_sysidt;
3950 3953          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3951 3954  
3952 3955          /*
3953 3956           * Mark the lockowner invalid.
3954 3957           */
3955 3958          rfs4_dbe_hide(lo->rl_dbe);
3956 3959  
3957 3960          /*
3958 3961           * sysid-pid pair should now not be used since the lockowner is
3959 3962           * invalid. If the client were to instantiate the lockowner again
3960 3963           * it would be assigned a new pid. Thus we can get the list of
3961 3964           * current locks.
3962 3965           */
3963 3966  
3964 3967          llist = flk_get_active_locks(sysid, lo->rl_pid);
3965 3968          /* If we are still holding locks fail */
3966 3969          if (llist != NULL) {
3967 3970  
3968 3971                  *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3969 3972  
3970 3973                  flk_free_locklist(llist);
3971 3974                  /*
3972 3975                   * We need to unhide the lockowner so the client can
3973 3976                   * try it again. The bad thing here is if the client
3974 3977                   * has a logic error that took it here in the first place
3975 3978                   * they probably have lost accounting of the locks that it
3976 3979                   * is holding. So we may have dangling state until the
3977 3980                   * open owner state is reaped via close. One scenario
3978 3981                   * that could possibly occur is that the client has
3979 3982                   * sent the unlock request(s) in separate threads
3980 3983                   * and has not waited for the replies before sending the
3981 3984                   * RELEASE_LOCKOWNER request. Presumably, it would expect
3982 3985                   * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3983 3986                   * reissuing the request.
3984 3987                   */
3985 3988                  rfs4_dbe_unhide(lo->rl_dbe);
3986 3989                  rfs4_lockowner_rele(lo);
3987 3990                  goto out;
3988 3991          }
3989 3992  
3990 3993          /*
3991 3994           * For the corresponding client we need to check each open
3992 3995           * owner for any opens that have lockowner state associated
3993 3996           * with this lockowner.
3994 3997           */
3995 3998  
3996 3999          rfs4_dbe_lock(lo->rl_client->rc_dbe);
3997 4000          for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3998 4001              oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3999 4002  
4000 4003                  rfs4_dbe_lock(oo->ro_dbe);
4001 4004                  for (sp = list_head(&oo->ro_statelist); sp != NULL;
4002 4005                      sp = list_next(&oo->ro_statelist, sp)) {
4003 4006  
4004 4007                          rfs4_dbe_lock(sp->rs_dbe);
4005 4008                          for (lsp = list_head(&sp->rs_lostatelist);
4006 4009                              lsp != NULL;
4007 4010                              lsp = list_next(&sp->rs_lostatelist, lsp)) {
4008 4011                                  if (lsp->rls_locker == lo) {
4009 4012                                          rfs4_dbe_lock(lsp->rls_dbe);
4010 4013                                          rfs4_dbe_invalidate(lsp->rls_dbe);
4011 4014                                          rfs4_dbe_unlock(lsp->rls_dbe);
4012 4015                                  }
4013 4016                          }
4014 4017                          rfs4_dbe_unlock(sp->rs_dbe);
4015 4018                  }
4016 4019                  rfs4_dbe_unlock(oo->ro_dbe);
4017 4020          }
4018 4021          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4019 4022  
4020 4023          rfs4_lockowner_rele(lo);
4021 4024  
4022 4025          *cs->statusp = resp->status = NFS4_OK;
4023 4026  
4024 4027  out:
4025 4028          DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4026 4029              cs, RELEASE_LOCKOWNER4res *, resp);
4027 4030  }
4028 4031  
4029 4032  /*
4030 4033   * short utility function to lookup a file and recall the delegation
4031 4034   */
4032 4035  static rfs4_file_t *
4033 4036  rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4034 4037      int *lkup_error, cred_t *cr)
4035 4038  {
4036 4039          vnode_t *vp;
4037 4040          rfs4_file_t *fp = NULL;
4038 4041          bool_t fcreate = FALSE;
4039 4042          int error;
4040 4043  
4041 4044          if (vpp)
4042 4045                  *vpp = NULL;
4043 4046  
4044 4047          if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4045 4048              NULL)) == 0) {
4046 4049                  if (vp->v_type == VREG)
4047 4050                          fp = rfs4_findfile(vp, NULL, &fcreate);
4048 4051                  if (vpp)
4049 4052                          *vpp = vp;
4050 4053                  else
4051 4054                          VN_RELE(vp);
4052 4055          }
4053 4056  
4054 4057          if (lkup_error)
4055 4058                  *lkup_error = error;
4056 4059  
4057 4060          return (fp);
4058 4061  }
4059 4062  
4060 4063  /*
4061 4064   * remove: args: CURRENT_FH: directory; name.
4062 4065   *      res: status. If success - CURRENT_FH unchanged, return change_info
4063 4066   *              for directory.
4064 4067   */
4065 4068  /* ARGSUSED */
4066 4069  static void
4067 4070  rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4068 4071      struct compound_state *cs)
4069 4072  {
4070 4073          REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4071 4074          REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4072 4075          int error;
4073 4076          vnode_t *dvp, *vp;
4074 4077          struct vattr bdva, idva, adva;
4075 4078          char *nm;
4076 4079          uint_t len;
4077 4080          rfs4_file_t *fp;
4078 4081          int in_crit = 0;
4079 4082          bslabel_t *clabel;
4080 4083          struct sockaddr *ca;
4081 4084          char *name = NULL;
4082 4085          nfsstat4 status;
4083 4086  
4084 4087          DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4085 4088              REMOVE4args *, args);
4086 4089  
4087 4090          /* CURRENT_FH: directory */
4088 4091          dvp = cs->vp;
4089 4092          if (dvp == NULL) {
4090 4093                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4091 4094                  goto out;
4092 4095          }
4093 4096  
4094 4097          if (cs->access == CS_ACCESS_DENIED) {
4095 4098                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4096 4099                  goto out;
4097 4100          }
4098 4101  
4099 4102          /*
4100 4103           * If there is an unshared filesystem mounted on this vnode,
4101 4104           * Do not allow to remove anything in this directory.
4102 4105           */
4103 4106          if (vn_ismntpt(dvp)) {
4104 4107                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4105 4108                  goto out;
4106 4109          }
4107 4110  
4108 4111          if (dvp->v_type != VDIR) {
4109 4112                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4110 4113                  goto out;
4111 4114          }
4112 4115  
4113 4116          status = utf8_dir_verify(&args->target);
4114 4117          if (status != NFS4_OK) {
4115 4118                  *cs->statusp = resp->status = status;
4116 4119                  goto out;
4117 4120          }
4118 4121  
4119 4122          /*
4120 4123           * Lookup the file so that we can check if it's a directory
4121 4124           */
4122 4125          nm = utf8_to_fn(&args->target, &len, NULL);
4123 4126          if (nm == NULL) {
4124 4127                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4125 4128                  goto out;
4126 4129          }
4127 4130  
4128 4131          if (len > MAXNAMELEN) {
4129 4132                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4130 4133                  kmem_free(nm, len);
4131 4134                  goto out;
4132 4135          }
4133 4136  
4134 4137          if (rdonly4(req, cs)) {
4135 4138                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4136 4139                  kmem_free(nm, len);
4137 4140                  goto out;
4138 4141          }
4139 4142  
4140 4143          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4141 4144          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4142 4145              MAXPATHLEN  + 1);
4143 4146  
4144 4147          if (name == NULL) {
4145 4148                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4146 4149                  kmem_free(nm, len);
4147 4150                  goto out;
4148 4151          }
4149 4152  
4150 4153          /*
4151 4154           * Lookup the file to determine type and while we are see if
4152 4155           * there is a file struct around and check for delegation.
4153 4156           * We don't need to acquire va_seq before this lookup, if
4154 4157           * it causes an update, cinfo.before will not match, which will
4155 4158           * trigger a cache flush even if atomic is TRUE.
4156 4159           */
4157 4160          if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4158 4161                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4159 4162                      NULL)) {
4160 4163                          VN_RELE(vp);
4161 4164                          rfs4_file_rele(fp);
4162 4165                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4163 4166                          if (nm != name)
4164 4167                                  kmem_free(name, MAXPATHLEN + 1);
4165 4168                          kmem_free(nm, len);
4166 4169                          goto out;
4167 4170                  }
4168 4171          }
4169 4172  
4170 4173          /* Didn't find anything to remove */
4171 4174          if (vp == NULL) {
4172 4175                  *cs->statusp = resp->status = error;
4173 4176                  if (nm != name)
4174 4177                          kmem_free(name, MAXPATHLEN + 1);
4175 4178                  kmem_free(nm, len);
4176 4179                  goto out;
4177 4180          }
4178 4181  
4179 4182          if (nbl_need_check(vp)) {
4180 4183                  nbl_start_crit(vp, RW_READER);
4181 4184                  in_crit = 1;
4182 4185                  if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4183 4186                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4184 4187                          if (nm != name)
4185 4188                                  kmem_free(name, MAXPATHLEN + 1);
4186 4189                          kmem_free(nm, len);
4187 4190                          nbl_end_crit(vp);
4188 4191                          VN_RELE(vp);
4189 4192                          if (fp) {
4190 4193                                  rfs4_clear_dont_grant(fp);
4191 4194                                  rfs4_file_rele(fp);
4192 4195                          }
4193 4196                          goto out;
4194 4197                  }
4195 4198          }
4196 4199  
4197 4200          /* check label before allowing removal */
4198 4201          if (is_system_labeled()) {
4199 4202                  ASSERT(req->rq_label != NULL);
4200 4203                  clabel = req->rq_label;
4201 4204                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4202 4205                      "got client label from request(1)",
4203 4206                      struct svc_req *, req);
4204 4207                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4205 4208                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4206 4209                              cs->exi)) {
4207 4210                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4208 4211                                  if (name != nm)
4209 4212                                          kmem_free(name, MAXPATHLEN + 1);
4210 4213                                  kmem_free(nm, len);
4211 4214                                  if (in_crit)
4212 4215                                          nbl_end_crit(vp);
4213 4216                                  VN_RELE(vp);
4214 4217                                  if (fp) {
4215 4218                                          rfs4_clear_dont_grant(fp);
4216 4219                                          rfs4_file_rele(fp);
4217 4220                                  }
4218 4221                                  goto out;
4219 4222                          }
4220 4223                  }
4221 4224          }
4222 4225  
4223 4226          /* Get dir "before" change value */
4224 4227          bdva.va_mask = AT_CTIME|AT_SEQ;
4225 4228          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4226 4229          if (error) {
4227 4230                  *cs->statusp = resp->status = puterrno4(error);
4228 4231                  if (nm != name)
4229 4232                          kmem_free(name, MAXPATHLEN + 1);
4230 4233                  kmem_free(nm, len);
4231 4234                  if (in_crit)
4232 4235                          nbl_end_crit(vp);
4233 4236                  VN_RELE(vp);
4234 4237                  if (fp) {
4235 4238                          rfs4_clear_dont_grant(fp);
4236 4239                          rfs4_file_rele(fp);
4237 4240                  }
4238 4241                  goto out;
4239 4242          }
4240 4243          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4241 4244  
4242 4245          /* Actually do the REMOVE operation */
4243 4246          if (vp->v_type == VDIR) {
4244 4247                  /*
4245 4248                   * Can't remove a directory that has a mounted-on filesystem.
4246 4249                   */
4247 4250                  if (vn_ismntpt(vp)) {
4248 4251                          error = EACCES;
4249 4252                  } else {
4250 4253                          /*
4251 4254                           * System V defines rmdir to return EEXIST,
4252 4255                           * not ENOTEMPTY, if the directory is not
4253 4256                           * empty.  A System V NFS server needs to map
4254 4257                           * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4255 4258                           * transmit over the wire.
4256 4259                           */
4257 4260                          if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4258 4261                              NULL, 0)) == EEXIST)
4259 4262                                  error = ENOTEMPTY;
4260 4263                  }
4261 4264          } else {
4262 4265                  if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4263 4266                      fp != NULL) {
4264 4267                          struct vattr va;
4265 4268                          vnode_t *tvp;
4266 4269  
4267 4270                          rfs4_dbe_lock(fp->rf_dbe);
4268 4271                          tvp = fp->rf_vp;
4269 4272                          if (tvp)
4270 4273                                  VN_HOLD(tvp);
4271 4274                          rfs4_dbe_unlock(fp->rf_dbe);
4272 4275  
4273 4276                          if (tvp) {
4274 4277                                  /*
4275 4278                                   * This is va_seq safe because we are not
4276 4279                                   * manipulating dvp.
4277 4280                                   */
4278 4281                                  va.va_mask = AT_NLINK;
4279 4282                                  if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4280 4283                                      va.va_nlink == 0) {
4281 4284                                          /* Remove state on file remove */
4282 4285                                          if (in_crit) {
4283 4286                                                  nbl_end_crit(vp);
4284 4287                                                  in_crit = 0;
4285 4288                                          }
4286 4289                                          rfs4_close_all_state(fp);
4287 4290                                  }
4288 4291                                  VN_RELE(tvp);
4289 4292                          }
4290 4293                  }
4291 4294          }
4292 4295  
4293 4296          if (in_crit)
4294 4297                  nbl_end_crit(vp);
4295 4298          VN_RELE(vp);
4296 4299  
4297 4300          if (fp) {
4298 4301                  rfs4_clear_dont_grant(fp);
4299 4302                  rfs4_file_rele(fp);
4300 4303          }
4301 4304          if (nm != name)
4302 4305                  kmem_free(name, MAXPATHLEN + 1);
4303 4306          kmem_free(nm, len);
4304 4307  
4305 4308          if (error) {
4306 4309                  *cs->statusp = resp->status = puterrno4(error);
4307 4310                  goto out;
4308 4311          }
4309 4312  
4310 4313          /*
4311 4314           * Get the initial "after" sequence number, if it fails, set to zero
4312 4315           */
4313 4316          idva.va_mask = AT_SEQ;
4314 4317          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4315 4318                  idva.va_seq = 0;
4316 4319  
4317 4320          /*
4318 4321           * Force modified data and metadata out to stable storage.
4319 4322           */
4320 4323          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4321 4324  
4322 4325          /*
4323 4326           * Get "after" change value, if it fails, simply return the
4324 4327           * before value.
4325 4328           */
4326 4329          adva.va_mask = AT_CTIME|AT_SEQ;
4327 4330          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4328 4331                  adva.va_ctime = bdva.va_ctime;
4329 4332                  adva.va_seq = 0;
4330 4333          }
4331 4334  
4332 4335          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4333 4336  
4334 4337          /*
4335 4338           * The cinfo.atomic = TRUE only if we have
4336 4339           * non-zero va_seq's, and it has incremented by exactly one
4337 4340           * during the VOP_REMOVE/RMDIR and it didn't change during
4338 4341           * the VOP_FSYNC.
4339 4342           */
4340 4343          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4341 4344              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4342 4345                  resp->cinfo.atomic = TRUE;
4343 4346          else
4344 4347                  resp->cinfo.atomic = FALSE;
4345 4348  
4346 4349          *cs->statusp = resp->status = NFS4_OK;
4347 4350  
4348 4351  out:
4349 4352          DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4350 4353              REMOVE4res *, resp);
4351 4354  }
4352 4355  
4353 4356  /*
4354 4357   * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4355 4358   *              oldname and newname.
4356 4359   *      res: status. If success - CURRENT_FH unchanged, return change_info
4357 4360   *              for both from and target directories.
4358 4361   */
4359 4362  /* ARGSUSED */
4360 4363  static void
4361 4364  rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4362 4365      struct compound_state *cs)
4363 4366  {
4364 4367          RENAME4args *args = &argop->nfs_argop4_u.oprename;
4365 4368          RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4366 4369          int error;
4367 4370          vnode_t *odvp;
4368 4371          vnode_t *ndvp;
4369 4372          vnode_t *srcvp, *targvp, *tvp;
4370 4373          struct vattr obdva, oidva, oadva;
4371 4374          struct vattr nbdva, nidva, nadva;
4372 4375          char *onm, *nnm;
4373 4376          uint_t olen, nlen;
4374 4377          rfs4_file_t *fp, *sfp;
4375 4378          int in_crit_src, in_crit_targ;
4376 4379          int fp_rele_grant_hold, sfp_rele_grant_hold;
4377 4380          int unlinked;
4378 4381          bslabel_t *clabel;
4379 4382          struct sockaddr *ca;
4380 4383          char *converted_onm = NULL;
4381 4384          char *converted_nnm = NULL;
4382 4385          nfsstat4 status;
4383 4386  
4384 4387          DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4385 4388              RENAME4args *, args);
4386 4389  
4387 4390          fp = sfp = NULL;
4388 4391          srcvp = targvp = tvp = NULL;
4389 4392          in_crit_src = in_crit_targ = 0;
4390 4393          fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4391 4394          unlinked = 0;
4392 4395  
4393 4396          /* CURRENT_FH: target directory */
4394 4397          ndvp = cs->vp;
4395 4398          if (ndvp == NULL) {
4396 4399                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4397 4400                  goto out;
4398 4401          }
4399 4402  
4400 4403          /* SAVED_FH: from directory */
4401 4404          odvp = cs->saved_vp;
4402 4405          if (odvp == NULL) {
4403 4406                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4404 4407                  goto out;
4405 4408          }
4406 4409  
4407 4410          if (cs->access == CS_ACCESS_DENIED) {
4408 4411                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4409 4412                  goto out;
4410 4413          }
4411 4414  
4412 4415          /*
4413 4416           * If there is an unshared filesystem mounted on this vnode,
4414 4417           * do not allow to rename objects in this directory.
4415 4418           */
4416 4419          if (vn_ismntpt(odvp)) {
4417 4420                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4418 4421                  goto out;
4419 4422          }
4420 4423  
4421 4424          /*
4422 4425           * If there is an unshared filesystem mounted on this vnode,
4423 4426           * do not allow to rename to this directory.
4424 4427           */
4425 4428          if (vn_ismntpt(ndvp)) {
4426 4429                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4427 4430                  goto out;
4428 4431          }
4429 4432  
4430 4433          if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4431 4434                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4432 4435                  goto out;
4433 4436          }
4434 4437  
4435 4438          if (cs->saved_exi != cs->exi) {
4436 4439                  *cs->statusp = resp->status = NFS4ERR_XDEV;
4437 4440                  goto out;
4438 4441          }
4439 4442  
4440 4443          status = utf8_dir_verify(&args->oldname);
4441 4444          if (status != NFS4_OK) {
4442 4445                  *cs->statusp = resp->status = status;
4443 4446                  goto out;
4444 4447          }
4445 4448  
4446 4449          status = utf8_dir_verify(&args->newname);
4447 4450          if (status != NFS4_OK) {
4448 4451                  *cs->statusp = resp->status = status;
4449 4452                  goto out;
4450 4453          }
4451 4454  
4452 4455          onm = utf8_to_fn(&args->oldname, &olen, NULL);
4453 4456          if (onm == NULL) {
4454 4457                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4455 4458                  goto out;
4456 4459          }
4457 4460          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4458 4461          nlen = MAXPATHLEN + 1;
4459 4462          converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4460 4463              nlen);
4461 4464  
4462 4465          if (converted_onm == NULL) {
4463 4466                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4464 4467                  kmem_free(onm, olen);
4465 4468                  goto out;
4466 4469          }
4467 4470  
4468 4471          nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4469 4472          if (nnm == NULL) {
4470 4473                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4471 4474                  if (onm != converted_onm)
4472 4475                          kmem_free(converted_onm, MAXPATHLEN + 1);
4473 4476                  kmem_free(onm, olen);
4474 4477                  goto out;
4475 4478          }
4476 4479          converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4477 4480              MAXPATHLEN  + 1);
4478 4481  
4479 4482          if (converted_nnm == NULL) {
4480 4483                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4481 4484                  kmem_free(nnm, nlen);
4482 4485                  nnm = NULL;
4483 4486                  if (onm != converted_onm)
4484 4487                          kmem_free(converted_onm, MAXPATHLEN + 1);
4485 4488                  kmem_free(onm, olen);
4486 4489                  goto out;
4487 4490          }
4488 4491  
4489 4492  
4490 4493          if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4491 4494                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4492 4495                  kmem_free(onm, olen);
4493 4496                  kmem_free(nnm, nlen);
4494 4497                  goto out;
4495 4498          }
4496 4499  
4497 4500  
4498 4501          if (rdonly4(req, cs)) {
4499 4502                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4500 4503                  if (onm != converted_onm)
4501 4504                          kmem_free(converted_onm, MAXPATHLEN + 1);
4502 4505                  kmem_free(onm, olen);
4503 4506                  if (nnm != converted_nnm)
4504 4507                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4505 4508                  kmem_free(nnm, nlen);
4506 4509                  goto out;
4507 4510          }
4508 4511  
4509 4512          /* check label of the target dir */
4510 4513          if (is_system_labeled()) {
4511 4514                  ASSERT(req->rq_label != NULL);
4512 4515                  clabel = req->rq_label;
4513 4516                  DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4514 4517                      "got client label from request(1)",
4515 4518                      struct svc_req *, req);
4516 4519                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4517 4520                          if (!do_rfs_label_check(clabel, ndvp,
4518 4521                              EQUALITY_CHECK, cs->exi)) {
4519 4522                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4520 4523                                  goto err_out;
4521 4524                          }
4522 4525                  }
4523 4526          }
4524 4527  
4525 4528          /*
4526 4529           * Is the source a file and have a delegation?
4527 4530           * We don't need to acquire va_seq before these lookups, if
4528 4531           * it causes an update, cinfo.before will not match, which will
4529 4532           * trigger a cache flush even if atomic is TRUE.
4530 4533           */
4531 4534          if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4532 4535              &error, cs->cr)) {
4533 4536                  if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4534 4537                      NULL)) {
4535 4538                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4536 4539                          goto err_out;
4537 4540                  }
4538 4541          }
4539 4542  
4540 4543          if (srcvp == NULL) {
4541 4544                  *cs->statusp = resp->status = puterrno4(error);
4542 4545                  if (onm != converted_onm)
4543 4546                          kmem_free(converted_onm, MAXPATHLEN + 1);
4544 4547                  kmem_free(onm, olen);
4545 4548                  if (nnm != converted_nnm)
4546 4549                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4547 4550                  kmem_free(nnm, nlen);
4548 4551                  goto out;
4549 4552          }
4550 4553  
4551 4554          sfp_rele_grant_hold = 1;
4552 4555  
4553 4556          /* Does the destination exist and a file and have a delegation? */
4554 4557          if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4555 4558              NULL, cs->cr)) {
4556 4559                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4557 4560                      NULL)) {
4558 4561                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4559 4562                          goto err_out;
4560 4563                  }
4561 4564          }
4562 4565          fp_rele_grant_hold = 1;
4563 4566  
4564 4567          /* Check for NBMAND lock on both source and target */
4565 4568          if (nbl_need_check(srcvp)) {
4566 4569                  nbl_start_crit(srcvp, RW_READER);
4567 4570                  in_crit_src = 1;
4568 4571                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4569 4572                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4570 4573                          goto err_out;
4571 4574                  }
4572 4575          }
4573 4576  
4574 4577          if (targvp && nbl_need_check(targvp)) {
4575 4578                  nbl_start_crit(targvp, RW_READER);
4576 4579                  in_crit_targ = 1;
4577 4580                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4578 4581                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4579 4582                          goto err_out;
4580 4583                  }
4581 4584          }
4582 4585  
4583 4586          /* Get source "before" change value */
4584 4587          obdva.va_mask = AT_CTIME|AT_SEQ;
4585 4588          error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4586 4589          if (!error) {
4587 4590                  nbdva.va_mask = AT_CTIME|AT_SEQ;
4588 4591                  error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4589 4592          }
4590 4593          if (error) {
4591 4594                  *cs->statusp = resp->status = puterrno4(error);
4592 4595                  goto err_out;
4593 4596          }
4594 4597  
4595 4598          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4596 4599          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4597 4600  
4598 4601          error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4599 4602              NULL, 0);
4600 4603  
4601 4604          /*
4602 4605           * If target existed and was unlinked by VOP_RENAME, state will need
4603 4606           * closed. To avoid deadlock, rfs4_close_all_state will be done after
4604 4607           * any necessary nbl_end_crit on srcvp and tgtvp.
4605 4608           */
4606 4609          if (error == 0 && fp != NULL) {
4607 4610                  rfs4_dbe_lock(fp->rf_dbe);
4608 4611                  tvp = fp->rf_vp;
4609 4612                  if (tvp)
4610 4613                          VN_HOLD(tvp);
4611 4614                  rfs4_dbe_unlock(fp->rf_dbe);
4612 4615  
4613 4616                  if (tvp) {
4614 4617                          struct vattr va;
4615 4618                          va.va_mask = AT_NLINK;
4616 4619  
4617 4620                          if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4618 4621                              va.va_nlink == 0) {
4619 4622                                  unlinked = 1;
4620 4623  
4621 4624                                  /* DEBUG data */
4622 4625                                  if ((srcvp == targvp) || (tvp != targvp)) {
4623 4626                                          cmn_err(CE_WARN, "rfs4_op_rename: "
4624 4627                                              "srcvp %p, targvp: %p, tvp: %p",
4625 4628                                              (void *)srcvp, (void *)targvp,
4626 4629                                              (void *)tvp);
4627 4630                                  }
4628 4631                          } else {
4629 4632                                  VN_RELE(tvp);
4630 4633                          }
4631 4634                  }
4632 4635          }
4633 4636          if (error == 0)
4634 4637                  vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4635 4638  
4636 4639          if (in_crit_src)
4637 4640                  nbl_end_crit(srcvp);
4638 4641          if (srcvp)
4639 4642                  VN_RELE(srcvp);
4640 4643          if (in_crit_targ)
4641 4644                  nbl_end_crit(targvp);
4642 4645          if (targvp)
4643 4646                  VN_RELE(targvp);
4644 4647  
4645 4648          if (unlinked) {
4646 4649                  ASSERT(fp != NULL);
4647 4650                  ASSERT(tvp != NULL);
4648 4651  
4649 4652                  /* DEBUG data */
4650 4653                  if (RW_READ_HELD(&tvp->v_nbllock)) {
4651 4654                          cmn_err(CE_WARN, "rfs4_op_rename: "
4652 4655                              "RW_READ_HELD(%p)", (void *)tvp);
4653 4656                  }
4654 4657  
4655 4658                  /* The file is gone and so should the state */
4656 4659                  rfs4_close_all_state(fp);
4657 4660                  VN_RELE(tvp);
4658 4661          }
4659 4662  
4660 4663          if (sfp) {
4661 4664                  rfs4_clear_dont_grant(sfp);
4662 4665                  rfs4_file_rele(sfp);
4663 4666          }
4664 4667          if (fp) {
4665 4668                  rfs4_clear_dont_grant(fp);
4666 4669                  rfs4_file_rele(fp);
4667 4670          }
4668 4671  
4669 4672          if (converted_onm != onm)
4670 4673                  kmem_free(converted_onm, MAXPATHLEN + 1);
4671 4674          kmem_free(onm, olen);
4672 4675          if (converted_nnm != nnm)
4673 4676                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4674 4677          kmem_free(nnm, nlen);
4675 4678  
4676 4679          /*
4677 4680           * Get the initial "after" sequence number, if it fails, set to zero
4678 4681           */
4679 4682          oidva.va_mask = AT_SEQ;
4680 4683          if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4681 4684                  oidva.va_seq = 0;
4682 4685  
4683 4686          nidva.va_mask = AT_SEQ;
4684 4687          if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4685 4688                  nidva.va_seq = 0;
4686 4689  
4687 4690          /*
4688 4691           * Force modified data and metadata out to stable storage.
4689 4692           */
4690 4693          (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4691 4694          (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4692 4695  
4693 4696          if (error) {
4694 4697                  *cs->statusp = resp->status = puterrno4(error);
4695 4698                  goto out;
4696 4699          }
4697 4700  
4698 4701          /*
4699 4702           * Get "after" change values, if it fails, simply return the
4700 4703           * before value.
4701 4704           */
4702 4705          oadva.va_mask = AT_CTIME|AT_SEQ;
4703 4706          if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4704 4707                  oadva.va_ctime = obdva.va_ctime;
4705 4708                  oadva.va_seq = 0;
4706 4709          }
4707 4710  
4708 4711          nadva.va_mask = AT_CTIME|AT_SEQ;
4709 4712          if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4710 4713                  nadva.va_ctime = nbdva.va_ctime;
4711 4714                  nadva.va_seq = 0;
4712 4715          }
4713 4716  
4714 4717          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4715 4718          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4716 4719  
4717 4720          /*
4718 4721           * The cinfo.atomic = TRUE only if we have
4719 4722           * non-zero va_seq's, and it has incremented by exactly one
4720 4723           * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4721 4724           */
4722 4725          if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4723 4726              oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4724 4727                  resp->source_cinfo.atomic = TRUE;
4725 4728          else
4726 4729                  resp->source_cinfo.atomic = FALSE;
4727 4730  
4728 4731          if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4729 4732              nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4730 4733                  resp->target_cinfo.atomic = TRUE;
4731 4734          else
4732 4735                  resp->target_cinfo.atomic = FALSE;
4733 4736  
4734 4737  #ifdef  VOLATILE_FH_TEST
4735 4738          {
4736 4739          extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4737 4740  
4738 4741          /*
4739 4742           * Add the renamed file handle to the volatile rename list
4740 4743           */
4741 4744          if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4742 4745                  /* file handles may expire on rename */
4743 4746                  vnode_t *vp;
4744 4747  
4745 4748                  nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4746 4749                  /*
4747 4750                   * Already know that nnm will be a valid string
4748 4751                   */
4749 4752                  error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4750 4753                      NULL, NULL, NULL);
4751 4754                  kmem_free(nnm, nlen);
4752 4755                  if (!error) {
4753 4756                          add_volrnm_fh(cs->exi, vp);
4754 4757                          VN_RELE(vp);
4755 4758                  }
4756 4759          }
4757 4760          }
4758 4761  #endif  /* VOLATILE_FH_TEST */
4759 4762  
4760 4763          *cs->statusp = resp->status = NFS4_OK;
4761 4764  out:
4762 4765          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4763 4766              RENAME4res *, resp);
4764 4767          return;
4765 4768  
4766 4769  err_out:
4767 4770          if (onm != converted_onm)
4768 4771                  kmem_free(converted_onm, MAXPATHLEN + 1);
4769 4772          if (onm != NULL)
4770 4773                  kmem_free(onm, olen);
4771 4774          if (nnm != converted_nnm)
4772 4775                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4773 4776          if (nnm != NULL)
4774 4777                  kmem_free(nnm, nlen);
4775 4778  
4776 4779          if (in_crit_src) nbl_end_crit(srcvp);
4777 4780          if (in_crit_targ) nbl_end_crit(targvp);
4778 4781          if (targvp) VN_RELE(targvp);
4779 4782          if (srcvp) VN_RELE(srcvp);
4780 4783          if (sfp) {
4781 4784                  if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4782 4785                  rfs4_file_rele(sfp);
4783 4786          }
4784 4787          if (fp) {
4785 4788                  if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4786 4789                  rfs4_file_rele(fp);
4787 4790          }
4788 4791  
4789 4792          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4790 4793              RENAME4res *, resp);
4791 4794  }
4792 4795  
4793 4796  /* ARGSUSED */
4794 4797  static void
4795 4798  rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4796 4799      struct compound_state *cs)
4797 4800  {
4798 4801          RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4799 4802          RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4800 4803          rfs4_client_t *cp;
4801 4804  
4802 4805          DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4803 4806              RENEW4args *, args);
4804 4807  
4805 4808          if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4806 4809                  *cs->statusp = resp->status =
4807 4810                      rfs4_check_clientid(&args->clientid, 0);
4808 4811                  goto out;
4809 4812          }
4810 4813  
4811 4814          if (rfs4_lease_expired(cp)) {
4812 4815                  rfs4_client_rele(cp);
4813 4816                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4814 4817                  goto out;
4815 4818          }
4816 4819  
4817 4820          rfs4_update_lease(cp);
4818 4821  
4819 4822          mutex_enter(cp->rc_cbinfo.cb_lock);
4820 4823          if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4821 4824                  cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4822 4825                  *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4823 4826          } else {
4824 4827                  *cs->statusp = resp->status = NFS4_OK;
4825 4828          }
4826 4829          mutex_exit(cp->rc_cbinfo.cb_lock);
4827 4830  
4828 4831          rfs4_client_rele(cp);
4829 4832  
4830 4833  out:
4831 4834          DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4832 4835              RENEW4res *, resp);
4833 4836  }
4834 4837  
4835 4838  /* ARGSUSED */
4836 4839  static void
4837 4840  rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4838 4841      struct compound_state *cs)
4839 4842  {
4840 4843          RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4841 4844  
4842 4845          DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4843 4846  
4844 4847          /* No need to check cs->access - we are not accessing any object */
4845 4848          if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4846 4849                  *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4847 4850                  goto out;
4848 4851          }
4849 4852          if (cs->vp != NULL) {
4850 4853                  VN_RELE(cs->vp);
4851 4854          }
4852 4855          cs->vp = cs->saved_vp;
4853 4856          cs->saved_vp = NULL;
4854 4857          cs->exi = cs->saved_exi;
4855 4858          nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4856 4859          *cs->statusp = resp->status = NFS4_OK;
4857 4860          cs->deleg = FALSE;
4858 4861  
4859 4862  out:
4860 4863          DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4861 4864              RESTOREFH4res *, resp);
4862 4865  }
4863 4866  
4864 4867  /* ARGSUSED */
4865 4868  static void
4866 4869  rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4867 4870      struct compound_state *cs)
4868 4871  {
4869 4872          SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4870 4873  
4871 4874          DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4872 4875  
4873 4876          /* No need to check cs->access - we are not accessing any object */
4874 4877          if (cs->vp == NULL) {
4875 4878                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4876 4879                  goto out;
4877 4880          }
4878 4881          if (cs->saved_vp != NULL) {
4879 4882                  VN_RELE(cs->saved_vp);
4880 4883          }
4881 4884          cs->saved_vp = cs->vp;
4882 4885          VN_HOLD(cs->saved_vp);
4883 4886          cs->saved_exi = cs->exi;
4884 4887          /*
4885 4888           * since SAVEFH is fairly rare, don't alloc space for its fh
4886 4889           * unless necessary.
4887 4890           */
4888 4891          if (cs->saved_fh.nfs_fh4_val == NULL) {
4889 4892                  cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4890 4893          }
4891 4894          nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4892 4895          *cs->statusp = resp->status = NFS4_OK;
4893 4896  
4894 4897  out:
4895 4898          DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4896 4899              SAVEFH4res *, resp);
4897 4900  }
4898 4901  
4899 4902  /*
4900 4903   * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4901 4904   * return the bitmap of attrs that were set successfully. It is also
4902 4905   * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4903 4906   * always be called only after rfs4_do_set_attrs().
4904 4907   *
4905 4908   * Verify that the attributes are same as the expected ones. sargp->vap
4906 4909   * and sargp->sbp contain the input attributes as translated from fattr4.
4907 4910   *
4908 4911   * This function verifies only the attrs that correspond to a vattr or
4909 4912   * vfsstat struct. That is because of the extra step needed to get the
4910 4913   * corresponding system structs. Other attributes have already been set or
4911 4914   * verified by do_rfs4_set_attrs.
4912 4915   *
4913 4916   * Return 0 if all attrs match, -1 if some don't, error if error processing.
4914 4917   */
4915 4918  static int
4916 4919  rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4917 4920      bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4918 4921  {
4919 4922          int error, ret_error = 0;
4920 4923          int i, k;
4921 4924          uint_t sva_mask = sargp->vap->va_mask;
4922 4925          uint_t vbit;
4923 4926          union nfs4_attr_u *na;
4924 4927          uint8_t *amap;
4925 4928          bool_t getsb = ntovp->vfsstat;
4926 4929  
4927 4930          if (sva_mask != 0) {
4928 4931                  /*
4929 4932                   * Okay to overwrite sargp->vap because we verify based
4930 4933                   * on the incoming values.
4931 4934                   */
4932 4935                  ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4933 4936                      sargp->cs->cr, NULL);
4934 4937                  if (ret_error) {
4935 4938                          if (resp == NULL)
4936 4939                                  return (ret_error);
4937 4940                          /*
4938 4941                           * Must return bitmap of successful attrs
4939 4942                           */
4940 4943                          sva_mask = 0;   /* to prevent checking vap later */
4941 4944                  } else {
4942 4945                          /*
4943 4946                           * Some file systems clobber va_mask. it is probably
4944 4947                           * wrong of them to do so, nonethless we practice
4945 4948                           * defensive coding.
4946 4949                           * See bug id 4276830.
4947 4950                           */
4948 4951                          sargp->vap->va_mask = sva_mask;
4949 4952                  }
4950 4953          }
4951 4954  
4952 4955          if (getsb) {
4953 4956                  /*
4954 4957                   * Now get the superblock and loop on the bitmap, as there is
4955 4958                   * no simple way of translating from superblock to bitmap4.
4956 4959                   */
4957 4960                  ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4958 4961                  if (ret_error) {
4959 4962                          if (resp == NULL)
4960 4963                                  goto errout;
4961 4964                          getsb = FALSE;
4962 4965                  }
4963 4966          }
4964 4967  
4965 4968          /*
4966 4969           * Now loop and verify each attribute which getattr returned
4967 4970           * whether it's the same as the input.
4968 4971           */
4969 4972          if (resp == NULL && !getsb && (sva_mask == 0))
4970 4973                  goto errout;
4971 4974  
4972 4975          na = ntovp->na;
4973 4976          amap = ntovp->amap;
4974 4977          k = 0;
4975 4978          for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4976 4979                  k = *amap;
4977 4980                  ASSERT(nfs4_ntov_map[k].nval == k);
4978 4981                  vbit = nfs4_ntov_map[k].vbit;
4979 4982  
4980 4983                  /*
4981 4984                   * If vattr attribute but VOP_GETATTR failed, or it's
4982 4985                   * superblock attribute but VFS_STATVFS failed, skip
4983 4986                   */
4984 4987                  if (vbit) {
4985 4988                          if ((vbit & sva_mask) == 0)
4986 4989                                  continue;
4987 4990                  } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4988 4991                          continue;
4989 4992                  }
4990 4993                  error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4991 4994                  if (resp != NULL) {
4992 4995                          if (error)
4993 4996                                  ret_error = -1; /* not all match */
4994 4997                          else    /* update response bitmap */
4995 4998                                  *resp |= nfs4_ntov_map[k].fbit;
4996 4999                          continue;
4997 5000                  }
4998 5001                  if (error) {
4999 5002                          ret_error = -1; /* not all match */
5000 5003                          break;
5001 5004                  }
5002 5005          }
5003 5006  errout:
5004 5007          return (ret_error);
5005 5008  }
5006 5009  
5007 5010  /*
5008 5011   * Decode the attribute to be set/verified. If the attr requires a sys op
5009 5012   * (VOP_GETATTR, VFS_STATVFS), and the request is to verify, then don't
5010 5013   * call the sv_getit function for it, because the sys op hasn't yet been done.
5011 5014   * Return 0 for success, error code if failed.
            * A failed XDR decode is reported as EINVAL; otherwise the error is
            * whatever the attribute's sv_getit function returned.
            *
            * k is the index of the attribute in nfs4_ntov_map, xdrp is the decode
            * stream, and nap receives the decoded value. The attribute's vbit is
            * always OR-ed into sargp->vap->va_mask. When resp_bval is non-NULL, the
            * attribute's fbit is added to it only if the attribute was handled
            * here without error, i.e. it is not deferred to a later sys op.
5012 5015   *
5013 5016   * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
            * The one exception is a failing sv_getit call, after which the decoded
            * arg is released here with xdr_free().
5014 5017   */
5015 5018  static int
5016 5019  decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5017 5020      int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5018 5021  {
5019 5022          int error = 0;
5020 5023          bool_t set_later;
5021 5024  
5022 5025          sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5023 5026  
5024 5027          if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5025 5028                  set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5026 5029                  /*
5027 5030                   * don't verify yet if a vattr or sb dependent attr,
5028 5031                   * because we don't have their sys values yet.
5029 5032                   * Will be done later.
5030 5033                   */
5031 5034                  if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5032 5035                          /*
5033 5036                           * ACLs are a special case, since setting the MODE
5034 5037                           * conflicts with setting the ACL.  We delay setting
5035 5038                           * the ACL until all other attributes have been set.
5036 5039                           * The ACL gets set in do_rfs4_op_setattr().
5037 5040                           */
5038 5041                          if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5039 5042                                  error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5040 5043                                      sargp, nap);
5041 5044                                  if (error) {
5042 5045                                          xdr_free(nfs4_ntov_map[k].xfunc,
5043 5046                                              (caddr_t)nap);
5044 5047                                  }
5045 5048                          }
5046 5049                  }
5047 5050          } else {
5048 5051  #ifdef  DEBUG
5049 5052                  cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5050 5053                      "decoding attribute %d\n", k);
5051 5054  #endif
5052 5055                  error = EINVAL;
5053 5056          }
                   /*
                    * set_later is only assigned when the decode succeeded. On the
                    * decode-failure path error is EINVAL, so the !error test
                    * short-circuits before set_later is read.
                    */
5054 5057          if (!error && resp_bval && !set_later) {
5055 5058                  *resp_bval |= nfs4_ntov_map[k].fbit;
5056 5059          }
5057 5060  
5058 5061          return (error);
5059 5062  }
5060 5063  
5061 5064  /*
5062 5065   * Set vattr based on incoming fattr4 attrs - used by setattr.
            * Also used by verify/nverify, which pass cmd == NFS4ATTR_VERIT and a
            * NULL resp.
5063 5066   * Set response mask. Ignore any values that are not writable vattr attrs.
            *
            * For every nfs4_ntov_map entry whose fbit is present in
            * fattrp->attrmask, the attribute is first probed with
            * NFS4ATTR_SUPPORTED and then decoded with decode_fattr4_attr(). Each
            * successfully decoded attribute has its nval appended to ntovp->amap,
            * its value stored in the matching ntovp->na slot, and ntovp->attrcnt
            * incremented; ntovp->vfsstat is set if any such attribute is marked
            * vfsstat.
            *
            * Returns NFS4ERR_NOFILEHANDLE if cs->vp is NULL and NFS4ERR_ACCESS if
            * cs->access is CS_ACCESS_DENIED (clearing *resp when resp is non-NULL).
            * The first error from the probe or the decode stops the loop: ENOTSUP
            * is returned as NFS4ERR_ATTRNOTSUPP, anything else via puterrno4().
            * Otherwise returns NFS4_OK.
5064 5067   */
5065 5068  static nfsstat4
5066 5069  do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5067 5070      struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5068 5071      nfs4_attr_cmd_t cmd)
5069 5072  {
5070 5073          int error = 0;
5071 5074          int i;
5072 5075          char *attrs = fattrp->attrlist4;
5073 5076          uint32_t attrslen = fattrp->attrlist4_len;
5074 5077          XDR xdr;
5075 5078          nfsstat4 status = NFS4_OK;
5076 5079          vnode_t *vp = cs->vp;
5077 5080          union nfs4_attr_u *na;
5078 5081          uint8_t *amap;
5079 5082  
5080 5083  #ifndef lint
5081 5084          /*
5082 5085           * Make sure that maximum attribute number can be expressed as an
5083 5086           * 8 bit quantity.
5084 5087           */
5085 5088          ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5086 5089  #endif
5087 5090  
5088 5091          if (vp == NULL) {
5089 5092                  if (resp)
5090 5093                          *resp = 0;
5091 5094                  return (NFS4ERR_NOFILEHANDLE);
5092 5095          }
5093 5096          if (cs->access == CS_ACCESS_DENIED) {
5094 5097                  if (resp)
5095 5098                          *resp = 0;
5096 5099                  return (NFS4ERR_ACCESS);
5097 5100          }
5098 5101  
5099 5102          sargp->op = cmd;
5100 5103          sargp->cs = cs;
5101 5104          sargp->flag = 0;        /* may be set later */
5102 5105          sargp->vap->va_mask = 0;
5103 5106          sargp->rdattr_error = NFS4_OK;
5104 5107          sargp->rdattr_error_req = FALSE;
5105 5108          /* sargp->sbp is set by the caller */
5106 5109  
5107 5110          xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5108 5111  
5109 5112          na = ntovp->na;
5110 5113          amap = ntovp->amap;
5111 5114  
5112 5115          /*
5113 5116           * The following loop iterates on the nfs4_ntov_map checking
5114 5117           * if the fbit is set in the requested bitmap.
5115 5118           * If set then we process the arguments using the
5116 5119           * rfs4_fattr4 conversion functions to populate the setattr
5117 5120           * vattr and va_mask. Any settable attrs that are not using vattr
5118 5121           * will be set in this loop.
5119 5122           */
5120 5123          for (i = 0; i < nfs4_ntov_map_size; i++) {
5121 5124                  if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5122 5125                          continue;
5123 5126                  }
5124 5127                  /*
5125 5128                   * If setattr, must be a writable attr.
5126 5129                   * If verify/nverify, must be a readable attr.
5127 5130                   */
5128 5131                  if ((error = (*nfs4_ntov_map[i].sv_getit)(
5129 5132                      NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5130 5133                          /*
5131 5134                           * Client tries to set/verify an
5132 5135                           * unsupported attribute, tries to set
5133 5136                           * a read only attr or verify a write
5134 5137                           * only one - error!
5135 5138                           */
5136 5139                          break;
5137 5140                  }
5138 5141                  /*
5139 5142                   * Decode the attribute to set/verify
5140 5143                   */
5141 5144                  error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5142 5145                      &xdr, resp ? resp : NULL, na);
5143 5146                  if (error)
5144 5147                          break;
                           /* Record which attribute occupies this slot of ntovp->na. */
5145 5148                  *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5146 5149                  na++;
5147 5150                  (ntovp->attrcnt)++;
5148 5151                  if (nfs4_ntov_map[i].vfsstat)
5149 5152                          ntovp->vfsstat = TRUE;
5150 5153          }
5151 5154  
5152 5155          if (error != 0)
5153 5156                  status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5154 5157                      puterrno4(error));
5155 5158          /* xdrmem_destroy(&xdrs); */    /* NO-OP */
5156 5159          return (status);
5157 5160  }
5158 5161  
           /*
            * Apply the attributes in fattrp to cs->vp on behalf of SETATTR and
            * report in *resp which of them were set. Returns an nfsstat4.
            *
            * The fattr4 is decoded with do_rfs4_set_attrs(NFS4ATTR_SETIT). If the
            * size is being set, the stateid is checked first; for a regular file
            * the change is also checked against NBMAND locks and, when the caller
            * owns the file, applied with VOP_SPACE instead of VOP_SETATTR. Any
            * vattr attributes still in va_mask go through VOP_SETATTR, and a
            * requested ACL is applied last.
            *
            * If applying the attributes fails, the response bitmap is built by
            * rfs4_verify_attr(); on success the metadata is pushed out with
            * VOP_FSYNC and the bitmap is derived from va_mask. Every path leaves
            * through the done: label, which masks *resp with the requested
            * attrmask, leaves the NBMAND critical region if it was entered, and
            * frees the ntov table.
            */
5159 5162  static nfsstat4
5160 5163  do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5161 5164      stateid4 *stateid)
5162 5165  {
5163 5166          int error = 0;
5164 5167          struct nfs4_svgetit_arg sarg;
5165 5168          bool_t trunc;
5166 5169  
5167 5170          nfsstat4 status = NFS4_OK;
5168 5171          cred_t *cr = cs->cr;
5169 5172          vnode_t *vp = cs->vp;
5170 5173          struct nfs4_ntov_table ntov;
5171 5174          struct statvfs64 sb;
5172 5175          struct vattr bva;
5173 5176          struct flock64 bf;
5174 5177          int in_crit = 0;
5175 5178          uint_t saved_mask = 0;
5176 5179          caller_context_t ct;
5177 5180  
5178 5181          *resp = 0;
5179 5182          sarg.sbp = &sb;
5180 5183          sarg.is_referral = B_FALSE;
5181 5184          nfs4_ntov_table_init(&ntov);
5182 5185          status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5183 5186              NFS4ATTR_SETIT);
5184 5187          if (status != NFS4_OK) {
5185 5188                  /*
5186 5189                   * failed set attrs
5187 5190                   */
5188 5191                  goto done;
5189 5192          }
5190 5193          if ((sarg.vap->va_mask == 0) &&
5191 5194              (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5192 5195                  /*
5193 5196                   * no further work to be done
5194 5197                   */
5195 5198                  goto done;
5196 5199          }
5197 5200  
5198 5201          /*
5199 5202           * If we got a request to set the ACL and the MODE, only
5200 5203           * allow changing VSUID, VSGID, and VSVTX.  Attempting
5201 5204           * to change any other bits, along with setting an ACL,
5202 5205           * gives NFS4ERR_INVAL.
5203 5206           */
5204 5207          if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5205 5208              (fattrp->attrmask & FATTR4_MODE_MASK)) {
5206 5209                  vattr_t va;
5207 5210  
5208 5211                  va.va_mask = AT_MODE;
5209 5212                  error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5210 5213                  if (error) {
5211 5214                          status = puterrno4(error);
5212 5215                          goto done;
5213 5216                  }
5214 5217                  if ((sarg.vap->va_mode ^ va.va_mode) &
5215 5218                      ~(VSUID | VSGID | VSVTX)) {
5216 5219                          status = NFS4ERR_INVAL;
5217 5220                          goto done;
5218 5221                  }
5219 5222          }
5220 5223  
5221 5224          /* Check stateid only if size has been set */
5222 5225          if (sarg.vap->va_mask & AT_SIZE) {
5223 5226                  trunc = (sarg.vap->va_size == 0);
5224 5227                  status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5225 5228                      trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5226 5229                  if (status != NFS4_OK)
5227 5230                          goto done;
5228 5231          } else {
                           /* No stateid check was made, so fill in ct by hand. */
5229 5232                  ct.cc_sysid = 0;
5230 5233                  ct.cc_pid = 0;
5231 5234                  ct.cc_caller_id = nfs4_srv_caller_id;
5232 5235                  ct.cc_flags = CC_DONTBLOCK;
5233 5236          }
5234 5237  
5235 5238          /* XXX start of possible race with delegations */
5236 5239  
5237 5240          /*
5238 5241           * We need to specially handle size changes because it is
5239 5242           * possible for the client to create a file with read-only
5240 5243           * modes, but with the file opened for writing. If the client
5241 5244           * then tries to set the file size, e.g. ftruncate(3C),
5242 5245           * fcntl(F_FREESP), the normal access checking done in
5243 5246           * VOP_SETATTR would prevent the client from doing it even though
5244 5247           * it should be allowed to do so.  To get around this, we do the
5245 5248           * access checking for ourselves and use VOP_SPACE which doesn't
5246 5249           * do the access checking.
5247 5250           * Also the client should not be allowed to change the file
5248 5251           * size if there is a conflicting non-blocking mandatory lock in
5249 5252           * the region of the change.
5250 5253           */
5251 5254          if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5252 5255                  u_offset_t offset;
5253 5256                  ssize_t length;
5254 5257  
5255 5258                  /*
5256 5259                   * ufs_setattr clears AT_SIZE from vap->va_mask, but
5257 5260                   * before returning, sarg.vap->va_mask is used to
5258 5261                   * generate the setattr reply bitmap.  We also clear
5259 5262                   * AT_SIZE below before calling VOP_SPACE.  For both
5260 5263                   * of these cases, the va_mask needs to be saved here
5261 5264                   * and restored after calling VOP_SETATTR.
5262 5265                   */
5263 5266                  saved_mask = sarg.vap->va_mask;
5264 5267  
5265 5268                  /*
5266 5269                   * Check any possible conflict due to NBMAND locks.
5267 5270                   * Get into critical region before VOP_GETATTR, so the
5268 5271                   * size attribute is valid when checking conflicts.
5269 5272                   */
5270 5273                  if (nbl_need_check(vp)) {
5271 5274                          nbl_start_crit(vp, RW_READER);
5272 5275                          in_crit = 1;
5273 5276                  }
5274 5277  
5275 5278                  bva.va_mask = AT_UID|AT_SIZE;
5276 5279                  if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5277 5280                          status = puterrno4(error);
5278 5281                          goto done;
5279 5282                  }
5280 5283  
5281 5284                  if (in_crit) {
                                   /*
                                    * The region to check is the span between the
                                    * current size and the requested size.
                                    */
5282 5285                          if (sarg.vap->va_size < bva.va_size) {
5283 5286                                  offset = sarg.vap->va_size;
5284 5287                                  length = bva.va_size - sarg.vap->va_size;
5285 5288                          } else {
5286 5289                                  offset = bva.va_size;
5287 5290                                  length = sarg.vap->va_size - bva.va_size;
5288 5291                          }
5289 5292                          if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5290 5293                              &ct)) {
5291 5294                                  status = NFS4ERR_LOCKED;
5292 5295                                  goto done;
5293 5296                          }
5294 5297                  }
5295 5298  
                           /*
                            * Only the file's owner takes the VOP_SPACE path; for any
                            * other caller AT_SIZE stays in va_mask and the size change
                            * is left to VOP_SETATTR below.
                            */
5296 5299                  if (crgetuid(cr) == bva.va_uid) {
5297 5300                          sarg.vap->va_mask &= ~AT_SIZE;
5298 5301                          bf.l_type = F_WRLCK;
5299 5302                          bf.l_whence = 0;
5300 5303                          bf.l_start = (off64_t)sarg.vap->va_size;
5301 5304                          bf.l_len = 0;
5302 5305                          bf.l_sysid = 0;
5303 5306                          bf.l_pid = 0;
5304 5307                          error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5305 5308                              (offset_t)sarg.vap->va_size, cr, &ct);
5306 5309                  }
5307 5310          }
5308 5311  
5309 5312          if (!error && sarg.vap->va_mask != 0)
5310 5313                  error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5311 5314  
5312 5315          /* restore va_mask -- ufs_setattr clears AT_SIZE */
5313 5316          if (saved_mask & AT_SIZE)
5314 5317                  sarg.vap->va_mask |= AT_SIZE;
5315 5318  
5316 5319          /*
5317 5320           * If an ACL was being set, it has been delayed until now,
5318 5321           * in order to set the mode (via the VOP_SETATTR() above) first.
5319 5322           */
5320 5323          if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5321 5324                  int i;
5322 5325  
                           /* Find the ntov slot that holds the decoded ACL. */
5323 5326                  for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5324 5327                          if (ntov.amap[i] == FATTR4_ACL)
5325 5328                                  break;
5326 5329                  if (i < NFS4_MAXNUM_ATTRS) {
5327 5330                          error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5328 5331                              NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5329 5332                          if (error == 0) {
5330 5333                                  *resp |= FATTR4_ACL_MASK;
5331 5334                          } else if (error == ENOTSUP) {
5332 5335                                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5333 5336                                  status = NFS4ERR_ATTRNOTSUPP;
5334 5337                                  goto done;
5335 5338                          }
5336 5339                  } else {
5337 5340                          NFS4_DEBUG(rfs4_debug,
5338 5341                              (CE_NOTE, "do_rfs4_op_setattr: "
5339 5342                              "unable to find ACL in fattr4"));
5340 5343                          error = EINVAL;
5341 5344                  }
5342 5345          }
5343 5346  
5344 5347          if (error) {
5345 5348                  /* check if a monitor detected a delegation conflict */
5346 5349                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5347 5350                          status = NFS4ERR_DELAY;
5348 5351                  else
5349 5352                          status = puterrno4(error);
5350 5353  
5351 5354                  /*
5352 5355                   * Set the response bitmap when setattr failed.
5353 5356                   * If VOP_SETATTR partially succeeded, test by doing a
5354 5357                   * VOP_GETATTR on the object and comparing the data
5355 5358                   * to the setattr arguments.
5356 5359                   */
5357 5360                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5358 5361          } else {
5359 5362                  /*
5360 5363                   * Force modified metadata out to stable storage.
5361 5364                   */
5362 5365                  (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5363 5366                  /*
5364 5367                   * Set response bitmap
5365 5368                   */
5366 5369                  nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5367 5370          }
5368 5371  
5369 5372  /* Return early and already have a NFSv4 error */
5370 5373  done:
5371 5374          /*
5372 5375           * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5373 5376           * conversion sets both readable and writeable NFS4 attrs
5374 5377           * for AT_MTIME and AT_ATIME.  The line below masks out
5375 5378           * unrequested attrs from the setattr result bitmap.  This
5376 5379           * is placed after the done: label to catch the ATTRNOTSUPP
5377 5380           * case.
5378 5381           */
5379 5382          *resp &= fattrp->attrmask;
5380 5383  
5381 5384          if (in_crit)
5382 5385                  nbl_end_crit(vp);
5383 5386  
5384 5387          nfs4_ntov_table_free(&ntov, &sarg);
5385 5388  
5386 5389          return (status);
5387 5390  }
5388 5391  
           /*
            * SETATTR operation handler. The request is rejected with
            * NFS4ERR_NOFILEHANDLE when there is no current vnode, with
            * NFS4ERR_ACCESS when the vnode is a mount point, with NFS4ERR_ROFS
            * when rdonly4(req, cs) is true, and with NFS4ERR_ACCESS when the
            * labeled-system check fails. Otherwise the work is done by
            * do_rfs4_op_setattr(). The status is stored in both resp->status and
            * *cs->statusp.
            */
5389 5392  /* ARGSUSED */
5390 5393  static void
5391 5394  rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5392 5395      struct compound_state *cs)
5393 5396  {
5394 5397          SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5395 5398          SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5396 5399          bslabel_t *clabel;
5397 5400  
5398 5401          DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5399 5402              SETATTR4args *, args);
5400 5403  
5401 5404          if (cs->vp == NULL) {
5402 5405                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5403 5406                  goto out;
5404 5407          }
5405 5408  
5406 5409          /*
5407 5410           * If there is an unshared filesystem mounted on this vnode,
5408 5411           * do not allow to setattr on this vnode.
5409 5412           */
5410 5413          if (vn_ismntpt(cs->vp)) {
5411 5414                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5412 5415                  goto out;
5413 5416          }
5414 5417  
                   /*
                    * NOTE(review): attrsset is cleared only here, after the two early
                    * exits above; presumably resop arrives zeroed -- confirm.
                    */
5415 5418          resp->attrsset = 0;
5416 5419  
5417 5420          if (rdonly4(req, cs)) {
5418 5421                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5419 5422                  goto out;
5420 5423          }
5421 5424  
5422 5425          /* check label before setting attributes */
5423 5426          if (is_system_labeled()) {
5424 5427                  ASSERT(req->rq_label != NULL);
5425 5428                  clabel = req->rq_label;
5426 5429                  DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5427 5430                      "got client label from request(1)",
5428 5431                      struct svc_req *, req);
                           /* A client label equal to admin_low skips the check. */
5429 5432                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
5430 5433                          if (!do_rfs_label_check(clabel, cs->vp,
5431 5434                              EQUALITY_CHECK, cs->exi)) {
5432 5435                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5433 5436                                  goto out;
5434 5437                          }
5435 5438                  }
5436 5439          }
5437 5440  
5438 5441          *cs->statusp = resp->status =
5439 5442              do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5440 5443              &args->stateid);
5441 5444  
5442 5445  out:
5443 5446          DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5444 5447              SETATTR4res *, resp);
5445 5448  }
5446 5449  
           /*
            * VERIFY operation handler. The client's attributes are decoded with
            * do_rfs4_set_attrs(NFS4ATTR_VERIT) and then compared against the
            * object by rfs4_verify_attr(): a result of 0 becomes NFS4_OK, -1
            * becomes NFS4ERR_NOT_SAME, and any other value is mapped with
            * puterrno4(). Fails with NFS4ERR_NOFILEHANDLE when there is no
            * current vnode. The status is stored in resp->status and *cs->statusp.
            */
5447 5450  /* ARGSUSED */
5448 5451  static void
5449 5452  rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5450 5453      struct compound_state *cs)
5451 5454  {
5452 5455          /*
5453 5456           * verify and nverify are exactly the same, except that nverify
5454 5457           * succeeds when some argument changed, and verify succeeds
5455 5458           * when none changed.
5456 5459           */
5457 5460  
5458 5461          VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5459 5462          VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5460 5463  
5461 5464          int error;
5462 5465          struct nfs4_svgetit_arg sarg;
5463 5466          struct statvfs64 sb;
5464 5467          struct nfs4_ntov_table ntov;
5465 5468  
5466 5469          DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5467 5470              VERIFY4args *, args);
5468 5471  
5469 5472          if (cs->vp == NULL) {
5470 5473                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5471 5474                  goto out;
5472 5475          }
5473 5476  
5474 5477          sarg.sbp = &sb;
5475 5478          sarg.is_referral = B_FALSE;
5476 5479          nfs4_ntov_table_init(&ntov);
5477 5480          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5478 5481              &sarg, &ntov, NFS4ATTR_VERIT);
5479 5482          if (resp->status != NFS4_OK) {
5480 5483                  /*
5481 5484                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5482 5485                   * so could return -1 for "no match".
                            *
                            * NOTE(review): do_rfs4_set_attrs() returns either a fixed
                            * NFS4ERR_* value or puterrno4(error); whether -1 can
                            * actually come back depends on puterrno4() -- confirm.
5483 5486                   */
5484 5487                  if (resp->status == -1)
5485 5488                          resp->status = NFS4ERR_NOT_SAME;
5486 5489                  goto done;
5487 5490          }
5488 5491          error = rfs4_verify_attr(&sarg, NULL, &ntov);
5489 5492          switch (error) {
5490 5493          case 0:
5491 5494                  resp->status = NFS4_OK;
5492 5495                  break;
5493 5496          case -1:
5494 5497                  resp->status = NFS4ERR_NOT_SAME;
5495 5498                  break;
5496 5499          default:
5497 5500                  resp->status = puterrno4(error);
5498 5501                  break;
5499 5502          }
5500 5503  done:
5501 5504          *cs->statusp = resp->status;
5502 5505          nfs4_ntov_table_free(&ntov, &sarg);
5503 5506  out:
5504 5507          DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5505 5508              VERIFY4res *, resp);
5506 5509  }
5507 5510  
           /*
            * NVERIFY operation handler. The client's attributes are decoded with
            * do_rfs4_set_attrs(NFS4ATTR_VERIT) and then compared against the
            * object by rfs4_verify_attr(): a result of 0 (all match) becomes
            * NFS4ERR_SAME, -1 becomes NFS4_OK, and any other value is mapped with
            * puterrno4(). Fails with NFS4ERR_NOFILEHANDLE when there is no
            * current vnode. The status is stored in resp->status and *cs->statusp.
            */
5508 5511  /* ARGSUSED */
5509 5512  static void
5510 5513  rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5511 5514      struct compound_state *cs)
5512 5515  {
5513 5516          /*
5514 5517           * verify and nverify are exactly the same, except that nverify
5515 5518           * succeeds when some argument changed, and verify succeeds
5516 5519           * when none changed.
5517 5520           */
5518 5521  
5519 5522          NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5520 5523          NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5521 5524  
5522 5525          int error;
5523 5526          struct nfs4_svgetit_arg sarg;
5524 5527          struct statvfs64 sb;
5525 5528          struct nfs4_ntov_table ntov;
5526 5529  
5527 5530          DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5528 5531              NVERIFY4args *, args);
5529 5532  
5530 5533          if (cs->vp == NULL) {
5531 5534                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5532 5535                  DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5533 5536                      NVERIFY4res *, resp);
5534 5537                  return;
5535 5538          }
5536 5539          sarg.sbp = &sb;
5537 5540          sarg.is_referral = B_FALSE;
5538 5541          nfs4_ntov_table_init(&ntov);
5539 5542          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5540 5543              &sarg, &ntov, NFS4ATTR_VERIT);
5541 5544          if (resp->status != NFS4_OK) {
5542 5545                  /*
5543 5546                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5544 5547                   * so could return -1 for "no match".
                            *
                            * NOTE(review): do_rfs4_set_attrs() returns either a fixed
                            * NFS4ERR_* value or puterrno4(error); whether -1 can
                            * actually come back depends on puterrno4() -- confirm.
5545 5548                   */
5546 5549                  if (resp->status == -1)
5547 5550                          resp->status = NFS4_OK;
5548 5551                  goto done;
5549 5552          }
5550 5553          error = rfs4_verify_attr(&sarg, NULL, &ntov);
5551 5554          switch (error) {
5552 5555          case 0:
5553 5556                  resp->status = NFS4ERR_SAME;
5554 5557                  break;
5555 5558          case -1:
5556 5559                  resp->status = NFS4_OK;
5557 5560                  break;
5558 5561          default:
5559 5562                  resp->status = puterrno4(error);
5560 5563                  break;
5561 5564          }
5562 5565  done:
5563 5566          *cs->statusp = resp->status;
5564 5567          nfs4_ntov_table_free(&ntov, &sarg);
5565 5568  
5566 5569          DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5567 5570              NVERIFY4res *, resp);
5568 5571  }
5569 5572  
5570 5573  /*
            * Number of entries in the on-stack iovec array used by
            * rfs4_op_write(). A write whose mblk chain needs more entries than
            * this gets a kmem_alloc'd array instead.
            *
5571 5574   * XXX - This should live in an NFS header file.
5572 5575   */
5573 5576  #define MAX_IOVECS      12
5574 5577  
5575 5578  /* ARGSUSED */
5576 5579  static void
5577 5580  rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5578 5581      struct compound_state *cs)
5579 5582  {
5580 5583          WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5581 5584          WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5582 5585          int error;
5583 5586          vnode_t *vp;
5584 5587          struct vattr bva;
5585 5588          u_offset_t rlimit;
5586 5589          struct uio uio;
5587 5590          struct iovec iov[MAX_IOVECS];
5588 5591          struct iovec *iovp;
5589 5592          int iovcnt;
5590 5593          int ioflag;
5591 5594          cred_t *savecred, *cr;
5592 5595          bool_t *deleg = &cs->deleg;
5593 5596          nfsstat4 stat;
5594 5597          int in_crit = 0;
5595 5598          caller_context_t ct;
5596 5599          nfs4_srv_t *nsrv4;
5597 5600  
5598 5601          DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5599 5602              WRITE4args *, args);
5600 5603  
5601 5604          vp = cs->vp;
5602 5605          if (vp == NULL) {
5603 5606                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5604 5607                  goto out;
5605 5608          }
5606 5609          if (cs->access == CS_ACCESS_DENIED) {
5607 5610                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5608 5611                  goto out;
5609 5612          }
5610 5613  
5611 5614          cr = cs->cr;
5612 5615  
5613 5616          if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5614 5617              deleg, TRUE, &ct)) != NFS4_OK) {
5615 5618                  *cs->statusp = resp->status = stat;
5616 5619                  goto out;
5617 5620          }
5618 5621  
5619 5622          /*
5620 5623           * We have to enter the critical region before calling VOP_RWLOCK
5621 5624           * to avoid a deadlock with ufs.
5622 5625           */
5623 5626          if (nbl_need_check(vp)) {
5624 5627                  nbl_start_crit(vp, RW_READER);
5625 5628                  in_crit = 1;
5626 5629                  if (nbl_conflict(vp, NBL_WRITE,
5627 5630                      args->offset, args->data_len, 0, &ct)) {
5628 5631                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
5629 5632                          goto out;
5630 5633                  }
5631 5634          }
5632 5635  
5633 5636          bva.va_mask = AT_MODE | AT_UID;
5634 5637          error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5635 5638  
5636 5639          /*
5637 5640           * If we can't get the attributes, then we can't do the
5638 5641           * right access checking.  So, we'll fail the request.
5639 5642           */
5640 5643          if (error) {
5641 5644                  *cs->statusp = resp->status = puterrno4(error);
5642 5645                  goto out;
5643 5646          }
5644 5647  
5645 5648          if (rdonly4(req, cs)) {
5646 5649                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5647 5650                  goto out;
5648 5651          }
5649 5652  
5650 5653          if (vp->v_type != VREG) {
5651 5654                  *cs->statusp = resp->status =
5652 5655                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5653 5656                  goto out;
5654 5657          }
5655 5658  
5656 5659          if (crgetuid(cr) != bva.va_uid &&
  
    | 
      ↓ open down ↓ | 
    4126 lines elided | 
    
      ↑ open up ↑ | 
  
5657 5660              (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5658 5661                  *cs->statusp = resp->status = puterrno4(error);
5659 5662                  goto out;
5660 5663          }
5661 5664  
5662 5665          if (MANDLOCK(vp, bva.va_mode)) {
5663 5666                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5664 5667                  goto out;
5665 5668          }
5666 5669  
5667      -        nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     5670 +        nsrv4 = nfs4_get_srv();
5668 5671          if (args->data_len == 0) {
5669 5672                  *cs->statusp = resp->status = NFS4_OK;
5670 5673                  resp->count = 0;
5671 5674                  resp->committed = args->stable;
5672 5675                  resp->writeverf = nsrv4->write4verf;
5673 5676                  goto out;
5674 5677          }
5675 5678  
5676 5679          if (args->mblk != NULL) {
5677 5680                  mblk_t *m;
5678 5681                  uint_t bytes, round_len;
5679 5682  
5680 5683                  iovcnt = 0;
5681 5684                  bytes = 0;
5682 5685                  round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5683 5686                  for (m = args->mblk;
5684 5687                      m != NULL && bytes < round_len;
5685 5688                      m = m->b_cont) {
5686 5689                          iovcnt++;
5687 5690                          bytes += MBLKL(m);
5688 5691                  }
5689 5692  #ifdef DEBUG
5690 5693                  /* should have ended on an mblk boundary */
5691 5694                  if (bytes != round_len) {
5692 5695                          printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5693 5696                              bytes, round_len, args->data_len);
5694 5697                          printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5695 5698                              (void *)args->mblk, (void *)m);
5696 5699                          ASSERT(bytes == round_len);
5697 5700                  }
5698 5701  #endif
5699 5702                  if (iovcnt <= MAX_IOVECS) {
5700 5703                          iovp = iov;
5701 5704                  } else {
5702 5705                          iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5703 5706                  }
5704 5707                  mblk_to_iov(args->mblk, iovcnt, iovp);
5705 5708          } else if (args->rlist != NULL) {
5706 5709                  iovcnt = 1;
5707 5710                  iovp = iov;
5708 5711                  iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5709 5712                  iovp->iov_len = args->data_len;
5710 5713          } else {
5711 5714                  iovcnt = 1;
5712 5715                  iovp = iov;
5713 5716                  iovp->iov_base = args->data_val;
5714 5717                  iovp->iov_len = args->data_len;
5715 5718          }
5716 5719  
5717 5720          uio.uio_iov = iovp;
5718 5721          uio.uio_iovcnt = iovcnt;
5719 5722  
5720 5723          uio.uio_segflg = UIO_SYSSPACE;
5721 5724          uio.uio_extflg = UIO_COPY_DEFAULT;
5722 5725          uio.uio_loffset = args->offset;
5723 5726          uio.uio_resid = args->data_len;
5724 5727          uio.uio_llimit = curproc->p_fsz_ctl;
5725 5728          rlimit = uio.uio_llimit - args->offset;
5726 5729          if (rlimit < (u_offset_t)uio.uio_resid)
5727 5730                  uio.uio_resid = (int)rlimit;
5728 5731  
5729 5732          if (args->stable == UNSTABLE4)
5730 5733                  ioflag = 0;
5731 5734          else if (args->stable == FILE_SYNC4)
5732 5735                  ioflag = FSYNC;
5733 5736          else if (args->stable == DATA_SYNC4)
5734 5737                  ioflag = FDSYNC;
5735 5738          else {
5736 5739                  if (iovp != iov)
5737 5740                          kmem_free(iovp, sizeof (*iovp) * iovcnt);
5738 5741                  *cs->statusp = resp->status = NFS4ERR_INVAL;
5739 5742                  goto out;
5740 5743          }
5741 5744  
5742 5745          /*
5743 5746           * We're changing creds because VM may fault and we need
5744 5747           * the cred of the current thread to be used if quota
5745 5748           * checking is enabled.
5746 5749           */
5747 5750          savecred = curthread->t_cred;
5748 5751          curthread->t_cred = cr;
5749 5752          error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5750 5753          curthread->t_cred = savecred;
5751 5754  
5752 5755          if (iovp != iov)
5753 5756                  kmem_free(iovp, sizeof (*iovp) * iovcnt);
5754 5757  
5755 5758          if (error) {
5756 5759                  *cs->statusp = resp->status = puterrno4(error);
5757 5760                  goto out;
5758 5761          }
5759 5762  
5760 5763          *cs->statusp = resp->status = NFS4_OK;
5761 5764          resp->count = args->data_len - uio.uio_resid;
5762 5765  
5763 5766          if (ioflag == 0)
5764 5767                  resp->committed = UNSTABLE4;
5765 5768          else
5766 5769                  resp->committed = FILE_SYNC4;
5767 5770  
5768 5771          resp->writeverf = nsrv4->write4verf;
5769 5772  
5770 5773  out:
5771 5774          if (in_crit)
5772 5775                  nbl_end_crit(vp);
5773 5776  
5774 5777          DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5775 5778              WRITE4res *, resp);
5776 5779  }
5777 5780  
5778 5781  
5779 5782  /* XXX put in a header file */
5780 5783  extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5781 5784  
5782 5785  void
5783 5786  rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5784 5787      struct svc_req *req, cred_t *cr, int *rv)
5785 5788  {
5786 5789          uint_t i;
5787 5790          struct compound_state cs;
5788 5791          nfs4_srv_t *nsrv4;
5789 5792          nfs_export_t *ne = nfs_get_export();
5790 5793  
5791 5794          if (rv != NULL)
5792 5795                  *rv = 0;
5793 5796          rfs4_init_compound_state(&cs);
5794 5797          /*
5795 5798           * Form a reply tag by copying over the request tag.
5796 5799           */
5797 5800          resp->tag.utf8string_val =
5798 5801              kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5799 5802          resp->tag.utf8string_len = args->tag.utf8string_len;
5800 5803          bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5801 5804              resp->tag.utf8string_len);
5802 5805  
5803 5806          cs.statusp = &resp->status;
5804 5807          cs.req = req;
5805 5808          resp->array = NULL;
5806 5809          resp->array_len = 0;
5807 5810  
5808 5811          /*
5809 5812           * XXX for now, minorversion should be zero
5810 5813           */
5811 5814          if (args->minorversion != NFS4_MINORVERSION) {
5812 5815                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5813 5816                      &cs, COMPOUND4args *, args);
5814 5817                  resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5815 5818                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5816 5819                      &cs, COMPOUND4res *, resp);
5817 5820                  return;
5818 5821          }
5819 5822  
5820 5823          if (args->array_len == 0) {
5821 5824                  resp->status = NFS4_OK;
5822 5825                  return;
5823 5826          }
5824 5827  
5825 5828          ASSERT(exi == NULL);
5826 5829          ASSERT(cr == NULL);
5827 5830  
5828 5831          cr = crget();
5829 5832          ASSERT(cr != NULL);
5830 5833  
5831 5834          if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5832 5835                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5833 5836                      &cs, COMPOUND4args *, args);
5834 5837                  crfree(cr);
5835 5838                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5836 5839                      &cs, COMPOUND4res *, resp);
  
    | 
      ↓ open down ↓ | 
    159 lines elided | 
    
      ↑ open up ↑ | 
  
5837 5840                  svcerr_badcred(req->rq_xprt);
5838 5841                  if (rv != NULL)
5839 5842                          *rv = 1;
5840 5843                  return;
5841 5844          }
5842 5845          resp->array_len = args->array_len;
5843 5846          resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5844 5847              KM_SLEEP);
5845 5848  
5846 5849          cs.basecr = cr;
5847      -        nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     5850 +        nsrv4 = nfs4_get_srv();
5848 5851  
5849 5852          DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5850 5853              COMPOUND4args *, args);
5851 5854  
5852 5855          /*
5853 5856           * For now, NFS4 compound processing must be protected by
5854 5857           * exported_lock because it can access more than one exportinfo
5855 5858           * per compound and share/unshare can now change multiple
5856 5859           * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5857 5860           * per proc (excluding public exinfo), and exi_count design
5858 5861           * is sufficient to protect concurrent execution of NFS2/3
5859 5862           * ops along with unexport.  This lock will be removed as
5860 5863           * part of the NFSv4 phase 2 namespace redesign work.
5861 5864           */
5862 5865          rw_enter(&ne->exported_lock, RW_READER);
5863 5866  
5864 5867          /*
5865 5868           * If this is the first compound we've seen, we need to start all
5866 5869           * new instances' grace periods.
5867 5870           */
5868 5871          if (nsrv4->seen_first_compound == 0) {
5869 5872                  rfs4_grace_start_new(nsrv4);
5870 5873                  /*
5871 5874                   * This must be set after rfs4_grace_start_new(), otherwise
5872 5875                   * another thread could proceed past here before the former
5873 5876                   * is finished.
5874 5877                   */
5875 5878                  nsrv4->seen_first_compound = 1;
5876 5879          }
5877 5880  
5878 5881          for (i = 0; i < args->array_len && cs.cont; i++) {
5879 5882                  nfs_argop4 *argop;
5880 5883                  nfs_resop4 *resop;
5881 5884                  uint_t op;
5882 5885  
5883 5886                  argop = &args->array[i];
5884 5887                  resop = &resp->array[i];
5885 5888                  resop->resop = argop->argop;
5886 5889                  op = (uint_t)resop->resop;
5887 5890  
5888 5891                  if (op < rfsv4disp_cnt) {
5889 5892                          /*
5890 5893                           * Count the individual ops here; NULL and COMPOUND
5891 5894                           * are counted in common_dispatch()
5892 5895                           */
5893 5896                          rfsproccnt_v4_ptr[op].value.ui64++;
5894 5897  
5895 5898                          NFS4_DEBUG(rfs4_debug > 1,
5896 5899                              (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5897 5900                          (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5898 5901                          NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5899 5902                              rfs4_op_string[op], *cs.statusp));
5900 5903                          if (*cs.statusp != NFS4_OK)
5901 5904                                  cs.cont = FALSE;
5902 5905                  } else {
5903 5906                          /*
5904 5907                           * This is effectively dead code since XDR code
5905 5908                           * will have already returned BADXDR if op doesn't
5906 5909                           * decode to legal value.  This is only done for a
5907 5910                           * day when XDR code doesn't verify v4 opcodes.
5908 5911                           */
5909 5912                          op = OP_ILLEGAL;
5910 5913                          rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5911 5914  
5912 5915                          rfs4_op_illegal(argop, resop, req, &cs);
5913 5916                          cs.cont = FALSE;
5914 5917                  }
5915 5918  
5916 5919                  /*
5917 5920                   * If not at last op, and if we are to stop, then
5918 5921                   * compact the results array.
5919 5922                   */
5920 5923                  if ((i + 1) < args->array_len && !cs.cont) {
5921 5924                          nfs_resop4 *new_res = kmem_alloc(
5922 5925                              (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5923 5926                          bcopy(resp->array,
5924 5927                              new_res, (i+1) * sizeof (nfs_resop4));
5925 5928                          kmem_free(resp->array,
5926 5929                              args->array_len * sizeof (nfs_resop4));
5927 5930  
5928 5931                          resp->array_len =  i + 1;
5929 5932                          resp->array = new_res;
5930 5933                  }
5931 5934          }
5932 5935  
5933 5936          rw_exit(&ne->exported_lock);
5934 5937  
5935 5938          /*
5936 5939           * clear exportinfo and vnode fields from compound_state before dtrace
5937 5940           * probe, to avoid tracing residual values for path and share path.
5938 5941           */
5939 5942          if (cs.vp)
5940 5943                  VN_RELE(cs.vp);
5941 5944          if (cs.saved_vp)
5942 5945                  VN_RELE(cs.saved_vp);
5943 5946          cs.exi = cs.saved_exi = NULL;
5944 5947          cs.vp = cs.saved_vp = NULL;
5945 5948  
5946 5949          DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5947 5950              COMPOUND4res *, resp);
5948 5951  
5949 5952          if (cs.saved_fh.nfs_fh4_val)
5950 5953                  kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5951 5954  
5952 5955          if (cs.basecr)
5953 5956                  crfree(cs.basecr);
5954 5957          if (cs.cr)
5955 5958                  crfree(cs.cr);
5956 5959          /*
5957 5960           * done with this compound request, free the label
5958 5961           */
5959 5962  
5960 5963          if (req->rq_label != NULL) {
5961 5964                  kmem_free(req->rq_label, sizeof (bslabel_t));
5962 5965                  req->rq_label = NULL;
5963 5966          }
5964 5967  }
5965 5968  
5966 5969  /*
5967 5970   * XXX because of what appears to be duplicate calls to rfs4_compound_free
5968 5971   * XXX zero out the tag and array values. Need to investigate why the
5969 5972   * XXX calls occur, but at least prevent the panic for now.
5970 5973   */
5971 5974  void
5972 5975  rfs4_compound_free(COMPOUND4res *resp)
5973 5976  {
5974 5977          uint_t i;
5975 5978  
5976 5979          if (resp->tag.utf8string_val) {
5977 5980                  UTF8STRING_FREE(resp->tag)
5978 5981          }
5979 5982  
5980 5983          for (i = 0; i < resp->array_len; i++) {
5981 5984                  nfs_resop4 *resop;
5982 5985                  uint_t op;
5983 5986  
5984 5987                  resop = &resp->array[i];
5985 5988                  op = (uint_t)resop->resop;
5986 5989                  if (op < rfsv4disp_cnt) {
5987 5990                          (*rfsv4disptab[op].dis_resfree)(resop);
5988 5991                  }
5989 5992          }
5990 5993          if (resp->array != NULL) {
5991 5994                  kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5992 5995          }
5993 5996  }
5994 5997  
5995 5998  /*
5996 5999   * Process the value of the compound request rpc flags, as a bit-AND
5997 6000   * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5998 6001   */
5999 6002  void
6000 6003  rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6001 6004  {
6002 6005          int i;
6003 6006          int flag = RPC_ALL;
6004 6007  
6005 6008          for (i = 0; flag && i < args->array_len; i++) {
6006 6009                  uint_t op;
6007 6010  
6008 6011                  op = (uint_t)args->array[i].argop;
6009 6012  
6010 6013                  if (op < rfsv4disp_cnt)
6011 6014                          flag &= rfsv4disptab[op].dis_flags;
6012 6015                  else
6013 6016                          flag = 0;
6014 6017          }
6015 6018          *flagp = flag;
6016 6019  }
6017 6020  
6018 6021  nfsstat4
6019 6022  rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6020 6023  {
6021 6024          nfsstat4 e;
6022 6025  
6023 6026          rfs4_dbe_lock(cp->rc_dbe);
6024 6027  
6025 6028          if (cp->rc_sysidt != LM_NOSYSID) {
6026 6029                  *sp = cp->rc_sysidt;
6027 6030                  e = NFS4_OK;
6028 6031  
6029 6032          } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6030 6033                  *sp = cp->rc_sysidt;
6031 6034                  e = NFS4_OK;
6032 6035  
6033 6036                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6034 6037                      "rfs4_client_sysid: allocated 0x%x\n", *sp));
6035 6038          } else
6036 6039                  e = NFS4ERR_DELAY;
6037 6040  
6038 6041          rfs4_dbe_unlock(cp->rc_dbe);
6039 6042          return (e);
6040 6043  }
6041 6044  
6042 6045  #if defined(DEBUG) && ! defined(lint)
6043 6046  static void lock_print(char *str, int operation, struct flock64 *flk)
6044 6047  {
6045 6048          char *op, *type;
6046 6049  
6047 6050          switch (operation) {
6048 6051          case F_GETLK: op = "F_GETLK";
6049 6052                  break;
6050 6053          case F_SETLK: op = "F_SETLK";
6051 6054                  break;
6052 6055          case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
6053 6056                  break;
6054 6057          default: op = "F_UNKNOWN";
6055 6058                  break;
6056 6059          }
6057 6060          switch (flk->l_type) {
6058 6061          case F_UNLCK: type = "F_UNLCK";
6059 6062                  break;
6060 6063          case F_RDLCK: type = "F_RDLCK";
6061 6064                  break;
6062 6065          case F_WRLCK: type = "F_WRLCK";
6063 6066                  break;
6064 6067          default: type = "F_UNKNOWN";
6065 6068                  break;
6066 6069          }
6067 6070  
6068 6071          ASSERT(flk->l_whence == 0);
6069 6072          cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
6070 6073              str, op, type, (longlong_t)flk->l_start,
6071 6074              flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
6072 6075  }
6073 6076  
6074 6077  #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
6075 6078  #else
6076 6079  #define LOCK_PRINT(d, s, t, f)
6077 6080  #endif
6078 6081  
6079 6082  /*ARGSUSED*/
6080 6083  static bool_t
6081 6084  creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6082 6085  {
6083 6086          return (TRUE);
6084 6087  }
6085 6088  
6086 6089  /*
6087 6090   * Look up the pathname using the vp in cs as the directory vnode.
6088 6091   * cs->vp will be the vnode for the file on success
6089 6092   */
6090 6093  
6091 6094  static nfsstat4
6092 6095  rfs4_lookup(component4 *component, struct svc_req *req,
6093 6096      struct compound_state *cs)
6094 6097  {
6095 6098          char *nm;
6096 6099          uint32_t len;
6097 6100          nfsstat4 status;
6098 6101          struct sockaddr *ca;
6099 6102          char *name;
6100 6103  
6101 6104          if (cs->vp == NULL) {
6102 6105                  return (NFS4ERR_NOFILEHANDLE);
6103 6106          }
6104 6107          if (cs->vp->v_type != VDIR) {
6105 6108                  return (NFS4ERR_NOTDIR);
6106 6109          }
6107 6110  
6108 6111          status = utf8_dir_verify(component);
6109 6112          if (status != NFS4_OK)
6110 6113                  return (status);
6111 6114  
6112 6115          nm = utf8_to_fn(component, &len, NULL);
6113 6116          if (nm == NULL) {
6114 6117                  return (NFS4ERR_INVAL);
6115 6118          }
6116 6119  
6117 6120          if (len > MAXNAMELEN) {
6118 6121                  kmem_free(nm, len);
6119 6122                  return (NFS4ERR_NAMETOOLONG);
6120 6123          }
6121 6124  
6122 6125          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6123 6126          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6124 6127              MAXPATHLEN + 1);
6125 6128  
6126 6129          if (name == NULL) {
6127 6130                  kmem_free(nm, len);
6128 6131                  return (NFS4ERR_INVAL);
6129 6132          }
6130 6133  
6131 6134          status = do_rfs4_op_lookup(name, req, cs);
6132 6135  
6133 6136          if (name != nm)
6134 6137                  kmem_free(name, MAXPATHLEN + 1);
6135 6138  
6136 6139          kmem_free(nm, len);
6137 6140  
6138 6141          return (status);
6139 6142  }
6140 6143  
6141 6144  static nfsstat4
6142 6145  rfs4_lookupfile(component4 *component, struct svc_req *req,
6143 6146      struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6144 6147  {
6145 6148          nfsstat4 status;
6146 6149          vnode_t *dvp = cs->vp;
6147 6150          vattr_t bva, ava, fva;
6148 6151          int error;
6149 6152  
6150 6153          /* Get "before" change value */
6151 6154          bva.va_mask = AT_CTIME|AT_SEQ;
6152 6155          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6153 6156          if (error)
6154 6157                  return (puterrno4(error));
6155 6158  
6156 6159          /* rfs4_lookup may VN_RELE directory */
6157 6160          VN_HOLD(dvp);
6158 6161  
6159 6162          status = rfs4_lookup(component, req, cs);
6160 6163          if (status != NFS4_OK) {
6161 6164                  VN_RELE(dvp);
6162 6165                  return (status);
6163 6166          }
6164 6167  
6165 6168          /*
6166 6169           * Get "after" change value, if it fails, simply return the
6167 6170           * before value.
6168 6171           */
6169 6172          ava.va_mask = AT_CTIME|AT_SEQ;
6170 6173          if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6171 6174                  ava.va_ctime = bva.va_ctime;
6172 6175                  ava.va_seq = 0;
6173 6176          }
6174 6177          VN_RELE(dvp);
6175 6178  
6176 6179          /*
6177 6180           * Validate the file is a file
6178 6181           */
6179 6182          fva.va_mask = AT_TYPE|AT_MODE;
6180 6183          error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6181 6184          if (error)
6182 6185                  return (puterrno4(error));
6183 6186  
6184 6187          if (fva.va_type != VREG) {
6185 6188                  if (fva.va_type == VDIR)
6186 6189                          return (NFS4ERR_ISDIR);
6187 6190                  if (fva.va_type == VLNK)
6188 6191                          return (NFS4ERR_SYMLINK);
6189 6192                  return (NFS4ERR_INVAL);
6190 6193          }
6191 6194  
6192 6195          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6193 6196          NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6194 6197  
6195 6198          /*
6196 6199           * It is undefined if VOP_LOOKUP will change va_seq, so
6197 6200           * cinfo.atomic = TRUE only if we have
6198 6201           * non-zero va_seq's, and they have not changed.
6199 6202           */
6200 6203          if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6201 6204                  cinfo->atomic = TRUE;
6202 6205          else
6203 6206                  cinfo->atomic = FALSE;
6204 6207  
6205 6208          /* Check for mandatory locking */
6206 6209          cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6207 6210          return (check_open_access(access, cs, req));
6208 6211  }
6209 6212  
6210 6213  static nfsstat4
6211 6214  create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6212 6215      cred_t *cr, vnode_t **vpp, bool_t *created)
6213 6216  {
6214 6217          int error;
6215 6218          nfsstat4 status = NFS4_OK;
6216 6219          vattr_t va;
6217 6220  
6218 6221  tryagain:
6219 6222  
6220 6223          /*
6221 6224           * The file open mode used is VWRITE.  If the client needs
6222 6225           * some other semantic, then it should do the access checking
6223 6226           * itself.  It would have been nice to have the file open mode
6224 6227           * passed as part of the arguments.
6225 6228           */
6226 6229  
6227 6230          *created = TRUE;
6228 6231          error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6229 6232  
6230 6233          if (error) {
6231 6234                  *created = FALSE;
6232 6235  
6233 6236                  /*
6234 6237                   * If we got something other than file already exists
6235 6238                   * then just return this error.  Otherwise, we got
6236 6239                   * EEXIST.  If we were doing a GUARDED create, then
6237 6240                   * just return this error.  Otherwise, we need to
6238 6241                   * make sure that this wasn't a duplicate of an
6239 6242                   * exclusive create request.
6240 6243                   *
6241 6244                   * The assumption is made that a non-exclusive create
6242 6245                   * request will never return EEXIST.
6243 6246                   */
6244 6247  
6245 6248                  if (error != EEXIST || mode == GUARDED4) {
6246 6249                          status = puterrno4(error);
6247 6250                          return (status);
6248 6251                  }
6249 6252                  error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6250 6253                      NULL, NULL, NULL);
6251 6254  
6252 6255                  if (error) {
6253 6256                          /*
6254 6257                           * We couldn't find the file that we thought that
6255 6258                           * we just created.  So, we'll just try creating
6256 6259                           * it again.
6257 6260                           */
6258 6261                          if (error == ENOENT)
6259 6262                                  goto tryagain;
6260 6263  
6261 6264                          status = puterrno4(error);
6262 6265                          return (status);
6263 6266                  }
6264 6267  
6265 6268                  if (mode == UNCHECKED4) {
6266 6269                          /* existing object must be regular file */
6267 6270                          if ((*vpp)->v_type != VREG) {
6268 6271                                  if ((*vpp)->v_type == VDIR)
6269 6272                                          status = NFS4ERR_ISDIR;
6270 6273                                  else if ((*vpp)->v_type == VLNK)
6271 6274                                          status = NFS4ERR_SYMLINK;
6272 6275                                  else
6273 6276                                          status = NFS4ERR_INVAL;
6274 6277                                  VN_RELE(*vpp);
6275 6278                                  return (status);
6276 6279                          }
6277 6280  
6278 6281                          return (NFS4_OK);
6279 6282                  }
6280 6283  
6281 6284                  /* Check for duplicate request */
6282 6285                  va.va_mask = AT_MTIME;
6283 6286                  error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6284 6287                  if (!error) {
6285 6288                          /* We found the file */
6286 6289                          const timestruc_t *mtime = &vap->va_mtime;
6287 6290  
6288 6291                          if (va.va_mtime.tv_sec != mtime->tv_sec ||
6289 6292                              va.va_mtime.tv_nsec != mtime->tv_nsec) {
6290 6293                                  /* but its not our creation */
6291 6294                                  VN_RELE(*vpp);
6292 6295                                  return (NFS4ERR_EXIST);
6293 6296                          }
6294 6297                          *created = TRUE; /* retrans of create == created */
6295 6298                          return (NFS4_OK);
6296 6299                  }
6297 6300                  VN_RELE(*vpp);
6298 6301                  return (NFS4ERR_EXIST);
6299 6302          }
6300 6303  
6301 6304          return (NFS4_OK);
6302 6305  }
6303 6306  
6304 6307  static nfsstat4
6305 6308  check_open_access(uint32_t access, struct compound_state *cs,
6306 6309      struct svc_req *req)
6307 6310  {
6308 6311          int error;
6309 6312          vnode_t *vp;
6310 6313          bool_t readonly;
6311 6314          cred_t *cr = cs->cr;
6312 6315  
6313 6316          /* For now we don't allow mandatory locking as per V2/V3 */
6314 6317          if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6315 6318                  return (NFS4ERR_ACCESS);
6316 6319          }
6317 6320  
6318 6321          vp = cs->vp;
6319 6322          ASSERT(cr != NULL && vp->v_type == VREG);
6320 6323  
6321 6324          /*
6322 6325           * If the file system is exported read only and we are trying
6323 6326           * to open for write, then return NFS4ERR_ROFS
6324 6327           */
6325 6328  
6326 6329          readonly = rdonly4(req, cs);
6327 6330  
6328 6331          if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6329 6332                  return (NFS4ERR_ROFS);
6330 6333  
6331 6334          if (access & OPEN4_SHARE_ACCESS_READ) {
6332 6335                  if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6333 6336                      (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6334 6337                          return (NFS4ERR_ACCESS);
6335 6338                  }
6336 6339          }
6337 6340  
6338 6341          if (access & OPEN4_SHARE_ACCESS_WRITE) {
6339 6342                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6340 6343                  if (error)
6341 6344                          return (NFS4ERR_ACCESS);
6342 6345          }
6343 6346  
6344 6347          return (NFS4_OK);
6345 6348  }
6346 6349  
6347 6350  static nfsstat4
6348 6351  rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6349 6352      change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6350 6353  {
6351 6354          struct nfs4_svgetit_arg sarg;
6352 6355          struct nfs4_ntov_table ntov;
6353 6356  
6354 6357          bool_t ntov_table_init = FALSE;
6355 6358          struct statvfs64 sb;
6356 6359          nfsstat4 status;
6357 6360          vnode_t *vp;
6358 6361          vattr_t bva, ava, iva, cva, *vap;
6359 6362          vnode_t *dvp;
6360 6363          timespec32_t *mtime;
6361 6364          char *nm = NULL;
6362 6365          uint_t buflen;
6363 6366          bool_t created;
6364 6367          bool_t setsize = FALSE;
6365 6368          len_t reqsize;
6366 6369          int error;
6367 6370          bool_t trunc;
6368 6371          caller_context_t ct;
6369 6372          component4 *component;
6370 6373          bslabel_t *clabel;
6371 6374          struct sockaddr *ca;
6372 6375          char *name = NULL;
6373 6376  
6374 6377          sarg.sbp = &sb;
6375 6378          sarg.is_referral = B_FALSE;
6376 6379  
6377 6380          dvp = cs->vp;
6378 6381  
6379 6382          /* Check if the file system is read only */
6380 6383          if (rdonly4(req, cs))
6381 6384                  return (NFS4ERR_ROFS);
6382 6385  
6383 6386          /* check the label of including directory */
6384 6387          if (is_system_labeled()) {
6385 6388                  ASSERT(req->rq_label != NULL);
6386 6389                  clabel = req->rq_label;
6387 6390                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6388 6391                      "got client label from request(1)",
6389 6392                      struct svc_req *, req);
6390 6393                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
6391 6394                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6392 6395                              cs->exi)) {
6393 6396                                  return (NFS4ERR_ACCESS);
6394 6397                          }
6395 6398                  }
6396 6399          }
6397 6400  
6398 6401          /*
6399 6402           * Get the last component of path name in nm. cs will reference
6400 6403           * the including directory on success.
6401 6404           */
6402 6405          component = &args->open_claim4_u.file;
6403 6406          status = utf8_dir_verify(component);
6404 6407          if (status != NFS4_OK)
6405 6408                  return (status);
6406 6409  
6407 6410          nm = utf8_to_fn(component, &buflen, NULL);
6408 6411  
6409 6412          if (nm == NULL)
6410 6413                  return (NFS4ERR_RESOURCE);
6411 6414  
6412 6415          if (buflen > MAXNAMELEN) {
6413 6416                  kmem_free(nm, buflen);
6414 6417                  return (NFS4ERR_NAMETOOLONG);
6415 6418          }
6416 6419  
6417 6420          bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6418 6421          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6419 6422          if (error) {
6420 6423                  kmem_free(nm, buflen);
6421 6424                  return (puterrno4(error));
6422 6425          }
6423 6426  
6424 6427          if (bva.va_type != VDIR) {
6425 6428                  kmem_free(nm, buflen);
6426 6429                  return (NFS4ERR_NOTDIR);
6427 6430          }
6428 6431  
6429 6432          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6430 6433  
6431 6434          switch (args->mode) {
6432 6435          case GUARDED4:
6433 6436                  /*FALLTHROUGH*/
6434 6437          case UNCHECKED4:
6435 6438                  nfs4_ntov_table_init(&ntov);
6436 6439                  ntov_table_init = TRUE;
6437 6440  
6438 6441                  *attrset = 0;
6439 6442                  status = do_rfs4_set_attrs(attrset,
6440 6443                      &args->createhow4_u.createattrs,
6441 6444                      cs, &sarg, &ntov, NFS4ATTR_SETIT);
6442 6445  
6443 6446                  if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6444 6447                      sarg.vap->va_type != VREG) {
6445 6448                          if (sarg.vap->va_type == VDIR)
6446 6449                                  status = NFS4ERR_ISDIR;
6447 6450                          else if (sarg.vap->va_type == VLNK)
6448 6451                                  status = NFS4ERR_SYMLINK;
6449 6452                          else
6450 6453                                  status = NFS4ERR_INVAL;
6451 6454                  }
6452 6455  
6453 6456                  if (status != NFS4_OK) {
6454 6457                          kmem_free(nm, buflen);
6455 6458                          nfs4_ntov_table_free(&ntov, &sarg);
6456 6459                          *attrset = 0;
6457 6460                          return (status);
6458 6461                  }
6459 6462  
6460 6463                  vap = sarg.vap;
6461 6464                  vap->va_type = VREG;
6462 6465                  vap->va_mask |= AT_TYPE;
6463 6466  
6464 6467                  if ((vap->va_mask & AT_MODE) == 0) {
6465 6468                          vap->va_mask |= AT_MODE;
6466 6469                          vap->va_mode = (mode_t)0600;
6467 6470                  }
6468 6471  
6469 6472                  if (vap->va_mask & AT_SIZE) {
6470 6473  
6471 6474                          /* Disallow create with a non-zero size */
6472 6475  
6473 6476                          if ((reqsize = sarg.vap->va_size) != 0) {
6474 6477                                  kmem_free(nm, buflen);
6475 6478                                  nfs4_ntov_table_free(&ntov, &sarg);
6476 6479                                  *attrset = 0;
6477 6480                                  return (NFS4ERR_INVAL);
6478 6481                          }
6479 6482                          setsize = TRUE;
6480 6483                  }
6481 6484                  break;
6482 6485  
6483 6486          case EXCLUSIVE4:
6484 6487                  /* prohibit EXCL create of named attributes */
6485 6488                  if (dvp->v_flag & V_XATTRDIR) {
6486 6489                          kmem_free(nm, buflen);
6487 6490                          *attrset = 0;
6488 6491                          return (NFS4ERR_INVAL);
6489 6492                  }
6490 6493  
6491 6494                  cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6492 6495                  cva.va_type = VREG;
6493 6496                  /*
6494 6497                   * Ensure no time overflows. Assumes underlying
6495 6498                   * filesystem supports at least 32 bits.
6496 6499                   * Truncate nsec to usec resolution to allow valid
6497 6500                   * compares even if the underlying filesystem truncates.
6498 6501                   */
6499 6502                  mtime = (timespec32_t *)&args->createhow4_u.createverf;
6500 6503                  cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6501 6504                  cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6502 6505                  cva.va_mode = (mode_t)0;
6503 6506                  vap = &cva;
6504 6507  
6505 6508                  /*
6506 6509                   * For EXCL create, attrset is set to the server attr
6507 6510                   * used to cache the client's verifier.
6508 6511                   */
6509 6512                  *attrset = FATTR4_TIME_MODIFY_MASK;
6510 6513                  break;
6511 6514          }
6512 6515  
6513 6516          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6514 6517          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6515 6518              MAXPATHLEN  + 1);
6516 6519  
6517 6520          if (name == NULL) {
6518 6521                  kmem_free(nm, buflen);
6519 6522                  return (NFS4ERR_SERVERFAULT);
6520 6523          }
6521 6524  
6522 6525          status = create_vnode(dvp, name, vap, args->mode,
6523 6526              cs->cr, &vp, &created);
6524 6527          if (nm != name)
6525 6528                  kmem_free(name, MAXPATHLEN + 1);
6526 6529          kmem_free(nm, buflen);
6527 6530  
6528 6531          if (status != NFS4_OK) {
6529 6532                  if (ntov_table_init)
6530 6533                          nfs4_ntov_table_free(&ntov, &sarg);
6531 6534                  *attrset = 0;
6532 6535                  return (status);
6533 6536          }
6534 6537  
6535 6538          trunc = (setsize && !created);
6536 6539  
6537 6540          if (args->mode != EXCLUSIVE4) {
6538 6541                  bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6539 6542  
6540 6543                  /*
6541 6544                   * True verification that object was created with correct
6542 6545                   * attrs is impossible.  The attrs could have been changed
6543 6546                   * immediately after object creation.  If attributes did
6544 6547                   * not verify, the only recourse for the server is to
6545 6548                   * destroy the object.  Maybe if some attrs (like gid)
6546 6549                   * are set incorrectly, the object should be destroyed;
6547 6550                   * however, seems bad as a default policy.  Do we really
6548 6551                   * want to destroy an object over one of the times not
6549 6552                   * verifying correctly?  For these reasons, the server
6550 6553                   * currently sets bits in attrset for createattrs
6551 6554                   * that were set; however, no verification is done.
6552 6555                   *
6553 6556                   * vmask_to_nmask accounts for vattr bits set on create
6554 6557                   *      [do_rfs4_set_attrs() only sets resp bits for
6555 6558                   *       non-vattr/vfs bits.]
6556 6559                   * Mask off any bits we set by default so as not to return
6557 6560                   * more attrset bits than were requested in createattrs
6558 6561                   */
6559 6562                  if (created) {
6560 6563                          nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6561 6564                          *attrset &= createmask;
6562 6565                  } else {
6563 6566                          /*
6564 6567                           * We did not create the vnode (we tried but it
6565 6568                           * already existed).  In this case, the only createattr
6566 6569                           * that the spec allows the server to set is size,
6567 6570                           * and even then, it can only be set if it is 0.
6568 6571                           */
6569 6572                          *attrset = 0;
6570 6573                          if (trunc)
6571 6574                                  *attrset = FATTR4_SIZE_MASK;
6572 6575                  }
6573 6576          }
6574 6577          if (ntov_table_init)
6575 6578                  nfs4_ntov_table_free(&ntov, &sarg);
6576 6579  
6577 6580          /*
6578 6581           * Get the initial "after" sequence number, if it fails,
6579 6582           * set to zero, time to before.
6580 6583           */
6581 6584          iva.va_mask = AT_CTIME|AT_SEQ;
6582 6585          if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6583 6586                  iva.va_seq = 0;
6584 6587                  iva.va_ctime = bva.va_ctime;
6585 6588          }
6586 6589  
6587 6590          /*
6588 6591           * create_vnode attempts to create the file exclusive,
6589 6592           * if it already exists the VOP_CREATE will fail and
6590 6593           * may not increase va_seq. It is atomic if
6591 6594           * we haven't changed the directory, but if it has changed
6592 6595           * we don't know what changed it.
6593 6596           */
6594 6597          if (!created) {
6595 6598                  if (bva.va_seq && iva.va_seq &&
6596 6599                      bva.va_seq == iva.va_seq)
6597 6600                          cinfo->atomic = TRUE;
6598 6601                  else
6599 6602                          cinfo->atomic = FALSE;
6600 6603                  NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6601 6604          } else {
6602 6605                  /*
6603 6606                   * The entry was created, we need to sync the
6604 6607                   * directory metadata.
6605 6608                   */
6606 6609                  (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6607 6610  
6608 6611                  /*
6609 6612                   * Get "after" change value, if it fails, simply return the
6610 6613                   * before value.
6611 6614                   */
6612 6615                  ava.va_mask = AT_CTIME|AT_SEQ;
6613 6616                  if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6614 6617                          ava.va_ctime = bva.va_ctime;
6615 6618                          ava.va_seq = 0;
6616 6619                  }
6617 6620  
6618 6621                  NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6619 6622  
6620 6623                  /*
6621 6624                   * The cinfo->atomic = TRUE only if we have
6622 6625                   * non-zero va_seq's, and it has incremented by exactly one
6623 6626                   * during the create_vnode and it didn't
6624 6627                   * change during the VOP_FSYNC.
6625 6628                   */
6626 6629                  if (bva.va_seq && iva.va_seq && ava.va_seq &&
6627 6630                      iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6628 6631                          cinfo->atomic = TRUE;
6629 6632                  else
6630 6633                          cinfo->atomic = FALSE;
6631 6634          }
6632 6635  
6633 6636          /* Check for mandatory locking and that the size gets set. */
6634 6637          cva.va_mask = AT_MODE;
6635 6638          if (setsize)
6636 6639                  cva.va_mask |= AT_SIZE;
6637 6640  
6638 6641          /* Assume the worst */
6639 6642          cs->mandlock = TRUE;
6640 6643  
6641 6644          if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6642 6645                  cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6643 6646  
6644 6647                  /*
6645 6648                   * Truncate the file if necessary; this would be
6646 6649                   * the case for create over an existing file.
6647 6650                   */
6648 6651  
  
    | 
      ↓ open down ↓ | 
    791 lines elided | 
    
      ↑ open up ↑ | 
  
6649 6652                  if (trunc) {
6650 6653                          int in_crit = 0;
6651 6654                          rfs4_file_t *fp;
6652 6655                          nfs4_srv_t *nsrv4;
6653 6656                          bool_t create = FALSE;
6654 6657  
6655 6658                          /*
6656 6659                           * We are writing over an existing file.
6657 6660                           * Check to see if we need to recall a delegation.
6658 6661                           */
6659      -                        nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     6662 +                        nsrv4 = nfs4_get_srv();
6660 6663                          rfs4_hold_deleg_policy(nsrv4);
6661 6664                          if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6662 6665                                  if (rfs4_check_delegated_byfp(FWRITE, fp,
6663 6666                                      (reqsize == 0), FALSE, FALSE, &clientid)) {
6664 6667                                          rfs4_file_rele(fp);
6665 6668                                          rfs4_rele_deleg_policy(nsrv4);
6666 6669                                          VN_RELE(vp);
6667 6670                                          *attrset = 0;
6668 6671                                          return (NFS4ERR_DELAY);
6669 6672                                  }
6670 6673                                  rfs4_file_rele(fp);
6671 6674                          }
6672 6675                          rfs4_rele_deleg_policy(nsrv4);
6673 6676  
6674 6677                          if (nbl_need_check(vp)) {
6675 6678                                  in_crit = 1;
6676 6679  
6677 6680                                  ASSERT(reqsize == 0);
6678 6681  
6679 6682                                  nbl_start_crit(vp, RW_READER);
6680 6683                                  if (nbl_conflict(vp, NBL_WRITE, 0,
6681 6684                                      cva.va_size, 0, NULL)) {
6682 6685                                          in_crit = 0;
6683 6686                                          nbl_end_crit(vp);
6684 6687                                          VN_RELE(vp);
6685 6688                                          *attrset = 0;
6686 6689                                          return (NFS4ERR_ACCESS);
6687 6690                                  }
6688 6691                          }
6689 6692                          ct.cc_sysid = 0;
6690 6693                          ct.cc_pid = 0;
6691 6694                          ct.cc_caller_id = nfs4_srv_caller_id;
6692 6695                          ct.cc_flags = CC_DONTBLOCK;
6693 6696  
6694 6697                          cva.va_mask = AT_SIZE;
6695 6698                          cva.va_size = reqsize;
6696 6699                          (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6697 6700                          if (in_crit)
6698 6701                                  nbl_end_crit(vp);
6699 6702                  }
6700 6703          }
6701 6704  
6702 6705          error = makefh4(&cs->fh, vp, cs->exi);
6703 6706  
6704 6707          /*
6705 6708           * Force modified data and metadata out to stable storage.
6706 6709           */
6707 6710          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6708 6711  
6709 6712          if (error) {
6710 6713                  VN_RELE(vp);
6711 6714                  *attrset = 0;
6712 6715                  return (puterrno4(error));
6713 6716          }
6714 6717  
6715 6718          /* if parent dir is attrdir, set namedattr fh flag */
6716 6719          if (dvp->v_flag & V_XATTRDIR)
6717 6720                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6718 6721  
6719 6722          if (cs->vp)
6720 6723                  VN_RELE(cs->vp);
6721 6724  
6722 6725          cs->vp = vp;
6723 6726  
6724 6727          /*
6725 6728           * if we did not create the file, we will need to check
6726 6729           * the access bits on the file
6727 6730           */
6728 6731  
6729 6732          if (!created) {
6730 6733                  if (setsize)
6731 6734                          args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6732 6735                  status = check_open_access(args->share_access, cs, req);
6733 6736                  if (status != NFS4_OK)
6734 6737                          *attrset = 0;
6735 6738          }
6736 6739          return (status);
6737 6740  }
6738 6741  
6739 6742  /*ARGSUSED*/
           /*
            * Establish (or upgrade) the open state of open owner `oo' on the
            * file referenced by cs->vp / cs->fh and report the outcome in
            * resp->status.
            *
            * Steps, in order: look up the file struct and the owner/file state,
            * fetch the client's sysid, apply the share reservation through
            * rfs4_share() when this open adds share bits, recall a delegation
            * when rfs4_check_recall() asks for it, then either VOP_OPEN()
            * (first open and !deleg_cur) or vn_open_upgrade().  On success the
            * per-state and per-file access/deny bookkeeping is updated,
            * rfs4_grant_delegation() is called, and the stateid is advanced
            * and copied into resp->stateid.
            *
            *   cs         compound state; cs->vp, cs->fh and cs->cr are used,
            *              cs->deleg is set
            *   req        not used here
            *   oo         open owner whose state on the file is looked up
            *   deleg      delegation request handed to rfs4_grant_delegation()
            *   access     OPEN4_SHARE_ACCESS_* bits requested
            *   deny       OPEN4_SHARE_DENY_* bits requested
            *   resp       receives status, stateid and (if any) delegation
            *   deleg_cur  non-zero skips VOP_OPEN() and only does the upgrade
            *
            * Failure statuses set here: NFS4ERR_RESOURCE (no file struct or no
            * state), the rfs4_client_sysid() status, the rfs4_share() error,
            * NFS4ERR_OLD_STATEID (state closed while the locks were dropped for
            * the recall), NFS4ERR_DELAY (recall still needed after the delay,
            * or VOP_OPEN() returned EAGAIN with CC_WOULDBLOCK set) and
            * NFS4ERR_SERVERFAULT (any other VOP_OPEN() failure).
            */
6740 6743  static void
6741 6744  rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6742 6745      rfs4_openowner_t *oo, delegreq_t deleg,
6743 6746      uint32_t access, uint32_t deny,
6744 6747      OPEN4res *resp, int deleg_cur)
6745 6748  {
6746 6749          /* XXX Currently not using req  */
6747 6750          rfs4_state_t *sp;
6748 6751          rfs4_file_t *fp;
6749 6752          bool_t screate = TRUE;
6750 6753          bool_t fcreate = TRUE;
6751 6754          uint32_t open_a, share_a;
6752 6755          uint32_t open_d, share_d;
6753 6756          rfs4_deleg_state_t *dsp;
6754 6757          sysid_t sysid;
6755 6758          nfsstat4 status;
6756 6759          caller_context_t ct;
6757 6760          int fflags = 0;
6758 6761          int recall = 0;
6759 6762          int err;
6760 6763          int first_open;
6761 6764  
6762 6765          /* get the file struct and hold a lock on it during initial open */
6763 6766          fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6764 6767          if (fp == NULL) {
6765 6768                  resp->status = NFS4ERR_RESOURCE;
6766 6769                  DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6767 6770                  return;
6768 6771          }
6769 6772  
6770 6773          sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6771 6774          if (sp == NULL) {
6772 6775                  resp->status = NFS4ERR_RESOURCE;
6773 6776                  DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6774 6777                  /* No need to keep any reference */
6775 6778                  rw_exit(&fp->rf_file_rwlock);
6776 6779                  rfs4_file_rele(fp);
6777 6780                  return;
6778 6781          }
6779 6782  
                   /*
                    * NOTE(review): unlike the sp == NULL path above, none of the
                    * later error paths call rw_exit(&fp->rf_file_rwlock) --
                    * presumably the lock is released on the successful state
                    * lookup; confirm against rfs4_findstate_by_owner_file().
                    */
6780 6783          /* try to get the sysid before continuing */
6781 6784          if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6782 6785                  resp->status = status;
6783 6786                  rfs4_file_rele(fp);
6784 6787                  /* Not a fully formed open; "close" it */
6785 6788                  if (screate == TRUE)
6786 6789                          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6787 6790                  rfs4_state_rele(sp);
6788 6791                  return;
6789 6792          }
6790 6793  
6791 6794          /* Calculate the fflags for this OPEN. */
6792 6795          if (access & OPEN4_SHARE_ACCESS_READ)
6793 6796                  fflags |= FREAD;
6794 6797          if (access & OPEN4_SHARE_ACCESS_WRITE)
6795 6798                  fflags |= FWRITE;
6796 6799  
6797 6800          rfs4_dbe_lock(sp->rs_dbe);
6798 6801  
6799 6802          /*
6800 6803           * Calculate the new deny and access mode that this open is adding to
6801 6804           * the file for this open owner;
6802 6805           */
6803 6806          open_d = (deny & ~sp->rs_open_deny);
6804 6807          open_a = (access & ~sp->rs_open_access);
6805 6808  
6806 6809          /*
6807 6810           * Calculate the new share access and share deny modes that this open
6808 6811           * is adding to the file for this open owner;
6809 6812           */
6810 6813          share_a = (access & ~sp->rs_share_access);
6811 6814          share_d = (deny & ~sp->rs_share_deny);
6812 6815  
                   /* No READ/WRITE access recorded on this state yet => first open. */
6813 6816          first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6814 6817  
6815 6818          /*
6816 6819           * Check to see the client has already sent an open for this
6817 6820           * open owner on this file with the same share/deny modes.
6818 6821           * If so, we don't need to check for a conflict and we don't
6819 6822           * need to add another shrlock.  If not, then we need to
6820 6823           * check for conflicts in deny and access before checking for
6821 6824           * conflicts in delegation.  We don't want to recall a
6822 6825           * delegation based on an open that will eventually fail based
6823 6826           * on shares modes.
6824 6827           */
6825 6828  
6826 6829          if (share_a || share_d) {
6827 6830                  if ((err = rfs4_share(sp, access, deny)) != 0) {
6828 6831                          rfs4_dbe_unlock(sp->rs_dbe);
6829 6832                          resp->status = err;
6830 6833  
6831 6834                          rfs4_file_rele(fp);
6832 6835                          /* Not a fully formed open; "close" it */
6833 6836                          if (screate == TRUE)
6834 6837                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6835 6838                          rfs4_state_rele(sp);
6836 6839                          return;
6837 6840                  }
6838 6841          }
6839 6842  
6840 6843          rfs4_dbe_lock(fp->rf_dbe);
6841 6844  
6842 6845          /*
6843 6846           * Check to see if this file is delegated and if so, if a
6844 6847           * recall needs to be done.
6845 6848           */
6846 6849          if (rfs4_check_recall(sp, access)) {
                           /*
                            * Both dbe locks are dropped across the recall and the
                            * delay; only the state lock is retaken before the
                            * rs_closed check below.
                            */
6847 6850                  rfs4_dbe_unlock(fp->rf_dbe);
6848 6851                  rfs4_dbe_unlock(sp->rs_dbe);
6849 6852                  rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6850 6853                  delay(NFS4_DELEGATION_CONFLICT_DELAY);
6851 6854                  rfs4_dbe_lock(sp->rs_dbe);
6852 6855  
6853 6856                  /* if state closed while lock was dropped */
6854 6857                  if (sp->rs_closed) {
6855 6858                          if (share_a || share_d)
6856 6859                                  (void) rfs4_unshare(sp);
6857 6860                          rfs4_dbe_unlock(sp->rs_dbe);
6858 6861                          rfs4_file_rele(fp);
6859 6862                          /* Not a fully formed open; "close" it */
6860 6863                          if (screate == TRUE)
6861 6864                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6862 6865                          rfs4_state_rele(sp);
6863 6866                          resp->status = NFS4ERR_OLD_STATEID;
6864 6867                          return;
6865 6868                  }
6866 6869  
6867 6870                  rfs4_dbe_lock(fp->rf_dbe);
6868 6871                  /* Let's see if the delegation was returned */
6869 6872                  if (rfs4_check_recall(sp, access)) {
6870 6873                          rfs4_dbe_unlock(fp->rf_dbe);
6871 6874                          if (share_a || share_d)
6872 6875                                  (void) rfs4_unshare(sp);
6873 6876                          rfs4_dbe_unlock(sp->rs_dbe);
6874 6877                          rfs4_file_rele(fp);
6875 6878                          rfs4_update_lease(sp->rs_owner->ro_client);
6876 6879  
6877 6880                          /* Not a fully formed open; "close" it */
6878 6881                          if (screate == TRUE)
6879 6882                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6880 6883                          rfs4_state_rele(sp);
6881 6884                          resp->status = NFS4ERR_DELAY;
6882 6885                          return;
6883 6886                  }
6884 6887          }
6885 6888          /*
6886 6889           * the share check passed and any delegation conflict has been
6887 6890           * taken care of, now call vop_open.
6888 6891           * if this is the first open then call vop_open with fflags.
6889 6892           * if not, call vn_open_upgrade with just the upgrade flags.
6890 6893           *
6891 6894           * if the file has been opened already, it will have the current
6892 6895           * access mode in the state struct.  if it has no share access, then
6893 6896           * this is a new open.
6894 6897           *
6895 6898           * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6896 6899           * call VOP_OPEN(), just do the open upgrade.
6897 6900           */
6898 6901          if (first_open && !deleg_cur) {
6899 6902                  ct.cc_sysid = sysid;
6900 6903                  ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6901 6904                  ct.cc_caller_id = nfs4_srv_caller_id;
6902 6905                  ct.cc_flags = CC_DONTBLOCK;
6903 6906                  err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6904 6907                  if (err) {
6905 6908                          rfs4_dbe_unlock(fp->rf_dbe);
6906 6909                          if (share_a || share_d)
6907 6910                                  (void) rfs4_unshare(sp);
6908 6911                          rfs4_dbe_unlock(sp->rs_dbe);
6909 6912                          rfs4_file_rele(fp);
6910 6913  
6911 6914                          /* Not a fully formed open; "close" it */
6912 6915                          if (screate == TRUE)
6913 6916                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6914 6917                          rfs4_state_rele(sp);
6915 6918                          /* check if a monitor detected a delegation conflict */
6916 6919                          if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6917 6920                                  resp->status = NFS4ERR_DELAY;
6918 6921                          else
6919 6922                                  resp->status = NFS4ERR_SERVERFAULT;
6920 6923                          return;
6921 6924                  }
6922 6925          } else { /* open upgrade */
6923 6926                  /*
6924 6927                   * calculate the fflags for the new mode that is being added
6925 6928                   * by this upgrade.
6926 6929                   */
6927 6930                  fflags = 0;
6928 6931                  if (open_a & OPEN4_SHARE_ACCESS_READ)
6929 6932                          fflags |= FREAD;
6930 6933                  if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6931 6934                          fflags |= FWRITE;
6932 6935                  vn_open_upgrade(cs->vp, fflags);
6933 6936          }
                   /*
                    * Record the full requested modes on the state; the per-file
                    * counters below are bumped only for the bits this open newly
                    * adds (open_d / open_a).
                    */
6934 6937          sp->rs_open_access |= access;
6935 6938          sp->rs_open_deny |= deny;
6936 6939  
6937 6940          if (open_d & OPEN4_SHARE_DENY_READ)
6938 6941                  fp->rf_deny_read++;
6939 6942          if (open_d & OPEN4_SHARE_DENY_WRITE)
6940 6943                  fp->rf_deny_write++;
6941 6944          fp->rf_share_deny |= deny;
6942 6945  
6943 6946          if (open_a & OPEN4_SHARE_ACCESS_READ)
6944 6947                  fp->rf_access_read++;
6945 6948          if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6946 6949                  fp->rf_access_write++;
6947 6950          fp->rf_share_access |= access;
6948 6951  
6949 6952          /*
6950 6953           * Check for delegation here. if the deleg argument is not
6951 6954           * DELEG_ANY, then this is a reclaim from a client and
6952 6955           * we must honor the delegation requested. If necessary we can
6953 6956           * set the recall flag.
6954 6957           */
6955 6958  
6956 6959          dsp = rfs4_grant_delegation(deleg, sp, &recall);
6957 6960  
                   /* cs->deleg is true only when the file holds a write delegation. */
6958 6961          cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6959 6962  
6960 6963          next_stateid(&sp->rs_stateid);
6961 6964  
6962 6965          resp->stateid = sp->rs_stateid.stateid;
6963 6966  
6964 6967          rfs4_dbe_unlock(fp->rf_dbe);
6965 6968          rfs4_dbe_unlock(sp->rs_dbe);
6966 6969  
6967 6970          if (dsp) {
6968 6971                  rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6969 6972                  rfs4_deleg_state_rele(dsp);
6970 6973          }
6971 6974  
6972 6975          rfs4_file_rele(fp);
6973 6976          rfs4_state_rele(sp);
6974 6977  
6975 6978          resp->status = NFS4_OK;
6976 6979  }
6977 6980  
6978 6981  /*ARGSUSED*/
           /*
            * Open a file named by args->open_claim4_u.file.
            *
            * With opentype OPEN4_NOCREATE the name is resolved through
            * rfs4_lookupfile(); otherwise rfs4_createfile() is called, with
            * delegation disabled around it for an EXCLUSIVE4 create.  If that
            * step returns NFS4_OK, rfs4_do_open() is called with DELEG_NONE
            * when oo->ro_need_confirm is set and DELEG_ANY otherwise, and
            * resp->attrset is cleared should the open itself fail.  If the
            * lookup/create step fails, its status is also stored through
            * cs->statusp.
            *
            * The result is left in resp->status; resp->cinfo and resp->attrset
            * are handed to the lookup/create helpers to fill in.
            */
6979 6982  static void
6980 6983  rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6981 6984      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6982 6985  {
6983 6986          change_info4 *cinfo = &resp->cinfo;
6984 6987          bitmap4 *attrset = &resp->attrset;
6985 6988  
6986 6989          if (args->opentype == OPEN4_NOCREATE)
6987 6990                  resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6988 6991                      req, cs, args->share_access, cinfo);
6989 6992          else {
6990 6993                  /* inhibit delegation grants during exclusive create */
6991 6994  
6992 6995                  if (args->mode == EXCLUSIVE4)
6993 6996                          rfs4_disable_delegation();
6994 6997  
6995 6998                  resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6996 6999                      oo->ro_client->rc_clientid);
6997 7000          }
6998 7001  
6999 7002          if (resp->status == NFS4_OK) {
7000 7003  
7001 7004                  /* cs->vp cs->fh now reference the desired file */
7002 7005  
7003 7006                  rfs4_do_open(cs, req, oo,
7004 7007                      oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7005 7008                      args->share_access, args->share_deny, resp, 0);
7006 7009  
7007 7010                  /*
7008 7011                   * If rfs4_createfile set attrset, we must
7009 7012                   * clear this attrset before the response is copied.
7010 7013                   */
7011 7014                  if (resp->status != NFS4_OK && resp->attrset) {
7012 7015                          resp->attrset = 0;
7013 7016                  }
7014 7017          }
7015 7018          else
7016 7019                  *cs->statusp = resp->status;
7017 7020  
                   /*
                    * NOTE(review): the matching rfs4_disable_delegation() above is
                    * reached only on the create branch, while this re-enable is
                    * gated on args->mode alone -- confirm args->mode cannot be
                    * EXCLUSIVE4 for an OPEN4_NOCREATE request.
                    */
7018 7021          if (args->mode == EXCLUSIVE4)
7019 7022                  rfs4_enable_delegation();
7020 7023  }
7021 7024  
7022 7025  /*ARGSUSED*/
           /*
            * Open the file already referenced by cs->vp for a CLAIM_PREVIOUS
            * style request (no name lookup is done here).
            *
            * The vnode must be a regular file; otherwise resp->status is set
            * to NFS4ERR_ISDIR, NFS4ERR_SYMLINK or NFS4ERR_INVAL.  The file's
            * mode and owner are fetched to set cs->mandlock and to decide
            * whether check_open_access() is needed (it is skipped when the
            * caller's uid equals the file's owner).  cinfo is zeroed and the
            * open is completed by rfs4_do_open(), passing the delegation type
            * taken from args->open_claim4_u.delegate_type.
            *
            * The result is left in resp->status.
            */
7023 7026  static void
7024 7027  rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7025 7028      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7026 7029  {
7027 7030          change_info4 *cinfo = &resp->cinfo;
7028 7031          vattr_t va;
7029 7032          vtype_t v_type = cs->vp->v_type;
7030 7033          int error = 0;
7031 7034  
7032 7035          /* Verify that we have a regular file */
7033 7036          if (v_type != VREG) {
7034 7037                  if (v_type == VDIR)
7035 7038                          resp->status = NFS4ERR_ISDIR;
7036 7039                  else if (v_type == VLNK)
7037 7040                          resp->status = NFS4ERR_SYMLINK;
7038 7041                  else
7039 7042                          resp->status = NFS4ERR_INVAL;
7040 7043                  return;
7041 7044          }
7042 7045  
7043 7046          va.va_mask = AT_MODE|AT_UID;
7044 7047          error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7045 7048          if (error) {
7046 7049                  resp->status = puterrno4(error);
7047 7050                  return;
7048 7051          }
7049 7052  
7050 7053          cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7051 7054  
7052 7055          /*
7053 7056           * Check if we have access to the file.  Note that the file
7054 7057           * could have originally been open UNCHECKED or GUARDED
7055 7058           * with mode bits that will now fail, but there is nothing
7056 7059           * we can really do about that except in the case that the
7057 7060           * owner of the file is the one requesting the open.
7058 7061           */
7059 7062          if (crgetuid(cs->cr) != va.va_uid) {
7060 7063                  resp->status = check_open_access(args->share_access, cs, req);
7061 7064                  if (resp->status != NFS4_OK) {
7062 7065                          return;
7063 7066                  }
7064 7067          }
7065 7068  
7066 7069          /*
7067 7070           * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7068 7071           */
7069 7072          cinfo->before = 0;
7070 7073          cinfo->after = 0;
7071 7074          cinfo->atomic = FALSE;
7072 7075  
7073 7076          rfs4_do_open(cs, req, oo,
7074 7077              NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7075 7078              args->share_access, args->share_deny, resp, 0);
7076 7079  }
7077 7080  
7078 7081  static void
7079 7082  rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7080 7083      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7081 7084  {
7082 7085          int error;
7083 7086          nfsstat4 status;
7084 7087          stateid4 stateid =
7085 7088              args->open_claim4_u.delegate_cur_info.delegate_stateid;
7086 7089          rfs4_deleg_state_t *dsp;
7087 7090  
7088 7091          /*
7089 7092           * Find the state info from the stateid and confirm that the
7090 7093           * file is delegated.  If the state openowner is the same as
7091 7094           * the supplied openowner we're done. If not, get the file
7092 7095           * info from the found state info. Use that file info to
7093 7096           * create the state for this lock owner. Note solaris doen't
7094 7097           * really need the pathname to find the file. We may want to
7095 7098           * lookup the pathname and make sure that the vp exist and
7096 7099           * matches the vp in the file structure. However it is
7097 7100           * possible that the pathname nolonger exists (local process
7098 7101           * unlinks the file), so this may not be that useful.
7099 7102           */
7100 7103  
7101 7104          status = rfs4_get_deleg_state(&stateid, &dsp);
7102 7105          if (status != NFS4_OK) {
7103 7106                  resp->status = status;
7104 7107                  return;
7105 7108          }
7106 7109  
7107 7110          ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7108 7111  
7109 7112          /*
7110 7113           * New lock owner, create state. Since this was probably called
7111 7114           * in response to a CB_RECALL we set deleg to DELEG_NONE
7112 7115           */
7113 7116  
7114 7117          ASSERT(cs->vp != NULL);
7115 7118          VN_RELE(cs->vp);
7116 7119          VN_HOLD(dsp->rds_finfo->rf_vp);
7117 7120          cs->vp = dsp->rds_finfo->rf_vp;
7118 7121  
7119 7122          if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7120 7123                  rfs4_deleg_state_rele(dsp);
7121 7124                  *cs->statusp = resp->status = puterrno4(error);
7122 7125                  return;
7123 7126          }
7124 7127  
7125 7128          /* Mark progress for delegation returns */
7126 7129          dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7127 7130          rfs4_deleg_state_rele(dsp);
7128 7131          rfs4_do_open(cs, req, oo, DELEG_NONE,
7129 7132              args->share_access, args->share_deny, resp, 1);
7130 7133  }
7131 7134  
7132 7135  /*ARGSUSED*/
7133 7136  static void
7134 7137  rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7135 7138      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7136 7139  {
7137 7140          /*
7138 7141           * Lookup the pathname, it must already exist since this file
7139 7142           * was delegated.
7140 7143           *
7141 7144           * Find the file and state info for this vp and open owner pair.
7142 7145           *      check that they are in fact delegated.
7143 7146           *      check that the state access and deny modes are the same.
7144 7147           *
7145 7148           * Return the delgation possibly seting the recall flag.
7146 7149           */
7147 7150          rfs4_file_t *fp;
7148 7151          rfs4_state_t *sp;
7149 7152          bool_t create = FALSE;
7150 7153          bool_t dcreate = FALSE;
7151 7154          rfs4_deleg_state_t *dsp;
7152 7155          nfsace4 *ace;
7153 7156  
7154 7157          /* Note we ignore oflags */
7155 7158          resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7156 7159              req, cs, args->share_access, &resp->cinfo);
7157 7160  
7158 7161          if (resp->status != NFS4_OK) {
7159 7162                  return;
7160 7163          }
7161 7164  
7162 7165          /* get the file struct and hold a lock on it during initial open */
7163 7166          fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7164 7167          if (fp == NULL) {
7165 7168                  resp->status = NFS4ERR_RESOURCE;
7166 7169                  DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7167 7170                  return;
7168 7171          }
7169 7172  
7170 7173          sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7171 7174          if (sp == NULL) {
7172 7175                  resp->status = NFS4ERR_SERVERFAULT;
7173 7176                  DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7174 7177                  rw_exit(&fp->rf_file_rwlock);
7175 7178                  rfs4_file_rele(fp);
7176 7179                  return;
7177 7180          }
7178 7181  
7179 7182          rfs4_dbe_lock(sp->rs_dbe);
7180 7183          rfs4_dbe_lock(fp->rf_dbe);
7181 7184          if (args->share_access != sp->rs_share_access ||
7182 7185              args->share_deny != sp->rs_share_deny ||
7183 7186              sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7184 7187                  NFS4_DEBUG(rfs4_debug,
7185 7188                      (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7186 7189                  rfs4_dbe_unlock(fp->rf_dbe);
7187 7190                  rfs4_dbe_unlock(sp->rs_dbe);
7188 7191                  rfs4_file_rele(fp);
7189 7192                  rfs4_state_rele(sp);
7190 7193                  resp->status = NFS4ERR_SERVERFAULT;
7191 7194                  return;
7192 7195          }
7193 7196          rfs4_dbe_unlock(fp->rf_dbe);
7194 7197          rfs4_dbe_unlock(sp->rs_dbe);
7195 7198  
7196 7199          dsp = rfs4_finddeleg(sp, &dcreate);
7197 7200          if (dsp == NULL) {
7198 7201                  rfs4_state_rele(sp);
7199 7202                  rfs4_file_rele(fp);
7200 7203                  resp->status = NFS4ERR_SERVERFAULT;
7201 7204                  return;
7202 7205          }
7203 7206  
7204 7207          next_stateid(&sp->rs_stateid);
7205 7208  
7206 7209          resp->stateid = sp->rs_stateid.stateid;
7207 7210  
7208 7211          resp->delegation.delegation_type = dsp->rds_dtype;
7209 7212  
7210 7213          if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7211 7214                  open_read_delegation4 *rv =
7212 7215                      &resp->delegation.open_delegation4_u.read;
7213 7216  
7214 7217                  rv->stateid = dsp->rds_delegid.stateid;
7215 7218                  rv->recall = FALSE; /* no policy in place to set to TRUE */
7216 7219                  ace = &rv->permissions;
7217 7220          } else {
7218 7221                  open_write_delegation4 *rv =
7219 7222                      &resp->delegation.open_delegation4_u.write;
7220 7223  
7221 7224                  rv->stateid = dsp->rds_delegid.stateid;
7222 7225                  rv->recall = FALSE;  /* no policy in place to set to TRUE */
7223 7226                  ace = &rv->permissions;
7224 7227                  rv->space_limit.limitby = NFS_LIMIT_SIZE;
7225 7228                  rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7226 7229          }
7227 7230  
7228 7231          /* XXX For now */
7229 7232          ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7230 7233          ace->flag = 0;
7231 7234          ace->access_mask = 0;
7232 7235          ace->who.utf8string_len = 0;
7233 7236          ace->who.utf8string_val = 0;
7234 7237  
7235 7238          rfs4_deleg_state_rele(dsp);
7236 7239          rfs4_state_rele(sp);
7237 7240          rfs4_file_rele(fp);
7238 7241  }
7239 7242  
/*
 * Result of a sequence-id check (see rfs4_check_seqid()).
 */
typedef enum {
        NFS4_CHKSEQ_OKAY = 0,   /* request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY,     /* same seqid and same op as the last reply */
        NFS4_CHKSEQ_BAD         /* any other seqid, or same seqid, other op */
} rfs4_chkseq_t;
7245 7248  
7246 7249  /*
7247 7250   * Generic function for sequence number checks.
7248 7251   */
7249 7252  static rfs4_chkseq_t
7250 7253  rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7251 7254      seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7252 7255  {
7253 7256          /* Same sequence ids and matching operations? */
7254 7257          if (seqid == rqst_seq && resop->resop == lastop->resop) {
7255 7258                  if (copyres == TRUE) {
7256 7259                          rfs4_free_reply(resop);
7257 7260                          rfs4_copy_reply(resop, lastop);
7258 7261                  }
7259 7262                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7260 7263                      "Replayed SEQID %d\n", seqid));
7261 7264                  return (NFS4_CHKSEQ_REPLAY);
7262 7265          }
7263 7266  
7264 7267          /* If the incoming sequence is not the next expected then it is bad */
7265 7268          if (rqst_seq != seqid + 1) {
7266 7269                  if (rqst_seq == seqid) {
7267 7270                          NFS4_DEBUG(rfs4_debug,
7268 7271                              (CE_NOTE, "BAD SEQID: Replayed sequence id "
7269 7272                              "but last op was %d current op is %d\n",
7270 7273                              lastop->resop, resop->resop));
7271 7274                          return (NFS4_CHKSEQ_BAD);
7272 7275                  }
7273 7276                  NFS4_DEBUG(rfs4_debug,
7274 7277                      (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7275 7278                      rqst_seq, seqid));
7276 7279                  return (NFS4_CHKSEQ_BAD);
7277 7280          }
7278 7281  
7279 7282          /* Everything okay -- next expected */
7280 7283          return (NFS4_CHKSEQ_OKAY);
7281 7284  }
7282 7285  
7283 7286  
7284 7287  static rfs4_chkseq_t
7285 7288  rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7286 7289  {
7287 7290          rfs4_chkseq_t rc;
7288 7291  
7289 7292          rfs4_dbe_lock(op->ro_dbe);
7290 7293          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7291 7294              TRUE);
7292 7295          rfs4_dbe_unlock(op->ro_dbe);
7293 7296  
7294 7297          if (rc == NFS4_CHKSEQ_OKAY)
7295 7298                  rfs4_update_lease(op->ro_client);
7296 7299  
7297 7300          return (rc);
7298 7301  }
7299 7302  
7300 7303  static rfs4_chkseq_t
7301 7304  rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7302 7305  {
7303 7306          rfs4_chkseq_t rc;
7304 7307  
7305 7308          rfs4_dbe_lock(op->ro_dbe);
7306 7309          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7307 7310              olo_seqid, resop, FALSE);
7308 7311          rfs4_dbe_unlock(op->ro_dbe);
7309 7312  
7310 7313          return (rc);
7311 7314  }
7312 7315  
7313 7316  static rfs4_chkseq_t
7314 7317  rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7315 7318  {
7316 7319          rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7317 7320  
7318 7321          rfs4_dbe_lock(lsp->rls_dbe);
7319 7322          if (!lsp->rls_skip_seqid_check)
7320 7323                  rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7321 7324                      resop, TRUE);
7322 7325          rfs4_dbe_unlock(lsp->rls_dbe);
7323 7326  
7324 7327          return (rc);
7325 7328  }
7326 7329  
/*
 * OPEN operation handler.
 *
 * Looks up the client and open owner named in the request, applies the
 * open-owner sequence-id check, enforces the grace-period rules for the
 * claim type, validates the share access/deny bits, and then dispatches to
 * the claim-specific helper (rfs4_do_opennull, rfs4_do_openprev,
 * rfs4_do_opendelcur or rfs4_do_opendelprev).  Afterwards the open owner's
 * cached reply, sequence id and confirmation flags are updated according
 * to the resulting status.
 *
 * argop - request; the OPEN4args are taken from nfs_argop4_u.opopen.
 * resop - reply; the OPEN4res in nfs_resop4_u.opopen is filled in.
 * req   - passed through to the claim-specific helpers.
 * cs    - compound state; cs->vp must be non-NULL, and *cs->statusp
 *         receives the final status.
 *
 * Exit labels, from outermost to innermost:
 *   end    - only the "done" probe fires (no open owner was obtained).
 *   finish - publishes the status, leaves oo->ro_sw and releases oo.
 *   out    - releases cp, then does the sequence-id bookkeeping before
 *            falling into finish.
 */
7327 7330  static void
7328 7331  rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7329 7332      struct svc_req *req, struct compound_state *cs)
7330 7333  {
7331 7334          OPEN4args *args = &argop->nfs_argop4_u.opopen;
7332 7335          OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7333 7336          open_owner4 *owner = &args->owner;
7334 7337          open_claim_type4 claim = args->claim;
7335 7338          rfs4_client_t *cp;
7336 7339          rfs4_openowner_t *oo;
7337 7340          bool_t create;
7338 7341          bool_t replay = FALSE;
7339 7342          int can_reclaim;
7340 7343  
7341 7344          DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7342 7345              OPEN4args *, args);
7343 7346  
7344 7347          if (cs->vp == NULL) {
7345 7348                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7346 7349                  goto end;
7347 7350          }
7348 7351  
7349 7352          /*
7350 7353           * Need to check clientid and lease expiration first based on
7351 7354           * error ordering and incrementing sequence id.
7352 7355           */
7353 7356          cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7354 7357          if (cp == NULL) {
7355 7358                  *cs->statusp = resp->status =
7356 7359                      rfs4_check_clientid(&owner->clientid, 0);
7357 7360                  goto end;
7358 7361          }
7359 7362  
                   /*
                    * NOTE(review): this path calls rfs4_client_close() but not
                    * rfs4_client_rele(); presumably the close consumes the
                    * reference taken by the lookup above -- confirm.
                    */
7360 7363          if (rfs4_lease_expired(cp)) {
7361 7364                  rfs4_client_close(cp);
7362 7365                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7363 7366                  goto end;
7364 7367          }
7365 7368          can_reclaim = cp->rc_can_reclaim;
7366 7369  
7367 7370          /*
7368 7371           * Find the open_owner for use from this point forward.  Take
7369 7372           * care in updating the sequence id based on the type of error
7370 7373           * being returned.
7371 7374           */
7372 7375  retry:
                   /*
                    * create is reset on every pass because its address is handed
                    * to rfs4_findopenowner(); the tests on !create below treat
                    * FALSE as "the open_owner already existed".
                    */
7373 7376          create = TRUE;
7374 7377          oo = rfs4_findopenowner(owner, &create, args->seqid);
7375 7378          if (oo == NULL) {
7376 7379                  *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7377 7380                  rfs4_client_rele(cp);
7378 7381                  goto end;
7379 7382          }
7380 7383  
7381 7384          /* Hold off access to the sequence space while the open is done */
7382 7385          rfs4_sw_enter(&oo->ro_sw);
7383 7386  
7384 7387          /*
7385 7388           * If the open_owner existed before at the server, then check
7386 7389           * the sequence id.
7387 7390           */
7388 7391          if (!create && !oo->ro_postpone_confirm) {
7389 7392                  switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7390 7393                  case NFS4_CHKSEQ_BAD:
                                   /*
                                    * An unconfirmed owner seen with a larger
                                    * seqid has its opens freed and the lookup
                                    * is redone from "retry".
                                    */
7391 7394                          if ((args->seqid > oo->ro_open_seqid) &&
7392 7395                              oo->ro_need_confirm) {
7393 7396                                  rfs4_free_opens(oo, TRUE, FALSE);
7394 7397                                  rfs4_sw_exit(&oo->ro_sw);
7395 7398                                  rfs4_openowner_rele(oo);
7396 7399                                  goto retry;
7397 7400                          }
7398 7401                          resp->status = NFS4ERR_BAD_SEQID;
7399 7402                          goto out;
7400 7403                  case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7401 7404                          replay = TRUE;
7402 7405                          goto out;
7403 7406                  default:
7404 7407                          break;
7405 7408                  }
7406 7409  
7407 7410                  /*
7408 7411                   * Sequence was ok and open owner exists
7409 7412                   * check to see if we have yet to see an
7410 7413                   * open_confirm.
7411 7414                   */
7412 7415                  if (oo->ro_need_confirm) {
7413 7416                          rfs4_free_opens(oo, TRUE, FALSE);
7414 7417                          rfs4_sw_exit(&oo->ro_sw);
7415 7418                          rfs4_openowner_rele(oo);
7416 7419                          goto retry;
7417 7420                  }
7418 7421          }
7419 7422          /* Grace only applies to regular-type OPENs */
7420 7423          if (rfs4_clnt_in_grace(cp) &&
7421 7424              (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7422 7425                  *cs->statusp = resp->status = NFS4ERR_GRACE;
7423 7426                  goto out;
7424 7427          }
7425 7428  
7426 7429          /*
7427 7430           * If previous state at the server existed then can_reclaim
7428 7431           * will be set. If not reply NFS4ERR_NO_GRACE to the
7429 7432           * client.
7430 7433           */
7431 7434          if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7432 7435                  *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7433 7436                  goto out;
7434 7437          }
7435 7438  
7436 7439  
7437 7440          /*
7438 7441           * Reject the open if the client has missed the grace period
7439 7442           */
7440 7443          if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7441 7444                  *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7442 7445                  goto out;
7443 7446          }
7444 7447  
7445 7448          /* Couple of up-front bookkeeping items */
7446 7449          if (oo->ro_need_confirm) {
7447 7450                  /*
7448 7451                   * If this is a reclaim OPEN then we should not ask
7449 7452                   * for a confirmation of the open_owner per the
7450 7453                   * protocol specification.
7451 7454                   */
7452 7455                  if (claim == CLAIM_PREVIOUS)
7453 7456                          oo->ro_need_confirm = FALSE;
7454 7457                  else
7455 7458                          resp->rflags |= OPEN4_RESULT_CONFIRM;
7456 7459          }
7457 7460          resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7458 7461  
7459 7462          /*
7460 7463           * If there is an unshared filesystem mounted on this vnode,
7461 7464           * do not allow to open/create in this directory.
7462 7465           */
7463 7466          if (vn_ismntpt(cs->vp)) {
7464 7467                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
7465 7468                  goto out;
7466 7469          }
7467 7470  
7468 7471          /*
7469 7472           * access must READ, WRITE, or BOTH.  No access is invalid.
7470 7473           * deny can be READ, WRITE, BOTH, or NONE.
7471 7474           * bits not defined for access/deny are invalid.
7472 7475           */
7473 7476          if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7474 7477              (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7475 7478              (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7476 7479                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7477 7480                  goto out;
7478 7481          }
7479 7482  
7480 7483  
7481 7484          /*
7482 7485           * make sure attrset is zero before response is built.
7483 7486           */
7484 7487          resp->attrset = 0;
7485 7488  
                   /* Dispatch on claim type; each helper sets resp->status. */
7486 7489          switch (claim) {
7487 7490          case CLAIM_NULL:
7488 7491                  rfs4_do_opennull(cs, req, args, oo, resp);
7489 7492                  break;
7490 7493          case CLAIM_PREVIOUS:
7491 7494                  rfs4_do_openprev(cs, req, args, oo, resp);
7492 7495                  break;
7493 7496          case CLAIM_DELEGATE_CUR:
7494 7497                  rfs4_do_opendelcur(cs, req, args, oo, resp);
7495 7498                  break;
7496 7499          case CLAIM_DELEGATE_PREV:
7497 7500                  rfs4_do_opendelprev(cs, req, args, oo, resp);
7498 7501                  break;
7499 7502          default:
7500 7503                  resp->status = NFS4ERR_INVAL;
7501 7504                  break;
7502 7505          }
7503 7506  
           /*
            * Reached with cp held and oo->ro_sw entered.  The client reference
            * is dropped here; the open owner is released at "finish".
            */
7504 7507  out:
7505 7508          rfs4_client_rele(cp);
7506 7509  
7507 7510          /* Catch sequence id handling here to make it a little easier */
7508 7511          switch (resp->status) {
7509 7512          case NFS4ERR_BADXDR:
7510 7513          case NFS4ERR_BAD_SEQID:
7511 7514          case NFS4ERR_BAD_STATEID:
7512 7515          case NFS4ERR_NOFILEHANDLE:
7513 7516          case NFS4ERR_RESOURCE:
7514 7517          case NFS4ERR_STALE_CLIENTID:
7515 7518          case NFS4ERR_STALE_STATEID:
7516 7519                  /*
7517 7520                   * The protocol states that if any of these errors are
7518 7521                   * being returned, the sequence id should not be
7519 7522                   * incremented.  Any other return requires an
7520 7523                   * increment.
7521 7524                   */
7522 7525                  break;
7523 7526          default:
7524 7527                  /* Always update the lease in this case */
7525 7528                  rfs4_update_lease(oo->ro_client);
7526 7529  
7527 7530                  /* Regular response - copy the result */
7528 7531                  if (!replay)
7529 7532                          rfs4_update_open_resp(oo, resop, &cs->fh);
7530 7533  
7531 7534                  /*
7532 7535                   * REPLAY case: Only if the previous response was OK
7533 7536                   * do we copy the filehandle.  If not OK, no
7534 7537                   * filehandle to copy.
7535 7538                   */
7536 7539                  if (replay == TRUE &&
7537 7540                      resp->status == NFS4_OK &&
7538 7541                      oo->ro_reply_fh.nfs_fh4_val) {
7539 7542                          /*
7540 7543                           * If this is a replay, we must restore the
7541 7544                           * current filehandle/vp to that of what was
7542 7545                           * returned originally.  Try our best to do
7543 7546                           * it.
7544 7547                           */
7545 7548                          nfs_fh4_fmt_t *fh_fmtp =
7546 7549                              (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7547 7550  
7548 7551                          cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7549 7552                              (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7550 7553  
7551 7554                          if (cs->exi == NULL) {
7552 7555                                  resp->status = NFS4ERR_STALE;
7553 7556                                  goto finish;
7554 7557                          }
7555 7558  
7556 7559                          VN_RELE(cs->vp);
7557 7560  
7558 7561                          cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7559 7562                              &resp->status);
7560 7563  
7561 7564                          if (cs->vp == NULL)
7562 7565                                  goto finish;
7563 7566  
7564 7567                          nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7565 7568                  }
7566 7569  
7567 7570                  /*
7568 7571                   * If this was a replay, no need to update the
7569 7572                   * sequence id. If the open_owner was not created on
7570 7573                   * this pass, then update.  The first use of an
7571 7574                   * open_owner will not bump the sequence id.
7572 7575                   */
7573 7576                  if (replay == FALSE && !create)
7574 7577                          rfs4_update_open_sequence(oo);
7575 7578                  /*
7576 7579                   * If the client is receiving an error and the
7577 7580                   * open_owner needs to be confirmed, there is no way
7578 7581                   * to notify the client of this fact ignoring the fact
7579 7582                   * that the server has no method of returning a
7580 7583                   * stateid to confirm.  Therefore, the server needs to
7581 7584                   * mark this open_owner in a way as to avoid the
7582 7585                   * sequence id checking the next time the client uses
7583 7586                   * this open_owner.
7584 7587                   */
7585 7588                  if (resp->status != NFS4_OK && oo->ro_need_confirm)
7586 7589                          oo->ro_postpone_confirm = TRUE;
7587 7590                  /*
7588 7591                   * If OK response then clear the postpone flag and
7589 7592                   * reset the sequence id to keep in sync with the
7590 7593                   * client.
7591 7594                   */
7592 7595                  if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7593 7596                          oo->ro_postpone_confirm = FALSE;
7594 7597                          oo->ro_open_seqid = args->seqid;
7595 7598                  }
7596 7599                  break;
7597 7600          }
7598 7601  
           /*
            * Publish the final status to the compound, then leave the sequence
            * window and drop the open owner reference.
            */
7599 7602  finish:
7600 7603          *cs->statusp = resp->status;
7601 7604  
7602 7605          rfs4_sw_exit(&oo->ro_sw);
7603 7606          rfs4_openowner_rele(oo);
7604 7607  
           /* Failures that occur before an open owner is obtained land here. */
7605 7608  end:
7606 7609          DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7607 7610              OPEN4res *, resp);
7608 7611  }
7609 7612  
7610 7613  /*ARGSUSED*/
7611 7614  void
7612 7615  rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7613 7616      struct svc_req *req, struct compound_state *cs)
7614 7617  {
7615 7618          OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7616 7619          OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7617 7620          rfs4_state_t *sp;
7618 7621          nfsstat4 status;
7619 7622  
7620 7623          DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7621 7624              OPEN_CONFIRM4args *, args);
7622 7625  
7623 7626          if (cs->vp == NULL) {
7624 7627                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7625 7628                  goto out;
7626 7629          }
7627 7630  
7628 7631          if (cs->vp->v_type != VREG) {
7629 7632                  *cs->statusp = resp->status =
7630 7633                      cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7631 7634                  return;
7632 7635          }
7633 7636  
7634 7637          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7635 7638          if (status != NFS4_OK) {
7636 7639                  *cs->statusp = resp->status = status;
7637 7640                  goto out;
7638 7641          }
7639 7642  
7640 7643          /* Ensure specified filehandle matches */
7641 7644          if (cs->vp != sp->rs_finfo->rf_vp) {
7642 7645                  rfs4_state_rele(sp);
7643 7646                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7644 7647                  goto out;
7645 7648          }
7646 7649  
7647 7650          /* hold off other access to open_owner while we tinker */
7648 7651          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7649 7652  
7650 7653          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7651 7654          case NFS4_CHECK_STATEID_OKAY:
7652 7655                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7653 7656                      resop) != 0) {
7654 7657                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7655 7658                          break;
7656 7659                  }
7657 7660                  /*
7658 7661                   * If it is the appropriate stateid and determined to
7659 7662                   * be "OKAY" then this means that the stateid does not
7660 7663                   * need to be confirmed and the client is in error for
7661 7664                   * sending an OPEN_CONFIRM.
7662 7665                   */
7663 7666                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7664 7667                  break;
7665 7668          case NFS4_CHECK_STATEID_OLD:
7666 7669                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7667 7670                  break;
7668 7671          case NFS4_CHECK_STATEID_BAD:
7669 7672                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7670 7673                  break;
7671 7674          case NFS4_CHECK_STATEID_EXPIRED:
7672 7675                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7673 7676                  break;
7674 7677          case NFS4_CHECK_STATEID_CLOSED:
7675 7678                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7676 7679                  break;
7677 7680          case NFS4_CHECK_STATEID_REPLAY:
7678 7681                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7679 7682                      resop)) {
7680 7683                  case NFS4_CHKSEQ_OKAY:
7681 7684                          /*
7682 7685                           * This is replayed stateid; if seqid matches
7683 7686                           * next expected, then client is using wrong seqid.
7684 7687                           */
7685 7688                          /* fall through */
7686 7689                  case NFS4_CHKSEQ_BAD:
7687 7690                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7688 7691                          break;
7689 7692                  case NFS4_CHKSEQ_REPLAY:
7690 7693                          /*
7691 7694                           * Note this case is the duplicate case so
7692 7695                           * resp->status is already set.
7693 7696                           */
7694 7697                          *cs->statusp = resp->status;
7695 7698                          rfs4_update_lease(sp->rs_owner->ro_client);
7696 7699                          break;
7697 7700                  }
7698 7701                  break;
7699 7702          case NFS4_CHECK_STATEID_UNCONFIRMED:
7700 7703                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7701 7704                      resop) != NFS4_CHKSEQ_OKAY) {
7702 7705                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7703 7706                          break;
7704 7707                  }
7705 7708                  *cs->statusp = resp->status = NFS4_OK;
7706 7709  
7707 7710                  next_stateid(&sp->rs_stateid);
7708 7711                  resp->open_stateid = sp->rs_stateid.stateid;
7709 7712                  sp->rs_owner->ro_need_confirm = FALSE;
7710 7713                  rfs4_update_lease(sp->rs_owner->ro_client);
7711 7714                  rfs4_update_open_sequence(sp->rs_owner);
7712 7715                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7713 7716                  break;
7714 7717          default:
7715 7718                  ASSERT(FALSE);
7716 7719                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7717 7720                  break;
7718 7721          }
7719 7722          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7720 7723          rfs4_state_rele(sp);
7721 7724  
7722 7725  out:
7723 7726          DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7724 7727              OPEN_CONFIRM4res *, resp);
7725 7728  }
7726 7729  
7727 7730  /*ARGSUSED*/
7728 7731  void
7729 7732  rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7730 7733      struct svc_req *req, struct compound_state *cs)
7731 7734  {
7732 7735          OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7733 7736          OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7734 7737          uint32_t access = args->share_access;
7735 7738          uint32_t deny = args->share_deny;
7736 7739          nfsstat4 status;
7737 7740          rfs4_state_t *sp;
7738 7741          rfs4_file_t *fp;
7739 7742          int fflags = 0;
7740 7743  
7741 7744          DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7742 7745              OPEN_DOWNGRADE4args *, args);
7743 7746  
7744 7747          if (cs->vp == NULL) {
7745 7748                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7746 7749                  goto out;
7747 7750          }
7748 7751  
7749 7752          if (cs->vp->v_type != VREG) {
7750 7753                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7751 7754                  return;
7752 7755          }
7753 7756  
7754 7757          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7755 7758          if (status != NFS4_OK) {
7756 7759                  *cs->statusp = resp->status = status;
7757 7760                  goto out;
7758 7761          }
7759 7762  
7760 7763          /* Ensure specified filehandle matches */
7761 7764          if (cs->vp != sp->rs_finfo->rf_vp) {
7762 7765                  rfs4_state_rele(sp);
7763 7766                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7764 7767                  goto out;
7765 7768          }
7766 7769  
7767 7770          /* hold off other access to open_owner while we tinker */
7768 7771          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7769 7772  
7770 7773          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7771 7774          case NFS4_CHECK_STATEID_OKAY:
7772 7775                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7773 7776                      resop) != NFS4_CHKSEQ_OKAY) {
7774 7777                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7775 7778                          goto end;
7776 7779                  }
7777 7780                  break;
7778 7781          case NFS4_CHECK_STATEID_OLD:
7779 7782                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7780 7783                  goto end;
7781 7784          case NFS4_CHECK_STATEID_BAD:
7782 7785                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7783 7786                  goto end;
7784 7787          case NFS4_CHECK_STATEID_EXPIRED:
7785 7788                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7786 7789                  goto end;
7787 7790          case NFS4_CHECK_STATEID_CLOSED:
7788 7791                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7789 7792                  goto end;
7790 7793          case NFS4_CHECK_STATEID_UNCONFIRMED:
7791 7794                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7792 7795                  goto end;
7793 7796          case NFS4_CHECK_STATEID_REPLAY:
7794 7797                  /* Check the sequence id for the open owner */
7795 7798                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7796 7799                      resop)) {
7797 7800                  case NFS4_CHKSEQ_OKAY:
7798 7801                          /*
7799 7802                           * This is replayed stateid; if seqid matches
7800 7803                           * next expected, then client is using wrong seqid.
7801 7804                           */
7802 7805                          /* fall through */
7803 7806                  case NFS4_CHKSEQ_BAD:
7804 7807                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7805 7808                          goto end;
7806 7809                  case NFS4_CHKSEQ_REPLAY:
7807 7810                          /*
7808 7811                           * Note this case is the duplicate case so
7809 7812                           * resp->status is already set.
7810 7813                           */
7811 7814                          *cs->statusp = resp->status;
7812 7815                          rfs4_update_lease(sp->rs_owner->ro_client);
7813 7816                          goto end;
7814 7817                  }
7815 7818                  break;
7816 7819          default:
7817 7820                  ASSERT(FALSE);
7818 7821                  break;
7819 7822          }
7820 7823  
7821 7824          rfs4_dbe_lock(sp->rs_dbe);
7822 7825          /*
7823 7826           * Check that the new access modes and deny modes are valid.
7824 7827           * Check that no invalid bits are set.
7825 7828           */
7826 7829          if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7827 7830              (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7828 7831                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7829 7832                  rfs4_update_open_sequence(sp->rs_owner);
7830 7833                  rfs4_dbe_unlock(sp->rs_dbe);
7831 7834                  goto end;
7832 7835          }
7833 7836  
7834 7837          /*
7835 7838           * The new modes must be a subset of the current modes and
7836 7839           * the access must specify at least one mode. To test that
7837 7840           * the new mode is a subset of the current modes we bitwise
7838 7841           * AND them together and check that the result equals the new
7839 7842           * mode. For example:
7840 7843           * New mode, access == R and current mode, sp->rs_open_access  == RW
7841 7844           * access & sp->rs_open_access == R == access, so the new access mode
7842 7845           * is valid. Consider access == RW, sp->rs_open_access = R
7843 7846           * access & sp->rs_open_access == R != access, so the new access mode
7844 7847           * is invalid.
7845 7848           */
7846 7849          if ((access & sp->rs_open_access) != access ||
7847 7850              (deny & sp->rs_open_deny) != deny ||
7848 7851              (access &
7849 7852              (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7850 7853                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7851 7854                  rfs4_update_open_sequence(sp->rs_owner);
7852 7855                  rfs4_dbe_unlock(sp->rs_dbe);
7853 7856                  goto end;
7854 7857          }
7855 7858  
7856 7859          /*
7857 7860           * Release any share locks associated with this stateID.
7858 7861           * Strictly speaking, this violates the spec because the
7859 7862           * spec effectively requires that open downgrade be atomic.
7860 7863           * At present, fs_shrlock does not have this capability.
7861 7864           */
7862 7865          (void) rfs4_unshare(sp);
7863 7866  
7864 7867          status = rfs4_share(sp, access, deny);
7865 7868          if (status != NFS4_OK) {
7866 7869                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7867 7870                  rfs4_update_open_sequence(sp->rs_owner);
7868 7871                  rfs4_dbe_unlock(sp->rs_dbe);
7869 7872                  goto end;
7870 7873          }
7871 7874  
7872 7875          fp = sp->rs_finfo;
7873 7876          rfs4_dbe_lock(fp->rf_dbe);
7874 7877  
7875 7878          /*
7876 7879           * If the current mode has deny read and the new mode
7877 7880           * does not, decrement the number of deny read mode bits
7878 7881           * and if it goes to zero turn off the deny read bit
7879 7882           * on the file.
7880 7883           */
7881 7884          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7882 7885              (deny & OPEN4_SHARE_DENY_READ) == 0) {
7883 7886                  fp->rf_deny_read--;
7884 7887                  if (fp->rf_deny_read == 0)
7885 7888                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7886 7889          }
7887 7890  
7888 7891          /*
7889 7892           * If the current mode has deny write and the new mode
7890 7893           * does not, decrement the number of deny write mode bits
7891 7894           * and if it goes to zero turn off the deny write bit
7892 7895           * on the file.
7893 7896           */
7894 7897          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7895 7898              (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7896 7899                  fp->rf_deny_write--;
7897 7900                  if (fp->rf_deny_write == 0)
7898 7901                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7899 7902          }
7900 7903  
7901 7904          /*
7902 7905           * If the current mode has access read and the new mode
7903 7906           * does not, decrement the number of access read mode bits
7904 7907           * and if it goes to zero turn off the access read bit
7905 7908           * on the file.  set fflags to FREAD for the call to
7906 7909           * vn_open_downgrade().
7907 7910           */
7908 7911          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7909 7912              (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7910 7913                  fp->rf_access_read--;
7911 7914                  if (fp->rf_access_read == 0)
7912 7915                          fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7913 7916                  fflags |= FREAD;
7914 7917          }
7915 7918  
7916 7919          /*
7917 7920           * If the current mode has access write and the new mode
7918 7921           * does not, decrement the number of access write mode bits
7919 7922           * and if it goes to zero turn off the access write bit
7920 7923           * on the file.  set fflags to FWRITE for the call to
7921 7924           * vn_open_downgrade().
7922 7925           */
7923 7926          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7924 7927              (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7925 7928                  fp->rf_access_write--;
7926 7929                  if (fp->rf_access_write == 0)
7927 7930                          fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7928 7931                  fflags |= FWRITE;
7929 7932          }
7930 7933  
7931 7934          /* Check that the file is still accessible */
7932 7935          ASSERT(fp->rf_share_access);
7933 7936  
7934 7937          rfs4_dbe_unlock(fp->rf_dbe);
7935 7938  
7936 7939          /* now set the new open access and deny modes */
7937 7940          sp->rs_open_access = access;
7938 7941          sp->rs_open_deny = deny;
7939 7942  
7940 7943          /*
7941 7944           * we successfully downgraded the share lock, now we need to downgrade
7942 7945           * the open. it is possible that the downgrade was only for a deny
7943 7946           * mode and we have nothing else to do.
7944 7947           */
7945 7948          if ((fflags & (FREAD|FWRITE)) != 0)
7946 7949                  vn_open_downgrade(cs->vp, fflags);
7947 7950  
7948 7951          /* Update the stateid */
7949 7952          next_stateid(&sp->rs_stateid);
7950 7953          resp->open_stateid = sp->rs_stateid.stateid;
7951 7954  
7952 7955          rfs4_dbe_unlock(sp->rs_dbe);
7953 7956  
7954 7957          *cs->statusp = resp->status = NFS4_OK;
7955 7958          /* Update the lease */
7956 7959          rfs4_update_lease(sp->rs_owner->ro_client);
7957 7960          /* And the sequence */
7958 7961          rfs4_update_open_sequence(sp->rs_owner);
7959 7962          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7960 7963  
7961 7964  end:
7962 7965          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7963 7966          rfs4_state_rele(sp);
7964 7967  out:
7965 7968          DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7966 7969              OPEN_DOWNGRADE4res *, resp);
7967 7970  }
7968 7971  
/*
 * Locate the first occurrence of the NUL-terminated string s2 inside the
 * n-byte buffer s1.  The buffer need not be NUL-terminated; only its first
 * n bytes are examined.  Returns a pointer to the match within s1, or NULL
 * if s2 does not occur.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t needle_len = strlen(s2);
	char *cursor = (char *)s1;
	size_t remaining;

	/* stop once fewer bytes remain than the needle is long */
	for (remaining = n; remaining >= needle_len; remaining--, cursor++) {
		if (bcmp(cursor, s2, needle_len) == 0)
			return (cursor);
	}

	return (NULL);
}
7984 7987  
7985 7988  /*
7986 7989   * The logic behind this function is detailed in the NFSv4 RFC in the
7987 7990   * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7988 7991   * that section for explicit guidance to server behavior for
7989 7992   * SETCLIENTID.
7990 7993   */
7991 7994  void
7992 7995  rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7993 7996      struct svc_req *req, struct compound_state *cs)
7994 7997  {
7995 7998          SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7996 7999          SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7997 8000          rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7998 8001          rfs4_clntip_t *ci;
7999 8002          bool_t create;
8000 8003          char *addr, *netid;
8001 8004          int len;
8002 8005  
8003 8006          DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8004 8007              SETCLIENTID4args *, args);
8005 8008  retry:
8006 8009          newcp = cp_confirmed = cp_unconfirmed = NULL;
8007 8010  
8008 8011          /*
8009 8012           * Save the caller's IP address
8010 8013           */
8011 8014          args->client.cl_addr =
8012 8015              (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8013 8016  
8014 8017          /*
8015 8018           * Record if it is a Solaris client that cannot handle referrals.
8016 8019           */
8017 8020          if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8018 8021              !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8019 8022                  /* Add a "yes, it's downrev" record */
8020 8023                  create = TRUE;
8021 8024                  ci = rfs4_find_clntip(args->client.cl_addr, &create);
8022 8025                  ASSERT(ci != NULL);
8023 8026                  rfs4_dbe_rele(ci->ri_dbe);
8024 8027          } else {
8025 8028                  /* Remove any previous record */
8026 8029                  rfs4_invalidate_clntip(args->client.cl_addr);
8027 8030          }
8028 8031  
8029 8032          /*
8030 8033           * In search of an EXISTING client matching the incoming
8031 8034           * request to establish a new client identifier at the server
8032 8035           */
8033 8036          create = TRUE;
8034 8037          cp = rfs4_findclient(&args->client, &create, NULL);
8035 8038  
8036 8039          /* Should never happen */
8037 8040          ASSERT(cp != NULL);
8038 8041  
8039 8042          if (cp == NULL) {
8040 8043                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8041 8044                  goto out;
8042 8045          }
8043 8046  
8044 8047          /*
8045 8048           * Easiest case. Client identifier is newly created and is
8046 8049           * unconfirmed.  Also note that for this case, no other
8047 8050           * entries exist for the client identifier.  Nothing else to
8048 8051           * check.  Just setup the response and respond.
8049 8052           */
8050 8053          if (create) {
8051 8054                  *cs->statusp = res->status = NFS4_OK;
8052 8055                  res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8053 8056                  res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8054 8057                      cp->rc_confirm_verf;
8055 8058                  /* Setup callback information; CB_NULL confirmation later */
8056 8059                  rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8057 8060  
8058 8061                  rfs4_client_rele(cp);
8059 8062                  goto out;
8060 8063          }
8061 8064  
8062 8065          /*
8063 8066           * An existing, confirmed client may exist but it may not have
8064 8067           * been active for at least one lease period.  If so, then
8065 8068           * "close" the client and create a new client identifier
8066 8069           */
8067 8070          if (rfs4_lease_expired(cp)) {
8068 8071                  rfs4_client_close(cp);
8069 8072                  goto retry;
8070 8073          }
8071 8074  
8072 8075          if (cp->rc_need_confirm == TRUE)
8073 8076                  cp_unconfirmed = cp;
8074 8077          else
8075 8078                  cp_confirmed = cp;
8076 8079  
8077 8080          cp = NULL;
8078 8081  
8079 8082          /*
8080 8083           * We have a confirmed client, now check for an
8081 8084           * unconfimred entry
8082 8085           */
8083 8086          if (cp_confirmed) {
8084 8087                  /* If creds don't match then client identifier is inuse */
8085 8088                  if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8086 8089                          rfs4_cbinfo_t *cbp;
8087 8090                          /*
8088 8091                           * Some one else has established this client
8089 8092                           * id. Try and say * who they are. We will use
8090 8093                           * the call back address supplied by * the
8091 8094                           * first client.
8092 8095                           */
8093 8096                          *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8094 8097  
8095 8098                          addr = netid = NULL;
8096 8099  
8097 8100                          cbp = &cp_confirmed->rc_cbinfo;
8098 8101                          if (cbp->cb_callback.cb_location.r_addr &&
8099 8102                              cbp->cb_callback.cb_location.r_netid) {
8100 8103                                  cb_client4 *cbcp = &cbp->cb_callback;
8101 8104  
8102 8105                                  len = strlen(cbcp->cb_location.r_addr)+1;
8103 8106                                  addr = kmem_alloc(len, KM_SLEEP);
8104 8107                                  bcopy(cbcp->cb_location.r_addr, addr, len);
8105 8108                                  len = strlen(cbcp->cb_location.r_netid)+1;
8106 8109                                  netid = kmem_alloc(len, KM_SLEEP);
8107 8110                                  bcopy(cbcp->cb_location.r_netid, netid, len);
8108 8111                          }
8109 8112  
8110 8113                          res->SETCLIENTID4res_u.client_using.r_addr = addr;
8111 8114                          res->SETCLIENTID4res_u.client_using.r_netid = netid;
8112 8115  
8113 8116                          rfs4_client_rele(cp_confirmed);
8114 8117                  }
8115 8118  
8116 8119                  /*
8117 8120                   * Confirmed, creds match, and verifier matches; must
8118 8121                   * be an update of the callback info
8119 8122                   */
8120 8123                  if (cp_confirmed->rc_nfs_client.verifier ==
8121 8124                      args->client.verifier) {
8122 8125                          /* Setup callback information */
8123 8126                          rfs4_client_setcb(cp_confirmed, &args->callback,
8124 8127                              args->callback_ident);
8125 8128  
8126 8129                          /* everything okay -- move ahead */
8127 8130                          *cs->statusp = res->status = NFS4_OK;
8128 8131                          res->SETCLIENTID4res_u.resok4.clientid =
8129 8132                              cp_confirmed->rc_clientid;
8130 8133  
8131 8134                          /* update the confirm_verifier and return it */
8132 8135                          rfs4_client_scv_next(cp_confirmed);
8133 8136                          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8134 8137                              cp_confirmed->rc_confirm_verf;
8135 8138  
8136 8139                          rfs4_client_rele(cp_confirmed);
8137 8140                          goto out;
8138 8141                  }
8139 8142  
8140 8143                  /*
8141 8144                   * Creds match but the verifier doesn't.  Must search
8142 8145                   * for an unconfirmed client that would be replaced by
8143 8146                   * this request.
8144 8147                   */
8145 8148                  create = FALSE;
8146 8149                  cp_unconfirmed = rfs4_findclient(&args->client, &create,
8147 8150                      cp_confirmed);
8148 8151          }
8149 8152  
8150 8153          /*
8151 8154           * At this point, we have taken care of the brand new client
8152 8155           * struct, INUSE case, update of an existing, and confirmed
8153 8156           * client struct.
8154 8157           */
8155 8158  
8156 8159          /*
8157 8160           * check to see if things have changed while we originally
8158 8161           * picked up the client struct.  If they have, then return and
8159 8162           * retry the processing of this SETCLIENTID request.
8160 8163           */
8161 8164          if (cp_unconfirmed) {
8162 8165                  rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8163 8166                  if (!cp_unconfirmed->rc_need_confirm) {
8164 8167                          rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8165 8168                          rfs4_client_rele(cp_unconfirmed);
8166 8169                          if (cp_confirmed)
8167 8170                                  rfs4_client_rele(cp_confirmed);
8168 8171                          goto retry;
8169 8172                  }
8170 8173                  /* do away with the old unconfirmed one */
8171 8174                  rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8172 8175                  rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8173 8176                  rfs4_client_rele(cp_unconfirmed);
8174 8177                  cp_unconfirmed = NULL;
8175 8178          }
8176 8179  
8177 8180          /*
8178 8181           * This search will temporarily hide the confirmed client
8179 8182           * struct while a new client struct is created as the
8180 8183           * unconfirmed one.
8181 8184           */
8182 8185          create = TRUE;
8183 8186          newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8184 8187  
8185 8188          ASSERT(newcp != NULL);
8186 8189  
8187 8190          if (newcp == NULL) {
8188 8191                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8189 8192                  rfs4_client_rele(cp_confirmed);
8190 8193                  goto out;
8191 8194          }
8192 8195  
8193 8196          /*
8194 8197           * If one was not created, then a similar request must be in
8195 8198           * process so release and start over with this one
8196 8199           */
8197 8200          if (create != TRUE) {
8198 8201                  rfs4_client_rele(newcp);
8199 8202                  if (cp_confirmed)
8200 8203                          rfs4_client_rele(cp_confirmed);
8201 8204                  goto retry;
8202 8205          }
8203 8206  
8204 8207          *cs->statusp = res->status = NFS4_OK;
8205 8208          res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8206 8209          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8207 8210              newcp->rc_confirm_verf;
8208 8211          /* Setup callback information; CB_NULL confirmation later */
8209 8212          rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8210 8213  
8211 8214          newcp->rc_cp_confirmed = cp_confirmed;
8212 8215  
8213 8216          rfs4_client_rele(newcp);
8214 8217  
8215 8218  out:
8216 8219          DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8217 8220              SETCLIENTID4res *, res);
8218 8221  }
8219 8222  
8220 8223  /*ARGSUSED*/
8221 8224  void
8222 8225  rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8223 8226      struct svc_req *req, struct compound_state *cs)
8224 8227  {
8225 8228          SETCLIENTID_CONFIRM4args *args =
  
    | 
      ↓ open down ↓ | 
    1556 lines elided | 
    
      ↑ open up ↑ | 
  
8226 8229              &argop->nfs_argop4_u.opsetclientid_confirm;
8227 8230          SETCLIENTID_CONFIRM4res *res =
8228 8231              &resop->nfs_resop4_u.opsetclientid_confirm;
8229 8232          rfs4_client_t *cp, *cptoclose = NULL;
8230 8233          nfs4_srv_t *nsrv4;
8231 8234  
8232 8235          DTRACE_NFSV4_2(op__setclientid__confirm__start,
8233 8236              struct compound_state *, cs,
8234 8237              SETCLIENTID_CONFIRM4args *, args);
8235 8238  
8236      -        nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     8239 +        nsrv4 = nfs4_get_srv();
8237 8240          *cs->statusp = res->status = NFS4_OK;
8238 8241  
8239 8242          cp = rfs4_findclient_by_id(args->clientid, TRUE);
8240 8243  
8241 8244          if (cp == NULL) {
8242 8245                  *cs->statusp = res->status =
8243 8246                      rfs4_check_clientid(&args->clientid, 1);
8244 8247                  goto out;
8245 8248          }
8246 8249  
8247 8250          if (!creds_ok(cp, req, cs)) {
8248 8251                  *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8249 8252                  rfs4_client_rele(cp);
8250 8253                  goto out;
8251 8254          }
8252 8255  
8253 8256          /* If the verifier doesn't match, the record doesn't match */
8254 8257          if (cp->rc_confirm_verf != args->setclientid_confirm) {
8255 8258                  *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8256 8259                  rfs4_client_rele(cp);
8257 8260                  goto out;
8258 8261          }
8259 8262  
8260 8263          rfs4_dbe_lock(cp->rc_dbe);
8261 8264          cp->rc_need_confirm = FALSE;
8262 8265          if (cp->rc_cp_confirmed) {
8263 8266                  cptoclose = cp->rc_cp_confirmed;
8264 8267                  cptoclose->rc_ss_remove = 1;
8265 8268                  cp->rc_cp_confirmed = NULL;
8266 8269          }
8267 8270  
8268 8271          /*
8269 8272           * Update the client's associated server instance, if it's changed
8270 8273           * since the client was created.
8271 8274           */
8272 8275          if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8273 8276                  rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8274 8277  
8275 8278          /*
8276 8279           * Record clientid in stable storage.
8277 8280           * Must be done after server instance has been assigned.
8278 8281           */
8279 8282          rfs4_ss_clid(nsrv4, cp);
8280 8283  
8281 8284          rfs4_dbe_unlock(cp->rc_dbe);
8282 8285  
8283 8286          if (cptoclose)
8284 8287                  /* don't need to rele, client_close does it */
8285 8288                  rfs4_client_close(cptoclose);
8286 8289  
8287 8290          /* If needed, initiate CB_NULL call for callback path */
8288 8291          rfs4_deleg_cb_check(cp);
8289 8292          rfs4_update_lease(cp);
8290 8293  
8291 8294          /*
8292 8295           * Check to see if client can perform reclaims
8293 8296           */
8294 8297          rfs4_ss_chkclid(nsrv4, cp);
8295 8298  
8296 8299          rfs4_client_rele(cp);
8297 8300  
8298 8301  out:
8299 8302          DTRACE_NFSV4_2(op__setclientid__confirm__done,
8300 8303              struct compound_state *, cs,
8301 8304              SETCLIENTID_CONFIRM4 *, res);
8302 8305  }
8303 8306  
8304 8307  
8305 8308  /*ARGSUSED*/
8306 8309  void
8307 8310  rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8308 8311      struct svc_req *req, struct compound_state *cs)
8309 8312  {
8310 8313          CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8311 8314          CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8312 8315          rfs4_state_t *sp;
8313 8316          nfsstat4 status;
8314 8317  
8315 8318          DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8316 8319              CLOSE4args *, args);
8317 8320  
8318 8321          if (cs->vp == NULL) {
8319 8322                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8320 8323                  goto out;
8321 8324          }
8322 8325  
8323 8326          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8324 8327          if (status != NFS4_OK) {
8325 8328                  *cs->statusp = resp->status = status;
8326 8329                  goto out;
8327 8330          }
8328 8331  
8329 8332          /* Ensure specified filehandle matches */
8330 8333          if (cs->vp != sp->rs_finfo->rf_vp) {
8331 8334                  rfs4_state_rele(sp);
8332 8335                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8333 8336                  goto out;
8334 8337          }
8335 8338  
8336 8339          /* hold off other access to open_owner while we tinker */
8337 8340          rfs4_sw_enter(&sp->rs_owner->ro_sw);
8338 8341  
8339 8342          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8340 8343          case NFS4_CHECK_STATEID_OKAY:
8341 8344                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8342 8345                      resop) != NFS4_CHKSEQ_OKAY) {
8343 8346                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8344 8347                          goto end;
8345 8348                  }
8346 8349                  break;
8347 8350          case NFS4_CHECK_STATEID_OLD:
8348 8351                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8349 8352                  goto end;
8350 8353          case NFS4_CHECK_STATEID_BAD:
8351 8354                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8352 8355                  goto end;
8353 8356          case NFS4_CHECK_STATEID_EXPIRED:
8354 8357                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8355 8358                  goto end;
8356 8359          case NFS4_CHECK_STATEID_CLOSED:
8357 8360                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8358 8361                  goto end;
8359 8362          case NFS4_CHECK_STATEID_UNCONFIRMED:
8360 8363                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8361 8364                  goto end;
8362 8365          case NFS4_CHECK_STATEID_REPLAY:
8363 8366                  /* Check the sequence id for the open owner */
8364 8367                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8365 8368                      resop)) {
8366 8369                  case NFS4_CHKSEQ_OKAY:
8367 8370                          /*
8368 8371                           * This is replayed stateid; if seqid matches
8369 8372                           * next expected, then client is using wrong seqid.
8370 8373                           */
8371 8374                          /* FALL THROUGH */
8372 8375                  case NFS4_CHKSEQ_BAD:
8373 8376                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8374 8377                          goto end;
8375 8378                  case NFS4_CHKSEQ_REPLAY:
8376 8379                          /*
8377 8380                           * Note this case is the duplicate case so
8378 8381                           * resp->status is already set.
8379 8382                           */
8380 8383                          *cs->statusp = resp->status;
8381 8384                          rfs4_update_lease(sp->rs_owner->ro_client);
8382 8385                          goto end;
8383 8386                  }
8384 8387                  break;
8385 8388          default:
8386 8389                  ASSERT(FALSE);
8387 8390                  break;
8388 8391          }
8389 8392  
8390 8393          rfs4_dbe_lock(sp->rs_dbe);
8391 8394  
8392 8395          /* Update the stateid. */
8393 8396          next_stateid(&sp->rs_stateid);
8394 8397          resp->open_stateid = sp->rs_stateid.stateid;
8395 8398  
8396 8399          rfs4_dbe_unlock(sp->rs_dbe);
8397 8400  
8398 8401          rfs4_update_lease(sp->rs_owner->ro_client);
8399 8402          rfs4_update_open_sequence(sp->rs_owner);
8400 8403          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8401 8404  
8402 8405          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8403 8406  
8404 8407          *cs->statusp = resp->status = status;
8405 8408  
8406 8409  end:
8407 8410          rfs4_sw_exit(&sp->rs_owner->ro_sw);
8408 8411          rfs4_state_rele(sp);
8409 8412  out:
8410 8413          DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8411 8414              CLOSE4res *, resp);
8412 8415  }
8413 8416  
8414 8417  /*
8415 8418   * Manage the counts on the file struct and close all file locks
            *
            * Drops what the open state `sp' holds on its file: byte-range locks
            * taken through its lock-owner states, share reservations (via
            * rfs4_unshare()), and its contribution to the file's access/deny
            * counters, finishing with a VOP_CLOSE() of the vnode.
            *
            *   sp              - open state being released
            *   cr              - credential handed to VOP_CLOSE()
            *   close_of_client - TRUE when this is part of closing down the whole
            *                     client; the client's locks are then removed in
            *                     one request rather than per lock owner, and
            *                     rc_unlksys_completed records that this was done
            *                     so it is not repeated for the same client.
            *
            * Return values of VOP_FRLOCK(), cleanlocks(), rfs4_unshare() and
            * VOP_CLOSE() are deliberately discarded.
8416 8419   */
8417 8420  /*ARGSUSED*/
8418 8421  void
8419 8422  rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8420 8423      bool_t close_of_client)
8421 8424  {
8422 8425          rfs4_file_t *fp = sp->rs_finfo;
8423 8426          rfs4_lo_state_t *lsp;
8424 8427          int fflags = 0;
8425 8428  
8426 8429          /*
8427 8430           * If this call is part of the larger closing down of client
8428 8431           * state then it is just easier to release all locks
8429 8432           * associated with this client instead of going through each
8430 8433           * individual file and cleaning locks there.
8431 8434           */
8432 8435          if (close_of_client) {
8433 8436                  if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8434 8437                      !list_is_empty(&sp->rs_lostatelist) &&
8435 8438                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8436 8439                          /* Is the PxFS kernel module loaded? */
8437 8440                          if (lm_remove_file_locks != NULL) {
8438 8441                                  int new_sysid;
8439 8442  
8440 8443                                  /* Encode the cluster nodeid in new sysid */
8441 8444                                  new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8442 8445                                  lm_set_nlmid_flk(&new_sysid);
8443 8446  
8444 8447                                  /*
8445 8448                                   * This PxFS routine removes file locks for a
8446 8449                                   * client over all nodes of a cluster.
8447 8450                                   */
8448 8451                                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8449 8452                                      "lm_remove_file_locks(sysid=0x%x)\n",
8450 8453                                      new_sysid));
8451 8454                                  (*lm_remove_file_locks)(new_sysid);
8452 8455                          } else {
8453 8456                                  struct flock64 flk;
8454 8457  
8455 8458                                  /* Release all locks for this client */
8456 8459                                  flk.l_type = F_UNLKSYS;
8457 8460                                  flk.l_whence = 0;
8458 8461                                  flk.l_start = 0;
8459 8462                                  flk.l_len = 0;
8460 8463                                  flk.l_sysid =
8461 8464                                      sp->rs_owner->ro_client->rc_sysidt;
8462 8465                                  flk.l_pid = 0;
                                           /*
                                            * NOTE(review): this request is issued
                                            * with CRED() rather than the `cr'
                                            * argument -- confirm that is intended.
                                            */
8463 8466                                  (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8464 8467                                      &flk, F_REMOTELOCK | FREAD | FWRITE,
8465 8468                                      (u_offset_t)0, NULL, CRED(), NULL);
8466 8469                          }
8467 8470  
                                   /* Remember that the client-wide unlock was done */
8468 8471                          sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8469 8472                  }
8470 8473          }
8471 8474  
8472 8475          /*
8473 8476           * Release all locks on this file by this lock owner or at
8474 8477           * least mark the locks as having been released
8475 8478           */
8476 8479          for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8477 8480              lsp = list_next(&sp->rs_lostatelist, lsp)) {
8478 8481                  lsp->rls_locks_cleaned = TRUE;
8479 8482  
8480 8483                  /* Was this already taken care of above? */
8481 8484                  if (!close_of_client &&
8482 8485                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8483 8486                          (void) cleanlocks(sp->rs_finfo->rf_vp,
8484 8487                              lsp->rls_locker->rl_pid,
8485 8488                              lsp->rls_locker->rl_client->rc_sysidt);
8486 8489          }
8487 8490  
8488 8491          /*
8489 8492           * Release any shrlocks associated with this open state ID.
8490 8493           * This must be done before the rfs4_state gets marked closed.
8491 8494           */
8492 8495          if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8493 8496                  (void) rfs4_unshare(sp);
8494 8497  
           /* Nothing to undo on the file if this state holds no open access */
8495 8498          if (sp->rs_open_access) {
8496 8499                  rfs4_dbe_lock(fp->rf_dbe);
8497 8500  
8498 8501                  /*
8499 8502                   * Decrement the count for each access and deny bit that this
8500 8503                   * state has contributed to the file.
8501 8504                   * If the file counts go to zero
8502 8505                   * clear the appropriate bit in the appropriate mask.
                            * fflags accumulates FREAD/FWRITE for the VOP_CLOSE() below.
8503 8506                   */
8504 8507                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8505 8508                          fp->rf_access_read--;
8506 8509                          fflags |= FREAD;
8507 8510                          if (fp->rf_access_read == 0)
8508 8511                                  fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8509 8512                  }
8510 8513                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8511 8514                          fp->rf_access_write--;
8512 8515                          fflags |= FWRITE;
8513 8516                          if (fp->rf_access_write == 0)
8514 8517                                  fp->rf_share_access &=
8515 8518                                      ~OPEN4_SHARE_ACCESS_WRITE;
8516 8519                  }
8517 8520                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8518 8521                          fp->rf_deny_read--;
8519 8522                          if (fp->rf_deny_read == 0)
8520 8523                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8521 8524                  }
8522 8525                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8523 8526                          fp->rf_deny_write--;
8524 8527                          if (fp->rf_deny_write == 0)
8525 8528                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8526 8529                  }
8527 8530  
                           /* Note: the close is issued while fp->rf_dbe is still held */
8528 8531                  (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8529 8532  
8530 8533                  rfs4_dbe_unlock(fp->rf_dbe);
8531 8534  
                           /* Clear the bits so a second call does not decrement again */
8532 8535                  sp->rs_open_access = 0;
8533 8536                  sp->rs_open_deny = 0;
8534 8537          }
8535 8538  }
8536 8539  
8537 8540  /*
8538 8541   * lock_denied: Fill in a LOCK4denied structure given an flock64 structure.
            *
            *   dp  - result to fill in; dp->owner.owner_val is allocated here with
            *         kmem_alloc(KM_SLEEP)
            *   flk - description of the conflicting lock
            *
            * Returns NFS4_OK on success, NFS4ERR_EXPIRED when the conflicting lock
            * belongs to an NFSv4 lock owner whose client lease has expired (that
            * client is closed here and dp is left untouched), or NFS4ERR_INVAL when
            * flk->l_type is neither F_RDLCK nor F_WRLCK.
8539 8542   */
8540 8543  static nfsstat4
8541 8544  lock_denied(LOCK4denied *dp, struct flock64 *flk)
8542 8545  {
8543 8546          rfs4_lockowner_t *lo;
8544 8547          rfs4_client_t *cp;
8545 8548          uint32_t len;
8546 8549  
           /* See whether the conflicting lock belongs to a known lock owner */
8547 8550          lo = rfs4_findlockowner_by_pid(flk->l_pid);
8548 8551          if (lo != NULL) {
8549 8552                  cp = lo->rl_client;
8550 8553                  if (rfs4_lease_expired(cp)) {
                                   /*
                                    * The holder's lease is gone: drop our lock
                                    * owner reference, take a hold on the client
                                    * entry and close the client.
                                    * NOTE(review): presumably rfs4_client_close()
                                    * consumes the hold taken here -- confirm.
                                    */
8551 8554                          rfs4_lockowner_rele(lo);
8552 8555                          rfs4_dbe_hold(cp->rc_dbe);
8553 8556                          rfs4_client_close(cp);
8554 8557                          return (NFS4ERR_EXPIRED);
8555 8558                  }
                           /* Copy the lock owner identity into the reply */
8556 8559                  dp->owner.clientid = lo->rl_owner.clientid;
8557 8560                  len = lo->rl_owner.owner_len;
8558 8561                  dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8559 8562                  bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8560 8563                  dp->owner.owner_len = len;
8561 8564                  rfs4_lockowner_rele(lo);
8562 8565                  goto finish;
8563 8566          }
8564 8567  
8565 8568          /*
8566 8569           * It's not an NFSv4 lock. We take advantage that the upper 32 bits
8567 8570           * of the client id contain the boot time for a NFS4 lock. So we
8568 8571           * fabricate an identity by setting clientid to the sysid, and
8569 8572           * the lock owner to the pid.
8570 8573           */
8571 8574          dp->owner.clientid = flk->l_sysid;
8572 8575          len = sizeof (pid_t);
8573 8576          dp->owner.owner_len = len;
8574 8577          dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8575 8578          bcopy(&flk->l_pid, dp->owner.owner_val, len);
8576 8579  finish:
8577 8580          dp->offset = flk->l_start;
8578 8581          dp->length = flk->l_len;
8579 8582  
           /*
            * NOTE(review): on the NFS4ERR_INVAL return below owner_val has
            * already been allocated; whether the caller frees it on that path
            * is not visible from here.
            */
8580 8583          if (flk->l_type == F_RDLCK)
8581 8584                  dp->locktype = READ_LT;
8582 8585          else if (flk->l_type == F_WRLCK)
8583 8586                  dp->locktype = WRITE_LT;
8584 8587          else
8585 8588                  return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8586 8589  
8587 8590          return (NFS4_OK);
8588 8591  }
8589 8592  
8590 8593  /*
8591 8594   * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8592 8595   * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8593 8596   * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8594 8597   * for that (obviously); they are sending the LOCK requests with some delays
8595 8598   * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8596 8599   * locking and delay implementation at the client side.
8597 8600   *
8598 8601   * To make the life of the clients easier, the NFSv4.0 server tries to do some
8599 8602   * fast retries on its own (the for loop below) in a hope the lock will be
8600 8603   * available soon.  And if not, the client won't need to resend the LOCK
8601 8604   * requests so fast to check the lock availability.  This basically saves some
8602 8605   * network traffic and tries to make sure the client gets the lock ASAP.
            *
            * Returns the errno-style result of the last VOP_FRLOCK() set attempt.
            * When that result is EAGAIN or EACCES and the follow-up F_GETLK query
            * reports a conflicting lock, *flock is overwritten with the description
            * of that conflicting lock; otherwise *flock is left as passed in.
8603 8606   */
8604 8607  static int
8605 8608  setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8606 8609  {
8607 8610          int error;
8608 8611          struct flock64 flk;
8609 8612          int i;
8610 8613          clock_t delaytime;
8611 8614          int cmd;
8612 8615          int spin_cnt = 0;
8613 8616  
           /* Use the mandatory-lock aware command when nbl_need_check() says so */
8614 8617          cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8615 8618  retry:
           /* Each pass through `retry' starts again from the base delay */
8616 8619          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8617 8620  
           /*
            * NOTE(review): `error' is only assigned inside this loop; if
            * rfs4_maxlock_tries could ever be <= 0 it would be read
            * uninitialized below -- confirm the tunable is always positive.
            */
8618 8621          for (i = 0; i < rfs4_maxlock_tries; i++) {
8619 8622                  LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8620 8623                  error = VOP_FRLOCK(vp, cmd,
8621 8624                      flock, flag, (u_offset_t)0, NULL, cred, NULL);
8622 8625  
                           /* Anything other than "lock is busy" ends the attempts */
8623 8626                  if (error != EAGAIN && error != EACCES)
8624 8627                          break;
8625 8628  
                           /* Back off, doubling the delay; no sleep after the last try */
8626 8629                  if (i < rfs4_maxlock_tries - 1) {
8627 8630                          delay(delaytime);
8628 8631                          delaytime *= 2;
8629 8632                  }
8630 8633          }
8631 8634  
8632 8635          if (error == EAGAIN || error == EACCES) {
8633 8636                  /* Get the owner of the lock */
8634 8637                  flk = *flock;
8635 8638                  LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8636 8639                  if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8637 8640                      NULL) == 0) {
8638 8641                          /*
8639 8642                           * There's a race inherent in the current VOP_FRLOCK
8640 8643                           * design where:
8641 8644                           * a: "other guy" takes a lock that conflicts with a
8642 8645                           * lock we want
8643 8646                           * b: we attempt to take our lock (non-blocking) and
8644 8647                           * the attempt fails.
8645 8648                           * c: "other guy" releases the conflicting lock
8646 8649                           * d: we ask what lock conflicts with the lock we want,
8647 8650                           * getting F_UNLCK (no lock blocks us)
8648 8651                           *
8649 8652                           * If we retry the non-blocking lock attempt in this
8650 8653                           * case (restart at step 'b') there's some possibility
8651 8654                           * that many such attempts might fail.  However a test
8652 8655                           * designed to actually provoke this race shows that
8653 8656                           * the vast majority of cases require no retry, and
8654 8657                           * only a few took as many as three retries.  Here's
8655 8658                           * the test outcome:
8656 8659                           *
8657 8660                           *         number of retries    how many times we needed
8658 8661                           *                              that many retries
8659 8662                           *         0                    79461
8660 8663                           *         1                      862
8661 8664                           *         2                       49
8662 8665                           *         3                        5
8663 8666                           *
8664 8667                           * Given those empirical results, we arbitrarily limit
8665 8668                           * the retry count to ten.
8666 8669                           *
8667 8670                           * If we actually make it to ten retries and give up,
8668 8671                           * nothing catastrophic happens, but we're unable to
8669 8672                           * return the information about the conflicting lock to
8670 8673                           * the NFS client.  That's an acceptable trade off vs.
8671 8674                           * letting this retry loop run forever.
8672 8675                           */
8673 8676                          if (flk.l_type == F_UNLCK) {
8674 8677                                  if (spin_cnt++ < 10) {
8675 8678                                          /* No longer locked, retry */
8676 8679                                          goto retry;
8677 8680                                  }
8678 8681                          } else {
                                           /* Hand the conflicting lock back to the caller */
8679 8682                                  *flock = flk;
8680 8683                                  LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8681 8684                                      F_GETLK, &flk);
8682 8685                          }
8683 8686                  }
8684 8687          }
8685 8688  
8686 8689          return (error);
8687 8690  }
8688 8691  
8689 8692  /*ARGSUSED*/
           /*
            * rfs4_do_lock: apply (or, for OP_LOCKU, remove) a byte-range lock on the
            * file behind the lock-owner state `lsp' and translate the outcome into an
            * NFSv4 status.
            *
            *   lsp      - lock-owner state; supplies the lock owner and open state
            *   locktype - requested NFSv4 lock type (ignored when resop is OP_LOCKU)
            *   offset   - start of the range
            *   length   - length of the range; all ones means "to end of file",
            *              zero is rejected
            *   cred     - credential passed down to setlock()
            *   resop    - result op; its resop field selects lock vs. unlock, and
            *              for a denied OP_LOCK its LOCK4res denied arm is filled in
            *
            * On success the lock stateid in lsp is advanced with next_stateid().
            */
8690 8693  static nfsstat4
8691 8694  rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8692 8695      offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8693 8696  {
8694 8697          nfsstat4 status;
8695 8698          rfs4_lockowner_t *lo = lsp->rls_locker;
8696 8699          rfs4_state_t *sp = lsp->rls_state;
8697 8700          struct flock64 flock;
8698 8701          int16_t ltype;
8699 8702          int flag;
8700 8703          int error;
8701 8704          sysid_t sysid;
8702 8705          LOCK4res *lres;
8703 8706          vnode_t *vp;
8704 8707  
8705 8708          if (rfs4_lease_expired(lo->rl_client)) {
8706 8709                  return (NFS4ERR_EXPIRED);
8707 8710          }
8708 8711  
8709 8712          if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8710 8713                  return (status);
8711 8714  
8712 8715          /* Check for zero length. To lock to end of file use all ones for V4 */
8713 8716          if (length == 0)
8714 8717                  return (NFS4ERR_INVAL);
8715 8718          else if (length == (length4)(~0))
8716 8719                  length = 0;             /* Posix to end of file  */
8717 8720  
8718 8721  retry:
           /* The open state must still be open; checked under its dbe lock */
8719 8722          rfs4_dbe_lock(sp->rs_dbe);
8720 8723          if (sp->rs_closed == TRUE) {
8721 8724                  rfs4_dbe_unlock(sp->rs_dbe);
8722 8725                  return (NFS4ERR_OLD_STATEID);
8723 8726          }
8724 8727  
           /*
            * For a lock request the open must have been made with matching
            * access: read access for read locks, write access for write locks.
            * NOTE(review): the switch has no default, so ltype stays unset for
            * any other locktype value -- presumably validated earlier; confirm.
            */
8725 8728          if (resop->resop != OP_LOCKU) {
8726 8729                  switch (locktype) {
8727 8730                  case READ_LT:
8728 8731                  case READW_LT:
8729 8732                          if ((sp->rs_share_access
8730 8733                              & OPEN4_SHARE_ACCESS_READ) == 0) {
8731 8734                                  rfs4_dbe_unlock(sp->rs_dbe);
8732 8735  
8733 8736                                  return (NFS4ERR_OPENMODE);
8734 8737                          }
8735 8738                          ltype = F_RDLCK;
8736 8739                          break;
8737 8740                  case WRITE_LT:
8738 8741                  case WRITEW_LT:
8739 8742                          if ((sp->rs_share_access
8740 8743                              & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8741 8744                                  rfs4_dbe_unlock(sp->rs_dbe);
8742 8745  
8743 8746                                  return (NFS4ERR_OPENMODE);
8744 8747                          }
8745 8748                          ltype = F_WRLCK;
8746 8749                          break;
8747 8750                  }
8748 8751          } else
8749 8752                  ltype = F_UNLCK;
8750 8753  
           /* Rebuilt on every pass, since setlock() may overwrite it */
8751 8754          flock.l_type = ltype;
8752 8755          flock.l_whence = 0;             /* SEEK_SET */
8753 8756          flock.l_start = offset;
8754 8757          flock.l_len = length;
8755 8758          flock.l_sysid = sysid;
8756 8759          flock.l_pid = lsp->rls_locker->rl_pid;
8757 8760  
8758 8761          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8759 8762          if (flock.l_len < 0 || flock.l_start < 0) {
8760 8763                  rfs4_dbe_unlock(sp->rs_dbe);
8761 8764                  return (NFS4ERR_INVAL);
8762 8765          }
8763 8766  
8764 8767          /*
8765 8768           * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8766 8769           * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8767 8770           */
8768 8771          flag = (int)sp->rs_share_access | F_REMOTELOCK;
8769 8772  
           /* Hold the vnode so it stays valid while sp is unlocked */
8770 8773          vp = sp->rs_finfo->rf_vp;
8771 8774          VN_HOLD(vp);
8772 8775  
8773 8776          /*
8774 8777           * We need to unlock sp before we call the underlying filesystem to
8775 8778           * acquire the file lock.
8776 8779           */
8777 8780          rfs4_dbe_unlock(sp->rs_dbe);
8778 8781  
8779 8782          error = setlock(vp, &flock, flag, cred);
8780 8783  
8781 8784          /*
8782 8785           * Make sure the file is still open.  In a case the file was closed in
8783 8786           * the meantime, clean the lock we acquired using the setlock() call
8784 8787           * above, and return the appropriate error.
8785 8788           */
8786 8789          rfs4_dbe_lock(sp->rs_dbe);
8787 8790          if (sp->rs_closed == TRUE) {
8788 8791                  cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8789 8792                  rfs4_dbe_unlock(sp->rs_dbe);
8790 8793  
8791 8794                  VN_RELE(vp);
8792 8795  
8793 8796                  return (NFS4ERR_OLD_STATEID);
8794 8797          }
8795 8798          rfs4_dbe_unlock(sp->rs_dbe);
8796 8799  
8797 8800          VN_RELE(vp);
8798 8801  
           /* A successful lock/unlock advances the lock stateid */
8799 8802          if (error == 0) {
8800 8803                  rfs4_dbe_lock(lsp->rls_dbe);
8801 8804                  next_stateid(&lsp->rls_lockid);
8802 8805                  rfs4_dbe_unlock(lsp->rls_dbe);
8803 8806          }
8804 8807  
8805 8808          /*
8806 8809           * N.B. We map error values to nfsv4 errors. This is different
8807 8810           * than puterrno4 routine.
8808 8811           */
8809 8812          switch (error) {
8810 8813          case 0:
8811 8814                  status = NFS4_OK;
8812 8815                  break;
8813 8816          case EAGAIN:
8814 8817          case EACCES:            /* Old value */
8815 8818                  /* Can only get here if op is OP_LOCK */
8816 8819                  ASSERT(resop->resop == OP_LOCK);
8817 8820                  lres = &resop->nfs_resop4_u.oplock;
8818 8821                  status = NFS4ERR_DENIED;
                           /*
                            * lock_denied() returns NFS4ERR_EXPIRED after closing a
                            * conflicting client whose lease had expired, so the
                            * whole attempt is made again from `retry'.
                            */
8819 8822                  if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8820 8823                      == NFS4ERR_EXPIRED)
8821 8824                          goto retry;
8822 8825                  break;
8823 8826          case ENOLCK:
8824 8827                  status = NFS4ERR_DELAY;
8825 8828                  break;
8826 8829          case EOVERFLOW:
8827 8830                  status = NFS4ERR_INVAL;
8828 8831                  break;
8829 8832          case EINVAL:
8830 8833                  status = NFS4ERR_NOTSUPP;
8831 8834                  break;
8832 8835          default:
8833 8836                  status = NFS4ERR_SERVERFAULT;
8834 8837                  break;
8835 8838          }
8836 8839  
8837 8840          return (status);
8838 8841  }
8839 8842  
8840 8843  /*ARGSUSED*/
8841 8844  void
8842 8845  rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8843 8846      struct svc_req *req, struct compound_state *cs)
8844 8847  {
8845 8848          LOCK4args *args = &argop->nfs_argop4_u.oplock;
8846 8849          LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8847 8850          nfsstat4 status;
8848 8851          stateid4 *stateid;
8849 8852          rfs4_lockowner_t *lo;
8850 8853          rfs4_client_t *cp;
8851 8854          rfs4_state_t *sp = NULL;
8852 8855          rfs4_lo_state_t *lsp = NULL;
8853 8856          bool_t ls_sw_held = FALSE;
8854 8857          bool_t create = TRUE;
8855 8858          bool_t lcreate = TRUE;
8856 8859          bool_t dup_lock = FALSE;
8857 8860          int rc;
8858 8861  
8859 8862          DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8860 8863              LOCK4args *, args);
8861 8864  
8862 8865          if (cs->vp == NULL) {
8863 8866                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8864 8867                  DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8865 8868                      cs, LOCK4res *, resp);
8866 8869                  return;
8867 8870          }
8868 8871  
8869 8872          if (args->locker.new_lock_owner) {
8870 8873                  /* Create a new lockowner for this instance */
8871 8874                  open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8872 8875  
8873 8876                  NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8874 8877  
8875 8878                  stateid = &olo->open_stateid;
8876 8879                  status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8877 8880                  if (status != NFS4_OK) {
8878 8881                          NFS4_DEBUG(rfs4_debug,
8879 8882                              (CE_NOTE, "Get state failed in lock %d", status));
8880 8883                          *cs->statusp = resp->status = status;
8881 8884                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8882 8885                              cs, LOCK4res *, resp);
8883 8886                          return;
8884 8887                  }
8885 8888  
8886 8889                  /* Ensure specified filehandle matches */
8887 8890                  if (cs->vp != sp->rs_finfo->rf_vp) {
8888 8891                          rfs4_state_rele(sp);
8889 8892                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8890 8893                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8891 8894                              cs, LOCK4res *, resp);
8892 8895                          return;
8893 8896                  }
8894 8897  
8895 8898                  /* hold off other access to open_owner while we tinker */
8896 8899                  rfs4_sw_enter(&sp->rs_owner->ro_sw);
8897 8900  
8898 8901                  switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8899 8902                  case NFS4_CHECK_STATEID_OLD:
8900 8903                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8901 8904                          goto end;
8902 8905                  case NFS4_CHECK_STATEID_BAD:
8903 8906                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8904 8907                          goto end;
8905 8908                  case NFS4_CHECK_STATEID_EXPIRED:
8906 8909                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8907 8910                          goto end;
8908 8911                  case NFS4_CHECK_STATEID_UNCONFIRMED:
8909 8912                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8910 8913                          goto end;
8911 8914                  case NFS4_CHECK_STATEID_CLOSED:
8912 8915                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8913 8916                          goto end;
8914 8917                  case NFS4_CHECK_STATEID_OKAY:
8915 8918                  case NFS4_CHECK_STATEID_REPLAY:
8916 8919                          switch (rfs4_check_olo_seqid(olo->open_seqid,
8917 8920                              sp->rs_owner, resop)) {
8918 8921                          case NFS4_CHKSEQ_OKAY:
8919 8922                                  if (rc == NFS4_CHECK_STATEID_OKAY)
8920 8923                                          break;
8921 8924                                  /*
8922 8925                                   * This is replayed stateid; if seqid
8923 8926                                   * matches next expected, then client
8924 8927                                   * is using wrong seqid.
8925 8928                                   */
8926 8929                                  /* FALLTHROUGH */
8927 8930                          case NFS4_CHKSEQ_BAD:
8928 8931                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8929 8932                                  goto end;
8930 8933                          case NFS4_CHKSEQ_REPLAY:
8931 8934                                  /* This is a duplicate LOCK request */
8932 8935                                  dup_lock = TRUE;
8933 8936  
8934 8937                                  /*
8935 8938                                   * For a duplicate we do not want to
8936 8939                                   * create a new lockowner as it should
8937 8940                                   * already exist.
8938 8941                                   * Turn off the lockowner create flag.
8939 8942                                   */
8940 8943                                  lcreate = FALSE;
8941 8944                          }
8942 8945                          break;
8943 8946                  }
8944 8947  
8945 8948                  lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8946 8949                  if (lo == NULL) {
8947 8950                          NFS4_DEBUG(rfs4_debug,
8948 8951                              (CE_NOTE, "rfs4_op_lock: no lock owner"));
8949 8952                          *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8950 8953                          goto end;
8951 8954                  }
8952 8955  
8953 8956                  lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8954 8957                  if (lsp == NULL) {
8955 8958                          rfs4_update_lease(sp->rs_owner->ro_client);
8956 8959                          /*
8957 8960                           * Only update the open_seqid if this is not
8958 8961                           * a duplicate request
8959 8962                           */
8960 8963                          if (dup_lock == FALSE) {
8961 8964                                  rfs4_update_open_sequence(sp->rs_owner);
8962 8965                          }
8963 8966  
8964 8967                          NFS4_DEBUG(rfs4_debug,
8965 8968                              (CE_NOTE, "rfs4_op_lock: no state"));
8966 8969                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8967 8970                          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8968 8971                          rfs4_lockowner_rele(lo);
8969 8972                          goto end;
8970 8973                  }
8971 8974  
8972 8975                  /*
8973 8976                   * This is the new_lock_owner branch and the client is
8974 8977                   * supposed to be associating a new lock_owner with
8975 8978                   * the open file at this point.  If we find that a
8976 8979                   * lock_owner/state association already exists and a
8977 8980                   * successful LOCK request was returned to the client,
8978 8981                   * an error is returned to the client since this is
8979 8982                   * not appropriate.  The client should be using the
8980 8983                   * existing lock_owner branch.
8981 8984                   */
8982 8985                  if (dup_lock == FALSE && create == FALSE) {
8983 8986                          if (lsp->rls_lock_completed == TRUE) {
8984 8987                                  *cs->statusp =
8985 8988                                      resp->status = NFS4ERR_BAD_SEQID;
8986 8989                                  rfs4_lockowner_rele(lo);
8987 8990                                  goto end;
8988 8991                          }
8989 8992                  }
8990 8993  
8991 8994                  rfs4_update_lease(sp->rs_owner->ro_client);
8992 8995  
8993 8996                  /*
8994 8997                   * Only update the open_seqid if this is not
8995 8998                   * a duplicate request
8996 8999                   */
8997 9000                  if (dup_lock == FALSE) {
8998 9001                          rfs4_update_open_sequence(sp->rs_owner);
8999 9002                  }
9000 9003  
9001 9004                  /*
9002 9005                   * If this is a duplicate lock request, just copy the
9003 9006                   * previously saved reply and return.
9004 9007                   */
9005 9008                  if (dup_lock == TRUE) {
9006 9009                          /* verify that lock_seqid's match */
9007 9010                          if (lsp->rls_seqid != olo->lock_seqid) {
9008 9011                                  NFS4_DEBUG(rfs4_debug,
9009 9012                                      (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9010 9013                                      "lsp->seqid=%d old->seqid=%d",
9011 9014                                      lsp->rls_seqid, olo->lock_seqid));
9012 9015                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9013 9016                          } else {
9014 9017                                  rfs4_copy_reply(resop, &lsp->rls_reply);
9015 9018                                  /*
9016 9019                                   * Make sure to copy the just
9017 9020                                   * retrieved reply status into the
9018 9021                                   * overall compound status
9019 9022                                   */
9020 9023                                  *cs->statusp = resp->status;
9021 9024                          }
9022 9025                          rfs4_lockowner_rele(lo);
9023 9026                          goto end;
9024 9027                  }
9025 9028  
9026 9029                  rfs4_dbe_lock(lsp->rls_dbe);
9027 9030  
9028 9031                  /* Make sure to update the lock sequence id */
9029 9032                  lsp->rls_seqid = olo->lock_seqid;
9030 9033  
9031 9034                  NFS4_DEBUG(rfs4_debug,
9032 9035                      (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9033 9036  
9034 9037                  /*
9035 9038                   * This is used to signify the newly created lockowner
9036 9039                   * stateid and its sequence number.  The checks for
9037 9040                   * sequence number and increment don't occur on the
9038 9041                   * very first lock request for a lockowner.
9039 9042                   */
9040 9043                  lsp->rls_skip_seqid_check = TRUE;
9041 9044  
9042 9045                  /* hold off other access to lsp while we tinker */
9043 9046                  rfs4_sw_enter(&lsp->rls_sw);
9044 9047                  ls_sw_held = TRUE;
9045 9048  
9046 9049                  rfs4_dbe_unlock(lsp->rls_dbe);
9047 9050  
9048 9051                  rfs4_lockowner_rele(lo);
9049 9052          } else {
9050 9053                  stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9051 9054                  /* get lsp and hold the lock on the underlying file struct */
9052 9055                  if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9053 9056                      != NFS4_OK) {
9054 9057                          *cs->statusp = resp->status = status;
9055 9058                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9056 9059                              cs, LOCK4res *, resp);
9057 9060                          return;
9058 9061                  }
9059 9062                  create = FALSE; /* We didn't create lsp */
9060 9063  
9061 9064                  /* Ensure specified filehandle matches */
9062 9065                  if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9063 9066                          rfs4_lo_state_rele(lsp, TRUE);
9064 9067                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9065 9068                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9066 9069                              cs, LOCK4res *, resp);
9067 9070                          return;
9068 9071                  }
9069 9072  
9070 9073                  /* hold off other access to lsp while we tinker */
9071 9074                  rfs4_sw_enter(&lsp->rls_sw);
9072 9075                  ls_sw_held = TRUE;
9073 9076  
9074 9077                  switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9075 9078                  /*
9076 9079                   * The stateid looks like it was okay (expected to be
9077 9080                   * the next one)
9078 9081                   */
9079 9082                  case NFS4_CHECK_STATEID_OKAY:
9080 9083                          /*
9081 9084                           * The sequence id is now checked.  Determine
9082 9085                           * if this is a replay or if it is in the
9083 9086                           * expected (next) sequence.  In the case of a
9084 9087                           * replay, there are two replay conditions
9085 9088                           * that may occur.  The first is the normal
9086 9089                           * condition where a LOCK is done with a
9087 9090                           * NFS4_OK response and the stateid is
9088 9091                           * updated.  That case is handled below when
9089 9092                           * the stateid is identified as a REPLAY.  The
9090 9093                           * second is the case where an error is
9091 9094                           * returned, like NFS4ERR_DENIED, and the
9092 9095                           * sequence number is updated but the stateid
9093 9096                           * is not updated.  This second case is dealt
9094 9097                           * with here.  So it may seem odd that the
9095 9098                           * stateid is okay but the sequence id is a
9096 9099                           * replay but it is okay.
9097 9100                           */
9098 9101                          switch (rfs4_check_lock_seqid(
9099 9102                              args->locker.locker4_u.lock_owner.lock_seqid,
9100 9103                              lsp, resop)) {
9101 9104                          case NFS4_CHKSEQ_REPLAY:
9102 9105                                  if (resp->status != NFS4_OK) {
9103 9106                                          /*
9104 9107                                           * Here is our replay and need
9105 9108                                           * to verify that the last
9106 9109                                           * response was an error.
9107 9110                                           */
9108 9111                                          *cs->statusp = resp->status;
9109 9112                                          goto end;
9110 9113                                  }
9111 9114                                  /*
9112 9115                                   * This is done since the sequence id
9113 9116                                   * looked like a replay but it didn't
9114 9117                                   * pass our check so a BAD_SEQID is
9115 9118                                   * returned as a result.
9116 9119                                   */
9117 9120                                  /*FALLTHROUGH*/
9118 9121                          case NFS4_CHKSEQ_BAD:
9119 9122                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9120 9123                                  goto end;
9121 9124                          case NFS4_CHKSEQ_OKAY:
9122 9125                                  /* Everything looks okay move ahead */
9123 9126                                  break;
9124 9127                          }
9125 9128                          break;
9126 9129                  case NFS4_CHECK_STATEID_OLD:
9127 9130                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9128 9131                          goto end;
9129 9132                  case NFS4_CHECK_STATEID_BAD:
9130 9133                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9131 9134                          goto end;
9132 9135                  case NFS4_CHECK_STATEID_EXPIRED:
9133 9136                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9134 9137                          goto end;
9135 9138                  case NFS4_CHECK_STATEID_CLOSED:
9136 9139                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9137 9140                          goto end;
9138 9141                  case NFS4_CHECK_STATEID_REPLAY:
9139 9142                          switch (rfs4_check_lock_seqid(
9140 9143                              args->locker.locker4_u.lock_owner.lock_seqid,
9141 9144                              lsp, resop)) {
9142 9145                          case NFS4_CHKSEQ_OKAY:
9143 9146                                  /*
9144 9147                                   * This is a replayed stateid; if
9145 9148                                   * seqid matches the next expected,
9146 9149                                   * then client is using wrong seqid.
9147 9150                                   */
9148 9151                          case NFS4_CHKSEQ_BAD:
9149 9152                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9150 9153                                  goto end;
9151 9154                          case NFS4_CHKSEQ_REPLAY:
9152 9155                                  rfs4_update_lease(lsp->rls_locker->rl_client);
9153 9156                                  *cs->statusp = status = resp->status;
9154 9157                                  goto end;
9155 9158                          }
9156 9159                          break;
9157 9160                  default:
9158 9161                          ASSERT(FALSE);
9159 9162                          break;
9160 9163                  }
9161 9164  
9162 9165                  rfs4_update_lock_sequence(lsp);
9163 9166                  rfs4_update_lease(lsp->rls_locker->rl_client);
9164 9167          }
9165 9168  
9166 9169          /*
9167 9170           * NFS4 only allows locking on regular files, so
9168 9171           * verify type of object.
9169 9172           */
9170 9173          if (cs->vp->v_type != VREG) {
9171 9174                  if (cs->vp->v_type == VDIR)
9172 9175                          status = NFS4ERR_ISDIR;
9173 9176                  else
9174 9177                          status = NFS4ERR_INVAL;
9175 9178                  goto out;
9176 9179          }
9177 9180  
9178 9181          cp = lsp->rls_state->rs_owner->ro_client;
9179 9182  
9180 9183          if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9181 9184                  status = NFS4ERR_GRACE;
9182 9185                  goto out;
9183 9186          }
9184 9187  
9185 9188          if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9186 9189                  status = NFS4ERR_NO_GRACE;
9187 9190                  goto out;
9188 9191          }
9189 9192  
9190 9193          if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9191 9194                  status = NFS4ERR_NO_GRACE;
9192 9195                  goto out;
9193 9196          }
9194 9197  
9195 9198          if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9196 9199                  cs->deleg = TRUE;
9197 9200  
9198 9201          status = rfs4_do_lock(lsp, args->locktype,
9199 9202              args->offset, args->length, cs->cr, resop);
9200 9203  
9201 9204  out:
9202 9205          lsp->rls_skip_seqid_check = FALSE;
9203 9206  
9204 9207          *cs->statusp = resp->status = status;
9205 9208  
9206 9209          if (status == NFS4_OK) {
9207 9210                  resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9208 9211                  lsp->rls_lock_completed = TRUE;
9209 9212          }
9210 9213          /*
9211 9214           * Only update the "OPEN" response here if this was a new
9212 9215           * lock_owner
9213 9216           */
9214 9217          if (sp)
9215 9218                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9216 9219  
9217 9220          rfs4_update_lock_resp(lsp, resop);
9218 9221  
9219 9222  end:
9220 9223          if (lsp) {
9221 9224                  if (ls_sw_held)
9222 9225                          rfs4_sw_exit(&lsp->rls_sw);
9223 9226                  /*
9224 9227                   * If an sp obtained, then the lsp does not represent
9225 9228                   * a lock on the file struct.
9226 9229                   */
9227 9230                  if (sp != NULL)
9228 9231                          rfs4_lo_state_rele(lsp, FALSE);
9229 9232                  else
9230 9233                          rfs4_lo_state_rele(lsp, TRUE);
9231 9234          }
9232 9235          if (sp) {
9233 9236                  rfs4_sw_exit(&sp->rs_owner->ro_sw);
9234 9237                  rfs4_state_rele(sp);
9235 9238          }
9236 9239  
9237 9240          DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9238 9241              LOCK4res *, resp);
9239 9242  }
9240 9243  
9241 9244  /* free function for LOCK/LOCKT */
9242 9245  static void
9243 9246  lock_denied_free(nfs_resop4 *resop)
9244 9247  {
9245 9248          LOCK4denied *dp = NULL;
9246 9249  
9247 9250          switch (resop->resop) {
9248 9251          case OP_LOCK:
9249 9252                  if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9250 9253                          dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9251 9254                  break;
9252 9255          case OP_LOCKT:
9253 9256                  if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9254 9257                          dp = &resop->nfs_resop4_u.oplockt.denied;
9255 9258                  break;
9256 9259          default:
9257 9260                  break;
9258 9261          }
9259 9262  
9260 9263          if (dp)
9261 9264                  kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9262 9265  }
9263 9266  
9264 9267  /*ARGSUSED*/
9265 9268  void
9266 9269  rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9267 9270      struct svc_req *req, struct compound_state *cs)
9268 9271  {
9269 9272          LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9270 9273          LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9271 9274          nfsstat4 status;
9272 9275          stateid4 *stateid = &args->lock_stateid;
9273 9276          rfs4_lo_state_t *lsp;
9274 9277  
9275 9278          DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9276 9279              LOCKU4args *, args);
9277 9280  
9278 9281          if (cs->vp == NULL) {
9279 9282                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9280 9283                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9281 9284                      LOCKU4res *, resp);
9282 9285                  return;
9283 9286          }
9284 9287  
9285 9288          if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9286 9289                  *cs->statusp = resp->status = status;
9287 9290                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9288 9291                      LOCKU4res *, resp);
9289 9292                  return;
9290 9293          }
9291 9294  
9292 9295          /* Ensure specified filehandle matches */
9293 9296          if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9294 9297                  rfs4_lo_state_rele(lsp, TRUE);
9295 9298                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9296 9299                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9297 9300                      LOCKU4res *, resp);
9298 9301                  return;
9299 9302          }
9300 9303  
9301 9304          /* hold off other access to lsp while we tinker */
9302 9305          rfs4_sw_enter(&lsp->rls_sw);
9303 9306  
9304 9307          switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9305 9308          case NFS4_CHECK_STATEID_OKAY:
9306 9309                  if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9307 9310                      != NFS4_CHKSEQ_OKAY) {
9308 9311                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9309 9312                          goto end;
9310 9313                  }
9311 9314                  break;
9312 9315          case NFS4_CHECK_STATEID_OLD:
9313 9316                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9314 9317                  goto end;
9315 9318          case NFS4_CHECK_STATEID_BAD:
9316 9319                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9317 9320                  goto end;
9318 9321          case NFS4_CHECK_STATEID_EXPIRED:
9319 9322                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9320 9323                  goto end;
9321 9324          case NFS4_CHECK_STATEID_CLOSED:
9322 9325                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9323 9326                  goto end;
9324 9327          case NFS4_CHECK_STATEID_REPLAY:
9325 9328                  switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9326 9329                  case NFS4_CHKSEQ_OKAY:
9327 9330                                  /*
9328 9331                                   * This is a replayed stateid; if
9329 9332                                   * seqid matches the next expected,
9330 9333                                   * then client is using wrong seqid.
9331 9334                                   */
9332 9335                  case NFS4_CHKSEQ_BAD:
9333 9336                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9334 9337                          goto end;
9335 9338                  case NFS4_CHKSEQ_REPLAY:
9336 9339                          rfs4_update_lease(lsp->rls_locker->rl_client);
9337 9340                          *cs->statusp = status = resp->status;
9338 9341                          goto end;
9339 9342                  }
9340 9343                  break;
9341 9344          default:
9342 9345                  ASSERT(FALSE);
9343 9346                  break;
9344 9347          }
9345 9348  
9346 9349          rfs4_update_lock_sequence(lsp);
9347 9350          rfs4_update_lease(lsp->rls_locker->rl_client);
9348 9351  
9349 9352          /*
9350 9353           * NFS4 only allows locking on regular files, so
9351 9354           * verify type of object.
9352 9355           */
9353 9356          if (cs->vp->v_type != VREG) {
9354 9357                  if (cs->vp->v_type == VDIR)
9355 9358                          status = NFS4ERR_ISDIR;
9356 9359                  else
9357 9360                          status = NFS4ERR_INVAL;
9358 9361                  goto out;
9359 9362          }
9360 9363  
9361 9364          if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9362 9365                  status = NFS4ERR_GRACE;
9363 9366                  goto out;
9364 9367          }
9365 9368  
9366 9369          status = rfs4_do_lock(lsp, args->locktype,
9367 9370              args->offset, args->length, cs->cr, resop);
9368 9371  
9369 9372  out:
9370 9373          *cs->statusp = resp->status = status;
9371 9374  
9372 9375          if (status == NFS4_OK)
9373 9376                  resp->lock_stateid = lsp->rls_lockid.stateid;
9374 9377  
9375 9378          rfs4_update_lock_resp(lsp, resop);
9376 9379  
9377 9380  end:
9378 9381          rfs4_sw_exit(&lsp->rls_sw);
9379 9382          rfs4_lo_state_rele(lsp, TRUE);
9380 9383  
9381 9384          DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9382 9385              LOCKU4res *, resp);
9383 9386  }
9384 9387  
9385 9388  /*
9386 9389   * LOCKT is a best effort routine, the client can not be guaranteed that
9387 9390   * the status return is still in effect by the time the reply is received.
9388 9391   * They are numerous race conditions in this routine, but we are not required
9389 9392   * and can not be accurate.
9390 9393   */
9391 9394  /*ARGSUSED*/
9392 9395  void
9393 9396  rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9394 9397      struct svc_req *req, struct compound_state *cs)
9395 9398  {
9396 9399          LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9397 9400          LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9398 9401          rfs4_lockowner_t *lo;
9399 9402          rfs4_client_t *cp;
9400 9403          bool_t create = FALSE;
9401 9404          struct flock64 flk;
9402 9405          int error;
9403 9406          int flag = FREAD | FWRITE;
9404 9407          int ltype;
9405 9408          length4 posix_length;
9406 9409          sysid_t sysid;
9407 9410          pid_t pid;
9408 9411  
9409 9412          DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9410 9413              LOCKT4args *, args);
9411 9414  
9412 9415          if (cs->vp == NULL) {
9413 9416                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9414 9417                  goto out;
9415 9418          }
9416 9419  
9417 9420          /*
9418 9421           * NFS4 only allows locking on regular files, so
9419 9422           * verify type of object.
9420 9423           */
9421 9424          if (cs->vp->v_type != VREG) {
9422 9425                  if (cs->vp->v_type == VDIR)
9423 9426                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
9424 9427                  else
9425 9428                          *cs->statusp = resp->status =  NFS4ERR_INVAL;
9426 9429                  goto out;
9427 9430          }
9428 9431  
9429 9432          /*
9430 9433           * Check out the clientid to ensure the server knows about it
9431 9434           * so that we correctly inform the client of a server reboot.
9432 9435           */
9433 9436          if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9434 9437              == NULL) {
9435 9438                  *cs->statusp = resp->status =
9436 9439                      rfs4_check_clientid(&args->owner.clientid, 0);
9437 9440                  goto out;
9438 9441          }
9439 9442          if (rfs4_lease_expired(cp)) {
9440 9443                  rfs4_client_close(cp);
9441 9444                  /*
9442 9445                   * Protocol doesn't allow returning NFS4ERR_STALE as
9443 9446                   * other operations do on this check so STALE_CLIENTID
9444 9447                   * is returned instead
9445 9448                   */
9446 9449                  *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9447 9450                  goto out;
9448 9451          }
9449 9452  
9450 9453          if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9451 9454                  *cs->statusp = resp->status = NFS4ERR_GRACE;
9452 9455                  rfs4_client_rele(cp);
9453 9456                  goto out;
9454 9457          }
9455 9458          rfs4_client_rele(cp);
9456 9459  
9457 9460          resp->status = NFS4_OK;
9458 9461  
9459 9462          switch (args->locktype) {
9460 9463          case READ_LT:
9461 9464          case READW_LT:
9462 9465                  ltype = F_RDLCK;
9463 9466                  break;
9464 9467          case WRITE_LT:
9465 9468          case WRITEW_LT:
9466 9469                  ltype = F_WRLCK;
9467 9470                  break;
9468 9471          }
9469 9472  
9470 9473          posix_length = args->length;
9471 9474          /* Check for zero length. To lock to end of file use all ones for V4 */
9472 9475          if (posix_length == 0) {
9473 9476                  *cs->statusp = resp->status = NFS4ERR_INVAL;
9474 9477                  goto out;
9475 9478          } else if (posix_length == (length4)(~0)) {
9476 9479                  posix_length = 0;       /* Posix to end of file  */
9477 9480          }
9478 9481  
9479 9482          /* Find or create a lockowner */
9480 9483          lo = rfs4_findlockowner(&args->owner, &create);
9481 9484  
9482 9485          if (lo) {
9483 9486                  pid = lo->rl_pid;
9484 9487                  if ((resp->status =
9485 9488                      rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9486 9489                          goto err;
9487 9490          } else {
9488 9491                  pid = 0;
9489 9492                  sysid = lockt_sysid;
9490 9493          }
9491 9494  retry:
9492 9495          flk.l_type = ltype;
9493 9496          flk.l_whence = 0;               /* SEEK_SET */
9494 9497          flk.l_start = args->offset;
9495 9498          flk.l_len = posix_length;
9496 9499          flk.l_sysid = sysid;
9497 9500          flk.l_pid = pid;
9498 9501          flag |= F_REMOTELOCK;
9499 9502  
9500 9503          LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9501 9504  
9502 9505          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9503 9506          if (flk.l_len < 0 || flk.l_start < 0) {
9504 9507                  resp->status = NFS4ERR_INVAL;
9505 9508                  goto err;
9506 9509          }
9507 9510          error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9508 9511              NULL, cs->cr, NULL);
9509 9512  
9510 9513          /*
9511 9514           * N.B. We map error values to nfsv4 errors. This is differrent
9512 9515           * than puterrno4 routine.
9513 9516           */
9514 9517          switch (error) {
9515 9518          case 0:
9516 9519                  if (flk.l_type == F_UNLCK)
9517 9520                          resp->status = NFS4_OK;
9518 9521                  else {
9519 9522                          if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9520 9523                                  goto retry;
9521 9524                          resp->status = NFS4ERR_DENIED;
9522 9525                  }
9523 9526                  break;
9524 9527          case EOVERFLOW:
9525 9528                  resp->status = NFS4ERR_INVAL;
9526 9529                  break;
9527 9530          case EINVAL:
9528 9531                  resp->status = NFS4ERR_NOTSUPP;
9529 9532                  break;
9530 9533          default:
9531 9534                  cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9532 9535                      error);
9533 9536                  resp->status = NFS4ERR_SERVERFAULT;
9534 9537                  break;
9535 9538          }
9536 9539  
9537 9540  err:
9538 9541          if (lo)
9539 9542                  rfs4_lockowner_rele(lo);
9540 9543          *cs->statusp = resp->status;
9541 9544  out:
9542 9545          DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9543 9546              LOCKT4res *, resp);
9544 9547  }
9545 9548  
9546 9549  int
9547 9550  rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9548 9551  {
9549 9552          int err;
9550 9553          int cmd;
9551 9554          vnode_t *vp;
9552 9555          struct shrlock shr;
9553 9556          struct shr_locowner shr_loco;
9554 9557          int fflags = 0;
9555 9558  
9556 9559          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9557 9560          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9558 9561  
9559 9562          if (sp->rs_closed)
9560 9563                  return (NFS4ERR_OLD_STATEID);
9561 9564  
9562 9565          vp = sp->rs_finfo->rf_vp;
9563 9566          ASSERT(vp);
9564 9567  
9565 9568          shr.s_access = shr.s_deny = 0;
9566 9569  
9567 9570          if (access & OPEN4_SHARE_ACCESS_READ) {
9568 9571                  fflags |= FREAD;
9569 9572                  shr.s_access |= F_RDACC;
9570 9573          }
9571 9574          if (access & OPEN4_SHARE_ACCESS_WRITE) {
9572 9575                  fflags |= FWRITE;
9573 9576                  shr.s_access |= F_WRACC;
9574 9577          }
9575 9578          ASSERT(shr.s_access);
9576 9579  
9577 9580          if (deny & OPEN4_SHARE_DENY_READ)
9578 9581                  shr.s_deny |= F_RDDNY;
9579 9582          if (deny & OPEN4_SHARE_DENY_WRITE)
9580 9583                  shr.s_deny |= F_WRDNY;
9581 9584  
9582 9585          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9583 9586          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9584 9587          shr_loco.sl_pid = shr.s_pid;
9585 9588          shr_loco.sl_id = shr.s_sysid;
9586 9589          shr.s_owner = (caddr_t)&shr_loco;
9587 9590          shr.s_own_len = sizeof (shr_loco);
9588 9591  
9589 9592          cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9590 9593  
9591 9594          err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9592 9595          if (err != 0) {
9593 9596                  if (err == EAGAIN)
9594 9597                          err = NFS4ERR_SHARE_DENIED;
9595 9598                  else
9596 9599                          err = puterrno4(err);
9597 9600                  return (err);
9598 9601          }
9599 9602  
9600 9603          sp->rs_share_access |= access;
9601 9604          sp->rs_share_deny |= deny;
9602 9605  
9603 9606          return (0);
9604 9607  }
9605 9608  
9606 9609  int
9607 9610  rfs4_unshare(rfs4_state_t *sp)
9608 9611  {
9609 9612          int err;
9610 9613          struct shrlock shr;
9611 9614          struct shr_locowner shr_loco;
9612 9615  
9613 9616          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9614 9617  
9615 9618          if (sp->rs_closed || sp->rs_share_access == 0)
9616 9619                  return (0);
9617 9620  
9618 9621          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9619 9622          ASSERT(sp->rs_finfo->rf_vp);
9620 9623  
9621 9624          shr.s_access = shr.s_deny = 0;
9622 9625          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9623 9626          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9624 9627          shr_loco.sl_pid = shr.s_pid;
9625 9628          shr_loco.sl_id = shr.s_sysid;
9626 9629          shr.s_owner = (caddr_t)&shr_loco;
9627 9630          shr.s_own_len = sizeof (shr_loco);
9628 9631  
9629 9632          err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9630 9633              NULL);
9631 9634          if (err != 0) {
9632 9635                  err = puterrno4(err);
9633 9636                  return (err);
9634 9637          }
9635 9638  
9636 9639          sp->rs_share_access = 0;
9637 9640          sp->rs_share_deny = 0;
9638 9641  
9639 9642          return (0);
9640 9643  
9641 9644  }
9642 9645  
9643 9646  static int
9644 9647  rdma_setup_read_data4(READ4args *args, READ4res *rok)
9645 9648  {
9646 9649          struct clist    *wcl;
9647 9650          count4          count = rok->data_len;
9648 9651          int             wlist_len;
9649 9652  
9650 9653          wcl = args->wlist;
9651 9654          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9652 9655                  return (FALSE);
9653 9656          }
9654 9657          wcl = args->wlist;
9655 9658          rok->wlist_len = wlist_len;
9656 9659          rok->wlist = wcl;
9657 9660          return (TRUE);
9658 9661  }
9659 9662  
/*
 * Tunable to disable server referrals: while this is nonzero,
 * vn_is_nfs_reparse() reports B_FALSE for every vnode.
 */
int rfs4_no_referrals = 0;
9662 9665  
9663 9666  /*
9664 9667   * Find an NFS record in reparse point data.
9665 9668   * Returns 0 for success and <0 or an errno value on failure.
9666 9669   */
9667 9670  int
9668 9671  vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9669 9672  {
9670 9673          int err;
9671 9674          char *stype, *val;
9672 9675          nvlist_t *nvl;
9673 9676          nvpair_t *curr;
9674 9677  
9675 9678          if ((nvl = reparse_init()) == NULL)
9676 9679                  return (-1);
9677 9680  
9678 9681          if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9679 9682                  reparse_free(nvl);
9680 9683                  return (err);
9681 9684          }
9682 9685  
9683 9686          curr = NULL;
9684 9687          while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9685 9688                  if ((stype = nvpair_name(curr)) == NULL) {
9686 9689                          reparse_free(nvl);
9687 9690                          return (-2);
9688 9691                  }
9689 9692                  if (strncasecmp(stype, "NFS", 3) == 0)
9690 9693                          break;
9691 9694          }
9692 9695  
9693 9696          if ((curr == NULL) ||
9694 9697              (nvpair_value_string(curr, &val))) {
9695 9698                  reparse_free(nvl);
9696 9699                  return (-3);
9697 9700          }
9698 9701          *nvlp = nvl;
9699 9702          *svcp = stype;
9700 9703          *datap = val;
9701 9704          return (0);
9702 9705  }
9703 9706  
9704 9707  int
9705 9708  vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9706 9709  {
9707 9710          nvlist_t *nvl;
9708 9711          char *s, *d;
9709 9712  
9710 9713          if (rfs4_no_referrals != 0)
9711 9714                  return (B_FALSE);
9712 9715  
9713 9716          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9714 9717                  return (B_FALSE);
9715 9718  
9716 9719          if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9717 9720                  return (B_FALSE);
9718 9721  
9719 9722          reparse_free(nvl);
9720 9723  
9721 9724          return (B_TRUE);
9722 9725  }
9723 9726  
9724 9727  /*
            * Split path into its '/'-separated components.
            *
            * path:  NUL-terminated string to split; a NULL path yields 0.
            * comp4: when non-NULL, each component is converted with
            *        str_to_utf8() into comp4[0], comp4[1], ...; when NULL the
            *        components are only counted.
            *
            * Returns the number of non-empty components seen.  Empty segments
            * (for example those produced by a leading or doubled '/') are
            * skipped.  Scanning stops at the terminating NUL or at a newline
            * that ends a non-empty component.
            *
9725 9728   * There is a user-level copy of this routine in ref_subr.c.
9726 9729   * Changes should be kept in sync.
9727 9730   */
9728 9731  static int
9729 9732  nfs4_create_components(char *path, component4 *comp4)
9730 9733  {
9731 9734          int slen, plen, ncomp;
9732 9735          char *ori_path, *nxtc, buf[MAXNAMELEN];
9733 9736  
9734 9737          if (path == NULL)
9735 9738                  return (0);
9736 9739  
9737 9740          plen = strlen(path) + 1;        /* include the terminator */
9738 9741          ori_path = path;
9739 9742          ncomp = 0;
9740 9743  
                   /*
                    * From here on "path" marks the start of the current segment
                    * and "nxtc" is the scan position.
                    */
9741 9744          /* count number of components in the path */
9742 9745          for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9743 9746                  if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9744 9747                          if ((slen = nxtc - path) == 0) {
                                           /*
                                            * Empty segment.  This continue bypasses
                                            * the terminator test at the bottom of
                                            * the loop: a NUL still ends the scan via
                                            * the loop bound, but a newline closing
                                            * an empty segment does not stop it.
                                            */
9745 9748                                  path = nxtc + 1;
9746 9749                                  continue;
9747 9750                          }
9748 9751  
9749 9752                          if (comp4 != NULL) {
                                           /*
                                            * NOTE(review): slen is not checked
                                            * against sizeof (buf); this assumes no
                                            * component reaches MAXNAMELEN bytes --
                                            * confirm for all callers.
                                            */
9750 9753                                  bcopy(path, buf, slen);
9751 9754                                  buf[slen] = '\0';
9752 9755                                  (void) str_to_utf8(buf, &comp4[ncomp]);
9753 9756                          }
9754 9757  
9755 9758                          ncomp++;        /* 1 valid component */
9756 9759                          path = nxtc + 1;
9757 9760                  }
9758 9761                  if (*nxtc == '\0' || *nxtc == '\n')
9759 9762                          break;
9760 9763          }
9761 9764  
9762 9765          return (ncomp);
9763 9766  }
9764 9767  
9765 9768  /*
            * Build a pathname4 (array of components) from a '/'-separated path.
            *
            * path:     string to split; NULL produces an empty pathname4.
            * pathname: output; when NULL nothing is done and 0 is returned.
            *
            * The component array is allocated here with kmem_zalloc() and is
            * not freed by this function.  Returns the number of components
            * stored, or 0 when the result is empty.
            *
9766 9769   * There is a user-level copy of this routine in ref_subr.c.
9767 9770   * Changes should be kept in sync.
9768 9771   */
9769 9772  static int
9770 9773  make_pathname4(char *path, pathname4 *pathname)
9771 9774  {
9772 9775          int ncomp;
9773 9776          component4 *comp4;
9774 9777  
9775 9778          if (pathname == NULL)
9776 9779                  return (0);
9777 9780  
9778 9781          if (path == NULL) {
9779 9782                  pathname->pathname4_val = NULL;
9780 9783                  pathname->pathname4_len = 0;
9781 9784                  return (0);
9782 9785          }
9783 9786  
                   /* First pass: a NULL output array means "count only". */
9784 9787          /* count number of components to alloc buffer */
9785 9788          if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9786 9789                  pathname->pathname4_val = NULL;
9787 9790                  pathname->pathname4_len = 0;
9788 9791                  return (0);
9789 9792          }
9790 9793          comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9791 9794  
                   /* Second pass over the same path fills the array just sized. */
9792 9795          /* copy components into allocated buffer */
9793 9796          ncomp = nfs4_create_components(path, comp4);
9794 9797  
9795 9798          pathname->pathname4_val = comp4;
9796 9799          pathname->pathname4_len = ncomp;
9797 9800  
9798 9801          return (ncomp);
9799 9802  }
9800 9803  
9801 9804  #define xdr_fs_locations4 xdr_fattr4_fs_locations
9802 9805  
           /*
            * Obtain the referral (fs_locations4) stored on a reparse point.
            *
            * vp: vnode to examine.
            * cr: credentials handed to vn_is_reparse().
            *
            * Returns a structure of sizeof (fs_locations4) obtained from
            * kmem_alloc(), or NULL when vp is not a reparse point, carries no NFS
            * record, the reparse_kderef() upcall fails, or the reply cannot be
            * decoded.  build_symlink() in this file releases a non-NULL result
            * with rfs4_free_fs_locations4() followed by kmem_free().
            */
9803 9806  fs_locations4 *
9804 9807  fetch_referral(vnode_t *vp, cred_t *cr)
9805 9808  {
9806 9809          nvlist_t *nvl;
9807 9810          char *stype, *sdata;
9808 9811          fs_locations4 *result;
9809 9812          char buf[1024];
9810 9813          size_t bufsize;
9811 9814          XDR xdr;
9812 9815          int err;
9813 9816  
9814 9817          /*
9815 9818           * Check attrs to ensure it's a reparse point
9816 9819           */
9817 9820          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9818 9821                  return (NULL);
9819 9822  
9820 9823          /*
9821 9824           * Look for an NFS record and get the type and data
9822 9825           */
9823 9826          if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9824 9827                  return (NULL);
9825 9828  
9826 9829          /*
9827 9830           * With the type and data, upcall to get the referral
9828 9831           */
9829 9832          bufsize = sizeof (buf);
9830 9833          bzero(buf, sizeof (buf));
9831 9834          err = reparse_kderef((const char *)stype, (const char *)sdata,
9832 9835              buf, &bufsize);
9833 9836          reparse_free(nvl);
9834 9837  
                   /*
                    * NOTE(review): stype and sdata are handed to the probe after
                    * reparse_free(nvl); confirm they do not point into the freed
                    * nvlist.
                    */
9835 9838          DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9836 9839              char *, stype, char *, sdata, char *, buf, int, err);
9837 9840          if (err) {
9838 9841                  cmn_err(CE_NOTE,
9839 9842                      "reparsed daemon not running: unable to get referral (%d)",
9840 9843                      err);
9841 9844                  return (NULL);
9842 9845          }
9843 9846  
9844 9847          /*
9845 9848           * We get an XDR'ed record back from the kderef call
9846 9849           */
9847 9850          xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9848 9851          result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9849 9852          err = xdr_fs_locations4(&xdr, result);
9850 9853          XDR_DESTROY(&xdr);
9851 9854          if (err != TRUE) {
9852 9855                  DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9853 9856                      int, err);
                           /*
                            * The decode failed, so nobody else will ever see
                            * "result"; release it here instead of leaking it.
                            * NOTE(review): anything the decoder allocated inside
                            * the structure before failing is not reclaimed here.
                            */
                           kmem_free(result, sizeof (fs_locations4));
9854 9857                  return (NULL);
9855 9858          }
9856 9859  
9857 9860          /*
9858 9861           * Look at path to recover fs_root, ignoring the leading '/'
9859 9862           */
9860 9863          (void) make_pathname4(vp->v_path, &result->fs_root);
9861 9864  
9862 9865          return (result);
9863 9866  }
9864 9867  
           /*
            * Build a "/net/<server>/<path...>" string from the referral stored on
            * vp, for clients that are given a symlink instead of a referral.
            *
            * vp, cr: passed through to fetch_referral().
            * strsz:  when non-NULL, receives the size of the returned allocation.
            *
            * Only the first location and its first server are used.  Returns a
            * buffer from kmem_zalloc(), or NULL when no referral is available,
            * the referral has no location or server, or the server name cannot
            * be converted.
            */
9865 9868  char *
9866 9869  build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9867 9870  {
9868 9871          fs_locations4 *fsl;
9869 9872          fs_location4 *fs;
9870 9873          char *server, *path, *symbuf;
9871 9874          static char *prefix = "/net/";
9872 9875          int i, size;
9873 9876          uint_t len;
9874 9877  
9875 9878          /* Get the referral */
9876 9879          if ((fsl = fetch_referral(vp, cr)) == NULL)
9877 9880                  return (NULL);
9878 9881  
                   /* An empty referral has no locations_val[0] to look at. */
                   if (fsl->locations_len == 0 || fsl->locations_val == NULL ||
                       fsl->locations_val[0].server_len == 0) {
                           rfs4_free_fs_locations4(fsl);
                           kmem_free(fsl, sizeof (fs_locations4));
                           return (NULL);
                   }

9879 9882          /* Deal with only the first location and first server */
9880 9883          fs = &fsl->locations_val[0];
9881 9884          server = utf8_to_str(&fs->server_val[0], &len, NULL);
9882 9885          if (server == NULL) {
9883 9886                  rfs4_free_fs_locations4(fsl);
9884 9887                  kmem_free(fsl, sizeof (fs_locations4));
9885 9888                  return (NULL);
9886 9889          }
9887 9890  
                   /*
                    * "len" is the size later passed to kmem_free() for "server".
                    * Each path component is budgeted its length plus one byte for
                    * the '/' that precedes it.
                    */
9888 9891          /* Figure out size for "/net/" + host + /path/path/path + NULL */
9889 9892          size = strlen(prefix) + len;
9890 9893          for (i = 0; i < fs->rootpath.pathname4_len; i++)
9891 9894                  size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9892 9895  
                   /* The zeroed buffer is an empty string, so strcat() can append. */
9893 9896          /* Allocate the symlink buffer and fill it */
9894 9897          symbuf = kmem_zalloc(size, KM_SLEEP);
9895 9898          (void) strcat(symbuf, prefix);
9896 9899          (void) strcat(symbuf, server);
9897 9900          kmem_free(server, len);
9898 9901  
9900 9903          for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9901 9904                  path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
                           /* Components that cannot be converted are left out. */
9902 9905                  if (path == NULL)
9903 9906                          continue;
9904 9907                  (void) strcat(symbuf, "/");
9905 9908                  (void) strcat(symbuf, path);
9907 9910                  kmem_free(path, len);
9908 9911          }
9909 9912  
9910 9913          rfs4_free_fs_locations4(fsl);
9911 9914          kmem_free(fsl, sizeof (fs_locations4));
9912 9915  
                   /* Report the allocation size, not the string length. */
9913 9916          if (strsz != NULL)
9914 9917                  *strsz = size;
9915 9918          return (symbuf);
9916 9919  }
9917 9920  
9918 9921  /*
9919 9922   * Check to see if we have a downrev Solaris client, so that we
9920 9923   * can send it a symlink instead of a referral.
            *
            * The caller's address is taken from the request's transport and
            * looked up with rfs4_find_clntip().  Returns 0 when no entry is
            * found; otherwise returns that entry's ri_no_referrals value.
9921 9924   */
9922 9925  int
9923 9926  client_is_downrev(struct svc_req *req)
9924 9927  {
9925 9928          struct sockaddr *ca;
9926 9929          rfs4_clntip_t *ci;
9927 9930          bool_t create = FALSE;
9928 9931          int is_downrev;
9929 9932  
9930 9933          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9931 9934          ASSERT(ca);
                   /* create is FALSE: presumably a lookup-only search -- confirm. */
9932 9935          ci = rfs4_find_clntip(ca, &create);
9933 9936          if (ci == NULL)
9934 9937                  return (0);
                   /* Copy the flag out before releasing the entry. */
9935 9938          is_downrev = ci->ri_no_referrals;
9936 9939          rfs4_dbe_rele(ci->ri_dbe);
9937 9940          return (is_downrev);
9938 9941  }
9939 9942  
9940 9943  /*
9941 9944   * Do the main work of handling HA-NFSv4 Resource Group failover on
9942 9945   * Sun Cluster.
9943 9946   * We need to detect whether any RG admin paths have been added or removed,
9944 9947   * and adjust resources accordingly.
9945 9948   * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9946 9949   * order to scale, the list and array of paths need to be held in more
9947 9950   * suitable data structures.
            *
            * nsrv4: server state whose circular dss_pathlist is reconciled
            *        against rfs4_dss_newpaths[0 .. rfs4_dss_numnewpaths - 1],
            *        both of which are defined outside this function.
            *
            * Pass 1 unlinks and frees every served path that is missing from the
            * new array.  Pass 2 collects every new path that is not yet served;
            * if there are any, a new server instance is created for them, their
            * stable-storage state is read, and all grace periods are reset.
9948 9951   */
9949 9952  static void
9950 9953  hanfsv4_failover(nfs4_srv_t *nsrv4)
9951 9954  {
9952 9955          int i, start_grace, numadded_paths = 0;
9953 9956          char **added_paths = NULL;
9954 9957          rfs4_dss_path_t *dss_path;
9955 9958  
9956 9959          /*
9957 9960           * Note: currently, dss_pathlist cannot be NULL, since
9958 9961           * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9959 9962           * make the latter dynamically specified too, the following will
9960 9963           * need to be adjusted.
9961 9964           */
9962 9965  
9963 9966          /*
9964 9967           * First, look for removed paths: RGs that have been failed-over
9965 9968           * away from this node.
9966 9969           * Walk the "currently-serving" dss_pathlist and, for each
9967 9970           * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9968 9971           * from nfsd. If not, that RG path has been removed.
9969 9972           *
9970 9973           * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9971 9974           * any duplicates.
9972 9975           */
9973 9976          dss_path = nsrv4->dss_pathlist;
9974 9977          do {
9975 9978                  int found = 0;
9976 9979                  char *path = dss_path->path;
9977 9980  
9978 9981                  /* used only for non-HA so may not be removed */
9979 9982                  if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
                                   /*
                                    * In a do-while, continue jumps to the loop
                                    * condition, so the walk still ends once it is
                                    * back at the list head.
                                    */
9980 9983                          dss_path = dss_path->next;
9981 9984                          continue;
9982 9985                  }
9983 9986  
9984 9987                  for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9985 9988                          int cmpret;
9986 9989                          char *newpath = rfs4_dss_newpaths[i];
9987 9990  
9988 9991                          /*
9989 9992                           * Since nfsd has sorted rfs4_dss_newpaths for us,
9990 9993                           * once the return from strcmp is negative we know
9991 9994                           * we've passed the point where "path" should be,
9992 9995                           * and can stop searching: "path" has been removed.
9993 9996                           */
9994 9997                          cmpret = strcmp(path, newpath);
9995 9998                          if (cmpret < 0)
9996 9999                                  break;
9997 10000                          if (cmpret == 0) {
9998 10001                                  found = 1;
9999 10002                                  break;
10000 10003                          }
10001 10004                  }
10002 10005  
10003 10006                  if (found == 0) {
                                     /*
                                      * Capture everything still needed from the
                                      * entry, including its successor, before it
                                      * is unlinked and freed below.
                                      */
10004 10007                          unsigned index = dss_path->index;
10005 10008                          rfs4_servinst_t *sip = dss_path->sip;
10006 10009                          rfs4_dss_path_t *path_next = dss_path->next;
10007 10010  
10008 10011                          /*
10009 10012                           * This path has been removed.
10010 10013                           * We must clear out the servinst reference to
10011 10014                           * it, since it's now owned by another
10012 10015                           * node: we should not attempt to touch it.
10013 10016                           */
10014 10017                          ASSERT(dss_path == sip->dss_paths[index]);
10015 10018                          sip->dss_paths[index] = NULL;
10016 10019  
                                     /*
                                      * NOTE(review): nsrv4->dss_pathlist is not
                                      * updated here, so this relies on the list
                                      * head never being the entry removed
                                      * (presumably it is the NFS4_DSS_VAR_DIR
                                      * entry) -- confirm.
                                      */
10017 10020                          /* remove from "currently-serving" list, and destroy */
10018 10021                          remque(dss_path);
10019 10022                          /* allow for NUL */
10020 10023                          kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10021 10024                          kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10022 10025  
10023 10026                          dss_path = path_next;
10024 10027                  } else {
10025 10028                          /* path was found; not removed */
10026 10029                          dss_path = dss_path->next;
10027 10030                  }
10028 10031          } while (dss_path != nsrv4->dss_pathlist);
10029 10032  
10030 10033          /*
10031 10034           * Now, look for added paths: RGs that have been failed-over
10032 10035           * to this node.
10033 10036           * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10034 10037           * for each path, check if it is on the "currently-serving"
10035 10038           * dss_pathlist. If not, that RG path has been added.
10036 10039           *
10037 10040           * Note: we don't do duplicate detection here; nfsd does that for us.
10038 10041           *
10039 10042           * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10040 10043           * an upper bound for the size needed for added_paths[numadded_paths].
10041 10044           */
10042 10045  
10043 10046          /* probably more space than we need, but guaranteed to be enough */
10044 10047          if (rfs4_dss_numnewpaths > 0) {
10045 10048                  size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10046 10049                  added_paths = kmem_zalloc(sz, KM_SLEEP);
10047 10050          }
10048 10051  
10049 10052          /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10050 10053          for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10051 10054                  int found = 0;
10052 10055                  char *newpath = rfs4_dss_newpaths[i];
10053 10056  
10054 10057                  dss_path = nsrv4->dss_pathlist;
10055 10058                  do {
10056 10059                          char *path = dss_path->path;
10057 10060  
10058 10061                          /* used only for non-HA */
10059 10062                          if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10060 10063                                  dss_path = dss_path->next;
10061 10064                                  continue;
10062 10065                          }
10063 10066  
                                     /*
                                      * NOTE(review): this comparison is limited to
                                      * strlen(path) bytes, unlike the exact strcmp()
                                      * in the removal pass, so a new path that
                                      * merely begins with a served path counts as
                                      * already present -- confirm this is intended.
                                      */
10064 10067                          if (strncmp(path, newpath, strlen(path)) == 0) {
10065 10068                                  found = 1;
10066 10069                                  break;
10067 10070                          }
10068 10071  
10069 10072                          dss_path = dss_path->next;
10070 10073                  } while (dss_path != nsrv4->dss_pathlist);
10071 10074  
10072 10075                  if (found == 0) {
                                     /*
                                      * added_paths is allocated whenever this loop
                                      * body can run.  Only the pointer is stored;
                                      * the string is not copied.
                                      */
10073 10076                          added_paths[numadded_paths] = newpath;
10074 10077                          numadded_paths++;
10075 10078                  }
10076 10079          }
10077 10080  
10078 10081          /* did we find any added paths? */
10079 10082          if (numadded_paths > 0) {
10080 10083  
10081 10084                  /* create a new server instance, and start its grace period */
10082 10085                  start_grace = 1;
10083 10086                  /* CSTYLED */
10084 10087                  rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10085 10088  
10086 10089                  /* read in the stable storage state from these paths */
10087 10090                  rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10088 10091  
10089 10092                  /*
10090 10093                   * Multiple failovers during a grace period will cause
10091 10094                   * clients of the same resource group to be partitioned
10092 10095                   * into different server instances, with different
10093 10096                   * grace periods.  Since clients of the same resource
10094 10097                   * group must be subject to the same grace period,
10095 10098                   * we need to reset all currently active grace periods.
10096 10099                   */
10097 10100                  rfs4_grace_reset_all(nsrv4);
10098 10101          }
10099 10102  
                     /* Free with the same size expression used for the allocation. */
10100 10103          if (rfs4_dss_numnewpaths > 0)
10101 10104                  kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10102 10105  }
  
    | 
      ↓ open down ↓ | 
    1856 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX