Print this page
    
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23      - * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25      - * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  26   24   */
  27   25  
  28   26  /*
  29   27   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30   28   *      All Rights Reserved
  31   29   */
  32   30  
       31 +/*
       32 + * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
       33 + * Copyright 2019 Nexenta Systems, Inc.
       34 + * Copyright 2019 Nexenta by DDN, Inc.
       35 + */
       36 +
  33   37  #include <sys/param.h>
  34   38  #include <sys/types.h>
  35   39  #include <sys/systm.h>
  36   40  #include <sys/cred.h>
  37   41  #include <sys/buf.h>
  38   42  #include <sys/vfs.h>
  39   43  #include <sys/vfs_opreg.h>
  40   44  #include <sys/vnode.h>
  41   45  #include <sys/uio.h>
  42   46  #include <sys/errno.h>
  43   47  #include <sys/sysmacros.h>
  44   48  #include <sys/statvfs.h>
  45   49  #include <sys/kmem.h>
  46   50  #include <sys/dirent.h>
  47   51  #include <sys/cmn_err.h>
  48   52  #include <sys/debug.h>
  49   53  #include <sys/systeminfo.h>
  50   54  #include <sys/flock.h>
  51   55  #include <sys/pathname.h>
  52   56  #include <sys/nbmlock.h>
  53   57  #include <sys/share.h>
  54   58  #include <sys/atomic.h>
  55   59  #include <sys/policy.h>
  56   60  #include <sys/fem.h>
  57   61  #include <sys/sdt.h>
  58   62  #include <sys/ddi.h>
  
    | 
      ↓ open down ↓ | 
    16 lines elided | 
    
      ↑ open up ↑ | 
  
  59   63  #include <sys/zone.h>
  60   64  
  61   65  #include <fs/fs_reparse.h>
  62   66  
  63   67  #include <rpc/types.h>
  64   68  #include <rpc/auth.h>
  65   69  #include <rpc/rpcsec_gss.h>
  66   70  #include <rpc/svc.h>
  67   71  
  68   72  #include <nfs/nfs.h>
       73 +#include <nfs/nfssys.h>
  69   74  #include <nfs/export.h>
  70   75  #include <nfs/nfs_cmd.h>
  71   76  #include <nfs/lm.h>
  72   77  #include <nfs/nfs4.h>
       78 +#include <nfs/nfs4_drc.h>
  73   79  
  74   80  #include <sys/strsubr.h>
  75   81  #include <sys/strsun.h>
  76   82  
  77   83  #include <inet/common.h>
  78   84  #include <inet/ip.h>
  79   85  #include <inet/ip6.h>
  80   86  
  81   87  #include <sys/tsol/label.h>
  82   88  #include <sys/tsol/tndb.h>
  83   89  
  84   90  #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  85   91  static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  86   92  #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  87   93  static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  88   94  extern struct svc_ops rdma_svc_ops;
  89   95  extern int nfs_loaned_buffers;
  90   96  /* End of Tunables */
  91   97  
  92   98  static int rdma_setup_read_data4(READ4args *, READ4res *);
  93   99  
  94  100  /*
  95  101   * Used to bump the stateid4.seqid value and show changes in the stateid
  96  102   */
  97  103  #define next_stateid(sp) (++(sp)->bits.chgseq)
  98  104  
  99  105  /*
 100  106   * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 101  107   *      This is used to return NFS4ERR_TOOSMALL when clients specify
 102  108   *      maxcount that isn't large enough to hold the smallest possible
 103  109   *      XDR encoded dirent.
 104  110   *
 105  111   *          sizeof cookie (8 bytes) +
 106  112   *          sizeof name_len (4 bytes) +
  
    | 
      ↓ open down ↓ | 
    24 lines elided | 
    
      ↑ open up ↑ | 
  
 107  113   *          sizeof smallest (padded) name (4 bytes) +
 108  114   *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 109  115   *          sizeof attrlist4_len (4 bytes) +
 110  116   *          sizeof next boolean (4 bytes)
 111  117   *
 112  118   * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 113  119   * the smallest possible entry4 (assumes no attrs requested).
 114  120   *      sizeof nfsstat4 (4 bytes) +
 115  121   *      sizeof verifier4 (8 bytes) +
 116  122   *      sizeof entry4list bool (4 bytes) +
 117      - *      sizeof entry4   (36 bytes) +
 118      - *      sizeof eof bool  (4 bytes)
      123 + *      sizeof entry4 (36 bytes) +
      124 + *      sizeof eof bool (4 bytes)
 119  125   *
 120  126   * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 121  127   *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 122  128   *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 123  129   *      required for a given name length.  MAXNAMELEN is the maximum
 124  130   *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 125  131   *      macros are to allow for . and .. entries -- just a minor tweak to try
 126  132   *      and guarantee that buffer we give to VOP_READDIR will be large enough
 127  133   *      to hold ., .., and the largest possible solaris dirent64.
 128  134   */
 129  135  #define RFS4_MINLEN_ENTRY4 36
 130  136  #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 131  137  #define RFS4_MINLEN_RDDIR_BUF \
 132  138          (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 133  139  
 134  140  /*
 135  141   * It would be better to pad to 4 bytes since that's what XDR would do,
 136  142   * but the dirents UFS gives us are already padded to 8, so just take
 137  143   * what we're given.  Dircount is only a hint anyway.  Currently the
 138  144   * solaris kernel is ASCII only, so there's no point in calling the
 139  145   * UTF8 functions.
  
    | 
      ↓ open down ↓ | 
    11 lines elided | 
    
      ↑ open up ↑ | 
  
 140  146   *
 141  147   * dirent64: name padded to provide 8 byte struct alignment
 142  148   *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 143  149   *
 144  150   * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 145  151   *
 146  152   */
 147  153  #define DIRENT64_TO_DIRCOUNT(dp) \
 148  154          (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 149  155  
 150      -time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */
 151  156  
 152      -static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */
      157 +static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 153  158  
 154  159  u_longlong_t    nfs4_srv_caller_id;
 155  160  uint_t          nfs4_srv_vkey = 0;
 156  161  
 157      -verifier4       Write4verf;
 158      -verifier4       Readdir4verf;
 159      -
 160  162  void    rfs4_init_compound_state(struct compound_state *);
 161  163  
 162  164  static void     nullfree(caddr_t);
 163  165  static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 164      -                        struct compound_state *);
      166 +                    struct compound_state *);
 165  167  static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166      -                        struct compound_state *);
      168 +                    struct compound_state *);
 167  169  static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168      -                        struct compound_state *);
      170 +                    struct compound_state *);
 169  171  static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170      -                        struct compound_state *);
      172 +                    struct compound_state *);
 171  173  static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172      -                        struct compound_state *);
      174 +                    struct compound_state *);
 173  175  static void     rfs4_op_create_free(nfs_resop4 *resop);
 174  176  static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 175      -                        struct svc_req *, struct compound_state *);
      177 +                    struct svc_req *, struct compound_state *);
 176  178  static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 177      -                        struct svc_req *, struct compound_state *);
      179 +                    struct svc_req *, struct compound_state *);
 178  180  static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 179      -                        struct compound_state *);
      181 +                    struct compound_state *);
 180  182  static void     rfs4_op_getattr_free(nfs_resop4 *);
 181  183  static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 182      -                        struct compound_state *);
      184 +                    struct compound_state *);
 183  185  static void     rfs4_op_getfh_free(nfs_resop4 *);
 184  186  static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185      -                        struct compound_state *);
      187 +                    struct compound_state *);
 186  188  static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187      -                        struct compound_state *);
      189 +                    struct compound_state *);
 188  190  static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189      -                        struct compound_state *);
      191 +                    struct compound_state *);
 190  192  static void     lock_denied_free(nfs_resop4 *);
 191  193  static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192      -                        struct compound_state *);
      194 +                    struct compound_state *);
 193  195  static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194      -                        struct compound_state *);
      196 +                    struct compound_state *);
 195  197  static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196      -                        struct compound_state *);
      198 +                    struct compound_state *);
 197  199  static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198      -                        struct compound_state *);
      200 +                    struct compound_state *);
 199  201  static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 200      -                                struct svc_req *req, struct compound_state *cs);
      202 +                    struct svc_req *req, struct compound_state *cs);
 201  203  static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 202      -                        struct compound_state *);
      204 +                    struct compound_state *);
 203  205  static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204      -                        struct compound_state *);
      206 +                    struct compound_state *);
 205  207  static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 206      -                        struct svc_req *, struct compound_state *);
      208 +                    struct svc_req *, struct compound_state *);
 207  209  static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 208      -                        struct svc_req *, struct compound_state *);
      210 +                    struct svc_req *, struct compound_state *);
 209  211  static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 210      -                        struct compound_state *);
      212 +                    struct compound_state *);
 211  213  static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212      -                        struct compound_state *);
      214 +                    struct compound_state *);
 213  215  static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214      -                        struct compound_state *);
      216 +                    struct compound_state *);
 215  217  static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216      -                        struct compound_state *);
      218 +                    struct compound_state *);
 217  219  static void     rfs4_op_read_free(nfs_resop4 *);
 218  220  static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 219  221  static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 220      -                        struct compound_state *);
      222 +                    struct compound_state *);
 221  223  static void     rfs4_op_readlink_free(nfs_resop4 *);
 222  224  static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 223      -                        struct svc_req *, struct compound_state *);
      225 +                    struct svc_req *, struct compound_state *);
 224  226  static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 225      -                        struct compound_state *);
      227 +                    struct compound_state *);
 226  228  static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227      -                        struct compound_state *);
      229 +                    struct compound_state *);
 228  230  static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229      -                        struct compound_state *);
      231 +                    struct compound_state *);
 230  232  static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231      -                        struct compound_state *);
      233 +                    struct compound_state *);
 232  234  static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233      -                        struct compound_state *);
      235 +                    struct compound_state *);
 234  236  static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235      -                        struct compound_state *);
      237 +                    struct compound_state *);
 236  238  static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237      -                        struct compound_state *);
      239 +                    struct compound_state *);
 238  240  static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239      -                        struct compound_state *);
      241 +                    struct compound_state *);
 240  242  static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 241      -                        struct svc_req *, struct compound_state *);
      243 +                    struct svc_req *, struct compound_state *);
 242  244  static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 243      -                        struct svc_req *req, struct compound_state *);
      245 +                    struct svc_req *req, struct compound_state *);
 244  246  static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 245      -                        struct compound_state *);
      247 +                    struct compound_state *);
 246  248  static void     rfs4_op_secinfo_free(nfs_resop4 *);
 247  249  
 248      -static nfsstat4 check_open_access(uint32_t,
 249      -                                struct compound_state *, struct svc_req *);
 250      -nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 251      -void rfs4_ss_clid(rfs4_client_t *);
      250 +static nfsstat4 check_open_access(uint32_t, struct compound_state *,
      251 +                    struct svc_req *);
      252 +nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
      253 +void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 252  254  
      255 +
 253  256  /*
 254  257   * translation table for attrs
 255  258   */
 256  259  struct nfs4_ntov_table {
 257  260          union nfs4_attr_u *na;
 258  261          uint8_t amap[NFS4_MAXNUM_ATTRS];
 259  262          int attrcnt;
 260  263          bool_t vfsstat;
 261  264  };
 262  265  
 263  266  static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 264  267  static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 265      -                                    struct nfs4_svgetit_arg *sargp);
      268 +                    struct nfs4_svgetit_arg *sargp);
 266  269  
 267  270  static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 268  271                      struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 269  272                      struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 270  273  
      274 +static void     hanfsv4_failover(nfs4_srv_t *);
      275 +
 271  276  fem_t           *deleg_rdops;
 272  277  fem_t           *deleg_wrops;
 273  278  
 274      -rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
 275      -kmutex_t        rfs4_servinst_lock;     /* protects linked list */
 276      -int             rfs4_seen_first_compound;       /* set first time we see one */
 277      -
 278  279  /*
 279  280   * NFS4 op dispatch table
 280  281   */
 281  282  
 282  283  struct rfsv4disp {
 283  284          void    (*dis_proc)();          /* proc to call */
 284  285          void    (*dis_resfree)();       /* frees space allocated by proc */
 285  286          int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 286  287  };
 287  288  
 288  289  static struct rfsv4disp rfsv4disptab[] = {
 289  290          /*
 290  291           * NFS VERSION 4
 291  292           */
 292  293  
 293  294          /* RFS_NULL = 0 */
 294  295          {rfs4_op_illegal, nullfree, 0},
 295  296  
 296  297          /* UNUSED = 1 */
 297  298          {rfs4_op_illegal, nullfree, 0},
 298  299  
 299  300          /* UNUSED = 2 */
 300  301          {rfs4_op_illegal, nullfree, 0},
 301  302  
 302  303          /* OP_ACCESS = 3 */
 303  304          {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 304  305  
 305  306          /* OP_CLOSE = 4 */
 306  307          {rfs4_op_close, nullfree, 0},
 307  308  
 308  309          /* OP_COMMIT = 5 */
 309  310          {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 310  311  
 311  312          /* OP_CREATE = 6 */
 312  313          {rfs4_op_create, nullfree, 0},
 313  314  
 314  315          /* OP_DELEGPURGE = 7 */
 315  316          {rfs4_op_delegpurge, nullfree, 0},
 316  317  
 317  318          /* OP_DELEGRETURN = 8 */
 318  319          {rfs4_op_delegreturn, nullfree, 0},
 319  320  
 320  321          /* OP_GETATTR = 9 */
 321  322          {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 322  323  
 323  324          /* OP_GETFH = 10 */
 324  325          {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 325  326  
 326  327          /* OP_LINK = 11 */
 327  328          {rfs4_op_link, nullfree, 0},
 328  329  
 329  330          /* OP_LOCK = 12 */
 330  331          {rfs4_op_lock, lock_denied_free, 0},
 331  332  
 332  333          /* OP_LOCKT = 13 */
 333  334          {rfs4_op_lockt, lock_denied_free, 0},
 334  335  
 335  336          /* OP_LOCKU = 14 */
 336  337          {rfs4_op_locku, nullfree, 0},
 337  338  
 338  339          /* OP_LOOKUP = 15 */
 339  340          {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 340  341  
 341  342          /* OP_LOOKUPP = 16 */
 342  343          {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 343  344  
 344  345          /* OP_NVERIFY = 17 */
 345  346          {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 346  347  
 347  348          /* OP_OPEN = 18 */
 348  349          {rfs4_op_open, rfs4_free_reply, 0},
 349  350  
 350  351          /* OP_OPENATTR = 19 */
 351  352          {rfs4_op_openattr, nullfree, 0},
 352  353  
 353  354          /* OP_OPEN_CONFIRM = 20 */
 354  355          {rfs4_op_open_confirm, nullfree, 0},
 355  356  
 356  357          /* OP_OPEN_DOWNGRADE = 21 */
 357  358          {rfs4_op_open_downgrade, nullfree, 0},
 358  359  
 359  360          /* OP_PUTFH = 22 */
 360  361          {rfs4_op_putfh, nullfree, RPC_ALL},
 361  362  
 362  363          /* OP_PUTPUBFH = 23 */
 363  364          {rfs4_op_putpubfh, nullfree, RPC_ALL},
 364  365  
 365  366          /* OP_PUTROOTFH = 24 */
 366  367          {rfs4_op_putrootfh, nullfree, RPC_ALL},
 367  368  
 368  369          /* OP_READ = 25 */
 369  370          {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 370  371  
 371  372          /* OP_READDIR = 26 */
 372  373          {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 373  374  
 374  375          /* OP_READLINK = 27 */
 375  376          {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 376  377  
 377  378          /* OP_REMOVE = 28 */
 378  379          {rfs4_op_remove, nullfree, 0},
 379  380  
 380  381          /* OP_RENAME = 29 */
 381  382          {rfs4_op_rename, nullfree, 0},
 382  383  
 383  384          /* OP_RENEW = 30 */
 384  385          {rfs4_op_renew, nullfree, 0},
 385  386  
 386  387          /* OP_RESTOREFH = 31 */
 387  388          {rfs4_op_restorefh, nullfree, RPC_ALL},
 388  389  
 389  390          /* OP_SAVEFH = 32 */
 390  391          {rfs4_op_savefh, nullfree, RPC_ALL},
 391  392  
 392  393          /* OP_SECINFO = 33 */
 393  394          {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 394  395  
 395  396          /* OP_SETATTR = 34 */
 396  397          {rfs4_op_setattr, nullfree, 0},
 397  398  
 398  399          /* OP_SETCLIENTID = 35 */
 399  400          {rfs4_op_setclientid, nullfree, 0},
 400  401  
 401  402          /* OP_SETCLIENTID_CONFIRM = 36 */
 402  403          {rfs4_op_setclientid_confirm, nullfree, 0},
 403  404  
 404  405          /* OP_VERIFY = 37 */
 405  406          {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 406  407  
 407  408          /* OP_WRITE = 38 */
 408  409          {rfs4_op_write, nullfree, 0},
 409  410  
 410  411          /* OP_RELEASE_LOCKOWNER = 39 */
 411  412          {rfs4_op_release_lockowner, nullfree, 0},
 412  413  };
 413  414  
 414  415  static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 415  416  
 416  417  #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 417  418  
 418  419  #ifdef DEBUG
 419  420  
 420  421  int             rfs4_fillone_debug = 0;
 421  422  int             rfs4_no_stub_access = 1;
 422  423  int             rfs4_rddir_debug = 0;
 423  424  
 424  425  static char    *rfs4_op_string[] = {
 425  426          "rfs4_op_null",
 426  427          "rfs4_op_1 unused",
 427  428          "rfs4_op_2 unused",
 428  429          "rfs4_op_access",
 429  430          "rfs4_op_close",
 430  431          "rfs4_op_commit",
 431  432          "rfs4_op_create",
 432  433          "rfs4_op_delegpurge",
 433  434          "rfs4_op_delegreturn",
 434  435          "rfs4_op_getattr",
 435  436          "rfs4_op_getfh",
 436  437          "rfs4_op_link",
 437  438          "rfs4_op_lock",
 438  439          "rfs4_op_lockt",
 439  440          "rfs4_op_locku",
 440  441          "rfs4_op_lookup",
 441  442          "rfs4_op_lookupp",
 442  443          "rfs4_op_nverify",
 443  444          "rfs4_op_open",
 444  445          "rfs4_op_openattr",
 445  446          "rfs4_op_open_confirm",
 446  447          "rfs4_op_open_downgrade",
 447  448          "rfs4_op_putfh",
 448  449          "rfs4_op_putpubfh",
 449  450          "rfs4_op_putrootfh",
 450  451          "rfs4_op_read",
 451  452          "rfs4_op_readdir",
 452  453          "rfs4_op_readlink",
 453  454          "rfs4_op_remove",
 454  455          "rfs4_op_rename",
 455  456          "rfs4_op_renew",
 456  457          "rfs4_op_restorefh",
 457  458          "rfs4_op_savefh",
 458  459          "rfs4_op_secinfo",
  
    | 
      ↓ open down ↓ | 
    171 lines elided | 
    
      ↑ open up ↑ | 
  
 459  460          "rfs4_op_setattr",
 460  461          "rfs4_op_setclientid",
 461  462          "rfs4_op_setclient_confirm",
 462  463          "rfs4_op_verify",
 463  464          "rfs4_op_write",
 464  465          "rfs4_op_release_lockowner",
 465  466          "rfs4_op_illegal"
 466  467  };
 467  468  #endif
 468  469  
 469      -void    rfs4_ss_chkclid(rfs4_client_t *);
      470 +void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 470  471  
 471  472  extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 472  473  
 473  474  extern void     rfs4_free_fs_locations4(fs_locations4 *);
 474  475  
 475  476  #ifdef  nextdp
 476  477  #undef nextdp
 477  478  #endif
 478  479  #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 479  480  
 480  481  static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 481  482          VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 482  483          VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 483  484          VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 484  485          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 485  486          VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 486  487          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 487  488          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 488  489          NULL,                   NULL
 489  490  };
 490  491  static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 491  492          VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
 492  493          VOPNAME_READ,           { .femop_read = deleg_wr_read },
 493  494          VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 494  495          VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 495  496          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 496  497          VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 497  498          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 498  499          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 499  500          NULL,                   NULL
 500  501  };
 501  502  
 502      -int
 503      -rfs4_srvrinit(void)
      503 +nfs4_srv_t *
      504 +nfs4_get_srv(void)
 504  505  {
      506 +        nfs_globals_t *ng = nfs_srv_getzg();
      507 +        nfs4_srv_t *srv = ng->nfs4_srv;
      508 +        ASSERT(srv != NULL);
      509 +        return (srv);
      510 +}
      511 +
      512 +void
      513 +rfs4_srv_zone_init(nfs_globals_t *ng)
      514 +{
      515 +        nfs4_srv_t *nsrv4;
 505  516          timespec32_t verf;
 506      -        int error;
 507      -        extern void rfs4_attr_init();
 508      -        extern krwlock_t rfs4_deleg_policy_lock;
 509  517  
      518 +        nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
      519 +
 510  520          /*
 511  521           * The following algorithm attempts to find a unique verifier
 512  522           * to be used as the write verifier returned from the server
 513  523           * to the client.  It is important that this verifier change
 514  524           * whenever the server reboots.  Of secondary importance, it
 515  525           * is important for the verifier to be unique between two
 516  526           * different servers.
 517  527           *
 518  528           * Thus, an attempt is made to use the system hostid and the
 519  529           * current time in seconds when the nfssrv kernel module is
 520  530           * loaded.  It is assumed that an NFS server will not be able
 521  531           * to boot and then to reboot in less than a second.  If the
 522  532           * hostid has not been set, then the current high resolution
 523  533           * time is used.  This will ensure different verifiers each
 524  534           * time the server reboots and minimize the chances that two
 525  535           * different servers will have the same verifier.
 526  536           * XXX - this is broken on LP64 kernels.
 527  537           */
  
    | 
      ↓ open down ↓ | 
    8 lines elided | 
    
      ↑ open up ↑ | 
  
 528  538          verf.tv_sec = (time_t)zone_get_hostid(NULL);
 529  539          if (verf.tv_sec != 0) {
 530  540                  verf.tv_nsec = gethrestime_sec();
 531  541          } else {
 532  542                  timespec_t tverf;
 533  543  
 534  544                  gethrestime(&tverf);
 535  545                  verf.tv_sec = (time_t)tverf.tv_sec;
 536  546                  verf.tv_nsec = tverf.tv_nsec;
 537  547          }
      548 +        nsrv4->write4verf = *(uint64_t *)&verf;
 538  549  
 539      -        Write4verf = *(uint64_t *)&verf;
      550 +        /* Used to manage create/destroy of server state */
      551 +        nsrv4->nfs4_server_state = NULL;
      552 +        nsrv4->nfs4_cur_servinst = NULL;
      553 +        nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
      554 +        mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
      555 +        mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
      556 +        mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
      557 +        rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 540  558  
 541      -        rfs4_attr_init();
 542      -        mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
      559 +        ng->nfs4_srv = nsrv4;
      560 +}
 543  561  
 544      -        /* Used to manage create/destroy of server state */
 545      -        mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
      562 +void
      563 +rfs4_srv_zone_fini(nfs_globals_t *ng)
      564 +{
      565 +        nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 546  566  
 547      -        /* Used to manage access to server instance linked list */
 548      -        mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
      567 +        ng->nfs4_srv = NULL;
 549  568  
 550      -        /* Used to manage access to rfs4_deleg_policy */
 551      -        rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
      569 +        mutex_destroy(&nsrv4->deleg_lock);
      570 +        mutex_destroy(&nsrv4->state_lock);
      571 +        mutex_destroy(&nsrv4->servinst_lock);
      572 +        rw_destroy(&nsrv4->deleg_policy_lock);
 552  573  
 553      -        error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 554      -        if (error != 0) {
      574 +        kmem_free(nsrv4, sizeof (*nsrv4));
      575 +}
      576 +
      577 +void
      578 +rfs4_srvrinit(void)
      579 +{
      580 +        extern void rfs4_attr_init();
      581 +
      582 +        rfs4_attr_init();
      583 +
      584 +        if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 555  585                  rfs4_disable_delegation();
 556      -        } else {
 557      -                error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 558      -                    &deleg_wrops);
 559      -                if (error != 0) {
 560      -                        rfs4_disable_delegation();
 561      -                        fem_free(deleg_rdops);
 562      -                }
      586 +        } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
      587 +            &deleg_wrops) != 0) {
      588 +                rfs4_disable_delegation();
      589 +                fem_free(deleg_rdops);
 563  590          }
 564  591  
 565  592          nfs4_srv_caller_id = fs_new_caller_id();
 566      -
 567  593          lockt_sysid = lm_alloc_sysidt();
 568      -
 569  594          vsd_create(&nfs4_srv_vkey, NULL);
 570      -
 571      -        return (0);
      595 +        rfs4_state_g_init();
 572  596  }
 573  597  
 574  598  void
 575  599  rfs4_srvrfini(void)
 576  600  {
 577      -        extern krwlock_t rfs4_deleg_policy_lock;
 578      -
 579  601          if (lockt_sysid != LM_NOSYSID) {
 580  602                  lm_free_sysidt(lockt_sysid);
 581  603                  lockt_sysid = LM_NOSYSID;
 582  604          }
 583  605  
 584      -        mutex_destroy(&rfs4_deleg_lock);
 585      -        mutex_destroy(&rfs4_state_lock);
 586      -        rw_destroy(&rfs4_deleg_policy_lock);
      606 +        rfs4_state_g_fini();
 587  607  
 588  608          fem_free(deleg_rdops);
 589  609          fem_free(deleg_wrops);
 590  610  }
 591  611  
 592  612  void
      613 +rfs4_do_server_start(int server_upordown,
      614 +    int srv_delegation, int cluster_booted)
      615 +{
      616 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
      617 +
      618 +        /* Is this a warm start? */
      619 +        if (server_upordown == NFS_SERVER_QUIESCED) {
      620 +                cmn_err(CE_NOTE, "nfs4_srv: "
      621 +                    "server was previously quiesced; "
      622 +                    "existing NFSv4 state will be re-used");
      623 +
      624 +                /*
      625 +                 * HA-NFSv4: this is also the signal
      626 +                 * that a Resource Group failover has
      627 +                 * occurred.
      628 +                 */
      629 +                if (cluster_booted)
      630 +                        hanfsv4_failover(nsrv4);
      631 +        } else {
      632 +                /* Cold start */
      633 +                nsrv4->rfs4_start_time = 0;
      634 +                rfs4_state_zone_init(nsrv4);
      635 +                nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
      636 +                    nfs4_drc_hash);
      637 +
      638 +                /*
      639 +                 * If the nfsd service was started with the -s option,
      640 +                 * we need to pull in any state from the paths indicated.
      641 +                 */
      642 +                if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
      643 +                        /* read in the stable storage state from these paths */
      644 +                        rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
      645 +                            rfs4_dss_newpaths);
      646 +                }
      647 +        }
      648 +
      649 +        /* Check if delegation is to be enabled */
      650 +        if (srv_delegation != FALSE)
      651 +                rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
      652 +}
      653 +
      654 +void
 593  655  rfs4_init_compound_state(struct compound_state *cs)
 594  656  {
 595  657          bzero(cs, sizeof (*cs));
 596  658          cs->cont = TRUE;
 597  659          cs->access = CS_ACCESS_DENIED;
 598  660          cs->deleg = FALSE;
 599  661          cs->mandlock = FALSE;
 600  662          cs->fh.nfs_fh4_val = cs->fhbuf;
 601  663  }
 602  664  
 603  665  void
 604  666  rfs4_grace_start(rfs4_servinst_t *sip)
 605  667  {
 606  668          rw_enter(&sip->rwlock, RW_WRITER);
 607  669          sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 608  670          sip->grace_period = rfs4_grace_period;
 609  671          rw_exit(&sip->rwlock);
 610  672  }
 611  673  
 612  674  /*
 613  675   * returns true if the instance's grace period has never been started
 614  676   */
 615  677  int
 616  678  rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 617  679  {
 618  680          time_t start_time;
 619  681  
 620  682          rw_enter(&sip->rwlock, RW_READER);
 621  683          start_time = sip->start_time;
 622  684          rw_exit(&sip->rwlock);
 623  685  
 624  686          return (start_time == 0);
 625  687  }
 626  688  
 627  689  /*
 628  690   * Indicates if server instance is within the
 629  691   * grace period.
 630  692   */
 631  693  int
 632  694  rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 633  695  {
 634  696          time_t grace_expiry;
 635  697  
 636  698          rw_enter(&sip->rwlock, RW_READER);
 637  699          grace_expiry = sip->start_time + sip->grace_period;
 638  700          rw_exit(&sip->rwlock);
 639  701  
 640  702          return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 641  703  }
 642  704  
 643  705  int
 644  706  rfs4_clnt_in_grace(rfs4_client_t *cp)
  
    | 
      ↓ open down ↓ | 
    42 lines elided | 
    
      ↑ open up ↑ | 
  
 645  707  {
 646  708          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 647  709  
 648  710          return (rfs4_servinst_in_grace(cp->rc_server_instance));
 649  711  }
 650  712  
 651  713  /*
 652  714   * reset all currently active grace periods
 653  715   */
 654  716  void
 655      -rfs4_grace_reset_all(void)
      717 +rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 656  718  {
 657  719          rfs4_servinst_t *sip;
 658  720  
 659      -        mutex_enter(&rfs4_servinst_lock);
 660      -        for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
      721 +        mutex_enter(&nsrv4->servinst_lock);
      722 +        for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 661  723                  if (rfs4_servinst_in_grace(sip))
 662  724                          rfs4_grace_start(sip);
 663      -        mutex_exit(&rfs4_servinst_lock);
      725 +        mutex_exit(&nsrv4->servinst_lock);
 664  726  }
 665  727  
 666  728  /*
 667  729   * start any new instances' grace periods
 668  730   */
 669  731  void
 670      -rfs4_grace_start_new(void)
      732 +rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 671  733  {
 672  734          rfs4_servinst_t *sip;
 673  735  
 674      -        mutex_enter(&rfs4_servinst_lock);
 675      -        for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
      736 +        mutex_enter(&nsrv4->servinst_lock);
      737 +        for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 676  738                  if (rfs4_servinst_grace_new(sip))
 677  739                          rfs4_grace_start(sip);
 678      -        mutex_exit(&rfs4_servinst_lock);
      740 +        mutex_exit(&nsrv4->servinst_lock);
 679  741  }
 680  742  
 681  743  static rfs4_dss_path_t *
 682      -rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
      744 +rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
      745 +    char *path, unsigned index)
 683  746  {
 684  747          size_t len;
 685  748          rfs4_dss_path_t *dss_path;
 686  749  
 687  750          dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 688  751  
 689  752          /*
 690  753           * Take a copy of the string, since the original may be overwritten.
 691  754           * Sadly, no strdup() in the kernel.
 692  755           */
 693  756          /* allow for NUL */
 694  757          len = strlen(path) + 1;
 695  758          dss_path->path = kmem_alloc(len, KM_SLEEP);
  
    | 
      ↓ open down ↓ | 
    3 lines elided | 
    
      ↑ open up ↑ | 
  
 696  759          (void) strlcpy(dss_path->path, path, len);
 697  760  
 698  761          /* associate with servinst */
 699  762          dss_path->sip = sip;
 700  763          dss_path->index = index;
 701  764  
 702  765          /*
 703  766           * Add to list of served paths.
 704  767           * No locking required, as we're only ever called at startup.
 705  768           */
 706      -        if (rfs4_dss_pathlist == NULL) {
      769 +        if (nsrv4->dss_pathlist == NULL) {
 707  770                  /* this is the first dss_path_t */
 708  771  
 709  772                  /* needed for insque/remque */
 710  773                  dss_path->next = dss_path->prev = dss_path;
 711  774  
 712      -                rfs4_dss_pathlist = dss_path;
      775 +                nsrv4->dss_pathlist = dss_path;
 713  776          } else {
 714      -                insque(dss_path, rfs4_dss_pathlist);
      777 +                insque(dss_path, nsrv4->dss_pathlist);
 715  778          }
 716  779  
 717  780          return (dss_path);
 718  781  }
 719  782  
 720  783  /*
 721  784   * Create a new server instance, and make it the currently active instance.
 722  785   * Note that starting the grace period too early will reduce the clients'
 723  786   * recovery window.
 724  787   */
 725  788  void
 726      -rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
      789 +rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
      790 +    int dss_npaths, char **dss_paths)
 727  791  {
 728  792          unsigned i;
 729  793          rfs4_servinst_t *sip;
 730  794          rfs4_oldstate_t *oldstate;
 731  795  
 732  796          sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 733  797          rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 734  798  
 735  799          sip->start_time = (time_t)0;
 736  800          sip->grace_period = (time_t)0;
 737  801          sip->next = NULL;
 738  802          sip->prev = NULL;
 739  803  
 740  804          rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 741  805          /*
 742  806           * This initial dummy entry is required to setup for insque/remque.
 743  807           * It must be skipped over whenever the list is traversed.
 744  808           */
 745  809          oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 746  810          /* insque/remque require initial list entry to be self-terminated */
  
    | 
      ↓ open down ↓ | 
    10 lines elided | 
    
      ↑ open up ↑ | 
  
 747  811          oldstate->next = oldstate;
 748  812          oldstate->prev = oldstate;
 749  813          sip->oldstate = oldstate;
 750  814  
 751  815  
 752  816          sip->dss_npaths = dss_npaths;
 753  817          sip->dss_paths = kmem_alloc(dss_npaths *
 754  818              sizeof (rfs4_dss_path_t *), KM_SLEEP);
 755  819  
 756  820          for (i = 0; i < dss_npaths; i++) {
 757      -                sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
      821 +                sip->dss_paths[i] =
      822 +                    rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 758  823          }
 759  824  
 760      -        mutex_enter(&rfs4_servinst_lock);
 761      -        if (rfs4_cur_servinst != NULL) {
      825 +        mutex_enter(&nsrv4->servinst_lock);
      826 +        if (nsrv4->nfs4_cur_servinst != NULL) {
 762  827                  /* add to linked list */
 763      -                sip->prev = rfs4_cur_servinst;
 764      -                rfs4_cur_servinst->next = sip;
      828 +                sip->prev = nsrv4->nfs4_cur_servinst;
      829 +                nsrv4->nfs4_cur_servinst->next = sip;
 765  830          }
 766  831          if (start_grace)
 767  832                  rfs4_grace_start(sip);
 768  833          /* make the new instance "current" */
 769      -        rfs4_cur_servinst = sip;
      834 +        nsrv4->nfs4_cur_servinst = sip;
 770  835  
 771      -        mutex_exit(&rfs4_servinst_lock);
      836 +        mutex_exit(&nsrv4->servinst_lock);
 772  837  }
 773  838  
 774  839  /*
 775  840   * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 776  841   * all instances directly.
 777  842   */
 778  843  void
 779      -rfs4_servinst_destroy_all(void)
      844 +rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 780  845  {
 781  846          rfs4_servinst_t *sip, *prev, *current;
 782  847  #ifdef DEBUG
 783  848          int n = 0;
 784  849  #endif
 785  850  
 786      -        mutex_enter(&rfs4_servinst_lock);
 787      -        ASSERT(rfs4_cur_servinst != NULL);
 788      -        current = rfs4_cur_servinst;
 789      -        rfs4_cur_servinst = NULL;
      851 +        mutex_enter(&nsrv4->servinst_lock);
      852 +        ASSERT(nsrv4->nfs4_cur_servinst != NULL);
      853 +        current = nsrv4->nfs4_cur_servinst;
      854 +        nsrv4->nfs4_cur_servinst = NULL;
 790  855          for (sip = current; sip != NULL; sip = prev) {
 791  856                  prev = sip->prev;
 792  857                  rw_destroy(&sip->rwlock);
 793  858                  if (sip->oldstate)
 794  859                          kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 795      -                if (sip->dss_paths)
      860 +                if (sip->dss_paths) {
      861 +                        int i = sip->dss_npaths;
      862 +
      863 +                        while (i > 0) {
      864 +                                i--;
      865 +                                if (sip->dss_paths[i] != NULL) {
      866 +                                        char *path = sip->dss_paths[i]->path;
      867 +
      868 +                                        if (path != NULL) {
      869 +                                                kmem_free(path,
      870 +                                                    strlen(path) + 1);
      871 +                                        }
      872 +                                        kmem_free(sip->dss_paths[i],
      873 +                                            sizeof (rfs4_dss_path_t));
      874 +                                }
      875 +                        }
 796  876                          kmem_free(sip->dss_paths,
 797  877                              sip->dss_npaths * sizeof (rfs4_dss_path_t *));
      878 +                }
 798  879                  kmem_free(sip, sizeof (rfs4_servinst_t));
 799  880  #ifdef DEBUG
 800  881                  n++;
 801  882  #endif
 802  883          }
 803      -        mutex_exit(&rfs4_servinst_lock);
      884 +        mutex_exit(&nsrv4->servinst_lock);
 804  885  }
 805  886  
 806  887  /*
 807  888   * Assign the current server instance to a client_t.
 808  889   * Should be called with cp->rc_dbe held.
 809  890   */
 810  891  void
 811      -rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
      892 +rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
      893 +    rfs4_servinst_t *sip)
 812  894  {
 813  895          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 814  896  
 815  897          /*
 816  898           * The lock ensures that if the current instance is in the process
 817  899           * of changing, we will see the new one.
 818  900           */
 819      -        mutex_enter(&rfs4_servinst_lock);
      901 +        mutex_enter(&nsrv4->servinst_lock);
 820  902          cp->rc_server_instance = sip;
 821      -        mutex_exit(&rfs4_servinst_lock);
      903 +        mutex_exit(&nsrv4->servinst_lock);
 822  904  }
 823  905  
 824  906  rfs4_servinst_t *
 825  907  rfs4_servinst(rfs4_client_t *cp)
 826  908  {
 827  909          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 828  910  
 829  911          return (cp->rc_server_instance);
 830  912  }
 831  913  
 832  914  /* ARGSUSED */
 833  915  static void
 834  916  nullfree(caddr_t resop)
 835  917  {
 836  918  }
 837  919  
 838  920  /*
 839  921   * This is a fall-through for invalid or not implemented (yet) ops
 840  922   */
 841  923  /* ARGSUSED */
 842  924  static void
 843  925  rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 844  926      struct compound_state *cs)
 845  927  {
 846  928          *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 847  929  }
 848  930  
 849  931  /*
 850  932   * Check if the security flavor, nfsnum, is in the flavor_list.
 851  933   */
 852  934  bool_t
 853  935  in_flavor_list(int nfsnum, int *flavor_list, int count)
 854  936  {
 855  937          int i;
 856  938  
 857  939          for (i = 0; i < count; i++) {
 858  940                  if (nfsnum == flavor_list[i])
 859  941                          return (TRUE);
 860  942          }
 861  943          return (FALSE);
 862  944  }
 863  945  
  
    | 
      ↓ open down ↓ | 
    32 lines elided | 
    
      ↑ open up ↑ | 
  
 864  946  /*
 865  947   * Used by rfs4_op_secinfo to get the security information from the
 866  948   * export structure associated with the component.
 867  949   */
 868  950  /* ARGSUSED */
 869  951  static nfsstat4
 870  952  do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 871  953  {
 872  954          int error, different_export = 0;
 873  955          vnode_t *dvp, *vp;
 874      -        struct exportinfo *exi = NULL;
      956 +        struct exportinfo *exi;
 875  957          fid_t fid;
 876  958          uint_t count, i;
 877  959          secinfo4 *resok_val;
 878  960          struct secinfo *secp;
 879  961          seconfig_t *si;
 880  962          bool_t did_traverse = FALSE;
 881  963          int dotdot, walk;
      964 +        nfs_export_t *ne = nfs_get_export();
 882  965  
 883  966          dvp = cs->vp;
      967 +        exi = cs->exi;
      968 +        ASSERT(exi != NULL);
 884  969          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 885  970  
 886  971          /*
 887  972           * If dotdotting, then need to check whether it's above the
 888  973           * root of a filesystem, or above an export point.
 889  974           */
 890  975          if (dotdot) {
      976 +                vnode_t *zone_rootvp = ne->exi_root->exi_vp;
 891  977  
      978 +                ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
 892  979                  /*
 893  980                   * If dotdotting at the root of a filesystem, then
 894  981                   * need to traverse back to the mounted-on filesystem
 895  982                   * and do the dotdot lookup there.
 896  983                   */
 897      -                if (cs->vp->v_flag & VROOT) {
      984 +                if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
 898  985  
 899  986                          /*
 900  987                           * If at the zone's root, then can
 901  988                           * go up no further.
 902  989                           */
 903      -                        if (VN_CMP(dvp, rootdir))
      990 +                        if (VN_CMP(dvp, zone_rootvp))
 904  991                                  return (puterrno4(ENOENT));
 905  992  
 906  993                          /*
 907  994                           * Traverse back to the mounted-on filesystem
 908  995                           */
 909      -                        dvp = untraverse(cs->vp);
      996 +                        dvp = untraverse(dvp, zone_rootvp);
 910  997  
 911  998                          /*
 912  999                           * Set the different_export flag so we remember
 913 1000                           * to pick up a new exportinfo entry for
 914 1001                           * this new filesystem.
 915 1002                           */
 916 1003                          different_export = 1;
 917 1004                  } else {
 918 1005  
 919 1006                          /*
 920 1007                           * If dotdotting above an export point then set
 921 1008                           * the different_export to get new export info.
 922 1009                           */
 923      -                        different_export = nfs_exported(cs->exi, cs->vp);
     1010 +                        different_export = nfs_exported(exi, dvp);
 924 1011                  }
 925 1012          }
 926 1013  
 927 1014          /*
 928 1015           * Get the vnode for the component "nm".
 929 1016           */
 930 1017          error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 931 1018              NULL, NULL, NULL);
 932 1019          if (error)
 933 1020                  return (puterrno4(error));
 934 1021  
 935 1022          /*
 936 1023           * If the vnode is in a pseudo filesystem, or if the security flavor
 937 1024           * used in the request is valid but not an explicitly shared flavor,
 938 1025           * or the access bit indicates that this is a limited access,
 939 1026           * check whether this vnode is visible.
 940 1027           */
 941 1028          if (!different_export &&
 942      -            (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
     1029 +            (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
 943 1030              cs->access & CS_ACCESS_LIMITED)) {
 944      -                if (! nfs_visible(cs->exi, vp, &different_export)) {
     1031 +                if (! nfs_visible(exi, vp, &different_export)) {
 945 1032                          VN_RELE(vp);
 946 1033                          return (puterrno4(ENOENT));
 947 1034                  }
 948 1035          }
 949 1036  
 950 1037          /*
 951 1038           * If it's a mountpoint, then traverse it.
 952 1039           */
 953 1040          if (vn_ismntpt(vp)) {
 954 1041                  if ((error = traverse(&vp)) != 0) {
 955 1042                          VN_RELE(vp);
 956 1043                          return (puterrno4(error));
 957 1044                  }
 958 1045                  /* remember that we had to traverse mountpoint */
 959 1046                  did_traverse = TRUE;
 960 1047                  different_export = 1;
 961 1048          } else if (vp->v_vfsp != dvp->v_vfsp) {
 962 1049                  /*
 963 1050                   * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 964 1051                   * then vp is probably an LOFS object.  We don't need the
 965 1052                   * realvp, we just need to know that we might have crossed
 966 1053                   * a server fs boundary and need to call checkexport4.
 967 1054                   * (LOFS lookup hides server fs mountpoints, and actually calls
 968 1055                   * traverse)
 969 1056                   */
 970 1057                  different_export = 1;
 971 1058          }
 972 1059  
 973 1060          /*
 974 1061           * Get the export information for it.
 975 1062           */
  
    | 
      ↓ open down ↓ | 
    21 lines elided | 
    
      ↑ open up ↑ | 
  
 976 1063          if (different_export) {
 977 1064  
 978 1065                  bzero(&fid, sizeof (fid));
 979 1066                  fid.fid_len = MAXFIDSZ;
 980 1067                  error = vop_fid_pseudo(vp, &fid);
 981 1068                  if (error) {
 982 1069                          VN_RELE(vp);
 983 1070                          return (puterrno4(error));
 984 1071                  }
 985 1072  
     1073 +                /* We'll need to reassign "exi". */
 986 1074                  if (dotdot)
 987 1075                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 988 1076                  else
 989 1077                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 990 1078  
 991 1079                  if (exi == NULL) {
 992 1080                          if (did_traverse == TRUE) {
 993 1081                                  /*
 994 1082                                   * If this vnode is a mounted-on vnode,
 995 1083                                   * but the mounted-on file system is not
 996 1084                                   * exported, send back the secinfo for
 997 1085                                   * the exported node that the mounted-on
 998 1086                                   * vnode lives in.
 999 1087                                   */
1000 1088                                  exi = cs->exi;
1001 1089                          } else {
1002 1090                                  VN_RELE(vp);
1003 1091                                  return (puterrno4(EACCES));
1004 1092                          }
1005 1093                  }
1006      -        } else {
1007      -                exi = cs->exi;
1008 1094          }
1009 1095          ASSERT(exi != NULL);
1010 1096  
1011 1097  
1012 1098          /*
1013 1099           * Create the secinfo result based on the security information
1014 1100           * from the exportinfo structure (exi).
1015 1101           *
1016 1102           * Return all flavors for a pseudo node.
1017 1103           * For a real export node, return the flavor that the client
1018 1104           * has access with.
1019 1105           */
1020      -        ASSERT(RW_LOCK_HELD(&exported_lock));
     1106 +        ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1021 1107          if (PSEUDO(exi)) {
1022 1108                  count = exi->exi_export.ex_seccnt; /* total sec count */
1023 1109                  resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1024 1110                  secp = exi->exi_export.ex_secinfo;
1025 1111  
1026 1112                  for (i = 0; i < count; i++) {
1027 1113                          si = &secp[i].s_secinfo;
1028 1114                          resok_val[i].flavor = si->sc_rpcnum;
1029 1115                          if (resok_val[i].flavor == RPCSEC_GSS) {
1030 1116                                  rpcsec_gss_info *info;
1031 1117  
1032 1118                                  info = &resok_val[i].flavor_info;
1033 1119                                  info->qop = si->sc_qop;
1034 1120                                  info->service = (rpc_gss_svc_t)si->sc_service;
1035 1121  
1036 1122                                  /* get oid opaque data */
1037 1123                                  info->oid.sec_oid4_len =
1038 1124                                      si->sc_gss_mech_type->length;
1039 1125                                  info->oid.sec_oid4_val = kmem_alloc(
1040 1126                                      si->sc_gss_mech_type->length, KM_SLEEP);
1041 1127                                  bcopy(
1042 1128                                      si->sc_gss_mech_type->elements,
1043 1129                                      info->oid.sec_oid4_val,
1044 1130                                      info->oid.sec_oid4_len);
1045 1131                          }
1046 1132                  }
1047 1133                  resp->SECINFO4resok_len = count;
1048 1134                  resp->SECINFO4resok_val = resok_val;
1049 1135          } else {
1050 1136                  int ret_cnt = 0, k = 0;
1051 1137                  int *flavor_list;
1052 1138  
1053 1139                  count = exi->exi_export.ex_seccnt; /* total sec count */
1054 1140                  secp = exi->exi_export.ex_secinfo;
1055 1141  
1056 1142                  flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1057 1143                  /* find out which flavors to return */
1058 1144                  for (i = 0; i < count; i ++) {
1059 1145                          int access, flavor, perm;
1060 1146  
1061 1147                          flavor = secp[i].s_secinfo.sc_nfsnum;
1062 1148                          perm = secp[i].s_flags;
1063 1149  
1064 1150                          access = nfsauth4_secinfo_access(exi, cs->req,
1065 1151                              flavor, perm, cs->basecr);
1066 1152  
1067 1153                          if (! (access & NFSAUTH_DENIED) &&
1068 1154                              ! (access & NFSAUTH_WRONGSEC)) {
1069 1155                                  flavor_list[ret_cnt] = flavor;
1070 1156                                  ret_cnt++;
1071 1157                          }
1072 1158                  }
1073 1159  
1074 1160                  /* Create the returning SECINFO value */
1075 1161                  resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1076 1162  
1077 1163                  for (i = 0; i < count; i++) {
1078 1164                          /*
1079 1165                           * If the flavor is in the flavor list,
1080 1166                           * fill in resok_val.
1081 1167                           */
1082 1168                          si = &secp[i].s_secinfo;
1083 1169                          if (in_flavor_list(si->sc_nfsnum,
1084 1170                              flavor_list, ret_cnt)) {
1085 1171                                  resok_val[k].flavor = si->sc_rpcnum;
1086 1172                                  if (resok_val[k].flavor == RPCSEC_GSS) {
1087 1173                                          rpcsec_gss_info *info;
1088 1174  
1089 1175                                          info = &resok_val[k].flavor_info;
1090 1176                                          info->qop = si->sc_qop;
1091 1177                                          info->service = (rpc_gss_svc_t)
1092 1178                                              si->sc_service;
1093 1179  
1094 1180                                          /* get oid opaque data */
1095 1181                                          info->oid.sec_oid4_len =
1096 1182                                              si->sc_gss_mech_type->length;
1097 1183                                          info->oid.sec_oid4_val = kmem_alloc(
1098 1184                                              si->sc_gss_mech_type->length,
1099 1185                                              KM_SLEEP);
1100 1186                                          bcopy(si->sc_gss_mech_type->elements,
1101 1187                                              info->oid.sec_oid4_val,
1102 1188                                              info->oid.sec_oid4_len);
1103 1189                                  }
1104 1190                                  k++;
1105 1191                          }
1106 1192                          if (k >= ret_cnt)
1107 1193                                  break;
1108 1194                  }
1109 1195                  resp->SECINFO4resok_len = ret_cnt;
1110 1196                  resp->SECINFO4resok_val = resok_val;
1111 1197                  kmem_free(flavor_list, count * sizeof (int));
1112 1198          }
1113 1199  
1114 1200          VN_RELE(vp);
1115 1201          return (NFS4_OK);
1116 1202  }
1117 1203  
1118 1204  /*
1119 1205   * SECINFO (Operation 33): Obtain required security information on
1120 1206   * the component name in the format of (security-mechanism-oid, qop, service)
1121 1207   * triplets.
            *
            * The current filehandle (cs->vp) must be set and must be a directory.
            * Every exit path stores its result in both resp->status and
            * *cs->statusp; the actual work is delegated to do_rfs4_op_secinfo().
1122 1208   */
1123 1209  /* ARGSUSED */
1124 1210  static void
1125 1211  rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1126 1212      struct compound_state *cs)
1127 1213  {
1128 1214          SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1129 1215          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1130 1216          utf8string *utfnm = &args->name;
1131 1217          uint_t len;
1132 1218          char *nm;
1133 1219          struct sockaddr *ca;
1134 1220          char *name = NULL;
1135 1221          nfsstat4 status = NFS4_OK;
1136 1222  
1137 1223          DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1138 1224              SECINFO4args *, args);
1139 1225  
1140 1226          /*
1141 1227           * Current file handle (cfh) should have been set before getting
1142 1228           * into this function. If not, return error.
1143 1229           */
1144 1230          if (cs->vp == NULL) {
1145 1231                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1146 1232                  goto out;
1147 1233          }
1148 1234  
1149 1235          if (cs->vp->v_type != VDIR) {
1150 1236                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1151 1237                  goto out;
1152 1238          }
1153 1239  
1154 1240          /*
1155 1241           * Verify the component name. If that fails, error out, unless
1156 1242           * the component name is exactly "..".
1157 1243           * SECINFO will return its parent's secinfo data for SECINFO "..".
1158 1244           */
1159 1245          status = utf8_dir_verify(utfnm);
1160 1246          if (status != NFS4_OK) {
1161 1247                  if (utfnm->utf8string_len != 2 ||
1162 1248                      utfnm->utf8string_val[0] != '.' ||
1163 1249                      utfnm->utf8string_val[1] != '.') {
1164 1250                          *cs->statusp = resp->status = status;
1165 1251                          goto out;
1166 1252                  }
1167 1253          }
1168 1254  
                   /* From here on, nm is released with kmem_free(nm, len). */
1169 1255          nm = utf8_to_str(utfnm, &len, NULL);
1170 1256          if (nm == NULL) {
1171 1257                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1172 1258                  goto out;
1173 1259          }
1174 1260  
1175 1261          if (len > MAXNAMELEN) {
1176 1262                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1177 1263                  kmem_free(nm, len);
1178 1264                  goto out;
1179 1265          }
1180 1266  
                   /*
                    * Run the name through nfscmd_convname() for this caller's
                    * address.  The result may be nm itself; a distinct buffer is
                    * freed separately below as MAXPATHLEN + 1 bytes.
                    */
1181 1267          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1182 1268          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1183 1269              MAXPATHLEN  + 1);
1184 1270  
1185 1271          if (name == NULL) {
1186 1272                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1187 1273                  kmem_free(nm, len);
1188 1274                  goto out;
1189 1275          }
1190 1276  
1191 1277  
1192 1278          *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1193 1279  
                   /* Free the converted copy only if it is not nm itself. */
1194 1280          if (name != nm)
1195 1281                  kmem_free(name, MAXPATHLEN + 1);
1196 1282          kmem_free(nm, len);
1197 1283  
1198 1284  out:
1199 1285          DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1200 1286              SECINFO4res *, resp);
1201 1287  }
1202 1288  
1203 1289  /*
1204 1290   * Free SECINFO result.
            *
            * Releases the SECINFO4resok_val array of a successful result, along
            * with the OID buffer held by each RPCSEC_GSS entry, then clears the
            * length and pointer in resp.  A non-OK result is left untouched.
1205 1291   */
1206 1292  /* ARGSUSED */
1207 1293  static void
1208 1294  rfs4_op_secinfo_free(nfs_resop4 *resop)
1209 1295  {
1210 1296          SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1211 1297          int count, i;
1212 1298          secinfo4 *resok_val;
1213 1299  
1214 1300          /* If this is not an Ok result, nothing to free. */
1215 1301          if (resp->status != NFS4_OK) {
1216 1302                  return;
1217 1303          }
1218 1304  
1219 1305          count = resp->SECINFO4resok_len;
1220 1306          resok_val = resp->SECINFO4resok_val;
1221 1307  
                   /* Only RPCSEC_GSS entries carry a separately allocated OID. */
1222 1308          for (i = 0; i < count; i++) {
1223 1309                  if (resok_val[i].flavor == RPCSEC_GSS) {
1224 1310                          rpcsec_gss_info *info;
1225 1311  
1226 1312                          info = &resok_val[i].flavor_info;
1227 1313                          kmem_free(info->oid.sec_oid4_val,
1228 1314                              info->oid.sec_oid4_len);
1229 1315                  }
1230 1316          }
                   /* Free the array itself and leave no stale pointer in resp. */
1231 1317          kmem_free(resok_val, count * sizeof (secinfo4));
1232 1318          resp->SECINFO4resok_len = 0;
1233 1319          resp->SECINFO4resok_val = NULL;
1234 1320  }
1235 1321  
1236 1322  /* ARGSUSED */
           /*
            * ACCESS: for each access right requested in args->access, ask the
            * file system (VOP_ACCESS with the caller's credential cs->cr) whether
            * it is permitted on the current filehandle's vnode, and report the
            * outcome in resp->access and resp->supported.
            *
            * Every exit path stores its result in both resp->status and
            * *cs->statusp.
            */
1237 1323  static void
1238 1324  rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1239 1325      struct compound_state *cs)
1240 1326  {
1241 1327          ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1242 1328          ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1243 1329          int error;
1244 1330          vnode_t *vp;
1245 1331          struct vattr va;
1246 1332          int checkwriteperm;
1247 1333          cred_t *cr = cs->cr;
1248 1334          bslabel_t *clabel, *slabel;
1249 1335          ts_label_t *tslabel;
1250 1336          boolean_t admin_low_client;
1251 1337  
1252 1338          DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1253 1339              ACCESS4args *, args);
1254 1340  
1255 1341  #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1256 1342          if (cs->access == CS_ACCESS_DENIED) {
1257 1343                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1258 1344                  goto out;
1259 1345          }
1260 1346  #endif
1261 1347          if (cs->vp == NULL) {
1262 1348                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1263 1349                  goto out;
1264 1350          }
1265 1351  
1266 1352          ASSERT(cr != NULL);
1267 1353  
1268 1354          vp = cs->vp;
1269 1355  
1270 1356          /*
1271 1357           * If the file system is exported read only, it is not appropriate
1272 1358           * to check write permissions for regular files and directories.
1273 1359           * Special files are interpreted by the client, so the underlying
1274 1360           * permissions are sent back to the client for interpretation.
1275 1361           */
1276 1362          if (rdonly4(req, cs) &&
1277 1363              (vp->v_type == VREG || vp->v_type == VDIR))
1278 1364                  checkwriteperm = 0;
1279 1365          else
1280 1366                  checkwriteperm = 1;
1281 1367  
1282 1368          /*
1283 1369           * XXX
1284 1370           * We need the mode so that we can correctly determine access
1285 1371           * permissions relative to a mandatory lock file.  Access to
1286 1372           * mandatory lock files is denied on the server, so it might
1287 1373           * as well be reflected to the client during the open.
1288 1374           */
1289 1375          va.va_mask = AT_MODE;
1290 1376          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1291 1377          if (error) {
1292 1378                  *cs->statusp = resp->status = puterrno4(error);
1293 1379                  goto out;
1294 1380          }
1295 1381          resp->access = 0;
1296 1382          resp->supported = 0;
1297 1383  
                   /*
                    * On a labeled system, take the client label from the request.
                    * Unless it equals the admin_low label, also fetch the label for
                    * vp; that label is released with label_rele() before the
                    * NFS4_OK return.  clabel, slabel and admin_low_client are only
                    * read below when is_system_labeled() is true.
                    */
1298 1384          if (is_system_labeled()) {
1299 1385                  ASSERT(req->rq_label != NULL);
1300 1386                  clabel = req->rq_label;
1301 1387                  DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1302 1388                      "got client label from request(1)",
1303 1389                      struct svc_req *, req);
1304 1390                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
1305 1391                          if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1306 1392                                  *cs->statusp = resp->status = puterrno4(EACCES);
1307 1393                                  goto out;
1308 1394                          }
1309 1395                          slabel = label2bslabel(tslabel);
1310 1396                          DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1311 1397                              char *, "got server label(1) for vp(2)",
1312 1398                              bslabel_t *, slabel, vnode_t *, vp);
1313 1399  
1314 1400                          admin_low_client = B_FALSE;
1315 1401                  } else
1316 1402                          admin_low_client = B_TRUE;
1317 1403          }
1318 1404  
                   /*
                    * Read-type rights (READ, LOOKUP, EXECUTE) use bldominates() for
                    * the label check; write-type rights (MODIFY, EXTEND, DELETE) use
                    * blequal().  MANDLOCK() files are refused READ, MODIFY/EXTEND
                    * and EXECUTE.
                    */
1319 1405          if (args->access & ACCESS4_READ) {
1320 1406                  error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1321 1407                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1322 1408                      (!is_system_labeled() || admin_low_client ||
1323 1409                      bldominates(clabel, slabel)))
1324 1410                          resp->access |= ACCESS4_READ;
1325 1411                  resp->supported |= ACCESS4_READ;
1326 1412          }
1327 1413          if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1328 1414                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1329 1415                  if (!error && (!is_system_labeled() || admin_low_client ||
1330 1416                      bldominates(clabel, slabel)))
1331 1417                          resp->access |= ACCESS4_LOOKUP;
1332 1418                  resp->supported |= ACCESS4_LOOKUP;
1333 1419          }
1334 1420          if (checkwriteperm &&
1335 1421              (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1336 1422                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1337 1423                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1338 1424                      (!is_system_labeled() || admin_low_client ||
1339 1425                      blequal(clabel, slabel)))
1340 1426                          resp->access |=
1341 1427                              (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
                           /*
                            * NOTE(review): unlike the other rights, "supported"
                            * for MODIFY/EXTEND is taken from resp->access rather
                            * than args->access, so these bits are only reported
                            * as supported when access was granted -- confirm
                            * this is intended.
                            */
1342 1428                  resp->supported |=
1343 1429                      resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1344 1430          }
1345 1431  
1346 1432          if (checkwriteperm &&
1347 1433              (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1348 1434                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1349 1435                  if (!error && (!is_system_labeled() || admin_low_client ||
1350 1436                      blequal(clabel, slabel)))
1351 1437                          resp->access |= ACCESS4_DELETE;
1352 1438                  resp->supported |= ACCESS4_DELETE;
1353 1439          }
1354 1440          if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1355 1441                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1356 1442                  if (!error && !MANDLOCK(vp, va.va_mode) &&
1357 1443                      (!is_system_labeled() || admin_low_client ||
1358 1444                      bldominates(clabel, slabel)))
1359 1445                          resp->access |= ACCESS4_EXECUTE;
1360 1446                  resp->supported |= ACCESS4_EXECUTE;
1361 1447          }
1362 1448  
                   /* tslabel was only obtained for a non-admin_low client. */
1363 1449          if (is_system_labeled() && !admin_low_client)
1364 1450                  label_rele(tslabel);
1365 1451  
1366 1452          *cs->statusp = resp->status = NFS4_OK;
1367 1453  out:
1368 1454          DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1369 1455              ACCESS4res *, resp);
1370 1456  }
1371 1457  
1372 1458  /* ARGSUSED */
  
    | 
      ↓ open down ↓ | 
    342 lines elided | 
    
      ↑ open up ↑ | 
  
           /*
            * COMMIT: flush the current filehandle's regular file with VOP_FSYNC()
            * and, on success, return the write verifier held in the nfs4_srv_t
            * obtained from nfs4_get_srv().
            *
            * Every exit path stores its result in both resp->status and
            * *cs->statusp.
            */
1373 1459  static void
1374 1460  rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375 1461      struct compound_state *cs)
1376 1462  {
1377 1463          COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378 1464          COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379 1465          int error;
1380 1466          vnode_t *vp = cs->vp;
1381 1467          cred_t *cr = cs->cr;
1382 1468          vattr_t va;
     1469 +        nfs4_srv_t *nsrv4;
1383 1470  
1384 1471          DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385 1472              COMMIT4args *, args);
1386 1473  
1387 1474          if (vp == NULL) {
1388 1475                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389 1476                  goto out;
1390 1477          }
1391 1478          if (cs->access == CS_ACCESS_DENIED) {
1392 1479                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1393 1480                  goto out;
1394 1481          }
1395 1482  
                   /* Reject a range whose end (offset + count) wraps around. */
1396 1483          if (args->offset + args->count < args->offset) {
1397 1484                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1398 1485                  goto out;
1399 1486          }
1400 1487  
                   /* The owner uid is needed for the access check further down. */
1401 1488          va.va_mask = AT_UID;
1402 1489          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1403 1490  
1404 1491          /*
1405 1492           * If we can't get the attributes, then we can't do the
1406 1493           * right access checking.  So, we'll fail the request.
1407 1494           */
1408 1495          if (error) {
1409 1496                  *cs->statusp = resp->status = puterrno4(error);
1410 1497                  goto out;
1411 1498          }
1412 1499          if (rdonly4(req, cs)) {
1413 1500                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1414 1501                  goto out;
1415 1502          }
1416 1503  
                   /* Only regular files can be committed. */
1417 1504          if (vp->v_type != VREG) {
1418 1505                  if (vp->v_type == VDIR)
1419 1506                          resp->status = NFS4ERR_ISDIR;
1420 1507                  else
1421 1508                          resp->status = NFS4ERR_INVAL;
1422 1509                  *cs->statusp = resp->status;
1423 1510                  goto out;
1424 1511          }
1425 1512  
                   /*
                    * The file's owner skips the VWRITE check; any other caller
                    * must pass VOP_ACCESS().
                    */
1426 1513          if (crgetuid(cr) != va.va_uid &&
1427 1514              (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1428 1515                  *cs->statusp = resp->status = puterrno4(error);
  
    | 
      ↓ open down ↓ | 
    36 lines elided | 
    
      ↑ open up ↑ | 
  
1429 1516                  goto out;
1430 1517          }
1431 1518  
                   /*
                    * NOTE(review): the whole file is synced; args->offset and
                    * args->count are only validated above, not passed on.
                    */
1432 1519          error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433 1520  
1434 1521          if (error) {
1435 1522                  *cs->statusp = resp->status = puterrno4(error);
1436 1523                  goto out;
1437 1524          }
1438 1525  
     1526 +        nsrv4 = nfs4_get_srv();
1439 1527          *cs->statusp = resp->status = NFS4_OK;
1440      -        resp->writeverf = Write4verf;
     1528 +        resp->writeverf = nsrv4->write4verf;
1441 1529  out:
1442 1530          DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443 1531              COMMIT4res *, resp);
1444 1532  }
1445 1533  
1446 1534  /*
1447 1535   * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1448 1536   * was completed. It does the nfsv4 create for special files.
            *
            * Fills in vap->va_type (and va_rdev for device nodes) from args->type
            * and creates nm in the directory cs->vp with VOP_CREATE().
            *
            * Returns the new vnode on success.  On failure returns NULL with the
            * error stored in both resp->status and *cs->statusp.
1449 1537   */
1450 1538  /* ARGSUSED */
1451 1539  static vnode_t *
1452 1540  do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1453 1541      struct compound_state *cs, vattr_t *vap, char *nm)
1454 1542  {
1455 1543          int error;
1456 1544          cred_t *cr = cs->cr;
1457 1545          vnode_t *dvp = cs->vp;
1458 1546          vnode_t *vp = NULL;
1459 1547          int mode;
1460 1548          enum vcexcl excl;
1461 1549  
1462 1550          switch (args->type) {
1463 1551          case NF4CHR:
1464 1552          case NF4BLK:
                           /* Device nodes require secpolicy_sys_devices() to pass. */
1465 1553                  if (secpolicy_sys_devices(cr) != 0) {
1466 1554                          *cs->statusp = resp->status = NFS4ERR_PERM;
1467 1555                          return (NULL);
1468 1556                  }
1469 1557                  if (args->type == NF4CHR)
1470 1558                          vap->va_type = VCHR;
1471 1559                  else
1472 1560                          vap->va_type = VBLK;
                           /* specdata1/specdata2 are combined into the device number. */
1473 1561                  vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1474 1562                      args->ftype4_u.devdata.specdata2);
1475 1563                  vap->va_mask |= AT_RDEV;
1476 1564                  break;
1477 1565          case NF4SOCK:
1478 1566                  vap->va_type = VSOCK;
1479 1567                  break;
1480 1568          case NF4FIFO:
1481 1569                  vap->va_type = VFIFO;
1482 1570                  break;
1483 1571          default:
1484 1572                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1485 1573                  return (NULL);
1486 1574          }
1487 1575  
1488 1576          /*
1489 1577           * Must specify the mode.
1490 1578           */
1491 1579          if (!(vap->va_mask & AT_MODE)) {
1492 1580                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1493 1581                  return (NULL);
1494 1582          }
1495 1583  
                   /* Always create with EXCL and a zero mode argument. */
1496 1584          excl = EXCL;
1497 1585  
1498 1586          mode = 0;
1499 1587  
1500 1588          error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1501 1589          if (error) {
1502 1590                  *cs->statusp = resp->status = puterrno4(error);
1503 1591                  return (NULL);
1504 1592          }
1505 1593          return (vp);
1506 1594  }
1507 1595  
1508 1596  /*
1509 1597   * nfsv4 create is used to create non-regular files. For regular files,
1510 1598   * use nfsv4 open.
1511 1599   */
1512 1600  /* ARGSUSED */
1513 1601  static void
1514 1602  rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1515 1603      struct compound_state *cs)
1516 1604  {
1517 1605          CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1518 1606          CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1519 1607          int error;
1520 1608          struct vattr bva, iva, iva2, ava, *vap;
1521 1609          cred_t *cr = cs->cr;
1522 1610          vnode_t *dvp = cs->vp;
1523 1611          vnode_t *vp = NULL;
1524 1612          vnode_t *realvp;
1525 1613          char *nm, *lnm;
1526 1614          uint_t len, llen;
1527 1615          int syncval = 0;
1528 1616          struct nfs4_svgetit_arg sarg;
1529 1617          struct nfs4_ntov_table ntov;
1530 1618          struct statvfs64 sb;
1531 1619          nfsstat4 status;
1532 1620          struct sockaddr *ca;
1533 1621          char *name = NULL;
1534 1622          char *lname = NULL;
1535 1623  
1536 1624          DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1537 1625              CREATE4args *, args);
1538 1626  
1539 1627          resp->attrset = 0;
1540 1628  
1541 1629          if (dvp == NULL) {
1542 1630                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1543 1631                  goto out;
1544 1632          }
1545 1633  
1546 1634          /*
1547 1635           * If there is an unshared filesystem mounted on this vnode,
1548 1636           * do not allow to create an object in this directory.
1549 1637           */
1550 1638          if (vn_ismntpt(dvp)) {
1551 1639                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1552 1640                  goto out;
1553 1641          }
1554 1642  
1555 1643          /* Verify that type is correct */
1556 1644          switch (args->type) {
1557 1645          case NF4LNK:
1558 1646          case NF4BLK:
1559 1647          case NF4CHR:
1560 1648          case NF4SOCK:
1561 1649          case NF4FIFO:
1562 1650          case NF4DIR:
1563 1651                  break;
1564 1652          default:
1565 1653                  *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1566 1654                  goto out;
1567 1655          };
1568 1656  
1569 1657          if (cs->access == CS_ACCESS_DENIED) {
1570 1658                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
1571 1659                  goto out;
1572 1660          }
1573 1661          if (dvp->v_type != VDIR) {
1574 1662                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1575 1663                  goto out;
1576 1664          }
1577 1665          status = utf8_dir_verify(&args->objname);
1578 1666          if (status != NFS4_OK) {
1579 1667                  *cs->statusp = resp->status = status;
1580 1668                  goto out;
1581 1669          }
1582 1670  
1583 1671          if (rdonly4(req, cs)) {
1584 1672                  *cs->statusp = resp->status = NFS4ERR_ROFS;
1585 1673                  goto out;
1586 1674          }
1587 1675  
1588 1676          /*
1589 1677           * Name of newly created object
1590 1678           */
1591 1679          nm = utf8_to_fn(&args->objname, &len, NULL);
1592 1680          if (nm == NULL) {
1593 1681                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1594 1682                  goto out;
1595 1683          }
1596 1684  
1597 1685          if (len > MAXNAMELEN) {
1598 1686                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1599 1687                  kmem_free(nm, len);
1600 1688                  goto out;
1601 1689          }
1602 1690  
1603 1691          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1604 1692          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1605 1693              MAXPATHLEN  + 1);
1606 1694  
1607 1695          if (name == NULL) {
1608 1696                  *cs->statusp = resp->status = NFS4ERR_INVAL;
1609 1697                  kmem_free(nm, len);
1610 1698                  goto out;
1611 1699          }
1612 1700  
1613 1701          resp->attrset = 0;
1614 1702  
1615 1703          sarg.sbp = &sb;
1616 1704          sarg.is_referral = B_FALSE;
1617 1705          nfs4_ntov_table_init(&ntov);
1618 1706  
1619 1707          status = do_rfs4_set_attrs(&resp->attrset,
1620 1708              &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1621 1709  
1622 1710          if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1623 1711                  status = NFS4ERR_INVAL;
1624 1712  
1625 1713          if (status != NFS4_OK) {
1626 1714                  *cs->statusp = resp->status = status;
1627 1715                  if (name != nm)
1628 1716                          kmem_free(name, MAXPATHLEN + 1);
1629 1717                  kmem_free(nm, len);
1630 1718                  nfs4_ntov_table_free(&ntov, &sarg);
1631 1719                  resp->attrset = 0;
1632 1720                  goto out;
1633 1721          }
1634 1722  
1635 1723          /* Get "before" change value */
1636 1724          bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1637 1725          error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1638 1726          if (error) {
1639 1727                  *cs->statusp = resp->status = puterrno4(error);
1640 1728                  if (name != nm)
1641 1729                          kmem_free(name, MAXPATHLEN + 1);
1642 1730                  kmem_free(nm, len);
1643 1731                  nfs4_ntov_table_free(&ntov, &sarg);
1644 1732                  resp->attrset = 0;
1645 1733                  goto out;
1646 1734          }
1647 1735          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1648 1736  
1649 1737          vap = sarg.vap;
1650 1738  
1651 1739          /*
1652 1740           * Set the default initial values for attributes when the parent
1653 1741           * directory does not have the VSUID/VSGID bit set and they have
1654 1742           * not been specified in createattrs.
1655 1743           */
1656 1744          if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1657 1745                  vap->va_uid = crgetuid(cr);
1658 1746                  vap->va_mask |= AT_UID;
1659 1747          }
1660 1748          if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1661 1749                  vap->va_gid = crgetgid(cr);
1662 1750                  vap->va_mask |= AT_GID;
1663 1751          }
1664 1752  
1665 1753          vap->va_mask |= AT_TYPE;
1666 1754          switch (args->type) {
1667 1755          case NF4DIR:
1668 1756                  vap->va_type = VDIR;
1669 1757                  if ((vap->va_mask & AT_MODE) == 0) {
1670 1758                          vap->va_mode = 0700;    /* default: owner rwx only */
1671 1759                          vap->va_mask |= AT_MODE;
1672 1760                  }
1673 1761                  error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1674 1762                  if (error)
1675 1763                          break;
1676 1764  
1677 1765                  /*
1678 1766                   * Get the initial "after" sequence number, if it fails,
1679 1767                   * set to zero
1680 1768                   */
1681 1769                  iva.va_mask = AT_SEQ;
1682 1770                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1683 1771                          iva.va_seq = 0;
1684 1772                  break;
1685 1773          case NF4LNK:
1686 1774                  vap->va_type = VLNK;
1687 1775                  if ((vap->va_mask & AT_MODE) == 0) {
1688 1776                          vap->va_mode = 0700;    /* default: owner rwx only */
1689 1777                          vap->va_mask |= AT_MODE;
1690 1778                  }
1691 1779  
1692 1780                  /*
1693 1781                   * symlink names must be treated as data
1694 1782                   */
1695 1783                  lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1696 1784                      &llen, NULL);
1697 1785  
1698 1786                  if (lnm == NULL) {
1699 1787                          *cs->statusp = resp->status = NFS4ERR_INVAL;
1700 1788                          if (name != nm)
1701 1789                                  kmem_free(name, MAXPATHLEN + 1);
1702 1790                          kmem_free(nm, len);
1703 1791                          nfs4_ntov_table_free(&ntov, &sarg);
1704 1792                          resp->attrset = 0;
1705 1793                          goto out;
1706 1794                  }
1707 1795  
1708 1796                  if (llen > MAXPATHLEN) {
1709 1797                          *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1710 1798                          if (name != nm)
1711 1799                                  kmem_free(name, MAXPATHLEN + 1);
1712 1800                          kmem_free(nm, len);
1713 1801                          kmem_free(lnm, llen);
1714 1802                          nfs4_ntov_table_free(&ntov, &sarg);
1715 1803                          resp->attrset = 0;
1716 1804                          goto out;
1717 1805                  }
1718 1806  
1719 1807                  lname = nfscmd_convname(ca, cs->exi, lnm,
1720 1808                      NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1721 1809  
1722 1810                  if (lname == NULL) {
1723 1811                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1724 1812                          if (name != nm)
1725 1813                                  kmem_free(name, MAXPATHLEN + 1);
1726 1814                          kmem_free(nm, len);
1727 1815                          kmem_free(lnm, llen);
1728 1816                          nfs4_ntov_table_free(&ntov, &sarg);
1729 1817                          resp->attrset = 0;
1730 1818                          goto out;
1731 1819                  }
1732 1820  
1733 1821                  error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1734 1822                  if (lname != lnm)
1735 1823                          kmem_free(lname, MAXPATHLEN + 1);
1736 1824                  kmem_free(lnm, llen);
1737 1825                  if (error)
1738 1826                          break;
1739 1827  
1740 1828                  /*
1741 1829                   * Get the initial "after" sequence number, if it fails,
1742 1830                   * set to zero
1743 1831                   */
1744 1832                  iva.va_mask = AT_SEQ;
1745 1833                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1746 1834                          iva.va_seq = 0;
1747 1835  
1748 1836                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1749 1837                      NULL, NULL, NULL);
1750 1838                  if (error)
1751 1839                          break;
1752 1840  
1753 1841                  /*
1754 1842                   * va_seq is not safe over VOP calls, check it again
1755 1843                   * if it has changed zero out iva to force atomic = FALSE.
1756 1844                   */
1757 1845                  iva2.va_mask = AT_SEQ;
1758 1846                  if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1759 1847                      iva2.va_seq != iva.va_seq)
1760 1848                          iva.va_seq = 0;
1761 1849                  break;
1762 1850          default:
1763 1851                  /*
1764 1852                   * probably a special file.
1765 1853                   */
1766 1854                  if ((vap->va_mask & AT_MODE) == 0) {
1767 1855                          vap->va_mode = 0600;    /* default: owner rw only */
1768 1856                          vap->va_mask |= AT_MODE;
1769 1857                  }
1770 1858                  syncval = FNODSYNC;
1771 1859                  /*
1772 1860                   * We know this will only generate one VOP call
1773 1861                   */
1774 1862                  vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1775 1863  
1776 1864                  if (vp == NULL) {
1777 1865                          if (name != nm)
1778 1866                                  kmem_free(name, MAXPATHLEN + 1);
1779 1867                          kmem_free(nm, len);
1780 1868                          nfs4_ntov_table_free(&ntov, &sarg);
1781 1869                          resp->attrset = 0;
1782 1870                          goto out;
1783 1871                  }
1784 1872  
1785 1873                  /*
1786 1874                   * Get the initial "after" sequence number, if it fails,
1787 1875                   * set to zero
1788 1876                   */
1789 1877                  iva.va_mask = AT_SEQ;
1790 1878                  if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1791 1879                          iva.va_seq = 0;
1792 1880  
1793 1881                  break;
1794 1882          }
1795 1883          if (name != nm)
1796 1884                  kmem_free(name, MAXPATHLEN + 1);
1797 1885          kmem_free(nm, len);
1798 1886  
1799 1887          if (error) {
1800 1888                  *cs->statusp = resp->status = puterrno4(error);
1801 1889          }
1802 1890  
1803 1891          /*
1804 1892           * Force modified data and metadata out to stable storage.
1805 1893           */
1806 1894          (void) VOP_FSYNC(dvp, 0, cr, NULL);
1807 1895  
1808 1896          if (resp->status != NFS4_OK) {
1809 1897                  if (vp != NULL)
1810 1898                          VN_RELE(vp);
1811 1899                  nfs4_ntov_table_free(&ntov, &sarg);
1812 1900                  resp->attrset = 0;
1813 1901                  goto out;
1814 1902          }
1815 1903  
1816 1904          /*
1817 1905           * Finish setup of cinfo response, "before" value already set.
1818 1906           * Get "after" change value, if it fails, simply return the
1819 1907           * before value.
1820 1908           */
1821 1909          ava.va_mask = AT_CTIME|AT_SEQ;
1822 1910          if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1823 1911                  ava.va_ctime = bva.va_ctime;
1824 1912                  ava.va_seq = 0;
1825 1913          }
1826 1914          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1827 1915  
1828 1916          /*
1829 1917           * True verification that object was created with correct
1830 1918           * attrs is impossible.  The attrs could have been changed
1831 1919           * immediately after object creation.  If attributes did
1832 1920           * not verify, the only recourse for the server is to
1833 1921           * destroy the object.  Maybe if some attrs (like gid)
1834 1922           * are set incorrectly, the object should be destroyed;
1835 1923           * however, seems bad as a default policy.  Do we really
1836 1924           * want to destroy an object over one of the times not
1837 1925           * verifying correctly?  For these reasons, the server
1838 1926           * currently sets bits in attrset for createattrs
1839 1927           * that were set; however, no verification is done.
1840 1928           *
1841 1929           * vmask_to_nmask accounts for vattr bits set on create
1842 1930           *      [do_rfs4_set_attrs() only sets resp bits for
1843 1931           *       non-vattr/vfs bits.]
1844 1932           * Mask off any bits set by default so as not to return
1845 1933           * more attrset bits than were requested in createattrs
1846 1934           */
1847 1935          nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1848 1936          resp->attrset &= args->createattrs.attrmask;
1849 1937          nfs4_ntov_table_free(&ntov, &sarg);
1850 1938  
1851 1939          error = makefh4(&cs->fh, vp, cs->exi);
1852 1940          if (error) {
1853 1941                  *cs->statusp = resp->status = puterrno4(error);
1854 1942          }
1855 1943  
1856 1944          /*
1857 1945           * The cinfo.atomic = TRUE only if we got no errors, we have
1858 1946           * non-zero va_seq's, and it has incremented by exactly one
1859 1947           * during the creation and it didn't change during the VOP_LOOKUP
1860 1948           * or VOP_FSYNC.
1861 1949           */
1862 1950          if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1863 1951              iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1864 1952                  resp->cinfo.atomic = TRUE;
1865 1953          else
1866 1954                  resp->cinfo.atomic = FALSE;
1867 1955  
1868 1956          /*
1869 1957           * Force modified metadata out to stable storage.
1870 1958           *
1871 1959           * if a underlying vp exists, pass it to VOP_FSYNC
1872 1960           */
1873 1961          if (VOP_REALVP(vp, &realvp, NULL) == 0)
1874 1962                  (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1875 1963          else
1876 1964                  (void) VOP_FSYNC(vp, syncval, cr, NULL);
1877 1965  
1878 1966          if (resp->status != NFS4_OK) {
1879 1967                  VN_RELE(vp);
1880 1968                  goto out;
1881 1969          }
1882 1970          if (cs->vp)
1883 1971                  VN_RELE(cs->vp);
1884 1972  
1885 1973          cs->vp = vp;
1886 1974          *cs->statusp = resp->status = NFS4_OK;
1887 1975  out:
1888 1976          DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1889 1977              CREATE4res *, resp);
1890 1978  }
1891 1979  
1892 1980  /*ARGSUSED*/
1893 1981  static void
1894 1982  rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1895 1983      struct compound_state *cs)
1896 1984  {
                   /*
                    * DELEGPURGE handler.  No purge logic lives here: the request
                    * is passed unchanged to rfs4_op_inval(), bracketed by the
                    * start/done DTrace probes.
                    * NOTE(review): rfs4_op_inval() is defined elsewhere;
                    * presumably it fills in an error status -- confirm.
                    */
1897 1985          DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1898 1986              DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1899 1987  
1900 1988          rfs4_op_inval(argop, resop, req, cs);
1901 1989  
1902 1990          DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1903 1991              DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1904 1992  }
1905 1993  
1906 1994  /*ARGSUSED*/
1907 1995  static void
1908 1996  rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1909 1997      struct compound_state *cs)
1910 1998  {
                   /*
                    * DELEGRETURN handler.  Looks up the delegation state named by
                    * args->deleg_stateid and, when the current filehandle's vnode
                    * is the one that state refers to, returns the delegation.
                    * The resulting status is stored in both resp->status and
                    * *cs->statusp.
                    */
1911 1999          DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1912 2000          DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1913 2001          rfs4_deleg_state_t *dsp;
1914 2002          nfsstat4 status;
1915 2003  
1916 2004          DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1917 2005              DELEGRETURN4args *, args);
1918 2006  
                   /* On failure dsp is not used; the lookup status is the reply. */
1919 2007          status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1920 2008          resp->status = *cs->statusp = status;
1921 2009          if (status != NFS4_OK)
1922 2010                  goto out;
1923 2011  
1924 2012          /* Ensure specified filehandle matches */
1925 2013          if (cs->vp != dsp->rds_finfo->rf_vp) {
1926 2014                  resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1927 2015          } else
1928 2016                  rfs4_return_deleg(dsp, FALSE);
1929 2017  
                   /* The lease is updated whether or not the filehandle matched. */
1930 2018          rfs4_update_lease(dsp->rds_client);
1931 2019  
                   /* Release the state obtained from rfs4_get_deleg_state() above. */
1932 2020          rfs4_deleg_state_rele(dsp);
1933 2021  out:
1934 2022          DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1935 2023              DELEGRETURN4res *, resp);
1936 2024  }
1937 2025  
1938 2026  /*
1939 2027   * Check to see if a given "flavor" is an explicitly shared flavor.
1940 2028   * The assumption of this routine is the "flavor" is already a valid
1941 2029   * flavor in the secinfo list of "exi".
1942 2030   *
1943 2031   *      e.g.
1944 2032   *              # share -o sec=flavor1 /export
1945 2033   *              # share -o sec=flavor2 /export/home
1946 2034   *
1947 2035   *              flavor2 is not an explicitly shared flavor for /export,
1948 2036   *              however it is in the secinfo list for /export thru the
1949 2037   *              server namespace setup.
            *
            * The secinfo array of "exi" is scanned in order; the first entry
            * whose sc_nfsnum equals "flavor", or is AUTH_NONE, decides the
            * result, which is SEC_REF_EXPORTED() of that entry.  Returns 0
            * when no entry matches.
1950 2038   */
1951 2039  int
1952 2040  is_exported_sec(int flavor, struct exportinfo *exi)
1953 2041  {
1954 2042          int     i;
1955 2043          struct secinfo *sp;
1956 2044  
1957 2045          sp = exi->exi_export.ex_secinfo;
1958 2046          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
                           /* An AUTH_NONE entry stands in for any requested flavor. */
1959 2047                  if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1960 2048                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1961 2049                          return (SEC_REF_EXPORTED(&sp[i]));
1962 2050                  }
1963 2051          }
1964 2052  
1965 2053          /* Should not reach this point based on the assumption */
1966 2054          return (0);
1967 2055  }
1968 2056  
1969 2057  /*
1970 2058   * Check if the security flavor used in the request matches what is
1971 2059   * required at the export point or at the root pseudo node (exi_root).
1972 2060   *
1973 2061   * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1974 2062   *
            * NOTE(review): the code below only consults the secinfo list of
            * cs->exi; nothing here references exi_root directly.  Presumably
            * cs->exi is the root pseudo node when appropriate -- confirm.
1975 2063   */
1976 2064  static int
1977 2065  secinfo_match_or_authnone(struct compound_state *cs)
1978 2066  {
1979 2067          int     i;
1980 2068          struct secinfo *sp;
1981 2069  
1982 2070          /*
1983 2071           * Check cs->nfsflavor (from the request) against
1984 2072           * the current export data in cs->exi.
1985 2073           */
1986 2074          sp = cs->exi->exi_export.ex_secinfo;
1987 2075          for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1988 2076                  if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1989 2077                      sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1990 2078                          return (1);
1991 2079          }
1992 2080  
                   /* No entry carries the request flavor or AUTH_NONE. */
1993 2081          return (0);
1994 2082  }
1995 2083  
1996 2084  /*
1997 2085   * Check the access authority for the client and return the correct error.
            *
            * The chosen status is stored in *cs->statusp and also returned.
            * Mapping of the checkauth4() result:
            *      > 0     NFS4_OK (cs->access becomes CS_ACCESS_OK unless the
            *              CS_ACCESS_LIMITED bit is set)
            *      0       NFS4ERR_ACCESS
            *      -2      NFS4ERR_WRONGSEC
            *      other   NFS4ERR_DELAY
            * NFS4ERR_WRONGSEC is also returned, without calling checkauth4(),
            * when secinfo_match_or_authnone() rejects the request flavor.
1998 2086   */
1999 2087  nfsstat4
2000 2088  call_checkauth4(struct compound_state *cs, struct svc_req *req)
2001 2089  {
2002 2090          int     authres;
2003 2091  
2004 2092          /*
2005 2093           * First, check if the security flavor used in the request
2006 2094           * is among the flavors set in the server namespace.
2007 2095           */
2008 2096          if (!secinfo_match_or_authnone(cs)) {
2009 2097                  *cs->statusp = NFS4ERR_WRONGSEC;
2010 2098                  return (*cs->statusp);
2011 2099          }
2012 2100  
2013 2101          authres = checkauth4(cs, req);
2014 2102  
2015 2103          if (authres > 0) {
2016 2104                  *cs->statusp = NFS4_OK;
                           /* Leave a limited-access marking in place if present. */
2017 2105                  if (! (cs->access & CS_ACCESS_LIMITED))
2018 2106                          cs->access = CS_ACCESS_OK;
2019 2107          } else if (authres == 0) {
2020 2108                  *cs->statusp = NFS4ERR_ACCESS;
2021 2109          } else if (authres == -2) {
2022 2110                  *cs->statusp = NFS4ERR_WRONGSEC;
2023 2111          } else {
2024 2112                  *cs->statusp = NFS4ERR_DELAY;
2025 2113          }
2026 2114          return (*cs->statusp);
2027 2115  }
2028 2116  
2029 2117  /*
2030 2118   * bitmap4_to_attrmask is called by getattr and readdir.
2031 2119   * It sets up the vattr mask and determines whether vfsstat call is needed
2032 2120   * based on the input bitmap.
2033 2121   * Returns nfsv4 status.
            *
            * On entry sargp->sbp points at the caller's statvfs64 buffer.  It is
            * cleared here and only put back when "breq" asks for file system
            * attributes, so a non-NULL sargp->sbp on return means a statvfs
            * call is needed.  sargp->flag, rdattr_error, mntdfid_set, xattr and
            * rdattr_error_req are also (re)initialized.  Every path in this
            * routine returns NFS4_OK.
2034 2122   */
2035 2123  static nfsstat4
2036 2124  bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2037 2125  {
2038 2126          int i;
2039 2127          uint_t  va_mask;
                   /* Remember the caller's buffer; sargp->sbp is reset just below. */
2040 2128          struct statvfs64 *sbp = sargp->sbp;
2041 2129  
2042 2130          sargp->sbp = NULL;
2043 2131          sargp->flag = 0;
2044 2132          sargp->rdattr_error = NFS4_OK;
2045 2133          sargp->mntdfid_set = FALSE;
                   /* xattr carries the attrdir/named-attr flags of the current fh. */
2046 2134          if (sargp->cs->vp)
2047 2135                  sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2048 2136                      FH4_ATTRDIR | FH4_NAMEDATTR);
2049 2137          else
2050 2138                  sargp->xattr = 0;
2051 2139  
2052 2140          /*
2053 2141           * Set rdattr_error_req to true if return error per
2054 2142           * failed entry rather than fail the readdir.
2055 2143           */
2056 2144          if (breq & FATTR4_RDATTR_ERROR_MASK)
2057 2145                  sargp->rdattr_error_req = 1;
2058 2146          else
2059 2147                  sargp->rdattr_error_req = 0;
2060 2148  
2061 2149          /*
2062 2150           * generate the va_mask
2063 2151           * Handle the easy cases first
                    * (an exact match of breq against a predefined request mask
                    * maps directly to a precomputed vattr mask)
2064 2152           */
2065 2153          switch (breq) {
2066 2154          case NFS4_NTOV_ATTR_MASK:
2067 2155                  sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2068 2156                  return (NFS4_OK);
2069 2157  
2070 2158          case NFS4_FS_ATTR_MASK:
2071 2159                  sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2072 2160                  sargp->sbp = sbp;
2073 2161                  return (NFS4_OK);
2074 2162  
2075 2163          case NFS4_NTOV_ATTR_CACHE_MASK:
2076 2164                  sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2077 2165                  return (NFS4_OK);
2078 2166  
2079 2167          case FATTR4_LEASE_TIME_MASK:
2080 2168                  sargp->vap->va_mask = 0;
2081 2169                  return (NFS4_OK);
2082 2170  
2083 2171          default:
                           /* General case: OR together the vattr bit of each request bit. */
2084 2172                  va_mask = 0;
2085 2173                  for (i = 0; i < nfs4_ntov_map_size; i++) {
2086 2174                          if ((breq & nfs4_ntov_map[i].fbit) &&
2087 2175                              nfs4_ntov_map[i].vbit)
2088 2176                                  va_mask |= nfs4_ntov_map[i].vbit;
2089 2177                  }
2090 2178  
2091 2179                  /*
2092 2180                   * Check if vfsstat is needed
2093 2181                   */
2094 2182                  if (breq & NFS4_FS_ATTR_MASK)
2095 2183                          sargp->sbp = sbp;
2096 2184  
2097 2185                  sargp->vap->va_mask = va_mask;
2098 2186                  return (NFS4_OK);
2099 2187          }
2100 2188          /* NOTREACHED */
2101 2189  }
2102 2190  
2103 2191  /*
2104 2192   * bitmap4_get_sysattrs is called by getattr and readdir.
2105 2193   * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2106 2194   * Returns nfsv4 status.
            *
            * VFS_STATVFS is only issued when sargp->sbp is non-NULL.  If it
            * fails, sargp->sbp is set to NULL, the errno is converted with
            * puterrno4() and returned, and the vnode attributes are not
            * fetched.  Otherwise the result of rfs4_vop_getattr() on cs->vp is
            * returned.
2107 2195   */
2108 2196  static nfsstat4
2109 2197  bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2110 2198  {
2111 2199          int error;
2112 2200          struct compound_state *cs = sargp->cs;
2113 2201          vnode_t *vp = cs->vp;
2114 2202  
2115 2203          if (sargp->sbp != NULL) {
                           /* Assignment in the condition is intentional. */
2116 2204                  if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2117 2205                          sargp->sbp = NULL;      /* to identify error */
2118 2206                          return (puterrno4(error));
2119 2207                  }
2120 2208          }
2121 2209  
2122 2210          return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2123 2211  }
2124 2212  
2125 2213  static void
2126 2214  nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2127 2215  {
                   /*
                    * Prepare an empty attribute table: allocate a zeroed array of
                    * nfs4_ntov_map_size attribute slots (KM_SLEEP) and reset the
                    * counters.  The array is released by nfs4_ntov_table_free(),
                    * which frees the same size.
                    */
2128 2216          ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2129 2217              KM_SLEEP);
2130 2218          ntovp->attrcnt = 0;
2131 2219          ntovp->vfsstat = FALSE;
2132 2220  }
2133 2221  
2134 2222  static void
2135 2223  nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2136 2224      struct nfs4_svgetit_arg *sargp)
2137 2225  {
                   /*
                    * Tear down an attribute table.  For each of the attrcnt filled
                    * entries, the attribute's sv_getit routine is called with
                    * NFS4ATTR_FREEIT; for SETIT/VERIT operations each entry is
                    * additionally passed to xdr_free().  Finally the "na" array
                    * itself is freed.  na[i] and amap[i] are walked in parallel:
                    * amap[i] is the index into nfs4_ntov_map for na[i].
                    */
2138 2226          int i;
2139 2227          union nfs4_attr_u *na;
2140 2228          uint8_t *amap;
2141 2229  
2142 2230          /*
2143 2231           * XXX Should do the same checks for whether the bit is set
2144 2232           */
2145 2233          for (i = 0, na = ntovp->na, amap = ntovp->amap;
2146 2234              i < ntovp->attrcnt; i++, na++, amap++) {
2147 2235                  (void) (*nfs4_ntov_map[*amap].sv_getit)(
2148 2236                      NFS4ATTR_FREEIT, sargp, na);
2149 2237          }
2150 2238          if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2151 2239                  /*
2152 2240                   * xdr_free for getattr will be done later
2153 2241                   */
2154 2242                  for (i = 0, na = ntovp->na, amap = ntovp->amap;
2155 2243                      i < ntovp->attrcnt; i++, na++, amap++) {
2156 2244                          xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2157 2245                  }
2158 2246          }
                   /* Size must match the allocation in nfs4_ntov_table_init(). */
2159 2247          kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2160 2248  }
2161 2249  
2162 2250  /*
2163 2251   * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
            *
            * breq   - bitmap of the attributes the client requested
            * fattrp - result: attrmask receives the bits actually returned and
            *          attrlist4/attrlist4_len the XDR-encoded attribute values
            * sargp  - attribute context; op and flag are reset here
            *
            * Works in three passes: fetch each requested, supported attribute
            * into a temporary table; size the encoded result with
            * xdr_sizeof(); then encode the table into a freshly allocated
            * buffer.  Returns an nfsv4 status.
            *
            * NOTE(review): when an attribute fetch fails and rdattr_error was
            * not requested, the "goto done" path returns without assigning
            * fattrp->attrlist4 or attrlist4_len; presumably the result is
            * pre-zeroed by the caller -- confirm.
2164 2252   */
2165 2253  static nfsstat4
2166 2254  do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2167 2255      struct nfs4_svgetit_arg *sargp)
2168 2256  {
2169 2257          int error = 0;
2170 2258          int i, k;
2171 2259          struct nfs4_ntov_table ntov;
2172 2260          XDR xdr;
2173 2261          ulong_t xdr_size;
2174 2262          char *xdr_attrs;
2175 2263          nfsstat4 status = NFS4_OK;
                   /* Snapshot, to detect whether this call changes rdattr_error. */
2176 2264          nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2177 2265          union nfs4_attr_u *na;
2178 2266          uint8_t *amap;
2179 2267  
2180 2268          sargp->op = NFS4ATTR_GETIT;
2181 2269          sargp->flag = 0;
2182 2270  
2183 2271          fattrp->attrmask = 0;
2184 2272          /* if no bits requested, then return empty fattr4 */
2185 2273          if (breq == 0) {
2186 2274                  fattrp->attrlist4_len = 0;
2187 2275                  fattrp->attrlist4 = NULL;
2188 2276                  return (NFS4_OK);
2189 2277          }
2190 2278  
2191 2279          /*
2192 2280           * return NFS4ERR_INVAL when client requests write-only attrs
2193 2281           */
2194 2282          if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2195 2283                  return (NFS4ERR_INVAL);
2196 2284  
                   /* From here on, every exit must go through nfs4_ntov_table_free(). */
2197 2285          nfs4_ntov_table_init(&ntov);
2198 2286          na = ntov.na;
2199 2287          amap = ntov.amap;
2200 2288  
2201 2289          /*
2202 2290           * Now loop to get or verify the attrs
2203 2291           */
2204 2292          for (i = 0; i < nfs4_ntov_map_size; i++) {
2205 2293                  if (breq & nfs4_ntov_map[i].fbit) {
2206 2294                          if ((*nfs4_ntov_map[i].sv_getit)(
2207 2295                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2208 2296  
2209 2297                                  error = (*nfs4_ntov_map[i].sv_getit)(
2210 2298                                      NFS4ATTR_GETIT, sargp, na);
2211 2299  
2212 2300                                  /*
2213 2301                                   * Possible error values:
2214 2302                                   * >0 if sv_getit failed to
2215 2303                                   * get the attr; 0 if succeeded;
2216 2304                                   * <0 if rdattr_error and the
2217 2305                                   * attribute cannot be returned.
2218 2306                                   */
2219 2307                                  if (error && !(sargp->rdattr_error_req))
2220 2308                                          goto done;
2221 2309                                  /*
2222 2310                                   * If error then just for entry
2223 2311                                   */
2224 2312                                  if (error == 0) {
                                                   /* Keep the value: claim the next na/amap slot. */
2225 2313                                          fattrp->attrmask |=
2226 2314                                              nfs4_ntov_map[i].fbit;
2227 2315                                          *amap++ =
2228 2316                                              (uint8_t)nfs4_ntov_map[i].nval;
2229 2317                                          na++;
2230 2318                                          (ntov.attrcnt)++;
2231 2319                                  } else if ((error > 0) &&
2232 2320                                      (sargp->rdattr_error == NFS4_OK)) {
                                                   /* Only the first failure is recorded. */
2233 2321                                          sargp->rdattr_error = puterrno4(error);
2234 2322                                  }
2235 2323                                  error = 0;
2236 2324                          }
2237 2325                  }
2238 2326          }
2239 2327  
2240 2328          /*
2241 2329           * If rdattr_error was set after the return value for it was assigned,
2242 2330           * update it.
                    *
                    * The scan skips entries below FATTR4_RDATTR_ERROR and stops at
                    * the first entry at or above it.
                    * NOTE(review): this presumes amap values are in ascending
                    * order -- confirm against nfs4_ntov_map.
2243 2331           */
2244 2332          if (prev_rdattr_error != sargp->rdattr_error) {
2245 2333                  na = ntov.na;
2246 2334                  amap = ntov.amap;
2247 2335                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2248 2336                          k = *amap;
2249 2337                          if (k < FATTR4_RDATTR_ERROR) {
2250 2338                                  continue;
2251 2339                          }
2252 2340                          if ((k == FATTR4_RDATTR_ERROR) &&
2253 2341                              ((*nfs4_ntov_map[k].sv_getit)(
2254 2342                              NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2255 2343  
2256 2344                                  (void) (*nfs4_ntov_map[k].sv_getit)(
2257 2345                                      NFS4ATTR_GETIT, sargp, na);
2258 2346                          }
2259 2347                          break;
2260 2348                  }
2261 2349          }
2262 2350  
                   /* Sizing pass: total encoded length of all collected attributes. */
2263 2351          xdr_size = 0;
2264 2352          na = ntov.na;
2265 2353          amap = ntov.amap;
2266 2354          for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2267 2355                  xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2268 2356          }
2269 2357  
2270 2358          fattrp->attrlist4_len = xdr_size;
2271 2359          if (xdr_size) {
2272 2360                  /* freed by rfs4_op_getattr_free() */
2273 2361                  fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2274 2362  
2275 2363                  xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2276 2364  
                           /* Encoding pass: stop at the first attribute that fails. */
2277 2365                  na = ntov.na;
2278 2366                  amap = ntov.amap;
2279 2367                  for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2280 2368                          if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2281 2369                                  DTRACE_PROBE1(nfss__e__getattr4_encfail,
2282 2370                                      int, *amap);
2283 2371                                  status = NFS4ERR_SERVERFAULT;
2284 2372                                  break;
2285 2373                          }
2286 2374                  }
2287 2375                  /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2288 2376          } else {
2289 2377                  fattrp->attrlist4 = NULL;
2290 2378          }
2291 2379  done:
2292 2380  
2293 2381          nfs4_ntov_table_free(&ntov, sargp);
2294 2382  
                   /* error is only non-zero here when reached via "goto done". */
2295 2383          if (error != 0)
2296 2384                  status = puterrno4(error);
2297 2385  
2298 2386          return (status);
2299 2387  }
2300 2388  
2301 2389  /* ARGSUSED */
2302 2390  static void
2303 2391  rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2304 2392      struct compound_state *cs)
2305 2393  {
                   /*
                    * GETATTR handler.  Requires a current filehandle
                    * (NFS4ERR_NOFILEHANDLE otherwise) and access that is not
                    * CS_ACCESS_DENIED (NFS4ERR_ACCESS otherwise).  The request
                    * bitmap is turned into a vattr mask, the system attributes
                    * are fetched, and the result is encoded into
                    * resp->obj_attributes.  The final status is stored in both
                    * resp->status and *cs->statusp.
                    */
2306 2394          GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2307 2395          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2308 2396          struct nfs4_svgetit_arg sarg;
2309 2397          struct statvfs64 sb;
2310 2398          nfsstat4 status;
2311 2399  
2312 2400          DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2313 2401              GETATTR4args *, args);
2314 2402  
2315 2403          if (cs->vp == NULL) {
2316 2404                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317 2405                  goto out;
2318 2406          }
2319 2407  
2320 2408          if (cs->access == CS_ACCESS_DENIED) {
2321 2409                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2322 2410                  goto out;
2323 2411          }
2324 2412  
                   /* sb is stack storage for statvfs data, used only if needed. */
2325 2413          sarg.sbp = &sb;
2326 2414          sarg.cs = cs;
2327 2415          sarg.is_referral = B_FALSE;
2328 2416  
2329 2417          status = bitmap4_to_attrmask(args->attr_request, &sarg);
2330 2418          if (status == NFS4_OK) {
2331 2419  
2332 2420                  status = bitmap4_get_sysattrs(&sarg);
2333 2421                  if (status == NFS4_OK) {
2334 2422  
2335 2423                          /* Is this a referral? */
2336 2424                          if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2337 2425                                  /* Older V4 Solaris client sees a link */
2338 2426                                  if (client_is_downrev(req))
2339 2427                                          sarg.vap->va_type = VLNK;
2340 2428                                  else
2341 2429                                          sarg.is_referral = B_TRUE;
2342 2430                          }
2343 2431  
2344 2432                          status = do_rfs4_op_getattr(args->attr_request,
2345 2433                              &resp->obj_attributes, &sarg);
2346 2434                  }
2347 2435          }
2348 2436          *cs->statusp = resp->status = status;
2349 2437  out:
2350 2438          DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2351 2439              GETATTR4res *, resp);
2352 2440  }
2353 2441  
2354 2442  static void
2355 2443  rfs4_op_getattr_free(nfs_resop4 *resop)
2356 2444  {
                   /*
                    * Release the GETATTR result by handing resp->obj_attributes
                    * to nfs4_fattr4_free().
                    */
2357 2445          GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2358 2446  
2359 2447          nfs4_fattr4_free(&resp->obj_attributes);
2360 2448  }
2361 2449  
2362 2450  /* ARGSUSED */
2363 2451  static void
2364 2452  rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2365 2453      struct compound_state *cs)
2366 2454  {
                   /*
                    * GETFH handler.  Returns a copy of the current filehandle in
                    * resp->object.  Fails with NFS4ERR_NOFILEHANDLE when there is
                    * no current vnode, NFS4ERR_ACCESS when access is denied, and
                    * NFS4ERR_MOVED when the share point or the vnode itself is a
                    * reparse point.  Every exit goes through "out" so that the
                    * start probe is always paired with a done probe.
                    */
2367 2455          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2368 2456  
2369 2457          DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2370 2458  
2371 2459          if (cs->vp == NULL) {
2372 2460                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2373 2461                  goto out;
2374 2462          }
2375 2463          if (cs->access == CS_ACCESS_DENIED) {
2376 2464                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2377 2465                  goto out;
2378 2466          }
2379 2467  
2380 2468          /* check for reparse point at the share point */
2381 2469          if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2382 2470                  /* it's all bad */
2383 2471                  cs->exi->exi_moved = 1;
2384 2472                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2385 2473                  DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2386 2474                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2387 2475                  goto out;
2388 2476          }
2389 2477  
2390 2478          /* check for reparse point at vp */
2391 2479          if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2392 2480                  /* it's not all bad */
2393 2481                  *cs->statusp = resp->status = NFS4ERR_MOVED;
2394 2482                  DTRACE_PROBE2(nfs4serv__func__referral__moved,
2395 2483                      vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2396 2484                  goto out;
2397 2485          }
2398 2486  
                   /* Buffer is released by rfs4_op_getfh_free() on NFS4_OK replies. */
2399 2487          resp->object.nfs_fh4_val =
2400 2488              kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2401 2489          nfs_fh4_copy(&cs->fh, &resp->object);
2402 2490          *cs->statusp = resp->status = NFS4_OK;
2403 2491  out:
2404 2492          DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2405 2493              GETFH4res *, resp);
2406 2494  }
2407 2495  
2408 2496  static void
2409 2497  rfs4_op_getfh_free(nfs_resop4 *resop)
2410 2498  {
                   /*
                    * Release the filehandle buffer of a GETFH result.  Nothing is
                    * freed unless the op succeeded and a buffer is present; the
                    * pointer and length are cleared afterwards so that a repeated
                    * call does not free the buffer twice.
                    */
2411 2499          GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2412 2500  
2413 2501          if (resp->status == NFS4_OK &&
2414 2502              resp->object.nfs_fh4_val != NULL) {
2415 2503                  kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2416 2504                  resp->object.nfs_fh4_val = NULL;
2417 2505                  resp->object.nfs_fh4_len = 0;
2418 2506          }
2419 2507  }
2420 2508  
2421 2509  /*
2422 2510   * illegal: args: void
2423 2511   *          res : status (NFS4ERR_OP_ILLEGAL)
            *
            * The result's operation code is forced to OP_ILLEGAL and the
            * status NFS4ERR_OP_ILLEGAL is stored in both resp->status and
            * *cs->statusp.  argop and req are not used.
2424 2512   */
2425 2513  /* ARGSUSED */
2426 2514  static void
2427 2515  rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2428 2516      struct svc_req *req, struct compound_state *cs)
2429 2517  {
2430 2518          ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2431 2519  
2432 2520          resop->resop = OP_ILLEGAL;
2433 2521          *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2434 2522  }
2435 2523  
2436 2524  /*
2437 2525   * link: args: SAVED_FH: file, CURRENT_FH: target directory
2438 2526   *       res: status. If success - CURRENT_FH unchanged, return change_info
            *
            * Validation order: both filehandles present, target not a mount
            * point, access not denied, source not a directory, target is a
            * directory, both in the same export, name is valid and not too
            * long, export not read-only.  The directory's ctime and sequence
            * number are sampled before and after VOP_LINK to build cinfo.
            * The status is stored in both resp->status and *cs->statusp.
2439 2527   */
2440 2528  /* ARGSUSED */
2441 2529  static void
2442 2530  rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2443 2531      struct compound_state *cs)
2444 2532  {
2445 2533          LINK4args *args = &argop->nfs_argop4_u.oplink;
2446 2534          LINK4res *resp = &resop->nfs_resop4_u.oplink;
2447 2535          int error;
2448 2536          vnode_t *vp;
2449 2537          vnode_t *dvp;
                   /* Directory attrs: before, immediately after link, after fsync. */
2450 2538          struct vattr bdva, idva, adva;
2451 2539          char *nm;
2452 2540          uint_t  len;
2453 2541          struct sockaddr *ca;
2454 2542          char *name = NULL;
2455 2543          nfsstat4 status;
2456 2544  
2457 2545          DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2458 2546              LINK4args *, args);
2459 2547  
2460 2548          /* SAVED_FH: source object */
2461 2549          vp = cs->saved_vp;
2462 2550          if (vp == NULL) {
2463 2551                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464 2552                  goto out;
2465 2553          }
2466 2554  
2467 2555          /* CURRENT_FH: target directory */
2468 2556          dvp = cs->vp;
2469 2557          if (dvp == NULL) {
2470 2558                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2471 2559                  goto out;
2472 2560          }
2473 2561  
2474 2562          /*
2475 2563           * If there is a non-shared filesystem mounted on this vnode,
2476 2564           * do not allow to link any file in this directory.
2477 2565           */
2478 2566          if (vn_ismntpt(dvp)) {
2479 2567                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2480 2568                  goto out;
2481 2569          }
2482 2570  
2483 2571          if (cs->access == CS_ACCESS_DENIED) {
2484 2572                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
2485 2573                  goto out;
2486 2574          }
2487 2575  
2488 2576          /* Check source object's type validity */
2489 2577          if (vp->v_type == VDIR) {
2490 2578                  *cs->statusp = resp->status = NFS4ERR_ISDIR;
2491 2579                  goto out;
2492 2580          }
2493 2581  
2494 2582          /* Check target directory's type */
2495 2583          if (dvp->v_type != VDIR) {
2496 2584                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2497 2585                  goto out;
2498 2586          }
2499 2587  
                   /* Source and target must belong to the same export. */
2500 2588          if (cs->saved_exi != cs->exi) {
2501 2589                  *cs->statusp = resp->status = NFS4ERR_XDEV;
2502 2590                  goto out;
2503 2591          }
2504 2592  
2505 2593          status = utf8_dir_verify(&args->newname);
2506 2594          if (status != NFS4_OK) {
2507 2595                  *cs->statusp = resp->status = status;
2508 2596                  goto out;
2509 2597          }
2510 2598  
                   /*
                    * nm is an allocated buffer of "len" bytes; from here on every
                    * early exit frees it before jumping to out.
                    */
2511 2599          nm = utf8_to_fn(&args->newname, &len, NULL);
2512 2600          if (nm == NULL) {
2513 2601                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2514 2602                  goto out;
2515 2603          }
2516 2604  
2517 2605          if (len > MAXNAMELEN) {
2518 2606                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2519 2607                  kmem_free(nm, len);
2520 2608                  goto out;
2521 2609          }
2522 2610  
2523 2611          if (rdonly4(req, cs)) {
2524 2612                  *cs->statusp = resp->status = NFS4ERR_ROFS;
2525 2613                  kmem_free(nm, len);
2526 2614                  goto out;
2527 2615          }
2528 2616  
2529 2617          /* Get "before" change value */
2530 2618          bdva.va_mask = AT_CTIME|AT_SEQ;
2531 2619          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2532 2620          if (error) {
2533 2621                  *cs->statusp = resp->status = puterrno4(error);
2534 2622                  kmem_free(nm, len);
2535 2623                  goto out;
2536 2624          }
2537 2625  
                   /*
                    * Convert the name for this client.  The result is either nm
                    * itself or a separate MAXPATHLEN + 1 byte buffer (see the
                    * "nm != name" free below).
                    */
2538 2626          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2539 2627          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2540 2628              MAXPATHLEN  + 1);
2541 2629  
2542 2630          if (name == NULL) {
2543 2631                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2544 2632                  kmem_free(nm, len);
2545 2633                  goto out;
2546 2634          }
2547 2635  
2548 2636          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2549 2637  
2550 2638          error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2551 2639  
2552 2640          if (nm != name)
2553 2641                  kmem_free(name, MAXPATHLEN + 1);
2554 2642          kmem_free(nm, len);
2555 2643  
2556 2644          /*
2557 2645           * Get the initial "after" sequence number, if it fails, set to zero
2558 2646           */
2559 2647          idva.va_mask = AT_SEQ;
2560 2648          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2561 2649                  idva.va_seq = 0;
2562 2650  
2563 2651          /*
2564 2652           * Force modified data and metadata out to stable storage.
                    * This is done before the VOP_LINK result is examined, so it
                    * happens on failure as well.
2565 2653           */
2566 2654          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2567 2655          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2568 2656  
2569 2657          if (error) {
2570 2658                  *cs->statusp = resp->status = puterrno4(error);
2571 2659                  goto out;
2572 2660          }
2573 2661  
2574 2662          /*
2575 2663           * Get "after" change value, if it fails, simply return the
2576 2664           * before value.
2577 2665           */
2578 2666          adva.va_mask = AT_CTIME|AT_SEQ;
2579 2667          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2580 2668                  adva.va_ctime = bdva.va_ctime;
2581 2669                  adva.va_seq = 0;
2582 2670          }
2583 2671  
2584 2672          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2585 2673  
2586 2674          /*
2587 2675           * The cinfo.atomic = TRUE only if we have
2588 2676           * non-zero va_seq's, and it has incremented by exactly one
2589 2677           * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2590 2678           */
2591 2679          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2592 2680              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2593 2681                  resp->cinfo.atomic = TRUE;
2594 2682          else
2595 2683                  resp->cinfo.atomic = FALSE;
2596 2684  
2597 2685          *cs->statusp = resp->status = NFS4_OK;
2598 2686  out:
2599 2687          DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2600 2688              LINK4res *, resp);
2601 2689  }
2602 2690  
2603 2691  /*
2604 2692   * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2605 2693   */
2606 2694  
2607 2695  /* ARGSUSED */
2608 2696  static nfsstat4
2609 2697  do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2610 2698  {
2611 2699          int error;
2612 2700          int different_export = 0;
2613 2701          vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2614 2702          struct exportinfo *exi = NULL, *pre_exi = NULL;
2615 2703          nfsstat4 stat;
2616 2704          fid_t fid;
2617 2705          int attrdir, dotdot, walk;
2618 2706          bool_t is_newvp = FALSE;
2619 2707  
2620 2708          if (cs->vp->v_flag & V_XATTRDIR) {
2621 2709                  attrdir = 1;
2622 2710                  ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2623 2711          } else {
2624 2712                  attrdir = 0;
2625 2713                  ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
  
    | 
      ↓ open down ↓ | 
    1175 lines elided | 
    
      ↑ open up ↑ | 
  
2626 2714          }
2627 2715  
2628 2716          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629 2717  
2630 2718          /*
2631 2719           * If dotdotting, then need to check whether it's
2632 2720           * above the root of a filesystem, or above an
2633 2721           * export point.
2634 2722           */
2635 2723          if (dotdot) {
     2724 +                vnode_t *zone_rootvp;
2636 2725  
     2726 +                ASSERT(cs->exi != NULL);
     2727 +                zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2637 2728                  /*
2638 2729                   * If dotdotting at the root of a filesystem, then
2639 2730                   * need to traverse back to the mounted-on filesystem
2640 2731                   * and do the dotdot lookup there.
2641 2732                   */
2642      -                if (cs->vp->v_flag & VROOT) {
     2733 +                if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2643 2734  
2644 2735                          /*
2645 2736                           * If at the zone root (zone_rootvp), then can
2646 2737                           * go up no further.
2647 2738                           */
2648      -                        if (VN_CMP(cs->vp, rootdir))
     2739 +                        if (VN_CMP(cs->vp, zone_rootvp))
2649 2740                                  return (puterrno4(ENOENT));
2650 2741  
2651 2742                          /*
2652 2743                           * Traverse back to the mounted-on filesystem
2653 2744                           */
2654      -                        cs->vp = untraverse(cs->vp);
     2745 +                        cs->vp = untraverse(cs->vp, zone_rootvp);
2655 2746  
2656 2747                          /*
2657 2748                           * Set the different_export flag so we remember
2658 2749                           * to pick up a new exportinfo entry for
2659 2750                           * this new filesystem.
2660 2751                           */
2661 2752                          different_export = 1;
2662 2753                  } else {
2663 2754  
2664 2755                          /*
2665 2756                           * If dotdotting above an export point then set
2666 2757                           * the different_export to get new export info.
2667 2758                           */
2668 2759                          different_export = nfs_exported(cs->exi, cs->vp);
2669 2760                  }
2670 2761          }
2671 2762  
2672 2763          error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2673 2764              NULL, NULL, NULL);
2674 2765          if (error)
2675 2766                  return (puterrno4(error));
2676 2767  
2677 2768          /*
2678 2769           * If the vnode is in a pseudo filesystem, check whether it is visible.
2679 2770           *
2680 2771           * XXX if the vnode is a symlink and it is not visible in
2681 2772           * a pseudo filesystem, return ENOENT (not following symlink).
2682 2773           * V4 client can not mount such symlink. This is a regression
2683 2774           * from V2/V3.
2684 2775           *
2685 2776           * In the same exported filesystem, if the security flavor used
2686 2777           * is not an explicitly shared flavor, limit the view to the visible
2687 2778           * list entries only. This is not a WRONGSEC case because it's already
2688 2779           * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2689 2780           */
2690 2781          if (!different_export &&
2691 2782              (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2692 2783              cs->access & CS_ACCESS_LIMITED)) {
2693 2784                  if (! nfs_visible(cs->exi, vp, &different_export)) {
2694 2785                          VN_RELE(vp);
2695 2786                          return (puterrno4(ENOENT));
2696 2787                  }
2697 2788          }
2698 2789  
2699 2790          /*
2700 2791           * If it's a mountpoint, then traverse it.
2701 2792           */
2702 2793          if (vn_ismntpt(vp)) {
2703 2794                  pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2704 2795                  pre_tvp = vp;           /* save pre-traversed vnode     */
2705 2796  
2706 2797                  /*
2707 2798                   * hold pre_tvp to counteract rele by traverse.  We will
2708 2799                   * need pre_tvp below if checkexport4 fails
2709 2800                   */
2710 2801                  VN_HOLD(pre_tvp);
2711 2802                  if ((error = traverse(&vp)) != 0) {
2712 2803                          VN_RELE(vp);
2713 2804                          VN_RELE(pre_tvp);
2714 2805                          return (puterrno4(error));
2715 2806                  }
2716 2807                  different_export = 1;
2717 2808          } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2718 2809                  /*
2719 2810                   * The vfsp comparison is to handle the case where
2720 2811                   * a LOFS mount is shared.  lo_lookup traverses mount points,
2721 2812                   * and NFS is unaware of local fs transitions because
2722 2813                   * v_vfsmountedhere isn't set.  For this special LOFS case,
2723 2814                   * the dir and the obj returned by lookup will have different
2724 2815                   * vfs ptrs.
2725 2816                   */
2726 2817                  different_export = 1;
2727 2818          }
2728 2819  
2729 2820          if (different_export) {
2730 2821  
2731 2822                  bzero(&fid, sizeof (fid));
2732 2823                  fid.fid_len = MAXFIDSZ;
2733 2824                  error = vop_fid_pseudo(vp, &fid);
2734 2825                  if (error) {
2735 2826                          VN_RELE(vp);
2736 2827                          if (pre_tvp)
2737 2828                                  VN_RELE(pre_tvp);
2738 2829                          return (puterrno4(error));
2739 2830                  }
2740 2831  
2741 2832                  if (dotdot)
2742 2833                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2743 2834                  else
2744 2835                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2745 2836  
2746 2837                  if (exi == NULL) {
2747 2838                          if (pre_tvp) {
2748 2839                                  /*
2749 2840                                   * If this vnode is a mounted-on vnode,
2750 2841                                   * but the mounted-on file system is not
2751 2842                                   * exported, send back the filehandle for
2752 2843                                   * the mounted-on vnode, not the root of
2753 2844                                   * the mounted-on file system.
2754 2845                                   */
2755 2846                                  VN_RELE(vp);
2756 2847                                  vp = pre_tvp;
2757 2848                                  exi = pre_exi;
2758 2849                          } else {
2759 2850                                  VN_RELE(vp);
2760 2851                                  return (puterrno4(EACCES));
2761 2852                          }
2762 2853                  } else if (pre_tvp) {
2763 2854                          /* we're done with pre_tvp now. release extra hold */
2764 2855                          VN_RELE(pre_tvp);
2765 2856                  }
2766 2857  
2767 2858                  cs->exi = exi;
2768 2859  
2769 2860                  /*
2770 2861                   * Now we do a checkauth4. The reason is that
2771 2862                   * this client/user may not have access to the new
2772 2863                   * exported file system, and if they do,
2773 2864                   * the client/user may be mapped to a different uid.
2774 2865                   *
2775 2866                   * We start with a new cr, because the checkauth4 done
2776 2867                   * in the PUT*FH operation overwrote the cred's uid,
2777 2868                   * gid, etc, and we want the real thing before calling
2778 2869                   * checkauth4()
2779 2870                   */
2780 2871                  crfree(cs->cr);
2781 2872                  cs->cr = crdup(cs->basecr);
2782 2873  
2783 2874                  oldvp = cs->vp;
2784 2875                  cs->vp = vp;
2785 2876                  is_newvp = TRUE;
2786 2877  
2787 2878                  stat = call_checkauth4(cs, req);
2788 2879                  if (stat != NFS4_OK) {
2789 2880                          VN_RELE(cs->vp);
2790 2881                          cs->vp = oldvp;
2791 2882                          return (stat);
2792 2883                  }
2793 2884          }
2794 2885  
2795 2886          /*
2796 2887           * After various NFS checks, do a label check on the path
2797 2888           * component. The label on this path should either be the
2798 2889           * global zone's label or a zone's label. We are only
2799 2890           * interested in the zone's label because exported files
2800 2891           * in the global zone are accessible (though read-only) to
2801 2892           * clients. The exportability/visibility check is already
2802 2893           * done before reaching this code.
2803 2894           */
2804 2895          if (is_system_labeled()) {
2805 2896                  bslabel_t *clabel;
2806 2897  
2807 2898                  ASSERT(req->rq_label != NULL);
2808 2899                  clabel = req->rq_label;
2809 2900                  DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2810 2901                      "got client label from request(1)", struct svc_req *, req);
2811 2902  
2812 2903                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2813 2904                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2814 2905                              cs->exi)) {
2815 2906                                  error = EACCES;
2816 2907                                  goto err_out;
2817 2908                          }
2818 2909                  } else {
2819 2910                          /*
2820 2911                           * We grant access to admin_low label clients
2821 2912                           * only if the client is trusted, i.e. also
2822 2913                           * running Solaris Trusted Extension.
2823 2914                           */
2824 2915                          struct sockaddr *ca;
2825 2916                          int             addr_type;
2826 2917                          void            *ipaddr;
2827 2918                          tsol_tpc_t      *tp;
2828 2919  
2829 2920                          ca = (struct sockaddr *)svc_getrpccaller(
2830 2921                              req->rq_xprt)->buf;
2831 2922                          if (ca->sa_family == AF_INET) {
2832 2923                                  addr_type = IPV4_VERSION;
2833 2924                                  ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2834 2925                          } else if (ca->sa_family == AF_INET6) {
2835 2926                                  addr_type = IPV6_VERSION;
2836 2927                                  ipaddr = &((struct sockaddr_in6 *)
2837 2928                                      ca)->sin6_addr;
2838 2929                          }
2839 2930                          tp = find_tpc(ipaddr, addr_type, B_FALSE);
2840 2931                          if (tp == NULL || tp->tpc_tp.tp_doi !=
2841 2932                              l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2842 2933                              SUN_CIPSO) {
2843 2934                                  if (tp != NULL)
2844 2935                                          TPC_RELE(tp);
2845 2936                                  error = EACCES;
2846 2937                                  goto err_out;
2847 2938                          }
2848 2939                          TPC_RELE(tp);
2849 2940                  }
2850 2941          }
2851 2942  
2852 2943          error = makefh4(&cs->fh, vp, cs->exi);
2853 2944  
2854 2945  err_out:
2855 2946          if (error) {
2856 2947                  if (is_newvp) {
2857 2948                          VN_RELE(cs->vp);
2858 2949                          cs->vp = oldvp;
2859 2950                  } else
2860 2951                          VN_RELE(vp);
2861 2952                  return (puterrno4(error));
2862 2953          }
2863 2954  
2864 2955          if (!is_newvp) {
2865 2956                  if (cs->vp)
2866 2957                          VN_RELE(cs->vp);
2867 2958                  cs->vp = vp;
2868 2959          } else if (oldvp)
2869 2960                  VN_RELE(oldvp);
2870 2961  
2871 2962          /*
2872 2963           * if did lookup on attrdir and didn't lookup .., set named
2873 2964           * attr fh flag
2874 2965           */
2875 2966          if (attrdir && ! dotdot)
2876 2967                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2877 2968  
2878 2969          /* Assume false for now, open proc will set this */
2879 2970          cs->mandlock = FALSE;
2880 2971  
2881 2972          return (NFS4_OK);
2882 2973  }
2883 2974  
2884 2975  /* ARGSUSED */
2885 2976  static void
2886 2977  rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2887 2978      struct compound_state *cs)
2888 2979  {
2889 2980          LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2890 2981          LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2891 2982          char *nm;
2892 2983          uint_t len;
2893 2984          struct sockaddr *ca;
2894 2985          char *name = NULL;
2895 2986          nfsstat4 status;
2896 2987  
2897 2988          DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2898 2989              LOOKUP4args *, args);
2899 2990  
2900 2991          if (cs->vp == NULL) {
2901 2992                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2902 2993                  goto out;
2903 2994          }
2904 2995  
2905 2996          if (cs->vp->v_type == VLNK) {
2906 2997                  *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2907 2998                  goto out;
2908 2999          }
2909 3000  
2910 3001          if (cs->vp->v_type != VDIR) {
2911 3002                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2912 3003                  goto out;
2913 3004          }
2914 3005  
2915 3006          status = utf8_dir_verify(&args->objname);
2916 3007          if (status != NFS4_OK) {
2917 3008                  *cs->statusp = resp->status = status;
2918 3009                  goto out;
2919 3010          }
2920 3011  
2921 3012          nm = utf8_to_str(&args->objname, &len, NULL);
2922 3013          if (nm == NULL) {
2923 3014                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2924 3015                  goto out;
2925 3016          }
2926 3017  
2927 3018          if (len > MAXNAMELEN) {
2928 3019                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2929 3020                  kmem_free(nm, len);
2930 3021                  goto out;
2931 3022          }
2932 3023  
2933 3024          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2934 3025          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2935 3026              MAXPATHLEN  + 1);
2936 3027  
2937 3028          if (name == NULL) {
2938 3029                  *cs->statusp = resp->status = NFS4ERR_INVAL;
2939 3030                  kmem_free(nm, len);
2940 3031                  goto out;
2941 3032          }
2942 3033  
2943 3034          *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2944 3035  
2945 3036          if (name != nm)
2946 3037                  kmem_free(name, MAXPATHLEN + 1);
2947 3038          kmem_free(nm, len);
2948 3039  
2949 3040  out:
2950 3041          DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2951 3042              LOOKUP4res *, resp);
2952 3043  }
2953 3044  
2954 3045  /* ARGSUSED */
2955 3046  static void
2956 3047  rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2957 3048      struct compound_state *cs)
2958 3049  {
2959 3050          LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2960 3051  
2961 3052          DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2962 3053  
2963 3054          if (cs->vp == NULL) {
2964 3055                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2965 3056                  goto out;
2966 3057          }
2967 3058  
2968 3059          if (cs->vp->v_type != VDIR) {
2969 3060                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2970 3061                  goto out;
2971 3062          }
2972 3063  
2973 3064          *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2974 3065  
2975 3066          /*
2976 3067           * Per the NFSv4 specification, LOOKUPP should not check for
2977 3068           * NFS4ERR_WRONGSEC. Return NFS4_OK instead.
2978 3069           */
2979 3070          if (resp->status == NFS4ERR_WRONGSEC) {
2980 3071                  *cs->statusp = resp->status = NFS4_OK;
2981 3072          }
2982 3073  
2983 3074  out:
2984 3075          DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2985 3076              LOOKUPP4res *, resp);
2986 3077  }
2987 3078  
2988 3079  
2989 3080  /*ARGSUSED2*/
2990 3081  static void
2991 3082  rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2992 3083      struct compound_state *cs)
2993 3084  {
2994 3085          OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
2995 3086          OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
2996 3087          vnode_t         *avp = NULL;
2997 3088          int             lookup_flags = LOOKUP_XATTR, error;
2998 3089          int             exp_ro = 0;
2999 3090  
3000 3091          DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3001 3092              OPENATTR4args *, args);
3002 3093  
3003 3094          if (cs->vp == NULL) {
3004 3095                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3005 3096                  goto out;
3006 3097          }
3007 3098  
3008 3099          if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3009 3100              !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3010 3101                  *cs->statusp = resp->status = puterrno4(ENOTSUP);
3011 3102                  goto out;
3012 3103          }
3013 3104  
3014 3105          /*
3015 3106           * If file system supports passing ACE mask to VOP_ACCESS then
3016 3107           * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3017 3108           */
3018 3109  
3019 3110          if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3020 3111                  error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3021 3112                      V_ACE_MASK, cs->cr, NULL);
3022 3113          else
3023 3114                  error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3024 3115                      (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3025 3116                      (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3026 3117  
3027 3118          if (error) {
3028 3119                  *cs->statusp = resp->status = puterrno4(EACCES);
3029 3120                  goto out;
3030 3121          }
3031 3122  
3032 3123          /*
3033 3124           * The CREATE_XATTR_DIR VOP flag cannot be specified if
3034 3125           * the file system is exported read-only -- regardless of
3035 3126           * createdir flag.  Otherwise the attrdir would be created
3036 3127           * (assuming server fs isn't mounted readonly locally).  If
3037 3128           * VOP_LOOKUP returns ENOENT in this case, the error will
3038 3129           * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3039 3130           * because specfs has no VOP_LOOKUP op, so the macro would
3040 3131           * return ENOSYS.  EINVAL is returned by all (current)
3041 3132           * Solaris file system implementations when any of their
3042 3133           * restrictions are violated (xattr(dir) can't have xattrdir).
3043 3134           * Returning NOTSUPP is more appropriate in this case
3044 3135           * because the object will never be able to have an attrdir.
3045 3136           */
3046 3137          if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3047 3138                  lookup_flags |= CREATE_XATTR_DIR;
3048 3139  
3049 3140          error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3050 3141              NULL, NULL, NULL);
3051 3142  
3052 3143          if (error) {
3053 3144                  if (error == ENOENT && args->createdir && exp_ro)
3054 3145                          *cs->statusp = resp->status = puterrno4(EROFS);
3055 3146                  else if (error == EINVAL || error == ENOSYS)
3056 3147                          *cs->statusp = resp->status = puterrno4(ENOTSUP);
3057 3148                  else
3058 3149                          *cs->statusp = resp->status = puterrno4(error);
3059 3150                  goto out;
3060 3151          }
3061 3152  
3062 3153          ASSERT(avp->v_flag & V_XATTRDIR);
3063 3154  
3064 3155          error = makefh4(&cs->fh, avp, cs->exi);
3065 3156  
3066 3157          if (error) {
3067 3158                  VN_RELE(avp);
3068 3159                  *cs->statusp = resp->status = puterrno4(error);
3069 3160                  goto out;
3070 3161          }
3071 3162  
3072 3163          VN_RELE(cs->vp);
3073 3164          cs->vp = avp;
3074 3165  
3075 3166          /*
3076 3167           * There is no requirement for an attrdir fh flag
3077 3168           * because the attrdir has a vnode flag to distinguish
3078 3169           * it from regular (non-xattr) directories.  The
3079 3170           * FH4_ATTRDIR flag is set for future sanity checks.
3080 3171           */
3081 3172          set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3082 3173          *cs->statusp = resp->status = NFS4_OK;
3083 3174  
3084 3175  out:
3085 3176          DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3086 3177              OPENATTR4res *, resp);
3087 3178  }
3088 3179  
3089 3180  static int
3090 3181  do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3091 3182      caller_context_t *ct)
3092 3183  {
3093 3184          int error;
3094 3185          int i;
3095 3186          clock_t delaytime;
3096 3187  
3097 3188          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3098 3189  
3099 3190          /*
3100 3191           * Don't block on mandatory locks. If this routine returns
3101 3192           * EAGAIN, the caller should return NFS4ERR_LOCKED.
3102 3193           */
3103 3194          uio->uio_fmode = FNONBLOCK;
3104 3195  
3105 3196          for (i = 0; i < rfs4_maxlock_tries; i++) {
3106 3197  
3107 3198  
3108 3199                  if (direction == FREAD) {
3109 3200                          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3110 3201                          error = VOP_READ(vp, uio, ioflag, cred, ct);
3111 3202                          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3112 3203                  } else {
3113 3204                          (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3114 3205                          error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3115 3206                          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3116 3207                  }
3117 3208  
3118 3209                  if (error != EAGAIN)
3119 3210                          break;
3120 3211  
3121 3212                  if (i < rfs4_maxlock_tries - 1) {
3122 3213                          delay(delaytime);
3123 3214                          delaytime *= 2;
3124 3215                  }
3125 3216          }
3126 3217  
3127 3218          return (error);
3128 3219  }
3129 3220  
3130 3221  /* ARGSUSED */
3131 3222  static void
3132 3223  rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3133 3224      struct compound_state *cs)
3134 3225  {
3135 3226          READ4args *args = &argop->nfs_argop4_u.opread;
3136 3227          READ4res *resp = &resop->nfs_resop4_u.opread;
3137 3228          int error;
3138 3229          int verror;
3139 3230          vnode_t *vp;
3140 3231          struct vattr va;
3141 3232          struct iovec iov, *iovp = NULL;
3142 3233          int iovcnt;
3143 3234          struct uio uio;
3144 3235          u_offset_t offset;
3145 3236          bool_t *deleg = &cs->deleg;
3146 3237          nfsstat4 stat;
3147 3238          int in_crit = 0;
3148 3239          mblk_t *mp = NULL;
3149 3240          int alloc_err = 0;
3150 3241          int rdma_used = 0;
3151 3242          int loaned_buffers;
3152 3243          caller_context_t ct;
3153 3244          struct uio *uiop;
3154 3245  
3155 3246          DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3156 3247              READ4args, args);
3157 3248  
3158 3249          vp = cs->vp;
3159 3250          if (vp == NULL) {
3160 3251                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3161 3252                  goto out;
3162 3253          }
3163 3254          if (cs->access == CS_ACCESS_DENIED) {
3164 3255                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3165 3256                  goto out;
3166 3257          }
3167 3258  
3168 3259          if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3169 3260              deleg, TRUE, &ct)) != NFS4_OK) {
3170 3261                  *cs->statusp = resp->status = stat;
3171 3262                  goto out;
3172 3263          }
3173 3264  
3174 3265          /*
3175 3266           * Enter the critical region before calling VOP_RWLOCK
3176 3267           * to avoid a deadlock with write requests.
3177 3268           */
3178 3269          if (nbl_need_check(vp)) {
3179 3270                  nbl_start_crit(vp, RW_READER);
3180 3271                  in_crit = 1;
3181 3272                  if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3182 3273                      &ct)) {
3183 3274                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
3184 3275                          goto out;
3185 3276                  }
3186 3277          }
3187 3278  
3188 3279          if (args->wlist) {
3189 3280                  if (args->count > clist_len(args->wlist)) {
3190 3281                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3191 3282                          goto out;
3192 3283                  }
3193 3284                  rdma_used = 1;
3194 3285          }
3195 3286  
3196 3287          /* use loaned buffers for TCP */
3197 3288          loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3198 3289  
3199 3290          va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3200 3291          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3201 3292  
3202 3293          /*
3203 3294           * If we can't get the attributes, then we can't do the
3204 3295           * right access checking.  So, we'll fail the request.
3205 3296           */
3206 3297          if (verror) {
3207 3298                  *cs->statusp = resp->status = puterrno4(verror);
3208 3299                  goto out;
3209 3300          }
3210 3301  
3211 3302          if (vp->v_type != VREG) {
3212 3303                  *cs->statusp = resp->status =
3213 3304                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3214 3305                  goto out;
3215 3306          }
3216 3307  
3217 3308          if (crgetuid(cs->cr) != va.va_uid &&
3218 3309              (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3219 3310              (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3220 3311                  *cs->statusp = resp->status = puterrno4(error);
3221 3312                  goto out;
3222 3313          }
3223 3314  
3224 3315          if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3225 3316                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3226 3317                  goto out;
3227 3318          }
3228 3319  
3229 3320          offset = args->offset;
3230 3321          if (offset >= va.va_size) {
3231 3322                  *cs->statusp = resp->status = NFS4_OK;
3232 3323                  resp->eof = TRUE;
3233 3324                  resp->data_len = 0;
3234 3325                  resp->data_val = NULL;
3235 3326                  resp->mblk = NULL;
3236 3327                  /* RDMA */
3237 3328                  resp->wlist = args->wlist;
3238 3329                  resp->wlist_len = resp->data_len;
3239 3330                  *cs->statusp = resp->status = NFS4_OK;
3240 3331                  if (resp->wlist)
3241 3332                          clist_zero_len(resp->wlist);
3242 3333                  goto out;
3243 3334          }
3244 3335  
3245 3336          if (args->count == 0) {
3246 3337                  *cs->statusp = resp->status = NFS4_OK;
3247 3338                  resp->eof = FALSE;
3248 3339                  resp->data_len = 0;
3249 3340                  resp->data_val = NULL;
3250 3341                  resp->mblk = NULL;
3251 3342                  /* RDMA */
3252 3343                  resp->wlist = args->wlist;
3253 3344                  resp->wlist_len = resp->data_len;
3254 3345                  if (resp->wlist)
3255 3346                          clist_zero_len(resp->wlist);
3256 3347                  goto out;
3257 3348          }
3258 3349  
3259 3350          /*
3260 3351           * Do not allocate memory more than maximum allowed
3261 3352           * transfer size
3262 3353           */
3263 3354          if (args->count > rfs4_tsize(req))
3264 3355                  args->count = rfs4_tsize(req);
3265 3356  
3266 3357          if (loaned_buffers) {
3267 3358                  uiop = (uio_t *)rfs_setup_xuio(vp);
3268 3359                  ASSERT(uiop != NULL);
3269 3360                  uiop->uio_segflg = UIO_SYSSPACE;
3270 3361                  uiop->uio_loffset = args->offset;
3271 3362                  uiop->uio_resid = args->count;
3272 3363  
3273 3364                  /* Jump to do the read if successful */
3274 3365                  if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3275 3366                          /*
3276 3367                           * Need to hold the vnode until after VOP_RETZCBUF()
3277 3368                           * is called.
3278 3369                           */
3279 3370                          VN_HOLD(vp);
3280 3371                          goto doio_read;
3281 3372                  }
3282 3373  
3283 3374                  DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3284 3375                      uiop->uio_loffset, int, uiop->uio_resid);
3285 3376  
3286 3377                  uiop->uio_extflg = 0;
3287 3378  
3288 3379                  /* failure to setup for zero copy */
3289 3380                  rfs_free_xuio((void *)uiop);
3290 3381                  loaned_buffers = 0;
3291 3382          }
3292 3383  
3293 3384          /*
3294 3385           * If returning data via RDMA Write, then grab the chunk list. If we
3295 3386           * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3296 3387           */
3297 3388          if (rdma_used) {
3298 3389                  mp = NULL;
3299 3390                  (void) rdma_get_wchunk(req, &iov, args->wlist);
3300 3391                  uio.uio_iov = &iov;
3301 3392                  uio.uio_iovcnt = 1;
3302 3393          } else {
3303 3394                  /*
3304 3395                   * mp will contain the data to be sent out in the read reply.
3305 3396                   * It will be freed after the reply has been sent.
3306 3397                   */
3307 3398                  mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3308 3399                  ASSERT(mp != NULL);
3309 3400                  ASSERT(alloc_err == 0);
3310 3401                  uio.uio_iov = iovp;
3311 3402                  uio.uio_iovcnt = iovcnt;
3312 3403          }
3313 3404  
3314 3405          uio.uio_segflg = UIO_SYSSPACE;
3315 3406          uio.uio_extflg = UIO_COPY_CACHED;
3316 3407          uio.uio_loffset = args->offset;
3317 3408          uio.uio_resid = args->count;
3318 3409          uiop = &uio;
3319 3410  
3320 3411  doio_read:
3321 3412          error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3322 3413  
3323 3414          va.va_mask = AT_SIZE;
3324 3415          verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3325 3416  
3326 3417          if (error) {
3327 3418                  if (mp)
3328 3419                          freemsg(mp);
3329 3420                  *cs->statusp = resp->status = puterrno4(error);
3330 3421                  goto out;
3331 3422          }
3332 3423  
3333 3424          /* make mblk using zc buffers */
3334 3425          if (loaned_buffers) {
3335 3426                  mp = uio_to_mblk(uiop);
3336 3427                  ASSERT(mp != NULL);
3337 3428          }
3338 3429  
3339 3430          *cs->statusp = resp->status = NFS4_OK;
3340 3431  
3341 3432          ASSERT(uiop->uio_resid >= 0);
3342 3433          resp->data_len = args->count - uiop->uio_resid;
3343 3434          if (mp) {
3344 3435                  resp->data_val = (char *)mp->b_datap->db_base;
3345 3436                  rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3346 3437          } else {
3347 3438                  resp->data_val = (caddr_t)iov.iov_base;
3348 3439          }
3349 3440  
3350 3441          resp->mblk = mp;
3351 3442  
3352 3443          if (!verror && offset + resp->data_len == va.va_size)
3353 3444                  resp->eof = TRUE;
3354 3445          else
3355 3446                  resp->eof = FALSE;
3356 3447  
3357 3448          if (rdma_used) {
3358 3449                  if (!rdma_setup_read_data4(args, resp)) {
3359 3450                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3360 3451                  }
3361 3452          } else {
3362 3453                  resp->wlist = NULL;
3363 3454          }
3364 3455  
3365 3456  out:
3366 3457          if (in_crit)
3367 3458                  nbl_end_crit(vp);
3368 3459  
3369 3460          if (iovp != NULL)
3370 3461                  kmem_free(iovp, iovcnt * sizeof (struct iovec));
3371 3462  
3372 3463          DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3373 3464              READ4res *, resp);
3374 3465  }
3375 3466  
3376 3467  static void
3377 3468  rfs4_op_read_free(nfs_resop4 *resop)
3378 3469  {
3379 3470          READ4res        *resp = &resop->nfs_resop4_u.opread;
3380 3471  
3381 3472          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3382 3473                  freemsg(resp->mblk);
3383 3474                  resp->mblk = NULL;
3384 3475                  resp->data_val = NULL;
3385 3476                  resp->data_len = 0;
3386 3477          }
3387 3478  }
3388 3479  
3389 3480  static void
3390 3481  rfs4_op_readdir_free(nfs_resop4 * resop)
3391 3482  {
3392 3483          READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3393 3484  
3394 3485          if (resp->status == NFS4_OK && resp->mblk != NULL) {
3395 3486                  freeb(resp->mblk);
3396 3487                  resp->mblk = NULL;
3397 3488                  resp->data_len = 0;
3398 3489          }
3399 3490  }
3400 3491  
3401 3492  
  
    | 
      ↓ open down ↓ | 
    737 lines elided | 
    
      ↑ open up ↑ | 
  
3402 3493  /* ARGSUSED */
3403 3494  static void
3404 3495  rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405 3496      struct compound_state *cs)
3406 3497  {
3407 3498          PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3408 3499          int             error;
3409 3500          vnode_t         *vp;
3410 3501          struct exportinfo *exi, *sav_exi;
3411 3502          nfs_fh4_fmt_t   *fh_fmtp;
     3503 +        nfs_export_t *ne = nfs_get_export();
3412 3504  
3413 3505          DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414 3506  
3415 3507          if (cs->vp) {
3416 3508                  VN_RELE(cs->vp);
3417 3509                  cs->vp = NULL;
3418 3510          }
3419 3511  
3420 3512          if (cs->cr)
3421 3513                  crfree(cs->cr);
3422 3514  
3423 3515          cs->cr = crdup(cs->basecr);
3424 3516  
3425      -        vp = exi_public->exi_vp;
     3517 +        vp = ne->exi_public->exi_vp;
3426 3518          if (vp == NULL) {
3427 3519                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428 3520                  goto out;
3429 3521          }
3430 3522  
3431      -        error = makefh4(&cs->fh, vp, exi_public);
     3523 +        error = makefh4(&cs->fh, vp, ne->exi_public);
3432 3524          if (error != 0) {
3433 3525                  *cs->statusp = resp->status = puterrno4(error);
3434 3526                  goto out;
3435 3527          }
3436 3528          sav_exi = cs->exi;
3437      -        if (exi_public == exi_root) {
     3529 +        if (ne->exi_public == ne->exi_root) {
3438 3530                  /*
3439 3531                   * No filesystem is actually shared public, so we default
3440 3532                   * to exi_root. In this case, we must check whether root
3441 3533                   * is exported.
3442 3534                   */
3443 3535                  fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444 3536  
3445 3537                  /*
3446 3538                   * if root filesystem is exported, the exportinfo struct that we
3447 3539                   * should use is what checkexport4 returns, because root_exi is
3448 3540                   * actually a mostly empty struct.
3449 3541                   */
3450 3542                  exi = checkexport4(&fh_fmtp->fh4_fsid,
3451 3543                      (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452      -                cs->exi = ((exi != NULL) ? exi : exi_public);
     3544 +                cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3453 3545          } else {
3454 3546                  /*
3455 3547                   * it's a properly shared filesystem
3456 3548                   */
3457      -                cs->exi = exi_public;
     3549 +                cs->exi = ne->exi_public;
3458 3550          }
3459 3551  
3460 3552          if (is_system_labeled()) {
3461 3553                  bslabel_t *clabel;
3462 3554  
3463 3555                  ASSERT(req->rq_label != NULL);
3464 3556                  clabel = req->rq_label;
3465 3557                  DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466 3558                      "got client label from request(1)",
3467 3559                      struct svc_req *, req);
3468 3560                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3469 3561                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3470 3562                              cs->exi)) {
3471 3563                                  *cs->statusp = resp->status =
3472 3564                                      NFS4ERR_SERVERFAULT;
3473 3565                                  goto out;
3474 3566                          }
3475 3567                  }
3476 3568          }
3477 3569  
3478 3570          VN_HOLD(vp);
3479 3571          cs->vp = vp;
3480 3572  
3481 3573          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3482 3574                  VN_RELE(cs->vp);
3483 3575                  cs->vp = NULL;
3484 3576                  cs->exi = sav_exi;
3485 3577                  goto out;
3486 3578          }
3487 3579  
3488 3580          *cs->statusp = resp->status = NFS4_OK;
3489 3581  out:
3490 3582          DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3491 3583              PUTPUBFH4res *, resp);
3492 3584  }
3493 3585  
3494 3586  /*
3495 3587   * XXX - issue with put*fh operations. Suppose /export/home is exported.
3496 3588   * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3497 3589   * or joe have restrictive search permissions, then we shouldn't let
3498 3590   * the client get a file handle. This is easy to enforce. However, we
3499 3591   * don't know what security flavor should be used until we resolve the
3500 3592   * path name. Another complication is uid mapping. If root is
3501 3593   * the user, then it will be mapped to the anonymous user by default,
3502 3594   * but we won't know that till we've resolved the path name. And we won't
3503 3595   * know what the anonymous user is.
3504 3596   * Luckily, SECINFO is specified to take a full filename.
3505 3597   * So what we will have to do in rfs4_op_lookup is check that the flavor of
3506 3598   * the target object matches that of the request, and if root was the
3507 3599   * caller, check for the root= and anon= options, and if necessary,
3508 3600   * repeat the lookup using the right cred_t. But that's not done yet.
3509 3601   */
3510 3602  /* ARGSUSED */
3511 3603  static void
3512 3604  rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3513 3605      struct compound_state *cs)
3514 3606  {
3515 3607          PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3516 3608          PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3517 3609          nfs_fh4_fmt_t *fh_fmtp;
3518 3610  
3519 3611          DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3520 3612              PUTFH4args *, args);
3521 3613  
3522 3614          if (cs->vp) {
3523 3615                  VN_RELE(cs->vp);
3524 3616                  cs->vp = NULL;
3525 3617          }
3526 3618  
3527 3619          if (cs->cr) {
3528 3620                  crfree(cs->cr);
3529 3621                  cs->cr = NULL;
3530 3622          }
3531 3623  
3532 3624  
3533 3625          if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3534 3626                  *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3535 3627                  goto out;
3536 3628          }
3537 3629  
3538 3630          fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3539 3631          cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3540 3632              NULL);
3541 3633  
3542 3634          if (cs->exi == NULL) {
3543 3635                  *cs->statusp = resp->status = NFS4ERR_STALE;
3544 3636                  goto out;
3545 3637          }
3546 3638  
3547 3639          cs->cr = crdup(cs->basecr);
3548 3640  
3549 3641          ASSERT(cs->cr != NULL);
3550 3642  
3551 3643          if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3552 3644                  *cs->statusp = resp->status;
3553 3645                  goto out;
3554 3646          }
3555 3647  
3556 3648          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3557 3649                  VN_RELE(cs->vp);
3558 3650                  cs->vp = NULL;
3559 3651                  goto out;
3560 3652          }
3561 3653  
3562 3654          nfs_fh4_copy(&args->object, &cs->fh);
3563 3655          *cs->statusp = resp->status = NFS4_OK;
3564 3656          cs->deleg = FALSE;
3565 3657  
3566 3658  out:
3567 3659          DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3568 3660              PUTFH4res *, resp);
3569 3661  }
3570 3662  
3571 3663  /* ARGSUSED */
3572 3664  static void
3573 3665  rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3574 3666      struct compound_state *cs)
3575 3667  {
3576 3668          PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3577 3669          int error;
3578 3670          fid_t fid;
3579 3671          struct exportinfo *exi, *sav_exi;
3580 3672  
3581 3673          DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3582 3674  
3583 3675          if (cs->vp) {
3584 3676                  VN_RELE(cs->vp);
3585 3677                  cs->vp = NULL;
3586 3678          }
3587 3679  
3588 3680          if (cs->cr)
  
    | 
      ↓ open down ↓ | 
    121 lines elided | 
    
      ↑ open up ↑ | 
  
3589 3681                  crfree(cs->cr);
3590 3682  
3591 3683          cs->cr = crdup(cs->basecr);
3592 3684  
3593 3685          /*
3594 3686           * Using rootdir, the system root vnode,
3595 3687           * get its fid.
3596 3688           */
3597 3689          bzero(&fid, sizeof (fid));
3598 3690          fid.fid_len = MAXFIDSZ;
3599      -        error = vop_fid_pseudo(rootdir, &fid);
     3691 +        error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3600 3692          if (error != 0) {
3601 3693                  *cs->statusp = resp->status = puterrno4(error);
3602 3694                  goto out;
3603 3695          }
3604 3696  
3605 3697          /*
3606 3698           * Then use the root fsid & fid it to find out if it's exported
3607 3699           *
3608 3700           * If the server root isn't exported directly, then
3609 3701           * it should at least be a pseudo export based on
3610 3702           * one or more exports further down in the server's
3611 3703           * file tree.
3612 3704           */
3613      -        exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
     3705 +        exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3614 3706          if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615 3707                  NFS4_DEBUG(rfs4_debug,
3616 3708                      (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617 3709                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618 3710                  goto out;
3619 3711          }
3620 3712  
3621 3713          /*
3622 3714           * Now make a filehandle based on the root
3623 3715           * export and root vnode.
3624 3716           */
3625      -        error = makefh4(&cs->fh, rootdir, exi);
     3717 +        error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3626 3718          if (error != 0) {
3627 3719                  *cs->statusp = resp->status = puterrno4(error);
3628 3720                  goto out;
3629 3721          }
3630 3722  
3631 3723          sav_exi = cs->exi;
3632 3724          cs->exi = exi;
3633 3725  
3634      -        VN_HOLD(rootdir);
3635      -        cs->vp = rootdir;
     3726 +        VN_HOLD(ZONE_ROOTVP());
     3727 +        cs->vp = ZONE_ROOTVP();
3636 3728  
3637 3729          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638      -                VN_RELE(rootdir);
     3730 +                VN_RELE(cs->vp);
3639 3731                  cs->vp = NULL;
3640 3732                  cs->exi = sav_exi;
3641 3733                  goto out;
3642 3734          }
3643 3735  
3644 3736          *cs->statusp = resp->status = NFS4_OK;
3645 3737          cs->deleg = FALSE;
3646 3738  out:
3647 3739          DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648 3740              PUTROOTFH4res *, resp);
3649 3741  }
3650 3742  
3651 3743  /*
3652 3744   * readlink: args: CURRENT_FH.
3653 3745   *      res: status. If success - CURRENT_FH unchanged, return linktext.
3654 3746   */
3655 3747  
3656 3748  /* ARGSUSED */
3657 3749  static void
3658 3750  rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3659 3751      struct compound_state *cs)
3660 3752  {
3661 3753          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3662 3754          int error;
3663 3755          vnode_t *vp;
3664 3756          struct iovec iov;
3665 3757          struct vattr va;
3666 3758          struct uio uio;
3667 3759          char *data;
3668 3760          struct sockaddr *ca;
3669 3761          char *name = NULL;
3670 3762          int is_referral;
3671 3763  
3672 3764          DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3673 3765  
3674 3766          /* CURRENT_FH: directory */
3675 3767          vp = cs->vp;
3676 3768          if (vp == NULL) {
3677 3769                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3678 3770                  goto out;
3679 3771          }
3680 3772  
3681 3773          if (cs->access == CS_ACCESS_DENIED) {
3682 3774                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3683 3775                  goto out;
3684 3776          }
3685 3777  
3686 3778          /* Is it a referral? */
3687 3779          if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3688 3780  
3689 3781                  is_referral = 1;
3690 3782  
3691 3783          } else {
3692 3784  
3693 3785                  is_referral = 0;
3694 3786  
3695 3787                  if (vp->v_type == VDIR) {
3696 3788                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
3697 3789                          goto out;
3698 3790                  }
3699 3791  
3700 3792                  if (vp->v_type != VLNK) {
3701 3793                          *cs->statusp = resp->status = NFS4ERR_INVAL;
3702 3794                          goto out;
3703 3795                  }
3704 3796  
3705 3797          }
3706 3798  
3707 3799          va.va_mask = AT_MODE;
3708 3800          error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3709 3801          if (error) {
3710 3802                  *cs->statusp = resp->status = puterrno4(error);
3711 3803                  goto out;
3712 3804          }
3713 3805  
  
    | 
      ↓ open down ↓ | 
    65 lines elided | 
    
      ↑ open up ↑ | 
  
3714 3806          if (MANDLOCK(vp, va.va_mode)) {
3715 3807                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3716 3808                  goto out;
3717 3809          }
3718 3810  
3719 3811          data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3720 3812  
3721 3813          if (is_referral) {
3722 3814                  char *s;
3723 3815                  size_t strsz;
     3816 +                kstat_named_t *stat =
     3817 +                    cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3724 3818  
3725 3819                  /* Get an artificial symlink based on a referral */
3726 3820                  s = build_symlink(vp, cs->cr, &strsz);
3727      -                global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
     3821 +                stat[NFS_REFERLINKS].value.ui64++;
3728 3822                  DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3729 3823                      vnode_t *, vp, char *, s);
3730 3824                  if (s == NULL)
3731 3825                          error = EINVAL;
3732 3826                  else {
3733 3827                          error = 0;
3734 3828                          (void) strlcpy(data, s, MAXPATHLEN + 1);
3735 3829                          kmem_free(s, strsz);
3736 3830                  }
3737 3831  
3738 3832          } else {
3739 3833  
3740 3834                  iov.iov_base = data;
3741 3835                  iov.iov_len = MAXPATHLEN;
3742 3836                  uio.uio_iov = &iov;
3743 3837                  uio.uio_iovcnt = 1;
3744 3838                  uio.uio_segflg = UIO_SYSSPACE;
3745 3839                  uio.uio_extflg = UIO_COPY_CACHED;
3746 3840                  uio.uio_loffset = 0;
3747 3841                  uio.uio_resid = MAXPATHLEN;
3748 3842  
3749 3843                  error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3750 3844  
3751 3845                  if (!error)
3752 3846                          *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3753 3847          }
3754 3848  
3755 3849          if (error) {
3756 3850                  kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3757 3851                  *cs->statusp = resp->status = puterrno4(error);
3758 3852                  goto out;
3759 3853          }
3760 3854  
3761 3855          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3762 3856          name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3763 3857              MAXPATHLEN  + 1);
3764 3858  
3765 3859          if (name == NULL) {
3766 3860                  /*
3767 3861                   * Even though the conversion failed, we return
3768 3862                   * something. We just don't translate it.
3769 3863                   */
3770 3864                  name = data;
3771 3865          }
3772 3866  
3773 3867          /*
3774 3868           * treat link name as data
3775 3869           */
3776 3870          (void) str_to_utf8(name, (utf8string *)&resp->link);
3777 3871  
3778 3872          if (name != data)
3779 3873                  kmem_free(name, MAXPATHLEN + 1);
3780 3874          kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3781 3875          *cs->statusp = resp->status = NFS4_OK;
3782 3876  
3783 3877  out:
3784 3878          DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3785 3879              READLINK4res *, resp);
3786 3880  }
3787 3881  
3788 3882  static void
3789 3883  rfs4_op_readlink_free(nfs_resop4 *resop)
3790 3884  {
3791 3885          READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3792 3886          utf8string *symlink = (utf8string *)&resp->link;
3793 3887  
3794 3888          if (symlink->utf8string_val) {
3795 3889                  UTF8STRING_FREE(*symlink)
3796 3890          }
3797 3891  }
3798 3892  
3799 3893  /*
3800 3894   * release_lockowner:
3801 3895   *      Release any state associated with the supplied
3802 3896   *      lockowner. Note if any lo_state is holding locks we will not
3803 3897   *      rele that lo_state and thus the lockowner will not be destroyed.
3804 3898   *      A client using lock after the lock owner stateid has been released
3805 3899   *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3806 3900   *      to reissue the lock with new_lock_owner set to TRUE.
3807 3901   *      args: lock_owner
3808 3902   *      res:  status
3809 3903   */
3810 3904  /* ARGSUSED */
3811 3905  static void
3812 3906  rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3813 3907      struct svc_req *req, struct compound_state *cs)
3814 3908  {
3815 3909          RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3816 3910          RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3817 3911          rfs4_lockowner_t *lo;
3818 3912          rfs4_openowner_t *oo;
3819 3913          rfs4_state_t *sp;
3820 3914          rfs4_lo_state_t *lsp;
3821 3915          rfs4_client_t *cp;
3822 3916          bool_t create = FALSE;
3823 3917          locklist_t *llist;
3824 3918          sysid_t sysid;
3825 3919  
3826 3920          DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3827 3921              cs, RELEASE_LOCKOWNER4args *, ap);
3828 3922  
3829 3923          /* Make sure there is a clientid around for this request */
3830 3924          cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3831 3925  
3832 3926          if (cp == NULL) {
3833 3927                  *cs->statusp = resp->status =
3834 3928                      rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3835 3929                  goto out;
3836 3930          }
3837 3931          rfs4_client_rele(cp);
3838 3932  
3839 3933          lo = rfs4_findlockowner(&ap->lock_owner, &create);
3840 3934          if (lo == NULL) {
3841 3935                  *cs->statusp = resp->status = NFS4_OK;
3842 3936                  goto out;
3843 3937          }
3844 3938          ASSERT(lo->rl_client != NULL);
3845 3939  
3846 3940          /*
3847 3941           * Check for EXPIRED client. If so will reap state with in a lease
3848 3942           * period or on next set_clientid_confirm step
3849 3943           */
3850 3944          if (rfs4_lease_expired(lo->rl_client)) {
3851 3945                  rfs4_lockowner_rele(lo);
3852 3946                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3853 3947                  goto out;
3854 3948          }
3855 3949  
3856 3950          /*
3857 3951           * If no sysid has been assigned, then no locks exist; just return.
3858 3952           */
3859 3953          rfs4_dbe_lock(lo->rl_client->rc_dbe);
3860 3954          if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3861 3955                  rfs4_lockowner_rele(lo);
3862 3956                  rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3863 3957                  goto out;
3864 3958          }
3865 3959  
3866 3960          sysid = lo->rl_client->rc_sysidt;
3867 3961          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3868 3962  
3869 3963          /*
3870 3964           * Mark the lockowner invalid.
3871 3965           */
3872 3966          rfs4_dbe_hide(lo->rl_dbe);
3873 3967  
3874 3968          /*
3875 3969           * sysid-pid pair should now not be used since the lockowner is
3876 3970           * invalid. If the client were to instantiate the lockowner again
3877 3971           * it would be assigned a new pid. Thus we can get the list of
3878 3972           * current locks.
3879 3973           */
3880 3974  
3881 3975          llist = flk_get_active_locks(sysid, lo->rl_pid);
3882 3976          /* If we are still holding locks fail */
3883 3977          if (llist != NULL) {
3884 3978  
3885 3979                  *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3886 3980  
3887 3981                  flk_free_locklist(llist);
3888 3982                  /*
3889 3983                   * We need to unhide the lockowner so the client can
3890 3984                   * try it again. The bad thing here is if the client
3891 3985                   * has a logic error that took it here in the first place
3892 3986                   * they probably have lost accounting of the locks that it
3893 3987                   * is holding. So we may have dangling state until the
3894 3988                   * open owner state is reaped via close. One scenario
3895 3989                   * that could possibly occur is that the client has
3896 3990                   * sent the unlock request(s) in separate threads
3897 3991                   * and has not waited for the replies before sending the
3898 3992                   * RELEASE_LOCKOWNER request. Presumably, it would expect
3899 3993                   * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3900 3994                   * reissuing the request.
3901 3995                   */
3902 3996                  rfs4_dbe_unhide(lo->rl_dbe);
3903 3997                  rfs4_lockowner_rele(lo);
3904 3998                  goto out;
3905 3999          }
3906 4000  
3907 4001          /*
3908 4002           * For the corresponding client we need to check each open
3909 4003           * owner for any opens that have lockowner state associated
3910 4004           * with this lockowner.
3911 4005           */
3912 4006  
3913 4007          rfs4_dbe_lock(lo->rl_client->rc_dbe);
3914 4008          for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3915 4009              oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3916 4010  
3917 4011                  rfs4_dbe_lock(oo->ro_dbe);
3918 4012                  for (sp = list_head(&oo->ro_statelist); sp != NULL;
3919 4013                      sp = list_next(&oo->ro_statelist, sp)) {
3920 4014  
3921 4015                          rfs4_dbe_lock(sp->rs_dbe);
3922 4016                          for (lsp = list_head(&sp->rs_lostatelist);
3923 4017                              lsp != NULL;
3924 4018                              lsp = list_next(&sp->rs_lostatelist, lsp)) {
3925 4019                                  if (lsp->rls_locker == lo) {
3926 4020                                          rfs4_dbe_lock(lsp->rls_dbe);
3927 4021                                          rfs4_dbe_invalidate(lsp->rls_dbe);
3928 4022                                          rfs4_dbe_unlock(lsp->rls_dbe);
3929 4023                                  }
3930 4024                          }
3931 4025                          rfs4_dbe_unlock(sp->rs_dbe);
3932 4026                  }
3933 4027                  rfs4_dbe_unlock(oo->ro_dbe);
3934 4028          }
3935 4029          rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3936 4030  
3937 4031          rfs4_lockowner_rele(lo);
3938 4032  
3939 4033          *cs->statusp = resp->status = NFS4_OK;
3940 4034  
3941 4035  out:
3942 4036          DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
3943 4037              cs, RELEASE_LOCKOWNER4res *, resp);
3944 4038  }
3945 4039  
3946 4040  /*
3947 4041   * short utility function to lookup a file and recall the delegation
3948 4042   */
3949 4043  static rfs4_file_t *
3950 4044  rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3951 4045      int *lkup_error, cred_t *cr)
3952 4046  {
3953 4047          vnode_t *vp;
3954 4048          rfs4_file_t *fp = NULL;
3955 4049          bool_t fcreate = FALSE;
3956 4050          int error;
3957 4051  
3958 4052          if (vpp)
3959 4053                  *vpp = NULL;
3960 4054  
3961 4055          if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
3962 4056              NULL)) == 0) {
3963 4057                  if (vp->v_type == VREG)
3964 4058                          fp = rfs4_findfile(vp, NULL, &fcreate);
3965 4059                  if (vpp)
3966 4060                          *vpp = vp;
3967 4061                  else
3968 4062                          VN_RELE(vp);
3969 4063          }
3970 4064  
3971 4065          if (lkup_error)
3972 4066                  *lkup_error = error;
3973 4067  
3974 4068          return (fp);
3975 4069  }
3976 4070  
3977 4071  /*
3978 4072   * remove: args: CURRENT_FH: directory; name.
3979 4073   *      res: status. If success - CURRENT_FH unchanged, return change_info
3980 4074   *              for directory.
3981 4075   */
3982 4076  /* ARGSUSED */
3983 4077  static void
3984 4078  rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3985 4079      struct compound_state *cs)
3986 4080  {
3987 4081          REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3988 4082          REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3989 4083          int error;
3990 4084          vnode_t *dvp, *vp;
3991 4085          struct vattr bdva, idva, adva;
3992 4086          char *nm;
3993 4087          uint_t len;
3994 4088          rfs4_file_t *fp;
3995 4089          int in_crit = 0;
3996 4090          bslabel_t *clabel;
3997 4091          struct sockaddr *ca;
3998 4092          char *name = NULL;
3999 4093          nfsstat4 status;
4000 4094  
4001 4095          DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4002 4096              REMOVE4args *, args);
4003 4097  
4004 4098          /* CURRENT_FH: directory */
4005 4099          dvp = cs->vp;
4006 4100          if (dvp == NULL) {
4007 4101                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4008 4102                  goto out;
4009 4103          }
4010 4104  
4011 4105          if (cs->access == CS_ACCESS_DENIED) {
4012 4106                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4013 4107                  goto out;
4014 4108          }
4015 4109  
4016 4110          /*
4017 4111           * If there is an unshared filesystem mounted on this vnode,
4018 4112           * Do not allow to remove anything in this directory.
4019 4113           */
4020 4114          if (vn_ismntpt(dvp)) {
4021 4115                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4022 4116                  goto out;
4023 4117          }
4024 4118  
4025 4119          if (dvp->v_type != VDIR) {
4026 4120                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4027 4121                  goto out;
4028 4122          }
4029 4123  
4030 4124          status = utf8_dir_verify(&args->target);
4031 4125          if (status != NFS4_OK) {
4032 4126                  *cs->statusp = resp->status = status;
4033 4127                  goto out;
4034 4128          }
4035 4129  
4036 4130          /*
4037 4131           * Lookup the file so that we can check if it's a directory
4038 4132           */
4039 4133          nm = utf8_to_fn(&args->target, &len, NULL);
4040 4134          if (nm == NULL) {
4041 4135                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4042 4136                  goto out;
4043 4137          }
4044 4138  
4045 4139          if (len > MAXNAMELEN) {
4046 4140                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4047 4141                  kmem_free(nm, len);
4048 4142                  goto out;
4049 4143          }
4050 4144  
4051 4145          if (rdonly4(req, cs)) {
4052 4146                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4053 4147                  kmem_free(nm, len);
4054 4148                  goto out;
4055 4149          }
4056 4150  
4057 4151          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4058 4152          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4059 4153              MAXPATHLEN  + 1);
4060 4154  
4061 4155          if (name == NULL) {
4062 4156                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4063 4157                  kmem_free(nm, len);
4064 4158                  goto out;
4065 4159          }
4066 4160  
4067 4161          /*
4068 4162           * Lookup the file to determine type and while we are see if
4069 4163           * there is a file struct around and check for delegation.
4070 4164           * We don't need to acquire va_seq before this lookup, if
4071 4165           * it causes an update, cinfo.before will not match, which will
4072 4166           * trigger a cache flush even if atomic is TRUE.
4073 4167           */
4074 4168          if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4075 4169                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4076 4170                      NULL)) {
4077 4171                          VN_RELE(vp);
4078 4172                          rfs4_file_rele(fp);
4079 4173                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4080 4174                          if (nm != name)
4081 4175                                  kmem_free(name, MAXPATHLEN + 1);
4082 4176                          kmem_free(nm, len);
4083 4177                          goto out;
4084 4178                  }
4085 4179          }
4086 4180  
4087 4181          /* Didn't find anything to remove */
4088 4182          if (vp == NULL) {
4089 4183                  *cs->statusp = resp->status = error;
4090 4184                  if (nm != name)
4091 4185                          kmem_free(name, MAXPATHLEN + 1);
4092 4186                  kmem_free(nm, len);
4093 4187                  goto out;
4094 4188          }
4095 4189  
4096 4190          if (nbl_need_check(vp)) {
4097 4191                  nbl_start_crit(vp, RW_READER);
4098 4192                  in_crit = 1;
4099 4193                  if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4100 4194                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4101 4195                          if (nm != name)
4102 4196                                  kmem_free(name, MAXPATHLEN + 1);
4103 4197                          kmem_free(nm, len);
4104 4198                          nbl_end_crit(vp);
4105 4199                          VN_RELE(vp);
4106 4200                          if (fp) {
4107 4201                                  rfs4_clear_dont_grant(fp);
4108 4202                                  rfs4_file_rele(fp);
4109 4203                          }
4110 4204                          goto out;
4111 4205                  }
4112 4206          }
4113 4207  
4114 4208          /* check label before allowing removal */
4115 4209          if (is_system_labeled()) {
4116 4210                  ASSERT(req->rq_label != NULL);
4117 4211                  clabel = req->rq_label;
4118 4212                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4119 4213                      "got client label from request(1)",
4120 4214                      struct svc_req *, req);
4121 4215                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4122 4216                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4123 4217                              cs->exi)) {
4124 4218                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4125 4219                                  if (name != nm)
4126 4220                                          kmem_free(name, MAXPATHLEN + 1);
4127 4221                                  kmem_free(nm, len);
4128 4222                                  if (in_crit)
4129 4223                                          nbl_end_crit(vp);
4130 4224                                  VN_RELE(vp);
4131 4225                                  if (fp) {
4132 4226                                          rfs4_clear_dont_grant(fp);
4133 4227                                          rfs4_file_rele(fp);
4134 4228                                  }
4135 4229                                  goto out;
4136 4230                          }
4137 4231                  }
4138 4232          }
4139 4233  
4140 4234          /* Get dir "before" change value */
4141 4235          bdva.va_mask = AT_CTIME|AT_SEQ;
4142 4236          error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4143 4237          if (error) {
4144 4238                  *cs->statusp = resp->status = puterrno4(error);
4145 4239                  if (nm != name)
4146 4240                          kmem_free(name, MAXPATHLEN + 1);
4147 4241                  kmem_free(nm, len);
4148 4242                  if (in_crit)
4149 4243                          nbl_end_crit(vp);
4150 4244                  VN_RELE(vp);
4151 4245                  if (fp) {
4152 4246                          rfs4_clear_dont_grant(fp);
4153 4247                          rfs4_file_rele(fp);
4154 4248                  }
4155 4249                  goto out;
4156 4250          }
4157 4251          NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4158 4252  
4159 4253          /* Actually do the REMOVE operation */
4160 4254          if (vp->v_type == VDIR) {
4161 4255                  /*
4162 4256                   * Can't remove a directory that has a mounted-on filesystem.
4163 4257                   */
  
    | 
      ↓ open down ↓ | 
    426 lines elided | 
    
      ↑ open up ↑ | 
  
4164 4258                  if (vn_ismntpt(vp)) {
4165 4259                          error = EACCES;
4166 4260                  } else {
4167 4261                          /*
4168 4262                           * System V defines rmdir to return EEXIST,
4169 4263                           * not ENOTEMPTY, if the directory is not
4170 4264                           * empty.  A System V NFS server needs to map
4171 4265                           * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4172 4266                           * transmit over the wire.
4173 4267                           */
4174      -                        if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
     4268 +                        if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4175 4269                              NULL, 0)) == EEXIST)
4176 4270                                  error = ENOTEMPTY;
4177 4271                  }
4178 4272          } else {
4179 4273                  if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4180 4274                      fp != NULL) {
4181 4275                          struct vattr va;
4182 4276                          vnode_t *tvp;
4183 4277  
4184 4278                          rfs4_dbe_lock(fp->rf_dbe);
4185 4279                          tvp = fp->rf_vp;
4186 4280                          if (tvp)
4187 4281                                  VN_HOLD(tvp);
4188 4282                          rfs4_dbe_unlock(fp->rf_dbe);
4189 4283  
4190 4284                          if (tvp) {
4191 4285                                  /*
4192 4286                                   * This is va_seq safe because we are not
4193 4287                                   * manipulating dvp.
4194 4288                                   */
4195 4289                                  va.va_mask = AT_NLINK;
4196 4290                                  if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4197 4291                                      va.va_nlink == 0) {
4198 4292                                          /* Remove state on file remove */
4199 4293                                          if (in_crit) {
4200 4294                                                  nbl_end_crit(vp);
4201 4295                                                  in_crit = 0;
4202 4296                                          }
4203 4297                                          rfs4_close_all_state(fp);
4204 4298                                  }
4205 4299                                  VN_RELE(tvp);
4206 4300                          }
4207 4301                  }
4208 4302          }
4209 4303  
4210 4304          if (in_crit)
4211 4305                  nbl_end_crit(vp);
4212 4306          VN_RELE(vp);
4213 4307  
4214 4308          if (fp) {
4215 4309                  rfs4_clear_dont_grant(fp);
4216 4310                  rfs4_file_rele(fp);
4217 4311          }
4218 4312          if (nm != name)
4219 4313                  kmem_free(name, MAXPATHLEN + 1);
4220 4314          kmem_free(nm, len);
4221 4315  
4222 4316          if (error) {
4223 4317                  *cs->statusp = resp->status = puterrno4(error);
4224 4318                  goto out;
4225 4319          }
4226 4320  
4227 4321          /*
4228 4322           * Get the initial "after" sequence number, if it fails, set to zero
4229 4323           */
4230 4324          idva.va_mask = AT_SEQ;
4231 4325          if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4232 4326                  idva.va_seq = 0;
4233 4327  
4234 4328          /*
4235 4329           * Force modified data and metadata out to stable storage.
4236 4330           */
4237 4331          (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4238 4332  
4239 4333          /*
4240 4334           * Get "after" change value, if it fails, simply return the
4241 4335           * before value.
4242 4336           */
4243 4337          adva.va_mask = AT_CTIME|AT_SEQ;
4244 4338          if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4245 4339                  adva.va_ctime = bdva.va_ctime;
4246 4340                  adva.va_seq = 0;
4247 4341          }
4248 4342  
4249 4343          NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4250 4344  
4251 4345          /*
4252 4346           * The cinfo.atomic = TRUE only if we have
4253 4347           * non-zero va_seq's, and it has incremented by exactly one
4254 4348           * during the VOP_REMOVE/RMDIR and it didn't change during
4255 4349           * the VOP_FSYNC.
4256 4350           */
4257 4351          if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4258 4352              idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4259 4353                  resp->cinfo.atomic = TRUE;
4260 4354          else
4261 4355                  resp->cinfo.atomic = FALSE;
4262 4356  
4263 4357          *cs->statusp = resp->status = NFS4_OK;
4264 4358  
4265 4359  out:
4266 4360          DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4267 4361              REMOVE4res *, resp);
4268 4362  }
4269 4363  
4270 4364  /*
4271 4365   * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4272 4366   *              oldname and newname.
4273 4367   *      res: status. If success - CURRENT_FH unchanged, return change_info
4274 4368   *              for both from and target directories.
4275 4369   */
  
    | 
      ↓ open down ↓ | 
    91 lines elided | 
    
      ↑ open up ↑ | 
  
4276 4370  /* ARGSUSED */
4277 4371  static void
4278 4372  rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4279 4373      struct compound_state *cs)
4280 4374  {
4281 4375          RENAME4args *args = &argop->nfs_argop4_u.oprename;
4282 4376          RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4283 4377          int error;
4284 4378          vnode_t *odvp;
4285 4379          vnode_t *ndvp;
4286      -        vnode_t *srcvp, *targvp;
     4380 +        vnode_t *srcvp, *targvp, *tvp;
4287 4381          struct vattr obdva, oidva, oadva;
4288 4382          struct vattr nbdva, nidva, nadva;
4289 4383          char *onm, *nnm;
4290 4384          uint_t olen, nlen;
4291 4385          rfs4_file_t *fp, *sfp;
4292 4386          int in_crit_src, in_crit_targ;
4293 4387          int fp_rele_grant_hold, sfp_rele_grant_hold;
     4388 +        int unlinked;
4294 4389          bslabel_t *clabel;
4295 4390          struct sockaddr *ca;
4296 4391          char *converted_onm = NULL;
4297 4392          char *converted_nnm = NULL;
4298 4393          nfsstat4 status;
4299 4394  
4300 4395          DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4301 4396              RENAME4args *, args);
4302 4397  
4303 4398          fp = sfp = NULL;
4304      -        srcvp = targvp = NULL;
     4399 +        srcvp = targvp = tvp = NULL;
4305 4400          in_crit_src = in_crit_targ = 0;
4306 4401          fp_rele_grant_hold = sfp_rele_grant_hold = 0;
     4402 +        unlinked = 0;
4307 4403  
4308 4404          /* CURRENT_FH: target directory */
4309 4405          ndvp = cs->vp;
4310 4406          if (ndvp == NULL) {
4311 4407                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4312 4408                  goto out;
4313 4409          }
4314 4410  
4315 4411          /* SAVED_FH: from directory */
4316 4412          odvp = cs->saved_vp;
4317 4413          if (odvp == NULL) {
4318 4414                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4319 4415                  goto out;
4320 4416          }
4321 4417  
4322 4418          if (cs->access == CS_ACCESS_DENIED) {
4323 4419                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4324 4420                  goto out;
4325 4421          }
4326 4422  
4327 4423          /*
4328 4424           * If there is an unshared filesystem mounted on this vnode,
4329 4425           * do not allow to rename objects in this directory.
4330 4426           */
4331 4427          if (vn_ismntpt(odvp)) {
4332 4428                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4333 4429                  goto out;
4334 4430          }
4335 4431  
4336 4432          /*
4337 4433           * If there is an unshared filesystem mounted on this vnode,
4338 4434           * do not allow to rename to this directory.
4339 4435           */
4340 4436          if (vn_ismntpt(ndvp)) {
4341 4437                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4342 4438                  goto out;
4343 4439          }
4344 4440  
4345 4441          if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4346 4442                  *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4347 4443                  goto out;
4348 4444          }
4349 4445  
4350 4446          if (cs->saved_exi != cs->exi) {
4351 4447                  *cs->statusp = resp->status = NFS4ERR_XDEV;
4352 4448                  goto out;
4353 4449          }
4354 4450  
4355 4451          status = utf8_dir_verify(&args->oldname);
4356 4452          if (status != NFS4_OK) {
4357 4453                  *cs->statusp = resp->status = status;
4358 4454                  goto out;
4359 4455          }
4360 4456  
4361 4457          status = utf8_dir_verify(&args->newname);
4362 4458          if (status != NFS4_OK) {
4363 4459                  *cs->statusp = resp->status = status;
4364 4460                  goto out;
4365 4461          }
4366 4462  
4367 4463          onm = utf8_to_fn(&args->oldname, &olen, NULL);
4368 4464          if (onm == NULL) {
4369 4465                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4370 4466                  goto out;
4371 4467          }
4372 4468          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4373 4469          nlen = MAXPATHLEN + 1;
4374 4470          converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4375 4471              nlen);
4376 4472  
4377 4473          if (converted_onm == NULL) {
4378 4474                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4379 4475                  kmem_free(onm, olen);
4380 4476                  goto out;
4381 4477          }
4382 4478  
4383 4479          nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4384 4480          if (nnm == NULL) {
4385 4481                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4386 4482                  if (onm != converted_onm)
4387 4483                          kmem_free(converted_onm, MAXPATHLEN + 1);
4388 4484                  kmem_free(onm, olen);
4389 4485                  goto out;
4390 4486          }
4391 4487          converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4392 4488              MAXPATHLEN  + 1);
4393 4489  
4394 4490          if (converted_nnm == NULL) {
4395 4491                  *cs->statusp = resp->status = NFS4ERR_INVAL;
4396 4492                  kmem_free(nnm, nlen);
4397 4493                  nnm = NULL;
4398 4494                  if (onm != converted_onm)
4399 4495                          kmem_free(converted_onm, MAXPATHLEN + 1);
4400 4496                  kmem_free(onm, olen);
4401 4497                  goto out;
4402 4498          }
4403 4499  
4404 4500  
4405 4501          if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4406 4502                  *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4407 4503                  kmem_free(onm, olen);
4408 4504                  kmem_free(nnm, nlen);
4409 4505                  goto out;
4410 4506          }
4411 4507  
4412 4508  
4413 4509          if (rdonly4(req, cs)) {
4414 4510                  *cs->statusp = resp->status = NFS4ERR_ROFS;
4415 4511                  if (onm != converted_onm)
4416 4512                          kmem_free(converted_onm, MAXPATHLEN + 1);
4417 4513                  kmem_free(onm, olen);
4418 4514                  if (nnm != converted_nnm)
4419 4515                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4420 4516                  kmem_free(nnm, nlen);
4421 4517                  goto out;
4422 4518          }
4423 4519  
4424 4520          /* check label of the target dir */
4425 4521          if (is_system_labeled()) {
4426 4522                  ASSERT(req->rq_label != NULL);
4427 4523                  clabel = req->rq_label;
4428 4524                  DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4429 4525                      "got client label from request(1)",
4430 4526                      struct svc_req *, req);
4431 4527                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4432 4528                          if (!do_rfs_label_check(clabel, ndvp,
4433 4529                              EQUALITY_CHECK, cs->exi)) {
4434 4530                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
4435 4531                                  goto err_out;
4436 4532                          }
4437 4533                  }
4438 4534          }
4439 4535  
4440 4536          /*
4441 4537           * Is the source a file and have a delegation?
4442 4538           * We don't need to acquire va_seq before these lookups, if
4443 4539           * it causes an update, cinfo.before will not match, which will
4444 4540           * trigger a cache flush even if atomic is TRUE.
4445 4541           */
4446 4542          if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4447 4543              &error, cs->cr)) {
4448 4544                  if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4449 4545                      NULL)) {
4450 4546                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4451 4547                          goto err_out;
4452 4548                  }
4453 4549          }
4454 4550  
4455 4551          if (srcvp == NULL) {
4456 4552                  *cs->statusp = resp->status = puterrno4(error);
4457 4553                  if (onm != converted_onm)
4458 4554                          kmem_free(converted_onm, MAXPATHLEN + 1);
4459 4555                  kmem_free(onm, olen);
4460 4556                  if (nnm != converted_nnm)
4461 4557                          kmem_free(converted_nnm, MAXPATHLEN + 1);
4462 4558                  kmem_free(nnm, nlen);
4463 4559                  goto out;
4464 4560          }
4465 4561  
4466 4562          sfp_rele_grant_hold = 1;
4467 4563  
4468 4564          /* Does the destination exist and a file and have a delegation? */
  
    | 
      ↓ open down ↓ | 
    152 lines elided | 
    
      ↑ open up ↑ | 
  
4469 4565          if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4470 4566              NULL, cs->cr)) {
4471 4567                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4472 4568                      NULL)) {
4473 4569                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4474 4570                          goto err_out;
4475 4571                  }
4476 4572          }
4477 4573          fp_rele_grant_hold = 1;
4478 4574  
4479      -
4480 4575          /* Check for NBMAND lock on both source and target */
4481 4576          if (nbl_need_check(srcvp)) {
4482 4577                  nbl_start_crit(srcvp, RW_READER);
4483 4578                  in_crit_src = 1;
4484 4579                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4485 4580                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4486 4581                          goto err_out;
4487 4582                  }
4488 4583          }
4489 4584  
4490 4585          if (targvp && nbl_need_check(targvp)) {
4491 4586                  nbl_start_crit(targvp, RW_READER);
4492 4587                  in_crit_targ = 1;
4493 4588                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4494 4589                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4495 4590                          goto err_out;
4496 4591                  }
4497 4592          }
4498 4593  
4499 4594          /* Get source "before" change value */
4500 4595          obdva.va_mask = AT_CTIME|AT_SEQ;
4501 4596          error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4502 4597          if (!error) {
4503 4598                  nbdva.va_mask = AT_CTIME|AT_SEQ;
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
4504 4599                  error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4505 4600          }
4506 4601          if (error) {
4507 4602                  *cs->statusp = resp->status = puterrno4(error);
4508 4603                  goto err_out;
4509 4604          }
4510 4605  
4511 4606          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4512 4607          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4513 4608  
4514      -        if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4515      -            cs->cr, NULL, 0)) == 0 && fp != NULL) {
4516      -                struct vattr va;
4517      -                vnode_t *tvp;
     4609 +        error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
     4610 +            NULL, 0);
4518 4611  
     4612 +        /*
     4613 +         * If target existed and was unlinked by VOP_RENAME, state must be
     4614 +         * closed. To avoid deadlock, rfs4_close_all_state will be done after
     4615 +         * any necessary nbl_end_crit on srcvp and targvp.
     4616 +         */
     4617 +        if (error == 0 && fp != NULL) {
4519 4618                  rfs4_dbe_lock(fp->rf_dbe);
4520 4619                  tvp = fp->rf_vp;
4521 4620                  if (tvp)
4522 4621                          VN_HOLD(tvp);
4523 4622                  rfs4_dbe_unlock(fp->rf_dbe);
4524 4623  
4525 4624                  if (tvp) {
     4625 +                        struct vattr va;
4526 4626                          va.va_mask = AT_NLINK;
     4627 +
4527 4628                          if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4528 4629                              va.va_nlink == 0) {
4529      -                                /* The file is gone and so should the state */
4530      -                                if (in_crit_targ) {
4531      -                                        nbl_end_crit(targvp);
4532      -                                        in_crit_targ = 0;
     4630 +                                unlinked = 1;
     4631 +
     4632 +                                /* DEBUG data */
     4633 +                                if ((srcvp == targvp) || (tvp != targvp)) {
     4634 +                                        cmn_err(CE_WARN, "rfs4_op_rename: "
     4635 +                                            "srcvp %p, targvp: %p, tvp: %p",
     4636 +                                            (void *)srcvp, (void *)targvp,
     4637 +                                            (void *)tvp);
4533 4638                                  }
4534      -                                rfs4_close_all_state(fp);
     4639 +                        } else {
     4640 +                                VN_RELE(tvp);
4535 4641                          }
4536      -                        VN_RELE(tvp);
4537 4642                  }
4538 4643          }
4539 4644          if (error == 0)
4540 4645                  vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4541 4646  
4542 4647          if (in_crit_src)
4543 4648                  nbl_end_crit(srcvp);
4544 4649          if (srcvp)
4545 4650                  VN_RELE(srcvp);
4546 4651          if (in_crit_targ)
4547 4652                  nbl_end_crit(targvp);
4548 4653          if (targvp)
4549 4654                  VN_RELE(targvp);
4550 4655  
     4656 +        if (unlinked) {
     4657 +                ASSERT(fp != NULL);
     4658 +                ASSERT(tvp != NULL);
     4659 +
     4660 +                /* DEBUG data */
     4661 +                if (RW_READ_HELD(&tvp->v_nbllock)) {
     4662 +                        cmn_err(CE_WARN, "rfs4_op_rename: "
     4663 +                            "RW_READ_HELD(%p)", (void *)tvp);
     4664 +                }
     4665 +
     4666 +                /* The file is gone and so should the state */
     4667 +                rfs4_close_all_state(fp);
     4668 +                VN_RELE(tvp);
     4669 +        }
     4670 +
4551 4671          if (sfp) {
4552 4672                  rfs4_clear_dont_grant(sfp);
4553 4673                  rfs4_file_rele(sfp);
4554 4674          }
4555 4675          if (fp) {
4556 4676                  rfs4_clear_dont_grant(fp);
4557 4677                  rfs4_file_rele(fp);
4558 4678          }
4559 4679  
4560 4680          if (converted_onm != onm)
4561 4681                  kmem_free(converted_onm, MAXPATHLEN + 1);
4562 4682          kmem_free(onm, olen);
4563 4683          if (converted_nnm != nnm)
4564 4684                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4565 4685          kmem_free(nnm, nlen);
4566 4686  
4567 4687          /*
4568 4688           * Get the initial "after" sequence number, if it fails, set to zero
4569 4689           */
4570 4690          oidva.va_mask = AT_SEQ;
4571 4691          if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4572 4692                  oidva.va_seq = 0;
4573 4693  
4574 4694          nidva.va_mask = AT_SEQ;
4575 4695          if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4576 4696                  nidva.va_seq = 0;
4577 4697  
4578 4698          /*
4579 4699           * Force modified data and metadata out to stable storage.
4580 4700           */
4581 4701          (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4582 4702          (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4583 4703  
4584 4704          if (error) {
4585 4705                  *cs->statusp = resp->status = puterrno4(error);
4586 4706                  goto out;
4587 4707          }
4588 4708  
4589 4709          /*
4590 4710           * Get "after" change values, if it fails, simply return the
4591 4711           * before value.
4592 4712           */
4593 4713          oadva.va_mask = AT_CTIME|AT_SEQ;
4594 4714          if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4595 4715                  oadva.va_ctime = obdva.va_ctime;
4596 4716                  oadva.va_seq = 0;
4597 4717          }
4598 4718  
4599 4719          nadva.va_mask = AT_CTIME|AT_SEQ;
4600 4720          if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4601 4721                  nadva.va_ctime = nbdva.va_ctime;
4602 4722                  nadva.va_seq = 0;
4603 4723          }
4604 4724  
4605 4725          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4606 4726          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4607 4727  
4608 4728          /*
4609 4729           * The cinfo.atomic = TRUE only if we have
4610 4730           * non-zero va_seq's, and it has incremented by exactly one
4611 4731           * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4612 4732           */
4613 4733          if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4614 4734              oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4615 4735                  resp->source_cinfo.atomic = TRUE;
4616 4736          else
4617 4737                  resp->source_cinfo.atomic = FALSE;
4618 4738  
4619 4739          if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4620 4740              nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4621 4741                  resp->target_cinfo.atomic = TRUE;
4622 4742          else
4623 4743                  resp->target_cinfo.atomic = FALSE;
4624 4744  
4625 4745  #ifdef  VOLATILE_FH_TEST
4626 4746          {
4627 4747          extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4628 4748  
4629 4749          /*
4630 4750           * Add the renamed file handle to the volatile rename list
4631 4751           */
4632 4752          if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4633 4753                  /* file handles may expire on rename */
4634 4754                  vnode_t *vp;
4635 4755  
4636 4756                  nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4637 4757                  /*
4638 4758                   * Already know that nnm will be a valid string
4639 4759                   */
4640 4760                  error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4641 4761                      NULL, NULL, NULL);
4642 4762                  kmem_free(nnm, nlen);
4643 4763                  if (!error) {
4644 4764                          add_volrnm_fh(cs->exi, vp);
4645 4765                          VN_RELE(vp);
4646 4766                  }
4647 4767          }
4648 4768          }
4649 4769  #endif  /* VOLATILE_FH_TEST */
4650 4770  
4651 4771          *cs->statusp = resp->status = NFS4_OK;
4652 4772  out:
4653 4773          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4654 4774              RENAME4res *, resp);
4655 4775          return;
4656 4776  
4657 4777  err_out:
4658 4778          if (onm != converted_onm)
4659 4779                  kmem_free(converted_onm, MAXPATHLEN + 1);
4660 4780          if (onm != NULL)
4661 4781                  kmem_free(onm, olen);
4662 4782          if (nnm != converted_nnm)
4663 4783                  kmem_free(converted_nnm, MAXPATHLEN + 1);
4664 4784          if (nnm != NULL)
4665 4785                  kmem_free(nnm, nlen);
4666 4786  
4667 4787          if (in_crit_src) nbl_end_crit(srcvp);
4668 4788          if (in_crit_targ) nbl_end_crit(targvp);
4669 4789          if (targvp) VN_RELE(targvp);
4670 4790          if (srcvp) VN_RELE(srcvp);
4671 4791          if (sfp) {
4672 4792                  if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4673 4793                  rfs4_file_rele(sfp);
4674 4794          }
4675 4795          if (fp) {
4676 4796                  if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4677 4797                  rfs4_file_rele(fp);
4678 4798          }
4679 4799  
4680 4800          DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4681 4801              RENAME4res *, resp);
4682 4802  }
4683 4803  
           /*
            * RENEW operation handler.
            *
            * Looks up the client named by args->clientid.  If no client is found
            * the status is whatever rfs4_check_clientid() returns; if the client's
            * lease has already expired the status is NFS4ERR_EXPIRED.  Otherwise
            * the lease is updated and, while holding cb_lock, the callback-path
            * flag is examined: a call that finds cb_notified_of_cb_path_down
            * FALSE sets it to TRUE and reports NFS4ERR_CB_PATH_DOWN; a call that
            * finds it already TRUE reports NFS4_OK.
            *
            * The status is stored in both resp->status and *cs->statusp.  Every
            * path that obtained cp calls rfs4_client_rele() on it before reaching
            * the "out" label.  req is unused.
            */
4684 4804  /* ARGSUSED */
4685 4805  static void
4686 4806  rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4687 4807      struct compound_state *cs)
4688 4808  {
4689 4809          RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4690 4810          RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4691 4811          rfs4_client_t *cp;
4692 4812  
4693 4813          DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4694 4814              RENEW4args *, args);
4695 4815  
                   /* Unknown clientid: rfs4_check_clientid() chooses the status. */
4696 4816          if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4697 4817                  *cs->statusp = resp->status =
4698 4818                      rfs4_check_clientid(&args->clientid, 0);
4699 4819                  goto out;
4700 4820          }
4701 4821  
                   /* An already-expired lease is not renewed. */
4702 4822          if (rfs4_lease_expired(cp)) {
4703 4823                  rfs4_client_rele(cp);
4704 4824                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4705 4825                  goto out;
4706 4826          }
4707 4827  
4708 4828          rfs4_update_lease(cp);
4709 4829  
                   /*
                    * The lease has been updated at this point regardless of which
                    * status is chosen below.  The flag is tested and set under
                    * cb_lock so NFS4ERR_CB_PATH_DOWN is reported only by the call
                    * that flips it to TRUE.
                    */
4710 4830          mutex_enter(cp->rc_cbinfo.cb_lock);
4711 4831          if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4712 4832                  cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4713 4833                  *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4714 4834          } else {
4715 4835                  *cs->statusp = resp->status = NFS4_OK;
4716 4836          }
4717 4837          mutex_exit(cp->rc_cbinfo.cb_lock);
4718 4838  
4719 4839          rfs4_client_rele(cp);
4720 4840  
4721 4841  out:
4722 4842          DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4723 4843              RENEW4res *, resp);
4724 4844  }
4725 4845  
           /*
            * RESTOREFH operation handler: make the saved filehandle current.
            *
            * Fails with NFS4ERR_RESTOREFH when there is no saved vnode or no
            * saved filehandle buffer.  Otherwise the current vnode (if any) is
            * released, and the saved vnode, export info and filehandle become
            * the current ones.  cs->deleg is reset to FALSE.
            *
            * The saved vnode pointer is moved, not copied: cs->saved_vp is set
            * to NULL and no VN_HOLD is taken here, so whatever hold accompanied
            * saved_vp now accompanies cs->vp.  args and req are unused.
            */
4726 4846  /* ARGSUSED */
4727 4847  static void
4728 4848  rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4729 4849      struct compound_state *cs)
4730 4850  {
4731 4851          RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4732 4852  
4733 4853          DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4734 4854  
4735 4855          /* No need to check cs->access - we are not accessing any object */
4736 4856          if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4737 4857                  *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4738 4858                  goto out;
4739 4859          }
                   /* Drop the vnode that is about to stop being current. */
4740 4860          if (cs->vp != NULL) {
4741 4861                  VN_RELE(cs->vp);
4742 4862          }
4743 4863          cs->vp = cs->saved_vp;
4744 4864          cs->saved_vp = NULL;
4745 4865          cs->exi = cs->saved_exi;
                   /* presumably nfs_fh4_copy(source, destination) - confirm */
4746 4866          nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4747 4867          *cs->statusp = resp->status = NFS4_OK;
4748 4868          cs->deleg = FALSE;
4749 4869  
4750 4870  out:
4751 4871          DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4752 4872              RESTOREFH4res *, resp);
4753 4873  }
4754 4874  
           /*
            * SAVEFH operation handler: remember the current filehandle.
            *
            * Fails with NFS4ERR_NOFILEHANDLE when there is no current vnode.
            * Otherwise any previously saved vnode is released, and the current
            * vnode (with an extra VN_HOLD), export info and filehandle are
            * copied into the saved_* fields of cs.  The current filehandle is
            * left in place.  argop and req are unused.
            */
4755 4875  /* ARGSUSED */
4756 4876  static void
4757 4877  rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4758 4878      struct compound_state *cs)
4759 4879  {
4760 4880          SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4761 4881  
4762 4882          DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4763 4883  
4764 4884          /* No need to check cs->access - we are not accessing any object */
4765 4885          if (cs->vp == NULL) {
4766 4886                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4767 4887                  goto out;
4768 4888          }
                   /* Replace, rather than leak, an earlier saved vnode. */
4769 4889          if (cs->saved_vp != NULL) {
4770 4890                  VN_RELE(cs->saved_vp);
4771 4891          }
                   /* Both cs->vp and cs->saved_vp now refer to the vnode: take a hold. */
4772 4892          cs->saved_vp = cs->vp;
4773 4893          VN_HOLD(cs->saved_vp);
4774 4894          cs->saved_exi = cs->exi;
4775 4895          /*
4776 4896           * since SAVEFH is fairly rare, don't alloc space for its fh
4777 4897           * unless necessary.
4778 4898           */
4779 4899          if (cs->saved_fh.nfs_fh4_val == NULL) {
4780 4900                  cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4781 4901          }
                   /* presumably nfs_fh4_copy(source, destination) - confirm */
4782 4902          nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4783 4903          *cs->statusp = resp->status = NFS4_OK;
4784 4904  
4785 4905  out:
4786 4906          DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4787 4907              SAVEFH4res *, resp);
4788 4908  }
4789 4909  
4790 4910  /*
4791 4911   * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4792 4912   * return the bitmap of attrs that were set successfully. It is also
4793 4913   * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4794 4914   * always be called only after do_rfs4_set_attrs().
4795 4915   *
4796 4916   * Verify that the attributes are same as the expected ones. sargp->vap
4797 4917   * and sargp->sbp contain the input attributes as translated from fattr4.
4798 4918   *
4799 4919   * This function verifies only the attrs that correspond to a vattr or
4800 4920   * vfsstat struct. That is because of the extra step needed to get the
4801 4921   * corresponding system structs. Other attributes have already been set or
4802 4922   * verified by do_rfs4_set_attrs.
4803 4923   *
            * Arguments:
            *   sargp - carries the compound state (vnode, cred) plus the vattr
            *           and statvfs buffers; both buffers are overwritten here
            *           with the object's current values.
            *   resp  - if non-NULL, every attribute is checked and the fbit of
            *           each one that matches is OR-ed into *resp.  If NULL,
            *           checking stops at the first mismatch.
            *   ntovp - table of decoded attributes (na/amap/attrcnt) and the
            *           vfsstat flag saying whether a statvfs is needed.
            *
4804 4924   * Return 0 if all attrs match, -1 if some don't, error if error processing.
            *
            * NOTE(review): when resp is non-NULL and both VOP_GETATTR and
            * VFS_STATVFS are attempted, the VFS_STATVFS result overwrites the
            * VOP_GETATTR result in ret_error, so the return value alone does not
            * report a GETATTR failure in that case.
4805 4925   */
4806 4926  static int
4807 4927  rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4808 4928      bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4809 4929  {
4810 4930          int error, ret_error = 0;
4811 4931          int i, k;
4812 4932          uint_t sva_mask = sargp->vap->va_mask;
4813 4933          uint_t vbit;
4814 4934          union nfs4_attr_u *na;
4815 4935          uint8_t *amap;
4816 4936          bool_t getsb = ntovp->vfsstat;
4817 4937  
4818 4938          if (sva_mask != 0) {
4819 4939                  /*
4820 4940                   * Okay to overwrite sargp->vap because we verify based
4821 4941                   * on the incoming values.
4822 4942                   */
4823 4943                  ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4824 4944                      sargp->cs->cr, NULL);
4825 4945                  if (ret_error) {
4826 4946                          if (resp == NULL)
4827 4947                                  return (ret_error);
4828 4948                          /*
4829 4949                           * Must return bitmap of successful attrs
4830 4950                           */
4831 4951                          sva_mask = 0;   /* to prevent checking vap later */
4832 4952                  } else {
4833 4953                          /*
4834 4954                           * Some file systems clobber va_mask. it is probably
4835 4955                           * wrong of them to do so, nonetheless we practice
4836 4956                           * defensive coding.
4837 4957                           * See bug id 4276830.
4838 4958                           */
4839 4959                          sargp->vap->va_mask = sva_mask;
4840 4960                  }
4841 4961          }
4842 4962  
4843 4963          if (getsb) {
4844 4964                  /*
4845 4965                   * Now get the superblock and loop on the bitmap, as there is
4846 4966                   * no simple way of translating from superblock to bitmap4.
4847 4967                   */
4848 4968                  ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4849 4969                  if (ret_error) {
4850 4970                          if (resp == NULL)
4851 4971                                  goto errout;
                                   /* Keep going, but skip the statvfs-based attrs. */
4852 4972                          getsb = FALSE;
4853 4973                  }
4854 4974          }
4855 4975  
4856 4976          /*
4857 4977           * Now loop and verify each attribute which getattr returned
4858 4978           * whether it's the same as the input.
4859 4979           */
                   /* Nothing to compare against and no bitmap to build: done. */
4860 4980          if (resp == NULL && !getsb && (sva_mask == 0))
4861 4981                  goto errout;
4862 4982  
4863 4983          na = ntovp->na;
4864 4984          amap = ntovp->amap;
4865 4985          k = 0;
4866 4986          for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
                           /* amap[i] is the nfs4_ntov_map index of the i-th decoded attr */
4867 4987                  k = *amap;
4868 4988                  ASSERT(nfs4_ntov_map[k].nval == k);
4869 4989                  vbit = nfs4_ntov_map[k].vbit;
4870 4990  
4871 4991                  /*
4872 4992                   * If vattr attribute but VOP_GETATTR failed, or it's
4873 4993                   * superblock attribute but VFS_STATVFS failed, skip
4874 4994                   */
4875 4995                  if (vbit) {
4876 4996                          if ((vbit & sva_mask) == 0)
4877 4997                                  continue;
4878 4998                  } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4879 4999                          continue;
4880 5000                  }
4881 5001                  error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
                           /* Bitmap mode: record matches and keep scanning. */
4882 5002                  if (resp != NULL) {
4883 5003                          if (error)
4884 5004                                  ret_error = -1; /* not all match */
4885 5005                          else    /* update response bitmap */
4886 5006                                  *resp |= nfs4_ntov_map[k].fbit;
4887 5007                          continue;
4888 5008                  }
                           /* Verify mode: the first mismatch decides the result. */
4889 5009                  if (error) {
4890 5010                          ret_error = -1; /* not all match */
4891 5011                          break;
4892 5012                  }
4893 5013          }
4894 5014  errout:
4895 5015          return (ret_error);
4896 5016  }
4897 5017  
4898 5018  /*
4899 5019   * Decode the attribute to be set/verified. If the attr requires a sys op
4900 5020   * (VOP_GETATTR, VFS_STATVFS), and the request is to verify, then don't
4901 5021   * call the sv_getit function for it, because the sys op hasn't yet been done.
4902 5022   * Return 0 for success, error code if failed.
4903 5023   *
            * Arguments:
            *   cmd       - command passed through to the attribute's sv_getit
            *               function (compared against NFS4ATTR_VERIT here).
            *   sargp     - set/verify state; the attribute's vbit is OR-ed into
            *               sargp->vap->va_mask before decoding is attempted.
            *   k         - index into nfs4_ntov_map of the attribute to decode.
            *   xdrp      - XDR stream positioned at the attribute's value.
            *   resp_bval - if non-NULL, receives the attribute's fbit when the
            *               call succeeds and the attribute is not vattr/statvfs
            *               based.
            *   nap       - where the decoded value is stored.
            *
            * A decode failure returns EINVAL.  If sv_getit fails, the decoded
            * value in nap is released with xdr_free() before returning.
            *
4904 5024   * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4905 5025   */
4906 5026  static int
4907 5027  decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4908 5028      int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4909 5029  {
4910 5030          int error = 0;
                   /* Assigned only when the XDR decode below succeeds. */
4911 5031          bool_t set_later;
4912 5032  
4913 5033          sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4914 5034  
4915 5035          if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4916 5036                  set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4917 5037                  /*
4918 5038                   * don't verify yet if a vattr or sb dependent attr,
4919 5039                   * because we don't have their sys values yet.
4920 5040                   * Will be done later.
4921 5041                   */
4922 5042                  if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4923 5043                          /*
4924 5044                           * ACLs are a special case, since setting the MODE
4925 5045                           * conflicts with setting the ACL.  We delay setting
4926 5046                           * the ACL until all other attributes have been set.
4927 5047                           * The ACL gets set in do_rfs4_op_setattr().
4928 5048                           */
4929 5049                          if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4930 5050                                  error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4931 5051                                      sargp, nap);
4932 5052                                  if (error) {
4933 5053                                          xdr_free(nfs4_ntov_map[k].xfunc,
4934 5054                                              (caddr_t)nap);
4935 5055                                  }
4936 5056                          }
4937 5057                  }
4938 5058          } else {
4939 5059  #ifdef  DEBUG
4940 5060                  cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4941 5061                      "decoding attribute %d\n", k);
4942 5062  #endif
4943 5063                  error = EINVAL;
4944 5064          }
                   /*
                    * set_later is read only when error is 0.  The decode-failure
                    * branch sets error to EINVAL, so the && short-circuits before
                    * the unassigned value could be examined.
                    */
4945 5065          if (!error && resp_bval && !set_later) {
4946 5066                  *resp_bval |= nfs4_ntov_map[k].fbit;
4947 5067          }
4948 5068  
4949 5069          return (error);
4950 5070  }
4951 5071  
4952 5072  /*
4953 5073   * Set vattr based on incoming fattr4 attrs - used by setattr.
4954 5074   * Set response mask. Ignore any values that are not writable vattr attrs.
            *
            * Arguments:
            *   resp   - optional bitmap; zeroed on the two early error returns
            *            and otherwise updated by decode_fattr4_attr().
            *   fattrp - requested attribute mask plus the XDR-encoded values.
            *   cs     - compound state; supplies the vnode and access state.
            *   sargp  - initialized here (op, cs, flag, va_mask, rdattr_error*)
            *            and filled in as attributes are decoded.  sargp->sbp
            *            must already be set by the caller.
            *   ntovp  - receives one na/amap entry per decoded attribute;
            *            attrcnt is incremented for each, and vfsstat is set
            *            if any decoded attribute is statvfs based.
            *   cmd    - command stored in sargp->op and handed to
            *            decode_fattr4_attr().
            *
            * Returns NFS4ERR_NOFILEHANDLE when there is no current vnode,
            * NFS4ERR_ACCESS when cs->access is CS_ACCESS_DENIED, and otherwise
            * NFS4_OK or the translation of the first error hit in the loop
            * (ENOTSUP becomes NFS4ERR_ATTRNOTSUPP, anything else goes through
            * puterrno4()).  Entries decoded before the error stay in ntovp.
4955 5075   */
4956 5076  static nfsstat4
4957 5077  do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4958 5078      struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4959 5079      nfs4_attr_cmd_t cmd)
4960 5080  {
4961 5081          int error = 0;
4962 5082          int i;
4963 5083          char *attrs = fattrp->attrlist4;
4964 5084          uint32_t attrslen = fattrp->attrlist4_len;
4965 5085          XDR xdr;
4966 5086          nfsstat4 status = NFS4_OK;
4967 5087          vnode_t *vp = cs->vp;
4968 5088          union nfs4_attr_u *na;
4969 5089          uint8_t *amap;
4970 5090  
4971 5091  #ifndef lint
4972 5092          /*
4973 5093           * Make sure that maximum attribute number can be expressed as an
4974 5094           * 8 bit quantity.
4975 5095           */
4976 5096          ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4977 5097  #endif
4978 5098  
4979 5099          if (vp == NULL) {
4980 5100                  if (resp)
4981 5101                          *resp = 0;
4982 5102                  return (NFS4ERR_NOFILEHANDLE);
4983 5103          }
4984 5104          if (cs->access == CS_ACCESS_DENIED) {
4985 5105                  if (resp)
4986 5106                          *resp = 0;
4987 5107                  return (NFS4ERR_ACCESS);
4988 5108          }
4989 5109  
4990 5110          sargp->op = cmd;
4991 5111          sargp->cs = cs;
4992 5112          sargp->flag = 0;        /* may be set later */
4993 5113          sargp->vap->va_mask = 0;
4994 5114          sargp->rdattr_error = NFS4_OK;
4995 5115          sargp->rdattr_error_req = FALSE;
4996 5116          /* sargp->sbp is set by the caller */
4997 5117  
                   /* Decode attribute values straight out of the fattr4 buffer. */
4998 5118          xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4999 5119  
5000 5120          na = ntovp->na;
5001 5121          amap = ntovp->amap;
5002 5122  
5003 5123          /*
5004 5124           * The following loop iterates on the nfs4_ntov_map checking
5005 5125           * if the fbit is set in the requested bitmap.
5006 5126           * If set then we process the arguments using the
5007 5127           * rfs4_fattr4 conversion functions to populate the setattr
5008 5128           * vattr and va_mask. Any settable attrs that are not using vattr
5009 5129           * will be set in this loop.
5010 5130           */
5011 5131          for (i = 0; i < nfs4_ntov_map_size; i++) {
5012 5132                  if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5013 5133                          continue;
5014 5134                  }
5015 5135                  /*
5016 5136                   * If setattr, must be a writable attr.
5017 5137                   * If verify/nverify, must be a readable attr.
5018 5138                   */
5019 5139                  if ((error = (*nfs4_ntov_map[i].sv_getit)(
5020 5140                      NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5021 5141                          /*
5022 5142                           * Client tries to set/verify an
5023 5143                           * unsupported attribute, tries to set
5024 5144                           * a read only attr or verify a write
5025 5145                           * only one - error!
5026 5146                           */
5027 5147                          break;
5028 5148                  }
5029 5149                  /*
5030 5150                   * Decode the attribute to set/verify
5031 5151                   */
                           /* "resp ? resp : NULL" evaluates to resp in either case. */
5032 5152                  error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5033 5153                      &xdr, resp ? resp : NULL, na);
5034 5154                  if (error)
5035 5155                          break;
                           /* Record the decoded attribute in the next ntov slot. */
5036 5156                  *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5037 5157                  na++;
5038 5158                  (ntovp->attrcnt)++;
5039 5159                  if (nfs4_ntov_map[i].vfsstat)
5040 5160                          ntovp->vfsstat = TRUE;
5041 5161          }
5042 5162  
5043 5163          if (error != 0)
5044 5164                  status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5045 5165                      puterrno4(error));
5046 5166          /* xdrmem_destroy(&xdrs); */    /* NO-OP */
5047 5167          return (status);
5048 5168  }
5049 5169  
           /*
            * Apply the attributes in fattrp to cs->vp on behalf of SETATTR.
            *
            * Arguments:
            *   resp    - out: bitmap of attributes reported as set.  Zeroed on
            *             entry and masked with fattrp->attrmask before return.
            *   fattrp  - requested attribute mask and encoded values.
            *   cs      - compound state; supplies the vnode and credential.
            *   stateid - checked with rfs4_check_stateid() only when the
            *             request sets the size.
            *
            * Outline:
            *   1. do_rfs4_set_attrs() decodes fattrp into sarg.vap and ntov.
            *   2. A request carrying both ACL and MODE may differ from the
            *      current mode only in VSUID, VSGID and VSVTX.
            *   3. A size change on a regular file is checked against NBMAND
            *      locks and, when the caller's uid matches the file's owner,
            *      is carried out with VOP_SPACE() instead of VOP_SETATTR().
            *   4. Whatever remains in va_mask goes through VOP_SETATTR(); the
            *      ACL, if requested, is set last.
            *   5. On error the reply bitmap is built by rfs4_verify_attr(); on
            *      success metadata is fsync'ed and the bitmap is derived from
            *      va_mask.
            *
            * Returns an nfsstat4.  Every exit goes through "done", which ends
            * the NBMAND critical region if one was entered and frees ntov.
            */
5050 5170  static nfsstat4
5051 5171  do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5052 5172      stateid4 *stateid)
5053 5173  {
5054 5174          int error = 0;
5055 5175          struct nfs4_svgetit_arg sarg;
5056 5176          bool_t trunc;
5057 5177  
5058 5178          nfsstat4 status = NFS4_OK;
5059 5179          cred_t *cr = cs->cr;
5060 5180          vnode_t *vp = cs->vp;
5061 5181          struct nfs4_ntov_table ntov;
5062 5182          struct statvfs64 sb;
5063 5183          struct vattr bva;
5064 5184          struct flock64 bf;
5065 5185          int in_crit = 0;
5066 5186          uint_t saved_mask = 0;
5067 5187          caller_context_t ct;
5068 5188  
5069 5189          *resp = 0;
5070 5190          sarg.sbp = &sb;
5071 5191          sarg.is_referral = B_FALSE;
5072 5192          nfs4_ntov_table_init(&ntov);
5073 5193          status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5074 5194              NFS4ATTR_SETIT);
5075 5195          if (status != NFS4_OK) {
5076 5196                  /*
5077 5197                   * failed set attrs
5078 5198                   */
5079 5199                  goto done;
5080 5200          }
5081 5201          if ((sarg.vap->va_mask == 0) &&
5082 5202              (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5083 5203                  /*
5084 5204                   * no further work to be done
5085 5205                   */
5086 5206                  goto done;
5087 5207          }
5088 5208  
5089 5209          /*
5090 5210           * If we got a request to set the ACL and the MODE, only
5091 5211           * allow changing VSUID, VSGID, and VSVTX.  Attempting
5092 5212           * to change any other bits, along with setting an ACL,
5093 5213           * gives NFS4ERR_INVAL.
5094 5214           */
5095 5215          if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5096 5216              (fattrp->attrmask & FATTR4_MODE_MASK)) {
5097 5217                  vattr_t va;
5098 5218  
5099 5219                  va.va_mask = AT_MODE;
5100 5220                  error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5101 5221                  if (error) {
5102 5222                          status = puterrno4(error);
5103 5223                          goto done;
5104 5224                  }
                           /* XOR leaves the bits that differ between requested and current */
5105 5225                  if ((sarg.vap->va_mode ^ va.va_mode) &
5106 5226                      ~(VSUID | VSGID | VSVTX)) {
5107 5227                          status = NFS4ERR_INVAL;
5108 5228                          goto done;
5109 5229                  }
5110 5230          }
5111 5231  
5112 5232          /* Check stateid only if size has been set */
5113 5233          if (sarg.vap->va_mask & AT_SIZE) {
5114 5234                  trunc = (sarg.vap->va_size == 0);
5115 5235                  status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5116 5236                      trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5117 5237                  if (status != NFS4_OK)
5118 5238                          goto done;
5119 5239          } else {
                           /*
                            * rfs4_check_stateid() (which is handed &ct above) was
                            * not called, so fill in the caller context here.
                            */
5120 5240                  ct.cc_sysid = 0;
5121 5241                  ct.cc_pid = 0;
5122 5242                  ct.cc_caller_id = nfs4_srv_caller_id;
5123 5243                  ct.cc_flags = CC_DONTBLOCK;
5124 5244          }
5125 5245  
5126 5246          /* XXX start of possible race with delegations */
5127 5247  
5128 5248          /*
5129 5249           * We need to specially handle size changes because it is
5130 5250           * possible for the client to create a file with read-only
5131 5251           * modes, but with the file opened for writing. If the client
5132 5252           * then tries to set the file size, e.g. ftruncate(3C),
5133 5253           * fcntl(F_FREESP), the normal access checking done in
5134 5254           * VOP_SETATTR would prevent the client from doing it even though
5135 5255           * it should be allowed to do so.  To get around this, we do the
5136 5256           * access checking for ourselves and use VOP_SPACE which doesn't
5137 5257           * do the access checking.
5138 5258           * Also the client should not be allowed to change the file
5139 5259           * size if there is a conflicting non-blocking mandatory lock in
5140 5260           * the region of the change.
5141 5261           */
5142 5262          if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5143 5263                  u_offset_t offset;
5144 5264                  ssize_t length;
5145 5265  
5146 5266                  /*
5147 5267                   * ufs_setattr clears AT_SIZE from vap->va_mask, but
5148 5268                   * before returning, sarg.vap->va_mask is used to
5149 5269                   * generate the setattr reply bitmap.  We also clear
5150 5270                   * AT_SIZE below before calling VOP_SPACE.  For both
5151 5271                   * of these cases, the va_mask needs to be saved here
5152 5272                   * and restored after calling VOP_SETATTR.
5153 5273                   */
5154 5274                  saved_mask = sarg.vap->va_mask;
5155 5275  
5156 5276                  /*
5157 5277                   * Check any possible conflict due to NBMAND locks.
5158 5278                   * Get into critical region before VOP_GETATTR, so the
5159 5279                   * size attribute is valid when checking conflicts.
5160 5280                   */
5161 5281                  if (nbl_need_check(vp)) {
5162 5282                          nbl_start_crit(vp, RW_READER);
5163 5283                          in_crit = 1;
5164 5284                  }
5165 5285  
5166 5286                  bva.va_mask = AT_UID|AT_SIZE;
5167 5287                  if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5168 5288                          status = puterrno4(error);
5169 5289                          goto done;
5170 5290                  }
5171 5291  
5172 5292                  if (in_crit) {
                                   /*
                                    * The affected region lies between the old and
                                    * new sizes, whichever of the two is smaller.
                                    */
5173 5293                          if (sarg.vap->va_size < bva.va_size) {
5174 5294                                  offset = sarg.vap->va_size;
5175 5295                                  length = bva.va_size - sarg.vap->va_size;
5176 5296                          } else {
5177 5297                                  offset = bva.va_size;
5178 5298                                  length = sarg.vap->va_size - bva.va_size;
5179 5299                          }
5180 5300                          if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5181 5301                              &ct)) {
5182 5302                                  status = NFS4ERR_LOCKED;
5183 5303                                  goto done;
5184 5304                          }
5185 5305                  }
5186 5306  
                           /*
                            * Caller's uid matches the file's owner: take AT_SIZE out
                            * of the mask and change the size with VOP_SPACE(F_FREESP)
                            * instead of VOP_SETATTR().
                            */
5187 5307                  if (crgetuid(cr) == bva.va_uid) {
5188 5308                          sarg.vap->va_mask &= ~AT_SIZE;
5189 5309                          bf.l_type = F_WRLCK;
5190 5310                          bf.l_whence = 0;
5191 5311                          bf.l_start = (off64_t)sarg.vap->va_size;
5192 5312                          bf.l_len = 0;
5193 5313                          bf.l_sysid = 0;
5194 5314                          bf.l_pid = 0;
5195 5315                          error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5196 5316                              (offset_t)sarg.vap->va_size, cr, &ct);
5197 5317                  }
5198 5318          }
5199 5319  
                   /* Skipped when VOP_SPACE failed or nothing is left in the mask. */
5200 5320          if (!error && sarg.vap->va_mask != 0)
5201 5321                  error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5202 5322  
5203 5323          /* restore va_mask -- ufs_setattr clears AT_SIZE */
5204 5324          if (saved_mask & AT_SIZE)
5205 5325                  sarg.vap->va_mask |= AT_SIZE;
5206 5326  
5207 5327          /*
5208 5328           * If an ACL was being set, it has been delayed until now,
5209 5329           * in order to set the mode (via the VOP_SETATTR() above) first.
5210 5330           */
5211 5331          if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5212 5332                  int i;
5213 5333  
                           /* Find the ntov slot that holds the decoded ACL. */
5214 5334                  for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5215 5335                          if (ntov.amap[i] == FATTR4_ACL)
5216 5336                                  break;
5217 5337                  if (i < NFS4_MAXNUM_ATTRS) {
5218 5338                          error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5219 5339                              NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5220 5340                          if (error == 0) {
5221 5341                                  *resp |= FATTR4_ACL_MASK;
5222 5342                          } else if (error == ENOTSUP) {
5223 5343                                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5224 5344                                  status = NFS4ERR_ATTRNOTSUPP;
5225 5345                                  goto done;
5226 5346                          }
                                   /* Any other ACL error is handled by the block below. */
5227 5347                  } else {
5228 5348                          NFS4_DEBUG(rfs4_debug,
5229 5349                              (CE_NOTE, "do_rfs4_op_setattr: "
5230 5350                              "unable to find ACL in fattr4"));
5231 5351                          error = EINVAL;
5232 5352                  }
5233 5353          }
5234 5354  
5235 5355          if (error) {
5236 5356                  /* check if a monitor detected a delegation conflict */
5237 5357                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5238 5358                          status = NFS4ERR_DELAY;
5239 5359                  else
5240 5360                          status = puterrno4(error);
5241 5361  
5242 5362                  /*
5243 5363                   * Set the response bitmap when setattr failed.
5244 5364                   * If VOP_SETATTR partially succeeded, test by doing a
5245 5365                   * VOP_GETATTR on the object and comparing the data
5246 5366                   * to the setattr arguments.
5247 5367                   */
5248 5368                  (void) rfs4_verify_attr(&sarg, resp, &ntov);
5249 5369          } else {
5250 5370                  /*
5251 5371                   * Force modified metadata out to stable storage.
5252 5372                   */
5253 5373                  (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5254 5374                  /*
5255 5375                   * Set response bitmap
5256 5376                   */
5257 5377                  nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5258 5378          }
5259 5379  
5260 5380  /* Return early and already have a NFSv4 error */
5261 5381  done:
5262 5382          /*
5263 5383           * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5264 5384           * conversion sets both readable and writeable NFS4 attrs
5265 5385           * for AT_MTIME and AT_ATIME.  The line below masks out
5266 5386           * unrequested attrs from the setattr result bitmap.  This
5267 5387           * is placed after the done: label to catch the ATTRNOTSUPP
5268 5388           * case.
5269 5389           */
5270 5390          *resp &= fattrp->attrmask;
5271 5391  
5272 5392          if (in_crit)
5273 5393                  nbl_end_crit(vp);
5274 5394  
5275 5395          nfs4_ntov_table_free(&ntov, &sarg);
5276 5396  
5277 5397          return (status);
5278 5398  }
5279 5399  
5280 5400  /* ARGSUSED */
5281 5401  static void
5282 5402  rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5283 5403      struct compound_state *cs)
5284 5404  {
5285 5405          SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5286 5406          SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5287 5407          bslabel_t *clabel;
5288 5408  
5289 5409          DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5290 5410              SETATTR4args *, args);
5291 5411  
5292 5412          if (cs->vp == NULL) {
5293 5413                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5294 5414                  goto out;
5295 5415          }
5296 5416  
5297 5417          /*
5298 5418           * If there is an unshared filesystem mounted on this vnode,
5299 5419           * do not allow setattr on this vnode.
5300 5420           */
5301 5421          if (vn_ismntpt(cs->vp)) {
5302 5422                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5303 5423                  goto out;
5304 5424          }
5305 5425  
5306 5426          resp->attrsset = 0;
5307 5427  
5308 5428          if (rdonly4(req, cs)) {
5309 5429                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5310 5430                  goto out;
5311 5431          }
5312 5432  
5313 5433          /* check label before setting attributes */
5314 5434          if (is_system_labeled()) {
5315 5435                  ASSERT(req->rq_label != NULL);
5316 5436                  clabel = req->rq_label;
5317 5437                  DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5318 5438                      "got client label from request(1)",
5319 5439                      struct svc_req *, req);
5320 5440                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
5321 5441                          if (!do_rfs_label_check(clabel, cs->vp,
5322 5442                              EQUALITY_CHECK, cs->exi)) {
5323 5443                                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5324 5444                                  goto out;
5325 5445                          }
5326 5446                  }
5327 5447          }
5328 5448  
5329 5449          *cs->statusp = resp->status =
5330 5450              do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5331 5451              &args->stateid);
5332 5452  
5333 5453  out:
5334 5454          DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5335 5455              SETATTR4res *, resp);
5336 5456  }
5337 5457  
5338 5458  /* ARGSUSED */
5339 5459  static void
5340 5460  rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5341 5461      struct compound_state *cs)
5342 5462  {
5343 5463          /*
5344 5464           * verify and nverify are exactly the same, except that nverify
5345 5465           * succeeds when some argument changed, and verify succeeds when
5346 5466           * none changed.
5347 5467           */
5348 5468  
5349 5469          VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5350 5470          VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5351 5471  
5352 5472          int error;
5353 5473          struct nfs4_svgetit_arg sarg;
5354 5474          struct statvfs64 sb;
5355 5475          struct nfs4_ntov_table ntov;
5356 5476  
5357 5477          DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5358 5478              VERIFY4args *, args);
5359 5479  
5360 5480          if (cs->vp == NULL) {
5361 5481                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5362 5482                  goto out;
5363 5483          }
5364 5484  
5365 5485          sarg.sbp = &sb;
5366 5486          sarg.is_referral = B_FALSE;
5367 5487          nfs4_ntov_table_init(&ntov);
5368 5488          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5369 5489              &sarg, &ntov, NFS4ATTR_VERIT);
5370 5490          if (resp->status != NFS4_OK) {
5371 5491                  /*
5372 5492                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5373 5493                   * so could return -1 for "no match".
5374 5494                   */
5375 5495                  if (resp->status == -1)
5376 5496                          resp->status = NFS4ERR_NOT_SAME;
5377 5497                  goto done;
5378 5498          }
5379 5499          error = rfs4_verify_attr(&sarg, NULL, &ntov);
5380 5500          switch (error) {
5381 5501          case 0:
5382 5502                  resp->status = NFS4_OK;
5383 5503                  break;
5384 5504          case -1:
5385 5505                  resp->status = NFS4ERR_NOT_SAME;
5386 5506                  break;
5387 5507          default:
5388 5508                  resp->status = puterrno4(error);
5389 5509                  break;
5390 5510          }
5391 5511  done:
5392 5512          *cs->statusp = resp->status;
5393 5513          nfs4_ntov_table_free(&ntov, &sarg);
5394 5514  out:
5395 5515          DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5396 5516              VERIFY4res *, resp);
5397 5517  }
5398 5518  
5399 5519  /* ARGSUSED */
5400 5520  static void
5401 5521  rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5402 5522      struct compound_state *cs)
5403 5523  {
5404 5524          /*
5405 5525           * verify and nverify are exactly the same, except that nverify
5406 5526           * succeeds when some argument changed, and verify succeeds when
5407 5527           * none changed.
5408 5528           */
5409 5529  
5410 5530          NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5411 5531          NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5412 5532  
5413 5533          int error;
5414 5534          struct nfs4_svgetit_arg sarg;
5415 5535          struct statvfs64 sb;
5416 5536          struct nfs4_ntov_table ntov;
5417 5537  
5418 5538          DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5419 5539              NVERIFY4args *, args);
5420 5540  
5421 5541          if (cs->vp == NULL) {
5422 5542                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5423 5543                  DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5424 5544                      NVERIFY4res *, resp);
5425 5545                  return;
5426 5546          }
5427 5547          sarg.sbp = &sb;
5428 5548          sarg.is_referral = B_FALSE;
5429 5549          nfs4_ntov_table_init(&ntov);
5430 5550          resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5431 5551              &sarg, &ntov, NFS4ATTR_VERIT);
5432 5552          if (resp->status != NFS4_OK) {
5433 5553                  /*
5434 5554                   * do_rfs4_set_attrs will try to verify systemwide attrs,
5435 5555                   * so could return -1 for "no match".
5436 5556                   */
5437 5557                  if (resp->status == -1)
5438 5558                          resp->status = NFS4_OK;
5439 5559                  goto done;
5440 5560          }
5441 5561          error = rfs4_verify_attr(&sarg, NULL, &ntov);
5442 5562          switch (error) {
5443 5563          case 0:
5444 5564                  resp->status = NFS4ERR_SAME;
5445 5565                  break;
5446 5566          case -1:
5447 5567                  resp->status = NFS4_OK;
5448 5568                  break;
5449 5569          default:
5450 5570                  resp->status = puterrno4(error);
5451 5571                  break;
5452 5572          }
5453 5573  done:
5454 5574          *cs->statusp = resp->status;
5455 5575          nfs4_ntov_table_free(&ntov, &sarg);
5456 5576  
5457 5577          DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5458 5578              NVERIFY4res *, resp);
5459 5579  }
5460 5580  
5461 5581  /*
5462 5582   * XXX - This should live in an NFS header file.
5463 5583   */
5464 5584  #define MAX_IOVECS      12
5465 5585  
5466 5586  /* ARGSUSED */
5467 5587  static void
5468 5588  rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5469 5589      struct compound_state *cs)
5470 5590  {
5471 5591          WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5472 5592          WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5473 5593          int error;
5474 5594          vnode_t *vp;
5475 5595          struct vattr bva;
5476 5596          u_offset_t rlimit;
  
    | 
      ↓ open down ↓ | 
    916 lines elided | 
    
      ↑ open up ↑ | 
  
5477 5597          struct uio uio;
5478 5598          struct iovec iov[MAX_IOVECS];
5479 5599          struct iovec *iovp;
5480 5600          int iovcnt;
5481 5601          int ioflag;
5482 5602          cred_t *savecred, *cr;
5483 5603          bool_t *deleg = &cs->deleg;
5484 5604          nfsstat4 stat;
5485 5605          int in_crit = 0;
5486 5606          caller_context_t ct;
     5607 +        nfs4_srv_t *nsrv4;
5487 5608  
5488 5609          DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5489 5610              WRITE4args *, args);
5490 5611  
5491 5612          vp = cs->vp;
5492 5613          if (vp == NULL) {
5493 5614                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5494 5615                  goto out;
5495 5616          }
5496 5617          if (cs->access == CS_ACCESS_DENIED) {
5497 5618                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5498 5619                  goto out;
5499 5620          }
5500 5621  
5501 5622          cr = cs->cr;
5502 5623  
5503 5624          if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5504 5625              deleg, TRUE, &ct)) != NFS4_OK) {
5505 5626                  *cs->statusp = resp->status = stat;
5506 5627                  goto out;
5507 5628          }
5508 5629  
5509 5630          /*
5510 5631           * We have to enter the critical region before calling VOP_RWLOCK
5511 5632           * to avoid a deadlock with ufs.
5512 5633           */
5513 5634          if (nbl_need_check(vp)) {
5514 5635                  nbl_start_crit(vp, RW_READER);
5515 5636                  in_crit = 1;
5516 5637                  if (nbl_conflict(vp, NBL_WRITE,
5517 5638                      args->offset, args->data_len, 0, &ct)) {
5518 5639                          *cs->statusp = resp->status = NFS4ERR_LOCKED;
5519 5640                          goto out;
5520 5641                  }
5521 5642          }
5522 5643  
5523 5644          bva.va_mask = AT_MODE | AT_UID;
5524 5645          error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5525 5646  
5526 5647          /*
5527 5648           * If we can't get the attributes, then we can't do the
5528 5649           * right access checking.  So, we'll fail the request.
5529 5650           */
5530 5651          if (error) {
5531 5652                  *cs->statusp = resp->status = puterrno4(error);
5532 5653                  goto out;
5533 5654          }
5534 5655  
5535 5656          if (rdonly4(req, cs)) {
5536 5657                  *cs->statusp = resp->status = NFS4ERR_ROFS;
5537 5658                  goto out;
5538 5659          }
5539 5660  
5540 5661          if (vp->v_type != VREG) {
5541 5662                  *cs->statusp = resp->status =
5542 5663                      ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5543 5664                  goto out;
5544 5665          }
5545 5666  
5546 5667          if (crgetuid(cr) != bva.va_uid &&
  
    | 
      ↓ open down ↓ | 
    50 lines elided | 
    
      ↑ open up ↑ | 
  
5547 5668              (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5548 5669                  *cs->statusp = resp->status = puterrno4(error);
5549 5670                  goto out;
5550 5671          }
5551 5672  
5552 5673          if (MANDLOCK(vp, bva.va_mode)) {
5553 5674                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5554 5675                  goto out;
5555 5676          }
5556 5677  
     5678 +        nsrv4 = nfs4_get_srv();
5557 5679          if (args->data_len == 0) {
5558 5680                  *cs->statusp = resp->status = NFS4_OK;
5559 5681                  resp->count = 0;
5560 5682                  resp->committed = args->stable;
5561      -                resp->writeverf = Write4verf;
     5683 +                resp->writeverf = nsrv4->write4verf;
5562 5684                  goto out;
5563 5685          }
5564 5686  
5565 5687          if (args->mblk != NULL) {
5566 5688                  mblk_t *m;
5567 5689                  uint_t bytes, round_len;
5568 5690  
5569 5691                  iovcnt = 0;
5570 5692                  bytes = 0;
5571 5693                  round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5572 5694                  for (m = args->mblk;
5573 5695                      m != NULL && bytes < round_len;
5574 5696                      m = m->b_cont) {
5575 5697                          iovcnt++;
5576 5698                          bytes += MBLKL(m);
5577 5699                  }
5578 5700  #ifdef DEBUG
5579 5701                  /* should have ended on an mblk boundary */
5580 5702                  if (bytes != round_len) {
5581 5703                          printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5582 5704                              bytes, round_len, args->data_len);
5583 5705                          printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5584 5706                              (void *)args->mblk, (void *)m);
5585 5707                          ASSERT(bytes == round_len);
5586 5708                  }
5587 5709  #endif
5588 5710                  if (iovcnt <= MAX_IOVECS) {
5589 5711                          iovp = iov;
5590 5712                  } else {
5591 5713                          iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5592 5714                  }
5593 5715                  mblk_to_iov(args->mblk, iovcnt, iovp);
5594 5716          } else if (args->rlist != NULL) {
5595 5717                  iovcnt = 1;
5596 5718                  iovp = iov;
5597 5719                  iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5598 5720                  iovp->iov_len = args->data_len;
5599 5721          } else {
5600 5722                  iovcnt = 1;
5601 5723                  iovp = iov;
5602 5724                  iovp->iov_base = args->data_val;
5603 5725                  iovp->iov_len = args->data_len;
5604 5726          }
5605 5727  
5606 5728          uio.uio_iov = iovp;
5607 5729          uio.uio_iovcnt = iovcnt;
5608 5730  
5609 5731          uio.uio_segflg = UIO_SYSSPACE;
5610 5732          uio.uio_extflg = UIO_COPY_DEFAULT;
5611 5733          uio.uio_loffset = args->offset;
5612 5734          uio.uio_resid = args->data_len;
5613 5735          uio.uio_llimit = curproc->p_fsz_ctl;
5614 5736          rlimit = uio.uio_llimit - args->offset;
5615 5737          if (rlimit < (u_offset_t)uio.uio_resid)
5616 5738                  uio.uio_resid = (int)rlimit;
5617 5739  
5618 5740          if (args->stable == UNSTABLE4)
5619 5741                  ioflag = 0;
5620 5742          else if (args->stable == FILE_SYNC4)
5621 5743                  ioflag = FSYNC;
5622 5744          else if (args->stable == DATA_SYNC4)
5623 5745                  ioflag = FDSYNC;
5624 5746          else {
5625 5747                  if (iovp != iov)
5626 5748                          kmem_free(iovp, sizeof (*iovp) * iovcnt);
5627 5749                  *cs->statusp = resp->status = NFS4ERR_INVAL;
5628 5750                  goto out;
5629 5751          }
5630 5752  
5631 5753          /*
5632 5754           * We're changing creds because VM may fault and we need
5633 5755           * the cred of the current thread to be used if quota
5634 5756           * checking is enabled.
5635 5757           */
5636 5758          savecred = curthread->t_cred;
5637 5759          curthread->t_cred = cr;
5638 5760          error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5639 5761          curthread->t_cred = savecred;
5640 5762  
5641 5763          if (iovp != iov)
5642 5764                  kmem_free(iovp, sizeof (*iovp) * iovcnt);
5643 5765  
5644 5766          if (error) {
5645 5767                  *cs->statusp = resp->status = puterrno4(error);
5646 5768                  goto out;
  
    | 
      ↓ open down ↓ | 
    75 lines elided | 
    
      ↑ open up ↑ | 
  
5647 5769          }
5648 5770  
5649 5771          *cs->statusp = resp->status = NFS4_OK;
5650 5772          resp->count = args->data_len - uio.uio_resid;
5651 5773  
5652 5774          if (ioflag == 0)
5653 5775                  resp->committed = UNSTABLE4;
5654 5776          else
5655 5777                  resp->committed = FILE_SYNC4;
5656 5778  
5657      -        resp->writeverf = Write4verf;
     5779 +        resp->writeverf = nsrv4->write4verf;
5658 5780  
5659 5781  out:
5660 5782          if (in_crit)
5661 5783                  nbl_end_crit(vp);
5662 5784  
5663 5785          DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5664 5786              WRITE4res *, resp);
5665 5787  }
5666 5788  
5667 5789  
5668 5790  /* XXX put in a header file */
5669 5791  extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5670 5792  
5671 5793  void
5672 5794  rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5673 5795      struct svc_req *req, cred_t *cr, int *rv)
5674 5796  {
5675 5797          uint_t i;
5676 5798          struct compound_state cs;
     5799 +        nfs4_srv_t *nsrv4;
     5800 +        nfs_export_t *ne = nfs_get_export();
5677 5801  
5678 5802          if (rv != NULL)
5679 5803                  *rv = 0;
5680 5804          rfs4_init_compound_state(&cs);
5681 5805          /*
5682      -         * Form a reply tag by copying over the reqeuest tag.
     5806 +         * Form a reply tag by copying over the request tag.
5683 5807           */
5684      -        resp->tag.utf8string_val =
5685      -            kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5686 5808          resp->tag.utf8string_len = args->tag.utf8string_len;
5687      -        bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5688      -            resp->tag.utf8string_len);
     5809 +        if (args->tag.utf8string_len != 0) {
     5810 +                resp->tag.utf8string_val =
     5811 +                    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
     5812 +                bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
     5813 +                    resp->tag.utf8string_len);
     5814 +        } else {
     5815 +                resp->tag.utf8string_val = NULL;
     5816 +        }
5689 5817  
5690 5818          cs.statusp = &resp->status;
5691 5819          cs.req = req;
5692 5820          resp->array = NULL;
5693 5821          resp->array_len = 0;
5694 5822  
5695 5823          /*
5696 5824           * XXX for now, minorversion should be zero
5697 5825           */
5698 5826          if (args->minorversion != NFS4_MINORVERSION) {
5699 5827                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5700 5828                      &cs, COMPOUND4args *, args);
5701 5829                  resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5702 5830                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5703 5831                      &cs, COMPOUND4res *, resp);
5704 5832                  return;
5705 5833          }
5706 5834  
5707 5835          if (args->array_len == 0) {
5708 5836                  resp->status = NFS4_OK;
5709 5837                  return;
5710 5838          }
5711 5839  
5712 5840          ASSERT(exi == NULL);
5713 5841          ASSERT(cr == NULL);
5714 5842  
5715 5843          cr = crget();
5716 5844          ASSERT(cr != NULL);
5717 5845  
5718 5846          if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5719 5847                  DTRACE_NFSV4_2(compound__start, struct compound_state *,
5720 5848                      &cs, COMPOUND4args *, args);
5721 5849                  crfree(cr);
5722 5850                  DTRACE_NFSV4_2(compound__done, struct compound_state *,
5723 5851                      &cs, COMPOUND4res *, resp);
  
    | 
      ↓ open down ↓ | 
    25 lines elided | 
    
      ↑ open up ↑ | 
  
5724 5852                  svcerr_badcred(req->rq_xprt);
5725 5853                  if (rv != NULL)
5726 5854                          *rv = 1;
5727 5855                  return;
5728 5856          }
5729 5857          resp->array_len = args->array_len;
5730 5858          resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5731 5859              KM_SLEEP);
5732 5860  
5733 5861          cs.basecr = cr;
     5862 +        nsrv4 = nfs4_get_srv();
5734 5863  
5735 5864          DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5736 5865              COMPOUND4args *, args);
5737 5866  
5738 5867          /*
5739 5868           * For now, NFS4 compound processing must be protected by
5740 5869           * exported_lock because it can access more than one exportinfo
5741 5870           * per compound and share/unshare can now change multiple
5742 5871           * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5743 5872           * per proc (excluding public exinfo), and exi_count design
5744 5873           * is sufficient to protect concurrent execution of NFS2/3
5745 5874           * ops along with unexport.  This lock will be removed as
5746 5875           * part of the NFSv4 phase 2 namespace redesign work.
5747 5876           */
5748      -        rw_enter(&exported_lock, RW_READER);
     5877 +        rw_enter(&ne->exported_lock, RW_READER);
5749 5878  
5750 5879          /*
5751 5880           * If this is the first compound we've seen, we need to start all
5752 5881           * new instances' grace periods.
5753 5882           */
5754      -        if (rfs4_seen_first_compound == 0) {
5755      -                rfs4_grace_start_new();
     5883 +        if (nsrv4->seen_first_compound == 0) {
     5884 +                rfs4_grace_start_new(nsrv4);
5756 5885                  /*
5757 5886                   * This must be set after rfs4_grace_start_new(), otherwise
5758 5887                   * another thread could proceed past here before the former
5759 5888                   * is finished.
5760 5889                   */
5761      -                rfs4_seen_first_compound = 1;
     5890 +                nsrv4->seen_first_compound = 1;
5762 5891          }
5763 5892  
5764 5893          for (i = 0; i < args->array_len && cs.cont; i++) {
5765 5894                  nfs_argop4 *argop;
5766 5895                  nfs_resop4 *resop;
5767 5896                  uint_t op;
     5897 +                kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
5768 5898  
5769 5899                  argop = &args->array[i];
5770 5900                  resop = &resp->array[i];
5771 5901                  resop->resop = argop->argop;
5772 5902                  op = (uint_t)resop->resop;
5773 5903  
5774 5904                  if (op < rfsv4disp_cnt) {
5775 5905                          /*
5776 5906                           * Count the individual ops here; NULL and COMPOUND
5777 5907                           * are counted in common_dispatch()
5778 5908                           */
5779      -                        rfsproccnt_v4_ptr[op].value.ui64++;
     5909 +                        stat[op].value.ui64++;
5780 5910  
5781 5911                          NFS4_DEBUG(rfs4_debug > 1,
5782 5912                              (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5783 5913                          (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5784 5914                          NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5785 5915                              rfs4_op_string[op], *cs.statusp));
5786 5916                          if (*cs.statusp != NFS4_OK)
5787 5917                                  cs.cont = FALSE;
5788 5918                  } else {
5789 5919                          /*
5790 5920                           * This is effectively dead code since XDR code
5791 5921                           * will have already returned BADXDR if op doesn't
5792 5922                           * decode to legal value.  This is only done for a
5793 5923                           * day when XDR code doesn't verify v4 opcodes.
5794 5924                           */
5795 5925                          op = OP_ILLEGAL;
5796      -                        rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
     5926 +                        stat[OP_ILLEGAL_IDX].value.ui64++;
5797 5927  
5798 5928                          rfs4_op_illegal(argop, resop, req, &cs);
5799 5929                          cs.cont = FALSE;
5800 5930                  }
5801 5931  
5802 5932                  /*
5803 5933                   * If not at last op, and if we are to stop, then
5804 5934                   * compact the results array.
5805 5935                   */
5806 5936                  if ((i + 1) < args->array_len && !cs.cont) {
5807 5937                          nfs_resop4 *new_res = kmem_alloc(
5808 5938                              (i+1) * sizeof (nfs_resop4), KM_SLEEP);
  
    | 
      ↓ open down ↓ | 
    2 lines elided | 
    
      ↑ open up ↑ | 
  
5809 5939                          bcopy(resp->array,
5810 5940                              new_res, (i+1) * sizeof (nfs_resop4));
5811 5941                          kmem_free(resp->array,
5812 5942                              args->array_len * sizeof (nfs_resop4));
5813 5943  
5814 5944                          resp->array_len =  i + 1;
5815 5945                          resp->array = new_res;
5816 5946                  }
5817 5947          }
5818 5948  
5819      -        rw_exit(&exported_lock);
     5949 +        rw_exit(&ne->exported_lock);
5820 5950  
5821      -        DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5822      -            COMPOUND4res *, resp);
5823      -
     5951 +        /*
     5952 +         * clear exportinfo and vnode fields from compound_state before dtrace
     5953 +         * probe, to avoid tracing residual values for path and share path.
     5954 +         */
5824 5955          if (cs.vp)
5825 5956                  VN_RELE(cs.vp);
5826 5957          if (cs.saved_vp)
5827 5958                  VN_RELE(cs.saved_vp);
     5959 +        cs.exi = cs.saved_exi = NULL;
     5960 +        cs.vp = cs.saved_vp = NULL;
     5961 +
     5962 +        DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
     5963 +            COMPOUND4res *, resp);
     5964 +
5828 5965          if (cs.saved_fh.nfs_fh4_val)
5829 5966                  kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5830 5967  
5831 5968          if (cs.basecr)
5832 5969                  crfree(cs.basecr);
5833 5970          if (cs.cr)
5834 5971                  crfree(cs.cr);
5835 5972          /*
5836 5973           * done with this compound request, free the label
5837 5974           */
5838 5975  
5839 5976          if (req->rq_label != NULL) {
5840 5977                  kmem_free(req->rq_label, sizeof (bslabel_t));
5841 5978                  req->rq_label = NULL;
5842 5979          }
5843 5980  }
5844 5981  
5845 5982  /*
5846 5983   * XXX because of what appears to be duplicate calls to rfs4_compound_free
5847 5984   * XXX zero out the tag and array values. Need to investigate why the
5848 5985   * XXX calls occur, but at least prevent the panic for now.
5849 5986   */
5850 5987  void
5851 5988  rfs4_compound_free(COMPOUND4res *resp)
5852 5989  {
5853 5990          uint_t i;
5854 5991  
5855 5992          if (resp->tag.utf8string_val) {
5856 5993                  UTF8STRING_FREE(resp->tag)
5857 5994          }
5858 5995  
5859 5996          for (i = 0; i < resp->array_len; i++) {
5860 5997                  nfs_resop4 *resop;
5861 5998                  uint_t op;
5862 5999  
5863 6000                  resop = &resp->array[i];
5864 6001                  op = (uint_t)resop->resop;
5865 6002                  if (op < rfsv4disp_cnt) {
5866 6003                          (*rfsv4disptab[op].dis_resfree)(resop);
5867 6004                  }
5868 6005          }
5869 6006          if (resp->array != NULL) {
5870 6007                  kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5871 6008          }
5872 6009  }
5873 6010  
5874 6011  /*
5875 6012   * Process the value of the compound request rpc flags, as a bit-AND
5876 6013   * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5877 6014   */
5878 6015  void
5879 6016  rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5880 6017  {
5881 6018          int i;
5882 6019          int flag = RPC_ALL;
5883 6020  
5884 6021          for (i = 0; flag && i < args->array_len; i++) {
5885 6022                  uint_t op;
5886 6023  
5887 6024                  op = (uint_t)args->array[i].argop;
5888 6025  
5889 6026                  if (op < rfsv4disp_cnt)
5890 6027                          flag &= rfsv4disptab[op].dis_flags;
5891 6028                  else
5892 6029                          flag = 0;
5893 6030          }
5894 6031          *flagp = flag;
5895 6032  }
5896 6033  
5897 6034  nfsstat4
5898 6035  rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5899 6036  {
5900 6037          nfsstat4 e;
5901 6038  
5902 6039          rfs4_dbe_lock(cp->rc_dbe);
5903 6040  
5904 6041          if (cp->rc_sysidt != LM_NOSYSID) {
5905 6042                  *sp = cp->rc_sysidt;
5906 6043                  e = NFS4_OK;
5907 6044  
5908 6045          } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5909 6046                  *sp = cp->rc_sysidt;
5910 6047                  e = NFS4_OK;
5911 6048  
5912 6049                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5913 6050                      "rfs4_client_sysid: allocated 0x%x\n", *sp));
5914 6051          } else
5915 6052                  e = NFS4ERR_DELAY;
5916 6053  
5917 6054          rfs4_dbe_unlock(cp->rc_dbe);
5918 6055          return (e);
5919 6056  }
5920 6057  
5921 6058  #if defined(DEBUG) && ! defined(lint)
5922 6059  static void lock_print(char *str, int operation, struct flock64 *flk)
5923 6060  {
5924 6061          char *op, *type;
5925 6062  
5926 6063          switch (operation) {
5927 6064          case F_GETLK: op = "F_GETLK";
5928 6065                  break;
5929 6066          case F_SETLK: op = "F_SETLK";
5930 6067                  break;
5931 6068          case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
5932 6069                  break;
5933 6070          default: op = "F_UNKNOWN";
5934 6071                  break;
5935 6072          }
5936 6073          switch (flk->l_type) {
5937 6074          case F_UNLCK: type = "F_UNLCK";
5938 6075                  break;
5939 6076          case F_RDLCK: type = "F_RDLCK";
5940 6077                  break;
5941 6078          case F_WRLCK: type = "F_WRLCK";
5942 6079                  break;
5943 6080          default: type = "F_UNKNOWN";
5944 6081                  break;
5945 6082          }
5946 6083  
5947 6084          ASSERT(flk->l_whence == 0);
5948 6085          cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
5949 6086              str, op, type, (longlong_t)flk->l_start,
5950 6087              flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
5951 6088  }
5952 6089  
5953 6090  #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
5954 6091  #else
5955 6092  #define LOCK_PRINT(d, s, t, f)
5956 6093  #endif
5957 6094  
5958 6095  /*ARGSUSED*/
5959 6096  static bool_t
5960 6097  creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5961 6098  {
5962 6099          return (TRUE);
5963 6100  }
5964 6101  
5965 6102  /*
5966 6103   * Look up the pathname using the vp in cs as the directory vnode.
5967 6104   * cs->vp will be the vnode for the file on success
5968 6105   */
5969 6106  
5970 6107  static nfsstat4
5971 6108  rfs4_lookup(component4 *component, struct svc_req *req,
5972 6109      struct compound_state *cs)
5973 6110  {
5974 6111          char *nm;
5975 6112          uint32_t len;
5976 6113          nfsstat4 status;
5977 6114          struct sockaddr *ca;
5978 6115          char *name;
5979 6116  
5980 6117          if (cs->vp == NULL) {
5981 6118                  return (NFS4ERR_NOFILEHANDLE);
5982 6119          }
5983 6120          if (cs->vp->v_type != VDIR) {
5984 6121                  return (NFS4ERR_NOTDIR);
5985 6122          }
5986 6123  
5987 6124          status = utf8_dir_verify(component);
5988 6125          if (status != NFS4_OK)
5989 6126                  return (status);
5990 6127  
5991 6128          nm = utf8_to_fn(component, &len, NULL);
5992 6129          if (nm == NULL) {
5993 6130                  return (NFS4ERR_INVAL);
5994 6131          }
5995 6132  
5996 6133          if (len > MAXNAMELEN) {
5997 6134                  kmem_free(nm, len);
5998 6135                  return (NFS4ERR_NAMETOOLONG);
5999 6136          }
6000 6137  
6001 6138          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6002 6139          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6003 6140              MAXPATHLEN + 1);
6004 6141  
6005 6142          if (name == NULL) {
6006 6143                  kmem_free(nm, len);
6007 6144                  return (NFS4ERR_INVAL);
6008 6145          }
6009 6146  
6010 6147          status = do_rfs4_op_lookup(name, req, cs);
6011 6148  
6012 6149          if (name != nm)
6013 6150                  kmem_free(name, MAXPATHLEN + 1);
6014 6151  
6015 6152          kmem_free(nm, len);
6016 6153  
6017 6154          return (status);
6018 6155  }
6019 6156  
6020 6157  static nfsstat4
6021 6158  rfs4_lookupfile(component4 *component, struct svc_req *req,
6022 6159      struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6023 6160  {
6024 6161          nfsstat4 status;
6025 6162          vnode_t *dvp = cs->vp;
6026 6163          vattr_t bva, ava, fva;
6027 6164          int error;
6028 6165  
6029 6166          /* Get "before" change value */
6030 6167          bva.va_mask = AT_CTIME|AT_SEQ;
6031 6168          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6032 6169          if (error)
6033 6170                  return (puterrno4(error));
6034 6171  
6035 6172          /* rfs4_lookup may VN_RELE directory */
6036 6173          VN_HOLD(dvp);
6037 6174  
6038 6175          status = rfs4_lookup(component, req, cs);
6039 6176          if (status != NFS4_OK) {
6040 6177                  VN_RELE(dvp);
6041 6178                  return (status);
6042 6179          }
6043 6180  
6044 6181          /*
6045 6182           * Get "after" change value, if it fails, simply return the
6046 6183           * before value.
6047 6184           */
6048 6185          ava.va_mask = AT_CTIME|AT_SEQ;
6049 6186          if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6050 6187                  ava.va_ctime = bva.va_ctime;
6051 6188                  ava.va_seq = 0;
6052 6189          }
6053 6190          VN_RELE(dvp);
6054 6191  
6055 6192          /*
6056 6193           * Validate the file is a file
6057 6194           */
6058 6195          fva.va_mask = AT_TYPE|AT_MODE;
6059 6196          error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6060 6197          if (error)
6061 6198                  return (puterrno4(error));
6062 6199  
6063 6200          if (fva.va_type != VREG) {
6064 6201                  if (fva.va_type == VDIR)
6065 6202                          return (NFS4ERR_ISDIR);
6066 6203                  if (fva.va_type == VLNK)
6067 6204                          return (NFS4ERR_SYMLINK);
6068 6205                  return (NFS4ERR_INVAL);
6069 6206          }
6070 6207  
6071 6208          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6072 6209          NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6073 6210  
6074 6211          /*
6075 6212           * It is undefined if VOP_LOOKUP will change va_seq, so
6076 6213           * cinfo.atomic = TRUE only if we have
6077 6214           * non-zero va_seq's, and they have not changed.
6078 6215           */
6079 6216          if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6080 6217                  cinfo->atomic = TRUE;
6081 6218          else
6082 6219                  cinfo->atomic = FALSE;
6083 6220  
6084 6221          /* Check for mandatory locking */
6085 6222          cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6086 6223          return (check_open_access(access, cs, req));
6087 6224  }
6088 6225  
6089 6226  static nfsstat4
6090 6227  create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6091 6228      cred_t *cr, vnode_t **vpp, bool_t *created)
6092 6229  {
6093 6230          int error;
6094 6231          nfsstat4 status = NFS4_OK;
6095 6232          vattr_t va;
6096 6233  
6097 6234  tryagain:
6098 6235  
6099 6236          /*
6100 6237           * The file open mode used is VWRITE.  If the client needs
6101 6238           * some other semantic, then it should do the access checking
6102 6239           * itself.  It would have been nice to have the file open mode
6103 6240           * passed as part of the arguments.
6104 6241           */
6105 6242  
6106 6243          *created = TRUE;
6107 6244          error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6108 6245  
6109 6246          if (error) {
6110 6247                  *created = FALSE;
6111 6248  
6112 6249                  /*
6113 6250                   * If we got something other than file already exists
6114 6251                   * then just return this error.  Otherwise, we got
6115 6252                   * EEXIST.  If we were doing a GUARDED create, then
6116 6253                   * just return this error.  Otherwise, we need to
6117 6254                   * make sure that this wasn't a duplicate of an
6118 6255                   * exclusive create request.
6119 6256                   *
6120 6257                   * The assumption is made that a non-exclusive create
6121 6258                   * request will never return EEXIST.
6122 6259                   */
6123 6260  
6124 6261                  if (error != EEXIST || mode == GUARDED4) {
6125 6262                          status = puterrno4(error);
6126 6263                          return (status);
6127 6264                  }
6128 6265                  error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6129 6266                      NULL, NULL, NULL);
6130 6267  
6131 6268                  if (error) {
6132 6269                          /*
6133 6270                           * We couldn't find the file that we thought that
6134 6271                           * we just created.  So, we'll just try creating
6135 6272                           * it again.
6136 6273                           */
6137 6274                          if (error == ENOENT)
6138 6275                                  goto tryagain;
6139 6276  
6140 6277                          status = puterrno4(error);
6141 6278                          return (status);
6142 6279                  }
6143 6280  
6144 6281                  if (mode == UNCHECKED4) {
6145 6282                          /* existing object must be regular file */
6146 6283                          if ((*vpp)->v_type != VREG) {
6147 6284                                  if ((*vpp)->v_type == VDIR)
6148 6285                                          status = NFS4ERR_ISDIR;
6149 6286                                  else if ((*vpp)->v_type == VLNK)
6150 6287                                          status = NFS4ERR_SYMLINK;
6151 6288                                  else
6152 6289                                          status = NFS4ERR_INVAL;
6153 6290                                  VN_RELE(*vpp);
6154 6291                                  return (status);
6155 6292                          }
6156 6293  
6157 6294                          return (NFS4_OK);
6158 6295                  }
6159 6296  
6160 6297                  /* Check for duplicate request */
6161 6298                  va.va_mask = AT_MTIME;
6162 6299                  error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6163 6300                  if (!error) {
6164 6301                          /* We found the file */
6165 6302                          const timestruc_t *mtime = &vap->va_mtime;
6166 6303  
6167 6304                          if (va.va_mtime.tv_sec != mtime->tv_sec ||
6168 6305                              va.va_mtime.tv_nsec != mtime->tv_nsec) {
6169 6306                                  /* but its not our creation */
6170 6307                                  VN_RELE(*vpp);
6171 6308                                  return (NFS4ERR_EXIST);
6172 6309                          }
6173 6310                          *created = TRUE; /* retrans of create == created */
6174 6311                          return (NFS4_OK);
6175 6312                  }
6176 6313                  VN_RELE(*vpp);
6177 6314                  return (NFS4ERR_EXIST);
6178 6315          }
6179 6316  
6180 6317          return (NFS4_OK);
6181 6318  }
6182 6319  
6183 6320  static nfsstat4
6184 6321  check_open_access(uint32_t access, struct compound_state *cs,
6185 6322      struct svc_req *req)
6186 6323  {
6187 6324          int error;
6188 6325          vnode_t *vp;
6189 6326          bool_t readonly;
6190 6327          cred_t *cr = cs->cr;
6191 6328  
6192 6329          /* For now we don't allow mandatory locking as per V2/V3 */
6193 6330          if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6194 6331                  return (NFS4ERR_ACCESS);
6195 6332          }
6196 6333  
6197 6334          vp = cs->vp;
6198 6335          ASSERT(cr != NULL && vp->v_type == VREG);
6199 6336  
6200 6337          /*
6201 6338           * If the file system is exported read only and we are trying
6202 6339           * to open for write, then return NFS4ERR_ROFS
6203 6340           */
6204 6341  
6205 6342          readonly = rdonly4(req, cs);
6206 6343  
6207 6344          if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6208 6345                  return (NFS4ERR_ROFS);
6209 6346  
6210 6347          if (access & OPEN4_SHARE_ACCESS_READ) {
6211 6348                  if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6212 6349                      (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6213 6350                          return (NFS4ERR_ACCESS);
6214 6351                  }
6215 6352          }
6216 6353  
6217 6354          if (access & OPEN4_SHARE_ACCESS_WRITE) {
6218 6355                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6219 6356                  if (error)
6220 6357                          return (NFS4ERR_ACCESS);
6221 6358          }
6222 6359  
6223 6360          return (NFS4_OK);
6224 6361  }
6225 6362  
6226 6363  static nfsstat4
6227 6364  rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6228 6365      change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6229 6366  {
6230 6367          struct nfs4_svgetit_arg sarg;
6231 6368          struct nfs4_ntov_table ntov;
6232 6369  
6233 6370          bool_t ntov_table_init = FALSE;
6234 6371          struct statvfs64 sb;
6235 6372          nfsstat4 status;
6236 6373          vnode_t *vp;
6237 6374          vattr_t bva, ava, iva, cva, *vap;
6238 6375          vnode_t *dvp;
6239 6376          timespec32_t *mtime;
6240 6377          char *nm = NULL;
6241 6378          uint_t buflen;
6242 6379          bool_t created;
6243 6380          bool_t setsize = FALSE;
6244 6381          len_t reqsize;
6245 6382          int error;
6246 6383          bool_t trunc;
6247 6384          caller_context_t ct;
6248 6385          component4 *component;
6249 6386          bslabel_t *clabel;
6250 6387          struct sockaddr *ca;
6251 6388          char *name = NULL;
6252 6389  
6253 6390          sarg.sbp = &sb;
6254 6391          sarg.is_referral = B_FALSE;
6255 6392  
6256 6393          dvp = cs->vp;
6257 6394  
6258 6395          /* Check if the file system is read only */
6259 6396          if (rdonly4(req, cs))
6260 6397                  return (NFS4ERR_ROFS);
6261 6398  
6262 6399          /* check the label of including directory */
6263 6400          if (is_system_labeled()) {
6264 6401                  ASSERT(req->rq_label != NULL);
6265 6402                  clabel = req->rq_label;
6266 6403                  DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6267 6404                      "got client label from request(1)",
6268 6405                      struct svc_req *, req);
6269 6406                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
6270 6407                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6271 6408                              cs->exi)) {
6272 6409                                  return (NFS4ERR_ACCESS);
6273 6410                          }
6274 6411                  }
6275 6412          }
6276 6413  
6277 6414          /*
6278 6415           * Get the last component of path name in nm. cs will reference
6279 6416           * the including directory on success.
6280 6417           */
6281 6418          component = &args->open_claim4_u.file;
6282 6419          status = utf8_dir_verify(component);
6283 6420          if (status != NFS4_OK)
6284 6421                  return (status);
6285 6422  
6286 6423          nm = utf8_to_fn(component, &buflen, NULL);
6287 6424  
6288 6425          if (nm == NULL)
6289 6426                  return (NFS4ERR_RESOURCE);
6290 6427  
6291 6428          if (buflen > MAXNAMELEN) {
6292 6429                  kmem_free(nm, buflen);
6293 6430                  return (NFS4ERR_NAMETOOLONG);
6294 6431          }
6295 6432  
6296 6433          bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6297 6434          error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6298 6435          if (error) {
6299 6436                  kmem_free(nm, buflen);
6300 6437                  return (puterrno4(error));
6301 6438          }
6302 6439  
6303 6440          if (bva.va_type != VDIR) {
6304 6441                  kmem_free(nm, buflen);
6305 6442                  return (NFS4ERR_NOTDIR);
6306 6443          }
6307 6444  
6308 6445          NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6309 6446  
6310 6447          switch (args->mode) {
6311 6448          case GUARDED4:
6312 6449                  /*FALLTHROUGH*/
6313 6450          case UNCHECKED4:
6314 6451                  nfs4_ntov_table_init(&ntov);
6315 6452                  ntov_table_init = TRUE;
6316 6453  
6317 6454                  *attrset = 0;
6318 6455                  status = do_rfs4_set_attrs(attrset,
6319 6456                      &args->createhow4_u.createattrs,
6320 6457                      cs, &sarg, &ntov, NFS4ATTR_SETIT);
6321 6458  
6322 6459                  if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6323 6460                      sarg.vap->va_type != VREG) {
6324 6461                          if (sarg.vap->va_type == VDIR)
6325 6462                                  status = NFS4ERR_ISDIR;
6326 6463                          else if (sarg.vap->va_type == VLNK)
6327 6464                                  status = NFS4ERR_SYMLINK;
6328 6465                          else
6329 6466                                  status = NFS4ERR_INVAL;
6330 6467                  }
6331 6468  
6332 6469                  if (status != NFS4_OK) {
6333 6470                          kmem_free(nm, buflen);
6334 6471                          nfs4_ntov_table_free(&ntov, &sarg);
6335 6472                          *attrset = 0;
6336 6473                          return (status);
6337 6474                  }
6338 6475  
6339 6476                  vap = sarg.vap;
6340 6477                  vap->va_type = VREG;
6341 6478                  vap->va_mask |= AT_TYPE;
6342 6479  
6343 6480                  if ((vap->va_mask & AT_MODE) == 0) {
6344 6481                          vap->va_mask |= AT_MODE;
6345 6482                          vap->va_mode = (mode_t)0600;
6346 6483                  }
6347 6484  
6348 6485                  if (vap->va_mask & AT_SIZE) {
6349 6486  
6350 6487                          /* Disallow create with a non-zero size */
6351 6488  
6352 6489                          if ((reqsize = sarg.vap->va_size) != 0) {
6353 6490                                  kmem_free(nm, buflen);
6354 6491                                  nfs4_ntov_table_free(&ntov, &sarg);
6355 6492                                  *attrset = 0;
6356 6493                                  return (NFS4ERR_INVAL);
6357 6494                          }
6358 6495                          setsize = TRUE;
6359 6496                  }
6360 6497                  break;
6361 6498  
6362 6499          case EXCLUSIVE4:
6363 6500                  /* prohibit EXCL create of named attributes */
6364 6501                  if (dvp->v_flag & V_XATTRDIR) {
6365 6502                          kmem_free(nm, buflen);
6366 6503                          *attrset = 0;
6367 6504                          return (NFS4ERR_INVAL);
6368 6505                  }
6369 6506  
6370 6507                  cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6371 6508                  cva.va_type = VREG;
6372 6509                  /*
6373 6510                   * Ensure no time overflows. Assumes underlying
6374 6511                   * filesystem supports at least 32 bits.
6375 6512                   * Truncate nsec to usec resolution to allow valid
6376 6513                   * compares even if the underlying filesystem truncates.
6377 6514                   */
6378 6515                  mtime = (timespec32_t *)&args->createhow4_u.createverf;
6379 6516                  cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6380 6517                  cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6381 6518                  cva.va_mode = (mode_t)0;
6382 6519                  vap = &cva;
6383 6520  
6384 6521                  /*
6385 6522                   * For EXCL create, attrset is set to the server attr
6386 6523                   * used to cache the client's verifier.
6387 6524                   */
6388 6525                  *attrset = FATTR4_TIME_MODIFY_MASK;
6389 6526                  break;
6390 6527          }
6391 6528  
6392 6529          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6393 6530          name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6394 6531              MAXPATHLEN  + 1);
6395 6532  
6396 6533          if (name == NULL) {
6397 6534                  kmem_free(nm, buflen);
6398 6535                  return (NFS4ERR_SERVERFAULT);
6399 6536          }
6400 6537  
6401 6538          status = create_vnode(dvp, name, vap, args->mode,
6402 6539              cs->cr, &vp, &created);
6403 6540          if (nm != name)
6404 6541                  kmem_free(name, MAXPATHLEN + 1);
6405 6542          kmem_free(nm, buflen);
6406 6543  
6407 6544          if (status != NFS4_OK) {
6408 6545                  if (ntov_table_init)
6409 6546                          nfs4_ntov_table_free(&ntov, &sarg);
6410 6547                  *attrset = 0;
6411 6548                  return (status);
6412 6549          }
6413 6550  
6414 6551          trunc = (setsize && !created);
6415 6552  
6416 6553          if (args->mode != EXCLUSIVE4) {
6417 6554                  bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6418 6555  
6419 6556                  /*
6420 6557                   * True verification that object was created with correct
6421 6558                   * attrs is impossible.  The attrs could have been changed
6422 6559                   * immediately after object creation.  If attributes did
6423 6560                   * not verify, the only recourse for the server is to
6424 6561                   * destroy the object.  Maybe if some attrs (like gid)
6425 6562                   * are set incorrectly, the object should be destroyed;
6426 6563                   * however, seems bad as a default policy.  Do we really
6427 6564                   * want to destroy an object over one of the times not
6428 6565                   * verifying correctly?  For these reasons, the server
6429 6566                   * currently sets bits in attrset for createattrs
6430 6567                   * that were set; however, no verification is done.
6431 6568                   *
6432 6569                   * vmask_to_nmask accounts for vattr bits set on create
6433 6570                   *      [do_rfs4_set_attrs() only sets resp bits for
6434 6571                   *       non-vattr/vfs bits.]
6435 6572                   * Mask off any bits we set by default so as not to return
6436 6573                   * more attrset bits than were requested in createattrs
6437 6574                   */
6438 6575                  if (created) {
6439 6576                          nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6440 6577                          *attrset &= createmask;
6441 6578                  } else {
6442 6579                          /*
6443 6580                           * We did not create the vnode (we tried but it
6444 6581                           * already existed).  In this case, the only createattr
6445 6582                           * that the spec allows the server to set is size,
6446 6583                           * and even then, it can only be set if it is 0.
6447 6584                           */
6448 6585                          *attrset = 0;
6449 6586                          if (trunc)
6450 6587                                  *attrset = FATTR4_SIZE_MASK;
6451 6588                  }
6452 6589          }
6453 6590          if (ntov_table_init)
6454 6591                  nfs4_ntov_table_free(&ntov, &sarg);
6455 6592  
6456 6593          /*
6457 6594           * Get the initial "after" sequence number, if it fails,
6458 6595           * set to zero, time to before.
6459 6596           */
6460 6597          iva.va_mask = AT_CTIME|AT_SEQ;
6461 6598          if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6462 6599                  iva.va_seq = 0;
6463 6600                  iva.va_ctime = bva.va_ctime;
6464 6601          }
6465 6602  
6466 6603          /*
6467 6604           * create_vnode attempts to create the file exclusive,
6468 6605           * if it already exists the VOP_CREATE will fail and
6469 6606           * may not increase va_seq. It is atomic if
6470 6607           * we haven't changed the directory, but if it has changed
6471 6608           * we don't know what changed it.
6472 6609           */
6473 6610          if (!created) {
6474 6611                  if (bva.va_seq && iva.va_seq &&
6475 6612                      bva.va_seq == iva.va_seq)
6476 6613                          cinfo->atomic = TRUE;
6477 6614                  else
6478 6615                          cinfo->atomic = FALSE;
6479 6616                  NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6480 6617          } else {
6481 6618                  /*
6482 6619                   * The entry was created, we need to sync the
6483 6620                   * directory metadata.
6484 6621                   */
6485 6622                  (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6486 6623  
6487 6624                  /*
6488 6625                   * Get "after" change value, if it fails, simply return the
6489 6626                   * before value.
6490 6627                   */
6491 6628                  ava.va_mask = AT_CTIME|AT_SEQ;
6492 6629                  if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6493 6630                          ava.va_ctime = bva.va_ctime;
6494 6631                          ava.va_seq = 0;
6495 6632                  }
6496 6633  
6497 6634                  NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6498 6635  
6499 6636                  /*
6500 6637                   * The cinfo->atomic = TRUE only if we have
6501 6638                   * non-zero va_seq's, and it has incremented by exactly one
6502 6639                   * during the create_vnode and it didn't
6503 6640                   * change during the VOP_FSYNC.
6504 6641                   */
6505 6642                  if (bva.va_seq && iva.va_seq && ava.va_seq &&
6506 6643                      iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6507 6644                          cinfo->atomic = TRUE;
6508 6645                  else
6509 6646                          cinfo->atomic = FALSE;
6510 6647          }
6511 6648  
6512 6649          /* Check for mandatory locking and that the size gets set. */
6513 6650          cva.va_mask = AT_MODE;
6514 6651          if (setsize)
6515 6652                  cva.va_mask |= AT_SIZE;
6516 6653  
6517 6654          /* Assume the worst */
6518 6655          cs->mandlock = TRUE;
6519 6656  
6520 6657          if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
  
    | 
      ↓ open down ↓ | 
    683 lines elided | 
    
      ↑ open up ↑ | 
  
6521 6658                  cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6522 6659  
6523 6660                  /*
6524 6661                   * Truncate the file if necessary; this would be
6525 6662                   * the case for create over an existing file.
6526 6663                   */
6527 6664  
6528 6665                  if (trunc) {
6529 6666                          int in_crit = 0;
6530 6667                          rfs4_file_t *fp;
     6668 +                        nfs4_srv_t *nsrv4;
6531 6669                          bool_t create = FALSE;
6532 6670  
6533 6671                          /*
6534 6672                           * We are writing over an existing file.
6535 6673                           * Check to see if we need to recall a delegation.
6536 6674                           */
6537      -                        rfs4_hold_deleg_policy();
     6675 +                        nsrv4 = nfs4_get_srv();
     6676 +                        rfs4_hold_deleg_policy(nsrv4);
6538 6677                          if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6539 6678                                  if (rfs4_check_delegated_byfp(FWRITE, fp,
6540 6679                                      (reqsize == 0), FALSE, FALSE, &clientid)) {
6541 6680                                          rfs4_file_rele(fp);
6542      -                                        rfs4_rele_deleg_policy();
     6681 +                                        rfs4_rele_deleg_policy(nsrv4);
6543 6682                                          VN_RELE(vp);
6544 6683                                          *attrset = 0;
6545 6684                                          return (NFS4ERR_DELAY);
6546 6685                                  }
6547 6686                                  rfs4_file_rele(fp);
6548 6687                          }
6549      -                        rfs4_rele_deleg_policy();
     6688 +                        rfs4_rele_deleg_policy(nsrv4);
6550 6689  
6551 6690                          if (nbl_need_check(vp)) {
6552 6691                                  in_crit = 1;
6553 6692  
6554 6693                                  ASSERT(reqsize == 0);
6555 6694  
6556 6695                                  nbl_start_crit(vp, RW_READER);
6557 6696                                  if (nbl_conflict(vp, NBL_WRITE, 0,
6558 6697                                      cva.va_size, 0, NULL)) {
6559 6698                                          in_crit = 0;
6560 6699                                          nbl_end_crit(vp);
6561 6700                                          VN_RELE(vp);
6562 6701                                          *attrset = 0;
6563 6702                                          return (NFS4ERR_ACCESS);
6564 6703                                  }
6565 6704                          }
6566 6705                          ct.cc_sysid = 0;
6567 6706                          ct.cc_pid = 0;
6568 6707                          ct.cc_caller_id = nfs4_srv_caller_id;
6569 6708                          ct.cc_flags = CC_DONTBLOCK;
6570 6709  
6571 6710                          cva.va_mask = AT_SIZE;
6572 6711                          cva.va_size = reqsize;
6573 6712                          (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6574 6713                          if (in_crit)
6575 6714                                  nbl_end_crit(vp);
6576 6715                  }
6577 6716          }
6578 6717  
6579 6718          error = makefh4(&cs->fh, vp, cs->exi);
6580 6719  
6581 6720          /*
6582 6721           * Force modified data and metadata out to stable storage.
6583 6722           */
6584 6723          (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6585 6724  
6586 6725          if (error) {
6587 6726                  VN_RELE(vp);
6588 6727                  *attrset = 0;
6589 6728                  return (puterrno4(error));
6590 6729          }
6591 6730  
6592 6731          /* if parent dir is attrdir, set namedattr fh flag */
6593 6732          if (dvp->v_flag & V_XATTRDIR)
6594 6733                  set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6595 6734  
6596 6735          if (cs->vp)
6597 6736                  VN_RELE(cs->vp);
6598 6737  
6599 6738          cs->vp = vp;
6600 6739  
6601 6740          /*
6602 6741           * if we did not create the file, we will need to check
6603 6742           * the access bits on the file
6604 6743           */
6605 6744  
6606 6745          if (!created) {
6607 6746                  if (setsize)
6608 6747                          args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6609 6748                  status = check_open_access(args->share_access, cs, req);
6610 6749                  if (status != NFS4_OK)
6611 6750                          *attrset = 0;
6612 6751          }
6613 6752          return (status);
6614 6753  }
6615 6754  
6616 6755  /*ARGSUSED*/
6617 6756  static void
6618 6757  rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6619 6758      rfs4_openowner_t *oo, delegreq_t deleg,
6620 6759      uint32_t access, uint32_t deny,
6621 6760      OPEN4res *resp, int deleg_cur)
6622 6761  {
6623 6762          /* XXX Currently not using req  */
6624 6763          rfs4_state_t *sp;
6625 6764          rfs4_file_t *fp;
6626 6765          bool_t screate = TRUE;
6627 6766          bool_t fcreate = TRUE;
6628 6767          uint32_t open_a, share_a;
6629 6768          uint32_t open_d, share_d;
6630 6769          rfs4_deleg_state_t *dsp;
6631 6770          sysid_t sysid;
6632 6771          nfsstat4 status;
6633 6772          caller_context_t ct;
6634 6773          int fflags = 0;
6635 6774          int recall = 0;
6636 6775          int err;
6637 6776          int first_open;
6638 6777  
6639 6778          /* get the file struct and hold a lock on it during initial open */
6640 6779          fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6641 6780          if (fp == NULL) {
6642 6781                  resp->status = NFS4ERR_RESOURCE;
6643 6782                  DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6644 6783                  return;
6645 6784          }
6646 6785  
6647 6786          sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6648 6787          if (sp == NULL) {
6649 6788                  resp->status = NFS4ERR_RESOURCE;
6650 6789                  DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6651 6790                  /* No need to keep any reference */
6652 6791                  rw_exit(&fp->rf_file_rwlock);
6653 6792                  rfs4_file_rele(fp);
6654 6793                  return;
6655 6794          }
6656 6795  
6657 6796          /* try to get the sysid before continuing */
6658 6797          if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6659 6798                  resp->status = status;
6660 6799                  rfs4_file_rele(fp);
6661 6800                  /* Not a fully formed open; "close" it */
6662 6801                  if (screate == TRUE)
6663 6802                          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6664 6803                  rfs4_state_rele(sp);
6665 6804                  return;
6666 6805          }
6667 6806  
6668 6807          /* Calculate the fflags for this OPEN. */
6669 6808          if (access & OPEN4_SHARE_ACCESS_READ)
6670 6809                  fflags |= FREAD;
6671 6810          if (access & OPEN4_SHARE_ACCESS_WRITE)
6672 6811                  fflags |= FWRITE;
6673 6812  
6674 6813          rfs4_dbe_lock(sp->rs_dbe);
6675 6814  
6676 6815          /*
6677 6816           * Calculate the new deny and access mode that this open is adding to
6678 6817           * the file for this open owner;
6679 6818           */
6680 6819          open_d = (deny & ~sp->rs_open_deny);
6681 6820          open_a = (access & ~sp->rs_open_access);
6682 6821  
6683 6822          /*
6684 6823           * Calculate the new share access and share deny modes that this open
6685 6824           * is adding to the file for this open owner;
6686 6825           */
6687 6826          share_a = (access & ~sp->rs_share_access);
6688 6827          share_d = (deny & ~sp->rs_share_deny);
6689 6828  
6690 6829          first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6691 6830  
6692 6831          /*
6693 6832           * Check to see the client has already sent an open for this
6694 6833           * open owner on this file with the same share/deny modes.
6695 6834           * If so, we don't need to check for a conflict and we don't
6696 6835           * need to add another shrlock.  If not, then we need to
6697 6836           * check for conflicts in deny and access before checking for
6698 6837           * conflicts in delegation.  We don't want to recall a
6699 6838           * delegation based on an open that will eventually fail based
6700 6839           * on shares modes.
6701 6840           */
6702 6841  
6703 6842          if (share_a || share_d) {
6704 6843                  if ((err = rfs4_share(sp, access, deny)) != 0) {
6705 6844                          rfs4_dbe_unlock(sp->rs_dbe);
6706 6845                          resp->status = err;
6707 6846  
6708 6847                          rfs4_file_rele(fp);
6709 6848                          /* Not a fully formed open; "close" it */
6710 6849                          if (screate == TRUE)
6711 6850                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6712 6851                          rfs4_state_rele(sp);
6713 6852                          return;
6714 6853                  }
6715 6854          }
6716 6855  
6717 6856          rfs4_dbe_lock(fp->rf_dbe);
6718 6857  
6719 6858          /*
6720 6859           * Check to see if this file is delegated and if so, if a
6721 6860           * recall needs to be done.
6722 6861           */
6723 6862          if (rfs4_check_recall(sp, access)) {
6724 6863                  rfs4_dbe_unlock(fp->rf_dbe);
6725 6864                  rfs4_dbe_unlock(sp->rs_dbe);
6726 6865                  rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6727 6866                  delay(NFS4_DELEGATION_CONFLICT_DELAY);
6728 6867                  rfs4_dbe_lock(sp->rs_dbe);
6729 6868  
6730 6869                  /* if state closed while lock was dropped */
6731 6870                  if (sp->rs_closed) {
6732 6871                          if (share_a || share_d)
6733 6872                                  (void) rfs4_unshare(sp);
6734 6873                          rfs4_dbe_unlock(sp->rs_dbe);
6735 6874                          rfs4_file_rele(fp);
6736 6875                          /* Not a fully formed open; "close" it */
6737 6876                          if (screate == TRUE)
6738 6877                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6739 6878                          rfs4_state_rele(sp);
6740 6879                          resp->status = NFS4ERR_OLD_STATEID;
6741 6880                          return;
6742 6881                  }
6743 6882  
6744 6883                  rfs4_dbe_lock(fp->rf_dbe);
6745 6884                  /* Let's see if the delegation was returned */
6746 6885                  if (rfs4_check_recall(sp, access)) {
6747 6886                          rfs4_dbe_unlock(fp->rf_dbe);
6748 6887                          if (share_a || share_d)
6749 6888                                  (void) rfs4_unshare(sp);
6750 6889                          rfs4_dbe_unlock(sp->rs_dbe);
6751 6890                          rfs4_file_rele(fp);
6752 6891                          rfs4_update_lease(sp->rs_owner->ro_client);
6753 6892  
6754 6893                          /* Not a fully formed open; "close" it */
6755 6894                          if (screate == TRUE)
6756 6895                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6757 6896                          rfs4_state_rele(sp);
6758 6897                          resp->status = NFS4ERR_DELAY;
6759 6898                          return;
6760 6899                  }
6761 6900          }
6762 6901          /*
6763 6902           * the share check passed and any delegation conflict has been
6764 6903           * taken care of, now call vop_open.
6765 6904           * if this is the first open then call vop_open with fflags.
6766 6905           * if not, call vn_open_upgrade with just the upgrade flags.
6767 6906           *
6768 6907           * if the file has been opened already, it will have the current
6769 6908           * access mode in the state struct.  if it has no share access, then
6770 6909           * this is a new open.
6771 6910           *
6772 6911           * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6773 6912           * call VOP_OPEN(), just do the open upgrade.
6774 6913           */
6775 6914          if (first_open && !deleg_cur) {
6776 6915                  ct.cc_sysid = sysid;
6777 6916                  ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6778 6917                  ct.cc_caller_id = nfs4_srv_caller_id;
6779 6918                  ct.cc_flags = CC_DONTBLOCK;
6780 6919                  err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6781 6920                  if (err) {
6782 6921                          rfs4_dbe_unlock(fp->rf_dbe);
6783 6922                          if (share_a || share_d)
6784 6923                                  (void) rfs4_unshare(sp);
6785 6924                          rfs4_dbe_unlock(sp->rs_dbe);
6786 6925                          rfs4_file_rele(fp);
6787 6926  
6788 6927                          /* Not a fully formed open; "close" it */
6789 6928                          if (screate == TRUE)
6790 6929                                  rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6791 6930                          rfs4_state_rele(sp);
6792 6931                          /* check if a monitor detected a delegation conflict */
6793 6932                          if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6794 6933                                  resp->status = NFS4ERR_DELAY;
6795 6934                          else
6796 6935                                  resp->status = NFS4ERR_SERVERFAULT;
6797 6936                          return;
6798 6937                  }
6799 6938          } else { /* open upgrade */
6800 6939                  /*
6801 6940                   * calculate the fflags for the new mode that is being added
6802 6941                   * by this upgrade.
6803 6942                   */
6804 6943                  fflags = 0;
6805 6944                  if (open_a & OPEN4_SHARE_ACCESS_READ)
6806 6945                          fflags |= FREAD;
6807 6946                  if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6808 6947                          fflags |= FWRITE;
6809 6948                  vn_open_upgrade(cs->vp, fflags);
6810 6949          }
6811 6950          sp->rs_open_access |= access;
6812 6951          sp->rs_open_deny |= deny;
6813 6952  
6814 6953          if (open_d & OPEN4_SHARE_DENY_READ)
6815 6954                  fp->rf_deny_read++;
6816 6955          if (open_d & OPEN4_SHARE_DENY_WRITE)
6817 6956                  fp->rf_deny_write++;
6818 6957          fp->rf_share_deny |= deny;
6819 6958  
6820 6959          if (open_a & OPEN4_SHARE_ACCESS_READ)
6821 6960                  fp->rf_access_read++;
6822 6961          if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6823 6962                  fp->rf_access_write++;
6824 6963          fp->rf_share_access |= access;
6825 6964  
6826 6965          /*
6827 6966           * Check for delegation here. if the deleg argument is not
6828 6967           * DELEG_ANY, then this is a reclaim from a client and
6829 6968           * we must honor the delegation requested. If necessary we can
6830 6969           * set the recall flag.
6831 6970           */
6832 6971  
6833 6972          dsp = rfs4_grant_delegation(deleg, sp, &recall);
6834 6973  
6835 6974          cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6836 6975  
6837 6976          next_stateid(&sp->rs_stateid);
6838 6977  
6839 6978          resp->stateid = sp->rs_stateid.stateid;
6840 6979  
6841 6980          rfs4_dbe_unlock(fp->rf_dbe);
6842 6981          rfs4_dbe_unlock(sp->rs_dbe);
6843 6982  
6844 6983          if (dsp) {
6845 6984                  rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6846 6985                  rfs4_deleg_state_rele(dsp);
6847 6986          }
6848 6987  
6849 6988          rfs4_file_rele(fp);
6850 6989          rfs4_state_rele(sp);
6851 6990  
6852 6991          resp->status = NFS4_OK;
6853 6992  }
6854 6993  
6855 6994  /*ARGSUSED*/
6856 6995  static void
6857 6996  rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6858 6997      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6859 6998  {
6860 6999          change_info4 *cinfo = &resp->cinfo;
6861 7000          bitmap4 *attrset = &resp->attrset;
6862 7001  
6863 7002          if (args->opentype == OPEN4_NOCREATE)
6864 7003                  resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6865 7004                      req, cs, args->share_access, cinfo);
6866 7005          else {
6867 7006                  /* inhibit delegation grants during exclusive create */
6868 7007  
6869 7008                  if (args->mode == EXCLUSIVE4)
6870 7009                          rfs4_disable_delegation();
6871 7010  
6872 7011                  resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6873 7012                      oo->ro_client->rc_clientid);
6874 7013          }
6875 7014  
6876 7015          if (resp->status == NFS4_OK) {
6877 7016  
6878 7017                  /* cs->vp cs->fh now reference the desired file */
6879 7018  
6880 7019                  rfs4_do_open(cs, req, oo,
6881 7020                      oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6882 7021                      args->share_access, args->share_deny, resp, 0);
6883 7022  
6884 7023                  /*
6885 7024                   * If rfs4_createfile set attrset, we must
6886 7025                   * clear this attrset before the response is copied.
6887 7026                   */
6888 7027                  if (resp->status != NFS4_OK && resp->attrset) {
6889 7028                          resp->attrset = 0;
6890 7029                  }
6891 7030          }
6892 7031          else
6893 7032                  *cs->statusp = resp->status;
6894 7033  
6895 7034          if (args->mode == EXCLUSIVE4)
6896 7035                  rfs4_enable_delegation();
6897 7036  }
6898 7037  
6899 7038  /*ARGSUSED*/
6900 7039  static void
6901 7040  rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6902 7041      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6903 7042  {
6904 7043          change_info4 *cinfo = &resp->cinfo;
6905 7044          vattr_t va;
6906 7045          vtype_t v_type = cs->vp->v_type;
6907 7046          int error = 0;
6908 7047  
6909 7048          /* Verify that we have a regular file */
6910 7049          if (v_type != VREG) {
6911 7050                  if (v_type == VDIR)
6912 7051                          resp->status = NFS4ERR_ISDIR;
6913 7052                  else if (v_type == VLNK)
6914 7053                          resp->status = NFS4ERR_SYMLINK;
6915 7054                  else
6916 7055                          resp->status = NFS4ERR_INVAL;
6917 7056                  return;
6918 7057          }
6919 7058  
6920 7059          va.va_mask = AT_MODE|AT_UID;
6921 7060          error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
6922 7061          if (error) {
6923 7062                  resp->status = puterrno4(error);
6924 7063                  return;
6925 7064          }
6926 7065  
6927 7066          cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6928 7067  
6929 7068          /*
6930 7069           * Check if we have access to the file, Note the the file
6931 7070           * could have originally been open UNCHECKED or GUARDED
6932 7071           * with mode bits that will now fail, but there is nothing
6933 7072           * we can really do about that except in the case that the
6934 7073           * owner of the file is the one requesting the open.
6935 7074           */
6936 7075          if (crgetuid(cs->cr) != va.va_uid) {
6937 7076                  resp->status = check_open_access(args->share_access, cs, req);
6938 7077                  if (resp->status != NFS4_OK) {
6939 7078                          return;
6940 7079                  }
6941 7080          }
6942 7081  
6943 7082          /*
6944 7083           * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6945 7084           */
6946 7085          cinfo->before = 0;
6947 7086          cinfo->after = 0;
6948 7087          cinfo->atomic = FALSE;
6949 7088  
6950 7089          rfs4_do_open(cs, req, oo,
6951 7090              NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6952 7091              args->share_access, args->share_deny, resp, 0);
6953 7092  }
6954 7093  
6955 7094  static void
6956 7095  rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6957 7096      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6958 7097  {
6959 7098          int error;
6960 7099          nfsstat4 status;
6961 7100          stateid4 stateid =
6962 7101              args->open_claim4_u.delegate_cur_info.delegate_stateid;
6963 7102          rfs4_deleg_state_t *dsp;
6964 7103  
6965 7104          /*
6966 7105           * Find the state info from the stateid and confirm that the
6967 7106           * file is delegated.  If the state openowner is the same as
6968 7107           * the supplied openowner we're done. If not, get the file
6969 7108           * info from the found state info. Use that file info to
6970 7109           * create the state for this lock owner. Note solaris doen't
6971 7110           * really need the pathname to find the file. We may want to
6972 7111           * lookup the pathname and make sure that the vp exist and
6973 7112           * matches the vp in the file structure. However it is
6974 7113           * possible that the pathname nolonger exists (local process
6975 7114           * unlinks the file), so this may not be that useful.
6976 7115           */
6977 7116  
6978 7117          status = rfs4_get_deleg_state(&stateid, &dsp);
6979 7118          if (status != NFS4_OK) {
6980 7119                  resp->status = status;
6981 7120                  return;
6982 7121          }
6983 7122  
6984 7123          ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
6985 7124  
6986 7125          /*
6987 7126           * New lock owner, create state. Since this was probably called
6988 7127           * in response to a CB_RECALL we set deleg to DELEG_NONE
6989 7128           */
6990 7129  
6991 7130          ASSERT(cs->vp != NULL);
6992 7131          VN_RELE(cs->vp);
6993 7132          VN_HOLD(dsp->rds_finfo->rf_vp);
6994 7133          cs->vp = dsp->rds_finfo->rf_vp;
6995 7134  
6996 7135          if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6997 7136                  rfs4_deleg_state_rele(dsp);
6998 7137                  *cs->statusp = resp->status = puterrno4(error);
6999 7138                  return;
7000 7139          }
7001 7140  
7002 7141          /* Mark progress for delegation returns */
7003 7142          dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7004 7143          rfs4_deleg_state_rele(dsp);
7005 7144          rfs4_do_open(cs, req, oo, DELEG_NONE,
7006 7145              args->share_access, args->share_deny, resp, 1);
7007 7146  }
7008 7147  
7009 7148  /*ARGSUSED*/
7010 7149  static void
7011 7150  rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7012 7151      OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7013 7152  {
7014 7153          /*
7015 7154           * Lookup the pathname, it must already exist since this file
7016 7155           * was delegated.
7017 7156           *
7018 7157           * Find the file and state info for this vp and open owner pair.
7019 7158           *      check that they are in fact delegated.
7020 7159           *      check that the state access and deny modes are the same.
7021 7160           *
7022 7161           * Return the delgation possibly seting the recall flag.
7023 7162           */
7024 7163          rfs4_file_t *fp;
7025 7164          rfs4_state_t *sp;
7026 7165          bool_t create = FALSE;
7027 7166          bool_t dcreate = FALSE;
7028 7167          rfs4_deleg_state_t *dsp;
7029 7168          nfsace4 *ace;
7030 7169  
7031 7170          /* Note we ignore oflags */
7032 7171          resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7033 7172              req, cs, args->share_access, &resp->cinfo);
7034 7173  
7035 7174          if (resp->status != NFS4_OK) {
7036 7175                  return;
7037 7176          }
7038 7177  
7039 7178          /* get the file struct and hold a lock on it during initial open */
7040 7179          fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7041 7180          if (fp == NULL) {
7042 7181                  resp->status = NFS4ERR_RESOURCE;
7043 7182                  DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7044 7183                  return;
7045 7184          }
7046 7185  
7047 7186          sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7048 7187          if (sp == NULL) {
7049 7188                  resp->status = NFS4ERR_SERVERFAULT;
7050 7189                  DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7051 7190                  rw_exit(&fp->rf_file_rwlock);
7052 7191                  rfs4_file_rele(fp);
7053 7192                  return;
7054 7193          }
7055 7194  
7056 7195          rfs4_dbe_lock(sp->rs_dbe);
7057 7196          rfs4_dbe_lock(fp->rf_dbe);
7058 7197          if (args->share_access != sp->rs_share_access ||
7059 7198              args->share_deny != sp->rs_share_deny ||
7060 7199              sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7061 7200                  NFS4_DEBUG(rfs4_debug,
7062 7201                      (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7063 7202                  rfs4_dbe_unlock(fp->rf_dbe);
7064 7203                  rfs4_dbe_unlock(sp->rs_dbe);
7065 7204                  rfs4_file_rele(fp);
7066 7205                  rfs4_state_rele(sp);
7067 7206                  resp->status = NFS4ERR_SERVERFAULT;
7068 7207                  return;
7069 7208          }
7070 7209          rfs4_dbe_unlock(fp->rf_dbe);
7071 7210          rfs4_dbe_unlock(sp->rs_dbe);
7072 7211  
7073 7212          dsp = rfs4_finddeleg(sp, &dcreate);
7074 7213          if (dsp == NULL) {
7075 7214                  rfs4_state_rele(sp);
7076 7215                  rfs4_file_rele(fp);
7077 7216                  resp->status = NFS4ERR_SERVERFAULT;
7078 7217                  return;
7079 7218          }
7080 7219  
7081 7220          next_stateid(&sp->rs_stateid);
7082 7221  
7083 7222          resp->stateid = sp->rs_stateid.stateid;
7084 7223  
7085 7224          resp->delegation.delegation_type = dsp->rds_dtype;
7086 7225  
7087 7226          if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7088 7227                  open_read_delegation4 *rv =
7089 7228                      &resp->delegation.open_delegation4_u.read;
7090 7229  
7091 7230                  rv->stateid = dsp->rds_delegid.stateid;
7092 7231                  rv->recall = FALSE; /* no policy in place to set to TRUE */
7093 7232                  ace = &rv->permissions;
7094 7233          } else {
7095 7234                  open_write_delegation4 *rv =
7096 7235                      &resp->delegation.open_delegation4_u.write;
7097 7236  
7098 7237                  rv->stateid = dsp->rds_delegid.stateid;
7099 7238                  rv->recall = FALSE;  /* no policy in place to set to TRUE */
7100 7239                  ace = &rv->permissions;
7101 7240                  rv->space_limit.limitby = NFS_LIMIT_SIZE;
7102 7241                  rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7103 7242          }
7104 7243  
7105 7244          /* XXX For now */
7106 7245          ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7107 7246          ace->flag = 0;
7108 7247          ace->access_mask = 0;
7109 7248          ace->who.utf8string_len = 0;
7110 7249          ace->who.utf8string_val = 0;
7111 7250  
7112 7251          rfs4_deleg_state_rele(dsp);
7113 7252          rfs4_state_rele(sp);
7114 7253          rfs4_file_rele(fp);
7115 7254  }
7116 7255  
/*
 * Result of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
        NFS4_CHKSEQ_OKAY,       /* 0: request carries the next seqid */
        NFS4_CHKSEQ_REPLAY,     /* 1: same seqid and op as the last request */
        NFS4_CHKSEQ_BAD         /* 2: any other seqid */
} rfs4_chkseq_t;
7122 7261  
7123 7262  /*
7124 7263   * Generic function for sequence number checks.
7125 7264   */
7126 7265  static rfs4_chkseq_t
7127 7266  rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7128 7267      seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7129 7268  {
7130 7269          /* Same sequence ids and matching operations? */
7131 7270          if (seqid == rqst_seq && resop->resop == lastop->resop) {
7132 7271                  if (copyres == TRUE) {
7133 7272                          rfs4_free_reply(resop);
7134 7273                          rfs4_copy_reply(resop, lastop);
7135 7274                  }
7136 7275                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7137 7276                      "Replayed SEQID %d\n", seqid));
7138 7277                  return (NFS4_CHKSEQ_REPLAY);
7139 7278          }
7140 7279  
7141 7280          /* If the incoming sequence is not the next expected then it is bad */
7142 7281          if (rqst_seq != seqid + 1) {
7143 7282                  if (rqst_seq == seqid) {
7144 7283                          NFS4_DEBUG(rfs4_debug,
7145 7284                              (CE_NOTE, "BAD SEQID: Replayed sequence id "
7146 7285                              "but last op was %d current op is %d\n",
7147 7286                              lastop->resop, resop->resop));
7148 7287                          return (NFS4_CHKSEQ_BAD);
7149 7288                  }
7150 7289                  NFS4_DEBUG(rfs4_debug,
7151 7290                      (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7152 7291                      rqst_seq, seqid));
7153 7292                  return (NFS4_CHKSEQ_BAD);
7154 7293          }
7155 7294  
7156 7295          /* Everything okay -- next expected */
7157 7296          return (NFS4_CHKSEQ_OKAY);
7158 7297  }
7159 7298  
7160 7299  
7161 7300  static rfs4_chkseq_t
7162 7301  rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7163 7302  {
7164 7303          rfs4_chkseq_t rc;
7165 7304  
7166 7305          rfs4_dbe_lock(op->ro_dbe);
7167 7306          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7168 7307              TRUE);
7169 7308          rfs4_dbe_unlock(op->ro_dbe);
7170 7309  
7171 7310          if (rc == NFS4_CHKSEQ_OKAY)
7172 7311                  rfs4_update_lease(op->ro_client);
7173 7312  
7174 7313          return (rc);
7175 7314  }
7176 7315  
7177 7316  static rfs4_chkseq_t
7178 7317  rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7179 7318  {
7180 7319          rfs4_chkseq_t rc;
7181 7320  
7182 7321          rfs4_dbe_lock(op->ro_dbe);
7183 7322          rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7184 7323              olo_seqid, resop, FALSE);
7185 7324          rfs4_dbe_unlock(op->ro_dbe);
7186 7325  
7187 7326          return (rc);
7188 7327  }
7189 7328  
7190 7329  static rfs4_chkseq_t
7191 7330  rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7192 7331  {
7193 7332          rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7194 7333  
7195 7334          rfs4_dbe_lock(lsp->rls_dbe);
7196 7335          if (!lsp->rls_skip_seqid_check)
7197 7336                  rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7198 7337                      resop, TRUE);
7199 7338          rfs4_dbe_unlock(lsp->rls_dbe);
7200 7339  
7201 7340          return (rc);
7202 7341  }
7203 7342  
7204 7343  static void
7205 7344  rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7206 7345      struct svc_req *req, struct compound_state *cs)
7207 7346  {
7208 7347          OPEN4args *args = &argop->nfs_argop4_u.opopen;
7209 7348          OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7210 7349          open_owner4 *owner = &args->owner;
7211 7350          open_claim_type4 claim = args->claim;
7212 7351          rfs4_client_t *cp;
7213 7352          rfs4_openowner_t *oo;
7214 7353          bool_t create;
7215 7354          bool_t replay = FALSE;
7216 7355          int can_reclaim;
7217 7356  
7218 7357          DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7219 7358              OPEN4args *, args);
7220 7359  
7221 7360          if (cs->vp == NULL) {
7222 7361                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7223 7362                  goto end;
7224 7363          }
7225 7364  
7226 7365          /*
7227 7366           * Need to check clientid and lease expiration first based on
7228 7367           * error ordering and incrementing sequence id.
7229 7368           */
7230 7369          cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7231 7370          if (cp == NULL) {
7232 7371                  *cs->statusp = resp->status =
7233 7372                      rfs4_check_clientid(&owner->clientid, 0);
7234 7373                  goto end;
7235 7374          }
7236 7375  
7237 7376          if (rfs4_lease_expired(cp)) {
7238 7377                  rfs4_client_close(cp);
7239 7378                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7240 7379                  goto end;
7241 7380          }
7242 7381          can_reclaim = cp->rc_can_reclaim;
7243 7382  
7244 7383          /*
7245 7384           * Find the open_owner for use from this point forward.  Take
7246 7385           * care in updating the sequence id based on the type of error
7247 7386           * being returned.
7248 7387           */
7249 7388  retry:
7250 7389          create = TRUE;
7251 7390          oo = rfs4_findopenowner(owner, &create, args->seqid);
7252 7391          if (oo == NULL) {
7253 7392                  *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7254 7393                  rfs4_client_rele(cp);
7255 7394                  goto end;
7256 7395          }
7257 7396  
7258 7397          /* Hold off access to the sequence space while the open is done */
7259 7398          rfs4_sw_enter(&oo->ro_sw);
7260 7399  
7261 7400          /*
7262 7401           * If the open_owner existed before at the server, then check
7263 7402           * the sequence id.
7264 7403           */
7265 7404          if (!create && !oo->ro_postpone_confirm) {
7266 7405                  switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7267 7406                  case NFS4_CHKSEQ_BAD:
7268 7407                          if ((args->seqid > oo->ro_open_seqid) &&
7269 7408                              oo->ro_need_confirm) {
7270 7409                                  rfs4_free_opens(oo, TRUE, FALSE);
7271 7410                                  rfs4_sw_exit(&oo->ro_sw);
7272 7411                                  rfs4_openowner_rele(oo);
7273 7412                                  goto retry;
7274 7413                          }
7275 7414                          resp->status = NFS4ERR_BAD_SEQID;
7276 7415                          goto out;
7277 7416                  case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7278 7417                          replay = TRUE;
7279 7418                          goto out;
7280 7419                  default:
7281 7420                          break;
7282 7421                  }
7283 7422  
7284 7423                  /*
7285 7424                   * Sequence was ok and open owner exists
7286 7425                   * check to see if we have yet to see an
7287 7426                   * open_confirm.
7288 7427                   */
7289 7428                  if (oo->ro_need_confirm) {
7290 7429                          rfs4_free_opens(oo, TRUE, FALSE);
7291 7430                          rfs4_sw_exit(&oo->ro_sw);
7292 7431                          rfs4_openowner_rele(oo);
7293 7432                          goto retry;
7294 7433                  }
7295 7434          }
7296 7435          /* Grace only applies to regular-type OPENs */
7297 7436          if (rfs4_clnt_in_grace(cp) &&
7298 7437              (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7299 7438                  *cs->statusp = resp->status = NFS4ERR_GRACE;
7300 7439                  goto out;
7301 7440          }
7302 7441  
7303 7442          /*
7304 7443           * If previous state at the server existed then can_reclaim
7305 7444           * will be set. If not reply NFS4ERR_NO_GRACE to the
7306 7445           * client.
7307 7446           */
7308 7447          if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7309 7448                  *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7310 7449                  goto out;
7311 7450          }
7312 7451  
7313 7452  
7314 7453          /*
7315 7454           * Reject the open if the client has missed the grace period
7316 7455           */
7317 7456          if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7318 7457                  *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7319 7458                  goto out;
7320 7459          }
7321 7460  
7322 7461          /* Couple of up-front bookkeeping items */
7323 7462          if (oo->ro_need_confirm) {
7324 7463                  /*
7325 7464                   * If this is a reclaim OPEN then we should not ask
7326 7465                   * for a confirmation of the open_owner per the
7327 7466                   * protocol specification.
7328 7467                   */
7329 7468                  if (claim == CLAIM_PREVIOUS)
7330 7469                          oo->ro_need_confirm = FALSE;
7331 7470                  else
7332 7471                          resp->rflags |= OPEN4_RESULT_CONFIRM;
7333 7472          }
7334 7473          resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7335 7474  
7336 7475          /*
7337 7476           * If there is an unshared filesystem mounted on this vnode,
7338 7477           * do not allow to open/create in this directory.
7339 7478           */
7340 7479          if (vn_ismntpt(cs->vp)) {
7341 7480                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
7342 7481                  goto out;
7343 7482          }
7344 7483  
7345 7484          /*
7346 7485           * access must READ, WRITE, or BOTH.  No access is invalid.
7347 7486           * deny can be READ, WRITE, BOTH, or NONE.
7348 7487           * bits not defined for access/deny are invalid.
7349 7488           */
7350 7489          if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7351 7490              (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7352 7491              (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7353 7492                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7354 7493                  goto out;
7355 7494          }
7356 7495  
7357 7496  
7358 7497          /*
7359 7498           * make sure attrset is zero before response is built.
7360 7499           */
7361 7500          resp->attrset = 0;
7362 7501  
7363 7502          switch (claim) {
7364 7503          case CLAIM_NULL:
7365 7504                  rfs4_do_opennull(cs, req, args, oo, resp);
7366 7505                  break;
7367 7506          case CLAIM_PREVIOUS:
7368 7507                  rfs4_do_openprev(cs, req, args, oo, resp);
7369 7508                  break;
7370 7509          case CLAIM_DELEGATE_CUR:
7371 7510                  rfs4_do_opendelcur(cs, req, args, oo, resp);
7372 7511                  break;
7373 7512          case CLAIM_DELEGATE_PREV:
7374 7513                  rfs4_do_opendelprev(cs, req, args, oo, resp);
7375 7514                  break;
7376 7515          default:
7377 7516                  resp->status = NFS4ERR_INVAL;
7378 7517                  break;
7379 7518          }
7380 7519  
7381 7520  out:
7382 7521          rfs4_client_rele(cp);
7383 7522  
7384 7523          /* Catch sequence id handling here to make it a little easier */
7385 7524          switch (resp->status) {
7386 7525          case NFS4ERR_BADXDR:
7387 7526          case NFS4ERR_BAD_SEQID:
7388 7527          case NFS4ERR_BAD_STATEID:
7389 7528          case NFS4ERR_NOFILEHANDLE:
7390 7529          case NFS4ERR_RESOURCE:
7391 7530          case NFS4ERR_STALE_CLIENTID:
7392 7531          case NFS4ERR_STALE_STATEID:
7393 7532                  /*
7394 7533                   * The protocol states that if any of these errors are
7395 7534                   * being returned, the sequence id should not be
7396 7535                   * incremented.  Any other return requires an
7397 7536                   * increment.
7398 7537                   */
7399 7538                  break;
7400 7539          default:
7401 7540                  /* Always update the lease in this case */
7402 7541                  rfs4_update_lease(oo->ro_client);
7403 7542  
7404 7543                  /* Regular response - copy the result */
7405 7544                  if (!replay)
7406 7545                          rfs4_update_open_resp(oo, resop, &cs->fh);
7407 7546  
7408 7547                  /*
7409 7548                   * REPLAY case: Only if the previous response was OK
7410 7549                   * do we copy the filehandle.  If not OK, no
7411 7550                   * filehandle to copy.
7412 7551                   */
7413 7552                  if (replay == TRUE &&
7414 7553                      resp->status == NFS4_OK &&
7415 7554                      oo->ro_reply_fh.nfs_fh4_val) {
7416 7555                          /*
7417 7556                           * If this is a replay, we must restore the
7418 7557                           * current filehandle/vp to that of what was
7419 7558                           * returned originally.  Try our best to do
7420 7559                           * it.
7421 7560                           */
7422 7561                          nfs_fh4_fmt_t *fh_fmtp =
7423 7562                              (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7424 7563  
7425 7564                          cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7426 7565                              (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7427 7566  
7428 7567                          if (cs->exi == NULL) {
7429 7568                                  resp->status = NFS4ERR_STALE;
7430 7569                                  goto finish;
7431 7570                          }
7432 7571  
7433 7572                          VN_RELE(cs->vp);
7434 7573  
7435 7574                          cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7436 7575                              &resp->status);
7437 7576  
7438 7577                          if (cs->vp == NULL)
7439 7578                                  goto finish;
7440 7579  
7441 7580                          nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7442 7581                  }
7443 7582  
7444 7583                  /*
7445 7584                   * If this was a replay, no need to update the
7446 7585                   * sequence id. If the open_owner was not created on
7447 7586                   * this pass, then update.  The first use of an
7448 7587                   * open_owner will not bump the sequence id.
7449 7588                   */
7450 7589                  if (replay == FALSE && !create)
7451 7590                          rfs4_update_open_sequence(oo);
7452 7591                  /*
7453 7592                   * If the client is receiving an error and the
7454 7593                   * open_owner needs to be confirmed, there is no way
7455 7594                   * to notify the client of this fact ignoring the fact
7456 7595                   * that the server has no method of returning a
7457 7596                   * stateid to confirm.  Therefore, the server needs to
7458 7597                   * mark this open_owner in a way as to avoid the
7459 7598                   * sequence id checking the next time the client uses
7460 7599                   * this open_owner.
7461 7600                   */
7462 7601                  if (resp->status != NFS4_OK && oo->ro_need_confirm)
7463 7602                          oo->ro_postpone_confirm = TRUE;
7464 7603                  /*
7465 7604                   * If OK response then clear the postpone flag and
7466 7605                   * reset the sequence id to keep in sync with the
7467 7606                   * client.
7468 7607                   */
7469 7608                  if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7470 7609                          oo->ro_postpone_confirm = FALSE;
7471 7610                          oo->ro_open_seqid = args->seqid;
7472 7611                  }
7473 7612                  break;
7474 7613          }
7475 7614  
7476 7615  finish:
7477 7616          *cs->statusp = resp->status;
7478 7617  
7479 7618          rfs4_sw_exit(&oo->ro_sw);
7480 7619          rfs4_openowner_rele(oo);
7481 7620  
7482 7621  end:
7483 7622          DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7484 7623              OPEN4res *, resp);
7485 7624  }
7486 7625  
7487 7626  /*ARGSUSED*/
7488 7627  void
7489 7628  rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7490 7629      struct svc_req *req, struct compound_state *cs)
7491 7630  {
7492 7631          OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7493 7632          OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7494 7633          rfs4_state_t *sp;
7495 7634          nfsstat4 status;
7496 7635  
7497 7636          DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7498 7637              OPEN_CONFIRM4args *, args);
7499 7638  
7500 7639          if (cs->vp == NULL) {
7501 7640                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7502 7641                  goto out;
7503 7642          }
7504 7643  
7505 7644          if (cs->vp->v_type != VREG) {
7506 7645                  *cs->statusp = resp->status =
7507 7646                      cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7508 7647                  return;
7509 7648          }
7510 7649  
7511 7650          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7512 7651          if (status != NFS4_OK) {
7513 7652                  *cs->statusp = resp->status = status;
7514 7653                  goto out;
7515 7654          }
7516 7655  
7517 7656          /* Ensure specified filehandle matches */
7518 7657          if (cs->vp != sp->rs_finfo->rf_vp) {
7519 7658                  rfs4_state_rele(sp);
7520 7659                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7521 7660                  goto out;
7522 7661          }
7523 7662  
7524 7663          /* hold off other access to open_owner while we tinker */
7525 7664          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7526 7665  
7527 7666          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7528 7667          case NFS4_CHECK_STATEID_OKAY:
7529 7668                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7530 7669                      resop) != 0) {
7531 7670                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7532 7671                          break;
7533 7672                  }
7534 7673                  /*
7535 7674                   * If it is the appropriate stateid and determined to
7536 7675                   * be "OKAY" then this means that the stateid does not
7537 7676                   * need to be confirmed and the client is in error for
7538 7677                   * sending an OPEN_CONFIRM.
7539 7678                   */
7540 7679                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7541 7680                  break;
7542 7681          case NFS4_CHECK_STATEID_OLD:
7543 7682                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7544 7683                  break;
7545 7684          case NFS4_CHECK_STATEID_BAD:
7546 7685                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7547 7686                  break;
7548 7687          case NFS4_CHECK_STATEID_EXPIRED:
7549 7688                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7550 7689                  break;
7551 7690          case NFS4_CHECK_STATEID_CLOSED:
7552 7691                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7553 7692                  break;
7554 7693          case NFS4_CHECK_STATEID_REPLAY:
7555 7694                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7556 7695                      resop)) {
7557 7696                  case NFS4_CHKSEQ_OKAY:
7558 7697                          /*
7559 7698                           * This is replayed stateid; if seqid matches
7560 7699                           * next expected, then client is using wrong seqid.
7561 7700                           */
7562 7701                          /* fall through */
7563 7702                  case NFS4_CHKSEQ_BAD:
7564 7703                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7565 7704                          break;
7566 7705                  case NFS4_CHKSEQ_REPLAY:
7567 7706                          /*
7568 7707                           * Note this case is the duplicate case so
7569 7708                           * resp->status is already set.
7570 7709                           */
7571 7710                          *cs->statusp = resp->status;
7572 7711                          rfs4_update_lease(sp->rs_owner->ro_client);
7573 7712                          break;
7574 7713                  }
7575 7714                  break;
7576 7715          case NFS4_CHECK_STATEID_UNCONFIRMED:
7577 7716                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7578 7717                      resop) != NFS4_CHKSEQ_OKAY) {
7579 7718                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7580 7719                          break;
7581 7720                  }
7582 7721                  *cs->statusp = resp->status = NFS4_OK;
7583 7722  
7584 7723                  next_stateid(&sp->rs_stateid);
7585 7724                  resp->open_stateid = sp->rs_stateid.stateid;
7586 7725                  sp->rs_owner->ro_need_confirm = FALSE;
7587 7726                  rfs4_update_lease(sp->rs_owner->ro_client);
7588 7727                  rfs4_update_open_sequence(sp->rs_owner);
7589 7728                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7590 7729                  break;
7591 7730          default:
7592 7731                  ASSERT(FALSE);
7593 7732                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7594 7733                  break;
7595 7734          }
7596 7735          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7597 7736          rfs4_state_rele(sp);
7598 7737  
7599 7738  out:
7600 7739          DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7601 7740              OPEN_CONFIRM4res *, resp);
7602 7741  }
7603 7742  
7604 7743  /*ARGSUSED*/
7605 7744  void
7606 7745  rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7607 7746      struct svc_req *req, struct compound_state *cs)
7608 7747  {
7609 7748          OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7610 7749          OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7611 7750          uint32_t access = args->share_access;
7612 7751          uint32_t deny = args->share_deny;
7613 7752          nfsstat4 status;
7614 7753          rfs4_state_t *sp;
7615 7754          rfs4_file_t *fp;
7616 7755          int fflags = 0;
7617 7756  
7618 7757          DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7619 7758              OPEN_DOWNGRADE4args *, args);
7620 7759  
7621 7760          if (cs->vp == NULL) {
7622 7761                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7623 7762                  goto out;
7624 7763          }
7625 7764  
7626 7765          if (cs->vp->v_type != VREG) {
7627 7766                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7628 7767                  return;
7629 7768          }
7630 7769  
7631 7770          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7632 7771          if (status != NFS4_OK) {
7633 7772                  *cs->statusp = resp->status = status;
7634 7773                  goto out;
7635 7774          }
7636 7775  
7637 7776          /* Ensure specified filehandle matches */
7638 7777          if (cs->vp != sp->rs_finfo->rf_vp) {
7639 7778                  rfs4_state_rele(sp);
7640 7779                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7641 7780                  goto out;
7642 7781          }
7643 7782  
7644 7783          /* hold off other access to open_owner while we tinker */
7645 7784          rfs4_sw_enter(&sp->rs_owner->ro_sw);
7646 7785  
7647 7786          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7648 7787          case NFS4_CHECK_STATEID_OKAY:
7649 7788                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7650 7789                      resop) != NFS4_CHKSEQ_OKAY) {
7651 7790                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7652 7791                          goto end;
7653 7792                  }
7654 7793                  break;
7655 7794          case NFS4_CHECK_STATEID_OLD:
7656 7795                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7657 7796                  goto end;
7658 7797          case NFS4_CHECK_STATEID_BAD:
7659 7798                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7660 7799                  goto end;
7661 7800          case NFS4_CHECK_STATEID_EXPIRED:
7662 7801                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7663 7802                  goto end;
7664 7803          case NFS4_CHECK_STATEID_CLOSED:
7665 7804                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7666 7805                  goto end;
7667 7806          case NFS4_CHECK_STATEID_UNCONFIRMED:
7668 7807                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7669 7808                  goto end;
7670 7809          case NFS4_CHECK_STATEID_REPLAY:
7671 7810                  /* Check the sequence id for the open owner */
7672 7811                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7673 7812                      resop)) {
7674 7813                  case NFS4_CHKSEQ_OKAY:
7675 7814                          /*
7676 7815                           * This is replayed stateid; if seqid matches
7677 7816                           * next expected, then client is using wrong seqid.
7678 7817                           */
7679 7818                          /* fall through */
7680 7819                  case NFS4_CHKSEQ_BAD:
7681 7820                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7682 7821                          goto end;
7683 7822                  case NFS4_CHKSEQ_REPLAY:
7684 7823                          /*
7685 7824                           * Note this case is the duplicate case so
7686 7825                           * resp->status is already set.
7687 7826                           */
7688 7827                          *cs->statusp = resp->status;
7689 7828                          rfs4_update_lease(sp->rs_owner->ro_client);
7690 7829                          goto end;
7691 7830                  }
7692 7831                  break;
7693 7832          default:
7694 7833                  ASSERT(FALSE);
7695 7834                  break;
7696 7835          }
7697 7836  
7698 7837          rfs4_dbe_lock(sp->rs_dbe);
7699 7838          /*
7700 7839           * Check that the new access modes and deny modes are valid.
7701 7840           * Check that no invalid bits are set.
7702 7841           */
7703 7842          if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7704 7843              (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7705 7844                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7706 7845                  rfs4_update_open_sequence(sp->rs_owner);
7707 7846                  rfs4_dbe_unlock(sp->rs_dbe);
7708 7847                  goto end;
7709 7848          }
7710 7849  
7711 7850          /*
7712 7851           * The new modes must be a subset of the current modes and
7713 7852           * the access must specify at least one mode. To test that
7714 7853           * the new mode is a subset of the current modes we bitwise
7715 7854           * AND them together and check that the result equals the new
7716 7855           * mode. For example:
7717 7856           * New mode, access == R and current mode, sp->rs_open_access  == RW
7718 7857           * access & sp->rs_open_access == R == access, so the new access mode
7719 7858           * is valid. Consider access == RW, sp->rs_open_access = R
7720 7859           * access & sp->rs_open_access == R != access, so the new access mode
7721 7860           * is invalid.
7722 7861           */
7723 7862          if ((access & sp->rs_open_access) != access ||
7724 7863              (deny & sp->rs_open_deny) != deny ||
7725 7864              (access &
7726 7865              (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7727 7866                  *cs->statusp = resp->status = NFS4ERR_INVAL;
7728 7867                  rfs4_update_open_sequence(sp->rs_owner);
7729 7868                  rfs4_dbe_unlock(sp->rs_dbe);
7730 7869                  goto end;
7731 7870          }
7732 7871  
7733 7872          /*
7734 7873           * Release any share locks associated with this stateID.
7735 7874           * Strictly speaking, this violates the spec because the
7736 7875           * spec effectively requires that open downgrade be atomic.
7737 7876           * At present, fs_shrlock does not have this capability.
7738 7877           */
7739 7878          (void) rfs4_unshare(sp);
7740 7879  
7741 7880          status = rfs4_share(sp, access, deny);
7742 7881          if (status != NFS4_OK) {
7743 7882                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7744 7883                  rfs4_update_open_sequence(sp->rs_owner);
7745 7884                  rfs4_dbe_unlock(sp->rs_dbe);
7746 7885                  goto end;
7747 7886          }
7748 7887  
7749 7888          fp = sp->rs_finfo;
7750 7889          rfs4_dbe_lock(fp->rf_dbe);
7751 7890  
7752 7891          /*
7753 7892           * If the current mode has deny read and the new mode
7754 7893           * does not, decrement the number of deny read mode bits
7755 7894           * and if it goes to zero turn off the deny read bit
7756 7895           * on the file.
7757 7896           */
7758 7897          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7759 7898              (deny & OPEN4_SHARE_DENY_READ) == 0) {
7760 7899                  fp->rf_deny_read--;
7761 7900                  if (fp->rf_deny_read == 0)
7762 7901                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7763 7902          }
7764 7903  
7765 7904          /*
7766 7905           * If the current mode has deny write and the new mode
7767 7906           * does not, decrement the number of deny write mode bits
7768 7907           * and if it goes to zero turn off the deny write bit
7769 7908           * on the file.
7770 7909           */
7771 7910          if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7772 7911              (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7773 7912                  fp->rf_deny_write--;
7774 7913                  if (fp->rf_deny_write == 0)
7775 7914                          fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7776 7915          }
7777 7916  
7778 7917          /*
7779 7918           * If the current mode has access read and the new mode
7780 7919           * does not, decrement the number of access read mode bits
7781 7920           * and if it goes to zero turn off the access read bit
7782 7921           * on the file.  set fflags to FREAD for the call to
7783 7922           * vn_open_downgrade().
7784 7923           */
7785 7924          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7786 7925              (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7787 7926                  fp->rf_access_read--;
7788 7927                  if (fp->rf_access_read == 0)
7789 7928                          fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7790 7929                  fflags |= FREAD;
7791 7930          }
7792 7931  
7793 7932          /*
7794 7933           * If the current mode has access write and the new mode
7795 7934           * does not, decrement the number of access write mode bits
7796 7935           * and if it goes to zero turn off the access write bit
7797 7936           * on the file.  set fflags to FWRITE for the call to
7798 7937           * vn_open_downgrade().
7799 7938           */
7800 7939          if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7801 7940              (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7802 7941                  fp->rf_access_write--;
7803 7942                  if (fp->rf_access_write == 0)
7804 7943                          fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7805 7944                  fflags |= FWRITE;
7806 7945          }
7807 7946  
7808 7947          /* Check that the file is still accessible */
7809 7948          ASSERT(fp->rf_share_access);
7810 7949  
7811 7950          rfs4_dbe_unlock(fp->rf_dbe);
7812 7951  
7813 7952          /* now set the new open access and deny modes */
7814 7953          sp->rs_open_access = access;
7815 7954          sp->rs_open_deny = deny;
7816 7955  
7817 7956          /*
7818 7957           * we successfully downgraded the share lock, now we need to downgrade
7819 7958           * the open. it is possible that the downgrade was only for a deny
7820 7959           * mode and we have nothing else to do.
7821 7960           */
7822 7961          if ((fflags & (FREAD|FWRITE)) != 0)
7823 7962                  vn_open_downgrade(cs->vp, fflags);
7824 7963  
7825 7964          /* Update the stateid */
7826 7965          next_stateid(&sp->rs_stateid);
7827 7966          resp->open_stateid = sp->rs_stateid.stateid;
7828 7967  
7829 7968          rfs4_dbe_unlock(sp->rs_dbe);
7830 7969  
7831 7970          *cs->statusp = resp->status = NFS4_OK;
7832 7971          /* Update the lease */
7833 7972          rfs4_update_lease(sp->rs_owner->ro_client);
7834 7973          /* And the sequence */
7835 7974          rfs4_update_open_sequence(sp->rs_owner);
7836 7975          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7837 7976  
7838 7977  end:
7839 7978          rfs4_sw_exit(&sp->rs_owner->ro_sw);
7840 7979          rfs4_state_rele(sp);
7841 7980  out:
7842 7981          DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7843 7982              OPEN_DOWNGRADE4res *, resp);
7844 7983  }
7845 7984  
/*
 * Find the first occurrence of the NUL-terminated string s2 within the
 * first n bytes of the buffer s1.  The buffer does not need to be
 * NUL-terminated.  Returns a pointer to the start of the match, or NULL
 * when s2 does not occur within those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        char *cur = (char *)s1;
        size_t patlen = strlen(s2);
        size_t remaining;

        /* Slide a patlen-sized window along the buffer while it still fits */
        for (remaining = n; remaining >= patlen; remaining--, cur++) {
                if (bcmp(cur, s2, patlen) == 0)
                        return (cur);
        }

        return (NULL);
}
7861 8000  
7862 8001  /*
7863 8002   * The logic behind this function is detailed in the NFSv4 RFC in the
7864 8003   * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7865 8004   * that section for explicit guidance to server behavior for
7866 8005   * SETCLIENTID.
7867 8006   */
7868 8007  void
7869 8008  rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7870 8009      struct svc_req *req, struct compound_state *cs)
7871 8010  {
7872 8011          SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7873 8012          SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7874 8013          rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7875 8014          rfs4_clntip_t *ci;
7876 8015          bool_t create;
7877 8016          char *addr, *netid;
7878 8017          int len;
7879 8018  
7880 8019          DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7881 8020              SETCLIENTID4args *, args);
7882 8021  retry:
7883 8022          newcp = cp_confirmed = cp_unconfirmed = NULL;
7884 8023  
7885 8024          /*
7886 8025           * Save the caller's IP address
7887 8026           */
7888 8027          args->client.cl_addr =
7889 8028              (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7890 8029  
7891 8030          /*
7892 8031           * Record if it is a Solaris client that cannot handle referrals.
7893 8032           */
7894 8033          if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
7895 8034              !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
7896 8035                  /* Add a "yes, it's downrev" record */
7897 8036                  create = TRUE;
7898 8037                  ci = rfs4_find_clntip(args->client.cl_addr, &create);
7899 8038                  ASSERT(ci != NULL);
7900 8039                  rfs4_dbe_rele(ci->ri_dbe);
7901 8040          } else {
7902 8041                  /* Remove any previous record */
7903 8042                  rfs4_invalidate_clntip(args->client.cl_addr);
7904 8043          }
7905 8044  
7906 8045          /*
7907 8046           * In search of an EXISTING client matching the incoming
7908 8047           * request to establish a new client identifier at the server
7909 8048           */
7910 8049          create = TRUE;
7911 8050          cp = rfs4_findclient(&args->client, &create, NULL);
7912 8051  
7913 8052          /* Should never happen */
7914 8053          ASSERT(cp != NULL);
7915 8054  
7916 8055          if (cp == NULL) {
7917 8056                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7918 8057                  goto out;
7919 8058          }
7920 8059  
7921 8060          /*
7922 8061           * Easiest case. Client identifier is newly created and is
7923 8062           * unconfirmed.  Also note that for this case, no other
7924 8063           * entries exist for the client identifier.  Nothing else to
7925 8064           * check.  Just setup the response and respond.
7926 8065           */
7927 8066          if (create) {
7928 8067                  *cs->statusp = res->status = NFS4_OK;
7929 8068                  res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
7930 8069                  res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7931 8070                      cp->rc_confirm_verf;
7932 8071                  /* Setup callback information; CB_NULL confirmation later */
7933 8072                  rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7934 8073  
7935 8074                  rfs4_client_rele(cp);
7936 8075                  goto out;
7937 8076          }
7938 8077  
7939 8078          /*
7940 8079           * An existing, confirmed client may exist but it may not have
7941 8080           * been active for at least one lease period.  If so, then
7942 8081           * "close" the client and create a new client identifier
7943 8082           */
7944 8083          if (rfs4_lease_expired(cp)) {
7945 8084                  rfs4_client_close(cp);
7946 8085                  goto retry;
7947 8086          }
7948 8087  
7949 8088          if (cp->rc_need_confirm == TRUE)
7950 8089                  cp_unconfirmed = cp;
7951 8090          else
7952 8091                  cp_confirmed = cp;
7953 8092  
7954 8093          cp = NULL;
7955 8094  
7956 8095          /*
7957 8096           * We have a confirmed client, now check for an
7958 8097                   * unconfirmed entry
7959 8098           */
7960 8099          if (cp_confirmed) {
7961 8100                  /* If creds don't match then client identifier is inuse */
7962 8101                  if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
7963 8102                          rfs4_cbinfo_t *cbp;
7964 8103                          /*
7965 8104                           * Some one else has established this client
7966 8105                           * id. Try and say who they are. We will use
7967 8106                           * the call back address supplied by the
7968 8107                           * first client.
7969 8108                           */
7970 8109                          *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7971 8110  
7972 8111                          addr = netid = NULL;
7973 8112  
7974 8113                          cbp = &cp_confirmed->rc_cbinfo;
7975 8114                          if (cbp->cb_callback.cb_location.r_addr &&
7976 8115                              cbp->cb_callback.cb_location.r_netid) {
7977 8116                                  cb_client4 *cbcp = &cbp->cb_callback;
7978 8117  
7979 8118                                  len = strlen(cbcp->cb_location.r_addr)+1;
7980 8119                                  addr = kmem_alloc(len, KM_SLEEP);
7981 8120                                  bcopy(cbcp->cb_location.r_addr, addr, len);
7982 8121                                  len = strlen(cbcp->cb_location.r_netid)+1;
7983 8122                                  netid = kmem_alloc(len, KM_SLEEP);
7984 8123                                  bcopy(cbcp->cb_location.r_netid, netid, len);
7985 8124                          }
7986 8125  
7987 8126                          res->SETCLIENTID4res_u.client_using.r_addr = addr;
7988 8127                          res->SETCLIENTID4res_u.client_using.r_netid = netid;
7989 8128  
7990 8129                          rfs4_client_rele(cp_confirmed);
7991 8130                  }
7992 8131  
7993 8132                  /*
7994 8133                   * Confirmed, creds match, and verifier matches; must
7995 8134                   * be an update of the callback info
7996 8135                   */
7997 8136                  if (cp_confirmed->rc_nfs_client.verifier ==
7998 8137                      args->client.verifier) {
7999 8138                          /* Setup callback information */
8000 8139                          rfs4_client_setcb(cp_confirmed, &args->callback,
8001 8140                              args->callback_ident);
8002 8141  
8003 8142                          /* everything okay -- move ahead */
8004 8143                          *cs->statusp = res->status = NFS4_OK;
8005 8144                          res->SETCLIENTID4res_u.resok4.clientid =
8006 8145                              cp_confirmed->rc_clientid;
8007 8146  
8008 8147                          /* update the confirm_verifier and return it */
8009 8148                          rfs4_client_scv_next(cp_confirmed);
8010 8149                          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8011 8150                              cp_confirmed->rc_confirm_verf;
8012 8151  
8013 8152                          rfs4_client_rele(cp_confirmed);
8014 8153                          goto out;
8015 8154                  }
8016 8155  
8017 8156                  /*
8018 8157                   * Creds match but the verifier doesn't.  Must search
8019 8158                   * for an unconfirmed client that would be replaced by
8020 8159                   * this request.
8021 8160                   */
8022 8161                  create = FALSE;
8023 8162                  cp_unconfirmed = rfs4_findclient(&args->client, &create,
8024 8163                      cp_confirmed);
8025 8164          }
8026 8165  
8027 8166          /*
8028 8167           * At this point, we have taken care of the brand new client
8029 8168           * struct, INUSE case, update of an existing, and confirmed
8030 8169           * client struct.
8031 8170           */
8032 8171  
8033 8172          /*
8034 8173           * check to see if things have changed while we originally
8035 8174           * picked up the client struct.  If they have, then return and
8036 8175           * retry the processing of this SETCLIENTID request.
8037 8176           */
8038 8177          if (cp_unconfirmed) {
8039 8178                  rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8040 8179                  if (!cp_unconfirmed->rc_need_confirm) {
8041 8180                          rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8042 8181                          rfs4_client_rele(cp_unconfirmed);
8043 8182                          if (cp_confirmed)
8044 8183                                  rfs4_client_rele(cp_confirmed);
8045 8184                          goto retry;
8046 8185                  }
8047 8186                  /* do away with the old unconfirmed one */
8048 8187                  rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8049 8188                  rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8050 8189                  rfs4_client_rele(cp_unconfirmed);
8051 8190                  cp_unconfirmed = NULL;
8052 8191          }
8053 8192  
8054 8193          /*
8055 8194           * This search will temporarily hide the confirmed client
8056 8195           * struct while a new client struct is created as the
8057 8196           * unconfirmed one.
8058 8197           */
8059 8198          create = TRUE;
8060 8199          newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8061 8200  
8062 8201          ASSERT(newcp != NULL);
8063 8202  
8064 8203          if (newcp == NULL) {
8065 8204                  *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8066 8205                  rfs4_client_rele(cp_confirmed);
8067 8206                  goto out;
8068 8207          }
8069 8208  
8070 8209          /*
8071 8210           * If one was not created, then a similar request must be in
8072 8211           * process so release and start over with this one
8073 8212           */
8074 8213          if (create != TRUE) {
8075 8214                  rfs4_client_rele(newcp);
8076 8215                  if (cp_confirmed)
8077 8216                          rfs4_client_rele(cp_confirmed);
8078 8217                  goto retry;
8079 8218          }
8080 8219  
8081 8220          *cs->statusp = res->status = NFS4_OK;
8082 8221          res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8083 8222          res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8084 8223              newcp->rc_confirm_verf;
8085 8224          /* Setup callback information; CB_NULL confirmation later */
8086 8225          rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8087 8226  
8088 8227          newcp->rc_cp_confirmed = cp_confirmed;
8089 8228  
8090 8229          rfs4_client_rele(newcp);
8091 8230  
8092 8231  out:
8093 8232          DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8094 8233              SETCLIENTID4res *, res);
8095 8234  }
8096 8235  
  
    | 
      ↓ open down ↓ | 
    1537 lines elided | 
    
      ↑ open up ↑ | 
  
8097 8236  /*ARGSUSED*/
           /*
            * SETCLIENTID_CONFIRM operation handler.
            *
            * This listing is a diff: lines marked '-' are the old code and lines
            * marked '+' are their replacements, which pass the nfs4_srv_t obtained
            * from nfs4_get_srv() to the server-instance and stable-storage helpers.
            *
            * Flow, as visible below:
            *  - look up the client by args->clientid; if none is found the status
            *    comes from rfs4_check_clientid();
            *  - creds_ok() failure yields NFS4ERR_CLID_INUSE;
            *  - a confirm-verifier mismatch yields NFS4ERR_STALE_CLIENTID;
            *  - otherwise, under the client's dbe lock, rc_need_confirm is cleared,
            *    any previously confirmed client hanging off rc_cp_confirmed is
            *    detached (and closed after the lock is dropped), the server
            *    instance is reassigned if it differs from the current one, and
            *    the clientid is recorded via rfs4_ss_clid().
            *
            * The result is reported through both *cs->statusp and res->status.
            */
8098 8237  void
8099 8238  rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8100 8239      struct svc_req *req, struct compound_state *cs)
8101 8240  {
8102 8241          SETCLIENTID_CONFIRM4args *args =
8103 8242              &argop->nfs_argop4_u.opsetclientid_confirm;
8104 8243          SETCLIENTID_CONFIRM4res *res =
8105 8244              &resop->nfs_resop4_u.opsetclientid_confirm;
8106 8245          rfs4_client_t *cp, *cptoclose = NULL;
     8246 +        nfs4_srv_t *nsrv4;
8107 8247  
8108 8248          DTRACE_NFSV4_2(op__setclientid__confirm__start,
8109 8249              struct compound_state *, cs,
8110 8250              SETCLIENTID_CONFIRM4args *, args);
8111 8251  
     8252 +        nsrv4 = nfs4_get_srv();
8112 8253          *cs->statusp = res->status = NFS4_OK;
8113 8254  
8114 8255          cp = rfs4_findclient_by_id(args->clientid, TRUE);
8115 8256  
8116 8257          if (cp == NULL) {
8117 8258                  *cs->statusp = res->status =
8118 8259                      rfs4_check_clientid(&args->clientid, 1);
8119 8260                  goto out;
8120 8261          }
8121 8262  
8122 8263          if (!creds_ok(cp, req, cs)) {
8123 8264                  *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8124 8265                  rfs4_client_rele(cp);
8125 8266                  goto out;
8126 8267          }
8127 8268  
8128 8269          /* If the verifier doesn't match, the record doesn't match */
8129 8270          if (cp->rc_confirm_verf != args->setclientid_confirm) {
8130 8271                  *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8131 8272                  rfs4_client_rele(cp);
8132 8273                  goto out;
8133 8274          }
8134 8275  
8135 8276          rfs4_dbe_lock(cp->rc_dbe);
8136 8277          cp->rc_need_confirm = FALSE;
  
    | 
      ↓ open down ↓ | 
    15 lines elided | 
    
      ↑ open up ↑ | 
  
8137 8278          if (cp->rc_cp_confirmed) {
                           /*
                            * Detach the older confirmed client and flag it with
                            * rc_ss_remove; it is closed below, after cp's dbe lock
                            * has been released.
                            */
8138 8279                  cptoclose = cp->rc_cp_confirmed;
8139 8280                  cptoclose->rc_ss_remove = 1;
8140 8281                  cp->rc_cp_confirmed = NULL;
8141 8282          }
8142 8283  
8143 8284          /*
8144 8285           * Update the client's associated server instance, if it's changed
8145 8286           * since the client was created.
8146 8287           */
8147      -        if (rfs4_servinst(cp) != rfs4_cur_servinst)
8148      -                rfs4_servinst_assign(cp, rfs4_cur_servinst);
     8288 +        if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
     8289 +                rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8149 8290  
8150 8291          /*
8151 8292           * Record clientid in stable storage.
8152 8293           * Must be done after server instance has been assigned.
8153 8294           */
8154      -        rfs4_ss_clid(cp);
     8295 +        rfs4_ss_clid(nsrv4, cp);
8155 8296  
8156 8297          rfs4_dbe_unlock(cp->rc_dbe);
8157 8298  
8158 8299          if (cptoclose)
8159 8300                  /* don't need to rele, client_close does it */
8160 8301                  rfs4_client_close(cptoclose);
8161 8302  
8162 8303          /* If needed, initiate CB_NULL call for callback path */
8163 8304          rfs4_deleg_cb_check(cp);
8164 8305          rfs4_update_lease(cp);
8165 8306  
8166 8307          /*
8167 8308           * Check to see if client can perform reclaims
8168 8309           */
8169      -        rfs4_ss_chkclid(cp);
     8310 +        rfs4_ss_chkclid(nsrv4, cp);
8170 8311  
8171 8312          rfs4_client_rele(cp);
8172 8313  
8173 8314  out:
                   /*
                    * NOTE(review): the probe below names the argument type
                    * "SETCLIENTID_CONFIRM4 *" while res is declared as
                    * "SETCLIENTID_CONFIRM4res *" -- confirm whether that is intended.
                    */
8174 8315          DTRACE_NFSV4_2(op__setclientid__confirm__done,
8175 8316              struct compound_state *, cs,
8176 8317              SETCLIENTID_CONFIRM4 *, res);
8177 8318  }
8178 8319  
8179 8320  
8180 8321  /*ARGSUSED*/
           /*
            * CLOSE operation handler.
            *
            * Requires a current filehandle (cs->vp), looks up the open state for
            * args->open_stateid and verifies that it refers to the same vnode.
            * With the open owner serialized via rfs4_sw_enter(), the stateid and
            * the open seqid are validated; each failing check maps to the NFSv4
            * status set in the switch below.  On success the stateid is advanced
            * and returned in resp->open_stateid, the client's lease and the open
            * owner's sequence/response are updated, and the state is closed with
            * rfs4_state_close().
            *
            * The result is reported through both *cs->statusp and resp->status.
            */
8181 8322  void
8182 8323  rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8183 8324      struct svc_req *req, struct compound_state *cs)
8184 8325  {
8185 8326          CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8186 8327          CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8187 8328          rfs4_state_t *sp;
8188 8329          nfsstat4 status;
8189 8330  
8190 8331          DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8191 8332              CLOSE4args *, args);
8192 8333  
8193 8334          if (cs->vp == NULL) {
8194 8335                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8195 8336                  goto out;
8196 8337          }
8197 8338  
8198 8339          status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8199 8340          if (status != NFS4_OK) {
8200 8341                  *cs->statusp = resp->status = status;
8201 8342                  goto out;
8202 8343          }
8203 8344  
8204 8345          /* Ensure specified filehandle matches */
8205 8346          if (cs->vp != sp->rs_finfo->rf_vp) {
8206 8347                  rfs4_state_rele(sp);
8207 8348                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8208 8349                  goto out;
8209 8350          }
8210 8351  
8211 8352          /* hold off other access to open_owner while we tinker */
8212 8353          rfs4_sw_enter(&sp->rs_owner->ro_sw);
8213 8354  
                   /*
                    * Every path that leaves this switch via "goto end" has already
                    * set the reply status; only the paths that "break" continue on
                    * to actually close the state.
                    */
8214 8355          switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8215 8356          case NFS4_CHECK_STATEID_OKAY:
8216 8357                  if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8217 8358                      resop) != NFS4_CHKSEQ_OKAY) {
8218 8359                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8219 8360                          goto end;
8220 8361                  }
8221 8362                  break;
8222 8363          case NFS4_CHECK_STATEID_OLD:
8223 8364                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8224 8365                  goto end;
8225 8366          case NFS4_CHECK_STATEID_BAD:
8226 8367                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8227 8368                  goto end;
8228 8369          case NFS4_CHECK_STATEID_EXPIRED:
8229 8370                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8230 8371                  goto end;
8231 8372          case NFS4_CHECK_STATEID_CLOSED:
8232 8373                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8233 8374                  goto end;
8234 8375          case NFS4_CHECK_STATEID_UNCONFIRMED:
8235 8376                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8236 8377                  goto end;
8237 8378          case NFS4_CHECK_STATEID_REPLAY:
8238 8379                  /* Check the sequence id for the open owner */
8239 8380                  switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8240 8381                      resop)) {
8241 8382                  case NFS4_CHKSEQ_OKAY:
8242 8383                          /*
8243 8384                           * This is replayed stateid; if seqid matches
8244 8385                           * next expected, then client is using wrong seqid.
8245 8386                           */
8246 8387                          /* FALL THROUGH */
8247 8388                  case NFS4_CHKSEQ_BAD:
8248 8389                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8249 8390                          goto end;
8250 8391                  case NFS4_CHKSEQ_REPLAY:
8251 8392                          /*
8252 8393                           * Note this case is the duplicate case so
8253 8394                           * resp->status is already set.
8254 8395                           */
8255 8396                          *cs->statusp = resp->status;
8256 8397                          rfs4_update_lease(sp->rs_owner->ro_client);
8257 8398                          goto end;
8258 8399                  }
8259 8400                  break;
8260 8401          default:
8261 8402                  ASSERT(FALSE);
8262 8403                  break;
8263 8404          }
8264 8405  
8265 8406          rfs4_dbe_lock(sp->rs_dbe);
8266 8407  
8267 8408          /* Update the stateid. */
8268 8409          next_stateid(&sp->rs_stateid);
8269 8410          resp->open_stateid = sp->rs_stateid.stateid;
8270 8411  
8271 8412          rfs4_dbe_unlock(sp->rs_dbe);
8272 8413  
8273 8414          rfs4_update_lease(sp->rs_owner->ro_client);
8274 8415          rfs4_update_open_sequence(sp->rs_owner);
8275 8416          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8276 8417  
8277 8418          rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8278 8419  
                   /*
                    * status still holds the value returned by rfs4_get_state(),
                    * which was NFS4_OK or we would have bailed out above.
                    */
8279 8420          *cs->statusp = resp->status = status;
8280 8421  
8281 8422  end:
8282 8423          rfs4_sw_exit(&sp->rs_owner->ro_sw);
8283 8424          rfs4_state_rele(sp);
8284 8425  out:
8285 8426          DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8286 8427              CLOSE4res *, resp);
8287 8428  }
8288 8429  
8289 8430  /*
8290 8431   * Manage the counts on the file struct and close all file locks
            *
            * sp              - open state whose locks and share reservations are
            *                   being released.
            * cr              - credential passed to VOP_CLOSE(); note that the
            *                   F_UNLKSYS VOP_FRLOCK() call below uses CRED() instead.
            * close_of_client - when set, all locks held under the client's sysid
            *                   are dropped in one shot (once per client, tracked
            *                   by rc_unlksys_completed) rather than per lock owner.
            *
            * On return sp->rs_open_access and sp->rs_open_deny are cleared if
            * rs_open_access was non-zero on entry.
8291 8432   */
8292 8433  /*ARGSUSED*/
8293 8434  void
8294 8435  rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8295 8436      bool_t close_of_client)
8296 8437  {
8297 8438          rfs4_file_t *fp = sp->rs_finfo;
8298 8439          rfs4_lo_state_t *lsp;
8299 8440          int fflags = 0;
8300 8441  
8301 8442          /*
8302 8443           * If this call is part of the larger closing down of client
8303 8444           * state then it is just easier to release all locks
8304 8445           * associated with this client instead of going through each
8305 8446           * individual file and cleaning locks there.
8306 8447           */
8307 8448          if (close_of_client) {
8308 8449                  if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8309 8450                      !list_is_empty(&sp->rs_lostatelist) &&
8310 8451                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8311 8452                          /* Is the PxFS kernel module loaded? */
8312 8453                          if (lm_remove_file_locks != NULL) {
8313 8454                                  int new_sysid;
8314 8455  
8315 8456                                  /* Encode the cluster nodeid in new sysid */
8316 8457                                  new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8317 8458                                  lm_set_nlmid_flk(&new_sysid);
8318 8459  
8319 8460                                  /*
8320 8461                                   * This PxFS routine removes file locks for a
8321 8462                                   * client over all nodes of a cluster.
8322 8463                                   */
8323 8464                                  NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8324 8465                                      "lm_remove_file_locks(sysid=0x%x)\n",
8325 8466                                      new_sysid));
8326 8467                                  (*lm_remove_file_locks)(new_sysid);
8327 8468                          } else {
8328 8469                                  struct flock64 flk;
8329 8470  
8330 8471                                  /* Release all locks for this client */
8331 8472                                  flk.l_type = F_UNLKSYS;
8332 8473                                  flk.l_whence = 0;
8333 8474                                  flk.l_start = 0;
8334 8475                                  flk.l_len = 0;
8335 8476                                  flk.l_sysid =
8336 8477                                      sp->rs_owner->ro_client->rc_sysidt;
8337 8478                                  flk.l_pid = 0;
8338 8479                                  (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8339 8480                                      &flk, F_REMOTELOCK | FREAD | FWRITE,
8340 8481                                      (u_offset_t)0, NULL, CRED(), NULL);
8341 8482                          }
8342 8483  
                                   /* Remember that the bulk unlock ran for this client */
8343 8484                          sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8344 8485                  }
8345 8486          }
8346 8487  
8347 8488          /*
8348 8489           * Release all locks on this file by this lock owner or at
8349 8490           * least mark the locks as having been released
8350 8491           */
8351 8492          for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8352 8493              lsp = list_next(&sp->rs_lostatelist, lsp)) {
8353 8494                  lsp->rls_locks_cleaned = TRUE;
8354 8495  
8355 8496                  /* Was this already taken care of above? */
8356 8497                  if (!close_of_client &&
8357 8498                      sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8358 8499                          (void) cleanlocks(sp->rs_finfo->rf_vp,
8359 8500                              lsp->rls_locker->rl_pid,
8360 8501                              lsp->rls_locker->rl_client->rc_sysidt);
8361 8502          }
8362 8503  
8363 8504          /*
8364 8505           * Release any shrlocks associated with this open state ID.
8365 8506           * This must be done before the rfs4_state gets marked closed.
8366 8507           */
8367 8508          if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8368 8509                  (void) rfs4_unshare(sp);
8369 8510  
8370 8511          if (sp->rs_open_access) {
8371 8512                  rfs4_dbe_lock(fp->rf_dbe);
8372 8513  
8373 8514                  /*
8374 8515                   * Decrement the count for each access and deny bit that this
8375 8516                   * state has contributed to the file.
8376 8517                   * If the file counts go to zero
8377 8518                   * clear the appropriate bit in the appropriate mask.
8378 8519                   */
8379 8520                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8380 8521                          fp->rf_access_read--;
8381 8522                          fflags |= FREAD;
8382 8523                          if (fp->rf_access_read == 0)
8383 8524                                  fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8384 8525                  }
8385 8526                  if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8386 8527                          fp->rf_access_write--;
8387 8528                          fflags |= FWRITE;
8388 8529                          if (fp->rf_access_write == 0)
8389 8530                                  fp->rf_share_access &=
8390 8531                                      ~OPEN4_SHARE_ACCESS_WRITE;
8391 8532                  }
8392 8533                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8393 8534                          fp->rf_deny_read--;
8394 8535                          if (fp->rf_deny_read == 0)
8395 8536                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8396 8537                  }
8397 8538                  if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8398 8539                          fp->rf_deny_write--;
8399 8540                          if (fp->rf_deny_write == 0)
8400 8541                                  fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8401 8542                  }
8402 8543  
                           /* fflags carries FREAD/FWRITE for the access bits dropped above */
8403 8544                  (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8404 8545  
8405 8546                  rfs4_dbe_unlock(fp->rf_dbe);
8406 8547  
8407 8548                  sp->rs_open_access = 0;
8408 8549                  sp->rs_open_deny = 0;
8409 8550          }
8410 8551  }
8411 8552  
8412 8553  /*
8413 8554   * lock_denied: Fill in a LOCK4denied structure given an flock64 structure.
            *
            * The owner is taken from the NFSv4 lock owner registered for
            * flk->l_pid when one exists; otherwise an identity is fabricated from
            * the sysid and pid.  dp->owner.owner_val is allocated here with
            * kmem_alloc(KM_SLEEP).
            *
            * Returns:
            *   NFS4_OK          - dp fully populated.
            *   NFS4ERR_EXPIRED  - the lock owner's client lease has expired; the
            *                      client is closed and dp is left untouched.
            *   NFS4ERR_INVAL    - flk->l_type is neither F_RDLCK nor F_WRLCK.
            *                      NOTE(review): owner_val has already been
            *                      allocated on this path -- confirm the caller
            *                      frees it.
8414 8555   */
8415 8556  static nfsstat4
8416 8557  lock_denied(LOCK4denied *dp, struct flock64 *flk)
8417 8558  {
8418 8559          rfs4_lockowner_t *lo;
8419 8560          rfs4_client_t *cp;
8420 8561          uint32_t len;
8421 8562  
8422 8563          lo = rfs4_findlockowner_by_pid(flk->l_pid);
8423 8564          if (lo != NULL) {
8424 8565                  cp = lo->rl_client;
8425 8566                  if (rfs4_lease_expired(cp)) {
                                   /*
                                    * NOTE(review): cp is used after the lock owner
                                    * reference is dropped; presumably something else
                                    * still keeps the client alive here -- confirm.
                                    */
8426 8567                          rfs4_lockowner_rele(lo);
8427 8568                          rfs4_dbe_hold(cp->rc_dbe);
8428 8569                          rfs4_client_close(cp);
8429 8570                          return (NFS4ERR_EXPIRED);
8430 8571                  }
8431 8572                  dp->owner.clientid = lo->rl_owner.clientid;
8432 8573                  len = lo->rl_owner.owner_len;
8433 8574                  dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8434 8575                  bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8435 8576                  dp->owner.owner_len = len;
8436 8577                  rfs4_lockowner_rele(lo);
8437 8578                  goto finish;
8438 8579          }
8439 8580  
8440 8581          /*
8441 8582           * It's not an NFS4 lock. We take advantage that the upper 32 bits
8442 8583           * of the client id contain the boot time for a NFS4 lock. So we
8443 8584           * fabricate an identity by setting clientid to the sysid, and
8444 8585           * the lock owner to the pid.
8445 8586           */
8446 8587          dp->owner.clientid = flk->l_sysid;
8447 8588          len = sizeof (pid_t);
8448 8589          dp->owner.owner_len = len;
8449 8590          dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8450 8591          bcopy(&flk->l_pid, dp->owner.owner_val, len);
8451 8592  finish:
8452 8593          dp->offset = flk->l_start;
8453 8594          dp->length = flk->l_len;
8454 8595  
8455 8596          if (flk->l_type == F_RDLCK)
8456 8597                  dp->locktype = READ_LT;
8457 8598          else if (flk->l_type == F_WRLCK)
8458 8599                  dp->locktype = WRITE_LT;
8459 8600          else
8460 8601                  return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8461 8602  
8462 8603          return (NFS4_OK);
8463 8604  }
8464 8605  
8465 8606  /*
8466 8607   * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8467 8608   * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8468 8609   * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8469 8610   * for that (obviously); they are sending the LOCK requests with some delays
8470 8611   * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8471 8612   * locking and delay implementation at the client side.
8472 8613   *
8473 8614   * To make the life of the clients easier, the NFSv4.0 server tries to do some
8474 8615   * fast retries on its own (the for loop below) in a hope the lock will be
8475 8616   * available soon.  And if not, the client won't need to resend the LOCK
8476 8617   * requests so fast to check the lock availability.  This basically saves some
8477 8618   * network traffic and tries to make sure the client gets the lock ASAP.
            *
            * Returns the error from the last VOP_FRLOCK() set attempt.  When that
            * error is EAGAIN or EACCES and F_GETLK reports a conflicting lock,
            * *flock is overwritten with the description of that conflicting lock.
            *
            * The command is F_SETLK_NBMAND when nbl_need_check(vp) is true and
            * F_SETLK otherwise.  The delay between attempts starts at
            * rfs4_lock_delay milliseconds and doubles after every failed attempt
            * but the last; each trip through "retry" resets it.
            *
            * NOTE(review): error is assigned only inside the for loop, so it
            * would be read uninitialized if rfs4_maxlock_tries were ever <= 0 --
            * confirm that tunable is always positive.
8478 8619   */
8479 8620  static int
8480 8621  setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8481 8622  {
8482 8623          int error;
8483 8624          struct flock64 flk;
8484 8625          int i;
8485 8626          clock_t delaytime;
8486 8627          int cmd;
8487 8628          int spin_cnt = 0;
8488 8629  
8489 8630          cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8490 8631  retry:
8491 8632          delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8492 8633  
8493 8634          for (i = 0; i < rfs4_maxlock_tries; i++) {
8494 8635                  LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8495 8636                  error = VOP_FRLOCK(vp, cmd,
8496 8637                      flock, flag, (u_offset_t)0, NULL, cred, NULL);
8497 8638  
                           /* Anything other than "lock is busy" ends the retries */
8498 8639                  if (error != EAGAIN && error != EACCES)
8499 8640                          break;
8500 8641  
                           /* No point in sleeping after the final attempt */
8501 8642                  if (i < rfs4_maxlock_tries - 1) {
8502 8643                          delay(delaytime);
8503 8644                          delaytime *= 2;
8504 8645                  }
8505 8646          }
8506 8647  
8507 8648          if (error == EAGAIN || error == EACCES) {
8508 8649                  /* Get the owner of the lock */
8509 8650                  flk = *flock;
8510 8651                  LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8511 8652                  if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8512 8653                      NULL) == 0) {
8513 8654                          /*
8514 8655                           * There's a race inherent in the current VOP_FRLOCK
8515 8656                           * design where:
8516 8657                           * a: "other guy" takes a lock that conflicts with a
8517 8658                           * lock we want
8518 8659                           * b: we attempt to take our lock (non-blocking) and
8519 8660                           * the attempt fails.
8520 8661                           * c: "other guy" releases the conflicting lock
8521 8662                           * d: we ask what lock conflicts with the lock we want,
8522 8663                           * getting F_UNLCK (no lock blocks us)
8523 8664                           *
8524 8665                           * If we retry the non-blocking lock attempt in this
8525 8666                           * case (restart at step 'b') there's some possibility
8526 8667                           * that many such attempts might fail.  However a test
8527 8668                           * designed to actually provoke this race shows that
8528 8669                           * the vast majority of cases require no retry, and
8529 8670                           * only a few took as many as three retries.  Here's
8530 8671                           * the test outcome:
8531 8672                           *
8532 8673                           *         number of retries    how many times we needed
8533 8674                           *                              that many retries
8534 8675                           *         0                    79461
8535 8676                           *         1                      862
8536 8677                           *         2                       49
8537 8678                           *         3                        5
8538 8679                           *
8539 8680                           * Given those empirical results, we arbitrarily limit
8540 8681                           * the retry count to ten.
8541 8682                           *
8542 8683                           * If we actually make to ten retries and give up,
8543 8684                           * nothing catastrophic happens, but we're unable to
8544 8685                           * return the information about the conflicting lock to
8545 8686                           * the NFS client.  That's an acceptable trade off vs.
8546 8687                           * letting this retry loop run forever.
8547 8688                           */
8548 8689                          if (flk.l_type == F_UNLCK) {
8549 8690                                  if (spin_cnt++ < 10) {
8550 8691                                          /* No longer locked, retry */
8551 8692                                          goto retry;
8552 8693                                  }
8553 8694                          } else {
                                           /* Hand the conflicting lock back to the caller */
8554 8695                                  *flock = flk;
8555 8696                                  LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8556 8697                                      F_GETLK, &flk);
8557 8698                          }
8558 8699                  }
8559 8700          }
8560 8701  
8561 8702          return (error);
8562 8703  }
8563 8704  
8564 8705  /*ARGSUSED*/
8565 8706  static nfsstat4
8566 8707  rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8567 8708      offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8568 8709  {
8569 8710          nfsstat4 status;
8570 8711          rfs4_lockowner_t *lo = lsp->rls_locker;
8571 8712          rfs4_state_t *sp = lsp->rls_state;
8572 8713          struct flock64 flock;
8573 8714          int16_t ltype;
8574 8715          int flag;
8575 8716          int error;
8576 8717          sysid_t sysid;
8577 8718          LOCK4res *lres;
8578 8719          vnode_t *vp;
8579 8720  
8580 8721          if (rfs4_lease_expired(lo->rl_client)) {
8581 8722                  return (NFS4ERR_EXPIRED);
8582 8723          }
8583 8724  
8584 8725          if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8585 8726                  return (status);
8586 8727  
8587 8728          /* Check for zero length. To lock to end of file use all ones for V4 */
8588 8729          if (length == 0)
8589 8730                  return (NFS4ERR_INVAL);
8590 8731          else if (length == (length4)(~0))
8591 8732                  length = 0;             /* Posix to end of file  */
8592 8733  
8593 8734  retry:
8594 8735          rfs4_dbe_lock(sp->rs_dbe);
8595 8736          if (sp->rs_closed == TRUE) {
8596 8737                  rfs4_dbe_unlock(sp->rs_dbe);
8597 8738                  return (NFS4ERR_OLD_STATEID);
8598 8739          }
8599 8740  
8600 8741          if (resop->resop != OP_LOCKU) {
8601 8742                  switch (locktype) {
8602 8743                  case READ_LT:
8603 8744                  case READW_LT:
8604 8745                          if ((sp->rs_share_access
8605 8746                              & OPEN4_SHARE_ACCESS_READ) == 0) {
8606 8747                                  rfs4_dbe_unlock(sp->rs_dbe);
8607 8748  
8608 8749                                  return (NFS4ERR_OPENMODE);
8609 8750                          }
8610 8751                          ltype = F_RDLCK;
8611 8752                          break;
8612 8753                  case WRITE_LT:
8613 8754                  case WRITEW_LT:
8614 8755                          if ((sp->rs_share_access
8615 8756                              & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8616 8757                                  rfs4_dbe_unlock(sp->rs_dbe);
8617 8758  
8618 8759                                  return (NFS4ERR_OPENMODE);
8619 8760                          }
8620 8761                          ltype = F_WRLCK;
8621 8762                          break;
8622 8763                  }
8623 8764          } else
8624 8765                  ltype = F_UNLCK;
8625 8766  
8626 8767          flock.l_type = ltype;
8627 8768          flock.l_whence = 0;             /* SEEK_SET */
8628 8769          flock.l_start = offset;
8629 8770          flock.l_len = length;
8630 8771          flock.l_sysid = sysid;
8631 8772          flock.l_pid = lsp->rls_locker->rl_pid;
8632 8773  
8633 8774          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8634 8775          if (flock.l_len < 0 || flock.l_start < 0) {
8635 8776                  rfs4_dbe_unlock(sp->rs_dbe);
8636 8777                  return (NFS4ERR_INVAL);
8637 8778          }
8638 8779  
8639 8780          /*
8640 8781           * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8641 8782           * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8642 8783           */
8643 8784          flag = (int)sp->rs_share_access | F_REMOTELOCK;
8644 8785  
8645 8786          vp = sp->rs_finfo->rf_vp;
8646 8787          VN_HOLD(vp);
8647 8788  
8648 8789          /*
8649 8790           * We need to unlock sp before we call the underlying filesystem to
8650 8791           * acquire the file lock.
8651 8792           */
8652 8793          rfs4_dbe_unlock(sp->rs_dbe);
8653 8794  
8654 8795          error = setlock(vp, &flock, flag, cred);
8655 8796  
8656 8797          /*
8657 8798           * Make sure the file is still open.  In a case the file was closed in
8658 8799           * the meantime, clean the lock we acquired using the setlock() call
8659 8800           * above, and return the appropriate error.
8660 8801           */
8661 8802          rfs4_dbe_lock(sp->rs_dbe);
8662 8803          if (sp->rs_closed == TRUE) {
8663 8804                  cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8664 8805                  rfs4_dbe_unlock(sp->rs_dbe);
8665 8806  
8666 8807                  VN_RELE(vp);
8667 8808  
8668 8809                  return (NFS4ERR_OLD_STATEID);
8669 8810          }
8670 8811          rfs4_dbe_unlock(sp->rs_dbe);
8671 8812  
8672 8813          VN_RELE(vp);
8673 8814  
8674 8815          if (error == 0) {
8675 8816                  rfs4_dbe_lock(lsp->rls_dbe);
8676 8817                  next_stateid(&lsp->rls_lockid);
8677 8818                  rfs4_dbe_unlock(lsp->rls_dbe);
8678 8819          }
8679 8820  
8680 8821          /*
8681 8822           * N.B. We map error values to nfsv4 errors. This is different
8682 8823           * from what the puterrno4 routine does.
8683 8824           */
8684 8825          switch (error) {
8685 8826          case 0:
8686 8827                  status = NFS4_OK;
8687 8828                  break;
8688 8829          case EAGAIN:
8689 8830          case EACCES:            /* Old value */
8690 8831                  /* Can only get here if op is OP_LOCK */
8691 8832                  ASSERT(resop->resop == OP_LOCK);
8692 8833                  lres = &resop->nfs_resop4_u.oplock;
8693 8834                  status = NFS4ERR_DENIED;
8694 8835                  if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8695 8836                      == NFS4ERR_EXPIRED)
8696 8837                          goto retry;
8697 8838                  break;
8698 8839          case ENOLCK:
8699 8840                  status = NFS4ERR_DELAY;
8700 8841                  break;
8701 8842          case EOVERFLOW:
8702 8843                  status = NFS4ERR_INVAL;
8703 8844                  break;
8704 8845          case EINVAL:
8705 8846                  status = NFS4ERR_NOTSUPP;
8706 8847                  break;
8707 8848          default:
8708 8849                  status = NFS4ERR_SERVERFAULT;
8709 8850                  break;
8710 8851          }
8711 8852  
8712 8853          return (status);
8713 8854  }
8714 8855  
8715 8856  /*ARGSUSED*/
           /*
            * LOCK operation handler.
            *
            * The result is reported both in resp->status and in *cs->statusp.
            * args->locker.new_lock_owner selects one of two validation paths:
            *
            *  - new lock owner: the open state is looked up from the supplied
            *    open_stateid, access to the open owner is serialized through ro_sw,
            *    the stateid and open seqid are checked, and the lockowner and its
            *    lock state are found (or created).  A duplicate request gets the
            *    reply saved in lsp->rls_reply copied back instead of being redone.
            *
            *  - existing lock owner: the lock state is looked up from the supplied
            *    lock_stateid, access is serialized through rls_sw, and the stateid
            *    and lock seqid are checked.
            *
            * Requests that pass validation must target a regular file and satisfy
            * the grace/reclaim rules before rfs4_do_lock() is called.
            *
            * Exit labels: "out" records the final status and saves the reply;
            * "end" only drops the serialization and the references taken here.
            */
8716 8857  void
8717 8858  rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8718 8859      struct svc_req *req, struct compound_state *cs)
8719 8860  {
8720 8861          LOCK4args *args = &argop->nfs_argop4_u.oplock;
8721 8862          LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8722 8863          nfsstat4 status;
8723 8864          stateid4 *stateid;
8724 8865          rfs4_lockowner_t *lo;
8725 8866          rfs4_client_t *cp;
8726 8867          rfs4_state_t *sp = NULL;
8727 8868          rfs4_lo_state_t *lsp = NULL;
8728 8869          bool_t ls_sw_held = FALSE;
8729 8870          bool_t create = TRUE;
8730 8871          bool_t lcreate = TRUE;
8731 8872          bool_t dup_lock = FALSE;
8732 8873          int rc;
8733 8874  
8734 8875          DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8735 8876              LOCK4args *, args);
8736 8877  
8737 8878          if (cs->vp == NULL) {
8738 8879                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8739 8880                  DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8740 8881                      cs, LOCK4res *, resp);
8741 8882                  return;
8742 8883          }
8743 8884  
8744 8885          if (args->locker.new_lock_owner) {
8745 8886                  /* Create a new lockowner for this instance */
8746 8887                  open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8747 8888  
8748 8889                  NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8749 8890  
8750 8891                  stateid = &olo->open_stateid;
8751 8892                  status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8752 8893                  if (status != NFS4_OK) {
8753 8894                          NFS4_DEBUG(rfs4_debug,
8754 8895                              (CE_NOTE, "Get state failed in lock %d", status));
8755 8896                          *cs->statusp = resp->status = status;
8756 8897                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8757 8898                              cs, LOCK4res *, resp);
8758 8899                          return;
8759 8900                  }
8760 8901  
8761 8902                  /* Ensure specified filehandle matches */
8762 8903                  if (cs->vp != sp->rs_finfo->rf_vp) {
8763 8904                          rfs4_state_rele(sp);
8764 8905                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8765 8906                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8766 8907                              cs, LOCK4res *, resp);
8767 8908                          return;
8768 8909                  }
8769 8910  
8770 8911                  /* hold off other access to open_owner while we tinker */
8771 8912                  rfs4_sw_enter(&sp->rs_owner->ro_sw);
8772 8913  
8773 8914                  switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8774 8915                  case NFS4_CHECK_STATEID_OLD:
8775 8916                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8776 8917                          goto end;
8777 8918                  case NFS4_CHECK_STATEID_BAD:
8778 8919                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8779 8920                          goto end;
8780 8921                  case NFS4_CHECK_STATEID_EXPIRED:
8781 8922                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8782 8923                          goto end;
8783 8924                  case NFS4_CHECK_STATEID_UNCONFIRMED:
8784 8925                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8785 8926                          goto end;
8786 8927                  case NFS4_CHECK_STATEID_CLOSED:
8787 8928                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8788 8929                          goto end;
8789 8930                  case NFS4_CHECK_STATEID_OKAY:
8790 8931                  case NFS4_CHECK_STATEID_REPLAY:
8791 8932                          switch (rfs4_check_olo_seqid(olo->open_seqid,
8792 8933                              sp->rs_owner, resop)) {
8793 8934                          case NFS4_CHKSEQ_OKAY:
8794 8935                                  if (rc == NFS4_CHECK_STATEID_OKAY)
8795 8936                                          break;
8796 8937                                  /*
8797 8938                                   * This is replayed stateid; if seqid
8798 8939                                   * matches next expected, then client
8799 8940                                   * is using wrong seqid.
8800 8941                                   */
8801 8942                                  /* FALLTHROUGH */
8802 8943                          case NFS4_CHKSEQ_BAD:
8803 8944                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8804 8945                                  goto end;
8805 8946                          case NFS4_CHKSEQ_REPLAY:
8806 8947                                  /* This is a duplicate LOCK request */
8807 8948                                  dup_lock = TRUE;
8808 8949  
8809 8950                                  /*
8810 8951                                   * For a duplicate we do not want to
8811 8952                                   * create a new lockowner as it should
8812 8953                                   * already exist.
8813 8954                                   * Turn off the lockowner create flag.
8814 8955                                   */
8815 8956                                  lcreate = FALSE;
8816 8957                          }
8817 8958                          break;
8818 8959                  }
8819 8960  
8820 8961                  lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8821 8962                  if (lo == NULL) {
8822 8963                          NFS4_DEBUG(rfs4_debug,
8823 8964                              (CE_NOTE, "rfs4_op_lock: no lock owner"));
8824 8965                          *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8825 8966                          goto end;
8826 8967                  }
8827 8968  
8828 8969                  lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8829 8970                  if (lsp == NULL) {
8830 8971                          rfs4_update_lease(sp->rs_owner->ro_client);
8831 8972                          /*
8832 8973                           * Only update the open_seqid if this is not
8833 8974                           * a duplicate request
8834 8975                           */
8835 8976                          if (dup_lock == FALSE) {
8836 8977                                  rfs4_update_open_sequence(sp->rs_owner);
8837 8978                          }
8838 8979  
8839 8980                          NFS4_DEBUG(rfs4_debug,
8840 8981                              (CE_NOTE, "rfs4_op_lock: no state"));
8841 8982                          *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8842 8983                          rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8843 8984                          rfs4_lockowner_rele(lo);
8844 8985                          goto end;
8845 8986                  }
8846 8987  
8847 8988                  /*
8848 8989                   * This is the new_lock_owner branch and the client is
8849 8990                   * supposed to be associating a new lock_owner with
8850 8991                   * the open file at this point.  If we find that a
8851 8992                   * lock_owner/state association already exists and a
8852 8993                   * successful LOCK request was returned to the client,
8853 8994                   * an error is returned to the client since this is
8854 8995                   * not appropriate.  The client should be using the
8855 8996                   * existing lock_owner branch.
8856 8997                   */
8857 8998                  if (dup_lock == FALSE && create == FALSE) {
8858 8999                          if (lsp->rls_lock_completed == TRUE) {
8859 9000                                  *cs->statusp =
8860 9001                                      resp->status = NFS4ERR_BAD_SEQID;
8861 9002                                  rfs4_lockowner_rele(lo);
8862 9003                                  goto end;
8863 9004                          }
8864 9005                  }
8865 9006  
8866 9007                  rfs4_update_lease(sp->rs_owner->ro_client);
8867 9008  
8868 9009                  /*
8869 9010                   * Only update the open_seqid if this is not
8870 9011                   * a duplicate request
8871 9012                   */
8872 9013                  if (dup_lock == FALSE) {
8873 9014                          rfs4_update_open_sequence(sp->rs_owner);
8874 9015                  }
8875 9016  
8876 9017                  /*
8877 9018                   * If this is a duplicate lock request, just copy the
8878 9019                   * previously saved reply and return.
8879 9020                   */
8880 9021                  if (dup_lock == TRUE) {
8881 9022                          /* verify that lock_seqid's match */
8882 9023                          if (lsp->rls_seqid != olo->lock_seqid) {
                                   /*
                                    * NOTE(review): the two adjacent string
                                    * literals below concatenate without a
                                    * separating space ("bad" + "lsp->seqid").
                                    */
8883 9024                                  NFS4_DEBUG(rfs4_debug,
8884 9025                                      (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8885 9026                                      "lsp->seqid=%d old->seqid=%d",
8886 9027                                      lsp->rls_seqid, olo->lock_seqid));
8887 9028                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8888 9029                          } else {
8889 9030                                  rfs4_copy_reply(resop, &lsp->rls_reply);
8890 9031                                  /*
8891 9032                                   * Make sure to copy the just
8892 9033                                   * retrieved reply status into the
8893 9034                                   * overall compound status
8894 9035                                   */
8895 9036                                  *cs->statusp = resp->status;
8896 9037                          }
8897 9038                          rfs4_lockowner_rele(lo);
8898 9039                          goto end;
8899 9040                  }
8900 9041  
8901 9042                  rfs4_dbe_lock(lsp->rls_dbe);
8902 9043  
8903 9044                  /* Make sure to update the lock sequence id */
8904 9045                  lsp->rls_seqid = olo->lock_seqid;
8905 9046  
8906 9047                  NFS4_DEBUG(rfs4_debug,
8907 9048                      (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8908 9049  
8909 9050                  /*
8910 9051                   * This is used to signify the newly created lockowner
8911 9052                   * stateid and its sequence number.  The checks for
8912 9053                   * sequence number and increment don't occur on the
8913 9054                   * very first lock request for a lockowner.
8914 9055                   */
8915 9056                  lsp->rls_skip_seqid_check = TRUE;
8916 9057  
8917 9058                  /* hold off other access to lsp while we tinker */
8918 9059                  rfs4_sw_enter(&lsp->rls_sw);
8919 9060                  ls_sw_held = TRUE;
8920 9061  
8921 9062                  rfs4_dbe_unlock(lsp->rls_dbe);
8922 9063  
8923 9064                  rfs4_lockowner_rele(lo);
8924 9065          } else {
8925 9066                  stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8926 9067                  /* get lsp and hold the lock on the underlying file struct */
8927 9068                  if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8928 9069                      != NFS4_OK) {
8929 9070                          *cs->statusp = resp->status = status;
8930 9071                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8931 9072                              cs, LOCK4res *, resp);
8932 9073                          return;
8933 9074                  }
8934 9075                  create = FALSE; /* We didn't create lsp */
8935 9076  
8936 9077                  /* Ensure specified filehandle matches */
8937 9078                  if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8938 9079                          rfs4_lo_state_rele(lsp, TRUE);
8939 9080                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8940 9081                          DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8941 9082                              cs, LOCK4res *, resp);
8942 9083                          return;
8943 9084                  }
8944 9085  
8945 9086                  /* hold off other access to lsp while we tinker */
8946 9087                  rfs4_sw_enter(&lsp->rls_sw);
8947 9088                  ls_sw_held = TRUE;
8948 9089  
8949 9090                  switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8950 9091                  /*
8951 9092                   * The stateid looks like it was okay (expected to be
8952 9093                   * the next one)
8953 9094                   */
8954 9095                  case NFS4_CHECK_STATEID_OKAY:
8955 9096                          /*
8956 9097                           * The sequence id is now checked.  Determine
8957 9098                           * if this is a replay or if it is in the
8958 9099                           * expected (next) sequence.  In the case of a
8959 9100                           * replay, there are two replay conditions
8960 9101                           * that may occur.  The first is the normal
8961 9102                           * condition where a LOCK is done with a
8962 9103                           * NFS4_OK response and the stateid is
8963 9104                           * updated.  That case is handled below when
8964 9105                           * the stateid is identified as a REPLAY.  The
8965 9106                           * second is the case where an error is
8966 9107                           * returned, like NFS4ERR_DENIED, and the
8967 9108                           * sequence number is updated but the stateid
8968 9109                           * is not updated.  This second case is dealt
8969 9110                           * with here.  So it may seem odd that the
8970 9111                           * stateid is okay but the sequence id is a
8971 9112                           * replay but it is okay.
8972 9113                           */
8973 9114                          switch (rfs4_check_lock_seqid(
8974 9115                              args->locker.locker4_u.lock_owner.lock_seqid,
8975 9116                              lsp, resop)) {
8976 9117                          case NFS4_CHKSEQ_REPLAY:
8977 9118                                  if (resp->status != NFS4_OK) {
8978 9119                                          /*
8979 9120                                           * Here is our replay and need
8980 9121                                           * to verify that the last
8981 9122                                           * response was an error.
8982 9123                                           */
8983 9124                                          *cs->statusp = resp->status;
8984 9125                                          goto end;
8985 9126                                  }
8986 9127                                  /*
8987 9128                                   * This is done since the sequence id
8988 9129                                   * looked like a replay but it didn't
8989 9130                                   * pass our check so a BAD_SEQID is
8990 9131                                   * returned as a result.
8991 9132                                   */
8992 9133                                  /*FALLTHROUGH*/
8993 9134                          case NFS4_CHKSEQ_BAD:
8994 9135                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8995 9136                                  goto end;
8996 9137                          case NFS4_CHKSEQ_OKAY:
8997 9138                                  /* Everything looks okay move ahead */
8998 9139                                  break;
8999 9140                          }
9000 9141                          break;
9001 9142                  case NFS4_CHECK_STATEID_OLD:
9002 9143                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9003 9144                          goto end;
9004 9145                  case NFS4_CHECK_STATEID_BAD:
9005 9146                          *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9006 9147                          goto end;
9007 9148                  case NFS4_CHECK_STATEID_EXPIRED:
9008 9149                          *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9009 9150                          goto end;
9010 9151                  case NFS4_CHECK_STATEID_CLOSED:
9011 9152                          *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9012 9153                          goto end;
9013 9154                  case NFS4_CHECK_STATEID_REPLAY:
9014 9155                          switch (rfs4_check_lock_seqid(
9015 9156                              args->locker.locker4_u.lock_owner.lock_seqid,
9016 9157                              lsp, resop)) {
9017 9158                          case NFS4_CHKSEQ_OKAY:
9018 9159                                  /*
9019 9160                                   * This is a replayed stateid; if
9020 9161                                   * seqid matches the next expected,
9021 9162                                   * then client is using wrong seqid.
9022 9163                                   */
                                   /* FALLTHROUGH */
9023 9164                          case NFS4_CHKSEQ_BAD:
9024 9165                                  *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9025 9166                                  goto end;
9026 9167                          case NFS4_CHKSEQ_REPLAY:
                                   /*
                                    * Genuine replay: return whatever status
                                    * is already in resp (presumably filled in
                                    * from the saved reply by the seqid check;
                                    * confirm in rfs4_check_lock_seqid()).
                                    */
9027 9168                                  rfs4_update_lease(lsp->rls_locker->rl_client);
9028 9169                                  *cs->statusp = status = resp->status;
9029 9170                                  goto end;
9030 9171                          }
9031 9172                          break;
9032 9173                  default:
9033 9174                          ASSERT(FALSE);
9034 9175                          break;
9035 9176                  }
9036 9177  
9037 9178                  rfs4_update_lock_sequence(lsp);
9038 9179                  rfs4_update_lease(lsp->rls_locker->rl_client);
9039 9180          }
9040 9181  
9041 9182          /*
9042 9183           * NFS4 only allows locking on regular files, so
9043 9184           * verify type of object.
9044 9185           */
9045 9186          if (cs->vp->v_type != VREG) {
9046 9187                  if (cs->vp->v_type == VDIR)
9047 9188                          status = NFS4ERR_ISDIR;
9048 9189                  else
9049 9190                          status = NFS4ERR_INVAL;
9050 9191                  goto out;
9051 9192          }
9052 9193  
9053 9194          cp = lsp->rls_state->rs_owner->ro_client;
9054 9195  
           /*
            * Grace handling: a non-reclaim request during grace gets
            * NFS4ERR_GRACE; a reclaim request gets NFS4ERR_NO_GRACE when the
            * client may not reclaim or when we are not in grace at all.
            */
9055 9196          if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9056 9197                  status = NFS4ERR_GRACE;
9057 9198                  goto out;
9058 9199          }
9059 9200  
9060 9201          if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9061 9202                  status = NFS4ERR_NO_GRACE;
9062 9203                  goto out;
9063 9204          }
9064 9205  
9065 9206          if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9066 9207                  status = NFS4ERR_NO_GRACE;
9067 9208                  goto out;
9068 9209          }
9069 9210  
           /* Flag the compound when the file has a write delegation */
9070 9211          if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9071 9212                  cs->deleg = TRUE;
9072 9213  
9073 9214          status = rfs4_do_lock(lsp, args->locktype,
9074 9215              args->offset, args->length, cs->cr, resop);
9075 9216  
9076 9217  out:
           /*
            * Reached only after stateid/seqid validation succeeded, so lsp is
            * non-NULL here.
            */
9077 9218          lsp->rls_skip_seqid_check = FALSE;
9078 9219  
9079 9220          *cs->statusp = resp->status = status;
9080 9221  
9081 9222          if (status == NFS4_OK) {
9082 9223                  resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9083 9224                  lsp->rls_lock_completed = TRUE;
9084 9225          }
9085 9226          /*
9086 9227           * Only update the "OPEN" response here if this was a new
9087 9228           * lock_owner
9088 9229           */
9089 9230          if (sp)
9090 9231                  rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9091 9232  
9092 9233          rfs4_update_lock_resp(lsp, resop);
9093 9234  
9094 9235  end:
           /* Drop the serialization and the references taken above */
9095 9236          if (lsp) {
9096 9237                  if (ls_sw_held)
9097 9238                          rfs4_sw_exit(&lsp->rls_sw);
9098 9239                  /*
9099 9240                   * If an sp obtained, then the lsp does not represent
9100 9241                   * a lock on the file struct.
9101 9242                   */
9102 9243                  if (sp != NULL)
9103 9244                          rfs4_lo_state_rele(lsp, FALSE);
9104 9245                  else
9105 9246                          rfs4_lo_state_rele(lsp, TRUE);
9106 9247          }
9107 9248          if (sp) {
9108 9249                  rfs4_sw_exit(&sp->rs_owner->ro_sw);
9109 9250                  rfs4_state_rele(sp);
9110 9251          }
9111 9252  
9112 9253          DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9113 9254              LOCK4res *, resp);
9114 9255  }
9115 9256  
9116 9257  /*
            * Free function for LOCK/LOCKT results.
            *
            * A LOCK or LOCKT result whose status is NFS4ERR_DENIED carries a
            * LOCK4denied body; this releases the owner_val buffer (owner_len
            * bytes) referenced by that body.  For any other op or status there
            * is nothing to free and the function does nothing.
            */
9117 9258  static void
9118 9259  lock_denied_free(nfs_resop4 *resop)
9119 9260  {
9120 9261          LOCK4denied *dp = NULL;
9121 9262  
           /* Locate the denied body, which lives in a different place per op */
9122 9263          switch (resop->resop) {
9123 9264          case OP_LOCK:
9124 9265                  if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9125 9266                          dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9126 9267                  break;
9127 9268          case OP_LOCKT:
9128 9269                  if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9129 9270                          dp = &resop->nfs_resop4_u.oplockt.denied;
9130 9271                  break;
9131 9272          default:
9132 9273                  break;
9133 9274          }
9134 9275  
           /* dp is still NULL unless a denied body was selected above */
9135 9276          if (dp)
9136 9277                  kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9137 9278  }
9138 9279  
9139 9280  /*ARGSUSED*/
           /*
            * LOCKU operation handler.
            *
            * The result is reported both in resp->status and in *cs->statusp.
            * The lock state is looked up from args->lock_stateid and must refer to
            * the same vnode as the current filehandle (cs->vp).  Access to the lock
            * state is serialized through rls_sw while the stateid and seqid are
            * validated.  Requests that pass validation must target a regular file
            * and are refused with NFS4ERR_GRACE while the client is in grace;
            * otherwise rfs4_do_lock() is called with the supplied locktype, offset
            * and length.
            *
            * Exit labels: "out" records the final status and saves the reply;
            * "end" only drops the serialization and the lock state reference.
            */
9140 9281  void
9141 9282  rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9142 9283      struct svc_req *req, struct compound_state *cs)
9143 9284  {
9144 9285          LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9145 9286          LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9146 9287          nfsstat4 status;
9147 9288          stateid4 *stateid = &args->lock_stateid;
9148 9289          rfs4_lo_state_t *lsp;
9149 9290  
9150 9291          DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9151 9292              LOCKU4args *, args);
9152 9293  
9153 9294          if (cs->vp == NULL) {
9154 9295                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9155 9296                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9156 9297                      LOCKU4res *, resp);
9157 9298                  return;
9158 9299          }
9159 9300  
9160 9301          if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9161 9302                  *cs->statusp = resp->status = status;
9162 9303                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9163 9304                      LOCKU4res *, resp);
9164 9305                  return;
9165 9306          }
9166 9307  
9167 9308          /* Ensure specified filehandle matches */
9168 9309          if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9169 9310                  rfs4_lo_state_rele(lsp, TRUE);
9170 9311                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9171 9312                  DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9172 9313                      LOCKU4res *, resp);
9173 9314                  return;
9174 9315          }
9175 9316  
9176 9317          /* hold off other access to lsp while we tinker */
9177 9318          rfs4_sw_enter(&lsp->rls_sw);
9178 9319  
9179 9320          switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9180 9321          case NFS4_CHECK_STATEID_OKAY:
9181 9322                  if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9182 9323                      != NFS4_CHKSEQ_OKAY) {
9183 9324                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9184 9325                          goto end;
9185 9326                  }
9186 9327                  break;
9187 9328          case NFS4_CHECK_STATEID_OLD:
9188 9329                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9189 9330                  goto end;
9190 9331          case NFS4_CHECK_STATEID_BAD:
9191 9332                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9192 9333                  goto end;
9193 9334          case NFS4_CHECK_STATEID_EXPIRED:
9194 9335                  *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9195 9336                  goto end;
9196 9337          case NFS4_CHECK_STATEID_CLOSED:
9197 9338                  *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9198 9339                  goto end;
9199 9340          case NFS4_CHECK_STATEID_REPLAY:
9200 9341                  switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9201 9342                  case NFS4_CHKSEQ_OKAY:
9202 9343                          /*
9203 9344                           * This is a replayed stateid; if
9204 9345                           * seqid matches the next expected,
9205 9346                           * then client is using wrong seqid.
9206 9347                           */
                           /* FALLTHROUGH */
9207 9348                  case NFS4_CHKSEQ_BAD:
9208 9349                          *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9209 9350                          goto end;
9210 9351                  case NFS4_CHKSEQ_REPLAY:
                           /*
                            * Genuine replay: return whatever status is
                            * already in resp (presumably filled in from the
                            * saved reply by the seqid check; confirm in
                            * rfs4_check_lock_seqid()).
                            */
9211 9352                          rfs4_update_lease(lsp->rls_locker->rl_client);
9212 9353                          *cs->statusp = status = resp->status;
9213 9354                          goto end;
9214 9355                  }
9215 9356                  break;
9216 9357          default:
9217 9358                  ASSERT(FALSE);
9218 9359                  break;
9219 9360          }
9220 9361  
9221 9362          rfs4_update_lock_sequence(lsp);
9222 9363          rfs4_update_lease(lsp->rls_locker->rl_client);
9223 9364  
9224 9365          /*
9225 9366           * NFS4 only allows locking on regular files, so
9226 9367           * verify type of object.
9227 9368           */
9228 9369          if (cs->vp->v_type != VREG) {
9229 9370                  if (cs->vp->v_type == VDIR)
9230 9371                          status = NFS4ERR_ISDIR;
9231 9372                  else
9232 9373                          status = NFS4ERR_INVAL;
9233 9374                  goto out;
9234 9375          }
9235 9376  
9236 9377          if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9237 9378                  status = NFS4ERR_GRACE;
9238 9379                  goto out;
9239 9380          }
9240 9381  
9241 9382          status = rfs4_do_lock(lsp, args->locktype,
9242 9383              args->offset, args->length, cs->cr, resop);
9243 9384  
9244 9385  out:
           /* Record the final status and save the reply */
9245 9386          *cs->statusp = resp->status = status;
9246 9387  
9247 9388          if (status == NFS4_OK)
9248 9389                  resp->lock_stateid = lsp->rls_lockid.stateid;
9249 9390  
9250 9391          rfs4_update_lock_resp(lsp, resop);
9251 9392  
9252 9393  end:
           /* Drop the serialization and the lock state reference taken above */
9253 9394          rfs4_sw_exit(&lsp->rls_sw);
9254 9395          rfs4_lo_state_rele(lsp, TRUE);
9255 9396  
9256 9397          DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9257 9398              LOCKU4res *, resp);
9258 9399  }
9259 9400  
9260 9401  /*
9261 9402   * LOCKT is a best effort routine, the client can not be guaranteed that
9262 9403   * the status return is still in effect by the time the reply is received.
9263 9404   * There are numerous race conditions in this routine, but we are not required
9264 9405   * to be, and cannot be, accurate.
9265 9406   */
9266 9407  /*ARGSUSED*/
9267 9408  void
9268 9409  rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9269 9410      struct svc_req *req, struct compound_state *cs)
9270 9411  {
9271 9412          LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9272 9413          LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9273 9414          rfs4_lockowner_t *lo;
9274 9415          rfs4_client_t *cp;
9275 9416          bool_t create = FALSE;
9276 9417          struct flock64 flk;
9277 9418          int error;
9278 9419          int flag = FREAD | FWRITE;
9279 9420          int ltype;
9280 9421          length4 posix_length;
9281 9422          sysid_t sysid;
9282 9423          pid_t pid;
9283 9424  
9284 9425          DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9285 9426              LOCKT4args *, args);
9286 9427  
9287 9428          if (cs->vp == NULL) {
9288 9429                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9289 9430                  goto out;
9290 9431          }
9291 9432  
9292 9433          /*
9293 9434           * NFS4 only allows locking on regular files, so
9294 9435           * verify type of object.
9295 9436           */
9296 9437          if (cs->vp->v_type != VREG) {
9297 9438                  if (cs->vp->v_type == VDIR)
9298 9439                          *cs->statusp = resp->status = NFS4ERR_ISDIR;
9299 9440                  else
9300 9441                          *cs->statusp = resp->status =  NFS4ERR_INVAL;
9301 9442                  goto out;
9302 9443          }
9303 9444  
9304 9445          /*
9305 9446           * Check out the clientid to ensure the server knows about it
9306 9447           * so that we correctly inform the client of a server reboot.
9307 9448           */
9308 9449          if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9309 9450              == NULL) {
9310 9451                  *cs->statusp = resp->status =
9311 9452                      rfs4_check_clientid(&args->owner.clientid, 0);
9312 9453                  goto out;
9313 9454          }
9314 9455          if (rfs4_lease_expired(cp)) {
9315 9456                  rfs4_client_close(cp);
9316 9457                  /*
9317 9458                   * Protocol doesn't allow returning NFS4ERR_STALE as
9318 9459                   * other operations do on this check so STALE_CLIENTID
9319 9460                   * is returned instead
9320 9461                   */
9321 9462                  *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9322 9463                  goto out;
9323 9464          }
9324 9465  
9325 9466          if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9326 9467                  *cs->statusp = resp->status = NFS4ERR_GRACE;
9327 9468                  rfs4_client_rele(cp);
9328 9469                  goto out;
9329 9470          }
9330 9471          rfs4_client_rele(cp);
9331 9472  
9332 9473          resp->status = NFS4_OK;
9333 9474  
9334 9475          switch (args->locktype) {
9335 9476          case READ_LT:
9336 9477          case READW_LT:
9337 9478                  ltype = F_RDLCK;
9338 9479                  break;
9339 9480          case WRITE_LT:
9340 9481          case WRITEW_LT:
9341 9482                  ltype = F_WRLCK;
9342 9483                  break;
9343 9484          }
9344 9485  
9345 9486          posix_length = args->length;
9346 9487          /* Check for zero length. To lock to end of file use all ones for V4 */
9347 9488          if (posix_length == 0) {
9348 9489                  *cs->statusp = resp->status = NFS4ERR_INVAL;
9349 9490                  goto out;
9350 9491          } else if (posix_length == (length4)(~0)) {
9351 9492                  posix_length = 0;       /* Posix to end of file  */
9352 9493          }
9353 9494  
9354 9495          /* Find or create a lockowner */
9355 9496          lo = rfs4_findlockowner(&args->owner, &create);
9356 9497  
9357 9498          if (lo) {
9358 9499                  pid = lo->rl_pid;
9359 9500                  if ((resp->status =
9360 9501                      rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9361 9502                          goto err;
9362 9503          } else {
9363 9504                  pid = 0;
9364 9505                  sysid = lockt_sysid;
9365 9506          }
9366 9507  retry:
9367 9508          flk.l_type = ltype;
9368 9509          flk.l_whence = 0;               /* SEEK_SET */
9369 9510          flk.l_start = args->offset;
9370 9511          flk.l_len = posix_length;
9371 9512          flk.l_sysid = sysid;
9372 9513          flk.l_pid = pid;
9373 9514          flag |= F_REMOTELOCK;
9374 9515  
9375 9516          LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9376 9517  
9377 9518          /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9378 9519          if (flk.l_len < 0 || flk.l_start < 0) {
9379 9520                  resp->status = NFS4ERR_INVAL;
9380 9521                  goto err;
9381 9522          }
9382 9523          error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9383 9524              NULL, cs->cr, NULL);
9384 9525  
9385 9526          /*
9386 9527           * N.B. We map error values to nfsv4 errors. This is differrent
9387 9528           * than puterrno4 routine.
9388 9529           */
9389 9530          switch (error) {
9390 9531          case 0:
9391 9532                  if (flk.l_type == F_UNLCK)
9392 9533                          resp->status = NFS4_OK;
9393 9534                  else {
9394 9535                          if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9395 9536                                  goto retry;
9396 9537                          resp->status = NFS4ERR_DENIED;
9397 9538                  }
9398 9539                  break;
9399 9540          case EOVERFLOW:
9400 9541                  resp->status = NFS4ERR_INVAL;
9401 9542                  break;
9402 9543          case EINVAL:
9403 9544                  resp->status = NFS4ERR_NOTSUPP;
9404 9545                  break;
9405 9546          default:
9406 9547                  cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9407 9548                      error);
9408 9549                  resp->status = NFS4ERR_SERVERFAULT;
9409 9550                  break;
9410 9551          }
9411 9552  
9412 9553  err:
9413 9554          if (lo)
9414 9555                  rfs4_lockowner_rele(lo);
9415 9556          *cs->statusp = resp->status;
9416 9557  out:
9417 9558          DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9418 9559              LOCKT4res *, resp);
9419 9560  }
9420 9561  
9421 9562  int
9422 9563  rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9423 9564  {
9424 9565          int err;
9425 9566          int cmd;
9426 9567          vnode_t *vp;
9427 9568          struct shrlock shr;
9428 9569          struct shr_locowner shr_loco;
9429 9570          int fflags = 0;
9430 9571  
9431 9572          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9432 9573          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9433 9574  
9434 9575          if (sp->rs_closed)
9435 9576                  return (NFS4ERR_OLD_STATEID);
9436 9577  
9437 9578          vp = sp->rs_finfo->rf_vp;
9438 9579          ASSERT(vp);
9439 9580  
9440 9581          shr.s_access = shr.s_deny = 0;
9441 9582  
9442 9583          if (access & OPEN4_SHARE_ACCESS_READ) {
9443 9584                  fflags |= FREAD;
9444 9585                  shr.s_access |= F_RDACC;
9445 9586          }
9446 9587          if (access & OPEN4_SHARE_ACCESS_WRITE) {
9447 9588                  fflags |= FWRITE;
9448 9589                  shr.s_access |= F_WRACC;
9449 9590          }
9450 9591          ASSERT(shr.s_access);
9451 9592  
9452 9593          if (deny & OPEN4_SHARE_DENY_READ)
9453 9594                  shr.s_deny |= F_RDDNY;
9454 9595          if (deny & OPEN4_SHARE_DENY_WRITE)
9455 9596                  shr.s_deny |= F_WRDNY;
9456 9597  
9457 9598          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9458 9599          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9459 9600          shr_loco.sl_pid = shr.s_pid;
9460 9601          shr_loco.sl_id = shr.s_sysid;
9461 9602          shr.s_owner = (caddr_t)&shr_loco;
9462 9603          shr.s_own_len = sizeof (shr_loco);
9463 9604  
9464 9605          cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9465 9606  
9466 9607          err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9467 9608          if (err != 0) {
9468 9609                  if (err == EAGAIN)
9469 9610                          err = NFS4ERR_SHARE_DENIED;
9470 9611                  else
9471 9612                          err = puterrno4(err);
9472 9613                  return (err);
9473 9614          }
9474 9615  
9475 9616          sp->rs_share_access |= access;
9476 9617          sp->rs_share_deny |= deny;
9477 9618  
9478 9619          return (0);
9479 9620  }
9480 9621  
9481 9622  int
9482 9623  rfs4_unshare(rfs4_state_t *sp)
9483 9624  {
9484 9625          int err;
9485 9626          struct shrlock shr;
9486 9627          struct shr_locowner shr_loco;
9487 9628  
9488 9629          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9489 9630  
9490 9631          if (sp->rs_closed || sp->rs_share_access == 0)
9491 9632                  return (0);
9492 9633  
9493 9634          ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9494 9635          ASSERT(sp->rs_finfo->rf_vp);
9495 9636  
9496 9637          shr.s_access = shr.s_deny = 0;
9497 9638          shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9498 9639          shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9499 9640          shr_loco.sl_pid = shr.s_pid;
9500 9641          shr_loco.sl_id = shr.s_sysid;
9501 9642          shr.s_owner = (caddr_t)&shr_loco;
9502 9643          shr.s_own_len = sizeof (shr_loco);
9503 9644  
9504 9645          err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9505 9646              NULL);
9506 9647          if (err != 0) {
9507 9648                  err = puterrno4(err);
9508 9649                  return (err);
9509 9650          }
9510 9651  
9511 9652          sp->rs_share_access = 0;
9512 9653          sp->rs_share_deny = 0;
9513 9654  
9514 9655          return (0);
9515 9656  
9516 9657  }
9517 9658  
9518 9659  static int
9519 9660  rdma_setup_read_data4(READ4args *args, READ4res *rok)
9520 9661  {
9521 9662          struct clist    *wcl;
9522 9663          count4          count = rok->data_len;
9523 9664          int             wlist_len;
9524 9665  
9525 9666          wcl = args->wlist;
9526 9667          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9527 9668                  return (FALSE);
9528 9669          }
9529 9670          wcl = args->wlist;
9530 9671          rok->wlist_len = wlist_len;
9531 9672          rok->wlist = wcl;
9532 9673          return (TRUE);
9533 9674  }
9534 9675  
/*
 * Tunable to disable server referrals: while this is non-zero,
 * vn_is_nfs_reparse() reports B_FALSE for every vnode.
 */
int rfs4_no_referrals = 0;
9537 9678  
9538 9679  /*
9539 9680   * Find an NFS record in reparse point data.
9540 9681   * Returns 0 for success and <0 or an errno value on failure.
9541 9682   */
9542 9683  int
9543 9684  vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9544 9685  {
9545 9686          int err;
9546 9687          char *stype, *val;
9547 9688          nvlist_t *nvl;
9548 9689          nvpair_t *curr;
9549 9690  
9550 9691          if ((nvl = reparse_init()) == NULL)
9551 9692                  return (-1);
9552 9693  
9553 9694          if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9554 9695                  reparse_free(nvl);
9555 9696                  return (err);
9556 9697          }
9557 9698  
9558 9699          curr = NULL;
9559 9700          while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9560 9701                  if ((stype = nvpair_name(curr)) == NULL) {
9561 9702                          reparse_free(nvl);
9562 9703                          return (-2);
9563 9704                  }
9564 9705                  if (strncasecmp(stype, "NFS", 3) == 0)
9565 9706                          break;
9566 9707          }
9567 9708  
9568 9709          if ((curr == NULL) ||
9569 9710              (nvpair_value_string(curr, &val))) {
9570 9711                  reparse_free(nvl);
9571 9712                  return (-3);
9572 9713          }
9573 9714          *nvlp = nvl;
9574 9715          *svcp = stype;
9575 9716          *datap = val;
9576 9717          return (0);
9577 9718  }
9578 9719  
9579 9720  int
9580 9721  vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9581 9722  {
9582 9723          nvlist_t *nvl;
9583 9724          char *s, *d;
9584 9725  
9585 9726          if (rfs4_no_referrals != 0)
9586 9727                  return (B_FALSE);
9587 9728  
9588 9729          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9589 9730                  return (B_FALSE);
9590 9731  
9591 9732          if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9592 9733                  return (B_FALSE);
9593 9734  
9594 9735          reparse_free(nvl);
9595 9736  
9596 9737          return (B_TRUE);
9597 9738  }
9598 9739  
9599 9740  /*
9600 9741   * There is a user-level copy of this routine in ref_subr.c.
9601 9742   * Changes should be kept in sync.
9602 9743   */
9603 9744  static int
9604 9745  nfs4_create_components(char *path, component4 *comp4)
9605 9746  {
9606 9747          int slen, plen, ncomp;
9607 9748          char *ori_path, *nxtc, buf[MAXNAMELEN];
9608 9749  
9609 9750          if (path == NULL)
9610 9751                  return (0);
9611 9752  
9612 9753          plen = strlen(path) + 1;        /* include the terminator */
9613 9754          ori_path = path;
9614 9755          ncomp = 0;
9615 9756  
9616 9757          /* count number of components in the path */
9617 9758          for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9618 9759                  if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9619 9760                          if ((slen = nxtc - path) == 0) {
9620 9761                                  path = nxtc + 1;
9621 9762                                  continue;
9622 9763                          }
9623 9764  
9624 9765                          if (comp4 != NULL) {
9625 9766                                  bcopy(path, buf, slen);
9626 9767                                  buf[slen] = '\0';
9627 9768                                  (void) str_to_utf8(buf, &comp4[ncomp]);
9628 9769                          }
9629 9770  
9630 9771                          ncomp++;        /* 1 valid component */
9631 9772                          path = nxtc + 1;
9632 9773                  }
9633 9774                  if (*nxtc == '\0' || *nxtc == '\n')
9634 9775                          break;
9635 9776          }
9636 9777  
9637 9778          return (ncomp);
9638 9779  }
9639 9780  
9640 9781  /*
9641 9782   * There is a user-level copy of this routine in ref_subr.c.
9642 9783   * Changes should be kept in sync.
9643 9784   */
9644 9785  static int
9645 9786  make_pathname4(char *path, pathname4 *pathname)
9646 9787  {
9647 9788          int ncomp;
9648 9789          component4 *comp4;
9649 9790  
9650 9791          if (pathname == NULL)
9651 9792                  return (0);
9652 9793  
9653 9794          if (path == NULL) {
9654 9795                  pathname->pathname4_val = NULL;
9655 9796                  pathname->pathname4_len = 0;
9656 9797                  return (0);
9657 9798          }
9658 9799  
9659 9800          /* count number of components to alloc buffer */
9660 9801          if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9661 9802                  pathname->pathname4_val = NULL;
9662 9803                  pathname->pathname4_len = 0;
9663 9804                  return (0);
9664 9805          }
9665 9806          comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9666 9807  
9667 9808          /* copy components into allocated buffer */
9668 9809          ncomp = nfs4_create_components(path, comp4);
9669 9810  
9670 9811          pathname->pathname4_val = comp4;
9671 9812          pathname->pathname4_len = ncomp;
9672 9813  
9673 9814          return (ncomp);
9674 9815  }
9675 9816  
9676 9817  #define xdr_fs_locations4 xdr_fattr4_fs_locations
9677 9818  
9678 9819  fs_locations4 *
9679 9820  fetch_referral(vnode_t *vp, cred_t *cr)
9680 9821  {
9681 9822          nvlist_t *nvl;
9682 9823          char *stype, *sdata;
9683 9824          fs_locations4 *result;
9684 9825          char buf[1024];
9685 9826          size_t bufsize;
9686 9827          XDR xdr;
9687 9828          int err;
9688 9829  
9689 9830          /*
9690 9831           * Check attrs to ensure it's a reparse point
9691 9832           */
9692 9833          if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9693 9834                  return (NULL);
9694 9835  
9695 9836          /*
9696 9837           * Look for an NFS record and get the type and data
9697 9838           */
9698 9839          if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9699 9840                  return (NULL);
9700 9841  
9701 9842          /*
9702 9843           * With the type and data, upcall to get the referral
9703 9844           */
9704 9845          bufsize = sizeof (buf);
9705 9846          bzero(buf, sizeof (buf));
9706 9847          err = reparse_kderef((const char *)stype, (const char *)sdata,
9707 9848              buf, &bufsize);
9708 9849          reparse_free(nvl);
9709 9850  
9710 9851          DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9711 9852              char *, stype, char *, sdata, char *, buf, int, err);
9712 9853          if (err) {
9713 9854                  cmn_err(CE_NOTE,
9714 9855                      "reparsed daemon not running: unable to get referral (%d)",
9715 9856                      err);
9716 9857                  return (NULL);
9717 9858          }
9718 9859  
9719 9860          /*
9720 9861           * We get an XDR'ed record back from the kderef call
9721 9862           */
9722 9863          xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9723 9864          result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9724 9865          err = xdr_fs_locations4(&xdr, result);
9725 9866          XDR_DESTROY(&xdr);
9726 9867          if (err != TRUE) {
9727 9868                  DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9728 9869                      int, err);
9729 9870                  return (NULL);
9730 9871          }
9731 9872  
9732 9873          /*
9733 9874           * Look at path to recover fs_root, ignoring the leading '/'
9734 9875           */
9735 9876          (void) make_pathname4(vp->v_path, &result->fs_root);
9736 9877  
9737 9878          return (result);
9738 9879  }
9739 9880  
9740 9881  char *
9741 9882  build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9742 9883  {
9743 9884          fs_locations4 *fsl;
9744 9885          fs_location4 *fs;
9745 9886          char *server, *path, *symbuf;
9746 9887          static char *prefix = "/net/";
9747 9888          int i, size, npaths;
9748 9889          uint_t len;
9749 9890  
9750 9891          /* Get the referral */
9751 9892          if ((fsl = fetch_referral(vp, cr)) == NULL)
9752 9893                  return (NULL);
9753 9894  
9754 9895          /* Deal with only the first location and first server */
9755 9896          fs = &fsl->locations_val[0];
9756 9897          server = utf8_to_str(&fs->server_val[0], &len, NULL);
9757 9898          if (server == NULL) {
9758 9899                  rfs4_free_fs_locations4(fsl);
9759 9900                  kmem_free(fsl, sizeof (fs_locations4));
9760 9901                  return (NULL);
9761 9902          }
9762 9903  
9763 9904          /* Figure out size for "/net/" + host + /path/path/path + NULL */
9764 9905          size = strlen(prefix) + len;
9765 9906          for (i = 0; i < fs->rootpath.pathname4_len; i++)
9766 9907                  size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9767 9908  
9768 9909          /* Allocate the symlink buffer and fill it */
9769 9910          symbuf = kmem_zalloc(size, KM_SLEEP);
9770 9911          (void) strcat(symbuf, prefix);
9771 9912          (void) strcat(symbuf, server);
9772 9913          kmem_free(server, len);
9773 9914  
9774 9915          npaths = 0;
9775 9916          for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9776 9917                  path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9777 9918                  if (path == NULL)
9778 9919                          continue;
9779 9920                  (void) strcat(symbuf, "/");
9780 9921                  (void) strcat(symbuf, path);
9781 9922                  npaths++;
9782 9923                  kmem_free(path, len);
9783 9924          }
9784 9925  
9785 9926          rfs4_free_fs_locations4(fsl);
9786 9927          kmem_free(fsl, sizeof (fs_locations4));
9787 9928  
9788 9929          if (strsz != NULL)
9789 9930                  *strsz = size;
9790 9931          return (symbuf);
9791 9932  }
9792 9933  
9793 9934  /*
9794 9935   * Check to see if we have a downrev Solaris client, so that we
9795 9936   * can send it a symlink instead of a referral.
9796 9937   */
9797 9938  int
9798 9939  client_is_downrev(struct svc_req *req)
9799 9940  {
9800 9941          struct sockaddr *ca;
9801 9942          rfs4_clntip_t *ci;
9802 9943          bool_t create = FALSE;
  
    | 
      ↓ open down ↓ | 
    1623 lines elided | 
    
      ↑ open up ↑ | 
  
9803 9944          int is_downrev;
9804 9945  
9805 9946          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9806 9947          ASSERT(ca);
9807 9948          ci = rfs4_find_clntip(ca, &create);
9808 9949          if (ci == NULL)
9809 9950                  return (0);
9810 9951          is_downrev = ci->ri_no_referrals;
9811 9952          rfs4_dbe_rele(ci->ri_dbe);
9812 9953          return (is_downrev);
     9954 +}
     9955 +
     9956 +/*
     9957 + * Do the main work of handling HA-NFSv4 Resource Group failover on
     9958 + * Sun Cluster.
     9959 + * We need to detect whether any RG admin paths have been added or removed,
     9960 + * and adjust resources accordingly.
     9961 + * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
     9962 + * order to scale, the list and array of paths need to be held in more
     9963 + * suitable data structures.
     9964 + */
     9965 +static void
     9966 +hanfsv4_failover(nfs4_srv_t *nsrv4)
     9967 +{
     9968 +        int i, start_grace, numadded_paths = 0;
     9969 +        char **added_paths = NULL;
     9970 +        rfs4_dss_path_t *dss_path;
     9971 +
     9972 +        /*
     9973 +         * Note: currently, dss_pathlist cannot be NULL, since
     9974 +         * it will always include an entry for NFS4_DSS_VAR_DIR. If we
     9975 +         * make the latter dynamically specified too, the following will
     9976 +         * need to be adjusted.
     9977 +         */
     9978 +
     9979 +        /*
     9980 +         * First, look for removed paths: RGs that have been failed-over
     9981 +         * away from this node.
     9982 +         * Walk the "currently-serving" dss_pathlist and, for each
     9983 +         * path, check if it is on the "passed-in" rfs4_dss_newpaths array
     9984 +         * from nfsd. If not, that RG path has been removed.
     9985 +         *
     9986 +         * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
     9987 +         * any duplicates.
     9988 +         */
     9989 +        dss_path = nsrv4->dss_pathlist;
     9990 +        do {
     9991 +                int found = 0;
     9992 +                char *path = dss_path->path;
     9993 +
     9994 +                /* used only for non-HA so may not be removed */
     9995 +                if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
     9996 +                        dss_path = dss_path->next;
     9997 +                        continue;
     9998 +                }
     9999 +
     10000 +                for (i = 0; i < rfs4_dss_numnewpaths; i++) {
     10001 +                        int cmpret;
     10002 +                        char *newpath = rfs4_dss_newpaths[i];
     10003 +
     10004 +                        /*
     10005 +                         * Since nfsd has sorted rfs4_dss_newpaths for us,
     10006 +                         * once the return from strcmp is negative we know
     10007 +                         * we've passed the point where "path" should be,
     10008 +                         * and can stop searching: "path" has been removed.
     10009 +                         */
     10010 +                        cmpret = strcmp(path, newpath);
     10011 +                        if (cmpret < 0)
     10012 +                                break;
     10013 +                        if (cmpret == 0) {
     10014 +                                found = 1;
     10015 +                                break;
     10016 +                        }
     10017 +                }
     10018 +
     10019 +                if (found == 0) {
     10020 +                        unsigned index = dss_path->index;
     10021 +                        rfs4_servinst_t *sip = dss_path->sip;
     10022 +                        rfs4_dss_path_t *path_next = dss_path->next;
     10023 +
     10024 +                        /*
     10025 +                         * This path has been removed.
     10026 +                         * We must clear out the servinst reference to
     10027 +                         * it, since it's now owned by another
     10028 +                         * node: we should not attempt to touch it.
     10029 +                         */
     10030 +                        ASSERT(dss_path == sip->dss_paths[index]);
     10031 +                        sip->dss_paths[index] = NULL;
     10032 +
     10033 +                        /* remove from "currently-serving" list, and destroy */
     10034 +                        remque(dss_path);
     10035 +                        /* allow for NUL */
     10036 +                        kmem_free(dss_path->path, strlen(dss_path->path) + 1);
     10037 +                        kmem_free(dss_path, sizeof (rfs4_dss_path_t));
     10038 +
     10039 +                        dss_path = path_next;
     10040 +                } else {
     10041 +                        /* path was found; not removed */
     10042 +                        dss_path = dss_path->next;
     10043 +                }
     10044 +        } while (dss_path != nsrv4->dss_pathlist);
     10045 +
     10046 +        /*
     10047 +         * Now, look for added paths: RGs that have been failed-over
     10048 +         * to this node.
     10049 +         * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
     10050 +         * for each path, check if it is on the "currently-serving"
     10051 +         * dss_pathlist. If not, that RG path has been added.
     10052 +         *
     10053 +         * Note: we don't do duplicate detection here; nfsd does that for us.
     10054 +         *
     10055 +         * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
     10056 +         * an upper bound for the size needed for added_paths[numadded_paths].
     10057 +         */
     10058 +
     10059 +        /* probably more space than we need, but guaranteed to be enough */
     10060 +        if (rfs4_dss_numnewpaths > 0) {
     10061 +                size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
     10062 +                added_paths = kmem_zalloc(sz, KM_SLEEP);
     10063 +        }
     10064 +
     10065 +        /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
     10066 +        for (i = 0; i < rfs4_dss_numnewpaths; i++) {
     10067 +                int found = 0;
     10068 +                char *newpath = rfs4_dss_newpaths[i];
     10069 +
     10070 +                dss_path = nsrv4->dss_pathlist;
     10071 +                do {
     10072 +                        char *path = dss_path->path;
     10073 +
     10074 +                        /* used only for non-HA */
     10075 +                        if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
     10076 +                                dss_path = dss_path->next;
     10077 +                                continue;
     10078 +                        }
     10079 +
     10080 +                        if (strncmp(path, newpath, strlen(path)) == 0) {
     10081 +                                found = 1;
     10082 +                                break;
     10083 +                        }
     10084 +
     10085 +                        dss_path = dss_path->next;
     10086 +                } while (dss_path != nsrv4->dss_pathlist);
     10087 +
     10088 +                if (found == 0) {
     10089 +                        added_paths[numadded_paths] = newpath;
     10090 +                        numadded_paths++;
     10091 +                }
     10092 +        }
     10093 +
     10094 +        /* did we find any added paths? */
     10095 +        if (numadded_paths > 0) {
     10096 +
     10097 +                /* create a new server instance, and start its grace period */
     10098 +                start_grace = 1;
     10099 +                /* CSTYLED */
     10100 +                rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
     10101 +
     10102 +                /* read in the stable storage state from these paths */
     10103 +                rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
     10104 +
     10105 +                /*
     10106 +                 * Multiple failovers during a grace period will cause
     10107 +                 * clients of the same resource group to be partitioned
     10108 +                 * into different server instances, with different
     10109 +                 * grace periods.  Since clients of the same resource
     10110 +                 * group must be subject to the same grace period,
     10111 +                 * we need to reset all currently active grace periods.
     10112 +                 */
     10113 +                rfs4_grace_reset_all(nsrv4);
     10114 +        }
     10115 +
     10116 +        if (rfs4_dss_numnewpaths > 0)
     10117 +                kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
9813 10118  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX