11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
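
A recurring pattern in the hunks below is that NFSv2 write-clustering state now lives in a per-zone nfs_srv_t (async_write_lock, async_write_head, write_async) that request handlers such as rfs_write() reach through nfs_get_srv(). The sketch that follows only illustrates that lookup pattern and is not part of the webrev: it is ordinary user-space C, with pthread_mutex_t standing in for kmutex_t and a plain current_zone_globals pointer standing in for the nfs_srv_getzg() zone lookup, so any name not copied from the diff is hypothetical.

    #include <assert.h>
    #include <pthread.h>
    #include <stddef.h>

    struct rfs_async_write_list;                    /* opaque, as in the diff */

    /* Per-zone NFSv2 server state, mirroring the nfs_srv_t added by the change. */
    typedef struct nfs_srv {
            pthread_mutex_t                 async_write_lock;
            struct rfs_async_write_list     *async_write_head;
            int                             write_async;    /* clustering if == 1 */
    } nfs_srv_t;

    /* Stand-in for the per-zone globals block returned by nfs_srv_getzg(). */
    typedef struct nfs_globals {
            nfs_srv_t *nfs_srv;
    } nfs_globals_t;

    static nfs_globals_t *current_zone_globals;     /* hypothetical zone lookup */

    static nfs_srv_t *
    nfs_get_srv(void)
    {
            nfs_globals_t *ng = current_zone_globals;      /* cf. nfs_srv_getzg() */
            nfs_srv_t *srv = ng->nfs_srv;

            assert(srv != NULL);
            return (srv);
    }

    /*
     * A handler such as rfs_write() now takes the per-zone lock instead of
     * a file-scope one.
     */
    static int
    write_clustering_enabled(void)
    {
            nfs_srv_t *nsrv = nfs_get_srv();
            int on;

            pthread_mutex_lock(&nsrv->async_write_lock);
            on = nsrv->write_async;
            pthread_mutex_unlock(&nsrv->async_write_lock);
            return (on);
    }

    int
    main(void)
    {
            static nfs_srv_t srv = { PTHREAD_MUTEX_INITIALIZER, NULL, 1 };
            static nfs_globals_t ng = { &srv };

            current_zone_globals = &ng;     /* pretend per-zone init already ran */
            return (write_clustering_enabled() ? 0 : 1);
    }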
    
      
          --- old/usr/src/uts/common/fs/nfs/nfs_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  
  23   23   * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  25   25   * Copyright (c) 2016 by Delphix. All rights reserved.
  26   26   */
  27   27  
  28   28  /*
  29   29   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30   30   *      All rights reserved.
  31   31   */
  32   32  
       33 +/*
       34 + * Copyright 2018 Nexenta Systems, Inc.
       35 + * Copyright (c) 2016 by Delphix. All rights reserved.
       36 + */
       37 +
  33   38  #include <sys/param.h>
  34   39  #include <sys/types.h>
  35   40  #include <sys/systm.h>
  36   41  #include <sys/cred.h>
  37   42  #include <sys/buf.h>
  38   43  #include <sys/vfs.h>
  39   44  #include <sys/vnode.h>
  40   45  #include <sys/uio.h>
  41   46  #include <sys/stat.h>
  42   47  #include <sys/errno.h>
  43   48  #include <sys/sysmacros.h>
  44   49  #include <sys/statvfs.h>
  45   50  #include <sys/kmem.h>
  46   51  #include <sys/kstat.h>
  47   52  #include <sys/dirent.h>
  48   53  #include <sys/cmn_err.h>
  49   54  #include <sys/debug.h>
  50   55  #include <sys/vtrace.h>
  51   56  #include <sys/mode.h>
  52   57  #include <sys/acl.h>
  53   58  #include <sys/nbmlock.h>
  54   59  #include <sys/policy.h>
  55   60  #include <sys/sdt.h>
  56   61  
  57   62  #include <rpc/types.h>
  58   63  #include <rpc/auth.h>
  59   64  #include <rpc/svc.h>
  60   65  
  61   66  #include <nfs/nfs.h>
  62   67  #include <nfs/export.h>
  
  63   68  #include <nfs/nfs_cmd.h>
  64   69  
  65   70  #include <vm/hat.h>
  66   71  #include <vm/as.h>
  67   72  #include <vm/seg.h>
  68   73  #include <vm/seg_map.h>
  69   74  #include <vm/seg_kmem.h>
  70   75  
  71   76  #include <sys/strsubr.h>
  72   77  
       78 +struct rfs_async_write_list;
       79 +
  73   80  /*
       81 + * Zone globals of NFSv2 server
       82 + */
       83 +typedef struct nfs_srv {
       84 +        kmutex_t                        async_write_lock;
       85 +        struct rfs_async_write_list     *async_write_head;
       86 +
       87 +        /*
       88 +         * enables write clustering if == 1
       89 +         */
       90 +        int             write_async;
       91 +} nfs_srv_t;
       92 +
       93 +/*
  74   94   * These are the interface routines for the server side of the
  75   95   * Network File System.  See the NFS version 2 protocol specification
  76   96   * for a description of this interface.
  77   97   */
  78   98  
  79   99  static int      sattr_to_vattr(struct nfssattr *, struct vattr *);
  80  100  static void     acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
  81  101                          cred_t *);
  82  102  
      103 +
  83  104  /*
  84  105   * Some "over the wire" UNIX file types.  These are encoded
  85  106   * into the mode.  This needs to be fixed in the next rev.
  86  107   */
  87  108  #define IFMT            0170000         /* type of file */
  88  109  #define IFCHR           0020000         /* character special */
  89  110  #define IFBLK           0060000         /* block special */
  90  111  #define IFSOCK          0140000         /* socket */
  91  112  
  92  113  u_longlong_t nfs2_srv_caller_id;
  93  114  
      115 +static nfs_srv_t *
      116 +nfs_get_srv(void)
      117 +{
      118 +        nfs_globals_t *ng = nfs_srv_getzg();
      119 +        nfs_srv_t *srv = ng->nfs_srv;
      120 +        ASSERT(srv != NULL);
      121 +        return (srv);
      122 +}
      123 +
  94  124  /*
  95  125   * Get file attributes.
  96  126   * Returns the current attributes of the file with the given fhandle.
  97  127   */
  98  128  /* ARGSUSED */
  99  129  void
 100  130  rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
 101  131      struct svc_req *req, cred_t *cr, bool_t ro)
 102  132  {
 103  133          int error;
 104  134          vnode_t *vp;
 105  135          struct vattr va;
 106  136  
 107  137          vp = nfs_fhtovp(fhp, exi);
 108  138          if (vp == NULL) {
 109  139                  ns->ns_status = NFSERR_STALE;
 110  140                  return;
 111  141          }
 112  142  
 113  143          /*
 114  144           * Do the getattr.
 115  145           */
 116  146          va.va_mask = AT_ALL;    /* we want all the attributes */
 117  147  
 118  148          error = rfs4_delegated_getattr(vp, &va, 0, cr);
 119  149  
 120  150          /* check for overflows */
 121  151          if (!error) {
 122  152                  /* Lie about the object type for a referral */
 123  153                  if (vn_is_nfs_reparse(vp, cr))
 124  154                          va.va_type = VLNK;
 125  155  
 126  156                  acl_perm(vp, exi, &va, cr);
 127  157                  error = vattr_to_nattr(&va, &ns->ns_attr);
 128  158          }
 129  159  
 130  160          VN_RELE(vp);
 131  161  
 132  162          ns->ns_status = puterrno(error);
 133  163  }
 134  164  void *
 135  165  rfs_getattr_getfh(fhandle_t *fhp)
 136  166  {
 137  167          return (fhp);
 138  168  }
 139  169  
 140  170  /*
 141  171   * Set file attributes.
 142  172   * Sets the attributes of the file with the given fhandle.  Returns
 143  173   * the new attributes.
 144  174   */
 145  175  /* ARGSUSED */
 146  176  void
 147  177  rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
 148  178      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 149  179  {
 150  180          int error;
 151  181          int flag;
 152  182          int in_crit = 0;
 153  183          vnode_t *vp;
 154  184          struct vattr va;
 155  185          struct vattr bva;
 156  186          struct flock64 bf;
 157  187          caller_context_t ct;
 158  188  
 159  189  
 160  190          vp = nfs_fhtovp(&args->saa_fh, exi);
 161  191          if (vp == NULL) {
 162  192                  ns->ns_status = NFSERR_STALE;
 163  193                  return;
 164  194          }
 165  195  
 166  196          if (rdonly(ro, vp)) {
 167  197                  VN_RELE(vp);
 168  198                  ns->ns_status = NFSERR_ROFS;
 169  199                  return;
 170  200          }
 171  201  
 172  202          error = sattr_to_vattr(&args->saa_sa, &va);
 173  203          if (error) {
 174  204                  VN_RELE(vp);
 175  205                  ns->ns_status = puterrno(error);
 176  206                  return;
 177  207          }
 178  208  
 179  209          /*
 180  210           * If the client is requesting a change to the mtime,
 181  211           * but the nanosecond field is set to 1 billion, then
 182  212           * this is a flag to the server that it should set the
 183  213           * atime and mtime fields to the server's current time.
 184  214           * The 1 billion number actually came from the client
 185  215           * as 1 million, but the units in the over the wire
 186  216           * request are microseconds instead of nanoseconds.
 187  217           *
 188  218           * This is an overload of the protocol and should be
 189  219           * documented in the NFS Version 2 protocol specification.
 190  220           */
 191  221          if (va.va_mask & AT_MTIME) {
 192  222                  if (va.va_mtime.tv_nsec == 1000000000) {
 193  223                          gethrestime(&va.va_mtime);
 194  224                          va.va_atime = va.va_mtime;
 195  225                          va.va_mask |= AT_ATIME;
 196  226                          flag = 0;
 197  227                  } else
 198  228                          flag = ATTR_UTIME;
 199  229          } else
 200  230                  flag = 0;
 201  231  
 202  232          /*
 203  233           * If the filesystem is exported with nosuid, then mask off
 204  234           * the setuid and setgid bits.
 205  235           */
 206  236          if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
 207  237              (exi->exi_export.ex_flags & EX_NOSUID))
 208  238                  va.va_mode &= ~(VSUID | VSGID);
 209  239  
 210  240          ct.cc_sysid = 0;
 211  241          ct.cc_pid = 0;
 212  242          ct.cc_caller_id = nfs2_srv_caller_id;
 213  243          ct.cc_flags = CC_DONTBLOCK;
 214  244  
 215  245          /*
 216  246           * We need to specially handle size changes because it is
 217  247           * possible for the client to create a file with modes
 218  248           * which indicate read-only, but with the file opened for
 219  249           * writing.  If the client then tries to set the size of
 220  250           * the file, then the normal access checking done in
 221  251           * VOP_SETATTR would prevent the client from doing so,
 222  252           * although it should be legal for it to do so.  To get
 223  253           * around this, we do the access checking for ourselves
 224  254           * and then use VOP_SPACE which doesn't do the access
 225  255           * checking which VOP_SETATTR does. VOP_SPACE can only
 226  256           * operate on VREG files, let VOP_SETATTR handle the other
 227  257           * extremely rare cases.
 228  258           * Also the client should not be allowed to change the
 229  259           * size of the file if there is a conflicting non-blocking
 230  260           * mandatory lock in the region of change.
 231  261           */
 232  262          if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
 233  263                  if (nbl_need_check(vp)) {
 234  264                          nbl_start_crit(vp, RW_READER);
 235  265                          in_crit = 1;
 236  266                  }
 237  267  
 238  268                  bva.va_mask = AT_UID | AT_SIZE;
 239  269  
 240  270                  error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
 241  271  
 242  272                  if (error) {
 243  273                          if (in_crit)
 244  274                                  nbl_end_crit(vp);
 245  275                          VN_RELE(vp);
 246  276                          ns->ns_status = puterrno(error);
 247  277                          return;
 248  278                  }
 249  279  
 250  280                  if (in_crit) {
 251  281                          u_offset_t offset;
 252  282                          ssize_t length;
 253  283  
 254  284                          if (va.va_size < bva.va_size) {
 255  285                                  offset = va.va_size;
 256  286                                  length = bva.va_size - va.va_size;
 257  287                          } else {
 258  288                                  offset = bva.va_size;
 259  289                                  length = va.va_size - bva.va_size;
 260  290                          }
 261  291                          if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 262  292                              NULL)) {
 263  293                                  error = EACCES;
 264  294                          }
 265  295                  }
 266  296  
 267  297                  if (crgetuid(cr) == bva.va_uid && !error &&
 268  298                      va.va_size != bva.va_size) {
 269  299                          va.va_mask &= ~AT_SIZE;
 270  300                          bf.l_type = F_WRLCK;
 271  301                          bf.l_whence = 0;
 272  302                          bf.l_start = (off64_t)va.va_size;
 273  303                          bf.l_len = 0;
 274  304                          bf.l_sysid = 0;
 275  305                          bf.l_pid = 0;
 276  306  
 277  307                          error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 278  308                              (offset_t)va.va_size, cr, &ct);
 279  309                  }
 280  310                  if (in_crit)
 281  311                          nbl_end_crit(vp);
 282  312          } else
 283  313                  error = 0;
 284  314  
 285  315          /*
 286  316           * Do the setattr.
 287  317           */
 288  318          if (!error && va.va_mask) {
 289  319                  error = VOP_SETATTR(vp, &va, flag, cr, &ct);
 290  320          }
 291  321  
 292  322          /*
 293  323           * check if the monitor on either vop_space or vop_setattr detected
 294  324           * a delegation conflict and if so, mark the thread flag as
 295  325           * wouldblock so that the response is dropped and the client will
 296  326           * try again.
 297  327           */
 298  328          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 299  329                  VN_RELE(vp);
 300  330                  curthread->t_flag |= T_WOULDBLOCK;
 301  331                  return;
 302  332          }
 303  333  
 304  334          if (!error) {
 305  335                  va.va_mask = AT_ALL;    /* get everything */
 306  336  
 307  337                  error = rfs4_delegated_getattr(vp, &va, 0, cr);
 308  338  
 309  339                  /* check for overflows */
 310  340                  if (!error) {
 311  341                          acl_perm(vp, exi, &va, cr);
 312  342                          error = vattr_to_nattr(&va, &ns->ns_attr);
 313  343                  }
 314  344          }
 315  345  
 316  346          ct.cc_flags = 0;
 317  347  
 318  348          /*
 319  349           * Force modified metadata out to stable storage.
 320  350           */
 321  351          (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 322  352  
 323  353          VN_RELE(vp);
 324  354  
 325  355          ns->ns_status = puterrno(error);
 326  356  }
 327  357  void *
 328  358  rfs_setattr_getfh(struct nfssaargs *args)
 329  359  {
 330  360          return (&args->saa_fh);
 331  361  }
 332  362  
 333  363  /* Change and release @exip and @vpp only in success */
 334  364  int
 335  365  rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
 336  366  {
 337  367          struct exportinfo *exi;
 338  368          vnode_t *vp = *vpp;
 339  369          fid_t fid;
 340  370          int error;
 341  371  
 342  372          VN_HOLD(vp);
 343  373  
 344  374          if ((error = traverse(&vp)) != 0) {
 345  375                  VN_RELE(vp);
 346  376                  return (error);
 347  377          }
 348  378  
 349  379          bzero(&fid, sizeof (fid));
 350  380          fid.fid_len = MAXFIDSZ;
 351  381          error = VOP_FID(vp, &fid, NULL);
 352  382          if (error) {
 353  383                  VN_RELE(vp);
 354  384                  return (error);
 355  385          }
 356  386  
 357  387          exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
 358  388          if (exi == NULL ||
 359  389              (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
 360  390                  /*
 361  391                   * It is not error, just subdir is not exported
 362  392                   * or "nohide" is not set
 363  393                   */
 364  394                  if (exi != NULL)
 365  395                          exi_rele(exi);
 366  396                  VN_RELE(vp);
 367  397          } else {
 368  398                  /* go to submount */
 369  399                  exi_rele(*exip);
 370  400                  *exip = exi;
 371  401  
 372  402                  VN_RELE(*vpp);
 373  403                  *vpp = vp;
 374  404          }
 375  405  
 376  406          return (0);
 377  407  }
 378  408  
  
 379  409  /*
 380  410   * Given mounted "dvp" and "exi", go upper mountpoint
 381  411   * with dvp/exi correction
 382  412   * Return 0 in success
 383  413   */
 384  414  int
 385  415  rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
 386  416  {
 387  417          struct exportinfo *exi;
 388  418          vnode_t *dvp = *dvpp;
      419 +        vnode_t *zone_rootvp;
 389  420  
 390      -        ASSERT(dvp->v_flag & VROOT);
      421 +        zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp;
      422 +        ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp));
 391  423  
 392  424          VN_HOLD(dvp);
 393      -        dvp = untraverse(dvp);
      425 +        dvp = untraverse(dvp, zone_rootvp);
 394  426          exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
 395  427          if (exi == NULL) {
 396  428                  VN_RELE(dvp);
 397  429                  return (-1);
 398  430          }
 399  431  
      432 +        ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid);
 400  433          exi_rele(*exip);
 401  434          *exip = exi;
 402  435          VN_RELE(*dvpp);
 403  436          *dvpp = dvp;
 404  437  
 405  438          return (0);
 406  439  }
 407  440  /*
 408  441   * Directory lookup.
 409  442   * Returns an fhandle and file attributes for file name in a directory.
 410  443   */
 411  444  /* ARGSUSED */
 412  445  void
 413  446  rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
 414  447      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 415  448  {
 416  449          int error;
 417  450          vnode_t *dvp;
 418  451          vnode_t *vp;
 419  452          struct vattr va;
 420  453          fhandle_t *fhp = da->da_fhandle;
 421  454          struct sec_ol sec = {0, 0};
 422  455          bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 423  456          char *name;
 424  457          struct sockaddr *ca;
 425  458  
 426  459          /*
 427  460           * Trusted Extension doesn't support NFSv2. MOUNT
 428  461           * will reject v2 clients. Need to prevent v2 client
 429  462           * access via WebNFS here.
 430  463           */
 431  464          if (is_system_labeled() && req->rq_vers == 2) {
 432  465                  dr->dr_status = NFSERR_ACCES;
 433  466                  return;
 434  467          }
 435  468  
 436  469          /*
 437  470           * Disallow NULL paths
 438  471           */
  
 439  472          if (da->da_name == NULL || *da->da_name == '\0') {
 440  473                  dr->dr_status = NFSERR_ACCES;
 441  474                  return;
 442  475          }
 443  476  
 444  477          /*
 445  478           * Allow lookups from the root - the default
 446  479           * location of the public filehandle.
 447  480           */
 448  481          if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 449      -                dvp = rootdir;
      482 +                dvp = ZONE_ROOTVP();
 450  483                  VN_HOLD(dvp);
 451  484          } else {
 452  485                  dvp = nfs_fhtovp(fhp, exi);
 453  486                  if (dvp == NULL) {
 454  487                          dr->dr_status = NFSERR_STALE;
 455  488                          return;
 456  489                  }
 457  490          }
 458  491  
 459  492          exi_hold(exi);
      493 +        ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
 460  494  
 461  495          /*
 462  496           * Not allow lookup beyond root.
 463  497           * If the filehandle matches a filehandle of the exi,
 464  498           * then the ".." refers beyond the root of an exported filesystem.
 465  499           */
 466  500          if (strcmp(da->da_name, "..") == 0 &&
 467  501              EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
 468  502                  if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 469      -                    (dvp->v_flag & VROOT)) {
      503 +                    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
 470  504                          /*
 471  505                           * special case for ".." and 'nohide'exported root
 472  506                           */
 473  507                          if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 474  508                                  error = NFSERR_ACCES;
 475  509                                  goto out;
 476  510                          }
 477  511                  } else  {
 478  512                          error = NFSERR_NOENT;
 479  513                          goto out;
 480  514                  }
 481  515          }
 482  516  
 483  517          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 484  518          name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
 485  519              MAXPATHLEN);
 486  520  
 487  521          if (name == NULL) {
 488  522                  error = NFSERR_ACCES;
 489  523                  goto out;
 490  524          }
 491  525  
 492  526          /*
 493  527           * If the public filehandle is used then allow
 494  528           * a multi-component lookup, i.e. evaluate
  
 495  529           * a pathname and follow symbolic links if
 496  530           * necessary.
 497  531           *
 498  532           * This may result in a vnode in another filesystem
 499  533           * which is OK as long as the filesystem is exported.
 500  534           */
 501  535          if (PUBLIC_FH2(fhp)) {
 502  536                  publicfh_flag = TRUE;
 503  537  
 504  538                  exi_rele(exi);
      539 +                exi = NULL;
 505  540  
 506  541                  error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
 507  542                      &sec);
 508  543          } else {
 509  544                  /*
 510  545                   * Do a normal single component lookup.
 511  546                   */
 512  547                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
 513  548                      NULL, NULL, NULL);
 514  549          }
 515  550  
 516  551          if (name != da->da_name)
 517  552                  kmem_free(name, MAXPATHLEN);
 518  553  
 519  554          if (error == 0 && vn_ismntpt(vp)) {
 520  555                  error = rfs_cross_mnt(&vp, &exi);
 521  556                  if (error)
 522  557                          VN_RELE(vp);
 523  558          }
 524  559  
 525  560          if (!error) {
 526  561                  va.va_mask = AT_ALL;    /* we want everything */
 527  562  
 528  563                  error = rfs4_delegated_getattr(vp, &va, 0, cr);
 529  564  
 530  565                  /* check for overflows */
 531  566                  if (!error) {
 532  567                          acl_perm(vp, exi, &va, cr);
 533  568                          error = vattr_to_nattr(&va, &dr->dr_attr);
 534  569                          if (!error) {
 535  570                                  if (sec.sec_flags & SEC_QUERY)
 536  571                                          error = makefh_ol(&dr->dr_fhandle, exi,
 537  572                                              sec.sec_index);
 538  573                                  else {
 539  574                                          error = makefh(&dr->dr_fhandle, vp,
 540  575                                              exi);
 541  576                                          if (!error && publicfh_flag &&
 542  577                                              !chk_clnt_sec(exi, req))
 543  578                                                  auth_weak = TRUE;
 544  579                                  }
 545  580                          }
 546  581                  }
 547  582                  VN_RELE(vp);
 548  583          }
 549  584  
 550  585  out:
 551  586          VN_RELE(dvp);
 552  587  
 553  588          if (exi != NULL)
 554  589                  exi_rele(exi);
 555  590  
 556  591          /*
 557  592           * If it's public fh, no 0x81, and client's flavor is
 558  593           * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 559  594           * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 560  595           */
 561  596          if (auth_weak)
 562  597                  dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
 563  598          else
 564  599                  dr->dr_status = puterrno(error);
 565  600  }
 566  601  void *
 567  602  rfs_lookup_getfh(struct nfsdiropargs *da)
 568  603  {
 569  604          return (da->da_fhandle);
 570  605  }
 571  606  
 572  607  /*
 573  608   * Read symbolic link.
 574  609   * Returns the string in the symbolic link at the given fhandle.
 575  610   */
 576  611  /* ARGSUSED */
 577  612  void
 578  613  rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
 579  614      struct svc_req *req, cred_t *cr, bool_t ro)
 580  615  {
 581  616          int error;
 582  617          struct iovec iov;
 583  618          struct uio uio;
 584  619          vnode_t *vp;
 585  620          struct vattr va;
 586  621          struct sockaddr *ca;
 587  622          char *name = NULL;
 588  623          int is_referral = 0;
 589  624  
 590  625          vp = nfs_fhtovp(fhp, exi);
 591  626          if (vp == NULL) {
 592  627                  rl->rl_data = NULL;
 593  628                  rl->rl_status = NFSERR_STALE;
 594  629                  return;
 595  630          }
 596  631  
 597  632          va.va_mask = AT_MODE;
 598  633  
 599  634          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 600  635  
 601  636          if (error) {
 602  637                  VN_RELE(vp);
 603  638                  rl->rl_data = NULL;
 604  639                  rl->rl_status = puterrno(error);
 605  640                  return;
 606  641          }
 607  642  
 608  643          if (MANDLOCK(vp, va.va_mode)) {
 609  644                  VN_RELE(vp);
 610  645                  rl->rl_data = NULL;
 611  646                  rl->rl_status = NFSERR_ACCES;
 612  647                  return;
 613  648          }
 614  649  
 615  650          /* We lied about the object type for a referral */
 616  651          if (vn_is_nfs_reparse(vp, cr))
 617  652                  is_referral = 1;
 618  653  
 619  654          /*
 620  655           * XNFS and RFC1094 require us to return ENXIO if argument
 621  656           * is not a link. BUGID 1138002.
 622  657           */
 623  658          if (vp->v_type != VLNK && !is_referral) {
 624  659                  VN_RELE(vp);
 625  660                  rl->rl_data = NULL;
 626  661                  rl->rl_status = NFSERR_NXIO;
 627  662                  return;
  
 628  663          }
 629  664  
 630  665          /*
 631  666           * Allocate data for pathname.  This will be freed by rfs_rlfree.
 632  667           */
 633  668          rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
 634  669  
 635  670          if (is_referral) {
 636  671                  char *s;
 637  672                  size_t strsz;
      673 +                kstat_named_t *stat =
      674 +                    exi->exi_ne->ne_globals->svstat[NFS_VERSION];
 638  675  
 639  676                  /* Get an artificial symlink based on a referral */
 640  677                  s = build_symlink(vp, cr, &strsz);
 641      -                global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
      678 +                stat[NFS_REFERLINKS].value.ui64++;
 642  679                  DTRACE_PROBE2(nfs2serv__func__referral__reflink,
 643  680                      vnode_t *, vp, char *, s);
 644  681                  if (s == NULL)
 645  682                          error = EINVAL;
 646  683                  else {
 647  684                          error = 0;
 648  685                          (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
 649  686                          rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
 650  687                          kmem_free(s, strsz);
 651  688                  }
 652  689  
 653  690          } else {
 654  691  
 655  692                  /*
 656  693                   * Set up io vector to read sym link data
 657  694                   */
 658  695                  iov.iov_base = rl->rl_data;
 659  696                  iov.iov_len = NFS_MAXPATHLEN;
 660  697                  uio.uio_iov = &iov;
 661  698                  uio.uio_iovcnt = 1;
 662  699                  uio.uio_segflg = UIO_SYSSPACE;
 663  700                  uio.uio_extflg = UIO_COPY_CACHED;
 664  701                  uio.uio_loffset = (offset_t)0;
 665  702                  uio.uio_resid = NFS_MAXPATHLEN;
 666  703  
 667  704                  /*
 668  705                   * Do the readlink.
 669  706                   */
 670  707                  error = VOP_READLINK(vp, &uio, cr, NULL);
 671  708  
 672  709                  rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
 673  710  
 674  711                  if (!error)
 675  712                          rl->rl_data[rl->rl_count] = '\0';
 676  713  
 677  714          }
 678  715  
 679  716  
 680  717          VN_RELE(vp);
 681  718  
 682  719          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 683  720          name = nfscmd_convname(ca, exi, rl->rl_data,
 684  721              NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
 685  722  
 686  723          if (name != NULL && name != rl->rl_data) {
 687  724                  kmem_free(rl->rl_data, NFS_MAXPATHLEN);
 688  725                  rl->rl_data = name;
 689  726          }
 690  727  
 691  728          /*
 692  729           * XNFS and RFC1094 require us to return ENXIO if argument
 693  730           * is not a link. UFS returns EINVAL if this is the case,
 694  731           * so we do the mapping here. BUGID 1138002.
 695  732           */
 696  733          if (error == EINVAL)
 697  734                  rl->rl_status = NFSERR_NXIO;
 698  735          else
 699  736                  rl->rl_status = puterrno(error);
 700  737  
 701  738  }
 702  739  void *
 703  740  rfs_readlink_getfh(fhandle_t *fhp)
 704  741  {
 705  742          return (fhp);
 706  743  }
 707  744  /*
 708  745   * Free data allocated by rfs_readlink
 709  746   */
 710  747  void
 711  748  rfs_rlfree(struct nfsrdlnres *rl)
 712  749  {
 713  750          if (rl->rl_data != NULL)
 714  751                  kmem_free(rl->rl_data, NFS_MAXPATHLEN);
 715  752  }
 716  753  
 717  754  static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
 718  755  
 719  756  /*
 720  757   * Read data.
 721  758   * Returns some data read from the file at the given fhandle.
 722  759   */
 723  760  /* ARGSUSED */
 724  761  void
 725  762  rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
 726  763      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 727  764  {
 728  765          vnode_t *vp;
 729  766          int error;
 730  767          struct vattr va;
 731  768          struct iovec iov;
 732  769          struct uio uio;
 733  770          mblk_t *mp;
 734  771          int alloc_err = 0;
 735  772          int in_crit = 0;
 736  773          caller_context_t ct;
 737  774  
 738  775          vp = nfs_fhtovp(&ra->ra_fhandle, exi);
 739  776          if (vp == NULL) {
 740  777                  rr->rr_data = NULL;
 741  778                  rr->rr_status = NFSERR_STALE;
 742  779                  return;
 743  780          }
 744  781  
 745  782          if (vp->v_type != VREG) {
 746  783                  VN_RELE(vp);
 747  784                  rr->rr_data = NULL;
 748  785                  rr->rr_status = NFSERR_ISDIR;
 749  786                  return;
 750  787          }
 751  788  
 752  789          ct.cc_sysid = 0;
 753  790          ct.cc_pid = 0;
 754  791          ct.cc_caller_id = nfs2_srv_caller_id;
 755  792          ct.cc_flags = CC_DONTBLOCK;
 756  793  
 757  794          /*
 758  795           * Enter the critical region before calling VOP_RWLOCK
 759  796           * to avoid a deadlock with write requests.
 760  797           */
 761  798          if (nbl_need_check(vp)) {
 762  799                  nbl_start_crit(vp, RW_READER);
 763  800                  if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
 764  801                      0, NULL)) {
 765  802                          nbl_end_crit(vp);
 766  803                          VN_RELE(vp);
 767  804                          rr->rr_data = NULL;
  
 768  805                          rr->rr_status = NFSERR_ACCES;
 769  806                          return;
 770  807                  }
 771  808                  in_crit = 1;
 772  809          }
 773  810  
 774  811          error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
 775  812  
 776  813          /* check if a monitor detected a delegation conflict */
 777  814          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
      815 +                if (in_crit)
      816 +                        nbl_end_crit(vp);
 778  817                  VN_RELE(vp);
 779  818                  /* mark as wouldblock so response is dropped */
 780  819                  curthread->t_flag |= T_WOULDBLOCK;
 781  820  
 782  821                  rr->rr_data = NULL;
 783  822                  return;
 784  823          }
 785  824  
 786  825          va.va_mask = AT_ALL;
 787  826  
 788  827          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
 789  828  
 790  829          if (error) {
 791  830                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 792  831                  if (in_crit)
 793  832                          nbl_end_crit(vp);
 794  833  
 795  834                  VN_RELE(vp);
 796  835                  rr->rr_data = NULL;
 797  836                  rr->rr_status = puterrno(error);
 798  837  
 799  838                  return;
 800  839          }
 801  840  
 802  841          /*
 803  842           * This is a kludge to allow reading of files created
 804  843           * with no read permission.  The owner of the file
 805  844           * is always allowed to read it.
 806  845           */
 807  846          if (crgetuid(cr) != va.va_uid) {
 808  847                  error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
 809  848  
 810  849                  if (error) {
 811  850                          /*
 812  851                           * Exec is the same as read over the net because
 813  852                           * of demand loading.
 814  853                           */
 815  854                          error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
 816  855                  }
 817  856                  if (error) {
 818  857                          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 819  858                          if (in_crit)
 820  859                                  nbl_end_crit(vp);
 821  860                          VN_RELE(vp);
 822  861                          rr->rr_data = NULL;
 823  862                          rr->rr_status = puterrno(error);
 824  863  
 825  864                          return;
 826  865                  }
 827  866          }
 828  867  
 829  868          if (MANDLOCK(vp, va.va_mode)) {
 830  869                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 831  870                  if (in_crit)
 832  871                          nbl_end_crit(vp);
 833  872  
 834  873                  VN_RELE(vp);
 835  874                  rr->rr_data = NULL;
 836  875                  rr->rr_status = NFSERR_ACCES;
 837  876  
 838  877                  return;
 839  878          }
 840  879  
 841  880          rr->rr_ok.rrok_wlist_len = 0;
 842  881          rr->rr_ok.rrok_wlist = NULL;
 843  882  
 844  883          if ((u_offset_t)ra->ra_offset >= va.va_size) {
 845  884                  rr->rr_count = 0;
 846  885                  rr->rr_data = NULL;
 847  886                  /*
 848  887                   * In this case, status is NFS_OK, but there is no data
 849  888                   * to encode. So set rr_mp to NULL.
 850  889                   */
 851  890                  rr->rr_mp = NULL;
 852  891                  rr->rr_ok.rrok_wlist = ra->ra_wlist;
 853  892                  if (rr->rr_ok.rrok_wlist)
 854  893                          clist_zero_len(rr->rr_ok.rrok_wlist);
 855  894                  goto done;
 856  895          }
 857  896  
 858  897          if (ra->ra_wlist) {
 859  898                  mp = NULL;
 860  899                  rr->rr_mp = NULL;
 861  900                  (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
 862  901                  if (ra->ra_count > iov.iov_len) {
 863  902                          rr->rr_data = NULL;
 864  903                          rr->rr_status = NFSERR_INVAL;
 865  904                          goto done;
 866  905                  }
 867  906          } else {
 868  907                  /*
 869  908                   * mp will contain the data to be sent out in the read reply.
 870  909                   * This will be freed after the reply has been sent out (by the
 871  910                   * driver).
 872  911                   * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
 873  912                   * that the call to xdrmblk_putmblk() never fails.
 874  913                   */
 875  914                  mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
 876  915                      &alloc_err);
 877  916                  ASSERT(mp != NULL);
 878  917                  ASSERT(alloc_err == 0);
 879  918  
 880  919                  rr->rr_mp = mp;
 881  920  
 882  921                  /*
 883  922                   * Set up io vector
 884  923                   */
 885  924                  iov.iov_base = (caddr_t)mp->b_datap->db_base;
 886  925                  iov.iov_len = ra->ra_count;
 887  926          }
 888  927  
 889  928          uio.uio_iov = &iov;
 890  929          uio.uio_iovcnt = 1;
 891  930          uio.uio_segflg = UIO_SYSSPACE;
 892  931          uio.uio_extflg = UIO_COPY_CACHED;
 893  932          uio.uio_loffset = (offset_t)ra->ra_offset;
 894  933          uio.uio_resid = ra->ra_count;
 895  934  
 896  935          error = VOP_READ(vp, &uio, 0, cr, &ct);
 897  936  
 898  937          if (error) {
 899  938                  if (mp)
 900  939                          freeb(mp);
 901  940  
 902  941                  /*
 903  942                   * check if a monitor detected a delegation conflict and
 904  943                   * mark as wouldblock so response is dropped
 905  944                   */
 906  945                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
 907  946                          curthread->t_flag |= T_WOULDBLOCK;
 908  947                  else
 909  948                          rr->rr_status = puterrno(error);
 910  949  
 911  950                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 912  951                  if (in_crit)
 913  952                          nbl_end_crit(vp);
 914  953  
 915  954                  VN_RELE(vp);
 916  955                  rr->rr_data = NULL;
 917  956  
 918  957                  return;
 919  958          }
 920  959  
 921  960          /*
 922  961           * Get attributes again so we can send the latest access
 923  962           * time to the client side for its cache.
 924  963           */
 925  964          va.va_mask = AT_ALL;
 926  965  
 927  966          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
 928  967  
 929  968          if (error) {
 930  969                  if (mp)
 931  970                          freeb(mp);
 932  971  
 933  972                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 934  973                  if (in_crit)
 935  974                          nbl_end_crit(vp);
 936  975  
 937  976                  VN_RELE(vp);
 938  977                  rr->rr_data = NULL;
 939  978                  rr->rr_status = puterrno(error);
 940  979  
 941  980                  return;
 942  981          }
 943  982  
 944  983          rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
 945  984  
 946  985          if (mp) {
 947  986                  rr->rr_data = (char *)mp->b_datap->db_base;
 948  987          } else {
 949  988                  if (ra->ra_wlist) {
 950  989                          rr->rr_data = (caddr_t)iov.iov_base;
 951  990                          if (!rdma_setup_read_data2(ra, rr)) {
 952  991                                  rr->rr_data = NULL;
 953  992                                  rr->rr_status = puterrno(NFSERR_INVAL);
 954  993                          }
 955  994                  }
 956  995          }
 957  996  done:
 958  997          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 959  998          if (in_crit)
 960  999                  nbl_end_crit(vp);
 961 1000  
 962 1001          acl_perm(vp, exi, &va, cr);
 963 1002  
 964 1003          /* check for overflows */
 965 1004          error = vattr_to_nattr(&va, &rr->rr_attr);
 966 1005  
 967 1006          VN_RELE(vp);
 968 1007  
 969 1008          rr->rr_status = puterrno(error);
 970 1009  }
 971 1010  
 972 1011  /*
 973 1012   * Free data allocated by rfs_read
 974 1013   */
 975 1014  void
 976 1015  rfs_rdfree(struct nfsrdresult *rr)
 977 1016  {
 978 1017          mblk_t *mp;
 979 1018  
 980 1019          if (rr->rr_status == NFS_OK) {
 981 1020                  mp = rr->rr_mp;
 982 1021                  if (mp != NULL)
 983 1022                          freeb(mp);
 984 1023          }
 985 1024  }
 986 1025  
 987 1026  void *
 988 1027  rfs_read_getfh(struct nfsreadargs *ra)
 989 1028  {
 990 1029          return (&ra->ra_fhandle);
 991 1030  }
 992 1031  
 993 1032  #define MAX_IOVECS      12
 994 1033  
 995 1034  #ifdef DEBUG
 996 1035  static int rfs_write_sync_hits = 0;
 997 1036  static int rfs_write_sync_misses = 0;
 998 1037  #endif
 999 1038  
1000 1039  /*
1001 1040   * Write data to file.
1002 1041   * Returns attributes of a file after writing some data to it.
1003 1042   *
1004 1043   * Any changes made here, especially in error handling might have
1005 1044   * to also be done in rfs_write (which clusters write requests).
1006 1045   */
1007 1046  /* ARGSUSED */
1008 1047  void
1009 1048  rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1010 1049      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1011 1050  {
1012 1051          int error;
1013 1052          vnode_t *vp;
1014 1053          rlim64_t rlimit;
1015 1054          struct vattr va;
1016 1055          struct uio uio;
1017 1056          struct iovec iov[MAX_IOVECS];
1018 1057          mblk_t *m;
1019 1058          struct iovec *iovp;
1020 1059          int iovcnt;
1021 1060          cred_t *savecred;
1022 1061          int in_crit = 0;
1023 1062          caller_context_t ct;
1024 1063  
1025 1064          vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1026 1065          if (vp == NULL) {
1027 1066                  ns->ns_status = NFSERR_STALE;
1028 1067                  return;
1029 1068          }
1030 1069  
1031 1070          if (rdonly(ro, vp)) {
1032 1071                  VN_RELE(vp);
1033 1072                  ns->ns_status = NFSERR_ROFS;
1034 1073                  return;
1035 1074          }
1036 1075  
1037 1076          if (vp->v_type != VREG) {
1038 1077                  VN_RELE(vp);
1039 1078                  ns->ns_status = NFSERR_ISDIR;
1040 1079                  return;
1041 1080          }
1042 1081  
1043 1082          ct.cc_sysid = 0;
1044 1083          ct.cc_pid = 0;
1045 1084          ct.cc_caller_id = nfs2_srv_caller_id;
1046 1085          ct.cc_flags = CC_DONTBLOCK;
1047 1086  
1048 1087          va.va_mask = AT_UID|AT_MODE;
1049 1088  
1050 1089          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1051 1090  
1052 1091          if (error) {
1053 1092                  VN_RELE(vp);
1054 1093                  ns->ns_status = puterrno(error);
1055 1094  
1056 1095                  return;
1057 1096          }
1058 1097  
1059 1098          if (crgetuid(cr) != va.va_uid) {
1060 1099                  /*
1061 1100                   * This is a kludge to allow writes of files created
1062 1101                   * with read only permission.  The owner of the file
1063 1102                   * is always allowed to write it.
1064 1103                   */
1065 1104                  error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1066 1105  
1067 1106                  if (error) {
1068 1107                          VN_RELE(vp);
1069 1108                          ns->ns_status = puterrno(error);
1070 1109                          return;
1071 1110                  }
1072 1111          }
1073 1112  
1074 1113          /*
1075 1114           * Can't access a mandatory lock file.  This might cause
1076 1115           * the NFS service thread to block forever waiting for a
1077 1116           * lock to be released that will never be released.
1078 1117           */
1079 1118          if (MANDLOCK(vp, va.va_mode)) {
1080 1119                  VN_RELE(vp);
1081 1120                  ns->ns_status = NFSERR_ACCES;
1082 1121                  return;
1083 1122          }
1084 1123  
1085 1124          /*
1086 1125           * We have to enter the critical region before calling VOP_RWLOCK
1087 1126           * to avoid a deadlock with ufs.
1088 1127           */
1089 1128          if (nbl_need_check(vp)) {
1090 1129                  nbl_start_crit(vp, RW_READER);
1091 1130                  in_crit = 1;
1092 1131                  if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
  
1093 1132                      wa->wa_count, 0, NULL)) {
1094 1133                          error = EACCES;
1095 1134                          goto out;
1096 1135                  }
1097 1136          }
1098 1137  
1099 1138          error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1100 1139  
1101 1140          /* check if a monitor detected a delegation conflict */
1102 1141          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1103      -                VN_RELE(vp);
1104      -                /* mark as wouldblock so response is dropped */
1105      -                curthread->t_flag |= T_WOULDBLOCK;
1106      -                return;
     1142 +                goto out;
1107 1143          }
1108 1144  
1109 1145          if (wa->wa_data || wa->wa_rlist) {
1110 1146                  /* Do the RDMA thing if necessary */
1111 1147                  if (wa->wa_rlist) {
1112 1148                          iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1113 1149                          iov[0].iov_len = wa->wa_count;
1114 1150                  } else  {
1115 1151                          iov[0].iov_base = wa->wa_data;
1116 1152                          iov[0].iov_len = wa->wa_count;
1117 1153                  }
1118 1154                  uio.uio_iov = iov;
1119 1155                  uio.uio_iovcnt = 1;
1120 1156                  uio.uio_segflg = UIO_SYSSPACE;
1121 1157                  uio.uio_extflg = UIO_COPY_DEFAULT;
1122 1158                  uio.uio_loffset = (offset_t)wa->wa_offset;
1123 1159                  uio.uio_resid = wa->wa_count;
1124 1160                  /*
1125 1161                   * The limit is checked on the client. We
1126 1162                   * should allow any size writes here.
1127 1163                   */
1128 1164                  uio.uio_llimit = curproc->p_fsz_ctl;
1129 1165                  rlimit = uio.uio_llimit - wa->wa_offset;
1130 1166                  if (rlimit < (rlim64_t)uio.uio_resid)
1131 1167                          uio.uio_resid = (uint_t)rlimit;
1132 1168  
1133 1169                  /*
1134 1170                   * for now we assume no append mode
1135 1171                   */
  
1136 1172                  /*
1137 1173                   * We're changing creds because VM may fault and we need
1138 1174                   * the cred of the current thread to be used if quota
1139 1175                   * checking is enabled.
1140 1176                   */
1141 1177                  savecred = curthread->t_cred;
1142 1178                  curthread->t_cred = cr;
1143 1179                  error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1144 1180                  curthread->t_cred = savecred;
1145 1181          } else {
     1182 +
1146 1183                  iovcnt = 0;
1147 1184                  for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1148 1185                          iovcnt++;
1149 1186                  if (iovcnt <= MAX_IOVECS) {
1150 1187  #ifdef DEBUG
1151 1188                          rfs_write_sync_hits++;
1152 1189  #endif
1153 1190                          iovp = iov;
1154 1191                  } else {
1155 1192  #ifdef DEBUG
1156 1193                          rfs_write_sync_misses++;
1157 1194  #endif
1158 1195                          iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1159 1196                  }
1160 1197                  mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1161 1198                  uio.uio_iov = iovp;
1162 1199                  uio.uio_iovcnt = iovcnt;
1163 1200                  uio.uio_segflg = UIO_SYSSPACE;
1164 1201                  uio.uio_extflg = UIO_COPY_DEFAULT;
1165 1202                  uio.uio_loffset = (offset_t)wa->wa_offset;
1166 1203                  uio.uio_resid = wa->wa_count;
1167 1204                  /*
1168 1205                   * The limit is checked on the client. We
1169 1206                   * should allow any size writes here.
1170 1207                   */
1171 1208                  uio.uio_llimit = curproc->p_fsz_ctl;
1172 1209                  rlimit = uio.uio_llimit - wa->wa_offset;
1173 1210                  if (rlimit < (rlim64_t)uio.uio_resid)
1174 1211                          uio.uio_resid = (uint_t)rlimit;
1175 1212  
1176 1213                  /*
1177 1214                   * For now we assume no append mode.
1178 1215                   */
1179 1216                  /*
1180 1217                   * We're changing creds because VM may fault and we need
1181 1218                   * the cred of the current thread to be used if quota
1182 1219                   * checking is enabled.
1183 1220                   */
1184 1221                  savecred = curthread->t_cred;
1185 1222                  curthread->t_cred = cr;
1186 1223                  error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1187 1224                  curthread->t_cred = savecred;
1188 1225  
1189 1226                  if (iovp != iov)
1190 1227                          kmem_free(iovp, sizeof (*iovp) * iovcnt);
1191 1228          }
1192 1229  
1193 1230          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1194 1231  
1195 1232          if (!error) {
1196 1233                  /*
1197 1234                   * Get attributes again so we send the latest mod
1198 1235                   * time to the client side for its cache.
1199 1236                   */
1200 1237                  va.va_mask = AT_ALL;    /* now we want everything */
1201 1238  
1202 1239                  error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1203 1240  
1204 1241                  /* check for overflows */
1205 1242                  if (!error) {
1206 1243                          acl_perm(vp, exi, &va, cr);
1207 1244                          error = vattr_to_nattr(&va, &ns->ns_attr);
1208 1245                  }
1209 1246          }
1210 1247  
1211 1248  out:
1212 1249          if (in_crit)
1213 1250                  nbl_end_crit(vp);
1214 1251          VN_RELE(vp);
1215 1252  
1216 1253          /* check if a monitor detected a delegation conflict */
1217 1254          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1218 1255                  /* mark as wouldblock so response is dropped */
1219 1256                  curthread->t_flag |= T_WOULDBLOCK;
1220 1257          else
1221 1258                  ns->ns_status = puterrno(error);
1222 1259  
1223 1260  }
1224 1261  
1225 1262  struct rfs_async_write {
1226 1263          struct nfswriteargs *wa;
1227 1264          struct nfsattrstat *ns;
1228 1265          struct svc_req *req;
1229 1266          cred_t *cr;
1230 1267          bool_t ro;
1231 1268          kthread_t *thread;
1232 1269          struct rfs_async_write *list;
1233 1270  };
1234 1271  
1235 1272  struct rfs_async_write_list {
1236 1273          fhandle_t *fhp;
1237 1274          kcondvar_t cv;
1238 1275          struct rfs_async_write *list;
1239 1276          struct rfs_async_write_list *next;
1240 1277  };
1241 1278  
1242 1279  static struct rfs_async_write_list *rfs_async_write_head = NULL;
1243 1280  static kmutex_t rfs_async_write_lock;
1244 1281  static int rfs_write_async = 1; /* enables write clustering if == 1 */
1245 1282  
1246 1283  #define MAXCLIOVECS     42
1247 1284  #define RFSWRITE_INITVAL (enum nfsstat) -1
1248 1285  
1249 1286  #ifdef DEBUG
1250 1287  static int rfs_write_hits = 0;
1251 1288  static int rfs_write_misses = 0;
1252 1289  #endif
1253 1290  
1254 1291  /*
1255 1292   * Write data to file.
1256 1293   * Returns attributes of a file after writing some data to it.
1257 1294   */
1258 1295  void
1259 1296  rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1260 1297      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1261 1298  {
1262 1299          int error;
1263 1300          vnode_t *vp;
1264 1301          rlim64_t rlimit;
1265 1302          struct vattr va;
1266 1303          struct uio uio;
1267 1304          struct rfs_async_write_list *lp;
1268 1305          struct rfs_async_write_list *nlp;
1269 1306          struct rfs_async_write *rp;
1270 1307          struct rfs_async_write *nrp;
1271 1308          struct rfs_async_write *trp;
1272 1309          struct rfs_async_write *lrp;
1273 1310          int data_written;
1274 1311          int iovcnt;
1275 1312          mblk_t *m;
1276 1313          struct iovec *iovp;
1277 1314          struct iovec *niovp;
1278 1315          struct iovec iov[MAXCLIOVECS];
  
1279 1316          int count;
1280 1317          int rcount;
1281 1318          uint_t off;
1282 1319          uint_t len;
1283 1320          struct rfs_async_write nrpsp;
1284 1321          struct rfs_async_write_list nlpsp;
1285 1322          ushort_t t_flag;
1286 1323          cred_t *savecred;
1287 1324          int in_crit = 0;
1288 1325          caller_context_t ct;
     1326 +        nfs_srv_t *nsrv;
1289 1327  
1290      -        if (!rfs_write_async) {
     1328 +        ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
     1329 +        nsrv = nfs_get_srv();
     1330 +        if (!nsrv->write_async) {
1291 1331                  rfs_write_sync(wa, ns, exi, req, cr, ro);
1292 1332                  return;
1293 1333          }
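
This hunk is where the zone support lands in rfs_write(): the old file-scope rfs_write_async tunable (and, below, the cluster list head and its mutex) become fields of a per-zone nfs_srv_t returned by nfs_get_srv(), and the new ASSERT checks that the export being served belongs to the caller's zone. The real nfs_srv_t is defined elsewhere in this changeset, so the snippet below is only an illustrative userland model of zone-keyed server state; every identifier ending in _model is invented for the example.

#include <stdio.h>
#include <stddef.h>

/* Invented stand-in for the per-zone NFS server state. */
typedef struct {
        int zone_id;
        int write_async;                /* replaces the global rfs_write_async */
        void *async_write_head;         /* replaces rfs_async_write_head */
} nfs_srv_model_t;

static nfs_srv_model_t srv_table_model[] = {
        { 0, 1, NULL },                 /* global zone */
        { 7, 1, NULL },                 /* some non-global zone */
};

/* Model of nfs_get_srv(): return the state for the calling zone. */
static nfs_srv_model_t *
get_srv_model(int curzone_id)
{
        for (size_t i = 0;
            i < sizeof (srv_table_model) / sizeof (srv_table_model[0]); i++)
                if (srv_table_model[i].zone_id == curzone_id)
                        return (&srv_table_model[i]);
        return (NULL);
}

int
main(void)
{
        nfs_srv_model_t *nsrv = get_srv_model(7);

        printf("zone 7 write_async = %d\n",
            nsrv != NULL ? nsrv->write_async : -1);
        return (0);
}
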
1294 1334  
1295 1335          /*
1295 1335          /*
1296 1336           * Initialize status to RFSWRITE_INITVAL instead of 0, since a value
1297 1337           * of 0 is considered OK.
1298 1338           */
1299 1339          ns->ns_status = RFSWRITE_INITVAL;
1300 1340  
1301 1341          nrp = &nrpsp;
1302 1342          nrp->wa = wa;
1303 1343          nrp->ns = ns;
1304 1344          nrp->req = req;
  
1305 1345          nrp->cr = cr;
1306 1346          nrp->ro = ro;
1307 1347          nrp->thread = curthread;
1308 1348  
1309 1349          ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1310 1350  
1311 1351          /*
1312 1352           * Look to see if there is already a cluster started
1313 1353           * for this file.
1314 1354           */
1315      -        mutex_enter(&rfs_async_write_lock);
1316      -        for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
     1355 +        mutex_enter(&nsrv->async_write_lock);
     1356 +        for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
1317 1357                  if (bcmp(&wa->wa_fhandle, lp->fhp,
1318 1358                      sizeof (fhandle_t)) == 0)
1319 1359                          break;
1320 1360          }
1321 1361  
1322 1362          /*
1323 1363           * If lp is non-NULL, then there is already a cluster
1324 1364           * started.  We need to place ourselves in the cluster
1325 1365           * list in the right place as determined by starting
1326 1366           * offset.  Conflicts with non-blocking mandatory locked
1327 1367           * regions will be checked when the cluster is processed.
1328 1368           */
1329 1369          if (lp != NULL) {
1330 1370                  rp = lp->list;
1331 1371                  trp = NULL;
  
1332 1372                  while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1333 1373                          trp = rp;
1334 1374                          rp = rp->list;
1335 1375                  }
1336 1376                  nrp->list = rp;
1337 1377                  if (trp == NULL)
1338 1378                          lp->list = nrp;
1339 1379                  else
1340 1380                          trp->list = nrp;
1341 1381                  while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1342      -                        cv_wait(&lp->cv, &rfs_async_write_lock);
1343      -                mutex_exit(&rfs_async_write_lock);
     1382 +                        cv_wait(&lp->cv, &nsrv->async_write_lock);
     1383 +                mutex_exit(&nsrv->async_write_lock);
1344 1384  
1345 1385                  return;
1346 1386          }
1347 1387  
1348 1388          /*
1349 1389           * No cluster started yet, start one and add ourselves
1350 1390           * to the list of clusters.
1351 1391           */
1352 1392          nrp->list = NULL;
1353 1393  
1354 1394          nlp = &nlpsp;
1355 1395          nlp->fhp = &wa->wa_fhandle;
1356 1396          cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1357 1397          nlp->list = nrp;
1358 1398          nlp->next = NULL;
1359 1399  
1360      -        if (rfs_async_write_head == NULL) {
1361      -                rfs_async_write_head = nlp;
     1400 +        if (nsrv->async_write_head == NULL) {
     1401 +                nsrv->async_write_head = nlp;
1362 1402          } else {
1363      -                lp = rfs_async_write_head;
     1403 +                lp = nsrv->async_write_head;
1364 1404                  while (lp->next != NULL)
1365 1405                          lp = lp->next;
1366 1406                  lp->next = nlp;
1367 1407          }
1368      -        mutex_exit(&rfs_async_write_lock);
     1408 +        mutex_exit(&nsrv->async_write_lock);
1369 1409  
1370 1410          /*
1371 1411           * Convert the file handle common to all of the requests
1372 1412           * in this cluster to a vnode.
1373 1413           */
1374 1414          vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1375 1415          if (vp == NULL) {
1376      -                mutex_enter(&rfs_async_write_lock);
1377      -                if (rfs_async_write_head == nlp)
1378      -                        rfs_async_write_head = nlp->next;
     1416 +                mutex_enter(&nsrv->async_write_lock);
     1417 +                if (nsrv->async_write_head == nlp)
     1418 +                        nsrv->async_write_head = nlp->next;
1379 1419                  else {
1380      -                        lp = rfs_async_write_head;
     1420 +                        lp = nsrv->async_write_head;
1381 1421                          while (lp->next != nlp)
1382 1422                                  lp = lp->next;
1383 1423                          lp->next = nlp->next;
1384 1424                  }
1385 1425                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1386 1426                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1387 1427                          rp->ns->ns_status = NFSERR_STALE;
1388 1428                          rp->thread->t_flag |= t_flag;
1389 1429                  }
1390 1430                  cv_broadcast(&nlp->cv);
1391      -                mutex_exit(&rfs_async_write_lock);
     1431 +                mutex_exit(&nsrv->async_write_lock);
1392 1432  
1393 1433                  return;
1394 1434          }
1395 1435  
1396 1436          /*
1397 1437           * Can only write regular files.  Attempts to write any
1398 1438           * other file types fail with EISDIR.
1399 1439           */
1400 1440          if (vp->v_type != VREG) {
1401 1441                  VN_RELE(vp);
1402      -                mutex_enter(&rfs_async_write_lock);
1403      -                if (rfs_async_write_head == nlp)
1404      -                        rfs_async_write_head = nlp->next;
     1442 +                mutex_enter(&nsrv->async_write_lock);
     1443 +                if (nsrv->async_write_head == nlp)
     1444 +                        nsrv->async_write_head = nlp->next;
1405 1445                  else {
1406      -                        lp = rfs_async_write_head;
     1446 +                        lp = nsrv->async_write_head;
1407 1447                          while (lp->next != nlp)
1408 1448                                  lp = lp->next;
1409 1449                          lp->next = nlp->next;
1410 1450                  }
1411 1451                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1412 1452                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1413 1453                          rp->ns->ns_status = NFSERR_ISDIR;
1414 1454                          rp->thread->t_flag |= t_flag;
1415 1455                  }
1416 1456                  cv_broadcast(&nlp->cv);
1417      -                mutex_exit(&rfs_async_write_lock);
     1457 +                mutex_exit(&nsrv->async_write_lock);
1418 1458  
1419 1459                  return;
1420 1460          }
1421 1461  
1422 1462          /*
1423 1463           * Enter the critical region before calling VOP_RWLOCK, to avoid a
1424 1464           * deadlock with ufs.
1425 1465           */
1426 1466          if (nbl_need_check(vp)) {
1427 1467                  nbl_start_crit(vp, RW_READER);
1428 1468                  in_crit = 1;
1429 1469          }
1430 1470  
1431 1471          ct.cc_sysid = 0;
1432 1472          ct.cc_pid = 0;
1433 1473          ct.cc_caller_id = nfs2_srv_caller_id;
1434 1474          ct.cc_flags = CC_DONTBLOCK;
1435 1475  
1436 1476          /*
1437 1477           * Lock the file for writing.  This operation provides
1438 1478           * the delay which allows clusters to grow.
  
1439 1479           */
1440 1480          error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1441 1481  
1442 1482          /* check if a monitor detected a delegation conflict */
1443 1483          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1444 1484                  if (in_crit)
1445 1485                          nbl_end_crit(vp);
1446 1486                  VN_RELE(vp);
1447 1487                  /* mark as wouldblock so response is dropped */
1448 1488                  curthread->t_flag |= T_WOULDBLOCK;
1449      -                mutex_enter(&rfs_async_write_lock);
1450      -                if (rfs_async_write_head == nlp)
1451      -                        rfs_async_write_head = nlp->next;
     1489 +                mutex_enter(&nsrv->async_write_lock);
     1490 +                if (nsrv->async_write_head == nlp)
     1491 +                        nsrv->async_write_head = nlp->next;
1452 1492                  else {
1453      -                        lp = rfs_async_write_head;
     1493 +                        lp = nsrv->async_write_head;
1454 1494                          while (lp->next != nlp)
1455 1495                                  lp = lp->next;
1456 1496                          lp->next = nlp->next;
1457 1497                  }
1458 1498                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1459 1499                          if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1460 1500                                  rp->ns->ns_status = puterrno(error);
1461 1501                                  rp->thread->t_flag |= T_WOULDBLOCK;
1462 1502                          }
1463 1503                  }
1464 1504                  cv_broadcast(&nlp->cv);
1465      -                mutex_exit(&rfs_async_write_lock);
     1505 +                mutex_exit(&nsrv->async_write_lock);
1466 1506  
1467 1507                  return;
1468 1508          }
1469 1509  
1470 1510          /*
1471 1511           * Disconnect this cluster from the list of clusters.
1472 1512           * The cluster that is being dealt with must be fixed
1473 1513           * in size after this point, so there is no reason
1474 1514           * to leave it on the list so that new requests can
1475 1515           * find it.
1476 1516           *
  
1477 1517           * The algorithm is that the first write request will
1478 1518           * create a cluster, convert the file handle to a
1479 1519           * vnode pointer, and then lock the file for writing.
1480 1520           * This request is not likely to be clustered with
1481 1521           * any others.  However, the next request will create
1482 1522           * a new cluster and be blocked in VOP_RWLOCK while
1483 1523           * the first request is being processed.  This delay
1484 1524           * will allow more requests to be clustered in this
1485 1525           * second cluster.
1486 1526           */
1487      -        mutex_enter(&rfs_async_write_lock);
1488      -        if (rfs_async_write_head == nlp)
1489      -                rfs_async_write_head = nlp->next;
     1527 +        mutex_enter(&nsrv->async_write_lock);
     1528 +        if (nsrv->async_write_head == nlp)
     1529 +                nsrv->async_write_head = nlp->next;
1490 1530          else {
1491      -                lp = rfs_async_write_head;
     1531 +                lp = nsrv->async_write_head;
1492 1532                  while (lp->next != nlp)
1493 1533                          lp = lp->next;
1494 1534                  lp->next = nlp->next;
1495 1535          }
1496      -        mutex_exit(&rfs_async_write_lock);
     1536 +        mutex_exit(&nsrv->async_write_lock);
1497 1537  
1498 1538          /*
1499 1539           * Step through the list of requests in this cluster.
1500 1540           * We need to check permissions to make sure that all
1501 1541           * of the requests have sufficient permission to write
1502 1542           * the file.  A cluster can be composed of requests
1503 1543           * from different clients and different users on each
1504 1544           * client.
1505 1545           *
1506 1546           * As a side effect, we also calculate the size of the
1507 1547           * byte range that this cluster encompasses.
1508 1548           */
1509 1549          rp = nlp->list;
1510 1550          off = rp->wa->wa_offset;
1511 1551          len = (uint_t)0;
1512 1552          do {
1513 1553                  if (rdonly(rp->ro, vp)) {
1514 1554                          rp->ns->ns_status = NFSERR_ROFS;
1515 1555                          t_flag = curthread->t_flag & T_WOULDBLOCK;
1516 1556                          rp->thread->t_flag |= t_flag;
1517 1557                          continue;
1518 1558                  }
1519 1559  
1520 1560                  va.va_mask = AT_UID|AT_MODE;
1521 1561  
1522 1562                  error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1523 1563  
1524 1564                  if (!error) {
1525 1565                          if (crgetuid(rp->cr) != va.va_uid) {
1526 1566                                  /*
1527 1567                                   * This is a kludge to allow writes of files
1528 1568                                   * created with read only permission.  The
1529 1569                                   * owner of the file is always allowed to
1530 1570                                   * write it.
1531 1571                                   */
1532 1572                                  error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1533 1573                          }
1534 1574                          if (!error && MANDLOCK(vp, va.va_mode))
1535 1575                                  error = EACCES;
1536 1576                  }
1537 1577  
1538 1578                  /*
1539 1579                   * Check for a conflict with a nbmand-locked region.
1540 1580                   */
1541 1581                  if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1542 1582                      rp->wa->wa_count, 0, NULL)) {
1543 1583                          error = EACCES;
1544 1584                  }
1545 1585  
1546 1586                  if (error) {
1547 1587                          rp->ns->ns_status = puterrno(error);
1548 1588                          t_flag = curthread->t_flag & T_WOULDBLOCK;
1549 1589                          rp->thread->t_flag |= t_flag;
1550 1590                          continue;
1551 1591                  }
1552 1592                  if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1553 1593                          len = rp->wa->wa_offset + rp->wa->wa_count - off;
1554 1594          } while ((rp = rp->list) != NULL);
1555 1595  
1556 1596          /*
1557 1597           * Step through the cluster attempting to gather as many
1558 1598           * requests which are contiguous as possible.  These
1559 1599           * contiguous requests are handled via one call to VOP_WRITE
1560 1600           * instead of different calls to VOP_WRITE.  We also keep
1561 1601           * track of the fact that any data was written.
1562 1602           */
1563 1603          rp = nlp->list;
1564 1604          data_written = 0;
1565 1605          do {
1566 1606                  /*
1567 1607                   * Skip any requests which are already marked as having an
1568 1608                   * error.
1569 1609                   */
1570 1610                  if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1571 1611                          rp = rp->list;
1572 1612                          continue;
1573 1613                  }
1574 1614  
1575 1615                  /*
1576 1616                   * Count the number of iovec's which are required
1577 1617                   * to handle this set of requests.  One iovec is
1578 1618                   * needed for each data buffer, whether addressed
1579 1619                   * by wa_data or by the b_rptr pointers in the
1580 1620                   * mblk chains.
1581 1621                   */
1582 1622                  iovcnt = 0;
1583 1623                  lrp = rp;
1584 1624                  for (;;) {
1585 1625                          if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1586 1626                                  iovcnt++;
1587 1627                          else {
1588 1628                                  m = lrp->wa->wa_mblk;
1589 1629                                  while (m != NULL) {
1590 1630                                          iovcnt++;
1591 1631                                          m = m->b_cont;
1592 1632                                  }
1593 1633                          }
1594 1634                          if (lrp->list == NULL ||
1595 1635                              lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1596 1636                              lrp->wa->wa_offset + lrp->wa->wa_count !=
1597 1637                              lrp->list->wa->wa_offset) {
1598 1638                                  lrp = lrp->list;
1599 1639                                  break;
1600 1640                          }
1601 1641                          lrp = lrp->list;
1602 1642                  }
1603 1643  
1604 1644                  if (iovcnt <= MAXCLIOVECS) {
1605 1645  #ifdef DEBUG
1606 1646                          rfs_write_hits++;
1607 1647  #endif
1608 1648                          niovp = iov;
1609 1649                  } else {
1610 1650  #ifdef DEBUG
1611 1651                          rfs_write_misses++;
1612 1652  #endif
1613 1653                          niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1614 1654                  }
1615 1655                  /*
1616 1656                   * Put together the scatter/gather iovecs.
1617 1657                   */
1618 1658                  iovp = niovp;
1619 1659                  trp = rp;
1620 1660                  count = 0;
1621 1661                  do {
1622 1662                          if (trp->wa->wa_data || trp->wa->wa_rlist) {
1623 1663                                  if (trp->wa->wa_rlist) {
1624 1664                                          iovp->iov_base =
1625 1665                                              (char *)((trp->wa->wa_rlist)->
1626 1666                                              u.c_daddr3);
1627 1667                                          iovp->iov_len = trp->wa->wa_count;
1628 1668                                  } else  {
1629 1669                                          iovp->iov_base = trp->wa->wa_data;
1630 1670                                          iovp->iov_len = trp->wa->wa_count;
1631 1671                                  }
1632 1672                                  iovp++;
1633 1673                          } else {
1634 1674                                  m = trp->wa->wa_mblk;
1635 1675                                  rcount = trp->wa->wa_count;
1636 1676                                  while (m != NULL) {
1637 1677                                          iovp->iov_base = (caddr_t)m->b_rptr;
1638 1678                                          iovp->iov_len = (m->b_wptr - m->b_rptr);
1639 1679                                          rcount -= iovp->iov_len;
1640 1680                                          if (rcount < 0)
1641 1681                                                  iovp->iov_len += rcount;
1642 1682                                          iovp++;
1643 1683                                          if (rcount <= 0)
1644 1684                                                  break;
1645 1685                                          m = m->b_cont;
1646 1686                                  }
1647 1687                          }
1648 1688                          count += trp->wa->wa_count;
1649 1689                          trp = trp->list;
1650 1690                  } while (trp != lrp);
1651 1691  
1652 1692                  uio.uio_iov = niovp;
1653 1693                  uio.uio_iovcnt = iovcnt;
1654 1694                  uio.uio_segflg = UIO_SYSSPACE;
1655 1695                  uio.uio_extflg = UIO_COPY_DEFAULT;
1656 1696                  uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1657 1697                  uio.uio_resid = count;
1658 1698                  /*
1659 1699                   * The limit is checked on the client. We
1660 1700                   * should allow any size writes here.
1661 1701                   */
1662 1702                  uio.uio_llimit = curproc->p_fsz_ctl;
1663 1703                  rlimit = uio.uio_llimit - rp->wa->wa_offset;
1664 1704                  if (rlimit < (rlim64_t)uio.uio_resid)
1665 1705                          uio.uio_resid = (uint_t)rlimit;
1666 1706  
1667 1707                  /*
1668 1708                   * For now we assume no append mode.
1669 1709                   */
1670 1710  
1671 1711                  /*
1672 1712                   * We're changing creds because VM may fault
1673 1713                   * and we need the cred of the current
1674 1714                   * thread to be used if quota checking is
1675 1715                   * enabled.
1676 1716                   */
1677 1717                  savecred = curthread->t_cred;
1678 1718                  curthread->t_cred = cr;
1679 1719                  error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1680 1720                  curthread->t_cred = savecred;
1681 1721  
1682 1722                  /* check if a monitor detected a delegation conflict */
1683 1723                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1684 1724                          /* mark as wouldblock so response is dropped */
1685 1725                          curthread->t_flag |= T_WOULDBLOCK;
1686 1726  
1687 1727                  if (niovp != iov)
1688 1728                          kmem_free(niovp, sizeof (*niovp) * iovcnt);
1689 1729  
1690 1730                  if (!error) {
1691 1731                          data_written = 1;
1692 1732                          /*
1693 1733                           * Get attributes again so we send the latest mod
1694 1734                           * time to the client side for its cache.
1695 1735                           */
1696 1736                          va.va_mask = AT_ALL;    /* now we want everything */
1697 1737  
1698 1738                          error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1699 1739  
1700 1740                          if (!error)
1701 1741                                  acl_perm(vp, exi, &va, rp->cr);
1702 1742                  }
1703 1743  
1704 1744                  /*
1705 1745                   * Fill in the status responses for each request
1706 1746                   * which was just handled.  Also, copy the latest
1707 1747                   * attributes in to the attribute responses if
1708 1748                   * appropriate.
1709 1749                   */
1710 1750                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1711 1751                  do {
1712 1752                          rp->thread->t_flag |= t_flag;
1713 1753                          /* check for overflows */
1714 1754                          if (!error) {
1715 1755                                  error  = vattr_to_nattr(&va, &rp->ns->ns_attr);
1716 1756                          }
1717 1757                          rp->ns->ns_status = puterrno(error);
1718 1758                          rp = rp->list;
1719 1759                  } while (rp != lrp);
1720 1760          } while (rp != NULL);
1721 1761  
1722 1762          /*
1723 1763           * If any data was written at all, then we need to flush
1724 1764           * the data and metadata to stable storage.
1725 1765           */
1726 1766          if (data_written) {
1727 1767                  error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1728 1768  
1729 1769                  if (!error) {
1730 1770                          error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
  
1731 1771                  }
1732 1772          }
1733 1773  
1734 1774          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1735 1775  
1736 1776          if (in_crit)
1737 1777                  nbl_end_crit(vp);
1738 1778          VN_RELE(vp);
1739 1779  
1740 1780          t_flag = curthread->t_flag & T_WOULDBLOCK;
1741      -        mutex_enter(&rfs_async_write_lock);
     1781 +        mutex_enter(&nsrv->async_write_lock);
1742 1782          for (rp = nlp->list; rp != NULL; rp = rp->list) {
1743 1783                  if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1744 1784                          rp->ns->ns_status = puterrno(error);
1745 1785                          rp->thread->t_flag |= t_flag;
1746 1786                  }
1747 1787          }
1748 1788          cv_broadcast(&nlp->cv);
1749      -        mutex_exit(&rfs_async_write_lock);
     1789 +        mutex_exit(&nsrv->async_write_lock);
1750 1790  
1751 1791  }
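
The middle of rfs_write() coalesces contiguous requests into one VOP_WRITE by building a scatter/gather array: one iovec per flat wa_data buffer and one per mblk in a chain, using the on-stack iov[MAXCLIOVECS] when it is large enough and falling back to kmem_alloc() otherwise. The sketch below is a self-contained userland model of that count-then-fill pattern over a simplified buffer chain; the MAXCLIOVECS stand-in is shrunk on purpose so the fallback path is visible, and none of this is the kernel code itself.

#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>

#define MAXCLIOVECS_MODEL       4       /* tiny, to demonstrate the fallback */

struct chain_model {                    /* stand-in for an mblk chain */
        void *base;
        size_t len;
        struct chain_model *next;
};

/* Pass 1: count how many iovec entries the chain needs. */
static int
count_iovecs(const struct chain_model *c)
{
        int n = 0;

        for (; c != NULL; c = c->next)
                n++;
        return (n);
}

/* Pass 2: fill the array, borrowing the caller's stack array if it fits. */
static struct iovec *
build_iovecs(struct chain_model *c, struct iovec *stack_iov, int *iovcntp)
{
        int n = count_iovecs(c);
        struct iovec *iov = (n <= MAXCLIOVECS_MODEL) ?
            stack_iov : calloc(n, sizeof (struct iovec));
        int i;

        if (iov == NULL)
                return (NULL);
        for (i = 0; c != NULL; c = c->next, i++) {
                iov[i].iov_base = c->base;
                iov[i].iov_len = c->len;
        }
        *iovcntp = n;
        return (iov);
}

int
main(void)
{
        char a[] = "hello ", b[] = "world";
        struct chain_model c2 = { b, sizeof (b) - 1, NULL };
        struct chain_model c1 = { a, sizeof (a) - 1, &c2 };
        struct iovec stack_iov[MAXCLIOVECS_MODEL];
        int iovcnt = 0;
        struct iovec *iov = build_iovecs(&c1, stack_iov, &iovcnt);

        printf("iovcnt = %d, heap fallback = %s\n", iovcnt,
            (iov != NULL && iov != stack_iov) ? "yes" : "no");
        if (iov != NULL && iov != stack_iov)
                free(iov);
        return (0);
}
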
1752 1792  
1753 1793  void *
1754 1794  rfs_write_getfh(struct nfswriteargs *wa)
1755 1795  {
1756 1796          return (&wa->wa_fhandle);
1757 1797  }
1758 1798  
1759 1799  /*
1760 1800   * Create a file.
1761 1801   * Creates a file with given attributes and returns those attributes
1762 1802   * and an fhandle for the new file.
1763 1803   */
1764 1804  void
1765 1805  rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1766 1806      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1767 1807  {
1768 1808          int error;
1769 1809          int lookuperr;
1770 1810          int in_crit = 0;
1771 1811          struct vattr va;
1772 1812          vnode_t *vp;
1773 1813          vnode_t *realvp;
1774 1814          vnode_t *dvp;
1775 1815          char *name = args->ca_da.da_name;
1776 1816          vnode_t *tvp = NULL;
1777 1817          int mode;
1778 1818          int lookup_ok;
1779 1819          bool_t trunc;
1780 1820          struct sockaddr *ca;
1781 1821  
1782 1822          /*
1783 1823           * Disallow NULL paths
1784 1824           */
1785 1825          if (name == NULL || *name == '\0') {
1786 1826                  dr->dr_status = NFSERR_ACCES;
1787 1827                  return;
1788 1828          }
1789 1829  
1790 1830          dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1791 1831          if (dvp == NULL) {
1792 1832                  dr->dr_status = NFSERR_STALE;
1793 1833                  return;
1794 1834          }
1795 1835  
1796 1836          error = sattr_to_vattr(args->ca_sa, &va);
1797 1837          if (error) {
1798 1838                  dr->dr_status = puterrno(error);
1799 1839                  return;
1800 1840          }
1801 1841  
1802 1842          /*
1803 1843           * Must specify the mode.
1804 1844           */
1805 1845          if (!(va.va_mask & AT_MODE)) {
1806 1846                  VN_RELE(dvp);
1807 1847                  dr->dr_status = NFSERR_INVAL;
1808 1848                  return;
1809 1849          }
1810 1850  
1811 1851          /*
1812 1852           * This is a completely gross hack to make mknod
1813 1853           * work over the wire until we can wack the protocol
1814 1854           * work over the wire until we can whack the protocol
1815 1855          if ((va.va_mode & IFMT) == IFCHR) {
1816 1856                  if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1817 1857                          va.va_type = VFIFO;     /* xtra kludge for named pipe */
1818 1858                  else {
1819 1859                          va.va_type = VCHR;
1820 1860                          /*
1821 1861                           * uncompress the received dev_t
1822 1862                           * if the top half is zero indicating a request
1823 1863                           * from an `older style' OS.
1824 1864                           */
1825 1865                          if ((va.va_size & 0xffff0000) == 0)
1826 1866                                  va.va_rdev = nfsv2_expdev(va.va_size);
1827 1867                          else
1828 1868                                  va.va_rdev = (dev_t)va.va_size;
1829 1869                  }
1830 1870                  va.va_mask &= ~AT_SIZE;
1831 1871          } else if ((va.va_mode & IFMT) == IFBLK) {
1832 1872                  va.va_type = VBLK;
1833 1873                  /*
1834 1874                   * uncompress the received dev_t
1835 1875                   * if the top half is zero indicating a request
1836 1876                   * from an `older style' OS.
1837 1877                   */
1838 1878                  if ((va.va_size & 0xffff0000) == 0)
1839 1879                          va.va_rdev = nfsv2_expdev(va.va_size);
1840 1880                  else
1841 1881                          va.va_rdev = (dev_t)va.va_size;
1842 1882                  va.va_mask &= ~AT_SIZE;
1843 1883          } else if ((va.va_mode & IFMT) == IFSOCK) {
1844 1884                  va.va_type = VSOCK;
1845 1885          } else {
1846 1886                  va.va_type = VREG;
1847 1887          }
1848 1888          va.va_mode &= ~IFMT;
1849 1889          va.va_mask |= AT_TYPE;
1850 1890  
1851 1891          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1852 1892          name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1853 1893              MAXPATHLEN);
1854 1894          if (name == NULL) {
1855 1895                  dr->dr_status = puterrno(EINVAL);
1856 1896                  return;
1857 1897          }
1858 1898  
1859 1899          /*
1860 1900           * Why was the choice made to use VWRITE as the mode to the
1861 1901           * call to VOP_CREATE ? This results in a bug.  When a client
1862 1902           * opens a file that already exists and is RDONLY, the second
1863 1903           * open fails with an EACCES because of the mode.
1864 1904           * bug ID 1054648.
1865 1905           */
1866 1906          lookup_ok = 0;
1867 1907          mode = VWRITE;
1868 1908          if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1869 1909                  error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1870 1910                      NULL, NULL, NULL);
1871 1911                  if (!error) {
1872 1912                          struct vattr at;
1873 1913  
1874 1914                          lookup_ok = 1;
1875 1915                          at.va_mask = AT_MODE;
1876 1916                          error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1877 1917                          if (!error)
1878 1918                                  mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1879 1919                          VN_RELE(tvp);
1880 1920                          tvp = NULL;
1881 1921                  }
1882 1922          }
1883 1923  
1884 1924          if (!lookup_ok) {
1885 1925                  if (rdonly(ro, dvp)) {
1886 1926                          error = EROFS;
1887 1927                  } else if (va.va_type != VREG && va.va_type != VFIFO &&
1888 1928                      va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1889 1929                          error = EPERM;
1890 1930                  } else {
1891 1931                          error = 0;
1892 1932                  }
1893 1933          }
1894 1934  
1895 1935          /*
1896 1936           * If file size is being modified on an already existing file
1897 1937           * make sure that there are no conflicting non-blocking mandatory
1898 1938           * locks in the region being manipulated. Return EACCES if there
1899 1939           * are conflicting locks.
1900 1940           */
1901 1941          if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1902 1942                  lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1903 1943                      NULL, NULL, NULL);
1904 1944  
1905 1945                  if (!lookuperr &&
1906 1946                      rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1907 1947                          VN_RELE(tvp);
1908 1948                          curthread->t_flag |= T_WOULDBLOCK;
1909 1949                          goto out;
1910 1950                  }
1911 1951  
1912 1952                  if (!lookuperr && nbl_need_check(tvp)) {
1913 1953                          /*
1914 1954                           * The file exists. Now check if it has any
1915 1955                           * conflicting non-blocking mandatory locks
1916 1956                           * in the region being changed.
1917 1957                           */
1918 1958                          struct vattr bva;
1919 1959                          u_offset_t offset;
1920 1960                          ssize_t length;
1921 1961  
1922 1962                          nbl_start_crit(tvp, RW_READER);
1923 1963                          in_crit = 1;
1924 1964  
1925 1965                          bva.va_mask = AT_SIZE;
1926 1966                          error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1927 1967                          if (!error) {
1928 1968                                  if (va.va_size < bva.va_size) {
1929 1969                                          offset = va.va_size;
1930 1970                                          length = bva.va_size - va.va_size;
1931 1971                                  } else {
1932 1972                                          offset = bva.va_size;
1933 1973                                          length = va.va_size - bva.va_size;
1934 1974                                  }
1935 1975                                  if (length) {
1936 1976                                          if (nbl_conflict(tvp, NBL_WRITE,
1937 1977                                              offset, length, 0, NULL)) {
1938 1978                                                  error = EACCES;
1939 1979                                          }
1940 1980                                  }
1941 1981                          }
1942 1982                          if (error) {
1943 1983                                  nbl_end_crit(tvp);
1944 1984                                  VN_RELE(tvp);
1945 1985                                  in_crit = 0;
1946 1986                          }
1947 1987                  } else if (tvp != NULL) {
1948 1988                          VN_RELE(tvp);
1949 1989                  }
1950 1990          }
1951 1991  
1952 1992          if (!error) {
1953 1993                  /*
1954 1994                   * If filesystem is shared with nosuid then remove any
1955 1995                   * setuid/setgid bits on create.
1956 1996                   */
1957 1997                  if (va.va_type == VREG &&
1958 1998                      exi->exi_export.ex_flags & EX_NOSUID)
1959 1999                          va.va_mode &= ~(VSUID | VSGID);
1960 2000  
1961 2001                  error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1962 2002                      NULL, NULL);
1963 2003  
1964 2004                  if (!error) {
1965 2005  
1966 2006                          if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1967 2007                                  trunc = TRUE;
1968 2008                          else
1969 2009                                  trunc = FALSE;
1970 2010  
1971 2011                          if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1972 2012                                  VN_RELE(vp);
1973 2013                                  curthread->t_flag |= T_WOULDBLOCK;
1974 2014                                  goto out;
1975 2015                          }
1976 2016                          va.va_mask = AT_ALL;
1977 2017  
1978 2018                          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1979 2019  
1980 2020                          /* check for overflows */
1981 2021                          if (!error) {
1982 2022                                  acl_perm(vp, exi, &va, cr);
1983 2023                                  error = vattr_to_nattr(&va, &dr->dr_attr);
1984 2024                                  if (!error) {
1985 2025                                          error = makefh(&dr->dr_fhandle, vp,
1986 2026                                              exi);
1987 2027                                  }
1988 2028                          }
1989 2029                          /*
1990 2030                           * Force modified metadata out to stable storage.
1991 2031                           *
1992 2032                           * if an underlying vp exists, pass it to VOP_FSYNC
1993 2033                           */
1994 2034                          if (VOP_REALVP(vp, &realvp, NULL) == 0)
1995 2035                                  (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1996 2036                          else
1997 2037                                  (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1998 2038                          VN_RELE(vp);
1999 2039                  }
2000 2040  
2001 2041                  if (in_crit) {
2002 2042                          nbl_end_crit(tvp);
2003 2043                          VN_RELE(tvp);
2004 2044                  }
2005 2045          }
2006 2046  
2007 2047          /*
2008 2048           * Force modified data and metadata out to stable storage.
2009 2049           */
2010 2050          (void) VOP_FSYNC(dvp, 0, cr, NULL);
2011 2051  
2012 2052  out:
2013 2053  
2014 2054          VN_RELE(dvp);
2015 2055  
2016 2056          dr->dr_status = puterrno(error);
2017 2057  
2018 2058          if (name != args->ca_da.da_name)
2019 2059                  kmem_free(name, MAXPATHLEN);
2020 2060  }
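
One detail of rfs_create() worth spelling out: when an existing regular file is resized through the create call, the server computes the byte range the size change touches (the tail being cut off on truncation, or the newly exposed tail on extension) and asks nbl_conflict() about exactly that range. A small standalone model of the range computation, using simplified 64-bit types:

#include <stdio.h>

typedef unsigned long long usize_model_t;       /* stand-in for u_offset_t */

/*
 * Compute the region affected by changing a file's size, as
 * rfs_create() does before its nbl_conflict() check.  Returns the
 * length of the region and stores its start in *offp.
 */
static usize_model_t
size_change_range(usize_model_t cur_size, usize_model_t new_size,
    usize_model_t *offp)
{
        if (new_size < cur_size) {              /* truncation */
                *offp = new_size;
                return (cur_size - new_size);
        }
        *offp = cur_size;                       /* extension (or no change) */
        return (new_size - cur_size);
}

int
main(void)
{
        usize_model_t off, len;

        len = size_change_range(8192, 0, &off);         /* truncate to zero */
        printf("check [%llu, +%llu)\n", off, len);
        len = size_change_range(4096, 4096, &off);      /* same size: empty range */
        printf("check [%llu, +%llu)\n", off, len);
        return (0);
}
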
2021 2061  void *
2022 2062  rfs_create_getfh(struct nfscreatargs *args)
2023 2063  {
2024 2064          return (args->ca_da.da_fhandle);
2025 2065  }
2026 2066  
2027 2067  /*
2028 2068   * Remove a file.
2029 2069   * Remove named file from parent directory.
2030 2070   */
2031 2071  /* ARGSUSED */
2032 2072  void
2033 2073  rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
2034 2074      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2035 2075  {
2036 2076          int error = 0;
2037 2077          vnode_t *vp;
2038 2078          vnode_t *targvp;
2039 2079          int in_crit = 0;
2040 2080  
2041 2081          /*
2042 2082           * Disallow NULL paths
2043 2083           */
2044 2084          if (da->da_name == NULL || *da->da_name == '\0') {
2045 2085                  *status = NFSERR_ACCES;
2046 2086                  return;
2047 2087          }
2048 2088  
2049 2089          vp = nfs_fhtovp(da->da_fhandle, exi);
2050 2090          if (vp == NULL) {
2051 2091                  *status = NFSERR_STALE;
2052 2092                  return;
2053 2093          }
2054 2094  
2055 2095          if (rdonly(ro, vp)) {
2056 2096                  VN_RELE(vp);
2057 2097                  *status = NFSERR_ROFS;
2058 2098                  return;
2059 2099          }
2060 2100  
2061 2101          /*
2062 2102           * Check for a conflict with a non-blocking mandatory share reservation.
2063 2103           */
2064 2104          error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
2065 2105              NULL, cr, NULL, NULL, NULL);
2066 2106          if (error != 0) {
2067 2107                  VN_RELE(vp);
2068 2108                  *status = puterrno(error);
2069 2109                  return;
2070 2110          }
2071 2111  
2072 2112          /*
2073 2113           * If the file is delegated to an v4 client, then initiate
2074 2114           * recall and drop this request (by setting T_WOULDBLOCK).
2075 2115           * The client will eventually re-transmit the request and
2076 2116           * (hopefully), by then, the v4 client will have returned
2077 2117           * the delegation.
2078 2118           */
2079 2119  
2080 2120          if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2081 2121                  VN_RELE(vp);
2082 2122                  VN_RELE(targvp);
2083 2123                  curthread->t_flag |= T_WOULDBLOCK;
2084 2124                  return;
2085 2125          }
2086 2126  
2087 2127          if (nbl_need_check(targvp)) {
2088 2128                  nbl_start_crit(targvp, RW_READER);
2089 2129                  in_crit = 1;
2090 2130                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2091 2131                          error = EACCES;
2092 2132                          goto out;
2093 2133                  }
2094 2134          }
2095 2135  
2096 2136          error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2097 2137  
2098 2138          /*
2099 2139           * Force modified data and metadata out to stable storage.
2100 2140           */
2101 2141          (void) VOP_FSYNC(vp, 0, cr, NULL);
2102 2142  
2103 2143  out:
2104 2144          if (in_crit)
2105 2145                  nbl_end_crit(targvp);
2106 2146          VN_RELE(targvp);
2107 2147          VN_RELE(vp);
2108 2148  
2109 2149          *status = puterrno(error);
2110 2150  
2111 2151  }
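
rfs_remove() shows the nbmand bracketing used throughout this file: enter the vnode's critical region only if nbl_need_check() says mandatory share reservations may be present, test for a conflict, perform the operation, and always leave the region on the way out. A compact userland model of that bracketing follows; the nbl_* calls here are stubs, since the real routines live in the kernel.

#include <stdio.h>
#include <errno.h>

/* Stubs standing in for the kernel nbmand interfaces. */
static int nbl_need_check_stub(void *vp)        { (void)vp; return (1); }
static void nbl_start_crit_stub(void *vp)       { (void)vp; }
static void nbl_end_crit_stub(void *vp)         { (void)vp; }
static int nbl_conflict_stub(void *vp)          { (void)vp; return (0); }

/* Returns 0 on success or an errno-style failure. */
static int
remove_with_nbmand_check(void *targvp)
{
        int in_crit = 0, error = 0;

        if (nbl_need_check_stub(targvp)) {
                nbl_start_crit_stub(targvp);
                in_crit = 1;
                if (nbl_conflict_stub(targvp)) {
                        error = EACCES;
                        goto out;
                }
        }
        /* the real code performs VOP_REMOVE() and VOP_FSYNC() here */
out:
        if (in_crit)
                nbl_end_crit_stub(targvp);
        return (error);
}

int
main(void)
{
        printf("remove result = %d\n", remove_with_nbmand_check(NULL));
        return (0);
}
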
2112 2152  
2113 2153  void *
2114 2154  rfs_remove_getfh(struct nfsdiropargs *da)
2115 2155  {
2116 2156          return (da->da_fhandle);
2117 2157  }
2118 2158  
2119 2159  /*
2120 2160   * rename a file
2121 2161   * Give a file (from) a new name (to).
2122 2162   */
2123 2163  /* ARGSUSED */
2124 2164  void
2125 2165  rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
2126 2166      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2127 2167  {
2128 2168          int error = 0;
2129 2169          vnode_t *fromvp;
2130 2170          vnode_t *tovp;
2131 2171          struct exportinfo *to_exi;
2132 2172          fhandle_t *fh;
2133 2173          vnode_t *srcvp;
2134 2174          vnode_t *targvp;
2135 2175          int in_crit = 0;
2136 2176  
2137 2177          fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2138 2178          if (fromvp == NULL) {
2139 2179                  *status = NFSERR_STALE;
2140 2180                  return;
2141 2181          }
2142 2182  
2143 2183          fh = args->rna_to.da_fhandle;
2144 2184          to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2145 2185          if (to_exi == NULL) {
2146 2186                  VN_RELE(fromvp);
2147 2187                  *status = NFSERR_ACCES;
2148 2188                  return;
2149 2189          }
2150 2190          exi_rele(to_exi);
2151 2191  
2152 2192          if (to_exi != exi) {
2153 2193                  VN_RELE(fromvp);
2154 2194                  *status = NFSERR_XDEV;
2155 2195                  return;
2156 2196          }
2157 2197  
2158 2198          tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2159 2199          if (tovp == NULL) {
2160 2200                  VN_RELE(fromvp);
2161 2201                  *status = NFSERR_STALE;
2162 2202                  return;
2163 2203          }
2164 2204  
2165 2205          if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2166 2206                  VN_RELE(tovp);
2167 2207                  VN_RELE(fromvp);
2168 2208                  *status = NFSERR_NOTDIR;
2169 2209                  return;
2170 2210          }
2171 2211  
2172 2212          /*
2173 2213           * Disallow NULL paths
2174 2214           */
2175 2215          if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2176 2216              args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2177 2217                  VN_RELE(tovp);
2178 2218                  VN_RELE(fromvp);
2179 2219                  *status = NFSERR_ACCES;
2180 2220                  return;
2181 2221          }
2182 2222  
2183 2223          if (rdonly(ro, tovp)) {
2184 2224                  VN_RELE(tovp);
2185 2225                  VN_RELE(fromvp);
2186 2226                  *status = NFSERR_ROFS;
2187 2227                  return;
2188 2228          }
2189 2229  
2190 2230          /*
2191 2231           * Check for a conflict with a non-blocking mandatory share reservation.
2192 2232           */
2193 2233          error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2194 2234              NULL, cr, NULL, NULL, NULL);
2195 2235          if (error != 0) {
2196 2236                  VN_RELE(tovp);
2197 2237                  VN_RELE(fromvp);
2198 2238                  *status = puterrno(error);
2199 2239                  return;
2200 2240          }
2201 2241  
2202 2242          /* Check for delegations on the source file */
2203 2243  
  
2204 2244          if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2205 2245                  VN_RELE(tovp);
2206 2246                  VN_RELE(fromvp);
2207 2247                  VN_RELE(srcvp);
2208 2248                  curthread->t_flag |= T_WOULDBLOCK;
2209 2249                  return;
2210 2250          }
2211 2251  
2212 2252          /* Check for delegation on the file being renamed over, if it exists */
2213 2253  
2214      -        if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
     2254 +        if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2215 2255              VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2216 2256              NULL, NULL, NULL) == 0) {
2217 2257  
2218 2258                  if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2219 2259                          VN_RELE(tovp);
2220 2260                          VN_RELE(fromvp);
2221 2261                          VN_RELE(srcvp);
2222 2262                          VN_RELE(targvp);
2223 2263                          curthread->t_flag |= T_WOULDBLOCK;
2224 2264                          return;
2225 2265                  }
2226 2266                  VN_RELE(targvp);
2227 2267          }
2228 2268  
2229 2269  
2230 2270          if (nbl_need_check(srcvp)) {
2231 2271                  nbl_start_crit(srcvp, RW_READER);
2232 2272                  in_crit = 1;
2233 2273                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2234 2274                          error = EACCES;
2235 2275                          goto out;
2236 2276                  }
2237 2277          }
2238 2278  
2239 2279          error = VOP_RENAME(fromvp, args->rna_from.da_name,
2240 2280              tovp, args->rna_to.da_name, cr, NULL, 0);
2241 2281  
2242 2282          if (error == 0)
2243 2283                  vn_renamepath(tovp, srcvp, args->rna_to.da_name,
2244 2284                      strlen(args->rna_to.da_name));
2245 2285  
2246 2286          /*
2247 2287           * Force modified data and metadata out to stable storage.
2248 2288           */
2249 2289          (void) VOP_FSYNC(tovp, 0, cr, NULL);
2250 2290          (void) VOP_FSYNC(fromvp, 0, cr, NULL);
2251 2291  
2252 2292  out:
2253 2293          if (in_crit)
2254 2294                  nbl_end_crit(srcvp);
2255 2295          VN_RELE(srcvp);
2256 2296          VN_RELE(tovp);
2257 2297          VN_RELE(fromvp);
2258 2298  
2259 2299          *status = puterrno(error);
2260 2300  
2261 2301  }
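
Two points in rfs_rename() are worth a note. First, the target directory's handle is resolved to its export with checkexport(), and the rename is refused with NFSERR_XDEV unless that export is the one the request arrived on. Second, the former global rfs4_deleg_policy is now read through nfs4_get_deleg_policy(), matching the per-zone state introduced by this change; when the policy is SRV_NEVER_DELEGATE the extra lookup of the rename target is skipped. A toy model of the export check, with integer ids standing in for exportinfo pointers and assumed status values:

#include <stdio.h>

#define OK_MODEL                0
#define NFSERR_ACCES_MODEL      13      /* illustrative values only */
#define NFSERR_XDEV_MODEL       18

/*
 * Model of the cross-export test: -1 means the target handle did not
 * resolve to any export (checkexport() returned NULL).
 */
static int
rename_export_check(int from_export, int to_export)
{
        if (to_export == -1)
                return (NFSERR_ACCES_MODEL);
        if (to_export != from_export)
                return (NFSERR_XDEV_MODEL);
        return (OK_MODEL);
}

int
main(void)
{
        printf("same export: %d\n", rename_export_check(3, 3));
        printf("other export: %d\n", rename_export_check(3, 7));
        printf("not exported: %d\n", rename_export_check(3, -1));
        return (0);
}
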
2262 2302  void *
2263 2303  rfs_rename_getfh(struct nfsrnmargs *args)
2264 2304  {
2265 2305          return (args->rna_from.da_fhandle);
2266 2306  }
2267 2307  
2268 2308  /*
2269 2309   * Link to a file.
2270 2310   * Create a file (to) which is a hard link to the given file (from).
2271 2311   */
2272 2312  /* ARGSUSED */
2273 2313  void
2274 2314  rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2275 2315      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2276 2316  {
2277 2317          int error;
2278 2318          vnode_t *fromvp;
2279 2319          vnode_t *tovp;
2280 2320          struct exportinfo *to_exi;
2281 2321          fhandle_t *fh;
2282 2322  
2283 2323          fromvp = nfs_fhtovp(args->la_from, exi);
2284 2324          if (fromvp == NULL) {
2285 2325                  *status = NFSERR_STALE;
2286 2326                  return;
2287 2327          }
2288 2328  
2289 2329          fh = args->la_to.da_fhandle;
2290 2330          to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2291 2331          if (to_exi == NULL) {
2292 2332                  VN_RELE(fromvp);
2293 2333                  *status = NFSERR_ACCES;
2294 2334                  return;
2295 2335          }
2296 2336          exi_rele(to_exi);
2297 2337  
2298 2338          if (to_exi != exi) {
2299 2339                  VN_RELE(fromvp);
2300 2340                  *status = NFSERR_XDEV;
2301 2341                  return;
2302 2342          }
2303 2343  
2304 2344          tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2305 2345          if (tovp == NULL) {
2306 2346                  VN_RELE(fromvp);
2307 2347                  *status = NFSERR_STALE;
2308 2348                  return;
2309 2349          }
2310 2350  
2311 2351          if (tovp->v_type != VDIR) {
2312 2352                  VN_RELE(tovp);
2313 2353                  VN_RELE(fromvp);
2314 2354                  *status = NFSERR_NOTDIR;
2315 2355                  return;
2316 2356          }
2317 2357          /*
2318 2358           * Disallow NULL paths
2319 2359           */
2320 2360          if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2321 2361                  VN_RELE(tovp);
2322 2362                  VN_RELE(fromvp);
2323 2363                  *status = NFSERR_ACCES;
2324 2364                  return;
2325 2365          }
2326 2366  
2327 2367          if (rdonly(ro, tovp)) {
2328 2368                  VN_RELE(tovp);
2329 2369                  VN_RELE(fromvp);
2330 2370                  *status = NFSERR_ROFS;
2331 2371                  return;
2332 2372          }
2333 2373  
2334 2374          error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2335 2375  
2336 2376          /*
2337 2377           * Force modified data and metadata out to stable storage.
2338 2378           */
2339 2379          (void) VOP_FSYNC(tovp, 0, cr, NULL);
2340 2380          (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2341 2381  
2342 2382          VN_RELE(tovp);
2343 2383          VN_RELE(fromvp);
2344 2384  
2345 2385          *status = puterrno(error);
2346 2386  
2347 2387  }
2348 2388  void *
2349 2389  rfs_link_getfh(struct nfslinkargs *args)
2350 2390  {
2351 2391          return (args->la_from);
2352 2392  }
2353 2393  
2354 2394  /*
2355 2395   * Symbolically link to a file.
2356 2396   * Create a file (to) with the given attributes which is a symbolic link
2357 2397   * to the given path name (to).
2358 2398   */
2359 2399  void
2360 2400  rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2361 2401      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2362 2402  {
2363 2403          int error;
2364 2404          struct vattr va;
2365 2405          vnode_t *vp;
2366 2406          vnode_t *svp;
2367 2407          int lerror;
2368 2408          struct sockaddr *ca;
2369 2409          char *name = NULL;
2370 2410  
2371 2411          /*
2372 2412           * Disallow NULL paths
2373 2413           */
2374 2414          if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2375 2415                  *status = NFSERR_ACCES;
2376 2416                  return;
2377 2417          }
2378 2418  
2379 2419          vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2380 2420          if (vp == NULL) {
2381 2421                  *status = NFSERR_STALE;
2382 2422                  return;
2383 2423          }
2384 2424  
2385 2425          if (rdonly(ro, vp)) {
2386 2426                  VN_RELE(vp);
2387 2427                  *status = NFSERR_ROFS;
2388 2428                  return;
2389 2429          }
2390 2430  
2391 2431          error = sattr_to_vattr(args->sla_sa, &va);
2392 2432          if (error) {
2393 2433                  VN_RELE(vp);
2394 2434                  *status = puterrno(error);
2395 2435                  return;
2396 2436          }
2397 2437  
2398 2438          if (!(va.va_mask & AT_MODE)) {
2399 2439                  VN_RELE(vp);
2400 2440                  *status = NFSERR_INVAL;
2401 2441                  return;
2402 2442          }
2403 2443  
2404 2444          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2405 2445          name = nfscmd_convname(ca, exi, args->sla_tnm,
2406 2446              NFSCMD_CONV_INBOUND, MAXPATHLEN);
2407 2447  
2408 2448          if (name == NULL) {
2409 2449                  *status = NFSERR_ACCES;
2410 2450                  return;
2411 2451          }
2412 2452  
2413 2453          va.va_type = VLNK;
2414 2454          va.va_mask |= AT_TYPE;
2415 2455  
2416 2456          error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2417 2457  
2418 2458          /*
2419 2459           * Force new data and metadata out to stable storage.
2420 2460           */
2421 2461          lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2422 2462              NULL, cr, NULL, NULL, NULL);
2423 2463  
2424 2464          if (!lerror) {
2425 2465                  (void) VOP_FSYNC(svp, 0, cr, NULL);
2426 2466                  VN_RELE(svp);
2427 2467          }
2428 2468  
2429 2469          /*
2430 2470           * Force modified data and metadata out to stable storage.
2431 2471           */
2432 2472          (void) VOP_FSYNC(vp, 0, cr, NULL);
2433 2473  
2434 2474          VN_RELE(vp);
2435 2475  
2436 2476          *status = puterrno(error);
2437 2477          if (name != args->sla_tnm)
2438 2478                  kmem_free(name, MAXPATHLEN);
2439 2479  
2440 2480  }
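
rfs_symlink(), like rfs_create() earlier, runs the client-supplied name through nfscmd_convname() for inbound character-set conversion and must track whether a new MAXPATHLEN buffer came back: the buffer is freed only when the returned pointer differs from the original argument. The sketch below models that ownership rule in userland; the conversion itself is faked, since the real routine lives in the kernel.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXPATHLEN_MODEL        1024

/*
 * Model of the nfscmd_convname() contract: hand back the caller's
 * buffer when no conversion is needed, or a newly allocated
 * MAXPATHLEN-sized buffer when it is (NULL on failure).
 */
static char *
convname_model(char *name, int needs_conversion)
{
        char *nn;

        if (!needs_conversion)
                return (name);
        if ((nn = malloc(MAXPATHLEN_MODEL)) != NULL)
                (void) snprintf(nn, MAXPATHLEN_MODEL, "%s", name);
        return (nn);
}

int
main(void)
{
        char orig[] = "link-target";
        char *name = convname_model(orig, 1);

        if (name == NULL)
                return (1);
        printf("using name: %s\n", name);
        if (name != orig)               /* mirrors: if (name != args->sla_tnm) */
                free(name);             /* kmem_free(name, MAXPATHLEN) in the kernel */
        return (0);
}
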
2441 2481  void *
2442 2482  rfs_symlink_getfh(struct nfsslargs *args)
2443 2483  {
2444 2484          return (args->sla_from.da_fhandle);
2445 2485  }
2446 2486  
2447 2487  /*
2448 2488   * Make a directory.
2449 2489   * Create a directory with the given name, parent directory, and attributes.
2450 2490   * Returns a file handle and attributes for the new directory.
2451 2491   */
2452 2492  /* ARGSUSED */
2453 2493  void
2454 2494  rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2455 2495      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2456 2496  {
2457 2497          int error;
2458 2498          struct vattr va;
2459 2499          vnode_t *dvp = NULL;
2460 2500          vnode_t *vp;
2461 2501          char *name = args->ca_da.da_name;
2462 2502  
2463 2503          /*
2464 2504           * Disallow NULL paths
2465 2505           */
2466 2506          if (name == NULL || *name == '\0') {
2467 2507                  dr->dr_status = NFSERR_ACCES;
2468 2508                  return;
2469 2509          }
2470 2510  
2471 2511          vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2472 2512          if (vp == NULL) {
2473 2513                  dr->dr_status = NFSERR_STALE;
2474 2514                  return;
2475 2515          }
2476 2516  
2477 2517          if (rdonly(ro, vp)) {
2478 2518                  VN_RELE(vp);
2479 2519                  dr->dr_status = NFSERR_ROFS;
2480 2520                  return;
2481 2521          }
2482 2522  
2483 2523          error = sattr_to_vattr(args->ca_sa, &va);
2484 2524          if (error) {
2485 2525                  VN_RELE(vp);
2486 2526                  dr->dr_status = puterrno(error);
2487 2527                  return;
2488 2528          }
2489 2529  
2490 2530          if (!(va.va_mask & AT_MODE)) {
2491 2531                  VN_RELE(vp);
2492 2532                  dr->dr_status = NFSERR_INVAL;
2493 2533                  return;
2494 2534          }
2495 2535  
2496 2536          va.va_type = VDIR;
2497 2537          va.va_mask |= AT_TYPE;
2498 2538  
2499 2539          error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2500 2540  
2501 2541          if (!error) {
2502 2542                  /*
2503 2543                   * Attributes of the newly created directory should
2504 2544                   * be returned to the client.
2505 2545                   */
2506 2546                  va.va_mask = AT_ALL; /* We want everything */
2507 2547                  error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2508 2548  
2509 2549                  /* check for overflows */
2510 2550                  if (!error) {
2511 2551                          acl_perm(vp, exi, &va, cr);
2512 2552                          error = vattr_to_nattr(&va, &dr->dr_attr);
2513 2553                          if (!error) {
2514 2554                                  error = makefh(&dr->dr_fhandle, dvp, exi);
2515 2555                          }
2516 2556                  }
2517 2557                  /*
2518 2558                   * Force new data and metadata out to stable storage.
2519 2559                   */
2520 2560                  (void) VOP_FSYNC(dvp, 0, cr, NULL);
2521 2561                  VN_RELE(dvp);
2522 2562          }
2523 2563  
2524 2564          /*
2525 2565           * Force modified data and metadata out to stable storage.
2526 2566           */
2527 2567          (void) VOP_FSYNC(vp, 0, cr, NULL);
2528 2568  
2529 2569          VN_RELE(vp);
2530 2570  
2531 2571          dr->dr_status = puterrno(error);
2532 2572  
2533 2573  }
2534 2574  void *
2535 2575  rfs_mkdir_getfh(struct nfscreatargs *args)
2536 2576  {
2537 2577          return (args->ca_da.da_fhandle);
2538 2578  }
2539 2579  
2540 2580  /*
2541 2581   * Remove a directory.
2542 2582   * Remove the given directory name from the given parent directory.
2543 2583   */
2544 2584  /* ARGSUSED */
2545 2585  void
2546 2586  rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2547 2587      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2548 2588  {
2549 2589          int error;
2550 2590          vnode_t *vp;
2551 2591  
2552 2592          /*
2553 2593           * Disallow NULL paths
2554 2594           */
2555 2595          if (da->da_name == NULL || *da->da_name == '\0') {
2556 2596                  *status = NFSERR_ACCES;
2557 2597                  return;
2558 2598          }
2559 2599  
2560 2600          vp = nfs_fhtovp(da->da_fhandle, exi);
2561 2601          if (vp == NULL) {
2562 2602                  *status = NFSERR_STALE;
2563 2603                  return;
2564 2604          }
2565 2605  
2566 2606          if (rdonly(ro, vp)) {
2567 2607                  VN_RELE(vp);
2568 2608                  *status = NFSERR_ROFS;
2569 2609                  return;
2570 2610          }
  
        (346 lines elided)
2571 2611  
2572 2612          /*
2573 2613           * VOP_RMDIR takes a third argument (the current
2574 2614           * directory of the process).  That's because someone
2575 2615           * wants to return EINVAL if one tries to remove ".".
2576 2616           * Of course, NFS servers have no idea what their
2577 2617           * clients' current directories are.  We fake it by
2578 2618           * supplying a vnode known to exist and illegal to
2579 2619           * remove.
2580 2620           */
2581      -        error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
     2621 +        error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2582 2622  
2583 2623          /*
2584 2624           * Force modified data and metadata out to stable storage.
2585 2625           */
2586 2626          (void) VOP_FSYNC(vp, 0, cr, NULL);
2587 2627  
2588 2628          VN_RELE(vp);
2589 2629  
2590 2630          /*
2591 2631           * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2592 2632           * if the directory is not empty.  A System V NFS server
2593 2633           * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2594 2634           * over the wire.
2595 2635           */
2596 2636          if (error == EEXIST)
2597 2637                  *status = NFSERR_NOTEMPTY;
2598 2638          else
2599 2639                  *status = puterrno(error);
2600 2640  
2601 2641  }
2602 2642  void *
2603 2643  rfs_rmdir_getfh(struct nfsdiropargs *da)
2604 2644  {
2605 2645          return (da->da_fhandle);
2606 2646  }
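
The comment above rfs_rmdir() covers the two quirks in this path: the faked
current-directory argument (now the zone's root vnode instead of the global
rootdir, which is the point of this change) and the System V convention of
reporting a non-empty directory as EEXIST. A minimal userland sketch of that
error mapping follows; the helper name and the numeric status value are
invented for illustration.

    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>

    #define DEMO_NFSERR_NOTEMPTY    66      /* assumed wire value, demo only */

    /*
     * Mirrors the mapping in rfs_rmdir(): EEXIST becomes NFSERR_NOTEMPTY.
     * ENOTEMPTY is accepted too so the sketch behaves the same on systems
     * that already report the POSIX error for a non-empty directory.
     */
    static int
    map_rmdir_errno(int err)
    {
            if (err == EEXIST || err == ENOTEMPTY)
                    return (DEMO_NFSERR_NOTEMPTY);
            return (err);   /* the real code sends the rest through puterrno() */
    }

    int
    main(void)
    {
            if (rmdir("/tmp/demo-nonempty-dir") == -1)
                    printf("rmdir: errno=%d -> nfs status %d\n",
                        errno, map_rmdir_errno(errno));
            return (0);
    }
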
2607 2647  
2608 2648  /* ARGSUSED */
2609 2649  void
2610 2650  rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2611 2651      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2612 2652  {
2613 2653          int error;
2614 2654          int iseof;
2615 2655          struct iovec iov;
2616 2656          struct uio uio;
2617 2657          vnode_t *vp;
2618 2658          char *ndata = NULL;
2619 2659          struct sockaddr *ca;
2620 2660          size_t nents;
2621 2661          int ret;
2622 2662  
2623 2663          vp = nfs_fhtovp(&rda->rda_fh, exi);
2624 2664          if (vp == NULL) {
2625 2665                  rd->rd_entries = NULL;
2626 2666                  rd->rd_status = NFSERR_STALE;
2627 2667                  return;
2628 2668          }
2629 2669  
2630 2670          if (vp->v_type != VDIR) {
2631 2671                  VN_RELE(vp);
2632 2672                  rd->rd_entries = NULL;
2633 2673                  rd->rd_status = NFSERR_NOTDIR;
2634 2674                  return;
2635 2675          }
2636 2676  
2637 2677          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2638 2678  
2639 2679          error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2640 2680  
2641 2681          if (error) {
2642 2682                  rd->rd_entries = NULL;
2643 2683                  goto bad;
2644 2684          }
2645 2685  
2646 2686          if (rda->rda_count == 0) {
2647 2687                  rd->rd_entries = NULL;
2648 2688                  rd->rd_size = 0;
2649 2689                  rd->rd_eof = FALSE;
2650 2690                  goto bad;
2651 2691          }
2652 2692  
2653 2693          rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2654 2694  
2655 2695          /*
2656 2696           * Allocate data for entries.  This will be freed by rfs_rddirfree.
2657 2697           */
2658 2698          rd->rd_bufsize = (uint_t)rda->rda_count;
2659 2699          rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2660 2700  
2661 2701          /*
2662 2702           * Set up io vector to read directory data
2663 2703           */
2664 2704          iov.iov_base = (caddr_t)rd->rd_entries;
2665 2705          iov.iov_len = rda->rda_count;
2666 2706          uio.uio_iov = &iov;
2667 2707          uio.uio_iovcnt = 1;
2668 2708          uio.uio_segflg = UIO_SYSSPACE;
2669 2709          uio.uio_extflg = UIO_COPY_CACHED;
2670 2710          uio.uio_loffset = (offset_t)rda->rda_offset;
2671 2711          uio.uio_resid = rda->rda_count;
2672 2712  
2673 2713          /*
2674 2714           * read directory
2675 2715           */
2676 2716          error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2677 2717  
2678 2718          /*
2679 2719           * Clean up
2680 2720           */
2681 2721          if (!error) {
2682 2722                  /*
2683 2723                   * set size and eof
2684 2724                   */
2685 2725                  if (uio.uio_resid == rda->rda_count) {
2686 2726                          rd->rd_size = 0;
2687 2727                          rd->rd_eof = TRUE;
2688 2728                  } else {
2689 2729                          rd->rd_size = (uint32_t)(rda->rda_count -
2690 2730                              uio.uio_resid);
2691 2731                          rd->rd_eof = iseof ? TRUE : FALSE;
2692 2732                  }
2693 2733          }
2694 2734  
2695 2735          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2696 2736          nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2697 2737          ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2698 2738              rda->rda_count, &ndata);
2699 2739  
2700 2740          if (ret != 0) {
2701 2741                  size_t dropbytes;
2702 2742                  /*
2703 2743                   * We had to drop one or more entries in order to fit
2704 2744                   * during the character conversion.  We need to patch
2705 2745                   * up the size and eof info.
2706 2746                   */
2707 2747                  if (rd->rd_eof)
2708 2748                          rd->rd_eof = FALSE;
2709 2749                  dropbytes = nfscmd_dropped_entrysize(
2710 2750                      (struct dirent64 *)rd->rd_entries, nents, ret);
2711 2751                  rd->rd_size -= dropbytes;
2712 2752          }
2713 2753          if (ndata == NULL) {
2714 2754                  ndata = (char *)rd->rd_entries;
2715 2755          } else if (ndata != (char *)rd->rd_entries) {
2716 2756                  kmem_free(rd->rd_entries, rd->rd_bufsize);
2717 2757                  rd->rd_entries = (void *)ndata;
2718 2758                  rd->rd_bufsize = rda->rda_count;
2719 2759          }
2720 2760  
2721 2761  bad:
2722 2762          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2723 2763  
2724 2764  #if 0 /* notyet */
2725 2765          /*
2726 2766           * Don't do this.  It causes local disk writes when just
2727 2767           * reading the file and the overhead is deemed larger
2728 2768           * than the benefit.
2729 2769           */
2730 2770          /*
2731 2771           * Force modified metadata out to stable storage.
2732 2772           */
2733 2773          (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2734 2774  #endif
2735 2775  
2736 2776          VN_RELE(vp);
2737 2777  
2738 2778          rd->rd_status = puterrno(error);
2739 2779  
2740 2780  }
2741 2781  void *
2742 2782  rfs_readdir_getfh(struct nfsrddirargs *rda)
2743 2783  {
2744 2784          return (&rda->rda_fh);
2745 2785  }
2746 2786  void
2747 2787  rfs_rddirfree(struct nfsrddirres *rd)
2748 2788  {
2749 2789          if (rd->rd_entries != NULL)
2750 2790                  kmem_free(rd->rd_entries, rd->rd_bufsize);
2751 2791  }
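
The size/eof bookkeeping after VOP_READDIR above is easy to misread: the
reply size is whatever part of the requested byte count the directory read
actually consumed, and an untouched buffer is reported as an empty, final
reply. A small standalone sketch of that arithmetic, with invented names:

    #include <stdint.h>
    #include <stdio.h>

    struct rddir_result {
            uint32_t size;          /* bytes of directory entries returned */
            int      eof;           /* nonzero when the directory is exhausted */
    };

    static struct rddir_result
    rddir_finish(uint32_t requested, uint32_t resid, int iseof)
    {
            struct rddir_result r;

            if (resid == requested) {
                    /* nothing was read: empty reply, report eof */
                    r.size = 0;
                    r.eof = 1;
            } else {
                    r.size = requested - resid;
                    r.eof = iseof ? 1 : 0;
            }
            return (r);
    }

    int
    main(void)
    {
            struct rddir_result r = rddir_finish(8192, 1024, 0);

            printf("returned %u bytes, eof=%d\n", r.size, r.eof);
            return (0);
    }
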
2752 2792  
2753 2793  /* ARGSUSED */
2754 2794  void
2755 2795  rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2756 2796      struct svc_req *req, cred_t *cr, bool_t ro)
2757 2797  {
2758 2798          int error;
2759 2799          struct statvfs64 sb;
2760 2800          vnode_t *vp;
2761 2801  
2762 2802          vp = nfs_fhtovp(fh, exi);
2763 2803          if (vp == NULL) {
2764 2804                  fs->fs_status = NFSERR_STALE;
2765 2805                  return;
2766 2806          }
2767 2807  
2768 2808          error = VFS_STATVFS(vp->v_vfsp, &sb);
2769 2809  
2770 2810          if (!error) {
2771 2811                  fs->fs_tsize = nfstsize();
2772 2812                  fs->fs_bsize = sb.f_frsize;
2773 2813                  fs->fs_blocks = sb.f_blocks;
2774 2814                  fs->fs_bfree = sb.f_bfree;
2775 2815                  fs->fs_bavail = sb.f_bavail;
2776 2816          }
2777 2817  
2778 2818          VN_RELE(vp);
2779 2819  
2780 2820          fs->fs_status = puterrno(error);
2781 2821  
2782 2822  }
2783 2823  void *
2784 2824  rfs_statfs_getfh(fhandle_t *fh)
2785 2825  {
2786 2826          return (fh);
2787 2827  }
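
rfs_statfs() answers the NFSv2 STATFS call straight from VFS_STATVFS(); note
that the wire "block size" is the fragment size (f_frsize), not f_bsize. A
userland sketch reading the same fields with POSIX statvfs(); the path and
output format are arbitrary:

    #include <stdio.h>
    #include <sys/statvfs.h>

    int
    main(void)
    {
            struct statvfs sb;

            if (statvfs("/", &sb) != 0) {
                    perror("statvfs");
                    return (1);
            }

            /* the same fields rfs_statfs() copies into the reply */
            printf("bsize=%lu blocks=%llu bfree=%llu bavail=%llu\n",
                (unsigned long)sb.f_frsize,
                (unsigned long long)sb.f_blocks,
                (unsigned long long)sb.f_bfree,
                (unsigned long long)sb.f_bavail);
            return (0);
    }
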
2788 2828  
2789 2829  static int
2790 2830  sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
2791 2831  {
2792 2832          vap->va_mask = 0;
2793 2833  
2794 2834          /*
2795 2835           * There was a sign extension bug in some VFS based systems
2796 2836           * which stored the mode as a short.  When it would get
2797 2837           * assigned to a u_long, no sign extension would occur.
2798 2838           * It needed to, but this wasn't noticed because sa_mode
2799 2839           * would then get assigned back to the short, thus ignoring
2800 2840           * the upper 16 bits of sa_mode.
2801 2841           *
2802 2842           * To make this implementation work for both broken
2803 2843           * clients and good clients, we check for both versions
2804 2844           * of the mode.
2805 2845           */
2806 2846          if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
2807 2847              sa->sa_mode != (uint32_t)-1) {
2808 2848                  vap->va_mask |= AT_MODE;
2809 2849                  vap->va_mode = sa->sa_mode;
2810 2850          }
2811 2851          if (sa->sa_uid != (uint32_t)-1) {
2812 2852                  vap->va_mask |= AT_UID;
2813 2853                  vap->va_uid = sa->sa_uid;
2814 2854          }
2815 2855          if (sa->sa_gid != (uint32_t)-1) {
2816 2856                  vap->va_mask |= AT_GID;
2817 2857                  vap->va_gid = sa->sa_gid;
2818 2858          }
2819 2859          if (sa->sa_size != (uint32_t)-1) {
2820 2860                  vap->va_mask |= AT_SIZE;
2821 2861                  vap->va_size = sa->sa_size;
2822 2862          }
2823 2863          if (sa->sa_atime.tv_sec != (int32_t)-1 &&
2824 2864              sa->sa_atime.tv_usec != (int32_t)-1) {
2825 2865  #ifndef _LP64
2826 2866                  /* return error if time overflow */
2827 2867                  if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
2828 2868                          return (EOVERFLOW);
2829 2869  #endif
2830 2870                  vap->va_mask |= AT_ATIME;
2831 2871                  /*
2832 2872                   * nfs protocol defines times as unsigned so don't extend sign,
2833 2873                   * unless sysadmin set nfs_allow_preepoch_time.
2834 2874                   */
2835 2875                  NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
2836 2876                  vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2837 2877          }
2838 2878          if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2839 2879              sa->sa_mtime.tv_usec != (int32_t)-1) {
2840 2880  #ifndef _LP64
2841 2881                  /* return error if time overflow */
2842 2882                  if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2843 2883                          return (EOVERFLOW);
2844 2884  #endif
2845 2885                  vap->va_mask |= AT_MTIME;
  
        (254 lines elided)
2846 2886                  /*
2847 2887                   * nfs protocol defines times as unsigned so don't extend sign,
2848 2888                   * unless sysadmin set nfs_allow_preepoch_time.
2849 2889                   */
2850 2890                  NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2851 2891                  vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2852 2892          }
2853 2893          return (0);
2854 2894  }
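
The sign-extension note at the top of sattr_to_vattr() is why sa_mode is
compared against two different "unset" sentinels. A standalone sketch showing
the two values a client can plausibly send:

    #include <stdint.h>
    #include <stdio.h>

    /* same test as the kernel code above, spelled with fixed-width types */
    static int
    mode_is_unset(uint32_t sa_mode)
    {
            return (sa_mode == (uint32_t)((uint16_t)-1) ||  /* 0x0000ffff */
                sa_mode == (uint32_t)-1);                   /* 0xffffffff */
    }

    int
    main(void)
    {
            uint16_t short_sentinel = (uint16_t)-1; /* stored in a 16-bit field */
            uint32_t widened = short_sentinel;      /* widens with zero fill */

            printf("broken client sends 0x%08x -> unset=%d\n",
                widened, mode_is_unset(widened));
            printf("good client sends   0x%08x -> unset=%d\n",
                (uint32_t)-1, mode_is_unset((uint32_t)-1));
            return (0);
    }
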
2855 2895  
2856      -static enum nfsftype vt_to_nf[] = {
     2896 +static const enum nfsftype vt_to_nf[] = {
2857 2897          0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
2858 2898  };
2859 2899  
2860 2900  /*
2861 2901   * check the following fields for overflow: nodeid, size, and time.
2862 2902   * There could be a problem when converting 64-bit LP64 fields
2863 2903   * into 32-bit ones.  Return an error if there is an overflow.
2864 2904   */
2865 2905  int
2866 2906  vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2867 2907  {
2868 2908          ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2869 2909          na->na_type = vt_to_nf[vap->va_type];
2870 2910  
2871 2911          if (vap->va_mode == (unsigned short) -1)
2872 2912                  na->na_mode = (uint32_t)-1;
2873 2913          else
2874 2914                  na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2875 2915  
2876 2916          if (vap->va_uid == (unsigned short)(-1))
2877 2917                  na->na_uid = (uint32_t)(-1);
2878 2918          else if (vap->va_uid == UID_NOBODY)
2879 2919                  na->na_uid = (uint32_t)NFS_UID_NOBODY;
2880 2920          else
2881 2921                  na->na_uid = vap->va_uid;
2882 2922  
2883 2923          if (vap->va_gid == (unsigned short)(-1))
2884 2924                  na->na_gid = (uint32_t)-1;
2885 2925          else if (vap->va_gid == GID_NOBODY)
2886 2926                  na->na_gid = (uint32_t)NFS_GID_NOBODY;
2887 2927          else
2888 2928                  na->na_gid = vap->va_gid;
2889 2929  
2890 2930          /*
2891 2931           * Do we need to check fsid for overflow?  It is 64-bit in the
2892 2932           * vattr, but are bigger than 32 bit values supported?
2893 2933           */
2894 2934          na->na_fsid = vap->va_fsid;
2895 2935  
2896 2936          na->na_nodeid = vap->va_nodeid;
2897 2937  
2898 2938          /*
2899 2939           * Check to make sure that the nodeid is representable over the
2900 2940           * wire without losing bits.
2901 2941           */
2902 2942          if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2903 2943                  return (EFBIG);
2904 2944          na->na_nlink = vap->va_nlink;
2905 2945  
2906 2946          /*
2907 2947           * Check for big files here, instead of at the caller.  See
2908 2948           * comments in cstat for large special file explanation.
2909 2949           */
2910 2950          if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2911 2951                  if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2912 2952                          return (EFBIG);
2913 2953                  if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2914 2954                          /* UNKNOWN_SIZE | OVERFLOW */
2915 2955                          na->na_size = MAXOFF32_T;
2916 2956                  } else
2917 2957                          na->na_size = vap->va_size;
2918 2958          } else
2919 2959                  na->na_size = vap->va_size;
2920 2960  
2921 2961          /*
2922 2962           * If the vnode times overflow the 32-bit times that NFS2
2923 2963           * uses on the wire then return an error.
2924 2964           */
2925 2965          if (!NFS_VAP_TIME_OK(vap)) {
2926 2966                  return (EOVERFLOW);
2927 2967          }
2928 2968          na->na_atime.tv_sec = vap->va_atime.tv_sec;
2929 2969          na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2930 2970  
2931 2971          na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2932 2972          na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2933 2973  
2934 2974          na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2935 2975          na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2936 2976  
2937 2977          /*
2938 2978           * If the dev_t will fit into 16 bits then compress
2939 2979           * it, otherwise leave it alone. See comments in
2940 2980           * nfs_client.c.
2941 2981           */
2942 2982          if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2943 2983              getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2944 2984                  na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2945 2985          else
2946 2986                  (void) cmpldev(&na->na_rdev, vap->va_rdev);
2947 2987  
2948 2988          na->na_blocks = vap->va_nblocks;
2949 2989          na->na_blocksize = vap->va_blksize;
2950 2990  
2951 2991          /*
2952 2992           * This bit of ugliness is a *TEMPORARY* hack to preserve the
2953 2993           * over-the-wire protocols for named-pipe vnodes.  It remaps the
2954 2994           * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2955 2995           *
2956 2996           * BUYER BEWARE:
2957 2997           *  If you are porting the NFS to a non-Sun server, you probably
2958 2998           *  don't want to include the following block of code.  The
2959 2999           *  over-the-wire special file types will be changing with the
2960 3000           *  NFS Protocol Revision.
2961 3001           */
2962 3002          if (vap->va_type == VFIFO)
2963 3003                  NA_SETFIFO(na);
2964 3004          return (0);
2965 3005  }
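
Several checks in vattr_to_nattr() reduce to one question: does a 64-bit
attribute survive the 32-bit NFSv2 wire format? A minimal sketch of the
nodeid and size checks; the constant and helper below are made up for the
example:

    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_MAXOFF32   0x7fffffffLL    /* largest offset NFSv2 can carry */

    /* truncate to the wire width and verify nothing was lost (nodeid check) */
    static int
    fits_in_u32(uint64_t v)
    {
            return (v == (uint64_t)(uint32_t)v);
    }

    int
    main(void)
    {
            uint64_t nodeid = 0x100000001ULL;               /* needs 33 bits */
            long long size = 3LL * 1024 * 1024 * 1024;      /* 3 GiB file */

            if (!fits_in_u32(nodeid))
                    printf("nodeid 0x%llx -> EFBIG on the wire\n",
                        (unsigned long long)nodeid);
            if (size > DEMO_MAXOFF32)
                    printf("size %lld -> EFBIG for VREG/VDIR, "
                        "clamped to MAXOFF32_T for devices\n", size);
            return (0);
    }
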
2966 3006  
2967 3007  /*
2968 3008   * acl v2 support: returns approximate permission.
2969 3009   *      default: returns minimal permission (more restrictive)
2970 3010   *      aclok: returns maximal permission (less restrictive)
2971 3011   *      This routine changes the permissions that are already in *va.
2972 3012   *      If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2973 3013   *      CLASS_OBJ is always the same as GROUP_OBJ entry.
2974 3014   */
2975 3015  static void
2976 3016  acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
2977 3017  {
2978 3018          vsecattr_t      vsa;
2979 3019          int             aclcnt;
2980 3020          aclent_t        *aclentp;
2981 3021          mode_t          mask_perm;
2982 3022          mode_t          grp_perm;
2983 3023          mode_t          other_perm;
2984 3024          mode_t          other_orig;
2985 3025          int             error;
2986 3026  
2987 3027          /* don't care about the default ACL */
2988 3028          vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
2989 3029          error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
2990 3030  
2991 3031          if (!error) {
2992 3032                  aclcnt = vsa.vsa_aclcnt;
2993 3033                  if (aclcnt > MIN_ACL_ENTRIES) {
2994 3034                          /* non-trivial ACL */
2995 3035                          aclentp = vsa.vsa_aclentp;
2996 3036                          if (exi->exi_export.ex_flags & EX_ACLOK) {
2997 3037                                  /* maximal permissions */
2998 3038                                  grp_perm = 0;
2999 3039                                  other_perm = 0;
3000 3040                                  for (; aclcnt > 0; aclcnt--, aclentp++) {
3001 3041                                          switch (aclentp->a_type) {
3002 3042                                          case USER_OBJ:
3003 3043                                                  break;
3004 3044                                          case USER:
3005 3045                                                  grp_perm |=
3006 3046                                                      aclentp->a_perm << 3;
3007 3047                                                  other_perm |= aclentp->a_perm;
3008 3048                                                  break;
3009 3049                                          case GROUP_OBJ:
3010 3050                                                  grp_perm |=
3011 3051                                                      aclentp->a_perm << 3;
3012 3052                                                  break;
3013 3053                                          case GROUP:
3014 3054                                                  other_perm |= aclentp->a_perm;
3015 3055                                                  break;
3016 3056                                          case OTHER_OBJ:
3017 3057                                                  other_orig = aclentp->a_perm;
3018 3058                                                  break;
3019 3059                                          case CLASS_OBJ:
3020 3060                                                  mask_perm = aclentp->a_perm;
3021 3061                                                  break;
3022 3062                                          default:
3023 3063                                                  break;
3024 3064                                          }
3025 3065                                  }
3026 3066                                  grp_perm &= mask_perm << 3;
3027 3067                                  other_perm &= mask_perm;
3028 3068                                  other_perm |= other_orig;
3029 3069  
3030 3070                          } else {
3031 3071                                  /* minimal permissions */
3032 3072                                  grp_perm = 070;
3033 3073                                  other_perm = 07;
3034 3074                                  for (; aclcnt > 0; aclcnt--, aclentp++) {
3035 3075                                          switch (aclentp->a_type) {
3036 3076                                          case USER_OBJ:
3037 3077                                                  break;
3038 3078                                          case USER:
3039 3079                                          case CLASS_OBJ:
3040 3080                                                  grp_perm &=
3041 3081                                                      aclentp->a_perm << 3;
3042 3082                                                  other_perm &=
3043 3083                                                      aclentp->a_perm;
3044 3084                                                  break;
3045 3085                                          case GROUP_OBJ:
3046 3086                                                  grp_perm &=
3047 3087                                                      aclentp->a_perm << 3;
3048 3088                                                  break;
3049 3089                                          case GROUP:
3050 3090                                                  other_perm &=
3051 3091                                                      aclentp->a_perm;
3052 3092                                                  break;
3053 3093                                          case OTHER_OBJ:
3054 3094                                                  other_perm &=
3055 3095                                                      aclentp->a_perm;
3056 3096                                                  break;
3057 3097                                          default:
3058 3098                                                  break;
3059 3099                                          }
3060 3100                                  }
3061 3101                          }
3062 3102                          /* copy to va */
3063 3103                          va->va_mode &= ~077;
3064 3104                          va->va_mode |= grp_perm | other_perm;
  
        (198 lines elided)
3065 3105                  }
3066 3106                  if (vsa.vsa_aclcnt)
3067 3107                          kmem_free(vsa.vsa_aclentp,
3068 3108                              vsa.vsa_aclcnt * sizeof (aclent_t));
3069 3109          }
3070 3110  }
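
For exports that set EX_ACLOK (share option aclok), acl_perm() widens the
group/other mode bits to the union of what any ACL entry grants, clipped by
the CLASS_OBJ mask; without aclok it narrows them instead, which is the more
restrictive default. A userland sketch of the maximal (aclok) branch, using
simplified stand-in types:

    #include <stdio.h>

    enum demo_type {
            D_USER_OBJ, D_USER, D_GROUP_OBJ, D_GROUP, D_CLASS_OBJ, D_OTHER_OBJ
    };

    struct demo_aclent {
            enum demo_type  a_type;
            unsigned int    a_perm;         /* rwx as an octal digit, 0..7 */
    };

    /* returns the approximate low six mode bits (group|other) */
    static unsigned int
    maximal_low_bits(const struct demo_aclent *acl, int n)
    {
            unsigned int grp = 0, oth = 0, mask = 0, oth_orig = 0;
            int i;

            for (i = 0; i < n; i++) {
                    switch (acl[i].a_type) {
                    case D_USER:
                            grp |= acl[i].a_perm << 3;
                            oth |= acl[i].a_perm;
                            break;
                    case D_GROUP_OBJ:
                            grp |= acl[i].a_perm << 3;
                            break;
                    case D_GROUP:
                            oth |= acl[i].a_perm;
                            break;
                    case D_CLASS_OBJ:
                            mask = acl[i].a_perm;
                            break;
                    case D_OTHER_OBJ:
                            oth_orig = acl[i].a_perm;
                            break;
                    default:
                            break;
                    }
            }
            grp &= mask << 3;
            oth = (oth & mask) | oth_orig;
            return (grp | oth);
    }

    int
    main(void)
    {
            struct demo_aclent acl[] = {
                    { D_USER_OBJ, 7 }, { D_USER, 6 }, { D_GROUP_OBJ, 4 },
                    { D_CLASS_OBJ, 6 }, { D_OTHER_OBJ, 0 },
            };

            printf("approximate group/other bits: %03o\n",
                maximal_low_bits(acl, 5));
            return (0);
    }
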
3071 3111  
3072 3112  void
3073 3113  rfs_srvrinit(void)
3074 3114  {
3075      -        mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3076 3115          nfs2_srv_caller_id = fs_new_caller_id();
3077 3116  }
3078 3117  
3079 3118  void
3080 3119  rfs_srvrfini(void)
3081 3120  {
3082      -        mutex_destroy(&rfs_async_write_lock);
3083 3121  }
3084 3122  
     3123 +/* ARGSUSED */
     3124 +void
     3125 +rfs_srv_zone_init(nfs_globals_t *ng)
     3126 +{
     3127 +        nfs_srv_t *ns;
     3128 +
     3129 +        ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
     3130 +
     3131 +        mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
     3132 +        ns->write_async = 1;
     3133 +
     3134 +        ng->nfs_srv = ns;
     3135 +}
     3136 +
     3137 +/* ARGSUSED */
     3138 +void
     3139 +rfs_srv_zone_fini(nfs_globals_t *ng)
     3140 +{
     3141 +        nfs_srv_t *ns = ng->nfs_srv;
     3142 +
     3143 +        ng->nfs_srv = NULL;
     3144 +
     3145 +        mutex_destroy(&ns->async_write_lock);
     3146 +        kmem_free(ns, sizeof (*ns));
     3147 +}
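
The new rfs_srv_zone_init()/rfs_srv_zone_fini() pair replaces the old
module-global rfs_async_write_lock with per-zone state hung off the zone's
nfs_globals_t. A userland analogue of that setup/teardown pattern, with
POSIX threads and invented names:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct demo_srv {
            pthread_mutex_t async_write_lock;
            int             write_async;    /* mirrors write_async = 1 above */
    };

    struct demo_globals {
            struct demo_srv *srv;
    };

    static void
    demo_zone_init(struct demo_globals *g)
    {
            struct demo_srv *s = calloc(1, sizeof (*s));

            if (s == NULL)
                    abort();        /* KM_SLEEP in the kernel cannot fail */
            pthread_mutex_init(&s->async_write_lock, NULL);
            s->write_async = 1;
            g->srv = s;
    }

    static void
    demo_zone_fini(struct demo_globals *g)
    {
            struct demo_srv *s = g->srv;

            g->srv = NULL;
            pthread_mutex_destroy(&s->async_write_lock);
            free(s);
    }

    int
    main(void)
    {
            struct demo_globals g;

            demo_zone_init(&g);
            printf("write_async=%d\n", g.srv->write_async);
            demo_zone_fini(&g);
            return (0);
    }
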
     3148 +
3085 3149  static int
3086 3150  rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3087 3151  {
3088 3152          struct clist    *wcl;
3089 3153          int             wlist_len;
3090 3154          uint32_t        count = rr->rr_count;
3091 3155  
3092 3156          wcl = ra->ra_wlist;
3093 3157  
3094 3158          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3095 3159                  return (FALSE);
3096 3160          }
3097 3161  
3098 3162          wcl = ra->ra_wlist;
3099 3163          rr->rr_ok.rrok_wlist_len = wlist_len;
3100 3164          rr->rr_ok.rrok_wlist = wcl;
3101 3165  
3102 3166          return (TRUE);
3103 3167  }
    