Revert exi_zone to exi_zoneid, and install exi_ne backpointer
Caution with use after exi_rele()
Dan mods to NFS design problems re. multiple zone keys
curzone reality check and teardown changes to use the RIGHT zone
Try to remove assumption that zone's root vnode is marked VROOT
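
Two of the notes above can be illustrated briefly. "Caution with use after
exi_rele()" refers to the idiom visible in rfs_lookup() further down in this
diff: once an exportinfo reference has been released, the local pointer is
cleared so that the shared "out:" path only releases a live reference. The
following is a minimal sketch of that idiom, not part of the change itself;
sketch_lookup() and do_public_lookup() are hypothetical stand-ins, while
exi_hold(), exi_rele(), ASSERT3U() and curzone are the names used in the diff.

static int
sketch_lookup(struct exportinfo *exi, vnode_t *dvp, cred_t *cr, bool_t public)
{
        int error = 0;

        exi_hold(exi);                  /* take our own reference */
        ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);

        if (public) {
                /*
                 * Drop our reference before the call that may replace it,
                 * and clear the pointer so the common exit path below
                 * cannot release the same reference a second time.
                 */
                exi_rele(exi);
                exi = NULL;
                error = do_public_lookup(dvp, cr, &exi); /* hypothetical */
        }

        if (exi != NULL)                /* release only a live reference */
                exi_rele(exi);
        return (error);
}

The "zone's root vnode is marked VROOT" note shows up below as the relaxed
assertions of the form ASSERT((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp)),
since a zone's root vnode is not necessarily flagged VROOT.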
    
      
    
          --- old/usr/src/uts/common/fs/nfs/nfs_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  25   25   * Copyright (c) 2016 by Delphix. All rights reserved.
  26   26   */
  27   27  
  28   28  /*
  29   29   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30   30   *      All rights reserved.
  31   31   */
  32   32  
  33   33  /*
  34   34   * Copyright 2018 Nexenta Systems, Inc.
  35   35   * Copyright (c) 2016 by Delphix. All rights reserved.
  36   36   */
  37   37  
  38   38  #include <sys/param.h>
  39   39  #include <sys/types.h>
  40   40  #include <sys/systm.h>
  41   41  #include <sys/cred.h>
  42   42  #include <sys/buf.h>
  43   43  #include <sys/vfs.h>
  44   44  #include <sys/vnode.h>
  45   45  #include <sys/uio.h>
  46   46  #include <sys/stat.h>
  47   47  #include <sys/errno.h>
  48   48  #include <sys/sysmacros.h>
  49   49  #include <sys/statvfs.h>
  50   50  #include <sys/kmem.h>
  51   51  #include <sys/kstat.h>
  52   52  #include <sys/dirent.h>
  53   53  #include <sys/cmn_err.h>
  54   54  #include <sys/debug.h>
  55   55  #include <sys/vtrace.h>
  56   56  #include <sys/mode.h>
  57   57  #include <sys/acl.h>
  58   58  #include <sys/nbmlock.h>
  59   59  #include <sys/policy.h>
  60   60  #include <sys/sdt.h>
  61   61  
  62   62  #include <rpc/types.h>
  63   63  #include <rpc/auth.h>
  64   64  #include <rpc/svc.h>
  65   65  
  66   66  #include <nfs/nfs.h>
  67   67  #include <nfs/export.h>
  68   68  #include <nfs/nfs_cmd.h>
  69   69  
  70   70  #include <vm/hat.h>
  71   71  #include <vm/as.h>
  72   72  #include <vm/seg.h>
  73   73  #include <vm/seg_map.h>
  74   74  #include <vm/seg_kmem.h>
  75   75  
  76   76  #include <sys/strsubr.h>
  77   77  
  78   78  struct rfs_async_write_list;
  79   79  
  80   80  /*
  81   81   * Zone globals of NFSv2 server
  82   82   */
  83   83  typedef struct nfs_srv {
  84   84          kmutex_t                        async_write_lock;
  85   85          struct rfs_async_write_list     *async_write_head;
  86   86  
  87   87          /*
  88   88           * enables write clustering if == 1
  89   89           */
  90   90          int             write_async;
  91   91  } nfs_srv_t;
  92   92  
  93   93  /*
  94   94   * These are the interface routines for the server side of the
  95   95   * Network File System.  See the NFS version 2 protocol specification
  96   96   * for a description of this interface.
  97   97   */
  98   98  
  99   99  static int      sattr_to_vattr(struct nfssattr *, struct vattr *);
 100  100  static void     acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
 101  101                          cred_t *);
 102  102  
 103  103  
 104  104  /*
 105  105   * Some "over the wire" UNIX file types.  These are encoded
 106  106   * into the mode.  This needs to be fixed in the next rev.
 107  107   */
 108  108  #define IFMT            0170000         /* type of file */
 109  109  #define IFCHR           0020000         /* character special */
 110  110  #define IFBLK           0060000         /* block special */
 111  111  #define IFSOCK          0140000         /* socket */
 112  112  
 113  113  u_longlong_t nfs2_srv_caller_id;
 114  114  
 115  115  static nfs_srv_t *
 116  116  nfs_get_srv(void)
 117  117  {
 118  118          nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
 119  119          nfs_srv_t *srv = ng->nfs_srv;
 120  120          ASSERT(srv != NULL);
 121  121          return (srv);
 122  122  }
 123  123  
 124  124  /*
 125  125   * Get file attributes.
 126  126   * Returns the current attributes of the file with the given fhandle.
 127  127   */
 128  128  /* ARGSUSED */
 129  129  void
 130  130  rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
 131  131      struct svc_req *req, cred_t *cr, bool_t ro)
 132  132  {
 133  133          int error;
 134  134          vnode_t *vp;
 135  135          struct vattr va;
 136  136  
 137  137          vp = nfs_fhtovp(fhp, exi);
 138  138          if (vp == NULL) {
 139  139                  ns->ns_status = NFSERR_STALE;
 140  140                  return;
 141  141          }
 142  142  
 143  143          /*
 144  144           * Do the getattr.
 145  145           */
 146  146          va.va_mask = AT_ALL;    /* we want all the attributes */
 147  147  
 148  148          error = rfs4_delegated_getattr(vp, &va, 0, cr);
 149  149  
 150  150          /* check for overflows */
 151  151          if (!error) {
 152  152                  /* Lie about the object type for a referral */
 153  153                  if (vn_is_nfs_reparse(vp, cr))
 154  154                          va.va_type = VLNK;
 155  155  
 156  156                  acl_perm(vp, exi, &va, cr);
 157  157                  error = vattr_to_nattr(&va, &ns->ns_attr);
 158  158          }
 159  159  
 160  160          VN_RELE(vp);
 161  161  
 162  162          ns->ns_status = puterrno(error);
 163  163  }
 164  164  void *
 165  165  rfs_getattr_getfh(fhandle_t *fhp)
 166  166  {
 167  167          return (fhp);
 168  168  }
 169  169  
 170  170  /*
 171  171   * Set file attributes.
 172  172   * Sets the attributes of the file with the given fhandle.  Returns
 173  173   * the new attributes.
 174  174   */
 175  175  /* ARGSUSED */
 176  176  void
 177  177  rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
 178  178      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 179  179  {
 180  180          int error;
 181  181          int flag;
 182  182          int in_crit = 0;
 183  183          vnode_t *vp;
 184  184          struct vattr va;
 185  185          struct vattr bva;
 186  186          struct flock64 bf;
 187  187          caller_context_t ct;
 188  188  
 189  189  
 190  190          vp = nfs_fhtovp(&args->saa_fh, exi);
 191  191          if (vp == NULL) {
 192  192                  ns->ns_status = NFSERR_STALE;
 193  193                  return;
 194  194          }
 195  195  
 196  196          if (rdonly(ro, vp)) {
 197  197                  VN_RELE(vp);
 198  198                  ns->ns_status = NFSERR_ROFS;
 199  199                  return;
 200  200          }
 201  201  
 202  202          error = sattr_to_vattr(&args->saa_sa, &va);
 203  203          if (error) {
 204  204                  VN_RELE(vp);
 205  205                  ns->ns_status = puterrno(error);
 206  206                  return;
 207  207          }
 208  208  
 209  209          /*
 210  210           * If the client is requesting a change to the mtime,
 211  211           * but the nanosecond field is set to 1 billion, then
 212  212           * this is a flag to the server that it should set the
 213  213           * atime and mtime fields to the server's current time.
 214  214           * The 1 billion number actually came from the client
 215  215           * as 1 million, but the units in the over the wire
 216  216           * request are microseconds instead of nanoseconds.
 217  217           *
 218  218           * This is an overload of the protocol and should be
 219  219           * documented in the NFS Version 2 protocol specification.
 220  220           */
 221  221          if (va.va_mask & AT_MTIME) {
 222  222                  if (va.va_mtime.tv_nsec == 1000000000) {
 223  223                          gethrestime(&va.va_mtime);
 224  224                          va.va_atime = va.va_mtime;
 225  225                          va.va_mask |= AT_ATIME;
 226  226                          flag = 0;
 227  227                  } else
 228  228                          flag = ATTR_UTIME;
 229  229          } else
 230  230                  flag = 0;
 231  231  
 232  232          /*
 233  233           * If the filesystem is exported with nosuid, then mask off
 234  234           * the setuid and setgid bits.
 235  235           */
 236  236          if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
 237  237              (exi->exi_export.ex_flags & EX_NOSUID))
 238  238                  va.va_mode &= ~(VSUID | VSGID);
 239  239  
 240  240          ct.cc_sysid = 0;
 241  241          ct.cc_pid = 0;
 242  242          ct.cc_caller_id = nfs2_srv_caller_id;
 243  243          ct.cc_flags = CC_DONTBLOCK;
 244  244  
 245  245          /*
 246  246           * We need to specially handle size changes because it is
 247  247           * possible for the client to create a file with modes
 248  248           * which indicate read-only, but with the file opened for
 249  249           * writing.  If the client then tries to set the size of
 250  250           * the file, then the normal access checking done in
 251  251           * VOP_SETATTR would prevent the client from doing so,
 252  252           * although it should be legal for it to do so.  To get
 253  253           * around this, we do the access checking for ourselves
 254  254           * and then use VOP_SPACE which doesn't do the access
 255  255           * checking which VOP_SETATTR does. VOP_SPACE can only
 256  256           * operate on VREG files, let VOP_SETATTR handle the other
 257  257           * extremely rare cases.
 258  258           * Also the client should not be allowed to change the
 259  259           * size of the file if there is a conflicting non-blocking
 260  260           * mandatory lock in the region of change.
 261  261           */
 262  262          if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
 263  263                  if (nbl_need_check(vp)) {
 264  264                          nbl_start_crit(vp, RW_READER);
 265  265                          in_crit = 1;
 266  266                  }
 267  267  
 268  268                  bva.va_mask = AT_UID | AT_SIZE;
 269  269  
 270  270                  error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
 271  271  
 272  272                  if (error) {
 273  273                          if (in_crit)
 274  274                                  nbl_end_crit(vp);
 275  275                          VN_RELE(vp);
 276  276                          ns->ns_status = puterrno(error);
 277  277                          return;
 278  278                  }
 279  279  
 280  280                  if (in_crit) {
 281  281                          u_offset_t offset;
 282  282                          ssize_t length;
 283  283  
 284  284                          if (va.va_size < bva.va_size) {
 285  285                                  offset = va.va_size;
 286  286                                  length = bva.va_size - va.va_size;
 287  287                          } else {
 288  288                                  offset = bva.va_size;
 289  289                                  length = va.va_size - bva.va_size;
 290  290                          }
 291  291                          if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 292  292                              NULL)) {
 293  293                                  error = EACCES;
 294  294                          }
 295  295                  }
 296  296  
 297  297                  if (crgetuid(cr) == bva.va_uid && !error &&
 298  298                      va.va_size != bva.va_size) {
 299  299                          va.va_mask &= ~AT_SIZE;
 300  300                          bf.l_type = F_WRLCK;
 301  301                          bf.l_whence = 0;
 302  302                          bf.l_start = (off64_t)va.va_size;
 303  303                          bf.l_len = 0;
 304  304                          bf.l_sysid = 0;
 305  305                          bf.l_pid = 0;
 306  306  
 307  307                          error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 308  308                              (offset_t)va.va_size, cr, &ct);
 309  309                  }
 310  310                  if (in_crit)
 311  311                          nbl_end_crit(vp);
 312  312          } else
 313  313                  error = 0;
 314  314  
 315  315          /*
 316  316           * Do the setattr.
 317  317           */
 318  318          if (!error && va.va_mask) {
 319  319                  error = VOP_SETATTR(vp, &va, flag, cr, &ct);
 320  320          }
 321  321  
 322  322          /*
 323  323           * check if the monitor on either vop_space or vop_setattr detected
 324  324           * a delegation conflict and if so, mark the thread flag as
 325  325           * wouldblock so that the response is dropped and the client will
 326  326           * try again.
 327  327           */
 328  328          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 329  329                  VN_RELE(vp);
 330  330                  curthread->t_flag |= T_WOULDBLOCK;
 331  331                  return;
 332  332          }
 333  333  
 334  334          if (!error) {
 335  335                  va.va_mask = AT_ALL;    /* get everything */
 336  336  
 337  337                  error = rfs4_delegated_getattr(vp, &va, 0, cr);
 338  338  
 339  339                  /* check for overflows */
 340  340                  if (!error) {
 341  341                          acl_perm(vp, exi, &va, cr);
 342  342                          error = vattr_to_nattr(&va, &ns->ns_attr);
 343  343                  }
 344  344          }
 345  345  
 346  346          ct.cc_flags = 0;
 347  347  
 348  348          /*
 349  349           * Force modified metadata out to stable storage.
 350  350           */
 351  351          (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 352  352  
 353  353          VN_RELE(vp);
 354  354  
 355  355          ns->ns_status = puterrno(error);
 356  356  }
 357  357  void *
 358  358  rfs_setattr_getfh(struct nfssaargs *args)
 359  359  {
 360  360          return (&args->saa_fh);
 361  361  }
 362  362  
 363  363  /* Change and release @exip and @vpp only on success */
 364  364  int
 365  365  rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
 366  366  {
 367  367          struct exportinfo *exi;
 368  368          vnode_t *vp = *vpp;
 369  369          fid_t fid;
 370  370          int error;
 371  371  
 372  372          VN_HOLD(vp);
 373  373  
 374  374          if ((error = traverse(&vp)) != 0) {
 375  375                  VN_RELE(vp);
 376  376                  return (error);
 377  377          }
 378  378  
 379  379          bzero(&fid, sizeof (fid));
 380  380          fid.fid_len = MAXFIDSZ;
 381  381          error = VOP_FID(vp, &fid, NULL);
 382  382          if (error) {
 383  383                  VN_RELE(vp);
 384  384                  return (error);
 385  385          }
 386  386  
 387  387          exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
 388  388          if (exi == NULL ||
 389  389              (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
 390  390                   * It is not an error, just the subdir is not exported
 391  391                   * or "nohide" is not set
 392  392                   * or "nohide" is not set
 393  393                   */
 394  394                  if (exi != NULL)
 395  395                          exi_rele(exi);
 396  396                  VN_RELE(vp);
 397  397          } else {
 398  398                  /* go to submount */
 399  399                  exi_rele(*exip);
 400  400                  *exip = exi;
 401  401  
 402  402                  VN_RELE(*vpp);
 403  403                  *vpp = vp;
 404  404          }
 405  405  
 406  406          return (0);
 407  407  }
 408  408  
 409  409  /*
  
 410  410   * Given mounted "dvp" and "exi", climb to the upper mountpoint,
 411  411   * correcting dvp/exi along the way.
 412  412   * Returns 0 on success.
 413  413   */
 414  414  int
 415  415  rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
 416  416  {
 417  417          struct exportinfo *exi;
 418  418          vnode_t *dvp = *dvpp;
 419  419  
 420      -        ASSERT(dvp->v_flag & VROOT);
      420 +        ASSERT3U((*exip)->exi_zoneid, ==, curzone->zone_id);
      421 +        ASSERT((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp));
 421  422  
 422  423          VN_HOLD(dvp);
 423  424          dvp = untraverse(dvp);
 424  425          exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
 425  426          if (exi == NULL) {
 426  427                  VN_RELE(dvp);
 427  428                  return (-1);
 428  429          }
 429  430  
      431 +        ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
 430  432          exi_rele(*exip);
 431  433          *exip = exi;
 432  434          VN_RELE(*dvpp);
 433  435          *dvpp = dvp;
 434  436  
 435  437          return (0);
 436  438  }
 437  439  /*
 438  440   * Directory lookup.
 439  441   * Returns an fhandle and file attributes for file name in a directory.
 440  442   */
 441  443  /* ARGSUSED */
 442  444  void
 443  445  rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
 444  446      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 445  447  {
 446  448          int error;
 447  449          vnode_t *dvp;
 448  450          vnode_t *vp;
 449  451          struct vattr va;
 450  452          fhandle_t *fhp = da->da_fhandle;
 451  453          struct sec_ol sec = {0, 0};
 452  454          bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 453  455          char *name;
 454  456          struct sockaddr *ca;
 455  457  
 456  458          /*
 457  459           * Trusted Extension doesn't support NFSv2. MOUNT
 458  460           * will reject v2 clients. Need to prevent v2 client
 459  461           * access via WebNFS here.
 460  462           */
 461  463          if (is_system_labeled() && req->rq_vers == 2) {
 462  464                  dr->dr_status = NFSERR_ACCES;
 463  465                  return;
 464  466          }
 465  467  
 466  468          /*
 467  469           * Disallow NULL paths
 468  470           */
 469  471          if (da->da_name == NULL || *da->da_name == '\0') {
 470  472                  dr->dr_status = NFSERR_ACCES;
 471  473                  return;
 472  474          }
 473  475  
 474  476          /*
 475  477           * Allow lookups from the root - the default
 476  478           * location of the public filehandle.
 477  479           */
 478  480          if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 479  481                  dvp = ZONE_ROOTVP();
  
 480  482                  VN_HOLD(dvp);
 481  483          } else {
 482  484                  dvp = nfs_fhtovp(fhp, exi);
 483  485                  if (dvp == NULL) {
 484  486                          dr->dr_status = NFSERR_STALE;
 485  487                          return;
 486  488                  }
 487  489          }
 488  490  
 489  491          exi_hold(exi);
      492 +        ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
 490  493  
 491  494          /*
 492  495           * Not allow lookup beyond root.
 493  496           * If the filehandle matches a filehandle of the exi,
 494  497           * then the ".." refers beyond the root of an exported filesystem.
 495  498           */
 496  499          if (strcmp(da->da_name, "..") == 0 &&
 497  500              EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
 498  501                  if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 499      -                    (dvp->v_flag & VROOT)) {
      502 +                    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
 500  503                          /*
 501  504                           * special case for ".." and 'nohide' exported root
 502  505                           */
 503  506                          if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 504  507                                  error = NFSERR_ACCES;
 505  508                                  goto out;
 506  509                          }
 507  510                  } else  {
 508  511                          error = NFSERR_NOENT;
 509  512                          goto out;
 510  513                  }
 511  514          }
 512  515  
 513  516          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 514  517          name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
 515  518              MAXPATHLEN);
 516  519  
 517  520          if (name == NULL) {
 518  521                  error = NFSERR_ACCES;
 519  522                  goto out;
 520  523          }
 521  524  
 522  525          /*
 523  526           * If the public filehandle is used then allow
 524  527           * a multi-component lookup, i.e. evaluate
  
 525  528           * a pathname and follow symbolic links if
 526  529           * necessary.
 527  530           *
 528  531           * This may result in a vnode in another filesystem
 529  532           * which is OK as long as the filesystem is exported.
 530  533           */
 531  534          if (PUBLIC_FH2(fhp)) {
 532  535                  publicfh_flag = TRUE;
 533  536  
 534  537                  exi_rele(exi);
      538 +                exi = NULL;
 535  539  
 536  540                  error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
 537  541                      &sec);
 538  542          } else {
 539  543                  /*
 540  544                   * Do a normal single component lookup.
 541  545                   */
 542  546                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
 543  547                      NULL, NULL, NULL);
 544  548          }
 545  549  
 546  550          if (name != da->da_name)
 547  551                  kmem_free(name, MAXPATHLEN);
 548  552  
 549  553          if (error == 0 && vn_ismntpt(vp)) {
 550  554                  error = rfs_cross_mnt(&vp, &exi);
 551  555                  if (error)
 552  556                          VN_RELE(vp);
 553  557          }
 554  558  
 555  559          if (!error) {
 556  560                  va.va_mask = AT_ALL;    /* we want everything */
 557  561  
 558  562                  error = rfs4_delegated_getattr(vp, &va, 0, cr);
 559  563  
 560  564                  /* check for overflows */
 561  565                  if (!error) {
 562  566                          acl_perm(vp, exi, &va, cr);
 563  567                          error = vattr_to_nattr(&va, &dr->dr_attr);
 564  568                          if (!error) {
 565  569                                  if (sec.sec_flags & SEC_QUERY)
 566  570                                          error = makefh_ol(&dr->dr_fhandle, exi,
 567  571                                              sec.sec_index);
 568  572                                  else {
 569  573                                          error = makefh(&dr->dr_fhandle, vp,
 570  574                                              exi);
 571  575                                          if (!error && publicfh_flag &&
 572  576                                              !chk_clnt_sec(exi, req))
 573  577                                                  auth_weak = TRUE;
 574  578                                  }
 575  579                          }
 576  580                  }
 577  581                  VN_RELE(vp);
 578  582          }
 579  583  
 580  584  out:
 581  585          VN_RELE(dvp);
 582  586  
 583  587          if (exi != NULL)
 584  588                  exi_rele(exi);
 585  589  
 586  590          /*
 587  591           * If it's public fh, no 0x81, and client's flavor is
 588  592           * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 589  593           * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 590  594           */
 591  595          if (auth_weak)
 592  596                  dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
 593  597          else
 594  598                  dr->dr_status = puterrno(error);
 595  599  }
 596  600  void *
 597  601  rfs_lookup_getfh(struct nfsdiropargs *da)
 598  602  {
 599  603          return (da->da_fhandle);
 600  604  }
 601  605  
 602  606  /*
 603  607   * Read symbolic link.
 604  608   * Returns the string in the symbolic link at the given fhandle.
 605  609   */
 606  610  /* ARGSUSED */
 607  611  void
 608  612  rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
 609  613      struct svc_req *req, cred_t *cr, bool_t ro)
 610  614  {
 611  615          int error;
 612  616          struct iovec iov;
 613  617          struct uio uio;
 614  618          vnode_t *vp;
 615  619          struct vattr va;
 616  620          struct sockaddr *ca;
 617  621          char *name = NULL;
 618  622          int is_referral = 0;
 619  623  
 620  624          vp = nfs_fhtovp(fhp, exi);
 621  625          if (vp == NULL) {
 622  626                  rl->rl_data = NULL;
 623  627                  rl->rl_status = NFSERR_STALE;
 624  628                  return;
 625  629          }
 626  630  
 627  631          va.va_mask = AT_MODE;
 628  632  
 629  633          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 630  634  
 631  635          if (error) {
 632  636                  VN_RELE(vp);
 633  637                  rl->rl_data = NULL;
 634  638                  rl->rl_status = puterrno(error);
 635  639                  return;
 636  640          }
 637  641  
 638  642          if (MANDLOCK(vp, va.va_mode)) {
 639  643                  VN_RELE(vp);
 640  644                  rl->rl_data = NULL;
 641  645                  rl->rl_status = NFSERR_ACCES;
 642  646                  return;
 643  647          }
 644  648  
 645  649          /* We lied about the object type for a referral */
 646  650          if (vn_is_nfs_reparse(vp, cr))
 647  651                  is_referral = 1;
 648  652  
 649  653          /*
 650  654           * XNFS and RFC1094 require us to return ENXIO if argument
 651  655           * is not a link. BUGID 1138002.
 652  656           */
 653  657          if (vp->v_type != VLNK && !is_referral) {
 654  658                  VN_RELE(vp);
 655  659                  rl->rl_data = NULL;
 656  660                  rl->rl_status = NFSERR_NXIO;
 657  661                  return;
 658  662          }
 659  663  
 660  664          /*
 661  665           * Allocate data for pathname.  This will be freed by rfs_rlfree.
 662  666           */
 663  667          rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
 664  668  
 665  669          if (is_referral) {
 666  670                  char *s;
 667  671                  size_t strsz;
 668  672  
 669  673                  /* Get an artificial symlink based on a referral */
 670  674                  s = build_symlink(vp, cr, &strsz);
 671  675                  global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
 672  676                  DTRACE_PROBE2(nfs2serv__func__referral__reflink,
 673  677                      vnode_t *, vp, char *, s);
 674  678                  if (s == NULL)
 675  679                          error = EINVAL;
 676  680                  else {
 677  681                          error = 0;
 678  682                          (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
 679  683                          rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
 680  684                          kmem_free(s, strsz);
 681  685                  }
 682  686  
 683  687          } else {
 684  688  
 685  689                  /*
 686  690                   * Set up io vector to read sym link data
 687  691                   */
 688  692                  iov.iov_base = rl->rl_data;
 689  693                  iov.iov_len = NFS_MAXPATHLEN;
 690  694                  uio.uio_iov = &iov;
 691  695                  uio.uio_iovcnt = 1;
 692  696                  uio.uio_segflg = UIO_SYSSPACE;
 693  697                  uio.uio_extflg = UIO_COPY_CACHED;
 694  698                  uio.uio_loffset = (offset_t)0;
 695  699                  uio.uio_resid = NFS_MAXPATHLEN;
 696  700  
 697  701                  /*
 698  702                   * Do the readlink.
 699  703                   */
 700  704                  error = VOP_READLINK(vp, &uio, cr, NULL);
 701  705  
 702  706                  rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
 703  707  
 704  708                  if (!error)
 705  709                          rl->rl_data[rl->rl_count] = '\0';
 706  710  
 707  711          }
 708  712  
 709  713  
 710  714          VN_RELE(vp);
 711  715  
 712  716          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 713  717          name = nfscmd_convname(ca, exi, rl->rl_data,
 714  718              NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
 715  719  
 716  720          if (name != NULL && name != rl->rl_data) {
 717  721                  kmem_free(rl->rl_data, NFS_MAXPATHLEN);
 718  722                  rl->rl_data = name;
 719  723          }
 720  724  
 721  725          /*
 722  726           * XNFS and RFC1094 require us to return ENXIO if argument
 723  727           * is not a link. UFS returns EINVAL if this is the case,
 724  728           * so we do the mapping here. BUGID 1138002.
 725  729           */
 726  730          if (error == EINVAL)
 727  731                  rl->rl_status = NFSERR_NXIO;
 728  732          else
 729  733                  rl->rl_status = puterrno(error);
 730  734  
 731  735  }
 732  736  void *
 733  737  rfs_readlink_getfh(fhandle_t *fhp)
 734  738  {
 735  739          return (fhp);
 736  740  }
 737  741  /*
 738  742   * Free data allocated by rfs_readlink
 739  743   */
 740  744  void
 741  745  rfs_rlfree(struct nfsrdlnres *rl)
 742  746  {
 743  747          if (rl->rl_data != NULL)
 744  748                  kmem_free(rl->rl_data, NFS_MAXPATHLEN);
 745  749  }
 746  750  
 747  751  static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
 748  752  
 749  753  /*
 750  754   * Read data.
 751  755   * Returns some data read from the file at the given fhandle.
 752  756   */
 753  757  /* ARGSUSED */
 754  758  void
 755  759  rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
 756  760      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 757  761  {
 758  762          vnode_t *vp;
 759  763          int error;
 760  764          struct vattr va;
 761  765          struct iovec iov;
 762  766          struct uio uio;
 763  767          mblk_t *mp;
 764  768          int alloc_err = 0;
 765  769          int in_crit = 0;
 766  770          caller_context_t ct;
 767  771  
 768  772          vp = nfs_fhtovp(&ra->ra_fhandle, exi);
 769  773          if (vp == NULL) {
 770  774                  rr->rr_data = NULL;
 771  775                  rr->rr_status = NFSERR_STALE;
 772  776                  return;
 773  777          }
 774  778  
 775  779          if (vp->v_type != VREG) {
 776  780                  VN_RELE(vp);
 777  781                  rr->rr_data = NULL;
 778  782                  rr->rr_status = NFSERR_ISDIR;
 779  783                  return;
 780  784          }
 781  785  
 782  786          ct.cc_sysid = 0;
 783  787          ct.cc_pid = 0;
 784  788          ct.cc_caller_id = nfs2_srv_caller_id;
 785  789          ct.cc_flags = CC_DONTBLOCK;
 786  790  
 787  791          /*
 788  792           * Enter the critical region before calling VOP_RWLOCK
 789  793           * to avoid a deadlock with write requests.
 790  794           */
 791  795          if (nbl_need_check(vp)) {
 792  796                  nbl_start_crit(vp, RW_READER);
 793  797                  if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
 794  798                      0, NULL)) {
 795  799                          nbl_end_crit(vp);
 796  800                          VN_RELE(vp);
 797  801                          rr->rr_data = NULL;
 798  802                          rr->rr_status = NFSERR_ACCES;
 799  803                          return;
 800  804                  }
 801  805                  in_crit = 1;
 802  806          }
 803  807  
 804  808          error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
 805  809  
 806  810          /* check if a monitor detected a delegation conflict */
 807  811          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 808  812                  if (in_crit)
 809  813                          nbl_end_crit(vp);
 810  814                  VN_RELE(vp);
 811  815                  /* mark as wouldblock so response is dropped */
 812  816                  curthread->t_flag |= T_WOULDBLOCK;
 813  817  
 814  818                  rr->rr_data = NULL;
 815  819                  return;
 816  820          }
 817  821  
 818  822          va.va_mask = AT_ALL;
 819  823  
 820  824          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
 821  825  
 822  826          if (error) {
 823  827                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 824  828                  if (in_crit)
 825  829                          nbl_end_crit(vp);
 826  830  
 827  831                  VN_RELE(vp);
 828  832                  rr->rr_data = NULL;
 829  833                  rr->rr_status = puterrno(error);
 830  834  
 831  835                  return;
 832  836          }
 833  837  
 834  838          /*
 835  839           * This is a kludge to allow reading of files created
 836  840           * with no read permission.  The owner of the file
 837  841           * is always allowed to read it.
 838  842           */
 839  843          if (crgetuid(cr) != va.va_uid) {
 840  844                  error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
 841  845  
 842  846                  if (error) {
 843  847                          /*
 844  848                           * Exec is the same as read over the net because
 845  849                           * of demand loading.
 846  850                           */
 847  851                          error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
 848  852                  }
 849  853                  if (error) {
 850  854                          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 851  855                          if (in_crit)
 852  856                                  nbl_end_crit(vp);
 853  857                          VN_RELE(vp);
 854  858                          rr->rr_data = NULL;
 855  859                          rr->rr_status = puterrno(error);
 856  860  
 857  861                          return;
 858  862                  }
 859  863          }
 860  864  
 861  865          if (MANDLOCK(vp, va.va_mode)) {
 862  866                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 863  867                  if (in_crit)
 864  868                          nbl_end_crit(vp);
 865  869  
 866  870                  VN_RELE(vp);
 867  871                  rr->rr_data = NULL;
 868  872                  rr->rr_status = NFSERR_ACCES;
 869  873  
 870  874                  return;
 871  875          }
 872  876  
 873  877          rr->rr_ok.rrok_wlist_len = 0;
 874  878          rr->rr_ok.rrok_wlist = NULL;
 875  879  
 876  880          if ((u_offset_t)ra->ra_offset >= va.va_size) {
 877  881                  rr->rr_count = 0;
 878  882                  rr->rr_data = NULL;
 879  883                  /*
 880  884                   * In this case, status is NFS_OK, but there is no data
 881  885                   * to encode. So set rr_mp to NULL.
 882  886                   */
 883  887                  rr->rr_mp = NULL;
 884  888                  rr->rr_ok.rrok_wlist = ra->ra_wlist;
 885  889                  if (rr->rr_ok.rrok_wlist)
 886  890                          clist_zero_len(rr->rr_ok.rrok_wlist);
 887  891                  goto done;
 888  892          }
 889  893  
 890  894          if (ra->ra_wlist) {
 891  895                  mp = NULL;
 892  896                  rr->rr_mp = NULL;
 893  897                  (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
 894  898                  if (ra->ra_count > iov.iov_len) {
 895  899                          rr->rr_data = NULL;
 896  900                          rr->rr_status = NFSERR_INVAL;
 897  901                          goto done;
 898  902                  }
 899  903          } else {
 900  904                  /*
 901  905                   * mp will contain the data to be sent out in the read reply.
 902  906                   * This will be freed after the reply has been sent out (by the
 903  907                   * driver).
 904  908                   * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
 905  909                   * that the call to xdrmblk_putmblk() never fails.
 906  910                   */
 907  911                  mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
 908  912                      &alloc_err);
 909  913                  ASSERT(mp != NULL);
 910  914                  ASSERT(alloc_err == 0);
 911  915  
 912  916                  rr->rr_mp = mp;
 913  917  
 914  918                  /*
 915  919                   * Set up io vector
 916  920                   */
 917  921                  iov.iov_base = (caddr_t)mp->b_datap->db_base;
 918  922                  iov.iov_len = ra->ra_count;
 919  923          }
 920  924  
 921  925          uio.uio_iov = &iov;
 922  926          uio.uio_iovcnt = 1;
 923  927          uio.uio_segflg = UIO_SYSSPACE;
 924  928          uio.uio_extflg = UIO_COPY_CACHED;
 925  929          uio.uio_loffset = (offset_t)ra->ra_offset;
 926  930          uio.uio_resid = ra->ra_count;
 927  931  
 928  932          error = VOP_READ(vp, &uio, 0, cr, &ct);
 929  933  
 930  934          if (error) {
 931  935                  if (mp)
 932  936                          freeb(mp);
 933  937  
 934  938                  /*
 935  939                   * check if a monitor detected a delegation conflict and
 936  940                   * mark as wouldblock so response is dropped
 937  941                   */
 938  942                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
 939  943                          curthread->t_flag |= T_WOULDBLOCK;
 940  944                  else
 941  945                          rr->rr_status = puterrno(error);
 942  946  
 943  947                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 944  948                  if (in_crit)
 945  949                          nbl_end_crit(vp);
 946  950  
 947  951                  VN_RELE(vp);
 948  952                  rr->rr_data = NULL;
 949  953  
 950  954                  return;
 951  955          }
 952  956  
 953  957          /*
 954  958           * Get attributes again so we can send the latest access
 955  959           * time to the client side for its cache.
 956  960           */
 957  961          va.va_mask = AT_ALL;
 958  962  
 959  963          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
 960  964  
 961  965          if (error) {
 962  966                  if (mp)
 963  967                          freeb(mp);
 964  968  
 965  969                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 966  970                  if (in_crit)
 967  971                          nbl_end_crit(vp);
 968  972  
 969  973                  VN_RELE(vp);
 970  974                  rr->rr_data = NULL;
 971  975                  rr->rr_status = puterrno(error);
 972  976  
 973  977                  return;
 974  978          }
 975  979  
 976  980          rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
 977  981  
 978  982          if (mp) {
 979  983                  rr->rr_data = (char *)mp->b_datap->db_base;
 980  984          } else {
 981  985                  if (ra->ra_wlist) {
 982  986                          rr->rr_data = (caddr_t)iov.iov_base;
 983  987                          if (!rdma_setup_read_data2(ra, rr)) {
 984  988                                  rr->rr_data = NULL;
 985  989                                  rr->rr_status = puterrno(NFSERR_INVAL);
 986  990                          }
 987  991                  }
 988  992          }
 989  993  done:
 990  994          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 991  995          if (in_crit)
 992  996                  nbl_end_crit(vp);
 993  997  
 994  998          acl_perm(vp, exi, &va, cr);
 995  999  
 996 1000          /* check for overflows */
 997 1001          error = vattr_to_nattr(&va, &rr->rr_attr);
 998 1002  
 999 1003          VN_RELE(vp);
1000 1004  
1001 1005          rr->rr_status = puterrno(error);
1002 1006  }
1003 1007  
1004 1008  /*
1005 1009   * Free data allocated by rfs_read
1006 1010   */
1007 1011  void
1008 1012  rfs_rdfree(struct nfsrdresult *rr)
1009 1013  {
1010 1014          mblk_t *mp;
1011 1015  
1012 1016          if (rr->rr_status == NFS_OK) {
1013 1017                  mp = rr->rr_mp;
1014 1018                  if (mp != NULL)
1015 1019                          freeb(mp);
1016 1020          }
1017 1021  }
1018 1022  
1019 1023  void *
1020 1024  rfs_read_getfh(struct nfsreadargs *ra)
1021 1025  {
1022 1026          return (&ra->ra_fhandle);
1023 1027  }
1024 1028  
1025 1029  #define MAX_IOVECS      12
1026 1030  
1027 1031  #ifdef DEBUG
1028 1032  static int rfs_write_sync_hits = 0;
1029 1033  static int rfs_write_sync_misses = 0;
1030 1034  #endif
1031 1035  
1032 1036  /*
1033 1037   * Write data to file.
1034 1038   * Returns attributes of a file after writing some data to it.
1035 1039   *
1036 1040   * Any changes made here, especially in error handling might have
1037 1041   * to also be done in rfs_write (which clusters write requests).
1038 1042   */
1039 1043  /* ARGSUSED */
1040 1044  void
1041 1045  rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1042 1046      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1043 1047  {
1044 1048          int error;
1045 1049          vnode_t *vp;
1046 1050          rlim64_t rlimit;
1047 1051          struct vattr va;
1048 1052          struct uio uio;
1049 1053          struct iovec iov[MAX_IOVECS];
1050 1054          mblk_t *m;
1051 1055          struct iovec *iovp;
1052 1056          int iovcnt;
1053 1057          cred_t *savecred;
1054 1058          int in_crit = 0;
1055 1059          caller_context_t ct;
1056 1060  
1057 1061          vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1058 1062          if (vp == NULL) {
1059 1063                  ns->ns_status = NFSERR_STALE;
1060 1064                  return;
1061 1065          }
1062 1066  
1063 1067          if (rdonly(ro, vp)) {
1064 1068                  VN_RELE(vp);
1065 1069                  ns->ns_status = NFSERR_ROFS;
1066 1070                  return;
1067 1071          }
1068 1072  
1069 1073          if (vp->v_type != VREG) {
1070 1074                  VN_RELE(vp);
1071 1075                  ns->ns_status = NFSERR_ISDIR;
1072 1076                  return;
1073 1077          }
1074 1078  
1075 1079          ct.cc_sysid = 0;
1076 1080          ct.cc_pid = 0;
1077 1081          ct.cc_caller_id = nfs2_srv_caller_id;
1078 1082          ct.cc_flags = CC_DONTBLOCK;
1079 1083  
1080 1084          va.va_mask = AT_UID|AT_MODE;
1081 1085  
1082 1086          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1083 1087  
1084 1088          if (error) {
1085 1089                  VN_RELE(vp);
1086 1090                  ns->ns_status = puterrno(error);
1087 1091  
1088 1092                  return;
1089 1093          }
1090 1094  
1091 1095          if (crgetuid(cr) != va.va_uid) {
1092 1096                  /*
1093 1097                   * This is a kludge to allow writes of files created
1094 1098                   * with read only permission.  The owner of the file
1095 1099                   * is always allowed to write it.
1096 1100                   */
1097 1101                  error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1098 1102  
1099 1103                  if (error) {
1100 1104                          VN_RELE(vp);
1101 1105                          ns->ns_status = puterrno(error);
1102 1106                          return;
1103 1107                  }
1104 1108          }
1105 1109  
1106 1110          /*
1107 1111           * Can't access a mandatory lock file.  This might cause
1108 1112           * the NFS service thread to block forever waiting for a
1109 1113           * lock to be released that will never be released.
1110 1114           */
1111 1115          if (MANDLOCK(vp, va.va_mode)) {
1112 1116                  VN_RELE(vp);
1113 1117                  ns->ns_status = NFSERR_ACCES;
1114 1118                  return;
1115 1119          }
1116 1120  
1117 1121          /*
1118 1122           * We have to enter the critical region before calling VOP_RWLOCK
1119 1123           * to avoid a deadlock with ufs.
1120 1124           */
1121 1125          if (nbl_need_check(vp)) {
1122 1126                  nbl_start_crit(vp, RW_READER);
1123 1127                  in_crit = 1;
1124 1128                  if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1125 1129                      wa->wa_count, 0, NULL)) {
1126 1130                          error = EACCES;
1127 1131                          goto out;
1128 1132                  }
1129 1133          }
1130 1134  
1131 1135          error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1132 1136  
1133 1137          /* check if a monitor detected a delegation conflict */
1134 1138          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1135 1139                  goto out;
1136 1140          }
1137 1141  
1138 1142          if (wa->wa_data || wa->wa_rlist) {
1139 1143                  /* Do the RDMA thing if necessary */
1140 1144                  if (wa->wa_rlist) {
1141 1145                          iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1142 1146                          iov[0].iov_len = wa->wa_count;
1143 1147                  } else  {
1144 1148                          iov[0].iov_base = wa->wa_data;
1145 1149                          iov[0].iov_len = wa->wa_count;
1146 1150                  }
1147 1151                  uio.uio_iov = iov;
1148 1152                  uio.uio_iovcnt = 1;
1149 1153                  uio.uio_segflg = UIO_SYSSPACE;
1150 1154                  uio.uio_extflg = UIO_COPY_DEFAULT;
1151 1155                  uio.uio_loffset = (offset_t)wa->wa_offset;
1152 1156                  uio.uio_resid = wa->wa_count;
1153 1157                  /*
1154 1158                   * The limit is checked on the client. We
1155 1159                   * should allow any size writes here.
1156 1160                   */
1157 1161                  uio.uio_llimit = curproc->p_fsz_ctl;
1158 1162                  rlimit = uio.uio_llimit - wa->wa_offset;
1159 1163                  if (rlimit < (rlim64_t)uio.uio_resid)
1160 1164                          uio.uio_resid = (uint_t)rlimit;
1161 1165  
1162 1166                  /*
1163 1167                   * for now we assume no append mode
1164 1168                   */
1165 1169                  /*
1166 1170                   * We're changing creds because VM may fault and we need
1167 1171                   * the cred of the current thread to be used if quota
1168 1172                   * checking is enabled.
1169 1173                   */
1170 1174                  savecred = curthread->t_cred;
1171 1175                  curthread->t_cred = cr;
1172 1176                  error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1173 1177                  curthread->t_cred = savecred;
1174 1178          } else {
1175 1179  
1176 1180                  iovcnt = 0;
1177 1181                  for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1178 1182                          iovcnt++;
1179 1183                  if (iovcnt <= MAX_IOVECS) {
1180 1184  #ifdef DEBUG
1181 1185                          rfs_write_sync_hits++;
1182 1186  #endif
1183 1187                          iovp = iov;
1184 1188                  } else {
1185 1189  #ifdef DEBUG
1186 1190                          rfs_write_sync_misses++;
1187 1191  #endif
1188 1192                          iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1189 1193                  }
1190 1194                  mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1191 1195                  uio.uio_iov = iovp;
1192 1196                  uio.uio_iovcnt = iovcnt;
1193 1197                  uio.uio_segflg = UIO_SYSSPACE;
1194 1198                  uio.uio_extflg = UIO_COPY_DEFAULT;
1195 1199                  uio.uio_loffset = (offset_t)wa->wa_offset;
1196 1200                  uio.uio_resid = wa->wa_count;
1197 1201                  /*
1198 1202                   * The limit is checked on the client. We
1199 1203                   * should allow any size writes here.
1200 1204                   */
1201 1205                  uio.uio_llimit = curproc->p_fsz_ctl;
1202 1206                  rlimit = uio.uio_llimit - wa->wa_offset;
1203 1207                  if (rlimit < (rlim64_t)uio.uio_resid)
1204 1208                          uio.uio_resid = (uint_t)rlimit;
1205 1209  
1206 1210                  /*
1207 1211                   * For now we assume no append mode.
1208 1212                   */
1209 1213                  /*
1210 1214                   * We're changing creds because VM may fault and we need
1211 1215                   * the cred of the current thread to be used if quota
1212 1216                   * checking is enabled.
1213 1217                   */
1214 1218                  savecred = curthread->t_cred;
1215 1219                  curthread->t_cred = cr;
1216 1220                  error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1217 1221                  curthread->t_cred = savecred;
1218 1222  
1219 1223                  if (iovp != iov)
1220 1224                          kmem_free(iovp, sizeof (*iovp) * iovcnt);
1221 1225          }
1222 1226  
1223 1227          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1224 1228  
1225 1229          if (!error) {
1226 1230                  /*
1227 1231                   * Get attributes again so we send the latest mod
1228 1232                   * time to the client side for its cache.
1229 1233                   */
1230 1234                  va.va_mask = AT_ALL;    /* now we want everything */
1231 1235  
1232 1236                  error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1233 1237  
1234 1238                  /* check for overflows */
1235 1239                  if (!error) {
1236 1240                          acl_perm(vp, exi, &va, cr);
1237 1241                          error = vattr_to_nattr(&va, &ns->ns_attr);
1238 1242                  }
1239 1243          }
1240 1244  
1241 1245  out:
1242 1246          if (in_crit)
1243 1247                  nbl_end_crit(vp);
1244 1248          VN_RELE(vp);
1245 1249  
1246 1250          /* check if a monitor detected a delegation conflict */
1247 1251          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1248 1252                  /* mark as wouldblock so response is dropped */
1249 1253                  curthread->t_flag |= T_WOULDBLOCK;
1250 1254          else
1251 1255                  ns->ns_status = puterrno(error);
1252 1256  
1253 1257  }
1254 1258  
1255 1259  struct rfs_async_write {
1256 1260          struct nfswriteargs *wa;
1257 1261          struct nfsattrstat *ns;
1258 1262          struct svc_req *req;
1259 1263          cred_t *cr;
1260 1264          bool_t ro;
1261 1265          kthread_t *thread;
1262 1266          struct rfs_async_write *list;
1263 1267  };
1264 1268  
1265 1269  struct rfs_async_write_list {
1266 1270          fhandle_t *fhp;
1267 1271          kcondvar_t cv;
1268 1272          struct rfs_async_write *list;
1269 1273          struct rfs_async_write_list *next;
1270 1274  };
1271 1275  
1272 1276  static struct rfs_async_write_list *rfs_async_write_head = NULL;
1273 1277  static kmutex_t rfs_async_write_lock;
1274 1278  static int rfs_write_async = 1; /* enables write clustering if == 1 */
1275 1279  
1276 1280  #define MAXCLIOVECS     42
1277 1281  #define RFSWRITE_INITVAL (enum nfsstat) -1
1278 1282  
1279 1283  #ifdef DEBUG
1280 1284  static int rfs_write_hits = 0;
1281 1285  static int rfs_write_misses = 0;
1282 1286  #endif
1283 1287  
1284 1288  /*
1285 1289   * Write data to file.
1286 1290   * Returns attributes of a file after writing some data to it.
1287 1291   */
1288 1292  void
1289 1293  rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1290 1294      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1291 1295  {
1292 1296          int error;
1293 1297          vnode_t *vp;
1294 1298          rlim64_t rlimit;
1295 1299          struct vattr va;
1296 1300          struct uio uio;
1297 1301          struct rfs_async_write_list *lp;
1298 1302          struct rfs_async_write_list *nlp;
1299 1303          struct rfs_async_write *rp;
1300 1304          struct rfs_async_write *nrp;
1301 1305          struct rfs_async_write *trp;
1302 1306          struct rfs_async_write *lrp;
1303 1307          int data_written;
1304 1308          int iovcnt;
1305 1309          mblk_t *m;
1306 1310          struct iovec *iovp;
1307 1311          struct iovec *niovp;
1308 1312          struct iovec iov[MAXCLIOVECS];
1309 1313          int count;
1310 1314          int rcount;
  
1311 1315          uint_t off;
1312 1316          uint_t len;
1313 1317          struct rfs_async_write nrpsp;
1314 1318          struct rfs_async_write_list nlpsp;
1315 1319          ushort_t t_flag;
1316 1320          cred_t *savecred;
1317 1321          int in_crit = 0;
1318 1322          caller_context_t ct;
1319 1323          nfs_srv_t *nsrv;
1320 1324  
     1325 +        ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
1321 1326          nsrv = nfs_get_srv();
1322 1327          if (!nsrv->write_async) {
1323 1328                  rfs_write_sync(wa, ns, exi, req, cr, ro);
1324 1329                  return;
1325 1330          }
1326 1331  
1327 1332          /*
1328 1333           * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1329 1334           * is considered an OK.
1330 1335           */
1331 1336          ns->ns_status = RFSWRITE_INITVAL;
1332 1337  
1333 1338          nrp = &nrpsp;
1334 1339          nrp->wa = wa;
1335 1340          nrp->ns = ns;
1336 1341          nrp->req = req;
1337 1342          nrp->cr = cr;
1338 1343          nrp->ro = ro;
1339 1344          nrp->thread = curthread;
1340 1345  
1341 1346          ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1342 1347  
1343 1348          /*
1344 1349           * Look to see if there is already a cluster started
1345 1350           * for this file.
1346 1351           */
1347 1352          mutex_enter(&nsrv->async_write_lock);
1348 1353          for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
1349 1354                  if (bcmp(&wa->wa_fhandle, lp->fhp,
1350 1355                      sizeof (fhandle_t)) == 0)
1351 1356                          break;
1352 1357          }
1353 1358  
1354 1359          /*
1355 1360           * If lp is non-NULL, then there is already a cluster
1356 1361           * started.  We need to place ourselves in the cluster
1357 1362           * list in the right place as determined by starting
1358 1363           * offset.  Conflicts with non-blocking mandatory locked
1359 1364           * regions will be checked when the cluster is processed.
1360 1365           */
1361 1366          if (lp != NULL) {
1362 1367                  rp = lp->list;
1363 1368                  trp = NULL;
1364 1369                  while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1365 1370                          trp = rp;
1366 1371                          rp = rp->list;
1367 1372                  }
1368 1373                  nrp->list = rp;
1369 1374                  if (trp == NULL)
1370 1375                          lp->list = nrp;
1371 1376                  else
1372 1377                          trp->list = nrp;
1373 1378                  while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1374 1379                          cv_wait(&lp->cv, &nsrv->async_write_lock);
1375 1380                  mutex_exit(&nsrv->async_write_lock);
1376 1381  
1377 1382                  return;
1378 1383          }
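        /*
         * Worked example (editorial, not part of the original source):
         * the insertion above keeps each cluster's request list sorted by
         * wa_offset, so requests arriving at offsets 8K, 0 and 4K end up
         * ordered 0 -> 4K -> 8K.  That ordering is what lets the gather
         * loop further down coalesce adjacent requests into one VOP_WRITE.
         */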
1379 1384  
1380 1385          /*
1381 1386           * No cluster started yet, start one and add ourselves
1382 1387           * to the list of clusters.
1383 1388           */
1384 1389          nrp->list = NULL;
1385 1390  
1386 1391          nlp = &nlpsp;
1387 1392          nlp->fhp = &wa->wa_fhandle;
1388 1393          cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1389 1394          nlp->list = nrp;
1390 1395          nlp->next = NULL;
1391 1396  
1392 1397          if (nsrv->async_write_head == NULL) {
1393 1398                  nsrv->async_write_head = nlp;
1394 1399          } else {
1395 1400                  lp = nsrv->async_write_head;
1396 1401                  while (lp->next != NULL)
1397 1402                          lp = lp->next;
1398 1403                  lp->next = nlp;
1399 1404          }
1400 1405          mutex_exit(&nsrv->async_write_lock);
1401 1406  
1402 1407          /*
1403 1408           * Convert the file handle common to all of the requests
1404 1409           * in this cluster to a vnode.
1405 1410           */
1406 1411          vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1407 1412          if (vp == NULL) {
1408 1413                  mutex_enter(&nsrv->async_write_lock);
1409 1414                  if (nsrv->async_write_head == nlp)
1410 1415                          nsrv->async_write_head = nlp->next;
1411 1416                  else {
1412 1417                          lp = nsrv->async_write_head;
1413 1418                          while (lp->next != nlp)
1414 1419                                  lp = lp->next;
1415 1420                          lp->next = nlp->next;
1416 1421                  }
1417 1422                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1418 1423                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1419 1424                          rp->ns->ns_status = NFSERR_STALE;
1420 1425                          rp->thread->t_flag |= t_flag;
1421 1426                  }
1422 1427                  cv_broadcast(&nlp->cv);
1423 1428                  mutex_exit(&nsrv->async_write_lock);
1424 1429  
1425 1430                  return;
1426 1431          }
1427 1432  
1428 1433          /*
1429 1434           * Can only write regular files.  Attempts to write any
1430 1435           * other file types fail with EISDIR.
1431 1436           */
1432 1437          if (vp->v_type != VREG) {
1433 1438                  VN_RELE(vp);
1434 1439                  mutex_enter(&nsrv->async_write_lock);
1435 1440                  if (nsrv->async_write_head == nlp)
1436 1441                          nsrv->async_write_head = nlp->next;
1437 1442                  else {
1438 1443                          lp = nsrv->async_write_head;
1439 1444                          while (lp->next != nlp)
1440 1445                                  lp = lp->next;
1441 1446                          lp->next = nlp->next;
1442 1447                  }
1443 1448                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1444 1449                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1445 1450                          rp->ns->ns_status = NFSERR_ISDIR;
1446 1451                          rp->thread->t_flag |= t_flag;
1447 1452                  }
1448 1453                  cv_broadcast(&nlp->cv);
1449 1454                  mutex_exit(&nsrv->async_write_lock);
1450 1455  
1451 1456                  return;
1452 1457          }
1453 1458  
1454 1459          /*
1455 1460           * Enter the critical region before calling VOP_RWLOCK, to avoid a
1456 1461           * deadlock with ufs.
1457 1462           */
1458 1463          if (nbl_need_check(vp)) {
1459 1464                  nbl_start_crit(vp, RW_READER);
1460 1465                  in_crit = 1;
1461 1466          }
1462 1467  
1463 1468          ct.cc_sysid = 0;
1464 1469          ct.cc_pid = 0;
1465 1470          ct.cc_caller_id = nfs2_srv_caller_id;
1466 1471          ct.cc_flags = CC_DONTBLOCK;
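        /*
         * Editorial note: CC_DONTBLOCK asks the file system (and any
         * delegation monitor) not to block on a conflict; a conflicting
         * operation instead fails with EAGAIN and CC_WOULDBLOCK set in
         * cc_flags, which is what the checks after VOP_RWLOCK and
         * VOP_WRITE below test for.
         */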
1467 1472  
1468 1473          /*
1469 1474           * Lock the file for writing.  This operation provides
1470 1475           * the delay which allows clusters to grow.
1471 1476           */
1472 1477          error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1473 1478  
1474 1479          /* check if a monitor detected a delegation conflict */
1475 1480          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1476 1481                  if (in_crit)
1477 1482                          nbl_end_crit(vp);
1478 1483                  VN_RELE(vp);
1479 1484                  /* mark as wouldblock so response is dropped */
1480 1485                  curthread->t_flag |= T_WOULDBLOCK;
1481 1486                  mutex_enter(&nsrv->async_write_lock);
1482 1487                  if (nsrv->async_write_head == nlp)
1483 1488                          nsrv->async_write_head = nlp->next;
1484 1489                  else {
1485 1490                          lp = nsrv->async_write_head;
1486 1491                          while (lp->next != nlp)
1487 1492                                  lp = lp->next;
1488 1493                          lp->next = nlp->next;
1489 1494                  }
1490 1495                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1491 1496                          if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1492 1497                                  rp->ns->ns_status = puterrno(error);
1493 1498                                  rp->thread->t_flag |= T_WOULDBLOCK;
1494 1499                          }
1495 1500                  }
1496 1501                  cv_broadcast(&nlp->cv);
1497 1502                  mutex_exit(&nsrv->async_write_lock);
1498 1503  
1499 1504                  return;
1500 1505          }
1501 1506  
1502 1507          /*
1503 1508           * Disconnect this cluster from the list of clusters.
1504 1509           * The cluster that is being dealt with must be fixed
1505 1510           * in size after this point, so there is no reason
1506 1511           * to leave it on the list so that new requests can
1507 1512           * find it.
1508 1513           *
1509 1514           * The algorithm is that the first write request will
1510 1515           * create a cluster, convert the file handle to a
1511 1516           * vnode pointer, and then lock the file for writing.
1512 1517           * This request is not likely to be clustered with
1513 1518           * any others.  However, the next request will create
1514 1519           * a new cluster and be blocked in VOP_RWLOCK while
1515 1520           * the first request is being processed.  This delay
1516 1521           * will allow more requests to be clustered in this
1517 1522           * second cluster.
1518 1523           */
1519 1524          mutex_enter(&nsrv->async_write_lock);
1520 1525          if (nsrv->async_write_head == nlp)
1521 1526                  nsrv->async_write_head = nlp->next;
1522 1527          else {
1523 1528                  lp = nsrv->async_write_head;
1524 1529                  while (lp->next != nlp)
1525 1530                          lp = lp->next;
1526 1531                  lp->next = nlp->next;
1527 1532          }
1528 1533          mutex_exit(&nsrv->async_write_lock);
1529 1534  
1530 1535          /*
1531 1536           * Step through the list of requests in this cluster.
1532 1537           * We need to check permissions to make sure that all
1533 1538           * of the requests have sufficient permission to write
1534 1539           * the file.  A cluster can be composed of requests
1535 1540           * from different clients and different users on each
1536 1541           * client.
1537 1542           *
1538 1543           * As a side effect, we also calculate the size of the
1539 1544           * byte range that this cluster encompasses.
1540 1545           */
1541 1546          rp = nlp->list;
1542 1547          off = rp->wa->wa_offset;
1543 1548          len = (uint_t)0;
1544 1549          do {
1545 1550                  if (rdonly(rp->ro, vp)) {
1546 1551                          rp->ns->ns_status = NFSERR_ROFS;
1547 1552                          t_flag = curthread->t_flag & T_WOULDBLOCK;
1548 1553                          rp->thread->t_flag |= t_flag;
1549 1554                          continue;
1550 1555                  }
1551 1556  
1552 1557                  va.va_mask = AT_UID|AT_MODE;
1553 1558  
1554 1559                  error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1555 1560  
1556 1561                  if (!error) {
1557 1562                          if (crgetuid(rp->cr) != va.va_uid) {
1558 1563                                  /*
1559 1564                                   * This is a kludge to allow writes of files
1560 1565                                   * created with read only permission.  The
1561 1566                                   * owner of the file is always allowed to
1562 1567                                   * write it.
1563 1568                                   */
1564 1569                                  error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1565 1570                          }
1566 1571                          if (!error && MANDLOCK(vp, va.va_mode))
1567 1572                                  error = EACCES;
1568 1573                  }
1569 1574  
1570 1575                  /*
1571 1576                   * Check for a conflict with a nbmand-locked region.
1572 1577                   */
1573 1578                  if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1574 1579                      rp->wa->wa_count, 0, NULL)) {
1575 1580                          error = EACCES;
1576 1581                  }
1577 1582  
1578 1583                  if (error) {
1579 1584                          rp->ns->ns_status = puterrno(error);
1580 1585                          t_flag = curthread->t_flag & T_WOULDBLOCK;
1581 1586                          rp->thread->t_flag |= t_flag;
1582 1587                          continue;
1583 1588                  }
1584 1589                  if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1585 1590                          len = rp->wa->wa_offset + rp->wa->wa_count - off;
1586 1591          } while ((rp = rp->list) != NULL);
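        /*
         * Editorial note: because the list is kept sorted by offset, off is
         * the lowest offset in the cluster and len grows to cover the
         * highest wa_offset + wa_count seen, so [off, off + len) is the
         * byte range later flushed by VOP_PUTPAGE if any data was written.
         */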
1587 1592  
1588 1593          /*
1589 1594           * Step through the cluster attempting to gather as many
1590 1595           * requests which are contiguous as possible.  These
1591 1596           * contiguous requests are handled via one call to VOP_WRITE
1592 1597   * instead of separate calls to VOP_WRITE.  We also keep
1593 1598           * track of the fact that any data was written.
1594 1599           */
1595 1600          rp = nlp->list;
1596 1601          data_written = 0;
1597 1602          do {
1598 1603                  /*
1599 1604                   * Skip any requests which are already marked as having an
1600 1605                   * error.
1601 1606                   */
1602 1607                  if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1603 1608                          rp = rp->list;
1604 1609                          continue;
1605 1610                  }
1606 1611  
1607 1612                  /*
1608 1613                   * Count the number of iovec's which are required
1609 1614                   * to handle this set of requests.  One iovec is
1610 1615                   * needed for each data buffer, whether addressed
1611 1616                   * by wa_data or by the b_rptr pointers in the
1612 1617                   * mblk chains.
1613 1618                   */
1614 1619                  iovcnt = 0;
1615 1620                  lrp = rp;
1616 1621                  for (;;) {
1617 1622                          if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1618 1623                                  iovcnt++;
1619 1624                          else {
1620 1625                                  m = lrp->wa->wa_mblk;
1621 1626                                  while (m != NULL) {
1622 1627                                          iovcnt++;
1623 1628                                          m = m->b_cont;
1624 1629                                  }
1625 1630                          }
1626 1631                          if (lrp->list == NULL ||
1627 1632                              lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1628 1633                              lrp->wa->wa_offset + lrp->wa->wa_count !=
1629 1634                              lrp->list->wa->wa_offset) {
1630 1635                                  lrp = lrp->list;
1631 1636                                  break;
1632 1637                          }
1633 1638                          lrp = lrp->list;
1634 1639                  }
1635 1640  
1636 1641                  if (iovcnt <= MAXCLIOVECS) {
1637 1642  #ifdef DEBUG
1638 1643                          rfs_write_hits++;
1639 1644  #endif
1640 1645                          niovp = iov;
1641 1646                  } else {
1642 1647  #ifdef DEBUG
1643 1648                          rfs_write_misses++;
1644 1649  #endif
1645 1650                          niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1646 1651                  }
1647 1652                  /*
1648 1653                   * Put together the scatter/gather iovecs.
1649 1654                   */
1650 1655                  iovp = niovp;
1651 1656                  trp = rp;
1652 1657                  count = 0;
1653 1658                  do {
1654 1659                          if (trp->wa->wa_data || trp->wa->wa_rlist) {
1655 1660                                  if (trp->wa->wa_rlist) {
1656 1661                                          iovp->iov_base =
1657 1662                                              (char *)((trp->wa->wa_rlist)->
1658 1663                                              u.c_daddr3);
1659 1664                                          iovp->iov_len = trp->wa->wa_count;
1660 1665                                  } else  {
1661 1666                                          iovp->iov_base = trp->wa->wa_data;
1662 1667                                          iovp->iov_len = trp->wa->wa_count;
1663 1668                                  }
1664 1669                                  iovp++;
1665 1670                          } else {
1666 1671                                  m = trp->wa->wa_mblk;
1667 1672                                  rcount = trp->wa->wa_count;
1668 1673                                  while (m != NULL) {
1669 1674                                          iovp->iov_base = (caddr_t)m->b_rptr;
1670 1675                                          iovp->iov_len = (m->b_wptr - m->b_rptr);
1671 1676                                          rcount -= iovp->iov_len;
1672 1677                                          if (rcount < 0)
1673 1678                                                  iovp->iov_len += rcount;
1674 1679                                          iovp++;
1675 1680                                          if (rcount <= 0)
1676 1681                                                  break;
1677 1682                                          m = m->b_cont;
1678 1683                                  }
1679 1684                          }
1680 1685                          count += trp->wa->wa_count;
1681 1686                          trp = trp->list;
1682 1687                  } while (trp != lrp);
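                /*
                 * Worked example (editorial): three contiguous 8K requests
                 * carried in wa_data buffers yield iovcnt == 3 and
                 * count == 24K; a request carried as an mblk chain instead
                 * contributes one iovec per mblk, trimmed above so the
                 * chain never supplies more than its wa_count bytes.
                 */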
1683 1688  
1684 1689                  uio.uio_iov = niovp;
1685 1690                  uio.uio_iovcnt = iovcnt;
1686 1691                  uio.uio_segflg = UIO_SYSSPACE;
1687 1692                  uio.uio_extflg = UIO_COPY_DEFAULT;
1688 1693                  uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1689 1694                  uio.uio_resid = count;
1690 1695                  /*
1691 1696                   * The limit is checked on the client. We
1692 1697                   * should allow any size writes here.
1693 1698                   */
1694 1699                  uio.uio_llimit = curproc->p_fsz_ctl;
1695 1700                  rlimit = uio.uio_llimit - rp->wa->wa_offset;
1696 1701                  if (rlimit < (rlim64_t)uio.uio_resid)
1697 1702                          uio.uio_resid = (uint_t)rlimit;
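                /*
                 * Editorial note: uio_llimit is the server process's
                 * file-size resource limit; uio_resid is clamped so this
                 * write cannot extend the file past that limit, even though
                 * the protocol-level size check is left to the client as
                 * noted above.
                 */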
1698 1703  
1699 1704                  /*
1700 1705                   * For now we assume no append mode.
1701 1706                   */
1702 1707  
1703 1708                  /*
1704 1709                   * We're changing creds because VM may fault
1705 1710                   * and we need the cred of the current
1706 1711                   * thread to be used if quota checking is
1707 1712                   * enabled.
1708 1713                   */
1709 1714                  savecred = curthread->t_cred;
1710 1715                  curthread->t_cred = cr;
1711 1716                  error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1712 1717                  curthread->t_cred = savecred;
1713 1718  
1714 1719                  /* check if a monitor detected a delegation conflict */
1715 1720                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1716 1721                          /* mark as wouldblock so response is dropped */
1717 1722                          curthread->t_flag |= T_WOULDBLOCK;
1718 1723  
1719 1724                  if (niovp != iov)
1720 1725                          kmem_free(niovp, sizeof (*niovp) * iovcnt);
1721 1726  
1722 1727                  if (!error) {
1723 1728                          data_written = 1;
1724 1729                          /*
1725 1730                           * Get attributes again so we send the latest mod
1726 1731                           * time to the client side for its cache.
1727 1732                           */
1728 1733                          va.va_mask = AT_ALL;    /* now we want everything */
1729 1734  
1730 1735                          error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1731 1736  
1732 1737                          if (!error)
1733 1738                                  acl_perm(vp, exi, &va, rp->cr);
1734 1739                  }
1735 1740  
1736 1741                  /*
1737 1742                   * Fill in the status responses for each request
1738 1743                   * which was just handled.  Also, copy the latest
1739 1744                   * attributes in to the attribute responses if
1740 1745                   * appropriate.
1741 1746                   */
1742 1747                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1743 1748                  do {
1744 1749                          rp->thread->t_flag |= t_flag;
1745 1750                          /* check for overflows */
1746 1751                          if (!error) {
1747 1752                                  error  = vattr_to_nattr(&va, &rp->ns->ns_attr);
1748 1753                          }
1749 1754                          rp->ns->ns_status = puterrno(error);
1750 1755                          rp = rp->list;
1751 1756                  } while (rp != lrp);
1752 1757          } while (rp != NULL);
1753 1758  
1754 1759          /*
1755 1760           * If any data was written at all, then we need to flush
1756 1761           * the data and metadata to stable storage.
1757 1762           */
1758 1763          if (data_written) {
1759 1764                  error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1760 1765  
1761 1766                  if (!error) {
1762 1767                          error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1763 1768                  }
1764 1769          }
1765 1770  
1766 1771          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1767 1772  
1768 1773          if (in_crit)
1769 1774                  nbl_end_crit(vp);
1770 1775          VN_RELE(vp);
1771 1776  
1772 1777          t_flag = curthread->t_flag & T_WOULDBLOCK;
1773 1778          mutex_enter(&nsrv->async_write_lock);
1774 1779          for (rp = nlp->list; rp != NULL; rp = rp->list) {
1775 1780                  if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1776 1781                          rp->ns->ns_status = puterrno(error);
1777 1782                          rp->thread->t_flag |= t_flag;
1778 1783                  }
1779 1784          }
1780 1785          cv_broadcast(&nlp->cv);
1781 1786          mutex_exit(&nsrv->async_write_lock);
1782 1787  
1783 1788  }
1784 1789  
1785 1790  void *
1786 1791  rfs_write_getfh(struct nfswriteargs *wa)
1787 1792  {
1788 1793          return (&wa->wa_fhandle);
1789 1794  }
1790 1795  
1791 1796  /*
1792 1797   * Create a file.
1793 1798   * Creates a file with given attributes and returns those attributes
1794 1799   * and an fhandle for the new file.
1795 1800   */
1796 1801  void
1797 1802  rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1798 1803      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1799 1804  {
1800 1805          int error;
1801 1806          int lookuperr;
1802 1807          int in_crit = 0;
1803 1808          struct vattr va;
1804 1809          vnode_t *vp;
1805 1810          vnode_t *realvp;
1806 1811          vnode_t *dvp;
1807 1812          char *name = args->ca_da.da_name;
1808 1813          vnode_t *tvp = NULL;
1809 1814          int mode;
1810 1815          int lookup_ok;
1811 1816          bool_t trunc;
1812 1817          struct sockaddr *ca;
1813 1818  
1814 1819          /*
1815 1820           * Disallow NULL paths
1816 1821           */
1817 1822          if (name == NULL || *name == '\0') {
1818 1823                  dr->dr_status = NFSERR_ACCES;
1819 1824                  return;
1820 1825          }
1821 1826  
1822 1827          dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1823 1828          if (dvp == NULL) {
1824 1829                  dr->dr_status = NFSERR_STALE;
1825 1830                  return;
1826 1831          }
1827 1832  
1828 1833          error = sattr_to_vattr(args->ca_sa, &va);
1829 1834          if (error) {
1830 1835                  dr->dr_status = puterrno(error);
1831 1836                  return;
1832 1837          }
1833 1838  
1834 1839          /*
1835 1840           * Must specify the mode.
1836 1841           */
1837 1842          if (!(va.va_mask & AT_MODE)) {
1838 1843                  VN_RELE(dvp);
1839 1844                  dr->dr_status = NFSERR_INVAL;
1840 1845                  return;
1841 1846          }
1842 1847  
1843 1848          /*
1844 1849           * This is a completely gross hack to make mknod
1845 1850           * work over the wire until we can whack the protocol
1846 1851           */
1847 1852          if ((va.va_mode & IFMT) == IFCHR) {
1848 1853                  if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1849 1854                          va.va_type = VFIFO;     /* xtra kludge for named pipe */
1850 1855                  else {
1851 1856                          va.va_type = VCHR;
1852 1857                          /*
1853 1858                           * uncompress the received dev_t
1854 1859                           * if the top half is zero indicating a request
1855 1860                           * from an `older style' OS.
1856 1861                           */
1857 1862                          if ((va.va_size & 0xffff0000) == 0)
1858 1863                                  va.va_rdev = nfsv2_expdev(va.va_size);
1859 1864                          else
1860 1865                                  va.va_rdev = (dev_t)va.va_size;
1861 1866                  }
1862 1867                  va.va_mask &= ~AT_SIZE;
1863 1868          } else if ((va.va_mode & IFMT) == IFBLK) {
1864 1869                  va.va_type = VBLK;
1865 1870                  /*
1866 1871                   * uncompress the received dev_t
1867 1872                   * if the top half is zero indicating a request
1868 1873                   * from an `older style' OS.
1869 1874                   */
1870 1875                  if ((va.va_size & 0xffff0000) == 0)
1871 1876                          va.va_rdev = nfsv2_expdev(va.va_size);
1872 1877                  else
1873 1878                          va.va_rdev = (dev_t)va.va_size;
1874 1879                  va.va_mask &= ~AT_SIZE;
1875 1880          } else if ((va.va_mode & IFMT) == IFSOCK) {
1876 1881                  va.va_type = VSOCK;
1877 1882          } else {
1878 1883                  va.va_type = VREG;
1879 1884          }
1880 1885          va.va_mode &= ~IFMT;
1881 1886          va.va_mask |= AT_TYPE;
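        /*
         * Editorial note: in the IFCHR/IFBLK cases above the client
         * overloads the size attribute to carry the device number.  An
         * "older style" client sends a 16-bit compressed dev_t (top half
         * zero), which nfsv2_expdev() expands; otherwise the value is used
         * as a full dev_t.  AT_SIZE is cleared so the overloaded value is
         * never applied as a file size.
         */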
1882 1887  
1883 1888          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1884 1889          name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1885 1890              MAXPATHLEN);
1886 1891          if (name == NULL) {
1887 1892                  dr->dr_status = puterrno(EINVAL);
1888 1893                  return;
1889 1894          }
1890 1895  
1891 1896          /*
1892 1897           * Why was the choice made to use VWRITE as the mode to the
1893 1898           * call to VOP_CREATE ? This results in a bug.  When a client
1894 1899           * opens a file that already exists and is RDONLY, the second
1895 1900           * open fails with EACCES because of the mode.
1896 1901           * bug ID 1054648.
1897 1902           */
1898 1903          lookup_ok = 0;
1899 1904          mode = VWRITE;
1900 1905          if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1901 1906                  error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1902 1907                      NULL, NULL, NULL);
1903 1908                  if (!error) {
1904 1909                          struct vattr at;
1905 1910  
1906 1911                          lookup_ok = 1;
1907 1912                          at.va_mask = AT_MODE;
1908 1913                          error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1909 1914                          if (!error)
1910 1915                                  mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1911 1916                          VN_RELE(tvp);
1912 1917                          tvp = NULL;
1913 1918                  }
1914 1919          }
1915 1920  
1916 1921          if (!lookup_ok) {
1917 1922                  if (rdonly(ro, dvp)) {
1918 1923                          error = EROFS;
1919 1924                  } else if (va.va_type != VREG && va.va_type != VFIFO &&
1920 1925                      va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1921 1926                          error = EPERM;
1922 1927                  } else {
1923 1928                          error = 0;
1924 1929                  }
1925 1930          }
1926 1931  
1927 1932          /*
1928 1933           * If file size is being modified on an already existing file
1929 1934           * make sure that there are no conflicting non-blocking mandatory
1930 1935           * locks in the region being manipulated. Return EACCES if there
1931 1936           * are conflicting locks.
1932 1937           */
1933 1938          if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1934 1939                  lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1935 1940                      NULL, NULL, NULL);
1936 1941  
1937 1942                  if (!lookuperr &&
1938 1943                      rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1939 1944                          VN_RELE(tvp);
1940 1945                          curthread->t_flag |= T_WOULDBLOCK;
1941 1946                          goto out;
1942 1947                  }
1943 1948  
1944 1949                  if (!lookuperr && nbl_need_check(tvp)) {
1945 1950                          /*
1946 1951                           * The file exists. Now check if it has any
1947 1952                           * conflicting non-blocking mandatory locks
1948 1953                           * in the region being changed.
1949 1954                           */
1950 1955                          struct vattr bva;
1951 1956                          u_offset_t offset;
1952 1957                          ssize_t length;
1953 1958  
1954 1959                          nbl_start_crit(tvp, RW_READER);
1955 1960                          in_crit = 1;
1956 1961  
1957 1962                          bva.va_mask = AT_SIZE;
1958 1963                          error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1959 1964                          if (!error) {
1960 1965                                  if (va.va_size < bva.va_size) {
1961 1966                                          offset = va.va_size;
1962 1967                                          length = bva.va_size - va.va_size;
1963 1968                                  } else {
1964 1969                                          offset = bva.va_size;
1965 1970                                          length = va.va_size - bva.va_size;
1966 1971                                  }
1967 1972                                  if (length) {
1968 1973                                          if (nbl_conflict(tvp, NBL_WRITE,
1969 1974                                              offset, length, 0, NULL)) {
1970 1975                                                  error = EACCES;
1971 1976                                          }
1972 1977                                  }
1973 1978                          }
1974 1979                          if (error) {
1975 1980                                  nbl_end_crit(tvp);
1976 1981                                  VN_RELE(tvp);
1977 1982                                  in_crit = 0;
1978 1983                          }
1979 1984                  } else if (tvp != NULL) {
1980 1985                          VN_RELE(tvp);
1981 1986                  }
1982 1987          }
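        /*
         * Editorial note: when the size check above finds an existing
         * file, tvp stays held and in_crit stays set on the success path,
         * so the critical region entered above is still in force while
         * VOP_CREATE runs below.
         */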
1983 1988  
1984 1989          if (!error) {
1985 1990                  /*
1986 1991                   * If the filesystem is shared with nosuid, then remove any
1987 1992                   * setuid/setgid bits on create.
1988 1993                   */
1989 1994                  if (va.va_type == VREG &&
1990 1995                      exi->exi_export.ex_flags & EX_NOSUID)
1991 1996                          va.va_mode &= ~(VSUID | VSGID);
1992 1997  
1993 1998                  error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1994 1999                      NULL, NULL);
1995 2000  
1996 2001                  if (!error) {
1997 2002  
1998 2003                          if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1999 2004                                  trunc = TRUE;
2000 2005                          else
2001 2006                                  trunc = FALSE;
2002 2007  
2003 2008                          if (rfs4_check_delegated(FWRITE, vp, trunc)) {
2004 2009                                  VN_RELE(vp);
2005 2010                                  curthread->t_flag |= T_WOULDBLOCK;
2006 2011                                  goto out;
2007 2012                          }
2008 2013                          va.va_mask = AT_ALL;
2009 2014  
2010 2015                          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2011 2016  
2012 2017                          /* check for overflows */
2013 2018                          if (!error) {
2014 2019                                  acl_perm(vp, exi, &va, cr);
2015 2020                                  error = vattr_to_nattr(&va, &dr->dr_attr);
2016 2021                                  if (!error) {
2017 2022                                          error = makefh(&dr->dr_fhandle, vp,
2018 2023                                              exi);
2019 2024                                  }
2020 2025                          }
2021 2026                          /*
2022 2027                           * Force modified metadata out to stable storage.
2023 2028                           *
2024 2029                   * If an underlying vp exists, pass it to VOP_FSYNC
2025 2030                           */
2026 2031                          if (VOP_REALVP(vp, &realvp, NULL) == 0)
2027 2032                                  (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2028 2033                          else
2029 2034                                  (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2030 2035                          VN_RELE(vp);
2031 2036                  }
2032 2037  
2033 2038                  if (in_crit) {
2034 2039                          nbl_end_crit(tvp);
2035 2040                          VN_RELE(tvp);
2036 2041                  }
2037 2042          }
2038 2043  
2039 2044          /*
2040 2045           * Force modified data and metadata out to stable storage.
2041 2046           */
2042 2047          (void) VOP_FSYNC(dvp, 0, cr, NULL);
2043 2048  
2044 2049  out:
2045 2050  
2046 2051          VN_RELE(dvp);
2047 2052  
2048 2053          dr->dr_status = puterrno(error);
2049 2054  
2050 2055          if (name != args->ca_da.da_name)
2051 2056                  kmem_free(name, MAXPATHLEN);
2052 2057  }
2053 2058  void *
2054 2059  rfs_create_getfh(struct nfscreatargs *args)
2055 2060  {
2056 2061          return (args->ca_da.da_fhandle);
2057 2062  }
2058 2063  
2059 2064  /*
2060 2065   * Remove a file.
2061 2066   * Remove named file from parent directory.
2062 2067   */
2063 2068  /* ARGSUSED */
2064 2069  void
2065 2070  rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
2066 2071      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2067 2072  {
2068 2073          int error = 0;
2069 2074          vnode_t *vp;
2070 2075          vnode_t *targvp;
2071 2076          int in_crit = 0;
2072 2077  
2073 2078          /*
2074 2079           * Disallow NULL paths
2075 2080           */
2076 2081          if (da->da_name == NULL || *da->da_name == '\0') {
2077 2082                  *status = NFSERR_ACCES;
2078 2083                  return;
2079 2084          }
2080 2085  
2081 2086          vp = nfs_fhtovp(da->da_fhandle, exi);
2082 2087          if (vp == NULL) {
2083 2088                  *status = NFSERR_STALE;
2084 2089                  return;
2085 2090          }
2086 2091  
2087 2092          if (rdonly(ro, vp)) {
2088 2093                  VN_RELE(vp);
2089 2094                  *status = NFSERR_ROFS;
2090 2095                  return;
2091 2096          }
2092 2097  
2093 2098          /*
2094 2099           * Check for a conflict with a non-blocking mandatory share reservation.
2095 2100           */
2096 2101          error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
2097 2102              NULL, cr, NULL, NULL, NULL);
2098 2103          if (error != 0) {
2099 2104                  VN_RELE(vp);
2100 2105                  *status = puterrno(error);
2101 2106                  return;
2102 2107          }
2103 2108  
2104 2109          /*
2105 2110           * If the file is delegated to a v4 client, then initiate
2106 2111           * recall and drop this request (by setting T_WOULDBLOCK).
2107 2112           * The client will eventually re-transmit the request and
2108 2113           * (hopefully), by then, the v4 client will have returned
2109 2114           * the delegation.
2110 2115           */
2111 2116  
2112 2117          if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2113 2118                  VN_RELE(vp);
2114 2119                  VN_RELE(targvp);
2115 2120                  curthread->t_flag |= T_WOULDBLOCK;
2116 2121                  return;
2117 2122          }
2118 2123  
2119 2124          if (nbl_need_check(targvp)) {
2120 2125                  nbl_start_crit(targvp, RW_READER);
2121 2126                  in_crit = 1;
2122 2127                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2123 2128                          error = EACCES;
2124 2129                          goto out;
2125 2130                  }
2126 2131          }
2127 2132  
2128 2133          error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2129 2134  
2130 2135          /*
2131 2136           * Force modified data and metadata out to stable storage.
2132 2137           */
2133 2138          (void) VOP_FSYNC(vp, 0, cr, NULL);
2134 2139  
2135 2140  out:
2136 2141          if (in_crit)
2137 2142                  nbl_end_crit(targvp);
2138 2143          VN_RELE(targvp);
2139 2144          VN_RELE(vp);
2140 2145  
2141 2146          *status = puterrno(error);
2142 2147  
2143 2148  }
2144 2149  
2145 2150  void *
2146 2151  rfs_remove_getfh(struct nfsdiropargs *da)
2147 2152  {
2148 2153          return (da->da_fhandle);
2149 2154  }
2150 2155  
2151 2156  /*
2152 2157   * Rename a file.
2153 2158   * Give a file (from) a new name (to).
2154 2159   */
2155 2160  /* ARGSUSED */
2156 2161  void
2157 2162  rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
2158 2163      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2159 2164  {
2160 2165          int error = 0;
2161 2166          vnode_t *fromvp;
2162 2167          vnode_t *tovp;
2163 2168          struct exportinfo *to_exi;
2164 2169          fhandle_t *fh;
2165 2170          vnode_t *srcvp;
2166 2171          vnode_t *targvp;
2167 2172          int in_crit = 0;
2168 2173  
2169 2174          fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2170 2175          if (fromvp == NULL) {
2171 2176                  *status = NFSERR_STALE;
2172 2177                  return;
2173 2178          }
2174 2179  
2175 2180          fh = args->rna_to.da_fhandle;
2176 2181          to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2177 2182          if (to_exi == NULL) {
2178 2183                  VN_RELE(fromvp);
2179 2184                  *status = NFSERR_ACCES;
2180 2185                  return;
2181 2186          }
2182 2187          exi_rele(to_exi);
2183 2188  
2184 2189          if (to_exi != exi) {
2185 2190                  VN_RELE(fromvp);
2186 2191                  *status = NFSERR_XDEV;
2187 2192                  return;
2188 2193          }
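        /*
         * Editorial note: the target export is looked up only to confirm
         * that source and destination live in the same export (otherwise
         * NFSERR_XDEV).  The reference is dropped right after the
         * checkexport() NULL check because to_exi is only compared by
         * pointer here and never dereferenced again.
         */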
2189 2194  
2190 2195          tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2191 2196          if (tovp == NULL) {
2192 2197                  VN_RELE(fromvp);
2193 2198                  *status = NFSERR_STALE;
2194 2199                  return;
2195 2200          }
2196 2201  
2197 2202          if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2198 2203                  VN_RELE(tovp);
2199 2204                  VN_RELE(fromvp);
2200 2205                  *status = NFSERR_NOTDIR;
2201 2206                  return;
2202 2207          }
2203 2208  
2204 2209          /*
2205 2210           * Disallow NULL paths
2206 2211           */
2207 2212          if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2208 2213              args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2209 2214                  VN_RELE(tovp);
2210 2215                  VN_RELE(fromvp);
2211 2216                  *status = NFSERR_ACCES;
2212 2217                  return;
2213 2218          }
2214 2219  
2215 2220          if (rdonly(ro, tovp)) {
2216 2221                  VN_RELE(tovp);
2217 2222                  VN_RELE(fromvp);
2218 2223                  *status = NFSERR_ROFS;
2219 2224                  return;
2220 2225          }
2221 2226  
2222 2227          /*
2223 2228           * Check for a conflict with a non-blocking mandatory share reservation.
2224 2229           */
2225 2230          error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2226 2231              NULL, cr, NULL, NULL, NULL);
2227 2232          if (error != 0) {
2228 2233                  VN_RELE(tovp);
2229 2234                  VN_RELE(fromvp);
2230 2235                  *status = puterrno(error);
2231 2236                  return;
2232 2237          }
2233 2238  
2234 2239          /* Check for delegations on the source file */
2235 2240  
2236 2241          if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2237 2242                  VN_RELE(tovp);
2238 2243                  VN_RELE(fromvp);
2239 2244                  VN_RELE(srcvp);
2240 2245                  curthread->t_flag |= T_WOULDBLOCK;
2241 2246                  return;
2242 2247          }
2243 2248  
2244 2249          /* Check for delegation on the file being renamed over, if it exists */
2245 2250  
2246 2251          if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2247 2252              VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2248 2253              NULL, NULL, NULL) == 0) {
2249 2254  
2250 2255                  if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2251 2256                          VN_RELE(tovp);
2252 2257                          VN_RELE(fromvp);
2253 2258                          VN_RELE(srcvp);
2254 2259                          VN_RELE(targvp);
2255 2260                          curthread->t_flag |= T_WOULDBLOCK;
2256 2261                          return;
2257 2262                  }
2258 2263                  VN_RELE(targvp);
2259 2264          }
2260 2265  
2261 2266  
2262 2267          if (nbl_need_check(srcvp)) {
2263 2268                  nbl_start_crit(srcvp, RW_READER);
2264 2269                  in_crit = 1;
2265 2270                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2266 2271                          error = EACCES;
2267 2272                          goto out;
2268 2273                  }
2269 2274          }
2270 2275  
2271 2276          error = VOP_RENAME(fromvp, args->rna_from.da_name,
2272 2277              tovp, args->rna_to.da_name, cr, NULL, 0);
2273 2278  
2274 2279          if (error == 0)
2275 2280                  vn_renamepath(tovp, srcvp, args->rna_to.da_name,
2276 2281                      strlen(args->rna_to.da_name));
2277 2282  
2278 2283          /*
2279 2284           * Force modified data and metadata out to stable storage.
2280 2285           */
2281 2286          (void) VOP_FSYNC(tovp, 0, cr, NULL);
2282 2287          (void) VOP_FSYNC(fromvp, 0, cr, NULL);
2283 2288  
2284 2289  out:
2285 2290          if (in_crit)
2286 2291                  nbl_end_crit(srcvp);
2287 2292          VN_RELE(srcvp);
2288 2293          VN_RELE(tovp);
2289 2294          VN_RELE(fromvp);
2290 2295  
2291 2296          *status = puterrno(error);
2292 2297  
2293 2298  }
2294 2299  void *
2295 2300  rfs_rename_getfh(struct nfsrnmargs *args)
2296 2301  {
2297 2302          return (args->rna_from.da_fhandle);
2298 2303  }
2299 2304  
2300 2305  /*
2301 2306   * Link to a file.
2302 2307   * Create a file (to) which is a hard link to the given file (from).
2303 2308   */
2304 2309  /* ARGSUSED */
2305 2310  void
2306 2311  rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2307 2312      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2308 2313  {
2309 2314          int error;
2310 2315          vnode_t *fromvp;
2311 2316          vnode_t *tovp;
2312 2317          struct exportinfo *to_exi;
2313 2318          fhandle_t *fh;
2314 2319  
2315 2320          fromvp = nfs_fhtovp(args->la_from, exi);
2316 2321          if (fromvp == NULL) {
2317 2322                  *status = NFSERR_STALE;
2318 2323                  return;
2319 2324          }
2320 2325  
2321 2326          fh = args->la_to.da_fhandle;
2322 2327          to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2323 2328          if (to_exi == NULL) {
2324 2329                  VN_RELE(fromvp);
2325 2330                  *status = NFSERR_ACCES;
2326 2331                  return;
2327 2332          }
2328 2333          exi_rele(to_exi);
2329 2334  
2330 2335          if (to_exi != exi) {
2331 2336                  VN_RELE(fromvp);
2332 2337                  *status = NFSERR_XDEV;
2333 2338                  return;
2334 2339          }
2335 2340  
2336 2341          tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2337 2342          if (tovp == NULL) {
2338 2343                  VN_RELE(fromvp);
2339 2344                  *status = NFSERR_STALE;
2340 2345                  return;
2341 2346          }
2342 2347  
2343 2348          if (tovp->v_type != VDIR) {
2344 2349                  VN_RELE(tovp);
2345 2350                  VN_RELE(fromvp);
2346 2351                  *status = NFSERR_NOTDIR;
2347 2352                  return;
2348 2353          }
2349 2354          /*
2350 2355           * Disallow NULL paths
2351 2356           */
2352 2357          if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2353 2358                  VN_RELE(tovp);
2354 2359                  VN_RELE(fromvp);
2355 2360                  *status = NFSERR_ACCES;
2356 2361                  return;
2357 2362          }
2358 2363  
2359 2364          if (rdonly(ro, tovp)) {
2360 2365                  VN_RELE(tovp);
2361 2366                  VN_RELE(fromvp);
2362 2367                  *status = NFSERR_ROFS;
2363 2368                  return;
2364 2369          }
2365 2370  
2366 2371          error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2367 2372  
2368 2373          /*
2369 2374           * Force modified data and metadata out to stable storage.
2370 2375           */
2371 2376          (void) VOP_FSYNC(tovp, 0, cr, NULL);
2372 2377          (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2373 2378  
2374 2379          VN_RELE(tovp);
2375 2380          VN_RELE(fromvp);
2376 2381  
2377 2382          *status = puterrno(error);
2378 2383  
2379 2384  }
2380 2385  void *
2381 2386  rfs_link_getfh(struct nfslinkargs *args)
2382 2387  {
2383 2388          return (args->la_from);
2384 2389  }
2385 2390  
2386 2391  /*
2387 2392   * Symbolically link to a file.
2388 2393   * Create a file (from) with the given attributes which is a symbolic link
2389 2394   * to the given path name (tnm).
2390 2395   */
2391 2396  void
2392 2397  rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2393 2398      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2394 2399  {
2395 2400          int error;
2396 2401          struct vattr va;
2397 2402          vnode_t *vp;
2398 2403          vnode_t *svp;
2399 2404          int lerror;
2400 2405          struct sockaddr *ca;
2401 2406          char *name = NULL;
2402 2407  
2403 2408          /*
2404 2409           * Disallow NULL paths
2405 2410           */
2406 2411          if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2407 2412                  *status = NFSERR_ACCES;
2408 2413                  return;
2409 2414          }
2410 2415  
2411 2416          vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2412 2417          if (vp == NULL) {
2413 2418                  *status = NFSERR_STALE;
2414 2419                  return;
2415 2420          }
2416 2421  
2417 2422          if (rdonly(ro, vp)) {
2418 2423                  VN_RELE(vp);
2419 2424                  *status = NFSERR_ROFS;
2420 2425                  return;
2421 2426          }
2422 2427  
2423 2428          error = sattr_to_vattr(args->sla_sa, &va);
2424 2429          if (error) {
2425 2430                  VN_RELE(vp);
2426 2431                  *status = puterrno(error);
2427 2432                  return;
2428 2433          }
2429 2434  
2430 2435          if (!(va.va_mask & AT_MODE)) {
2431 2436                  VN_RELE(vp);
2432 2437                  *status = NFSERR_INVAL;
2433 2438                  return;
2434 2439          }
2435 2440  
2436 2441          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2437 2442          name = nfscmd_convname(ca, exi, args->sla_tnm,
2438 2443              NFSCMD_CONV_INBOUND, MAXPATHLEN);
2439 2444  
2440 2445          if (name == NULL) {
2441 2446                  *status = NFSERR_ACCES;
2442 2447                  return;
2443 2448          }
2444 2449  
2445 2450          va.va_type = VLNK;
2446 2451          va.va_mask |= AT_TYPE;
2447 2452  
2448 2453          error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2449 2454  
2450 2455          /*
2451 2456           * Force new data and metadata out to stable storage.
2452 2457           */
2453 2458          lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2454 2459              NULL, cr, NULL, NULL, NULL);
2455 2460  
2456 2461          if (!lerror) {
2457 2462                  (void) VOP_FSYNC(svp, 0, cr, NULL);
2458 2463                  VN_RELE(svp);
2459 2464          }
2460 2465  
2461 2466          /*
2462 2467           * Force modified data and metadata out to stable storage.
2463 2468           */
2464 2469          (void) VOP_FSYNC(vp, 0, cr, NULL);
2465 2470  
2466 2471          VN_RELE(vp);
2467 2472  
2468 2473          *status = puterrno(error);
2469 2474          if (name != args->sla_tnm)
2470 2475                  kmem_free(name, MAXPATHLEN);
2471 2476  
2472 2477  }
2473 2478  void *
2474 2479  rfs_symlink_getfh(struct nfsslargs *args)
2475 2480  {
2476 2481          return (args->sla_from.da_fhandle);
2477 2482  }
2478 2483  
2479 2484  /*
2480 2485   * Make a directory.
2481 2486   * Create a directory with the given name, parent directory, and attributes.
2482 2487   * Returns a file handle and attributes for the new directory.
2483 2488   */
2484 2489  /* ARGSUSED */
2485 2490  void
2486 2491  rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2487 2492      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2488 2493  {
2489 2494          int error;
2490 2495          struct vattr va;
2491 2496          vnode_t *dvp = NULL;
2492 2497          vnode_t *vp;
2493 2498          char *name = args->ca_da.da_name;
2494 2499  
2495 2500          /*
2496 2501           * Disallow NULL paths
2497 2502           */
2498 2503          if (name == NULL || *name == '\0') {
2499 2504                  dr->dr_status = NFSERR_ACCES;
2500 2505                  return;
2501 2506          }
2502 2507  
2503 2508          vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2504 2509          if (vp == NULL) {
2505 2510                  dr->dr_status = NFSERR_STALE;
2506 2511                  return;
2507 2512          }
2508 2513  
2509 2514          if (rdonly(ro, vp)) {
2510 2515                  VN_RELE(vp);
2511 2516                  dr->dr_status = NFSERR_ROFS;
2512 2517                  return;
2513 2518          }
2514 2519  
2515 2520          error = sattr_to_vattr(args->ca_sa, &va);
2516 2521          if (error) {
2517 2522                  VN_RELE(vp);
2518 2523                  dr->dr_status = puterrno(error);
2519 2524                  return;
2520 2525          }
2521 2526  
2522 2527          if (!(va.va_mask & AT_MODE)) {
2523 2528                  VN_RELE(vp);
2524 2529                  dr->dr_status = NFSERR_INVAL;
2525 2530                  return;
2526 2531          }
2527 2532  
2528 2533          va.va_type = VDIR;
2529 2534          va.va_mask |= AT_TYPE;
2530 2535  
2531 2536          error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2532 2537  
2533 2538          if (!error) {
2534 2539                  /*
2535 2540                   * Attributes of the newly created directory should
2536 2541                   * be returned to the client.
2537 2542                   */
2538 2543                  va.va_mask = AT_ALL; /* We want everything */
2539 2544                  error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2540 2545  
2541 2546                  /* check for overflows */
2542 2547                  if (!error) {
2543 2548                          acl_perm(vp, exi, &va, cr);
2544 2549                          error = vattr_to_nattr(&va, &dr->dr_attr);
2545 2550                          if (!error) {
2546 2551                                  error = makefh(&dr->dr_fhandle, dvp, exi);
2547 2552                          }
2548 2553                  }
2549 2554                  /*
2550 2555                   * Force new data and metadata out to stable storage.
2551 2556                   */
2552 2557                  (void) VOP_FSYNC(dvp, 0, cr, NULL);
2553 2558                  VN_RELE(dvp);
2554 2559          }
2555 2560  
2556 2561          /*
2557 2562           * Force modified data and metadata out to stable storage.
2558 2563           */
2559 2564          (void) VOP_FSYNC(vp, 0, cr, NULL);
2560 2565  
2561 2566          VN_RELE(vp);
2562 2567  
2563 2568          dr->dr_status = puterrno(error);
2564 2569  
2565 2570  }
2566 2571  void *
2567 2572  rfs_mkdir_getfh(struct nfscreatargs *args)
2568 2573  {
2569 2574          return (args->ca_da.da_fhandle);
2570 2575  }
2571 2576  
2572 2577  /*
2573 2578   * Remove a directory.
2574 2579   * Remove the given directory name from the given parent directory.
2575 2580   */
2576 2581  /* ARGSUSED */
2577 2582  void
2578 2583  rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2579 2584      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2580 2585  {
2581 2586          int error;
2582 2587          vnode_t *vp;
2583 2588  
2584 2589          /*
2585 2590           * Disallow NULL paths
2586 2591           */
2587 2592          if (da->da_name == NULL || *da->da_name == '\0') {
2588 2593                  *status = NFSERR_ACCES;
2589 2594                  return;
2590 2595          }
2591 2596  
2592 2597          vp = nfs_fhtovp(da->da_fhandle, exi);
2593 2598          if (vp == NULL) {
2594 2599                  *status = NFSERR_STALE;
2595 2600                  return;
2596 2601          }
2597 2602  
2598 2603          if (rdonly(ro, vp)) {
2599 2604                  VN_RELE(vp);
2600 2605                  *status = NFSERR_ROFS;
2601 2606                  return;
2602 2607          }
2603 2608  
2604 2609          /*
2605 2610           * VOP_RMDIR takes a third argument (the current
2606 2611           * directory of the process).  That's because someone
2607 2612           * wants to return EINVAL if one tries to remove ".".
2608 2613           * Of course, NFS servers have no idea what their
2609 2614           * clients' current directories are.  We fake it by
2610 2615           * supplying a vnode known to exist and illegal to
2611 2616           * remove.
2612 2617           */
2613 2618          error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
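        /*
         * Editorial note (assumption): ZONE_ROOTVP() is taken to be the
         * calling zone's root vnode, which satisfies the requirement above
         * (a vnode known to exist and illegal to remove) while remaining
         * correct for an NFS server running inside a non-global zone.
         */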
2614 2619  
2615 2620          /*
2616 2621           * Force modified data and metadata out to stable storage.
2617 2622           */
2618 2623          (void) VOP_FSYNC(vp, 0, cr, NULL);
2619 2624  
2620 2625          VN_RELE(vp);
2621 2626  
2622 2627          /*
2623 2628           * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2624 2629           * if the directory is not empty.  A System V NFS server
2625 2630           * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2626 2631           * over the wire.
2627 2632           */
2628 2633          if (error == EEXIST)
2629 2634                  *status = NFSERR_NOTEMPTY;
2630 2635          else
2631 2636                  *status = puterrno(error);
2632 2637  
2633 2638  }
2634 2639  void *
2635 2640  rfs_rmdir_getfh(struct nfsdiropargs *da)
2636 2641  {
2637 2642          return (da->da_fhandle);
2638 2643  }
2639 2644  
2640 2645  /* ARGSUSED */
2641 2646  void
2642 2647  rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2643 2648      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2644 2649  {
2645 2650          int error;
2646 2651          int iseof;
2647 2652          struct iovec iov;
2648 2653          struct uio uio;
2649 2654          vnode_t *vp;
2650 2655          char *ndata = NULL;
2651 2656          struct sockaddr *ca;
2652 2657          size_t nents;
2653 2658          int ret;
2654 2659  
2655 2660          vp = nfs_fhtovp(&rda->rda_fh, exi);
2656 2661          if (vp == NULL) {
2657 2662                  rd->rd_entries = NULL;
2658 2663                  rd->rd_status = NFSERR_STALE;
2659 2664                  return;
2660 2665          }
2661 2666  
2662 2667          if (vp->v_type != VDIR) {
2663 2668                  VN_RELE(vp);
2664 2669                  rd->rd_entries = NULL;
2665 2670                  rd->rd_status = NFSERR_NOTDIR;
2666 2671                  return;
2667 2672          }
2668 2673  
2669 2674          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2670 2675  
2671 2676          error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2672 2677  
2673 2678          if (error) {
2674 2679                  rd->rd_entries = NULL;
2675 2680                  goto bad;
2676 2681          }
2677 2682  
2678 2683          if (rda->rda_count == 0) {
2679 2684                  rd->rd_entries = NULL;
2680 2685                  rd->rd_size = 0;
2681 2686                  rd->rd_eof = FALSE;
2682 2687                  goto bad;
2683 2688          }
2684 2689  
2685 2690          rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2686 2691  
2687 2692          /*
2688 2693           * Allocate data for entries.  This will be freed by rfs_rddirfree.
2689 2694           */
2690 2695          rd->rd_bufsize = (uint_t)rda->rda_count;
2691 2696          rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2692 2697  
2693 2698          /*
2694 2699           * Set up io vector to read directory data
2695 2700           */
2696 2701          iov.iov_base = (caddr_t)rd->rd_entries;
2697 2702          iov.iov_len = rda->rda_count;
2698 2703          uio.uio_iov = &iov;
2699 2704          uio.uio_iovcnt = 1;
2700 2705          uio.uio_segflg = UIO_SYSSPACE;
2701 2706          uio.uio_extflg = UIO_COPY_CACHED;
2702 2707          uio.uio_loffset = (offset_t)rda->rda_offset;
2703 2708          uio.uio_resid = rda->rda_count;
2704 2709  
2705 2710          /*
2706 2711           * read directory
2707 2712           */
2708 2713          error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2709 2714  
2710 2715          /*
2711 2716           * Clean up
2712 2717           */
2713 2718          if (!error) {
2714 2719                  /*
2715 2720                   * set size and eof
2716 2721                   */
2717 2722                  if (uio.uio_resid == rda->rda_count) {
2718 2723                          rd->rd_size = 0;
2719 2724                          rd->rd_eof = TRUE;
2720 2725                  } else {
2721 2726                          rd->rd_size = (uint32_t)(rda->rda_count -
2722 2727                              uio.uio_resid);
2723 2728                          rd->rd_eof = iseof ? TRUE : FALSE;
2724 2729                  }
2725 2730          }
2726 2731  
2727 2732          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2728 2733          nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2729 2734          ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2730 2735              rda->rda_count, &ndata);
2731 2736  
2732 2737          if (ret != 0) {
2733 2738                  size_t dropbytes;
2734 2739                  /*
2735 2740                   * We had to drop one or more entries in order to fit
2736 2741                   * during the character conversion.  We need to patch
2737 2742                   * up the size and eof info.
2738 2743                   */
2739 2744                  if (rd->rd_eof)
2740 2745                          rd->rd_eof = FALSE;
2741 2746                  dropbytes = nfscmd_dropped_entrysize(
2742 2747                      (struct dirent64 *)rd->rd_entries, nents, ret);
2743 2748                  rd->rd_size -= dropbytes;
2744 2749          }
2745 2750          if (ndata == NULL) {
2746 2751                  ndata = (char *)rd->rd_entries;
2747 2752          } else if (ndata != (char *)rd->rd_entries) {
2748 2753                  kmem_free(rd->rd_entries, rd->rd_bufsize);
2749 2754                  rd->rd_entries = (void *)ndata;
2750 2755                  rd->rd_bufsize = rda->rda_count;
2751 2756          }
2752 2757  
2753 2758  bad:
2754 2759          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2755 2760  
2756 2761  #if 0 /* notyet */
2757 2762          /*
2758 2763           * Don't do this.  It causes local disk writes when just
2759 2764           * reading the file and the overhead is deemed larger
2760 2765           * than the benefit.
2761 2766           */
2762 2767          /*
2763 2768           * Force modified metadata out to stable storage.
2764 2769           */
2765 2770          (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2766 2771  #endif
2767 2772  
2768 2773          VN_RELE(vp);
2769 2774  
2770 2775          rd->rd_status = puterrno(error);
2771 2776  
2772 2777  }
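
          The uio setup in rfs_readdir boils down to "read entries into a
          caller-sized buffer, then report how many bytes were consumed
          (rda_count - uio_resid) and whether the end of the directory was
          reached."  A rough userland sketch of the same accounting, using
          readdir(3) and an arbitrary 512-byte budget standing in for
          rda_count (illustration only, not the VOP_READDIR interface):

              /*
               * Copy directory records into a fixed buffer until the byte
               * budget runs out, then report size and an eof hint, mirroring
               * the rd_size/rd_eof computation in rfs_readdir.
               */
              #include <stdio.h>
              #include <string.h>
              #include <dirent.h>

              int
              main(void)
              {
                      char            buf[512];               /* stand-in for rd_entries */
                      size_t          budget = sizeof (buf);  /* stand-in for rda_count */
                      size_t          used = 0;
                      int             eof = 1;
                      DIR             *dp;
                      struct dirent   *de;

                      if ((dp = opendir(".")) == NULL)
                              return (1);

                      while ((de = readdir(dp)) != NULL) {
                              size_t reclen = de->d_reclen;

                              if (used + reclen > budget) {
                                      eof = 0;        /* more entries remain */
                                      break;
                              }
                              (void) memcpy(buf + used, de, reclen);
                              used += reclen;
                      }
                      (void) closedir(dp);

                      /* "used" plays the role of rda_count - uio_resid. */
                      printf("size=%zu eof=%d\n", used, eof);
                      return (0);
              }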
2773 2778  void *
2774 2779  rfs_readdir_getfh(struct nfsrddirargs *rda)
2775 2780  {
2776 2781          return (&rda->rda_fh);
2777 2782  }
2778 2783  void
2779 2784  rfs_rddirfree(struct nfsrddirres *rd)
2780 2785  {
2781 2786          if (rd->rd_entries != NULL)
2782 2787                  kmem_free(rd->rd_entries, rd->rd_bufsize);
2783 2788  }
2784 2789  
2785 2790  /* ARGSUSED */
2786 2791  void
2787 2792  rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2788 2793      struct svc_req *req, cred_t *cr, bool_t ro)
2789 2794  {
2790 2795          int error;
2791 2796          struct statvfs64 sb;
2792 2797          vnode_t *vp;
2793 2798  
2794 2799          vp = nfs_fhtovp(fh, exi);
2795 2800          if (vp == NULL) {
2796 2801                  fs->fs_status = NFSERR_STALE;
2797 2802                  return;
2798 2803          }
2799 2804  
2800 2805          error = VFS_STATVFS(vp->v_vfsp, &sb);
2801 2806  
2802 2807          if (!error) {
2803 2808                  fs->fs_tsize = nfstsize();
2804 2809                  fs->fs_bsize = sb.f_frsize;
2805 2810                  fs->fs_blocks = sb.f_blocks;
2806 2811                  fs->fs_bfree = sb.f_bfree;
2807 2812                  fs->fs_bavail = sb.f_bavail;
2808 2813          }
2809 2814  
2810 2815          VN_RELE(vp);
2811 2816  
2812 2817          fs->fs_status = puterrno(error);
2813 2818  
2814 2819  }
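
          rfs_statfs simply forwards a handful of statvfs fields into the NFS
          reply.  The same fields can be inspected from userland with
          statvfs(3C); a small sketch, using "/" as an arbitrary example path:

              /*
               * Print the statvfs fields that rfs_statfs copies into the
               * reply: fragment size, total, free, and available blocks.
               */
              #include <stdio.h>
              #include <sys/statvfs.h>

              int
              main(void)
              {
                      struct statvfs sb;

                      if (statvfs("/", &sb) != 0)
                              return (1);

                      printf("frsize=%lu blocks=%llu bfree=%llu bavail=%llu\n",
                          (unsigned long)sb.f_frsize,
                          (unsigned long long)sb.f_blocks,
                          (unsigned long long)sb.f_bfree,
                          (unsigned long long)sb.f_bavail);
                      return (0);
              }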
2815 2820  void *
2816 2821  rfs_statfs_getfh(fhandle_t *fh)
2817 2822  {
2818 2823          return (fh);
2819 2824  }
2820 2825  
2821 2826  static int
2822 2827  sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
2823 2828  {
2824 2829          vap->va_mask = 0;
2825 2830  
2826 2831          /*
2827 2832           * There was a sign extension bug in some VFS based systems
2828 2833           * which stored the mode as a short.  When it would get
2829 2834           * assigned to a u_long, no sign extension would occur.
2830 2835           * It needed to, but this wasn't noticed because sa_mode
2831 2836           * would then get assigned back to the short, thus ignoring
2832 2837           * the upper 16 bits of sa_mode.
2833 2838           *
2834 2839           * To make this implementation work for both broken
2835 2840           * clients and good clients, we check for both versions
2836 2841           * of the mode.
2837 2842           */
2838 2843          if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
2839 2844              sa->sa_mode != (uint32_t)-1) {
2840 2845                  vap->va_mask |= AT_MODE;
2841 2846                  vap->va_mode = sa->sa_mode;
2842 2847          }
2843 2848          if (sa->sa_uid != (uint32_t)-1) {
2844 2849                  vap->va_mask |= AT_UID;
2845 2850                  vap->va_uid = sa->sa_uid;
2846 2851          }
2847 2852          if (sa->sa_gid != (uint32_t)-1) {
2848 2853                  vap->va_mask |= AT_GID;
2849 2854                  vap->va_gid = sa->sa_gid;
2850 2855          }
2851 2856          if (sa->sa_size != (uint32_t)-1) {
2852 2857                  vap->va_mask |= AT_SIZE;
2853 2858                  vap->va_size = sa->sa_size;
2854 2859          }
2855 2860          if (sa->sa_atime.tv_sec != (int32_t)-1 &&
2856 2861              sa->sa_atime.tv_usec != (int32_t)-1) {
2857 2862  #ifndef _LP64
2858 2863                  /* return error if time overflow */
2859 2864                  if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
2860 2865                          return (EOVERFLOW);
2861 2866  #endif
2862 2867                  vap->va_mask |= AT_ATIME;
2863 2868                  /*
2864 2869                   * nfs protocol defines times as unsigned so don't extend sign,
2865 2870                   * unless sysadmin set nfs_allow_preepoch_time.
2866 2871                   */
2867 2872                  NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
2868 2873                  vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2869 2874          }
2870 2875          if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2871 2876              sa->sa_mtime.tv_usec != (int32_t)-1) {
2872 2877  #ifndef _LP64
2873 2878                  /* return error if time overflow */
2874 2879                  if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2875 2880                          return (EOVERFLOW);
2876 2881  #endif
2877 2882                  vap->va_mask |= AT_MTIME;
2878 2883                  /*
2879 2884                   * nfs protocol defines times as unsigned so don't extend sign,
2880 2885                   * unless sysadmin set nfs_allow_preepoch_time.
2881 2886                   */
2882 2887                  NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2883 2888                  vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2884 2889          }
2885 2890          return (0);
2886 2891  }
2887 2892  
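
          The sign-extension note in sattr_to_vattr explains why two different
          "unset" sentinels are accepted for sa_mode: a value that passed
          through a 16-bit field arrives as 0xffff, while a proper 32-bit
          sentinel is 0xffffffff.  A tiny standalone demonstration of the two
          values:

              /*
               * Show the two sentinel values that sattr_to_vattr checks:
               * (uint32_t)((ushort_t)-1) versus (uint32_t)-1.
               */
              #include <stdio.h>
              #include <inttypes.h>

              int
              main(void)
              {
                      uint32_t from_short = (uint32_t)((unsigned short)-1);
                      uint32_t from_int = (uint32_t)-1;

                      printf("short sentinel: 0x%" PRIx32 "\n", from_short); /* 0xffff */
                      printf("int sentinel:   0x%" PRIx32 "\n", from_int);   /* 0xffffffff */
                      return (0);
              }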
2888 2893  static const enum nfsftype vt_to_nf[] = {
2889 2894          0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
2890 2895  };
2891 2896  
2892 2897  /*
2893 2898   * check the following fields for overflow: nodeid, size, and time.
2894 2899   * There could be a problem when converting 64-bit LP64 fields
2895 2900   * into 32-bit ones.  Return an error if there is an overflow.
2896 2901   */
2897 2902  int
2898 2903  vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2899 2904  {
2900 2905          ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2901 2906          na->na_type = vt_to_nf[vap->va_type];
2902 2907  
2903 2908          if (vap->va_mode == (unsigned short) -1)
2904 2909                  na->na_mode = (uint32_t)-1;
2905 2910          else
2906 2911                  na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2907 2912  
2908 2913          if (vap->va_uid == (unsigned short)(-1))
2909 2914                  na->na_uid = (uint32_t)(-1);
2910 2915          else if (vap->va_uid == UID_NOBODY)
2911 2916                  na->na_uid = (uint32_t)NFS_UID_NOBODY;
2912 2917          else
2913 2918                  na->na_uid = vap->va_uid;
2914 2919  
2915 2920          if (vap->va_gid == (unsigned short)(-1))
2916 2921                  na->na_gid = (uint32_t)-1;
2917 2922          else if (vap->va_gid == GID_NOBODY)
2918 2923                  na->na_gid = (uint32_t)NFS_GID_NOBODY;
2919 2924          else
2920 2925                  na->na_gid = vap->va_gid;
2921 2926  
2922 2927          /*
2923 2928           * Do we need to check fsid for overflow?  It is 64-bit in the
2924 2929           * vattr, but are values bigger than 32 bits supported?
2925 2930           */
2926 2931          na->na_fsid = vap->va_fsid;
2927 2932  
2928 2933          na->na_nodeid = vap->va_nodeid;
2929 2934  
2930 2935          /*
2931 2936           * Check to make sure that the nodeid is representable over the
2932 2937           * wire without losing bits.
2933 2938           */
2934 2939          if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2935 2940                  return (EFBIG);
2936 2941          na->na_nlink = vap->va_nlink;
2937 2942  
2938 2943          /*
2939 2944           * Check for big files here, instead of at the caller.  See
2940 2945           * comments in cstat for large special file explanation.
2941 2946           */
2942 2947          if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2943 2948                  if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2944 2949                          return (EFBIG);
2945 2950                  if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2946 2951                          /* UNKNOWN_SIZE | OVERFLOW */
2947 2952                          na->na_size = MAXOFF32_T;
2948 2953                  } else
2949 2954                          na->na_size = vap->va_size;
2950 2955          } else
2951 2956                  na->na_size = vap->va_size;
2952 2957  
2953 2958          /*
2954 2959           * If the vnode times overflow the 32-bit times that NFS2
2955 2960           * uses on the wire then return an error.
2956 2961           */
2957 2962          if (!NFS_VAP_TIME_OK(vap)) {
2958 2963                  return (EOVERFLOW);
2959 2964          }
2960 2965          na->na_atime.tv_sec = vap->va_atime.tv_sec;
2961 2966          na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2962 2967  
2963 2968          na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2964 2969          na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2965 2970  
2966 2971          na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2967 2972          na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2968 2973  
2969 2974          /*
2970 2975           * If the dev_t will fit into 16 bits then compress
2971 2976           * it, otherwise leave it alone. See comments in
2972 2977           * nfs_client.c.
2973 2978           */
2974 2979          if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2975 2980              getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2976 2981                  na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2977 2982          else
2978 2983                  (void) cmpldev(&na->na_rdev, vap->va_rdev);
2979 2984  
2980 2985          na->na_blocks = vap->va_nblocks;
2981 2986          na->na_blocksize = vap->va_blksize;
2982 2987  
2983 2988          /*
2984 2989           * This bit of ugliness is a *TEMPORARY* hack to preserve the
2985 2990           * over-the-wire protocols for named-pipe vnodes.  It remaps the
2986 2991           * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2987 2992           *
2988 2993           * BUYER BEWARE:
2989 2994           *  If you are porting the NFS to a non-Sun server, you probably
2990 2995           *  don't want to include the following block of code.  The
2991 2996           *  over-the-wire special file types will be changing with the
2992 2997           *  NFS Protocol Revision.
2993 2998           */
2994 2999          if (vap->va_type == VFIFO)
2995 3000                  NA_SETFIFO(na);
2996 3001          return (0);
2997 3002  }
2998 3003  
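
          The nodeid check in vattr_to_nattr is a plain representability test:
          narrow the 64-bit value to 32 bits, widen it again, and fail with
          EFBIG if anything was lost.  Reduced to a standalone form with
          made-up sample values:

              /*
               * Narrow-then-widen round trip: returns 1 if the 64-bit nodeid
               * survives a 32-bit wire field, 0 if bits would be lost.
               */
              #include <stdio.h>
              #include <inttypes.h>

              static int
              fits_in_32(uint64_t nodeid)
              {
                      uint32_t narrow = (uint32_t)nodeid;

                      return (nodeid == (uint64_t)narrow);
              }

              int
              main(void)
              {
                      printf("%d\n", fits_in_32(12345ULL));   /* 1: representable */
                      printf("%d\n", fits_in_32(1ULL << 40)); /* 0: would lose bits */
                      return (0);
              }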
2999 3004  /*
3000 3005   * acl v2 support: returns approximate permission.
3001 3006   *      default: returns minimal permission (more restrictive)
3002 3007   *      aclok: returns maximal permission (less restrictive)
3003 3008   *      This routine changes the permissions that are already in *va.
3004 3009   *      If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
3005 3010   *      CLASS_OBJ is always the same as GROUP_OBJ entry.
3006 3011   */
3007 3012  static void
3008 3013  acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
3009 3014  {
3010 3015          vsecattr_t      vsa;
3011 3016          int             aclcnt;
3012 3017          aclent_t        *aclentp;
3013 3018          mode_t          mask_perm;
3014 3019          mode_t          grp_perm;
3015 3020          mode_t          other_perm;
3016 3021          mode_t          other_orig;
3017 3022          int             error;
3018 3023  
3019 3024          /* don't care about the default ACL */
3020 3025          vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
3021 3026          error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
3022 3027  
3023 3028          if (!error) {
3024 3029                  aclcnt = vsa.vsa_aclcnt;
3025 3030                  if (aclcnt > MIN_ACL_ENTRIES) {
3026 3031                          /* non-trivial ACL */
3027 3032                          aclentp = vsa.vsa_aclentp;
3028 3033                          if (exi->exi_export.ex_flags & EX_ACLOK) {
3029 3034                                  /* maximal permissions */
3030 3035                                  grp_perm = 0;
3031 3036                                  other_perm = 0;
3032 3037                                  for (; aclcnt > 0; aclcnt--, aclentp++) {
3033 3038                                          switch (aclentp->a_type) {
3034 3039                                          case USER_OBJ:
3035 3040                                                  break;
3036 3041                                          case USER:
3037 3042                                                  grp_perm |=
3038 3043                                                      aclentp->a_perm << 3;
3039 3044                                                  other_perm |= aclentp->a_perm;
3040 3045                                                  break;
3041 3046                                          case GROUP_OBJ:
3042 3047                                                  grp_perm |=
3043 3048                                                      aclentp->a_perm << 3;
3044 3049                                                  break;
3045 3050                                          case GROUP:
3046 3051                                                  other_perm |= aclentp->a_perm;
3047 3052                                                  break;
3048 3053                                          case OTHER_OBJ:
3049 3054                                                  other_orig = aclentp->a_perm;
3050 3055                                                  break;
3051 3056                                          case CLASS_OBJ:
3052 3057                                                  mask_perm = aclentp->a_perm;
3053 3058                                                  break;
3054 3059                                          default:
3055 3060                                                  break;
3056 3061                                          }
3057 3062                                  }
3058 3063                                  grp_perm &= mask_perm << 3;
3059 3064                                  other_perm &= mask_perm;
3060 3065                                  other_perm |= other_orig;
3061 3066  
3062 3067                          } else {
3063 3068                                  /* minimal permissions */
3064 3069                                  grp_perm = 070;
3065 3070                                  other_perm = 07;
3066 3071                                  for (; aclcnt > 0; aclcnt--, aclentp++) {
3067 3072                                          switch (aclentp->a_type) {
3068 3073                                          case USER_OBJ:
3069 3074                                                  break;
3070 3075                                          case USER:
3071 3076                                          case CLASS_OBJ:
3072 3077                                                  grp_perm &=
3073 3078                                                      aclentp->a_perm << 3;
3074 3079                                                  other_perm &=
3075 3080                                                      aclentp->a_perm;
3076 3081                                                  break;
3077 3082                                          case GROUP_OBJ:
3078 3083                                                  grp_perm &=
3079 3084                                                      aclentp->a_perm << 3;
3080 3085                                                  break;
3081 3086                                          case GROUP:
3082 3087                                                  other_perm &=
3083 3088                                                      aclentp->a_perm;
3084 3089                                                  break;
3085 3090                                          case OTHER_OBJ:
3086 3091                                                  other_perm &=
3087 3092                                                      aclentp->a_perm;
3088 3093                                                  break;
3089 3094                                          default:
3090 3095                                                  break;
3091 3096                                          }
3092 3097                                  }
3093 3098                          }
3094 3099                          /* copy to va */
3095 3100                          va->va_mode &= ~077;
3096 3101                          va->va_mode |= grp_perm | other_perm;
3097 3102                  }
3098 3103                  if (vsa.vsa_aclcnt)
3099 3104                          kmem_free(vsa.vsa_aclentp,
3100 3105                              vsa.vsa_aclcnt * sizeof (aclent_t));
3101 3106          }
3102 3107  }
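
          The "maximal permission" branch of acl_perm ORs USER and GROUP
          entries into the group/other bits, limits them with the CLASS_OBJ
          mask, and then ORs OTHER_OBJ back in.  A userland sketch of that
          arithmetic with a made-up entry list (this mimics, but is not, the
          kernel aclent_t API):

              /* Compute approximate group/other rwx bits from a toy ACL. */
              #include <stdio.h>

              enum etype { E_USER_OBJ, E_USER, E_GROUP_OBJ, E_GROUP,
                  E_CLASS_OBJ, E_OTHER_OBJ };

              struct ent {
                      enum etype      type;
                      unsigned        perm;   /* 3-bit rwx value */
              };

              int
              main(void)
              {
                      struct ent acl[] = {
                              { E_USER_OBJ,  07 },
                              { E_USER,      06 },
                              { E_GROUP_OBJ, 05 },
                              { E_GROUP,     04 },
                              { E_CLASS_OBJ, 05 },
                              { E_OTHER_OBJ, 01 },
                      };
                      unsigned grp = 0, other = 0, mask = 0, other_orig = 0;
                      int i;

                      for (i = 0; i < (int)(sizeof (acl) / sizeof (acl[0])); i++) {
                              switch (acl[i].type) {
                              case E_USER_OBJ:
                                      break;
                              case E_USER:
                                      grp |= acl[i].perm << 3;
                                      other |= acl[i].perm;
                                      break;
                              case E_GROUP_OBJ:
                                      grp |= acl[i].perm << 3;
                                      break;
                              case E_GROUP:
                                      other |= acl[i].perm;
                                      break;
                              case E_CLASS_OBJ:
                                      mask = acl[i].perm;
                                      break;
                              case E_OTHER_OBJ:
                                      other_orig = acl[i].perm;
                                      break;
                              default:
                                      break;
                              }
                      }
                      /* Apply the mask, then restore the original other bits. */
                      grp &= mask << 3;
                      other &= mask;
                      other |= other_orig;

                      printf("group bits 0%o, other bits 0%o\n", grp >> 3, other & 07);
                      return (0);
              }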
3103 3108  
3104 3109  void
3105 3110  rfs_srvrinit(void)
3106 3111  {
3107 3112          nfs2_srv_caller_id = fs_new_caller_id();
3108 3113  }
3109 3114  
3110 3115  void
3111 3116  rfs_srvrfini(void)
3112 3117  {
3113 3118  }
3114 3119  
3115 3120  /* ARGSUSED */
3116 3121  void
3117 3122  rfs_srv_zone_init(nfs_globals_t *ng)
3118 3123  {
3119 3124          nfs_srv_t *ns;
3120 3125  
3121 3126          ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3122 3127  
3123 3128          mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3124 3129          ns->write_async = 1;
3125 3130  
3126 3131          ng->nfs_srv = ns;
3127 3132  }
3128 3133  
3129 3134  /* ARGSUSED */
3130 3135  void
3131 3136  rfs_srv_zone_fini(nfs_globals_t *ng)
3132 3137  {
3133 3138          nfs_srv_t *ns = ng->nfs_srv;
3134 3139  
3135 3140          ng->nfs_srv = NULL;
3136 3141  
3137 3142          mutex_destroy(&ns->async_write_lock);
3138 3143          kmem_free(ns, sizeof (*ns));
3139 3144  }
3140 3145  
3141 3146  static int
3142 3147  rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3143 3148  {
3144 3149          struct clist    *wcl;
3145 3150          int             wlist_len;
3146 3151          uint32_t        count = rr->rr_count;
3147 3152  
3148 3153          wcl = ra->ra_wlist;
3149 3154  
3150 3155          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3151 3156                  return (FALSE);
3152 3157          }
3153 3158  
3154 3159          wcl = ra->ra_wlist;
3155 3160          rr->rr_ok.rrok_wlist_len = wlist_len;
3156 3161          rr->rr_ok.rrok_wlist = wcl;
3157 3162  
3158 3163          return (TRUE);
3159 3164  }
  
      1829 lines elided