Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/syscall/rw.c
          +++ new/usr/src/uts/common/syscall/rw.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   * Copyright 2015, Joyent, Inc.  All rights reserved.
  26   26   */
  27   27  
  28   28  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  29   29  /*        All Rights Reserved   */
  30   30  
  31   31  /*
  32   32   * Portions of this source code were derived from Berkeley 4.3 BSD
  33   33   * under license from the Regents of the University of California.
  34   34   */
  35   35  
  36   36  #include <sys/param.h>
  37   37  #include <sys/isa_defs.h>
  38   38  #include <sys/types.h>
  39   39  #include <sys/inttypes.h>
  40   40  #include <sys/sysmacros.h>
  41   41  #include <sys/cred.h>
  42   42  #include <sys/user.h>
  43   43  #include <sys/systm.h>
  44   44  #include <sys/errno.h>
  45   45  #include <sys/vnode.h>
  46   46  #include <sys/file.h>
  47   47  #include <sys/proc.h>
  48   48  #include <sys/cpuvar.h>
  49   49  #include <sys/uio.h>
  50   50  #include <sys/debug.h>
  51   51  #include <sys/rctl.h>
  52   52  #include <sys/nbmlock.h>
  53   53  #include <sys/limits.h>
  54   54  
  55   55  #define COPYOUT_MAX_CACHE       (1<<17)         /* 128K */
  56   56  
  57   57  size_t copyout_max_cached = COPYOUT_MAX_CACHE;  /* global so it's patchable */
  58   58  
  59   59  /*
  60   60   * read, write, pread, pwrite, readv, and writev syscalls.
  61   61   *
  62   62   * 64-bit open: all open's are large file opens.
  63   63   * Large Files: the behaviour of read depends on whether the fd
  64   64   *              corresponds to large open or not.
  65   65   * 32-bit open: FOFFMAX flag not set.
  66   66   *              read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
  67   67   *              EOVERFLOW if count is non-zero and if size of file
  68   68   *              is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
  69   69   *              at >= MAXOFF32_T returns EOF.
  70   70   */
  71   71  
  72   72  /*
  73   73   * Native system call
  74   74   */
   75   75  ssize_t
   76   76  read(int fdes, void *cbuf, size_t count)
   77   77  {
                    /*
                     * read(2) -- transfer up to 'count' bytes from the file
                     * underlying 'fdes' into the user buffer 'cbuf' at the
                     * file's current offset.  Returns the number of bytes
                     * transferred, or -1 with errno set on failure.
                     */
   78   78          struct uio auio;
   79   79          struct iovec aiov;
   80   80          file_t *fp;
   81   81          register vnode_t *vp;
   82   82          struct cpu *cp;
   83   83          int fflag, ioflag, rwflag;
   84   84          ssize_t cnt, bcount;
   85   85          int error = 0;
   86   86          u_offset_t fileoff;
   87   87          int in_crit = 0;
   88   88  
                    /* A count that is negative once cast to ssize_t is rejected. */
   89   89          if ((cnt = (ssize_t)count) < 0)
   90   90                  return (set_errno(EINVAL));
   91   91          if ((fp = getf(fdes)) == NULL)
   92   92                  return (set_errno(EBADF));
   93   93          if (((fflag = fp->f_flag) & FREAD) == 0) {
   94   94                  error = EBADF;
   95   95                  goto out;
   96   96          }
   97   97          vp = fp->f_vnode;
   98   98  
                    /* A zero-length read of a regular file succeeds immediately (returns 0). */
   99   99          if (vp->v_type == VREG && cnt == 0) {
  100  100                  goto out;
  101  101          }
  102  102  
                    /* rwflag 0 selects the shared (reader) mode for VOP_RWLOCK; write() passes 1. */
  103  103          rwflag = 0;
  104  104          aiov.iov_base = cbuf;
  105  105          aiov.iov_len = cnt;
  106  106  
  107  107          /*
  108  108           * We have to enter the critical region before calling VOP_RWLOCK
  109  109           * to avoid a deadlock with write() calls.
  110  110           */
  111  111          if (nbl_need_check(vp)) {
  112  112                  int svmand;
  113  113  
  114  114                  nbl_start_crit(vp, RW_READER);
  115  115                  in_crit = 1;
  116  116                  error = nbl_svmand(vp, fp->f_cred, &svmand);
  117  117                  if (error != 0)
  118  118                          goto out;
  119  119                  if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
  120  120                      NULL)) {
  121  121                          error = EACCES;
  122  122                          goto out;
  123  123                  }
  124  124          }
  125  125  
  126  126          (void) VOP_RWLOCK(vp, rwflag, NULL);
  127  127  
  128  128          /*
  129  129           * We do the following checks inside VOP_RWLOCK so as to
  130  130           * prevent file size from changing while these checks are
  131  131           * being done. Also, we load fp's offset to the local
  132  132           * variable fileoff because we can have a parallel lseek
  133  133           * going on (f_offset is not protected by any lock) which
  134  134           * could change f_offset. We need to see the value only
  135  135           * once here and take a decision. Seeing it more than once
  136  136           * can lead to incorrect functionality.
  137  137           */
  138  138  
                    /*
                     * Offset at or beyond this open's representable maximum:
                     * return EOF (count 0) when the file is not actually that
                     * large, otherwise EOVERFLOW -- see the large-file
                     * behaviour comment at the top of the file.
                     */
  139  139          fileoff = (u_offset_t)fp->f_offset;
  140  140          if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
  141  141                  struct vattr va;
  142  142                  va.va_mask = AT_SIZE;
  143  143                  if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
  144  144                          VOP_RWUNLOCK(vp, rwflag, NULL);
  145  145                          goto out;
  146  146                  }
  147  147                  if (fileoff >= va.va_size) {
  148  148                          cnt = 0;
  149  149                          VOP_RWUNLOCK(vp, rwflag, NULL);
  150  150                          goto out;
  151  151                  } else {
  152  152                          error = EOVERFLOW;
  153  153                          VOP_RWUNLOCK(vp, rwflag, NULL);
  154  154                          goto out;
  155  155                  }
  156  156          }
                    /* Clamp so the transfer cannot carry the offset past OFFSET_MAX. */
  157  157          if ((vp->v_type == VREG) &&
  158  158              (fileoff + cnt > OFFSET_MAX(fp))) {
  159  159                  cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
  160  160          }
  161  161          auio.uio_loffset = fileoff;
  162  162          auio.uio_iov = &aiov;
  163  163          auio.uio_iovcnt = 1;
  164  164          auio.uio_resid = bcount = cnt;
  165  165          auio.uio_segflg = UIO_USERSPACE;
  166  166          auio.uio_llimit = MAXOFFSET_T;
  167  167          auio.uio_fmode = fflag;
  168  168          /*
  169  169           * Only use bypass caches when the count is large enough
  170  170           */
                    /* NOTE(review): signed/unsigned compare (ssize_t vs size_t); safe since bcount >= 0 here. */
  171  171          if (bcount <= copyout_max_cached)
  172  172                  auio.uio_extflg = UIO_COPY_CACHED;
  173  173          else
  174  174                  auio.uio_extflg = UIO_COPY_DEFAULT;
  175  175  
  176  176          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
  177  177  
  178  178          /* If read sync is not asked for, filter sync flags */
  179  179          if ((ioflag & FRSYNC) == 0)
  180  180                  ioflag &= ~(FSYNC|FDSYNC);
  181  181          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
                    /* cnt becomes the number of bytes actually transferred. */
  182  182          cnt -= auio.uio_resid;
  183  183          CPU_STATS_ENTER_K();
  184  184          cp = CPU;
  185  185          CPU_STATS_ADDQ(cp, sys, sysread, 1);
  186  186          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
  187  187          CPU_STATS_EXIT_K();
  188  188          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
  189  189  
                    /*
                     * FIFOs historically report the transfer count in f_offset;
                     * everything else advances the offset to where the VOP left
                     * it, subject to the POSIX-tagged condition below.
                     */
  190  190          if (vp->v_type == VFIFO)        /* Backward compatibility */
  191  191                  fp->f_offset = cnt;
  192  192          else if (((fp->f_flag & FAPPEND) == 0) ||
  193  193              (vp->v_type != VREG) || (bcount != 0))      /* POSIX */
  194  194                  fp->f_offset = auio.uio_loffset;
  195  195          VOP_RWUNLOCK(vp, rwflag, NULL);
  196  196  
                    /* A signal-interrupted read that already moved data still succeeds. */
  197  197          if (error == EINTR && cnt != 0)
  198  198                  error = 0;
  199  199  out:
  200  200          if (in_crit)
  201  201                  nbl_end_crit(vp);
  202  202          releasef(fdes);
  203  203          if (error)
  204  204                  return (set_errno(error));
  205  205          return (cnt);
  206  206  }
 207  207  
 208  208  /*
 209  209   * Native system call
 210  210   */
  211  211  ssize_t
  212  212  write(int fdes, void *cbuf, size_t count)
  213  213  {
                    /*
                     * write(2) -- transfer up to 'count' bytes from the user
                     * buffer 'cbuf' to the file underlying 'fdes' at the
                     * file's current offset.  Returns the number of bytes
                     * written, or -1 with errno set on failure.
                     */
  214  214          struct uio auio;
  215  215          struct iovec aiov;
  216  216          file_t *fp;
  217  217          register vnode_t *vp;
  218  218          struct cpu *cp;
  219  219          int fflag, ioflag, rwflag;
  220  220          ssize_t cnt, bcount;
  221  221          int error = 0;
  222  222          u_offset_t fileoff;
  223  223          int in_crit = 0;
  224  224  
                    /* A count that is negative once cast to ssize_t is rejected. */
  225  225          if ((cnt = (ssize_t)count) < 0)
  226  226                  return (set_errno(EINVAL));
  227  227          if ((fp = getf(fdes)) == NULL)
  228  228                  return (set_errno(EBADF));
  229  229          if (((fflag = fp->f_flag) & FWRITE) == 0) {
  230  230                  error = EBADF;
  231  231                  goto out;
  232  232          }
  233  233          vp = fp->f_vnode;
  234  234  
                    /* A zero-length write to a regular file succeeds immediately (returns 0). */
  235  235          if (vp->v_type == VREG && cnt == 0) {
  236  236                  goto out;
  237  237          }
  238  238  
                    /* rwflag 1 selects the exclusive (writer) mode for VOP_RWLOCK. */
  239  239          rwflag = 1;
  240  240          aiov.iov_base = cbuf;
  241  241          aiov.iov_len = cnt;
  242  242  
  243  243          /*
  244  244           * We have to enter the critical region before calling VOP_RWLOCK
  245  245           * to avoid a deadlock with ufs.
  246  246           */
  247  247          if (nbl_need_check(vp)) {
  248  248                  int svmand;
  249  249  
  250  250                  nbl_start_crit(vp, RW_READER);
  251  251                  in_crit = 1;
  252  252                  error = nbl_svmand(vp, fp->f_cred, &svmand);
  253  253                  if (error != 0)
  254  254                          goto out;
  255  255                  if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
  256  256                      NULL)) {
  257  257                          error = EACCES;
  258  258                          goto out;
  259  259                  }
  260  260          }
  261  261  
  262  262          (void) VOP_RWLOCK(vp, rwflag, NULL);
  263  263  
                    /* f_offset is loaded exactly once; see the comment in read(). */
  264  264          fileoff = fp->f_offset;
  265  265          if (vp->v_type == VREG) {
  266  266  
  267  267                  /*
  268  268                   * We raise psignal if write for >0 bytes causes
  269  269                   * it to exceed the ulimit.
  270  270                   */
  271  271                  if (fileoff >= curproc->p_fsz_ctl) {
                                    /* Drop the vnode lock before taking p_lock to fire the rctl action. */
  272  272                          VOP_RWUNLOCK(vp, rwflag, NULL);
  273  273  
  274  274                          mutex_enter(&curproc->p_lock);
  275  275                          (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
  276  276                              curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
  277  277                          mutex_exit(&curproc->p_lock);
  278  278  
  279  279                          error = EFBIG;
  280  280                          goto out;
  281  281                  }
  282  282                  /*
  283  283                   * We return EFBIG if write is done at an offset
  284  284                   * greater than the offset maximum for this file structure.
  285  285                   */
  286  286  
  287  287                  if (fileoff >= OFFSET_MAX(fp)) {
  288  288                          VOP_RWUNLOCK(vp, rwflag, NULL);
  289  289                          error = EFBIG;
  290  290                          goto out;
  291  291                  }
  292  292                  /*
  293  293                   * Limit the bytes to be written  upto offset maximum for
  294  294                   * this open file structure.
  295  295                   */
  296  296                  if (fileoff + cnt > OFFSET_MAX(fp))
  297  297                          cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
  298  298          }
  299  299          auio.uio_loffset = fileoff;
  300  300          auio.uio_iov = &aiov;
  301  301          auio.uio_iovcnt = 1;
  302  302          auio.uio_resid = bcount = cnt;
  303  303          auio.uio_segflg = UIO_USERSPACE;
                    /* NOTE(review): unlike read(), uio_llimit carries the process file-size limit here -- presumably so the fs can stop a write mid-transfer; confirm against VOP_WRITE consumers. */
  304  304          auio.uio_llimit = curproc->p_fsz_ctl;
  305  305          auio.uio_fmode = fflag;
  306  306          auio.uio_extflg = UIO_COPY_DEFAULT;
  307  307  
  308  308          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
  309  309  
  310  310          error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
                    /* cnt becomes the number of bytes actually transferred. */
  311  311          cnt -= auio.uio_resid;
  312  312          CPU_STATS_ENTER_K();
  313  313          cp = CPU;
  314  314          CPU_STATS_ADDQ(cp, sys, syswrite, 1);
  315  315          CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
  316  316          CPU_STATS_EXIT_K();
  317  317          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
  318  318  
                    /*
                     * FIFOs historically report the transfer count in f_offset;
                     * everything else advances the offset to where the VOP left
                     * it, subject to the POSIX-tagged condition below.
                     */
  319  319          if (vp->v_type == VFIFO)        /* Backward compatibility */
  320  320                  fp->f_offset = cnt;
  321  321          else if (((fp->f_flag & FAPPEND) == 0) ||
  322  322              (vp->v_type != VREG) || (bcount != 0))      /* POSIX */
  323  323                  fp->f_offset = auio.uio_loffset;
  324  324          VOP_RWUNLOCK(vp, rwflag, NULL);
  325  325  
                    /* A signal-interrupted write that already moved data still succeeds. */
  326  326          if (error == EINTR && cnt != 0)
  327  327                  error = 0;
  328  328  out:
  329  329          if (in_crit)
  330  330                  nbl_end_crit(vp);
  331  331          releasef(fdes);
  332  332          if (error)
  333  333                  return (set_errno(error));
  334  334          return (cnt);
  335  335  }
 336  336  
  337  337  ssize_t
  338  338  pread(int fdes, void *cbuf, size_t count, off_t offset)
  339  339  {
                    /*
                     * pread(2) -- like read(2), but reads at the caller-supplied
                     * 'offset' and never modifies the file's f_offset.
                     * Returns bytes read, or -1 with errno set.
                     */
  340  340          struct uio auio;
  341  341          struct iovec aiov;
  342  342          file_t *fp;
  343  343          register vnode_t *vp;
  344  344          struct cpu *cp;
  345  345          int fflag, ioflag, rwflag;
  346  346          ssize_t bcount;
  347  347          int error = 0;
                    /* Cast via ulong_t so a negative offset becomes a large unsigned value and trips the maxoff check below (EINVAL). */
  348  348          u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
                    /* The largest legal offset depends on the caller's data model: ILP32 callers are capped at MAXOFF32_T. */
  349  349  #ifdef _SYSCALL32_IMPL
  350  350          u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
  351  351              MAXOFF32_T : MAXOFFSET_T;
  352  352  #else
  353  353          const u_offset_t maxoff = MAXOFF32_T;
  354  354  #endif
  355  355          int in_crit = 0;
  356  356  
                    /* A count that is negative once cast to ssize_t is rejected. */
  357  357          if ((bcount = (ssize_t)count) < 0)
  358  358                  return (set_errno(EINVAL));
  359  359  
  360  360          if ((fp = getf(fdes)) == NULL)
  361  361                  return (set_errno(EBADF));
  362  362          if (((fflag = fp->f_flag) & (FREAD)) == 0) {
  363  363                  error = EBADF;
  364  364                  goto out;
  365  365          }
  366  366  
                    /* rwflag 0 selects the shared (reader) mode for VOP_RWLOCK. */
  367  367          rwflag = 0;
  368  368          vp = fp->f_vnode;
  369  369  
  370  370          if (vp->v_type == VREG) {
  371  371  
  372  372                  if (bcount == 0)
  373  373                          goto out;
  374  374  
  375  375                  /*
  376  376                   * Return EINVAL if an invalid offset comes to pread.
  377  377                   * Negative offset from user will cause this error.
  378  378                   */
  379  379  
  380  380                  if (fileoff > maxoff) {
  381  381                          error = EINVAL;
  382  382                          goto out;
  383  383                  }
  384  384                  /*
  385  385                   * Limit offset such that we don't read or write
  386  386                   * a file beyond the maximum offset representable in
  387  387                   * an off_t structure.
  388  388                   */
  389  389                  if (fileoff + bcount > maxoff)
  390  390                          bcount = (ssize_t)((offset_t)maxoff - fileoff);
                    /* Positioned reads on pipes/FIFOs are meaningless; fail with ESPIPE. */
  391  391          } else if (vp->v_type == VFIFO) {
  392  392                  error = ESPIPE;
  393  393                  goto out;
  394  394          }
  395  395  
  396  396          /*
  397  397           * We have to enter the critical region before calling VOP_RWLOCK
  398  398           * to avoid a deadlock with ufs.
  399  399           */
  400  400          if (nbl_need_check(vp)) {
  401  401                  int svmand;
  402  402  
  403  403                  nbl_start_crit(vp, RW_READER);
  404  404                  in_crit = 1;
  405  405                  error = nbl_svmand(vp, fp->f_cred, &svmand);
  406  406                  if (error != 0)
  407  407                          goto out;
  408  408                  if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
  409  409                      NULL)) {
  410  410                          error = EACCES;
  411  411                          goto out;
  412  412                  }
  413  413          }
  414  414  
  415  415          aiov.iov_base = cbuf;
  416  416          aiov.iov_len = bcount;
  417  417          (void) VOP_RWLOCK(vp, rwflag, NULL);
                    /*
                     * Exactly at the maximum offset: EOF (count 0) if the file
                     * is not actually that large, else EOVERFLOW.  The size is
                     * fetched under the vnode lock so it cannot change under us.
                     */
  418  418          if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
  419  419                  struct vattr va;
  420  420                  va.va_mask = AT_SIZE;
  421  421                  if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
  422  422                          VOP_RWUNLOCK(vp, rwflag, NULL);
  423  423                          goto out;
  424  424                  }
  425  425                  VOP_RWUNLOCK(vp, rwflag, NULL);
  426  426  
  427  427                  /*
  428  428                   * We have to return EOF if fileoff is >= file size.
  429  429                   */
  430  430                  if (fileoff >= va.va_size) {
  431  431                          bcount = 0;
  432  432                          goto out;
  433  433                  }
  434  434  
  435  435                  /*
  436  436                   * File is greater than or equal to maxoff and therefore
  437  437                   * we return EOVERFLOW.
  438  438                   */
  439  439                  error = EOVERFLOW;
  440  440                  goto out;
  441  441          }
  442  442          auio.uio_loffset = fileoff;
  443  443          auio.uio_iov = &aiov;
  444  444          auio.uio_iovcnt = 1;
  445  445          auio.uio_resid = bcount;
  446  446          auio.uio_segflg = UIO_USERSPACE;
  447  447          auio.uio_llimit = MAXOFFSET_T;
  448  448          auio.uio_fmode = fflag;
  449  449          auio.uio_extflg = UIO_COPY_CACHED;
  450  450  
  451  451          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
  452  452  
  453  453          /* If read sync is not asked for, filter sync flags */
  454  454          if ((ioflag & FRSYNC) == 0)
  455  455                  ioflag &= ~(FSYNC|FDSYNC);
  456  456          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
                    /* bcount becomes the number of bytes actually transferred. */
  457  457          bcount -= auio.uio_resid;
  458  458          CPU_STATS_ENTER_K();
  459  459          cp = CPU;
  460  460          CPU_STATS_ADDQ(cp, sys, sysread, 1);
  461  461          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
  462  462          CPU_STATS_EXIT_K();
  463  463          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
                    /* Unlike read(), fp->f_offset is deliberately left untouched. */
  464  464          VOP_RWUNLOCK(vp, rwflag, NULL);
  465  465  
                    /* A signal-interrupted read that already moved data still succeeds. */
  466  466          if (error == EINTR && bcount != 0)
  467  467                  error = 0;
  468  468  out:
  469  469          if (in_crit)
  470  470                  nbl_end_crit(vp);
  471  471          releasef(fdes);
  472  472          if (error)
  473  473                  return (set_errno(error));
  474  474          return (bcount);
  475  475  }
 476  476  
  477  477  ssize_t
  478  478  pwrite(int fdes, void *cbuf, size_t count, off_t offset)
  479  479  {
                    /*
                     * pwrite(2) -- like write(2), but writes at the
                     * caller-supplied 'offset' and never modifies the file's
                     * f_offset (even with O_APPEND; see the SUSv4 note below).
                     * Returns bytes written, or -1 with errno set.
                     */
  480  480          struct uio auio;
  481  481          struct iovec aiov;
  482  482          file_t *fp;
  483  483          register vnode_t *vp;
  484  484          struct cpu *cp;
  485  485          int fflag, ioflag, rwflag;
  486  486          ssize_t bcount;
  487  487          int error = 0;
                    /* Cast via ulong_t so a negative offset becomes a large unsigned value and trips the maxoff check below (EINVAL). */
  488  488          u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
                    /* The largest legal offset depends on the caller's data model: ILP32 callers are capped at MAXOFF32_T. */
  489  489  #ifdef _SYSCALL32_IMPL
  490  490          u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
  491  491              MAXOFF32_T : MAXOFFSET_T;
  492  492  #else
  493  493          const u_offset_t maxoff = MAXOFF32_T;
  494  494  #endif
  495  495          int in_crit = 0;
  496  496  
                    /* A count that is negative once cast to ssize_t is rejected. */
  497  497          if ((bcount = (ssize_t)count) < 0)
  498  498                  return (set_errno(EINVAL));
  499  499          if ((fp = getf(fdes)) == NULL)
  500  500                  return (set_errno(EBADF));
  501  501          if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
  502  502                  error = EBADF;
  503  503                  goto out;
  504  504          }
  505  505  
                    /* rwflag 1 selects the exclusive (writer) mode for VOP_RWLOCK. */
  506  506          rwflag = 1;
  507  507          vp = fp->f_vnode;
  508  508  
  509  509          if (vp->v_type == VREG) {
  510  510  
  511  511                  if (bcount == 0)
  512  512                          goto out;
  513  513  
  514  514                  /*
  515  515                   * return EINVAL for offsets that cannot be
  516  516                   * represented in an off_t.
  517  517                   */
  518  518                  if (fileoff > maxoff) {
  519  519                          error = EINVAL;
  520  520                          goto out;
  521  521                  }
  522  522                  /*
  523  523                   * Take appropriate action if we are trying to write above the
  524  524                   * resource limit.
  525  525                   */
  526  526                  if (fileoff >= curproc->p_fsz_ctl) {
  527  527                          mutex_enter(&curproc->p_lock);
  528  528                          (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
  529  529                              curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
  530  530                          mutex_exit(&curproc->p_lock);
  531  531  
  532  532                          error = EFBIG;
  533  533                          goto out;
  534  534                  }
  535  535                  /*
  536  536                   * Don't allow pwrite to cause file sizes to exceed
  537  537                   * maxoff.
  538  538                   */
  539  539                  if (fileoff == maxoff) {
  540  540                          error = EFBIG;
  541  541                          goto out;
  542  542                  }
                            /* NOTE(review): uses 'count' where pread() uses 'bcount'; equivalent here since bcount == (ssize_t)count, but inconsistent between the two functions. */
  543  543                  if (fileoff + count > maxoff)
  544  544                          bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
                    /* Positioned writes on pipes/FIFOs are meaningless; fail with ESPIPE. */
  545  545          } else if (vp->v_type == VFIFO) {
  546  546                  error = ESPIPE;
  547  547                  goto out;
  548  548          }
  549  549  
  550  550          /*
  551  551           * We have to enter the critical region before calling VOP_RWLOCK
  552  552           * to avoid a deadlock with ufs.
  553  553           */
  554  554          if (nbl_need_check(vp)) {
  555  555                  int svmand;
  556  556  
  557  557                  nbl_start_crit(vp, RW_READER);
  558  558                  in_crit = 1;
  559  559                  error = nbl_svmand(vp, fp->f_cred, &svmand);
  560  560                  if (error != 0)
  561  561                          goto out;
  562  562                  if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
  563  563                      NULL)) {
  564  564                          error = EACCES;
  565  565                          goto out;
  566  566                  }
  567  567          }
  568  568  
  569  569          aiov.iov_base = cbuf;
  570  570          aiov.iov_len = bcount;
  571  571          (void) VOP_RWLOCK(vp, rwflag, NULL);
  572  572          auio.uio_loffset = fileoff;
  573  573          auio.uio_iov = &aiov;
  574  574          auio.uio_iovcnt = 1;
  575  575          auio.uio_resid = bcount;
  576  576          auio.uio_segflg = UIO_USERSPACE;
  577  577          auio.uio_llimit = curproc->p_fsz_ctl;
  578  578          auio.uio_fmode = fflag;
  579  579          auio.uio_extflg = UIO_COPY_CACHED;
  580  580  
  581  581          /*
  582  582           * The SUSv4 POSIX specification states:
  583  583           *      The pwrite() function shall be equivalent to write(), except
  584  584           *      that it writes into a given position and does not change
  585  585           *      the file offset (regardless of whether O_APPEND is set).
  586  586           * To make this be true, we omit the FAPPEND flag from ioflag.
  587  587           */
  588  588          ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
  589  589  
  590  590          error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
                    /* bcount becomes the number of bytes actually transferred. */
  591  591          bcount -= auio.uio_resid;
  592  592          CPU_STATS_ENTER_K();
  593  593          cp = CPU;
  594  594          CPU_STATS_ADDQ(cp, sys, syswrite, 1);
  595  595          CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
  596  596          CPU_STATS_EXIT_K();
  597  597          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
                    /* Unlike write(), fp->f_offset is deliberately left untouched. */
  598  598          VOP_RWUNLOCK(vp, rwflag, NULL);
  599  599  
                    /* A signal-interrupted write that already moved data still succeeds. */
  600  600          if (error == EINTR && bcount != 0)
  601  601                  error = 0;
  602  602  out:
  603  603          if (in_crit)
  604  604                  nbl_end_crit(vp);
  605  605          releasef(fdes);
  606  606          if (error)
  607  607                  return (set_errno(error));
  608  608          return (bcount);
  609  609  }
 610  610  
 611  611  ssize_t
 612  612  readv(int fdes, struct iovec *iovp, int iovcnt)
 613  613  {
 614  614          struct uio auio;
 615  615          struct iovec buf[IOV_MAX_STACK], *aiov = buf;
 616  616          int aiovlen = 0;
 617  617          file_t *fp;
 618  618          register vnode_t *vp;
 619  619          struct cpu *cp;
 620  620          int fflag, ioflag, rwflag;
 621  621          ssize_t count, bcount;
 622  622          int error = 0;
 623  623          int i;
 624  624          u_offset_t fileoff;
 625  625          int in_crit = 0;
 626  626  
 627  627          if (iovcnt <= 0 || iovcnt > IOV_MAX)
 628  628                  return (set_errno(EINVAL));
 629  629  
 630  630          if (iovcnt > IOV_MAX_STACK) {
 631  631                  aiovlen = iovcnt * sizeof (iovec_t);
 632  632                  aiov = kmem_alloc(aiovlen, KM_SLEEP);
 633  633          }
 634  634  
 635  635  #ifdef _SYSCALL32_IMPL
 636  636          /*
 637  637           * 32-bit callers need to have their iovec expanded,
 638  638           * while ensuring that they can't move more than 2Gbytes
 639  639           * of data in a single call.
 640  640           */
 641  641          if (get_udatamodel() == DATAMODEL_ILP32) {
 642  642                  struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
 643  643                  int aiov32len;
 644  644                  ssize32_t count32;
 645  645  
 646  646                  aiov32len = iovcnt * sizeof (iovec32_t);
 647  647                  if (aiovlen != 0)
 648  648                          aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
 649  649  
 650  650                  if (copyin(iovp, aiov32, aiov32len)) {
 651  651                          if (aiovlen != 0) {
 652  652                                  kmem_free(aiov32, aiov32len);
 653  653                                  kmem_free(aiov, aiovlen);
 654  654                          }
 655  655                          return (set_errno(EFAULT));
 656  656                  }
 657  657  
 658  658                  count32 = 0;
 659  659                  for (i = 0; i < iovcnt; i++) {
 660  660                          ssize32_t iovlen32 = aiov32[i].iov_len;
 661  661                          count32 += iovlen32;
 662  662                          if (iovlen32 < 0 || count32 < 0) {
 663  663                                  if (aiovlen != 0) {
 664  664                                          kmem_free(aiov32, aiov32len);
 665  665                                          kmem_free(aiov, aiovlen);
 666  666                                  }
 667  667                                  return (set_errno(EINVAL));
 668  668                          }
 669  669                          aiov[i].iov_len = iovlen32;
 670  670                          aiov[i].iov_base =
 671  671                              (caddr_t)(uintptr_t)aiov32[i].iov_base;
 672  672                  }
 673  673  
 674  674                  if (aiovlen != 0)
 675  675                          kmem_free(aiov32, aiov32len);
 676  676          } else
 677  677  #endif
 678  678          if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
 679  679                  if (aiovlen != 0)
 680  680                          kmem_free(aiov, aiovlen);
 681  681                  return (set_errno(EFAULT));
 682  682          }
 683  683  
 684  684          count = 0;
 685  685          for (i = 0; i < iovcnt; i++) {
 686  686                  ssize_t iovlen = aiov[i].iov_len;
 687  687                  count += iovlen;
 688  688                  if (iovlen < 0 || count < 0) {
 689  689                          if (aiovlen != 0)
 690  690                                  kmem_free(aiov, aiovlen);
 691  691                          return (set_errno(EINVAL));
 692  692                  }
 693  693          }
 694  694          if ((fp = getf(fdes)) == NULL) {
 695  695                  if (aiovlen != 0)
 696  696                          kmem_free(aiov, aiovlen);
 697  697                  return (set_errno(EBADF));
 698  698          }
 699  699          if (((fflag = fp->f_flag) & FREAD) == 0) {
 700  700                  error = EBADF;
 701  701                  goto out;
 702  702          }
 703  703          vp = fp->f_vnode;
 704  704          if (vp->v_type == VREG && count == 0) {
 705  705                  goto out;
 706  706          }
 707  707  
 708  708          rwflag = 0;
 709  709  
 710  710          /*
 711  711           * We have to enter the critical region before calling VOP_RWLOCK
 712  712           * to avoid a deadlock with ufs.
 713  713           */
 714  714          if (nbl_need_check(vp)) {
 715  715                  int svmand;
 716  716  
 717  717                  nbl_start_crit(vp, RW_READER);
 718  718                  in_crit = 1;
 719  719                  error = nbl_svmand(vp, fp->f_cred, &svmand);
 720  720                  if (error != 0)
 721  721                          goto out;
 722  722                  if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
 723  723                      NULL)) {
 724  724                          error = EACCES;
 725  725                          goto out;
 726  726                  }
 727  727          }
 728  728  
 729  729          (void) VOP_RWLOCK(vp, rwflag, NULL);
 730  730          fileoff = fp->f_offset;
 731  731  
 732  732          /*
 733  733           * Behaviour is same as read. Please see comments in read.
 734  734           */
 735  735  
 736  736          if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
 737  737                  struct vattr va;
 738  738                  va.va_mask = AT_SIZE;
 739  739                  if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
 740  740                          VOP_RWUNLOCK(vp, rwflag, NULL);
 741  741                          goto out;
 742  742                  }
 743  743                  if (fileoff >= va.va_size) {
 744  744                          VOP_RWUNLOCK(vp, rwflag, NULL);
 745  745                          count = 0;
 746  746                          goto out;
 747  747                  } else {
 748  748                          VOP_RWUNLOCK(vp, rwflag, NULL);
 749  749                          error = EOVERFLOW;
 750  750                          goto out;
 751  751                  }
 752  752          }
 753  753          if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
 754  754                  count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
 755  755          }
 756  756          auio.uio_loffset = fileoff;
 757  757          auio.uio_iov = aiov;
 758  758          auio.uio_iovcnt = iovcnt;
 759  759          auio.uio_resid = bcount = count;
 760  760          auio.uio_segflg = UIO_USERSPACE;
 761  761          auio.uio_llimit = MAXOFFSET_T;
 762  762          auio.uio_fmode = fflag;
 763  763          if (bcount <= copyout_max_cached)
 764  764                  auio.uio_extflg = UIO_COPY_CACHED;
 765  765          else
 766  766                  auio.uio_extflg = UIO_COPY_DEFAULT;
 767  767  
 768  768  
 769  769          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 770  770  
 771  771          /* If read sync is not asked for, filter sync flags */
 772  772          if ((ioflag & FRSYNC) == 0)
 773  773                  ioflag &= ~(FSYNC|FDSYNC);
 774  774          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
 775  775          count -= auio.uio_resid;
 776  776          CPU_STATS_ENTER_K();
 777  777          cp = CPU;
 778  778          CPU_STATS_ADDQ(cp, sys, sysread, 1);
 779  779          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
 780  780          CPU_STATS_EXIT_K();
 781  781          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
 782  782  
 783  783          if (vp->v_type == VFIFO)        /* Backward compatibility */
 784  784                  fp->f_offset = count;
 785  785          else if (((fp->f_flag & FAPPEND) == 0) ||
 786  786              (vp->v_type != VREG) || (bcount != 0))      /* POSIX */
 787  787                  fp->f_offset = auio.uio_loffset;
 788  788  
 789  789          VOP_RWUNLOCK(vp, rwflag, NULL);
 790  790  
 791  791          if (error == EINTR && count != 0)
 792  792                  error = 0;
 793  793  out:
 794  794          if (in_crit)
 795  795                  nbl_end_crit(vp);
 796  796          releasef(fdes);
 797  797          if (aiovlen != 0)
 798  798                  kmem_free(aiov, aiovlen);
 799  799          if (error)
 800  800                  return (set_errno(error));
 801  801          return (count);
 802  802  }
 803  803  
/*
 * writev(2) -- gather-write iovcnt user buffers described by iovp to the
 * open file fdes at the current file offset.
 *
 * Returns the number of bytes written, or -1 with errno set (EINVAL for a
 * bad iovec count/length, EFAULT on copyin failure, EBADF for a bad or
 * non-writable descriptor, EFBIG/EACCES/etc. from the checks below).
 * A write interrupted after transferring some bytes reports the partial
 * count rather than EINTR.
 */
ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	/* small iovec arrays live on the stack; larger ones are allocated */
	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
	int aiovlen = 0;	/* nonzero iff aiov was kmem_alloc()ed */
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;	/* inside the nbl critical region? */

	if (iovcnt <= 0 || iovcnt > IOV_MAX)
		return (set_errno(EINVAL));

	if (iovcnt > IOV_MAX_STACK) {
		aiovlen = iovcnt * sizeof (iovec_t);
		aiov = kmem_alloc(aiovlen, KM_SLEEP);
	}

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
		int aiov32len;
		ssize32_t count32;

		aiov32len = iovcnt * sizeof (iovec32_t);
		if (aiovlen != 0)
			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);

		if (copyin(iovp, aiov32, aiov32len)) {
			if (aiovlen != 0) {
				kmem_free(aiov32, aiov32len);
				kmem_free(aiov, aiovlen);
			}
			return (set_errno(EFAULT));
		}

		/*
		 * Sum the 32-bit lengths; a negative length or a sum that
		 * wraps negative (i.e. exceeds 2G) is rejected.
		 */
		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen = aiov32[i].iov_len;
			count32 += iovlen;
			if (iovlen < 0 || count32 < 0) {
				if (aiovlen != 0) {
					kmem_free(aiov32, aiov32len);
					kmem_free(aiov, aiovlen);
				}
				return (set_errno(EINVAL));
			}
			aiov[i].iov_len = iovlen;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
		if (aiovlen != 0)
			kmem_free(aiov32, aiov32len);
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EFAULT));
	}

	/* Total the request; negative length or overflow => EINVAL. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EINVAL));
		}
	}
	if ((fp = getf(fdes)) == NULL) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EBADF));
	}
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	/* Zero-byte write to a regular file is a successful no-op. */
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 1;	/* passed to VOP_RWLOCK/VOP_RWUNLOCK: 1 = write side */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/* f_offset is only stable now that the vnode rwlock is held */
	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as write. Please see comments for write.
	 */

	if (vp->v_type == VREG) {
		/* Offset already at/over RLIMIT_FSIZE: post signal, EFBIG. */
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/* Clamp so the write cannot extend past the fd's max offset */
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* count = bytes actually written */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* Partial transfer interrupted by a signal still succeeds. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (aiovlen != 0)
		kmem_free(aiov, aiovlen);
	if (error)
		return (set_errno(error));
	return (count);
}
 986  986  
 987  987  ssize_t
 988  988  preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 989  989      off_t extended_offset)
 990  990  {
 991  991          struct uio auio;
 992  992          struct iovec buf[IOV_MAX_STACK], *aiov = buf;
 993  993          int aiovlen = 0;
 994  994          file_t *fp;
 995  995          register vnode_t *vp;
 996  996          struct cpu *cp;
 997  997          int fflag, ioflag, rwflag;
 998  998          ssize_t count, bcount;
 999  999          int error = 0;
1000 1000          int i;
1001 1001  
1002 1002  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1003 1003          u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1004 1004              (u_offset_t)offset;
1005 1005  #else /* _SYSCALL32_IMPL || _ILP32 */
1006 1006          u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1007 1007  #endif /* _SYSCALL32_IMPR || _ILP32 */
1008 1008  #ifdef _SYSCALL32_IMPL
1009 1009          const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1010 1010              extended_offset == 0?
1011 1011              MAXOFF32_T : MAXOFFSET_T;
1012 1012  #else /* _SYSCALL32_IMPL */
1013 1013          const u_offset_t maxoff = MAXOFF32_T;
1014 1014  #endif /* _SYSCALL32_IMPL */
1015 1015  
1016 1016          int in_crit = 0;
1017 1017  
1018 1018          if (iovcnt <= 0 || iovcnt > IOV_MAX)
1019 1019                  return (set_errno(EINVAL));
1020 1020  
1021 1021          if (iovcnt > IOV_MAX_STACK) {
1022 1022                  aiovlen = iovcnt * sizeof (iovec_t);
1023 1023                  aiov = kmem_alloc(aiovlen, KM_SLEEP);
1024 1024          }
1025 1025  
1026 1026  #ifdef _SYSCALL32_IMPL
1027 1027          /*
1028 1028           * 32-bit callers need to have their iovec expanded,
1029 1029           * while ensuring that they can't move more than 2Gbytes
1030 1030           * of data in a single call.
1031 1031           */
1032 1032          if (get_udatamodel() == DATAMODEL_ILP32) {
1033 1033                  struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1034 1034                  int aiov32len;
1035 1035                  ssize32_t count32;
1036 1036  
1037 1037                  aiov32len = iovcnt * sizeof (iovec32_t);
1038 1038                  if (aiovlen != 0)
1039 1039                          aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
1040 1040  
1041 1041                  if (copyin(iovp, aiov32, aiov32len)) {
1042 1042                          if (aiovlen != 0) {
1043 1043                                  kmem_free(aiov32, aiov32len);
1044 1044                                  kmem_free(aiov, aiovlen);
1045 1045                          }
1046 1046                          return (set_errno(EFAULT));
1047 1047                  }
1048 1048  
1049 1049                  count32 = 0;
1050 1050                  for (i = 0; i < iovcnt; i++) {
1051 1051                          ssize32_t iovlen32 = aiov32[i].iov_len;
1052 1052                          count32 += iovlen32;
1053 1053                          if (iovlen32 < 0 || count32 < 0) {
1054 1054                                  if (aiovlen != 0) {
1055 1055                                          kmem_free(aiov32, aiov32len);
1056 1056                                          kmem_free(aiov, aiovlen);
1057 1057                                  }
1058 1058                                  return (set_errno(EINVAL));
1059 1059                          }
1060 1060                          aiov[i].iov_len = iovlen32;
1061 1061                          aiov[i].iov_base =
1062 1062                              (caddr_t)(uintptr_t)aiov32[i].iov_base;
1063 1063                  }
1064 1064                  if (aiovlen != 0)
1065 1065                          kmem_free(aiov32, aiov32len);
1066 1066          } else
1067 1067  #endif /* _SYSCALL32_IMPL */
1068 1068                  if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1069 1069                          if (aiovlen != 0)
1070 1070                                  kmem_free(aiov, aiovlen);
1071 1071                          return (set_errno(EFAULT));
1072 1072                  }
1073 1073  
1074 1074          count = 0;
1075 1075          for (i = 0; i < iovcnt; i++) {
1076 1076                  ssize_t iovlen = aiov[i].iov_len;
1077 1077                  count += iovlen;
1078 1078                  if (iovlen < 0 || count < 0) {
1079 1079                          if (aiovlen != 0)
1080 1080                                  kmem_free(aiov, aiovlen);
1081 1081                          return (set_errno(EINVAL));
1082 1082                  }
1083 1083          }
1084 1084  
1085 1085          if ((bcount = (ssize_t)count) < 0) {
1086 1086                  if (aiovlen != 0)
1087 1087                          kmem_free(aiov, aiovlen);
1088 1088                  return (set_errno(EINVAL));
1089 1089          }
1090 1090          if ((fp = getf(fdes)) == NULL) {
1091 1091                  if (aiovlen != 0)
1092 1092                          kmem_free(aiov, aiovlen);
1093 1093                  return (set_errno(EBADF));
1094 1094          }
1095 1095          if (((fflag = fp->f_flag) & FREAD) == 0) {
1096 1096                  error = EBADF;
1097 1097                  goto out;
1098 1098          }
1099 1099          vp = fp->f_vnode;
1100 1100          rwflag = 0;
1101 1101          if (vp->v_type == VREG) {
1102 1102  
1103 1103                  if (bcount == 0)
1104 1104                          goto out;
1105 1105  
1106 1106                  /*
1107 1107                   * return EINVAL for offsets that cannot be
1108 1108                   * represented in an off_t.
1109 1109                   */
1110 1110                  if (fileoff > maxoff) {
1111 1111                          error = EINVAL;
1112 1112                          goto out;
1113 1113                  }
1114 1114  
1115 1115                  if (fileoff + bcount > maxoff)
1116 1116                          bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
1117 1117          } else if (vp->v_type == VFIFO) {
1118 1118                  error = ESPIPE;
1119 1119                  goto out;
1120 1120          }
1121 1121          /*
1122 1122           * We have to enter the critical region before calling VOP_RWLOCK
1123 1123           * to avoid a deadlock with ufs.
1124 1124           */
1125 1125          if (nbl_need_check(vp)) {
1126 1126                  int svmand;
1127 1127  
1128 1128                  nbl_start_crit(vp, RW_READER);
1129 1129                  in_crit = 1;
1130 1130                  error = nbl_svmand(vp, fp->f_cred, &svmand);
1131 1131                  if (error != 0)
1132 1132                          goto out;
1133 1133                  if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1134 1134                      NULL)) {
1135 1135                          error = EACCES;
1136 1136                          goto out;
1137 1137                  }
1138 1138          }
1139 1139  
1140 1140          (void) VOP_RWLOCK(vp, rwflag, NULL);
1141 1141  
1142 1142          /*
1143 1143           * Behaviour is same as read(2). Please see comments in
1144 1144           * read(2).
1145 1145           */
1146 1146  
1147 1147          if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
1148 1148                  struct vattr va;
1149 1149                  va.va_mask = AT_SIZE;
1150 1150                  if ((error =
1151 1151                      VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
1152 1152                          VOP_RWUNLOCK(vp, rwflag, NULL);
1153 1153                          goto out;
1154 1154                  }
1155 1155                  if (fileoff >= va.va_size) {
1156 1156                          VOP_RWUNLOCK(vp, rwflag, NULL);
1157 1157                          count = 0;
1158 1158                          goto out;
1159 1159                  } else {
1160 1160                          VOP_RWUNLOCK(vp, rwflag, NULL);
1161 1161                          error = EOVERFLOW;
1162 1162                          goto out;
1163 1163                  }
1164 1164          }
1165 1165          if ((vp->v_type == VREG) &&
1166 1166              (fileoff + count > OFFSET_MAX(fp))) {
1167 1167                  count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1168 1168          }
1169 1169          auio.uio_loffset = fileoff;
1170 1170          auio.uio_iov = aiov;
1171 1171          auio.uio_iovcnt = iovcnt;
1172 1172          auio.uio_resid = bcount = count;
1173 1173          auio.uio_segflg = UIO_USERSPACE;
1174 1174          auio.uio_llimit = MAXOFFSET_T;
1175 1175          auio.uio_fmode = fflag;
1176 1176          if (bcount <= copyout_max_cached)
1177 1177                  auio.uio_extflg = UIO_COPY_CACHED;
1178 1178          else
1179 1179                  auio.uio_extflg = UIO_COPY_DEFAULT;
1180 1180  
1181 1181          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1182 1182          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1183 1183          count -= auio.uio_resid;
1184 1184          CPU_STATS_ENTER_K();
1185 1185          cp = CPU;
1186 1186          CPU_STATS_ADDQ(cp, sys, sysread, 1);
1187 1187          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1188 1188          CPU_STATS_EXIT_K();
1189 1189          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1190 1190  
1191 1191          VOP_RWUNLOCK(vp, rwflag, NULL);
1192 1192  
1193 1193          if (error == EINTR && count != 0)
1194 1194                  error = 0;
1195 1195  out:
1196 1196          if (in_crit)
1197 1197                  nbl_end_crit(vp);
1198 1198          releasef(fdes);
1199 1199          if (aiovlen != 0)
1200 1200                  kmem_free(aiov, aiovlen);
1201 1201          if (error)
1202 1202                  return (set_errno(error));
1203 1203          return (count);
1204 1204  }
1205 1205  
/*
 * pwritev(2) -- gather-write iovcnt user buffers to fdes starting at an
 * explicitly supplied file offset.  Unlike writev(2), the descriptor's
 * file pointer (fp->f_offset) is never modified.
 *
 * The 64-bit offset arrives split across offset/extended_offset for
 * 32-bit callers; writes to a FIFO are rejected with ESPIPE.
 */
ssize_t
pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
    off_t extended_offset)
{
	struct uio auio;
	/* small iovec arrays live on the stack; larger ones are allocated */
	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
	int aiovlen = 0;	/* nonzero iff aiov was kmem_alloc()ed */
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
	/* 32-bit entry: reassemble the 64-bit offset from two halves */
	u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
	    (u_offset_t)offset;
#else /* _SYSCALL32_IMPL || _ILP32 */
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#endif /* _SYSCALL32_IMPL || _ILP32 */
#ifdef _SYSCALL32_IMPL
	/*
	 * A 32-bit caller that did not pass an extended offset is limited
	 * to offsets representable in an off32_t.
	 */
	const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
	    extended_offset == 0?
	    MAXOFF32_T : MAXOFFSET_T;
#else /* _SYSCALL32_IMPL */
	const u_offset_t maxoff = MAXOFF32_T;
#endif /* _SYSCALL32_IMPL */

	int in_crit = 0;	/* inside the nbl critical region? */

	if (iovcnt <= 0 || iovcnt > IOV_MAX)
		return (set_errno(EINVAL));

	if (iovcnt > IOV_MAX_STACK) {
		aiovlen = iovcnt * sizeof (iovec_t);
		aiov = kmem_alloc(aiovlen, KM_SLEEP);
	}

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
		int aiov32len;
		ssize32_t count32;

		aiov32len = iovcnt * sizeof (iovec32_t);
		if (aiovlen != 0)
			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);

		if (copyin(iovp, aiov32, aiov32len)) {
			if (aiovlen != 0) {
				kmem_free(aiov32, aiov32len);
				kmem_free(aiov, aiovlen);
			}
			return (set_errno(EFAULT));
		}

		/*
		 * Sum the 32-bit lengths; a negative length or a sum that
		 * wraps negative (i.e. exceeds 2G) is rejected.
		 */
		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			if (iovlen32 < 0 || count32 < 0) {
				if (aiovlen != 0) {
					kmem_free(aiov32, aiov32len);
					kmem_free(aiov, aiovlen);
				}
				return (set_errno(EINVAL));
			}
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
		if (aiovlen != 0)
			kmem_free(aiov32, aiov32len);
	} else
#endif /* _SYSCALL32_IMPL */
		if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EFAULT));
		}

	/* Total the request; negative length or overflow => EINVAL. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EINVAL));
		}
	}

	if ((bcount = (ssize_t)count) < 0) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EBADF));
	}
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	rwflag = 1;	/* passed to VOP_RWLOCK/VOP_RWUNLOCK: 1 = write side */
	if (vp->v_type == VREG) {

		/* Zero-byte write to a regular file: successful no-op. */
		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Take appropriate action if we are trying
		 * to write above the resource limit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			/*
			 * Return value ignored because it lists
			 * actions taken, but we are in an error case.
			 * We don't have any actions that depend on
			 * what could happen in this call, so we ignore
			 * the return value.
			 */
			(void) rctl_action(
			    rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc,
			    RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * Don't allow pwritev to cause file sizes to exceed
		 * maxoff.
		 */
		if (fileoff == maxoff) {
			error = EFBIG;
			goto out;
		}

		/*
		 * NOTE(review): this clamp of bcount is overwritten below
		 * by "auio.uio_resid = bcount = count"; the effective clamp
		 * is the OFFSET_MAX(fp) one after VOP_RWLOCK.
		 */
		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		/* Pipes and FIFOs have no notion of a file offset. */
		error = ESPIPE;
		goto out;
	}
	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);


	/*
	 * Behaviour is same as write(2). Please see comments for
	 * write(2).
	 */

	if (vp->v_type == VREG) {
		/* Re-check the resource limit now that the lock is held. */
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			mutex_enter(&curproc->p_lock);
			/* see above rctl_action comment */
			(void) rctl_action(
			    rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls,
			    curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/* Clamp so the write cannot extend past the fd's max offset */
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}

	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;
	/*
	 * NOTE(review): FAPPEND is deliberately not propagated here (compare
	 * writev), so the data goes to the supplied offset even on an
	 * O_APPEND descriptor -- confirm this matches pwrite(2) intent.
	 */
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* count = bytes actually written */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	/* Positional write: fp->f_offset is deliberately left untouched. */
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* Partial transfer interrupted by a signal still succeeds. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (aiovlen != 0)
		kmem_free(aiov, aiovlen);
	if (error)
		return (set_errno(error));
	return (count);
}
1451 1451  
1452 1452  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1453 1453  
1454 1454  /*
1455 1455   * This syscall supplies 64-bit file offsets to 32-bit applications only.
1456 1456   */
1457 1457  ssize32_t
1458 1458  pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
1459 1459      uint32_t offset_2)
1460 1460  {
1461 1461          struct uio auio;
1462 1462          struct iovec aiov;
1463 1463          file_t *fp;
1464 1464          register vnode_t *vp;
1465 1465          struct cpu *cp;
1466 1466          int fflag, ioflag, rwflag;
1467 1467          ssize_t bcount;
1468 1468          int error = 0;
1469 1469          u_offset_t fileoff;
1470 1470          int in_crit = 0;
1471 1471  
1472 1472  #if defined(_LITTLE_ENDIAN)
1473 1473          fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
1474 1474  #else
1475 1475          fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
1476 1476  #endif
1477 1477  
1478 1478          if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
1479 1479                  return (set_errno(EINVAL));
1480 1480  
1481 1481          if ((fp = getf(fdes)) == NULL)
1482 1482                  return (set_errno(EBADF));
1483 1483          if (((fflag = fp->f_flag) & (FREAD)) == 0) {
1484 1484                  error = EBADF;
1485 1485                  goto out;
1486 1486          }
1487 1487  
1488 1488          rwflag = 0;
1489 1489          vp = fp->f_vnode;
1490 1490  
1491 1491          if (vp->v_type == VREG) {
1492 1492  
1493 1493                  if (bcount == 0)
1494 1494                          goto out;
1495 1495  
1496 1496                  /*
1497 1497                   * Same as pread. See comments in pread.
1498 1498                   */
1499 1499  
1500 1500                  if (fileoff > MAXOFFSET_T) {
1501 1501                          error = EINVAL;
1502 1502                          goto out;
1503 1503                  }
1504 1504                  if (fileoff + bcount > MAXOFFSET_T)
1505 1505                          bcount = (ssize_t)(MAXOFFSET_T - fileoff);
1506 1506          } else if (vp->v_type == VFIFO) {
1507 1507                  error = ESPIPE;
1508 1508                  goto out;
1509 1509          }
1510 1510  
1511 1511          /*
1512 1512           * We have to enter the critical region before calling VOP_RWLOCK
1513 1513           * to avoid a deadlock with ufs.
1514 1514           */
1515 1515          if (nbl_need_check(vp)) {
1516 1516                  int svmand;
1517 1517  
1518 1518                  nbl_start_crit(vp, RW_READER);
1519 1519                  in_crit = 1;
1520 1520                  error = nbl_svmand(vp, fp->f_cred, &svmand);
1521 1521                  if (error != 0)
1522 1522                          goto out;
1523 1523                  if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
1524 1524                      NULL)) {
1525 1525                          error = EACCES;
1526 1526                          goto out;
1527 1527                  }
1528 1528          }
1529 1529  
1530 1530          aiov.iov_base = cbuf;
1531 1531          aiov.iov_len = bcount;
1532 1532          (void) VOP_RWLOCK(vp, rwflag, NULL);
1533 1533          auio.uio_loffset = fileoff;
1534 1534  
1535 1535          /*
1536 1536           * Note: File size can never be greater than MAXOFFSET_T.
1537 1537           * If ever we start supporting 128 bit files the code
1538 1538           * similar to the one in pread at this place should be here.
1539 1539           * Here we avoid the unnecessary VOP_GETATTR() when we
1540 1540           * know that fileoff == MAXOFFSET_T implies that it is always
1541 1541           * greater than or equal to file size.
1542 1542           */
1543 1543          auio.uio_iov = &aiov;
1544 1544          auio.uio_iovcnt = 1;
1545 1545          auio.uio_resid = bcount;
1546 1546          auio.uio_segflg = UIO_USERSPACE;
1547 1547          auio.uio_llimit = MAXOFFSET_T;
1548 1548          auio.uio_fmode = fflag;
1549 1549          auio.uio_extflg = UIO_COPY_CACHED;
1550 1550  
1551 1551          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1552 1552  
1553 1553          /* If read sync is not asked for, filter sync flags */
1554 1554          if ((ioflag & FRSYNC) == 0)
1555 1555                  ioflag &= ~(FSYNC|FDSYNC);
1556 1556          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1557 1557          bcount -= auio.uio_resid;
1558 1558          CPU_STATS_ENTER_K();
1559 1559          cp = CPU;
1560 1560          CPU_STATS_ADDQ(cp, sys, sysread, 1);
1561 1561          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
1562 1562          CPU_STATS_EXIT_K();
1563 1563          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
1564 1564          VOP_RWUNLOCK(vp, rwflag, NULL);
1565 1565  
1566 1566          if (error == EINTR && bcount != 0)
1567 1567                  error = 0;
1568 1568  out:
1569 1569          if (in_crit)
1570 1570                  nbl_end_crit(vp);
1571 1571          releasef(fdes);
1572 1572          if (error)
1573 1573                  return (set_errno(error));
1574 1574          return (bcount);
1575 1575  }
1576 1576  
1577 1577  /*
1578 1578   * This syscall supplies 64-bit file offsets to 32-bit applications only.
1579 1579   */
1580 1580  ssize32_t
1581 1581  pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
1582 1582      uint32_t offset_2)
1583 1583  {
1584 1584          struct uio auio;
1585 1585          struct iovec aiov;
1586 1586          file_t *fp;
1587 1587          register vnode_t *vp;
1588 1588          struct cpu *cp;
1589 1589          int fflag, ioflag, rwflag;
1590 1590          ssize_t bcount;
1591 1591          int error = 0;
1592 1592          u_offset_t fileoff;
1593 1593          int in_crit = 0;
1594 1594  
1595 1595  #if defined(_LITTLE_ENDIAN)
1596 1596          fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
1597 1597  #else
1598 1598          fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
1599 1599  #endif
1600 1600  
1601 1601          if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
1602 1602                  return (set_errno(EINVAL));
1603 1603          if ((fp = getf(fdes)) == NULL)
1604 1604                  return (set_errno(EBADF));
1605 1605          if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
1606 1606                  error = EBADF;
1607 1607                  goto out;
1608 1608          }
1609 1609  
1610 1610          rwflag = 1;
1611 1611          vp = fp->f_vnode;
1612 1612  
1613 1613          if (vp->v_type == VREG) {
1614 1614  
1615 1615                  if (bcount == 0)
1616 1616                          goto out;
1617 1617  
1618 1618                  /*
1619 1619                   * See comments in pwrite.
1620 1620                   */
1621 1621                  if (fileoff > MAXOFFSET_T) {
1622 1622                          error = EINVAL;
1623 1623                          goto out;
1624 1624                  }
1625 1625                  if (fileoff >= curproc->p_fsz_ctl) {
1626 1626                          mutex_enter(&curproc->p_lock);
1627 1627                          (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
1628 1628                              curproc->p_rctls, curproc, RCA_SAFE);
1629 1629                          mutex_exit(&curproc->p_lock);
1630 1630                          error = EFBIG;
1631 1631                          goto out;
1632 1632                  }
1633 1633                  if (fileoff == MAXOFFSET_T) {
1634 1634                          error = EFBIG;
1635 1635                          goto out;
1636 1636                  }
1637 1637                  if (fileoff + bcount > MAXOFFSET_T)
1638 1638                          bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
1639 1639          } else if (vp->v_type == VFIFO) {
1640 1640                  error = ESPIPE;
1641 1641                  goto out;
1642 1642          }
1643 1643  
1644 1644          /*
1645 1645           * We have to enter the critical region before calling VOP_RWLOCK
1646 1646           * to avoid a deadlock with ufs.
1647 1647           */
1648 1648          if (nbl_need_check(vp)) {
1649 1649                  int svmand;
1650 1650  
1651 1651                  nbl_start_crit(vp, RW_READER);
1652 1652                  in_crit = 1;
1653 1653                  error = nbl_svmand(vp, fp->f_cred, &svmand);
1654 1654                  if (error != 0)
1655 1655                          goto out;
1656 1656                  if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
1657 1657                      NULL)) {
1658 1658                          error = EACCES;
1659 1659                          goto out;
1660 1660                  }
1661 1661          }
1662 1662  
1663 1663          aiov.iov_base = cbuf;
1664 1664          aiov.iov_len = bcount;
1665 1665          (void) VOP_RWLOCK(vp, rwflag, NULL);
1666 1666          auio.uio_loffset = fileoff;
1667 1667          auio.uio_iov = &aiov;
1668 1668          auio.uio_iovcnt = 1;
1669 1669          auio.uio_resid = bcount;
1670 1670          auio.uio_segflg = UIO_USERSPACE;
1671 1671          auio.uio_llimit = curproc->p_fsz_ctl;
1672 1672          auio.uio_fmode = fflag;
1673 1673          auio.uio_extflg = UIO_COPY_CACHED;
1674 1674  
1675 1675          /*
1676 1676           * The SUSv4 POSIX specification states:
1677 1677           *      The pwrite() function shall be equivalent to write(), except
1678 1678           *      that it writes into a given position and does not change
1679 1679           *      the file offset (regardless of whether O_APPEND is set).
1680 1680           * To make this be true, we omit the FAPPEND flag from ioflag.
1681 1681           */
1682 1682          ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1683 1683  
1684 1684          error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1685 1685          bcount -= auio.uio_resid;
1686 1686          CPU_STATS_ENTER_K();
1687 1687          cp = CPU;
1688 1688          CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1689 1689          CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
1690 1690          CPU_STATS_EXIT_K();
1691 1691          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
1692 1692          VOP_RWUNLOCK(vp, rwflag, NULL);
1693 1693  
1694 1694          if (error == EINTR && bcount != 0)
1695 1695                  error = 0;
1696 1696  out:
1697 1697          if (in_crit)
1698 1698                  nbl_end_crit(vp);
1699 1699          releasef(fdes);
1700 1700          if (error)
1701 1701                  return (set_errno(error));
1702 1702          return (bcount);
1703 1703  }
1704 1704  
1705 1705  #endif  /* _SYSCALL32_IMPL || _ILP32 */
1706 1706  
1707 1707  #ifdef _SYSCALL32_IMPL
1708 1708  /*
1709 1709   * Tail-call elimination of xxx32() down to xxx()
1710 1710   *
1711 1711   * A number of xxx32 system calls take a len (or count) argument and
1712 1712   * return a number in the range [0,len] or -1 on error.
1713 1713   * Given an ssize32_t input len, the downcall xxx() will return
1714 1714   * a 64-bit value that is -1 or in the range [0,len] which actually
1715 1715   * is a proper return value for the xxx32 call. So even if the xxx32
1716 1716   * calls can be considered as returning a ssize32_t, they are currently
1717 1717   * declared as returning a ssize_t as this enables tail-call elimination.
1718 1718   *
1719 1719   * The cast of len (or count) to ssize32_t is needed to ensure we pass
1720 1720   * down negative input values as such and let the downcall handle error
1721 1721   * reporting. Functions covered by this comments are:
1722 1722   *
1723 1723   * rw.c:           read32, write32, pread32, pwrite32, readv32, writev32.
1724 1724   * socksyscall.c:  recv32, recvfrom32, send32, sendto32.
1725 1725   * readlink.c:     readlink32.
1726 1726   */
1727 1727  
1728 1728  ssize_t
1729 1729  read32(int32_t fdes, caddr32_t cbuf, size32_t count)
1730 1730  {
1731 1731          return (read(fdes,
1732 1732              (void *)(uintptr_t)cbuf, (ssize32_t)count));
1733 1733  }
1734 1734  
1735 1735  ssize_t
1736 1736  write32(int32_t fdes, caddr32_t cbuf, size32_t count)
1737 1737  {
1738 1738          return (write(fdes,
1739 1739              (void *)(uintptr_t)cbuf, (ssize32_t)count));
1740 1740  }
1741 1741  
1742 1742  ssize_t
1743 1743  pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
1744 1744  {
1745 1745          return (pread(fdes,
1746 1746              (void *)(uintptr_t)cbuf, (ssize32_t)count,
1747 1747              (off_t)(uint32_t)offset));
1748 1748  }
1749 1749  
1750 1750  ssize_t
1751 1751  pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
1752 1752  {
1753 1753          return (pwrite(fdes,
1754 1754              (void *)(uintptr_t)cbuf, (ssize32_t)count,
1755 1755              (off_t)(uint32_t)offset));
1756 1756  }
1757 1757  
1758 1758  ssize_t
1759 1759  readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
1760 1760  {
1761 1761          return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt));
1762 1762  }
1763 1763  
1764 1764  ssize_t
1765 1765  writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
1766 1766  {
1767 1767          return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt));
1768 1768  }
1769 1769  #endif  /* _SYSCALL32_IMPL */
  
    | 
      ↓ open down ↓ | 
    1769 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX