Print this page
    
OS-3752 Increase IOV_MAX to at least 1024
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/syscall/rw.c
          +++ new/usr/src/uts/common/syscall/rw.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25      - * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
       25 + * Copyright 2015, Joyent, Inc.  All rights reserved.
  26   26   */
  27   27  
  28   28  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  29   29  /*        All Rights Reserved   */
  30   30  
  31   31  /*
  32   32   * Portions of this source code were derived from Berkeley 4.3 BSD
  33   33   * under license from the Regents of the University of California.
  34   34   */
  35   35  
  36   36  #include <sys/param.h>
  37   37  #include <sys/isa_defs.h>
  38   38  #include <sys/types.h>
  39   39  #include <sys/inttypes.h>
  40   40  #include <sys/sysmacros.h>
  41   41  #include <sys/cred.h>
  42   42  #include <sys/user.h>
  
    | 
      ↓ open down ↓ | 
    7 lines elided | 
    
      ↑ open up ↑ | 
  
  43   43  #include <sys/systm.h>
  44   44  #include <sys/errno.h>
  45   45  #include <sys/vnode.h>
  46   46  #include <sys/file.h>
  47   47  #include <sys/proc.h>
  48   48  #include <sys/cpuvar.h>
  49   49  #include <sys/uio.h>
  50   50  #include <sys/debug.h>
  51   51  #include <sys/rctl.h>
  52   52  #include <sys/nbmlock.h>
       53 +#include <sys/limits.h>
  53   54  
  54   55  #define COPYOUT_MAX_CACHE       (1<<17)         /* 128K */
  55   56  
           /*
            * Threshold used by read(): a request of at most copyout_max_cached
            * bytes is issued with UIO_COPY_CACHED, a larger one with
            * UIO_COPY_DEFAULT.  Initialized to COPYOUT_MAX_CACHE.
            */
  56   57  size_t copyout_max_cached = COPYOUT_MAX_CACHE;  /* global so it's patchable */
  57   58  
  58   59  /*
  59   60   * read, write, pread, pwrite, readv, and writev syscalls.
  60   61   *
  61   62   * 64-bit open: all opens are large file opens.
  62   63   * Large Files: the behaviour of read depends on whether the fd
  63   64   *              corresponds to large open or not.
  64   65   * 32-bit open: FOFFMAX flag not set.
  65   66   *              read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
  66   67   *              EOVERFLOW if count is non-zero and if size of file
  67   68   *              is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
  68   69   *              at >= MAXOFF32_T returns EOF.
  69   70   */
  70   71  
  71   72  /*
  72   73   * Native system call
            *
            * read() transfers up to count bytes from the file open on fdes
            * into the user buffer cbuf, starting at the file's current offset
            * (fp->f_offset), and returns the number of bytes transferred.
            *
            * On failure the return value is set_errno(error), where error is:
            *      EINVAL    - count is negative when cast to ssize_t
            *      EBADF     - fdes is not an open file, or lacks FREAD
            *      EACCES    - nbl_conflict() reported a conflict for the range
            *      EOVERFLOW - regular file whose offset is at or past
            *                  OFFSET_MAX(fp) while the file size is greater
            *                  than that offset
            * or whatever nbl_svmand(), VOP_GETATTR() or VOP_READ() returned.
            * An EINTR from VOP_READ() is dropped if any bytes were transferred.
  73   74   */
  74   75  ssize_t
  75   76  read(int fdes, void *cbuf, size_t count)
  76   77  {
  77   78          struct uio auio;
  78   79          struct iovec aiov;
  79   80          file_t *fp;
  80   81          register vnode_t *vp;
  81   82          struct cpu *cp;
  82   83          int fflag, ioflag, rwflag;
  83   84          ssize_t cnt, bcount;
  84   85          int error = 0;
  85   86          u_offset_t fileoff;
  86   87          int in_crit = 0;
  87   88  
  88   89          if ((cnt = (ssize_t)count) < 0)
  89   90                  return (set_errno(EINVAL));
  90   91          if ((fp = getf(fdes)) == NULL)
  91   92                  return (set_errno(EBADF));
  92   93          if (((fflag = fp->f_flag) & FREAD) == 0) {
  93   94                  error = EBADF;
  94   95                  goto out;
  95   96          }
  96   97          vp = fp->f_vnode;
  97   98  
                   /*
                    * A zero-length read of a regular file returns 0 right away,
                    * before the critical region or VOP_RWLOCK is entered.
                    */
  98   99          if (vp->v_type == VREG && cnt == 0) {
  99  100                  goto out;
 100  101          }
 101  102  
 102  103          rwflag = 0;
 103  104          aiov.iov_base = cbuf;
 104  105          aiov.iov_len = cnt;
 105  106  
 106  107          /*
 107  108           * We have to enter the critical region before calling VOP_RWLOCK
 108  109           * to avoid a deadlock with write() calls.
 109  110           */
 110  111          if (nbl_need_check(vp)) {
 111  112                  int svmand;
 112  113  
 113  114                  nbl_start_crit(vp, RW_READER);
 114  115                  in_crit = 1;
 115  116                  error = nbl_svmand(vp, fp->f_cred, &svmand);
 116  117                  if (error != 0)
 117  118                          goto out;
 118  119                  if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
 119  120                      NULL)) {
 120  121                          error = EACCES;
 121  122                          goto out;
 122  123                  }
 123  124          }
 124  125  
 125  126          (void) VOP_RWLOCK(vp, rwflag, NULL);
 126  127  
 127  128          /*
 128  129           * We do the following checks inside VOP_RWLOCK so as to
 129  130           * prevent file size from changing while these checks are
 130  131           * being done. Also, we load fp's offset to the local
 131  132           * variable fileoff because we can have a parallel lseek
 132  133           * going on (f_offset is not protected by any lock) which
 133  134           * could change f_offset. We need to see the value only
 134  135           * once here and take a decision. Seeing it more than once
 135  136           * can lead to incorrect functionality.
 136  137           */
 137  138  
 138  139          fileoff = (u_offset_t)fp->f_offset;
 139  140          if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
 140  141                  struct vattr va;
 141  142                  va.va_mask = AT_SIZE;
 142  143                  if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
 143  144                          VOP_RWUNLOCK(vp, rwflag, NULL);
 144  145                          goto out;
 145  146                  }
                           /*
                            * At or past the end of the file this is EOF (return 0);
                            * otherwise there is data at an offset at or past
                            * OFFSET_MAX(fp) and the call fails with EOVERFLOW.
                            */
 146  147                  if (fileoff >= va.va_size) {
 147  148                          cnt = 0;
 148  149                          VOP_RWUNLOCK(vp, rwflag, NULL);
 149  150                          goto out;
 150  151                  } else {
 151  152                          error = EOVERFLOW;
 152  153                          VOP_RWUNLOCK(vp, rwflag, NULL);
 153  154                          goto out;
 154  155                  }
 155  156          }
                   /* Clip the request so that it ends at OFFSET_MAX(fp). */
 156  157          if ((vp->v_type == VREG) &&
 157  158              (fileoff + cnt > OFFSET_MAX(fp))) {
 158  159                  cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
 159  160          }
 160  161          auio.uio_loffset = fileoff;
 161  162          auio.uio_iov = &aiov;
 162  163          auio.uio_iovcnt = 1;
 163  164          auio.uio_resid = bcount = cnt;
 164  165          auio.uio_segflg = UIO_USERSPACE;
 165  166          auio.uio_llimit = MAXOFFSET_T;
 166  167          auio.uio_fmode = fflag;
 167  168          /*
 168  169           * Only use bypass caches when the count is large enough
                    * (that is, when it exceeds copyout_max_cached).
 169  170           */
 170  171          if (bcount <= copyout_max_cached)
 171  172                  auio.uio_extflg = UIO_COPY_CACHED;
 172  173          else
 173  174                  auio.uio_extflg = UIO_COPY_DEFAULT;
 174  175  
 175  176          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 176  177  
 177  178          /* If read sync is not asked for, filter sync flags */
 178  179          if ((ioflag & FRSYNC) == 0)
 179  180                  ioflag &= ~(FSYNC|FDSYNC);
 180  181          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
                   /* cnt becomes the number of bytes actually transferred. */
 181  182          cnt -= auio.uio_resid;
 182  183          CPU_STATS_ENTER_K();
 183  184          cp = CPU;
 184  185          CPU_STATS_ADDQ(cp, sys, sysread, 1);
 185  186          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
 186  187          CPU_STATS_EXIT_K();
 187  188          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
 188  189  
 189  190          if (vp->v_type == VFIFO)        /* Backward compatibility */
 190  191                  fp->f_offset = cnt;
 191  192          else if (((fp->f_flag & FAPPEND) == 0) ||
 192  193              (vp->v_type != VREG) || (bcount != 0))      /* POSIX */
 193  194                  fp->f_offset = auio.uio_loffset;
 194  195          VOP_RWUNLOCK(vp, rwflag, NULL);
 195  196  
                   /* A partial transfer is reported as success, not EINTR. */
 196  197          if (error == EINTR && cnt != 0)
 197  198                  error = 0;
 198  199  out:
 199  200          if (in_crit)
 200  201                  nbl_end_crit(vp);
 201  202          releasef(fdes);
 202  203          if (error)
 203  204                  return (set_errno(error));
 204  205          return (cnt);
 205  206  }
 206  207  
 207  208  /*
 208  209   * Native system call
            *
            * write() transfers up to count bytes from the user buffer cbuf to
            * the file open on fdes, starting at the file's current offset
            * (fp->f_offset), and returns the number of bytes transferred.
            *
            * On failure the return value is set_errno(error), where error is:
            *      EINVAL - count is negative when cast to ssize_t
            *      EBADF  - fdes is not an open file, or lacks FWRITE
            *      EACCES - nbl_conflict() reported a conflict for the range
            *      EFBIG  - regular file whose offset is at or past
            *               curproc->p_fsz_ctl (rctl_action() is invoked for
            *               RLIMIT_FSIZE first) or at or past OFFSET_MAX(fp)
            * or whatever nbl_svmand() or VOP_WRITE() returned.
            * An EINTR from VOP_WRITE() is dropped if any bytes were transferred.
 209  210   */
 210  211  ssize_t
 211  212  write(int fdes, void *cbuf, size_t count)
 212  213  {
 213  214          struct uio auio;
 214  215          struct iovec aiov;
 215  216          file_t *fp;
 216  217          register vnode_t *vp;
 217  218          struct cpu *cp;
 218  219          int fflag, ioflag, rwflag;
 219  220          ssize_t cnt, bcount;
 220  221          int error = 0;
 221  222          u_offset_t fileoff;
 222  223          int in_crit = 0;
 223  224  
 224  225          if ((cnt = (ssize_t)count) < 0)
 225  226                  return (set_errno(EINVAL));
 226  227          if ((fp = getf(fdes)) == NULL)
 227  228                  return (set_errno(EBADF));
 228  229          if (((fflag = fp->f_flag) & FWRITE) == 0) {
 229  230                  error = EBADF;
 230  231                  goto out;
 231  232          }
 232  233          vp = fp->f_vnode;
 233  234  
                   /*
                    * A zero-length write to a regular file returns 0 right away,
                    * before the critical region or VOP_RWLOCK is entered.
                    */
 234  235          if (vp->v_type == VREG && cnt == 0) {
 235  236                  goto out;
 236  237          }
 237  238  
 238  239          rwflag = 1;
 239  240          aiov.iov_base = cbuf;
 240  241          aiov.iov_len = cnt;
 241  242  
 242  243          /*
 243  244           * We have to enter the critical region before calling VOP_RWLOCK
 244  245           * to avoid a deadlock with ufs.
 245  246           */
 246  247          if (nbl_need_check(vp)) {
 247  248                  int svmand;
 248  249  
 249  250                  nbl_start_crit(vp, RW_READER);
 250  251                  in_crit = 1;
 251  252                  error = nbl_svmand(vp, fp->f_cred, &svmand);
 252  253                  if (error != 0)
 253  254                          goto out;
 254  255                  if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
 255  256                      NULL)) {
 256  257                          error = EACCES;
 257  258                          goto out;
 258  259                  }
 259  260          }
 260  261  
 261  262          (void) VOP_RWLOCK(vp, rwflag, NULL);
 262  263  
 263  264          fileoff = fp->f_offset;
 264  265          if (vp->v_type == VREG) {
 265  266  
 266  267                  /*
 267  268                   * We raise psignal if write for >0 bytes causes
 268  269                   * it to exceed the ulimit.
 269  270                   */
 270  271                  if (fileoff >= curproc->p_fsz_ctl) {
 271  272                          VOP_RWUNLOCK(vp, rwflag, NULL);
 272  273  
 273  274                          mutex_enter(&curproc->p_lock);
 274  275                          (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
 275  276                              curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
 276  277                          mutex_exit(&curproc->p_lock);
 277  278  
 278  279                          error = EFBIG;
 279  280                          goto out;
 280  281                  }
 281  282                  /*
 282  283                   * We return EFBIG if write is done at an offset
 283  284                   * greater than the offset maximum for this file structure.
 284  285                   */
 285  286  
 286  287                  if (fileoff >= OFFSET_MAX(fp)) {
 287  288                          VOP_RWUNLOCK(vp, rwflag, NULL);
 288  289                          error = EFBIG;
 289  290                          goto out;
 290  291                  }
 291  292                  /*
 292  293                   * Limit the bytes to be written up to the offset maximum for
 293  294                   * this open file structure.
 294  295                   */
 295  296                  if (fileoff + cnt > OFFSET_MAX(fp))
 296  297                          cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
 297  298          }
 298  299          auio.uio_loffset = fileoff;
 299  300          auio.uio_iov = &aiov;
 300  301          auio.uio_iovcnt = 1;
 301  302          auio.uio_resid = bcount = cnt;
 302  303          auio.uio_segflg = UIO_USERSPACE;
 303  304          auio.uio_llimit = curproc->p_fsz_ctl;
 304  305          auio.uio_fmode = fflag;
 305  306          auio.uio_extflg = UIO_COPY_DEFAULT;
 306  307  
 307  308          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 308  309  
 309  310          error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
                   /* cnt becomes the number of bytes actually transferred. */
 310  311          cnt -= auio.uio_resid;
 311  312          CPU_STATS_ENTER_K();
 312  313          cp = CPU;
 313  314          CPU_STATS_ADDQ(cp, sys, syswrite, 1);
 314  315          CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
 315  316          CPU_STATS_EXIT_K();
 316  317          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
 317  318  
 318  319          if (vp->v_type == VFIFO)        /* Backward compatibility */
 319  320                  fp->f_offset = cnt;
 320  321          else if (((fp->f_flag & FAPPEND) == 0) ||
 321  322              (vp->v_type != VREG) || (bcount != 0))      /* POSIX */
 322  323                  fp->f_offset = auio.uio_loffset;
 323  324          VOP_RWUNLOCK(vp, rwflag, NULL);
 324  325  
                   /* A partial transfer is reported as success, not EINTR. */
 325  326          if (error == EINTR && cnt != 0)
 326  327                  error = 0;
 327  328  out:
 328  329          if (in_crit)
 329  330                  nbl_end_crit(vp);
 330  331          releasef(fdes);
 331  332          if (error)
 332  333                  return (set_errno(error));
 333  334          return (cnt);
 334  335  }
 335  336  
 336  337  ssize_t
 337  338  pread(int fdes, void *cbuf, size_t count, off_t offset)
 338  339  {
                   /*
                    * pread() transfers up to count bytes from the file open on
                    * fdes into the user buffer cbuf, starting at the caller-supplied
                    * offset, and returns the number of bytes transferred.
                    * fp->f_offset is neither read nor updated here.
                    *
                    * For a regular file the offset may not exceed maxoff, which is
                    * MAXOFF32_T for ILP32 callers (and whenever _SYSCALL32_IMPL is
                    * not defined) and MAXOFFSET_T otherwise.
                    *
                    * On failure the return value is set_errno(error), where error is:
                    *      EINVAL    - count is negative when cast to ssize_t, or
                    *                  the file is regular and offset exceeds maxoff
                    *      EBADF     - fdes is not an open file, or lacks FREAD
                    *      ESPIPE    - the vnode type is VFIFO
                    *      EACCES    - nbl_conflict() reported a conflict
                    *      EOVERFLOW - regular file, offset equals maxoff, and the
                    *                  file size is greater than that offset
                    * or whatever nbl_svmand(), VOP_GETATTR() or VOP_READ() returned.
                    */
 339  340          struct uio auio;
 340  341          struct iovec aiov;
 341  342          file_t *fp;
 342  343          register vnode_t *vp;
 343  344          struct cpu *cp;
 344  345          int fflag, ioflag, rwflag;
 345  346          ssize_t bcount;
 346  347          int error = 0;
 347  348          u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
 348  349  #ifdef _SYSCALL32_IMPL
 349  350          u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
 350  351              MAXOFF32_T : MAXOFFSET_T;
 351  352  #else
 352  353          const u_offset_t maxoff = MAXOFF32_T;
 353  354  #endif
 354  355          int in_crit = 0;
 355  356  
 356  357          if ((bcount = (ssize_t)count) < 0)
 357  358                  return (set_errno(EINVAL));
 358  359  
 359  360          if ((fp = getf(fdes)) == NULL)
 360  361                  return (set_errno(EBADF));
 361  362          if (((fflag = fp->f_flag) & (FREAD)) == 0) {
 362  363                  error = EBADF;
 363  364                  goto out;
 364  365          }
 365  366  
 366  367          rwflag = 0;
 367  368          vp = fp->f_vnode;
 368  369  
 369  370          if (vp->v_type == VREG) {
 370  371  
                           /* A zero-length read of a regular file returns 0. */
 371  372                  if (bcount == 0)
 372  373                          goto out;
 373  374  
 374  375                  /*
 375  376                   * Return EINVAL if an invalid offset comes to pread.
 376  377                   * Negative offset from user will cause this error.
 377  378                   */
 378  379  
 379  380                  if (fileoff > maxoff) {
 380  381                          error = EINVAL;
 381  382                          goto out;
 382  383                  }
 383  384                  /*
 384  385                   * Limit the byte count such that we don't read
 385  386                   * a file beyond the maximum offset representable in
 386  387                   * an off_t structure.
 387  388                   */
 388  389                  if (fileoff + bcount > maxoff)
 389  390                          bcount = (ssize_t)((offset_t)maxoff - fileoff);
 390  391          } else if (vp->v_type == VFIFO) {
 391  392                  error = ESPIPE;
 392  393                  goto out;
 393  394          }
 394  395  
 395  396          /*
 396  397           * We have to enter the critical region before calling VOP_RWLOCK
 397  398           * to avoid a deadlock with ufs.
 398  399           */
 399  400          if (nbl_need_check(vp)) {
 400  401                  int svmand;
 401  402  
 402  403                  nbl_start_crit(vp, RW_READER);
 403  404                  in_crit = 1;
 404  405                  error = nbl_svmand(vp, fp->f_cred, &svmand);
 405  406                  if (error != 0)
 406  407                          goto out;
 407  408                  if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
 408  409                      NULL)) {
 409  410                          error = EACCES;
 410  411                          goto out;
 411  412                  }
 412  413          }
 413  414  
 414  415          aiov.iov_base = cbuf;
 415  416          aiov.iov_len = bcount;
 416  417          (void) VOP_RWLOCK(vp, rwflag, NULL);
 417  418          if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
 418  419                  struct vattr va;
 419  420                  va.va_mask = AT_SIZE;
 420  421                  if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
 421  422                          VOP_RWUNLOCK(vp, rwflag, NULL);
 422  423                          goto out;
 423  424                  }
 424  425                  VOP_RWUNLOCK(vp, rwflag, NULL);
 425  426  
 426  427                  /*
 427  428                   * We have to return EOF if fileoff is >= file size.
 428  429                   */
 429  430                  if (fileoff >= va.va_size) {
 430  431                          bcount = 0;
 431  432                          goto out;
 432  433                  }
 433  434  
 434  435                  /*
 435  436                   * File size is greater than maxoff (fileoff == maxoff and
 436  437                   * fileoff < va_size) and therefore we return EOVERFLOW.
 437  438                   */
 438  439                  error = EOVERFLOW;
 439  440                  goto out;
 440  441          }
 441  442          auio.uio_loffset = fileoff;
 442  443          auio.uio_iov = &aiov;
 443  444          auio.uio_iovcnt = 1;
 444  445          auio.uio_resid = bcount;
 445  446          auio.uio_segflg = UIO_USERSPACE;
 446  447          auio.uio_llimit = MAXOFFSET_T;
 447  448          auio.uio_fmode = fflag;
 448  449          auio.uio_extflg = UIO_COPY_CACHED;
 449  450  
 450  451          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 451  452  
 452  453          /* If read sync is not asked for, filter sync flags */
 453  454          if ((ioflag & FRSYNC) == 0)
 454  455                  ioflag &= ~(FSYNC|FDSYNC);
 455  456          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
                   /* bcount becomes the number of bytes actually transferred. */
 456  457          bcount -= auio.uio_resid;
 457  458          CPU_STATS_ENTER_K();
 458  459          cp = CPU;
 459  460          CPU_STATS_ADDQ(cp, sys, sysread, 1);
 460  461          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
 461  462          CPU_STATS_EXIT_K();
 462  463          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
 463  464          VOP_RWUNLOCK(vp, rwflag, NULL);
 464  465  
                   /* A partial transfer is reported as success, not EINTR. */
 465  466          if (error == EINTR && bcount != 0)
 466  467                  error = 0;
 467  468  out:
 468  469          if (in_crit)
 469  470                  nbl_end_crit(vp);
 470  471          releasef(fdes);
 471  472          if (error)
 472  473                  return (set_errno(error));
 473  474          return (bcount);
 474  475  }
 475  476  
 476  477  ssize_t
 477  478  pwrite(int fdes, void *cbuf, size_t count, off_t offset)
 478  479  {
                   /*
                    * pwrite() transfers up to count bytes from the user buffer cbuf
                    * to the file open on fdes, starting at the caller-supplied
                    * offset, and returns the number of bytes transferred.
                    * fp->f_offset is neither read nor updated here.
                    *
                    * For a regular file the offset may not exceed maxoff, which is
                    * MAXOFF32_T for ILP32 callers (and whenever _SYSCALL32_IMPL is
                    * not defined) and MAXOFFSET_T otherwise.
                    *
                    * On failure the return value is set_errno(error), where error is:
                    *      EINVAL - count is negative when cast to ssize_t, or the
                    *               file is regular and offset exceeds maxoff
                    *      EBADF  - fdes is not an open file, or lacks FWRITE
                    *      EFBIG  - regular file whose offset is at or past
                    *               curproc->p_fsz_ctl (rctl_action() is invoked
                    *               for RLIMIT_FSIZE first), or equals maxoff
                    *      ESPIPE - the vnode type is VFIFO
                    *      EACCES - nbl_conflict() reported a conflict
                    * or whatever nbl_svmand() or VOP_WRITE() returned.
                    */
 479  480          struct uio auio;
 480  481          struct iovec aiov;
 481  482          file_t *fp;
 482  483          register vnode_t *vp;
 483  484          struct cpu *cp;
 484  485          int fflag, ioflag, rwflag;
 485  486          ssize_t bcount;
 486  487          int error = 0;
 487  488          u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
 488  489  #ifdef _SYSCALL32_IMPL
 489  490          u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
 490  491              MAXOFF32_T : MAXOFFSET_T;
 491  492  #else
 492  493          const u_offset_t maxoff = MAXOFF32_T;
 493  494  #endif
 494  495          int in_crit = 0;
 495  496  
 496  497          if ((bcount = (ssize_t)count) < 0)
 497  498                  return (set_errno(EINVAL));
 498  499          if ((fp = getf(fdes)) == NULL)
 499  500                  return (set_errno(EBADF));
 500  501          if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
 501  502                  error = EBADF;
 502  503                  goto out;
 503  504          }
 504  505  
 505  506          rwflag = 1;
 506  507          vp = fp->f_vnode;
 507  508  
 508  509          if (vp->v_type == VREG) {
 509  510  
                           /* A zero-length write to a regular file returns 0. */
 510  511                  if (bcount == 0)
 511  512                          goto out;
 512  513  
 513  514                  /*
 514  515                   * return EINVAL for offsets that cannot be
 515  516                   * represented in an off_t.
 516  517                   */
 517  518                  if (fileoff > maxoff) {
 518  519                          error = EINVAL;
 519  520                          goto out;
 520  521                  }
 521  522                  /*
 522  523                   * Take appropriate action if we are trying to write above the
 523  524                   * resource limit.
 524  525                   */
 525  526                  if (fileoff >= curproc->p_fsz_ctl) {
 526  527                          mutex_enter(&curproc->p_lock);
 527  528                          (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
 528  529                              curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
 529  530                          mutex_exit(&curproc->p_lock);
 530  531  
 531  532                          error = EFBIG;
 532  533                          goto out;
 533  534                  }
 534  535                  /*
 535  536                   * Don't allow pwrite to cause file sizes to exceed
 536  537                   * maxoff.
 537  538                   */
 538  539                  if (fileoff == maxoff) {
 539  540                          error = EFBIG;
 540  541                          goto out;
 541  542                  }
                           /*
                            * Clip the byte count so that the write ends at maxoff.
                            * count equals bcount here; the negative case was
                            * rejected above.
                            */
 542  543                  if (fileoff + count > maxoff)
 543  544                          bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
 544  545          } else if (vp->v_type == VFIFO) {
 545  546                  error = ESPIPE;
 546  547                  goto out;
 547  548          }
 548  549  
 549  550          /*
 550  551           * We have to enter the critical region before calling VOP_RWLOCK
 551  552           * to avoid a deadlock with ufs.
 552  553           */
 553  554          if (nbl_need_check(vp)) {
 554  555                  int svmand;
 555  556  
 556  557                  nbl_start_crit(vp, RW_READER);
 557  558                  in_crit = 1;
 558  559                  error = nbl_svmand(vp, fp->f_cred, &svmand);
 559  560                  if (error != 0)
 560  561                          goto out;
 561  562                  if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
 562  563                      NULL)) {
 563  564                          error = EACCES;
 564  565                          goto out;
 565  566                  }
 566  567          }
 567  568  
 568  569          aiov.iov_base = cbuf;
 569  570          aiov.iov_len = bcount;
 570  571          (void) VOP_RWLOCK(vp, rwflag, NULL);
 571  572          auio.uio_loffset = fileoff;
 572  573          auio.uio_iov = &aiov;
 573  574          auio.uio_iovcnt = 1;
 574  575          auio.uio_resid = bcount;
 575  576          auio.uio_segflg = UIO_USERSPACE;
 576  577          auio.uio_llimit = curproc->p_fsz_ctl;
 577  578          auio.uio_fmode = fflag;
 578  579          auio.uio_extflg = UIO_COPY_CACHED;
 579  580  
 580  581          /*
 581  582           * The SUSv4 POSIX specification states:
 582  583           *      The pwrite() function shall be equivalent to write(), except
 583  584           *      that it writes into a given position and does not change
 584  585           *      the file offset (regardless of whether O_APPEND is set).
 585  586           * To make this be true, we omit the FAPPEND flag from ioflag.
 586  587           */
 587  588          ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
 588  589  
 589  590          error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
                   /* bcount becomes the number of bytes actually transferred. */
 590  591          bcount -= auio.uio_resid;
 591  592          CPU_STATS_ENTER_K();
 592  593          cp = CPU;
 593  594          CPU_STATS_ADDQ(cp, sys, syswrite, 1);
 594  595          CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
 595  596          CPU_STATS_EXIT_K();
 596  597          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
 597  598          VOP_RWUNLOCK(vp, rwflag, NULL);
 598  599  
                   /* A partial transfer is reported as success, not EINTR. */
 599  600          if (error == EINTR && bcount != 0)
  
    | 
      ↓ open down ↓ | 
    537 lines elided | 
    
      ↑ open up ↑ | 
  
 600  601                  error = 0;
 601  602  out:
 602  603          if (in_crit)
 603  604                  nbl_end_crit(vp);
 604  605          releasef(fdes);
 605  606          if (error)
 606  607                  return (set_errno(error));
 607  608          return (bcount);
 608  609  }
 609  610  
 610      -/*
 611      - * XXX -- The SVID refers to IOV_MAX, but doesn't define it.  Grrrr....
 612      - * XXX -- However, SVVS expects readv() and writev() to fail if
 613      - * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
 614      - * XXX -- so I guess that's the "interface".
 615      - */
 616      -#define DEF_IOV_MAX     16
 617      -
 618  611  ssize_t
 619  612  readv(int fdes, struct iovec *iovp, int iovcnt)
 620  613  {
 621  614          struct uio auio;
 622      -        struct iovec aiov[DEF_IOV_MAX];
      615 +        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
      616 +        int aiovlen = 0;
 623  617          file_t *fp;
 624  618          register vnode_t *vp;
 625  619          struct cpu *cp;
 626  620          int fflag, ioflag, rwflag;
 627  621          ssize_t count, bcount;
 628  622          int error = 0;
 629  623          int i;
 630  624          u_offset_t fileoff;
 631  625          int in_crit = 0;
 632  626  
 633      -        if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
      627 +        if (iovcnt <= 0 || iovcnt > IOV_MAX)
 634  628                  return (set_errno(EINVAL));
 635  629  
      630 +        if (iovcnt > IOV_MAX_STACK) {
      631 +                aiovlen = iovcnt * sizeof (iovec_t);
      632 +                aiov = kmem_alloc(aiovlen, KM_SLEEP);
      633 +        }
      634 +
 636  635  #ifdef _SYSCALL32_IMPL
 637  636          /*
 638  637           * 32-bit callers need to have their iovec expanded,
 639  638           * while ensuring that they can't move more than 2Gbytes
 640  639           * of data in a single call.
 641  640           */
 642  641          if (get_udatamodel() == DATAMODEL_ILP32) {
 643      -                struct iovec32 aiov32[DEF_IOV_MAX];
      642 +                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
      643 +                int aiov32len;
 644  644                  ssize32_t count32;
 645  645  
 646      -                if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
      646 +                aiov32len = iovcnt * sizeof (iovec32_t);
      647 +                if (aiovlen != 0)
      648 +                        aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
      649 +
      650 +                if (copyin(iovp, aiov32, aiov32len)) {
      651 +                        if (aiovlen != 0) {
      652 +                                kmem_free(aiov32, aiov32len);
      653 +                                kmem_free(aiov, aiovlen);
      654 +                        }
 647  655                          return (set_errno(EFAULT));
      656 +                }
 648  657  
 649  658                  count32 = 0;
 650  659                  for (i = 0; i < iovcnt; i++) {
 651  660                          ssize32_t iovlen32 = aiov32[i].iov_len;
 652  661                          count32 += iovlen32;
 653      -                        if (iovlen32 < 0 || count32 < 0)
      662 +                        if (iovlen32 < 0 || count32 < 0) {
      663 +                                if (aiovlen != 0) {
      664 +                                        kmem_free(aiov32, aiov32len);
      665 +                                        kmem_free(aiov, aiovlen);
      666 +                                }
 654  667                                  return (set_errno(EINVAL));
      668 +                        }
 655  669                          aiov[i].iov_len = iovlen32;
 656  670                          aiov[i].iov_base =
 657  671                              (caddr_t)(uintptr_t)aiov32[i].iov_base;
 658  672                  }
      673 +
      674 +                if (aiovlen != 0)
      675 +                        kmem_free(aiov32, aiov32len);
 659  676          } else
 660  677  #endif
 661      -        if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
      678 +        if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
      679 +                if (aiovlen != 0)
      680 +                        kmem_free(aiov, aiovlen);
 662  681                  return (set_errno(EFAULT));
      682 +        }
 663  683  
 664  684          count = 0;
 665  685          for (i = 0; i < iovcnt; i++) {
 666  686                  ssize_t iovlen = aiov[i].iov_len;
 667  687                  count += iovlen;
 668      -                if (iovlen < 0 || count < 0)
      688 +                if (iovlen < 0 || count < 0) {
      689 +                        if (aiovlen != 0)
      690 +                                kmem_free(aiov, aiovlen);
 669  691                          return (set_errno(EINVAL));
      692 +                }
 670  693          }
 671      -        if ((fp = getf(fdes)) == NULL)
      694 +        if ((fp = getf(fdes)) == NULL) {
      695 +                if (aiovlen != 0)
      696 +                        kmem_free(aiov, aiovlen);
 672  697                  return (set_errno(EBADF));
      698 +        }
 673  699          if (((fflag = fp->f_flag) & FREAD) == 0) {
 674  700                  error = EBADF;
 675  701                  goto out;
 676  702          }
 677  703          vp = fp->f_vnode;
 678  704          if (vp->v_type == VREG && count == 0) {
 679  705                  goto out;
 680  706          }
 681  707  
 682  708          rwflag = 0;
 683  709  
 684  710          /*
 685  711           * We have to enter the critical region before calling VOP_RWLOCK
 686  712           * to avoid a deadlock with ufs.
 687  713           */
 688  714          if (nbl_need_check(vp)) {
 689  715                  int svmand;
 690  716  
 691  717                  nbl_start_crit(vp, RW_READER);
 692  718                  in_crit = 1;
 693  719                  error = nbl_svmand(vp, fp->f_cred, &svmand);
 694  720                  if (error != 0)
 695  721                          goto out;
 696  722                  if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
 697  723                      NULL)) {
 698  724                          error = EACCES;
 699  725                          goto out;
 700  726                  }
 701  727          }
 702  728  
 703  729          (void) VOP_RWLOCK(vp, rwflag, NULL);
 704  730          fileoff = fp->f_offset;
 705  731  
 706  732          /*
 707  733           * Behaviour is same as read. Please see comments in read.
 708  734           */
 709  735  
 710  736          if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
 711  737                  struct vattr va;
 712  738                  va.va_mask = AT_SIZE;
 713  739                  if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
 714  740                          VOP_RWUNLOCK(vp, rwflag, NULL);
 715  741                          goto out;
 716  742                  }
 717  743                  if (fileoff >= va.va_size) {
 718  744                          VOP_RWUNLOCK(vp, rwflag, NULL);
 719  745                          count = 0;
 720  746                          goto out;
 721  747                  } else {
 722  748                          VOP_RWUNLOCK(vp, rwflag, NULL);
 723  749                          error = EOVERFLOW;
 724  750                          goto out;
 725  751                  }
 726  752          }
 727  753          if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
 728  754                  count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
 729  755          }
 730  756          auio.uio_loffset = fileoff;
 731  757          auio.uio_iov = aiov;
 732  758          auio.uio_iovcnt = iovcnt;
 733  759          auio.uio_resid = bcount = count;
 734  760          auio.uio_segflg = UIO_USERSPACE;
 735  761          auio.uio_llimit = MAXOFFSET_T;
 736  762          auio.uio_fmode = fflag;
 737  763          if (bcount <= copyout_max_cached)
 738  764                  auio.uio_extflg = UIO_COPY_CACHED;
 739  765          else
 740  766                  auio.uio_extflg = UIO_COPY_DEFAULT;
 741  767  
 742  768  
 743  769          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 744  770  
 745  771          /* If read sync is not asked for, filter sync flags */
 746  772          if ((ioflag & FRSYNC) == 0)
 747  773                  ioflag &= ~(FSYNC|FDSYNC);
 748  774          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
 749  775          count -= auio.uio_resid;
 750  776          CPU_STATS_ENTER_K();
 751  777          cp = CPU;
 752  778          CPU_STATS_ADDQ(cp, sys, sysread, 1);
 753  779          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
 754  780          CPU_STATS_EXIT_K();
 755  781          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
 756  782  
 757  783          if (vp->v_type == VFIFO)        /* Backward compatibility */
 758  784                  fp->f_offset = count;
 759  785          else if (((fp->f_flag & FAPPEND) == 0) ||
 760  786              (vp->v_type != VREG) || (bcount != 0))      /* POSIX */
  
    | 
      ↓ open down ↓ | 
    78 lines elided | 
    
      ↑ open up ↑ | 
  
 761  787                  fp->f_offset = auio.uio_loffset;
 762  788  
 763  789          VOP_RWUNLOCK(vp, rwflag, NULL);
 764  790  
 765  791          if (error == EINTR && count != 0)
 766  792                  error = 0;
 767  793  out:
 768  794          if (in_crit)
 769  795                  nbl_end_crit(vp);
 770  796          releasef(fdes);
      797 +        if (aiovlen != 0)
      798 +                kmem_free(aiov, aiovlen);
 771  799          if (error)
 772  800                  return (set_errno(error));
 773  801          return (count);
 774  802  }
 775  803  
 776  804  ssize_t
 777  805  writev(int fdes, struct iovec *iovp, int iovcnt)
 778  806  {
 779  807          struct uio auio;
 780      -        struct iovec aiov[DEF_IOV_MAX];
      808 +        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
      809 +        int aiovlen = 0;
 781  810          file_t *fp;
 782  811          register vnode_t *vp;
 783  812          struct cpu *cp;
 784  813          int fflag, ioflag, rwflag;
 785  814          ssize_t count, bcount;
 786  815          int error = 0;
 787  816          int i;
 788  817          u_offset_t fileoff;
 789  818          int in_crit = 0;
 790  819  
 791      -        if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
      820 +        if (iovcnt <= 0 || iovcnt > IOV_MAX)
 792  821                  return (set_errno(EINVAL));
 793  822  
      823 +        if (iovcnt > IOV_MAX_STACK) {
      824 +                aiovlen = iovcnt * sizeof (iovec_t);
      825 +                aiov = kmem_alloc(aiovlen, KM_SLEEP);
      826 +        }
      827 +
 794  828  #ifdef _SYSCALL32_IMPL
 795  829          /*
 796  830           * 32-bit callers need to have their iovec expanded,
 797  831           * while ensuring that they can't move more than 2Gbytes
 798  832           * of data in a single call.
 799  833           */
 800  834          if (get_udatamodel() == DATAMODEL_ILP32) {
 801      -                struct iovec32 aiov32[DEF_IOV_MAX];
      835 +                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
      836 +                int aiov32len;
 802  837                  ssize32_t count32;
 803  838  
 804      -                if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
      839 +                aiov32len = iovcnt * sizeof (iovec32_t);
      840 +                if (aiovlen != 0)
      841 +                        aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
      842 +
      843 +                if (copyin(iovp, aiov32, aiov32len)) {
      844 +                        if (aiovlen != 0) {
      845 +                                kmem_free(aiov32, aiov32len);
      846 +                                kmem_free(aiov, aiovlen);
      847 +                        }
 805  848                          return (set_errno(EFAULT));
      849 +                }
 806  850  
 807  851                  count32 = 0;
 808  852                  for (i = 0; i < iovcnt; i++) {
 809  853                          ssize32_t iovlen = aiov32[i].iov_len;
 810  854                          count32 += iovlen;
 811      -                        if (iovlen < 0 || count32 < 0)
      855 +                        if (iovlen < 0 || count32 < 0) {
      856 +                                if (aiovlen != 0) {
      857 +                                        kmem_free(aiov32, aiov32len);
      858 +                                        kmem_free(aiov, aiovlen);
      859 +                                }
 812  860                                  return (set_errno(EINVAL));
      861 +                        }
 813  862                          aiov[i].iov_len = iovlen;
 814  863                          aiov[i].iov_base =
 815  864                              (caddr_t)(uintptr_t)aiov32[i].iov_base;
 816  865                  }
      866 +                if (aiovlen != 0)
      867 +                        kmem_free(aiov32, aiov32len);
 817  868          } else
 818  869  #endif
 819      -        if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
      870 +        if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
      871 +                if (aiovlen != 0)
      872 +                        kmem_free(aiov, aiovlen);
 820  873                  return (set_errno(EFAULT));
      874 +        }
 821  875  
 822  876          count = 0;
 823  877          for (i = 0; i < iovcnt; i++) {
 824  878                  ssize_t iovlen = aiov[i].iov_len;
 825  879                  count += iovlen;
 826      -                if (iovlen < 0 || count < 0)
      880 +                if (iovlen < 0 || count < 0) {
      881 +                        if (aiovlen != 0)
      882 +                                kmem_free(aiov, aiovlen);
 827  883                          return (set_errno(EINVAL));
      884 +                }
 828  885          }
 829      -        if ((fp = getf(fdes)) == NULL)
      886 +        if ((fp = getf(fdes)) == NULL) {
      887 +                if (aiovlen != 0)
      888 +                        kmem_free(aiov, aiovlen);
 830  889                  return (set_errno(EBADF));
      890 +        }
 831  891          if (((fflag = fp->f_flag) & FWRITE) == 0) {
 832  892                  error = EBADF;
 833  893                  goto out;
 834  894          }
 835  895          vp = fp->f_vnode;
 836  896          if (vp->v_type == VREG && count == 0) {
 837  897                  goto out;
 838  898          }
 839  899  
 840  900          rwflag = 1;
 841  901  
 842  902          /*
 843  903           * We have to enter the critical region before calling VOP_RWLOCK
 844  904           * to avoid a deadlock with ufs.
 845  905           */
 846  906          if (nbl_need_check(vp)) {
 847  907                  int svmand;
 848  908  
 849  909                  nbl_start_crit(vp, RW_READER);
 850  910                  in_crit = 1;
 851  911                  error = nbl_svmand(vp, fp->f_cred, &svmand);
 852  912                  if (error != 0)
 853  913                          goto out;
 854  914                  if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
 855  915                      NULL)) {
 856  916                          error = EACCES;
 857  917                          goto out;
 858  918                  }
 859  919          }
 860  920  
 861  921          (void) VOP_RWLOCK(vp, rwflag, NULL);
 862  922  
 863  923          fileoff = fp->f_offset;
 864  924  
 865  925          /*
 866  926           * Behaviour is same as write. Please see comments for write.
 867  927           */
 868  928  
 869  929          if (vp->v_type == VREG) {
 870  930                  if (fileoff >= curproc->p_fsz_ctl) {
 871  931                          VOP_RWUNLOCK(vp, rwflag, NULL);
 872  932                          mutex_enter(&curproc->p_lock);
 873  933                          (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
 874  934                              curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
 875  935                          mutex_exit(&curproc->p_lock);
 876  936                          error = EFBIG;
 877  937                          goto out;
 878  938                  }
 879  939                  if (fileoff >= OFFSET_MAX(fp)) {
 880  940                          VOP_RWUNLOCK(vp, rwflag, NULL);
 881  941                          error = EFBIG;
 882  942                          goto out;
 883  943                  }
 884  944                  if (fileoff + count > OFFSET_MAX(fp))
 885  945                          count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
 886  946          }
 887  947          auio.uio_loffset = fileoff;
 888  948          auio.uio_iov = aiov;
 889  949          auio.uio_iovcnt = iovcnt;
 890  950          auio.uio_resid = bcount = count;
 891  951          auio.uio_segflg = UIO_USERSPACE;
 892  952          auio.uio_llimit = curproc->p_fsz_ctl;
 893  953          auio.uio_fmode = fflag;
 894  954          auio.uio_extflg = UIO_COPY_DEFAULT;
 895  955  
 896  956          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 897  957  
 898  958          error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
 899  959          count -= auio.uio_resid;
 900  960          CPU_STATS_ENTER_K();
 901  961          cp = CPU;
 902  962          CPU_STATS_ADDQ(cp, sys, syswrite, 1);
 903  963          CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
 904  964          CPU_STATS_EXIT_K();
 905  965          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
 906  966  
 907  967          if (vp->v_type == VFIFO)        /* Backward compatibility */
 908  968                  fp->f_offset = count;
 909  969          else if (((fp->f_flag & FAPPEND) == 0) ||
  
    | 
      ↓ open down ↓ | 
    69 lines elided | 
    
      ↑ open up ↑ | 
  
 910  970              (vp->v_type != VREG) || (bcount != 0))      /* POSIX */
 911  971                  fp->f_offset = auio.uio_loffset;
 912  972          VOP_RWUNLOCK(vp, rwflag, NULL);
 913  973  
 914  974          if (error == EINTR && count != 0)
 915  975                  error = 0;
 916  976  out:
 917  977          if (in_crit)
 918  978                  nbl_end_crit(vp);
 919  979          releasef(fdes);
      980 +        if (aiovlen != 0)
      981 +                kmem_free(aiov, aiovlen);
 920  982          if (error)
 921  983                  return (set_errno(error));
 922  984          return (count);
 923  985  }
 924  986  
 925  987  ssize_t
 926  988  preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 927  989      off_t extended_offset)
 928  990  {
 929  991          struct uio auio;
 930      -        struct iovec aiov[DEF_IOV_MAX];
      992 +        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
      993 +        int aiovlen = 0;
 931  994          file_t *fp;
 932  995          register vnode_t *vp;
 933  996          struct cpu *cp;
 934  997          int fflag, ioflag, rwflag;
 935  998          ssize_t count, bcount;
 936  999          int error = 0;
 937 1000          int i;
 938 1001  
 939 1002  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
 940 1003          u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
 941 1004              (u_offset_t)offset;
 942 1005  #else /* _SYSCALL32_IMPL || _ILP32 */
 943 1006          u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
 944 1007  #endif /* _SYSCALL32_IMPR || _ILP32 */
  
    | 
      ↓ open down ↓ | 
    4 lines elided | 
    
      ↑ open up ↑ | 
  
 945 1008  #ifdef _SYSCALL32_IMPL
 946 1009          const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
 947 1010              extended_offset == 0?
 948 1011              MAXOFF32_T : MAXOFFSET_T;
 949 1012  #else /* _SYSCALL32_IMPL */
 950 1013          const u_offset_t maxoff = MAXOFF32_T;
 951 1014  #endif /* _SYSCALL32_IMPL */
 952 1015  
 953 1016          int in_crit = 0;
 954 1017  
 955      -        if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
     1018 +        if (iovcnt <= 0 || iovcnt > IOV_MAX)
 956 1019                  return (set_errno(EINVAL));
 957 1020  
     1021 +        if (iovcnt > IOV_MAX_STACK) {
     1022 +                aiovlen = iovcnt * sizeof (iovec_t);
     1023 +                aiov = kmem_alloc(aiovlen, KM_SLEEP);
     1024 +        }
     1025 +
 958 1026  #ifdef _SYSCALL32_IMPL
 959 1027          /*
 960 1028           * 32-bit callers need to have their iovec expanded,
 961 1029           * while ensuring that they can't move more than 2Gbytes
 962 1030           * of data in a single call.
 963 1031           */
 964 1032          if (get_udatamodel() == DATAMODEL_ILP32) {
 965      -                struct iovec32 aiov32[DEF_IOV_MAX];
     1033 +                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
     1034 +                int aiov32len;
 966 1035                  ssize32_t count32;
 967 1036  
 968      -                if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
     1037 +                aiov32len = iovcnt * sizeof (iovec32_t);
     1038 +                if (aiovlen != 0)
     1039 +                        aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
     1040 +
     1041 +                if (copyin(iovp, aiov32, aiov32len)) {
     1042 +                        if (aiovlen != 0) {
     1043 +                                kmem_free(aiov32, aiov32len);
     1044 +                                kmem_free(aiov, aiovlen);
     1045 +                        }
 969 1046                          return (set_errno(EFAULT));
     1047 +                }
 970 1048  
 971 1049                  count32 = 0;
 972 1050                  for (i = 0; i < iovcnt; i++) {
 973 1051                          ssize32_t iovlen32 = aiov32[i].iov_len;
 974 1052                          count32 += iovlen32;
 975      -                        if (iovlen32 < 0 || count32 < 0)
     1053 +                        if (iovlen32 < 0 || count32 < 0) {
     1054 +                                if (aiovlen != 0) {
     1055 +                                        kmem_free(aiov32, aiov32len);
     1056 +                                        kmem_free(aiov, aiovlen);
     1057 +                                }
 976 1058                                  return (set_errno(EINVAL));
     1059 +                        }
 977 1060                          aiov[i].iov_len = iovlen32;
 978 1061                          aiov[i].iov_base =
 979 1062                              (caddr_t)(uintptr_t)aiov32[i].iov_base;
 980 1063                  }
     1064 +                if (aiovlen != 0)
     1065 +                        kmem_free(aiov32, aiov32len);
 981 1066          } else
 982 1067  #endif /* _SYSCALL32_IMPL */
 983      -                if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
     1068 +                if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
     1069 +                        if (aiovlen != 0)
     1070 +                                kmem_free(aiov, aiovlen);
 984 1071                          return (set_errno(EFAULT));
     1072 +                }
 985 1073  
 986 1074          count = 0;
 987 1075          for (i = 0; i < iovcnt; i++) {
 988 1076                  ssize_t iovlen = aiov[i].iov_len;
 989 1077                  count += iovlen;
 990      -                if (iovlen < 0 || count < 0)
     1078 +                if (iovlen < 0 || count < 0) {
     1079 +                        if (aiovlen != 0)
     1080 +                                kmem_free(aiov, aiovlen);
 991 1081                          return (set_errno(EINVAL));
     1082 +                }
 992 1083          }
 993 1084  
 994      -        if ((bcount = (ssize_t)count) < 0)
     1085 +        if ((bcount = (ssize_t)count) < 0) {
     1086 +                if (aiovlen != 0)
     1087 +                        kmem_free(aiov, aiovlen);
 995 1088                  return (set_errno(EINVAL));
 996      -        if ((fp = getf(fdes)) == NULL)
     1089 +        }
     1090 +        if ((fp = getf(fdes)) == NULL) {
     1091 +                if (aiovlen != 0)
     1092 +                        kmem_free(aiov, aiovlen);
 997 1093                  return (set_errno(EBADF));
     1094 +        }
 998 1095          if (((fflag = fp->f_flag) & FREAD) == 0) {
 999 1096                  error = EBADF;
1000 1097                  goto out;
1001 1098          }
1002 1099          vp = fp->f_vnode;
1003 1100          rwflag = 0;
1004 1101          if (vp->v_type == VREG) {
1005 1102  
1006 1103                  if (bcount == 0)
1007 1104                          goto out;
1008 1105  
1009 1106                  /*
1010 1107                   * return EINVAL for offsets that cannot be
1011 1108                   * represented in an off_t.
1012 1109                   */
1013 1110                  if (fileoff > maxoff) {
1014 1111                          error = EINVAL;
1015 1112                          goto out;
1016 1113                  }
1017 1114  
1018 1115                  if (fileoff + bcount > maxoff)
1019 1116                          bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
1020 1117          } else if (vp->v_type == VFIFO) {
1021 1118                  error = ESPIPE;
1022 1119                  goto out;
1023 1120          }
1024 1121          /*
1025 1122           * We have to enter the critical region before calling VOP_RWLOCK
1026 1123           * to avoid a deadlock with ufs.
1027 1124           */
1028 1125          if (nbl_need_check(vp)) {
1029 1126                  int svmand;
1030 1127  
1031 1128                  nbl_start_crit(vp, RW_READER);
1032 1129                  in_crit = 1;
1033 1130                  error = nbl_svmand(vp, fp->f_cred, &svmand);
1034 1131                  if (error != 0)
1035 1132                          goto out;
1036 1133                  if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1037 1134                      NULL)) {
1038 1135                          error = EACCES;
1039 1136                          goto out;
1040 1137                  }
1041 1138          }
1042 1139  
1043 1140          (void) VOP_RWLOCK(vp, rwflag, NULL);
1044 1141  
1045 1142          /*
1046 1143           * Behaviour is same as read(2). Please see comments in
1047 1144           * read(2).
1048 1145           */
1049 1146  
1050 1147          if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
1051 1148                  struct vattr va;
1052 1149                  va.va_mask = AT_SIZE;
1053 1150                  if ((error =
1054 1151                      VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
1055 1152                          VOP_RWUNLOCK(vp, rwflag, NULL);
1056 1153                          goto out;
1057 1154                  }
1058 1155                  if (fileoff >= va.va_size) {
1059 1156                          VOP_RWUNLOCK(vp, rwflag, NULL);
1060 1157                          count = 0;
1061 1158                          goto out;
1062 1159                  } else {
1063 1160                          VOP_RWUNLOCK(vp, rwflag, NULL);
1064 1161                          error = EOVERFLOW;
1065 1162                          goto out;
1066 1163                  }
1067 1164          }
1068 1165          if ((vp->v_type == VREG) &&
1069 1166              (fileoff + count > OFFSET_MAX(fp))) {
1070 1167                  count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1071 1168          }
1072 1169          auio.uio_loffset = fileoff;
1073 1170          auio.uio_iov = aiov;
1074 1171          auio.uio_iovcnt = iovcnt;
1075 1172          auio.uio_resid = bcount = count;
1076 1173          auio.uio_segflg = UIO_USERSPACE;
1077 1174          auio.uio_llimit = MAXOFFSET_T;
1078 1175          auio.uio_fmode = fflag;
1079 1176          if (bcount <= copyout_max_cached)
1080 1177                  auio.uio_extflg = UIO_COPY_CACHED;
1081 1178          else
1082 1179                  auio.uio_extflg = UIO_COPY_DEFAULT;
1083 1180  
1084 1181          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1085 1182          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1086 1183          count -= auio.uio_resid;
1087 1184          CPU_STATS_ENTER_K();
1088 1185          cp = CPU;
1089 1186          CPU_STATS_ADDQ(cp, sys, sysread, 1);
1090 1187          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1091 1188          CPU_STATS_EXIT_K();
  
    | 
      ↓ open down ↓ | 
    84 lines elided | 
    
      ↑ open up ↑ | 
  
1092 1189          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1093 1190  
1094 1191          VOP_RWUNLOCK(vp, rwflag, NULL);
1095 1192  
1096 1193          if (error == EINTR && count != 0)
1097 1194                  error = 0;
1098 1195  out:
1099 1196          if (in_crit)
1100 1197                  nbl_end_crit(vp);
1101 1198          releasef(fdes);
     1199 +        if (aiovlen != 0)
     1200 +                kmem_free(aiov, aiovlen);
1102 1201          if (error)
1103 1202                  return (set_errno(error));
1104 1203          return (count);
1105 1204  }
1106 1205  
1107 1206  ssize_t
1108 1207  pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1109 1208      off_t extended_offset)
1110 1209  {
1111 1210          struct uio auio;
1112      -        struct iovec aiov[DEF_IOV_MAX];
     1211 +        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
     1212 +        int aiovlen = 0;
1113 1213          file_t *fp;
1114 1214          register vnode_t *vp;
1115 1215          struct cpu *cp;
1116 1216          int fflag, ioflag, rwflag;
1117 1217          ssize_t count, bcount;
1118 1218          int error = 0;
1119 1219          int i;
1120 1220  
1121 1221  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1122 1222          u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1123 1223              (u_offset_t)offset;
1124 1224  #else /* _SYSCALL32_IMPL || _ILP32 */
1125 1225          u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
 1126 1226  #endif /* _SYSCALL32_IMPL || _ILP32 */
  
    | 
      ↓ open down ↓ | 
    4 lines elided | 
    
      ↑ open up ↑ | 
  
1127 1227  #ifdef _SYSCALL32_IMPL
1128 1228          const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1129 1229              extended_offset == 0?
1130 1230              MAXOFF32_T : MAXOFFSET_T;
1131 1231  #else /* _SYSCALL32_IMPL */
1132 1232          const u_offset_t maxoff = MAXOFF32_T;
1133 1233  #endif /* _SYSCALL32_IMPL */
1134 1234  
1135 1235          int in_crit = 0;
1136 1236  
1137      -        if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
     1237 +        if (iovcnt <= 0 || iovcnt > IOV_MAX)
1138 1238                  return (set_errno(EINVAL));
1139 1239  
     1240 +        if (iovcnt > IOV_MAX_STACK) {
     1241 +                aiovlen = iovcnt * sizeof (iovec_t);
     1242 +                aiov = kmem_alloc(aiovlen, KM_SLEEP);
     1243 +        }
     1244 +
1140 1245  #ifdef _SYSCALL32_IMPL
1141 1246          /*
1142 1247           * 32-bit callers need to have their iovec expanded,
1143 1248           * while ensuring that they can't move more than 2Gbytes
1144 1249           * of data in a single call.
1145 1250           */
1146 1251          if (get_udatamodel() == DATAMODEL_ILP32) {
1147      -                struct iovec32 aiov32[DEF_IOV_MAX];
     1252 +                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
     1253 +                int aiov32len;
1148 1254                  ssize32_t count32;
1149 1255  
1150      -                if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
     1256 +                aiov32len = iovcnt * sizeof (iovec32_t);
     1257 +                if (aiovlen != 0)
     1258 +                        aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
     1259 +
     1260 +                if (copyin(iovp, aiov32, aiov32len)) {
     1261 +                        if (aiovlen != 0) {
     1262 +                                kmem_free(aiov32, aiov32len);
     1263 +                                kmem_free(aiov, aiovlen);
     1264 +                        }
1151 1265                          return (set_errno(EFAULT));
     1266 +                }
1152 1267  
1153 1268                  count32 = 0;
1154 1269                  for (i = 0; i < iovcnt; i++) {
1155 1270                          ssize32_t iovlen32 = aiov32[i].iov_len;
1156 1271                          count32 += iovlen32;
1157      -                        if (iovlen32 < 0 || count32 < 0)
     1272 +                        if (iovlen32 < 0 || count32 < 0) {
     1273 +                                if (aiovlen != 0) {
     1274 +                                        kmem_free(aiov32, aiov32len);
     1275 +                                        kmem_free(aiov, aiovlen);
     1276 +                                }
1158 1277                                  return (set_errno(EINVAL));
     1278 +                        }
1159 1279                          aiov[i].iov_len = iovlen32;
1160 1280                          aiov[i].iov_base =
1161 1281                              (caddr_t)(uintptr_t)aiov32[i].iov_base;
1162 1282                  }
     1283 +                if (aiovlen != 0)
     1284 +                        kmem_free(aiov32, aiov32len);
1163 1285          } else
1164 1286  #endif /* _SYSCALL32_IMPL */
1165      -                if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
     1287 +                if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
     1288 +                        if (aiovlen != 0)
     1289 +                                kmem_free(aiov, aiovlen);
1166 1290                          return (set_errno(EFAULT));
     1291 +                }
1167 1292  
1168 1293          count = 0;
1169 1294          for (i = 0; i < iovcnt; i++) {
1170 1295                  ssize_t iovlen = aiov[i].iov_len;
1171 1296                  count += iovlen;
1172      -                if (iovlen < 0 || count < 0)
     1297 +                if (iovlen < 0 || count < 0) {
     1298 +                        if (aiovlen != 0)
     1299 +                                kmem_free(aiov, aiovlen);
1173 1300                          return (set_errno(EINVAL));
     1301 +                }
1174 1302          }
1175 1303  
1176      -        if ((bcount = (ssize_t)count) < 0)
     1304 +        if ((bcount = (ssize_t)count) < 0) {
     1305 +                if (aiovlen != 0)
     1306 +                        kmem_free(aiov, aiovlen);
1177 1307                  return (set_errno(EINVAL));
1178      -        if ((fp = getf(fdes)) == NULL)
     1308 +        }
     1309 +        if ((fp = getf(fdes)) == NULL) {
     1310 +                if (aiovlen != 0)
     1311 +                        kmem_free(aiov, aiovlen);
1179 1312                  return (set_errno(EBADF));
     1313 +        }
1180 1314          if (((fflag = fp->f_flag) & FWRITE) == 0) {
1181 1315                  error = EBADF;
1182 1316                  goto out;
1183 1317          }
1184 1318          vp = fp->f_vnode;
1185 1319          rwflag = 1;
1186 1320          if (vp->v_type == VREG) {
1187 1321  
1188 1322                  if (bcount == 0)
1189 1323                          goto out;
1190 1324  
1191 1325                  /*
1192 1326                   * return EINVAL for offsets that cannot be
1193 1327                   * represented in an off_t.
1194 1328                   */
1195 1329                  if (fileoff > maxoff) {
1196 1330                          error = EINVAL;
1197 1331                          goto out;
1198 1332                  }
1199 1333                  /*
1200 1334                   * Take appropriate action if we are trying
1201 1335                   * to write above the resource limit.
1202 1336                   */
1203 1337                  if (fileoff >= curproc->p_fsz_ctl) {
1204 1338                          mutex_enter(&curproc->p_lock);
1205 1339                          /*
1206 1340                           * Return value ignored because it lists
1207 1341                           * actions taken, but we are in an error case.
1208 1342                           * We don't have any actions that depend on
1209 1343                           * what could happen in this call, so we ignore
1210 1344                           * the return value.
1211 1345                           */
1212 1346                          (void) rctl_action(
1213 1347                              rctlproc_legacy[RLIMIT_FSIZE],
1214 1348                              curproc->p_rctls, curproc,
1215 1349                              RCA_UNSAFE_SIGINFO);
1216 1350                          mutex_exit(&curproc->p_lock);
1217 1351  
1218 1352                          error = EFBIG;
1219 1353                          goto out;
1220 1354                  }
1221 1355                  /*
1222 1356                   * Don't allow pwritev to cause file sizes to exceed
1223 1357                   * maxoff.
1224 1358                   */
1225 1359                  if (fileoff == maxoff) {
1226 1360                          error = EFBIG;
1227 1361                          goto out;
1228 1362                  }
1229 1363  
1230 1364                  if (fileoff + bcount > maxoff)
1231 1365                          bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
1232 1366          } else if (vp->v_type == VFIFO) {
1233 1367                  error = ESPIPE;
1234 1368                  goto out;
1235 1369          }
1236 1370          /*
1237 1371           * We have to enter the critical region before calling VOP_RWLOCK
1238 1372           * to avoid a deadlock with ufs.
1239 1373           */
1240 1374          if (nbl_need_check(vp)) {
1241 1375                  int svmand;
1242 1376  
1243 1377                  nbl_start_crit(vp, RW_READER);
1244 1378                  in_crit = 1;
1245 1379                  error = nbl_svmand(vp, fp->f_cred, &svmand);
1246 1380                  if (error != 0)
1247 1381                          goto out;
1248 1382                  if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1249 1383                      NULL)) {
1250 1384                          error = EACCES;
1251 1385                          goto out;
1252 1386                  }
1253 1387          }
1254 1388  
1255 1389          (void) VOP_RWLOCK(vp, rwflag, NULL);
1256 1390  
1257 1391  
1258 1392          /*
1259 1393           * Behaviour is same as write(2). Please see comments for
1260 1394           * write(2).
1261 1395           */
1262 1396  
1263 1397          if (vp->v_type == VREG) {
1264 1398                  if (fileoff >= curproc->p_fsz_ctl) {
1265 1399                          VOP_RWUNLOCK(vp, rwflag, NULL);
1266 1400                          mutex_enter(&curproc->p_lock);
1267 1401                          /* see above rctl_action comment */
1268 1402                          (void) rctl_action(
1269 1403                              rctlproc_legacy[RLIMIT_FSIZE],
1270 1404                              curproc->p_rctls,
1271 1405                              curproc, RCA_UNSAFE_SIGINFO);
1272 1406                          mutex_exit(&curproc->p_lock);
1273 1407                          error = EFBIG;
1274 1408                          goto out;
1275 1409                  }
1276 1410                  if (fileoff >= OFFSET_MAX(fp)) {
1277 1411                          VOP_RWUNLOCK(vp, rwflag, NULL);
1278 1412                          error = EFBIG;
1279 1413                          goto out;
1280 1414                  }
1281 1415                  if (fileoff + count > OFFSET_MAX(fp))
1282 1416                          count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1283 1417          }
1284 1418  
1285 1419          auio.uio_loffset = fileoff;
1286 1420          auio.uio_iov = aiov;
1287 1421          auio.uio_iovcnt = iovcnt;
1288 1422          auio.uio_resid = bcount = count;
1289 1423          auio.uio_segflg = UIO_USERSPACE;
1290 1424          auio.uio_llimit = curproc->p_fsz_ctl;
1291 1425          auio.uio_fmode = fflag;
1292 1426          auio.uio_extflg = UIO_COPY_CACHED;
1293 1427          ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1294 1428          error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1295 1429          count -= auio.uio_resid;
1296 1430          CPU_STATS_ENTER_K();
1297 1431          cp = CPU;
1298 1432          CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1299 1433          CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1300 1434          CPU_STATS_EXIT_K();
  
    | 
      ↓ open down ↓ | 
    111 lines elided | 
    
      ↑ open up ↑ | 
  
1301 1435          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1302 1436  
1303 1437          VOP_RWUNLOCK(vp, rwflag, NULL);
1304 1438  
1305 1439          if (error == EINTR && count != 0)
1306 1440                  error = 0;
1307 1441  out:
1308 1442          if (in_crit)
1309 1443                  nbl_end_crit(vp);
1310 1444          releasef(fdes);
     1445 +        if (aiovlen != 0)
     1446 +                kmem_free(aiov, aiovlen);
1311 1447          if (error)
1312 1448                  return (set_errno(error));
1313 1449          return (count);
1314 1450  }
1315 1451  
1316 1452  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1317 1453  
1318 1454  /*
1319 1455   * This syscall supplies 64-bit file offsets to 32-bit applications only.
1320 1456   */
1321 1457  ssize32_t
1322 1458  pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
1323 1459      uint32_t offset_2)
1324 1460  {
1325 1461          struct uio auio;
1326 1462          struct iovec aiov;
1327 1463          file_t *fp;
1328 1464          register vnode_t *vp;
1329 1465          struct cpu *cp;
1330 1466          int fflag, ioflag, rwflag;
1331 1467          ssize_t bcount;
1332 1468          int error = 0;
1333 1469          u_offset_t fileoff;
1334 1470          int in_crit = 0;
1335 1471  
1336 1472  #if defined(_LITTLE_ENDIAN)
1337 1473          fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
1338 1474  #else
1339 1475          fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
1340 1476  #endif
1341 1477  
1342 1478          if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
1343 1479                  return (set_errno(EINVAL));
1344 1480  
1345 1481          if ((fp = getf(fdes)) == NULL)
1346 1482                  return (set_errno(EBADF));
1347 1483          if (((fflag = fp->f_flag) & (FREAD)) == 0) {
1348 1484                  error = EBADF;
1349 1485                  goto out;
1350 1486          }
1351 1487  
1352 1488          rwflag = 0;
1353 1489          vp = fp->f_vnode;
1354 1490  
1355 1491          if (vp->v_type == VREG) {
1356 1492  
1357 1493                  if (bcount == 0)
1358 1494                          goto out;
1359 1495  
1360 1496                  /*
1361 1497                   * Same as pread. See comments in pread.
1362 1498                   */
1363 1499  
1364 1500                  if (fileoff > MAXOFFSET_T) {
1365 1501                          error = EINVAL;
1366 1502                          goto out;
1367 1503                  }
1368 1504                  if (fileoff + bcount > MAXOFFSET_T)
1369 1505                          bcount = (ssize_t)(MAXOFFSET_T - fileoff);
1370 1506          } else if (vp->v_type == VFIFO) {
1371 1507                  error = ESPIPE;
1372 1508                  goto out;
1373 1509          }
1374 1510  
1375 1511          /*
1376 1512           * We have to enter the critical region before calling VOP_RWLOCK
1377 1513           * to avoid a deadlock with ufs.
1378 1514           */
1379 1515          if (nbl_need_check(vp)) {
1380 1516                  int svmand;
1381 1517  
1382 1518                  nbl_start_crit(vp, RW_READER);
1383 1519                  in_crit = 1;
1384 1520                  error = nbl_svmand(vp, fp->f_cred, &svmand);
1385 1521                  if (error != 0)
1386 1522                          goto out;
1387 1523                  if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
1388 1524                      NULL)) {
1389 1525                          error = EACCES;
1390 1526                          goto out;
1391 1527                  }
1392 1528          }
1393 1529  
1394 1530          aiov.iov_base = cbuf;
1395 1531          aiov.iov_len = bcount;
1396 1532          (void) VOP_RWLOCK(vp, rwflag, NULL);
1397 1533          auio.uio_loffset = fileoff;
1398 1534  
1399 1535          /*
1400 1536           * Note: File size can never be greater than MAXOFFSET_T.
1401 1537           * If ever we start supporting 128 bit files the code
1402 1538           * similar to the one in pread at this place should be here.
1403 1539           * Here we avoid the unnecessary VOP_GETATTR() when we
1404 1540           * know that fileoff == MAXOFFSET_T implies that it is always
1405 1541           * greater than or equal to file size.
1406 1542           */
1407 1543          auio.uio_iov = &aiov;
1408 1544          auio.uio_iovcnt = 1;
1409 1545          auio.uio_resid = bcount;
1410 1546          auio.uio_segflg = UIO_USERSPACE;
1411 1547          auio.uio_llimit = MAXOFFSET_T;
1412 1548          auio.uio_fmode = fflag;
1413 1549          auio.uio_extflg = UIO_COPY_CACHED;
1414 1550  
1415 1551          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1416 1552  
1417 1553          /* If read sync is not asked for, filter sync flags */
1418 1554          if ((ioflag & FRSYNC) == 0)
1419 1555                  ioflag &= ~(FSYNC|FDSYNC);
1420 1556          error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1421 1557          bcount -= auio.uio_resid;
1422 1558          CPU_STATS_ENTER_K();
1423 1559          cp = CPU;
1424 1560          CPU_STATS_ADDQ(cp, sys, sysread, 1);
1425 1561          CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
1426 1562          CPU_STATS_EXIT_K();
1427 1563          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
1428 1564          VOP_RWUNLOCK(vp, rwflag, NULL);
1429 1565  
1430 1566          if (error == EINTR && bcount != 0)
1431 1567                  error = 0;
1432 1568  out:
1433 1569          if (in_crit)
1434 1570                  nbl_end_crit(vp);
1435 1571          releasef(fdes);
1436 1572          if (error)
1437 1573                  return (set_errno(error));
1438 1574          return (bcount);
1439 1575  }
1440 1576  
1441 1577  /*
1442 1578   * This syscall supplies 64-bit file offsets to 32-bit applications only.
1443 1579   */
1444 1580  ssize32_t
1445 1581  pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
1446 1582      uint32_t offset_2)
1447 1583  {
1448 1584          struct uio auio;
1449 1585          struct iovec aiov;
1450 1586          file_t *fp;
1451 1587          register vnode_t *vp;
1452 1588          struct cpu *cp;
1453 1589          int fflag, ioflag, rwflag;
1454 1590          ssize_t bcount;
1455 1591          int error = 0;
1456 1592          u_offset_t fileoff;
1457 1593          int in_crit = 0;
1458 1594  
1459 1595  #if defined(_LITTLE_ENDIAN)
1460 1596          fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
1461 1597  #else
1462 1598          fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
1463 1599  #endif
1464 1600  
1465 1601          if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
1466 1602                  return (set_errno(EINVAL));
1467 1603          if ((fp = getf(fdes)) == NULL)
1468 1604                  return (set_errno(EBADF));
1469 1605          if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
1470 1606                  error = EBADF;
1471 1607                  goto out;
1472 1608          }
1473 1609  
1474 1610          rwflag = 1;
1475 1611          vp = fp->f_vnode;
1476 1612  
1477 1613          if (vp->v_type == VREG) {
1478 1614  
1479 1615                  if (bcount == 0)
1480 1616                          goto out;
1481 1617  
1482 1618                  /*
1483 1619                   * See comments in pwrite.
1484 1620                   */
1485 1621                  if (fileoff > MAXOFFSET_T) {
1486 1622                          error = EINVAL;
1487 1623                          goto out;
1488 1624                  }
1489 1625                  if (fileoff >= curproc->p_fsz_ctl) {
1490 1626                          mutex_enter(&curproc->p_lock);
1491 1627                          (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
1492 1628                              curproc->p_rctls, curproc, RCA_SAFE);
1493 1629                          mutex_exit(&curproc->p_lock);
1494 1630                          error = EFBIG;
1495 1631                          goto out;
1496 1632                  }
1497 1633                  if (fileoff == MAXOFFSET_T) {
1498 1634                          error = EFBIG;
1499 1635                          goto out;
1500 1636                  }
1501 1637                  if (fileoff + bcount > MAXOFFSET_T)
1502 1638                          bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
1503 1639          } else if (vp->v_type == VFIFO) {
1504 1640                  error = ESPIPE;
1505 1641                  goto out;
1506 1642          }
1507 1643  
1508 1644          /*
1509 1645           * We have to enter the critical region before calling VOP_RWLOCK
1510 1646           * to avoid a deadlock with ufs.
1511 1647           */
1512 1648          if (nbl_need_check(vp)) {
1513 1649                  int svmand;
1514 1650  
1515 1651                  nbl_start_crit(vp, RW_READER);
1516 1652                  in_crit = 1;
1517 1653                  error = nbl_svmand(vp, fp->f_cred, &svmand);
1518 1654                  if (error != 0)
1519 1655                          goto out;
1520 1656                  if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
1521 1657                      NULL)) {
1522 1658                          error = EACCES;
1523 1659                          goto out;
1524 1660                  }
1525 1661          }
1526 1662  
1527 1663          aiov.iov_base = cbuf;
1528 1664          aiov.iov_len = bcount;
1529 1665          (void) VOP_RWLOCK(vp, rwflag, NULL);
1530 1666          auio.uio_loffset = fileoff;
1531 1667          auio.uio_iov = &aiov;
1532 1668          auio.uio_iovcnt = 1;
1533 1669          auio.uio_resid = bcount;
1534 1670          auio.uio_segflg = UIO_USERSPACE;
1535 1671          auio.uio_llimit = curproc->p_fsz_ctl;
1536 1672          auio.uio_fmode = fflag;
1537 1673          auio.uio_extflg = UIO_COPY_CACHED;
1538 1674  
1539 1675          /*
1540 1676           * The SUSv4 POSIX specification states:
1541 1677           *      The pwrite() function shall be equivalent to write(), except
1542 1678           *      that it writes into a given position and does not change
1543 1679           *      the file offset (regardless of whether O_APPEND is set).
1544 1680           * To make this be true, we omit the FAPPEND flag from ioflag.
1545 1681           */
1546 1682          ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1547 1683  
1548 1684          error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1549 1685          bcount -= auio.uio_resid;
1550 1686          CPU_STATS_ENTER_K();
1551 1687          cp = CPU;
1552 1688          CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1553 1689          CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
1554 1690          CPU_STATS_EXIT_K();
1555 1691          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
1556 1692          VOP_RWUNLOCK(vp, rwflag, NULL);
1557 1693  
1558 1694          if (error == EINTR && bcount != 0)
1559 1695                  error = 0;
1560 1696  out:
1561 1697          if (in_crit)
1562 1698                  nbl_end_crit(vp);
1563 1699          releasef(fdes);
1564 1700          if (error)
1565 1701                  return (set_errno(error));
1566 1702          return (bcount);
1567 1703  }
1568 1704  
1569 1705  #endif  /* _SYSCALL32_IMPL || _ILP32 */
1570 1706  
1571 1707  #ifdef _SYSCALL32_IMPL
1572 1708  /*
1573 1709   * Tail-call elimination of xxx32() down to xxx()
1574 1710   *
1575 1711   * A number of xxx32 system calls take a len (or count) argument and
1576 1712   * return a number in the range [0,len] or -1 on error.
1577 1713   * Given an ssize32_t input len, the downcall xxx() will return
1578 1714   * a 64-bit value that is -1 or in the range [0,len] which actually
1579 1715   * is a proper return value for the xxx32 call. So even if the xxx32
1580 1716   * calls can be considered as returning a ssize32_t, they are currently
1581 1717   * declared as returning a ssize_t as this enables tail-call elimination.
1582 1718   *
1583 1719   * The cast of len (or count) to ssize32_t is needed to ensure we pass
1584 1720   * down negative input values as such and let the downcall handle error
1585 1721   * reporting. Functions covered by this comment are:
1586 1722   *
1587 1723   * rw.c:           read32, write32, pread32, pwrite32, readv32, writev32.
1588 1724   * socksyscall.c:  recv32, recvfrom32, send32, sendto32.
1589 1725   * readlink.c:     readlink32.
1590 1726   */
1591 1727  
1592 1728  ssize_t
1593 1729  read32(int32_t fdes, caddr32_t cbuf, size32_t count)
1594 1730  {
1595 1731          return (read(fdes,
1596 1732              (void *)(uintptr_t)cbuf, (ssize32_t)count));
1597 1733  }
1598 1734  
1599 1735  ssize_t
1600 1736  write32(int32_t fdes, caddr32_t cbuf, size32_t count)
1601 1737  {
1602 1738          return (write(fdes,
1603 1739              (void *)(uintptr_t)cbuf, (ssize32_t)count));
1604 1740  }
1605 1741  
1606 1742  ssize_t
1607 1743  pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
1608 1744  {
1609 1745          return (pread(fdes,
1610 1746              (void *)(uintptr_t)cbuf, (ssize32_t)count,
1611 1747              (off_t)(uint32_t)offset));
1612 1748  }
1613 1749  
1614 1750  ssize_t
1615 1751  pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
1616 1752  {
1617 1753          return (pwrite(fdes,
1618 1754              (void *)(uintptr_t)cbuf, (ssize32_t)count,
1619 1755              (off_t)(uint32_t)offset));
1620 1756  }
1621 1757  
1622 1758  ssize_t
1623 1759  readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
1624 1760  {
1625 1761          return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt));
1626 1762  }
1627 1763  
1628 1764  ssize_t
1629 1765  writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
1630 1766  {
1631 1767          return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt));
1632 1768  }
1633 1769  #endif  /* _SYSCALL32_IMPL */
  
    | 
      ↓ open down ↓ | 
    313 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX