Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/syscall/sendfile.c
          +++ new/usr/src/uts/common/syscall/sendfile.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  #include <sys/types.h>
  27   27  #include <sys/t_lock.h>
  28   28  #include <sys/param.h>
  29   29  #include <sys/systm.h>
  30   30  #include <sys/buf.h>
  31   31  #include <sys/conf.h>
  32   32  #include <sys/cred.h>
  33   33  #include <sys/kmem.h>
  34   34  #include <sys/sysmacros.h>
  35   35  #include <sys/vfs.h>
  36   36  #include <sys/vnode.h>
  37   37  #include <sys/debug.h>
  38   38  #include <sys/errno.h>
  39   39  #include <sys/time.h>
  40   40  #include <sys/file.h>
  41   41  #include <sys/open.h>
  42   42  #include <sys/user.h>
  43   43  #include <sys/termios.h>
  44   44  #include <sys/stream.h>
  45   45  #include <sys/strsubr.h>
  46   46  #include <sys/sunddi.h>
  47   47  #include <sys/esunddi.h>
  48   48  #include <sys/flock.h>
  49   49  #include <sys/modctl.h>
  50   50  #include <sys/cmn_err.h>
  51   51  #include <sys/vmsystm.h>
  52   52  
  53   53  #include <sys/socket.h>
  54   54  #include <sys/socketvar.h>
  55   55  #include <fs/sockfs/sockcommon.h>
  56   56  #include <fs/sockfs/socktpi.h>
  57   57  
  58   58  #include <netinet/in.h>
  59   59  #include <sys/sendfile.h>
  60   60  #include <sys/un.h>
  61   61  #include <sys/tihdr.h>
  62   62  #include <sys/atomic.h>
  63   63  
  64   64  #include <inet/common.h>
  65   65  #include <inet/ip.h>
  66   66  #include <inet/ip6.h>
  67   67  #include <inet/tcp.h>
  68   68  
  69   69  extern int sosendfile64(file_t *, file_t *, const struct ksendfilevec64 *,
  70   70                  ssize32_t *);
  71   71  extern int nl7c_sendfilev(struct sonode *, u_offset_t *, struct sendfilevec *,
  72   72                  int, ssize_t *);
  73   73  extern int snf_segmap(file_t *, vnode_t *, u_offset_t, u_offset_t, ssize_t *,
  74   74                  boolean_t);
  75   75  extern sotpi_info_t *sotpi_sototpi(struct sonode *);
  76   76  
  77   77  #define SEND_MAX_CHUNK  16
  78   78  
  79   79  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
   80   80  /*
   81   81   * 64 bit offsets for 32 bit applications only running either on
   82   82   * 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
   83   83   * more than 2GB of data.
             *
             * sendvec_chunk64() processes copy_cnt entries of sfv, writing each
             * one to the vnode behind fp.
             *
             * An entry whose sfv_fd is SFV_FD_SELF treats sfv_off as a user-space
             * address and writes sfv_len bytes from it with VOP_WRITE(). Any other
             * sfv_fd is looked up with getf(); it must be open for reading, be a
             * regular file, and not compare equal to the target vnode. Its data is
             * handed to sosendfile64() when the target is a socket, otherwise it
             * is copied through a temporary kernel buffer with VOP_READ() and
             * VOP_WRITE().
             *
             * *fileoff is advanced only when the target is a regular file, and
             * *count accumulates the bytes transferred, including partial progress
             * made before an error. The target vnode is expected to be write
             * locked on entry and is still write locked on return.
             *
             * Returns 0 on success or an errno value.
   84   84   */
   85   85  static int
   86   86  sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
   87   87      int copy_cnt, ssize32_t *count)
   88   88  {
   89   89          struct vnode *vp;
   90   90          ushort_t fflag;
   91   91          int ioflag;
   92   92          size32_t cnt;
   93   93          ssize32_t sfv_len;
   94   94          ssize32_t tmpcount;
   95   95          u_offset_t sfv_off;
   96   96          struct uio auio;
   97   97          struct iovec aiov;
   98   98          int i, error;
   99   99  
  100  100          fflag = fp->f_flag;
  101  101          vp = fp->f_vnode;
  102  102          for (i = 0; i < copy_cnt; i++) {
  103  103  
                            /*
                             * Stop between entries if ISSIG() reports a signal for
                             * this thread; bytes already moved stay counted in *count.
                             */
  104  104                  if (ISSIG(curthread, JUSTLOOKING))
  105  105                          return (EINTR);
  106  106  
  107  107                  /*
  108  108                   * Do similar checks as "write" as we are writing
  109  109                   * sfv_len bytes into "vp".
  110  110                   */
  111  111                  sfv_len = (ssize32_t)sfv->sfv_len;
  112  112  
  113  113                  if (sfv_len == 0) {
  114  114                          sfv++;
  115  115                          continue;
  116  116                  }
  117  117  
  118  118                  if (sfv_len < 0)
  119  119                          return (EINVAL);
  120  120  
  121  121                  if (vp->v_type == VREG) {
                                    /*
                                     * At or beyond the process file size limit
                                     * (p_fsz_ctl): run the RLIMIT_FSIZE resource
                                     * control action, then fail with EFBIG.
                                     */
  122  122                          if (*fileoff >= curproc->p_fsz_ctl) {
  123  123                                  mutex_enter(&curproc->p_lock);
  124  124                                  (void) rctl_action(
  125  125                                      rctlproc_legacy[RLIMIT_FSIZE],
  126  126                                      curproc->p_rctls, curproc, RCA_SAFE);
  127  127                                  mutex_exit(&curproc->p_lock);
  128  128                                  return (EFBIG);
  129  129                          }
  130  130  
  131  131                          if (*fileoff >= OFFSET_MAX(fp))
  132  132                                  return (EFBIG);
  133  133  
  134  134                          if (*fileoff + sfv_len > OFFSET_MAX(fp))
  135  135                                  return (EINVAL);
  136  136                  }
  137  137  
                            /*
                             * Reject the entry if adding it makes the running total
                             * negative when stored as an ssize32_t.
                             */
  138  138                  tmpcount = *count + sfv_len;
  139  139                  if (tmpcount < 0)
  140  140                          return (EINVAL);
  141  141  
  142  142                  sfv_off = sfv->sfv_off;
  143  143  
  144  144                  auio.uio_extflg = UIO_COPY_DEFAULT;
  145  145                  if (sfv->sfv_fd == SFV_FD_SELF) {
                                    /*
                                     * SFV_FD_SELF: sfv_off is used as a user-space
                                     * address and the data is written straight from
                                     * there.
                                     */
  146  146                          aiov.iov_len = sfv_len;
  147  147                          aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
  148  148                          auio.uio_loffset = *fileoff;
  149  149                          auio.uio_iovcnt = 1;
  150  150                          auio.uio_resid = sfv_len;
  151  151                          auio.uio_iov = &aiov;
  152  152                          auio.uio_segflg = UIO_USERSPACE;
  153  153                          auio.uio_llimit = curproc->p_fsz_ctl;
  154  154                          auio.uio_fmode = fflag;
  155  155                          ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
                                    /*
                                     * Keep writing until the entry is consumed. Progress
                                     * is accounted before the error check, so a failed
                                     * write still reports the bytes that went out.
                                     */
  156  156                          while (sfv_len > 0) {
  157  157                                  error = VOP_WRITE(vp, &auio, ioflag,
  158  158                                      fp->f_cred, NULL);
  159  159                                  cnt = sfv_len - auio.uio_resid;
  160  160                                  sfv_len -= cnt;
  161  161                                  ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
  162  162                                  if (vp->v_type == VREG)
  163  163                                          *fileoff += cnt;
  164  164                                  *count += cnt;
  165  165                                  if (error != 0)
  166  166                                          return (error);
  167  167                          }
  168  168                  } else {
  169  169                          file_t  *ffp;
  170  170                          vnode_t *readvp;
  171  171                          size_t  size;
  172  172                          caddr_t ptr;
  173  173  
  174  174                          if ((ffp = getf(sfv->sfv_fd)) == NULL)
  175  175                                  return (EBADF);
  176  176  
  177  177                          if ((ffp->f_flag & FREAD) == 0) {
  178  178                                  releasef(sfv->sfv_fd);
  179  179                                  return (EBADF);
  180  180                          }
  181  181  
  182  182                          readvp = ffp->f_vnode;
  183  183                          if (readvp->v_type != VREG) {
  184  184                                  releasef(sfv->sfv_fd);
  185  185                                  return (EINVAL);
  186  186                          }
  187  187  
  188  188                          /*
  189  189                           * No point reading and writing to same vp,
  190  190                           * as long as both are regular files. readvp is not
  191  191                           * locked; but since we got it from an open file the
  192  192                           * contents will be valid during the time of access.
  193  193                           */
  194  194                          if (vn_compare(vp, readvp)) {
  195  195                                  releasef(sfv->sfv_fd);
  196  196                                  return (EINVAL);
  197  197                          }
  198  198  
  199  199                          /*
  200  200                           * Optimize the regular file over
  201  201                           * the socket case.
  202  202                           */
  203  203                          if (vp->v_type == VSOCK) {
                                            /*
                                             * NOTE(review): there is no releasef(sfv->sfv_fd)
                                             * on this path; presumably sosendfile64() drops
                                             * the hold taken by getf() above - confirm.
                                             */
  204  204                                  error = sosendfile64(fp, ffp, sfv,
  205  205                                      (ssize32_t *)&cnt);
  206  206                                  *count += cnt;
  207  207                                  if (error)
  208  208                                          return (error);
  209  209                                  sfv++;
  210  210                                  continue;
  211  211                          }
  212  212  
  213  213                          /*
  214  214                           * Note: we assume readvp != vp. "vp" is already
  215  215                           * locked, and "readvp" must not be.
  216  216                           */
                                    /*
                                     * The two locks are taken in vnode address order: when
                                     * readvp sorts below vp, the write lock on vp is dropped
                                     * and retaken after readvp has been read locked.
                                     */
  217  217                          if (readvp < vp) {
  218  218                                  VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
  219  219                                  (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
  220  220                                      NULL);
  221  221                                  (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
  222  222                          } else {
  223  223                                  (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
  224  224                                      NULL);
  225  225                          }
  226  226  
  227  227                          /*
  228  228                           * Same checks as in pread64.
  229  229                           */
  230  230                          if (sfv_off > MAXOFFSET_T) {
  231  231                                  VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
  232  232                                  releasef(sfv->sfv_fd);
  233  233                                  return (EINVAL);
  234  234                          }
  235  235  
                                    /* Trim the request so it does not extend past MAXOFFSET_T. */
  236  236                          if (sfv_off + sfv_len > MAXOFFSET_T)
  237  237                                  sfv_len = (ssize32_t)(MAXOFFSET_T - sfv_off);
  238  238  
  239  239                          /* Find the native blocksize to transfer data */
  240  240                          size = MIN(vp->v_vfsp->vfs_bsize,
  241  241                              readvp->v_vfsp->vfs_bsize);
  242  242                          size = sfv_len < size ? sfv_len : size;
                                    /*
                                     * The KM_NOSLEEP allocation may fail, which is reported
                                     * as ENOMEM.
                                     * NOTE(review): size is 0 here when the trim above
                                     * reduced sfv_len to 0 (sfv_off == MAXOFFSET_T); confirm
                                     * what kmem_alloc() returns for a zero size.
                                     */
  243  243                          ptr = kmem_alloc(size, KM_NOSLEEP);
  244  244                          if (ptr == NULL) {
  245  245                                  VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
  246  246                                  releasef(sfv->sfv_fd);
  247  247                                  return (ENOMEM);
  248  248                          }
  249  249  
  250  250                          while (sfv_len > 0) {
  251  251                                  size_t  iov_len;
  252  252  
  253  253                                  iov_len = MIN(size, sfv_len);
  254  254                                  aiov.iov_base = ptr;
  255  255                                  aiov.iov_len = iov_len;
  256  256                                  auio.uio_loffset = sfv_off;
  257  257                                  auio.uio_iov = &aiov;
  258  258                                  auio.uio_iovcnt = 1;
  259  259                                  auio.uio_resid = iov_len;
  260  260                                  auio.uio_segflg = UIO_SYSSPACE;
  261  261                                  auio.uio_llimit = MAXOFFSET_T;
  262  262                                  auio.uio_fmode = ffp->f_flag;
  263  263                                  ioflag = auio.uio_fmode &
  264  264                                      (FAPPEND|FSYNC|FDSYNC|FRSYNC);
  265  265  
  266  266                                  /*
  267  267                                   * If read sync is not asked for,
  268  268                                   * filter sync flags
  269  269                                   */
  270  270                                  if ((ioflag & FRSYNC) == 0)
  271  271                                          ioflag &= ~(FSYNC|FDSYNC);
                                            /*
                                             * NOTE(review): the read is issued with fp->f_cred
                                             * (the output file), not ffp->f_cred - confirm
                                             * this is intended.
                                             */
  272  272                                  error = VOP_READ(readvp, &auio, ioflag,
  273  273                                      fp->f_cred, NULL);
  274  274                                  if (error) {
  275  275                                          kmem_free(ptr, size);
  276  276                                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
  277  277                                              NULL);
  278  278                                          releasef(sfv->sfv_fd);
  279  279                                          return (error);
  280  280                                  }
  281  281  
  282  282                                  /*
  283  283                                   * Check how much data was really read.
  284  284                                   * Decrement the 'len' and increment the
  285  285                                   * 'off' appropriately.
  286  286                                   */
  287  287                                  cnt = iov_len - auio.uio_resid;
  288  288                                  if (cnt == 0) {
  289  289                                          /*
  290  290                                           * If we were reading a pipe (currently
  291  291                                           * not implemented), we may now lose
  292  292                                           * data.
  293  293                                           */
  294  294                                          kmem_free(ptr, size);
  295  295                                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
  296  296                                              NULL);
  297  297                                          releasef(sfv->sfv_fd);
  298  298                                          return (EINVAL);
  299  299                                  }
  300  300                                  sfv_len -= cnt;
  301  301                                  sfv_off += cnt;
  302  302  
  303  303                                  aiov.iov_base = ptr;
  304  304                                  aiov.iov_len = cnt;
  305  305                                  auio.uio_loffset = *fileoff;
  306  306                                  auio.uio_iov = &aiov;
  307  307                                  auio.uio_iovcnt = 1;
  308  308                                  auio.uio_resid = cnt;
  309  309                                  auio.uio_segflg = UIO_SYSSPACE;
  310  310                                  auio.uio_llimit = curproc->p_fsz_ctl;
  311  311                                  auio.uio_fmode = fflag;
  312  312                                  ioflag = auio.uio_fmode &
  313  313                                      (FAPPEND|FSYNC|FDSYNC|FRSYNC);
  314  314                                  error = VOP_WRITE(vp, &auio, ioflag,
  315  315                                      fp->f_cred, NULL);
  316  316  
  317  317                                  /*
  318  318                                   * Check how much data was written. Increment
  319  319                                   * the 'len' and decrement the 'off' if all
  320  320                                   * the data was not written.
  321  321                                   */
  322  322                                  cnt -= auio.uio_resid;
  323  323                                  sfv_len += auio.uio_resid;
  324  324                                  sfv_off -= auio.uio_resid;
  325  325                                  ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
  326  326                                  if (vp->v_type == VREG)
  327  327                                          *fileoff += cnt;
  328  328                                  *count += cnt;
  329  329                                  if (error != 0) {
  330  330                                          kmem_free(ptr, size);
  331  331                                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
  332  332                                              NULL);
  333  333                                          releasef(sfv->sfv_fd);
  334  334                                          return (error);
  335  335                                  }
  336  336                          }
                                    /*
                                     * Only readvp is unlocked here; vp stays write locked
                                     * for the caller.
                                     */
  337  337                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
  338  338                          releasef(sfv->sfv_fd);
  339  339                          kmem_free(ptr, size);
  340  340                  }
  341  341                  sfv++;
  342  342          }
  343  343          return (0);
  344  344  }
  345  345  
            /*
             * sendvec64() copies in the user vector vec (sfvcnt entries) in batches
             * of at most SEND_MAX_CHUNK entries and passes each batch to
             * sendvec_chunk64(), starting at the current fp->f_offset.
             *
             * The write RW lock on the vnode behind fp is taken before the first
             * batch and dropped after the last one. When that vnode is a regular
             * file, fp->f_offset is advanced by the bytes transferred, even if an
             * error ended the loop early. The byte total is copied out to xferred
             * and the hold on fildes is released before returning.
             *
             * Returns the byte total on success, or the result of set_errno() on
             * failure.
             *
             * NOTE(review): the loop body runs at least once, so sfvcnt is
             * presumably validated as positive by the caller - confirm.
             */
  346  346  static ssize32_t
  347  347  sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
  348  348          size32_t *xferred, int fildes)
  349  349  {
  350  350          u_offset_t              fileoff;
  351  351          int                     copy_cnt;
  352  352          const struct ksendfilevec64 *copy_vec;
  353  353          struct ksendfilevec64 sfv[SEND_MAX_CHUNK];
  354  354          struct vnode *vp;
  355  355          int error;
  356  356          ssize32_t count = 0;
  357  357  
  358  358          vp = fp->f_vnode;
  359  359          (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
  360  360  
  361  361          copy_vec = vec;
  362  362          fileoff = fp->f_offset;
  363  363  
  364  364          do {
                            /* Copy in the next batch of entries from user space. */
  365  365                  copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
  366  366                  if (copyin(copy_vec, sfv, copy_cnt *
  367  367                      sizeof (struct ksendfilevec64))) {
  368  368                          error = EFAULT;
  369  369                          break;
  370  370                  }
  371  371  
  372  372                  error = sendvec_chunk64(fp, &fileoff, sfv, copy_cnt, &count);
  373  373                  if (error != 0)
  374  374                          break;
  375  375  
  376  376                  copy_vec += copy_cnt;
  377  377                  sfvcnt -= copy_cnt;
  378  378          } while (sfvcnt > 0);
  379  379  
                    /* Runs on the error path too, so partial progress moves f_offset. */
  380  380          if (vp->v_type == VREG)
  381  381                  fp->f_offset += count;
  382  382  
  383  383          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
                    /* A copyout failure replaces any earlier error with EFAULT. */
  384  384          if (copyout(&count, xferred, sizeof (count)))
  385  385                  error = EFAULT;
                    /*
                     * NOTE(review): presumably pairs with a getf(fildes) done by the
                     * caller - confirm.
                     */
  386  386          releasef(fildes);
  387  387          if (error != 0)
  388  388                  return (set_errno(error));
  389  389          return (count);
  390  390  }
 391  391  #endif
 392  392  
 393  393  static int
 394  394  sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
 395  395      int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
 396  396  {
 397  397          struct vnode *vp;
 398  398          struct uio auio;
 399  399          struct iovec aiov;
 400  400          ushort_t fflag;
 401  401          int ioflag;
 402  402          int i, error;
 403  403          size_t cnt;
 404  404          ssize_t sfv_len;
 405  405          u_offset_t sfv_off;
 406  406  #ifdef _SYSCALL32_IMPL
 407  407          model_t model = get_udatamodel();
 408  408          u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
 409  409              MAXOFF32_T : MAXOFFSET_T;
 410  410  #else
 411  411          const u_offset_t maxoff = MAXOFF32_T;
 412  412  #endif
 413  413          mblk_t *dmp = NULL;
 414  414          int wroff;
 415  415          int buf_left = 0;
 416  416          size_t  iov_len;
 417  417          mblk_t  *head, *tmp;
 418  418          size_t  size = total_size;
 419  419          size_t  extra;
 420  420          int tail_len;
 421  421          struct nmsghdr msg;
 422  422  
 423  423          fflag = fp->f_flag;
 424  424          vp = fp->f_vnode;
 425  425  
 426  426          ASSERT(vp->v_type == VSOCK);
 427  427          ASSERT(maxblk > 0);
 428  428  
 429  429          /* If nothing to send, return */
 430  430          if (total_size == 0)
 431  431                  return (0);
 432  432  
 433  433          if (vp->v_stream != NULL) {
 434  434                  wroff = (int)vp->v_stream->sd_wroff;
 435  435                  tail_len = (int)vp->v_stream->sd_tail;
 436  436          } else {
 437  437                  struct sonode *so;
 438  438  
 439  439                  so = VTOSO(vp);
 440  440                  wroff = so->so_proto_props.sopp_wroff;
 441  441                  tail_len = so->so_proto_props.sopp_tail;
 442  442          }
 443  443  
 444  444          extra = wroff + tail_len;
 445  445  
 446  446          buf_left = MIN(total_size, maxblk);
 447  447          head = dmp = allocb(buf_left + extra, BPRI_HI);
 448  448          if (head == NULL)
 449  449                  return (ENOMEM);
 450  450          head->b_wptr = head->b_rptr = head->b_rptr + wroff;
 451  451          bzero(&msg, sizeof (msg));
 452  452  
 453  453          auio.uio_extflg = UIO_COPY_DEFAULT;
 454  454          for (i = 0; i < copy_cnt; i++) {
 455  455                  if (ISSIG(curthread, JUSTLOOKING)) {
 456  456                          freemsg(head);
 457  457                          return (EINTR);
 458  458                  }
 459  459  
 460  460                  /*
 461  461                   * Do similar checks as "write" as we are writing
 462  462                   * sfv_len bytes into "vp".
 463  463                   */
 464  464                  sfv_len = (ssize_t)sfv->sfv_len;
 465  465  
 466  466                  if (sfv_len == 0) {
 467  467                          sfv++;
 468  468                          continue;
 469  469                  }
 470  470  
 471  471                  /* Check for overflow */
 472  472  #ifdef _SYSCALL32_IMPL
 473  473                  if (model == DATAMODEL_ILP32) {
 474  474                          if (((ssize32_t)(*count + sfv_len)) < 0) {
 475  475                                  freemsg(head);
 476  476                                  return (EINVAL);
 477  477                          }
 478  478                  } else
 479  479  #endif
 480  480                  if ((*count + sfv_len) < 0) {
 481  481                          freemsg(head);
 482  482                          return (EINVAL);
 483  483                  }
 484  484  
 485  485                  sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
 486  486  
 487  487                  if (sfv->sfv_fd == SFV_FD_SELF) {
 488  488                          while (sfv_len > 0) {
 489  489                                  if (buf_left == 0) {
 490  490                                          tmp = dmp;
 491  491                                          buf_left = MIN(total_size, maxblk);
 492  492                                          iov_len = MIN(buf_left, sfv_len);
 493  493                                          dmp = allocb(buf_left + extra, BPRI_HI);
 494  494                                          if (dmp == NULL) {
 495  495                                                  freemsg(head);
 496  496                                                  return (ENOMEM);
 497  497                                          }
 498  498                                          dmp->b_wptr = dmp->b_rptr =
 499  499                                              dmp->b_rptr + wroff;
 500  500                                          tmp->b_cont = dmp;
 501  501                                  } else {
 502  502                                          iov_len = MIN(buf_left, sfv_len);
 503  503                                  }
 504  504  
 505  505                                  aiov.iov_len = iov_len;
 506  506                                  aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
 507  507                                  auio.uio_loffset = *fileoff;
 508  508                                  auio.uio_iovcnt = 1;
 509  509                                  auio.uio_resid = iov_len;
 510  510                                  auio.uio_iov = &aiov;
 511  511                                  auio.uio_segflg = UIO_USERSPACE;
 512  512                                  auio.uio_llimit = curproc->p_fsz_ctl;
 513  513                                  auio.uio_fmode = fflag;
 514  514  
 515  515                                  buf_left -= iov_len;
 516  516                                  total_size -= iov_len;
 517  517                                  sfv_len -= iov_len;
 518  518                                  sfv_off += iov_len;
 519  519  
 520  520                                  error = uiomove((caddr_t)dmp->b_wptr,
 521  521                                      iov_len, UIO_WRITE, &auio);
 522  522                                  if (error != 0) {
 523  523                                          freemsg(head);
 524  524                                          return (error);
 525  525                                  }
 526  526                                  dmp->b_wptr += iov_len;
 527  527                          }
 528  528                  } else {
 529  529                          file_t  *ffp;
 530  530                          vnode_t *readvp;
 531  531  
 532  532                          if ((ffp = getf(sfv->sfv_fd)) == NULL) {
 533  533                                  freemsg(head);
 534  534                                  return (EBADF);
 535  535                          }
 536  536  
 537  537                          if ((ffp->f_flag & FREAD) == 0) {
 538  538                                  releasef(sfv->sfv_fd);
 539  539                                  freemsg(head);
 540  540                                  return (EACCES);
 541  541                          }
 542  542  
 543  543                          readvp = ffp->f_vnode;
 544  544                          if (readvp->v_type != VREG) {
 545  545                                  releasef(sfv->sfv_fd);
 546  546                                  freemsg(head);
 547  547                                  return (EINVAL);
 548  548                          }
 549  549  
 550  550                          /*
 551  551                           * No point reading and writing to same vp,
 552  552                           * as long as both are regular files. readvp is not
 553  553                           * locked; but since we got it from an open file the
 554  554                           * contents will be valid during the time of access.
 555  555                           */
 556  556  
 557  557                          if (vn_compare(vp, readvp)) {
 558  558                                  releasef(sfv->sfv_fd);
 559  559                                  freemsg(head);
 560  560                                  return (EINVAL);
 561  561                          }
 562  562  
 563  563                          /*
 564  564                           * Note: we assume readvp != vp. "vp" is already
 565  565                           * locked, and "readvp" must not be.
 566  566                           */
 567  567  
 568  568                          if (readvp < vp) {
 569  569                                  VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 570  570                                  (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 571  571                                      NULL);
 572  572                                  (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 573  573                          } else {
 574  574                                  (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 575  575                                      NULL);
 576  576                          }
 577  577  
 578  578                          /* Same checks as in pread */
 579  579                          if (sfv_off > maxoff) {
 580  580                                  VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 581  581                                  releasef(sfv->sfv_fd);
 582  582                                  freemsg(head);
 583  583                                  return (EINVAL);
 584  584                          }
 585  585                          if (sfv_off + sfv_len > maxoff) {
 586  586                                  total_size -= (sfv_off + sfv_len - maxoff);
 587  587                                  sfv_len = (ssize_t)((offset_t)maxoff -
 588  588                                      sfv_off);
 589  589                          }
 590  590  
 591  591                          while (sfv_len > 0) {
 592  592                                  if (buf_left == 0) {
 593  593                                          tmp = dmp;
 594  594                                          buf_left = MIN(total_size, maxblk);
 595  595                                          iov_len = MIN(buf_left, sfv_len);
 596  596                                          dmp = allocb(buf_left + extra, BPRI_HI);
 597  597                                          if (dmp == NULL) {
 598  598                                                  VOP_RWUNLOCK(readvp,
 599  599                                                      V_WRITELOCK_FALSE, NULL);
 600  600                                                  releasef(sfv->sfv_fd);
 601  601                                                  freemsg(head);
 602  602                                                  return (ENOMEM);
 603  603                                          }
 604  604                                          dmp->b_wptr = dmp->b_rptr =
 605  605                                              dmp->b_rptr + wroff;
 606  606                                          tmp->b_cont = dmp;
 607  607                                  } else {
 608  608                                          iov_len = MIN(buf_left, sfv_len);
 609  609                                  }
 610  610                                  aiov.iov_base = (caddr_t)dmp->b_wptr;
 611  611                                  aiov.iov_len = iov_len;
 612  612                                  auio.uio_loffset = sfv_off;
 613  613                                  auio.uio_iov = &aiov;
 614  614                                  auio.uio_iovcnt = 1;
 615  615                                  auio.uio_resid = iov_len;
 616  616                                  auio.uio_segflg = UIO_SYSSPACE;
 617  617                                  auio.uio_llimit = MAXOFFSET_T;
 618  618                                  auio.uio_fmode = ffp->f_flag;
 619  619                                  ioflag = auio.uio_fmode &
 620  620                                      (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 621  621  
 622  622                                  /*
 623  623                                   * If read sync is not asked for,
 624  624                                   * filter sync flags
 625  625                                   */
 626  626                                  if ((ioflag & FRSYNC) == 0)
 627  627                                          ioflag &= ~(FSYNC|FDSYNC);
 628  628                                  error = VOP_READ(readvp, &auio, ioflag,
 629  629                                      fp->f_cred, NULL);
 630  630                                  if (error != 0) {
 631  631                                          /*
 632  632                                           * If we were reading a pipe (currently
 633  633                                           * not implemented), we may now lose
 634  634                                           * data.
 635  635                                           */
 636  636                                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 637  637                                              NULL);
 638  638                                          releasef(sfv->sfv_fd);
 639  639                                          freemsg(head);
 640  640                                          return (error);
 641  641                                  }
 642  642  
 643  643                                  /*
 644  644                                   * Check how much data was really read.
 645  645                                   * Decrement the 'len' and increment the
 646  646                                   * 'off' appropriately.
 647  647                                   */
 648  648                                  cnt = iov_len - auio.uio_resid;
 649  649                                  if (cnt == 0) {
 650  650                                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 651  651                                              NULL);
 652  652                                          releasef(sfv->sfv_fd);
 653  653                                          freemsg(head);
 654  654                                          return (EINVAL);
 655  655                                  }
 656  656                                  sfv_len -= cnt;
 657  657                                  sfv_off += cnt;
 658  658                                  total_size -= cnt;
 659  659                                  buf_left -= cnt;
 660  660  
 661  661                                  dmp->b_wptr += cnt;
 662  662                          }
 663  663                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 664  664                          releasef(sfv->sfv_fd);
 665  665                  }
 666  666                  sfv++;
 667  667          }
 668  668  
 669  669          ASSERT(total_size == 0);
 670  670          error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &head);
 671  671          if (error != 0) {
 672  672                  if (head != NULL)
 673  673                          freemsg(head);
 674  674                  return (error);
 675  675          }
 676  676          ttolwp(curthread)->lwp_ru.ioch += (ulong_t)size;
 677  677          *count += size;
 678  678  
 679  679          return (0);
 680  680  }
 681  681  
 682  682  
 683  683  static int
 684  684  sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
 685  685      int copy_cnt, ssize_t *count)
 686  686  {
                   /*
                    * Send the copy_cnt entries of sfv, in order, to the file or socket
                    * behind fp.  An entry with sfv_fd == SFV_FD_SELF describes
                    * sfv_len bytes in the caller's address space (sfv_off is used as
                    * the user address); any other sfv_fd must be an open, readable
                    * regular file, read starting at offset sfv_off.
                    *
                    * *count is advanced by the number of bytes transferred and, when
                    * the target is written with VOP_WRITE(), *fileoff is advanced as
                    * well.  Returns 0 on success or an errno value; progress made
                    * before a failure stays recorded in *count.
                    *
                    * A comment further down states that "vp" is already locked when
                    * the file-to-file path runs; this function never takes that lock
                    * itself except to re-acquire it after dropping it for ordering.
                    */
 687  687          struct vnode *vp;
 688  688          struct uio auio;
 689  689          struct iovec aiov;
 690  690          ushort_t fflag;
 691  691          int ioflag;
 692  692          int i, error;
 693  693          size_t cnt;
 694  694          ssize_t sfv_len;
 695  695          u_offset_t sfv_off;
                   /* Largest offset allowed; ILP32 callers are held to MAXOFF32_T. */
 696  696  #ifdef _SYSCALL32_IMPL
 697  697          model_t model = get_udatamodel();
 698  698          u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
 699  699              MAXOFF32_T : MAXOFFSET_T;
 700  700  #else
 701  701          const u_offset_t maxoff = MAXOFF32_T;
 702  702  #endif
 703  703          mblk_t  *dmp = NULL;
 704  704          char    *buf = NULL;
 705  705          size_t  extra;
 706  706          int maxblk, wroff, tail_len;
 707  707          struct sonode *so;
 708  708          stdata_t *stp;
 709  709          struct nmsghdr msg;
 710  710  
 711  711          fflag = fp->f_flag;
 712  712          vp = fp->f_vnode;
 713  713  
                   /*
                    * For a socket target, take the write offset, tail length and
                    * maximum block size from the stream head if the vnode has one,
                    * otherwise from the socket's protocol properties.  "extra" is
                    * added to the size of every mblk allocated below.  None of
                    * so, stp, wroff, tail_len, maxblk or extra is set for a
                    * non-socket target; they are only read on VSOCK paths.
                    */
 714  714          if (vp->v_type == VSOCK) {
 715  715                  so = VTOSO(vp);
 716  716                  if (vp->v_stream != NULL) {
 717  717                          stp = vp->v_stream;
 718  718                          wroff = (int)stp->sd_wroff;
 719  719                          tail_len = (int)stp->sd_tail;
 720  720                          maxblk = (int)stp->sd_maxblk;
 721  721                  } else {
 722  722                          stp = NULL;
 723  723                          wroff = so->so_proto_props.sopp_wroff;
 724  724                          tail_len = so->so_proto_props.sopp_tail;
 725  725                          maxblk = so->so_proto_props.sopp_maxblk;
 726  726                  }
 727  727                  extra = wroff + tail_len;
 728  728          }
 729  729  
                   /* msg is zeroed once and passed to every socket_sendmblk() call. */
 730  730          bzero(&msg, sizeof (msg));
 731  731          auio.uio_extflg = UIO_COPY_DEFAULT;
 732  732          for (i = 0; i < copy_cnt; i++) {
                           /* Stop with EINTR if ISSIG() reports a signal. */
 733  733                  if (ISSIG(curthread, JUSTLOOKING))
 734  734                          return (EINTR);
 735  735  
 736  736                  /*
 737  737                   * Do similar checks as "write" as we are writing
 738  738                   * sfv_len bytes into "vp".
 739  739                   */
 740  740                  sfv_len = (ssize_t)sfv->sfv_len;
 741  741  
                           /* Nothing to do for an empty entry. */
 742  742                  if (sfv_len == 0) {
 743  743                          sfv++;
 744  744                          continue;
 745  745                  }
 746  746  
                           /*
                            * Regular-file target: fail with EFBIG (after running
                            * the RLIMIT_FSIZE rctl action) once the offset reaches
                            * p_fsz_ctl, with EFBIG at maxoff, and with EINVAL if
                            * this entry would extend past maxoff.
                            */
 747  747                  if (vp->v_type == VREG) {
 748  748                          if (*fileoff >= curproc->p_fsz_ctl) {
 749  749                                  mutex_enter(&curproc->p_lock);
 750  750                                  (void) rctl_action(
 751  751                                      rctlproc_legacy[RLIMIT_FSIZE],
 752  752                                      curproc->p_rctls, curproc, RCA_SAFE);
 753  753                                  mutex_exit(&curproc->p_lock);
 754  754  
 755  755                                  return (EFBIG);
 756  756                          }
 757  757  
 758  758                          if (*fileoff >= maxoff)
 759  759                                  return (EFBIG);
 760  760  
 761  761                          if (*fileoff + sfv_len > maxoff)
 762  762                                  return (EINVAL);
 763  763                  }
 764  764  
 765  765                  /* Check for overflow */
 766  766  #ifdef _SYSCALL32_IMPL
 767  767                  if (model == DATAMODEL_ILP32) {
 768  768                          if (((ssize32_t)(*count + sfv_len)) < 0)
 769  769                                  return (EINVAL);
 770  770                  } else
 771  771  #endif
 772  772                  if ((*count + sfv_len) < 0)
 773  773                          return (EINVAL);
 774  774  
 775  775                  sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
 776  776  
                           /*
                            * SFV_FD_SELF: the data is in the caller's memory and
                            * sfv_off holds its user address.
                            */
 777  777                  if (sfv->sfv_fd == SFV_FD_SELF) {
                                   /*
                                    * Socket target: copy the user data into
                                    * mblks and send them one at a time.
                                    */
 778  778                          if (vp->v_type == VSOCK) {
 779  779                                  while (sfv_len > 0) {
 780  780                                          size_t iov_len;
 781  781  
 782  782                                          iov_len = sfv_len;
 783  783                                          /*
 784  784                                           * Socket filters can limit the mblk
 785  785                                           * size, so limit reads to maxblk if
 786  786                                           * there are filters present.
 787  787                                           */
 788  788                                          if (so->so_filter_active > 0 &&
 789  789                                              maxblk != INFPSZ)
 790  790                                                  iov_len = MIN(iov_len, maxblk);
 791  791  
 792  792                                          aiov.iov_len = iov_len;
 793  793                                          aiov.iov_base =
 794  794                                              (caddr_t)(uintptr_t)sfv_off;
 795  795  
 796  796                                          auio.uio_iov = &aiov;
 797  797                                          auio.uio_iovcnt = 1;
 798  798                                          auio.uio_loffset = *fileoff;
 799  799                                          auio.uio_segflg = UIO_USERSPACE;
 800  800                                          auio.uio_fmode = fflag;
 801  801                                          auio.uio_llimit = curproc->p_fsz_ctl;
 802  802                                          auio.uio_resid = iov_len;
 803  803  
                                                   /*
                                                    * Data starts wroff bytes into the
                                                    * mblk; "extra" also covers the tail.
                                                    */
 804  804                                          dmp = allocb(iov_len + extra, BPRI_HI);
 805  805                                          if (dmp == NULL)
 806  806                                                  return (ENOMEM);
 807  807                                          dmp->b_wptr = dmp->b_rptr =
 808  808                                              dmp->b_rptr + wroff;
 809  809                                          error = uiomove((caddr_t)dmp->b_wptr,
 810  810                                              iov_len, UIO_WRITE, &auio);
 811  811                                          if (error != 0) {
 812  812                                                  freeb(dmp);
 813  813                                                  return (error);
 814  814                                          }
 815  815                                          dmp->b_wptr += iov_len;
 816  816                                          error = socket_sendmblk(VTOSO(vp),
 817  817                                              &msg, fflag, CRED(), &dmp);
 818  818  
                                                   /*
                                                    * dmp is passed by reference; free it
                                                    * only if it is still non-NULL.
                                                    */
 819  819                                          if (error != 0) {
 820  820                                                  if (dmp != NULL)
 821  821                                                          freeb(dmp);
 822  822                                                  return (error);
 823  823                                          }
 824  824                                          ttolwp(curthread)->lwp_ru.ioch +=
 825  825                                              (ulong_t)iov_len;
 826  826                                          *count += iov_len;
 827  827                                          sfv_len -= iov_len;
 828  828                                          sfv_off += iov_len;
 829  829                                  }
 830  830                          } else {
                                           /*
                                            * Not a socket: VOP_WRITE() directly from user
                                            * memory until sfv_len is used up.  Progress
                                            * is recorded before any error is returned.
                                            */
 831  831                                  aiov.iov_len = sfv_len;
 832  832                                  aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
 833  833  
 834  834                                  auio.uio_iov = &aiov;
 835  835                                  auio.uio_iovcnt = 1;
 836  836                                  auio.uio_loffset = *fileoff;
 837  837                                  auio.uio_segflg = UIO_USERSPACE;
 838  838                                  auio.uio_fmode = fflag;
 839  839                                  auio.uio_llimit = curproc->p_fsz_ctl;
 840  840                                  auio.uio_resid = sfv_len;
 841  841  
 842  842                                  ioflag = auio.uio_fmode &
 843  843                                      (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 844  844                                  while (sfv_len > 0) {
 845  845                                          error = VOP_WRITE(vp, &auio, ioflag,
 846  846                                              fp->f_cred, NULL);
 847  847                                          cnt = sfv_len - auio.uio_resid;
 848  848                                          sfv_len -= cnt;
 849  849                                          ttolwp(curthread)->lwp_ru.ioch +=
 850  850                                              (ulong_t)cnt;
 851  851                                          *fileoff += cnt;
 852  852                                          *count += cnt;
 853  853                                          if (error != 0)
 854  854                                                  return (error);
 855  855                                  }
 856  856                          }
 857  857                  } else {
                                   /*
                                    * The data comes from another open file, which
                                    * must be open for reading and a regular file.
                                    */
 858  858                          int segmapit = 0;
 859  859                          file_t  *ffp;
 860  860                          vnode_t *readvp;
 861  861                          struct vnode *realvp;
 862  862                          size_t  size;
 863  863                          caddr_t ptr;
 864  864  
 865  865                          if ((ffp = getf(sfv->sfv_fd)) == NULL)
 866  866                                  return (EBADF);
 867  867  
 868  868                          if ((ffp->f_flag & FREAD) == 0) {
 869  869                                  releasef(sfv->sfv_fd);
 870  870                                  return (EBADF);
 871  871                          }
 872  872  
 873  873                          readvp = ffp->f_vnode;
                                   /* Use the real vnode if VOP_REALVP() yields one. */
 874  874                          if (VOP_REALVP(readvp, &realvp, NULL) == 0)
 875  875                                  readvp = realvp;
 876  876                          if (readvp->v_type != VREG) {
 877  877                                  releasef(sfv->sfv_fd);
 878  878                                  return (EINVAL);
 879  879                          }
 880  880  
 881  881                          /*
 882  882                           * No point reading and writing to same vp,
 883  883                           * as long as both are regular files. readvp is not
 884  884                           * locked; but since we got it from an open file the
 885  885                           * contents will be valid during the time of access.
 886  886                           */
 887  887                          if (vn_compare(vp, readvp)) {
 888  888                                  releasef(sfv->sfv_fd);
 889  889                                  return (EINVAL);
 890  890                          }
 891  891  
 892  892                          /*
 893  893                           * Note: we assume readvp != vp. "vp" is already
 894  894                           * locked, and "readvp" must not be.
 895  895                           */
                                   /*
                                    * Lock in ascending vnode-address order: when
                                    * readvp sorts below vp, vp's lock is dropped
                                    * and re-taken after readvp's.  Presumably this
                                    * avoids lock-order deadlocks -- confirm.
                                    */
 896  896                          if (readvp < vp) {
 897  897                                  VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 898  898                                  (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 899  899                                      NULL);
 900  900                                  (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 901  901                          } else {
 902  902                                  (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 903  903                                      NULL);
 904  904                          }
 905  905  
 906  906                          /* Same checks as in pread */
 907  907                          if (sfv_off > maxoff) {
 908  908                                  VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 909  909                                  releasef(sfv->sfv_fd);
 910  910                                  return (EINVAL);
 911  911                          }
                                   /* Clamp the length so the read stops at maxoff. */
 912  912                          if (sfv_off + sfv_len > maxoff) {
 913  913                                  sfv_len = (ssize_t)((offset_t)maxoff -
 914  914                                      sfv_off);
 915  915                          }
 916  916                          /* Find the native blocksize to transfer data */
 917  917                          size = MIN(vp->v_vfsp->vfs_bsize,
 918  918                              readvp->v_vfsp->vfs_bsize);
 919  919                          size = sfv_len < size ? sfv_len : size;
 920  920  
                                   /*
                                    * A non-socket target is fed through a kernel
                                    * bounce buffer of "size" bytes.  For a socket,
                                    * decide whether snf_segmap() can be used
                                    * instead of copying into mblks.
                                    */
 921  921                          if (vp->v_type != VSOCK) {
 922  922                                  segmapit = 0;
 923  923                                  buf = kmem_alloc(size, KM_NOSLEEP);
 924  924                                  if (buf == NULL) {
 925  925                                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 926  926                                              NULL);
 927  927                                          releasef(sfv->sfv_fd);
 928  928                                          return (ENOMEM);
 929  929                                  }
 930  930                          } else {
 931  931                                  uint_t  copyflag;
 932  932  
 933  933                                  copyflag = stp != NULL ? stp->sd_copyflag :
 934  934                                      so->so_proto_props.sopp_zcopyflag;
 935  935  
 936  936                                  /*
 937  937                                   * Socket filters can limit the mblk size,
 938  938                                   * so limit reads to maxblk if there are
 939  939                                   * filters present.
 940  940                                   */
 941  941                                  if (so->so_filter_active > 0 &&
 942  942                                      maxblk != INFPSZ)
 943  943                                          size = MIN(size, maxblk);
 944  944  
                                           /*
                                            * No segmap if the source has file locks, is
                                            * VNOMAP, or copyflag has STZCVMUNSAFE.  Use
                                            * it if STZCVMSAFE; otherwise only if enabling
                                            * SO_SND_COPYAVOID succeeds ("segmapit = 1"
                                            * is the body of that final if).
                                            */
 945  945                                  if (vn_has_flocks(readvp) ||
 946  946                                      readvp->v_flag & VNOMAP ||
 947  947                                      copyflag & STZCVMUNSAFE) {
 948  948                                          segmapit = 0;
 949  949                                  } else if (copyflag & STZCVMSAFE) {
 950  950                                          segmapit = 1;
 951  951                                  } else {
 952  952                                          int on = 1;
 953  953                                          if (socket_setsockopt(VTOSO(vp),
 954  954                                              SOL_SOCKET, SO_SND_COPYAVOID,
 955  955                                              &on, sizeof (on), CRED()) == 0)
 956  956                                          segmapit = 1;
 957  957                                  }
 958  958                          }
 959  959  
                                   /*
                                    * NOTE(review): readvp is not unlocked here on
                                    * this path; presumably snf_segmap() drops
                                    * that lock -- confirm.  cnt, filled in via
                                    * the (ssize_t *) cast, is added to *count
                                    * even when snf_segmap() returns an error.
                                    */
 960  960                          if (segmapit) {
 961  961                                  boolean_t nowait;
 962  962  
 963  963                                  nowait = (sfv->sfv_flag & SFV_NOWAIT) != 0;
 964  964                                  error = snf_segmap(fp, readvp, sfv_off,
 965  965                                      (u_offset_t)sfv_len, (ssize_t *)&cnt,
 966  966                                      nowait);
 967  967                                  releasef(sfv->sfv_fd);
 968  968                                  *count += cnt;
 969  969                                  if (error)
 970  970                                          return (error);
 971  971                                  sfv++;
 972  972                                  continue;
 973  973                          }
 974  974  
                                   /*
                                    * Copy loop: read up to "size" bytes from
                                    * readvp, then send or write what was read.
                                    */
 975  975                          while (sfv_len > 0) {
 976  976                                  size_t  iov_len;
 977  977  
 978  978                                  iov_len = MIN(size, sfv_len);
 979  979  
 980  980                                  if (vp->v_type == VSOCK) {
 981  981                                          dmp = allocb(iov_len + extra, BPRI_HI);
 982  982                                          if (dmp == NULL) {
 983  983                                                  VOP_RWUNLOCK(readvp,
 984  984                                                      V_WRITELOCK_FALSE, NULL);
 985  985                                                  releasef(sfv->sfv_fd);
 986  986                                                  return (ENOMEM);
 987  987                                          }
 988  988                                          dmp->b_wptr = dmp->b_rptr =
 989  989                                              dmp->b_rptr + wroff;
 990  990                                          ptr = (caddr_t)dmp->b_rptr;
 991  991                                  } else {
 992  992                                          ptr = buf;
 993  993                                  }
 994  994  
 995  995                                  aiov.iov_base = ptr;
 996  996                                  aiov.iov_len = iov_len;
 997  997                                  auio.uio_loffset = sfv_off;
 998  998                                  auio.uio_iov = &aiov;
 999  999                                  auio.uio_iovcnt = 1;
1000 1000                                  auio.uio_resid = iov_len;
1001 1001                                  auio.uio_segflg = UIO_SYSSPACE;
1002 1002                                  auio.uio_llimit = MAXOFFSET_T;
1003 1003                                  auio.uio_fmode = ffp->f_flag;
1004 1004                                  ioflag = auio.uio_fmode &
1005 1005                                      (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1006 1006  
1007 1007                                  /*
1008 1008                                   * If read sync is not asked for,
1009 1009                                   * filter sync flags
1010 1010                                   */
1011 1011                                  if ((ioflag & FRSYNC) == 0)
1012 1012                                          ioflag &= ~(FSYNC|FDSYNC);
                                           /*
                                            * NOTE(review): the read uses fp->f_cred (the
                                            * target's file_t), not ffp->f_cred --
                                            * confirm this is intended.
                                            */
1013 1013                                  error = VOP_READ(readvp, &auio, ioflag,
1014 1014                                      fp->f_cred, NULL);
1015 1015                                  if (error != 0) {
1016 1016                                          /*
1017 1017                                           * If we were reading a pipe (currently
1018 1018                                           * not implemented), we may now lose
1019 1019                                           * data.
1020 1020                                           */
1021 1021                                          if (vp->v_type == VSOCK)
1022 1022                                                  freeb(dmp);
1023 1023                                          else
1024 1024                                                  kmem_free(buf, size);
1025 1025                                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
1026 1026                                              NULL);
1027 1027                                          releasef(sfv->sfv_fd);
1028 1028                                          return (error);
1029 1029                                  }
1030 1030  
1031 1031                                  /*
1032 1032                                   * Check how much data was really read.
1033 1033                                   * Decrement the 'len' and increment the
1034 1034                                   * 'off' appropriately.
1035 1035                                   */
1036 1036                                  cnt = iov_len - auio.uio_resid;
                                           /* No data read: fail with EINVAL. */
1037 1037                                  if (cnt == 0) {
1038 1038                                          if (vp->v_type == VSOCK)
1039 1039                                                  freeb(dmp);
1040 1040                                          else
1041 1041                                                  kmem_free(buf, size);
1042 1042                                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
1043 1043                                              NULL);
1044 1044                                          releasef(sfv->sfv_fd);
1045 1045                                          return (EINVAL);
1046 1046                                  }
1047 1047                                  sfv_len -= cnt;
1048 1048                                  sfv_off += cnt;
1049 1049  
                                           /* Send the mblk, or write out the buffer. */
1050 1050                                  if (vp->v_type == VSOCK) {
1051 1051                                          dmp->b_wptr = dmp->b_rptr + cnt;
1052 1052  
1053 1053                                          error = socket_sendmblk(VTOSO(vp),
1054 1054                                              &msg, fflag, CRED(), &dmp);
1055 1055  
1056 1056                                          if (error != 0) {
1057 1057                                                  if (dmp != NULL)
1058 1058                                                          freeb(dmp);
1059 1059                                                  VOP_RWUNLOCK(readvp,
1060 1060                                                      V_WRITELOCK_FALSE, NULL);
1061 1061                                                  releasef(sfv->sfv_fd);
1062 1062                                                  return (error);
1063 1063                                          }
1064 1064  
1065 1065                                          ttolwp(curthread)->lwp_ru.ioch +=
1066 1066                                              (ulong_t)cnt;
1067 1067                                          *count += cnt;
1068 1068                                  } else {
1069 1069  
1070 1070                                          aiov.iov_base = ptr;
1071 1071                                          aiov.iov_len = cnt;
1072 1072                                          auio.uio_loffset = *fileoff;
1073 1073                                          auio.uio_resid = cnt;
1074 1074                                          auio.uio_iov = &aiov;
1075 1075                                          auio.uio_iovcnt = 1;
1076 1076                                          auio.uio_segflg = UIO_SYSSPACE;
1077 1077                                          auio.uio_llimit = curproc->p_fsz_ctl;
1078 1078                                          auio.uio_fmode = fflag;
1079 1079                                          ioflag = auio.uio_fmode &
1080 1080                                              (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1081 1081                                          error = VOP_WRITE(vp, &auio, ioflag,
1082 1082                                              fp->f_cred, NULL);
1083 1083  
1084 1084                                          /*
1085 1085                                           * Check how much data was written.
1086 1086                                           * Increment the 'len' and decrement the
1087 1087                                           * 'off' if all the data was not
1088 1088                                           * written.
1089 1089                                           */
1090 1090                                          cnt -= auio.uio_resid;
1091 1091                                          sfv_len += auio.uio_resid;
1092 1092                                          sfv_off -= auio.uio_resid;
1093 1093                                          ttolwp(curthread)->lwp_ru.ioch +=
1094 1094                                              (ulong_t)cnt;
1095 1095                                          *fileoff += cnt;
1096 1096                                          *count += cnt;
1097 1097                                          if (error != 0) {
1098 1098                                                  kmem_free(buf, size);
1099 1099                                                  VOP_RWUNLOCK(readvp,
1100 1100                                                      V_WRITELOCK_FALSE, NULL);
1101 1101                                                  releasef(sfv->sfv_fd);
1102 1102                                                  return (error);
1103 1103                                          }
1104 1104                                  }
1105 1105                          }
                                   /* Free the bounce buffer, if one was used. */
1106 1106                          if (buf) {
1107 1107                                  kmem_free(buf, size);
1108 1108                                  buf = NULL;
1109 1109                          }
1110 1110                          VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
1111 1111                          releasef(sfv->sfv_fd);
1112 1112                  }
1113 1113                  sfv++;
1114 1114          }
1115 1115          return (0);
1116 1116  }
1117 1117  
1118 1118  ssize_t
1119 1119  sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
1120 1120      size_t *xferred)
1121 1121  {
1122 1122          int error = 0;
1123 1123          int first_vector_error = 0;
1124 1124          file_t *fp;
1125 1125          struct vnode *vp;
1126 1126          struct sonode *so;
1127 1127          u_offset_t fileoff;
1128 1128          int copy_cnt;
1129 1129          const struct sendfilevec *copy_vec;
1130 1130          struct sendfilevec sfv[SEND_MAX_CHUNK];
1131 1131          ssize_t count = 0;
1132 1132  #ifdef _SYSCALL32_IMPL
1133 1133          struct ksendfilevec32 sfv32[SEND_MAX_CHUNK];
1134 1134  #endif
1135 1135          ssize_t total_size;
1136 1136          int i;
1137 1137          boolean_t is_sock = B_FALSE;
1138 1138          int maxblk = 0;
1139 1139  
1140 1140          if (sfvcnt <= 0)
1141 1141                  return (set_errno(EINVAL));
1142 1142  
1143 1143          if ((fp = getf(fildes)) == NULL)
1144 1144                  return (set_errno(EBADF));
1145 1145  
1146 1146          if (((fp->f_flag) & FWRITE) == 0) {
1147 1147                  error = EBADF;
1148 1148                  goto err;
1149 1149          }
1150 1150  
1151 1151          fileoff = fp->f_offset;
1152 1152          vp = fp->f_vnode;
1153 1153  
1154 1154          switch (vp->v_type) {
1155 1155          case VSOCK:
1156 1156                  so = VTOSO(vp);
1157 1157                  is_sock = B_TRUE;
1158 1158                  if (SOCK_IS_NONSTR(so)) {
1159 1159                          maxblk = so->so_proto_props.sopp_maxblk;
1160 1160                  } else {
1161 1161                          maxblk = (int)vp->v_stream->sd_maxblk;
1162 1162                  }
1163 1163  
1164 1164                  /*
1165 1165                   * We need to make sure that the socket that we're sending on
1166 1166                   * supports sendfile behavior. sockfs doesn't know that the APIs
1167 1167                   * we want to use are coming from sendfile, so we can't rely on
1168 1168                   * it to check for us.
1169 1169                   */
1170 1170                  if ((so->so_mode & SM_SENDFILESUPP) == 0) {
1171 1171                          error = EOPNOTSUPP;
1172 1172                          goto err;
1173 1173                  }
1174 1174                  break;
1175 1175          case VREG:
1176 1176                  break;
1177 1177          default:
1178 1178                  error = EINVAL;
1179 1179                  goto err;
1180 1180          }
1181 1181  
1182 1182          switch (opcode) {
1183 1183          case SENDFILEV :
1184 1184                  break;
1185 1185  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1186 1186          case SENDFILEV64 :
1187 1187                  return (sendvec64(fp, (struct ksendfilevec64 *)vec, sfvcnt,
1188 1188                      (size32_t *)xferred, fildes));
1189 1189  #endif
1190 1190          default :
1191 1191                  error = ENOSYS;
1192 1192                  break;
1193 1193          }
1194 1194  
1195 1195          (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1196 1196          copy_vec = vec;
1197 1197  
1198 1198          do {
1199 1199                  total_size = 0;
1200 1200                  copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
1201 1201  #ifdef _SYSCALL32_IMPL
1202 1202                  /* 32-bit callers need to have their iovec expanded. */
1203 1203                  if (get_udatamodel() == DATAMODEL_ILP32) {
1204 1204                          if (copyin(copy_vec, sfv32,
1205 1205                              copy_cnt * sizeof (ksendfilevec32_t))) {
1206 1206                                  error = EFAULT;
1207 1207                                  break;
1208 1208                          }
1209 1209  
1210 1210                          for (i = 0; i < copy_cnt; i++) {
1211 1211                                  sfv[i].sfv_fd = sfv32[i].sfv_fd;
1212 1212                                  sfv[i].sfv_off =
1213 1213                                      (off_t)(uint32_t)sfv32[i].sfv_off;
1214 1214                                  sfv[i].sfv_len = (size_t)sfv32[i].sfv_len;
1215 1215                                  total_size += sfv[i].sfv_len;
1216 1216                                  sfv[i].sfv_flag = sfv32[i].sfv_flag;
1217 1217                                  /*
1218 1218                                   * Individual elements of the vector must not
1219 1219                                   * wrap or overflow, as later math is signed.
1220 1220                                   * Equally total_size needs to be checked after
1221 1221                                   * each vector is added in, to be sure that
1222 1222                                   * rogue values haven't overflowed the counter.
1223 1223                                   */
1224 1224                                  if (((ssize32_t)sfv[i].sfv_len < 0) ||
1225 1225                                      ((ssize32_t)total_size < 0)) {
1226 1226                                          /*
1227 1227                                           * Truncate the vector to send data
1228 1228                                           * described by elements before the
1229 1229                                           * error.
1230 1230                                           */
1231 1231                                          copy_cnt = i;
1232 1232                                          first_vector_error = EINVAL;
1233 1233                                          /* total_size can't be trusted */
1234 1234                                          if ((ssize32_t)total_size < 0)
1235 1235                                                  error = EINVAL;
1236 1236                                          break;
1237 1237                                  }
1238 1238                          }
1239 1239                          /* Nothing to do, process errors */
1240 1240                          if (copy_cnt == 0)
1241 1241                                  break;
1242 1242  
1243 1243                  } else {
1244 1244  #endif
1245 1245                          if (copyin(copy_vec, sfv,
1246 1246                              copy_cnt * sizeof (sendfilevec_t))) {
1247 1247                                  error = EFAULT;
1248 1248                                  break;
1249 1249                          }
1250 1250  
1251 1251                          for (i = 0; i < copy_cnt; i++) {
1252 1252                                  total_size += sfv[i].sfv_len;
1253 1253                                  /*
1254 1254                                   * Individual elements of the vector must not
1255 1255                                   * wrap or overflow, as later math is signed.
1256 1256                                   * Equally total_size needs to be checked after
1257 1257                                   * each vector is added in, to be sure that
1258 1258                                   * rogue values haven't overflowed the counter.
1259 1259                                   */
1260 1260                                  if (((ssize_t)sfv[i].sfv_len < 0) ||
1261 1261                                      (total_size < 0)) {
1262 1262                                          /*
1263 1263                                           * Truncate the vector to send data
1264 1264                                           * described by elements before the
1265 1265                                           * error.
1266 1266                                           */
1267 1267                                          copy_cnt = i;
1268 1268                                          first_vector_error = EINVAL;
1269 1269                                          /* total_size can't be trusted */
1270 1270                                          if (total_size < 0)
1271 1271                                                  error = EINVAL;
1272 1272                                          break;
1273 1273                                  }
1274 1274                          }
1275 1275                          /* Nothing to do, process errors */
1276 1276                          if (copy_cnt == 0)
1277 1277                                  break;
1278 1278  #ifdef _SYSCALL32_IMPL
1279 1279                  }
1280 1280  #endif
1281 1281  
1282 1282                  /*
1283 1283                   * The choice between sendvec_small_chunk and
1284 1284                   * sendvec_chunk is dependent on multiple things:
1285 1285                   *
1286 1286                   * i) latency is important for smaller files. So if the
1287 1287                   * data is smaller than 'tcp_slow_start_initial' times
1288 1288                   * maxblk, then use sendvec_small_chunk which creates
1289 1289                   * maxblk size mblks and chains them together and sends
1290 1290                   * them to TCP in one shot. It also leaves 'wroff' size
1291 1291                   * space for the headers in each mblk.
1292 1292                   *
1293 1293                   * ii) for total size bigger than 'tcp_slow_start_initial'
1294 1294                   * times maxblk, it's probably real file data which is
1295 1295                   * dominating. So it's better to use sendvec_chunk because
1296 1296                   * performance suffers if we don't do pagesize reads.
1297 1297                   * sendvec_chunk will do pagesize reads and write them
1298 1298                   * in pagesize mblks to TCP.
1299 1299                   *
1300 1300                   * Side Notes: A write to file has not been optimized.
1301 1301                   * Future zero copy code will plug into sendvec_chunk
1302 1302                   * only because doing zero copy for files smaller than
1303 1303                   * pagesize is useless.
1304 1304                   *
1305 1305                   * Note, if socket has NL7C enabled then call NL7C's
1306 1306                   * nl7c_sendfilev() function to consume the sfv[].
1307 1307                   */
1308 1308                  if (is_sock) {
1309 1309                          if (!SOCK_IS_NONSTR(so) &&
1310 1310                              _SOTOTPI(so)->sti_nl7c_flags != 0) {
1311 1311                                  error = nl7c_sendfilev(so, &fileoff,
1312 1312                                      sfv, copy_cnt, &count);
1313 1313                          } else if ((total_size <= (4 * maxblk)) &&
1314 1314                              error == 0) {
1315 1315                                  error = sendvec_small_chunk(fp,
1316 1316                                      &fileoff, sfv, copy_cnt,
1317 1317                                      total_size, maxblk, &count);
1318 1318                          } else {
1319 1319                                  error = sendvec_chunk(fp, &fileoff,
1320 1320                                      sfv, copy_cnt, &count);
1321 1321                          }
1322 1322                  } else {
1323 1323                          ASSERT(vp->v_type == VREG);
1324 1324                          error = sendvec_chunk(fp, &fileoff, sfv, copy_cnt,
1325 1325                              &count);
1326 1326                  }
1327 1327  
1328 1328  
1329 1329  #ifdef _SYSCALL32_IMPL
1330 1330          if (get_udatamodel() == DATAMODEL_ILP32)
1331 1331                  copy_vec = (const struct sendfilevec *)((char *)copy_vec +
1332 1332                      (copy_cnt * sizeof (ksendfilevec32_t)));
1333 1333          else
1334 1334  #endif
1335 1335                  copy_vec += copy_cnt;
1336 1336                  sfvcnt -= copy_cnt;
1337 1337  
1338 1338          /* Process all vector members up to first error */
1339 1339          } while ((sfvcnt > 0) && first_vector_error == 0 && error == 0);
1340 1340  
1341 1341          if (vp->v_type == VREG)
1342 1342                  fp->f_offset += count;
1343 1343  
1344 1344          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1345 1345  
1346 1346  #ifdef _SYSCALL32_IMPL
1347 1347          if (get_udatamodel() == DATAMODEL_ILP32) {
1348 1348                  ssize32_t count32 = (ssize32_t)count;
1349 1349                  if (copyout(&count32, xferred, sizeof (count32)))
1350 1350                          error = EFAULT;
1351 1351                  releasef(fildes);
1352 1352                  if (error != 0)
1353 1353                          return (set_errno(error));
1354 1354                  if (first_vector_error != 0)
1355 1355                          return (set_errno(first_vector_error));
1356 1356                  return (count32);
1357 1357          }
1358 1358  #endif
1359 1359          if (copyout(&count, xferred, sizeof (count)))
1360 1360                  error = EFAULT;
1361 1361          releasef(fildes);
1362 1362          if (error != 0)
1363 1363                  return (set_errno(error));
1364 1364          if (first_vector_error != 0)
1365 1365                  return (set_errno(first_vector_error));
1366 1366          return (count);
1367 1367  err:
1368 1368          ASSERT(error != 0);
1369 1369          releasef(fildes);
1370 1370          return (set_errno(error));
1371 1371  }
  
    | 
      ↓ open down ↓ | 
    1371 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX