1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/t_lock.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/buf.h>
  31 #include <sys/conf.h>
  32 #include <sys/cred.h>
  33 #include <sys/kmem.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/vfs.h>
  36 #include <sys/vnode.h>
  37 #include <sys/debug.h>
  38 #include <sys/errno.h>
  39 #include <sys/time.h>
  40 #include <sys/file.h>
  41 #include <sys/open.h>
  42 #include <sys/user.h>
  43 #include <sys/termios.h>
  44 #include <sys/stream.h>
  45 #include <sys/strsubr.h>
  46 #include <sys/sunddi.h>
  47 #include <sys/esunddi.h>
  48 #include <sys/flock.h>
  49 #include <sys/modctl.h>
  50 #include <sys/cmn_err.h>
  51 #include <sys/vmsystm.h>
  52 
  53 #include <sys/socket.h>
  54 #include <sys/socketvar.h>
  55 #include <fs/sockfs/sockcommon.h>
  56 #include <fs/sockfs/socktpi.h>
  57 
  58 #include <netinet/in.h>
  59 #include <sys/sendfile.h>
  60 #include <sys/un.h>
  61 #include <sys/tihdr.h>
  62 #include <sys/atomic.h>
  63 
  64 #include <inet/common.h>
  65 #include <inet/ip.h>
  66 #include <inet/ip6.h>
  67 #include <inet/tcp.h>
  68 
  69 extern int sosendfile64(file_t *, file_t *, const struct ksendfilevec64 *,
  70                 ssize32_t *);
  71 extern int nl7c_sendfilev(struct sonode *, u_offset_t *, struct sendfilevec *,
  72                 int, ssize_t *);
  73 extern int snf_segmap(file_t *, vnode_t *, u_offset_t, u_offset_t, ssize_t *,
  74                 boolean_t);
  75 extern sotpi_info_t *sotpi_sototpi(struct sonode *);
  76 
  77 #define SEND_MAX_CHUNK  16
  78 
  79 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
  80 /*
  81  * 64 bit offsets for 32 bit applications only running either on
  82  * 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
  83  * more than 2GB of data.
  84  */
  85 int
  86 sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
  87     int copy_cnt, ssize32_t *count)
  88 {
  89         struct vnode *vp;
  90         ushort_t fflag;
  91         int ioflag;
  92         size32_t cnt;
  93         ssize32_t sfv_len;
  94         ssize32_t tmpcount;
  95         u_offset_t sfv_off;
  96         struct uio auio;
  97         struct iovec aiov;
  98         int i, error;
  99 
 100         fflag = fp->f_flag;
 101         vp = fp->f_vnode;
 102         for (i = 0; i < copy_cnt; i++) {
 103 
 104                 if (ISSIG(curthread, JUSTLOOKING))
 105                         return (EINTR);
 106 
 107                 /*
 108                  * Do similar checks as "write" as we are writing
 109                  * sfv_len bytes into "vp".
 110                  */
 111                 sfv_len = (ssize32_t)sfv->sfv_len;
 112 
 113                 if (sfv_len == 0) {
 114                         sfv++;
 115                         continue;
 116                 }
 117 
 118                 if (sfv_len < 0)
 119                         return (EINVAL);
 120 
 121                 if (vp->v_type == VREG) {
 122                         if (*fileoff >= curproc->p_fsz_ctl) {
 123                                 mutex_enter(&curproc->p_lock);
 124                                 (void) rctl_action(
 125                                     rctlproc_legacy[RLIMIT_FSIZE],
 126                                     curproc->p_rctls, curproc, RCA_SAFE);
 127                                 mutex_exit(&curproc->p_lock);
 128                                 return (EFBIG);
 129                         }
 130 
 131                         if (*fileoff >= OFFSET_MAX(fp))
 132                                 return (EFBIG);
 133 
 134                         if (*fileoff + sfv_len > OFFSET_MAX(fp))
 135                                 return (EINVAL);
 136                 }
 137 
 138                 tmpcount = *count + sfv_len;
 139                 if (tmpcount < 0)
 140                         return (EINVAL);
 141 
 142                 sfv_off = sfv->sfv_off;
 143 
 144                 auio.uio_extflg = UIO_COPY_DEFAULT;
 145                 if (sfv->sfv_fd == SFV_FD_SELF) {
 146                         aiov.iov_len = sfv_len;
 147                         aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
 148                         auio.uio_loffset = *fileoff;
 149                         auio.uio_iovcnt = 1;
 150                         auio.uio_resid = sfv_len;
 151                         auio.uio_iov = &aiov;
 152                         auio.uio_segflg = UIO_USERSPACE;
 153                         auio.uio_llimit = curproc->p_fsz_ctl;
 154                         auio.uio_fmode = fflag;
 155                         ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 156                         while (sfv_len > 0) {
 157                                 error = VOP_WRITE(vp, &auio, ioflag,
 158                                     fp->f_cred, NULL);
 159                                 cnt = sfv_len - auio.uio_resid;
 160                                 sfv_len -= cnt;
 161                                 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
 162                                 if (vp->v_type == VREG)
 163                                         *fileoff += cnt;
 164                                 *count += cnt;
 165                                 if (error != 0)
 166                                         return (error);
 167                         }
 168                 } else {
 169                         file_t  *ffp;
 170                         vnode_t *readvp;
 171                         size_t  size;
 172                         caddr_t ptr;
 173 
 174                         if ((ffp = getf(sfv->sfv_fd)) == NULL)
 175                                 return (EBADF);
 176 
 177                         if ((ffp->f_flag & FREAD) == 0) {
 178                                 releasef(sfv->sfv_fd);
 179                                 return (EBADF);
 180                         }
 181 
 182                         readvp = ffp->f_vnode;
 183                         if (readvp->v_type != VREG) {
 184                                 releasef(sfv->sfv_fd);
 185                                 return (EINVAL);
 186                         }
 187 
 188                         /*
 189                          * No point reading and writing to same vp,
 190                          * as long as both are regular files. readvp is not
 191                          * locked; but since we got it from an open file the
 192                          * contents will be valid during the time of access.
 193                          */
 194                         if (vn_compare(vp, readvp)) {
 195                                 releasef(sfv->sfv_fd);
 196                                 return (EINVAL);
 197                         }
 198 
 199                         /*
 200                          * Optimize the regular file over
 201                          * the socket case.
 202                          */
 203                         if (vp->v_type == VSOCK) {
 204                                 error = sosendfile64(fp, ffp, sfv,
 205                                     (ssize32_t *)&cnt);
 206                                 *count += cnt;
 207                                 if (error)
 208                                         return (error);
 209                                 sfv++;
 210                                 continue;
 211                         }
 212 
 213                         /*
 214                          * Note: we assume readvp != vp. "vp" is already
 215                          * locked, and "readvp" must not be.
 216                          */
 217                         if (readvp < vp) {
 218                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 219                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 220                                     NULL);
 221                                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 222                         } else {
 223                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 224                                     NULL);
 225                         }
 226 
 227                         /*
 228                          * Same checks as in pread64.
 229                          */
 230                         if (sfv_off > MAXOFFSET_T) {
 231                                 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 232                                 releasef(sfv->sfv_fd);
 233                                 return (EINVAL);
 234                         }
 235 
 236                         if (sfv_off + sfv_len > MAXOFFSET_T)
 237                                 sfv_len = (ssize32_t)(MAXOFFSET_T - sfv_off);
 238 
 239                         /* Find the native blocksize to transfer data */
 240                         size = MIN(vp->v_vfsp->vfs_bsize,
 241                             readvp->v_vfsp->vfs_bsize);
 242                         size = sfv_len < size ? sfv_len : size;
 243                         ptr = kmem_alloc(size, KM_NOSLEEP);
 244                         if (ptr == NULL) {
 245                                 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 246                                 releasef(sfv->sfv_fd);
 247                                 return (ENOMEM);
 248                         }
 249 
 250                         while (sfv_len > 0) {
 251                                 size_t  iov_len;
 252 
 253                                 iov_len = MIN(size, sfv_len);
 254                                 aiov.iov_base = ptr;
 255                                 aiov.iov_len = iov_len;
 256                                 auio.uio_loffset = sfv_off;
 257                                 auio.uio_iov = &aiov;
 258                                 auio.uio_iovcnt = 1;
 259                                 auio.uio_resid = iov_len;
 260                                 auio.uio_segflg = UIO_SYSSPACE;
 261                                 auio.uio_llimit = MAXOFFSET_T;
 262                                 auio.uio_fmode = ffp->f_flag;
 263                                 ioflag = auio.uio_fmode &
 264                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 265 
 266                                 /*
 267                                  * If read sync is not asked for,
 268                                  * filter sync flags
 269                                  */
 270                                 if ((ioflag & FRSYNC) == 0)
 271                                         ioflag &= ~(FSYNC|FDSYNC);
 272                                 error = VOP_READ(readvp, &auio, ioflag,
 273                                     fp->f_cred, NULL);
 274                                 if (error) {
 275                                         kmem_free(ptr, size);
 276                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 277                                             NULL);
 278                                         releasef(sfv->sfv_fd);
 279                                         return (error);
 280                                 }
 281 
 282                                 /*
 283                                  * Check how must data was really read.
 284                                  * Decrement the 'len' and increment the
 285                                  * 'off' appropriately.
 286                                  */
 287                                 cnt = iov_len - auio.uio_resid;
 288                                 if (cnt == 0) {
 289                                         /*
 290                                          * If we were reading a pipe (currently
 291                                          * not implemented), we may now lose
 292                                          * data.
 293                                          */
 294                                         kmem_free(ptr, size);
 295                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 296                                             NULL);
 297                                         releasef(sfv->sfv_fd);
 298                                         return (EINVAL);
 299                                 }
 300                                 sfv_len -= cnt;
 301                                 sfv_off += cnt;
 302 
 303                                 aiov.iov_base = ptr;
 304                                 aiov.iov_len = cnt;
 305                                 auio.uio_loffset = *fileoff;
 306                                 auio.uio_iov = &aiov;
 307                                 auio.uio_iovcnt = 1;
 308                                 auio.uio_resid = cnt;
 309                                 auio.uio_segflg = UIO_SYSSPACE;
 310                                 auio.uio_llimit = curproc->p_fsz_ctl;
 311                                 auio.uio_fmode = fflag;
 312                                 ioflag = auio.uio_fmode &
 313                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 314                                 error = VOP_WRITE(vp, &auio, ioflag,
 315                                     fp->f_cred, NULL);
 316 
 317                                 /*
 318                                  * Check how much data was written. Increment
 319                                  * the 'len' and decrement the 'off' if all
 320                                  * the data was not written.
 321                                  */
 322                                 cnt -= auio.uio_resid;
 323                                 sfv_len += auio.uio_resid;
 324                                 sfv_off -= auio.uio_resid;
 325                                 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
 326                                 if (vp->v_type == VREG)
 327                                         *fileoff += cnt;
 328                                 *count += cnt;
 329                                 if (error != 0) {
 330                                         kmem_free(ptr, size);
 331                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 332                                             NULL);
 333                                         releasef(sfv->sfv_fd);
 334                                         return (error);
 335                                 }
 336                         }
 337                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 338                         releasef(sfv->sfv_fd);
 339                         kmem_free(ptr, size);
 340                 }
 341                 sfv++;
 342         }
 343         return (0);
 344 }
 345 
 346 ssize32_t
 347 sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
 348         size32_t *xferred, int fildes)
 349 {
 350         u_offset_t              fileoff;
 351         int                     copy_cnt;
 352         const struct ksendfilevec64 *copy_vec;
 353         struct ksendfilevec64 sfv[SEND_MAX_CHUNK];
 354         struct vnode *vp;
 355         int error;
 356         ssize32_t count = 0;
 357 
 358         vp = fp->f_vnode;
 359         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 360 
 361         copy_vec = vec;
 362         fileoff = fp->f_offset;
 363 
 364         do {
 365                 copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
 366                 if (copyin(copy_vec, sfv, copy_cnt *
 367                     sizeof (struct ksendfilevec64))) {
 368                         error = EFAULT;
 369                         break;
 370                 }
 371 
 372                 error = sendvec_chunk64(fp, &fileoff, sfv, copy_cnt, &count);
 373                 if (error != 0)
 374                         break;
 375 
 376                 copy_vec += copy_cnt;
 377                 sfvcnt -= copy_cnt;
 378         } while (sfvcnt > 0);
 379 
 380         if (vp->v_type == VREG)
 381                 fp->f_offset += count;
 382 
 383         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 384         if (copyout(&count, xferred, sizeof (count)))
 385                 error = EFAULT;
 386         releasef(fildes);
 387         if (error != 0)
 388                 return (set_errno(error));
 389         return (count);
 390 }
 391 #endif
 392 
 393 int
 394 sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
 395     int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
 396 {
 397         struct vnode *vp;
 398         struct uio auio;
 399         struct iovec aiov;
 400         ushort_t fflag;
 401         int ioflag;
 402         int i, error;
 403         size_t cnt;
 404         ssize_t sfv_len;
 405         u_offset_t sfv_off;
 406 #ifdef _SYSCALL32_IMPL
 407         model_t model = get_udatamodel();
 408         u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
 409             MAXOFF32_T : MAXOFFSET_T;
 410 #else
 411         const u_offset_t maxoff = MAXOFF32_T;
 412 #endif
 413         mblk_t *dmp = NULL;
 414         int wroff;
 415         int buf_left = 0;
 416         size_t  iov_len;
 417         mblk_t  *head, *tmp;
 418         size_t  size = total_size;
 419         size_t  extra;
 420         int tail_len;
 421         struct nmsghdr msg;
 422 
 423         fflag = fp->f_flag;
 424         vp = fp->f_vnode;
 425 
 426         ASSERT(vp->v_type == VSOCK);
 427         ASSERT(maxblk > 0);
 428 
 429         /* If nothing to send, return */
 430         if (total_size == 0)
 431                 return (0);
 432 
 433         if (vp->v_stream != NULL) {
 434                 wroff = (int)vp->v_stream->sd_wroff;
 435                 tail_len = (int)vp->v_stream->sd_tail;
 436         } else {
 437                 struct sonode *so;
 438 
 439                 so = VTOSO(vp);
 440                 wroff = so->so_proto_props.sopp_wroff;
 441                 tail_len = so->so_proto_props.sopp_tail;
 442         }
 443 
 444         extra = wroff + tail_len;
 445 
 446         buf_left = MIN(total_size, maxblk);
 447         head = dmp = allocb(buf_left + extra, BPRI_HI);
 448         if (head == NULL)
 449                 return (ENOMEM);
 450         head->b_wptr = head->b_rptr = head->b_rptr + wroff;
 451         bzero(&msg, sizeof (msg));
 452 
 453         auio.uio_extflg = UIO_COPY_DEFAULT;
 454         for (i = 0; i < copy_cnt; i++) {
 455                 if (ISSIG(curthread, JUSTLOOKING)) {
 456                         freemsg(head);
 457                         return (EINTR);
 458                 }
 459 
 460                 /*
 461                  * Do similar checks as "write" as we are writing
 462                  * sfv_len bytes into "vp".
 463                  */
 464                 sfv_len = (ssize_t)sfv->sfv_len;
 465 
 466                 if (sfv_len == 0) {
 467                         sfv++;
 468                         continue;
 469                 }
 470 
 471                 /* Check for overflow */
 472 #ifdef _SYSCALL32_IMPL
 473                 if (model == DATAMODEL_ILP32) {
 474                         if (((ssize32_t)(*count + sfv_len)) < 0) {
 475                                 freemsg(head);
 476                                 return (EINVAL);
 477                         }
 478                 } else
 479 #endif
 480                 if ((*count + sfv_len) < 0) {
 481                         freemsg(head);
 482                         return (EINVAL);
 483                 }
 484 
 485                 sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
 486 
 487                 if (sfv->sfv_fd == SFV_FD_SELF) {
 488                         while (sfv_len > 0) {
 489                                 if (buf_left == 0) {
 490                                         tmp = dmp;
 491                                         buf_left = MIN(total_size, maxblk);
 492                                         iov_len = MIN(buf_left, sfv_len);
 493                                         dmp = allocb(buf_left + extra, BPRI_HI);
 494                                         if (dmp == NULL) {
 495                                                 freemsg(head);
 496                                                 return (ENOMEM);
 497                                         }
 498                                         dmp->b_wptr = dmp->b_rptr =
 499                                             dmp->b_rptr + wroff;
 500                                         tmp->b_cont = dmp;
 501                                 } else {
 502                                         iov_len = MIN(buf_left, sfv_len);
 503                                 }
 504 
 505                                 aiov.iov_len = iov_len;
 506                                 aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
 507                                 auio.uio_loffset = *fileoff;
 508                                 auio.uio_iovcnt = 1;
 509                                 auio.uio_resid = iov_len;
 510                                 auio.uio_iov = &aiov;
 511                                 auio.uio_segflg = UIO_USERSPACE;
 512                                 auio.uio_llimit = curproc->p_fsz_ctl;
 513                                 auio.uio_fmode = fflag;
 514 
 515                                 buf_left -= iov_len;
 516                                 total_size -= iov_len;
 517                                 sfv_len -= iov_len;
 518                                 sfv_off += iov_len;
 519 
 520                                 error = uiomove((caddr_t)dmp->b_wptr,
 521                                     iov_len, UIO_WRITE, &auio);
 522                                 if (error != 0) {
 523                                         freemsg(head);
 524                                         return (error);
 525                                 }
 526                                 dmp->b_wptr += iov_len;
 527                         }
 528                 } else {
 529                         file_t  *ffp;
 530                         vnode_t *readvp;
 531 
 532                         if ((ffp = getf(sfv->sfv_fd)) == NULL) {
 533                                 freemsg(head);
 534                                 return (EBADF);
 535                         }
 536 
 537                         if ((ffp->f_flag & FREAD) == 0) {
 538                                 releasef(sfv->sfv_fd);
 539                                 freemsg(head);
 540                                 return (EACCES);
 541                         }
 542 
 543                         readvp = ffp->f_vnode;
 544                         if (readvp->v_type != VREG) {
 545                                 releasef(sfv->sfv_fd);
 546                                 freemsg(head);
 547                                 return (EINVAL);
 548                         }
 549 
 550                         /*
 551                          * No point reading and writing to same vp,
 552                          * as long as both are regular files. readvp is not
 553                          * locked; but since we got it from an open file the
 554                          * contents will be valid during the time of access.
 555                          */
 556 
 557                         if (vn_compare(vp, readvp)) {
 558                                 releasef(sfv->sfv_fd);
 559                                 freemsg(head);
 560                                 return (EINVAL);
 561                         }
 562 
 563                         /*
 564                          * Note: we assume readvp != vp. "vp" is already
 565                          * locked, and "readvp" must not be.
 566                          */
 567 
 568                         if (readvp < vp) {
 569                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 570                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 571                                     NULL);
 572                                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 573                         } else {
 574                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 575                                     NULL);
 576                         }
 577 
 578                         /* Same checks as in pread */
 579                         if (sfv_off > maxoff) {
 580                                 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 581                                 releasef(sfv->sfv_fd);
 582                                 freemsg(head);
 583                                 return (EINVAL);
 584                         }
 585                         if (sfv_off + sfv_len > maxoff) {
 586                                 total_size -= (sfv_off + sfv_len - maxoff);
 587                                 sfv_len = (ssize_t)((offset_t)maxoff -
 588                                     sfv_off);
 589                         }
 590 
 591                         while (sfv_len > 0) {
 592                                 if (buf_left == 0) {
 593                                         tmp = dmp;
 594                                         buf_left = MIN(total_size, maxblk);
 595                                         iov_len = MIN(buf_left, sfv_len);
 596                                         dmp = allocb(buf_left + extra, BPRI_HI);
 597                                         if (dmp == NULL) {
 598                                                 VOP_RWUNLOCK(readvp,
 599                                                     V_WRITELOCK_FALSE, NULL);
 600                                                 releasef(sfv->sfv_fd);
 601                                                 freemsg(head);
 602                                                 return (ENOMEM);
 603                                         }
 604                                         dmp->b_wptr = dmp->b_rptr =
 605                                             dmp->b_rptr + wroff;
 606                                         tmp->b_cont = dmp;
 607                                 } else {
 608                                         iov_len = MIN(buf_left, sfv_len);
 609                                 }
 610                                 aiov.iov_base = (caddr_t)dmp->b_wptr;
 611                                 aiov.iov_len = iov_len;
 612                                 auio.uio_loffset = sfv_off;
 613                                 auio.uio_iov = &aiov;
 614                                 auio.uio_iovcnt = 1;
 615                                 auio.uio_resid = iov_len;
 616                                 auio.uio_segflg = UIO_SYSSPACE;
 617                                 auio.uio_llimit = MAXOFFSET_T;
 618                                 auio.uio_fmode = ffp->f_flag;
 619                                 ioflag = auio.uio_fmode &
 620                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 621 
 622                                 /*
 623                                  * If read sync is not asked for,
 624                                  * filter sync flags
 625                                  */
 626                                 if ((ioflag & FRSYNC) == 0)
 627                                         ioflag &= ~(FSYNC|FDSYNC);
 628                                 error = VOP_READ(readvp, &auio, ioflag,
 629                                     fp->f_cred, NULL);
 630                                 if (error != 0) {
 631                                         /*
 632                                          * If we were reading a pipe (currently
 633                                          * not implemented), we may now loose
 634                                          * data.
 635                                          */
 636                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 637                                             NULL);
 638                                         releasef(sfv->sfv_fd);
 639                                         freemsg(head);
 640                                         return (error);
 641                                 }
 642 
 643                                 /*
 644                                  * Check how much data was really read.
 645                                  * Decrement the 'len' and increment the
 646                                  * 'off' appropriately.
 647                                  */
 648                                 cnt = iov_len - auio.uio_resid;
 649                                 if (cnt == 0) {
 650                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 651                                             NULL);
 652                                         releasef(sfv->sfv_fd);
 653                                         freemsg(head);
 654                                         return (EINVAL);
 655                                 }
 656                                 sfv_len -= cnt;
 657                                 sfv_off += cnt;
 658                                 total_size -= cnt;
 659                                 buf_left -= cnt;
 660 
 661                                 dmp->b_wptr += cnt;
 662                         }
 663                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 664                         releasef(sfv->sfv_fd);
 665                 }
 666                 sfv++;
 667         }
 668 
 669         ASSERT(total_size == 0);
 670         error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &head);
 671         if (error != 0) {
 672                 if (head != NULL)
 673                         freemsg(head);
 674                 return (error);
 675         }
 676         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)size;
 677         *count += size;
 678 
 679         return (0);
 680 }
 681 
 682 
 683 int
 684 sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
 685     int copy_cnt, ssize_t *count)
 686 {
 687         struct vnode *vp;
 688         struct uio auio;
 689         struct iovec aiov;
 690         ushort_t fflag;
 691         int ioflag;
 692         int i, error;
 693         size_t cnt;
 694         ssize_t sfv_len;
 695         u_offset_t sfv_off;
 696 #ifdef _SYSCALL32_IMPL
 697         model_t model = get_udatamodel();
 698         u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
 699             MAXOFF32_T : MAXOFFSET_T;
 700 #else
 701         const u_offset_t maxoff = MAXOFF32_T;
 702 #endif
 703         mblk_t  *dmp = NULL;
 704         char    *buf = NULL;
 705         size_t  extra;
 706         int maxblk, wroff, tail_len;
 707         struct sonode *so;
 708         stdata_t *stp;
 709         struct nmsghdr msg;
 710 
 711         fflag = fp->f_flag;
 712         vp = fp->f_vnode;
 713 
 714         if (vp->v_type == VSOCK) {
 715                 so = VTOSO(vp);
 716                 if (vp->v_stream != NULL) {
 717                         stp = vp->v_stream;
 718                         wroff = (int)stp->sd_wroff;
 719                         tail_len = (int)stp->sd_tail;
 720                         maxblk = (int)stp->sd_maxblk;
 721                 } else {
 722                         stp = NULL;
 723                         wroff = so->so_proto_props.sopp_wroff;
 724                         tail_len = so->so_proto_props.sopp_tail;
 725                         maxblk = so->so_proto_props.sopp_maxblk;
 726                 }
 727                 extra = wroff + tail_len;
 728         }
 729 
 730         bzero(&msg, sizeof (msg));
 731         auio.uio_extflg = UIO_COPY_DEFAULT;
 732         for (i = 0; i < copy_cnt; i++) {
 733                 if (ISSIG(curthread, JUSTLOOKING))
 734                         return (EINTR);
 735 
 736                 /*
 737                  * Do similar checks as "write" as we are writing
 738                  * sfv_len bytes into "vp".
 739                  */
 740                 sfv_len = (ssize_t)sfv->sfv_len;
 741 
 742                 if (sfv_len == 0) {
 743                         sfv++;
 744                         continue;
 745                 }
 746 
 747                 if (vp->v_type == VREG) {
 748                         if (*fileoff >= curproc->p_fsz_ctl) {
 749                                 mutex_enter(&curproc->p_lock);
 750                                 (void) rctl_action(
 751                                     rctlproc_legacy[RLIMIT_FSIZE],
 752                                     curproc->p_rctls, curproc, RCA_SAFE);
 753                                 mutex_exit(&curproc->p_lock);
 754 
 755                                 return (EFBIG);
 756                         }
 757 
 758                         if (*fileoff >= maxoff)
 759                                 return (EFBIG);
 760 
 761                         if (*fileoff + sfv_len > maxoff)
 762                                 return (EINVAL);
 763                 }
 764 
 765                 /* Check for overflow */
 766 #ifdef _SYSCALL32_IMPL
 767                 if (model == DATAMODEL_ILP32) {
 768                         if (((ssize32_t)(*count + sfv_len)) < 0)
 769                                 return (EINVAL);
 770                 } else
 771 #endif
 772                 if ((*count + sfv_len) < 0)
 773                         return (EINVAL);
 774 
 775                 sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
 776 
 777                 if (sfv->sfv_fd == SFV_FD_SELF) {
 778                         if (vp->v_type == VSOCK) {
 779                                 while (sfv_len > 0) {
 780                                         size_t iov_len;
 781 
 782                                         iov_len = sfv_len;
 783                                         /*
 784                                          * Socket filters can limit the mblk
 785                                          * size, so limit reads to maxblk if
 786                                          * there are filters present.
 787                                          */
 788                                         if (so->so_filter_active > 0 &&
 789                                             maxblk != INFPSZ)
 790                                                 iov_len = MIN(iov_len, maxblk);
 791 
 792                                         aiov.iov_len = iov_len;
 793                                         aiov.iov_base =
 794                                             (caddr_t)(uintptr_t)sfv_off;
 795 
 796                                         auio.uio_iov = &aiov;
 797                                         auio.uio_iovcnt = 1;
 798                                         auio.uio_loffset = *fileoff;
 799                                         auio.uio_segflg = UIO_USERSPACE;
 800                                         auio.uio_fmode = fflag;
 801                                         auio.uio_llimit = curproc->p_fsz_ctl;
 802                                         auio.uio_resid = iov_len;
 803 
 804                                         dmp = allocb(iov_len + extra, BPRI_HI);
 805                                         if (dmp == NULL)
 806                                                 return (ENOMEM);
 807                                         dmp->b_wptr = dmp->b_rptr =
 808                                             dmp->b_rptr + wroff;
 809                                         error = uiomove((caddr_t)dmp->b_wptr,
 810                                             iov_len, UIO_WRITE, &auio);
 811                                         if (error != 0) {
 812                                                 freeb(dmp);
 813                                                 return (error);
 814                                         }
 815                                         dmp->b_wptr += iov_len;
 816                                         error = socket_sendmblk(VTOSO(vp),
 817                                             &msg, fflag, CRED(), &dmp);
 818 
 819                                         if (error != 0) {
 820                                                 if (dmp != NULL)
 821                                                         freeb(dmp);
 822                                                 return (error);
 823                                         }
 824                                         ttolwp(curthread)->lwp_ru.ioch +=
 825                                             (ulong_t)iov_len;
 826                                         *count += iov_len;
 827                                         sfv_len -= iov_len;
 828                                         sfv_off += iov_len;
 829                                 }
 830                         } else {
 831                                 aiov.iov_len = sfv_len;
 832                                 aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
 833 
 834                                 auio.uio_iov = &aiov;
 835                                 auio.uio_iovcnt = 1;
 836                                 auio.uio_loffset = *fileoff;
 837                                 auio.uio_segflg = UIO_USERSPACE;
 838                                 auio.uio_fmode = fflag;
 839                                 auio.uio_llimit = curproc->p_fsz_ctl;
 840                                 auio.uio_resid = sfv_len;
 841 
 842                                 ioflag = auio.uio_fmode &
 843                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 844                                 while (sfv_len > 0) {
 845                                         error = VOP_WRITE(vp, &auio, ioflag,
 846                                             fp->f_cred, NULL);
 847                                         cnt = sfv_len - auio.uio_resid;
 848                                         sfv_len -= cnt;
 849                                         ttolwp(curthread)->lwp_ru.ioch +=
 850                                             (ulong_t)cnt;
 851                                         *fileoff += cnt;
 852                                         *count += cnt;
 853                                         if (error != 0)
 854                                                 return (error);
 855                                 }
 856                         }
 857                 } else {
 858                         int segmapit = 0;
 859                         file_t  *ffp;
 860                         vnode_t *readvp;
 861                         struct vnode *realvp;
 862                         size_t  size;
 863                         caddr_t ptr;
 864 
 865                         if ((ffp = getf(sfv->sfv_fd)) == NULL)
 866                                 return (EBADF);
 867 
 868                         if ((ffp->f_flag & FREAD) == 0) {
 869                                 releasef(sfv->sfv_fd);
 870                                 return (EBADF);
 871                         }
 872 
 873                         readvp = ffp->f_vnode;
 874                         if (VOP_REALVP(readvp, &realvp, NULL) == 0)
 875                                 readvp = realvp;
 876                         if (readvp->v_type != VREG) {
 877                                 releasef(sfv->sfv_fd);
 878                                 return (EINVAL);
 879                         }
 880 
 881                         /*
 882                          * No point reading and writing to same vp,
 883                          * as long as both are regular files. readvp is not
 884                          * locked; but since we got it from an open file the
 885                          * contents will be valid during the time of access.
 886                          */
 887                         if (vn_compare(vp, readvp)) {
 888                                 releasef(sfv->sfv_fd);
 889                                 return (EINVAL);
 890                         }
 891 
 892                         /*
 893                          * Note: we assume readvp != vp. "vp" is already
 894                          * locked, and "readvp" must not be.
 895                          */
 896                         if (readvp < vp) {
 897                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 898                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 899                                     NULL);
 900                                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 901                         } else {
 902                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 903                                     NULL);
 904                         }
 905 
 906                         /* Same checks as in pread */
 907                         if (sfv_off > maxoff) {
 908                                 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 909                                 releasef(sfv->sfv_fd);
 910                                 return (EINVAL);
 911                         }
 912                         if (sfv_off + sfv_len > maxoff) {
 913                                 sfv_len = (ssize_t)((offset_t)maxoff -
 914                                     sfv_off);
 915                         }
 916                         /* Find the native blocksize to transfer data */
 917                         size = MIN(vp->v_vfsp->vfs_bsize,
 918                             readvp->v_vfsp->vfs_bsize);
 919                         size = sfv_len < size ? sfv_len : size;
 920 
 921                         if (vp->v_type != VSOCK) {
 922                                 segmapit = 0;
 923                                 buf = kmem_alloc(size, KM_NOSLEEP);
 924                                 if (buf == NULL) {
 925                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 926                                             NULL);
 927                                         releasef(sfv->sfv_fd);
 928                                         return (ENOMEM);
 929                                 }
 930                         } else {
 931                                 uint_t  copyflag;
 932 
 933                                 copyflag = stp != NULL ? stp->sd_copyflag :
 934                                     so->so_proto_props.sopp_zcopyflag;
 935 
 936                                 /*
 937                                  * Socket filters can limit the mblk size,
 938                                  * so limit reads to maxblk if there are
 939                                  * filters present.
 940                                  */
 941                                 if (so->so_filter_active > 0 &&
 942                                     maxblk != INFPSZ)
 943                                         size = MIN(size, maxblk);
 944 
 945                                 if (vn_has_flocks(readvp) ||
 946                                     readvp->v_flag & VNOMAP ||
 947                                     copyflag & STZCVMUNSAFE) {
 948                                         segmapit = 0;
 949                                 } else if (copyflag & STZCVMSAFE) {
 950                                         segmapit = 1;
 951                                 } else {
 952                                         int on = 1;
 953                                         if (socket_setsockopt(VTOSO(vp),
 954                                             SOL_SOCKET, SO_SND_COPYAVOID,
 955                                             &on, sizeof (on), CRED()) == 0)
 956                                         segmapit = 1;
 957                                 }
 958                         }
 959 
 960                         if (segmapit) {
 961                                 boolean_t nowait;
 962 
 963                                 nowait = (sfv->sfv_flag & SFV_NOWAIT) != 0;
 964                                 error = snf_segmap(fp, readvp, sfv_off,
 965                                     (u_offset_t)sfv_len, (ssize_t *)&cnt,
 966                                     nowait);
 967                                 releasef(sfv->sfv_fd);
 968                                 *count += cnt;
 969                                 if (error)
 970                                         return (error);
 971                                 sfv++;
 972                                 continue;
 973                         }
 974 
 975                         while (sfv_len > 0) {
 976                                 size_t  iov_len;
 977 
 978                                 iov_len = MIN(size, sfv_len);
 979 
 980                                 if (vp->v_type == VSOCK) {
 981                                         dmp = allocb(iov_len + extra, BPRI_HI);
 982                                         if (dmp == NULL) {
 983                                                 VOP_RWUNLOCK(readvp,
 984                                                     V_WRITELOCK_FALSE, NULL);
 985                                                 releasef(sfv->sfv_fd);
 986                                                 return (ENOMEM);
 987                                         }
 988                                         dmp->b_wptr = dmp->b_rptr =
 989                                             dmp->b_rptr + wroff;
 990                                         ptr = (caddr_t)dmp->b_rptr;
 991                                 } else {
 992                                         ptr = buf;
 993                                 }
 994 
 995                                 aiov.iov_base = ptr;
 996                                 aiov.iov_len = iov_len;
 997                                 auio.uio_loffset = sfv_off;
 998                                 auio.uio_iov = &aiov;
 999                                 auio.uio_iovcnt = 1;
1000                                 auio.uio_resid = iov_len;
1001                                 auio.uio_segflg = UIO_SYSSPACE;
1002                                 auio.uio_llimit = MAXOFFSET_T;
1003                                 auio.uio_fmode = ffp->f_flag;
1004                                 ioflag = auio.uio_fmode &
1005                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1006 
1007                                 /*
1008                                  * If read sync is not asked for,
1009                                  * filter sync flags
1010                                  */
1011                                 if ((ioflag & FRSYNC) == 0)
1012                                         ioflag &= ~(FSYNC|FDSYNC);
1013                                 error = VOP_READ(readvp, &auio, ioflag,
1014                                     fp->f_cred, NULL);
1015                                 if (error != 0) {
1016                                         /*
1017                                          * If we were reading a pipe (currently
1018                                          * not implemented), we may now lose
1019                                          * data.
1020                                          */
1021                                         if (vp->v_type == VSOCK)
1022                                                 freeb(dmp);
1023                                         else
1024                                                 kmem_free(buf, size);
1025                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
1026                                             NULL);
1027                                         releasef(sfv->sfv_fd);
1028                                         return (error);
1029                                 }
1030 
1031                                 /*
1032                                  * Check how much data was really read.
1033                                  * Decrement the 'len' and increment the
1034                                  * 'off' appropriately.
1035                                  */
1036                                 cnt = iov_len - auio.uio_resid;
1037                                 if (cnt == 0) {
1038                                         if (vp->v_type == VSOCK)
1039                                                 freeb(dmp);
1040                                         else
1041                                                 kmem_free(buf, size);
1042                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
1043                                             NULL);
1044                                         releasef(sfv->sfv_fd);
1045                                         return (EINVAL);
1046                                 }
1047                                 sfv_len -= cnt;
1048                                 sfv_off += cnt;
1049 
1050                                 if (vp->v_type == VSOCK) {
1051                                         dmp->b_wptr = dmp->b_rptr + cnt;
1052 
1053                                         error = socket_sendmblk(VTOSO(vp),
1054                                             &msg, fflag, CRED(), &dmp);
1055 
1056                                         if (error != 0) {
1057                                                 if (dmp != NULL)
1058                                                         freeb(dmp);
1059                                                 VOP_RWUNLOCK(readvp,
1060                                                     V_WRITELOCK_FALSE, NULL);
1061                                                 releasef(sfv->sfv_fd);
1062                                                 return (error);
1063                                         }
1064 
1065                                         ttolwp(curthread)->lwp_ru.ioch +=
1066                                             (ulong_t)cnt;
1067                                         *count += cnt;
1068                                 } else {
1069 
1070                                         aiov.iov_base = ptr;
1071                                         aiov.iov_len = cnt;
1072                                         auio.uio_loffset = *fileoff;
1073                                         auio.uio_resid = cnt;
1074                                         auio.uio_iov = &aiov;
1075                                         auio.uio_iovcnt = 1;
1076                                         auio.uio_segflg = UIO_SYSSPACE;
1077                                         auio.uio_llimit = curproc->p_fsz_ctl;
1078                                         auio.uio_fmode = fflag;
1079                                         ioflag = auio.uio_fmode &
1080                                             (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1081                                         error = VOP_WRITE(vp, &auio, ioflag,
1082                                             fp->f_cred, NULL);
1083 
1084                                         /*
1085                                          * Check how much data was written.
1086                                          * Increment the 'len' and decrement the
1087                                          * 'off' if all the data was not
1088                                          * written.
1089                                          */
1090                                         cnt -= auio.uio_resid;
1091                                         sfv_len += auio.uio_resid;
1092                                         sfv_off -= auio.uio_resid;
1093                                         ttolwp(curthread)->lwp_ru.ioch +=
1094                                             (ulong_t)cnt;
1095                                         *fileoff += cnt;
1096                                         *count += cnt;
1097                                         if (error != 0) {
1098                                                 kmem_free(buf, size);
1099                                                 VOP_RWUNLOCK(readvp,
1100                                                     V_WRITELOCK_FALSE, NULL);
1101                                                 releasef(sfv->sfv_fd);
1102                                                 return (error);
1103                                         }
1104                                 }
1105                         }
1106                         if (buf) {
1107                                 kmem_free(buf, size);
1108                                 buf = NULL;
1109                         }
1110                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
1111                         releasef(sfv->sfv_fd);
1112                 }
1113                 sfv++;
1114         }
1115         return (0);
1116 }
1117 
1118 ssize_t
1119 sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
1120     size_t *xferred)
1121 {
1122         int error = 0;
1123         int first_vector_error = 0;
1124         file_t *fp;
1125         struct vnode *vp;
1126         struct sonode *so;
1127         u_offset_t fileoff;
1128         int copy_cnt;
1129         const struct sendfilevec *copy_vec;
1130         struct sendfilevec sfv[SEND_MAX_CHUNK];
1131         ssize_t count = 0;
1132 #ifdef _SYSCALL32_IMPL
1133         struct ksendfilevec32 sfv32[SEND_MAX_CHUNK];
1134 #endif
1135         ssize_t total_size;
1136         int i;
1137         boolean_t is_sock = B_FALSE;
1138         int maxblk = 0;
1139 
1140         if (sfvcnt <= 0)
1141                 return (set_errno(EINVAL));
1142 
1143         if ((fp = getf(fildes)) == NULL)
1144                 return (set_errno(EBADF));
1145 
1146         if (((fp->f_flag) & FWRITE) == 0) {
1147                 error = EBADF;
1148                 goto err;
1149         }
1150 
1151         fileoff = fp->f_offset;
1152         vp = fp->f_vnode;
1153 
1154         switch (vp->v_type) {
1155         case VSOCK:
1156                 so = VTOSO(vp);
1157                 is_sock = B_TRUE;
1158                 if (SOCK_IS_NONSTR(so)) {
1159                         maxblk = so->so_proto_props.sopp_maxblk;
1160                 } else {
1161                         maxblk = (int)vp->v_stream->sd_maxblk;
1162                 }
1163                 break;
1164         case VREG:
1165                 break;
1166         default:
1167                 error = EINVAL;
1168                 goto err;
1169         }
1170 
1171         switch (opcode) {
1172         case SENDFILEV :
1173                 break;
1174 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1175         case SENDFILEV64 :
1176                 return (sendvec64(fp, (struct ksendfilevec64 *)vec, sfvcnt,
1177                     (size32_t *)xferred, fildes));
1178 #endif
1179         default :
1180                 error = ENOSYS;
1181                 break;
1182         }
1183 
1184         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1185         copy_vec = vec;
1186 
1187         do {
1188                 total_size = 0;
1189                 copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
1190 #ifdef _SYSCALL32_IMPL
1191                 /* 32-bit callers need to have their iovec expanded. */
1192                 if (get_udatamodel() == DATAMODEL_ILP32) {
1193                         if (copyin(copy_vec, sfv32,
1194                             copy_cnt * sizeof (ksendfilevec32_t))) {
1195                                 error = EFAULT;
1196                                 break;
1197                         }
1198 
1199                         for (i = 0; i < copy_cnt; i++) {
1200                                 sfv[i].sfv_fd = sfv32[i].sfv_fd;
1201                                 sfv[i].sfv_off =
1202                                     (off_t)(uint32_t)sfv32[i].sfv_off;
1203                                 sfv[i].sfv_len = (size_t)sfv32[i].sfv_len;
1204                                 total_size += sfv[i].sfv_len;
1205                                 sfv[i].sfv_flag = sfv32[i].sfv_flag;
1206                                 /*
1207                                  * Individual elements of the vector must not
1208                                  * wrap or overflow, as later math is signed.
1209                                  * Equally total_size needs to be checked after
1210                                  * each vector is added in, to be sure that
1211                                  * rogue values haven't overflowed the counter.
1212                                  */
1213                                 if (((ssize32_t)sfv[i].sfv_len < 0) ||
1214                                     ((ssize32_t)total_size < 0)) {
1215                                         /*
1216                                          * Truncate the vector to send data
1217                                          * described by elements before the
1218                                          * error.
1219                                          */
1220                                         copy_cnt = i;
1221                                         first_vector_error = EINVAL;
1222                                         /* total_size can't be trusted */
1223                                         if ((ssize32_t)total_size < 0)
1224                                                 error = EINVAL;
1225                                         break;
1226                                 }
1227                         }
1228                         /* Nothing to do, process errors */
1229                         if (copy_cnt == 0)
1230                                 break;
1231 
1232                 } else {
1233 #endif
1234                         if (copyin(copy_vec, sfv,
1235                             copy_cnt * sizeof (sendfilevec_t))) {
1236                                 error = EFAULT;
1237                                 break;
1238                         }
1239 
1240                         for (i = 0; i < copy_cnt; i++) {
1241                                 total_size += sfv[i].sfv_len;
1242                                 /*
1243                                  * Individual elements of the vector must not
1244                                  * wrap or overflow, as later math is signed.
1245                                  * Equally total_size needs to be checked after
1246                                  * each vector is added in, to be sure that
1247                                  * rogue values haven't overflowed the counter.
1248                                  */
1249                                 if (((ssize_t)sfv[i].sfv_len < 0) ||
1250                                     (total_size < 0)) {
1251                                         /*
1252                                          * Truncate the vector to send data
1253                                          * described by elements before the
1254                                          * error.
1255                                          */
1256                                         copy_cnt = i;
1257                                         first_vector_error = EINVAL;
1258                                         /* total_size can't be trusted */
1259                                         if (total_size < 0)
1260                                                 error = EINVAL;
1261                                         break;
1262                                 }
1263                         }
1264                         /* Nothing to do, process errors */
1265                         if (copy_cnt == 0)
1266                                 break;
1267 #ifdef _SYSCALL32_IMPL
1268                 }
1269 #endif
1270 
1271                 /*
1272                  * The task between deciding to use sendvec_small_chunk
1273                  * and sendvec_chunk is dependant on multiple things:
1274                  *
1275                  * i) latency is important for smaller files. So if the
1276                  * data is smaller than 'tcp_slow_start_initial' times
1277                  * maxblk, then use sendvec_small_chunk which creates
1278                  * maxblk size mblks and chains them together and sends
1279                  * them to TCP in one shot. It also leaves 'wroff' size
1280                  * space for the headers in each mblk.
1281                  *
1282                  * ii) for total size bigger than 'tcp_slow_start_initial'
1283                  * time maxblk, its probably real file data which is
1284                  * dominating. So its better to use sendvec_chunk because
1285                  * performance goes to dog if we don't do pagesize reads.
1286                  * sendvec_chunk will do pagesize reads and write them
1287                  * in pagesize mblks to TCP.
1288                  *
1289                  * Side Notes: A write to file has not been optimized.
1290                  * Future zero copy code will plugin into sendvec_chunk
1291                  * only because doing zero copy for files smaller then
1292                  * pagesize is useless.
1293                  *
1294                  * Note, if socket has NL7C enabled then call NL7C's
1295                  * senfilev() function to consume the sfv[].
1296                  */
1297                 if (is_sock) {
1298                         if (!SOCK_IS_NONSTR(so) &&
1299                             _SOTOTPI(so)->sti_nl7c_flags != 0) {
1300                                 error = nl7c_sendfilev(so, &fileoff,
1301                                     sfv, copy_cnt, &count);
1302                         } else if ((total_size <= (4 * maxblk)) &&
1303                             error == 0) {
1304                                 error = sendvec_small_chunk(fp,
1305                                     &fileoff, sfv, copy_cnt,
1306                                     total_size, maxblk, &count);
1307                         } else {
1308                                 error = sendvec_chunk(fp, &fileoff,
1309                                     sfv, copy_cnt, &count);
1310                         }
1311                 } else {
1312                         ASSERT(vp->v_type == VREG);
1313                         error = sendvec_chunk(fp, &fileoff, sfv, copy_cnt,
1314                             &count);
1315                 }
1316 
1317 
1318 #ifdef _SYSCALL32_IMPL
1319         if (get_udatamodel() == DATAMODEL_ILP32)
1320                 copy_vec = (const struct sendfilevec *)((char *)copy_vec +
1321                     (copy_cnt * sizeof (ksendfilevec32_t)));
1322         else
1323 #endif
1324                 copy_vec += copy_cnt;
1325                 sfvcnt -= copy_cnt;
1326 
1327         /* Process all vector members up to first error */
1328         } while ((sfvcnt > 0) && first_vector_error == 0 && error == 0);
1329 
1330         if (vp->v_type == VREG)
1331                 fp->f_offset += count;
1332 
1333         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1334 
1335 #ifdef _SYSCALL32_IMPL
1336         if (get_udatamodel() == DATAMODEL_ILP32) {
1337                 ssize32_t count32 = (ssize32_t)count;
1338                 if (copyout(&count32, xferred, sizeof (count32)))
1339                         error = EFAULT;
1340                 releasef(fildes);
1341                 if (error != 0)
1342                         return (set_errno(error));
1343                 if (first_vector_error != 0)
1344                         return (set_errno(first_vector_error));
1345                 return (count32);
1346         }
1347 #endif
1348         if (copyout(&count, xferred, sizeof (count)))
1349                 error = EFAULT;
1350         releasef(fildes);
1351         if (error != 0)
1352                 return (set_errno(error));
1353         if (first_vector_error != 0)
1354                 return (set_errno(first_vector_error));
1355         return (count);
1356 err:
1357         ASSERT(error != 0);
1358         releasef(fildes);
1359         return (set_errno(error));
1360 }