1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/t_lock.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/buf.h>
  31 #include <sys/conf.h>
  32 #include <sys/cred.h>
  33 #include <sys/kmem.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/vfs.h>
  36 #include <sys/vnode.h>
  37 #include <sys/debug.h>
  38 #include <sys/errno.h>
  39 #include <sys/time.h>
  40 #include <sys/file.h>
  41 #include <sys/open.h>
  42 #include <sys/user.h>
  43 #include <sys/termios.h>
  44 #include <sys/stream.h>
  45 #include <sys/strsubr.h>
  46 #include <sys/sunddi.h>
  47 #include <sys/esunddi.h>
  48 #include <sys/flock.h>
  49 #include <sys/modctl.h>
  50 #include <sys/cmn_err.h>
  51 #include <sys/vmsystm.h>
  52 
  53 #include <sys/socket.h>
  54 #include <sys/socketvar.h>
  55 #include <fs/sockfs/sockcommon.h>
  56 #include <fs/sockfs/socktpi.h>
  57 
  58 #include <netinet/in.h>
  59 #include <sys/sendfile.h>
  60 #include <sys/un.h>
  61 #include <sys/tihdr.h>
  62 #include <sys/atomic.h>
  63 
  64 #include <inet/common.h>
  65 #include <inet/ip.h>
  66 #include <inet/ip6.h>
  67 #include <inet/tcp.h>
  68 
  69 extern int sosendfile64(file_t *, file_t *, const struct ksendfilevec64 *,
  70                 ssize32_t *);
  71 extern int nl7c_sendfilev(struct sonode *, u_offset_t *, struct sendfilevec *,
  72                 int, ssize_t *);
  73 extern int snf_segmap(file_t *, vnode_t *, u_offset_t, u_offset_t, ssize_t *,
  74                 boolean_t);
  75 extern sotpi_info_t *sotpi_sototpi(struct sonode *);
  76 
  /*
   * Number of vector entries handled per batch: sendvec64() keeps an on-stack
   * array of this many ksendfilevec64 entries and copies in at most this many
   * from the user's vector on each pass.
   */
  77 #define SEND_MAX_CHUNK  16
  78 
  79 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
  80 /*
  81  * 64 bit offsets for 32 bit applications only running either on
  82  * 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
  83  * more than 2GB of data.
      *
      * sendvec_chunk64() walks the 'copy_cnt' entries of 'sfv' and writes each
      * one to the vnode behind 'fp':
      *   - an entry whose sfv_fd is SFV_FD_SELF is written directly from the
      *     user address held in sfv_off;
      *   - any other entry must name a regular file open for reading. It is
      *     handed to sosendfile64() when the target is a socket (VSOCK), and
      *     otherwise copied through a temporary kernel buffer with
      *     VOP_READ()/VOP_WRITE().
      *
      * '*fileoff' is the write offset in the target and is advanced only when
      * the target is a regular file; '*count' accumulates the bytes written.
      * Both are updated before a write error is checked, so they reflect
      * partial progress. Returns 0 on success or an errno value.
      *
      * The code below treats the target vnode as already write-locked on
      * entry (sendvec64() takes VOP_RWLOCK before calling).
  84  */
  85 static int
  86 sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
  87     int copy_cnt, ssize32_t *count)
  88 {
  89         struct vnode *vp;
  90         ushort_t fflag;
  91         int ioflag;
  92         size32_t cnt;
  93         ssize32_t sfv_len;
  94         ssize32_t tmpcount;
  95         u_offset_t sfv_off;
  96         struct uio auio;
  97         struct iovec aiov;
  98         int i, error;
  99 
 100         fflag = fp->f_flag;
 101         vp = fp->f_vnode;
 102         for (i = 0; i < copy_cnt; i++) {
 103 
                     /* Give up with EINTR if ISSIG() reports a signal. */
 104                 if (ISSIG(curthread, JUSTLOOKING))
 105                         return (EINTR);
 106 
 107                 /*
 108                  * Do similar checks as "write" as we are writing
 109                  * sfv_len bytes into "vp".
 110                  */
 111                 sfv_len = (ssize32_t)sfv->sfv_len;
 112 
                     /* Nothing to transfer for this entry; go to the next. */
 113                 if (sfv_len == 0) {
 114                         sfv++;
 115                         continue;
 116                 }
 117 
                     /* A length that is negative as an ssize32_t is rejected. */
 118                 if (sfv_len < 0)
 119                         return (EINVAL);
 120 
                     /*
                      * Target is a regular file: refuse to start at or beyond
                      * the process file size limit (p_fsz_ctl, after invoking
                      * the RLIMIT_FSIZE rctl action) or OFFSET_MAX(fp), and
                      * refuse a transfer that would run past OFFSET_MAX(fp).
                      */
 121                 if (vp->v_type == VREG) {
 122                         if (*fileoff >= curproc->p_fsz_ctl) {
 123                                 mutex_enter(&curproc->p_lock);
 124                                 (void) rctl_action(
 125                                     rctlproc_legacy[RLIMIT_FSIZE],
 126                                     curproc->p_rctls, curproc, RCA_SAFE);
 127                                 mutex_exit(&curproc->p_lock);
 128                                 return (EFBIG);
 129                         }
 130 
 131                         if (*fileoff >= OFFSET_MAX(fp))
 132                                 return (EFBIG);
 133 
 134                         if (*fileoff + sfv_len > OFFSET_MAX(fp))
 135                                 return (EINVAL);
 136                 }
 137 
                     /* The running total must not go negative as an ssize32_t. */
 138                 tmpcount = *count + sfv_len;
 139                 if (tmpcount < 0)
 140                         return (EINVAL);
 141 
 142                 sfv_off = sfv->sfv_off;
 143 
 144                 auio.uio_extflg = UIO_COPY_DEFAULT;
 145                 if (sfv->sfv_fd == SFV_FD_SELF) {
                             /*
                              * sfv_off is a user address: write sfv_len bytes
                              * from there to vp, repeating VOP_WRITE until
                              * everything is written or an error is returned.
                              */
 146                         aiov.iov_len = sfv_len;
 147                         aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
 148                         auio.uio_loffset = *fileoff;
 149                         auio.uio_iovcnt = 1;
 150                         auio.uio_resid = sfv_len;
 151                         auio.uio_iov = &aiov;
 152                         auio.uio_segflg = UIO_USERSPACE;
 153                         auio.uio_llimit = curproc->p_fsz_ctl;
 154                         auio.uio_fmode = fflag;
 155                         ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 156                         while (sfv_len > 0) {
 157                                 error = VOP_WRITE(vp, &auio, ioflag,
 158                                     fp->f_cred, NULL);
                                     /* Bytes consumed by this call. */
 159                                 cnt = sfv_len - auio.uio_resid;
 160                                 sfv_len -= cnt;
 161                                 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
 162                                 if (vp->v_type == VREG)
 163                                         *fileoff += cnt;
 164                                 *count += cnt;
 165                                 if (error != 0)
 166                                         return (error);
 167                         }
 168                 } else {
 169                         file_t  *ffp;
 170                         vnode_t *readvp;
 171                         size_t  size;
 172                         caddr_t ptr;
 173 
                             /*
                              * sfv_fd names a source file. Every return from
                              * here on must drop the getf() hold with
                              * releasef().
                              */
 174                         if ((ffp = getf(sfv->sfv_fd)) == NULL)
 175                                 return (EBADF);
 176 
 177                         if ((ffp->f_flag & FREAD) == 0) {
 178                                 releasef(sfv->sfv_fd);
 179                                 return (EBADF);
 180                         }
 181 
 182                         readvp = ffp->f_vnode;
 183                         if (readvp->v_type != VREG) {
 184                                 releasef(sfv->sfv_fd);
 185                                 return (EINVAL);
 186                         }
 187 
 188                         /*
 189                          * No point reading and writing to same vp,
 190                          * as long as both are regular files. readvp is not
 191                          * locked; but since we got it from an open file the
 192                          * contents will be valid during the time of access.
 193                          */
 194                         if (vn_compare(vp, readvp)) {
 195                                 releasef(sfv->sfv_fd);
 196                                 return (EINVAL);
 197                         }
 198 
 199                         /*
 200                          * Optimize the regular file over
 201                          * the socket case.
                              *
                              * NOTE(review): there is no releasef() on this
                              * path; presumably sosendfile64() drops the hold
                              * on sfv_fd itself -- confirm against its
                              * definition.
 202                          */
 203                         if (vp->v_type == VSOCK) {
 204                                 error = sosendfile64(fp, ffp, sfv,
 205                                     (ssize32_t *)&cnt);
 206                                 *count += cnt;
 207                                 if (error)
 208                                         return (error);
 209                                 sfv++;
 210                                 continue;
 211                         }
 212 
 213                         /*
 214                          * Note: we assume readvp != vp. "vp" is already
 215                          * locked, and "readvp" must not be.
                              *
                              * The two locks are taken in vnode address order:
                              * when readvp is the lower address, vp is dropped,
                              * readvp is read-locked and vp is then re-locked
                              * (presumably to avoid deadlocking against a
                              * transfer in the opposite direction).
 216                          */
 217                         if (readvp < vp) {
 218                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 219                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 220                                     NULL);
 221                                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 222                         } else {
 223                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 224                                     NULL);
 225                         }
 226 
 227                         /*
 228                          * Same checks as in pread64.
 229                          */
 230                         if (sfv_off > MAXOFFSET_T) {
 231                                 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 232                                 releasef(sfv->sfv_fd);
 233                                 return (EINVAL);
 234                         }
 235 
                             /* Clip the request so it ends at MAXOFFSET_T. */
 236                         if (sfv_off + sfv_len > MAXOFFSET_T)
 237                                 sfv_len = (ssize32_t)(MAXOFFSET_T - sfv_off);
 238 
 239                         /* Find the native blocksize to transfer data */
 240                         size = MIN(vp->v_vfsp->vfs_bsize,
 241                             readvp->v_vfsp->vfs_bsize);
 242                         size = sfv_len < size ? sfv_len : size;
                             /*
                              * The bounce buffer is allocated KM_NOSLEEP, so a
                              * failed allocation is reported as ENOMEM.
                              */
 243                         ptr = kmem_alloc(size, KM_NOSLEEP);
 244                         if (ptr == NULL) {
 245                                 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 246                                 releasef(sfv->sfv_fd);
 247                                 return (ENOMEM);
 248                         }
 249 
                             /*
                              * Copy loop: read up to 'size' bytes from readvp
                              * into ptr, then write what was read to vp.
                              */
 250                         while (sfv_len > 0) {
 251                                 size_t  iov_len;
 252 
 253                                 iov_len = MIN(size, sfv_len);
 254                                 aiov.iov_base = ptr;
 255                                 aiov.iov_len = iov_len;
 256                                 auio.uio_loffset = sfv_off;
 257                                 auio.uio_iov = &aiov;
 258                                 auio.uio_iovcnt = 1;
 259                                 auio.uio_resid = iov_len;
 260                                 auio.uio_segflg = UIO_SYSSPACE;
 261                                 auio.uio_llimit = MAXOFFSET_T;
 262                                 auio.uio_fmode = ffp->f_flag;
 263                                 ioflag = auio.uio_fmode &
 264                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 265 
 266                                 /*
 267                                  * If read sync is not asked for,
 268                                  * filter sync flags
 269                                  */
 270                                 if ((ioflag & FRSYNC) == 0)
 271                                         ioflag &= ~(FSYNC|FDSYNC);
 272                                 error = VOP_READ(readvp, &auio, ioflag,
 273                                     fp->f_cred, NULL);
 274                                 if (error) {
 275                                         kmem_free(ptr, size);
 276                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 277                                             NULL);
 278                                         releasef(sfv->sfv_fd);
 279                                         return (error);
 280                                 }
 281 
 282                                 /*
 283                                  * Check how much data was really read.
 284                                  * Decrement the 'len' and increment the
 285                                  * 'off' appropriately.
 286                                  */
 287                                 cnt = iov_len - auio.uio_resid;
 288                                 if (cnt == 0) {
 289                                         /*
 290                                          * If we were reading a pipe (currently
 291                                          * not implemented), we may now lose
 292                                          * data.
 293                                          */
 294                                         kmem_free(ptr, size);
 295                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 296                                             NULL);
 297                                         releasef(sfv->sfv_fd);
 298                                         return (EINVAL);
 299                                 }
 300                                 sfv_len -= cnt;
 301                                 sfv_off += cnt;
 302 
                                     /* Now write the bytes just read to vp. */
 303                                 aiov.iov_base = ptr;
 304                                 aiov.iov_len = cnt;
 305                                 auio.uio_loffset = *fileoff;
 306                                 auio.uio_iov = &aiov;
 307                                 auio.uio_iovcnt = 1;
 308                                 auio.uio_resid = cnt;
 309                                 auio.uio_segflg = UIO_SYSSPACE;
 310                                 auio.uio_llimit = curproc->p_fsz_ctl;
 311                                 auio.uio_fmode = fflag;
 312                                 ioflag = auio.uio_fmode &
 313                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 314                                 error = VOP_WRITE(vp, &auio, ioflag,
 315                                     fp->f_cred, NULL);
 316 
 317                                 /*
 318                                  * Check how much data was written. Increment
 319                                  * the 'len' and decrement the 'off' if all
 320                                  * the data was not written.
 321                                  */
 322                                 cnt -= auio.uio_resid;
 323                                 sfv_len += auio.uio_resid;
 324                                 sfv_off -= auio.uio_resid;
 325                                 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
 326                                 if (vp->v_type == VREG)
 327                                         *fileoff += cnt;
 328                                 *count += cnt;
 329                                 if (error != 0) {
 330                                         kmem_free(ptr, size);
 331                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 332                                             NULL);
 333                                         releasef(sfv->sfv_fd);
 334                                         return (error);
 335                                 }
 336                         }
                             /* Entry done: unlock and release the source file. */
 337                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 338                         releasef(sfv->sfv_fd);
 339                         kmem_free(ptr, size);
 340                 }
 341                 sfv++;
 342         }
 343         return (0);
 344 }
 345 
 /*
  * Write the 'sfvcnt' ksendfilevec64 entries at user address 'vec' to the
  * file 'fp'.
  *
  * fp's vnode is write-locked with VOP_RWLOCK, then the vector is copied in
  * and passed to sendvec_chunk64() in batches of at most SEND_MAX_CHUNK
  * entries, starting at fp's current f_offset. Processing stops at the first
  * copyin failure (EFAULT) or sendvec_chunk64() error.
  *
  * The byte count accumulated so far is copied out to 'xferred' whether or
  * not an error occurred, and releasef(fildes) is called before returning
  * (presumably pairing with a getf() in the caller -- confirm). Returns the
  * byte count on success, or the result of set_errno() on failure.
  */
 346 static ssize32_t
 347 sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
 348         size32_t *xferred, int fildes)
 349 {
 350         u_offset_t              fileoff;
 351         int                     copy_cnt;
 352         const struct ksendfilevec64 *copy_vec;
 353         struct ksendfilevec64 sfv[SEND_MAX_CHUNK];
 354         struct vnode *vp;
 355         int error;
 356         ssize32_t count = 0;
 357 
 358         vp = fp->f_vnode;
 359         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 360 
 361         copy_vec = vec;
 362         fileoff = fp->f_offset;
 363 
             /*
              * The loop body always runs at least once, so 'error' is set
              * before it is examined below.
              */
 364         do {
 365                 copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
 366                 if (copyin(copy_vec, sfv, copy_cnt *
 367                     sizeof (struct ksendfilevec64))) {
 368                         error = EFAULT;
 369                         break;
 370                 }
 371 
 372                 error = sendvec_chunk64(fp, &fileoff, sfv, copy_cnt, &count);
 373                 if (error != 0)
 374                         break;
 375 
                     /* Advance to the next batch of user vector entries. */
 376                 copy_vec += copy_cnt;
 377                 sfvcnt -= copy_cnt;
 378         } while (sfvcnt > 0);
 379 
             /* For a regular file, move the file offset past what was written. */
 380         if (vp->v_type == VREG)
 381                 fp->f_offset += count;
 382 
 383         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
             /* Report the bytes transferred; a failed copyout becomes EFAULT. */
 384         if (copyout(&count, xferred, sizeof (count)))
 385                 error = EFAULT;
 386         releasef(fildes);
 387         if (error != 0)
 388                 return (set_errno(error));
 389         return (count);
 390 }
 391 #endif
 392 
 /*
  * Send a batch of vector entries to a socket as one message.
  *
  * The 'total_size' bytes described by the 'copy_cnt' entries in 'sfv' are
  * gathered into a chain of mblks, each sized for up to 'maxblk' bytes of
  * payload, and the chain is then passed to socket_sendmblk(). SFV_FD_SELF
  * entries are copied in from the user address in sfv_off with uiomove();
  * all other entries are read from a regular file with VOP_READ() directly
  * into the mblk.
  *
  * 'fp' must refer to a socket (asserted). '*fileoff' is only used to fill
  * in uio_loffset and is not modified here. Returns 0 and adds the batch size
  * to '*count' on success. On any failure before socket_sendmblk() the chain
  * is freed and an errno value is returned.
  */
 393 static int
 394 sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
 395     int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
 396 {
 397         struct vnode *vp;
 398         struct uio auio;
 399         struct iovec aiov;
 400         ushort_t fflag;
 401         int ioflag;
 402         int i, error;
 403         size_t cnt;
 404         ssize_t sfv_len;
 405         u_offset_t sfv_off;
 406 #ifdef _SYSCALL32_IMPL
 407         model_t model = get_udatamodel();
 408         u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
 409             MAXOFF32_T : MAXOFFSET_T;
 410 #else
 411         const u_offset_t maxoff = MAXOFF32_T;
 412 #endif
 413         mblk_t *dmp = NULL;
 414         int wroff;
 415         int buf_left = 0;
 416         size_t  iov_len;
 417         mblk_t  *head, *tmp;
 418         size_t  size = total_size;
 419         size_t  extra;
 420         int tail_len;
 421         struct nmsghdr msg;
 422 
 423         fflag = fp->f_flag;
 424         vp = fp->f_vnode;
 425 
 426         ASSERT(vp->v_type == VSOCK);
 427         ASSERT(maxblk > 0);
 428 
 429         /* If nothing to send, return */
 430         if (total_size == 0)
 431                 return (0);
 432 
             /*
              * Take the write offset and tail length from the stream head when
              * the vnode has one, otherwise from the sonode's protocol
              * properties.
              */
 433         if (vp->v_stream != NULL) {
 434                 wroff = (int)vp->v_stream->sd_wroff;
 435                 tail_len = (int)vp->v_stream->sd_tail;
 436         } else {
 437                 struct sonode *so;
 438 
 439                 so = VTOSO(vp);
 440                 wroff = so->so_proto_props.sopp_wroff;
 441                 tail_len = so->so_proto_props.sopp_tail;
 442         }
 443 
 444         extra = wroff + tail_len;
 445 
             /*
              * Allocate the first mblk with 'extra' bytes on top of its
              * payload, and start the data wroff bytes into the buffer.
              */
 446         buf_left = MIN(total_size, maxblk);
 447         head = dmp = allocb(buf_left + extra, BPRI_HI);
 448         if (head == NULL)
 449                 return (ENOMEM);
 450         head->b_wptr = head->b_rptr = head->b_rptr + wroff;
 451         bzero(&msg, sizeof (msg));
 452 
 453         auio.uio_extflg = UIO_COPY_DEFAULT;
 454         for (i = 0; i < copy_cnt; i++) {
                     /* Give up with EINTR if ISSIG() reports a signal. */
 455                 if (ISSIG(curthread, JUSTLOOKING)) {
 456                         freemsg(head);
 457                         return (EINTR);
 458                 }
 459 
 460                 /*
 461                  * Do similar checks as "write" as we are writing
 462                  * sfv_len bytes into "vp".
 463                  */
 464                 sfv_len = (ssize_t)sfv->sfv_len;
 465 
                     /* Nothing to transfer for this entry; go to the next. */
 466                 if (sfv_len == 0) {
 467                         sfv++;
 468                         continue;
 469                 }
 470 
 471                 /* Check for overflow */
 472 #ifdef _SYSCALL32_IMPL
 473                 if (model == DATAMODEL_ILP32) {
 474                         if (((ssize32_t)(*count + sfv_len)) < 0) {
 475                                 freemsg(head);
 476                                 return (EINVAL);
 477                         }
 478                 } else
 479 #endif
 480                 if ((*count + sfv_len) < 0) {
 481                         freemsg(head);
 482                         return (EINVAL);
 483                 }
 484 
 485                 sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
 486 
 487                 if (sfv->sfv_fd == SFV_FD_SELF) {
                             /*
                              * sfv_off is a user address: copy the data into
                              * the mblk chain, one mblk's worth at a time.
                              */
 488                         while (sfv_len > 0) {
                                     /*
                                      * Current mblk is full: allocate the next
                                      * one and link it on through b_cont.
                                      */
 489                                 if (buf_left == 0) {
 490                                         tmp = dmp;
 491                                         buf_left = MIN(total_size, maxblk);
 492                                         iov_len = MIN(buf_left, sfv_len);
 493                                         dmp = allocb(buf_left + extra, BPRI_HI);
 494                                         if (dmp == NULL) {
 495                                                 freemsg(head);
 496                                                 return (ENOMEM);
 497                                         }
 498                                         dmp->b_wptr = dmp->b_rptr =
 499                                             dmp->b_rptr + wroff;
 500                                         tmp->b_cont = dmp;
 501                                 } else {
 502                                         iov_len = MIN(buf_left, sfv_len);
 503                                 }
 504 
 505                                 aiov.iov_len = iov_len;
 506                                 aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
 507                                 auio.uio_loffset = *fileoff;
 508                                 auio.uio_iovcnt = 1;
 509                                 auio.uio_resid = iov_len;
 510                                 auio.uio_iov = &aiov;
 511                                 auio.uio_segflg = UIO_USERSPACE;
 512                                 auio.uio_llimit = curproc->p_fsz_ctl;
 513                                 auio.uio_fmode = fflag;
 514 
                                     /*
                                      * The counters are updated before the
                                      * copy; if uiomove() fails the whole
                                      * chain is discarded anyway.
                                      */
 515                                 buf_left -= iov_len;
 516                                 total_size -= iov_len;
 517                                 sfv_len -= iov_len;
 518                                 sfv_off += iov_len;
 519 
 520                                 error = uiomove((caddr_t)dmp->b_wptr,
 521                                     iov_len, UIO_WRITE, &auio);
 522                                 if (error != 0) {
 523                                         freemsg(head);
 524                                         return (error);
 525                                 }
 526                                 dmp->b_wptr += iov_len;
 527                         }
 528                 } else {
 529                         file_t  *ffp;
 530                         vnode_t *readvp;
 531 
                             /*
                              * sfv_fd names a source file. Every return from
                              * here on must drop the getf() hold with
                              * releasef() as well as free the chain.
                              */
 532                         if ((ffp = getf(sfv->sfv_fd)) == NULL) {
 533                                 freemsg(head);
 534                                 return (EBADF);
 535                         }
 536 
                             /*
                              * NOTE(review): sendvec_chunk64() returns EBADF,
                              * not EACCES, for a source not open for reading;
                              * confirm the difference is intended.
                              */
 537                         if ((ffp->f_flag & FREAD) == 0) {
 538                                 releasef(sfv->sfv_fd);
 539                                 freemsg(head);
 540                                 return (EACCES);
 541                         }
 542 
 543                         readvp = ffp->f_vnode;
 544                         if (readvp->v_type != VREG) {
 545                                 releasef(sfv->sfv_fd);
 546                                 freemsg(head);
 547                                 return (EINVAL);
 548                         }
 549 
 550                         /*
 551                          * No point reading and writing to same vp,
 552                          * as long as both are regular files. readvp is not
 553                          * locked; but since we got it from an open file the
 554                          * contents will be valid during the time of access.
 555                          */
 556 
 557                         if (vn_compare(vp, readvp)) {
 558                                 releasef(sfv->sfv_fd);
 559                                 freemsg(head);
 560                                 return (EINVAL);
 561                         }
 562 
 563                         /*
 564                          * Note: we assume readvp != vp. "vp" is already
 565                          * locked, and "readvp" must not be.
                              *
                              * The two locks are taken in vnode address order:
                              * when readvp is the lower address, vp is dropped,
                              * readvp is read-locked and vp is then re-locked.
 566                          */
 567 
 568                         if (readvp < vp) {
 569                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 570                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 571                                     NULL);
 572                                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 573                         } else {
 574                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 575                                     NULL);
 576                         }
 577 
 578                         /* Same checks as in pread */
 579                         if (sfv_off > maxoff) {
 580                                 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 581                                 releasef(sfv->sfv_fd);
 582                                 freemsg(head);
 583                                 return (EINVAL);
 584                         }
                             /*
                              * Clip the request so it ends at maxoff, and take
                              * the clipped bytes off total_size as well.
                              */
 585                         if (sfv_off + sfv_len > maxoff) {
 586                                 total_size -= (sfv_off + sfv_len - maxoff);
 587                                 sfv_len = (ssize_t)((offset_t)maxoff -
 588                                     sfv_off);
 589                         }
 590 
 591                         while (sfv_len > 0) {
                                     /*
                                      * Current mblk is full: allocate the next
                                      * one and link it on through b_cont.
                                      */
 592                                 if (buf_left == 0) {
 593                                         tmp = dmp;
 594                                         buf_left = MIN(total_size, maxblk);
 595                                         iov_len = MIN(buf_left, sfv_len);
 596                                         dmp = allocb(buf_left + extra, BPRI_HI);
 597                                         if (dmp == NULL) {
 598                                                 VOP_RWUNLOCK(readvp,
 599                                                     V_WRITELOCK_FALSE, NULL);
 600                                                 releasef(sfv->sfv_fd);
 601                                                 freemsg(head);
 602                                                 return (ENOMEM);
 603                                         }
 604                                         dmp->b_wptr = dmp->b_rptr =
 605                                             dmp->b_rptr + wroff;
 606                                         tmp->b_cont = dmp;
 607                                 } else {
 608                                         iov_len = MIN(buf_left, sfv_len);
 609                                 }
                                     /* Read straight into the mblk's free space. */
 610                                 aiov.iov_base = (caddr_t)dmp->b_wptr;
 611                                 aiov.iov_len = iov_len;
 612                                 auio.uio_loffset = sfv_off;
 613                                 auio.uio_iov = &aiov;
 614                                 auio.uio_iovcnt = 1;
 615                                 auio.uio_resid = iov_len;
 616                                 auio.uio_segflg = UIO_SYSSPACE;
 617                                 auio.uio_llimit = MAXOFFSET_T;
 618                                 auio.uio_fmode = ffp->f_flag;
 619                                 ioflag = auio.uio_fmode &
 620                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 621 
 622                                 /*
 623                                  * If read sync is not asked for,
 624                                  * filter sync flags
 625                                  */
 626                                 if ((ioflag & FRSYNC) == 0)
 627                                         ioflag &= ~(FSYNC|FDSYNC);
 628                                 error = VOP_READ(readvp, &auio, ioflag,
 629                                     fp->f_cred, NULL);
 630                                 if (error != 0) {
 631                                         /*
 632                                          * If we were reading a pipe (currently
 633                                          * not implemented), we may now lose
 634                                          * data.
 635                                          */
 636                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 637                                             NULL);
 638                                         releasef(sfv->sfv_fd);
 639                                         freemsg(head);
 640                                         return (error);
 641                                 }
 642 
 643                                 /*
 644                                  * Check how much data was really read.
 645                                  * Decrement the 'len' and increment the
 646                                  * 'off' appropriately.
 647                                  */
 648                                 cnt = iov_len - auio.uio_resid;
                                     /* A read that returns no data fails with EINVAL. */
 649                                 if (cnt == 0) {
 650                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 651                                             NULL);
 652                                         releasef(sfv->sfv_fd);
 653                                         freemsg(head);
 654                                         return (EINVAL);
 655                                 }
 656                                 sfv_len -= cnt;
 657                                 sfv_off += cnt;
 658                                 total_size -= cnt;
 659                                 buf_left -= cnt;
 660 
 661                                 dmp->b_wptr += cnt;
 662                         }
                             /* Entry done: unlock and release the source file. */
 663                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 664                         releasef(sfv->sfv_fd);
 665                 }
 666                 sfv++;
 667         }
 668 
             /*
              * Every byte of the batch should now be in the chain. Hand it to
              * the socket; if the send fails and 'head' is still set, the chain
              * is freed here.
              */
 669         ASSERT(total_size == 0);
 670         error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &head);
 671         if (error != 0) {
 672                 if (head != NULL)
 673                         freemsg(head);
 674                 return (error);
 675         }
             /*
              * NOTE(review): 'size' is the total_size passed in. When an entry
              * is clipped at maxoff above, total_size is reduced but 'size' is
              * not, so the clipped bytes are still counted here -- confirm
              * whether that is intended.
              */
 676         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)size;
 677         *count += size;
 678 
 679         return (0);
 680 }
 681 
 682 
 683 static int
 684 sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
 685     int copy_cnt, ssize_t *count)
 686 {
 687         struct vnode *vp;
 688         struct uio auio;
 689         struct iovec aiov;
 690         ushort_t fflag;
 691         int ioflag;
 692         int i, error;
 693         size_t cnt;
 694         ssize_t sfv_len;
 695         u_offset_t sfv_off;
 696 #ifdef _SYSCALL32_IMPL
 697         model_t model = get_udatamodel();
 698         u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
 699             MAXOFF32_T : MAXOFFSET_T;
 700 #else
 701         const u_offset_t maxoff = MAXOFF32_T;
 702 #endif
 703         mblk_t  *dmp = NULL;
 704         char    *buf = NULL;
 705         size_t  extra;
 706         int maxblk, wroff, tail_len;
 707         struct sonode *so;
 708         stdata_t *stp;
 709         struct nmsghdr msg;
 710 
 711         fflag = fp->f_flag;
 712         vp = fp->f_vnode;
 713 
 714         if (vp->v_type == VSOCK) {
 715                 so = VTOSO(vp);
 716                 if (vp->v_stream != NULL) {
 717                         stp = vp->v_stream;
 718                         wroff = (int)stp->sd_wroff;
 719                         tail_len = (int)stp->sd_tail;
 720                         maxblk = (int)stp->sd_maxblk;
 721                 } else {
 722                         stp = NULL;
 723                         wroff = so->so_proto_props.sopp_wroff;
 724                         tail_len = so->so_proto_props.sopp_tail;
 725                         maxblk = so->so_proto_props.sopp_maxblk;
 726                 }
 727                 extra = wroff + tail_len;
 728         }
 729 
 730         bzero(&msg, sizeof (msg));
 731         auio.uio_extflg = UIO_COPY_DEFAULT;
 732         for (i = 0; i < copy_cnt; i++) {
 733                 if (ISSIG(curthread, JUSTLOOKING))
 734                         return (EINTR);
 735 
 736                 /*
 737                  * Do similar checks as "write" as we are writing
 738                  * sfv_len bytes into "vp".
 739                  */
 740                 sfv_len = (ssize_t)sfv->sfv_len;
 741 
 742                 if (sfv_len == 0) {
 743                         sfv++;
 744                         continue;
 745                 }
 746 
 747                 if (vp->v_type == VREG) {
 748                         if (*fileoff >= curproc->p_fsz_ctl) {
 749                                 mutex_enter(&curproc->p_lock);
 750                                 (void) rctl_action(
 751                                     rctlproc_legacy[RLIMIT_FSIZE],
 752                                     curproc->p_rctls, curproc, RCA_SAFE);
 753                                 mutex_exit(&curproc->p_lock);
 754 
 755                                 return (EFBIG);
 756                         }
 757 
 758                         if (*fileoff >= maxoff)
 759                                 return (EFBIG);
 760 
 761                         if (*fileoff + sfv_len > maxoff)
 762                                 return (EINVAL);
 763                 }
 764 
 765                 /* Check for overflow */
 766 #ifdef _SYSCALL32_IMPL
 767                 if (model == DATAMODEL_ILP32) {
 768                         if (((ssize32_t)(*count + sfv_len)) < 0)
 769                                 return (EINVAL);
 770                 } else
 771 #endif
 772                 if ((*count + sfv_len) < 0)
 773                         return (EINVAL);
 774 
 775                 sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
 776 
 777                 if (sfv->sfv_fd == SFV_FD_SELF) {
 778                         if (vp->v_type == VSOCK) {
 779                                 while (sfv_len > 0) {
 780                                         size_t iov_len;
 781 
 782                                         iov_len = sfv_len;
 783                                         /*
 784                                          * Socket filters can limit the mblk
 785                                          * size, so limit reads to maxblk if
 786                                          * there are filters present.
 787                                          */
 788                                         if (so->so_filter_active > 0 &&
 789                                             maxblk != INFPSZ)
 790                                                 iov_len = MIN(iov_len, maxblk);
 791 
 792                                         aiov.iov_len = iov_len;
 793                                         aiov.iov_base =
 794                                             (caddr_t)(uintptr_t)sfv_off;
 795 
 796                                         auio.uio_iov = &aiov;
 797                                         auio.uio_iovcnt = 1;
 798                                         auio.uio_loffset = *fileoff;
 799                                         auio.uio_segflg = UIO_USERSPACE;
 800                                         auio.uio_fmode = fflag;
 801                                         auio.uio_llimit = curproc->p_fsz_ctl;
 802                                         auio.uio_resid = iov_len;
 803 
 804                                         dmp = allocb(iov_len + extra, BPRI_HI);
 805                                         if (dmp == NULL)
 806                                                 return (ENOMEM);
 807                                         dmp->b_wptr = dmp->b_rptr =
 808                                             dmp->b_rptr + wroff;
 809                                         error = uiomove((caddr_t)dmp->b_wptr,
 810                                             iov_len, UIO_WRITE, &auio);
 811                                         if (error != 0) {
 812                                                 freeb(dmp);
 813                                                 return (error);
 814                                         }
 815                                         dmp->b_wptr += iov_len;
 816                                         error = socket_sendmblk(VTOSO(vp),
 817                                             &msg, fflag, CRED(), &dmp);
 818 
 819                                         if (error != 0) {
 820                                                 if (dmp != NULL)
 821                                                         freeb(dmp);
 822                                                 return (error);
 823                                         }
 824                                         ttolwp(curthread)->lwp_ru.ioch +=
 825                                             (ulong_t)iov_len;
 826                                         *count += iov_len;
 827                                         sfv_len -= iov_len;
 828                                         sfv_off += iov_len;
 829                                 }
 830                         } else {
 831                                 aiov.iov_len = sfv_len;
 832                                 aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
 833 
 834                                 auio.uio_iov = &aiov;
 835                                 auio.uio_iovcnt = 1;
 836                                 auio.uio_loffset = *fileoff;
 837                                 auio.uio_segflg = UIO_USERSPACE;
 838                                 auio.uio_fmode = fflag;
 839                                 auio.uio_llimit = curproc->p_fsz_ctl;
 840                                 auio.uio_resid = sfv_len;
 841 
 842                                 ioflag = auio.uio_fmode &
 843                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
 844                                 while (sfv_len > 0) {
 845                                         error = VOP_WRITE(vp, &auio, ioflag,
 846                                             fp->f_cred, NULL);
 847                                         cnt = sfv_len - auio.uio_resid;
 848                                         sfv_len -= cnt;
 849                                         ttolwp(curthread)->lwp_ru.ioch +=
 850                                             (ulong_t)cnt;
 851                                         *fileoff += cnt;
 852                                         *count += cnt;
 853                                         if (error != 0)
 854                                                 return (error);
 855                                 }
 856                         }
 857                 } else {
 858                         int segmapit = 0;
 859                         file_t  *ffp;
 860                         vnode_t *readvp;
 861                         struct vnode *realvp;
 862                         size_t  size;
 863                         caddr_t ptr;
 864 
 865                         if ((ffp = getf(sfv->sfv_fd)) == NULL)
 866                                 return (EBADF);
 867 
 868                         if ((ffp->f_flag & FREAD) == 0) {
 869                                 releasef(sfv->sfv_fd);
 870                                 return (EBADF);
 871                         }
 872 
 873                         readvp = ffp->f_vnode;
 874                         if (VOP_REALVP(readvp, &realvp, NULL) == 0)
 875                                 readvp = realvp;
 876                         if (readvp->v_type != VREG) {
 877                                 releasef(sfv->sfv_fd);
 878                                 return (EINVAL);
 879                         }
 880 
 881                         /*
 882                          * No point reading and writing to same vp,
 883                          * as long as both are regular files. readvp is not
 884                          * locked; but since we got it from an open file the
 885                          * contents will be valid during the time of access.
 886                          */
 887                         if (vn_compare(vp, readvp)) {
 888                                 releasef(sfv->sfv_fd);
 889                                 return (EINVAL);
 890                         }
 891 
 892                         /*
 893                          * Note: we assume readvp != vp. "vp" is already
 894                          * locked, and "readvp" must not be.
 895                          */
 896                         if (readvp < vp) {
 897                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 898                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 899                                     NULL);
 900                                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 901                         } else {
 902                                 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
 903                                     NULL);
 904                         }
 905 
 906                         /* Same checks as in pread */
 907                         if (sfv_off > maxoff) {
 908                                 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
 909                                 releasef(sfv->sfv_fd);
 910                                 return (EINVAL);
 911                         }
 912                         if (sfv_off + sfv_len > maxoff) {
 913                                 sfv_len = (ssize_t)((offset_t)maxoff -
 914                                     sfv_off);
 915                         }
 916                         /* Find the native blocksize to transfer data */
 917                         size = MIN(vp->v_vfsp->vfs_bsize,
 918                             readvp->v_vfsp->vfs_bsize);
 919                         size = sfv_len < size ? sfv_len : size;
 920 
 921                         if (vp->v_type != VSOCK) {
 922                                 segmapit = 0;
 923                                 buf = kmem_alloc(size, KM_NOSLEEP);
 924                                 if (buf == NULL) {
 925                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
 926                                             NULL);
 927                                         releasef(sfv->sfv_fd);
 928                                         return (ENOMEM);
 929                                 }
 930                         } else {
 931                                 uint_t  copyflag;
 932 
 933                                 copyflag = stp != NULL ? stp->sd_copyflag :
 934                                     so->so_proto_props.sopp_zcopyflag;
 935 
 936                                 /*
 937                                  * Socket filters can limit the mblk size,
 938                                  * so limit reads to maxblk if there are
 939                                  * filters present.
 940                                  */
 941                                 if (so->so_filter_active > 0 &&
 942                                     maxblk != INFPSZ)
 943                                         size = MIN(size, maxblk);
 944 
 945                                 if (vn_has_flocks(readvp) ||
 946                                     readvp->v_flag & VNOMAP ||
 947                                     copyflag & STZCVMUNSAFE) {
 948                                         segmapit = 0;
 949                                 } else if (copyflag & STZCVMSAFE) {
 950                                         segmapit = 1;
 951                                 } else {
 952                                         int on = 1;
 953                                         if (socket_setsockopt(VTOSO(vp),
 954                                             SOL_SOCKET, SO_SND_COPYAVOID,
 955                                             &on, sizeof (on), CRED()) == 0)
 956                                         segmapit = 1;
 957                                 }
 958                         }
 959 
 960                         if (segmapit) {
 961                                 boolean_t nowait;
 962 
 963                                 nowait = (sfv->sfv_flag & SFV_NOWAIT) != 0;
 964                                 error = snf_segmap(fp, readvp, sfv_off,
 965                                     (u_offset_t)sfv_len, (ssize_t *)&cnt,
 966                                     nowait);
 967                                 releasef(sfv->sfv_fd);
 968                                 *count += cnt;
 969                                 if (error)
 970                                         return (error);
 971                                 sfv++;
 972                                 continue;
 973                         }
 974 
 975                         while (sfv_len > 0) {
 976                                 size_t  iov_len;
 977 
 978                                 iov_len = MIN(size, sfv_len);
 979 
 980                                 if (vp->v_type == VSOCK) {
 981                                         dmp = allocb(iov_len + extra, BPRI_HI);
 982                                         if (dmp == NULL) {
 983                                                 VOP_RWUNLOCK(readvp,
 984                                                     V_WRITELOCK_FALSE, NULL);
 985                                                 releasef(sfv->sfv_fd);
 986                                                 return (ENOMEM);
 987                                         }
 988                                         dmp->b_wptr = dmp->b_rptr =
 989                                             dmp->b_rptr + wroff;
 990                                         ptr = (caddr_t)dmp->b_rptr;
 991                                 } else {
 992                                         ptr = buf;
 993                                 }
 994 
 995                                 aiov.iov_base = ptr;
 996                                 aiov.iov_len = iov_len;
 997                                 auio.uio_loffset = sfv_off;
 998                                 auio.uio_iov = &aiov;
 999                                 auio.uio_iovcnt = 1;
1000                                 auio.uio_resid = iov_len;
1001                                 auio.uio_segflg = UIO_SYSSPACE;
1002                                 auio.uio_llimit = MAXOFFSET_T;
1003                                 auio.uio_fmode = ffp->f_flag;
1004                                 ioflag = auio.uio_fmode &
1005                                     (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1006 
1007                                 /*
1008                                  * If read sync is not asked for,
1009                                  * filter sync flags
1010                                  */
1011                                 if ((ioflag & FRSYNC) == 0)
1012                                         ioflag &= ~(FSYNC|FDSYNC);
1013                                 error = VOP_READ(readvp, &auio, ioflag,
1014                                     fp->f_cred, NULL);
1015                                 if (error != 0) {
1016                                         /*
1017                                          * If we were reading a pipe (currently
1018                                          * not implemented), we may now lose
1019                                          * data.
1020                                          */
1021                                         if (vp->v_type == VSOCK)
1022                                                 freeb(dmp);
1023                                         else
1024                                                 kmem_free(buf, size);
1025                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
1026                                             NULL);
1027                                         releasef(sfv->sfv_fd);
1028                                         return (error);
1029                                 }
1030 
1031                                 /*
1032                                  * Check how much data was really read.
1033                                  * Decrement the 'len' and increment the
1034                                  * 'off' appropriately.
1035                                  */
1036                                 cnt = iov_len - auio.uio_resid;
1037                                 if (cnt == 0) {
1038                                         if (vp->v_type == VSOCK)
1039                                                 freeb(dmp);
1040                                         else
1041                                                 kmem_free(buf, size);
1042                                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
1043                                             NULL);
1044                                         releasef(sfv->sfv_fd);
1045                                         return (EINVAL);
1046                                 }
1047                                 sfv_len -= cnt;
1048                                 sfv_off += cnt;
1049 
1050                                 if (vp->v_type == VSOCK) {
1051                                         dmp->b_wptr = dmp->b_rptr + cnt;
1052 
1053                                         error = socket_sendmblk(VTOSO(vp),
1054                                             &msg, fflag, CRED(), &dmp);
1055 
1056                                         if (error != 0) {
1057                                                 if (dmp != NULL)
1058                                                         freeb(dmp);
1059                                                 VOP_RWUNLOCK(readvp,
1060                                                     V_WRITELOCK_FALSE, NULL);
1061                                                 releasef(sfv->sfv_fd);
1062                                                 return (error);
1063                                         }
1064 
1065                                         ttolwp(curthread)->lwp_ru.ioch +=
1066                                             (ulong_t)cnt;
1067                                         *count += cnt;
1068                                 } else {
1069 
1070                                         aiov.iov_base = ptr;
1071                                         aiov.iov_len = cnt;
1072                                         auio.uio_loffset = *fileoff;
1073                                         auio.uio_resid = cnt;
1074                                         auio.uio_iov = &aiov;
1075                                         auio.uio_iovcnt = 1;
1076                                         auio.uio_segflg = UIO_SYSSPACE;
1077                                         auio.uio_llimit = curproc->p_fsz_ctl;
1078                                         auio.uio_fmode = fflag;
1079                                         ioflag = auio.uio_fmode &
1080                                             (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1081                                         error = VOP_WRITE(vp, &auio, ioflag,
1082                                             fp->f_cred, NULL);
1083 
1084                                         /*
1085                                          * Check how much data was written.
1086                                          * Increment the 'len' and decrement the
1087                                          * 'off' if all the data was not
1088                                          * written.
1089                                          */
1090                                         cnt -= auio.uio_resid;
1091                                         sfv_len += auio.uio_resid;
1092                                         sfv_off -= auio.uio_resid;
1093                                         ttolwp(curthread)->lwp_ru.ioch +=
1094                                             (ulong_t)cnt;
1095                                         *fileoff += cnt;
1096                                         *count += cnt;
1097                                         if (error != 0) {
1098                                                 kmem_free(buf, size);
1099                                                 VOP_RWUNLOCK(readvp,
1100                                                     V_WRITELOCK_FALSE, NULL);
1101                                                 releasef(sfv->sfv_fd);
1102                                                 return (error);
1103                                         }
1104                                 }
1105                         }
1106                         if (buf) {
1107                                 kmem_free(buf, size);
1108                                 buf = NULL;
1109                         }
1110                         VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
1111                         releasef(sfv->sfv_fd);
1112                 }
1113                 sfv++;
1114         }
1115         return (0);
1116 }
1117 
1118 ssize_t
1119 sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
1120     size_t *xferred)
1121 {
1122         int error = 0;
1123         int first_vector_error = 0;
1124         file_t *fp;
1125         struct vnode *vp;
1126         struct sonode *so;
1127         u_offset_t fileoff;
1128         int copy_cnt;
1129         const struct sendfilevec *copy_vec;
1130         struct sendfilevec sfv[SEND_MAX_CHUNK];
1131         ssize_t count = 0;
1132 #ifdef _SYSCALL32_IMPL
1133         struct ksendfilevec32 sfv32[SEND_MAX_CHUNK];
1134 #endif
1135         ssize_t total_size;
1136         int i;
1137         boolean_t is_sock = B_FALSE;
1138         int maxblk = 0;
1139 
1140         if (sfvcnt <= 0)
1141                 return (set_errno(EINVAL));
1142 
1143         if ((fp = getf(fildes)) == NULL)
1144                 return (set_errno(EBADF));
1145 
1146         if (((fp->f_flag) & FWRITE) == 0) {
1147                 error = EBADF;
1148                 goto err;
1149         }
1150 
1151         fileoff = fp->f_offset;
1152         vp = fp->f_vnode;
1153 
1154         switch (vp->v_type) {
1155         case VSOCK:
1156                 so = VTOSO(vp);
1157                 is_sock = B_TRUE;
1158                 if (SOCK_IS_NONSTR(so)) {
1159                         maxblk = so->so_proto_props.sopp_maxblk;
1160                 } else {
1161                         maxblk = (int)vp->v_stream->sd_maxblk;
1162                 }
1163 
1164                 /*
1165                  * We need to make sure that the socket that we're sending on
1166                  * supports sendfile behavior. sockfs doesn't know that the APIs
1167                  * we want to use are coming from sendfile, so we can't rely on
1168                  * it to check for us.
1169                  */
1170                 if ((so->so_mode & SM_SENDFILESUPP) == 0) {
1171                         error = EOPNOTSUPP;
1172                         goto err;
1173                 }
1174                 break;
1175         case VREG:
1176                 break;
1177         default:
1178                 error = EINVAL;
1179                 goto err;
1180         }
1181 
1182         switch (opcode) {
1183         case SENDFILEV :
1184                 break;
1185 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1186         case SENDFILEV64 :
1187                 return (sendvec64(fp, (struct ksendfilevec64 *)vec, sfvcnt,
1188                     (size32_t *)xferred, fildes));
1189 #endif
1190         default :
1191                 error = ENOSYS;
1192                 break;
1193         }
1194 
1195         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1196         copy_vec = vec;
1197 
1198         do {
1199                 total_size = 0;
1200                 copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
1201 #ifdef _SYSCALL32_IMPL
1202                 /* 32-bit callers need to have their iovec expanded. */
1203                 if (get_udatamodel() == DATAMODEL_ILP32) {
1204                         if (copyin(copy_vec, sfv32,
1205                             copy_cnt * sizeof (ksendfilevec32_t))) {
1206                                 error = EFAULT;
1207                                 break;
1208                         }
1209 
1210                         for (i = 0; i < copy_cnt; i++) {
1211                                 sfv[i].sfv_fd = sfv32[i].sfv_fd;
1212                                 sfv[i].sfv_off =
1213                                     (off_t)(uint32_t)sfv32[i].sfv_off;
1214                                 sfv[i].sfv_len = (size_t)sfv32[i].sfv_len;
1215                                 total_size += sfv[i].sfv_len;
1216                                 sfv[i].sfv_flag = sfv32[i].sfv_flag;
1217                                 /*
1218                                  * Individual elements of the vector must not
1219                                  * wrap or overflow, as later math is signed.
1220                                  * Equally total_size needs to be checked after
1221                                  * each vector is added in, to be sure that
1222                                  * rogue values haven't overflowed the counter.
1223                                  */
1224                                 if (((ssize32_t)sfv[i].sfv_len < 0) ||
1225                                     ((ssize32_t)total_size < 0)) {
1226                                         /*
1227                                          * Truncate the vector to send data
1228                                          * described by elements before the
1229                                          * error.
1230                                          */
1231                                         copy_cnt = i;
1232                                         first_vector_error = EINVAL;
1233                                         /* total_size can't be trusted */
1234                                         if ((ssize32_t)total_size < 0)
1235                                                 error = EINVAL;
1236                                         break;
1237                                 }
1238                         }
1239                         /* Nothing to do, process errors */
1240                         if (copy_cnt == 0)
1241                                 break;
1242 
1243                 } else {
1244 #endif
1245                         if (copyin(copy_vec, sfv,
1246                             copy_cnt * sizeof (sendfilevec_t))) {
1247                                 error = EFAULT;
1248                                 break;
1249                         }
1250 
1251                         for (i = 0; i < copy_cnt; i++) {
1252                                 total_size += sfv[i].sfv_len;
1253                                 /*
1254                                  * Individual elements of the vector must not
1255                                  * wrap or overflow, as later math is signed.
1256                                  * Equally total_size needs to be checked after
1257                                  * each vector is added in, to be sure that
1258                                  * rogue values haven't overflowed the counter.
1259                                  */
1260                                 if (((ssize_t)sfv[i].sfv_len < 0) ||
1261                                     (total_size < 0)) {
1262                                         /*
1263                                          * Truncate the vector to send data
1264                                          * described by elements before the
1265                                          * error.
1266                                          */
1267                                         copy_cnt = i;
1268                                         first_vector_error = EINVAL;
1269                                         /* total_size can't be trusted */
1270                                         if (total_size < 0)
1271                                                 error = EINVAL;
1272                                         break;
1273                                 }
1274                         }
1275                         /* Nothing to do, process errors */
1276                         if (copy_cnt == 0)
1277                                 break;
1278 #ifdef _SYSCALL32_IMPL
1279                 }
1280 #endif
1281 
1282                 /*
1283                  * The task between deciding to use sendvec_small_chunk
1284                  * and sendvec_chunk is dependant on multiple things:
1285                  *
1286                  * i) latency is important for smaller files. So if the
1287                  * data is smaller than 'tcp_slow_start_initial' times
1288                  * maxblk, then use sendvec_small_chunk which creates
1289                  * maxblk size mblks and chains them together and sends
1290                  * them to TCP in one shot. It also leaves 'wroff' size
1291                  * space for the headers in each mblk.
1292                  *
1293                  * ii) for total size bigger than 'tcp_slow_start_initial'
1294                  * time maxblk, its probably real file data which is
1295                  * dominating. So its better to use sendvec_chunk because
1296                  * performance goes to dog if we don't do pagesize reads.
1297                  * sendvec_chunk will do pagesize reads and write them
1298                  * in pagesize mblks to TCP.
1299                  *
1300                  * Side Notes: A write to file has not been optimized.
1301                  * Future zero copy code will plugin into sendvec_chunk
1302                  * only because doing zero copy for files smaller then
1303                  * pagesize is useless.
1304                  *
1305                  * Note, if socket has NL7C enabled then call NL7C's
1306                  * senfilev() function to consume the sfv[].
1307                  */
1308                 if (is_sock) {
1309                         if (!SOCK_IS_NONSTR(so) &&
1310                             _SOTOTPI(so)->sti_nl7c_flags != 0) {
1311                                 error = nl7c_sendfilev(so, &fileoff,
1312                                     sfv, copy_cnt, &count);
1313                         } else if ((total_size <= (4 * maxblk)) &&
1314                             error == 0) {
1315                                 error = sendvec_small_chunk(fp,
1316                                     &fileoff, sfv, copy_cnt,
1317                                     total_size, maxblk, &count);
1318                         } else {
1319                                 error = sendvec_chunk(fp, &fileoff,
1320                                     sfv, copy_cnt, &count);
1321                         }
1322                 } else {
1323                         ASSERT(vp->v_type == VREG);
1324                         error = sendvec_chunk(fp, &fileoff, sfv, copy_cnt,
1325                             &count);
1326                 }
1327 
1328 
1329 #ifdef _SYSCALL32_IMPL
1330         if (get_udatamodel() == DATAMODEL_ILP32)
1331                 copy_vec = (const struct sendfilevec *)((char *)copy_vec +
1332                     (copy_cnt * sizeof (ksendfilevec32_t)));
1333         else
1334 #endif
1335                 copy_vec += copy_cnt;
1336                 sfvcnt -= copy_cnt;
1337 
1338         /* Process all vector members up to first error */
1339         } while ((sfvcnt > 0) && first_vector_error == 0 && error == 0);
1340 
1341         if (vp->v_type == VREG)
1342                 fp->f_offset += count;
1343 
1344         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1345 
1346 #ifdef _SYSCALL32_IMPL
1347         if (get_udatamodel() == DATAMODEL_ILP32) {
1348                 ssize32_t count32 = (ssize32_t)count;
1349                 if (copyout(&count32, xferred, sizeof (count32)))
1350                         error = EFAULT;
1351                 releasef(fildes);
1352                 if (error != 0)
1353                         return (set_errno(error));
1354                 if (first_vector_error != 0)
1355                         return (set_errno(first_vector_error));
1356                 return (count32);
1357         }
1358 #endif
1359         if (copyout(&count, xferred, sizeof (count)))
1360                 error = EFAULT;
1361         releasef(fildes);
1362         if (error != 0)
1363                 return (set_errno(error));
1364         if (first_vector_error != 0)
1365                 return (set_errno(first_vector_error));
1366         return (count);
1367 err:
1368         ASSERT(error != 0);
1369         releasef(fildes);
1370         return (set_errno(error));
1371 }