--- old/usr/src/uts/common/syscall/sendfile.c
+++ new/usr/src/uts/common/syscall/sendfile.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/t_lock.h>
28 28 #include <sys/param.h>
29 29 #include <sys/systm.h>
30 30 #include <sys/buf.h>
31 31 #include <sys/conf.h>
32 32 #include <sys/cred.h>
33 33 #include <sys/kmem.h>
34 34 #include <sys/sysmacros.h>
35 35 #include <sys/vfs.h>
36 36 #include <sys/vnode.h>
37 37 #include <sys/debug.h>
38 38 #include <sys/errno.h>
39 39 #include <sys/time.h>
40 40 #include <sys/file.h>
41 41 #include <sys/open.h>
42 42 #include <sys/user.h>
43 43 #include <sys/termios.h>
44 44 #include <sys/stream.h>
45 45 #include <sys/strsubr.h>
46 46 #include <sys/sunddi.h>
47 47 #include <sys/esunddi.h>
48 48 #include <sys/flock.h>
49 49 #include <sys/modctl.h>
50 50 #include <sys/cmn_err.h>
51 51 #include <sys/vmsystm.h>
52 52
53 53 #include <sys/socket.h>
54 54 #include <sys/socketvar.h>
55 55 #include <fs/sockfs/sockcommon.h>
56 56 #include <fs/sockfs/socktpi.h>
57 57
58 58 #include <netinet/in.h>
59 59 #include <sys/sendfile.h>
60 60 #include <sys/un.h>
61 61 #include <sys/tihdr.h>
62 62 #include <sys/atomic.h>
63 63
64 64 #include <inet/common.h>
65 65 #include <inet/ip.h>
66 66 #include <inet/ip6.h>
67 67 #include <inet/tcp.h>
68 68
69 69 extern int sosendfile64(file_t *, file_t *, const struct ksendfilevec64 *,
70 70 ssize32_t *);
71 71 extern int nl7c_sendfilev(struct sonode *, u_offset_t *, struct sendfilevec *,
72 72 int, ssize_t *);
73 73 extern int snf_segmap(file_t *, vnode_t *, u_offset_t, u_offset_t, ssize_t *,
74 74 boolean_t);
75 75 extern sotpi_info_t *sotpi_sototpi(struct sonode *);
76 76
77 77 #define SEND_MAX_CHUNK 16
78 78
79 79 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
80 80 /*
  81  81   * 64 bit offsets for 32 bit applications only, running on either a
  82  82   * 64 bit kernel or a 32 bit kernel. For 32 bit apps, we can't transfer
83 83 * more than 2GB of data.
84 84 */
85 85 static int
86 86 sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
87 87 int copy_cnt, ssize32_t *count)
88 88 {
89 89 struct vnode *vp;
90 90 ushort_t fflag;
91 91 int ioflag;
92 92 size32_t cnt;
93 93 ssize32_t sfv_len;
94 94 ssize32_t tmpcount;
95 95 u_offset_t sfv_off;
96 96 struct uio auio;
97 97 struct iovec aiov;
98 98 int i, error;
99 99
100 100 fflag = fp->f_flag;
101 101 vp = fp->f_vnode;
102 102 for (i = 0; i < copy_cnt; i++) {
103 103
104 104 if (ISSIG(curthread, JUSTLOOKING))
105 105 return (EINTR);
106 106
107 107 /*
108 108 * Do similar checks as "write" as we are writing
109 109 * sfv_len bytes into "vp".
110 110 */
111 111 sfv_len = (ssize32_t)sfv->sfv_len;
112 112
113 113 if (sfv_len == 0) {
114 114 sfv++;
115 115 continue;
116 116 }
117 117
118 118 if (sfv_len < 0)
119 119 return (EINVAL);
120 120
121 121 if (vp->v_type == VREG) {
122 122 if (*fileoff >= curproc->p_fsz_ctl) {
123 123 mutex_enter(&curproc->p_lock);
124 124 (void) rctl_action(
125 125 rctlproc_legacy[RLIMIT_FSIZE],
126 126 curproc->p_rctls, curproc, RCA_SAFE);
127 127 mutex_exit(&curproc->p_lock);
128 128 return (EFBIG);
129 129 }
130 130
131 131 if (*fileoff >= OFFSET_MAX(fp))
132 132 return (EFBIG);
133 133
134 134 if (*fileoff + sfv_len > OFFSET_MAX(fp))
135 135 return (EINVAL);
136 136 }
137 137
138 138 tmpcount = *count + sfv_len;
139 139 if (tmpcount < 0)
140 140 return (EINVAL);
141 141
142 142 sfv_off = sfv->sfv_off;
143 143
144 144 auio.uio_extflg = UIO_COPY_DEFAULT;
145 145 if (sfv->sfv_fd == SFV_FD_SELF) {
146 146 aiov.iov_len = sfv_len;
147 147 aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
148 148 auio.uio_loffset = *fileoff;
149 149 auio.uio_iovcnt = 1;
150 150 auio.uio_resid = sfv_len;
151 151 auio.uio_iov = &aiov;
152 152 auio.uio_segflg = UIO_USERSPACE;
153 153 auio.uio_llimit = curproc->p_fsz_ctl;
154 154 auio.uio_fmode = fflag;
155 155 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
156 156 while (sfv_len > 0) {
157 157 error = VOP_WRITE(vp, &auio, ioflag,
158 158 fp->f_cred, NULL);
159 159 cnt = sfv_len - auio.uio_resid;
160 160 sfv_len -= cnt;
161 161 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
162 162 if (vp->v_type == VREG)
163 163 *fileoff += cnt;
164 164 *count += cnt;
165 165 if (error != 0)
166 166 return (error);
167 167 }
168 168 } else {
169 169 file_t *ffp;
170 170 vnode_t *readvp;
171 171 size_t size;
172 172 caddr_t ptr;
173 173
174 174 if ((ffp = getf(sfv->sfv_fd)) == NULL)
175 175 return (EBADF);
176 176
177 177 if ((ffp->f_flag & FREAD) == 0) {
178 178 releasef(sfv->sfv_fd);
179 179 return (EBADF);
180 180 }
181 181
182 182 readvp = ffp->f_vnode;
183 183 if (readvp->v_type != VREG) {
184 184 releasef(sfv->sfv_fd);
185 185 return (EINVAL);
186 186 }
187 187
188 188 /*
189 189 * No point reading and writing to same vp,
190 190 * as long as both are regular files. readvp is not
191 191 * locked; but since we got it from an open file the
192 192 * contents will be valid during the time of access.
193 193 */
194 194 if (vn_compare(vp, readvp)) {
195 195 releasef(sfv->sfv_fd);
196 196 return (EINVAL);
197 197 }
198 198
199 199 /*
200 200 * Optimize the regular file over
201 201 * the socket case.
202 202 */
203 203 if (vp->v_type == VSOCK) {
204 204 error = sosendfile64(fp, ffp, sfv,
205 205 (ssize32_t *)&cnt);
206 206 *count += cnt;
207 207 if (error)
208 208 return (error);
209 209 sfv++;
210 210 continue;
211 211 }
212 212
213 213 /*
214 214 * Note: we assume readvp != vp. "vp" is already
215 215 * locked, and "readvp" must not be.
216 216 */
217 217 if (readvp < vp) {
218 218 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
219 219 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
220 220 NULL);
221 221 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
222 222 } else {
223 223 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
224 224 NULL);
225 225 }
226 226
227 227 /*
228 228 * Same checks as in pread64.
229 229 */
230 230 if (sfv_off > MAXOFFSET_T) {
231 231 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
232 232 releasef(sfv->sfv_fd);
233 233 return (EINVAL);
234 234 }
235 235
236 236 if (sfv_off + sfv_len > MAXOFFSET_T)
237 237 sfv_len = (ssize32_t)(MAXOFFSET_T - sfv_off);
238 238
239 239 /* Find the native blocksize to transfer data */
240 240 size = MIN(vp->v_vfsp->vfs_bsize,
241 241 readvp->v_vfsp->vfs_bsize);
242 242 size = sfv_len < size ? sfv_len : size;
243 243 ptr = kmem_alloc(size, KM_NOSLEEP);
244 244 if (ptr == NULL) {
245 245 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
246 246 releasef(sfv->sfv_fd);
247 247 return (ENOMEM);
248 248 }
249 249
250 250 while (sfv_len > 0) {
251 251 size_t iov_len;
252 252
253 253 iov_len = MIN(size, sfv_len);
254 254 aiov.iov_base = ptr;
255 255 aiov.iov_len = iov_len;
256 256 auio.uio_loffset = sfv_off;
257 257 auio.uio_iov = &aiov;
258 258 auio.uio_iovcnt = 1;
259 259 auio.uio_resid = iov_len;
260 260 auio.uio_segflg = UIO_SYSSPACE;
261 261 auio.uio_llimit = MAXOFFSET_T;
262 262 auio.uio_fmode = ffp->f_flag;
263 263 ioflag = auio.uio_fmode &
264 264 (FAPPEND|FSYNC|FDSYNC|FRSYNC);
265 265
266 266 /*
267 267 * If read sync is not asked for,
268 268 * filter sync flags
269 269 */
270 270 if ((ioflag & FRSYNC) == 0)
271 271 ioflag &= ~(FSYNC|FDSYNC);
272 272 error = VOP_READ(readvp, &auio, ioflag,
273 273 fp->f_cred, NULL);
274 274 if (error) {
275 275 kmem_free(ptr, size);
276 276 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
277 277 NULL);
278 278 releasef(sfv->sfv_fd);
279 279 return (error);
280 280 }
281 281
282 282 /*
 283 283 				 * Check how much data was really read.
284 284 * Decrement the 'len' and increment the
285 285 * 'off' appropriately.
286 286 */
287 287 cnt = iov_len - auio.uio_resid;
288 288 if (cnt == 0) {
289 289 /*
290 290 * If we were reading a pipe (currently
291 291 * not implemented), we may now lose
292 292 * data.
293 293 */
294 294 kmem_free(ptr, size);
295 295 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
296 296 NULL);
297 297 releasef(sfv->sfv_fd);
298 298 return (EINVAL);
299 299 }
300 300 sfv_len -= cnt;
301 301 sfv_off += cnt;
302 302
303 303 aiov.iov_base = ptr;
304 304 aiov.iov_len = cnt;
305 305 auio.uio_loffset = *fileoff;
306 306 auio.uio_iov = &aiov;
307 307 auio.uio_iovcnt = 1;
308 308 auio.uio_resid = cnt;
309 309 auio.uio_segflg = UIO_SYSSPACE;
310 310 auio.uio_llimit = curproc->p_fsz_ctl;
311 311 auio.uio_fmode = fflag;
312 312 ioflag = auio.uio_fmode &
313 313 (FAPPEND|FSYNC|FDSYNC|FRSYNC);
314 314 error = VOP_WRITE(vp, &auio, ioflag,
315 315 fp->f_cred, NULL);
316 316
317 317 /*
318 318 * Check how much data was written. Increment
319 319 * the 'len' and decrement the 'off' if all
320 320 * the data was not written.
321 321 */
322 322 cnt -= auio.uio_resid;
323 323 sfv_len += auio.uio_resid;
324 324 sfv_off -= auio.uio_resid;
325 325 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
326 326 if (vp->v_type == VREG)
327 327 *fileoff += cnt;
328 328 *count += cnt;
329 329 if (error != 0) {
330 330 kmem_free(ptr, size);
331 331 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
332 332 NULL);
333 333 releasef(sfv->sfv_fd);
334 334 return (error);
335 335 }
336 336 }
337 337 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
338 338 releasef(sfv->sfv_fd);
339 339 kmem_free(ptr, size);
340 340 }
341 341 sfv++;
342 342 }
343 343 return (0);
344 344 }
345 345
346 346 static ssize32_t
347 347 sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
348 348 size32_t *xferred, int fildes)
349 349 {
350 350 u_offset_t fileoff;
351 351 int copy_cnt;
352 352 const struct ksendfilevec64 *copy_vec;
353 353 struct ksendfilevec64 sfv[SEND_MAX_CHUNK];
354 354 struct vnode *vp;
355 355 int error;
356 356 ssize32_t count = 0;
357 357
358 358 vp = fp->f_vnode;
359 359 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
360 360
361 361 copy_vec = vec;
362 362 fileoff = fp->f_offset;
363 363
364 364 do {
365 365 copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
366 366 if (copyin(copy_vec, sfv, copy_cnt *
367 367 sizeof (struct ksendfilevec64))) {
368 368 error = EFAULT;
369 369 break;
370 370 }
371 371
372 372 error = sendvec_chunk64(fp, &fileoff, sfv, copy_cnt, &count);
373 373 if (error != 0)
374 374 break;
375 375
376 376 copy_vec += copy_cnt;
377 377 sfvcnt -= copy_cnt;
378 378 } while (sfvcnt > 0);
379 379
380 380 if (vp->v_type == VREG)
381 381 fp->f_offset += count;
382 382
383 383 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
384 384 if (copyout(&count, xferred, sizeof (count)))
385 385 error = EFAULT;
386 386 releasef(fildes);
387 387 if (error != 0)
388 388 return (set_errno(error));
389 389 return (count);
390 390 }
391 391 #endif
392 392
393 393 static int
394 394 sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
395 395 int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
396 396 {
397 397 struct vnode *vp;
398 398 struct uio auio;
399 399 struct iovec aiov;
400 400 ushort_t fflag;
401 401 int ioflag;
402 402 int i, error;
403 403 size_t cnt;
404 404 ssize_t sfv_len;
405 405 u_offset_t sfv_off;
406 406 #ifdef _SYSCALL32_IMPL
407 407 model_t model = get_udatamodel();
408 408 u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
409 409 MAXOFF32_T : MAXOFFSET_T;
410 410 #else
411 411 const u_offset_t maxoff = MAXOFF32_T;
412 412 #endif
413 413 mblk_t *dmp = NULL;
414 414 int wroff;
415 415 int buf_left = 0;
416 416 size_t iov_len;
417 417 mblk_t *head, *tmp;
418 418 size_t size = total_size;
419 419 size_t extra;
420 420 int tail_len;
421 421 struct nmsghdr msg;
422 422
423 423 fflag = fp->f_flag;
424 424 vp = fp->f_vnode;
425 425
426 426 ASSERT(vp->v_type == VSOCK);
427 427 ASSERT(maxblk > 0);
428 428
429 429 /* If nothing to send, return */
430 430 if (total_size == 0)
431 431 return (0);
432 432
433 433 if (vp->v_stream != NULL) {
434 434 wroff = (int)vp->v_stream->sd_wroff;
435 435 tail_len = (int)vp->v_stream->sd_tail;
436 436 } else {
437 437 struct sonode *so;
438 438
439 439 so = VTOSO(vp);
440 440 wroff = so->so_proto_props.sopp_wroff;
441 441 tail_len = so->so_proto_props.sopp_tail;
442 442 }
443 443
444 444 extra = wroff + tail_len;
445 445
446 446 buf_left = MIN(total_size, maxblk);
447 447 head = dmp = allocb(buf_left + extra, BPRI_HI);
448 448 if (head == NULL)
449 449 return (ENOMEM);
450 450 head->b_wptr = head->b_rptr = head->b_rptr + wroff;
451 451 bzero(&msg, sizeof (msg));
452 452
453 453 auio.uio_extflg = UIO_COPY_DEFAULT;
454 454 for (i = 0; i < copy_cnt; i++) {
455 455 if (ISSIG(curthread, JUSTLOOKING)) {
456 456 freemsg(head);
457 457 return (EINTR);
458 458 }
459 459
460 460 /*
461 461 * Do similar checks as "write" as we are writing
462 462 * sfv_len bytes into "vp".
463 463 */
464 464 sfv_len = (ssize_t)sfv->sfv_len;
465 465
466 466 if (sfv_len == 0) {
467 467 sfv++;
468 468 continue;
469 469 }
470 470
471 471 /* Check for overflow */
472 472 #ifdef _SYSCALL32_IMPL
473 473 if (model == DATAMODEL_ILP32) {
474 474 if (((ssize32_t)(*count + sfv_len)) < 0) {
475 475 freemsg(head);
476 476 return (EINVAL);
477 477 }
478 478 } else
479 479 #endif
480 480 if ((*count + sfv_len) < 0) {
481 481 freemsg(head);
482 482 return (EINVAL);
483 483 }
484 484
485 485 sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
486 486
487 487 if (sfv->sfv_fd == SFV_FD_SELF) {
488 488 while (sfv_len > 0) {
489 489 if (buf_left == 0) {
490 490 tmp = dmp;
491 491 buf_left = MIN(total_size, maxblk);
492 492 iov_len = MIN(buf_left, sfv_len);
493 493 dmp = allocb(buf_left + extra, BPRI_HI);
494 494 if (dmp == NULL) {
495 495 freemsg(head);
496 496 return (ENOMEM);
497 497 }
498 498 dmp->b_wptr = dmp->b_rptr =
499 499 dmp->b_rptr + wroff;
500 500 tmp->b_cont = dmp;
501 501 } else {
502 502 iov_len = MIN(buf_left, sfv_len);
503 503 }
504 504
505 505 aiov.iov_len = iov_len;
506 506 aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
507 507 auio.uio_loffset = *fileoff;
508 508 auio.uio_iovcnt = 1;
509 509 auio.uio_resid = iov_len;
510 510 auio.uio_iov = &aiov;
511 511 auio.uio_segflg = UIO_USERSPACE;
512 512 auio.uio_llimit = curproc->p_fsz_ctl;
513 513 auio.uio_fmode = fflag;
514 514
515 515 buf_left -= iov_len;
516 516 total_size -= iov_len;
517 517 sfv_len -= iov_len;
518 518 sfv_off += iov_len;
519 519
520 520 error = uiomove((caddr_t)dmp->b_wptr,
521 521 iov_len, UIO_WRITE, &auio);
522 522 if (error != 0) {
523 523 freemsg(head);
524 524 return (error);
525 525 }
526 526 dmp->b_wptr += iov_len;
527 527 }
528 528 } else {
529 529 file_t *ffp;
530 530 vnode_t *readvp;
531 531
532 532 if ((ffp = getf(sfv->sfv_fd)) == NULL) {
533 533 freemsg(head);
534 534 return (EBADF);
535 535 }
536 536
537 537 if ((ffp->f_flag & FREAD) == 0) {
538 538 releasef(sfv->sfv_fd);
539 539 freemsg(head);
540 540 return (EACCES);
541 541 }
542 542
543 543 readvp = ffp->f_vnode;
544 544 if (readvp->v_type != VREG) {
545 545 releasef(sfv->sfv_fd);
546 546 freemsg(head);
547 547 return (EINVAL);
548 548 }
549 549
550 550 /*
551 551 * No point reading and writing to same vp,
552 552 * as long as both are regular files. readvp is not
553 553 * locked; but since we got it from an open file the
554 554 * contents will be valid during the time of access.
555 555 */
556 556
557 557 if (vn_compare(vp, readvp)) {
558 558 releasef(sfv->sfv_fd);
559 559 freemsg(head);
560 560 return (EINVAL);
561 561 }
562 562
563 563 /*
564 564 * Note: we assume readvp != vp. "vp" is already
565 565 * locked, and "readvp" must not be.
566 566 */
567 567
568 568 if (readvp < vp) {
569 569 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
570 570 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
571 571 NULL);
572 572 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
573 573 } else {
574 574 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
575 575 NULL);
576 576 }
577 577
578 578 /* Same checks as in pread */
579 579 if (sfv_off > maxoff) {
580 580 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
581 581 releasef(sfv->sfv_fd);
582 582 freemsg(head);
583 583 return (EINVAL);
584 584 }
585 585 if (sfv_off + sfv_len > maxoff) {
586 586 total_size -= (sfv_off + sfv_len - maxoff);
587 587 sfv_len = (ssize_t)((offset_t)maxoff -
588 588 sfv_off);
589 589 }
590 590
591 591 while (sfv_len > 0) {
592 592 if (buf_left == 0) {
593 593 tmp = dmp;
594 594 buf_left = MIN(total_size, maxblk);
595 595 iov_len = MIN(buf_left, sfv_len);
596 596 dmp = allocb(buf_left + extra, BPRI_HI);
597 597 if (dmp == NULL) {
598 598 VOP_RWUNLOCK(readvp,
599 599 V_WRITELOCK_FALSE, NULL);
600 600 releasef(sfv->sfv_fd);
601 601 freemsg(head);
602 602 return (ENOMEM);
603 603 }
604 604 dmp->b_wptr = dmp->b_rptr =
605 605 dmp->b_rptr + wroff;
606 606 tmp->b_cont = dmp;
607 607 } else {
608 608 iov_len = MIN(buf_left, sfv_len);
609 609 }
610 610 aiov.iov_base = (caddr_t)dmp->b_wptr;
611 611 aiov.iov_len = iov_len;
612 612 auio.uio_loffset = sfv_off;
613 613 auio.uio_iov = &aiov;
614 614 auio.uio_iovcnt = 1;
615 615 auio.uio_resid = iov_len;
616 616 auio.uio_segflg = UIO_SYSSPACE;
617 617 auio.uio_llimit = MAXOFFSET_T;
618 618 auio.uio_fmode = ffp->f_flag;
619 619 ioflag = auio.uio_fmode &
620 620 (FAPPEND|FSYNC|FDSYNC|FRSYNC);
621 621
622 622 /*
623 623 * If read sync is not asked for,
624 624 * filter sync flags
625 625 */
626 626 if ((ioflag & FRSYNC) == 0)
627 627 ioflag &= ~(FSYNC|FDSYNC);
628 628 error = VOP_READ(readvp, &auio, ioflag,
629 629 fp->f_cred, NULL);
630 630 if (error != 0) {
631 631 /*
632 632 * If we were reading a pipe (currently
 633 633 					 * not implemented), we may now lose
634 634 * data.
635 635 */
636 636 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
637 637 NULL);
638 638 releasef(sfv->sfv_fd);
639 639 freemsg(head);
640 640 return (error);
641 641 }
642 642
643 643 /*
644 644 * Check how much data was really read.
645 645 * Decrement the 'len' and increment the
646 646 * 'off' appropriately.
647 647 */
648 648 cnt = iov_len - auio.uio_resid;
649 649 if (cnt == 0) {
650 650 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
651 651 NULL);
652 652 releasef(sfv->sfv_fd);
653 653 freemsg(head);
654 654 return (EINVAL);
655 655 }
656 656 sfv_len -= cnt;
657 657 sfv_off += cnt;
658 658 total_size -= cnt;
659 659 buf_left -= cnt;
660 660
661 661 dmp->b_wptr += cnt;
662 662 }
663 663 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
664 664 releasef(sfv->sfv_fd);
665 665 }
666 666 sfv++;
667 667 }
668 668
669 669 ASSERT(total_size == 0);
670 670 error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &head);
671 671 if (error != 0) {
672 672 if (head != NULL)
673 673 freemsg(head);
674 674 return (error);
675 675 }
676 676 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)size;
677 677 *count += size;
678 678
679 679 return (0);
680 680 }
681 681
682 682
683 683 static int
684 684 sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
685 685 int copy_cnt, ssize_t *count)
686 686 {
687 687 struct vnode *vp;
688 688 struct uio auio;
689 689 struct iovec aiov;
690 690 ushort_t fflag;
691 691 int ioflag;
692 692 int i, error;
693 693 size_t cnt;
694 694 ssize_t sfv_len;
695 695 u_offset_t sfv_off;
696 696 #ifdef _SYSCALL32_IMPL
697 697 model_t model = get_udatamodel();
698 698 u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
699 699 MAXOFF32_T : MAXOFFSET_T;
700 700 #else
701 701 const u_offset_t maxoff = MAXOFF32_T;
702 702 #endif
703 703 mblk_t *dmp = NULL;
704 704 char *buf = NULL;
705 705 size_t extra;
706 706 int maxblk, wroff, tail_len;
707 707 struct sonode *so;
708 708 stdata_t *stp;
709 709 struct nmsghdr msg;
710 710
711 711 fflag = fp->f_flag;
712 712 vp = fp->f_vnode;
713 713
714 714 if (vp->v_type == VSOCK) {
715 715 so = VTOSO(vp);
716 716 if (vp->v_stream != NULL) {
717 717 stp = vp->v_stream;
718 718 wroff = (int)stp->sd_wroff;
719 719 tail_len = (int)stp->sd_tail;
720 720 maxblk = (int)stp->sd_maxblk;
721 721 } else {
722 722 stp = NULL;
723 723 wroff = so->so_proto_props.sopp_wroff;
724 724 tail_len = so->so_proto_props.sopp_tail;
725 725 maxblk = so->so_proto_props.sopp_maxblk;
726 726 }
727 727 extra = wroff + tail_len;
728 728 }
729 729
730 730 bzero(&msg, sizeof (msg));
731 731 auio.uio_extflg = UIO_COPY_DEFAULT;
732 732 for (i = 0; i < copy_cnt; i++) {
733 733 if (ISSIG(curthread, JUSTLOOKING))
734 734 return (EINTR);
735 735
736 736 /*
737 737 * Do similar checks as "write" as we are writing
738 738 * sfv_len bytes into "vp".
739 739 */
740 740 sfv_len = (ssize_t)sfv->sfv_len;
741 741
742 742 if (sfv_len == 0) {
743 743 sfv++;
744 744 continue;
745 745 }
746 746
747 747 if (vp->v_type == VREG) {
748 748 if (*fileoff >= curproc->p_fsz_ctl) {
749 749 mutex_enter(&curproc->p_lock);
750 750 (void) rctl_action(
751 751 rctlproc_legacy[RLIMIT_FSIZE],
752 752 curproc->p_rctls, curproc, RCA_SAFE);
753 753 mutex_exit(&curproc->p_lock);
754 754
755 755 return (EFBIG);
756 756 }
757 757
758 758 if (*fileoff >= maxoff)
759 759 return (EFBIG);
760 760
761 761 if (*fileoff + sfv_len > maxoff)
762 762 return (EINVAL);
763 763 }
764 764
765 765 /* Check for overflow */
766 766 #ifdef _SYSCALL32_IMPL
767 767 if (model == DATAMODEL_ILP32) {
768 768 if (((ssize32_t)(*count + sfv_len)) < 0)
769 769 return (EINVAL);
770 770 } else
771 771 #endif
772 772 if ((*count + sfv_len) < 0)
773 773 return (EINVAL);
774 774
775 775 sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
776 776
777 777 if (sfv->sfv_fd == SFV_FD_SELF) {
778 778 if (vp->v_type == VSOCK) {
779 779 while (sfv_len > 0) {
780 780 size_t iov_len;
781 781
782 782 iov_len = sfv_len;
783 783 /*
784 784 * Socket filters can limit the mblk
785 785 * size, so limit reads to maxblk if
786 786 * there are filters present.
787 787 */
788 788 if (so->so_filter_active > 0 &&
789 789 maxblk != INFPSZ)
790 790 iov_len = MIN(iov_len, maxblk);
791 791
792 792 aiov.iov_len = iov_len;
793 793 aiov.iov_base =
794 794 (caddr_t)(uintptr_t)sfv_off;
795 795
796 796 auio.uio_iov = &aiov;
797 797 auio.uio_iovcnt = 1;
798 798 auio.uio_loffset = *fileoff;
799 799 auio.uio_segflg = UIO_USERSPACE;
800 800 auio.uio_fmode = fflag;
801 801 auio.uio_llimit = curproc->p_fsz_ctl;
802 802 auio.uio_resid = iov_len;
803 803
804 804 dmp = allocb(iov_len + extra, BPRI_HI);
805 805 if (dmp == NULL)
806 806 return (ENOMEM);
807 807 dmp->b_wptr = dmp->b_rptr =
808 808 dmp->b_rptr + wroff;
809 809 error = uiomove((caddr_t)dmp->b_wptr,
810 810 iov_len, UIO_WRITE, &auio);
811 811 if (error != 0) {
812 812 freeb(dmp);
813 813 return (error);
814 814 }
815 815 dmp->b_wptr += iov_len;
816 816 error = socket_sendmblk(VTOSO(vp),
817 817 &msg, fflag, CRED(), &dmp);
818 818
819 819 if (error != 0) {
820 820 if (dmp != NULL)
821 821 freeb(dmp);
822 822 return (error);
823 823 }
824 824 ttolwp(curthread)->lwp_ru.ioch +=
825 825 (ulong_t)iov_len;
826 826 *count += iov_len;
827 827 sfv_len -= iov_len;
828 828 sfv_off += iov_len;
829 829 }
830 830 } else {
831 831 aiov.iov_len = sfv_len;
832 832 aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
833 833
834 834 auio.uio_iov = &aiov;
835 835 auio.uio_iovcnt = 1;
836 836 auio.uio_loffset = *fileoff;
837 837 auio.uio_segflg = UIO_USERSPACE;
838 838 auio.uio_fmode = fflag;
839 839 auio.uio_llimit = curproc->p_fsz_ctl;
840 840 auio.uio_resid = sfv_len;
841 841
842 842 ioflag = auio.uio_fmode &
843 843 (FAPPEND|FSYNC|FDSYNC|FRSYNC);
844 844 while (sfv_len > 0) {
845 845 error = VOP_WRITE(vp, &auio, ioflag,
846 846 fp->f_cred, NULL);
847 847 cnt = sfv_len - auio.uio_resid;
848 848 sfv_len -= cnt;
849 849 ttolwp(curthread)->lwp_ru.ioch +=
850 850 (ulong_t)cnt;
851 851 *fileoff += cnt;
852 852 *count += cnt;
853 853 if (error != 0)
854 854 return (error);
855 855 }
856 856 }
857 857 } else {
858 858 int segmapit = 0;
859 859 file_t *ffp;
860 860 vnode_t *readvp;
861 861 struct vnode *realvp;
862 862 size_t size;
863 863 caddr_t ptr;
864 864
865 865 if ((ffp = getf(sfv->sfv_fd)) == NULL)
866 866 return (EBADF);
867 867
868 868 if ((ffp->f_flag & FREAD) == 0) {
869 869 releasef(sfv->sfv_fd);
870 870 return (EBADF);
871 871 }
872 872
873 873 readvp = ffp->f_vnode;
874 874 if (VOP_REALVP(readvp, &realvp, NULL) == 0)
875 875 readvp = realvp;
876 876 if (readvp->v_type != VREG) {
877 877 releasef(sfv->sfv_fd);
878 878 return (EINVAL);
879 879 }
880 880
881 881 /*
882 882 * No point reading and writing to same vp,
883 883 * as long as both are regular files. readvp is not
884 884 * locked; but since we got it from an open file the
885 885 * contents will be valid during the time of access.
886 886 */
887 887 if (vn_compare(vp, readvp)) {
888 888 releasef(sfv->sfv_fd);
889 889 return (EINVAL);
890 890 }
891 891
892 892 /*
893 893 * Note: we assume readvp != vp. "vp" is already
894 894 * locked, and "readvp" must not be.
895 895 */
896 896 if (readvp < vp) {
897 897 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
898 898 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
899 899 NULL);
900 900 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
901 901 } else {
902 902 (void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
903 903 NULL);
904 904 }
905 905
906 906 /* Same checks as in pread */
907 907 if (sfv_off > maxoff) {
908 908 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
909 909 releasef(sfv->sfv_fd);
910 910 return (EINVAL);
911 911 }
912 912 if (sfv_off + sfv_len > maxoff) {
913 913 sfv_len = (ssize_t)((offset_t)maxoff -
914 914 sfv_off);
915 915 }
916 916 /* Find the native blocksize to transfer data */
917 917 size = MIN(vp->v_vfsp->vfs_bsize,
918 918 readvp->v_vfsp->vfs_bsize);
919 919 size = sfv_len < size ? sfv_len : size;
920 920
921 921 if (vp->v_type != VSOCK) {
922 922 segmapit = 0;
923 923 buf = kmem_alloc(size, KM_NOSLEEP);
924 924 if (buf == NULL) {
925 925 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
926 926 NULL);
927 927 releasef(sfv->sfv_fd);
928 928 return (ENOMEM);
929 929 }
930 930 } else {
931 931 uint_t copyflag;
932 932
933 933 copyflag = stp != NULL ? stp->sd_copyflag :
934 934 so->so_proto_props.sopp_zcopyflag;
935 935
936 936 /*
937 937 * Socket filters can limit the mblk size,
938 938 * so limit reads to maxblk if there are
939 939 * filters present.
940 940 */
941 941 if (so->so_filter_active > 0 &&
942 942 maxblk != INFPSZ)
943 943 size = MIN(size, maxblk);
944 944
945 945 if (vn_has_flocks(readvp) ||
946 946 readvp->v_flag & VNOMAP ||
947 947 copyflag & STZCVMUNSAFE) {
948 948 segmapit = 0;
949 949 } else if (copyflag & STZCVMSAFE) {
950 950 segmapit = 1;
951 951 } else {
952 952 int on = 1;
953 953 if (socket_setsockopt(VTOSO(vp),
954 954 SOL_SOCKET, SO_SND_COPYAVOID,
955 955 &on, sizeof (on), CRED()) == 0)
956 956 segmapit = 1;
957 957 }
958 958 }
959 959
960 960 if (segmapit) {
961 961 boolean_t nowait;
962 962
963 963 nowait = (sfv->sfv_flag & SFV_NOWAIT) != 0;
964 964 error = snf_segmap(fp, readvp, sfv_off,
965 965 (u_offset_t)sfv_len, (ssize_t *)&cnt,
966 966 nowait);
967 967 releasef(sfv->sfv_fd);
968 968 *count += cnt;
969 969 if (error)
970 970 return (error);
971 971 sfv++;
972 972 continue;
973 973 }
974 974
975 975 while (sfv_len > 0) {
976 976 size_t iov_len;
977 977
978 978 iov_len = MIN(size, sfv_len);
979 979
980 980 if (vp->v_type == VSOCK) {
981 981 dmp = allocb(iov_len + extra, BPRI_HI);
982 982 if (dmp == NULL) {
983 983 VOP_RWUNLOCK(readvp,
984 984 V_WRITELOCK_FALSE, NULL);
985 985 releasef(sfv->sfv_fd);
986 986 return (ENOMEM);
987 987 }
988 988 dmp->b_wptr = dmp->b_rptr =
989 989 dmp->b_rptr + wroff;
990 990 ptr = (caddr_t)dmp->b_rptr;
991 991 } else {
992 992 ptr = buf;
993 993 }
994 994
995 995 aiov.iov_base = ptr;
996 996 aiov.iov_len = iov_len;
997 997 auio.uio_loffset = sfv_off;
998 998 auio.uio_iov = &aiov;
999 999 auio.uio_iovcnt = 1;
1000 1000 auio.uio_resid = iov_len;
1001 1001 auio.uio_segflg = UIO_SYSSPACE;
1002 1002 auio.uio_llimit = MAXOFFSET_T;
1003 1003 auio.uio_fmode = ffp->f_flag;
1004 1004 ioflag = auio.uio_fmode &
1005 1005 (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1006 1006
1007 1007 /*
1008 1008 * If read sync is not asked for,
1009 1009 * filter sync flags
1010 1010 */
1011 1011 if ((ioflag & FRSYNC) == 0)
1012 1012 ioflag &= ~(FSYNC|FDSYNC);
1013 1013 error = VOP_READ(readvp, &auio, ioflag,
1014 1014 fp->f_cred, NULL);
1015 1015 if (error != 0) {
1016 1016 /*
1017 1017 * If we were reading a pipe (currently
1018 1018 * not implemented), we may now lose
1019 1019 * data.
1020 1020 */
1021 1021 if (vp->v_type == VSOCK)
1022 1022 freeb(dmp);
1023 1023 else
1024 1024 kmem_free(buf, size);
1025 1025 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
1026 1026 NULL);
1027 1027 releasef(sfv->sfv_fd);
1028 1028 return (error);
1029 1029 }
1030 1030
1031 1031 /*
1032 1032 * Check how much data was really read.
1033 1033 * Decrement the 'len' and increment the
1034 1034 * 'off' appropriately.
1035 1035 */
1036 1036 cnt = iov_len - auio.uio_resid;
1037 1037 if (cnt == 0) {
1038 1038 if (vp->v_type == VSOCK)
1039 1039 freeb(dmp);
1040 1040 else
1041 1041 kmem_free(buf, size);
1042 1042 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
1043 1043 NULL);
1044 1044 releasef(sfv->sfv_fd);
1045 1045 return (EINVAL);
1046 1046 }
1047 1047 sfv_len -= cnt;
1048 1048 sfv_off += cnt;
1049 1049
1050 1050 if (vp->v_type == VSOCK) {
1051 1051 dmp->b_wptr = dmp->b_rptr + cnt;
1052 1052
1053 1053 error = socket_sendmblk(VTOSO(vp),
1054 1054 &msg, fflag, CRED(), &dmp);
1055 1055
1056 1056 if (error != 0) {
1057 1057 if (dmp != NULL)
1058 1058 freeb(dmp);
1059 1059 VOP_RWUNLOCK(readvp,
1060 1060 V_WRITELOCK_FALSE, NULL);
1061 1061 releasef(sfv->sfv_fd);
1062 1062 return (error);
1063 1063 }
1064 1064
1065 1065 ttolwp(curthread)->lwp_ru.ioch +=
1066 1066 (ulong_t)cnt;
1067 1067 *count += cnt;
1068 1068 } else {
1069 1069
1070 1070 aiov.iov_base = ptr;
1071 1071 aiov.iov_len = cnt;
1072 1072 auio.uio_loffset = *fileoff;
1073 1073 auio.uio_resid = cnt;
1074 1074 auio.uio_iov = &aiov;
1075 1075 auio.uio_iovcnt = 1;
1076 1076 auio.uio_segflg = UIO_SYSSPACE;
1077 1077 auio.uio_llimit = curproc->p_fsz_ctl;
1078 1078 auio.uio_fmode = fflag;
1079 1079 ioflag = auio.uio_fmode &
1080 1080 (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1081 1081 error = VOP_WRITE(vp, &auio, ioflag,
1082 1082 fp->f_cred, NULL);
1083 1083
1084 1084 /*
1085 1085 * Check how much data was written.
1086 1086 * Increment the 'len' and decrement the
1087 1087 * 'off' if all the data was not
1088 1088 * written.
1089 1089 */
1090 1090 cnt -= auio.uio_resid;
1091 1091 sfv_len += auio.uio_resid;
1092 1092 sfv_off -= auio.uio_resid;
1093 1093 ttolwp(curthread)->lwp_ru.ioch +=
1094 1094 (ulong_t)cnt;
1095 1095 *fileoff += cnt;
1096 1096 *count += cnt;
1097 1097 if (error != 0) {
1098 1098 kmem_free(buf, size);
1099 1099 VOP_RWUNLOCK(readvp,
1100 1100 V_WRITELOCK_FALSE, NULL);
1101 1101 releasef(sfv->sfv_fd);
1102 1102 return (error);
1103 1103 }
1104 1104 }
1105 1105 }
1106 1106 if (buf) {
1107 1107 kmem_free(buf, size);
1108 1108 buf = NULL;
1109 1109 }
1110 1110 VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
1111 1111 releasef(sfv->sfv_fd);
1112 1112 }
1113 1113 sfv++;
1114 1114 }
1115 1115 return (0);
1116 1116 }
1117 1117
1118 1118 ssize_t
1119 1119 sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
1120 1120 size_t *xferred)
1121 1121 {
1122 1122 int error = 0;
1123 1123 int first_vector_error = 0;
1124 1124 file_t *fp;
1125 1125 struct vnode *vp;
1126 1126 struct sonode *so;
1127 1127 u_offset_t fileoff;
1128 1128 int copy_cnt;
1129 1129 const struct sendfilevec *copy_vec;
1130 1130 struct sendfilevec sfv[SEND_MAX_CHUNK];
1131 1131 ssize_t count = 0;
1132 1132 #ifdef _SYSCALL32_IMPL
1133 1133 struct ksendfilevec32 sfv32[SEND_MAX_CHUNK];
1134 1134 #endif
1135 1135 ssize_t total_size;
1136 1136 int i;
1137 1137 boolean_t is_sock = B_FALSE;
1138 1138 int maxblk = 0;
1139 1139
1140 1140 if (sfvcnt <= 0)
1141 1141 return (set_errno(EINVAL));
1142 1142
1143 1143 if ((fp = getf(fildes)) == NULL)
1144 1144 return (set_errno(EBADF));
1145 1145
1146 1146 if (((fp->f_flag) & FWRITE) == 0) {
1147 1147 error = EBADF;
1148 1148 goto err;
1149 1149 }
1150 1150
1151 1151 fileoff = fp->f_offset;
1152 1152 vp = fp->f_vnode;
1153 1153
1154 1154 switch (vp->v_type) {
1155 1155 case VSOCK:
1156 1156 so = VTOSO(vp);
1157 1157 is_sock = B_TRUE;
1158 1158 if (SOCK_IS_NONSTR(so)) {
1159 1159 maxblk = so->so_proto_props.sopp_maxblk;
1160 1160 } else {
1161 1161 maxblk = (int)vp->v_stream->sd_maxblk;
1162 1162 }
1163 1163
1164 1164 /*
1165 1165 * We need to make sure that the socket that we're sending on
1166 1166 * supports sendfile behavior. sockfs doesn't know that the APIs
1167 1167 * we want to use are coming from sendfile, so we can't rely on
1168 1168 * it to check for us.
1169 1169 */
1170 1170 if ((so->so_mode & SM_SENDFILESUPP) == 0) {
1171 1171 error = EOPNOTSUPP;
1172 1172 goto err;
1173 1173 }
1174 1174 break;
1175 1175 case VREG:
1176 1176 break;
1177 1177 default:
1178 1178 error = EINVAL;
1179 1179 goto err;
1180 1180 }
1181 1181
1182 1182 switch (opcode) {
1183 1183 case SENDFILEV :
1184 1184 break;
1185 1185 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1186 1186 case SENDFILEV64 :
1187 1187 return (sendvec64(fp, (struct ksendfilevec64 *)vec, sfvcnt,
1188 1188 (size32_t *)xferred, fildes));
1189 1189 #endif
1190 1190 default :
1191 1191 error = ENOSYS;
1192 1192 break;
1193 1193 }
1194 1194
1195 1195 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1196 1196 copy_vec = vec;
1197 1197
1198 1198 do {
1199 1199 total_size = 0;
1200 1200 copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
1201 1201 #ifdef _SYSCALL32_IMPL
1202 1202 /* 32-bit callers need to have their iovec expanded. */
1203 1203 if (get_udatamodel() == DATAMODEL_ILP32) {
1204 1204 if (copyin(copy_vec, sfv32,
1205 1205 copy_cnt * sizeof (ksendfilevec32_t))) {
1206 1206 error = EFAULT;
1207 1207 break;
1208 1208 }
1209 1209
1210 1210 for (i = 0; i < copy_cnt; i++) {
1211 1211 sfv[i].sfv_fd = sfv32[i].sfv_fd;
1212 1212 sfv[i].sfv_off =
1213 1213 (off_t)(uint32_t)sfv32[i].sfv_off;
1214 1214 sfv[i].sfv_len = (size_t)sfv32[i].sfv_len;
1215 1215 total_size += sfv[i].sfv_len;
1216 1216 sfv[i].sfv_flag = sfv32[i].sfv_flag;
1217 1217 /*
1218 1218 * Individual elements of the vector must not
1219 1219 * wrap or overflow, as later math is signed.
1220 1220 * Equally total_size needs to be checked after
1221 1221 * each vector is added in, to be sure that
1222 1222 * rogue values haven't overflowed the counter.
1223 1223 */
1224 1224 if (((ssize32_t)sfv[i].sfv_len < 0) ||
1225 1225 ((ssize32_t)total_size < 0)) {
1226 1226 /*
1227 1227 * Truncate the vector to send data
1228 1228 * described by elements before the
1229 1229 * error.
1230 1230 */
1231 1231 copy_cnt = i;
1232 1232 first_vector_error = EINVAL;
1233 1233 /* total_size can't be trusted */
1234 1234 if ((ssize32_t)total_size < 0)
1235 1235 error = EINVAL;
1236 1236 break;
1237 1237 }
1238 1238 }
1239 1239 /* Nothing to do, process errors */
1240 1240 if (copy_cnt == 0)
1241 1241 break;
1242 1242
1243 1243 } else {
1244 1244 #endif
1245 1245 if (copyin(copy_vec, sfv,
1246 1246 copy_cnt * sizeof (sendfilevec_t))) {
1247 1247 error = EFAULT;
1248 1248 break;
1249 1249 }
1250 1250
1251 1251 for (i = 0; i < copy_cnt; i++) {
1252 1252 total_size += sfv[i].sfv_len;
1253 1253 /*
1254 1254 * Individual elements of the vector must not
1255 1255 * wrap or overflow, as later math is signed.
1256 1256 * Equally total_size needs to be checked after
1257 1257 * each vector is added in, to be sure that
1258 1258 * rogue values haven't overflowed the counter.
1259 1259 */
1260 1260 if (((ssize_t)sfv[i].sfv_len < 0) ||
1261 1261 (total_size < 0)) {
1262 1262 /*
1263 1263 * Truncate the vector to send data
1264 1264 * described by elements before the
1265 1265 * error.
1266 1266 */
1267 1267 copy_cnt = i;
1268 1268 first_vector_error = EINVAL;
1269 1269 /* total_size can't be trusted */
1270 1270 if (total_size < 0)
1271 1271 error = EINVAL;
1272 1272 break;
1273 1273 }
1274 1274 }
1275 1275 /* Nothing to do, process errors */
1276 1276 if (copy_cnt == 0)
1277 1277 break;
1278 1278 #ifdef _SYSCALL32_IMPL
1279 1279 }
1280 1280 #endif
1281 1281
1282 1282 /*
1283 1283 		 * The choice between sendvec_small_chunk and sendvec_chunk
1284 1284 		 * depends on multiple things:
1285 1285 		 *
1286 1286 		 * i) latency is important for smaller files. So if the
1287 1287 		 * data is smaller than 'tcp_slow_start_initial' times
1288 1288 		 * maxblk, then use sendvec_small_chunk, which creates
1289 1289 		 * maxblk-sized mblks, chains them together and sends
1290 1290 		 * them to TCP in one shot. It also leaves 'wroff' bytes
1291 1291 		 * of space for the headers in each mblk.
1292 1292 		 *
1293 1293 		 * ii) for a total size bigger than 'tcp_slow_start_initial'
1294 1294 		 * times maxblk, it's probably real file data that is
1295 1295 		 * dominating, so it's better to use sendvec_chunk because
1296 1296 		 * performance suffers badly if we don't do pagesize reads.
1297 1297 		 * sendvec_chunk will do pagesize reads and write them
1298 1298 		 * in pagesize mblks to TCP.
1299 1299 		 *
1300 1300 		 * Side notes: a write to a file has not been optimized.
1301 1301 		 * Future zero-copy code will plug into sendvec_chunk
1302 1302 		 * only, because doing zero copy for files smaller than
1303 1303 		 * pagesize is useless.
1304 1304 		 *
1305 1305 		 * Note: if the socket has NL7C enabled, call NL7C's
1306 1306 		 * sendfilev() function to consume the sfv[].
1307 1307 */
1308 1308 if (is_sock) {
1309 1309 if (!SOCK_IS_NONSTR(so) &&
1310 1310 _SOTOTPI(so)->sti_nl7c_flags != 0) {
1311 1311 error = nl7c_sendfilev(so, &fileoff,
1312 1312 sfv, copy_cnt, &count);
1313 1313 } else if ((total_size <= (4 * maxblk)) &&
1314 1314 error == 0) {
1315 1315 error = sendvec_small_chunk(fp,
1316 1316 &fileoff, sfv, copy_cnt,
1317 1317 total_size, maxblk, &count);
1318 1318 } else {
1319 1319 error = sendvec_chunk(fp, &fileoff,
1320 1320 sfv, copy_cnt, &count);
1321 1321 }
1322 1322 } else {
1323 1323 ASSERT(vp->v_type == VREG);
1324 1324 error = sendvec_chunk(fp, &fileoff, sfv, copy_cnt,
1325 1325 &count);
1326 1326 }
1327 1327
1328 1328
1329 1329 #ifdef _SYSCALL32_IMPL
1330 1330 if (get_udatamodel() == DATAMODEL_ILP32)
1331 1331 copy_vec = (const struct sendfilevec *)((char *)copy_vec +
1332 1332 (copy_cnt * sizeof (ksendfilevec32_t)));
1333 1333 else
1334 1334 #endif
1335 1335 copy_vec += copy_cnt;
1336 1336 sfvcnt -= copy_cnt;
1337 1337
1338 1338 /* Process all vector members up to first error */
1339 1339 } while ((sfvcnt > 0) && first_vector_error == 0 && error == 0);
1340 1340
1341 1341 if (vp->v_type == VREG)
1342 1342 fp->f_offset += count;
1343 1343
1344 1344 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1345 1345
1346 1346 #ifdef _SYSCALL32_IMPL
1347 1347 if (get_udatamodel() == DATAMODEL_ILP32) {
1348 1348 ssize32_t count32 = (ssize32_t)count;
1349 1349 if (copyout(&count32, xferred, sizeof (count32)))
1350 1350 error = EFAULT;
1351 1351 releasef(fildes);
1352 1352 if (error != 0)
1353 1353 return (set_errno(error));
1354 1354 if (first_vector_error != 0)
1355 1355 return (set_errno(first_vector_error));
1356 1356 return (count32);
1357 1357 }
1358 1358 #endif
1359 1359 if (copyout(&count, xferred, sizeof (count)))
1360 1360 error = EFAULT;
1361 1361 releasef(fildes);
1362 1362 if (error != 0)
1363 1363 return (set_errno(error));
1364 1364 if (first_vector_error != 0)
1365 1365 return (set_errno(first_vector_error));
1366 1366 return (count);
1367 1367 err:
1368 1368 ASSERT(error != 0);
1369 1369 releasef(fildes);
1370 1370 return (set_errno(error));
1371 1371 }
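
For context, below is a minimal userland sketch of the sendfilev(3EXT) call that this syscall backs. It is illustrative only and not part of the change above; the connected socket "sock", the readable file descriptor "fd", and the header string "hdr" are assumed inputs. The SFV_FD_SELF element exercises the in-memory branch handled above (sfv_fd == SFV_FD_SELF), the second element names a file that sendvec_chunk()/sendvec_small_chunk() read and push to the socket, and xferred reports the bytes transferred even when the call fails partway, matching the copyout of 'count' in the kernel sendfilev() above. On Solaris/illumos this links against libsendfile (-lsendfile).

/*
 * Hypothetical userland caller, for illustration only: send an
 * in-memory header followed by a file's contents over a connected
 * TCP socket with a single sendfilev(3EXT) call.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sendfile.h>
#include <stdint.h>
#include <string.h>

static ssize_t
send_header_and_file(int sock, int fd, const char *hdr)
{
	struct sendfilevec vec[2];
	struct stat st;
	size_t xferred = 0;

	if (fstat(fd, &st) != 0)
		return (-1);

	/* Vector 0: header bytes taken from process memory (SFV_FD_SELF). */
	vec[0].sfv_fd = SFV_FD_SELF;
	vec[0].sfv_flag = 0;
	vec[0].sfv_off = (off_t)(uintptr_t)hdr;
	vec[0].sfv_len = strlen(hdr);

	/* Vector 1: the whole file, read by the kernel starting at offset 0. */
	vec[1].sfv_fd = fd;
	vec[1].sfv_flag = 0;
	vec[1].sfv_off = 0;
	vec[1].sfv_len = (size_t)st.st_size;

	/*
	 * On failure sendfilev() returns -1 with errno set; xferred still
	 * holds the number of bytes sent before the error occurred.
	 */
	return (sendfilev(sock, vec, 2, &xferred));
}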