Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/sockfs/socksubr.c
+++ new/usr/src/uts/common/fs/sockfs/socksubr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015, Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/t_lock.h>
29 29 #include <sys/param.h>
30 30 #include <sys/systm.h>
31 31 #include <sys/buf.h>
32 32 #include <sys/conf.h>
33 33 #include <sys/cred.h>
34 34 #include <sys/kmem.h>
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/vfs.h>
37 37 #include <sys/vfs_opreg.h>
38 38 #include <sys/vnode.h>
39 39 #include <sys/debug.h>
40 40 #include <sys/errno.h>
41 41 #include <sys/time.h>
42 42 #include <sys/file.h>
43 43 #include <sys/open.h>
44 44 #include <sys/user.h>
45 45 #include <sys/termios.h>
46 46 #include <sys/stream.h>
47 47 #include <sys/strsubr.h>
48 48 #include <sys/strsun.h>
49 49 #include <sys/esunddi.h>
50 50 #include <sys/flock.h>
51 51 #include <sys/modctl.h>
52 52 #include <sys/cmn_err.h>
53 53 #include <sys/mkdev.h>
54 54 #include <sys/pathname.h>
55 55 #include <sys/ddi.h>
56 56 #include <sys/stat.h>
57 57 #include <sys/fs/snode.h>
58 58 #include <sys/fs/dv_node.h>
59 59 #include <sys/zone.h>
60 60
61 61 #include <sys/socket.h>
62 62 #include <sys/socketvar.h>
63 63 #include <netinet/in.h>
64 64 #include <sys/un.h>
65 65 #include <sys/ucred.h>
66 66
67 67 #include <sys/tiuser.h>
68 68 #define _SUN_TPI_VERSION 2
69 69 #include <sys/tihdr.h>
70 70
71 71 #include <c2/audit.h>
72 72
73 73 #include <fs/sockfs/nl7c.h>
74 74 #include <fs/sockfs/sockcommon.h>
75 75 #include <fs/sockfs/sockfilter_impl.h>
76 76 #include <fs/sockfs/socktpi.h>
77 77 #include <fs/sockfs/socktpi_impl.h>
78 78 #include <fs/sockfs/sodirect.h>
79 79
/*
 * Helpers for walking the struct cmsghdr records of a control buffer.
 *
 * CMSG_CONTENT	   - address of the payload that follows the header.
 * CMSG_CONTENTLEN - payload size, i.e. cmsg_len minus the header size.
 * CMSG_VALID	   - true when the header is aligned, lies inside
 *		     [start, end), has a cmsg_len covering at least the
 *		     header, and ends at or before 'end'.  It does not
 *		     require the last option buffer to be padded.
 */
#define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
#define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
#define	CMSG_VALID(cmsg, start, end)					\
	(ISALIGNED_cmsghdr(cmsg) &&					\
	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
	((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&	\
	((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))

/*
 * Wakeup time, in milliseconds, passed to cv_wait_stop() by
 * so_lock_single() and so_lock_read().
 */
#define	SO_LOCK_WAKEUP_TIME	3000
93 93
94 94 dev_t sockdev; /* For fsid in getattr */
95 95 int sockfs_defer_nl7c_init = 0;
96 96
97 97 struct socklist socklist;
98 98
99 99 struct kmem_cache *socket_cache;
100 100
101 101 /*
102 102 * sockconf_lock protects the socket configuration (socket types and
103 103 * socket filters) which is changed via the sockconfig system call.
104 104 */
105 105 krwlock_t sockconf_lock;
106 106
107 107 static int sockfs_update(kstat_t *, int);
108 108 static int sockfs_snapshot(kstat_t *, void *, int);
109 109 extern smod_info_t *sotpi_smod_create(void);
110 110
111 111 extern void sendfile_init();
112 112
113 113 extern void nl7c_init(void);
114 114
115 115 extern int modrootloaded;
116 116
117 117 #define ADRSTRLEN (2 * sizeof (void *) + 1)
118 118 /*
119 119 * kernel structure for passing the sockinfo data back up to the user.
120 120 * the strings array allows us to convert AF_UNIX addresses into strings
121 121 * with a common method regardless of which n-bit kernel we're running.
122 122 */
123 123 struct k_sockinfo {
124 124 struct sockinfo ks_si;
125 125 char ks_straddr[3][ADRSTRLEN];
126 126 };
127 127
128 128 /*
129 129 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
130 130 * Returns with the vnode held.
131 131 */
132 132 int
133 133 sogetvp(char *devpath, vnode_t **vpp, int uioflag)
134 134 {
135 135 struct snode *csp;
136 136 vnode_t *vp, *dvp;
137 137 major_t maj;
138 138 int error;
139 139
140 140 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE);
141 141
142 142 /*
143 143 * Lookup the underlying filesystem vnode.
144 144 */
145 145 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp);
146 146 if (error)
147 147 return (error);
148 148
149 149 /* Check that it is the correct vnode */
150 150 if (vp->v_type != VCHR) {
151 151 VN_RELE(vp);
152 152 return (ENOTSOCK);
153 153 }
154 154
155 155 /*
156 156 * If devpath went through devfs, the device should already
157 157 * be configured. If devpath is a mknod file, however, we
158 158 * need to make sure the device is properly configured.
159 159 * To do this, we do something similar to spec_open()
160 160 * except that we resolve to the minor/leaf level since
161 161 * we need to return a vnode.
162 162 */
163 163 csp = VTOS(VTOS(vp)->s_commonvp);
164 164 if (!(csp->s_flag & SDIPSET)) {
165 165 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
166 166 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname);
167 167 if (error == 0)
168 168 error = devfs_lookupname(pathname, NULLVPP, &dvp);
169 169 VN_RELE(vp);
170 170 kmem_free(pathname, MAXPATHLEN);
171 171 if (error != 0)
172 172 return (ENXIO);
173 173 vp = dvp; /* use the devfs vp */
174 174 }
175 175
176 176 /* device is configured at this point */
177 177 maj = getmajor(vp->v_rdev);
178 178 if (!STREAMSTAB(maj)) {
179 179 VN_RELE(vp);
180 180 return (ENOSTR);
181 181 }
182 182
183 183 *vpp = vp;
184 184 return (0);
185 185 }
186 186
187 187 /*
188 188 * Update the accessed, updated, or changed times in an sonode
189 189 * with the current time.
190 190 *
191 191 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
192 192 * attributes in a fstat call. (They return the current time and 0 for
193 193 * all timestamps, respectively.) We maintain the current timestamps
194 194 * here primarily so that should sockmod be popped the resulting
195 195 * file descriptor will behave like a stream w.r.t. the timestamps.
196 196 */
197 197 void
198 198 so_update_attrs(struct sonode *so, int flag)
199 199 {
200 200 time_t now = gethrestime_sec();
201 201
202 202 if (SOCK_IS_NONSTR(so))
203 203 return;
204 204
205 205 mutex_enter(&so->so_lock);
206 206 so->so_flag |= flag;
207 207 if (flag & SOACC)
208 208 SOTOTPI(so)->sti_atime = now;
209 209 if (flag & SOMOD)
210 210 SOTOTPI(so)->sti_mtime = now;
211 211 mutex_exit(&so->so_lock);
212 212 }
213 213
214 214 extern so_create_func_t sock_comm_create_function;
215 215 extern so_destroy_func_t sock_comm_destroy_function;
216 216 /*
217 217 * Init function called when sockfs is loaded.
218 218 */
219 219 int
220 220 sockinit(int fstype, char *name)
221 221 {
222 222 static const fs_operation_def_t sock_vfsops_template[] = {
223 223 NULL, NULL
224 224 };
225 225 int error;
226 226 major_t dev;
227 227 char *err_str;
228 228
229 229 error = vfs_setfsops(fstype, sock_vfsops_template, NULL);
230 230 if (error != 0) {
231 231 zcmn_err(GLOBAL_ZONEID, CE_WARN,
232 232 "sockinit: bad vfs ops template");
233 233 return (error);
234 234 }
235 235
236 236 error = vn_make_ops(name, socket_vnodeops_template,
237 237 &socket_vnodeops);
238 238 if (error != 0) {
239 239 err_str = "sockinit: bad socket vnode ops template";
240 240 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */
241 241 socket_vnodeops = NULL;
242 242 goto failure;
243 243 }
244 244
245 245 socket_cache = kmem_cache_create("socket_cache",
246 246 sizeof (struct sonode), 0, sonode_constructor,
247 247 sonode_destructor, NULL, NULL, NULL, 0);
248 248
249 249 rw_init(&sockconf_lock, NULL, RW_DEFAULT, NULL);
250 250
251 251 error = socktpi_init();
252 252 if (error != 0) {
253 253 err_str = NULL;
254 254 goto failure;
255 255 }
256 256
257 257 error = sod_init();
258 258 if (error != 0) {
259 259 err_str = NULL;
260 260 goto failure;
261 261 }
262 262
263 263 /*
264 264 * Set up the default create and destroy functions
265 265 */
266 266 sock_comm_create_function = socket_sonode_create;
267 267 sock_comm_destroy_function = socket_sonode_destroy;
268 268
269 269 /*
270 270 * Build initial list mapping socket parameters to vnode.
271 271 */
272 272 smod_init();
273 273 smod_add(sotpi_smod_create());
274 274
275 275 sockparams_init();
276 276
277 277 /*
278 278 * If sockets are needed before init runs /sbin/soconfig
279 279 * it is possible to preload the sockparams list here using
280 280 * calls like:
281 281 * sockconfig(1,2,3, "/dev/tcp", 0);
282 282 */
283 283
284 284 /*
285 285 * Create a unique dev_t for use in so_fsid.
286 286 */
287 287
288 288 if ((dev = getudev()) == (major_t)-1)
289 289 dev = 0;
290 290 sockdev = makedevice(dev, 0);
291 291
292 292 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL);
293 293 sendfile_init();
294 294 if (!modrootloaded) {
295 295 sockfs_defer_nl7c_init = 1;
296 296 } else {
297 297 nl7c_init();
298 298 }
299 299
300 300 /* Initialize socket filters */
301 301 sof_init();
302 302
303 303 return (0);
304 304
305 305 failure:
306 306 (void) vfs_freevfsops_by_type(fstype);
307 307 if (socket_vnodeops != NULL)
308 308 vn_freevnodeops(socket_vnodeops);
309 309 if (err_str != NULL)
310 310 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str);
311 311 return (error);
312 312 }
313 313
314 314 /*
315 315 * Caller must hold the mutex. Used to set SOLOCKED.
316 316 */
317 317 void
318 318 so_lock_single(struct sonode *so)
319 319 {
320 320 ASSERT(MUTEX_HELD(&so->so_lock));
321 321
322 322 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) {
323 323 cv_wait_stop(&so->so_single_cv, &so->so_lock,
324 324 SO_LOCK_WAKEUP_TIME);
325 325 }
326 326 so->so_flag |= SOLOCKED;
327 327 }
328 328
329 329 /*
330 330 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
331 331 * Used to clear SOLOCKED or SOASYNC_UNBIND.
332 332 */
333 333 void
334 334 so_unlock_single(struct sonode *so, int flag)
335 335 {
336 336 ASSERT(MUTEX_HELD(&so->so_lock));
337 337 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND));
338 338 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0);
339 339 ASSERT(so->so_flag & flag);
340 340 /*
341 341 * Process the T_DISCON_IND on sti_discon_ind_mp.
342 342 *
343 343 * Call to so_drain_discon_ind will result in so_lock
344 344 * being dropped and re-acquired later.
345 345 */
346 346 if (!SOCK_IS_NONSTR(so)) {
347 347 sotpi_info_t *sti = SOTOTPI(so);
348 348
349 349 if (sti->sti_discon_ind_mp != NULL)
350 350 so_drain_discon_ind(so);
351 351 }
352 352
353 353 cv_signal(&so->so_single_cv);
354 354 so->so_flag &= ~flag;
355 355 }
356 356
357 357 /*
358 358 * Caller must hold the mutex. Used to set SOREADLOCKED.
359 359 * If the caller wants nonblocking behavior it should set fmode.
360 360 */
361 361 int
362 362 so_lock_read(struct sonode *so, int fmode)
363 363 {
364 364 ASSERT(MUTEX_HELD(&so->so_lock));
365 365
366 366 while (so->so_flag & SOREADLOCKED) {
367 367 if (fmode & (FNDELAY|FNONBLOCK))
368 368 return (EWOULDBLOCK);
369 369 cv_wait_stop(&so->so_read_cv, &so->so_lock,
370 370 SO_LOCK_WAKEUP_TIME);
371 371 }
372 372 so->so_flag |= SOREADLOCKED;
373 373 return (0);
374 374 }
375 375
376 376 /*
377 377 * Like so_lock_read above but allows signals.
378 378 */
379 379 int
380 380 so_lock_read_intr(struct sonode *so, int fmode)
381 381 {
382 382 ASSERT(MUTEX_HELD(&so->so_lock));
383 383
384 384 while (so->so_flag & SOREADLOCKED) {
385 385 if (fmode & (FNDELAY|FNONBLOCK))
386 386 return (EWOULDBLOCK);
387 387 if (!cv_wait_sig(&so->so_read_cv, &so->so_lock))
388 388 return (EINTR);
389 389 }
390 390 so->so_flag |= SOREADLOCKED;
391 391 return (0);
392 392 }
393 393
394 394 /*
395 395 * Caller must hold the mutex. Used to clear SOREADLOCKED,
396 396 * set in so_lock_read() or so_lock_read_intr().
397 397 */
398 398 void
399 399 so_unlock_read(struct sonode *so)
400 400 {
401 401 ASSERT(MUTEX_HELD(&so->so_lock));
402 402 ASSERT(so->so_flag & SOREADLOCKED);
403 403
404 404 cv_signal(&so->so_read_cv);
405 405 so->so_flag &= ~SOREADLOCKED;
406 406 }
407 407
408 408 /*
409 409 * Verify that the specified offset falls within the mblk and
410 410 * that the resulting pointer is aligned.
411 411 * Returns NULL if not.
412 412 */
413 413 void *
414 414 sogetoff(mblk_t *mp, t_uscalar_t offset,
415 415 t_uscalar_t length, uint_t align_size)
416 416 {
417 417 uintptr_t ptr1, ptr2;
418 418
419 419 ASSERT(mp && mp->b_wptr >= mp->b_rptr);
420 420 ptr1 = (uintptr_t)mp->b_rptr + offset;
421 421 ptr2 = (uintptr_t)ptr1 + length;
422 422 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) {
423 423 eprintline(0);
424 424 return (NULL);
425 425 }
426 426 if ((ptr1 & (align_size - 1)) != 0) {
427 427 eprintline(0);
428 428 return (NULL);
429 429 }
430 430 return ((void *)ptr1);
431 431 }
432 432
433 433 /*
434 434 * Return the AF_UNIX underlying filesystem vnode matching a given name.
435 435 * Makes sure the sending and the destination sonodes are compatible.
436 436 * The vnode is returned held.
437 437 *
438 438 * The underlying filesystem VSOCK vnode has a v_stream pointer that
439 439 * references the actual stream head (hence indirectly the actual sonode).
440 440 *
441 441 * This function is non-static so it can be used by brand emulation.
442 442 */
443 443 int
444 444 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess,
445 445 vnode_t **vpp)
446 446 {
447 447 vnode_t *vp; /* Underlying filesystem vnode */
448 448 vnode_t *rvp; /* real vnode */
449 449 vnode_t *svp; /* sockfs vnode */
450 450 struct sonode *so2;
451 451 int error;
452 452
453 453 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so,
454 454 soun->sun_path));
455 455
456 456 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
457 457 if (error) {
458 458 eprintsoline(so, error);
459 459 return (error);
460 460 }
461 461
462 462 /*
463 463 * Traverse lofs mounts get the real vnode
464 464 */
465 465 if (VOP_REALVP(vp, &rvp, NULL) == 0) {
466 466 VN_HOLD(rvp); /* hold the real vnode */
467 467 VN_RELE(vp); /* release hold from lookup */
468 468 vp = rvp;
469 469 }
470 470
471 471 if (vp->v_type != VSOCK) {
472 472 error = ENOTSOCK;
473 473 eprintsoline(so, error);
474 474 goto done2;
475 475 }
476 476
477 477 if (checkaccess) {
478 478 /*
479 479 * Check that we have permissions to access the destination
480 480 * vnode. This check is not done in BSD but it is required
481 481 * by X/Open.
482 482 */
483 483 if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL)) {
484 484 eprintsoline(so, error);
485 485 goto done2;
486 486 }
487 487 }
488 488
489 489 /*
490 490 * Check if the remote socket has been closed.
491 491 *
492 492 * Synchronize with vn_rele_stream by holding v_lock while traversing
493 493 * v_stream->sd_vnode.
494 494 */
495 495 mutex_enter(&vp->v_lock);
496 496 if (vp->v_stream == NULL) {
497 497 mutex_exit(&vp->v_lock);
498 498 if (so->so_type == SOCK_DGRAM)
499 499 error = EDESTADDRREQ;
500 500 else
501 501 error = ECONNREFUSED;
502 502
503 503 eprintsoline(so, error);
504 504 goto done2;
505 505 }
506 506 ASSERT(vp->v_stream->sd_vnode);
507 507 svp = vp->v_stream->sd_vnode;
508 508 /*
509 509 * holding v_lock on underlying filesystem vnode and acquiring
510 510 * it on sockfs vnode. Assumes that no code ever attempts to
511 511 * acquire these locks in the reverse order.
512 512 */
513 513 VN_HOLD(svp);
514 514 mutex_exit(&vp->v_lock);
515 515
516 516 if (svp->v_type != VSOCK) {
517 517 error = ENOTSOCK;
518 518 eprintsoline(so, error);
519 519 goto done;
520 520 }
521 521
522 522 so2 = VTOSO(svp);
523 523
524 524 if (so->so_type != so2->so_type) {
525 525 error = EPROTOTYPE;
526 526 eprintsoline(so, error);
527 527 goto done;
528 528 }
529 529
530 530 VN_RELE(svp);
531 531 *vpp = vp;
532 532 return (0);
533 533
534 534 done:
535 535 VN_RELE(svp);
536 536 done2:
537 537 VN_RELE(vp);
538 538 return (error);
539 539 }
540 540
541 541 /*
542 542 * Verify peer address for connect and sendto/sendmsg.
543 543 * Since sendto/sendmsg would not get synchronous errors from the transport
544 544 * provider we have to do these ugly checks in the socket layer to
545 545 * preserve compatibility with SunOS 4.X.
546 546 */
547 547 int
548 548 so_addr_verify(struct sonode *so, const struct sockaddr *name,
549 549 socklen_t namelen)
550 550 {
551 551 int family;
552 552
553 553 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n",
554 554 (void *)so, (void *)name, namelen));
555 555
556 556 ASSERT(name != NULL);
557 557
558 558 family = so->so_family;
559 559 switch (family) {
560 560 case AF_INET:
561 561 if (name->sa_family != family) {
562 562 eprintsoline(so, EAFNOSUPPORT);
563 563 return (EAFNOSUPPORT);
564 564 }
565 565 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) {
566 566 eprintsoline(so, EINVAL);
567 567 return (EINVAL);
568 568 }
569 569 break;
570 570 case AF_INET6: {
571 571 #ifdef DEBUG
572 572 struct sockaddr_in6 *sin6;
573 573 #endif /* DEBUG */
574 574
575 575 if (name->sa_family != family) {
576 576 eprintsoline(so, EAFNOSUPPORT);
577 577 return (EAFNOSUPPORT);
578 578 }
579 579 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) {
580 580 eprintsoline(so, EINVAL);
581 581 return (EINVAL);
582 582 }
583 583 #ifdef DEBUG
584 584 /* Verify that apps don't forget to clear sin6_scope_id etc */
585 585 sin6 = (struct sockaddr_in6 *)name;
586 586 if (sin6->sin6_scope_id != 0 &&
587 587 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
588 588 zcmn_err(getzoneid(), CE_WARN,
589 589 "connect/send* with uninitialized sin6_scope_id "
590 590 "(%d) on socket. Pid = %d\n",
591 591 (int)sin6->sin6_scope_id, (int)curproc->p_pid);
592 592 }
593 593 #endif /* DEBUG */
594 594 break;
595 595 }
596 596 case AF_UNIX:
597 597 if (SOTOTPI(so)->sti_faddr_noxlate) {
598 598 return (0);
599 599 }
600 600 if (namelen < (socklen_t)sizeof (short)) {
601 601 eprintsoline(so, ENOENT);
602 602 return (ENOENT);
603 603 }
604 604 if (name->sa_family != family) {
605 605 eprintsoline(so, EAFNOSUPPORT);
606 606 return (EAFNOSUPPORT);
607 607 }
608 608 /* MAXPATHLEN + soun_family + nul termination */
609 609 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
610 610 eprintsoline(so, ENAMETOOLONG);
611 611 return (ENAMETOOLONG);
612 612 }
613 613
614 614 break;
615 615
616 616 default:
617 617 /*
618 618 * Default is don't do any length or sa_family check
619 619 * to allow non-sockaddr style addresses.
620 620 */
621 621 break;
622 622 }
623 623
624 624 return (0);
625 625 }
626 626
627 627
628 628 /*
629 629 * Translate an AF_UNIX sockaddr_un to the transport internal name.
630 630 * Assumes caller has called so_addr_verify first.
631 631 */
632 632 /*ARGSUSED*/
633 633 int
634 634 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
635 635 socklen_t namelen, int checkaccess,
636 636 void **addrp, socklen_t *addrlenp)
637 637 {
638 638 int error;
639 639 struct sockaddr_un *soun;
640 640 vnode_t *vp;
641 641 void *addr;
642 642 socklen_t addrlen;
643 643 sotpi_info_t *sti = SOTOTPI(so);
644 644
645 645 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
646 646 (void *)so, (void *)name, namelen, checkaccess));
647 647
648 648 ASSERT(name != NULL);
649 649 ASSERT(so->so_family == AF_UNIX);
650 650 ASSERT(!sti->sti_faddr_noxlate);
651 651 ASSERT(namelen >= (socklen_t)sizeof (short));
652 652 ASSERT(name->sa_family == AF_UNIX);
653 653 soun = (struct sockaddr_un *)name;
654 654 /*
655 655 * Lookup vnode for the specified path name and verify that
656 656 * it is a socket.
657 657 */
658 658 error = so_ux_lookup(so, soun, checkaccess, &vp);
659 659 if (error) {
660 660 eprintsoline(so, error);
661 661 return (error);
662 662 }
663 663 /*
664 664 * Use the address of the peer vnode as the address to send
665 665 * to. We release the peer vnode here. In case it has been
666 666 * closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the
667 667 * transport the message will get an error or be dropped.
668 668 */
669 669 sti->sti_ux_faddr.soua_vp = vp;
670 670 sti->sti_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT;
671 671 addr = &sti->sti_ux_faddr;
672 672 addrlen = (socklen_t)sizeof (sti->sti_ux_faddr);
673 673 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
674 674 addrlen, (void *)vp));
675 675 VN_RELE(vp);
676 676 *addrp = addr;
677 677 *addrlenp = (socklen_t)addrlen;
678 678 return (0);
679 679 }
680 680
681 681 /*
682 682 * Esballoc free function for messages that contain SO_FILEP option.
683 683 * Decrement the reference count on the file pointers using closef.
684 684 */
685 685 void
686 686 fdbuf_free(struct fdbuf *fdbuf)
687 687 {
688 688 int i;
689 689 struct file *fp;
690 690
691 691 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd));
692 692 for (i = 0; i < fdbuf->fd_numfd; i++) {
693 693 /*
694 694 * We need pointer size alignment for fd_fds. On a LP64
695 695 * kernel, the required alignment is 8 bytes while
696 696 * the option headers and values are only 4 bytes
697 697 * aligned. So its safer to do a bcopy compared to
698 698 * assigning fdbuf->fd_fds[i] to fp.
699 699 */
700 700 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
701 701 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp));
702 702 (void) closef(fp);
703 703 }
704 704 if (fdbuf->fd_ebuf != NULL)
705 705 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen);
706 706 kmem_free(fdbuf, fdbuf->fd_size);
707 707 }
708 708
709 709 /*
710 710 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
711 711 * Waits if memory is not available.
712 712 */
713 713 mblk_t *
714 714 fdbuf_allocmsg(int size, struct fdbuf *fdbuf)
715 715 {
716 716 uchar_t *buf;
717 717 mblk_t *mp;
718 718
719 719 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd));
720 720 buf = kmem_alloc(size, KM_SLEEP);
721 721 fdbuf->fd_ebuf = (caddr_t)buf;
722 722 fdbuf->fd_ebuflen = size;
723 723 fdbuf->fd_frtn.free_func = fdbuf_free;
724 724 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf;
725 725
726 726 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn);
727 727 mp->b_datap->db_type = M_PROTO;
728 728 return (mp);
729 729 }
730 730
731 731 /*
732 732 * Extract file descriptors from a fdbuf.
733 733 * Return list in rights/rightslen.
734 734 */
735 735 /*ARGSUSED*/
736 736 static int
737 737 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen)
738 738 {
739 739 int i, fd;
740 740 int *rp;
741 741 struct file *fp;
742 742 int numfd;
743 743
744 744 dprint(1, ("fdbuf_extract: %d fds, len %d\n",
745 745 fdbuf->fd_numfd, rightslen));
746 746
747 747 numfd = fdbuf->fd_numfd;
748 748 ASSERT(rightslen == numfd * (int)sizeof (int));
749 749
750 750 /*
751 751 * Allocate a file descriptor and increment the f_count.
752 752 * The latter is needed since we always call fdbuf_free
753 753 * which performs a closef.
754 754 */
755 755 rp = (int *)rights;
756 756 for (i = 0; i < numfd; i++) {
757 757 if ((fd = ufalloc(0)) == -1)
758 758 goto cleanup;
759 759 /*
760 760 * We need pointer size alignment for fd_fds. On a LP64
761 761 * kernel, the required alignment is 8 bytes while
762 762 * the option headers and values are only 4 bytes
763 763 * aligned. So its safer to do a bcopy compared to
764 764 * assigning fdbuf->fd_fds[i] to fp.
765 765 */
766 766 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
767 767 mutex_enter(&fp->f_tlock);
768 768 fp->f_count++;
769 769 mutex_exit(&fp->f_tlock);
770 770 setf(fd, fp);
771 771 *rp++ = fd;
772 772 if (AU_AUDITING())
773 773 audit_fdrecv(fd, fp);
774 774 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n",
775 775 i, fd, (void *)fp, fp->f_count));
776 776 }
777 777 return (0);
778 778
779 779 cleanup:
780 780 /*
781 781 * Undo whatever partial work the loop above has done.
782 782 */
783 783 {
784 784 int j;
785 785
786 786 rp = (int *)rights;
787 787 for (j = 0; j < i; j++) {
788 788 dprint(0,
789 789 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp));
790 790 (void) closeandsetf(*rp++, NULL);
791 791 }
792 792 }
793 793
794 794 return (EMFILE);
795 795 }
796 796
797 797 /*
798 798 * Insert file descriptors into an fdbuf.
799 799 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
800 800 * by calling fdbuf_free().
801 801 */
802 802 int
803 803 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp)
804 804 {
805 805 int numfd, i;
806 806 int *fds;
807 807 struct file *fp;
808 808 struct fdbuf *fdbuf;
809 809 int fdbufsize;
810 810
811 811 dprint(1, ("fdbuf_create: len %d\n", rightslen));
812 812
813 813 numfd = rightslen / (int)sizeof (int);
814 814
815 815 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *));
816 816 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP);
817 817 fdbuf->fd_size = fdbufsize;
818 818 fdbuf->fd_numfd = 0;
819 819 fdbuf->fd_ebuf = NULL;
820 820 fdbuf->fd_ebuflen = 0;
821 821 fds = (int *)rights;
822 822 for (i = 0; i < numfd; i++) {
823 823 if ((fp = getf(fds[i])) == NULL) {
824 824 fdbuf_free(fdbuf);
825 825 return (EBADF);
826 826 }
827 827 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n",
828 828 i, fds[i], (void *)fp, fp->f_count));
829 829 mutex_enter(&fp->f_tlock);
830 830 fp->f_count++;
831 831 mutex_exit(&fp->f_tlock);
832 832 /*
833 833 * The maximum alignment for fdbuf (or any option header
834 834 * and its value) it 4 bytes. On a LP64 kernel, the alignment
835 835 * is not sufficient for pointers (fd_fds in this case). Since
836 836 * we just did a kmem_alloc (we get a double word alignment),
837 837 * we don't need to do anything on the send side (we loose
838 838 * the double word alignment because fdbuf goes after an
839 839 * option header (eg T_unitdata_req) which is only 4 byte
840 840 * aligned). We take care of this when we extract the file
841 841 * descriptor in fdbuf_extract or fdbuf_free.
842 842 */
843 843 fdbuf->fd_fds[i] = fp;
844 844 fdbuf->fd_numfd++;
845 845 releasef(fds[i]);
846 846 if (AU_AUDITING())
847 847 audit_fdsend(fds[i], fp, 0);
848 848 }
849 849 *fdbufp = fdbuf;
850 850 return (0);
851 851 }
852 852
853 853 static int
854 854 fdbuf_optlen(int rightslen)
855 855 {
856 856 int numfd;
857 857
858 858 numfd = rightslen / (int)sizeof (int);
859 859
860 860 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)));
861 861 }
862 862
863 863 static t_uscalar_t
864 864 fdbuf_cmsglen(int fdbuflen)
865 865 {
866 866 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) /
867 867 (int)sizeof (struct file *) * (int)sizeof (int));
868 868 }
869 869
870 870
871 871 /*
872 872 * Return non-zero if the mblk and fdbuf are consistent.
873 873 */
874 874 static int
875 875 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen)
876 876 {
877 877 if (fdbuflen >= FDBUF_HDRSIZE &&
878 878 fdbuflen == fdbuf->fd_size) {
879 879 frtn_t *frp = mp->b_datap->db_frtnp;
880 880 /*
881 881 * Check that the SO_FILEP portion of the
882 882 * message has not been modified by
883 883 * the loopback transport. The sending sockfs generates
884 884 * a message that is esballoc'ed with the free function
885 885 * being fdbuf_free() and where free_arg contains the
886 886 * identical information as the SO_FILEP content.
887 887 *
888 888 * If any of these constraints are not satisfied we
889 889 * silently ignore the option.
890 890 */
891 891 ASSERT(mp);
892 892 if (frp != NULL &&
893 893 frp->free_func == fdbuf_free &&
894 894 frp->free_arg != NULL &&
895 895 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) {
896 896 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n",
897 897 (void *)fdbuf, fdbuflen));
898 898 return (1);
899 899 } else {
900 900 zcmn_err(getzoneid(), CE_WARN,
901 901 "sockfs: mismatched fdbuf content (%p)",
902 902 (void *)mp);
903 903 return (0);
904 904 }
905 905 } else {
906 906 zcmn_err(getzoneid(), CE_WARN,
907 907 "sockfs: mismatched fdbuf len %d, %d\n",
908 908 fdbuflen, fdbuf->fd_size);
909 909 return (0);
910 910 }
911 911 }
912 912
913 913 /*
914 914 * When the file descriptors returned by sorecvmsg can not be passed
915 915 * to the application this routine will cleanup the references on
916 916 * the files. Start at startoff bytes into the buffer.
917 917 */
918 918 static void
919 919 close_fds(void *fdbuf, int fdbuflen, int startoff)
920 920 {
921 921 int *fds = (int *)fdbuf;
922 922 int numfd = fdbuflen / (int)sizeof (int);
923 923 int i;
924 924
925 925 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff));
926 926
927 927 for (i = 0; i < numfd; i++) {
928 928 if (startoff < 0)
929 929 startoff = 0;
930 930 if (startoff < (int)sizeof (int)) {
931 931 /*
932 932 * This file descriptor is partially or fully after
933 933 * the offset
934 934 */
935 935 dprint(0,
936 936 ("close_fds: cleanup[%d] = %d\n", i, fds[i]));
937 937 (void) closeandsetf(fds[i], NULL);
938 938 }
939 939 startoff -= (int)sizeof (int);
940 940 }
941 941 }
942 942
943 943 /*
944 944 * Close all file descriptors contained in the control part starting at
945 945 * the startoffset.
946 946 */
947 947 void
948 948 so_closefds(void *control, t_uscalar_t controllen, int oldflg,
949 949 int startoff)
950 950 {
951 951 struct cmsghdr *cmsg;
952 952
953 953 if (control == NULL)
954 954 return;
955 955
956 956 if (oldflg) {
957 957 close_fds(control, controllen, startoff);
958 958 return;
959 959 }
960 960 /* Scan control part for file descriptors. */
961 961 for (cmsg = (struct cmsghdr *)control;
962 962 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
963 963 cmsg = CMSG_NEXT(cmsg)) {
964 964 if (cmsg->cmsg_level == SOL_SOCKET &&
965 965 cmsg->cmsg_type == SCM_RIGHTS) {
966 966 close_fds(CMSG_CONTENT(cmsg),
967 967 (int)CMSG_CONTENTLEN(cmsg),
968 968 startoff - (int)sizeof (struct cmsghdr));
969 969 }
970 970 startoff -= cmsg->cmsg_len;
971 971 }
972 972 }
973 973
974 974 /*
975 975 * Returns a pointer/length for the file descriptors contained
976 976 * in the control buffer. Returns with *fdlenp == -1 if there are no
977 977 * file descriptor options present. This is different than there being
978 978 * a zero-length file descriptor option.
979 979 * Fail if there are multiple SCM_RIGHT cmsgs.
980 980 */
981 981 int
982 982 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg,
983 983 void **fdsp, int *fdlenp)
984 984 {
985 985 struct cmsghdr *cmsg;
986 986 void *fds;
987 987 int fdlen;
988 988
989 989 if (control == NULL) {
990 990 *fdsp = NULL;
991 991 *fdlenp = -1;
992 992 return (0);
993 993 }
994 994
995 995 if (oldflg) {
996 996 *fdsp = control;
997 997 if (controllen == 0)
998 998 *fdlenp = -1;
999 999 else
1000 1000 *fdlenp = controllen;
1001 1001 dprint(1, ("so_getfdopt: old %d\n", *fdlenp));
1002 1002 return (0);
1003 1003 }
1004 1004
1005 1005 fds = NULL;
1006 1006 fdlen = 0;
1007 1007
1008 1008 for (cmsg = (struct cmsghdr *)control;
1009 1009 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1010 1010 cmsg = CMSG_NEXT(cmsg)) {
1011 1011 if (cmsg->cmsg_level == SOL_SOCKET &&
1012 1012 cmsg->cmsg_type == SCM_RIGHTS) {
1013 1013 if (fds != NULL)
1014 1014 return (EINVAL);
1015 1015 fds = CMSG_CONTENT(cmsg);
1016 1016 fdlen = (int)CMSG_CONTENTLEN(cmsg);
1017 1017 dprint(1, ("so_getfdopt: new %lu\n",
1018 1018 (size_t)CMSG_CONTENTLEN(cmsg)));
1019 1019 }
1020 1020 }
1021 1021 if (fds == NULL) {
1022 1022 dprint(1, ("so_getfdopt: NONE\n"));
1023 1023 *fdlenp = -1;
1024 1024 } else
1025 1025 *fdlenp = fdlen;
1026 1026 *fdsp = fds;
1027 1027 return (0);
1028 1028 }
1029 1029
1030 1030 /*
1031 1031 * Return the length of the options including any file descriptor options.
1032 1032 */
1033 1033 t_uscalar_t
1034 1034 so_optlen(void *control, t_uscalar_t controllen, int oldflg)
1035 1035 {
1036 1036 struct cmsghdr *cmsg;
1037 1037 t_uscalar_t optlen = 0;
1038 1038 t_uscalar_t len;
1039 1039
1040 1040 if (control == NULL)
1041 1041 return (0);
1042 1042
1043 1043 if (oldflg)
1044 1044 return ((t_uscalar_t)(sizeof (struct T_opthdr) +
1045 1045 fdbuf_optlen(controllen)));
1046 1046
1047 1047 for (cmsg = (struct cmsghdr *)control;
1048 1048 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1049 1049 cmsg = CMSG_NEXT(cmsg)) {
1050 1050 if (cmsg->cmsg_level == SOL_SOCKET &&
1051 1051 cmsg->cmsg_type == SCM_RIGHTS) {
1052 1052 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg));
1053 1053 } else {
1054 1054 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1055 1055 }
1056 1056 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) +
1057 1057 sizeof (struct T_opthdr));
1058 1058 }
1059 1059 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
1060 1060 controllen, oldflg, optlen));
1061 1061 return (optlen);
1062 1062 }
1063 1063
1064 1064 /*
1065 1065 * Copy options from control to the mblk. Skip any file descriptor options.
1066 1066 */
1067 1067 void
1068 1068 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp)
1069 1069 {
1070 1070 struct T_opthdr toh;
1071 1071 struct cmsghdr *cmsg;
1072 1072
1073 1073 if (control == NULL)
1074 1074 return;
1075 1075
1076 1076 if (oldflg) {
1077 1077 /* No real options - caller has handled file descriptors */
1078 1078 return;
1079 1079 }
1080 1080 for (cmsg = (struct cmsghdr *)control;
1081 1081 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1082 1082 cmsg = CMSG_NEXT(cmsg)) {
1083 1083 /*
1084 1084 * Note: The caller handles file descriptors prior
1085 1085 * to calling this function.
1086 1086 */
1087 1087 t_uscalar_t len;
1088 1088
1089 1089 if (cmsg->cmsg_level == SOL_SOCKET &&
1090 1090 cmsg->cmsg_type == SCM_RIGHTS)
1091 1091 continue;
1092 1092
1093 1093 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1094 1094 toh.level = cmsg->cmsg_level;
1095 1095 toh.name = cmsg->cmsg_type;
1096 1096 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr);
1097 1097 toh.status = 0;
1098 1098
1099 1099 soappendmsg(mp, &toh, sizeof (toh));
1100 1100 soappendmsg(mp, CMSG_CONTENT(cmsg), len);
1101 1101 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len;
1102 1102 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1103 1103 }
1104 1104 }
1105 1105
1106 1106 /*
1107 1107 * Return the length of the control message derived from the options.
1108 1108 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
1109 1109 * When oldflg is set only include SO_FILEP.
1110 1110 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1111 1111 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1112 1112 * also be checked for any possible impacts.
1113 1113 */
1114 1114 t_uscalar_t
1115 1115 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg)
1116 1116 {
1117 1117 t_uscalar_t cmsglen = 0;
1118 1118 struct T_opthdr *tohp;
1119 1119 t_uscalar_t len;
1120 1120 t_uscalar_t last_roundup = 0;
1121 1121
1122 1122 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1123 1123
1124 1124 for (tohp = (struct T_opthdr *)opt;
1125 1125 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1126 1126 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1127 1127 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
1128 1128 tohp->level, tohp->name, tohp->len));
1129 1129 if (tohp->level == SOL_SOCKET &&
1130 1130 (tohp->name == SO_SRCADDR ||
1131 1131 tohp->name == SO_UNIX_CLOSE)) {
1132 1132 continue;
1133 1133 }
1134 1134 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
1135 1135 struct fdbuf *fdbuf;
1136 1136 int fdbuflen;
1137 1137
1138 1138 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
1139 1139 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
1140 1140
1141 1141 if (!fdbuf_verify(mp, fdbuf, fdbuflen))
1142 1142 continue;
1143 1143 if (oldflg) {
1144 1144 cmsglen += fdbuf_cmsglen(fdbuflen);
1145 1145 continue;
1146 1146 }
1147 1147 len = fdbuf_cmsglen(fdbuflen);
1148 1148 } else if (tohp->level == SOL_SOCKET &&
1149 1149 tohp->name == SCM_TIMESTAMP) {
1150 1150 if (oldflg)
1151 1151 continue;
1152 1152
1153 1153 if (get_udatamodel() == DATAMODEL_NATIVE) {
1154 1154 len = sizeof (struct timeval);
1155 1155 } else {
1156 1156 len = sizeof (struct timeval32);
1157 1157 }
1158 1158 } else {
1159 1159 if (oldflg)
1160 1160 continue;
1161 1161 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1162 1162 }
1163 1163 /*
1164 1164 * Exclude roundup for last option to not set
1165 1165 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
1166 1166 */
1167 1167 last_roundup = (t_uscalar_t)
1168 1168 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) -
1169 1169 (len + (int)sizeof (struct cmsghdr)));
1170 1170 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) +
1171 1171 last_roundup;
1172 1172 }
1173 1173 cmsglen -= last_roundup;
1174 1174 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
1175 1175 optlen, oldflg, cmsglen));
1176 1176 return (cmsglen);
1177 1177 }
1178 1178
1179 1179 /*
1180 1180 * Copy options from options to the control. Convert SO_FILEP to
1181 1181 * file descriptors.
1182 1182 * Returns errno or zero.
1183 1183 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1184 1184 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1185 1185 * also be checked for any possible impacts.
1186 1186 */
1187 1187 int
1188 1188 so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg,
1189 1189 void *control, t_uscalar_t controllen)
1190 1190 {
1191 1191 struct T_opthdr *tohp;
1192 1192 struct cmsghdr *cmsg;
1193 1193 struct fdbuf *fdbuf;
1194 1194 int fdbuflen;
1195 1195 int error;
1196 1196 #if defined(DEBUG) || defined(__lint)
1197 1197 struct cmsghdr *cend = (struct cmsghdr *)
1198 1198 (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
1199 1199 #endif
1200 1200 cmsg = (struct cmsghdr *)control;
1201 1201
1202 1202 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1203 1203
1204 1204 for (tohp = (struct T_opthdr *)opt;
1205 1205 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1206 1206 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1207 1207 dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
1208 1208 tohp->level, tohp->name, tohp->len));
1209 1209
1210 1210 if (tohp->level == SOL_SOCKET &&
1211 1211 (tohp->name == SO_SRCADDR ||
1212 1212 tohp->name == SO_UNIX_CLOSE)) {
1213 1213 continue;
1214 1214 }
1215 1215 ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
1216 1216 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
1217 1217 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
1218 1218 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
1219 1219
1220 1220 if (!fdbuf_verify(mp, fdbuf, fdbuflen))
1221 1221 return (EPROTO);
1222 1222 if (oldflg) {
1223 1223 error = fdbuf_extract(fdbuf, control,
1224 1224 (int)controllen);
1225 1225 if (error != 0)
1226 1226 return (error);
1227 1227 continue;
1228 1228 } else {
1229 1229 int fdlen;
1230 1230
1231 1231 fdlen = (int)fdbuf_cmsglen(
1232 1232 (int)_TPI_TOPT_DATALEN(tohp));
1233 1233
1234 1234 cmsg->cmsg_level = tohp->level;
1235 1235 cmsg->cmsg_type = SCM_RIGHTS;
1236 1236 cmsg->cmsg_len = (socklen_t)(fdlen +
1237 1237 sizeof (struct cmsghdr));
1238 1238
1239 1239 error = fdbuf_extract(fdbuf,
1240 1240 CMSG_CONTENT(cmsg), fdlen);
1241 1241 if (error != 0)
1242 1242 return (error);
1243 1243 }
1244 1244 } else if (tohp->level == SOL_SOCKET &&
1245 1245 tohp->name == SCM_TIMESTAMP) {
1246 1246 timestruc_t *timestamp;
1247 1247
1248 1248 if (oldflg)
1249 1249 continue;
1250 1250
1251 1251 cmsg->cmsg_level = tohp->level;
1252 1252 cmsg->cmsg_type = tohp->name;
1253 1253
1254 1254 timestamp =
1255 1255 (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
1256 1256 sizeof (intptr_t));
1257 1257
1258 1258 if (get_udatamodel() == DATAMODEL_NATIVE) {
1259 1259 struct timeval tv;
1260 1260
1261 1261 cmsg->cmsg_len = sizeof (struct timeval) +
1262 1262 sizeof (struct cmsghdr);
1263 1263 tv.tv_sec = timestamp->tv_sec;
1264 1264 tv.tv_usec = timestamp->tv_nsec /
1265 1265 (NANOSEC / MICROSEC);
1266 1266 /*
1267 1267 * on LP64 systems, the struct timeval in
1268 1268 * the destination will not be 8-byte aligned,
1269 1269 * so use bcopy to avoid alignment trouble
1270 1270 */
1271 1271 bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
1272 1272 } else {
1273 1273 struct timeval32 *time32;
1274 1274
1275 1275 cmsg->cmsg_len = sizeof (struct timeval32) +
1276 1276 sizeof (struct cmsghdr);
1277 1277 time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
1278 1278 time32->tv_sec = (time32_t)timestamp->tv_sec;
1279 1279 time32->tv_usec =
1280 1280 (int32_t)(timestamp->tv_nsec /
1281 1281 (NANOSEC / MICROSEC));
1282 1282 }
1283 1283
1284 1284 } else {
1285 1285 if (oldflg)
1286 1286 continue;
1287 1287
1288 1288 cmsg->cmsg_level = tohp->level;
1289 1289 cmsg->cmsg_type = tohp->name;
1290 1290 cmsg->cmsg_len = (socklen_t)(_TPI_TOPT_DATALEN(tohp) +
1291 1291 sizeof (struct cmsghdr));
1292 1292
1293 1293 /* copy content to control data part */
1294 1294 bcopy(&tohp[1], CMSG_CONTENT(cmsg),
1295 1295 CMSG_CONTENTLEN(cmsg));
1296 1296 }
1297 1297 /* move to next CMSG structure! */
1298 1298 cmsg = CMSG_NEXT(cmsg);
1299 1299 }
1300 1300 dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
1301 1301 control, controllen, (void *)cend, (void *)cmsg));
1302 1302 ASSERT(cmsg <= cend);
1303 1303 return (0);
1304 1304 }
1305 1305
1306 1306 /*
1307 1307 * Extract the SO_SRCADDR option value if present.
1308 1308 */
1309 1309 void
1310 1310 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp,
1311 1311 t_uscalar_t *srclenp)
1312 1312 {
1313 1313 struct T_opthdr *tohp;
1314 1314
1315 1315 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1316 1316
1317 1317 ASSERT(srcp != NULL && srclenp != NULL);
1318 1318 *srcp = NULL;
1319 1319 *srclenp = 0;
1320 1320
1321 1321 for (tohp = (struct T_opthdr *)opt;
1322 1322 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1323 1323 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1324 1324 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
1325 1325 tohp->level, tohp->name, tohp->len));
1326 1326 if (tohp->level == SOL_SOCKET &&
1327 1327 tohp->name == SO_SRCADDR) {
1328 1328 *srcp = _TPI_TOPT_DATA(tohp);
1329 1329 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1330 1330 }
1331 1331 }
1332 1332 }
1333 1333
1334 1334 /*
1335 1335 * Verify if the SO_UNIX_CLOSE option is present.
1336 1336 */
1337 1337 int
1338 1338 so_getopt_unix_close(void *opt, t_uscalar_t optlen)
1339 1339 {
1340 1340 struct T_opthdr *tohp;
1341 1341
1342 1342 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1343 1343
1344 1344 for (tohp = (struct T_opthdr *)opt;
1345 1345 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1346 1346 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1347 1347 dprint(1,
1348 1348 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
1349 1349 tohp->level, tohp->name, tohp->len));
1350 1350 if (tohp->level == SOL_SOCKET &&
1351 1351 tohp->name == SO_UNIX_CLOSE)
1352 1352 return (1);
1353 1353 }
1354 1354 return (0);
1355 1355 }
1356 1356
1357 1357 /*
1358 1358 * Allocate an M_PROTO message.
1359 1359 *
1360 1360 * If allocation fails the behavior depends on sleepflg:
1361 1361 * _ALLOC_NOSLEEP fail immediately
1362 1362 * _ALLOC_INTR sleep for memory until a signal is caught
1363 1363 * _ALLOC_SLEEP sleep forever. Don't return NULL.
1364 1364 */
1365 1365 mblk_t *
1366 1366 soallocproto(size_t size, int sleepflg, cred_t *cr)
1367 1367 {
1368 1368 mblk_t *mp;
1369 1369
1370 1370 /* Round up size for reuse */
1371 1371 size = MAX(size, 64);
1372 1372 if (cr != NULL)
1373 1373 mp = allocb_cred(size, cr, curproc->p_pid);
1374 1374 else
1375 1375 mp = allocb(size, BPRI_MED);
1376 1376
1377 1377 if (mp == NULL) {
1378 1378 int error; /* Dummy - error not returned to caller */
1379 1379
1380 1380 switch (sleepflg) {
1381 1381 case _ALLOC_SLEEP:
1382 1382 if (cr != NULL) {
1383 1383 mp = allocb_cred_wait(size, STR_NOSIG, &error,
1384 1384 cr, curproc->p_pid);
1385 1385 } else {
1386 1386 mp = allocb_wait(size, BPRI_MED, STR_NOSIG,
1387 1387 &error);
1388 1388 }
1389 1389 ASSERT(mp);
1390 1390 break;
1391 1391 case _ALLOC_INTR:
1392 1392 if (cr != NULL) {
1393 1393 mp = allocb_cred_wait(size, 0, &error, cr,
1394 1394 curproc->p_pid);
1395 1395 } else {
1396 1396 mp = allocb_wait(size, BPRI_MED, 0, &error);
1397 1397 }
1398 1398 if (mp == NULL) {
1399 1399 /* Caught signal while sleeping for memory */
1400 1400 eprintline(ENOBUFS);
1401 1401 return (NULL);
1402 1402 }
1403 1403 break;
1404 1404 case _ALLOC_NOSLEEP:
1405 1405 default:
1406 1406 eprintline(ENOBUFS);
1407 1407 return (NULL);
1408 1408 }
1409 1409 }
1410 1410 DB_TYPE(mp) = M_PROTO;
1411 1411 return (mp);
1412 1412 }
1413 1413
1414 1414 /*
1415 1415 * Allocate an M_PROTO message with a single component.
1416 1416 * len is the length of buf. size is the amount to allocate.
1417 1417 *
1418 1418 * buf can be NULL with a non-zero len.
1419 1419 * This results in a bzero'ed chunk being placed the message.
1420 1420 */
1421 1421 mblk_t *
1422 1422 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg,
1423 1423 cred_t *cr)
1424 1424 {
1425 1425 mblk_t *mp;
1426 1426
1427 1427 if (size == 0)
1428 1428 size = len;
1429 1429
1430 1430 ASSERT(size >= len);
1431 1431 /* Round up size for reuse */
1432 1432 size = MAX(size, 64);
1433 1433 mp = soallocproto(size, sleepflg, cr);
1434 1434 if (mp == NULL)
1435 1435 return (NULL);
1436 1436 mp->b_datap->db_type = M_PROTO;
1437 1437 if (len != 0) {
1438 1438 if (buf != NULL)
1439 1439 bcopy(buf, mp->b_wptr, len);
1440 1440 else
1441 1441 bzero(mp->b_wptr, len);
1442 1442 mp->b_wptr += len;
1443 1443 }
1444 1444 return (mp);
1445 1445 }
1446 1446
1447 1447 /*
1448 1448 * Append buf/len to mp.
1449 1449 * The caller has to ensure that there is enough room in the mblk.
1450 1450 *
1451 1451 * buf can be NULL with a non-zero len.
1452 1452 * This results in a bzero'ed chunk being placed the message.
1453 1453 */
1454 1454 void
1455 1455 soappendmsg(mblk_t *mp, const void *buf, ssize_t len)
1456 1456 {
1457 1457 ASSERT(mp);
1458 1458
1459 1459 if (len != 0) {
1460 1460 /* Assert for room left */
1461 1461 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len);
1462 1462 if (buf != NULL)
1463 1463 bcopy(buf, mp->b_wptr, len);
1464 1464 else
1465 1465 bzero(mp->b_wptr, len);
1466 1466 }
1467 1467 mp->b_wptr += len;
1468 1468 }
1469 1469
1470 1470 /*
1471 1471 * Create a message using two kernel buffers.
1472 1472 * If size is set that will determine the allocation size (e.g. for future
1473 1473 * soappendmsg calls). If size is zero it is derived from the buffer
1474 1474 * lengths.
1475 1475 */
1476 1476 mblk_t *
1477 1477 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1478 1478 ssize_t size, int sleepflg, cred_t *cr)
1479 1479 {
1480 1480 mblk_t *mp;
1481 1481
1482 1482 if (size == 0)
1483 1483 size = len1 + len2;
1484 1484 ASSERT(size >= len1 + len2);
1485 1485
1486 1486 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1487 1487 if (mp)
1488 1488 soappendmsg(mp, buf2, len2);
1489 1489 return (mp);
1490 1490 }
1491 1491
1492 1492 /*
1493 1493 * Create a message using three kernel buffers.
1494 1494 * If size is set that will determine the allocation size (for future
1495 1495 * soappendmsg calls). If size is zero it is derived from the buffer
1496 1496 * lengths.
1497 1497 */
1498 1498 mblk_t *
1499 1499 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1500 1500 const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr)
1501 1501 {
1502 1502 mblk_t *mp;
1503 1503
1504 1504 if (size == 0)
1505 1505 size = len1 + len2 +len3;
1506 1506 ASSERT(size >= len1 + len2 + len3);
1507 1507
1508 1508 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1509 1509 if (mp != NULL) {
1510 1510 soappendmsg(mp, buf2, len2);
1511 1511 soappendmsg(mp, buf3, len3);
1512 1512 }
1513 1513 return (mp);
1514 1514 }
1515 1515
1516 1516 #ifdef DEBUG
1517 1517 char *
1518 1518 pr_state(uint_t state, uint_t mode)
1519 1519 {
1520 1520 static char buf[1024];
1521 1521
1522 1522 buf[0] = 0;
1523 1523 if (state & SS_ISCONNECTED)
1524 1524 (void) strcat(buf, "ISCONNECTED ");
1525 1525 if (state & SS_ISCONNECTING)
1526 1526 (void) strcat(buf, "ISCONNECTING ");
1527 1527 if (state & SS_ISDISCONNECTING)
1528 1528 (void) strcat(buf, "ISDISCONNECTING ");
1529 1529 if (state & SS_CANTSENDMORE)
1530 1530 (void) strcat(buf, "CANTSENDMORE ");
1531 1531
1532 1532 if (state & SS_CANTRCVMORE)
1533 1533 (void) strcat(buf, "CANTRCVMORE ");
1534 1534 if (state & SS_ISBOUND)
1535 1535 (void) strcat(buf, "ISBOUND ");
1536 1536 if (state & SS_NDELAY)
1537 1537 (void) strcat(buf, "NDELAY ");
1538 1538 if (state & SS_NONBLOCK)
1539 1539 (void) strcat(buf, "NONBLOCK ");
1540 1540
1541 1541 if (state & SS_ASYNC)
1542 1542 (void) strcat(buf, "ASYNC ");
1543 1543 if (state & SS_ACCEPTCONN)
1544 1544 (void) strcat(buf, "ACCEPTCONN ");
1545 1545 if (state & SS_SAVEDEOR)
1546 1546 (void) strcat(buf, "SAVEDEOR ");
1547 1547
1548 1548 if (state & SS_RCVATMARK)
1549 1549 (void) strcat(buf, "RCVATMARK ");
1550 1550 if (state & SS_OOBPEND)
1551 1551 (void) strcat(buf, "OOBPEND ");
1552 1552 if (state & SS_HAVEOOBDATA)
1553 1553 (void) strcat(buf, "HAVEOOBDATA ");
1554 1554 if (state & SS_HADOOBDATA)
1555 1555 (void) strcat(buf, "HADOOBDATA ");
1556 1556
1557 1557 if (mode & SM_PRIV)
1558 1558 (void) strcat(buf, "PRIV ");
1559 1559 if (mode & SM_ATOMIC)
1560 1560 (void) strcat(buf, "ATOMIC ");
1561 1561 if (mode & SM_ADDR)
1562 1562 (void) strcat(buf, "ADDR ");
1563 1563 if (mode & SM_CONNREQUIRED)
1564 1564 (void) strcat(buf, "CONNREQUIRED ");
1565 1565
1566 1566 if (mode & SM_FDPASSING)
1567 1567 (void) strcat(buf, "FDPASSING ");
1568 1568 if (mode & SM_EXDATA)
1569 1569 (void) strcat(buf, "EXDATA ");
1570 1570 if (mode & SM_OPTDATA)
1571 1571 (void) strcat(buf, "OPTDATA ");
1572 1572 if (mode & SM_BYTESTREAM)
1573 1573 (void) strcat(buf, "BYTESTREAM ");
1574 1574 return (buf);
1575 1575 }
1576 1576
1577 1577 char *
1578 1578 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen)
1579 1579 {
1580 1580 static char buf[1024];
1581 1581
1582 1582 if (addr == NULL || addrlen == 0) {
1583 1583 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr);
1584 1584 return (buf);
1585 1585 }
1586 1586 switch (family) {
1587 1587 case AF_INET: {
1588 1588 struct sockaddr_in sin;
1589 1589
1590 1590 bcopy(addr, &sin, sizeof (sin));
1591 1591
1592 1592 (void) sprintf(buf, "(len %d) %x/%d",
1593 1593 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1594 1594 break;
1595 1595 }
1596 1596 case AF_INET6: {
1597 1597 struct sockaddr_in6 sin6;
1598 1598 uint16_t *piece = (uint16_t *)&sin6.sin6_addr;
1599 1599
1600 1600 bcopy((char *)addr, (char *)&sin6, sizeof (sin6));
1601 1601 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d",
1602 1602 addrlen,
1603 1603 ntohs(piece[0]), ntohs(piece[1]),
1604 1604 ntohs(piece[2]), ntohs(piece[3]),
1605 1605 ntohs(piece[4]), ntohs(piece[5]),
1606 1606 ntohs(piece[6]), ntohs(piece[7]),
1607 1607 ntohs(sin6.sin6_port));
1608 1608 break;
1609 1609 }
1610 1610 case AF_UNIX: {
1611 1611 struct sockaddr_un *soun = (struct sockaddr_un *)addr;
1612 1612
1613 1613 (void) sprintf(buf, "(len %d) %s", addrlen,
1614 1614 (soun == NULL) ? "(none)" : soun->sun_path);
1615 1615 break;
1616 1616 }
1617 1617 default:
1618 1618 (void) sprintf(buf, "(unknown af %d)", family);
1619 1619 break;
1620 1620 }
1621 1621 return (buf);
1622 1622 }
1623 1623
1624 1624 /* The logical equivalence operator (a if-and-only-if b) */
1625 1625 #define EQUIVALENT(a, b) (((a) && (b)) || (!(a) && (!(b))))
1626 1626
1627 1627 /*
1628 1628 * Verify limitations and invariants on oob state.
1629 1629 * Return 1 if OK, otherwise 0 so that it can be used as
1630 1630 * ASSERT(verify_oobstate(so));
1631 1631 */
1632 1632 int
1633 1633 so_verify_oobstate(struct sonode *so)
1634 1634 {
1635 1635 boolean_t havemark;
1636 1636
1637 1637 ASSERT(MUTEX_HELD(&so->so_lock));
1638 1638
1639 1639 /*
1640 1640 * The possible state combinations are:
1641 1641 * 0
1642 1642 * SS_OOBPEND
1643 1643 * SS_OOBPEND|SS_HAVEOOBDATA
1644 1644 * SS_OOBPEND|SS_HADOOBDATA
1645 1645 * SS_HADOOBDATA
1646 1646 */
1647 1647 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) {
1648 1648 case 0:
1649 1649 case SS_OOBPEND:
1650 1650 case SS_OOBPEND|SS_HAVEOOBDATA:
1651 1651 case SS_OOBPEND|SS_HADOOBDATA:
1652 1652 case SS_HADOOBDATA:
1653 1653 break;
1654 1654 default:
1655 1655 printf("Bad oob state 1 (%p): state %s\n",
1656 1656 (void *)so, pr_state(so->so_state, so->so_mode));
1657 1657 return (0);
1658 1658 }
1659 1659
1660 1660 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */
1661 1661 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) {
1662 1662 printf("Bad oob state 2 (%p): state %s\n",
1663 1663 (void *)so, pr_state(so->so_state, so->so_mode));
1664 1664 return (0);
1665 1665 }
1666 1666
1667 1667 /*
1668 1668 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
1669 1669 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
1670 1670 */
1671 1671 havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 :
1672 1672 SOTOTPI(so)->sti_oobsigcnt > 0;
1673 1673
1674 1674 if (!EQUIVALENT(havemark || (so->so_state & SS_RCVATMARK),
1675 1675 so->so_state & SS_OOBPEND)) {
1676 1676 printf("Bad oob state 3 (%p): state %s\n",
1677 1677 (void *)so, pr_state(so->so_state, so->so_mode));
1678 1678 return (0);
1679 1679 }
1680 1680
1681 1681 /*
1682 1682 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
1683 1683 */
1684 1684 if (!(so->so_options & SO_OOBINLINE) &&
1685 1685 !EQUIVALENT(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) {
1686 1686 printf("Bad oob state 4 (%p): state %s\n",
1687 1687 (void *)so, pr_state(so->so_state, so->so_mode));
1688 1688 return (0);
1689 1689 }
1690 1690
1691 1691 if (!SOCK_IS_NONSTR(so) &&
1692 1692 SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) {
1693 1693 printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
1694 1694 (void *)so, SOTOTPI(so)->sti_oobsigcnt,
1695 1695 SOTOTPI(so)->sti_oobcnt,
1696 1696 pr_state(so->so_state, so->so_mode));
1697 1697 return (0);
1698 1698 }
1699 1699
1700 1700 return (1);
1701 1701 }
1702 1702 #undef EQUIVALENT
1703 1703 #endif /* DEBUG */
1704 1704
1705 1705 /* initialize sockfs zone specific kstat related items */
1706 1706 void *
1707 1707 sock_kstat_init(zoneid_t zoneid)
1708 1708 {
1709 1709 kstat_t *ksp;
1710 1710
1711 1711 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
1712 1712 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid);
1713 1713
1714 1714 if (ksp != NULL) {
1715 1715 ksp->ks_update = sockfs_update;
1716 1716 ksp->ks_snapshot = sockfs_snapshot;
1717 1717 ksp->ks_lock = &socklist.sl_lock;
1718 1718 ksp->ks_private = (void *)(uintptr_t)zoneid;
1719 1719 kstat_install(ksp);
1720 1720 }
1721 1721
1722 1722 return (ksp);
1723 1723 }
1724 1724
1725 1725 /* tear down sockfs zone specific kstat related items */
1726 1726 /*ARGSUSED*/
1727 1727 void
1728 1728 sock_kstat_fini(zoneid_t zoneid, void *arg)
1729 1729 {
1730 1730 kstat_t *ksp = (kstat_t *)arg;
1731 1731
1732 1732 if (ksp != NULL) {
1733 1733 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private);
1734 1734 kstat_delete(ksp);
1735 1735 }
1736 1736 }
1737 1737
1738 1738 /*
1739 1739 * Zones:
1740 1740 * Note that nactive is going to be different for each zone.
1741 1741 * This means we require kstat to call sockfs_update and then sockfs_snapshot
1742 1742 * for the same zone, or sockfs_snapshot will be taken into the wrong size
1743 1743 * buffer. This is safe, but if the buffer is too small, user will not be
1744 1744 * given details of all sockets. However, as this kstat has a ks_lock, kstat
1745 1745 * driver will keep it locked between the update and the snapshot, so no
1746 1746 * other process (zone) can currently get inbetween resulting in a wrong size
1747 1747 * buffer allocation.
1748 1748 */
1749 1749 static int
1750 1750 sockfs_update(kstat_t *ksp, int rw)
1751 1751 {
1752 1752 uint_t nactive = 0; /* # of active AF_UNIX sockets */
1753 1753 struct sonode *so; /* current sonode on socklist */
1754 1754 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1755 1755
1756 1756 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1757 1757
1758 1758 if (rw == KSTAT_WRITE) { /* bounce all writes */
1759 1759 return (EACCES);
1760 1760 }
1761 1761
1762 1762 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1763 1763 if (so->so_count != 0 && so->so_zoneid == myzoneid) {
1764 1764 nactive++;
1765 1765 }
1766 1766 }
1767 1767 ksp->ks_ndata = nactive;
1768 1768 ksp->ks_data_size = nactive * sizeof (struct k_sockinfo);
1769 1769
1770 1770 return (0);
1771 1771 }
1772 1772
1773 1773 static int
1774 1774 sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
1775 1775 {
1776 1776 int ns; /* # of sonodes we've copied */
1777 1777 struct sonode *so; /* current sonode on socklist */
1778 1778 struct k_sockinfo *pksi; /* where we put sockinfo data */
1779 1779 t_uscalar_t sn_len; /* soa_len */
1780 1780 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1781 1781 sotpi_info_t *sti;
1782 1782
1783 1783 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1784 1784
1785 1785 ksp->ks_snaptime = gethrtime();
1786 1786
1787 1787 if (rw == KSTAT_WRITE) { /* bounce all writes */
1788 1788 return (EACCES);
1789 1789 }
1790 1790
1791 1791 /*
1792 1792 * for each sonode on the socklist, we massage the important
1793 1793 * info into buf, in k_sockinfo format.
1794 1794 */
1795 1795 pksi = (struct k_sockinfo *)buf;
1796 1796 ns = 0;
1797 1797 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1798 1798 /* only stuff active sonodes and the same zone: */
1799 1799 if (so->so_count == 0 || so->so_zoneid != myzoneid) {
1800 1800 continue;
1801 1801 }
1802 1802
1803 1803 /*
1804 1804 * If the sonode was activated between the update and the
1805 1805 * snapshot, we're done - as this is only a snapshot.
1806 1806 */
1807 1807 if ((caddr_t)(pksi) >= (caddr_t)buf + ksp->ks_data_size) {
1808 1808 break;
1809 1809 }
1810 1810
1811 1811 sti = SOTOTPI(so);
1812 1812 /* copy important info into buf: */
1813 1813 pksi->ks_si.si_size = sizeof (struct k_sockinfo);
1814 1814 pksi->ks_si.si_family = so->so_family;
1815 1815 pksi->ks_si.si_type = so->so_type;
1816 1816 pksi->ks_si.si_flag = so->so_flag;
1817 1817 pksi->ks_si.si_state = so->so_state;
1818 1818 pksi->ks_si.si_serv_type = sti->sti_serv_type;
1819 1819 pksi->ks_si.si_ux_laddr_sou_magic =
1820 1820 sti->sti_ux_laddr.soua_magic;
1821 1821 pksi->ks_si.si_ux_faddr_sou_magic =
1822 1822 sti->sti_ux_faddr.soua_magic;
1823 1823 pksi->ks_si.si_laddr_soa_len = sti->sti_laddr.soa_len;
1824 1824 pksi->ks_si.si_faddr_soa_len = sti->sti_faddr.soa_len;
1825 1825 pksi->ks_si.si_szoneid = so->so_zoneid;
1826 1826 pksi->ks_si.si_faddr_noxlate = sti->sti_faddr_noxlate;
1827 1827
1828 1828 mutex_enter(&so->so_lock);
1829 1829
1830 1830 if (sti->sti_laddr_sa != NULL) {
1831 1831 ASSERT(sti->sti_laddr_sa->sa_data != NULL);
1832 1832 sn_len = sti->sti_laddr_len;
1833 1833 ASSERT(sn_len <= sizeof (short) +
1834 1834 sizeof (pksi->ks_si.si_laddr_sun_path));
1835 1835
1836 1836 pksi->ks_si.si_laddr_family =
1837 1837 sti->sti_laddr_sa->sa_family;
1838 1838 if (sn_len != 0) {
1839 1839 /* AF_UNIX socket names are NULL terminated */
1840 1840 (void) strncpy(pksi->ks_si.si_laddr_sun_path,
1841 1841 sti->sti_laddr_sa->sa_data,
1842 1842 sizeof (pksi->ks_si.si_laddr_sun_path));
1843 1843 sn_len = strlen(pksi->ks_si.si_laddr_sun_path);
1844 1844 }
1845 1845 pksi->ks_si.si_laddr_sun_path[sn_len] = 0;
1846 1846 }
1847 1847
1848 1848 if (sti->sti_faddr_sa != NULL) {
1849 1849 ASSERT(sti->sti_faddr_sa->sa_data != NULL);
1850 1850 sn_len = sti->sti_faddr_len;
1851 1851 ASSERT(sn_len <= sizeof (short) +
1852 1852 sizeof (pksi->ks_si.si_faddr_sun_path));
1853 1853
1854 1854 pksi->ks_si.si_faddr_family =
1855 1855 sti->sti_faddr_sa->sa_family;
1856 1856 if (sn_len != 0) {
1857 1857 (void) strncpy(pksi->ks_si.si_faddr_sun_path,
1858 1858 sti->sti_faddr_sa->sa_data,
1859 1859 sizeof (pksi->ks_si.si_faddr_sun_path));
1860 1860 sn_len = strlen(pksi->ks_si.si_faddr_sun_path);
1861 1861 }
1862 1862 pksi->ks_si.si_faddr_sun_path[sn_len] = 0;
1863 1863 }
1864 1864
1865 1865 mutex_exit(&so->so_lock);
1866 1866
1867 1867 (void) sprintf(pksi->ks_straddr[0], "%p", (void *)so);
1868 1868 (void) sprintf(pksi->ks_straddr[1], "%p",
1869 1869 (void *)sti->sti_ux_laddr.soua_vp);
1870 1870 (void) sprintf(pksi->ks_straddr[2], "%p",
1871 1871 (void *)sti->sti_ux_faddr.soua_vp);
1872 1872
1873 1873 ns++;
1874 1874 pksi++;
1875 1875 }
1876 1876
1877 1877 ksp->ks_ndata = ns;
1878 1878 return (0);
1879 1879 }
1880 1880
1881 1881 ssize_t
1882 1882 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size)
1883 1883 {
1884 1884 struct uio auio;
1885 1885 struct iovec aiov[1];
1886 1886 register vnode_t *vp;
1887 1887 int ioflag, rwflag;
1888 1888 ssize_t cnt;
1889 1889 int error = 0;
1890 1890 int iovcnt = 0;
1891 1891 short fflag;
1892 1892
1893 1893 vp = fp->f_vnode;
1894 1894 fflag = fp->f_flag;
1895 1895
1896 1896 rwflag = 0;
1897 1897 aiov[0].iov_base = (caddr_t)buf;
1898 1898 aiov[0].iov_len = size;
1899 1899 iovcnt = 1;
1900 1900 cnt = (ssize_t)size;
1901 1901 (void) VOP_RWLOCK(vp, rwflag, NULL);
1902 1902
1903 1903 auio.uio_loffset = fileoff;
1904 1904 auio.uio_iov = aiov;
1905 1905 auio.uio_iovcnt = iovcnt;
1906 1906 auio.uio_resid = cnt;
1907 1907 auio.uio_segflg = UIO_SYSSPACE;
1908 1908 auio.uio_llimit = MAXOFFSET_T;
1909 1909 auio.uio_fmode = fflag;
1910 1910 auio.uio_extflg = UIO_COPY_CACHED;
1911 1911
1912 1912 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1913 1913
1914 1914 /* If read sync is not asked for, filter sync flags */
1915 1915 if ((ioflag & FRSYNC) == 0)
1916 1916 ioflag &= ~(FSYNC|FDSYNC);
1917 1917 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1918 1918 cnt -= auio.uio_resid;
1919 1919
1920 1920 VOP_RWUNLOCK(vp, rwflag, NULL);
1921 1921
1922 1922 if (error == EINTR && cnt != 0)
1923 1923 error = 0;
1924 1924 out:
1925 1925 if (error != 0) {
1926 1926 *err = error;
1927 1927 return (0);
1928 1928 } else {
1929 1929 *err = 0;
1930 1930 return (cnt);
1931 1931 }
1932 1932 }
1933 1933
1934 1934 int
1935 1935 so_copyin(const void *from, void *to, size_t size, int fromkernel)
1936 1936 {
1937 1937 if (fromkernel) {
1938 1938 bcopy(from, to, size);
1939 1939 return (0);
1940 1940 }
1941 1941 return (xcopyin(from, to, size));
1942 1942 }
1943 1943
1944 1944 int
1945 1945 so_copyout(const void *from, void *to, size_t size, int tokernel)
1946 1946 {
1947 1947 if (tokernel) {
1948 1948 bcopy(from, to, size);
1949 1949 return (0);
1950 1950 }
1951 1951 return (xcopyout(from, to, size));
1952 1952 }
|
↓ open down ↓ |
1952 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX