Print this page
12276 smatch-clean sockfs
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/sockfs/socktpi.c
+++ new/usr/src/uts/common/fs/sockfs/socktpi.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015, Joyent, Inc.
25 25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
26 26 */
27 27
28 28 #include <sys/types.h>
29 29 #include <sys/t_lock.h>
30 30 #include <sys/param.h>
31 31 #include <sys/systm.h>
32 32 #include <sys/buf.h>
33 33 #include <sys/conf.h>
34 34 #include <sys/cred.h>
35 35 #include <sys/kmem.h>
36 36 #include <sys/kmem_impl.h>
37 37 #include <sys/sysmacros.h>
38 38 #include <sys/vfs.h>
39 39 #include <sys/vnode.h>
40 40 #include <sys/debug.h>
41 41 #include <sys/errno.h>
42 42 #include <sys/time.h>
43 43 #include <sys/file.h>
44 44 #include <sys/open.h>
45 45 #include <sys/user.h>
46 46 #include <sys/termios.h>
47 47 #include <sys/stream.h>
48 48 #include <sys/strsubr.h>
49 49 #include <sys/strsun.h>
50 50 #include <sys/suntpi.h>
51 51 #include <sys/ddi.h>
52 52 #include <sys/esunddi.h>
53 53 #include <sys/flock.h>
54 54 #include <sys/modctl.h>
55 55 #include <sys/vtrace.h>
56 56 #include <sys/cmn_err.h>
57 57 #include <sys/pathname.h>
58 58
59 59 #include <sys/socket.h>
60 60 #include <sys/socketvar.h>
61 61 #include <sys/sockio.h>
62 62 #include <netinet/in.h>
63 63 #include <sys/un.h>
64 64 #include <sys/strsun.h>
65 65
66 66 #include <sys/tiuser.h>
67 67 #define _SUN_TPI_VERSION 2
68 68 #include <sys/tihdr.h>
69 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */
70 70
71 71 #include <c2/audit.h>
72 72
73 73 #include <inet/common.h>
74 74 #include <inet/ip.h>
75 75 #include <inet/ip6.h>
76 76 #include <inet/tcp.h>
77 77 #include <inet/udp_impl.h>
78 78
79 79 #include <sys/zone.h>
80 80
81 81 #include <fs/sockfs/nl7c.h>
82 82 #include <fs/sockfs/nl7curi.h>
83 83
84 84 #include <fs/sockfs/sockcommon.h>
85 85 #include <fs/sockfs/socktpi.h>
86 86 #include <fs/sockfs/socktpi_impl.h>
87 87
88 88 /*
89 89 * Possible failures when memory can't be allocated. The documented behavior:
90 90 *
91 91 * 5.5: 4.X: XNET:
92 92 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/
93 93 * EINTR
94 94 * (4.X does not document EINTR but returns it)
95 95 * bind: ENOSR - ENOBUFS/ENOSR
96 96 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR
97 97 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR
98 98 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR
99 99 * (4.X getpeername and getsockname do not fail in practice)
100 100 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR
101 101 * listen: - - ENOBUFS
102 102 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/
103 103 * EINTR
104 104 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/
105 105 * EINTR
106 106 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR
107 107 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR
108 108 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR
109 109 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR
110 110 *
111 111 * Resolution. When allocation fails:
112 112 * recv: return EINTR
113 113 * send: return EINTR
114 114 * connect, accept: EINTR
115 115 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep
116 116 * socket, socketpair: ENOBUFS
117 117 * getpeername, getsockname: sleep
118 118 * getsockopt, setsockopt: sleep
119 119 */
120 120
121 121 #ifdef SOCK_TEST
122 122 /*
123 123 * Variables that make sockfs do something other than the standard TPI
124 124 * for the AF_INET transports.
125 125 *
126 126 * solisten_tpi_tcp:
127 127 * TCP can handle a O_T_BIND_REQ with an increased backlog even though
128 128 * the transport is already bound. This is needed to avoid losing the
129 129 * port number should listen() do a T_UNBIND_REQ followed by a
130 130 * O_T_BIND_REQ.
131 131 *
132 132 * soconnect_tpi_udp:
133 133 * UDP and ICMP can handle a T_CONN_REQ.
134 134 * This is needed to make the sequence of connect(), getsockname()
135 135 * return the local IP address used to send packets to the connected to
136 136 * destination.
137 137 *
138 138 * soconnect_tpi_tcp:
139 139 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
140 140 * Set this to non-zero to send TPI conformant messages to TCP in this
141 141 * respect. This is a performance optimization.
142 142 *
143 143 * soaccept_tpi_tcp:
144 144 * TCP can handle a T_CONN_REQ without the acceptor being bound.
145 145 * This is a performance optimization that has been picked up in XTI.
146 146 *
147 147 * soaccept_tpi_multioptions:
148 148 * When inheriting SOL_SOCKET options from the listener to the accepting
149 149 * socket send them as a single message for AF_INET{,6}.
150 150 */
151 151 int solisten_tpi_tcp = 0;
152 152 int soconnect_tpi_udp = 0;
153 153 int soconnect_tpi_tcp = 0;
154 154 int soaccept_tpi_tcp = 0;
155 155 int soaccept_tpi_multioptions = 1;
156 156 #else /* SOCK_TEST */
157 157 #define soconnect_tpi_tcp 0
158 158 #define soconnect_tpi_udp 0
159 159 #define solisten_tpi_tcp 0
160 160 #define soaccept_tpi_tcp 0
161 161 #define soaccept_tpi_multioptions 1
162 162 #endif /* SOCK_TEST */
163 163
164 164 #ifdef SOCK_TEST
165 165 extern int do_useracc;
166 166 extern clock_t sock_test_timelimit;
167 167 #endif /* SOCK_TEST */
168 168
169 169 extern uint32_t ucredsize;
170 170
171 171 /*
172 172 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
173 173 * applications working. Turn on this flag to disable these checks.
174 174 */
/* Non-zero: skip the X/Open state checks (e.g. the SS_CANTSENDMORE check in bind). */
175 175 int xnet_skip_checks = 0;
/* Non-zero: printf a diagnostic when the X/Open bind state check returns EINVAL. */
176 176 int xnet_check_print = 0;
/* NOTE(review): not referenced in the visible part of this file; presumably enables a diagnostic when an X/Open rule truncates data -- confirm. */
177 177 int xnet_truncate_print = 0;
178 178
179 179 static void sotpi_destroy(struct sonode *);
180 180 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
181 181 int, int *, cred_t *cr);
182 182
183 183 static boolean_t sotpi_info_create(struct sonode *, int);
184 184 static void sotpi_info_init(struct sonode *);
185 185 static void sotpi_info_fini(struct sonode *);
186 186 static void sotpi_info_destroy(struct sonode *);
187 187
188 188 /*
189 189 * Do direct function call to the transport layer below; this would
190 190 * also allow the transport to utilize read-side synchronous stream
191 191 * interface if necessary. This is a /etc/system tunable that must
192 192 * not be modified on a running system. By default this is enabled
193 193 * for performance reasons and may be disabled for debugging purposes.
194 194 */
195 195 boolean_t socktpi_direct = B_TRUE;
196 196
197 197 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;
198 198
199 199 extern void sigintr(k_sigset_t *, int);
200 200 extern void sigunintr(k_sigset_t *);
201 201
202 202 static int sotpi_unbind(struct sonode *, int);
203 203
204 204 /* TPI sockfs sonode operations */
205 205 int sotpi_init(struct sonode *, struct sonode *, struct cred *,
206 206 int);
207 207 static int sotpi_accept(struct sonode *, int, struct cred *,
208 208 struct sonode **);
209 209 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
210 210 int, struct cred *);
211 211 static int sotpi_listen(struct sonode *, int, struct cred *);
212 212 static int sotpi_connect(struct sonode *, struct sockaddr *,
213 213 socklen_t, int, int, struct cred *);
214 214 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *,
215 215 struct uio *, struct cred *);
216 216 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *,
217 217 struct uio *, struct cred *);
218 218 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int,
219 219 struct cred *, mblk_t **);
220 220 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
221 221 struct uio *, void *, t_uscalar_t, int);
222 222 static int sodgram_direct(struct sonode *, struct sockaddr *,
223 223 socklen_t, struct uio *, int);
224 224 extern int sotpi_getpeername(struct sonode *, struct sockaddr *,
225 225 socklen_t *, boolean_t, struct cred *);
226 226 static int sotpi_getsockname(struct sonode *, struct sockaddr *,
227 227 socklen_t *, struct cred *);
228 228 static int sotpi_shutdown(struct sonode *, int, struct cred *);
229 229 extern int sotpi_getsockopt(struct sonode *, int, int, void *,
230 230 socklen_t *, int, struct cred *);
231 231 extern int sotpi_setsockopt(struct sonode *, int, int, const void *,
232 232 socklen_t, struct cred *);
233 233 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
234 234 int32_t *);
235 235 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int,
236 236 struct cred *, int32_t *);
237 237 static int sotpi_poll(struct sonode *, short, int, short *,
238 238 struct pollhead **);
239 239 static int sotpi_close(struct sonode *, int, struct cred *);
240 240
241 241 static int i_sotpi_info_constructor(sotpi_info_t *);
242 242 static void i_sotpi_info_destructor(sotpi_info_t *);
243 243
/*
 * Operations vector for TPI sockets. The entries are positional; the
 * trailing comment on each line names the sonodeops_t slot it fills.
 * sotpi_create() hands this vector to sonode_init(), and sotpi_destroy()
 * asserts that so_ops still points at it.
 */
244 244 sonodeops_t sotpi_sonodeops = {
245 245 sotpi_init, /* sop_init */
246 246 sotpi_accept, /* sop_accept */
247 247 sotpi_bind, /* sop_bind */
248 248 sotpi_listen, /* sop_listen */
249 249 sotpi_connect, /* sop_connect */
250 250 sotpi_recvmsg, /* sop_recvmsg */
251 251 sotpi_sendmsg, /* sop_sendmsg */
252 252 sotpi_sendmblk, /* sop_sendmblk */
253 253 sotpi_getpeername, /* sop_getpeername */
254 254 sotpi_getsockname, /* sop_getsockname */
255 255 sotpi_shutdown, /* sop_shutdown */
256 256 sotpi_getsockopt, /* sop_getsockopt */
257 257 sotpi_setsockopt, /* sop_setsockopt */
258 258 sotpi_ioctl, /* sop_ioctl */
259 259 sotpi_poll, /* sop_poll */
260 260 sotpi_close, /* sop_close */
261 261 };
262 262
263 263 /*
264 264 * Return a TPI socket vnode.
265 265 *
266 266 * Note that sockets assume that the driver will clone (either itself
267 267 * or by using the clone driver) i.e. a socket() call will always
268 268 * result in a new vnode being created.
269 269 */
270 270
271 271 /*
272 272 * Common create code for socket and accept. If tso is set the values
273 273 * from that node are used instead of issuing a T_INFO_REQ.
 *
 * NOTE(review): sotpi_create() itself takes no tso argument; the tso
 * handling mentioned above appears to live in sotpi_init() -- confirm.
 *
 * Allocates a sonode from the TPI kmem cache that matches the family
 * (socktpi_unix_cache for AF_UNIX, socktpi_cache otherwise) and
 * initialises it with sonode_init() and sotpi_info_init().
 *
 * sp, family, type, protocol: passed to sonode_init(). AF_NCA is mapped
 *     to AF_INET first, is refused with ENOTSUP outside the global zone,
 *     and is recorded afterwards as NL7C_AF_NCA in sti_nl7c_flags.
 * version: SOV_DEFAULT selects so_default_version; the result is stored
 *     in so_version.
 * sflags, cr: not used by this function.
 * errorp: set to 0 on success, otherwise to ENOTSUP or ENOMEM.
 *
 * Returns the new sonode, or NULL with *errorp set.
274 274 */
275 275
276 276 /* ARGSUSED */
277 277 static struct sonode *
278 278 sotpi_create(struct sockparams *sp, int family, int type, int protocol,
279 279 int version, int sflags, int *errorp, cred_t *cr)
280 280 {
281 281 struct sonode *so;
282 282 kmem_cache_t *cp;
/* Remember the caller's family; "family" itself may be rewritten for AF_NCA. */
283 283 int sfamily = family;
284 284
285 285 ASSERT(sp->sp_sdev_info.sd_vnode != NULL);
286 286
287 287 if (family == AF_NCA) {
288 288 /*
289 289 * The request is for an NCA socket so for NL7C use the
290 290 * INET domain instead and mark NL7C_AF_NCA below.
291 291 */
292 292 family = AF_INET;
293 293 /*
294 294 * NL7C is not supported in the non-global zone,
295 295 * we enforce this restriction here.
296 296 */
297 297 if (getzoneid() != GLOBAL_ZONEID) {
298 298 *errorp = ENOTSUP;
299 299 return (NULL);
300 300 }
301 301 }
302 302
303 303 /*
304 304 * to be compatible with old tpi socket implementation ignore
305 305 * sleep flag (sflags) passed in
306 306 */
307 307 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
308 308 so = kmem_cache_alloc(cp, KM_SLEEP);
/*
 * NOTE(review): a KM_SLEEP allocation is expected to block rather than
 * return NULL, so this branch looks unreachable -- confirm against
 * kmem_cache_alloc(9F) before removing it.
 */
309 309 if (so == NULL) {
310 310 *errorp = ENOMEM;
311 311 return (NULL);
312 312 }
313 313
314 314 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
315 315 sotpi_info_init(so);
316 316
317 317 if (sfamily == AF_NCA) {
318 318 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA;
319 319 }
320 320
321 321 if (version == SOV_DEFAULT)
322 322 version = so_default_version;
323 323
324 324 so->so_version = (short)version;
325 325 *errorp = 0;
326 326
327 327 return (so);
328 328 }
329 329
/*
 * Tear down a TPI sonode and release its memory.
 *
 * The original sockparams pointer (sti_orig_sp) is read before
 * sotpi_info_fini() is called. For a socket marked SS_FALLBACK_COMP the
 * sotpi_info_t is destroyed separately, the sonode is handed to the
 * original socket module's smod_sock_destroy_func, and the reference on
 * the original sockparams is dropped. Otherwise the sonode is finalised
 * with sonode_fini() and returned to the kmem cache matching its family.
 */
330 330 static void
331 331 sotpi_destroy(struct sonode *so)
332 332 {
333 333 kmem_cache_t *cp;
334 334 struct sockparams *origsp;
335 335
336 336 /*
337 337 * If there is a new dealloc function (ie. smod_destroy_func),
338 338 * then it should check the correctness of the ops.
339 339 */
340 340
341 341 ASSERT(so->so_ops == &sotpi_sonodeops);
342 342
343 343 origsp = SOTOTPI(so)->sti_orig_sp;
344 344
345 345 sotpi_info_fini(so);
346 346
347 347 if (so->so_state & SS_FALLBACK_COMP) {
348 348 /*
349 349 * A fallback happened, which means that a sotpi_info_t struct
350 350 * was allocated (as opposed to being allocated from the TPI
351 351 * sonode cache). Therefore we explicitly free the struct
352 352 * here.
353 353 */
354 354 sotpi_info_destroy(so);
355 355 ASSERT(origsp != NULL);
356 356
357 357 origsp->sp_smod_info->smod_sock_destroy_func(so);
358 358 SOCKPARAMS_DEC_REF(origsp);
359 359 } else {
360 360 sonode_fini(so);
361 361 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
362 362 socktpi_cache;
363 363 kmem_cache_free(cp, so);
364 364 }
365 365 }
366 366
/*
 * Open the stream underneath a TPI socket and finish socket setup.
 *
 * so:    socket being initialised.
 * tso:   template socket, or NULL. When non-NULL for a SOCK_STREAM inet
 *        socket, its sti_direct setting decides whether SO_ACCEPTOR is
 *        passed to the transport or direct calls are turned off. It is
 *        also handed to so_strinit().
 * cr:    credentials passed to stropen(), strioctl(), sotpi_setsockopt()
 *        and sotpi_close().
 * flags: open flags; FREAD|FWRITE are always added, and SO_SOCKSTR and
 *        SO_ACCEPTOR may be added below. With SO_FALLBACK set the
 *        function returns once the stream is open, before so_strinit().
 *
 * Returns 0 on success or an errno value. After a successful stropen()
 * every failure path closes the socket with sotpi_close() before
 * returning. When stropen() itself fails, the socket is closed only if
 * the stream head reports STREOPENFAIL.
 */
367 367 /* ARGSUSED1 */
368 368 int
369 369 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
370 370 {
371 371 major_t maj;
372 372 dev_t newdev;
373 373 struct vnode *vp;
374 374 int error = 0;
375 375 struct stdata *stp;
376 376
377 377 sotpi_info_t *sti = SOTOTPI(so);
378 378
379 379 dprint(1, ("sotpi_init()\n"));
380 380
381 381 /*
382 382 * overwrite the sleep flag passed in but that is ok
383 383 * as tpi socket does not honor sleep flag.
384 384 */
385 385 flags |= FREAD|FWRITE;
386 386
387 387 /*
388 388 * Record in so_flag that it is a clone.
389 389 */
390 390 if (getmajor(sti->sti_dev) == clone_major)
391 391 so->so_flag |= SOCLONE;
392 392
393 393 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
394 394 (so->so_family == AF_INET || so->so_family == AF_INET6) &&
395 395 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
396 396 so->so_protocol == IPPROTO_IP)) {
397 397 /* Tell tcp or udp that it's talking to sockets */
398 398 flags |= SO_SOCKSTR;
399 399
400 400 /*
401 401 * Here we indicate to socktpi_open() our attempt to
402 402 * make direct calls between sockfs and transport.
403 403 * The final decision is left to socktpi_open().
404 404 */
405 405 sti->sti_direct = 1;
406 406
407 407 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL);
408 408 if (so->so_type == SOCK_STREAM && tso != NULL) {
409 409 if (SOTOTPI(tso)->sti_direct) {
410 410 /*
411 411 * Inherit sti_direct from listener and pass
412 412 * SO_ACCEPTOR open flag to tcp, indicating
413 413 * that this is an accept fast-path instance.
414 414 */
415 415 flags |= SO_ACCEPTOR;
416 416 } else {
417 417 /*
418 418 * sti_direct is not set on listener, meaning
419 419 * that the listener has been converted from
420 420 * a socket to a stream. Ensure that the
421 421 * acceptor inherits these settings.
422 422 */
423 423 sti->sti_direct = 0;
424 424 flags &= ~SO_SOCKSTR;
425 425 }
426 426 }
427 427 }
428 428
429 429 /*
430 430 * Tell local transport that it is talking to sockets.
431 431 */
432 432 if (so->so_family == AF_UNIX) {
433 433 flags |= SO_SOCKSTR;
434 434 }
435 435
436 436 vp = SOTOV(so);
437 437 newdev = vp->v_rdev;
438 438 maj = getmajor(newdev);
439 439 ASSERT(STREAMSTAB(maj));
440 440
441 441 error = stropen(vp, &newdev, flags, cr);
442 442
443 443 stp = vp->v_stream;
444 444 if (error == 0) {
445 445 if (so->so_flag & SOCLONE)
446 446 ASSERT(newdev != vp->v_rdev);
/* Record the device number written back by stropen() under so_lock. */
447 447 mutex_enter(&so->so_lock);
448 448 sti->sti_dev = newdev;
449 449 vp->v_rdev = newdev;
450 450 mutex_exit(&so->so_lock);
451 451
452 452 if (stp->sd_flag & STRISTTY) {
453 453 /*
454 454 * this is a post SVR4 tty driver - a socket can not
455 455 * be a controlling terminal. Fail the open.
456 456 */
457 457 (void) sotpi_close(so, flags, cr);
458 458 return (ENOTTY); /* XXX */
459 459 }
460 460
461 461 ASSERT(stp->sd_wrq != NULL);
462 462 sti->sti_provinfo = tpi_findprov(stp->sd_wrq);
463 463
464 464 /*
465 465 * If caller is interested in doing direct function call
466 466 * interface to/from transport module, probe the module
467 467 * directly beneath the streamhead to see if it qualifies.
468 468 *
469 469 * We turn off the direct interface when qualifications fail.
470 470 * In the acceptor case, we simply turn off the sti_direct
471 471 * flag on the socket. We do the fallback after the accept
472 472 * has completed, before the new socket is returned to the
473 473 * application.
474 474 */
475 475 if (sti->sti_direct) {
476 476 queue_t *tq = stp->sd_wrq->q_next;
477 477
478 478 /*
479 479 * sti_direct is currently supported and tested
480 480 * only for tcp/udp; this is the main reason to
481 481 * have the following assertions.
482 482 */
483 483 ASSERT(so->so_family == AF_INET ||
484 484 so->so_family == AF_INET6);
485 485 ASSERT(so->so_protocol == IPPROTO_UDP ||
486 486 so->so_protocol == IPPROTO_TCP ||
487 487 so->so_protocol == IPPROTO_IP);
488 488 ASSERT(so->so_type == SOCK_DGRAM ||
489 489 so->so_type == SOCK_STREAM);
490 490
491 491 /*
492 492 * Abort direct call interface if the module directly
493 493 * underneath the stream head is not defined with the
494 494 * _D_DIRECT flag. This could happen in the tcp or
495 495 * udp case, when some other module is autopushed
496 496 * above it, or for some reasons the expected module
497 497 * isn't purely D_MP (which is the main requirement).
498 498 */
499 499 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
500 500 !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
501 501 int rval;
502 502
503 503 /* Continue on without direct calls */
504 504 sti->sti_direct = 0;
505 505
506 506 /*
507 507 * Cannot issue ioctl on fallback socket since
508 508 * there is no conn associated with the queue.
509 509 * The fallback downcall will notify the proto
510 510 * of the change.
511 511 */
512 512 if (!(flags & SO_ACCEPTOR) &&
513 513 !(flags & SO_FALLBACK)) {
514 514 if ((error = strioctl(vp,
515 515 _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
516 516 cr, &rval)) != 0) {
517 517 (void) sotpi_close(so, flags,
518 518 cr);
519 519 return (error);
520 520 }
521 521 }
522 522 }
523 523 }
524 524
525 525 if (flags & SO_FALLBACK) {
526 526 /*
527 527 * The stream created does not have a conn.
528 528 * do stream set up after conn has been assigned
529 529 */
530 530 return (error);
531 531 }
/* Assignment is intentional: a non-zero so_strinit() result aborts the open. */
532 532 if (error = so_strinit(so, tso)) {
533 533 (void) sotpi_close(so, flags, cr);
534 534 return (error);
535 535 }
536 536
537 537 /* Enable sendfile() on AF_UNIX streams */
538 538 if (so->so_family == AF_UNIX && so->so_type == SOCK_STREAM) {
539 539 mutex_enter(&so->so_lock);
540 540 so->so_mode |= SM_SENDFILESUPP;
541 541 mutex_exit(&so->so_lock);
542 542 }
543 543
544 544 /* Wildcard */
545 545 if (so->so_protocol != so->so_sockparams->sp_protocol) {
546 546 int protocol = so->so_protocol;
547 547 /*
548 548 * Issue SO_PROTOTYPE setsockopt.
549 549 */
550 550 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
551 551 &protocol, (t_uscalar_t)sizeof (protocol), cr);
552 552 if (error != 0) {
553 553 (void) sotpi_close(so, flags, cr);
554 554 /*
555 555 * Setsockopt often fails with ENOPROTOOPT but
556 556 * socket() should fail with
557 557 * EPROTONOSUPPORT/EPROTOTYPE.
558 558 */
559 559 return (EPROTONOSUPPORT);
560 560 }
561 561 }
562 562
563 563 } else {
564 564 /*
565 565 * While the same socket can not be reopened (unlike specfs)
566 566 * the stream head sets STREOPENFAIL when the autopush fails.
567 567 */
568 568 if ((stp != NULL) &&
569 569 (stp->sd_flag & STREOPENFAIL)) {
570 570 /*
571 571 * Open failed part way through.
572 572 */
/* Clear the flag under sd_lock, then close the partly opened stream. */
573 573 mutex_enter(&stp->sd_lock);
574 574 stp->sd_flag &= ~STREOPENFAIL;
575 575 mutex_exit(&stp->sd_lock);
576 576 (void) sotpi_close(so, flags, cr);
577 577 return (error);
578 578 /*NOTREACHED*/
579 579 }
580 580 ASSERT(stp == NULL);
581 581 }
582 582 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
583 583 "sockfs open:maj %d vp %p so %p error %d",
584 584 maj, vp, so, error);
585 585 return (error);
586 586 }
587 587
588 588 /*
589 589 * Bind the socket to an unspecified address in sockfs only.
590 590 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
591 591 * required in all cases.
 *
 * No message is sent to the transport from here: the saved local address
 * is zeroed over sti_laddr_len bytes, its sa_family is set to so_family,
 * and SS_ISBOUND is set in so_state.
 *
 * Preconditions (all asserted below): the family is AF_INET or AF_INET6,
 * the caller holds so_lock, the socket is not already bound, and
 * sti_unbind_mp is non-NULL.
592 592 */
593 593 static void
594 594 so_automatic_bind(struct sonode *so)
595 595 {
596 596 sotpi_info_t *sti = SOTOTPI(so);
597 597 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
598 598
599 599 ASSERT(MUTEX_HELD(&so->so_lock));
600 600 ASSERT(!(so->so_state & SS_ISBOUND));
601 601 ASSERT(sti->sti_unbind_mp);
602 602
603 603 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
604 604 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
605 605 sti->sti_laddr_sa->sa_family = so->so_family;
606 606 so->so_state |= SS_ISBOUND;
607 607 }
608 608
609 609
610 610 /*
611 611 * bind the socket.
612 612 *
613 613 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
614 614 * are passed in we allow rebinding. Note that for backwards compatibility
615 615 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
616 616 * Thus the rebinding code is currently not executed.
617 617 *
618 618 * The constraints for rebinding are:
619 619 * - it is a SOCK_DGRAM, or
620 620 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
621 621 * and no listen() has been done.
622 622 * This rebinding code was added based on some language in the XNET book
623 623 * about not returning EINVAL if the protocol allows rebinding. However,
624 624 * this language is not present in the Posix socket draft. Thus maybe the
625 625 * rebinding logic should be deleted from the source.
626 626 *
627 627 * A null "name" can be used to unbind the socket if:
628 628 * - it is a SOCK_DGRAM, or
629 629 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
630 630 * and no listen() has been done.
631 631 */
632 632 /* ARGSUSED */
633 633 static int
634 634 sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
635 635 socklen_t namelen, int backlog, int flags, struct cred *cr)
636 636 {
637 637 struct T_bind_req bind_req;
638 638 struct T_bind_ack *bind_ack;
639 639 int error = 0;
640 640 mblk_t *mp;
641 641 void *addr;
642 642 t_uscalar_t addrlen;
643 643 int unbind_on_err = 1;
644 644 boolean_t clear_acceptconn_on_err = B_FALSE;
645 645 boolean_t restore_backlog_on_err = B_FALSE;
646 646 int save_so_backlog;
647 647 t_scalar_t PRIM_type = O_T_BIND_REQ;
648 648 boolean_t tcp_udp_xport;
649 649 void *nl7c = NULL;
650 650 sotpi_info_t *sti = SOTOTPI(so);
651 651
652 652 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
653 653 (void *)so, (void *)name, namelen, backlog, flags,
654 654 pr_state(so->so_state, so->so_mode)));
655 655
656 656 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;
657 657
658 658 if (!(flags & _SOBIND_LOCK_HELD)) {
659 659 mutex_enter(&so->so_lock);
660 660 so_lock_single(so); /* Set SOLOCKED */
661 661 } else {
662 662 ASSERT(MUTEX_HELD(&so->so_lock));
663 663 ASSERT(so->so_flag & SOLOCKED);
664 664 }
665 665
666 666 /*
667 667 * Make sure that there is a preallocated unbind_req message
668 668 * before binding. This message allocated when the socket is
669 669 * created but it might be have been consumed.
670 670 */
671 671 if (sti->sti_unbind_mp == NULL) {
672 672 dprintso(so, 1, ("sobind: allocating unbind_req\n"));
673 673 /* NOTE: holding so_lock while sleeping */
674 674 sti->sti_unbind_mp =
675 675 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP,
676 676 cr);
677 677 }
678 678
679 679 if (flags & _SOBIND_REBIND) {
680 680 /*
681 681 * Called from solisten after doing an sotpi_unbind() or
682 682 * potentially without the unbind (latter for AF_INET{,6}).
683 683 */
684 684 ASSERT(name == NULL && namelen == 0);
685 685
686 686 if (so->so_family == AF_UNIX) {
687 687 ASSERT(sti->sti_ux_bound_vp);
688 688 addr = &sti->sti_ux_laddr;
689 689 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
690 690 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
691 691 "addr 0x%p, vp %p\n",
692 692 addrlen,
693 693 (void *)((struct so_ux_addr *)addr)->soua_vp,
694 694 (void *)sti->sti_ux_bound_vp));
695 695 } else {
696 696 addr = sti->sti_laddr_sa;
697 697 addrlen = (t_uscalar_t)sti->sti_laddr_len;
698 698 }
699 699 } else if (flags & _SOBIND_UNSPEC) {
700 700 ASSERT(name == NULL && namelen == 0);
701 701
702 702 /*
703 703 * The caller checked SS_ISBOUND but not necessarily
704 704 * under so_lock
705 705 */
706 706 if (so->so_state & SS_ISBOUND) {
707 707 /* No error */
708 708 goto done;
709 709 }
710 710
711 711 /* Set an initial local address */
712 712 switch (so->so_family) {
713 713 case AF_UNIX:
714 714 /*
715 715 * Use an address with same size as struct sockaddr
716 716 * just like BSD.
717 717 */
718 718 sti->sti_laddr_len =
719 719 (socklen_t)sizeof (struct sockaddr);
720 720 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
721 721 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
722 722 sti->sti_laddr_sa->sa_family = so->so_family;
723 723
724 724 /*
725 725 * Pass down an address with the implicit bind
726 726 * magic number and the rest all zeros.
727 727 * The transport will return a unique address.
728 728 */
729 729 sti->sti_ux_laddr.soua_vp = NULL;
730 730 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
731 731 addr = &sti->sti_ux_laddr;
732 732 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
733 733 break;
734 734
735 735 case AF_INET:
736 736 case AF_INET6:
737 737 /*
738 738 * An unspecified bind in TPI has a NULL address.
739 739 * Set the address in sockfs to have the sa_family.
740 740 */
741 741 sti->sti_laddr_len = (so->so_family == AF_INET) ?
742 742 (socklen_t)sizeof (sin_t) :
743 743 (socklen_t)sizeof (sin6_t);
744 744 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
745 745 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
746 746 sti->sti_laddr_sa->sa_family = so->so_family;
747 747 addr = NULL;
748 748 addrlen = 0;
749 749 break;
750 750
751 751 default:
752 752 /*
753 753 * An unspecified bind in TPI has a NULL address.
754 754 * Set the address in sockfs to be zero length.
755 755 *
756 756 * Can not assume there is a sa_family for all
757 757 * protocol families. For example, AF_X25 does not
758 758 * have a family field.
759 759 */
760 760 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
761 761 sti->sti_laddr_len = 0; /* XXX correct? */
762 762 addr = NULL;
763 763 addrlen = 0;
764 764 break;
765 765 }
766 766
767 767 } else {
768 768 if (so->so_state & SS_ISBOUND) {
769 769 /*
770 770 * If it is ok to rebind the socket, first unbind
771 771 * with the transport. A rebind to the NULL address
772 772 * is interpreted as an unbind.
773 773 * Note that a bind to NULL in BSD does unbind the
774 774 * socket but it fails with EINVAL.
775 775 * Note that regular sockets set SOV_SOCKBSD i.e.
776 776 * _SOBIND_SOCKBSD gets set here hence no type of
777 777 * socket does currently allow rebinding.
778 778 *
779 779 * If the name is NULL just do an unbind.
780 780 */
781 781 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
782 782 name != NULL) {
783 783 error = EINVAL;
784 784 unbind_on_err = 0;
785 785 eprintsoline(so, error);
786 786 goto done;
787 787 }
788 788 if ((so->so_mode & SM_CONNREQUIRED) &&
789 789 (so->so_state & SS_CANTREBIND)) {
790 790 error = EINVAL;
791 791 unbind_on_err = 0;
792 792 eprintsoline(so, error);
793 793 goto done;
794 794 }
795 795 error = sotpi_unbind(so, 0);
796 796 if (error) {
797 797 eprintsoline(so, error);
798 798 goto done;
799 799 }
800 800 ASSERT(!(so->so_state & SS_ISBOUND));
801 801 if (name == NULL) {
802 802 so->so_state &=
803 803 ~(SS_ISCONNECTED|SS_ISCONNECTING);
804 804 goto done;
805 805 }
806 806 }
807 807
808 808 /* X/Open requires this check */
809 809 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
810 810 if (xnet_check_print) {
811 811 printf("sockfs: X/Open bind state check "
812 812 "caused EINVAL\n");
813 813 }
814 814 error = EINVAL;
815 815 goto done;
816 816 }
817 817
818 818 switch (so->so_family) {
819 819 case AF_UNIX:
820 820 /*
821 821 * All AF_UNIX addresses are nul terminated
822 822 * when copied (copyin_name) in so the minimum
823 823 * length is 3 bytes.
824 824 */
825 825 if (name == NULL ||
826 826 (ssize_t)namelen <= sizeof (short) + 1) {
827 827 error = EISDIR;
828 828 eprintsoline(so, error);
829 829 goto done;
830 830 }
831 831 /*
832 832 * Verify so_family matches the bound family.
833 833 * BSD does not check this for AF_UNIX resulting
834 834 * in funny mknods.
835 835 */
836 836 if (name->sa_family != so->so_family) {
837 837 error = EAFNOSUPPORT;
838 838 goto done;
839 839 }
840 840 break;
841 841 case AF_INET:
842 842 if (name == NULL) {
843 843 error = EINVAL;
844 844 eprintsoline(so, error);
845 845 goto done;
846 846 }
847 847 if ((size_t)namelen != sizeof (sin_t)) {
848 848 error = name->sa_family != so->so_family ?
849 849 EAFNOSUPPORT : EINVAL;
850 850 eprintsoline(so, error);
851 851 goto done;
852 852 }
853 853 if ((flags & _SOBIND_XPG4_2) &&
854 854 (name->sa_family != so->so_family)) {
855 855 /*
856 856 * This check has to be made for X/Open
857 857 * sockets however application failures have
858 858 * been observed when it is applied to
859 859 * all sockets.
860 860 */
861 861 error = EAFNOSUPPORT;
862 862 eprintsoline(so, error);
863 863 goto done;
864 864 }
865 865 /*
866 866 * Force a zero sa_family to match so_family.
867 867 *
868 868 * Some programs like inetd(1M) don't set the
869 869 * family field. Other programs leave
870 870 * sin_family set to garbage - SunOS 4.X does
871 871 * not check the family field on a bind.
872 872 * We use the family field that
873 873 * was passed in to the socket() call.
874 874 */
875 875 name->sa_family = so->so_family;
876 876 break;
877 877
878 878 case AF_INET6: {
879 879 #ifdef DEBUG
880 880 sin6_t *sin6 = (sin6_t *)name;
881 881 #endif /* DEBUG */
882 882
883 883 if (name == NULL) {
884 884 error = EINVAL;
885 885 eprintsoline(so, error);
886 886 goto done;
887 887 }
888 888 if ((size_t)namelen != sizeof (sin6_t)) {
889 889 error = name->sa_family != so->so_family ?
890 890 EAFNOSUPPORT : EINVAL;
891 891 eprintsoline(so, error);
892 892 goto done;
893 893 }
894 894 if (name->sa_family != so->so_family) {
895 895 /*
896 896 * With IPv6 we require the family to match
897 897 * unlike in IPv4.
898 898 */
899 899 error = EAFNOSUPPORT;
900 900 eprintsoline(so, error);
901 901 goto done;
902 902 }
903 903 #ifdef DEBUG
904 904 /*
905 905 * Verify that apps don't forget to clear
906 906 * sin6_scope_id etc
907 907 */
908 908 if (sin6->sin6_scope_id != 0 &&
909 909 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
910 910 zcmn_err(getzoneid(), CE_WARN,
911 911 "bind with uninitialized sin6_scope_id "
912 912 "(%d) on socket. Pid = %d\n",
913 913 (int)sin6->sin6_scope_id,
914 914 (int)curproc->p_pid);
915 915 }
916 916 if (sin6->__sin6_src_id != 0) {
917 917 zcmn_err(getzoneid(), CE_WARN,
918 918 "bind with uninitialized __sin6_src_id "
919 919 "(%d) on socket. Pid = %d\n",
920 920 (int)sin6->__sin6_src_id,
921 921 (int)curproc->p_pid);
922 922 }
923 923 #endif /* DEBUG */
924 924 break;
925 925 }
926 926 default:
927 927 /*
928 928 * Don't do any length or sa_family check to allow
929 929 * non-sockaddr style addresses.
930 930 */
931 931 if (name == NULL) {
932 932 error = EINVAL;
933 933 eprintsoline(so, error);
934 934 goto done;
935 935 }
936 936 break;
937 937 }
938 938
939 939 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) {
940 940 error = ENAMETOOLONG;
941 941 eprintsoline(so, error);
942 942 goto done;
943 943 }
944 944 /*
945 945 * Save local address.
946 946 */
947 947 sti->sti_laddr_len = (socklen_t)namelen;
948 948 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
949 949 bcopy(name, sti->sti_laddr_sa, namelen);
950 950
951 951 addr = sti->sti_laddr_sa;
952 952 addrlen = (t_uscalar_t)sti->sti_laddr_len;
953 953 switch (so->so_family) {
954 954 case AF_INET6:
955 955 case AF_INET:
956 956 break;
957 957 case AF_UNIX: {
958 958 struct sockaddr_un *soun =
959 959 (struct sockaddr_un *)sti->sti_laddr_sa;
960 960 struct vnode *vp, *rvp;
961 961 struct vattr vattr;
962 962
963 963 ASSERT(sti->sti_ux_bound_vp == NULL);
964 964 /*
965 965 * Create vnode for the specified path name.
966 966 * Keep vnode held with a reference in sti_ux_bound_vp.
967 967 * Use the vnode pointer as the address used in the
968 968 * bind with the transport.
969 969 *
970 970 * Use the same mode as in BSD. In particular this does
971 971 * not observe the umask.
972 972 */
973 973 /* MAXPATHLEN + soun_family + nul termination */
974 974 if (sti->sti_laddr_len >
975 975 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
976 976 error = ENAMETOOLONG;
977 977 eprintsoline(so, error);
978 978 goto done;
979 979 }
980 980 vattr.va_type = VSOCK;
981 981 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask;
982 982 vattr.va_mask = AT_TYPE|AT_MODE;
983 983 /* NOTE: holding so_lock */
984 984 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
985 985 EXCL, 0, &vp, CRMKNOD, 0, 0);
986 986 if (error) {
987 987 if (error == EEXIST)
988 988 error = EADDRINUSE;
989 989 eprintsoline(so, error);
990 990 goto done;
991 991 }
992 992 /*
993 993 * Establish pointer from the underlying filesystem
994 994 * vnode to the socket node.
995 995 * sti_ux_bound_vp and v_stream->sd_vnode form the
996 996 * cross-linkage between the underlying filesystem
997 997 * node and the socket node.
998 998 */
999 999
1000 1000 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) {
1001 1001 VN_HOLD(rvp);
1002 1002 VN_RELE(vp);
1003 1003 vp = rvp;
1004 1004 }
1005 1005
1006 1006 ASSERT(SOTOV(so)->v_stream);
1007 1007 mutex_enter(&vp->v_lock);
1008 1008 vp->v_stream = SOTOV(so)->v_stream;
1009 1009 sti->sti_ux_bound_vp = vp;
1010 1010 mutex_exit(&vp->v_lock);
1011 1011
1012 1012 /*
1013 1013 * Use the vnode pointer value as a unique address
1014 1014 * (together with the magic number to avoid conflicts
1015 1015 * with implicit binds) in the transport provider.
1016 1016 */
1017 1017 sti->sti_ux_laddr.soua_vp =
1018 1018 (void *)sti->sti_ux_bound_vp;
1019 1019 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
1020 1020 addr = &sti->sti_ux_laddr;
1021 1021 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
1022 1022 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
1023 1023 addrlen,
1024 1024 (void *)((struct so_ux_addr *)addr)->soua_vp));
1025 1025 break;
1026 1026 }
1027 1027 } /* end switch (so->so_family) */
1028 1028 }
1029 1029
1030 1030 /*
1031 1031 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
1032 1032 * the transport can start passing up T_CONN_IND messages
1033 1033 * as soon as it receives the bind req and strsock_proto()
1034 1034 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
1035 1035 */
1036 1036 if (flags & _SOBIND_LISTEN) {
1037 1037 if ((so->so_state & SS_ACCEPTCONN) == 0)
1038 1038 clear_acceptconn_on_err = B_TRUE;
1039 1039 save_so_backlog = so->so_backlog;
1040 1040 restore_backlog_on_err = B_TRUE;
1041 1041 so->so_state |= SS_ACCEPTCONN;
1042 1042 so->so_backlog = backlog;
1043 1043 }
1044 1044
1045 1045 /*
1046 1046 * If NL7C addr(s) have been configured check for addr/port match,
1047 1047 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C.
1048 1048 *
1049 1049 * NL7C supports the TCP transport only so check AF_INET and AF_INET6
1050 1050 * family sockets only. If match mark as such.
1051 1051 */
1052 1052 if (nl7c_enabled && ((addr != NULL &&
1053 1053 (so->so_family == AF_INET || so->so_family == AF_INET6) &&
1054 1054 (nl7c = nl7c_lookup_addr(addr, addrlen))) ||
1055 1055 sti->sti_nl7c_flags == NL7C_AF_NCA)) {
1056 1056 /*
1057 1057 * NL7C is not supported in non-global zones,
1058 1058 * we enforce this restriction here.
1059 1059 */
1060 1060 if (so->so_zoneid == GLOBAL_ZONEID) {
1061 1061 /* An NL7C socket, mark it */
1062 1062 sti->sti_nl7c_flags |= NL7C_ENABLED;
1063 1063 if (nl7c == NULL) {
1064 1064 /*
1065 1065 * Was an AF_NCA bind() so add it to the
1066 1066 * addr list for reporting purposes.
1067 1067 */
1068 1068 nl7c = nl7c_add_addr(addr, addrlen);
1069 1069 }
1070 1070 } else
1071 1071 nl7c = NULL;
1072 1072 }
1073 1073
1074 1074 /*
1075 1075 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
1076 1076 * for other transports we will send in a O_T_BIND_REQ.
1077 1077 */
1078 1078 if (tcp_udp_xport &&
1079 1079 (so->so_family == AF_INET || so->so_family == AF_INET6))
1080 1080 PRIM_type = T_BIND_REQ;
1081 1081
1082 1082 bind_req.PRIM_type = PRIM_type;
1083 1083 bind_req.ADDR_length = addrlen;
1084 1084 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
1085 1085 bind_req.CONIND_number = backlog;
1086 1086 /* NOTE: holding so_lock while sleeping */
1087 1087 mp = soallocproto2(&bind_req, sizeof (bind_req),
1088 1088 addr, addrlen, 0, _ALLOC_SLEEP, cr);
1089 1089 sti->sti_laddr_valid = 0;
1090 1090
1091 1091 /* Done using sti_laddr_sa - can drop the lock */
1092 1092 mutex_exit(&so->so_lock);
1093 1093
1094 1094 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1095 1095 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
1096 1096 if (error) {
1097 1097 eprintsoline(so, error);
1098 1098 mutex_enter(&so->so_lock);
1099 1099 goto done;
1100 1100 }
1101 1101
1102 1102 mutex_enter(&so->so_lock);
1103 1103 error = sowaitprim(so, PRIM_type, T_BIND_ACK,
1104 1104 (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
1105 1105 if (error) {
1106 1106 eprintsoline(so, error);
1107 1107 goto done;
1108 1108 }
1109 1109 ASSERT(mp);
1110 1110 /*
1111 1111 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1112 1112 * strsock_proto while the lock was dropped above, the bind
1113 1113 * is allowed to complete.
1114 1114 */
1115 1115
1116 1116 /* Mark as bound. This will be undone if we detect errors below. */
1117 1117 if (flags & _SOBIND_NOXLATE) {
1118 1118 ASSERT(so->so_family == AF_UNIX);
1119 1119 sti->sti_faddr_noxlate = 1;
1120 1120 }
1121 1121 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
1122 1122 so->so_state |= SS_ISBOUND;
1123 1123 ASSERT(sti->sti_unbind_mp);
1124 1124
1125 1125 /* note that we've already set SS_ACCEPTCONN above */
1126 1126
1127 1127 /*
1128 1128 * Recompute addrlen - an unspecified bind sent down an
1129 1129 * address of length zero but we expect the appropriate length
1130 1130 * in return.
1131 1131 */
1132 1132 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
1133 1133 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len);
1134 1134
1135 1135 bind_ack = (struct T_bind_ack *)mp->b_rptr;
1136 1136 /*
1137 1137 * The alignment restriction is really too strict but
1138 1138 * we want enough alignment to inspect the fields of
1139 1139 * a sockaddr_in.
1140 1140 */
1141 1141 addr = sogetoff(mp, bind_ack->ADDR_offset,
1142 1142 bind_ack->ADDR_length,
1143 1143 __TPI_ALIGN_SIZE);
1144 1144 if (addr == NULL) {
1145 1145 freemsg(mp);
1146 1146 error = EPROTO;
1147 1147 eprintsoline(so, error);
1148 1148 goto done;
1149 1149 }
1150 1150 if (!(flags & _SOBIND_UNSPEC)) {
1151 1151 /*
1152 1152 * Verify that the transport didn't return something we
1153 1153 * did not want e.g. an address other than what we asked for.
1154 1154 *
1155 1155 * NOTE: These checks would go away if/when we switch to
1156 1156 * using the new TPI (in which the transport would fail
1157 1157 * the request instead of assigning a different address).
1158 1158 *
1159 1159 * NOTE2: For protocols that we don't know (i.e. any
1160 1160 * other than AF_INET6, AF_INET and AF_UNIX), we
1161 1161 * cannot know if the transport should be expected to
1162 1162 * return the same address as that requested.
1163 1163 *
1164 1164 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
1165 1165 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
1166 1166 *
1167 1167 * For example, in the case of netatalk it may be
1168 1168 * inappropriate for the transport to return the
1169 1169 * requested address (as it may have allocated a local
1170 1170 * port number in behaviour similar to that of an
1171 1171 * AF_INET bind request with a port number of zero).
1172 1172 *
1173 1173 * Given the definition of O_T_BIND_REQ, where the
1174 1174 * transport may bind to an address other than the
1175 1175 * requested address, it's not possible to determine
1176 1176 * whether a returned address that differs from the
1177 1177 * requested address is a reason to fail (because the
1178 1178 * requested address was not available) or succeed
1179 1179 * (because the transport allocated an appropriate
1180 1180 * address and/or port).
1181 1181 *
1182 1182 * sockfs currently requires that the transport return
1183 1183 * the requested address in the T_BIND_ACK, unless
1184 1184 * there is code here to allow for any discrepancy.
1185 1185 * Such code exists for AF_INET and AF_INET6.
1186 1186 *
1187 1187 * Netatalk chooses to return the requested address
1188 1188 * rather than the (correct) allocated address. This
1189 1189 * means that netatalk violates the TPI specification
1190 1190 * (and would not function correctly if used from a
1191 1191 * TLI application), but it does mean that it works
1192 1192 * with sockfs.
1193 1193 *
1194 1194 * As noted above, using the newer XTI bind primitive
1195 1195 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
1196 1196 * allow sockfs to be more sure about whether or not
1197 1197 * the bind request had succeeded (as transports are
1198 1198 * not permitted to bind to a different address than
1199 1199 * that requested - they must return failure).
1200 1200 * Unfortunately, support for T_BIND_REQ may not be
1201 1201 * present in all transport implementations (netatalk,
1202 1202 * for example, doesn't have it), making the
1203 1203 * transition difficult.
1204 1204 */
1205 1205 if (bind_ack->ADDR_length != addrlen) {
1206 1206 /* Assumes that the requested address was in use */
1207 1207 freemsg(mp);
1208 1208 error = EADDRINUSE;
1209 1209 eprintsoline(so, error);
1210 1210 goto done;
1211 1211 }
1212 1212
1213 1213 switch (so->so_family) {
1214 1214 case AF_INET6:
1215 1215 case AF_INET: {
1216 1216 sin_t *rname, *aname;
1217 1217
1218 1218 rname = (sin_t *)addr;
1219 1219 aname = (sin_t *)sti->sti_laddr_sa;
1220 1220
1221 1221 /*
1222 1222 * Take advantage of the alignment
1223 1223 * of sin_port and sin6_port which fall
1224 1224 * in the same place in their data structures.
1225 1225 * Just use sin_port for either address family.
1226 1226 *
1227 1227 * This may become a problem if (heaven forbid)
1228 1228 * there's a separate ipv6port_reserved... :-P
1229 1229 *
1230 1230 * Binding to port 0 has the semantics of letting
1231 1231 * the transport bind to any port.
1232 1232 *
1233 1233 * If the transport is TCP or UDP since we had sent
1234 1234 * a T_BIND_REQ we would not get a port other than
1235 1235 * what we asked for.
1236 1236 */
1237 1237 if (tcp_udp_xport) {
1238 1238 /*
1239 1239 * Pick up the new port number if we bound to
1240 1240 * port 0.
1241 1241 */
1242 1242 if (aname->sin_port == 0)
1243 1243 aname->sin_port = rname->sin_port;
1244 1244 sti->sti_laddr_valid = 1;
1245 1245 break;
1246 1246 }
1247 1247 if (aname->sin_port != 0 &&
1248 1248 aname->sin_port != rname->sin_port) {
1249 1249 freemsg(mp);
1250 1250 error = EADDRINUSE;
1251 1251 eprintsoline(so, error);
1252 1252 goto done;
1253 1253 }
1254 1254 /*
1255 1255 * Pick up the new port number if we bound to port 0.
1256 1256 */
1257 1257 aname->sin_port = rname->sin_port;
1258 1258
1259 1259 /*
1260 1260 * Unfortunately, addresses aren't _quite_ the same.
1261 1261 */
1262 1262 if (so->so_family == AF_INET) {
1263 1263 if (aname->sin_addr.s_addr !=
1264 1264 rname->sin_addr.s_addr) {
1265 1265 freemsg(mp);
1266 1266 error = EADDRNOTAVAIL;
1267 1267 eprintsoline(so, error);
1268 1268 goto done;
1269 1269 }
1270 1270 } else {
1271 1271 sin6_t *rname6 = (sin6_t *)rname;
1272 1272 sin6_t *aname6 = (sin6_t *)aname;
1273 1273
1274 1274 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
1275 1275 &rname6->sin6_addr)) {
1276 1276 freemsg(mp);
1277 1277 error = EADDRNOTAVAIL;
1278 1278 eprintsoline(so, error);
1279 1279 goto done;
1280 1280 }
1281 1281 }
1282 1282 break;
1283 1283 }
1284 1284 case AF_UNIX:
1285 1285 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) {
1286 1286 freemsg(mp);
1287 1287 error = EADDRINUSE;
1288 1288 eprintsoline(so, error);
1289 1289 eprintso(so,
1290 1290 ("addrlen %d, addr 0x%x, vp %p\n",
1291 1291 addrlen, *((int *)addr),
1292 1292 (void *)sti->sti_ux_bound_vp));
1293 1293 goto done;
1294 1294 }
1295 1295 sti->sti_laddr_valid = 1;
1296 1296 break;
1297 1297 default:
1298 1298 /*
1299 1299 * NOTE: This assumes that addresses can be
1300 1300 * byte-compared for equivalence.
1301 1301 */
1302 1302 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) {
1303 1303 freemsg(mp);
1304 1304 error = EADDRINUSE;
1305 1305 eprintsoline(so, error);
1306 1306 goto done;
1307 1307 }
1308 1308 /*
1309 1309 * Don't mark sti_laddr_valid, as we cannot be
1310 1310 * sure that the returned address is the real
1311 1311 * bound address when talking to an unknown
1312 1312 * transport.
1313 1313 */
1314 1314 break;
1315 1315 }
1316 1316 } else {
1317 1317 /*
1318 1318 * Save the returned address for getsockname.
1319 1319 * Needed for unspecific bind unless transport supports
1320 1320 * the TI_GETMYNAME ioctl.
1321 1321 * Do this for AF_INET{,6} even though they do, as
1322 1322 * caching info here is much better performance than
1323 1323 * a TPI/STREAMS trip to the transport for getsockname.
1324 1324 * Any which can't for some reason _must_ _not_ set
1325 1325 * sti_laddr_valid here for the caching version of
1326 1326 * getsockname to not break;
1327 1327 */
1328 1328 switch (so->so_family) {
1329 1329 case AF_UNIX:
1330 1330 /*
1331 1331 * Record the address bound with the transport
1332 1332 * for use by socketpair.
1333 1333 */
1334 1334 bcopy(addr, &sti->sti_ux_laddr, addrlen);
1335 1335 sti->sti_laddr_valid = 1;
1336 1336 break;
1337 1337 case AF_INET:
1338 1338 case AF_INET6:
1339 1339 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
1340 1340 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
1341 1341 sti->sti_laddr_valid = 1;
1342 1342 break;
1343 1343 default:
1344 1344 /*
1345 1345 * Don't mark sti_laddr_valid, as we cannot be
1346 1346 * sure that the returned address is the real
1347 1347 * bound address when talking to an unknown
1348 1348 * transport.
1349 1349 */
1350 1350 break;
1351 1351 }
1352 1352 }
1353 1353
1354 1354 if (nl7c != NULL) {
1355 1355 /* Register listen()er sonode pointer with NL7C */
1356 1356 nl7c_listener_addr(nl7c, so);
1357 1357 }
1358 1358
1359 1359 freemsg(mp);
1360 1360
1361 1361 done:
1362 1362 if (error) {
1363 1363 /* reset state & backlog to values held on entry */
1364 1364 if (clear_acceptconn_on_err == B_TRUE)
1365 1365 so->so_state &= ~SS_ACCEPTCONN;
1366 1366 if (restore_backlog_on_err == B_TRUE)
1367 1367 so->so_backlog = save_so_backlog;
1368 1368
1369 1369 if (unbind_on_err && so->so_state & SS_ISBOUND) {
1370 1370 int err;
1371 1371
1372 1372 err = sotpi_unbind(so, 0);
1373 1373 /* LINTED - statement has no consequent: if */
1374 1374 if (err) {
1375 1375 eprintsoline(so, error);
1376 1376 } else {
1377 1377 ASSERT(!(so->so_state & SS_ISBOUND));
1378 1378 }
1379 1379 }
1380 1380 }
1381 1381 if (!(flags & _SOBIND_LOCK_HELD)) {
1382 1382 so_unlock_single(so, SOLOCKED);
1383 1383 mutex_exit(&so->so_lock);
1384 1384 } else {
1385 1385 ASSERT(MUTEX_HELD(&so->so_lock));
1386 1386 ASSERT(so->so_flag & SOLOCKED);
1387 1387 }
1388 1388 return (error);
1389 1389 }
1390 1390
1391 1391 /* bind the socket */
/*
 * Thin wrapper around sotpi_bindlisten().
 *
 * If _SOBIND_SOCKETPAIR is not set in `flags', the request is forwarded
 * unchanged with a backlog argument of 0.  If it is set, the flag is
 * stripped from `flags' and the request is forwarded with a backlog
 * argument of 1.
 *
 * Returns whatever sotpi_bindlisten() returns.
 */
1392 1392 static int
1393 1393 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
1394 1394 int flags, struct cred *cr)
1395 1395 {
/* Ordinary bind: backlog of 0. */
1396 1396 if ((flags & _SOBIND_SOCKETPAIR) == 0)
1397 1397 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr));
1398 1398
/* Socketpair bind: drop the flag before forwarding, backlog of 1. */
1399 1399 flags &= ~_SOBIND_SOCKETPAIR;
1400 1400 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr));
1401 1401 }
1402 1402
1403 1403 /*
1404 1404 * Unbind a socket - used when bind() fails, when bind() specifies a NULL
1405 1405 * address, or when listen needs to unbind and bind.
1406 1406 * If the _SOUNBIND_REBIND flag is specified the addresses are retained
1407 1407 * so that a sobind can pick them up.
 *
 * The caller must hold so_lock and have SOLOCKED set; both conditions are
 * asserted on entry and again before returning.  so_lock is dropped while
 * the M_FLUSH and the T_UNBIND_REQ are sent downstream and is re-acquired
 * before waiting for the acknowledgement.
 *
 * Returns 0 on success, EINVAL if the socket is not bound (SS_ISBOUND
 * clear), or the error returned by kstrputmsg() or sowaitokack().  On any
 * error the socket state and the saved addresses are left untouched.
1408 1408 */
1409 1409 static int
1410 1410 sotpi_unbind(struct sonode *so, int flags)
1411 1411 {
1412 1412 struct T_unbind_req unbind_req;
1413 1413 int error = 0;
1414 1414 mblk_t *mp;
1415 1415 sotpi_info_t *sti = SOTOTPI(so);
1416 1416
1417 1417 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
1418 1418 (void *)so, flags, pr_state(so->so_state, so->so_mode)));
1419 1419
1420 1420 ASSERT(MUTEX_HELD(&so->so_lock));
1421 1421 ASSERT(so->so_flag & SOLOCKED);
1422 1422
/* Nothing to unbind. */
1423 1423 if (!(so->so_state & SS_ISBOUND)) {
1424 1424 error = EINVAL;
1425 1425 eprintsoline(so, error);
1426 1426 goto done;
1427 1427 }
1428 1428
/* Drop so_lock for the downstream traffic; it is retaken below. */
1429 1429 mutex_exit(&so->so_lock);
1430 1430
1431 1431 /*
1432 1432 * Flush the read and write side (except stream head read queue)
1433 1433 * and send down T_UNBIND_REQ.
1434 1434 */
1435 1435 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1436 1436
1437 1437 unbind_req.PRIM_type = T_UNBIND_REQ;
1438 1438 mp = soallocproto1(&unbind_req, sizeof (unbind_req),
1439 1439 0, _ALLOC_SLEEP, CRED());
1440 1440 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1441 1441 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
/* Retake so_lock before looking at the result so `done' sees it held. */
1442 1442 mutex_enter(&so->so_lock);
1443 1443 if (error) {
1444 1444 eprintsoline(so, error);
1445 1445 goto done;
1446 1446 }
1447 1447
/* Wait for the transport to acknowledge the T_UNBIND_REQ. */
1448 1448 error = sowaitokack(so, T_UNBIND_REQ);
1449 1449 if (error) {
1450 1450 eprintsoline(so, error);
1451 1451 goto done;
1452 1452 }
1453 1453
1454 1454 /*
1455 1455 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1456 1456 * strsock_proto while the lock was dropped above, the unbind
1457 1457 * is allowed to complete.
1458 1458 */
1459 1459 if (!(flags & _SOUNBIND_REBIND)) {
1460 1460 /*
1461 1461 * Clear out bound address.
1462 1462 */
1463 1463 vnode_t *vp;
1464 1464
/* Detach and release the bound vnode, if one was recorded. */
1465 1465 if ((vp = sti->sti_ux_bound_vp) != NULL) {
1466 1466 sti->sti_ux_bound_vp = NULL;
1467 1467 vn_rele_stream(vp);
1468 1468 }
1469 1469 /* Clear out address */
1470 1470 sti->sti_laddr_len = 0;
1471 1471 }
/* Done for both the plain and the rebind case. */
1472 1472 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
1473 1473 sti->sti_laddr_valid = 0;
1474 1474
1475 1475 done:
1476 1476
1477 1477 /* If the caller held the lock don't release it here */
1478 1478 ASSERT(MUTEX_HELD(&so->so_lock));
1479 1479 ASSERT(so->so_flag & SOLOCKED);
1480 1480
1481 1481 return (error);
1482 1482 }
1483 1483
1484 1484 /*
1485 1485 * listen on the socket.
1486 1486 * For TPI conforming transports this has to first unbind with the transport
1487 1487 * and then bind again using the new backlog.
 *
 * The first three checks (service type, SS_ACCEPTCONN, SS_ISCONNECTED) are
 * made before so_lock is taken.  After that the function takes so_lock,
 * sets SOLOCKED via so_lock_single(), and releases both at `done'.
 *
 * Returns EOPNOTSUPP when sti_serv_type is T_CLTS, EINVAL when the socket
 * is connected or is an unbound AF_UNIX socket, and otherwise 0 or the
 * error returned by sotpi_unbind() or sotpi_bindlisten().  `cr' is only
 * passed through to sotpi_bindlisten().
1488 1488 */
1489 1489 /* ARGSUSED */
1490 1490 int
1491 1491 sotpi_listen(struct sonode *so, int backlog, struct cred *cr)
1492 1492 {
1493 1493 int error = 0;
1494 1494 sotpi_info_t *sti = SOTOTPI(so);
1495 1495
1496 1496 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
1497 1497 (void *)so, backlog, pr_state(so->so_state, so->so_mode)));
1498 1498
1499 1499 if (sti->sti_serv_type == T_CLTS)
1500 1500 return (EOPNOTSUPP);
1501 1501
1502 1502 /*
1503 1503 * If the socket is ready to accept connections already, then
1504 1504 * return without doing anything. This avoids a problem where
1505 1505 * a second listen() call fails if a connection is pending and
1506 1506 * leaves the socket unbound. Only when we are not unbinding
1507 1507 * with the transport can we safely increase the backlog.
1508 1508 */
/*
 * That is: return early unless this is an AF_INET/AF_INET6 socket and
 * solisten_tpi_tcp is zero.
 */
1509 1509 if (so->so_state & SS_ACCEPTCONN &&
1510 1510 !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
1511 1511 /*CONSTCOND*/
1512 1512 !solisten_tpi_tcp))
1513 1513 return (0);
1514 1514
1515 1515 if (so->so_state & SS_ISCONNECTED)
1516 1516 return (EINVAL);
1517 1517
1518 1518 mutex_enter(&so->so_lock);
1519 1519 so_lock_single(so); /* Set SOLOCKED */
1520 1520
1521 1521 /*
1522 1522 * If the listen doesn't change the backlog we do nothing.
1523 1523 * This avoids an EPROTO error from the transport.
1524 1524 */
1525 1525 if ((so->so_state & SS_ACCEPTCONN) &&
1526 1526 so->so_backlog == backlog)
1527 1527 goto done;
1528 1528
1529 1529 if (!(so->so_state & SS_ISBOUND)) {
1530 1530 /*
1531 1531 * Must have been explicitly bound in the UNIX domain.
1532 1532 */
1533 1533 if (so->so_family == AF_UNIX) {
1534 1534 error = EINVAL;
1535 1535 goto done;
1536 1536 }
/* Not yet bound: do an unspecified bind that also sets the backlog. */
1537 1537 error = sotpi_bindlisten(so, NULL, 0, backlog,
1538 1538 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
1539 1539 } else if (backlog > 0) {
1540 1540 /*
1541 1541 * AF_INET{,6} hack to avoid losing the port.
1542 1542 * Assumes that all AF_INET{,6} transports can handle a
1543 1543 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
1544 1544 * has already bound thus it is possible to avoid the unbind.
1545 1545 */
1546 1546 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) &&
1547 1547 /*CONSTCOND*/
1548 1548 !solisten_tpi_tcp)) {
/* Keep the saved addresses so the rebind below can reuse them. */
1549 1549 error = sotpi_unbind(so, _SOUNBIND_REBIND);
1550 1550 if (error)
1551 1551 goto done;
1552 1552 }
1553 1553 error = sotpi_bindlisten(so, NULL, 0, backlog,
1554 1554 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
1555 1555 } else {
/*
 * Already bound and backlog <= 0: only record the listening state
 * in the sonode; nothing is sent to the transport on this path.
 */
1556 1556 so->so_state |= SS_ACCEPTCONN;
1557 1557 so->so_backlog = backlog;
1558 1558 }
1559 1559 if (error)
1560 1560 goto done;
1561 1561 ASSERT(so->so_state & SS_ACCEPTCONN);
1562 1562 done:
1563 1563 so_unlock_single(so, SOLOCKED);
1564 1564 mutex_exit(&so->so_lock);
1565 1565 return (error);
1566 1566 }
1567 1567
1568 1568 /*
1569 1569 * Disconnect either a specified seqno or all (-1).
1570 1570 * The former is used on listening sockets only.
1571 1571 *
1572 1572 * When seqno == -1 sodisconnect could call sotpi_unbind. However,
1573 1573 * the current use of sodisconnect(seqno == -1) is only for shutdown
1574 1574 * so there is no point (and potentially incorrect) to unbind.
 *
 * Locking: if _SODISCONNECT_LOCK_HELD is set in `flags' the caller must
 * already hold so_lock with SOLOCKED set (asserted), and both are still
 * held on return.  Otherwise this function takes so_lock, sets SOLOCKED
 * via so_lock_single(), and releases both before returning.  In either
 * case so_lock is dropped while the T_DISCON_REQ is sent downstream.
 *
 * Returns 0 on success, EINVAL if none of SS_ISCONNECTED, SS_ISCONNECTING
 * or SS_ACCEPTCONN is set, or the error returned by kstrputmsg() or
 * sowaitokack().
1575 1575 */
1576 1576 static int
1577 1577 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
1578 1578 {
1579 1579 struct T_discon_req discon_req;
1580 1580 int error = 0;
1581 1581 mblk_t *mp;
1582 1582
1583 1583 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n",
1584 1584 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode)));
1585 1585
1586 1586 if (!(flags & _SODISCONNECT_LOCK_HELD)) {
1587 1587 mutex_enter(&so->so_lock);
1588 1588 so_lock_single(so); /* Set SOLOCKED */
1589 1589 } else {
1590 1590 ASSERT(MUTEX_HELD(&so->so_lock));
1591 1591 ASSERT(so->so_flag & SOLOCKED);
1592 1592 }
1593 1593
/* There must be something to disconnect. */
1594 1594 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) {
1595 1595 error = EINVAL;
1596 1596 eprintsoline(so, error);
1597 1597 goto done;
1598 1598 }
1599 1599
1600 1600 mutex_exit(&so->so_lock);
1601 1601 /*
1602 1602 * Flush the write side (unless this is a listener)
1603 1603 * and then send down a T_DISCON_REQ.
1604 1604 * (Don't flush on listener since it could flush {O_}T_CONN_RES
1605 1605 * and other messages.)
1606 1606 */
/* NOTE(review): so_state is read here after so_lock was dropped above. */
1607 1607 if (!(so->so_state & SS_ACCEPTCONN))
1608 1608 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW);
1609 1609
1610 1610 discon_req.PRIM_type = T_DISCON_REQ;
1611 1611 discon_req.SEQ_number = seqno;
1612 1612 mp = soallocproto1(&discon_req, sizeof (discon_req),
1613 1613 0, _ALLOC_SLEEP, CRED());
1614 1614 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1615 1615 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
/* Retake so_lock before looking at the result so `done' sees it held. */
1616 1616 mutex_enter(&so->so_lock);
1617 1617 if (error) {
1618 1618 eprintsoline(so, error);
1619 1619 goto done;
1620 1620 }
1621 1621
/* Wait for the transport to acknowledge the T_DISCON_REQ. */
1622 1622 error = sowaitokack(so, T_DISCON_REQ);
1623 1623 if (error) {
1624 1624 eprintsoline(so, error);
1625 1625 goto done;
1626 1626 }
1627 1627 /*
1628 1628 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1629 1629 * strsock_proto while the lock was dropped above, the disconnect
1630 1630 * is allowed to complete. However, it is not possible to
1631 1631 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
1632 1632 */
1633 1633 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING);
/* The cached local and peer addresses are no longer trusted. */
1634 1634 SOTOTPI(so)->sti_laddr_valid = 0;
1635 1635 SOTOTPI(so)->sti_faddr_valid = 0;
1636 1636 done:
1637 1637 if (!(flags & _SODISCONNECT_LOCK_HELD)) {
1638 1638 so_unlock_single(so, SOLOCKED);
1639 1639 mutex_exit(&so->so_lock);
1640 1640 } else {
1641 1641 /* If the caller held the lock don't release it here */
1642 1642 ASSERT(MUTEX_HELD(&so->so_lock));
1643 1643 ASSERT(so->so_flag & SOLOCKED);
1644 1644 }
1645 1645 return (error);
1646 1646 }
1647 1647
1648 1648 /* ARGSUSED */
1649 1649 int
1650 1650 sotpi_accept(struct sonode *so, int fflag, struct cred *cr,
1651 1651 struct sonode **nsop)
1652 1652 {
1653 1653 struct T_conn_ind *conn_ind;
1654 1654 struct T_conn_res *conn_res;
1655 1655 int error = 0;
1656 1656 mblk_t *mp, *ack_mp;
1657 1657 struct sonode *nso;
1658 1658 vnode_t *nvp;
1659 1659 void *src;
1660 1660 t_uscalar_t srclen;
1661 1661 void *opt;
1662 1662 t_uscalar_t optlen;
1663 1663 t_scalar_t PRIM_type;
1664 1664 t_scalar_t SEQ_number;
1665 1665 size_t sinlen;
1666 1666 sotpi_info_t *sti = SOTOTPI(so);
1667 1667 sotpi_info_t *nsti;
1668 1668
1669 1669 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
1670 1670 (void *)so, fflag, (void *)nsop,
1671 1671 pr_state(so->so_state, so->so_mode)));
1672 1672
1673 1673 /*
1674 1674 * Defer single-threading the accepting socket until
1675 1675 * the T_CONN_IND has been received and parsed and the
1676 1676 * new sonode has been opened.
1677 1677 */
1678 1678
1679 1679 /* Check that we are not already connected */
1680 1680 if ((so->so_state & SS_ACCEPTCONN) == 0)
1681 1681 goto conn_bad;
1682 1682 again:
1683 1683 if ((error = sowaitconnind(so, fflag, &mp)) != 0)
1684 1684 goto e_bad;
1685 1685
1686 1686 ASSERT(mp != NULL);
1687 1687 conn_ind = (struct T_conn_ind *)mp->b_rptr;
1688 1688
1689 1689 /*
1690 1690 * Save SEQ_number for error paths.
1691 1691 */
1692 1692 SEQ_number = conn_ind->SEQ_number;
1693 1693
1694 1694 srclen = conn_ind->SRC_length;
1695 1695 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1);
1696 1696 if (src == NULL) {
1697 1697 error = EPROTO;
1698 1698 freemsg(mp);
1699 1699 eprintsoline(so, error);
1700 1700 goto disconnect_unlocked;
1701 1701 }
1702 1702 optlen = conn_ind->OPT_length;
1703 1703 switch (so->so_family) {
1704 1704 case AF_INET:
1705 1705 case AF_INET6:
1706 1706 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) {
1707 1707 bcopy(mp->b_rptr + conn_ind->OPT_offset,
1708 1708 &opt, conn_ind->OPT_length);
1709 1709 } else {
1710 1710 /*
1711 1711 * The transport (in this case TCP) hasn't sent up
1712 1712 * a pointer to an instance for the accept fast-path.
1713 1713 * Disable fast-path completely because the call to
1714 1714 * sotpi_create() below would otherwise create an
1715 1715 * incomplete TCP instance, which would lead to
1716 1716 * problems when sockfs sends a normal T_CONN_RES
1717 1717 * message down the new stream.
1718 1718 */
1719 1719 if (sti->sti_direct) {
1720 1720 int rval;
1721 1721 /*
1722 1722 * For consistency we inform tcp to disable
1723 1723 * direct interface on the listener, though
1724 1724 * we can certainly live without doing this
1725 1725 * because no data will ever travel upstream
1726 1726 * on the listening socket.
1727 1727 */
1728 1728 sti->sti_direct = 0;
1729 1729 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK,
1730 1730 0, 0, K_TO_K, cr, &rval);
1731 1731 }
1732 1732 opt = NULL;
1733 1733 optlen = 0;
1734 1734 }
1735 1735 break;
1736 1736 case AF_UNIX:
1737 1737 default:
1738 1738 if (optlen != 0) {
1739 1739 opt = sogetoff(mp, conn_ind->OPT_offset, optlen,
1740 1740 __TPI_ALIGN_SIZE);
1741 1741 if (opt == NULL) {
1742 1742 error = EPROTO;
1743 1743 freemsg(mp);
1744 1744 eprintsoline(so, error);
1745 1745 goto disconnect_unlocked;
1746 1746 }
1747 1747 }
1748 1748 if (so->so_family == AF_UNIX) {
1749 1749 if (!sti->sti_faddr_noxlate) {
1750 1750 src = NULL;
1751 1751 srclen = 0;
1752 1752 }
1753 1753 /* Extract src address from options */
1754 1754 if (optlen != 0)
1755 1755 so_getopt_srcaddr(opt, optlen, &src, &srclen);
1756 1756 }
1757 1757 break;
1758 1758 }
1759 1759
1760 1760 /*
1761 1761 * Create the new socket.
1762 1762 */
1763 1763 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error);
1764 1764 if (nso == NULL) {
1765 1765 ASSERT(error != 0);
1766 1766 /*
1767 1767 * Accept can not fail with ENOBUFS. sotpi_create
1768 1768 * sleeps waiting for memory until a signal is caught
1769 1769 * so return EINTR.
1770 1770 */
1771 1771 freemsg(mp);
1772 1772 if (error == ENOBUFS)
1773 1773 error = EINTR;
1774 1774 goto e_disc_unl;
1775 1775 }
1776 1776 nvp = SOTOV(nso);
1777 1777 nsti = SOTOTPI(nso);
1778 1778
1779 1779 #ifdef DEBUG
1780 1780 /*
1781 1781 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus
1782 1782 * it's inherited early to allow debugging of the accept code itself.
1783 1783 */
1784 1784 nso->so_options |= so->so_options & SO_DEBUG;
1785 1785 #endif /* DEBUG */
1786 1786
1787 1787 /*
1788 1788 * Save the SRC address from the T_CONN_IND
1789 1789 * for getpeername to work on AF_UNIX and on transports that do not
1790 1790 * support TI_GETPEERNAME.
1791 1791 *
1792 1792 * NOTE: AF_UNIX NUL termination is ensured by the sender's
1793 1793 * copyin_name().
1794 1794 */
1795 1795 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) {
1796 1796 error = EINVAL;
1797 1797 freemsg(mp);
1798 1798 eprintsoline(so, error);
1799 1799 goto disconnect_vp_unlocked;
1800 1800 }
1801 1801 nsti->sti_faddr_len = (socklen_t)srclen;
1802 1802 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
1803 1803 bcopy(src, nsti->sti_faddr_sa, srclen);
1804 1804 nsti->sti_faddr_valid = 1;
1805 1805
1806 1806 /*
1807 1807 * Record so_peercred and so_cpid from a cred in the T_CONN_IND.
1808 1808 */
1809 1809 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) <
1810 1810 (sizeof (struct T_conn_res) + sizeof (intptr_t))) {
1811 1811 cred_t *cr;
1812 1812 pid_t cpid;
1813 1813
1814 1814 cr = msg_getcred(mp, &cpid);
1815 1815 if (cr != NULL) {
1816 1816 crhold(cr);
1817 1817 nso->so_peercred = cr;
1818 1818 nso->so_cpid = cpid;
1819 1819 }
1820 1820 freemsg(mp);
1821 1821
1822 1822 mp = soallocproto1(NULL, sizeof (struct T_conn_res) +
1823 1823 sizeof (intptr_t), 0, _ALLOC_INTR, cr);
1824 1824 if (mp == NULL) {
1825 1825 /*
1826 1826 * Accept can not fail with ENOBUFS.
1827 1827 * A signal was caught so return EINTR.
1828 1828 */
1829 1829 error = EINTR;
1830 1830 eprintsoline(so, error);
1831 1831 goto disconnect_vp_unlocked;
1832 1832 }
1833 1833 conn_res = (struct T_conn_res *)mp->b_rptr;
1834 1834 } else {
1835 1835 /*
1836 1836 * For efficiency reasons we use msg_extractcred; no crhold
1837 1837 * needed since db_credp is cleared (i.e., we move the cred
1838 1838 * from the message to so_peercred).
1839 1839 */
1840 1840 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid);
1841 1841
1842 1842 mp->b_rptr = DB_BASE(mp);
1843 1843 conn_res = (struct T_conn_res *)mp->b_rptr;
1844 1844 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res);
1845 1845
1846 1846 mblk_setcred(mp, cr, curproc->p_pid);
1847 1847 }
1848 1848
1849 1849 /*
1850 1850 * New socket must be bound at least in sockfs and, except for AF_INET,
1851 1851 * (or AF_INET6) it also has to be bound in the transport provider.
1852 1852 * We set the local address in the sonode from the T_OK_ACK of the
1853 1853 * T_CONN_RES. For this reason the address we bind to here isn't
1854 1854 * important.
1855 1855 */
1856 1856 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
1857 1857 /*CONSTCOND*/
1858 1858 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) {
1859 1859 /*
1860 1860 * Optimization for AF_INET{,6} transports
1861 1861 * that can handle a T_CONN_RES without being bound.
1862 1862 */
1863 1863 mutex_enter(&nso->so_lock);
1864 1864 so_automatic_bind(nso);
1865 1865 mutex_exit(&nso->so_lock);
1866 1866 } else {
1867 1867 /* Perform NULL bind with the transport provider. */
1868 1868 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC,
1869 1869 cr)) != 0) {
1870 1870 ASSERT(error != ENOBUFS);
1871 1871 freemsg(mp);
1872 1872 eprintsoline(nso, error);
1873 1873 goto disconnect_vp_unlocked;
1874 1874 }
1875 1875 }
1876 1876
1877 1877 /*
1878 1878 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
1879 1879 * so that any data arriving on the new socket will cause the
1880 1880 * appropriate signals to be delivered for the new socket.
1881 1881 *
1882 1882 * No other thread (except strsock_proto and strsock_misc)
1883 1883 * can access the new socket thus we relax the locking.
1884 1884 */
1885 1885 nso->so_pgrp = so->so_pgrp;
1886 1886 nso->so_state |= so->so_state & SS_ASYNC;
1887 1887 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate;
1888 1888
1889 1889 if (nso->so_pgrp != 0) {
1890 1890 if ((error = so_set_events(nso, nvp, cr)) != 0) {
1891 1891 eprintsoline(nso, error);
1892 1892 error = 0;
1893 1893 nso->so_pgrp = 0;
1894 1894 }
1895 1895 }
1896 1896
1897 1897 /*
1898 1898 * Make note of the socket level options. TCP and IP level options
1899 1899 * are already inherited. We could do all this after accept is
1900 1900 * successful but doing it here simplifies code and no harm done
1901 1901 * for error case.
1902 1902 */
1903 1903 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE|
1904 1904 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
1905 1905 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
1906 1906 nso->so_sndbuf = so->so_sndbuf;
1907 1907 nso->so_rcvbuf = so->so_rcvbuf;
1908 1908 if (nso->so_options & SO_LINGER)
1909 1909 nso->so_linger = so->so_linger;
1910 1910
1911 1911 /*
1912 1912 * Note that the following sti_direct code path should be
1913 1913 * removed once we are confident that the direct sockets
1914 1914 * do not result in any degradation.
1915 1915 */
1916 1916 if (sti->sti_direct) {
1917 1917
1918 1918 ASSERT(opt != NULL);
1919 1919
1920 1920 conn_res->OPT_length = optlen;
1921 1921 conn_res->OPT_offset = MBLKL(mp);
1922 1922 bcopy(&opt, mp->b_wptr, optlen);
1923 1923 mp->b_wptr += optlen;
1924 1924 conn_res->PRIM_type = T_CONN_RES;
1925 1925 conn_res->ACCEPTOR_id = 0;
1926 1926 PRIM_type = T_CONN_RES;
1927 1927
1928 1928 /* Send down the T_CONN_RES on acceptor STREAM */
1929 1929 error = kstrputmsg(SOTOV(nso), mp, NULL,
1930 1930 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
1931 1931 if (error) {
1932 1932 mutex_enter(&so->so_lock);
1933 1933 so_lock_single(so);
1934 1934 eprintsoline(so, error);
1935 1935 goto disconnect_vp;
1936 1936 }
1937 1937 mutex_enter(&nso->so_lock);
1938 1938 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK,
1939 1939 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
1940 1940 if (error) {
1941 1941 mutex_exit(&nso->so_lock);
1942 1942 mutex_enter(&so->so_lock);
1943 1943 so_lock_single(so);
1944 1944 eprintsoline(so, error);
1945 1945 goto disconnect_vp;
1946 1946 }
1947 1947 if (nso->so_family == AF_INET) {
1948 1948 sin_t *sin;
1949 1949
1950 1950 sin = (sin_t *)(ack_mp->b_rptr +
1951 1951 sizeof (struct T_ok_ack));
1952 1952 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t));
1953 1953 nsti->sti_laddr_len = sizeof (sin_t);
1954 1954 } else {
1955 1955 sin6_t *sin6;
1956 1956
1957 1957 sin6 = (sin6_t *)(ack_mp->b_rptr +
1958 1958 sizeof (struct T_ok_ack));
1959 1959 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t));
1960 1960 nsti->sti_laddr_len = sizeof (sin6_t);
1961 1961 }
1962 1962 freemsg(ack_mp);
1963 1963
1964 1964 nso->so_state |= SS_ISCONNECTED;
1965 1965 nso->so_proto_handle = (sock_lower_handle_t)opt;
1966 1966 nsti->sti_laddr_valid = 1;
1967 1967
1968 1968 if (sti->sti_nl7c_flags & NL7C_ENABLED) {
1969 1969 /*
1970 1970 * A NL7C marked listen()er so the new socket
1971 1971 * inherits the listen()er's NL7C state, except
1972 1972 * for NL7C_POLLIN.
1973 1973 *
1974 1974 * Only call NL7C to process the new socket if
1975 1975 * the listen socket allows blocking i/o.
1976 1976 */
1977 1977 nsti->sti_nl7c_flags =
1978 1978 sti->sti_nl7c_flags & (~NL7C_POLLIN);
1979 1979 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) {
1980 1980 /*
1981 1981 * Nonblocking accept() just make it
1982 1982 * persist to defer processing to the
1983 1983 * read-side syscall (e.g. read).
1984 1984 */
1985 1985 nsti->sti_nl7c_flags |= NL7C_SOPERSIST;
1986 1986 } else if (nl7c_process(nso, B_FALSE)) {
1987 1987 /*
1988 1988 * NL7C has completed processing on the
1989 1989 * socket, close the socket and back to
1990 1990 * the top to await the next T_CONN_IND.
1991 1991 */
1992 1992 mutex_exit(&nso->so_lock);
1993 1993 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0,
1994 1994 cr, NULL);
1995 1995 VN_RELE(nvp);
1996 1996 goto again;
1997 1997 }
1998 1998 /* Pass the new socket out */
1999 1999 }
2000 2000
2001 2001 mutex_exit(&nso->so_lock);
2002 2002
2003 2003 /*
2004 2004 * It's possible, through the use of autopush for example,
2005 2005 * that the acceptor stream may not support sti_direct
2006 2006 * semantics. If the new socket does not support sti_direct
2007 2007 * we issue a _SIOCSOCKFALLBACK to inform the transport
2008 2008 * as we would in the I_PUSH case.
2009 2009 */
2010 2010 if (nsti->sti_direct == 0) {
2011 2011 int rval;
2012 2012
2013 2013 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK,
2014 2014 0, 0, K_TO_K, cr, &rval)) != 0) {
2015 2015 mutex_enter(&so->so_lock);
2016 2016 so_lock_single(so);
2017 2017 eprintsoline(so, error);
2018 2018 goto disconnect_vp;
2019 2019 }
2020 2020 }
2021 2021
2022 2022 /*
2023 2023 * Pass out new socket.
2024 2024 */
2025 2025 if (nsop != NULL)
2026 2026 *nsop = nso;
2027 2027
2028 2028 return (0);
2029 2029 }
2030 2030
2031 2031 /*
2032 2032 * This is the non-performance case for sockets (e.g. AF_UNIX sockets)
2033 2033 * which don't support the FireEngine accept fast-path. It is also
2034 2034 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd
2035 2035 * again. Neither sockfs nor TCP attempt to find out if some other
2036 2036 * random module has been inserted in between (in which case we
2037 2037 * should follow TLI accept behaviour). We blindly assume the worst
2038 2038 * case and revert back to old behaviour i.e. TCP will not send us
2039 2039 * any option (eager) and the accept should happen on the listener
2040 2040 * queue. Any queued T_conn_ind have already got their options removed
2041 2041 * by so_sock2_stream() when "sockmod" was I_POP'd.
2042 2042 */
2043 2043 /*
2044 2044 * Fill in the {O_}T_CONN_RES before getting SOLOCKED.
2045 2045 */
2046 2046 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) {
2047 2047 #ifdef _ILP32
2048 2048 queue_t *q;
2049 2049
2050 2050 /*
2051 2051 * Find read queue in driver
2052 2052 * Can safely do this since we "own" nso/nvp.
2053 2053 */
2054 2054 q = strvp2wq(nvp)->q_next;
2055 2055 while (SAMESTR(q))
2056 2056 q = q->q_next;
2057 2057 q = RD(q);
2058 2058 conn_res->ACCEPTOR_id = (t_uscalar_t)q;
2059 2059 #else
2060 2060 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev);
2061 2061 #endif /* _ILP32 */
2062 2062 conn_res->PRIM_type = O_T_CONN_RES;
2063 2063 PRIM_type = O_T_CONN_RES;
2064 2064 } else {
2065 2065 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id;
2066 2066 conn_res->PRIM_type = T_CONN_RES;
2067 2067 PRIM_type = T_CONN_RES;
2068 2068 }
2069 2069 conn_res->SEQ_number = SEQ_number;
2070 2070 conn_res->OPT_length = 0;
2071 2071 conn_res->OPT_offset = 0;
2072 2072
2073 2073 mutex_enter(&so->so_lock);
2074 2074 so_lock_single(so); /* Set SOLOCKED */
2075 2075 mutex_exit(&so->so_lock);
2076 2076
2077 2077 error = kstrputmsg(SOTOV(so), mp, NULL,
2078 2078 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
2079 2079 mutex_enter(&so->so_lock);
2080 2080 if (error) {
2081 2081 eprintsoline(so, error);
2082 2082 goto disconnect_vp;
2083 2083 }
2084 2084 error = sowaitprim(so, PRIM_type, T_OK_ACK,
2085 2085 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
2086 2086 if (error) {
2087 2087 eprintsoline(so, error);
2088 2088 goto disconnect_vp;
2089 2089 }
2090 2090 mutex_exit(&so->so_lock);
2091 2091 /*
2092 2092 * If there is a sin/sin6 appended onto the T_OK_ACK use
2093 2093 * that to set the local address. If this is not present
2094 2094 * then we zero out the address and don't set the
2095 2095 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over
2096 2096 * the pathname from the listening socket.
2097 2097 * In the case where this is TCP or an AF_UNIX socket the
2098 2098 * client side may have queued data or a T_ORDREL in the
2099 2099 * transport. Having now sent the T_CONN_RES we may receive
2100 2100 * those queued messages at any time. Hold the acceptor
2101 2101 * so_lock until its state and laddr are finalized.
2102 2102 */
2103 2103 mutex_enter(&nso->so_lock);
2104 2104 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t);
2105 2105 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) &&
2106 2106 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) {
2107 2107 ack_mp->b_rptr += sizeof (struct T_ok_ack);
2108 2108 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen);
2109 2109 nsti->sti_laddr_len = sinlen;
2110 2110 nsti->sti_laddr_valid = 1;
2111 2111 } else if (nso->so_family == AF_UNIX) {
2112 2112 ASSERT(so->so_family == AF_UNIX);
2113 2113 nsti->sti_laddr_len = sti->sti_laddr_len;
2114 2114 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2115 2115 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa,
2116 2116 nsti->sti_laddr_len);
2117 2117 nsti->sti_laddr_valid = 1;
2118 2118 } else {
2119 2119 nsti->sti_laddr_len = sti->sti_laddr_len;
2120 2120 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2121 2121 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size);
2122 2122 nsti->sti_laddr_sa->sa_family = nso->so_family;
2123 2123 }
2124 2124 nso->so_state |= SS_ISCONNECTED;
2125 2125 mutex_exit(&nso->so_lock);
2126 2126
2127 2127 freemsg(ack_mp);
2128 2128
2129 2129 mutex_enter(&so->so_lock);
2130 2130 so_unlock_single(so, SOLOCKED);
2131 2131 mutex_exit(&so->so_lock);
2132 2132
2133 2133 /*
2134 2134 * Pass out new socket.
2135 2135 */
2136 2136 if (nsop != NULL)
2137 2137 *nsop = nso;
2138 2138
2139 2139 return (0);
2140 2140
2141 2141
2142 2142 eproto_disc_unl:
2143 2143 error = EPROTO;
2144 2144 e_disc_unl:
2145 2145 eprintsoline(so, error);
2146 2146 goto disconnect_unlocked;
2147 2147
2148 2148 pr_disc_vp_unl:
2149 2149 eprintsoline(so, error);
2150 2150 disconnect_vp_unlocked:
2151 2151 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
2152 2152 VN_RELE(nvp);
2153 2153 disconnect_unlocked:
2154 2154 (void) sodisconnect(so, SEQ_number, 0);
2155 2155 return (error);
2156 2156
2157 2157 pr_disc_vp:
2158 2158 eprintsoline(so, error);
2159 2159 disconnect_vp:
2160 2160 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
2161 2161 so_unlock_single(so, SOLOCKED);
2162 2162 mutex_exit(&so->so_lock);
2163 2163 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
2164 2164 VN_RELE(nvp);
2165 2165 return (error);
2166 2166
2167 2167 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */
2168 2168 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
2169 2169 ? EOPNOTSUPP : EINVAL;
2170 2170 e_bad:
2171 2171 eprintsoline(so, error);
2172 2172 return (error);
2173 2173 }
2174 2174
2175 2175 /*
2176 2176 * connect a socket.
2177 2177 *
2178 2178 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
2179 2179 * unconnect (by specifying a null address).
          *
          * The name/namelen pair is the peer address.  A name whose family is
          * AF_UNSPEC is treated like a null address.  fflag is handed to
          * sowaitconnected(); it is cleared on the datagram path that sends a
          * T_CONN_REQ.  The flags _SOCONNECT_NOXLATE and _SOCONNECT_XPG4_2 are
          * examined here; _SOCONNECT_DID_BIND is set internally when this call
          * had to bind the socket.  cr is used for message allocation and is
          * passed to sotpi_bind() and sotpi_setsockopt().
          *
          * Returns 0 on success, otherwise an errno value.  Errors generated
          * directly by this function are:
          *   EINTR             a message allocation returned NULL
          *   EOPNOTSUPP        the socket has SS_ACCEPTCONN set
          *   EISCONN/EALREADY  a connection-oriented socket is already
          *                     connected/connecting
          *   EINVAL            null or zero-length name, wrong length for
          *                     _SOCONNECT_NOXLATE, or name longer than
          *                     sti_faddr_maxlen
          * Errors from the called helpers are passed through unchanged.
          *
          * so_lock and SOLOCKED are taken here.  Both are dropped temporarily
          * around the sotpi_setsockopt() calls, and so_lock alone is dropped
          * around kstrputmsg().  Neither is held on return.
2180 2180 */
2181 2181 int
2182 2182 sotpi_connect(struct sonode *so,
2183 2183 struct sockaddr *name,
2184 2184 socklen_t namelen,
2185 2185 int fflag,
2186 2186 int flags,
2187 2187 struct cred *cr)
2188 2188 {
2189 2189 struct T_conn_req conn_req;
2190 2190 int error = 0;
2191 2191 mblk_t *mp;
2192 2192 void *src;
2193 2193 socklen_t srclen;
2194 2194 void *addr;
2195 2195 socklen_t addrlen;
2196 2196 boolean_t need_unlock;
2197 2197 sotpi_info_t *sti = SOTOTPI(so);
2198 2198
2199 2199 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
2200 2200 (void *)so, (void *)name, namelen, fflag, flags,
2201 2201 pr_state(so->so_state, so->so_mode)));
2202 2202
2203 2203 /*
2204 2204 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
2205 2205 * avoid sleeping for memory with SOLOCKED held.
2206 2206 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
2207 2207 * + sizeof (struct T_opthdr).
2208 2208 * (the AF_UNIX so_ux_addr_xlate() does not make the address
2209 2209 * exceed sti_faddr_maxlen).
2210 2210 */
2211 2211 mp = soallocproto(sizeof (struct T_conn_req) +
2212 2212 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR,
2213 2213 cr);
2214 2214 if (mp == NULL) {
2215 2215 /*
2216 2216 * Connect can not fail with ENOBUFS. A signal was
2217 2217 * caught so return EINTR.
2218 2218 */
2219 2219 error = EINTR;
2220 2220 eprintsoline(so, error);
2221 2221 return (error);
2222 2222 }
2223 2223
2224 2224 mutex_enter(&so->so_lock);
2225 2225 /*
2226 2226 * Make sure there is a preallocated T_unbind_req message
2227 2227 * before any binding. This message is allocated when the
2228 2228 * socket is created. Since another thread can consume
2229 2229 * so_unbind_mp by the time we return from so_lock_single(),
2230 2230 * we should check the availability of so_unbind_mp after
2231 2231 * we return from so_lock_single().
2232 2232 */
2233 2233
2234 2234 so_lock_single(so); /* Set SOLOCKED */
          /* Tracks whether SOLOCKED still has to be released at "done". */
2235 2235 need_unlock = B_TRUE;
2236 2236
2237 2237 if (sti->sti_unbind_mp == NULL) {
2238 2238 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
2239 2239 /* NOTE: holding so_lock while sleeping */
2240 2240 sti->sti_unbind_mp =
2241 2241 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr);
2242 2242 if (sti->sti_unbind_mp == NULL) {
2243 2243 error = EINTR;
2244 2244 goto done;
2245 2245 }
2246 2246 }
2247 2247
2248 2248 /*
2249 2249 * Can't have done a listen before connecting.
2250 2250 */
2251 2251 if (so->so_state & SS_ACCEPTCONN) {
2252 2252 error = EOPNOTSUPP;
2253 2253 goto done;
2254 2254 }
2255 2255
2256 2256 /*
2257 2257 * Must be bound with the transport
2258 2258 */
2259 2259 if (!(so->so_state & SS_ISBOUND)) {
2260 2260 if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
2261 2261 /*CONSTCOND*/
2262 2262 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) {
2263 2263 /*
2264 2264 * Optimization for AF_INET{,6} transports
2265 2265 * that can handle a T_CONN_REQ without being bound.
2266 2266 */
2267 2267 so_automatic_bind(so);
2268 2268 } else {
2269 2269 error = sotpi_bind(so, NULL, 0,
2270 2270 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
2271 2271 if (error)
2272 2272 goto done;
2273 2273 }
2274 2274 ASSERT(so->so_state & SS_ISBOUND);
          /*
           * Remember that this call did the bind so that a fatal error
           * can undo it at "done".
           */
2275 2275 flags |= _SOCONNECT_DID_BIND;
2276 2276 }
2277 2277
2278 2278 /*
2279 2279 * Handle a connect to a name parameter of type AF_UNSPEC like a
2280 2280 * connect to a null address. This is the portable method to
2281 2281 * unconnect a socket.
2282 2282 */
          /*
           * NOTE(review): name is dereferenced whenever namelen is at least
           * sizeof (sa_family_t); presumably callers never pass a NULL name
           * with a nonzero namelen -- confirm.
           */
2283 2283 if ((namelen >= sizeof (sa_family_t)) &&
2284 2284 (name->sa_family == AF_UNSPEC)) {
2285 2285 name = NULL;
2286 2286 namelen = 0;
2287 2287 }
2288 2288
2289 2289 /*
2290 2290 * Check that we are not already connected.
2291 2291 * A connection-oriented socket cannot be reconnected.
2292 2292 * A connected connection-less socket can be
2293 2293 * - connected to a different address by a subsequent connect
2294 2294 * - "unconnected" by a connect to the NULL address
2295 2295 */
2296 2296 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
2297 2297 ASSERT(!(flags & _SOCONNECT_DID_BIND));
2298 2298 if (so->so_mode & SM_CONNREQUIRED) {
2299 2299 /* Connection-oriented socket */
2300 2300 error = so->so_state & SS_ISCONNECTED ?
2301 2301 EISCONN : EALREADY;
2302 2302 goto done;
2303 2303 }
2304 2304 /* Connection-less socket */
2305 2305 if (name == NULL) {
2306 2306 /*
2307 2307 * Remove the connected state and clear SO_DGRAM_ERRIND
2308 2308 * since it was set when the socket was connected.
2309 2309 * If this is UDP also send down a T_DISCON_REQ.
2310 2310 */
2311 2311 int val;
2312 2312
2313 2313 if ((so->so_family == AF_INET ||
2314 2314 so->so_family == AF_INET6) &&
2315 2315 (so->so_type == SOCK_DGRAM ||
2316 2316 so->so_type == SOCK_RAW) &&
2317 2317 /*CONSTCOND*/
2318 2318 !soconnect_tpi_udp) {
2319 2319 /* XXX What about implicitly unbinding here? */
2320 2320 error = sodisconnect(so, -1,
2321 2321 _SODISCONNECT_LOCK_HELD);
2322 2322 } else {
2323 2323 so->so_state &=
2324 2324 ~(SS_ISCONNECTED | SS_ISCONNECTING);
2325 2325 sti->sti_faddr_valid = 0;
2326 2326 sti->sti_faddr_len = 0;
2327 2327 }
2328 2328
2329 2329 /* Remove SOLOCKED since setsockopt will grab it */
2330 2330 so_unlock_single(so, SOLOCKED);
2331 2331 mutex_exit(&so->so_lock);
2332 2332
2333 2333 val = 0;
2334 2334 (void) sotpi_setsockopt(so, SOL_SOCKET,
2335 2335 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val),
2336 2336 cr);
2337 2337
2338 2338 mutex_enter(&so->so_lock);
2339 2339 so_lock_single(so); /* Set SOLOCKED */
2340 2340 goto done;
2341 2341 }
2342 2342 }
2343 2343 ASSERT(so->so_state & SS_ISBOUND);
2344 2344
2345 2345 if (name == NULL || namelen == 0) {
2346 2346 error = EINVAL;
2347 2347 goto done;
2348 2348 }
2349 2349 /*
2350 2350 * Mark the socket if sti_faddr_sa represents the transport level
2351 2351 * address.
2352 2352 */
2353 2353 if (flags & _SOCONNECT_NOXLATE) {
2354 2354 struct sockaddr_ux *soaddr_ux;
2355 2355
2356 2356 ASSERT(so->so_family == AF_UNIX);
2357 2357 if (namelen != sizeof (struct sockaddr_ux)) {
2358 2358 error = EINVAL;
2359 2359 goto done;
2360 2360 }
2361 2361 soaddr_ux = (struct sockaddr_ux *)name;
2362 2362 name = (struct sockaddr *)&soaddr_ux->sou_addr;
2363 2363 namelen = sizeof (soaddr_ux->sou_addr);
          /*
           * NOTE(review): this is set before so_addr_verify() runs and
           * nothing in this function clears it again if a later check
           * fails -- confirm that is intended.
           */
2364 2364 sti->sti_faddr_noxlate = 1;
2365 2365 }
2366 2366
2367 2367 /*
2368 2368 * Length and family checks.
2369 2369 */
2370 2370 error = so_addr_verify(so, name, namelen);
2371 2371 if (error)
2372 2372 goto bad;
2373 2373
2374 2374 /*
2375 2375 * Save foreign address. Needed for AF_UNIX as well as
2376 2376 * transport providers that do not support TI_GETPEERNAME.
2377 2377 * Also used for cached foreign address for TCP and UDP.
2378 2378 */
2379 2379 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) {
2380 2380 error = EINVAL;
2381 2381 goto done;
2382 2382 }
2383 2383 sti->sti_faddr_len = (socklen_t)namelen;
2384 2384 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
2385 2385 bcopy(name, sti->sti_faddr_sa, namelen);
2386 2386 sti->sti_faddr_valid = 1;
2387 2387
          /*
           * Select the destination address (addr/addrlen) and the optional
           * source address option (src/srclen) for the T_CONN_REQ.
           */
2388 2388 if (so->so_family == AF_UNIX) {
2389 2389 if (sti->sti_faddr_noxlate) {
2390 2390 /*
2391 2391 * sti_faddr is a transport-level address, so
2392 2392 * don't pass it as an option. Do save it in
2393 2393 * sti_ux_faddr, used for connected DG send.
2394 2394 */
2395 2395 src = NULL;
2396 2396 srclen = 0;
2397 2397 addr = sti->sti_faddr_sa;
2398 2398 addrlen = (t_uscalar_t)sti->sti_faddr_len;
2399 2399 bcopy(addr, &sti->sti_ux_faddr,
2400 2400 sizeof (sti->sti_ux_faddr));
2401 2401 } else {
2402 2402 /*
2403 2403 * Pass the sockaddr_un source address as an option
2404 2404 * and translate the remote address.
2405 2405 * Holding so_lock thus sti_laddr_sa can not change.
2406 2406 */
2407 2407 src = sti->sti_laddr_sa;
2408 2408 srclen = (t_uscalar_t)sti->sti_laddr_len;
2409 2409 dprintso(so, 1,
2410 2410 ("sotpi_connect UNIX: srclen %d, src %p\n",
2411 2411 srclen, src));
2412 2412 /*
2413 2413 * Translate the destination address into our
2414 2414 * internal form, and save it in sti_ux_faddr.
2415 2415 * After this call, addr==&sti->sti_ux_taddr,
2416 2416 * and we copy that to sti->sti_ux_faddr so
2417 2417 * we save the connected peer address.
2418 2418 */
2419 2419 error = so_ux_addr_xlate(so,
2420 2420 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len,
2421 2421 (flags & _SOCONNECT_XPG4_2),
2422 2422 &addr, &addrlen);
2423 2423 if (error)
2424 2424 goto bad;
2425 2425 bcopy(&sti->sti_ux_taddr, &sti->sti_ux_faddr,
2426 2426 sizeof (sti->sti_ux_faddr));
2427 2427 }
2428 2428 } else {
2429 2429 addr = sti->sti_faddr_sa;
2430 2430 addrlen = (t_uscalar_t)sti->sti_faddr_len;
2431 2431 src = NULL;
2432 2432 srclen = 0;
2433 2433 }
2434 2434 /*
2435 2435 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND
2436 2436 * option which asks the transport provider to send T_UDERR_IND
2437 2437 * messages. These T_UDERR_IND messages are used to return connected
2438 2438 * style errors (e.g. ECONNRESET) for connected datagram sockets.
2439 2439 *
2440 2440 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
2441 2441 * we send down a T_CONN_REQ. This is needed to let the
2442 2442 * transport assign a local address that is consistent with
2443 2443 * the remote address. Applications depend on a getsockname()
2444 2444 * after a connect() to retrieve the "source" IP address for
2445 2445 * the connected socket. Invalidate the cached local address
2446 2446 * to force getsockname() to enquire of the transport.
2447 2447 */
2448 2448 if (!(so->so_mode & SM_CONNREQUIRED)) {
2449 2449 /*
2450 2450 * Datagram socket.
2451 2451 */
2452 2452 int32_t val;
2453 2453
          /* Drop SOLOCKED and so_lock around the setsockopt call. */
2454 2454 so_unlock_single(so, SOLOCKED);
2455 2455 mutex_exit(&so->so_lock);
2456 2456
2457 2457 val = 1;
2458 2458 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
2459 2459 &val, (t_uscalar_t)sizeof (val), cr);
2460 2460
2461 2461 mutex_enter(&so->so_lock);
2462 2462 so_lock_single(so); /* Set SOLOCKED */
          /*
           * Anything other than an AF_INET{,6} SOCK_DGRAM/SOCK_RAW socket
           * (or any socket when soconnect_tpi_udp is set) is simply marked
           * connected here; no T_CONN_REQ is sent for it.
           */
2463 2463 if ((so->so_family != AF_INET && so->so_family != AF_INET6) ||
2464 2464 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) ||
2465 2465 soconnect_tpi_udp) {
2466 2466 soisconnected(so);
2467 2467 goto done;
2468 2468 }
2469 2469 /*
2470 2470 * Send down T_CONN_REQ etc.
2471 2471 * Clear fflag to avoid returning EWOULDBLOCK.
2472 2472 */
2473 2473 fflag = 0;
2474 2474 ASSERT(so->so_family != AF_UNIX);
2475 2475 sti->sti_laddr_valid = 0;
2476 2476 } else if (sti->sti_laddr_len != 0) {
2477 2477 /*
2478 2478 * If the local address or port was "any" then it may be
2479 2479 * changed by the transport as a result of the
2480 2480 * connect. Invalidate the cached version if we have one.
2481 2481 */
2482 2482 switch (so->so_family) {
2483 2483 case AF_INET:
2484 2484 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t));
2485 2485 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr ==
2486 2486 INADDR_ANY ||
2487 2487 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0)
2488 2488 sti->sti_laddr_valid = 0;
2489 2489 break;
2490 2490
2491 2491 case AF_INET6:
2492 2492 ASSERT(sti->sti_laddr_len ==
2493 2493 (socklen_t)sizeof (sin6_t));
2494 2494 if (IN6_IS_ADDR_UNSPECIFIED(
2495 2495 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) ||
2496 2496 IN6_IS_ADDR_V4MAPPED_ANY(
2497 2497 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) ||
2498 2498 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0)
2499 2499 sti->sti_laddr_valid = 0;
2500 2500 break;
2501 2501
2502 2502 default:
2503 2503 break;
2504 2504 }
2505 2505 }
2506 2506
2507 2507 /*
2508 2508 * Check for failure of an earlier call
2509 2509 */
2510 2510 if (so->so_error != 0)
2511 2511 goto so_bad;
2512 2512
2513 2513 /*
2514 2514 * Send down T_CONN_REQ. Message was allocated above.
2515 2515 */
2516 2516 conn_req.PRIM_type = T_CONN_REQ;
2517 2517 conn_req.DEST_length = addrlen;
2518 2518 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req);
2519 2519 if (srclen == 0) {
2520 2520 conn_req.OPT_length = 0;
2521 2521 conn_req.OPT_offset = 0;
2522 2522 soappendmsg(mp, &conn_req, sizeof (conn_req));
2523 2523 soappendmsg(mp, addr, addrlen);
2524 2524 } else {
2525 2525 /*
2526 2526 * There is an AF_UNIX sockaddr_un to include as a source
2527 2527 * address option.
2528 2528 */
2529 2529 struct T_opthdr toh;
2530 2530
2531 2531 toh.level = SOL_SOCKET;
2532 2532 toh.name = SO_SRCADDR;
2533 2533 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
2534 2534 toh.status = 0;
2535 2535 conn_req.OPT_length =
2536 2536 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen));
2537 2537 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) +
2538 2538 _TPI_ALIGN_TOPT(addrlen));
2539 2539
          /*
           * Message layout: T_conn_req, destination address, padding up to
           * _TPI_ALIGN_TOPT(addrlen), T_opthdr, source address, padding up
           * to _TPI_ALIGN_TOPT(srclen).  The padding is produced by
           * advancing b_wptr directly.
           */
2540 2540 soappendmsg(mp, &conn_req, sizeof (conn_req));
2541 2541 soappendmsg(mp, addr, addrlen);
2542 2542 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
2543 2543 soappendmsg(mp, &toh, sizeof (toh));
2544 2544 soappendmsg(mp, src, srclen);
2545 2545 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
2546 2546 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
2547 2547 }
2548 2548 /*
2549 2549 * Set SS_ISCONNECTING before sending down the T_CONN_REQ
2550 2550 * in order to have the right state when the T_CONN_CON shows up.
2551 2551 */
2552 2552 soisconnecting(so);
2553 2553 mutex_exit(&so->so_lock);
2554 2554
2555 2555 if (AU_AUDITING())
2556 2556 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0);
2557 2557
2558 2558 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2559 2559 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
          /*
           * mp has been handed to kstrputmsg(); clear it so that the
           * freemsg() at "done" does not touch that message again.
           */
2560 2560 mp = NULL;
2561 2561 mutex_enter(&so->so_lock);
2562 2562 if (error != 0)
2563 2563 goto bad;
2564 2564
2565 2565 if ((error = sowaitokack(so, T_CONN_REQ)) != 0)
2566 2566 goto bad;
2567 2567
2568 2568 /* Allow other threads to access the socket */
2569 2569 so_unlock_single(so, SOLOCKED);
2570 2570 need_unlock = B_FALSE;
2571 2571
2572 2572 /*
2573 2573 * Wait until we get a T_CONN_CON or an error
2574 2574 */
2575 2575 if ((error = sowaitconnected(so, fflag, 0)) != 0) {
2576 2576 so_lock_single(so); /* Set SOLOCKED */
2577 2577 need_unlock = B_TRUE;
2578 2578 }
2579 2579
          /*
           * Common exit, entered with so_lock held.  mp is the preallocated
           * T_CONN_REQ unless it was already sent above, in which case it
           * has been set to NULL.
           */
2580 2580 done:
2581 2581 freemsg(mp);
2582 2582 switch (error) {
2583 2583 case EINPROGRESS:
2584 2584 case EALREADY:
2585 2585 case EISCONN:
2586 2586 case EINTR:
2587 2587 /* Non-fatal errors */
2588 2588 sti->sti_laddr_valid = 0;
2589 2589 /* FALLTHRU */
2590 2590 case 0:
2591 2591 break;
2592 2592 default:
2593 2593 ASSERT(need_unlock);
2594 2594 /*
2595 2595 * Fatal errors: clear SS_ISCONNECTING in case it was set,
2596 2596 * and invalidate local-address cache
2597 2597 */
2598 2598 so->so_state &= ~SS_ISCONNECTING;
2599 2599 sti->sti_laddr_valid = 0;
2600 2600 /* A discon_ind might have already unbound us */
2601 2601 if ((flags & _SOCONNECT_DID_BIND) &&
2602 2602 (so->so_state & SS_ISBOUND)) {
2603 2603 int err;
2604 2604
2605 2605 err = sotpi_unbind(so, 0);
2606 2606 /* LINTED - statement has no conseq */
2607 2607 if (err) {
2608 2608 eprintsoline(so, err);
2609 2609 }
2610 2610 }
2611 2611 break;
2612 2612 }
2613 2613 if (need_unlock)
2614 2614 so_unlock_single(so, SOLOCKED);
2615 2615 mutex_exit(&so->so_lock);
2616 2616 return (error);
2617 2617
          /*
           * so_bad: replace error with the value returned by sogeterr().
           * bad: report the error with eprintsoline() and leave via "done".
           */
2618 2618 so_bad: error = sogeterr(so, B_TRUE);
2619 2619 bad: eprintsoline(so, error);
2620 2620 goto done;
2621 2621 }
2622 2622
/*
 * Shut down one or both directions of a socket.
 *
 * how selects the direction: 0 calls socantrcvmore(), 1 calls
 * socantsendmore(), 2 calls both; any other value fails with EINVAL.
 * If the socket does not have SS_ISCONNECTED set the function returns
 * ENOTCONN, or 0 without doing anything when xnet_skip_checks is set.
 *
 * Runs with SOLOCKED set; so_lock is dropped around the stream head calls
 * and around the T_ORDREL_REQ send.  Returns 0 or an errno value, which may
 * come from sodisconnect() or kstrputmsg().
 */
2623 2623 /* ARGSUSED */
2624 2624 int
2625 2625 sotpi_shutdown(struct sonode *so, int how, struct cred *cr)
2626 2626 {
2627 2627 struct T_ordrel_req ordrel_req;
2628 2628 mblk_t *mp;
2629 2629 uint_t old_state, state_change;
2630 2630 int error = 0;
2631 2631 sotpi_info_t *sti = SOTOTPI(so);
2632 2632
2633 2633 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n",
2634 2634 (void *)so, how, pr_state(so->so_state, so->so_mode)));
2635 2635
2636 2636 mutex_enter(&so->so_lock);
2637 2637 so_lock_single(so); /* Set SOLOCKED */
2638 2638
2639 2639 /*
2640 2640 * SunOS 4.X has no check for datagram sockets.
2641 2641 * 5.X checks that it is connected (ENOTCONN)
2642 2642 * X/Open requires that we check the connected state.
2643 2643 */
2644 2644 if (!(so->so_state & SS_ISCONNECTED)) {
2645 2645 if (!xnet_skip_checks) {
2646 2646 error = ENOTCONN;
2647 2647 if (xnet_check_print) {
2648 2648 printf("sockfs: X/Open shutdown check "
2649 2649 "caused ENOTCONN\n");
2650 2650 }
2651 2651 }
2652 2652 goto done;
2653 2653 }
2654 2654 /*
2655 2655 * Record the current state and then perform any state changes.
2656 2656 * Then use the difference between the old and new states to
2657 2657 * determine which messages need to be sent.
2658 2658 * This prevents e.g. duplicate T_ORDREL_REQ when there are
2659 2659 * duplicate calls to shutdown().
2660 2660 */
2661 2661 old_state = so->so_state;
2662 2662
2663 2663 switch (how) {
2664 2664 case 0:
2665 2665 socantrcvmore(so);
2666 2666 break;
2667 2667 case 1:
2668 2668 socantsendmore(so);
2669 2669 break;
2670 2670 case 2:
2671 2671 socantsendmore(so);
2672 2672 socantrcvmore(so);
2673 2673 break;
2674 2674 default:
2675 2675 error = EINVAL;
2676 2676 goto done;
2677 2677 }
2678 2678
2679 2679 /*
2680 2680 * Assumes that the SS_CANT* flags are never cleared in the above code.
2681 2681 */
          /*
           * Under that assumption the subtraction yields exactly the
           * SS_CANT* bits that were newly set by this call.
           */
2682 2682 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) -
2683 2683 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE));
2684 2684 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0);
2685 2685
          /*
           * Update the stream head for the newly set bits.  Every case that
           * breaks out of the switch re-takes so_lock before doing so.
           */
2686 2686 switch (state_change) {
2687 2687 case 0:
2688 2688 dprintso(so, 1,
2689 2689 ("sotpi_shutdown: nothing to send in state 0x%x\n",
2690 2690 so->so_state));
2691 2691 goto done;
2692 2692
2693 2693 case SS_CANTRCVMORE:
2694 2694 mutex_exit(&so->so_lock);
2695 2695 strseteof(SOTOV(so), 1);
2696 2696 /*
2697 2697 * strseteof takes care of read side wakeups,
2698 2698 * pollwakeups, and signals.
2699 2699 */
2700 2700 /*
2701 2701 * Get the read lock before flushing data to avoid problems
2702 2702 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
2703 2703 */
2704 2704 mutex_enter(&so->so_lock);
2705 2705 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */
2706 2706 mutex_exit(&so->so_lock);
2707 2707
2708 2708 /* Flush read side queue */
2709 2709 strflushrq(SOTOV(so), FLUSHALL);
2710 2710
2711 2711 mutex_enter(&so->so_lock);
2712 2712 so_unlock_read(so); /* Clear SOREADLOCKED */
2713 2713 break;
2714 2714
2715 2715 case SS_CANTSENDMORE:
2716 2716 mutex_exit(&so->so_lock);
2717 2717 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2718 2718 mutex_enter(&so->so_lock);
2719 2719 break;
2720 2720
2721 2721 case SS_CANTSENDMORE|SS_CANTRCVMORE:
          /*
           * Both directions: the write-side step of the case above followed
           * by the same read-side sequence as the SS_CANTRCVMORE case.
           */
2722 2722 mutex_exit(&so->so_lock);
2723 2723 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2724 2724 strseteof(SOTOV(so), 1);
2725 2725 /*
2726 2726 * strseteof takes care of read side wakeups,
2727 2727 * pollwakeups, and signals.
2728 2728 */
2729 2729 /*
2730 2730 * Get the read lock before flushing data to avoid problems
2731 2731 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
2732 2732 */
2733 2733 mutex_enter(&so->so_lock);
2734 2734 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */
2735 2735 mutex_exit(&so->so_lock);
2736 2736
2737 2737 /* Flush read side queue */
2738 2738 strflushrq(SOTOV(so), FLUSHALL);
2739 2739
2740 2740 mutex_enter(&so->so_lock);
2741 2741 so_unlock_read(so); /* Clear SOREADLOCKED */
2742 2742 break;
2743 2743 }
2744 2744
2745 2745 ASSERT(MUTEX_HELD(&so->so_lock));
2746 2746
2747 2747 /*
2748 2748 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them
2749 2749 * was set due to this call and the new state has both of them set:
2750 2750 * Send the AF_UNIX close indication
2751 2751 * For T_COTS send a discon_ind
2752 2752 *
2753 2753 * If cantsend was set due to this call:
2754 2754 * For T_COTSORD send an ordrel_ind
2755 2755 *
2756 2756 * Note that for T_CLTS there is no message sent here.
2757 2757 */
2758 2758 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) ==
2759 2759 (SS_CANTRCVMORE|SS_CANTSENDMORE)) {
2760 2760 /*
2761 2761 * For SunOS 4.X compatibility we tell the other end
2762 2762 * that we are unable to receive at this point.
2763 2763 */
2764 2764 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS)
2765 2765 so_unix_close(so);
2766 2766
2767 2767 if (sti->sti_serv_type == T_COTS)
2768 2768 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD);
2769 2769 }
2770 2770 if ((state_change & SS_CANTSENDMORE) &&
2771 2771 (sti->sti_serv_type == T_COTS_ORD)) {
2772 2772 /* Send an orderly release */
2773 2773 ordrel_req.PRIM_type = T_ORDREL_REQ;
2774 2774
2775 2775 mutex_exit(&so->so_lock);
          /*
           * NOTE(review): mp is not checked for NULL before use;
           * presumably an _ALLOC_SLEEP allocation cannot fail -- confirm.
           */
2776 2776 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req),
2777 2777 0, _ALLOC_SLEEP, cr);
2778 2778 /*
2779 2779 * Send down the T_ORDREL_REQ even if there is flow control.
2780 2780 * This prevents shutdown from blocking.
2781 2781 * Note that there is no T_OK_ACK for ordrel_req.
2782 2782 */
2783 2783 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2784 2784 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
2785 2785 mutex_enter(&so->so_lock);
2786 2786 if (error) {
2787 2787 eprintsoline(so, error);
2788 2788 goto done;
2789 2789 }
2790 2790 }
2791 2791
          /* Common exit: so_lock is held and SOLOCKED is set on every path. */
2792 2792 done:
2793 2793 so_unlock_single(so, SOLOCKED);
2794 2794 mutex_exit(&so->so_lock);
2795 2795 return (error);
2796 2796 }
2797 2797
2798 2798 /*
2799 2799 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
2800 2800 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
2801 2801 * that we have closed.
2802 2802 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
2803 2803 * T_UNITDATA_REQ containing the same option.
2804 2804 *
2805 2805 * For SOCK_DGRAM half-connections (somebody connected to this end
2806 2806 * but this end is not connect) we don't know where to send any
2807 2807 * SO_UNIX_CLOSE.
2808 2808 *
2809 2809 * We have to ignore stream head errors just in case there has been
2810 2810 * a shutdown(output).
2811 2811 * Ignore any flow control to try to get the message more quickly to the peer.
2812 2812 * While locally ignoring flow control solves the problem when there
2813 2813 * is only the loopback transport on the stream it would not provide
2814 2814 * the correct AF_UNIX socket semantics when one or more modules have
2815 2815 * been pushed.
2816 2816 */
2817 2817 void
2818 2818 so_unix_close(struct sonode *so)
2819 2819 {
2820 2820 struct T_opthdr toh;
2821 2821 mblk_t *mp;
2822 2822 sotpi_info_t *sti = SOTOTPI(so);
2823 2823
2824 2824 ASSERT(MUTEX_HELD(&so->so_lock));
2825 2825
2826 2826 ASSERT(so->so_family == AF_UNIX);
2827 2827
2828 2828 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
2829 2829 (SS_ISCONNECTED|SS_ISBOUND))
2830 2830 return;
2831 2831
2832 2832 dprintso(so, 1, ("so_unix_close(%p) %s\n",
2833 2833 (void *)so, pr_state(so->so_state, so->so_mode)));
2834 2834
2835 2835 toh.level = SOL_SOCKET;
2836 2836 toh.name = SO_UNIX_CLOSE;
2837 2837
2838 2838 /* zero length + header */
2839 2839 toh.len = (t_uscalar_t)sizeof (struct T_opthdr);
2840 2840 toh.status = 0;
2841 2841
2842 2842 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) {
2843 2843 struct T_optdata_req tdr;
2844 2844
2845 2845 tdr.PRIM_type = T_OPTDATA_REQ;
2846 2846 tdr.DATA_flag = 0;
2847 2847
2848 2848 tdr.OPT_length = (t_scalar_t)sizeof (toh);
2849 2849 tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
2850 2850
2851 2851 /* NOTE: holding so_lock while sleeping */
2852 2852 mp = soallocproto2(&tdr, sizeof (tdr),
2853 2853 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED());
2854 2854 } else {
2855 2855 struct T_unitdata_req tudr;
2856 2856 void *addr;
2857 2857 socklen_t addrlen;
2858 2858 void *src;
2859 2859 socklen_t srclen;
2860 2860 struct T_opthdr toh2;
2861 2861 t_scalar_t size;
2862 2862
2863 2863 /*
2864 2864 * We know this is an AF_UNIX connected DGRAM socket.
2865 2865 * We therefore already have the destination address
2866 2866 * in the internal form needed for this send. This is
2867 2867 * similar to the sosend_dgram call later in this file
2868 2868 * when there's no user-specified destination address.
2869 2869 */
2870 2870 if (sti->sti_faddr_noxlate) {
2871 2871 /*
2872 2872 * Already have a transport internal address. Do not
2873 2873 * pass any (transport internal) source address.
2874 2874 */
2875 2875 addr = sti->sti_faddr_sa;
2876 2876 addrlen = (t_uscalar_t)sti->sti_faddr_len;
2877 2877 src = NULL;
2878 2878 srclen = 0;
2879 2879 } else {
2880 2880 /*
2881 2881 * Pass the sockaddr_un source address as an option
2882 2882 * and translate the remote address.
2883 2883 * Holding so_lock thus sti_laddr_sa can not change.
2884 2884 */
2885 2885 src = sti->sti_laddr_sa;
2886 2886 srclen = (socklen_t)sti->sti_laddr_len;
2887 2887 dprintso(so, 1,
2888 2888 ("so_ux_close: srclen %d, src %p\n",
2889 2889 srclen, src));
2890 2890 /*
2891 2891 * Use the destination address saved in connect.
2892 2892 */
2893 2893 addr = &sti->sti_ux_faddr;
2894 2894 addrlen = sizeof (sti->sti_ux_faddr);
2895 2895 }
2896 2896 tudr.PRIM_type = T_UNITDATA_REQ;
2897 2897 tudr.DEST_length = addrlen;
2898 2898 tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
2899 2899 if (srclen == 0) {
2900 2900 tudr.OPT_length = (t_scalar_t)sizeof (toh);
2901 2901 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
2902 2902 _TPI_ALIGN_TOPT(addrlen));
2903 2903
2904 2904 size = tudr.OPT_offset + tudr.OPT_length;
2905 2905 /* NOTE: holding so_lock while sleeping */
2906 2906 mp = soallocproto2(&tudr, sizeof (tudr),
2907 2907 addr, addrlen, size, _ALLOC_SLEEP, CRED());
2908 2908 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen);
2909 2909 soappendmsg(mp, &toh, sizeof (toh));
2910 2910 } else {
2911 2911 /*
2912 2912 * There is a AF_UNIX sockaddr_un to include as a
2913 2913 * source address option.
2914 2914 */
2915 2915 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) +
2916 2916 _TPI_ALIGN_TOPT(srclen));
2917 2917 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
2918 2918 _TPI_ALIGN_TOPT(addrlen));
2919 2919
2920 2920 toh2.level = SOL_SOCKET;
2921 2921 toh2.name = SO_SRCADDR;
2922 2922 toh2.len = (t_uscalar_t)(srclen +
2923 2923 sizeof (struct T_opthdr));
2924 2924 toh2.status = 0;
2925 2925
2926 2926 size = tudr.OPT_offset + tudr.OPT_length;
2927 2927
2928 2928 /* NOTE: holding so_lock while sleeping */
2929 2929 mp = soallocproto2(&tudr, sizeof (tudr),
2930 2930 addr, addrlen, size, _ALLOC_SLEEP, CRED());
2931 2931 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
2932 2932 soappendmsg(mp, &toh, sizeof (toh));
2933 2933 soappendmsg(mp, &toh2, sizeof (toh2));
2934 2934 soappendmsg(mp, src, srclen);
2935 2935 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
2936 2936 }
2937 2937 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
2938 2938 }
2939 2939 mutex_exit(&so->so_lock);
2940 2940 (void) kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2941 2941 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
2942 2942 mutex_enter(&so->so_lock);
2943 2943 }
2944 2944
2945 2945 /*
2946 2946 * Called by sotpi_recvmsg when reading a non-zero amount of data.
2947 2947 * In addition, the caller typically verifies that there is some
2948 2948 * potential state to clear by checking
2949 2949 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK))
2950 2950 * before calling this routine.
2951 2951 * Note that such a check can be made without holding so_lock since
2952 2952 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg
2953 2953 * decrements sti_oobsigcnt.
2954 2954 *
2955 2955 * When data is read *after* the point that all pending
2956 2956 * oob data has been consumed the oob indication is cleared.
2957 2957 *
2958 2958 * This logic keeps select/poll returning POLLRDBAND and
2959 2959 * SIOCATMARK returning true until we have read past
2960 2960 * the mark.
2961 2961 */
2962 2962 static void
2963 2963 sorecv_update_oobstate(struct sonode *so)
2964 2964 {
2965 2965 sotpi_info_t *sti = SOTOTPI(so);
2966 2966
2967 2967 mutex_enter(&so->so_lock);
2968 2968 ASSERT(so_verify_oobstate(so));
2969 2969 dprintso(so, 1,
2970 2970 ("sorecv_update_oobstate: counts %d/%d state %s\n",
2971 2971 sti->sti_oobsigcnt,
2972 2972 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode)));
2973 2973 if (sti->sti_oobsigcnt == 0) {
2974 2974 /* No more pending oob indications */
2975 2975 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
2976 2976 freemsg(so->so_oobmsg);
2977 2977 so->so_oobmsg = NULL;
2978 2978 }
2979 2979 ASSERT(so_verify_oobstate(so));
2980 2980 mutex_exit(&so->so_lock);
2981 2981 }
2982 2982
2983 2983 /*
2984 2984 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s).
2985 2985 */
2986 2986 static int
2987 2987 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
2988 2988 {
2989 2989 sotpi_info_t *sti = SOTOTPI(so);
2990 2990 int error = 0;
2991 2991 mblk_t *tmp = NULL;
2992 2992 mblk_t *pmp = NULL;
2993 2993 mblk_t *nmp = sti->sti_nl7c_rcv_mp;
2994 2994
2995 2995 ASSERT(nmp != NULL);
2996 2996
2997 2997 while (nmp != NULL && uiop->uio_resid > 0) {
2998 2998 ssize_t n;
2999 2999
3000 3000 if (DB_TYPE(nmp) == M_DATA) {
3001 3001 /*
3002 3002 * We have some data, uiomove up to resid bytes.
3003 3003 */
3004 3004 n = MIN(MBLKL(nmp), uiop->uio_resid);
3005 3005 if (n > 0)
3006 3006 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop);
3007 3007 nmp->b_rptr += n;
3008 3008 if (nmp->b_rptr == nmp->b_wptr) {
3009 3009 pmp = nmp;
3010 3010 nmp = nmp->b_cont;
3011 3011 }
3012 3012 if (error)
3013 3013 break;
3014 3014 } else {
3015 3015 /*
3016 3016 * We only handle data, save for caller to handle.
3017 3017 */
3018 3018 if (pmp != NULL) {
3019 3019 pmp->b_cont = nmp->b_cont;
3020 3020 }
3021 3021 nmp->b_cont = NULL;
3022 3022 if (*rmp == NULL) {
3023 3023 *rmp = nmp;
3024 3024 } else {
3025 3025 tmp->b_cont = nmp;
3026 3026 }
3027 3027 nmp = nmp->b_cont;
3028 3028 tmp = nmp;
3029 3029 }
3030 3030 }
3031 3031 if (pmp != NULL) {
3032 3032 /* Free any mblk_t(s) which we have consumed */
3033 3033 pmp->b_cont = NULL;
3034 3034 freemsg(sti->sti_nl7c_rcv_mp);
3035 3035 }
3036 3036 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) {
3037 3037 /* Last mblk_t so return the saved kstrgetmsg() rval/error */
3038 3038 if (error == 0) {
3039 3039 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval;
3040 3040
3041 3041 error = p->r_v.r_v2;
3042 3042 p->r_v.r_v2 = 0;
3043 3043 }
3044 3044 rp->r_vals = sti->sti_nl7c_rcv_rval;
3045 3045 sti->sti_nl7c_rcv_rval = 0;
3046 3046 } else {
3047 3047 /* More mblk_t(s) to process so no rval to return */
3048 3048 rp->r_vals = 0;
3049 3049 }
3050 3050 return (error);
3051 3051 }
3052 3052 /*
3053 3053 * Receive the next message on the queue.
3054 3054 * If msg_controllen is non-zero when called the caller is interested in
3055 3055 * any received control info (options).
3056 3056 * If msg_namelen is non-zero when called the caller is interested in
3057 3057 * any received source address.
3058 3058 * The routine returns with msg_control and msg_name pointing to
3059 3059 * kmem_alloc'ed memory which the caller has to free.
 *
 * Parameters:
 *	so	socket to receive from
 *	msg	on entry msg_flags holds the caller's MSG_* request flags and
 *		msg_namelen/msg_controllen say whether a name/control data is
 *		wanted; on return msg_flags holds result flags only
 *		(MSG_TRUNC, MSG_EOR, MSG_CTRUNC) and msg_namelen and
 *		msg_controllen are zero unless a name/control buffer was
 *		returned
 *	uiop	destination for the received data
 *	cr	only used in the SOV_STREAM case, where it is passed to
 *		strread()
 *
 * Returns 0 on success or an errno value: ENOTCONN for a socket that
 * requires a connection and is neither connected nor marked
 * SS_CANTRCVMORE, EOPNOTSUPP for MSG_OOB on a transport without
 * SM_EXDATA, EWOULDBLOCK when kstrgetmsg reports ETIME, EPROTO for a
 * malformed or unexpected TPI message.  Errors from so_lock_read_intr,
 * kstrgetmsg, the NL7C code and so_opt2cmsg are returned unchanged.
 *
 * SOREADLOCKED is taken with so_lock_read_intr and released at the
 * out/out_locked labels; every path after the lock is taken leaves
 * through one of them.
3060 3060 */
3061 3061 /* ARGSUSED */
3062 3062 int
3063 3063 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
3064 3064 struct cred *cr)
3065 3065 {
3066 3066 union T_primitives *tpr;
3067 3067 mblk_t *mp;
3068 3068 uchar_t pri;
3069 3069 int pflag, opflag;
3070 3070 void *control;
3071 3071 t_uscalar_t controllen;
3072 3072 t_uscalar_t namelen;
3073 3073 int so_state = so->so_state; /* Snapshot */
3074 3074 ssize_t saved_resid;
3075 3075 rval_t rval;
3076 3076 int flags;
3077 3077 clock_t timout;
3078 3078 int error = 0;
3079 3079 sotpi_info_t *sti = SOTOTPI(so);
3080 3080
/* From here on msg_flags is used only for flags returned to the caller. */
3081 3081 flags = msg->msg_flags;
3082 3082 msg->msg_flags = 0;
3083 3083
3084 3084 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n",
3085 3085 (void *)so, (void *)msg, flags,
3086 3086 pr_state(so->so_state, so->so_mode), so->so_error));
3087 3087
3088 3088 if (so->so_version == SOV_STREAM) {
3089 3089 so_update_attrs(so, SOACC);
3090 3090 /* The imaginary "sockmod" has been popped - act as a stream */
3091 3091 return (strread(SOTOV(so), uiop, cr));
3092 3092 }
3093 3093
3094 3094 /*
3095 3095 * If we are not connected because we have never been connected
3096 3096 * we return ENOTCONN. If we have been connected (but are no longer
3097 3097 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return
3098 3098 * the EOF.
3099 3099 *
3100 3100 * An alternative would be to post an ENOTCONN error in stream head
3101 3101 * (read+write) and clear it when we're connected. However, that error
3102 3102 * would cause incorrect poll/select behavior!
3103 3103 */
3104 3104 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
3105 3105 (so->so_mode & SM_CONNREQUIRED)) {
3106 3106 return (ENOTCONN);
3107 3107 }
3108 3108
3109 3109 /*
3110 3110 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but
3111 3111 * after checking that the read queue is empty) and returns zero.
3112 3112 * This implementation will sleep (in kstrgetmsg) even if uio_resid
3113 3113 * is zero.
3114 3114 */
3115 3115
3116 3116 if (flags & MSG_OOB) {
3117 3117 /* Check that the transport supports OOB */
3118 3118 if (!(so->so_mode & SM_EXDATA))
3119 3119 return (EOPNOTSUPP);
3120 3120 so_update_attrs(so, SOACC);
3121 3121 return (sorecvoob(so, msg, uiop, flags,
3122 3122 (so->so_options & SO_OOBINLINE)));
3123 3123 }
3124 3124
3125 3125 so_update_attrs(so, SOACC);
3126 3126
3127 3127 /*
3128 3128 * Set msg_controllen and msg_namelen to zero here to make it
3129 3129 * simpler in the cases that no control or name is returned.
3130 3130 */
3131 3131 controllen = msg->msg_controllen;
3132 3132 namelen = msg->msg_namelen;
3133 3133 msg->msg_controllen = 0;
3134 3134 msg->msg_namelen = 0;
3135 3135
3136 3136 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n",
3137 3137 namelen, controllen));
3138 3138
3139 3139 mutex_enter(&so->so_lock);
3140 3140 /*
3141 3141 * If an NL7C enabled socket and not waiting for write data.
3142 3142 */
3143 3143 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) ==
3144 3144 NL7C_ENABLED) {
3145 3145 if (sti->sti_nl7c_uri) {
3146 3146 /* Close uri processing for a previous request */
3147 3147 nl7c_close(so);
3148 3148 }
3149 3149 if ((so_state & SS_CANTRCVMORE) &&
3150 3150 sti->sti_nl7c_rcv_mp == NULL) {
3151 3151 /* Nothing to process, EOF */
3152 3152 mutex_exit(&so->so_lock);
3153 3153 return (0);
3154 3154 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
3155 3155 /* Persistent NL7C socket, try to process request */
3156 3156 boolean_t ret;
3157 3157
3158 3158 ret = nl7c_process(so,
3159 3159 (so->so_state & (SS_NONBLOCK|SS_NDELAY)));
3160 3160 rval.r_vals = sti->sti_nl7c_rcv_rval;
3161 3161 error = rval.r_v.r_v2;
3162 3162 if (error) {
3163 3163 /* Error of some sort, return it */
3164 3164 mutex_exit(&so->so_lock);
3165 3165 return (error);
3166 3166 }
3167 3167 if (sti->sti_nl7c_flags &&
3168 3168 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) {
3169 3169 /*
3170 3170 * Still an NL7C socket and no data
3171 3171 * to pass up to the caller.
3172 3172 */
3173 3173 mutex_exit(&so->so_lock);
3174 3174 if (ret) {
3175 3175 /* EOF */
3176 3176 return (0);
3177 3177 } else {
3178 3178 /* Need more data */
3179 3179 return (EAGAIN);
3180 3180 }
3181 3181 }
3182 3182 } else {
3183 3183 /*
3184 3184 * Not persistent so no further NL7C processing.
3185 3185 */
3186 3186 sti->sti_nl7c_flags = 0;
3187 3187 }
3188 3188 }
3189 3189 /*
3190 3190 * Only one reader is allowed at any given time. This is needed
3191 3191 * for T_EXDATA handling and, in the future, MSG_WAITALL.
3192 3192 *
3193 3193 * This is slightly different than BSD behavior in that it fails with
3194 3194 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access
3195 3195 * is single-threaded using sblock(), which is dropped while waiting
3196 3196 * for data to appear. The difference shows up e.g. if one
3197 3197 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor
3198 3198 * does use nonblocking io and different threads are reading each
3199 3199 * file descriptor. In BSD there would never be an EWOULDBLOCK error
3200 3200 * in this case as long as the read queue doesn't get empty.
3201 3201 * In this implementation the thread using nonblocking io can
3202 3202 * get an EWOULDBLOCK error due to the blocking thread executing
3203 3203 * e.g. in the uiomove in kstrgetmsg.
3204 3204 * This difference is not believed to be significant.
3205 3205 */
3206 3206 /* Set SOREADLOCKED */
3207 3207 error = so_lock_read_intr(so,
3208 3208 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
3209 3209 mutex_exit(&so->so_lock);
3210 3210 if (error)
3211 3211 return (error);
3212 3212
3213 3213 /*
3214 3214 * Tell kstrgetmsg to not inspect the stream head errors until all
3215 3215 * queued data has been consumed.
3216 3216 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set.
3217 3217 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block.
3218 3218 *
3219 3219 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and
3220 3220 * to T_OPTDATA_IND that do not contain any user-visible control msg.
3221 3221 * Note that MSG_WAITALL set with MSG_PEEK is a noop.
3222 3222 */
3223 3223 pflag = MSG_ANY | MSG_DELAYERROR;
3224 3224 if (flags & MSG_PEEK) {
3225 3225 pflag |= MSG_IPEEK;
3226 3226 flags &= ~MSG_WAITALL;
3227 3227 }
3228 3228 if (so->so_mode & SM_ATOMIC)
3229 3229 pflag |= MSG_DISCARDTAIL;
3230 3230
3231 3231 if (flags & MSG_DONTWAIT)
3232 3232 timout = 0;
3233 3233 else if (so->so_rcvtimeo != 0)
3234 3234 timout = TICK_TO_MSEC(so->so_rcvtimeo);
3235 3235 else
3236 3236 timout = -1;
/* Remember the initial flags; the retry paths below rebuild pflag from it. */
3237 3237 opflag = pflag;
3238 3238 retry:
/*
 * Each pass records uio_resid so the code below can tell whether this
 * pass moved any data (uio_resid != saved_resid).
 */
3239 3239 saved_resid = uiop->uio_resid;
3240 3240 pri = 0;
3241 3241 mp = NULL;
3242 3242 if (sti->sti_nl7c_rcv_mp != NULL) {
3243 3243 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */
3244 3244 error = nl7c_sorecv(so, &mp, uiop, &rval);
3245 3245 } else {
3246 3246 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag,
3247 3247 timout, &rval);
3248 3248 }
3249 3249 if (error != 0) {
3250 3250 /* kstrgetmsg returns ETIME when timeout expires */
3251 3251 if (error == ETIME)
3252 3252 error = EWOULDBLOCK;
3253 3253 goto out;
3254 3254 }
3255 3255 /*
3256 3256 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
3257 3257 * For non-datagrams MOREDATA is used to set MSG_EOR.
3258 3258 */
3259 3259 ASSERT(!(rval.r_val1 & MORECTL));
3260 3260 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
3261 3261 msg->msg_flags |= MSG_TRUNC;
3262 3262
/* No control part was returned: the message was plain data. */
3263 3263 if (mp == NULL) {
3264 3264 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n"));
3265 3265 /*
3266 3266 * 4.3BSD and 4.4BSD clears the mark when peeking across it.
3267 3267 * The draft Posix socket spec states that the mark should
3268 3268 * not be cleared when peeking. We follow the latter.
3269 3269 */
3270 3270 if ((so->so_state &
3271 3271 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
3272 3272 (uiop->uio_resid != saved_resid) &&
3273 3273 !(flags & MSG_PEEK)) {
3274 3274 sorecv_update_oobstate(so);
3275 3275 }
3276 3276
3277 3277 mutex_enter(&so->so_lock);
3278 3278 /* Set MSG_EOR based on MOREDATA */
3279 3279 if (!(rval.r_val1 & MOREDATA)) {
3280 3280 if (so->so_state & SS_SAVEDEOR) {
3281 3281 msg->msg_flags |= MSG_EOR;
3282 3282 so->so_state &= ~SS_SAVEDEOR;
3283 3283 }
3284 3284 }
3285 3285 /*
3286 3286 * If some data was received (i.e. not EOF) and the
3287 3287 * read/recv* has not been satisfied wait for some more.
3288 3288 */
3289 3289 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
3290 3290 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
3291 3291 mutex_exit(&so->so_lock);
3292 3292 pflag = opflag | MSG_NOMARK;
3293 3293 goto retry;
3294 3294 }
3295 3295 goto out_locked;
3296 3296 }
3297 3297
3298 3298 /* strsock_proto has already verified length and alignment */
3299 3299 tpr = (union T_primitives *)mp->b_rptr;
3300 3300 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type));
3301 3301
3302 3302 switch (tpr->type) {
3303 3303 case T_DATA_IND: {
3304 3304 if ((so->so_state &
3305 3305 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
3306 3306 (uiop->uio_resid != saved_resid) &&
3307 3307 !(flags & MSG_PEEK)) {
3308 3308 sorecv_update_oobstate(so);
3309 3309 }
3310 3310
3311 3311 /*
3312 3312 * Set msg_flags to MSG_EOR based on
3313 3313 * MORE_flag and MOREDATA.
 * If the record is complete but the caller has not yet read all
 * of it, SS_SAVEDEOR remembers that MSG_EOR is still owed.
3314 3314 */
3315 3315 mutex_enter(&so->so_lock);
3316 3316 so->so_state &= ~SS_SAVEDEOR;
3317 3317 if (!(tpr->data_ind.MORE_flag & 1)) {
3318 3318 if (!(rval.r_val1 & MOREDATA))
3319 3319 msg->msg_flags |= MSG_EOR;
3320 3320 else
3321 3321 so->so_state |= SS_SAVEDEOR;
3322 3322 }
3323 3323 freemsg(mp);
3324 3324 /*
3325 3325 * If some data was received (i.e. not EOF) and the
3326 3326 * read/recv* has not been satisfied wait for some more.
3327 3327 */
3328 3328 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
3329 3329 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
3330 3330 mutex_exit(&so->so_lock);
3331 3331 pflag = opflag | MSG_NOMARK;
3332 3332 goto retry;
3333 3333 }
3334 3334 goto out_locked;
3335 3335 }
3336 3336 case T_UNITDATA_IND: {
3337 3337 void *addr;
3338 3338 t_uscalar_t addrlen;
3339 3339 void *abuf;
3340 3340 t_uscalar_t optlen;
3341 3341 void *opt;
3342 3342
3343 3343 if ((so->so_state &
3344 3344 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
3345 3345 (uiop->uio_resid != saved_resid) &&
3346 3346 !(flags & MSG_PEEK)) {
3347 3347 sorecv_update_oobstate(so);
3348 3348 }
3349 3349
3350 3350 if (namelen != 0) {
3351 3351 /* Caller wants source address */
3352 3352 addrlen = tpr->unitdata_ind.SRC_length;
3353 3353 addr = sogetoff(mp,
3354 3354 tpr->unitdata_ind.SRC_offset,
3355 3355 addrlen, 1);
3356 3356 if (addr == NULL) {
3357 3357 freemsg(mp);
3358 3358 error = EPROTO;
3359 3359 eprintsoline(so, error);
3360 3360 goto out;
3361 3361 }
3362 3362 if (so->so_family == AF_UNIX) {
3363 3363 /*
3364 3364 * Can not use the transport level address.
3365 3365 * If there is a SO_SRCADDR option carrying
3366 3366 * the socket level address it will be
3367 3367 * extracted below.
3368 3368 */
3369 3369 addr = NULL;
3370 3370 addrlen = 0;
3371 3371 }
3372 3372 }
3373 3373 optlen = tpr->unitdata_ind.OPT_length;
3374 3374 if (optlen != 0) {
3375 3375 t_uscalar_t ncontrollen;
3376 3376
3377 3377 /*
3378 3378 * Extract any source address option.
3379 3379 * Determine how large cmsg buffer is needed.
3380 3380 */
3381 3381 opt = sogetoff(mp,
3382 3382 tpr->unitdata_ind.OPT_offset,
3383 3383 optlen, __TPI_ALIGN_SIZE);
3384 3384
3385 3385 if (opt == NULL) {
3386 3386 freemsg(mp);
3387 3387 error = EPROTO;
3388 3388 eprintsoline(so, error);
3389 3389 goto out;
3390 3390 }
3391 3391 if (so->so_family == AF_UNIX)
3392 3392 so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
3393 3393 ncontrollen = so_cmsglen(mp, opt, optlen,
3394 3394 !(flags & MSG_XPG4_2));
/*
 * If the caller asked for control data, size the buffer by what the
 * options need; otherwise report that control data was dropped.
 */
3395 3395 if (controllen != 0)
3396 3396 controllen = ncontrollen;
3397 3397 else if (ncontrollen != 0)
3398 3398 msg->msg_flags |= MSG_CTRUNC;
3399 3399 } else {
3400 3400 controllen = 0;
3401 3401 }
3402 3402
3403 3403 if (namelen != 0) {
3404 3404 /*
3405 3405 * Return address to caller.
3406 3406 * Caller handles truncation if length
3407 3407 * exceeds msg_namelen.
3408 3408 * NOTE: AF_UNIX NUL termination is ensured by
3409 3409 * the sender's copyin_name().
3410 3410 */
3411 3411 abuf = kmem_alloc(addrlen, KM_SLEEP);
3412 3412
3413 3413 bcopy(addr, abuf, addrlen);
3414 3414 msg->msg_name = abuf;
3415 3415 msg->msg_namelen = addrlen;
3416 3416 }
3417 3417
3418 3418 if (controllen != 0) {
3419 3419 /*
3420 3420 * Return control msg to caller.
3421 3421 * Caller handles truncation if length
3422 3422 * exceeds msg_controllen.
3423 3423 */
3424 3424 control = kmem_zalloc(controllen, KM_SLEEP);
3425 3425
3426 3426 error = so_opt2cmsg(mp, opt, optlen,
3427 3427 !(flags & MSG_XPG4_2),
3428 3428 control, controllen);
3429 3429 if (error) {
3430 3430 freemsg(mp);
/*
 * NOTE(review): msg_name/msg_namelen are left pointing at the buffer
 * freed here - confirm callers ignore them when an error is returned.
 */
3431 3431 if (msg->msg_namelen != 0)
3432 3432 kmem_free(msg->msg_name,
3433 3433 msg->msg_namelen);
3434 3434 kmem_free(control, controllen);
3435 3435 eprintsoline(so, error);
3436 3436 goto out;
3437 3437 }
3438 3438 msg->msg_control = control;
3439 3439 msg->msg_controllen = controllen;
3440 3440 }
3441 3441
3442 3442 freemsg(mp);
3443 3443 goto out;
3444 3444 }
3445 3445 case T_OPTDATA_IND: {
3446 3446 struct T_optdata_req *tdr;
3447 3447 void *opt;
3448 3448 t_uscalar_t optlen;
3449 3449
3450 3450 if ((so->so_state &
3451 3451 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
3452 3452 (uiop->uio_resid != saved_resid) &&
3453 3453 !(flags & MSG_PEEK)) {
3454 3454 sorecv_update_oobstate(so);
3455 3455 }
3456 3456
/*
 * NOTE(review): the indication is read through the T_optdata_req
 * layout here - presumably the two structures match; confirm.
 */
3457 3457 tdr = (struct T_optdata_req *)mp->b_rptr;
3458 3458 optlen = tdr->OPT_length;
3459 3459 if (optlen != 0) {
3460 3460 t_uscalar_t ncontrollen;
3461 3461 /*
3462 3462 * Determine how large cmsg buffer is needed.
3463 3463 */
3464 3464 opt = sogetoff(mp,
3465 3465 tpr->optdata_ind.OPT_offset,
3466 3466 optlen, __TPI_ALIGN_SIZE);
3467 3467
3468 3468 if (opt == NULL) {
3469 3469 freemsg(mp);
3470 3470 error = EPROTO;
3471 3471 eprintsoline(so, error);
3472 3472 goto out;
3473 3473 }
3474 3474
3475 3475 ncontrollen = so_cmsglen(mp, opt, optlen,
3476 3476 !(flags & MSG_XPG4_2));
3477 3477 if (controllen != 0)
3478 3478 controllen = ncontrollen;
3479 3479 else if (ncontrollen != 0)
3480 3480 msg->msg_flags |= MSG_CTRUNC;
3481 3481 } else {
3482 3482 controllen = 0;
3483 3483 }
3484 3484
3485 3485 if (controllen != 0) {
3486 3486 /*
3487 3487 * Return control msg to caller.
3488 3488 * Caller handles truncation if length
3489 3489 * exceeds msg_controllen.
3490 3490 */
3491 3491 control = kmem_zalloc(controllen, KM_SLEEP);
3492 3492
3493 3493 error = so_opt2cmsg(mp, opt, optlen,
3494 3494 !(flags & MSG_XPG4_2),
3495 3495 control, controllen);
3496 3496 if (error) {
3497 3497 freemsg(mp);
3498 3498 kmem_free(control, controllen);
3499 3499 eprintsoline(so, error);
3500 3500 goto out;
3501 3501 }
3502 3502 msg->msg_control = control;
3503 3503 msg->msg_controllen = controllen;
3504 3504 }
3505 3505
3506 3506 /*
3507 3507 * Set msg_flags to MSG_EOR based on
3508 3508 * DATA_flag and MOREDATA.
 * NOTE(review): the code below reads data_ind.MORE_flag rather
 * than optdata_ind.DATA_flag - presumably both are at the same
 * offset in the union; confirm.
3509 3509 */
3510 3510 mutex_enter(&so->so_lock);
3511 3511 so->so_state &= ~SS_SAVEDEOR;
3512 3512 if (!(tpr->data_ind.MORE_flag & 1)) {
3513 3513 if (!(rval.r_val1 & MOREDATA))
3514 3514 msg->msg_flags |= MSG_EOR;
3515 3515 else
3516 3516 so->so_state |= SS_SAVEDEOR;
3517 3517 }
3518 3518 freemsg(mp);
3519 3519 /*
3520 3520 * If some data was received (i.e. not EOF) and the
3521 3521 * read/recv* has not been satisfied wait for some more.
3522 3522 * Not possible to wait if control info was received.
3523 3523 */
3524 3524 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
3525 3525 controllen == 0 &&
3526 3526 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
3527 3527 mutex_exit(&so->so_lock);
3528 3528 pflag = opflag | MSG_NOMARK;
3529 3529 goto retry;
3530 3530 }
3531 3531 goto out_locked;
3532 3532 }
3533 3533 case T_EXDATA_IND: {
3534 3534 dprintso(so, 1,
3535 3535 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
3536 3536 "state %s\n",
3537 3537 sti->sti_oobsigcnt, sti->sti_oobcnt,
3538 3538 saved_resid - uiop->uio_resid,
3539 3539 pr_state(so->so_state, so->so_mode)));
3540 3540 /*
3541 3541 * kstrgetmsg handles MSGMARK so there is nothing to
3542 3542 * inspect in the T_EXDATA_IND.
3543 3543 * strsock_proto makes the stream head queue the T_EXDATA_IND
3544 3544 * as a separate message with no M_DATA component. Furthermore,
3545 3545 * the stream head does not consolidate M_DATA messages onto
3546 3546 * an MSGMARK'ed message ensuring that the T_EXDATA_IND
3547 3547 * remains a message by itself. This is needed since MSGMARK
3548 3548 * marks both the whole message as well as the last byte
3549 3549 * of the message.
3550 3550 */
3551 3551 freemsg(mp);
3552 3552 ASSERT(uiop->uio_resid == saved_resid); /* No data */
3553 3553 if (flags & MSG_PEEK) {
3554 3554 /*
3555 3555 * Even though we are peeking we consume the
3556 3556 * T_EXDATA_IND thereby moving the mark information
3557 3557 * to SS_RCVATMARK. Then the oob code below will
3558 3558 * retry the peeking kstrgetmsg.
3559 3559 * Note that the stream head read queue is
3560 3560 * never flushed without holding SOREADLOCKED
3561 3561 * thus the T_EXDATA_IND can not disappear
3562 3562 * underneath us.
3563 3563 */
3564 3564 dprintso(so, 1,
3565 3565 ("sotpi_recvmsg: consume EXDATA_IND "
3566 3566 "counts %d/%d state %s\n",
3567 3567 sti->sti_oobsigcnt,
3568 3568 sti->sti_oobcnt,
3569 3569 pr_state(so->so_state, so->so_mode)));
3570 3570
/* Same flags as the normal read, but without MSG_IPEEK. */
3571 3571 pflag = MSG_ANY | MSG_DELAYERROR;
3572 3572 if (so->so_mode & SM_ATOMIC)
3573 3573 pflag |= MSG_DISCARDTAIL;
3574 3574
3575 3575 pri = 0;
3576 3576 mp = NULL;
3577 3577
3578 3578 error = kstrgetmsg(SOTOV(so), &mp, uiop,
3579 3579 &pri, &pflag, (clock_t)-1, &rval);
3580 3580 ASSERT(uiop->uio_resid == saved_resid);
3581 3581
3582 3582 if (error) {
3583 3583 #ifdef SOCK_DEBUG
3584 3584 if (error != EWOULDBLOCK && error != EINTR) {
3585 3585 eprintsoline(so, error);
3586 3586 }
3587 3587 #endif /* SOCK_DEBUG */
3588 3588 goto out;
3589 3589 }
3590 3590 ASSERT(mp);
3591 3591 tpr = (union T_primitives *)mp->b_rptr;
3592 3592 ASSERT(tpr->type == T_EXDATA_IND);
3593 3593 freemsg(mp);
3594 3594 } /* end "if (flags & MSG_PEEK)" */
3595 3595
3596 3596 /*
3597 3597 * Decrement the number of queued and pending oob.
3598 3598 *
3599 3599 * SS_RCVATMARK is cleared when we read past a mark.
3600 3600 * SS_HAVEOOBDATA is cleared when we've read past the
3601 3601 * last mark.
3602 3602 * SS_OOBPEND is cleared if we've read past the last
3603 3603 * mark and no (new) SIGURG has been posted.
3604 3604 */
3605 3605 mutex_enter(&so->so_lock);
3606 3606 ASSERT(so_verify_oobstate(so));
3607 3607 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
3608 3608 ASSERT(sti->sti_oobsigcnt > 0);
3609 3609 sti->sti_oobsigcnt--;
3610 3610 ASSERT(sti->sti_oobcnt > 0);
3611 3611 sti->sti_oobcnt--;
3612 3612 /*
3613 3613 * Since the T_EXDATA_IND has been removed from the stream
3614 3614 * head, but we have not read data past the mark,
3615 3615 * sockfs needs to track that the socket is still at the mark.
3616 3616 *
3617 3617 * Since no data was received call kstrgetmsg again to wait
3618 3618 * for data.
3619 3619 */
3620 3620 so->so_state |= SS_RCVATMARK;
3621 3621 mutex_exit(&so->so_lock);
3622 3622 dprintso(so, 1,
3623 3623 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
3624 3624 sti->sti_oobsigcnt, sti->sti_oobcnt,
3625 3625 pr_state(so->so_state, so->so_mode)));
3626 3626 pflag = opflag;
3627 3627 goto retry;
3628 3628 }
3629 3629 default:
3630 3630 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n",
3631 3631 (void *)so, tpr->type, (void *)mp);
3632 3632 ASSERT(0);
3633 3633 freemsg(mp);
3634 3634 error = EPROTO;
3635 3635 eprintsoline(so, error);
3636 3636 goto out;
3637 3637 }
3638 3638 /* NOTREACHED */
/* out: entered without so_lock; out_locked: entered with so_lock held. */
3639 3639 out:
3640 3640 mutex_enter(&so->so_lock);
3641 3641 out_locked:
3642 3642 so_unlock_read(so); /* Clear SOREADLOCKED */
3643 3643 mutex_exit(&so->so_lock);
3644 3644 return (error);
3645 3645 }
3646 3646
3647 3647 /*
3648 3648 * Sending data with options on a datagram socket.
3649 3649 * Assumes caller has verified that SS_ISBOUND etc. are set.
3650 3650 *
3651 3651 * For AF_UNIX the destination address may be already in
3652 3652 * internal form, as indicated by sti->sti_faddr_noxlate
3653 3653 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to
3654 3654 * translate the destination address to internal form.
3655 3655 *
3656 3656 * The source address is passed as an option. If passing
3657 3657 * file descriptors, those are passed as file pointers in
3658 3658 * another option.
3659 3659 */
3660 3660 static int
3661 3661 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen,
3662 3662 struct uio *uiop, void *control, t_uscalar_t controllen, int flags)
3663 3663 {
3664 3664 struct T_unitdata_req tudr;
3665 3665 mblk_t *mp;
3666 3666 int error;
3667 3667 void *addr;
3668 3668 socklen_t addrlen;
3669 3669 void *src;
3670 3670 socklen_t srclen;
3671 3671 ssize_t len;
3672 3672 int size;
3673 3673 struct T_opthdr toh;
3674 3674 struct fdbuf *fdbuf;
3675 3675 t_uscalar_t optlen;
3676 3676 void *fds;
3677 3677 int fdlen;
3678 3678 sotpi_info_t *sti = SOTOTPI(so);
3679 3679
3680 3680 ASSERT(name && namelen);
3681 3681 ASSERT(control && controllen);
3682 3682
3683 3683 len = uiop->uio_resid;
3684 3684 if (len > (ssize_t)sti->sti_tidu_size) {
3685 3685 return (EMSGSIZE);
3686 3686 }
3687 3687
3688 3688 if (sti->sti_faddr_noxlate == 0 &&
3689 3689 (flags & MSG_SENDTO_NOXLATE) == 0) {
3690 3690 /*
3691 3691 * Length and family checks.
3692 3692 * Don't verify internal form.
3693 3693 */
3694 3694 error = so_addr_verify(so, name, namelen);
3695 3695 if (error) {
3696 3696 eprintsoline(so, error);
3697 3697 return (error);
3698 3698 }
3699 3699 }
3700 3700
3701 3701 if (so->so_family == AF_UNIX) {
3702 3702 if (sti->sti_faddr_noxlate) {
3703 3703 /*
3704 3704 * Already have a transport internal address. Do not
3705 3705 * pass any (transport internal) source address.
3706 3706 */
3707 3707 addr = name;
3708 3708 addrlen = namelen;
3709 3709 src = NULL;
3710 3710 srclen = 0;
3711 3711 } else if (flags & MSG_SENDTO_NOXLATE) {
3712 3712 /*
3713 3713 * Have an internal form dest. address.
3714 3714 * Pass the source address as usual.
3715 3715 */
3716 3716 addr = name;
3717 3717 addrlen = namelen;
3718 3718 src = sti->sti_laddr_sa;
3719 3719 srclen = (socklen_t)sti->sti_laddr_len;
3720 3720 } else {
3721 3721 /*
3722 3722 * Pass the sockaddr_un source address as an option
3723 3723 * and translate the remote address.
3724 3724 *
3725 3725 * Note that this code does not prevent sti_laddr_sa
3726 3726 * from changing while it is being used. Thus
3727 3727 * if an unbind+bind occurs concurrently with this
3728 3728 * send the peer might see a partially new and a
3729 3729 * partially old "from" address.
3730 3730 */
3731 3731 src = sti->sti_laddr_sa;
3732 3732 srclen = (socklen_t)sti->sti_laddr_len;
3733 3733 dprintso(so, 1,
3734 3734 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n",
3735 3735 srclen, src));
3736 3736 /*
3737 3737 * The sendmsg caller specified a destination
3738 3738 * address, which we must translate into our
3739 3739 * internal form. addr = &sti->sti_ux_taddr
3740 3740 */
3741 3741 error = so_ux_addr_xlate(so, name, namelen,
3742 3742 (flags & MSG_XPG4_2),
3743 3743 &addr, &addrlen);
3744 3744 if (error) {
3745 3745 eprintsoline(so, error);
3746 3746 return (error);
3747 3747 }
3748 3748 }
3749 3749 } else {
3750 3750 addr = name;
3751 3751 addrlen = namelen;
3752 3752 src = NULL;
3753 3753 srclen = 0;
3754 3754 }
3755 3755 optlen = so_optlen(control, controllen,
3756 3756 !(flags & MSG_XPG4_2));
3757 3757 tudr.PRIM_type = T_UNITDATA_REQ;
3758 3758 tudr.DEST_length = addrlen;
3759 3759 tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
3760 3760 if (srclen != 0)
3761 3761 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) +
3762 3762 _TPI_ALIGN_TOPT(srclen));
3763 3763 else
3764 3764 tudr.OPT_length = optlen;
3765 3765 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
3766 3766 _TPI_ALIGN_TOPT(addrlen));
3767 3767
3768 3768 size = tudr.OPT_offset + tudr.OPT_length;
3769 3769
3770 3770 /*
3771 3771 * File descriptors only when SM_FDPASSING set.
3772 3772 */
3773 3773 error = so_getfdopt(control, controllen,
3774 3774 !(flags & MSG_XPG4_2), &fds, &fdlen);
3775 3775 if (error)
3776 3776 return (error);
3777 3777 if (fdlen != -1) {
3778 3778 if (!(so->so_mode & SM_FDPASSING))
3779 3779 return (EOPNOTSUPP);
3780 3780
3781 3781 error = fdbuf_create(fds, fdlen, &fdbuf);
3782 3782 if (error)
3783 3783 return (error);
3784 3784
3785 3785 /*
3786 3786 * Pre-allocate enough additional space for lower level modules
3787 3787 * to append an option (e.g. see tl_unitdata). The following
3788 3788 * is enough extra space for the largest option we might append.
3789 3789 */
3790 3790 size += sizeof (struct T_opthdr) + ucredsize;
3791 3791 mp = fdbuf_allocmsg(size, fdbuf);
3792 3792 } else {
3793 3793 mp = soallocproto(size, _ALLOC_INTR, CRED());
3794 3794 if (mp == NULL) {
3795 3795 /*
3796 3796 * Caught a signal waiting for memory.
3797 3797 * Let send* return EINTR.
3798 3798 */
3799 3799 return (EINTR);
3800 3800 }
3801 3801 }
3802 3802 soappendmsg(mp, &tudr, sizeof (tudr));
3803 3803 soappendmsg(mp, addr, addrlen);
3804 3804 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
3805 3805
3806 3806 if (fdlen != -1) {
3807 3807 ASSERT(fdbuf != NULL);
3808 3808 toh.level = SOL_SOCKET;
3809 3809 toh.name = SO_FILEP;
3810 3810 toh.len = fdbuf->fd_size +
3811 3811 (t_uscalar_t)sizeof (struct T_opthdr);
3812 3812 toh.status = 0;
3813 3813 soappendmsg(mp, &toh, sizeof (toh));
3814 3814 soappendmsg(mp, fdbuf, fdbuf->fd_size);
3815 3815 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
3816 3816 }
3817 3817 if (srclen != 0) {
3818 3818 /*
3819 3819 * There is a AF_UNIX sockaddr_un to include as a source
3820 3820 * address option.
3821 3821 */
3822 3822 toh.level = SOL_SOCKET;
3823 3823 toh.name = SO_SRCADDR;
3824 3824 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
3825 3825 toh.status = 0;
3826 3826 soappendmsg(mp, &toh, sizeof (toh));
3827 3827 soappendmsg(mp, src, srclen);
3828 3828 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
3829 3829 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
3830 3830 }
3831 3831 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3832 3832 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
3833 3833 /*
3834 3834 * Normally at most 3 bytes left in the message, but we might have
3835 3835 * allowed for extra space if we're passing fd's through.
3836 3836 */
3837 3837 ASSERT(MBLKL(mp) <= (ssize_t)size);
3838 3838
3839 3839 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3840 3840 if (AU_AUDITING())
3841 3841 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
3842 3842
3843 3843 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
3844 3844 #ifdef SOCK_DEBUG
3845 3845 if (error) {
3846 3846 eprintsoline(so, error);
3847 3847 }
3848 3848 #endif /* SOCK_DEBUG */
3849 3849 return (error);
3850 3850 }
3851 3851
3852 3852 /*
3853 3853 * Sending data with options on a connected stream socket.
3854 3854 * Assumes caller has verified that SS_ISCONNECTED is set.
3855 3855 */
3856 3856 static int
3857 3857 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control,
3858 3858 t_uscalar_t controllen, int flags)
3859 3859 {
3860 3860 struct T_optdata_req tdr;
3861 3861 mblk_t *mp;
3862 3862 int error;
3863 3863 ssize_t iosize;
3864 3864 int size;
3865 3865 struct fdbuf *fdbuf;
3866 3866 t_uscalar_t optlen;
3867 3867 void *fds;
3868 3868 int fdlen;
3869 3869 struct T_opthdr toh;
3870 3870 sotpi_info_t *sti = SOTOTPI(so);
3871 3871
3872 3872 dprintso(so, 1,
3873 3873 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid));
3874 3874
3875 3875 /*
3876 3876 * Has to be bound and connected. However, since no locks are
3877 3877 * held the state could have changed after sotpi_sendmsg checked it
3878 3878 * thus it is not possible to ASSERT on the state.
3879 3879 */
3880 3880
3881 3881 /* Options on connection-oriented only when SM_OPTDATA set. */
3882 3882 if (!(so->so_mode & SM_OPTDATA))
3883 3883 return (EOPNOTSUPP);
3884 3884
3885 3885 do {
3886 3886 /*
3887 3887 * Set the MORE flag if uio_resid does not fit in this
3888 3888 * message or if the caller passed in "more".
3889 3889 * Error for transports with zero tidu_size.
3890 3890 */
3891 3891 tdr.PRIM_type = T_OPTDATA_REQ;
3892 3892 iosize = sti->sti_tidu_size;
3893 3893 if (iosize <= 0)
3894 3894 return (EMSGSIZE);
3895 3895 if (uiop->uio_resid > iosize) {
3896 3896 tdr.DATA_flag = 1;
3897 3897 } else {
3898 3898 if (more)
3899 3899 tdr.DATA_flag = 1;
3900 3900 else
3901 3901 tdr.DATA_flag = 0;
3902 3902 iosize = uiop->uio_resid;
3903 3903 }
3904 3904 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n",
3905 3905 tdr.DATA_flag, iosize));
3906 3906
3907 3907 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2));
3908 3908 tdr.OPT_length = optlen;
3909 3909 tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
3910 3910
3911 3911 size = (int)sizeof (tdr) + optlen;
3912 3912 /*
3913 3913 * File descriptors only when SM_FDPASSING set.
3914 3914 */
3915 3915 error = so_getfdopt(control, controllen,
3916 3916 !(flags & MSG_XPG4_2), &fds, &fdlen);
3917 3917 if (error)
3918 3918 return (error);
3919 3919 if (fdlen != -1) {
3920 3920 if (!(so->so_mode & SM_FDPASSING))
3921 3921 return (EOPNOTSUPP);
3922 3922
3923 3923 error = fdbuf_create(fds, fdlen, &fdbuf);
3924 3924 if (error)
3925 3925 return (error);
3926 3926
3927 3927 /*
3928 3928 * Pre-allocate enough additional space for lower level
3929 3929 * modules to append an option (e.g. see tl_unitdata).
3930 3930 * The following is enough extra space for the largest
3931 3931 * option we might append.
3932 3932 */
3933 3933 size += sizeof (struct T_opthdr) + ucredsize;
3934 3934 mp = fdbuf_allocmsg(size, fdbuf);
3935 3935 } else {
3936 3936 mp = soallocproto(size, _ALLOC_INTR, CRED());
3937 3937 if (mp == NULL) {
3938 3938 /*
3939 3939 * Caught a signal waiting for memory.
3940 3940 * Let send* return EINTR.
3941 3941 */
3942 3942 return (EINTR);
3943 3943 }
3944 3944 }
3945 3945 soappendmsg(mp, &tdr, sizeof (tdr));
3946 3946
3947 3947 if (fdlen != -1) {
3948 3948 ASSERT(fdbuf != NULL);
3949 3949 toh.level = SOL_SOCKET;
3950 3950 toh.name = SO_FILEP;
3951 3951 toh.len = fdbuf->fd_size +
3952 3952 (t_uscalar_t)sizeof (struct T_opthdr);
3953 3953 toh.status = 0;
3954 3954 soappendmsg(mp, &toh, sizeof (toh));
3955 3955 soappendmsg(mp, fdbuf, fdbuf->fd_size);
3956 3956 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
3957 3957 }
3958 3958 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
3959 3959 /*
3960 3960 * Normally at most 3 bytes left in the message, but we might
3961 3961 * have allowed for extra space if we're passing fd's through.
3962 3962 */
3963 3963 ASSERT(MBLKL(mp) <= (ssize_t)size);
3964 3964
3965 3965 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3966 3966
3967 3967 error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
3968 3968 0, MSG_BAND, 0);
3969 3969 if (error) {
3970 3970 eprintsoline(so, error);
3971 3971 return (error);
3972 3972 }
3973 3973 control = NULL;
3974 3974 if (uiop->uio_resid > 0) {
3975 3975 /*
3976 3976 * Recheck for fatal errors. Fail write even though
3977 3977 * some data have been written. This is consistent
3978 3978 * with strwrite semantics and BSD sockets semantics.
3979 3979 */
3980 3980 if (so->so_state & SS_CANTSENDMORE) {
3981 3981 eprintsoline(so, error);
3982 3982 return (EPIPE);
3983 3983 }
3984 3984 if (so->so_error != 0) {
3985 3985 mutex_enter(&so->so_lock);
3986 3986 error = sogeterr(so, B_TRUE);
3987 3987 mutex_exit(&so->so_lock);
3988 3988 if (error != 0) {
3989 3989 eprintsoline(so, error);
3990 3990 return (error);
3991 3991 }
3992 3992 }
3993 3993 }
3994 3994 } while (uiop->uio_resid > 0);
3995 3995 return (0);
3996 3996 }
3997 3997
3998 3998 /*
3999 3999 * Sending data on a datagram socket.
4000 4000 * Assumes caller has verified that SS_ISBOUND etc. are set.
4001 4001 *
4002 4002 * For AF_UNIX the destination address may be already in
4003 4003 * internal form, as indicated by sti->sti_faddr_noxlate
4004 4004 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to
4005 4005 * translate the destination address to internal form.
4006 4006 *
4007 4007 * The source address is passed as an option.
4008 4008 */
4009 4009 int
4010 4010 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen,
4011 4011 struct uio *uiop, int flags)
4012 4012 {
4013 4013 struct T_unitdata_req tudr;
4014 4014 mblk_t *mp;
4015 4015 int error;
4016 4016 void *addr;
4017 4017 socklen_t addrlen;
4018 4018 void *src;
4019 4019 socklen_t srclen;
4020 4020 ssize_t len;
4021 4021 sotpi_info_t *sti = SOTOTPI(so);
4022 4022
4023 4023 ASSERT(name != NULL && namelen != 0);
4024 4024
4025 4025 len = uiop->uio_resid;
4026 4026 if (len > sti->sti_tidu_size) {
4027 4027 error = EMSGSIZE;
4028 4028 goto done;
4029 4029 }
4030 4030
4031 4031 if (sti->sti_faddr_noxlate == 0 &&
4032 4032 (flags & MSG_SENDTO_NOXLATE) == 0) {
4033 4033 /*
4034 4034 * Length and family checks.
4035 4035 * Don't verify internal form.
4036 4036 */
4037 4037 error = so_addr_verify(so, name, namelen);
4038 4038 if (error != 0)
4039 4039 goto done;
4040 4040 }
4041 4041
4042 4042 if (sti->sti_direct) /* Never on AF_UNIX */
4043 4043 return (sodgram_direct(so, name, namelen, uiop, flags));
4044 4044
4045 4045 if (so->so_family == AF_UNIX) {
4046 4046 if (sti->sti_faddr_noxlate) {
4047 4047 /*
4048 4048 * Already have a transport internal address. Do not
4049 4049 * pass any (transport internal) source address.
4050 4050 */
4051 4051 addr = name;
4052 4052 addrlen = namelen;
4053 4053 src = NULL;
4054 4054 srclen = 0;
4055 4055 } else if (flags & MSG_SENDTO_NOXLATE) {
4056 4056 /*
4057 4057 * Have an internal form dest. address.
4058 4058 * Pass the source address as usual.
4059 4059 */
4060 4060 addr = name;
4061 4061 addrlen = namelen;
4062 4062 src = sti->sti_laddr_sa;
4063 4063 srclen = (socklen_t)sti->sti_laddr_len;
4064 4064 } else {
4065 4065 /*
4066 4066 * Pass the sockaddr_un source address as an option
4067 4067 * and translate the remote address.
4068 4068 *
4069 4069 * Note that this code does not prevent sti_laddr_sa
4070 4070 * from changing while it is being used. Thus
4071 4071 * if an unbind+bind occurs concurrently with this
4072 4072 * send the peer might see a partially new and a
4073 4073 * partially old "from" address.
4074 4074 */
4075 4075 src = sti->sti_laddr_sa;
4076 4076 srclen = (socklen_t)sti->sti_laddr_len;
4077 4077 dprintso(so, 1,
4078 4078 ("sosend_dgram UNIX: srclen %d, src %p\n",
4079 4079 srclen, src));
4080 4080 /*
4081 4081 * The sendmsg caller specified a destination
4082 4082 * address, which we must translate into our
4083 4083 * internal form. addr = &sti->sti_ux_taddr
4084 4084 */
4085 4085 error = so_ux_addr_xlate(so, name, namelen,
4086 4086 (flags & MSG_XPG4_2),
4087 4087 &addr, &addrlen);
4088 4088 if (error) {
4089 4089 eprintsoline(so, error);
4090 4090 goto done;
4091 4091 }
4092 4092 }
4093 4093 } else {
4094 4094 addr = name;
4095 4095 addrlen = namelen;
4096 4096 src = NULL;
4097 4097 srclen = 0;
4098 4098 }
4099 4099 tudr.PRIM_type = T_UNITDATA_REQ;
4100 4100 tudr.DEST_length = addrlen;
4101 4101 tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
4102 4102 if (srclen == 0) {
4103 4103 tudr.OPT_length = 0;
4104 4104 tudr.OPT_offset = 0;
4105 4105
4106 4106 mp = soallocproto2(&tudr, sizeof (tudr),
4107 4107 addr, addrlen, 0, _ALLOC_INTR, CRED());
4108 4108 if (mp == NULL) {
4109 4109 /*
4110 4110 * Caught a signal waiting for memory.
4111 4111 * Let send* return EINTR.
4112 4112 */
4113 4113 error = EINTR;
4114 4114 goto done;
4115 4115 }
4116 4116 } else {
4117 4117 /*
4118 4118 * There is a AF_UNIX sockaddr_un to include as a source
4119 4119 * address option.
4120 4120 */
4121 4121 struct T_opthdr toh;
4122 4122 ssize_t size;
4123 4123
4124 4124 tudr.OPT_length = (t_scalar_t)(sizeof (toh) +
4125 4125 _TPI_ALIGN_TOPT(srclen));
4126 4126 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
4127 4127 _TPI_ALIGN_TOPT(addrlen));
4128 4128
4129 4129 toh.level = SOL_SOCKET;
4130 4130 toh.name = SO_SRCADDR;
4131 4131 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
4132 4132 toh.status = 0;
4133 4133
4134 4134 size = tudr.OPT_offset + tudr.OPT_length;
4135 4135 mp = soallocproto2(&tudr, sizeof (tudr),
4136 4136 addr, addrlen, size, _ALLOC_INTR, CRED());
4137 4137 if (mp == NULL) {
4138 4138 /*
4139 4139 * Caught a signal waiting for memory.
4140 4140 * Let send* return EINTR.
4141 4141 */
4142 4142 error = EINTR;
4143 4143 goto done;
4144 4144 }
4145 4145 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
4146 4146 soappendmsg(mp, &toh, sizeof (toh));
4147 4147 soappendmsg(mp, src, srclen);
4148 4148 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
4149 4149 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
4150 4150 }
4151 4151
4152 4152 if (AU_AUDITING())
4153 4153 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
4154 4154
4155 4155 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
4156 4156 done:
4157 4157 #ifdef SOCK_DEBUG
4158 4158 if (error) {
4159 4159 eprintsoline(so, error);
4160 4160 }
4161 4161 #endif /* SOCK_DEBUG */
4162 4162 return (error);
4163 4163 }
4164 4164
4165 4165 /*
4166 4166 * Sending data on a connected stream socket.
4167 4167 * Assumes caller has verified that SS_ISCONNECTED is set.
4168 4168 */
4169 4169 int
4170 4170 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more,
4171 4171 int sflag)
4172 4172 {
4173 4173 struct T_data_req tdr;
4174 4174 mblk_t *mp;
4175 4175 int error;
4176 4176 ssize_t iosize;
4177 4177 sotpi_info_t *sti = SOTOTPI(so);
4178 4178
4179 4179 dprintso(so, 1,
4180 4180 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n",
4181 4181 (void *)so, uiop->uio_resid, prim, sflag));
4182 4182
4183 4183 /*
4184 4184 * Has to be bound and connected. However, since no locks are
4185 4185 * held the state could have changed after sotpi_sendmsg checked it
4186 4186 * thus it is not possible to ASSERT on the state.
4187 4187 */
4188 4188
4189 4189 do {
4190 4190 /*
4191 4191 * Set the MORE flag if uio_resid does not fit in this
4192 4192 * message or if the caller passed in "more".
4193 4193 * Error for transports with zero tidu_size.
4194 4194 */
4195 4195 tdr.PRIM_type = prim;
4196 4196 iosize = sti->sti_tidu_size;
4197 4197 if (iosize <= 0)
4198 4198 return (EMSGSIZE);
4199 4199 if (uiop->uio_resid > iosize) {
4200 4200 tdr.MORE_flag = 1;
4201 4201 } else {
4202 4202 if (more)
4203 4203 tdr.MORE_flag = 1;
4204 4204 else
4205 4205 tdr.MORE_flag = 0;
4206 4206 iosize = uiop->uio_resid;
4207 4207 }
4208 4208 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n",
4209 4209 prim, tdr.MORE_flag, iosize));
4210 4210 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED());
4211 4211 if (mp == NULL) {
4212 4212 /*
4213 4213 * Caught a signal waiting for memory.
4214 4214 * Let send* return EINTR.
4215 4215 */
4216 4216 return (EINTR);
4217 4217 }
4218 4218
4219 4219 error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
4220 4220 0, sflag | MSG_BAND, 0);
4221 4221 if (error) {
4222 4222 eprintsoline(so, error);
4223 4223 return (error);
4224 4224 }
4225 4225 if (uiop->uio_resid > 0) {
4226 4226 /*
4227 4227 * Recheck for fatal errors. Fail write even though
4228 4228 * some data have been written. This is consistent
4229 4229 * with strwrite semantics and BSD sockets semantics.
4230 4230 */
4231 4231 if (so->so_state & SS_CANTSENDMORE) {
4232 4232 eprintsoline(so, error);
4233 4233 return (EPIPE);
4234 4234 }
4235 4235 if (so->so_error != 0) {
4236 4236 mutex_enter(&so->so_lock);
4237 4237 error = sogeterr(so, B_TRUE);
4238 4238 mutex_exit(&so->so_lock);
4239 4239 if (error != 0) {
4240 4240 eprintsoline(so, error);
4241 4241 return (error);
4242 4242 }
4243 4243 }
4244 4244 }
4245 4245 } while (uiop->uio_resid > 0);
4246 4246 return (0);
4247 4247 }
4248 4248
4249 4249 /*
4250 4250 * Check the state for errors and call the appropriate send function.
4251 4251 *
4252 4252 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set)
4253 4253 * this function issues a setsockopt to toggle SO_DONTROUTE before and
4254 4254 * after sending the message.
4255 4255 *
4256 4256 * The caller may optionally specify a destination address, for either
4257 4257 * stream or datagram sockets. This table summarizes the cases:
4258 4258 *
4259 4259 * Socket type Dest. given Connected Result
4260 4260 * ----------- ----------- --------- --------------
4261 4261 * Stream * Yes send to conn. addr.
4262 4262 * Stream * No error ENOTCONN
4263 4263 * Dgram yes * send to given addr.
4264 4264 * Dgram no yes send to conn. addr.
4265 4265 * Dgram no no error EDESTADDRREQ
4266 4266 *
4267 4267 * There are subtleties around the destination address when using
4268 4268 * AF_UNIX datagram sockets. When the sendmsg call specifies the
4269 4269 * destination address, it's in (struct sockaddr_un) form and we
4270 4270 * need to translate it to our internal form (struct so_ux_addr).
4271 4271 *
4272 4272 * When the sendmsg call does not specify a destination address
4273 4273 * we're using the peer address saved during sotpi_connect, and
4274 4274 * that address is already in internal form. In this case, the
4275 4275 * (internal only) flag MSG_SENDTO_NOXLATE is set in the flags
4276 4276 * passed to sosend_dgram or sosend_dgramcmsg to indicate that
4277 4277 * those functions should skip translation to internal form.
4278 4278 * Avoiding that translation is not only more efficient, but it's
4279 4279 * also necessary when a process does a connect on an AF_UNIX
4280 4280 * datagram socket and then drops privileges. After the process
4281 4281 * has dropped privileges, it may no longer be able to lookup the
4282 4282 * the external name in the filesystem, but it should still be
4283 4283 * able to send messages on the connected socket by leaving the
4284 4284 * destination name unspecified.
4285 4285 *
4286 4286 * Yet more subtleties arise with sockets connected by socketpair(),
4287 4287 * which puts internal form addresses in the fields where normally
4288 4288 * the external form is found, and sets sti_faddr_noxlate=1, which
4289 4289 * (like flag MSG_SENDTO_NOXLATE) causes the sosend_dgram functions
4290 4290 * to skip translation of destination addresses to internal form.
4291 4291 * However, beware that the flag sti_faddr_noxlate=1 also triggers
4292 4292 * different behaviour almost everywhere AF_UNIX addresses appear.
4293 4293 */
4294 4294 static int
4295 4295 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
4296 4296 struct cred *cr)
4297 4297 {
4298 4298 int so_state;
4299 4299 int so_mode;
4300 4300 int error;
4301 4301 struct sockaddr *name;
4302 4302 t_uscalar_t namelen;
4303 4303 int dontroute;
4304 4304 int flags;
4305 4305 sotpi_info_t *sti = SOTOTPI(so);
4306 4306
4307 4307 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
4308 4308 (void *)so, (void *)msg, msg->msg_flags,
4309 4309 pr_state(so->so_state, so->so_mode), so->so_error));
4310 4310
4311 4311 if (so->so_version == SOV_STREAM) {
4312 4312 /* The imaginary "sockmod" has been popped - act as a stream */
4313 4313 so_update_attrs(so, SOMOD);
4314 4314 return (strwrite(SOTOV(so), uiop, cr));
4315 4315 }
4316 4316
4317 4317 mutex_enter(&so->so_lock);
4318 4318 so_state = so->so_state;
4319 4319
4320 4320 if (so_state & SS_CANTSENDMORE) {
4321 4321 mutex_exit(&so->so_lock);
4322 4322 return (EPIPE);
4323 4323 }
4324 4324
4325 4325 if (so->so_error != 0) {
4326 4326 error = sogeterr(so, B_TRUE);
4327 4327 if (error != 0) {
4328 4328 mutex_exit(&so->so_lock);
4329 4329 return (error);
4330 4330 }
4331 4331 }
4332 4332
4333 4333 name = (struct sockaddr *)msg->msg_name;
4334 4334 namelen = msg->msg_namelen;
4335 4335 flags = msg->msg_flags;
4336 4336
4337 4337 /*
4338 4338 * Historically, this function does not validate the flags
4339 4339 * passed in, and any errant bits are ignored. However,
4340 4340 * we would not want any such errant flag bits accidently
4341 4341 * being treated as one of the internal-only flags, so
4342 4342 * clear the internal-only flag bits.
4343 4343 */
4344 4344 flags &= ~MSG_SENDTO_NOXLATE;
4345 4345
4346 4346 so_mode = so->so_mode;
4347 4347
4348 4348 if (name == NULL) {
4349 4349 if (!(so_state & SS_ISCONNECTED)) {
4350 4350 mutex_exit(&so->so_lock);
4351 4351 if (so_mode & SM_CONNREQUIRED)
4352 4352 return (ENOTCONN);
4353 4353 else
4354 4354 return (EDESTADDRREQ);
4355 4355 }
4356 4356 /*
4357 4357 * This is a connected socket.
4358 4358 */
4359 4359 if (so_mode & SM_CONNREQUIRED) {
4360 4360 /*
4361 4361 * This is a connected STREAM socket,
4362 4362 * destination not specified.
4363 4363 */
4364 4364 name = NULL;
4365 4365 namelen = 0;
4366 4366 } else {
4367 4367 /*
4368 4368 * Datagram send on connected socket with
4369 4369 * the destination name not specified.
4370 4370 * Use the peer address from connect.
4371 4371 */
4372 4372 if (so->so_family == AF_UNIX) {
4373 4373 /*
4374 4374 * Use the (internal form) address saved
4375 4375 * in sotpi_connect. See above.
4376 4376 */
4377 4377 name = (void *)&sti->sti_ux_faddr;
4378 4378 namelen = sizeof (sti->sti_ux_faddr);
4379 4379 flags |= MSG_SENDTO_NOXLATE;
4380 4380 } else {
4381 4381 ASSERT(sti->sti_faddr_sa);
4382 4382 name = sti->sti_faddr_sa;
4383 4383 namelen = (t_uscalar_t)sti->sti_faddr_len;
4384 4384 }
4385 4385 }
4386 4386 } else {
4387 4387 /*
4388 4388 * Sendmsg specifies a destination name
4389 4389 */
4390 4390 if (!(so_state & SS_ISCONNECTED) &&
4391 4391 (so_mode & SM_CONNREQUIRED)) {
4392 4392 /* i.e. TCP not connected */
4393 4393 mutex_exit(&so->so_lock);
4394 4394 return (ENOTCONN);
4395 4395 }
4396 4396 /*
4397 4397 * Ignore the address on connection-oriented sockets.
4398 4398 * Just like BSD this code does not generate an error for
4399 4399 * TCP (a CONNREQUIRED socket) when sending to an address
4400 4400 * passed in with sendto/sendmsg. Instead the data is
4401 4401 * delivered on the connection as if no address had been
4402 4402 * supplied.
4403 4403 */
4404 4404 if ((so_state & SS_ISCONNECTED) &&
4405 4405 !(so_mode & SM_CONNREQUIRED)) {
4406 4406 mutex_exit(&so->so_lock);
4407 4407 return (EISCONN);
4408 4408 }
4409 4409 if (!(so_state & SS_ISBOUND)) {
4410 4410 so_lock_single(so); /* Set SOLOCKED */
4411 4411 error = sotpi_bind(so, NULL, 0,
4412 4412 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
4413 4413 so_unlock_single(so, SOLOCKED);
4414 4414 if (error) {
4415 4415 mutex_exit(&so->so_lock);
4416 4416 eprintsoline(so, error);
4417 4417 return (error);
4418 4418 }
4419 4419 }
4420 4420 /*
4421 4421 * Handle delayed datagram errors. These are only queued
4422 4422 * when the application sets SO_DGRAM_ERRIND.
4423 4423 * Return the error if we are sending to the address
4424 4424 * that was returned in the last T_UDERROR_IND.
4425 4425 * If sending to some other address discard the delayed
4426 4426 * error indication.
4427 4427 */
4428 4428 if (sti->sti_delayed_error) {
4429 4429 struct T_uderror_ind *tudi;
4430 4430 void *addr;
4431 4431 t_uscalar_t addrlen;
4432 4432 boolean_t match = B_FALSE;
4433 4433
4434 4434 ASSERT(sti->sti_eaddr_mp);
4435 4435 error = sti->sti_delayed_error;
4436 4436 sti->sti_delayed_error = 0;
4437 4437 tudi =
4438 4438 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr;
4439 4439 addrlen = tudi->DEST_length;
4440 4440 addr = sogetoff(sti->sti_eaddr_mp,
4441 4441 tudi->DEST_offset, addrlen, 1);
4442 4442 ASSERT(addr); /* Checked by strsock_proto */
4443 4443 switch (so->so_family) {
4444 4444 case AF_INET: {
4445 4445 /* Compare just IP address and port */
4446 4446 sin_t *sin1 = (sin_t *)name;
4447 4447 sin_t *sin2 = (sin_t *)addr;
4448 4448
4449 4449 if (addrlen == sizeof (sin_t) &&
4450 4450 namelen == addrlen &&
4451 4451 sin1->sin_port == sin2->sin_port &&
4452 4452 sin1->sin_addr.s_addr ==
4453 4453 sin2->sin_addr.s_addr)
4454 4454 match = B_TRUE;
4455 4455 break;
4456 4456 }
4457 4457 case AF_INET6: {
4458 4458 /* Compare just IP address and port. Not flow */
4459 4459 sin6_t *sin1 = (sin6_t *)name;
4460 4460 sin6_t *sin2 = (sin6_t *)addr;
4461 4461
4462 4462 if (addrlen == sizeof (sin6_t) &&
4463 4463 namelen == addrlen &&
4464 4464 sin1->sin6_port == sin2->sin6_port &&
4465 4465 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
4466 4466 &sin2->sin6_addr))
4467 4467 match = B_TRUE;
4468 4468 break;
4469 4469 }
4470 4470 case AF_UNIX:
4471 4471 default:
4472 4472 if (namelen == addrlen &&
4473 4473 bcmp(name, addr, namelen) == 0)
4474 4474 match = B_TRUE;
4475 4475 }
4476 4476 if (match) {
4477 4477 freemsg(sti->sti_eaddr_mp);
4478 4478 sti->sti_eaddr_mp = NULL;
4479 4479 mutex_exit(&so->so_lock);
4480 4480 #ifdef DEBUG
4481 4481 dprintso(so, 0,
4482 4482 ("sockfs delayed error %d for %s\n",
4483 4483 error,
4484 4484 pr_addr(so->so_family, name, namelen)));
4485 4485 #endif /* DEBUG */
4486 4486 return (error);
4487 4487 }
4488 4488 freemsg(sti->sti_eaddr_mp);
4489 4489 sti->sti_eaddr_mp = NULL;
4490 4490 }
4491 4491 }
4492 4492 mutex_exit(&so->so_lock);
4493 4493
4494 4494 dontroute = 0;
4495 4495 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) {
4496 4496 uint32_t val;
4497 4497
4498 4498 val = 1;
4499 4499 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
4500 4500 &val, (t_uscalar_t)sizeof (val), cr);
4501 4501 if (error)
4502 4502 return (error);
4503 4503 dontroute = 1;
4504 4504 }
4505 4505
4506 4506 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) {
4507 4507 error = EOPNOTSUPP;
4508 4508 goto done;
4509 4509 }
4510 4510 if (msg->msg_controllen != 0) {
4511 4511 if (!(so_mode & SM_CONNREQUIRED)) {
4512 4512 so_update_attrs(so, SOMOD);
4513 4513 error = sosend_dgramcmsg(so, name, namelen, uiop,
4514 4514 msg->msg_control, msg->msg_controllen, flags);
4515 4515 } else {
4516 4516 if (flags & MSG_OOB) {
4517 4517 /* Can't generate T_EXDATA_REQ with options */
4518 4518 error = EOPNOTSUPP;
4519 4519 goto done;
4520 4520 }
4521 4521 so_update_attrs(so, SOMOD);
4522 4522 error = sosend_svccmsg(so, uiop,
4523 4523 !(flags & MSG_EOR),
4524 4524 msg->msg_control, msg->msg_controllen,
4525 4525 flags);
4526 4526 }
4527 4527 goto done;
4528 4528 }
4529 4529
4530 4530 so_update_attrs(so, SOMOD);
4531 4531 if (!(so_mode & SM_CONNREQUIRED)) {
4532 4532 /*
4533 4533 * If there is no SO_DONTROUTE to turn off return immediately
4534 4534 * from send_dgram. This can allow tail-call optimizations.
4535 4535 */
4536 4536 if (!dontroute) {
4537 4537 return (sosend_dgram(so, name, namelen, uiop, flags));
4538 4538 }
4539 4539 error = sosend_dgram(so, name, namelen, uiop, flags);
4540 4540 } else {
4541 4541 t_scalar_t prim;
4542 4542 int sflag;
4543 4543
4544 4544 /* Ignore msg_name in the connected state */
4545 4545 if (flags & MSG_OOB) {
4546 4546 prim = T_EXDATA_REQ;
4547 4547 /*
4548 4548 * Send down T_EXDATA_REQ even if there is flow
4549 4549 * control for data.
4550 4550 */
4551 4551 sflag = MSG_IGNFLOW;
4552 4552 } else {
4553 4553 if (so_mode & SM_BYTESTREAM) {
4554 4554 /* Byte stream transport - use write */
4555 4555 dprintso(so, 1, ("sotpi_sendmsg: write\n"));
4556 4556
4557 4557 /* Send M_DATA messages */
4558 4558 if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
4559 4559 (error = nl7c_data(so, uiop)) >= 0) {
4560 4560 /* NL7C consumed the data */
4561 4561 return (error);
4562 4562 }
4563 4563 /*
4564 4564 * If there is no SO_DONTROUTE to turn off,
4565 4565 * sti_direct is on, and there is no flow
4566 4566 * control, we can take the fast path.
4567 4567 */
4568 4568 if (!dontroute && sti->sti_direct != 0 &&
4569 4569 canputnext(SOTOV(so)->v_stream->sd_wrq)) {
4570 4570 return (sostream_direct(so, uiop,
4571 4571 NULL, cr));
4572 4572 }
4573 4573 error = strwrite(SOTOV(so), uiop, cr);
4574 4574 goto done;
4575 4575 }
4576 4576 prim = T_DATA_REQ;
4577 4577 sflag = 0;
4578 4578 }
4579 4579 /*
4580 4580 * If there is no SO_DONTROUTE to turn off return immediately
4581 4581 * from sosend_svc. This can allow tail-call optimizations.
4582 4582 */
4583 4583 if (!dontroute)
4584 4584 return (sosend_svc(so, uiop, prim,
4585 4585 !(flags & MSG_EOR), sflag));
4586 4586 error = sosend_svc(so, uiop, prim,
4587 4587 !(flags & MSG_EOR), sflag);
4588 4588 }
4589 4589 ASSERT(dontroute);
4590 4590 done:
4591 4591 if (dontroute) {
4592 4592 uint32_t val;
4593 4593
4594 4594 val = 0;
4595 4595 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
4596 4596 &val, (t_uscalar_t)sizeof (val), cr);
4597 4597 }
4598 4598 return (error);
4599 4599 }
4600 4600
4601 4601 /*
4602 4602 * kstrwritemp() has very similar semantics as that of strwrite().
4603 4603 * The main difference is it obtains mblks from the caller and also
4604 4604 * does not do any copy as done in strwrite() from user buffers to
4605 4605 * kernel buffers.
4606 4606 *
4607 4607 * Currently, this routine is used by sendfile to send data allocated
4608 4608 * within the kernel without any copying. This interface does not use the
4609 4609 * synchronous stream interface as synch. stream interface implies
4610 4610 * copying.
4611 4611 */
4612 4612 int
4613 4613 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
4614 4614 {
4615 4615 struct stdata *stp;
4616 4616 struct queue *wqp;
4617 4617 mblk_t *newmp;
4618 4618 char waitflag;
4619 4619 int tempmode;
4620 4620 int error = 0;
4621 4621 int done = 0;
4622 4622 struct sonode *so;
4623 4623 boolean_t direct;
4624 4624
4625 4625 ASSERT(vp->v_stream);
4626 4626 stp = vp->v_stream;
4627 4627
4628 4628 so = VTOSO(vp);
4629 4629 direct = _SOTOTPI(so)->sti_direct;
4630 4630
4631 4631 /*
4632 4632 * This is the sockfs direct fast path. canputnext() need
4633 4633 * not be accurate so we don't grab the sd_lock here. If
4634 4634 * we get flow-controlled, we grab sd_lock just before the
4635 4635 * do..while loop below to emulate what strwrite() does.
4636 4636 */
4637 4637 wqp = stp->sd_wrq;
4638 4638 if (canputnext(wqp) && direct &&
4639 4639 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
4640 4640 return (sostream_direct(so, NULL, mp, CRED()));
4641 4641 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
4642 4642 /* Fast check of flags before acquiring the lock */
4643 4643 mutex_enter(&stp->sd_lock);
4644 4644 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
4645 4645 mutex_exit(&stp->sd_lock);
4646 4646 if (error != 0) {
4647 4647 if (!(stp->sd_flag & STPLEX) &&
4648 4648 (stp->sd_wput_opt & SW_SIGPIPE)) {
4649 4649 error = EPIPE;
4650 4650 }
4651 4651 return (error);
4652 4652 }
4653 4653 }
4654 4654
4655 4655 waitflag = WRITEWAIT;
4656 4656 if (stp->sd_flag & OLDNDELAY)
4657 4657 tempmode = fmode & ~FNDELAY;
4658 4658 else
4659 4659 tempmode = fmode;
4660 4660
4661 4661 mutex_enter(&stp->sd_lock);
4662 4662 do {
4663 4663 if (canputnext(wqp)) {
4664 4664 mutex_exit(&stp->sd_lock);
4665 4665 if (stp->sd_wputdatafunc != NULL) {
4666 4666 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL,
4667 4667 NULL, NULL, NULL);
4668 4668 if (newmp == NULL) {
4669 4669 /* The caller will free mp */
4670 4670 return (ECOMM);
4671 4671 }
4672 4672 mp = newmp;
4673 4673 }
4674 4674 putnext(wqp, mp);
4675 4675 return (0);
4676 4676 }
4677 4677 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
4678 4678 &done);
4679 4679 } while (error == 0 && !done);
4680 4680
4681 4681 mutex_exit(&stp->sd_lock);
4682 4682 /*
4683 4683 * EAGAIN tells the application to try again. ENOMEM
4684 4684 * is returned only if the memory allocation size
4685 4685 * exceeds the physical limits of the system. ENOMEM
4686 4686 * can't be true here.
4687 4687 */
4688 4688 if (error == ENOMEM)
4689 4689 error = EAGAIN;
4690 4690 return (error);
4691 4691 }
4692 4692
4693 4693 /* ARGSUSED */
4694 4694 static int
4695 4695 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
4696 4696 struct cred *cr, mblk_t **mpp)
4697 4697 {
4698 4698 int error;
4699 4699
4700 4700 switch (so->so_family) {
4701 4701 case AF_INET:
4702 4702 case AF_INET6:
4703 4703 case AF_UNIX:
4704 4704 break;
4705 4705 default:
4706 4706 return (EAFNOSUPPORT);
4707 4707
4708 4708 }
4709 4709
4710 4710 if (so->so_state & SS_CANTSENDMORE)
4711 4711 return (EPIPE);
4712 4712
4713 4713 if (so->so_type != SOCK_STREAM)
4714 4714 return (EOPNOTSUPP);
4715 4715
4716 4716 if ((so->so_state & SS_ISCONNECTED) == 0)
4717 4717 return (ENOTCONN);
4718 4718
4719 4719 error = kstrwritemp(so->so_vnode, *mpp, fflag);
4720 4720 if (error == 0)
4721 4721 *mpp = NULL;
4722 4722 return (error);
4723 4723 }
4724 4724
4725 4725 /*
4726 4726 * Sending data on a datagram socket.
4727 4727 * Assumes caller has verified that SS_ISBOUND etc. are set.
4728 4728 */
4729 4729 /* ARGSUSED */
4730 4730 static int
4731 4731 sodgram_direct(struct sonode *so, struct sockaddr *name,
4732 4732 socklen_t namelen, struct uio *uiop, int flags)
4733 4733 {
4734 4734 struct T_unitdata_req tudr;
4735 4735 mblk_t *mp = NULL;
4736 4736 int error = 0;
4737 4737 void *addr;
4738 4738 socklen_t addrlen;
4739 4739 ssize_t len;
4740 4740 struct stdata *stp = SOTOV(so)->v_stream;
4741 4741 int so_state;
4742 4742 queue_t *udp_wq;
4743 4743 boolean_t connected;
4744 4744 mblk_t *mpdata = NULL;
4745 4745 sotpi_info_t *sti = SOTOTPI(so);
4746 4746 uint32_t auditing = AU_AUDITING();
4747 4747
4748 4748 ASSERT(name != NULL && namelen != 0);
4749 4749 ASSERT(!(so->so_mode & SM_CONNREQUIRED));
4750 4750 ASSERT(!(so->so_mode & SM_EXDATA));
4751 4751 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
4752 4752 ASSERT(SOTOV(so)->v_type == VSOCK);
4753 4753
4754 4754 /* Caller checked for proper length */
4755 4755 len = uiop->uio_resid;
4756 4756 ASSERT(len <= sti->sti_tidu_size);
4757 4757
4758 4758 /* Length and family checks have been done by caller */
4759 4759 ASSERT(name->sa_family == so->so_family);
4760 4760 ASSERT(so->so_family == AF_INET ||
4761 4761 (namelen == (socklen_t)sizeof (struct sockaddr_in6)));
4762 4762 ASSERT(so->so_family == AF_INET6 ||
4763 4763 (namelen == (socklen_t)sizeof (struct sockaddr_in)));
4764 4764
4765 4765 addr = name;
4766 4766 addrlen = namelen;
4767 4767
4768 4768 if (stp->sd_sidp != NULL &&
4769 4769 (error = straccess(stp, JCWRITE)) != 0)
4770 4770 goto done;
4771 4771
4772 4772 so_state = so->so_state;
4773 4773
4774 4774 connected = so_state & SS_ISCONNECTED;
4775 4775 if (!connected) {
4776 4776 tudr.PRIM_type = T_UNITDATA_REQ;
4777 4777 tudr.DEST_length = addrlen;
4778 4778 tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
4779 4779 tudr.OPT_length = 0;
4780 4780 tudr.OPT_offset = 0;
4781 4781
4782 4782 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0,
4783 4783 _ALLOC_INTR, CRED());
4784 4784 if (mp == NULL) {
4785 4785 /*
4786 4786 * Caught a signal waiting for memory.
4787 4787 * Let send* return EINTR.
4788 4788 */
4789 4789 error = EINTR;
4790 4790 goto done;
4791 4791 }
4792 4792 }
4793 4793
4794 4794 /*
4795 4795 * For UDP we don't break up the copyin into smaller pieces
4796 4796 * as in the TCP case. That means if ENOMEM is returned by
4797 4797 * mcopyinuio() then the uio vector has not been modified at
4798 4798 * all and we fallback to either strwrite() or kstrputmsg()
4799 4799 * below. Note also that we never generate priority messages
4800 4800 * from here.
4801 4801 */
4802 4802 udp_wq = stp->sd_wrq->q_next;
4803 4803 if (canput(udp_wq) &&
|
↓ open down ↓ |
4803 lines elided |
↑ open up ↑ |
4804 4804 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) {
4805 4805 ASSERT(DB_TYPE(mpdata) == M_DATA);
4806 4806 ASSERT(uiop->uio_resid == 0);
4807 4807 if (!connected)
4808 4808 linkb(mp, mpdata);
4809 4809 else
4810 4810 mp = mpdata;
4811 4811 if (auditing)
4812 4812 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
4813 4813
4814 - udp_wput(udp_wq, mp);
4815 - return (0);
4814 + /* Always returns 0... */
4815 + return (udp_wput(udp_wq, mp));
4816 4816 }
4817 4817
4818 4818 ASSERT(mpdata == NULL);
4819 4819 if (error != 0 && error != ENOMEM) {
4820 4820 freemsg(mp);
4821 4821 return (error);
4822 4822 }
4823 4823
4824 4824 /*
4825 4825 * For connected, let strwrite() handle the blocking case.
4826 4826 * Otherwise we fall thru and use kstrputmsg().
4827 4827 */
4828 4828 if (connected)
4829 4829 return (strwrite(SOTOV(so), uiop, CRED()));
4830 4830
4831 4831 if (auditing)
4832 4832 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
4833 4833
4834 4834 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
4835 4835 done:
4836 4836 #ifdef SOCK_DEBUG
4837 4837 if (error != 0) {
4838 4838 eprintsoline(so, error);
4839 4839 }
4840 4840 #endif /* SOCK_DEBUG */
4841 4841 return (error);
4842 4842 }
4843 4843
4844 4844 int
4845 4845 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr)
4846 4846 {
4847 4847 struct stdata *stp = SOTOV(so)->v_stream;
4848 4848 ssize_t iosize, rmax, maxblk;
4849 4849 queue_t *tcp_wq = stp->sd_wrq->q_next;
4850 4850 mblk_t *newmp;
4851 4851 int error = 0, wflag = 0;
4852 4852
4853 4853 ASSERT(so->so_mode & SM_BYTESTREAM);
4854 4854 ASSERT(SOTOV(so)->v_type == VSOCK);
4855 4855
4856 4856 if (stp->sd_sidp != NULL &&
4857 4857 (error = straccess(stp, JCWRITE)) != 0)
4858 4858 return (error);
4859 4859
4860 4860 if (uiop == NULL) {
4861 4861 /*
4862 4862 * kstrwritemp() should have checked sd_flag and
4863 4863 * flow-control before coming here. If we end up
4864 4864 * here it means that we can simply pass down the
4865 4865 * data to tcp.
4866 4866 */
|
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
4867 4867 ASSERT(mp != NULL);
4868 4868 if (stp->sd_wputdatafunc != NULL) {
4869 4869 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
4870 4870 NULL, NULL, NULL);
4871 4871 if (newmp == NULL) {
4872 4872 /* The caller will free mp */
4873 4873 return (ECOMM);
4874 4874 }
4875 4875 mp = newmp;
4876 4876 }
4877 - tcp_wput(tcp_wq, mp);
4878 - return (0);
4877 + /* Always returns 0... */
4878 + return (tcp_wput(tcp_wq, mp));
4879 4879 }
4880 4880
4881 4881 /* Fallback to strwrite() to do proper error handling */
4882 4882 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))
4883 4883 return (strwrite(SOTOV(so), uiop, cr));
4884 4884
4885 4885 rmax = stp->sd_qn_maxpsz;
4886 4886 ASSERT(rmax >= 0 || rmax == INFPSZ);
4887 4887 if (rmax == 0 || uiop->uio_resid <= 0)
4888 4888 return (0);
4889 4889
4890 4890 if (rmax == INFPSZ)
4891 4891 rmax = uiop->uio_resid;
4892 4892
4893 4893 maxblk = stp->sd_maxblk;
4894 4894
4895 4895 for (;;) {
4896 4896 iosize = MIN(uiop->uio_resid, rmax);
4897 4897
4898 4898 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error);
4899 4899 if (mp == NULL) {
4900 4900 /*
4901 4901 * Fallback to strwrite() for ENOMEM; if this
4902 4902 * is our first time in this routine and the uio
4903 4903 * vector has not been modified, we will end up
4904 4904 * calling strwrite() without any flag set.
4905 4905 */
4906 4906 if (error == ENOMEM)
4907 4907 goto slow_send;
4908 4908 else
4909 4909 return (error);
4910 4910 }
4911 4911 ASSERT(uiop->uio_resid >= 0);
4912 4912 /*
4913 4913 * If mp is non-NULL and ENOMEM is set, it means that
4914 4914 * mcopyinuio() was able to break down some of the user
4915 4915 * data into one or more mblks. Send the partial data
4916 4916 * to tcp and let the rest be handled in strwrite().
4917 4917 */
|
↓ open down ↓ |
29 lines elided |
↑ open up ↑ |
4918 4918 ASSERT(error == 0 || error == ENOMEM);
4919 4919 if (stp->sd_wputdatafunc != NULL) {
4920 4920 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
4921 4921 NULL, NULL, NULL);
4922 4922 if (newmp == NULL) {
4923 4923 /* The caller will free mp */
4924 4924 return (ECOMM);
4925 4925 }
4926 4926 mp = newmp;
4927 4927 }
4928 - tcp_wput(tcp_wq, mp);
4928 + (void) tcp_wput(tcp_wq, mp); /* Always returns 0 anyway. */
4929 4929
4930 4930 wflag |= NOINTR;
4931 4931
4932 4932 if (uiop->uio_resid == 0) { /* No more data; we're done */
4933 4933 ASSERT(error == 0);
4934 4934 break;
4935 4935 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag &
4936 4936 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) {
4937 4937 slow_send:
4938 4938 /*
4939 4939 * We were able to send down partial data using
4940 4940 * the direct call interface, but are now relying
4941 4941 * on strwrite() to handle the non-fastpath cases.
4942 4942 * If the socket is blocking we will sleep in
4943 4943 * strwaitq() until write is permitted, otherwise,
4944 4944 * we will need to return the amount of bytes
4945 4945 * written so far back to the app. This is the
4946 4946 * reason why we pass NOINTR flag to strwrite()
4947 4947 * for non-blocking socket, because we don't want
4948 4948 * to return EAGAIN when portion of the user data
4949 4949 * has actually been sent down.
4950 4950 */
4951 4951 return (strwrite_common(SOTOV(so), uiop, cr, wflag));
4952 4952 }
4953 4953 }
4954 4954 return (0);
4955 4955 }
4956 4956
4957 4957 /*
4958 4958 * Update sti_faddr by asking the transport (unless AF_UNIX).
4959 4959 */
4960 4960 /* ARGSUSED */
4961 4961 int
4962 4962 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
4963 4963 boolean_t accept, struct cred *cr)
4964 4964 {
4965 4965 struct strbuf strbuf;
4966 4966 int error = 0, res;
4967 4967 void *addr;
4968 4968 t_uscalar_t addrlen;
4969 4969 k_sigset_t smask;
4970 4970 sotpi_info_t *sti = SOTOTPI(so);
4971 4971
4972 4972 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n",
4973 4973 (void *)so, pr_state(so->so_state, so->so_mode)));
4974 4974
4975 4975 ASSERT(*namelen > 0);
4976 4976 mutex_enter(&so->so_lock);
4977 4977 so_lock_single(so); /* Set SOLOCKED */
4978 4978
4979 4979 if (accept) {
4980 4980 bcopy(sti->sti_faddr_sa, name,
4981 4981 MIN(*namelen, sti->sti_faddr_len));
4982 4982 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
4983 4983 goto done;
4984 4984 }
4985 4985
4986 4986 if (!(so->so_state & SS_ISCONNECTED)) {
4987 4987 error = ENOTCONN;
4988 4988 goto done;
4989 4989 }
4990 4990 /* Added this check for X/Open */
4991 4991 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
4992 4992 error = EINVAL;
4993 4993 if (xnet_check_print) {
4994 4994 printf("sockfs: X/Open getpeername check => EINVAL\n");
4995 4995 }
4996 4996 goto done;
4997 4997 }
4998 4998
4999 4999 if (sti->sti_faddr_valid) {
5000 5000 bcopy(sti->sti_faddr_sa, name,
5001 5001 MIN(*namelen, sti->sti_faddr_len));
5002 5002 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
5003 5003 goto done;
5004 5004 }
5005 5005
5006 5006 #ifdef DEBUG
5007 5007 dprintso(so, 1, ("sotpi_getpeername (local): %s\n",
5008 5008 pr_addr(so->so_family, sti->sti_faddr_sa,
5009 5009 (t_uscalar_t)sti->sti_faddr_len)));
5010 5010 #endif /* DEBUG */
5011 5011
5012 5012 if (so->so_family == AF_UNIX) {
5013 5013 /* Transport has different name space - return local info */
5014 5014 if (sti->sti_faddr_noxlate)
5015 5015 *namelen = 0;
5016 5016 error = 0;
5017 5017 goto done;
5018 5018 }
5019 5019
5020 5020 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0);
5021 5021
5022 5022 ASSERT(sti->sti_faddr_sa);
5023 5023 /* Allocate local buffer to use with ioctl */
5024 5024 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen;
5025 5025 mutex_exit(&so->so_lock);
5026 5026 addr = kmem_alloc(addrlen, KM_SLEEP);
5027 5027
5028 5028 /*
5029 5029 * Issue TI_GETPEERNAME with signals masked.
5030 5030 * Put the result in sti_faddr_sa so that getpeername works after
5031 5031 * a shutdown(output).
5032 5032 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
5033 5033 * back to the socket.
5034 5034 */
5035 5035 strbuf.buf = addr;
5036 5036 strbuf.maxlen = addrlen;
5037 5037 strbuf.len = 0;
5038 5038
5039 5039 sigintr(&smask, 0);
5040 5040 res = 0;
5041 5041 ASSERT(cr);
5042 5042 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf,
5043 5043 0, K_TO_K, cr, &res);
5044 5044 sigunintr(&smask);
5045 5045
5046 5046 mutex_enter(&so->so_lock);
5047 5047 /*
5048 5048 * If there is an error record the error in so_error put don't fail
5049 5049 * the getpeername. Instead fallback on the recorded
5050 5050 * sti->sti_faddr_sa.
5051 5051 */
5052 5052 if (error) {
5053 5053 /*
5054 5054 * Various stream head errors can be returned to the ioctl.
5055 5055 * However, it is impossible to determine which ones of
5056 5056 * these are really socket level errors that were incorrectly
5057 5057 * consumed by the ioctl. Thus this code silently ignores the
5058 5058 * error - to code explicitly does not reinstate the error
5059 5059 * using soseterror().
5060 5060 * Experiments have shows that at least this set of
5061 5061 * errors are reported and should not be reinstated on the
5062 5062 * socket:
5063 5063 * EINVAL E.g. if an I_LINK was in effect when
5064 5064 * getpeername was called.
5065 5065 * EPIPE The ioctl error semantics prefer the write
5066 5066 * side error over the read side error.
5067 5067 * ENOTCONN The transport just got disconnected but
5068 5068 * sockfs had not yet seen the T_DISCON_IND
5069 5069 * when issuing the ioctl.
5070 5070 */
5071 5071 error = 0;
5072 5072 } else if (res == 0 && strbuf.len > 0 &&
5073 5073 (so->so_state & SS_ISCONNECTED)) {
5074 5074 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen);
5075 5075 sti->sti_faddr_len = (socklen_t)strbuf.len;
5076 5076 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len);
5077 5077 sti->sti_faddr_valid = 1;
5078 5078
5079 5079 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len));
5080 5080 *namelen = sti->sti_faddr_len;
5081 5081 }
5082 5082 kmem_free(addr, addrlen);
5083 5083 #ifdef DEBUG
5084 5084 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n",
5085 5085 pr_addr(so->so_family, sti->sti_faddr_sa,
5086 5086 (t_uscalar_t)sti->sti_faddr_len)));
5087 5087 #endif /* DEBUG */
5088 5088 done:
5089 5089 so_unlock_single(so, SOLOCKED);
5090 5090 mutex_exit(&so->so_lock);
5091 5091 return (error);
5092 5092 }
5093 5093
5094 5094 /*
5095 5095 * Update sti_laddr by asking the transport (unless AF_UNIX).
5096 5096 */
5097 5097 int
5098 5098 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
5099 5099 struct cred *cr)
5100 5100 {
5101 5101 struct strbuf strbuf;
5102 5102 int error = 0, res;
5103 5103 void *addr;
5104 5104 t_uscalar_t addrlen;
5105 5105 k_sigset_t smask;
5106 5106 sotpi_info_t *sti = SOTOTPI(so);
5107 5107
5108 5108 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n",
5109 5109 (void *)so, pr_state(so->so_state, so->so_mode)));
5110 5110
5111 5111 ASSERT(*namelen > 0);
5112 5112 mutex_enter(&so->so_lock);
5113 5113 so_lock_single(so); /* Set SOLOCKED */
5114 5114
5115 5115 #ifdef DEBUG
5116 5116
5117 5117 dprintso(so, 1, ("sotpi_getsockname (local): %s\n",
5118 5118 pr_addr(so->so_family, sti->sti_laddr_sa,
5119 5119 (t_uscalar_t)sti->sti_laddr_len)));
5120 5120 #endif /* DEBUG */
5121 5121 if (sti->sti_laddr_valid) {
5122 5122 bcopy(sti->sti_laddr_sa, name,
5123 5123 MIN(*namelen, sti->sti_laddr_len));
5124 5124 *namelen = sti->sti_laddr_len;
5125 5125 goto done;
5126 5126 }
5127 5127
5128 5128 if (so->so_family == AF_UNIX) {
5129 5129 /*
5130 5130 * Transport has different name space - return local info. If we
5131 5131 * have enough space, let consumers know the family.
5132 5132 */
5133 5133 if (*namelen >= sizeof (sa_family_t)) {
5134 5134 name->sa_family = AF_UNIX;
5135 5135 *namelen = sizeof (sa_family_t);
5136 5136 } else {
5137 5137 *namelen = 0;
5138 5138 }
5139 5139 error = 0;
5140 5140 goto done;
5141 5141 }
5142 5142 if (!(so->so_state & SS_ISBOUND)) {
5143 5143 /* If not bound, then nothing to return. */
5144 5144 error = 0;
5145 5145 goto done;
5146 5146 }
5147 5147
5148 5148 /* Allocate local buffer to use with ioctl */
5149 5149 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen;
5150 5150 mutex_exit(&so->so_lock);
5151 5151 addr = kmem_alloc(addrlen, KM_SLEEP);
5152 5152
5153 5153 /*
5154 5154 * Issue TI_GETMYNAME with signals masked.
5155 5155 * Put the result in sti_laddr_sa so that getsockname works after
5156 5156 * a shutdown(output).
5157 5157 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
5158 5158 * back to the socket.
5159 5159 */
5160 5160 strbuf.buf = addr;
5161 5161 strbuf.maxlen = addrlen;
5162 5162 strbuf.len = 0;
5163 5163
5164 5164 sigintr(&smask, 0);
5165 5165 res = 0;
5166 5166 ASSERT(cr);
5167 5167 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf,
5168 5168 0, K_TO_K, cr, &res);
5169 5169 sigunintr(&smask);
5170 5170
5171 5171 mutex_enter(&so->so_lock);
5172 5172 /*
5173 5173 * If there is an error record the error in so_error put don't fail
5174 5174 * the getsockname. Instead fallback on the recorded
5175 5175 * sti->sti_laddr_sa.
5176 5176 */
5177 5177 if (error) {
5178 5178 /*
5179 5179 * Various stream head errors can be returned to the ioctl.
5180 5180 * However, it is impossible to determine which ones of
5181 5181 * these are really socket level errors that were incorrectly
5182 5182 * consumed by the ioctl. Thus this code silently ignores the
5183 5183 * error - to code explicitly does not reinstate the error
5184 5184 * using soseterror().
5185 5185 * Experiments have shows that at least this set of
5186 5186 * errors are reported and should not be reinstated on the
5187 5187 * socket:
5188 5188 * EINVAL E.g. if an I_LINK was in effect when
5189 5189 * getsockname was called.
5190 5190 * EPIPE The ioctl error semantics prefer the write
5191 5191 * side error over the read side error.
5192 5192 */
5193 5193 error = 0;
5194 5194 } else if (res == 0 && strbuf.len > 0 &&
5195 5195 (so->so_state & SS_ISBOUND)) {
5196 5196 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen);
5197 5197 sti->sti_laddr_len = (socklen_t)strbuf.len;
5198 5198 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
5199 5199 sti->sti_laddr_valid = 1;
5200 5200
5201 5201 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen));
5202 5202 *namelen = sti->sti_laddr_len;
5203 5203 }
5204 5204 kmem_free(addr, addrlen);
5205 5205 #ifdef DEBUG
5206 5206 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n",
5207 5207 pr_addr(so->so_family, sti->sti_laddr_sa,
5208 5208 (t_uscalar_t)sti->sti_laddr_len)));
5209 5209 #endif /* DEBUG */
5210 5210 done:
5211 5211 so_unlock_single(so, SOLOCKED);
5212 5212 mutex_exit(&so->so_lock);
5213 5213 return (error);
5214 5214 }
5215 5215
5216 5216 /*
5217 5217 * Get socket options. For SOL_SOCKET options some options are handled
5218 5218 * by the sockfs while others use the value recorded in the sonode as a
5219 5219 * fallback should the T_SVR4_OPTMGMT_REQ fail.
5220 5220 *
5221 5221 * On the return most *optlenp bytes are copied to optval.
5222 5222 */
5223 5223 /* ARGSUSED */
5224 5224 int
5225 5225 sotpi_getsockopt(struct sonode *so, int level, int option_name,
5226 5226 void *optval, socklen_t *optlenp, int flags, struct cred *cr)
5227 5227 {
5228 5228 struct T_optmgmt_req optmgmt_req;
5229 5229 struct T_optmgmt_ack *optmgmt_ack;
5230 5230 struct opthdr oh;
5231 5231 struct opthdr *opt_res;
5232 5232 mblk_t *mp = NULL;
5233 5233 int error = 0;
5234 5234 void *option = NULL; /* Set if fallback value */
5235 5235 t_uscalar_t maxlen = *optlenp;
5236 5236 t_uscalar_t len;
5237 5237 uint32_t value;
5238 5238 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */
5239 5239 struct timeval32 tmo_val32;
5240 5240 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */
5241 5241
5242 5242 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n",
5243 5243 (void *)so, level, option_name, optval, (void *)optlenp,
5244 5244 pr_state(so->so_state, so->so_mode)));
5245 5245
5246 5246 mutex_enter(&so->so_lock);
5247 5247 so_lock_single(so); /* Set SOLOCKED */
5248 5248
5249 5249 /*
5250 5250 * Check for SOL_SOCKET options.
5251 5251 * Certain SOL_SOCKET options are returned directly whereas
5252 5252 * others only provide a default (fallback) value should
5253 5253 * the T_SVR4_OPTMGMT_REQ fail.
5254 5254 */
5255 5255 if (level == SOL_SOCKET) {
5256 5256 /* Check parameters */
5257 5257 switch (option_name) {
5258 5258 case SO_TYPE:
5259 5259 case SO_ERROR:
5260 5260 case SO_DEBUG:
5261 5261 case SO_ACCEPTCONN:
5262 5262 case SO_REUSEADDR:
5263 5263 case SO_KEEPALIVE:
5264 5264 case SO_DONTROUTE:
5265 5265 case SO_BROADCAST:
5266 5266 case SO_USELOOPBACK:
5267 5267 case SO_OOBINLINE:
5268 5268 case SO_SNDBUF:
5269 5269 case SO_RCVBUF:
5270 5270 #ifdef notyet
5271 5271 case SO_SNDLOWAT:
5272 5272 case SO_RCVLOWAT:
5273 5273 #endif /* notyet */
5274 5274 case SO_DOMAIN:
5275 5275 case SO_DGRAM_ERRIND:
5276 5276 if (maxlen < (t_uscalar_t)sizeof (int32_t)) {
5277 5277 error = EINVAL;
5278 5278 eprintsoline(so, error);
5279 5279 goto done2;
5280 5280 }
5281 5281 break;
5282 5282 case SO_RCVTIMEO:
5283 5283 case SO_SNDTIMEO:
5284 5284 if (get_udatamodel() == DATAMODEL_NONE ||
5285 5285 get_udatamodel() == DATAMODEL_NATIVE) {
5286 5286 if (maxlen < sizeof (struct timeval)) {
5287 5287 error = EINVAL;
5288 5288 eprintsoline(so, error);
5289 5289 goto done2;
5290 5290 }
5291 5291 } else {
5292 5292 if (maxlen < sizeof (struct timeval32)) {
5293 5293 error = EINVAL;
5294 5294 eprintsoline(so, error);
5295 5295 goto done2;
5296 5296 }
5297 5297
5298 5298 }
5299 5299 break;
5300 5300 case SO_LINGER:
5301 5301 if (maxlen < (t_uscalar_t)sizeof (struct linger)) {
5302 5302 error = EINVAL;
5303 5303 eprintsoline(so, error);
5304 5304 goto done2;
5305 5305 }
5306 5306 break;
5307 5307 case SO_SND_BUFINFO:
5308 5308 if (maxlen < (t_uscalar_t)
5309 5309 sizeof (struct so_snd_bufinfo)) {
5310 5310 error = EINVAL;
5311 5311 eprintsoline(so, error);
5312 5312 goto done2;
5313 5313 }
5314 5314 break;
5315 5315 }
5316 5316
5317 5317 len = (t_uscalar_t)sizeof (uint32_t); /* Default */
5318 5318
5319 5319 switch (option_name) {
5320 5320 case SO_TYPE:
5321 5321 value = so->so_type;
5322 5322 option = &value;
5323 5323 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5324 5324
5325 5325 case SO_ERROR:
5326 5326 value = sogeterr(so, B_TRUE);
5327 5327 option = &value;
5328 5328 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5329 5329
5330 5330 case SO_ACCEPTCONN:
5331 5331 if (so->so_state & SS_ACCEPTCONN)
5332 5332 value = SO_ACCEPTCONN;
5333 5333 else
5334 5334 value = 0;
5335 5335 #ifdef DEBUG
5336 5336 if (value) {
5337 5337 dprintso(so, 1,
5338 5338 ("sotpi_getsockopt: 0x%x is set\n",
5339 5339 option_name));
5340 5340 } else {
5341 5341 dprintso(so, 1,
5342 5342 ("sotpi_getsockopt: 0x%x not set\n",
5343 5343 option_name));
5344 5344 }
5345 5345 #endif /* DEBUG */
5346 5346 option = &value;
5347 5347 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5348 5348
5349 5349 case SO_DEBUG:
5350 5350 case SO_REUSEADDR:
5351 5351 case SO_KEEPALIVE:
5352 5352 case SO_DONTROUTE:
5353 5353 case SO_BROADCAST:
5354 5354 case SO_USELOOPBACK:
5355 5355 case SO_OOBINLINE:
5356 5356 case SO_DGRAM_ERRIND:
5357 5357 value = (so->so_options & option_name);
5358 5358 #ifdef DEBUG
5359 5359 if (value) {
5360 5360 dprintso(so, 1,
5361 5361 ("sotpi_getsockopt: 0x%x is set\n",
5362 5362 option_name));
5363 5363 } else {
5364 5364 dprintso(so, 1,
5365 5365 ("sotpi_getsockopt: 0x%x not set\n",
5366 5366 option_name));
5367 5367 }
5368 5368 #endif /* DEBUG */
5369 5369 option = &value;
5370 5370 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5371 5371
5372 5372 /*
5373 5373 * The following options are only returned by sockfs when the
5374 5374 * T_SVR4_OPTMGMT_REQ fails.
5375 5375 */
5376 5376 case SO_LINGER:
5377 5377 option = &so->so_linger;
5378 5378 len = (t_uscalar_t)sizeof (struct linger);
5379 5379 break;
5380 5380 case SO_SNDBUF: {
5381 5381 ssize_t lvalue;
5382 5382
5383 5383 /*
5384 5384 * If the option has not been set then get a default
5385 5385 * value from the read queue. This value is
5386 5386 * returned if the transport fails
5387 5387 * the T_SVR4_OPTMGMT_REQ.
5388 5388 */
5389 5389 lvalue = so->so_sndbuf;
5390 5390 if (lvalue == 0) {
5391 5391 mutex_exit(&so->so_lock);
5392 5392 (void) strqget(strvp2wq(SOTOV(so))->q_next,
5393 5393 QHIWAT, 0, &lvalue);
5394 5394 mutex_enter(&so->so_lock);
5395 5395 dprintso(so, 1,
5396 5396 ("got SO_SNDBUF %ld from q\n", lvalue));
5397 5397 }
5398 5398 value = (int)lvalue;
5399 5399 option = &value;
5400 5400 len = (t_uscalar_t)sizeof (so->so_sndbuf);
5401 5401 break;
5402 5402 }
5403 5403 case SO_RCVBUF: {
5404 5404 ssize_t lvalue;
5405 5405
5406 5406 /*
5407 5407 * If the option has not been set then get a default
5408 5408 * value from the read queue. This value is
5409 5409 * returned if the transport fails
5410 5410 * the T_SVR4_OPTMGMT_REQ.
5411 5411 *
5412 5412 * XXX If SO_RCVBUF has been set and this is an
5413 5413 * XPG 4.2 application then do not ask the transport
5414 5414 * since the transport might adjust the value and not
5415 5415 * return exactly what was set by the application.
5416 5416 * For non-XPG 4.2 application we return the value
5417 5417 * that the transport is actually using.
5418 5418 */
5419 5419 lvalue = so->so_rcvbuf;
5420 5420 if (lvalue == 0) {
5421 5421 mutex_exit(&so->so_lock);
5422 5422 (void) strqget(RD(strvp2wq(SOTOV(so))),
5423 5423 QHIWAT, 0, &lvalue);
5424 5424 mutex_enter(&so->so_lock);
5425 5425 dprintso(so, 1,
5426 5426 ("got SO_RCVBUF %ld from q\n", lvalue));
5427 5427 } else if (flags & _SOGETSOCKOPT_XPG4_2) {
5428 5428 value = (int)lvalue;
5429 5429 option = &value;
5430 5430 goto copyout; /* skip asking transport */
5431 5431 }
5432 5432 value = (int)lvalue;
5433 5433 option = &value;
5434 5434 len = (t_uscalar_t)sizeof (so->so_rcvbuf);
5435 5435 break;
5436 5436 }
5437 5437 case SO_DOMAIN:
5438 5438 value = so->so_family;
5439 5439 option = &value;
5440 5440 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5441 5441
5442 5442 #ifdef notyet
5443 5443 /*
5444 5444 * We do not implement the semantics of these options
5445 5445 * thus we shouldn't implement the options either.
5446 5446 */
5447 5447 case SO_SNDLOWAT:
5448 5448 value = so->so_sndlowat;
5449 5449 option = &value;
5450 5450 break;
5451 5451 case SO_RCVLOWAT:
5452 5452 value = so->so_rcvlowat;
5453 5453 option = &value;
5454 5454 break;
5455 5455 #endif /* notyet */
5456 5456 case SO_SNDTIMEO:
5457 5457 case SO_RCVTIMEO: {
5458 5458 clock_t val;
5459 5459
5460 5460 if (option_name == SO_RCVTIMEO)
5461 5461 val = drv_hztousec(so->so_rcvtimeo);
5462 5462 else
5463 5463 val = drv_hztousec(so->so_sndtimeo);
5464 5464 tmo_val.tv_sec = val / (1000 * 1000);
5465 5465 tmo_val.tv_usec = val % (1000 * 1000);
5466 5466 if (get_udatamodel() == DATAMODEL_NONE ||
5467 5467 get_udatamodel() == DATAMODEL_NATIVE) {
5468 5468 option = &tmo_val;
5469 5469 len = sizeof (struct timeval);
5470 5470 } else {
5471 5471 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val);
5472 5472 option = &tmo_val32;
5473 5473 len = sizeof (struct timeval32);
5474 5474 }
5475 5475 break;
5476 5476 }
5477 5477 case SO_SND_BUFINFO: {
5478 5478 snd_bufinfo.sbi_wroff =
5479 5479 (so->so_proto_props).sopp_wroff;
5480 5480 snd_bufinfo.sbi_maxblk =
5481 5481 (so->so_proto_props).sopp_maxblk;
5482 5482 snd_bufinfo.sbi_maxpsz =
5483 5483 (so->so_proto_props).sopp_maxpsz;
5484 5484 snd_bufinfo.sbi_tail =
5485 5485 (so->so_proto_props).sopp_tail;
5486 5486 option = &snd_bufinfo;
5487 5487 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo);
5488 5488 break;
5489 5489 }
5490 5490 }
5491 5491 }
5492 5492
5493 5493 mutex_exit(&so->so_lock);
5494 5494
5495 5495 /* Send request */
5496 5496 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
5497 5497 optmgmt_req.MGMT_flags = T_CHECK;
5498 5498 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen);
5499 5499 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
5500 5500
5501 5501 oh.level = level;
5502 5502 oh.name = option_name;
5503 5503 oh.len = maxlen;
5504 5504
5505 5505 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
5506 5506 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr);
5507 5507 /* Let option management work in the presence of data flow control */
5508 5508 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
5509 5509 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
5510 5510 mp = NULL;
5511 5511 mutex_enter(&so->so_lock);
5512 5512 if (error) {
5513 5513 eprintsoline(so, error);
5514 5514 goto done2;
5515 5515 }
5516 5516 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
5517 5517 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0);
5518 5518 if (error) {
5519 5519 if (option != NULL) {
5520 5520 /* We have a fallback value */
5521 5521 error = 0;
5522 5522 goto copyout;
5523 5523 }
5524 5524 eprintsoline(so, error);
5525 5525 goto done2;
5526 5526 }
5527 5527 ASSERT(mp);
5528 5528 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr;
5529 5529 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset,
5530 5530 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE);
5531 5531 if (opt_res == NULL) {
5532 5532 if (option != NULL) {
5533 5533 /* We have a fallback value */
5534 5534 error = 0;
5535 5535 goto copyout;
5536 5536 }
5537 5537 error = EPROTO;
5538 5538 eprintsoline(so, error);
5539 5539 goto done;
5540 5540 }
5541 5541 option = &opt_res[1];
5542 5542
5543 5543 /* check to ensure that the option is within bounds */
5544 5544 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) ||
5545 5545 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) {
5546 5546 if (option != NULL) {
5547 5547 /* We have a fallback value */
5548 5548 error = 0;
5549 5549 goto copyout;
5550 5550 }
5551 5551 error = EPROTO;
5552 5552 eprintsoline(so, error);
5553 5553 goto done;
5554 5554 }
5555 5555
5556 5556 len = opt_res->len;
5557 5557
5558 5558 copyout: {
5559 5559 t_uscalar_t size = MIN(len, maxlen);
5560 5560 bcopy(option, optval, size);
5561 5561 bcopy(&size, optlenp, sizeof (size));
5562 5562 }
5563 5563 done:
5564 5564 freemsg(mp);
5565 5565 done2:
5566 5566 so_unlock_single(so, SOLOCKED);
5567 5567 mutex_exit(&so->so_lock);
5568 5568
5569 5569 return (error);
5570 5570 }
5571 5571
5572 5572 /*
5573 5573 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ.
5574 5574 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for
5575 5575 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails -
5576 5576 * setsockopt has to work even if the transport does not support the option.
5577 5577 */
5578 5578 /* ARGSUSED */
5579 5579 int
5580 5580 sotpi_setsockopt(struct sonode *so, int level, int option_name,
5581 5581 const void *optval, t_uscalar_t optlen, struct cred *cr)
5582 5582 {
5583 5583 struct T_optmgmt_req optmgmt_req;
5584 5584 struct opthdr oh;
5585 5585 mblk_t *mp;
5586 5586 int error = 0;
5587 5587 boolean_t handled = B_FALSE;
5588 5588
5589 5589 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n",
5590 5590 (void *)so, level, option_name, optval, optlen,
5591 5591 pr_state(so->so_state, so->so_mode)));
5592 5592
5593 5593 /* X/Open requires this check */
5594 5594 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
5595 5595 if (xnet_check_print)
5596 5596 printf("sockfs: X/Open setsockopt check => EINVAL\n");
5597 5597 return (EINVAL);
5598 5598 }
5599 5599
5600 5600 mutex_enter(&so->so_lock);
5601 5601 so_lock_single(so); /* Set SOLOCKED */
5602 5602 mutex_exit(&so->so_lock);
5603 5603
5604 5604 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
5605 5605 optmgmt_req.MGMT_flags = T_NEGOTIATE;
5606 5606 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen;
5607 5607 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
5608 5608
5609 5609 oh.level = level;
5610 5610 oh.name = option_name;
5611 5611 oh.len = optlen;
5612 5612
5613 5613 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
5614 5614 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr);
5615 5615 /* Let option management work in the presence of data flow control */
5616 5616 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
5617 5617 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
5618 5618 mp = NULL;
5619 5619 mutex_enter(&so->so_lock);
5620 5620 if (error) {
5621 5621 eprintsoline(so, error);
5622 5622 goto done2;
5623 5623 }
5624 5624 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
5625 5625 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0);
5626 5626 if (error) {
5627 5627 eprintsoline(so, error);
5628 5628 goto done;
5629 5629 }
5630 5630 ASSERT(mp);
5631 5631 /* No need to verify T_optmgmt_ack */
5632 5632 freemsg(mp);
5633 5633 done:
5634 5634 /*
5635 5635 * Check for SOL_SOCKET options and record their values.
5636 5636 * If we know about a SOL_SOCKET parameter and the transport
5637 5637 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or
5638 5638 * EPROTO) we let the setsockopt succeed.
5639 5639 */
5640 5640 if (level == SOL_SOCKET) {
5641 5641 /* Check parameters */
5642 5642 switch (option_name) {
5643 5643 case SO_DEBUG:
5644 5644 case SO_REUSEADDR:
5645 5645 case SO_KEEPALIVE:
5646 5646 case SO_DONTROUTE:
5647 5647 case SO_BROADCAST:
5648 5648 case SO_USELOOPBACK:
5649 5649 case SO_OOBINLINE:
5650 5650 case SO_SNDBUF:
5651 5651 case SO_RCVBUF:
5652 5652 #ifdef notyet
5653 5653 case SO_SNDLOWAT:
5654 5654 case SO_RCVLOWAT:
5655 5655 #endif /* notyet */
5656 5656 case SO_DGRAM_ERRIND:
5657 5657 if (optlen != (t_uscalar_t)sizeof (int32_t)) {
5658 5658 error = EINVAL;
5659 5659 eprintsoline(so, error);
5660 5660 goto done2;
5661 5661 }
5662 5662 ASSERT(optval);
5663 5663 handled = B_TRUE;
5664 5664 break;
5665 5665 case SO_SNDTIMEO:
5666 5666 case SO_RCVTIMEO:
5667 5667 if (get_udatamodel() == DATAMODEL_NONE ||
5668 5668 get_udatamodel() == DATAMODEL_NATIVE) {
5669 5669 if (optlen != sizeof (struct timeval)) {
5670 5670 error = EINVAL;
5671 5671 eprintsoline(so, error);
5672 5672 goto done2;
5673 5673 }
5674 5674 } else {
5675 5675 if (optlen != sizeof (struct timeval32)) {
5676 5676 error = EINVAL;
5677 5677 eprintsoline(so, error);
5678 5678 goto done2;
5679 5679 }
5680 5680 }
5681 5681 ASSERT(optval);
5682 5682 handled = B_TRUE;
5683 5683 break;
5684 5684 case SO_LINGER:
5685 5685 if (optlen != (t_uscalar_t)sizeof (struct linger)) {
5686 5686 error = EINVAL;
5687 5687 eprintsoline(so, error);
5688 5688 goto done2;
5689 5689 }
5690 5690 ASSERT(optval);
5691 5691 handled = B_TRUE;
5692 5692 break;
5693 5693 }
5694 5694
5695 5695 #define intvalue (*(int32_t *)optval)
5696 5696
5697 5697 switch (option_name) {
5698 5698 case SO_TYPE:
5699 5699 case SO_ERROR:
5700 5700 case SO_ACCEPTCONN:
5701 5701 /* Can't be set */
5702 5702 error = ENOPROTOOPT;
5703 5703 goto done2;
5704 5704 case SO_LINGER: {
5705 5705 struct linger *l = (struct linger *)optval;
5706 5706
5707 5707 so->so_linger.l_linger = l->l_linger;
5708 5708 if (l->l_onoff) {
5709 5709 so->so_linger.l_onoff = SO_LINGER;
5710 5710 so->so_options |= SO_LINGER;
5711 5711 } else {
5712 5712 so->so_linger.l_onoff = 0;
5713 5713 so->so_options &= ~SO_LINGER;
5714 5714 }
5715 5715 break;
5716 5716 }
5717 5717
5718 5718 case SO_DEBUG:
5719 5719 #ifdef SOCK_TEST
5720 5720 if (intvalue & 2)
5721 5721 sock_test_timelimit = 10 * hz;
5722 5722 else
5723 5723 sock_test_timelimit = 0;
5724 5724
5725 5725 if (intvalue & 4)
5726 5726 do_useracc = 0;
5727 5727 else
5728 5728 do_useracc = 1;
5729 5729 #endif /* SOCK_TEST */
5730 5730 /* FALLTHRU */
5731 5731 case SO_REUSEADDR:
5732 5732 case SO_KEEPALIVE:
5733 5733 case SO_DONTROUTE:
5734 5734 case SO_BROADCAST:
5735 5735 case SO_USELOOPBACK:
5736 5736 case SO_OOBINLINE:
5737 5737 case SO_DGRAM_ERRIND:
5738 5738 if (intvalue != 0) {
5739 5739 dprintso(so, 1,
5740 5740 ("socket_setsockopt: setting 0x%x\n",
5741 5741 option_name));
5742 5742 so->so_options |= option_name;
5743 5743 } else {
5744 5744 dprintso(so, 1,
5745 5745 ("socket_setsockopt: clearing 0x%x\n",
5746 5746 option_name));
5747 5747 so->so_options &= ~option_name;
5748 5748 }
5749 5749 break;
5750 5750 /*
5751 5751 * The following options are only returned by us when the
5752 5752 * transport layer fails.
5753 5753 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs
5754 5754 * since the transport might adjust the value and not
5755 5755 * return exactly what was set by the application.
5756 5756 */
5757 5757 case SO_SNDBUF:
5758 5758 so->so_sndbuf = intvalue;
5759 5759 break;
5760 5760 case SO_RCVBUF:
5761 5761 so->so_rcvbuf = intvalue;
5762 5762 break;
5763 5763 case SO_RCVPSH:
5764 5764 so->so_rcv_timer_interval = intvalue;
5765 5765 break;
5766 5766 #ifdef notyet
5767 5767 /*
5768 5768 * We do not implement the semantics of these options
5769 5769 * thus we shouldn't implement the options either.
5770 5770 */
5771 5771 case SO_SNDLOWAT:
5772 5772 so->so_sndlowat = intvalue;
5773 5773 break;
5774 5774 case SO_RCVLOWAT:
5775 5775 so->so_rcvlowat = intvalue;
5776 5776 break;
5777 5777 #endif /* notyet */
5778 5778 case SO_SNDTIMEO:
5779 5779 case SO_RCVTIMEO: {
5780 5780 struct timeval tl;
5781 5781 clock_t val;
5782 5782
5783 5783 if (get_udatamodel() == DATAMODEL_NONE ||
5784 5784 get_udatamodel() == DATAMODEL_NATIVE)
5785 5785 bcopy(&tl, (struct timeval *)optval,
5786 5786 sizeof (struct timeval));
5787 5787 else
5788 5788 TIMEVAL32_TO_TIMEVAL(&tl,
5789 5789 (struct timeval32 *)optval);
5790 5790 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
5791 5791 if (option_name == SO_RCVTIMEO)
5792 5792 so->so_rcvtimeo = drv_usectohz(val);
5793 5793 else
5794 5794 so->so_sndtimeo = drv_usectohz(val);
5795 5795 break;
5796 5796 }
5797 5797 }
5798 5798 #undef intvalue
5799 5799
5800 5800 if (error) {
5801 5801 if ((error == ENOPROTOOPT || error == EPROTO ||
5802 5802 error == EINVAL) && handled) {
5803 5803 dprintso(so, 1,
5804 5804 ("setsockopt: ignoring error %d for 0x%x\n",
5805 5805 error, option_name));
5806 5806 error = 0;
5807 5807 }
5808 5808 }
5809 5809 }
5810 5810 done2:
5811 5811 so_unlock_single(so, SOLOCKED);
5812 5812 mutex_exit(&so->so_lock);
5813 5813 return (error);
5814 5814 }
5815 5815
5816 5816 /*
5817 5817 * sotpi_close() is called when the last open reference goes away.
5818 5818 */
5819 5819 /* ARGSUSED */
5820 5820 int
5821 5821 sotpi_close(struct sonode *so, int flag, struct cred *cr)
5822 5822 {
5823 5823 struct vnode *vp = SOTOV(so);
5824 5824 dev_t dev;
5825 5825 int error = 0;
5826 5826 sotpi_info_t *sti = SOTOTPI(so);
5827 5827
5828 5828 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n",
5829 5829 (void *)vp, flag, pr_state(so->so_state, so->so_mode)));
5830 5830
5831 5831 dev = sti->sti_dev;
5832 5832
5833 5833 ASSERT(STREAMSTAB(getmajor(dev)));
5834 5834
5835 5835 mutex_enter(&so->so_lock);
5836 5836 so_lock_single(so); /* Set SOLOCKED */
5837 5837
5838 5838 ASSERT(so_verify_oobstate(so));
5839 5839
5840 5840 if (sti->sti_nl7c_flags & NL7C_ENABLED) {
5841 5841 sti->sti_nl7c_flags = 0;
5842 5842 nl7c_close(so);
5843 5843 }
5844 5844
5845 5845 if (vp->v_stream != NULL) {
5846 5846 vnode_t *ux_vp;
5847 5847
5848 5848 if (so->so_family == AF_UNIX) {
5849 5849 /* Could avoid this when CANTSENDMORE for !dgram */
5850 5850 so_unix_close(so);
5851 5851 }
5852 5852
5853 5853 mutex_exit(&so->so_lock);
5854 5854 /*
5855 5855 * Disassemble the linkage from the AF_UNIX underlying file
5856 5856 * system vnode to this socket (by atomically clearing
5857 5857 * v_stream in vn_rele_stream) before strclose clears sd_vnode
5858 5858 * and frees the stream head.
5859 5859 */
5860 5860 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) {
5861 5861 ASSERT(ux_vp->v_stream);
5862 5862 sti->sti_ux_bound_vp = NULL;
5863 5863 vn_rele_stream(ux_vp);
5864 5864 }
5865 5865 error = strclose(vp, flag, cr);
5866 5866 vp->v_stream = NULL;
5867 5867 mutex_enter(&so->so_lock);
5868 5868 }
5869 5869
5870 5870 /*
5871 5871 * Flush the T_DISCON_IND on sti_discon_ind_mp.
5872 5872 */
5873 5873 so_flush_discon_ind(so);
5874 5874
5875 5875 so_unlock_single(so, SOLOCKED);
5876 5876 mutex_exit(&so->so_lock);
5877 5877
5878 5878 /*
5879 5879 * Needed for STREAMs.
5880 5880 * Decrement the device driver's reference count for streams
5881 5881 * opened via the clone dip. The driver was held in clone_open().
5882 5882 * The absence of clone_close() forces this asymmetry.
5883 5883 */
5884 5884 if (so->so_flag & SOCLONE)
5885 5885 ddi_rele_driver(getmajor(dev));
5886 5886
5887 5887 return (error);
5888 5888 }
5889 5889
5890 5890 static int
5891 5891 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
5892 5892 struct cred *cr, int32_t *rvalp)
5893 5893 {
5894 5894 struct vnode *vp = SOTOV(so);
5895 5895 sotpi_info_t *sti = SOTOTPI(so);
5896 5896 int error = 0;
5897 5897
5898 5898 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
5899 5899 cmd, arg, pr_state(so->so_state, so->so_mode)));
5900 5900
5901 5901 switch (cmd) {
5902 5902 case SIOCSQPTR:
5903 5903 /*
5904 5904 * SIOCSQPTR is valid only when helper stream is created
5905 5905 * by the protocol.
5906 5906 */
5907 5907 case _I_INSERT:
5908 5908 case _I_REMOVE:
5909 5909 /*
5910 5910 * Since there's no compelling reason to support these ioctls
5911 5911 * on sockets, and doing so would increase the complexity
5912 5912 * markedly, prevent it.
5913 5913 */
5914 5914 return (EOPNOTSUPP);
5915 5915
5916 5916 case I_FIND:
5917 5917 case I_LIST:
5918 5918 case I_LOOK:
5919 5919 case I_POP:
5920 5920 case I_PUSH:
5921 5921 /*
5922 5922 * To prevent races and inconsistencies between the actual
5923 5923 * state of the stream and the state according to the sonode,
5924 5924 * we serialize all operations which modify or operate on the
5925 5925 * list of modules on the socket's stream.
5926 5926 */
5927 5927 mutex_enter(&sti->sti_plumb_lock);
5928 5928 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
5929 5929 mutex_exit(&sti->sti_plumb_lock);
5930 5930 return (error);
5931 5931
5932 5932 default:
5933 5933 if (so->so_version != SOV_STREAM)
5934 5934 break;
5935 5935
5936 5936 /*
5937 5937 * The imaginary "sockmod" has been popped; act as a stream.
5938 5938 */
5939 5939 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
5940 5940 }
5941 5941
5942 5942 ASSERT(so->so_version != SOV_STREAM);
5943 5943
5944 5944 /*
5945 5945 * Process socket-specific ioctls.
5946 5946 */
5947 5947 switch (cmd) {
5948 5948 case FIONBIO: {
5949 5949 int32_t value;
5950 5950
5951 5951 if (so_copyin((void *)arg, &value, sizeof (int32_t),
5952 5952 (mode & (int)FKIOCTL)))
5953 5953 return (EFAULT);
5954 5954
5955 5955 mutex_enter(&so->so_lock);
5956 5956 if (value) {
5957 5957 so->so_state |= SS_NDELAY;
5958 5958 } else {
5959 5959 so->so_state &= ~SS_NDELAY;
5960 5960 }
5961 5961 mutex_exit(&so->so_lock);
5962 5962 return (0);
5963 5963 }
5964 5964
5965 5965 case FIOASYNC: {
5966 5966 int32_t value;
5967 5967
5968 5968 if (so_copyin((void *)arg, &value, sizeof (int32_t),
5969 5969 (mode & (int)FKIOCTL)))
5970 5970 return (EFAULT);
5971 5971
5972 5972 mutex_enter(&so->so_lock);
5973 5973 /*
5974 5974 * SS_ASYNC flag not already set correctly?
5975 5975 * (!value != !(so->so_state & SS_ASYNC))
5976 5976 * but some engineers find that too hard to read.
5977 5977 */
5978 5978 if (value == 0 && (so->so_state & SS_ASYNC) != 0 ||
5979 5979 value != 0 && (so->so_state & SS_ASYNC) == 0)
5980 5980 error = so_flip_async(so, vp, mode, cr);
5981 5981 mutex_exit(&so->so_lock);
5982 5982 return (error);
5983 5983 }
5984 5984
5985 5985 case SIOCSPGRP:
5986 5986 case FIOSETOWN: {
5987 5987 pid_t pgrp;
5988 5988
5989 5989 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
5990 5990 (mode & (int)FKIOCTL)))
5991 5991 return (EFAULT);
5992 5992
5993 5993 mutex_enter(&so->so_lock);
5994 5994 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
5995 5995 /* Any change? */
5996 5996 if (pgrp != so->so_pgrp)
5997 5997 error = so_set_siggrp(so, vp, pgrp, mode, cr);
5998 5998 mutex_exit(&so->so_lock);
5999 5999 return (error);
6000 6000 }
6001 6001 case SIOCGPGRP:
6002 6002 case FIOGETOWN:
6003 6003 if (so_copyout(&so->so_pgrp, (void *)arg,
6004 6004 sizeof (pid_t), (mode & (int)FKIOCTL)))
6005 6005 return (EFAULT);
6006 6006 return (0);
6007 6007
6008 6008 case SIOCATMARK: {
6009 6009 int retval;
6010 6010 uint_t so_state;
6011 6011
6012 6012 /*
6013 6013 * strwaitmark has a finite timeout after which it
6014 6014 * returns -1 if the mark state is undetermined.
6015 6015 * In order to avoid any race between the mark state
6016 6016 * in sockfs and the mark state in the stream head this
6017 6017 * routine loops until the mark state can be determined
6018 6018 * (or the urgent data indication has been removed by some
6019 6019 * other thread).
6020 6020 */
6021 6021 do {
6022 6022 mutex_enter(&so->so_lock);
6023 6023 so_state = so->so_state;
6024 6024 mutex_exit(&so->so_lock);
6025 6025 if (so_state & SS_RCVATMARK) {
6026 6026 retval = 1;
6027 6027 } else if (!(so_state & SS_OOBPEND)) {
6028 6028 /*
6029 6029 * No SIGURG has been generated -- there is no
6030 6030 * pending or present urgent data. Thus can't
6031 6031 * possibly be at the mark.
6032 6032 */
6033 6033 retval = 0;
6034 6034 } else {
6035 6035 /*
6036 6036 * Have the stream head wait until there is
6037 6037 * either some messages on the read queue, or
6038 6038 * STRATMARK or STRNOTATMARK gets set. The
6039 6039 * STRNOTATMARK flag is used so that the
6040 6040 * transport can send up a MSGNOTMARKNEXT
6041 6041 * M_DATA to indicate that it is not
6042 6042 * at the mark and additional data is not about
6043 6043 * to be send upstream.
6044 6044 *
6045 6045 * If the mark state is undetermined this will
6046 6046 * return -1 and we will loop rechecking the
6047 6047 * socket state.
6048 6048 */
6049 6049 retval = strwaitmark(vp);
6050 6050 }
6051 6051 } while (retval == -1);
6052 6052
6053 6053 if (so_copyout(&retval, (void *)arg, sizeof (int),
6054 6054 (mode & (int)FKIOCTL)))
6055 6055 return (EFAULT);
6056 6056 return (0);
6057 6057 }
6058 6058
6059 6059 case I_FDINSERT:
6060 6060 case I_SENDFD:
6061 6061 case I_RECVFD:
6062 6062 case I_ATMARK:
6063 6063 case _SIOCSOCKFALLBACK:
6064 6064 /*
6065 6065 * These ioctls do not apply to sockets. I_FDINSERT can be
6066 6066 * used to send M_PROTO messages without modifying the socket
6067 6067 * state. I_SENDFD/RECVFD should not be used for socket file
6068 6068 * descriptor passing since they assume a twisted stream.
6069 6069 * SIOCATMARK must be used instead of I_ATMARK.
6070 6070 *
6071 6071 * _SIOCSOCKFALLBACK from an application should never be
6072 6072 * processed. It is only generated by socktpi_open() or
6073 6073 * in response to I_POP or I_PUSH.
6074 6074 */
6075 6075 #ifdef DEBUG
6076 6076 zcmn_err(getzoneid(), CE_WARN,
6077 6077 "Unsupported STREAMS ioctl 0x%x on socket. "
6078 6078 "Pid = %d\n", cmd, curproc->p_pid);
6079 6079 #endif /* DEBUG */
6080 6080 return (EOPNOTSUPP);
6081 6081
6082 6082 case _I_GETPEERCRED:
6083 6083 if ((mode & FKIOCTL) == 0)
6084 6084 return (EINVAL);
6085 6085
6086 6086 mutex_enter(&so->so_lock);
6087 6087 if ((so->so_mode & SM_CONNREQUIRED) == 0) {
6088 6088 error = ENOTSUP;
6089 6089 } else if ((so->so_state & SS_ISCONNECTED) == 0) {
6090 6090 error = ENOTCONN;
6091 6091 } else if (so->so_peercred != NULL) {
6092 6092 k_peercred_t *kp = (k_peercred_t *)arg;
6093 6093 kp->pc_cr = so->so_peercred;
6094 6094 kp->pc_cpid = so->so_cpid;
6095 6095 crhold(so->so_peercred);
6096 6096 } else {
6097 6097 error = EINVAL;
6098 6098 }
6099 6099 mutex_exit(&so->so_lock);
6100 6100 return (error);
6101 6101
6102 6102 default:
6103 6103 /*
6104 6104 * Do the higher-order bits of the ioctl cmd indicate
6105 6105 * that it is an I_* streams ioctl?
6106 6106 */
6107 6107 if ((cmd & 0xffffff00U) == STR &&
6108 6108 so->so_version == SOV_SOCKBSD) {
6109 6109 #ifdef DEBUG
6110 6110 zcmn_err(getzoneid(), CE_WARN,
6111 6111 "Unsupported STREAMS ioctl 0x%x on socket. "
6112 6112 "Pid = %d\n", cmd, curproc->p_pid);
6113 6113 #endif /* DEBUG */
6114 6114 return (EOPNOTSUPP);
6115 6115 }
6116 6116 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
6117 6117 }
6118 6118 }
6119 6119
6120 6120 /*
6121 6121 * Handle plumbing-related ioctls.
6122 6122 */
6123 6123 static int
6124 6124 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
6125 6125 struct cred *cr, int32_t *rvalp)
6126 6126 {
6127 6127 static const char sockmod_name[] = "sockmod";
6128 6128 struct sonode *so = VTOSO(vp);
6129 6129 char mname[FMNAMESZ + 1];
6130 6130 int error;
6131 6131 sotpi_info_t *sti = SOTOTPI(so);
6132 6132
6133 6133 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
6134 6134
6135 6135 if (so->so_version == SOV_SOCKBSD)
6136 6136 return (EOPNOTSUPP);
6137 6137
6138 6138 if (so->so_version == SOV_STREAM) {
6139 6139 /*
6140 6140 * The imaginary "sockmod" has been popped - act as a stream.
6141 6141 * If this is a push of sockmod then change back to a socket.
6142 6142 */
6143 6143 if (cmd == I_PUSH) {
6144 6144 error = ((mode & FKIOCTL) ? copystr : copyinstr)(
6145 6145 (void *)arg, mname, sizeof (mname), NULL);
6146 6146
6147 6147 if (error == 0 && strcmp(mname, sockmod_name) == 0) {
6148 6148 dprintso(so, 0, ("socktpi_ioctl: going to "
6149 6149 "socket version\n"));
6150 6150 so_stream2sock(so);
6151 6151 return (0);
6152 6152 }
6153 6153 }
6154 6154 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
6155 6155 }
6156 6156
6157 6157 switch (cmd) {
6158 6158 case I_PUSH:
6159 6159 if (sti->sti_direct) {
6160 6160 mutex_enter(&so->so_lock);
6161 6161 so_lock_single(so);
6162 6162 mutex_exit(&so->so_lock);
6163 6163
6164 6164 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
6165 6165 cr, rvalp);
6166 6166
6167 6167 mutex_enter(&so->so_lock);
6168 6168 if (error == 0)
6169 6169 sti->sti_direct = 0;
6170 6170 so_unlock_single(so, SOLOCKED);
6171 6171 mutex_exit(&so->so_lock);
6172 6172
6173 6173 if (error != 0)
6174 6174 return (error);
6175 6175 }
6176 6176
6177 6177 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6178 6178 if (error == 0)
6179 6179 sti->sti_pushcnt++;
6180 6180 return (error);
6181 6181
6182 6182 case I_POP:
6183 6183 if (sti->sti_pushcnt == 0) {
6184 6184 /* Emulate sockmod being popped */
6185 6185 dprintso(so, 0,
6186 6186 ("socktpi_ioctl: going to STREAMS version\n"));
6187 6187 return (so_sock2stream(so));
6188 6188 }
6189 6189
6190 6190 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6191 6191 if (error == 0)
6192 6192 sti->sti_pushcnt--;
6193 6193 return (error);
6194 6194
6195 6195 case I_LIST: {
6196 6196 struct str_mlist *kmlistp, *umlistp;
6197 6197 struct str_list kstrlist;
6198 6198 ssize_t kstrlistsize;
6199 6199 int i, nmods;
6200 6200
6201 6201 STRUCT_DECL(str_list, ustrlist);
6202 6202 STRUCT_INIT(ustrlist, mode);
6203 6203
6204 6204 if (arg == 0) {
6205 6205 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6206 6206 if (error == 0)
6207 6207 (*rvalp)++; /* Add one for sockmod */
6208 6208 return (error);
6209 6209 }
6210 6210
6211 6211 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
6212 6212 STRUCT_SIZE(ustrlist), mode & FKIOCTL);
6213 6213 if (error != 0)
6214 6214 return (error);
6215 6215
6216 6216 nmods = STRUCT_FGET(ustrlist, sl_nmods);
6217 6217 if (nmods <= 0)
6218 6218 return (EINVAL);
6219 6219 /*
6220 6220 * Ceiling nmods at nstrpush to prevent someone from
6221 6221 * maliciously consuming lots of kernel memory.
6222 6222 */
6223 6223 nmods = MIN(nmods, nstrpush);
6224 6224
6225 6225 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
6226 6226 kstrlist.sl_nmods = nmods;
6227 6227 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);
6228 6228
6229 6229 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
6230 6230 cr, rvalp);
6231 6231 if (error != 0)
6232 6232 goto done;
6233 6233
6234 6234 /*
6235 6235 * Considering the module list as a 0-based array of sl_nmods
6236 6236 * modules, sockmod should conceptually exist at slot
6237 6237 * sti_pushcnt. Insert sockmod at this location by sliding all
6238 6238 * of the module names after so_pushcnt over by one. We know
6239 6239 * that there will be room to do this since we allocated
6240 6240 * sl_modlist with an additional slot.
6241 6241 */
6242 6242 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
6243 6243 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];
6244 6244
6245 6245 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
6246 6246 kstrlist.sl_nmods++;
6247 6247
6248 6248 /*
6249 6249 * Copy all of the entries out to ustrlist.
6250 6250 */
6251 6251 kmlistp = kstrlist.sl_modlist;
6252 6252 umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
6253 6253 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
6254 6254 error = so_copyout(kmlistp++, umlistp++,
6255 6255 sizeof (struct str_mlist), mode & FKIOCTL);
6256 6256 if (error != 0)
6257 6257 goto done;
6258 6258 }
6259 6259
6260 6260 error = so_copyout(&i, (void *)arg, sizeof (int32_t),
6261 6261 mode & FKIOCTL);
6262 6262 if (error == 0)
6263 6263 *rvalp = 0;
6264 6264 done:
6265 6265 kmem_free(kstrlist.sl_modlist, kstrlistsize);
6266 6266 return (error);
6267 6267 }
6268 6268 case I_LOOK:
6269 6269 if (sti->sti_pushcnt == 0) {
6270 6270 return (so_copyout(sockmod_name, (void *)arg,
6271 6271 sizeof (sockmod_name), mode & FKIOCTL));
6272 6272 }
6273 6273 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
6274 6274
6275 6275 case I_FIND:
6276 6276 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6277 6277 if (error && error != EINVAL)
6278 6278 return (error);
6279 6279
6280 6280 /* if not found and string was sockmod return 1 */
6281 6281 if (*rvalp == 0 || error == EINVAL) {
6282 6282 error = ((mode & FKIOCTL) ? copystr : copyinstr)(
6283 6283 (void *)arg, mname, sizeof (mname), NULL);
6284 6284 if (error == ENAMETOOLONG)
6285 6285 error = EINVAL;
6286 6286
6287 6287 if (error == 0 && strcmp(mname, sockmod_name) == 0)
6288 6288 *rvalp = 1;
6289 6289 }
6290 6290 return (error);
6291 6291
6292 6292 default:
6293 6293 panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
6294 6294 break;
6295 6295 }
6296 6296
6297 6297 return (0);
6298 6298 }
6299 6299
6300 6300 /*
6301 6301 * Wrapper around the streams poll routine that implements socket poll
6302 6302 * semantics.
6303 6303 * The sockfs never calls pollwakeup itself - the stream head takes care
6304 6304 * of all pollwakeups. Since sockfs never holds so_lock when calling the
6305 6305 * stream head there can never be a deadlock due to holding so_lock across
6306 6306 * pollwakeup and acquiring so_lock in this routine.
6307 6307 *
6308 6308 * However, since the performance of VOP_POLL is critical we avoid
6309 6309 * acquiring so_lock here. This is based on two assumptions:
6310 6310 * - The poll implementation holds locks to serialize the VOP_POLL call
6311 6311 * and a pollwakeup for the same pollhead. This ensures that should
6312 6312 * e.g. so_state change during a socktpi_poll call the pollwakeup
6313 6313 * (which strsock_* and strrput conspire to issue) is issued after
6314 6314 * the state change. Thus the pollwakeup will block until VOP_POLL has
6315 6315 * returned and then wake up poll and have it call VOP_POLL again.
6316 6316 * - The reading of so_state without holding so_lock does not result in
6317 6317 * stale data that is older than the latest state change that has dropped
6318 6318 * so_lock. This is ensured by the mutex_exit issuing the appropriate
6319 6319 * memory barrier to force the data into the coherency domain.
6320 6320 */
6321 6321 static int
6322 6322 sotpi_poll(
6323 6323 struct sonode *so,
6324 6324 short events,
6325 6325 int anyyet,
6326 6326 short *reventsp,
6327 6327 struct pollhead **phpp)
6328 6328 {
6329 6329 short origevents = events;
6330 6330 struct vnode *vp = SOTOV(so);
6331 6331 int error;
6332 6332 int so_state = so->so_state; /* snapshot */
6333 6333 sotpi_info_t *sti = SOTOTPI(so);
6334 6334
6335 6335 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
6336 6336 (void *)vp, pr_state(so_state, so->so_mode), so->so_error));
6337 6337
6338 6338 ASSERT(vp->v_type == VSOCK);
6339 6339 ASSERT(vp->v_stream != NULL);
6340 6340
6341 6341 if (so->so_version == SOV_STREAM) {
6342 6342 /* The imaginary "sockmod" has been popped - act as a stream */
6343 6343 return (strpoll(vp->v_stream, events, anyyet,
6344 6344 reventsp, phpp));
6345 6345 }
6346 6346
6347 6347 if (!(so_state & SS_ISCONNECTED) &&
6348 6348 (so->so_mode & SM_CONNREQUIRED)) {
6349 6349 /* Not connected yet - turn off write side events */
6350 6350 events &= ~(POLLOUT|POLLWRBAND);
6351 6351 }
6352 6352 /*
6353 6353 * Check for errors without calling strpoll if the caller wants them.
6354 6354 * In sockets the errors are represented as input/output events
6355 6355 * and there is no need to ask the stream head for this information.
6356 6356 */
6357 6357 if (so->so_error != 0 &&
6358 6358 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
6359 6359 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
6360 6360 return (0);
6361 6361 }
6362 6362 /*
6363 6363 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
6364 6364 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA
6365 6365 * will not trigger a POLLIN event with POLLRDDATA set.
6366 6366 * The handling of urgent data (causing POLLRDBAND) is done by
6367 6367 * inspecting SS_OOBPEND below.
6368 6368 */
6369 6369 events |= POLLRDDATA;
6370 6370
6371 6371 /*
6372 6372 * After shutdown(output) a stream head write error is set.
6373 6373 * However, we should not return output events.
6374 6374 */
6375 6375 events |= POLLNOERR;
6376 6376 error = strpoll(vp->v_stream, events, anyyet,
6377 6377 reventsp, phpp);
6378 6378 if (error)
6379 6379 return (error);
6380 6380
6381 6381 ASSERT(!(*reventsp & POLLERR));
6382 6382
6383 6383 /*
6384 6384 * Notes on T_CONN_IND handling for sockets.
6385 6385 *
6386 6386 * If strpoll() returned without events, SR_POLLIN is guaranteed
6387 6387 * to be set, ensuring any subsequent strrput() runs pollwakeup().
6388 6388 *
6389 6389 * Since the so_lock is not held, soqueueconnind() may have run
6390 6390 * and a T_CONN_IND may be waiting. We now check for any queued
6391 6391 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
6392 6392 * to ensure poll returns.
6393 6393 *
6394 6394 * However:
6395 6395 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
6396 6396 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
6397 6397 * the following actions will occur; taken together they ensure the
6398 6398 * syscall will return.
6399 6399 *
6400 6400 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if
6401 6401 * the accept() was run on a non-blocking socket sowaitconnind()
6402 6402 * may have already returned EWOULDBLOCK, so not be waiting to
6403 6403 * process the message. Additionally socktpi_poll() has probably
6404 6404 * proceeded past the sti_conn_ind_head check below.
6405 6405 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
6406 6406 * this thread, however that could occur before poll_common()
6407 6407 * has entered cv_wait.
6408 6408 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
6409 6409 *
6410 6410 * Before proceeding to cv_wait() in poll_common() for an event,
6411 6411 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
6412 6412 * and if set, re-calls strpoll() to ensure the late arriving
6413 6413 * T_CONN_IND is recognized, and pollsys() returns.
6414 6414 */
6415 6415
6416 6416 if (sti->sti_conn_ind_head != NULL)
6417 6417 *reventsp |= (POLLIN|POLLRDNORM) & events;
6418 6418
6419 6419 if (so->so_state & SS_CANTRCVMORE) {
6420 6420 *reventsp |= POLLRDHUP & events;
6421 6421
6422 6422 if (so->so_state & SS_CANTSENDMORE)
6423 6423 *reventsp |= POLLHUP;
6424 6424 }
6425 6425
6426 6426 if (so->so_state & SS_OOBPEND)
6427 6427 *reventsp |= POLLRDBAND & events;
6428 6428
6429 6429 if (sti->sti_nl7c_rcv_mp != NULL) {
6430 6430 *reventsp |= (POLLIN|POLLRDNORM) & events;
6431 6431 }
6432 6432 if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
6433 6433 ((POLLIN|POLLRDNORM) & *reventsp)) {
6434 6434 sti->sti_nl7c_flags |= NL7C_POLLIN;
6435 6435 }
6436 6436
6437 6437 return (0);
6438 6438 }
6439 6439
6440 6440 /*ARGSUSED*/
6441 6441 static int
6442 6442 socktpi_constructor(void *buf, void *cdrarg, int kmflags)
6443 6443 {
6444 6444 sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
6445 6445 int error = 0;
6446 6446
6447 6447 error = sonode_constructor(buf, cdrarg, kmflags);
6448 6448 if (error != 0)
6449 6449 return (error);
6450 6450
6451 6451 error = i_sotpi_info_constructor(&st->st_info);
6452 6452 if (error != 0)
6453 6453 sonode_destructor(buf, cdrarg);
6454 6454
6455 6455 st->st_sonode.so_priv = &st->st_info;
6456 6456
6457 6457 return (error);
6458 6458 }
6459 6459
6460 6460 /*ARGSUSED1*/
6461 6461 static void
6462 6462 socktpi_destructor(void *buf, void *cdrarg)
6463 6463 {
6464 6464 sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
6465 6465
6466 6466 ASSERT(st->st_sonode.so_priv == &st->st_info);
6467 6467 st->st_sonode.so_priv = NULL;
6468 6468
6469 6469 i_sotpi_info_destructor(&st->st_info);
6470 6470 sonode_destructor(buf, cdrarg);
6471 6471 }
6472 6472
6473 6473 static int
6474 6474 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags)
6475 6475 {
6476 6476 int retval;
6477 6477
6478 6478 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) {
6479 6479 struct sonode *so = (struct sonode *)buf;
6480 6480 sotpi_info_t *sti = SOTOTPI(so);
6481 6481
6482 6482 mutex_enter(&socklist.sl_lock);
6483 6483
6484 6484 sti->sti_next_so = socklist.sl_list;
6485 6485 sti->sti_prev_so = NULL;
6486 6486 if (sti->sti_next_so != NULL)
6487 6487 SOTOTPI(sti->sti_next_so)->sti_prev_so = so;
6488 6488 socklist.sl_list = so;
6489 6489
6490 6490 mutex_exit(&socklist.sl_lock);
6491 6491
6492 6492 }
6493 6493 return (retval);
6494 6494 }
6495 6495
6496 6496 static void
6497 6497 socktpi_unix_destructor(void *buf, void *cdrarg)
6498 6498 {
6499 6499 struct sonode *so = (struct sonode *)buf;
6500 6500 sotpi_info_t *sti = SOTOTPI(so);
6501 6501
6502 6502 mutex_enter(&socklist.sl_lock);
6503 6503
6504 6504 if (sti->sti_next_so != NULL)
6505 6505 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so;
6506 6506 if (sti->sti_prev_so != NULL)
6507 6507 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so;
6508 6508 else
6509 6509 socklist.sl_list = sti->sti_next_so;
6510 6510
6511 6511 mutex_exit(&socklist.sl_lock);
6512 6512
6513 6513 socktpi_destructor(buf, cdrarg);
6514 6514 }
6515 6515
6516 6516 int
6517 6517 socktpi_init(void)
6518 6518 {
6519 6519 /*
6520 6520 * Create sonode caches. We create a special one for AF_UNIX so
6521 6521 * that we can track them for netstat(1m).
6522 6522 */
6523 6523 socktpi_cache = kmem_cache_create("socktpi_cache",
6524 6524 sizeof (struct sotpi_sonode), 0, socktpi_constructor,
6525 6525 socktpi_destructor, NULL, NULL, NULL, 0);
6526 6526
6527 6527 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache",
6528 6528 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor,
6529 6529 socktpi_unix_destructor, NULL, NULL, NULL, 0);
6530 6530
6531 6531 return (0);
6532 6532 }
6533 6533
6534 6534 /*
6535 6535 * Given a non-TPI sonode, allocate and prep it to be ready for TPI.
6536 6536 *
6537 6537 * Caller must still update state and mode using sotpi_update_state().
6538 6538 */
6539 6539 int
6540 6540 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp,
6541 6541 boolean_t *direct, queue_t **qp, struct cred *cr)
6542 6542 {
6543 6543 sotpi_info_t *sti;
6544 6544 struct sockparams *origsp = so->so_sockparams;
6545 6545 sock_lower_handle_t handle = so->so_proto_handle;
6546 6546 struct stdata *stp;
6547 6547 struct vnode *vp;
6548 6548 queue_t *q;
6549 6549 int error = 0;
6550 6550
6551 6551 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
6552 6552 SS_FALLBACK_PENDING);
6553 6553 ASSERT(SOCK_IS_NONSTR(so));
6554 6554
6555 6555 *qp = NULL;
6556 6556 *direct = B_FALSE;
6557 6557 so->so_sockparams = newsp;
6558 6558 /*
6559 6559 * Allocate and initalize fields required by TPI.
6560 6560 */
6561 6561 (void) sotpi_info_create(so, KM_SLEEP);
6562 6562 sotpi_info_init(so);
6563 6563
6564 6564 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) {
6565 6565 sotpi_info_fini(so);
6566 6566 sotpi_info_destroy(so);
6567 6567 return (error);
6568 6568 }
6569 6569 ASSERT(handle == so->so_proto_handle);
6570 6570 sti = SOTOTPI(so);
6571 6571 if (sti->sti_direct != 0)
6572 6572 *direct = B_TRUE;
6573 6573
6574 6574 /*
6575 6575 * Keep the original sp around so we can properly dispose of the
6576 6576 * sonode when the socket is being closed.
6577 6577 */
6578 6578 sti->sti_orig_sp = origsp;
6579 6579
6580 6580 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */
6581 6581 so_alloc_addr(so, so->so_max_addr_len);
6582 6582
6583 6583 /*
6584 6584 * If the application has done a SIOCSPGRP, make sure the
6585 6585 * STREAM head is aware. This needs to take place before
6586 6586 * the protocol start sending up messages. Otherwise we
6587 6587 * might miss to generate SIGPOLL.
6588 6588 *
6589 6589 * It is possible that the application will receive duplicate
6590 6590 * signals if some were already generated for either data or
6591 6591 * connection indications.
6592 6592 */
6593 6593 if (so->so_pgrp != 0) {
6594 6594 if (so_set_events(so, so->so_vnode, cr) != 0)
6595 6595 so->so_pgrp = 0;
6596 6596 }
6597 6597
6598 6598 /*
6599 6599 * Determine which queue to use.
6600 6600 */
6601 6601 vp = SOTOV(so);
6602 6602 stp = vp->v_stream;
6603 6603 ASSERT(stp != NULL);
6604 6604 q = stp->sd_wrq->q_next;
6605 6605
6606 6606 /*
6607 6607 * Skip any modules that may have been auto pushed when the device
6608 6608 * was opened
6609 6609 */
6610 6610 while (q->q_next != NULL)
6611 6611 q = q->q_next;
6612 6612 *qp = _RD(q);
6613 6613
6614 6614 /* This is now a STREAMS sockets */
6615 6615 so->so_not_str = B_FALSE;
6616 6616
6617 6617 return (error);
6618 6618 }
6619 6619
6620 6620 /*
6621 6621 * Revert a TPI sonode. It is only allowed to revert the sonode during
6622 6622 * the fallback process.
6623 6623 */
6624 6624 void
6625 6625 sotpi_revert_sonode(struct sonode *so, struct cred *cr)
6626 6626 {
6627 6627 vnode_t *vp = SOTOV(so);
6628 6628
6629 6629 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
6630 6630 SS_FALLBACK_PENDING);
6631 6631 ASSERT(!SOCK_IS_NONSTR(so));
6632 6632 ASSERT(vp->v_stream != NULL);
6633 6633
6634 6634 strclean(vp);
6635 6635 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr);
6636 6636
6637 6637 /*
6638 6638 * Restore the original sockparams. The caller is responsible for
6639 6639 * dropping the ref to the new sp.
6640 6640 */
6641 6641 so->so_sockparams = SOTOTPI(so)->sti_orig_sp;
6642 6642
6643 6643 sotpi_info_fini(so);
6644 6644 sotpi_info_destroy(so);
6645 6645
6646 6646 /* This is no longer a STREAMS sockets */
6647 6647 so->so_not_str = B_TRUE;
6648 6648 }
6649 6649
6650 6650 void
6651 6651 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap,
6652 6652 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr,
6653 6653 socklen_t faddrlen, short opts)
6654 6654 {
6655 6655 sotpi_info_t *sti = SOTOTPI(so);
6656 6656
6657 6657 so_proc_tcapability_ack(so, tcap);
6658 6658
6659 6659 so->so_options |= opts;
6660 6660
6661 6661 /*
6662 6662 * Determine whether the foreign and local address are valid
6663 6663 */
6664 6664 if (laddrlen != 0) {
6665 6665 ASSERT(laddrlen <= sti->sti_laddr_maxlen);
6666 6666 sti->sti_laddr_len = laddrlen;
6667 6667 bcopy(laddr, sti->sti_laddr_sa, laddrlen);
6668 6668 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND);
6669 6669 }
6670 6670
6671 6671 if (faddrlen != 0) {
6672 6672 ASSERT(faddrlen <= sti->sti_faddr_maxlen);
6673 6673 sti->sti_faddr_len = faddrlen;
6674 6674 bcopy(faddr, sti->sti_faddr_sa, faddrlen);
6675 6675 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED);
6676 6676 }
6677 6677
6678 6678 }
6679 6679
6680 6680 /*
6681 6681 * Allocate enough space to cache the local and foreign addresses.
6682 6682 */
6683 6683 void
6684 6684 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen)
6685 6685 {
6686 6686 sotpi_info_t *sti = SOTOTPI(so);
6687 6687
6688 6688 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
6689 6689 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0);
6690 6690 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen =
6691 6691 P2ROUNDUP(maxlen, KMEM_ALIGN);
6692 6692 so->so_max_addr_len = sti->sti_laddr_maxlen;
6693 6693 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP);
6694 6694 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa
6695 6695 + sti->sti_laddr_maxlen);
6696 6696
6697 6697 if (so->so_family == AF_UNIX) {
6698 6698 /*
6699 6699 * Initialize AF_UNIX related fields.
6700 6700 */
6701 6701 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr));
6702 6702 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr));
6703 6703 }
6704 6704 }
6705 6705
6706 6706
6707 6707 sotpi_info_t *
6708 6708 sotpi_sototpi(struct sonode *so)
6709 6709 {
6710 6710 sotpi_info_t *sti;
6711 6711
6712 6712 ASSERT(so != NULL);
6713 6713
6714 6714 sti = (sotpi_info_t *)so->so_priv;
6715 6715
6716 6716 ASSERT(sti != NULL);
6717 6717 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
6718 6718
6719 6719 return (sti);
6720 6720 }
6721 6721
6722 6722 static int
6723 6723 i_sotpi_info_constructor(sotpi_info_t *sti)
6724 6724 {
6725 6725 sti->sti_magic = SOTPI_INFO_MAGIC;
6726 6726 sti->sti_ack_mp = NULL;
6727 6727 sti->sti_discon_ind_mp = NULL;
6728 6728 sti->sti_ux_bound_vp = NULL;
6729 6729 sti->sti_unbind_mp = NULL;
6730 6730
6731 6731 sti->sti_conn_ind_head = NULL;
6732 6732 sti->sti_conn_ind_tail = NULL;
6733 6733
6734 6734 sti->sti_laddr_sa = NULL;
6735 6735 sti->sti_faddr_sa = NULL;
6736 6736
6737 6737 sti->sti_nl7c_flags = 0;
6738 6738 sti->sti_nl7c_uri = NULL;
6739 6739 sti->sti_nl7c_rcv_mp = NULL;
6740 6740
6741 6741 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL);
6742 6742 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL);
6743 6743
6744 6744 return (0);
6745 6745 }
6746 6746
6747 6747 static void
6748 6748 i_sotpi_info_destructor(sotpi_info_t *sti)
6749 6749 {
6750 6750 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
6751 6751 ASSERT(sti->sti_ack_mp == NULL);
6752 6752 ASSERT(sti->sti_discon_ind_mp == NULL);
6753 6753 ASSERT(sti->sti_ux_bound_vp == NULL);
6754 6754 ASSERT(sti->sti_unbind_mp == NULL);
6755 6755
6756 6756 ASSERT(sti->sti_conn_ind_head == NULL);
6757 6757 ASSERT(sti->sti_conn_ind_tail == NULL);
6758 6758
6759 6759 ASSERT(sti->sti_laddr_sa == NULL);
6760 6760 ASSERT(sti->sti_faddr_sa == NULL);
6761 6761
6762 6762 ASSERT(sti->sti_nl7c_flags == 0);
6763 6763 ASSERT(sti->sti_nl7c_uri == NULL);
6764 6764 ASSERT(sti->sti_nl7c_rcv_mp == NULL);
6765 6765
6766 6766 mutex_destroy(&sti->sti_plumb_lock);
6767 6767 cv_destroy(&sti->sti_ack_cv);
6768 6768 }
6769 6769
6770 6770 /*
6771 6771 * Creates and attaches TPI information to the given sonode
6772 6772 */
6773 6773 static boolean_t
6774 6774 sotpi_info_create(struct sonode *so, int kmflags)
6775 6775 {
6776 6776 sotpi_info_t *sti;
6777 6777
6778 6778 ASSERT(so->so_priv == NULL);
6779 6779
6780 6780 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL)
6781 6781 return (B_FALSE);
6782 6782
6783 6783 if (i_sotpi_info_constructor(sti) != 0) {
6784 6784 kmem_free(sti, sizeof (*sti));
6785 6785 return (B_FALSE);
6786 6786 }
6787 6787
6788 6788 so->so_priv = (void *)sti;
6789 6789 return (B_TRUE);
6790 6790 }
6791 6791
6792 6792 /*
6793 6793 * Initializes the TPI information.
6794 6794 */
6795 6795 static void
6796 6796 sotpi_info_init(struct sonode *so)
6797 6797 {
6798 6798 struct vnode *vp = SOTOV(so);
6799 6799 sotpi_info_t *sti = SOTOTPI(so);
6800 6800 time_t now;
6801 6801
6802 6802 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev;
6803 6803 vp->v_rdev = sti->sti_dev;
6804 6804
6805 6805 sti->sti_orig_sp = NULL;
6806 6806
6807 6807 sti->sti_pushcnt = 0;
6808 6808
6809 6809 now = gethrestime_sec();
6810 6810 sti->sti_atime = now;
6811 6811 sti->sti_mtime = now;
6812 6812 sti->sti_ctime = now;
6813 6813
6814 6814 sti->sti_eaddr_mp = NULL;
6815 6815 sti->sti_delayed_error = 0;
6816 6816
6817 6817 sti->sti_provinfo = NULL;
6818 6818
6819 6819 sti->sti_oobcnt = 0;
6820 6820 sti->sti_oobsigcnt = 0;
6821 6821
6822 6822 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
6823 6823
6824 6824 sti->sti_laddr_sa = 0;
6825 6825 sti->sti_faddr_sa = 0;
6826 6826 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0;
6827 6827 sti->sti_laddr_len = sti->sti_faddr_len = 0;
6828 6828
6829 6829 sti->sti_laddr_valid = 0;
6830 6830 sti->sti_faddr_valid = 0;
6831 6831 sti->sti_faddr_noxlate = 0;
6832 6832
6833 6833 sti->sti_direct = 0;
6834 6834
6835 6835 ASSERT(sti->sti_ack_mp == NULL);
6836 6836 ASSERT(sti->sti_ux_bound_vp == NULL);
6837 6837 ASSERT(sti->sti_unbind_mp == NULL);
6838 6838
6839 6839 ASSERT(sti->sti_conn_ind_head == NULL);
6840 6840 ASSERT(sti->sti_conn_ind_tail == NULL);
6841 6841 }
6842 6842
6843 6843 /*
6844 6844 * Given a sonode, grab the TPI info and free any data.
6845 6845 */
6846 6846 static void
6847 6847 sotpi_info_fini(struct sonode *so)
6848 6848 {
6849 6849 sotpi_info_t *sti = SOTOTPI(so);
6850 6850 mblk_t *mp;
6851 6851
6852 6852 ASSERT(sti->sti_discon_ind_mp == NULL);
6853 6853
6854 6854 if ((mp = sti->sti_conn_ind_head) != NULL) {
6855 6855 mblk_t *mp1;
6856 6856
6857 6857 while (mp) {
6858 6858 mp1 = mp->b_next;
6859 6859 mp->b_next = NULL;
6860 6860 freemsg(mp);
6861 6861 mp = mp1;
6862 6862 }
6863 6863 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL;
6864 6864 }
6865 6865
6866 6866 /*
6867 6867 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely
6868 6868 * indirect them. It also uses so_count as a validity test.
6869 6869 */
6870 6870 mutex_enter(&so->so_lock);
6871 6871
6872 6872 if (sti->sti_laddr_sa) {
6873 6873 ASSERT((caddr_t)sti->sti_faddr_sa ==
6874 6874 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen);
6875 6875 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen);
6876 6876 sti->sti_laddr_valid = 0;
6877 6877 sti->sti_faddr_valid = 0;
6878 6878 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2);
6879 6879 sti->sti_laddr_sa = NULL;
6880 6880 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0;
6881 6881 sti->sti_faddr_sa = NULL;
6882 6882 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0;
6883 6883 }
6884 6884
6885 6885 mutex_exit(&so->so_lock);
6886 6886
6887 6887 if ((mp = sti->sti_eaddr_mp) != NULL) {
6888 6888 freemsg(mp);
6889 6889 sti->sti_eaddr_mp = NULL;
6890 6890 sti->sti_delayed_error = 0;
6891 6891 }
6892 6892
6893 6893 if ((mp = sti->sti_ack_mp) != NULL) {
6894 6894 freemsg(mp);
6895 6895 sti->sti_ack_mp = NULL;
6896 6896 }
6897 6897
6898 6898 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) {
6899 6899 sti->sti_nl7c_rcv_mp = NULL;
6900 6900 freemsg(mp);
6901 6901 }
6902 6902 sti->sti_nl7c_rcv_rval = 0;
6903 6903 if (sti->sti_nl7c_uri != NULL) {
6904 6904 nl7c_urifree(so);
6905 6905 /* urifree() cleared nl7c_uri */
6906 6906 }
6907 6907 if (sti->sti_nl7c_flags) {
6908 6908 sti->sti_nl7c_flags = 0;
6909 6909 }
6910 6910
6911 6911 ASSERT(sti->sti_ux_bound_vp == NULL);
6912 6912 if ((mp = sti->sti_unbind_mp) != NULL) {
6913 6913 freemsg(mp);
6914 6914 sti->sti_unbind_mp = NULL;
6915 6915 }
6916 6916 }
6917 6917
6918 6918 /*
6919 6919 * Destroys the TPI information attached to a sonode.
6920 6920 */
6921 6921 static void
6922 6922 sotpi_info_destroy(struct sonode *so)
6923 6923 {
6924 6924 sotpi_info_t *sti = SOTOTPI(so);
6925 6925
6926 6926 i_sotpi_info_destructor(sti);
6927 6927 kmem_free(sti, sizeof (*sti));
6928 6928
6929 6929 so->so_priv = NULL;
6930 6930 }
6931 6931
6932 6932 /*
6933 6933 * Create the global sotpi socket module entry. It will never be freed.
6934 6934 */
6935 6935 smod_info_t *
6936 6936 sotpi_smod_create(void)
6937 6937 {
6938 6938 smod_info_t *smodp;
6939 6939
6940 6940 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP);
6941 6941 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP);
6942 6942 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME);
6943 6943 /*
6944 6944 * Initialize the smod_refcnt to 1 so it will never be freed.
6945 6945 */
6946 6946 smodp->smod_refcnt = 1;
6947 6947 smodp->smod_uc_version = SOCK_UC_VERSION;
6948 6948 smodp->smod_dc_version = SOCK_DC_VERSION;
6949 6949 smodp->smod_sock_create_func = &sotpi_create;
6950 6950 smodp->smod_sock_destroy_func = &sotpi_destroy;
6951 6951 return (smodp);
6952 6952 }
|
↓ open down ↓ |
2014 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX