Print this page
14685 sotpi ops need to be wary of null v_stream
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/sockfs/socktpi.c
+++ new/usr/src/uts/common/fs/sockfs/socktpi.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
|
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015, Joyent, Inc.
25 25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
26 + * Copyright 2022 MNX Cloud, Inc.
26 27 */
27 28
28 29 #include <sys/types.h>
29 30 #include <sys/t_lock.h>
30 31 #include <sys/param.h>
31 32 #include <sys/systm.h>
32 33 #include <sys/buf.h>
33 34 #include <sys/conf.h>
34 35 #include <sys/cred.h>
35 36 #include <sys/kmem.h>
36 37 #include <sys/kmem_impl.h>
37 38 #include <sys/sysmacros.h>
38 39 #include <sys/vfs.h>
39 40 #include <sys/vnode.h>
40 41 #include <sys/debug.h>
41 42 #include <sys/errno.h>
42 43 #include <sys/time.h>
43 44 #include <sys/file.h>
44 45 #include <sys/open.h>
45 46 #include <sys/user.h>
46 47 #include <sys/termios.h>
47 48 #include <sys/stream.h>
48 49 #include <sys/strsubr.h>
49 50 #include <sys/strsun.h>
50 51 #include <sys/suntpi.h>
51 52 #include <sys/ddi.h>
52 53 #include <sys/esunddi.h>
53 54 #include <sys/flock.h>
54 55 #include <sys/modctl.h>
55 56 #include <sys/vtrace.h>
56 57 #include <sys/cmn_err.h>
57 58 #include <sys/pathname.h>
58 59
59 60 #include <sys/socket.h>
60 61 #include <sys/socketvar.h>
61 62 #include <sys/sockio.h>
62 63 #include <netinet/in.h>
63 64 #include <sys/un.h>
64 65 #include <sys/strsun.h>
65 66
66 67 #include <sys/tiuser.h>
67 68 #define _SUN_TPI_VERSION 2
68 69 #include <sys/tihdr.h>
69 70 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */
70 71
71 72 #include <c2/audit.h>
72 73
73 74 #include <inet/common.h>
74 75 #include <inet/ip.h>
75 76 #include <inet/ip6.h>
76 77 #include <inet/tcp.h>
77 78 #include <inet/udp_impl.h>
78 79
79 80 #include <sys/zone.h>
80 81
81 82 #include <fs/sockfs/nl7c.h>
82 83 #include <fs/sockfs/nl7curi.h>
83 84
84 85 #include <fs/sockfs/sockcommon.h>
85 86 #include <fs/sockfs/socktpi.h>
86 87 #include <fs/sockfs/socktpi_impl.h>
87 88
88 89 /*
89 90 * Possible failures when memory can't be allocated. The documented behavior:
90 91 *
91 92 * 5.5: 4.X: XNET:
92 93 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/
93 94 * EINTR
94 95 * (4.X does not document EINTR but returns it)
95 96 * bind: ENOSR - ENOBUFS/ENOSR
96 97 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR
97 98 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR
98 99 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR
99 100 * (4.X getpeername and getsockname do not fail in practice)
100 101 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR
101 102 * listen: - - ENOBUFS
102 103 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/
103 104 * EINTR
104 105 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/
105 106 * EINTR
106 107 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR
107 108 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR
108 109 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR
109 110 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR
110 111 *
111 112 * Resolution. When allocation fails:
112 113 * recv: return EINTR
113 114 * send: return EINTR
114 115 * connect, accept: EINTR
115 116 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep
116 117 * socket, socketpair: ENOBUFS
117 118 * getpeername, getsockname: sleep
118 119 * getsockopt, setsockopt: sleep
119 120 */
120 121
121 122 #ifdef SOCK_TEST
122 123 /*
123 124 * Variables that make sockfs do something other than the standard TPI
124 125 * for the AF_INET transports.
125 126 *
126 127 * solisten_tpi_tcp:
127 128 * TCP can handle a O_T_BIND_REQ with an increased backlog even though
128 129 * the transport is already bound. This is needed to avoid losing the
129 130 * port number should listen() do a T_UNBIND_REQ followed by a
130 131 * O_T_BIND_REQ.
131 132 *
132 133 * soconnect_tpi_udp:
133 134 * UDP and ICMP can handle a T_CONN_REQ.
134 135 * This is needed to make the sequence of connect(), getsockname()
135 136 * return the local IP address used to send packets to the connected to
136 137 * destination.
137 138 *
138 139 * soconnect_tpi_tcp:
139 140 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
140 141 * Set this to non-zero to send TPI conformant messages to TCP in this
141 142 * respect. This is a performance optimization.
142 143 *
143 144 * soaccept_tpi_tcp:
144 145 * TCP can handle a T_CONN_REQ without the acceptor being bound.
145 146 * This is a performance optimization that has been picked up in XTI.
146 147 *
147 148 * soaccept_tpi_multioptions:
148 149 * When inheriting SOL_SOCKET options from the listener to the accepting
149 150 * socket send them as a single message for AF_INET{,6}.
150 151 */
151 152 int solisten_tpi_tcp = 0;
152 153 int soconnect_tpi_udp = 0;
153 154 int soconnect_tpi_tcp = 0;
154 155 int soaccept_tpi_tcp = 0;
155 156 int soaccept_tpi_multioptions = 1;
156 157 #else /* SOCK_TEST */
157 158 #define soconnect_tpi_tcp 0
158 159 #define soconnect_tpi_udp 0
159 160 #define solisten_tpi_tcp 0
160 161 #define soaccept_tpi_tcp 0
161 162 #define soaccept_tpi_multioptions 1
162 163 #endif /* SOCK_TEST */
163 164
164 165 #ifdef SOCK_TEST
165 166 extern int do_useracc;
166 167 extern clock_t sock_test_timelimit;
167 168 #endif /* SOCK_TEST */
168 169
169 170 extern uint32_t ucredsize;
170 171
171 172 /*
172 173 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
173 174 * applications working. Turn on this flag to disable these checks.
174 175 */
175 176 int xnet_skip_checks = 0;
176 177 int xnet_check_print = 0;
177 178 int xnet_truncate_print = 0;
178 179
179 180 static void sotpi_destroy(struct sonode *);
180 181 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
181 182 int, int *, cred_t *cr);
182 183
183 184 static boolean_t sotpi_info_create(struct sonode *, int);
184 185 static void sotpi_info_init(struct sonode *);
185 186 static void sotpi_info_fini(struct sonode *);
186 187 static void sotpi_info_destroy(struct sonode *);
187 188
188 189 /*
189 190 * Do direct function call to the transport layer below; this would
190 191 * also allow the transport to utilize read-side synchronous stream
191 192 * interface if necessary. This is a /etc/system tunable that must
192 193 * not be modified on a running system. By default this is enabled
193 194 * for performance reasons and may be disabled for debugging purposes.
194 195 */
195 196 boolean_t socktpi_direct = B_TRUE;
196 197
197 198 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;
198 199
199 200 extern void sigintr(k_sigset_t *, int);
200 201 extern void sigunintr(k_sigset_t *);
201 202
202 203 static int sotpi_unbind(struct sonode *, int);
203 204
204 205 /* TPI sockfs sonode operations */
205 206 int sotpi_init(struct sonode *, struct sonode *, struct cred *,
206 207 int);
207 208 static int sotpi_accept(struct sonode *, int, struct cred *,
208 209 struct sonode **);
209 210 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
210 211 int, struct cred *);
211 212 static int sotpi_listen(struct sonode *, int, struct cred *);
212 213 static int sotpi_connect(struct sonode *, struct sockaddr *,
213 214 socklen_t, int, int, struct cred *);
214 215 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *,
215 216 struct uio *, struct cred *);
216 217 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *,
217 218 struct uio *, struct cred *);
218 219 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int,
219 220 struct cred *, mblk_t **);
220 221 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
221 222 struct uio *, void *, t_uscalar_t, int);
222 223 static int sodgram_direct(struct sonode *, struct sockaddr *,
223 224 socklen_t, struct uio *, int);
224 225 extern int sotpi_getpeername(struct sonode *, struct sockaddr *,
225 226 socklen_t *, boolean_t, struct cred *);
226 227 static int sotpi_getsockname(struct sonode *, struct sockaddr *,
227 228 socklen_t *, struct cred *);
228 229 static int sotpi_shutdown(struct sonode *, int, struct cred *);
229 230 extern int sotpi_getsockopt(struct sonode *, int, int, void *,
230 231 socklen_t *, int, struct cred *);
231 232 extern int sotpi_setsockopt(struct sonode *, int, int, const void *,
232 233 socklen_t, struct cred *);
233 234 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
234 235 int32_t *);
235 236 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int,
236 237 struct cred *, int32_t *);
237 238 static int sotpi_poll(struct sonode *, short, int, short *,
238 239 struct pollhead **);
239 240 static int sotpi_close(struct sonode *, int, struct cred *);
240 241
241 242 static int i_sotpi_info_constructor(sotpi_info_t *);
242 243 static void i_sotpi_info_destructor(sotpi_info_t *);
243 244
244 245 sonodeops_t sotpi_sonodeops = {
245 246 sotpi_init, /* sop_init */
246 247 sotpi_accept, /* sop_accept */
247 248 sotpi_bind, /* sop_bind */
248 249 sotpi_listen, /* sop_listen */
249 250 sotpi_connect, /* sop_connect */
250 251 sotpi_recvmsg, /* sop_recvmsg */
251 252 sotpi_sendmsg, /* sop_sendmsg */
252 253 sotpi_sendmblk, /* sop_sendmblk */
253 254 sotpi_getpeername, /* sop_getpeername */
|
↓ open down ↓ |
218 lines elided |
↑ open up ↑ |
254 255 sotpi_getsockname, /* sop_getsockname */
255 256 sotpi_shutdown, /* sop_shutdown */
256 257 sotpi_getsockopt, /* sop_getsockopt */
257 258 sotpi_setsockopt, /* sop_setsockopt */
258 259 sotpi_ioctl, /* sop_ioctl */
259 260 sotpi_poll, /* sop_poll */
260 261 sotpi_close, /* sop_close */
261 262 };
262 263
263 264 /*
265 + * Post-close reality check for NULL v_stream...
266 + *
267 + * Kernel callers (e.g. in procfs) may attempt socket operations, after
268 + * holding the vnode, after it has been closed. For TPI sockets, post-close
269 + * operations will have a NULL v_stream (which all functions here assume
270 + * or even ASSERT() is non-NULL). See sotpi_close for where we wipe it out.
271 + *
272 + * If we are in a state where we lost a race to close(), we need to stop ASAP,
273 + * and return the acceptable-as-an-errno EBADF. Because cleanup may be
274 + * required, this macro only checks the v_stream.
275 + *
276 + * Checking should only be relevant for in-kernel other-thread inspectors.
277 + * Userland ones (i.e. same process that opened the socktpi socket) SHOULD be
278 + * protected by higher-level mechanisms. The only in-kernel inspector in the
279 + * source base is procfs, which only accesses get{sockname,peername,sockopt}().
280 + */
281 +#define SOTPI_VN_NOSTREAM(vn) ((vn)->v_stream == NULL)
282 +
283 +/*
264 284 * Return a TPI socket vnode.
265 285 *
266 286 * Note that sockets assume that the driver will clone (either itself
267 287 * or by using the clone driver) i.e. a socket() call will always
268 288 * result in a new vnode being created.
269 289 */
270 290
271 291 /*
272 292 * Common create code for socket and accept. If tso is set the values
273 293 * from that node is used instead of issuing a T_INFO_REQ.
274 294 */
275 295
276 296 /* ARGSUSED */
277 297 static struct sonode *
278 298 sotpi_create(struct sockparams *sp, int family, int type, int protocol,
279 299 int version, int sflags, int *errorp, cred_t *cr)
280 300 {
281 301 struct sonode *so;
282 302 kmem_cache_t *cp;
283 303 int sfamily = family;
284 304
285 305 ASSERT(sp->sp_sdev_info.sd_vnode != NULL);
286 306
287 307 if (family == AF_NCA) {
288 308 /*
289 309 * The request is for an NCA socket so for NL7C use the
290 310 * INET domain instead and mark NL7C_AF_NCA below.
291 311 */
292 312 family = AF_INET;
293 313 /*
294 314 * NL7C is not supported in the non-global zone,
295 315 * we enforce this restriction here.
296 316 */
297 317 if (getzoneid() != GLOBAL_ZONEID) {
298 318 *errorp = ENOTSUP;
299 319 return (NULL);
300 320 }
301 321 }
302 322
303 323 /*
304 324 * to be compatible with old tpi socket implementation ignore
305 325 * sleep flag (sflags) passed in
306 326 */
307 327 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
308 328 so = kmem_cache_alloc(cp, KM_SLEEP);
309 329 if (so == NULL) {
310 330 *errorp = ENOMEM;
311 331 return (NULL);
312 332 }
313 333
314 334 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
315 335 sotpi_info_init(so);
316 336
317 337 if (sfamily == AF_NCA) {
318 338 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA;
319 339 }
320 340
321 341 if (version == SOV_DEFAULT)
322 342 version = so_default_version;
323 343
324 344 so->so_version = (short)version;
325 345 *errorp = 0;
326 346
327 347 return (so);
328 348 }
329 349
330 350 static void
331 351 sotpi_destroy(struct sonode *so)
332 352 {
333 353 kmem_cache_t *cp;
334 354 struct sockparams *origsp;
335 355
336 356 /*
337 357 * If there is a new dealloc function (ie. smod_destroy_func),
338 358 * then it should check the correctness of the ops.
339 359 */
340 360
341 361 ASSERT(so->so_ops == &sotpi_sonodeops);
342 362
343 363 origsp = SOTOTPI(so)->sti_orig_sp;
344 364
345 365 sotpi_info_fini(so);
346 366
347 367 if (so->so_state & SS_FALLBACK_COMP) {
348 368 /*
349 369 * A fallback happend, which means that a sotpi_info_t struct
350 370 * was allocated (as opposed to being allocated from the TPI
351 371 * sonode cache. Therefore we explicitly free the struct
352 372 * here.
353 373 */
354 374 sotpi_info_destroy(so);
355 375 ASSERT(origsp != NULL);
356 376
357 377 origsp->sp_smod_info->smod_sock_destroy_func(so);
358 378 SOCKPARAMS_DEC_REF(origsp);
359 379 } else {
360 380 sonode_fini(so);
361 381 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
362 382 socktpi_cache;
363 383 kmem_cache_free(cp, so);
364 384 }
365 385 }
366 386
367 387 /* ARGSUSED1 */
368 388 int
369 389 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
370 390 {
371 391 major_t maj;
372 392 dev_t newdev;
373 393 struct vnode *vp;
374 394 int error = 0;
375 395 struct stdata *stp;
376 396
377 397 sotpi_info_t *sti = SOTOTPI(so);
378 398
379 399 dprint(1, ("sotpi_init()\n"));
380 400
381 401 /*
382 402 * over write the sleep flag passed in but that is ok
383 403 * as tpi socket does not honor sleep flag.
384 404 */
385 405 flags |= FREAD|FWRITE;
386 406
387 407 /*
388 408 * Record in so_flag that it is a clone.
389 409 */
390 410 if (getmajor(sti->sti_dev) == clone_major)
391 411 so->so_flag |= SOCLONE;
392 412
393 413 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
394 414 (so->so_family == AF_INET || so->so_family == AF_INET6) &&
395 415 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
396 416 so->so_protocol == IPPROTO_IP)) {
397 417 /* Tell tcp or udp that it's talking to sockets */
398 418 flags |= SO_SOCKSTR;
399 419
400 420 /*
401 421 * Here we indicate to socktpi_open() our attempt to
402 422 * make direct calls between sockfs and transport.
403 423 * The final decision is left to socktpi_open().
404 424 */
405 425 sti->sti_direct = 1;
406 426
407 427 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL);
408 428 if (so->so_type == SOCK_STREAM && tso != NULL) {
409 429 if (SOTOTPI(tso)->sti_direct) {
410 430 /*
411 431 * Inherit sti_direct from listener and pass
412 432 * SO_ACCEPTOR open flag to tcp, indicating
413 433 * that this is an accept fast-path instance.
414 434 */
415 435 flags |= SO_ACCEPTOR;
416 436 } else {
417 437 /*
418 438 * sti_direct is not set on listener, meaning
419 439 * that the listener has been converted from
420 440 * a socket to a stream. Ensure that the
421 441 * acceptor inherits these settings.
422 442 */
423 443 sti->sti_direct = 0;
424 444 flags &= ~SO_SOCKSTR;
425 445 }
426 446 }
427 447 }
428 448
429 449 /*
430 450 * Tell local transport that it is talking to sockets.
431 451 */
432 452 if (so->so_family == AF_UNIX) {
433 453 flags |= SO_SOCKSTR;
434 454 }
435 455
436 456 vp = SOTOV(so);
437 457 newdev = vp->v_rdev;
438 458 maj = getmajor(newdev);
439 459 ASSERT(STREAMSTAB(maj));
440 460
441 461 error = stropen(vp, &newdev, flags, cr);
442 462
443 463 stp = vp->v_stream;
444 464 if (error == 0) {
445 465 if (so->so_flag & SOCLONE)
446 466 ASSERT(newdev != vp->v_rdev);
447 467 mutex_enter(&so->so_lock);
448 468 sti->sti_dev = newdev;
449 469 vp->v_rdev = newdev;
450 470 mutex_exit(&so->so_lock);
451 471
452 472 if (stp->sd_flag & STRISTTY) {
453 473 /*
454 474 * this is a post SVR4 tty driver - a socket can not
455 475 * be a controlling terminal. Fail the open.
456 476 */
457 477 (void) sotpi_close(so, flags, cr);
458 478 return (ENOTTY); /* XXX */
459 479 }
460 480
461 481 ASSERT(stp->sd_wrq != NULL);
462 482 sti->sti_provinfo = tpi_findprov(stp->sd_wrq);
463 483
464 484 /*
465 485 * If caller is interested in doing direct function call
466 486 * interface to/from transport module, probe the module
467 487 * directly beneath the streamhead to see if it qualifies.
468 488 *
469 489 * We turn off the direct interface when qualifications fail.
470 490 * In the acceptor case, we simply turn off the sti_direct
471 491 * flag on the socket. We do the fallback after the accept
472 492 * has completed, before the new socket is returned to the
473 493 * application.
474 494 */
475 495 if (sti->sti_direct) {
476 496 queue_t *tq = stp->sd_wrq->q_next;
477 497
478 498 /*
479 499 * sti_direct is currently supported and tested
480 500 * only for tcp/udp; this is the main reason to
481 501 * have the following assertions.
482 502 */
483 503 ASSERT(so->so_family == AF_INET ||
484 504 so->so_family == AF_INET6);
485 505 ASSERT(so->so_protocol == IPPROTO_UDP ||
486 506 so->so_protocol == IPPROTO_TCP ||
487 507 so->so_protocol == IPPROTO_IP);
488 508 ASSERT(so->so_type == SOCK_DGRAM ||
489 509 so->so_type == SOCK_STREAM);
490 510
491 511 /*
492 512 * Abort direct call interface if the module directly
493 513 * underneath the stream head is not defined with the
494 514 * _D_DIRECT flag. This could happen in the tcp or
495 515 * udp case, when some other module is autopushed
496 516 * above it, or for some reasons the expected module
497 517 * isn't purely D_MP (which is the main requirement).
498 518 */
499 519 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
500 520 !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
501 521 int rval;
502 522
503 523 /* Continue on without direct calls */
504 524 sti->sti_direct = 0;
505 525
506 526 /*
507 527 * Cannot issue ioctl on fallback socket since
508 528 * there is no conn associated with the queue.
509 529 * The fallback downcall will notify the proto
510 530 * of the change.
511 531 */
512 532 if (!(flags & SO_ACCEPTOR) &&
513 533 !(flags & SO_FALLBACK)) {
514 534 if ((error = strioctl(vp,
515 535 _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
516 536 cr, &rval)) != 0) {
517 537 (void) sotpi_close(so, flags,
518 538 cr);
519 539 return (error);
520 540 }
521 541 }
522 542 }
523 543 }
524 544
525 545 if (flags & SO_FALLBACK) {
526 546 /*
527 547 * The stream created does not have a conn.
528 548 * do stream set up after conn has been assigned
529 549 */
530 550 return (error);
531 551 }
532 552 if (error = so_strinit(so, tso)) {
533 553 (void) sotpi_close(so, flags, cr);
534 554 return (error);
535 555 }
536 556
537 557 /* Enable sendfile() on AF_UNIX streams */
538 558 if (so->so_family == AF_UNIX && so->so_type == SOCK_STREAM) {
539 559 mutex_enter(&so->so_lock);
540 560 so->so_mode |= SM_SENDFILESUPP;
541 561 mutex_exit(&so->so_lock);
542 562 }
543 563
544 564 /* Wildcard */
545 565 if (so->so_protocol != so->so_sockparams->sp_protocol) {
546 566 int protocol = so->so_protocol;
547 567 /*
548 568 * Issue SO_PROTOTYPE setsockopt.
549 569 */
550 570 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
551 571 &protocol, (t_uscalar_t)sizeof (protocol), cr);
552 572 if (error != 0) {
553 573 (void) sotpi_close(so, flags, cr);
554 574 /*
555 575 * Setsockopt often fails with ENOPROTOOPT but
556 576 * socket() should fail with
557 577 * EPROTONOSUPPORT/EPROTOTYPE.
558 578 */
559 579 return (EPROTONOSUPPORT);
560 580 }
561 581 }
562 582
563 583 } else {
564 584 /*
565 585 * While the same socket can not be reopened (unlike specfs)
566 586 * the stream head sets STREOPENFAIL when the autopush fails.
567 587 */
568 588 if ((stp != NULL) &&
569 589 (stp->sd_flag & STREOPENFAIL)) {
570 590 /*
571 591 * Open failed part way through.
572 592 */
573 593 mutex_enter(&stp->sd_lock);
574 594 stp->sd_flag &= ~STREOPENFAIL;
575 595 mutex_exit(&stp->sd_lock);
576 596 (void) sotpi_close(so, flags, cr);
577 597 return (error);
578 598 /*NOTREACHED*/
579 599 }
580 600 ASSERT(stp == NULL);
581 601 }
582 602 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
583 603 "sockfs open:maj %d vp %p so %p error %d",
584 604 maj, vp, so, error);
585 605 return (error);
586 606 }
587 607
588 608 /*
589 609 * Bind the socket to an unspecified address in sockfs only.
590 610 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
591 611 * required in all cases.
592 612 */
593 613 static void
594 614 so_automatic_bind(struct sonode *so)
595 615 {
596 616 sotpi_info_t *sti = SOTOTPI(so);
597 617 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
598 618
599 619 ASSERT(MUTEX_HELD(&so->so_lock));
600 620 ASSERT(!(so->so_state & SS_ISBOUND));
601 621 ASSERT(sti->sti_unbind_mp);
602 622
603 623 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
604 624 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
605 625 sti->sti_laddr_sa->sa_family = so->so_family;
606 626 so->so_state |= SS_ISBOUND;
607 627 }
608 628
609 629
610 630 /*
611 631 * bind the socket.
612 632 *
613 633 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
614 634 * are passed in we allow rebinding. Note that for backwards compatibility
615 635 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
616 636 * Thus the rebinding code is currently not executed.
617 637 *
618 638 * The constraints for rebinding are:
619 639 * - it is a SOCK_DGRAM, or
620 640 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
621 641 * and no listen() has been done.
622 642 * This rebinding code was added based on some language in the XNET book
623 643 * about not returning EINVAL if the protocol allows rebinding. However,
624 644 * this language is not present in the Posix socket draft. Thus maybe the
625 645 * rebinding logic should be deleted from the source.
626 646 *
627 647 * A null "name" can be used to unbind the socket if:
628 648 * - it is a SOCK_DGRAM, or
629 649 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
630 650 * and no listen() has been done.
631 651 */
632 652 /* ARGSUSED */
633 653 static int
634 654 sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
635 655 socklen_t namelen, int backlog, int flags, struct cred *cr)
636 656 {
637 657 struct T_bind_req bind_req;
638 658 struct T_bind_ack *bind_ack;
639 659 int error = 0;
640 660 mblk_t *mp;
641 661 void *addr;
642 662 t_uscalar_t addrlen;
643 663 int unbind_on_err = 1;
644 664 boolean_t clear_acceptconn_on_err = B_FALSE;
645 665 boolean_t restore_backlog_on_err = B_FALSE;
646 666 int save_so_backlog;
647 667 t_scalar_t PRIM_type = O_T_BIND_REQ;
648 668 boolean_t tcp_udp_xport;
649 669 void *nl7c = NULL;
650 670 sotpi_info_t *sti = SOTOTPI(so);
651 671
652 672 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
653 673 (void *)so, (void *)name, namelen, backlog, flags,
654 674 pr_state(so->so_state, so->so_mode)));
655 675
656 676 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;
657 677
658 678 if (!(flags & _SOBIND_LOCK_HELD)) {
659 679 mutex_enter(&so->so_lock);
660 680 so_lock_single(so); /* Set SOLOCKED */
661 681 } else {
662 682 ASSERT(MUTEX_HELD(&so->so_lock));
663 683 ASSERT(so->so_flag & SOLOCKED);
664 684 }
665 685
666 686 /*
667 687 * Make sure that there is a preallocated unbind_req message
668 688 * before binding. This message is allocated when the socket is
669 689 * created but it might have been consumed.
670 690 */
671 691 if (sti->sti_unbind_mp == NULL) {
672 692 dprintso(so, 1, ("sobind: allocating unbind_req\n"));
673 693 /* NOTE: holding so_lock while sleeping */
674 694 sti->sti_unbind_mp =
675 695 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP,
676 696 cr);
677 697 }
678 698
679 699 if (flags & _SOBIND_REBIND) {
680 700 /*
681 701 * Called from solisten after doing an sotpi_unbind() or
682 702 * potentially without the unbind (latter for AF_INET{,6}).
683 703 */
684 704 ASSERT(name == NULL && namelen == 0);
685 705
686 706 if (so->so_family == AF_UNIX) {
687 707 ASSERT(sti->sti_ux_bound_vp);
688 708 addr = &sti->sti_ux_laddr;
689 709 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
690 710 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
691 711 "addr 0x%p, vp %p\n",
692 712 addrlen,
693 713 (void *)((struct so_ux_addr *)addr)->soua_vp,
694 714 (void *)sti->sti_ux_bound_vp));
695 715 } else {
696 716 addr = sti->sti_laddr_sa;
697 717 addrlen = (t_uscalar_t)sti->sti_laddr_len;
698 718 }
699 719 } else if (flags & _SOBIND_UNSPEC) {
700 720 ASSERT(name == NULL && namelen == 0);
701 721
702 722 /*
703 723 * The caller checked SS_ISBOUND but not necessarily
704 724 * under so_lock
705 725 */
706 726 if (so->so_state & SS_ISBOUND) {
707 727 /* No error */
708 728 goto done;
709 729 }
710 730
711 731 /* Set an initial local address */
712 732 switch (so->so_family) {
713 733 case AF_UNIX:
714 734 /*
715 735 * Use an address with same size as struct sockaddr
716 736 * just like BSD.
717 737 */
718 738 sti->sti_laddr_len =
719 739 (socklen_t)sizeof (struct sockaddr);
720 740 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
721 741 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
722 742 sti->sti_laddr_sa->sa_family = so->so_family;
723 743
724 744 /*
725 745 * Pass down an address with the implicit bind
726 746 * magic number and the rest all zeros.
727 747 * The transport will return a unique address.
728 748 */
729 749 sti->sti_ux_laddr.soua_vp = NULL;
730 750 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
731 751 addr = &sti->sti_ux_laddr;
732 752 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
733 753 break;
734 754
735 755 case AF_INET:
736 756 case AF_INET6:
737 757 /*
738 758 * An unspecified bind in TPI has a NULL address.
739 759 * Set the address in sockfs to have the sa_family.
740 760 */
741 761 sti->sti_laddr_len = (so->so_family == AF_INET) ?
742 762 (socklen_t)sizeof (sin_t) :
743 763 (socklen_t)sizeof (sin6_t);
744 764 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
745 765 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
746 766 sti->sti_laddr_sa->sa_family = so->so_family;
747 767 addr = NULL;
748 768 addrlen = 0;
749 769 break;
750 770
751 771 default:
752 772 /*
753 773 * An unspecified bind in TPI has a NULL address.
754 774 * Set the address in sockfs to be zero length.
755 775 *
756 776 * Can not assume there is a sa_family for all
757 777 * protocol families. For example, AF_X25 does not
758 778 * have a family field.
759 779 */
760 780 bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
761 781 sti->sti_laddr_len = 0; /* XXX correct? */
762 782 addr = NULL;
763 783 addrlen = 0;
764 784 break;
765 785 }
766 786
767 787 } else {
768 788 if (so->so_state & SS_ISBOUND) {
769 789 /*
770 790 * If it is ok to rebind the socket, first unbind
771 791 * with the transport. A rebind to the NULL address
772 792 * is interpreted as an unbind.
773 793 * Note that a bind to NULL in BSD does unbind the
774 794 * socket but it fails with EINVAL.
775 795 * Note that regular sockets set SOV_SOCKBSD i.e.
776 796 * _SOBIND_SOCKBSD gets set here hence no type of
777 797 * socket does currently allow rebinding.
778 798 *
779 799 * If the name is NULL just do an unbind.
780 800 */
781 801 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
782 802 name != NULL) {
783 803 error = EINVAL;
784 804 unbind_on_err = 0;
785 805 eprintsoline(so, error);
786 806 goto done;
787 807 }
788 808 if ((so->so_mode & SM_CONNREQUIRED) &&
789 809 (so->so_state & SS_CANTREBIND)) {
790 810 error = EINVAL;
791 811 unbind_on_err = 0;
792 812 eprintsoline(so, error);
793 813 goto done;
794 814 }
795 815 error = sotpi_unbind(so, 0);
796 816 if (error) {
797 817 eprintsoline(so, error);
798 818 goto done;
799 819 }
800 820 ASSERT(!(so->so_state & SS_ISBOUND));
801 821 if (name == NULL) {
802 822 so->so_state &=
803 823 ~(SS_ISCONNECTED|SS_ISCONNECTING);
804 824 goto done;
805 825 }
806 826 }
807 827
808 828 /* X/Open requires this check */
809 829 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
810 830 if (xnet_check_print) {
811 831 printf("sockfs: X/Open bind state check "
812 832 "caused EINVAL\n");
813 833 }
814 834 error = EINVAL;
815 835 goto done;
816 836 }
817 837
818 838 switch (so->so_family) {
819 839 case AF_UNIX:
820 840 /*
821 841 * All AF_UNIX addresses are nul terminated
822 842 * when copied (copyin_name) in so the minimum
823 843 * length is 3 bytes.
824 844 */
825 845 if (name == NULL ||
826 846 (ssize_t)namelen <= sizeof (short) + 1) {
827 847 error = EISDIR;
828 848 eprintsoline(so, error);
829 849 goto done;
830 850 }
831 851 /*
832 852 * Verify so_family matches the bound family.
833 853 * BSD does not check this for AF_UNIX resulting
834 854 * in funny mknods.
835 855 */
836 856 if (name->sa_family != so->so_family) {
837 857 error = EAFNOSUPPORT;
838 858 goto done;
839 859 }
840 860 break;
841 861 case AF_INET:
842 862 if (name == NULL) {
843 863 error = EINVAL;
844 864 eprintsoline(so, error);
845 865 goto done;
846 866 }
847 867 if ((size_t)namelen != sizeof (sin_t)) {
848 868 error = name->sa_family != so->so_family ?
849 869 EAFNOSUPPORT : EINVAL;
850 870 eprintsoline(so, error);
851 871 goto done;
852 872 }
853 873 if ((flags & _SOBIND_XPG4_2) &&
854 874 (name->sa_family != so->so_family)) {
855 875 /*
856 876 * This check has to be made for X/Open
857 877 * sockets however application failures have
858 878 * been observed when it is applied to
859 879 * all sockets.
860 880 */
861 881 error = EAFNOSUPPORT;
862 882 eprintsoline(so, error);
863 883 goto done;
864 884 }
865 885 /*
866 886 * Force a zero sa_family to match so_family.
867 887 *
868 888 * Some programs like inetd(8) don't set the
869 889 * family field. Other programs leave
870 890 * sin_family set to garbage - SunOS 4.X does
871 891 * not check the family field on a bind.
872 892 * We use the family field that
873 893 * was passed in to the socket() call.
874 894 */
875 895 name->sa_family = so->so_family;
876 896 break;
877 897
878 898 case AF_INET6: {
879 899 #ifdef DEBUG
880 900 sin6_t *sin6 = (sin6_t *)name;
881 901 #endif /* DEBUG */
882 902
883 903 if (name == NULL) {
884 904 error = EINVAL;
885 905 eprintsoline(so, error);
886 906 goto done;
887 907 }
888 908 if ((size_t)namelen != sizeof (sin6_t)) {
889 909 error = name->sa_family != so->so_family ?
890 910 EAFNOSUPPORT : EINVAL;
891 911 eprintsoline(so, error);
892 912 goto done;
893 913 }
894 914 if (name->sa_family != so->so_family) {
895 915 /*
896 916 * With IPv6 we require the family to match
897 917 * unlike in IPv4.
898 918 */
899 919 error = EAFNOSUPPORT;
900 920 eprintsoline(so, error);
901 921 goto done;
902 922 }
903 923 #ifdef DEBUG
904 924 /*
905 925 * Verify that apps don't forget to clear
906 926 * sin6_scope_id etc
907 927 */
908 928 if (sin6->sin6_scope_id != 0 &&
909 929 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
910 930 zcmn_err(getzoneid(), CE_WARN,
911 931 "bind with uninitialized sin6_scope_id "
912 932 "(%d) on socket. Pid = %d\n",
913 933 (int)sin6->sin6_scope_id,
914 934 (int)curproc->p_pid);
915 935 }
916 936 if (sin6->__sin6_src_id != 0) {
917 937 zcmn_err(getzoneid(), CE_WARN,
918 938 "bind with uninitialized __sin6_src_id "
919 939 "(%d) on socket. Pid = %d\n",
920 940 (int)sin6->__sin6_src_id,
921 941 (int)curproc->p_pid);
922 942 }
923 943 #endif /* DEBUG */
924 944 break;
925 945 }
926 946 default:
927 947 /*
928 948 * Don't do any length or sa_family check to allow
929 949 * non-sockaddr style addresses.
930 950 */
931 951 if (name == NULL) {
932 952 error = EINVAL;
933 953 eprintsoline(so, error);
934 954 goto done;
935 955 }
936 956 break;
937 957 }
938 958
939 959 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) {
940 960 error = ENAMETOOLONG;
941 961 eprintsoline(so, error);
942 962 goto done;
943 963 }
944 964 /*
945 965 * Save local address.
946 966 */
947 967 sti->sti_laddr_len = (socklen_t)namelen;
948 968 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
949 969 bcopy(name, sti->sti_laddr_sa, namelen);
950 970
951 971 addr = sti->sti_laddr_sa;
952 972 addrlen = (t_uscalar_t)sti->sti_laddr_len;
953 973 switch (so->so_family) {
954 974 case AF_INET6:
955 975 case AF_INET:
956 976 break;
957 977 case AF_UNIX: {
958 978 struct sockaddr_un *soun =
959 979 (struct sockaddr_un *)sti->sti_laddr_sa;
960 980 struct vnode *vp, *rvp;
961 981 struct vattr vattr;
962 982
963 983 ASSERT(sti->sti_ux_bound_vp == NULL);
964 984 /*
965 985 * Create vnode for the specified path name.
966 986 * Keep vnode held with a reference in sti_ux_bound_vp.
967 987 * Use the vnode pointer as the address used in the
968 988 * bind with the transport.
969 989 *
970 990 * Use the same mode as in BSD. In particular this does
971 991 * not observe the umask.
972 992 */
973 993 /* MAXPATHLEN + soun_family + nul termination */
974 994 if (sti->sti_laddr_len >
975 995 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
976 996 error = ENAMETOOLONG;
977 997 eprintsoline(so, error);
978 998 goto done;
979 999 }
980 1000 vattr.va_type = VSOCK;
981 1001 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask;
982 1002 vattr.va_mask = AT_TYPE|AT_MODE;
983 1003 /* NOTE: holding so_lock */
984 1004 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
985 1005 EXCL, 0, &vp, CRMKNOD, 0, 0);
986 1006 if (error) {
987 1007 if (error == EEXIST)
988 1008 error = EADDRINUSE;
989 1009 eprintsoline(so, error);
990 1010 goto done;
991 1011 }
992 1012 /*
993 1013 * Establish pointer from the underlying filesystem
994 1014 * vnode to the socket node.
995 1015 * sti_ux_bound_vp and v_stream->sd_vnode form the
|
↓ open down ↓ |
722 lines elided |
↑ open up ↑ |
996 1016 * cross-linkage between the underlying filesystem
997 1017 * node and the socket node.
998 1018 */
999 1019
1000 1020 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) {
1001 1021 VN_HOLD(rvp);
1002 1022 VN_RELE(vp);
1003 1023 vp = rvp;
1004 1024 }
1005 1025
1006 - ASSERT(SOTOV(so)->v_stream);
1026 + ASSERT(SOTOV(so)->v_stream != NULL);
1007 1027 mutex_enter(&vp->v_lock);
1008 1028 vp->v_stream = SOTOV(so)->v_stream;
1009 1029 sti->sti_ux_bound_vp = vp;
1010 1030 mutex_exit(&vp->v_lock);
1011 1031
1012 1032 /*
1013 1033 * Use the vnode pointer value as a unique address
1014 1034 * (together with the magic number to avoid conflicts
1015 1035 * with implicit binds) in the transport provider.
1016 1036 */
1017 1037 sti->sti_ux_laddr.soua_vp =
1018 1038 (void *)sti->sti_ux_bound_vp;
1019 1039 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
1020 1040 addr = &sti->sti_ux_laddr;
1021 1041 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
1022 1042 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
1023 1043 addrlen,
1024 1044 (void *)((struct so_ux_addr *)addr)->soua_vp));
1025 1045 break;
1026 1046 }
1027 1047 } /* end switch (so->so_family) */
1028 1048 }
1029 1049
1030 1050 /*
1031 1051 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
1032 1052 * the transport can start passing up T_CONN_IND messages
1033 1053 * as soon as it receives the bind req and strsock_proto()
1034 1054 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
1035 1055 */
1036 1056 if (flags & _SOBIND_LISTEN) {
1037 1057 if ((so->so_state & SS_ACCEPTCONN) == 0)
1038 1058 clear_acceptconn_on_err = B_TRUE;
1039 1059 save_so_backlog = so->so_backlog;
1040 1060 restore_backlog_on_err = B_TRUE;
1041 1061 so->so_state |= SS_ACCEPTCONN;
1042 1062 so->so_backlog = backlog;
1043 1063 }
1044 1064
1045 1065 /*
1046 1066 * If NL7C addr(s) have been configured check for addr/port match,
1047 1067 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C.
1048 1068 *
1049 1069 * NL7C supports the TCP transport only so check AF_INET and AF_INET6
1050 1070 * family sockets only. If match mark as such.
1051 1071 */
1052 1072 if (nl7c_enabled && ((addr != NULL &&
1053 1073 (so->so_family == AF_INET || so->so_family == AF_INET6) &&
1054 1074 (nl7c = nl7c_lookup_addr(addr, addrlen))) ||
1055 1075 sti->sti_nl7c_flags == NL7C_AF_NCA)) {
1056 1076 /*
1057 1077 * NL7C is not supported in non-global zones,
1058 1078 * we enforce this restriction here.
1059 1079 */
1060 1080 if (so->so_zoneid == GLOBAL_ZONEID) {
1061 1081 /* An NL7C socket, mark it */
1062 1082 sti->sti_nl7c_flags |= NL7C_ENABLED;
1063 1083 if (nl7c == NULL) {
1064 1084 /*
1065 1085 * Was an AF_NCA bind() so add it to the
1066 1086 * addr list for reporting purposes.
1067 1087 */
1068 1088 nl7c = nl7c_add_addr(addr, addrlen);
1069 1089 }
1070 1090 } else
1071 1091 nl7c = NULL;
1072 1092 }
1073 1093
1074 1094 /*
1075 1095 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
1076 1096 * for other transports we will send in a O_T_BIND_REQ.
1077 1097 */
1078 1098 if (tcp_udp_xport &&
1079 1099 (so->so_family == AF_INET || so->so_family == AF_INET6))
1080 1100 PRIM_type = T_BIND_REQ;
1081 1101
1082 1102 bind_req.PRIM_type = PRIM_type;
1083 1103 bind_req.ADDR_length = addrlen;
1084 1104 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
1085 1105 bind_req.CONIND_number = backlog;
1086 1106 /* NOTE: holding so_lock while sleeping */
1087 1107 mp = soallocproto2(&bind_req, sizeof (bind_req),
1088 1108 addr, addrlen, 0, _ALLOC_SLEEP, cr);
1089 1109 sti->sti_laddr_valid = 0;
1090 1110
1091 1111 /* Done using sti_laddr_sa - can drop the lock */
1092 1112 mutex_exit(&so->so_lock);
1093 1113
1094 1114 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1095 1115 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
1096 1116 if (error) {
1097 1117 eprintsoline(so, error);
1098 1118 mutex_enter(&so->so_lock);
1099 1119 goto done;
1100 1120 }
1101 1121
1102 1122 mutex_enter(&so->so_lock);
1103 1123 error = sowaitprim(so, PRIM_type, T_BIND_ACK,
1104 1124 (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
1105 1125 if (error) {
1106 1126 eprintsoline(so, error);
1107 1127 goto done;
1108 1128 }
1109 1129 ASSERT(mp);
1110 1130 /*
1111 1131 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1112 1132 * strsock_proto while the lock was dropped above, the bind
1113 1133 * is allowed to complete.
1114 1134 */
1115 1135
1116 1136 /* Mark as bound. This will be undone if we detect errors below. */
1117 1137 if (flags & _SOBIND_NOXLATE) {
1118 1138 ASSERT(so->so_family == AF_UNIX);
1119 1139 sti->sti_faddr_noxlate = 1;
1120 1140 }
1121 1141 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
1122 1142 so->so_state |= SS_ISBOUND;
1123 1143 ASSERT(sti->sti_unbind_mp);
1124 1144
1125 1145 /* note that we've already set SS_ACCEPTCONN above */
1126 1146
1127 1147 /*
1128 1148 * Recompute addrlen - an unspecified bind sent down an
1129 1149 * address of length zero but we expect the appropriate length
1130 1150 * in return.
1131 1151 */
1132 1152 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
1133 1153 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len);
1134 1154
1135 1155 bind_ack = (struct T_bind_ack *)mp->b_rptr;
1136 1156 /*
1137 1157 * The alignment restriction is really too strict but
1138 1158 * we want enough alignment to inspect the fields of
1139 1159 * a sockaddr_in.
1140 1160 */
1141 1161 addr = sogetoff(mp, bind_ack->ADDR_offset,
1142 1162 bind_ack->ADDR_length,
1143 1163 __TPI_ALIGN_SIZE);
1144 1164 if (addr == NULL) {
1145 1165 freemsg(mp);
1146 1166 error = EPROTO;
1147 1167 eprintsoline(so, error);
1148 1168 goto done;
1149 1169 }
1150 1170 if (!(flags & _SOBIND_UNSPEC)) {
1151 1171 /*
1152 1172 * Verify that the transport didn't return something we
1153 1173 * did not want e.g. an address other than what we asked for.
1154 1174 *
1155 1175 * NOTE: These checks would go away if/when we switch to
1156 1176 * using the new TPI (in which the transport would fail
1157 1177 * the request instead of assigning a different address).
1158 1178 *
1159 1179 * NOTE2: For protocols that we don't know (i.e. any
1160 1180 * other than AF_INET6, AF_INET and AF_UNIX), we
1161 1181 * cannot know if the transport should be expected to
1162 1182 * return the same address as that requested.
1163 1183 *
1164 1184 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
1165 1185 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
1166 1186 *
1167 1187 * For example, in the case of netatalk it may be
1168 1188 * inappropriate for the transport to return the
1169 1189 * requested address (as it may have allocated a local
1170 1190 * port number in behaviour similar to that of an
1171 1191 * AF_INET bind request with a port number of zero).
1172 1192 *
1173 1193 * Given the definition of O_T_BIND_REQ, where the
1174 1194 * transport may bind to an address other than the
1175 1195 * requested address, it's not possible to determine
1176 1196 * whether a returned address that differs from the
1177 1197 * requested address is a reason to fail (because the
1178 1198 * requested address was not available) or succeed
1179 1199 * (because the transport allocated an appropriate
1180 1200 * address and/or port).
1181 1201 *
1182 1202 * sockfs currently requires that the transport return
1183 1203 * the requested address in the T_BIND_ACK, unless
1184 1204 * there is code here to allow for any discrepancy.
1185 1205 * Such code exists for AF_INET and AF_INET6.
1186 1206 *
1187 1207 * Netatalk chooses to return the requested address
1188 1208 * rather than the (correct) allocated address. This
1189 1209 * means that netatalk violates the TPI specification
1190 1210 * (and would not function correctly if used from a
1191 1211 * TLI application), but it does mean that it works
1192 1212 * with sockfs.
1193 1213 *
1194 1214 * As noted above, using the newer XTI bind primitive
1195 1215 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
1196 1216 * allow sockfs to be more sure about whether or not
1197 1217 * the bind request had succeeded (as transports are
1198 1218 * not permitted to bind to a different address than
1199 1219 * that requested - they must return failure).
1200 1220 * Unfortunately, support for T_BIND_REQ may not be
1201 1221 * present in all transport implementations (netatalk,
1202 1222 * for example, doesn't have it), making the
1203 1223 * transition difficult.
1204 1224 */
1205 1225 if (bind_ack->ADDR_length != addrlen) {
1206 1226 /* Assumes that the requested address was in use */
1207 1227 freemsg(mp);
1208 1228 error = EADDRINUSE;
1209 1229 eprintsoline(so, error);
1210 1230 goto done;
1211 1231 }
1212 1232
1213 1233 switch (so->so_family) {
1214 1234 case AF_INET6:
1215 1235 case AF_INET: {
1216 1236 sin_t *rname, *aname;
1217 1237
1218 1238 rname = (sin_t *)addr;
1219 1239 aname = (sin_t *)sti->sti_laddr_sa;
1220 1240
1221 1241 /*
1222 1242 * Take advantage of the alignment
1223 1243 * of sin_port and sin6_port which fall
1224 1244 * in the same place in their data structures.
1225 1245 * Just use sin_port for either address family.
1226 1246 *
1227 1247 * This may become a problem if (heaven forbid)
1228 1248 * there's a separate ipv6port_reserved... :-P
1229 1249 *
1230 1250 * Binding to port 0 has the semantics of letting
1231 1251 * the transport bind to any port.
1232 1252 *
1233 1253 * If the transport is TCP or UDP since we had sent
1234 1254 * a T_BIND_REQ we would not get a port other than
1235 1255 * what we asked for.
1236 1256 */
1237 1257 if (tcp_udp_xport) {
1238 1258 /*
1239 1259 * Pick up the new port number if we bound to
1240 1260 * port 0.
1241 1261 */
1242 1262 if (aname->sin_port == 0)
1243 1263 aname->sin_port = rname->sin_port;
1244 1264 sti->sti_laddr_valid = 1;
1245 1265 break;
1246 1266 }
1247 1267 if (aname->sin_port != 0 &&
1248 1268 aname->sin_port != rname->sin_port) {
1249 1269 freemsg(mp);
1250 1270 error = EADDRINUSE;
1251 1271 eprintsoline(so, error);
1252 1272 goto done;
1253 1273 }
1254 1274 /*
1255 1275 * Pick up the new port number if we bound to port 0.
1256 1276 */
1257 1277 aname->sin_port = rname->sin_port;
1258 1278
1259 1279 /*
1260 1280 * Unfortunately, addresses aren't _quite_ the same.
1261 1281 */
1262 1282 if (so->so_family == AF_INET) {
1263 1283 if (aname->sin_addr.s_addr !=
1264 1284 rname->sin_addr.s_addr) {
1265 1285 freemsg(mp);
1266 1286 error = EADDRNOTAVAIL;
1267 1287 eprintsoline(so, error);
1268 1288 goto done;
1269 1289 }
1270 1290 } else {
1271 1291 sin6_t *rname6 = (sin6_t *)rname;
1272 1292 sin6_t *aname6 = (sin6_t *)aname;
1273 1293
1274 1294 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
1275 1295 &rname6->sin6_addr)) {
1276 1296 freemsg(mp);
1277 1297 error = EADDRNOTAVAIL;
1278 1298 eprintsoline(so, error);
1279 1299 goto done;
1280 1300 }
1281 1301 }
1282 1302 break;
1283 1303 }
1284 1304 case AF_UNIX:
1285 1305 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) {
1286 1306 freemsg(mp);
1287 1307 error = EADDRINUSE;
1288 1308 eprintsoline(so, error);
1289 1309 eprintso(so,
1290 1310 ("addrlen %d, addr 0x%x, vp %p\n",
1291 1311 addrlen, *((int *)addr),
1292 1312 (void *)sti->sti_ux_bound_vp));
1293 1313 goto done;
1294 1314 }
1295 1315 sti->sti_laddr_valid = 1;
1296 1316 break;
1297 1317 default:
1298 1318 /*
1299 1319 * NOTE: This assumes that addresses can be
1300 1320 * byte-compared for equivalence.
1301 1321 */
1302 1322 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) {
1303 1323 freemsg(mp);
1304 1324 error = EADDRINUSE;
1305 1325 eprintsoline(so, error);
1306 1326 goto done;
1307 1327 }
1308 1328 /*
1309 1329 * Don't mark sti_laddr_valid, as we cannot be
1310 1330 * sure that the returned address is the real
1311 1331 * bound address when talking to an unknown
1312 1332 * transport.
1313 1333 */
1314 1334 break;
1315 1335 }
1316 1336 } else {
1317 1337 /*
1318 1338 * Save the returned address for getsockname.
1319 1339 * Needed for unspecific bind unless transport supports
1320 1340 * the TI_GETMYNAME ioctl.
1321 1341 * Do this for AF_INET{,6} even though they do, as
1322 1342 * caching info here is much better performance than
1323 1343 * a TPI/STREAMS trip to the transport for getsockname.
1324 1344 * Any which can't for some reason _must_ _not_ set
1325 1345 * sti_laddr_valid here for the caching version of
1326 1346 * getsockname to not break;
1327 1347 */
1328 1348 switch (so->so_family) {
1329 1349 case AF_UNIX:
1330 1350 /*
1331 1351 * Record the address bound with the transport
1332 1352 * for use by socketpair.
1333 1353 */
1334 1354 bcopy(addr, &sti->sti_ux_laddr, addrlen);
1335 1355 sti->sti_laddr_valid = 1;
1336 1356 break;
1337 1357 case AF_INET:
1338 1358 case AF_INET6:
1339 1359 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
1340 1360 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
1341 1361 sti->sti_laddr_valid = 1;
1342 1362 break;
1343 1363 default:
1344 1364 /*
1345 1365 * Don't mark sti_laddr_valid, as we cannot be
1346 1366 * sure that the returned address is the real
1347 1367 * bound address when talking to an unknown
1348 1368 * transport.
1349 1369 */
1350 1370 break;
1351 1371 }
1352 1372 }
1353 1373
1354 1374 if (nl7c != NULL) {
1355 1375 /* Register listen()er sonode pointer with NL7C */
1356 1376 nl7c_listener_addr(nl7c, so);
1357 1377 }
1358 1378
1359 1379 freemsg(mp);
1360 1380
1361 1381 done:
1362 1382 if (error) {
1363 1383 /* reset state & backlog to values held on entry */
1364 1384 if (clear_acceptconn_on_err == B_TRUE)
1365 1385 so->so_state &= ~SS_ACCEPTCONN;
1366 1386 if (restore_backlog_on_err == B_TRUE)
1367 1387 so->so_backlog = save_so_backlog;
1368 1388
1369 1389 if (unbind_on_err && so->so_state & SS_ISBOUND) {
1370 1390 int err;
1371 1391
1372 1392 err = sotpi_unbind(so, 0);
1373 1393 /* LINTED - statement has no consequent: if */
1374 1394 if (err) {
1375 1395 eprintsoline(so, error);
1376 1396 } else {
1377 1397 ASSERT(!(so->so_state & SS_ISBOUND));
1378 1398 }
1379 1399 }
1380 1400 }
1381 1401 if (!(flags & _SOBIND_LOCK_HELD)) {
1382 1402 so_unlock_single(so, SOLOCKED);
1383 1403 mutex_exit(&so->so_lock);
1384 1404 } else {
1385 1405 ASSERT(MUTEX_HELD(&so->so_lock));
1386 1406 ASSERT(so->so_flag & SOLOCKED);
1387 1407 }
1388 1408 return (error);
1389 1409 }
1390 1410
1391 1411 /*
 * Bind the socket. This is a thin wrapper around sotpi_bindlisten().
 *
 * Without _SOBIND_SOCKETPAIR in flags the request is passed through with
 * a backlog of 0. With _SOBIND_SOCKETPAIR set, that flag is stripped from
 * flags and a backlog of 1 is passed instead.
 *
 * Returns whatever sotpi_bindlisten() returns.
 */
1392 1412 static int
1393 1413 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
1394 1414 int flags, struct cred *cr)
1395 1415 {
1396 1416 if ((flags & _SOBIND_SOCKETPAIR) == 0)
1397 1417 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr));
1398 1418
/* The flag only selects the backlog here; it is not passed further down. */
1399 1419 flags &= ~_SOBIND_SOCKETPAIR;
1400 1420 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr));
1401 1421 }
1402 1422
1403 1423 /*
1404 1424 * Unbind a socket - used when bind() fails, when bind() specifies a NULL
1405 1425 * address, or when listen needs to unbind and bind.
1406 1426 * If the _SOUNBIND_REBIND flag is specified the addresses are retained
1407 1427 * so that a sobind can pick them up.
 *
 * The caller must hold so_lock and have SOLOCKED set; both still hold on
 * return. so_lock is dropped while the M_FLUSH and the T_UNBIND_REQ are
 * sent down the stream and is retaken before waiting for the
 * acknowledgement.
 *
 * Returns 0 on success, EINVAL if the socket is not bound, or the error
 * from kstrputmsg() or sowaitokack(). On an error return this function
 * has not modified so_state or the saved addresses.
1408 1428 */
1409 1429 static int
1410 1430 sotpi_unbind(struct sonode *so, int flags)
1411 1431 {
1412 1432 struct T_unbind_req unbind_req;
1413 1433 int error = 0;
1414 1434 mblk_t *mp;
1415 1435 sotpi_info_t *sti = SOTOTPI(so);
1416 1436
1417 1437 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
1418 1438 (void *)so, flags, pr_state(so->so_state, so->so_mode)));
1419 1439
1420 1440 ASSERT(MUTEX_HELD(&so->so_lock));
1421 1441 ASSERT(so->so_flag & SOLOCKED);
1422 1442
1423 1443 if (!(so->so_state & SS_ISBOUND)) {
1424 1444 error = EINVAL;
1425 1445 eprintsoline(so, error);
1426 1446 goto done;
1427 1447 }
1428 1448
/* so_lock is not held across the flush and kstrputmsg() below. */
1429 1449 mutex_exit(&so->so_lock);
1430 1450
1431 1451 /*
1432 1452 * Flush the read and write side (except stream head read queue)
1433 1453 * and send down T_UNBIND_REQ.
 *
 * NOTE(review): strvp2wq(SOTOV(so)) presumably dereferences v_stream;
 * confirm that v_stream cannot be NULL on this path.
1434 1454 */
1435 1455 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1436 1456
1437 1457 unbind_req.PRIM_type = T_UNBIND_REQ;
1438 1458 mp = soallocproto1(&unbind_req, sizeof (unbind_req),
1439 1459 0, _ALLOC_SLEEP, CRED());
1440 1460 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1441 1461 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
/* Retake so_lock first: the "done" path asserts that it is held. */
1442 1462 mutex_enter(&so->so_lock);
1443 1463 if (error) {
1444 1464 eprintsoline(so, error);
1445 1465 goto done;
1446 1466 }
1447 1467
1448 1468 error = sowaitokack(so, T_UNBIND_REQ);
1449 1469 if (error) {
1450 1470 eprintsoline(so, error);
1451 1471 goto done;
1452 1472 }
1453 1473
1454 1474 /*
1455 1475 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1456 1476 * strsock_proto while the lock was dropped above, the unbind
1457 1477 * is allowed to complete.
1458 1478 */
1459 1479 if (!(flags & _SOUNBIND_REBIND)) {
1460 1480 /*
1461 1481 * Clear out bound address.
1462 1482 */
1463 1483 vnode_t *vp;
1464 1484
1465 1485 if ((vp = sti->sti_ux_bound_vp) != NULL) {
1466 1486 sti->sti_ux_bound_vp = NULL;
1467 1487 vn_rele_stream(vp);
1468 1488 }
1469 1489 /* Clear out address */
1470 1490 sti->sti_laddr_len = 0;
1471 1491 }
/* Runs for a rebind too: only the address teardown above is skipped. */
1472 1492 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
1473 1493 sti->sti_laddr_valid = 0;
1474 1494
1475 1495 done:
1476 1496
1477 1497 /* If the caller held the lock don't release it here */
1478 1498 ASSERT(MUTEX_HELD(&so->so_lock));
1479 1499 ASSERT(so->so_flag & SOLOCKED);
1480 1500
1481 1501 return (error);
1482 1502 }
1483 1503
1484 1504 /*
1485 1505 * listen on the socket.
1486 1506 * For TPI conforming transports this has to first unbind with the transport
1487 1507 * and then bind again using the new backlog.
 *
 * Returns EOPNOTSUPP for a T_CLTS transport, and EINVAL for a connected
 * socket or an unbound AF_UNIX socket; otherwise 0 or the error from
 * sotpi_unbind() or sotpi_bindlisten(). The checks made before so_lock is
 * taken read so_state without the lock.
 *
 * so_lock and SOLOCKED are acquired here and released before returning.
1488 1508 */
1489 1509 /* ARGSUSED */
1490 1510 int
1491 1511 sotpi_listen(struct sonode *so, int backlog, struct cred *cr)
1492 1512 {
1493 1513 int error = 0;
1494 1514 sotpi_info_t *sti = SOTOTPI(so);
1495 1515
1496 1516 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
1497 1517 (void *)so, backlog, pr_state(so->so_state, so->so_mode)));
1498 1518
1499 1519 if (sti->sti_serv_type == T_CLTS)
1500 1520 return (EOPNOTSUPP);
1501 1521
1502 1522 /*
1503 1523 * If the socket is ready to accept connections already, then
1504 1524 * return without doing anything. This avoids a problem where
1505 1525 * a second listen() call fails if a connection is pending and
1506 1526 * leaves the socket unbound. Only when we are not unbinding
1507 1527 * with the transport can we safely increase the backlog.
 *
 * The shortcut is skipped only for AF_INET{,6} sockets when
 * solisten_tpi_tcp is zero.
1508 1528 */
1509 1529 if (so->so_state & SS_ACCEPTCONN &&
1510 1530 !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
1511 1531 /*CONSTCOND*/
1512 1532 !solisten_tpi_tcp))
1513 1533 return (0);
1514 1534
1515 1535 if (so->so_state & SS_ISCONNECTED)
1516 1536 return (EINVAL);
1517 1537
1518 1538 mutex_enter(&so->so_lock);
1519 1539 so_lock_single(so); /* Set SOLOCKED */
1520 1540
1521 1541 /*
1522 1542 * If the listen doesn't change the backlog we do nothing.
1523 1543 * This avoids an EPROTO error from the transport.
1524 1544 */
1525 1545 if ((so->so_state & SS_ACCEPTCONN) &&
1526 1546 so->so_backlog == backlog)
1527 1547 goto done;
1528 1548
1529 1549 if (!(so->so_state & SS_ISBOUND)) {
1530 1550 /*
1531 1551 * Must have been explicitly bound in the UNIX domain.
1532 1552 */
1533 1553 if (so->so_family == AF_UNIX) {
1534 1554 error = EINVAL;
1535 1555 goto done;
1536 1556 }
/* Not bound yet: do an unspecified bind that also sets up listening. */
1537 1557 error = sotpi_bindlisten(so, NULL, 0, backlog,
1538 1558 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
1539 1559 } else if (backlog > 0) {
1540 1560 /*
1541 1561 * AF_INET{,6} hack to avoid losing the port.
1542 1562 * Assumes that all AF_INET{,6} transports can handle a
1543 1563 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
1544 1564 * has already bound thus it is possible to avoid the unbind.
1545 1565 */
1546 1566 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) &&
1547 1567 /*CONSTCOND*/
1548 1568 !solisten_tpi_tcp)) {
1549 1569 error = sotpi_unbind(so, _SOUNBIND_REBIND);
1550 1570 if (error)
1551 1571 goto done;
1552 1572 }
1553 1573 error = sotpi_bindlisten(so, NULL, 0, backlog,
1554 1574 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
1555 1575 } else {
/* Already bound and backlog <= 0: only record the listen state here. */
1556 1576 so->so_state |= SS_ACCEPTCONN;
1557 1577 so->so_backlog = backlog;
1558 1578 }
1559 1579 if (error)
1560 1580 goto done;
1561 1581 ASSERT(so->so_state & SS_ACCEPTCONN);
1562 1582 done:
1563 1583 so_unlock_single(so, SOLOCKED);
1564 1584 mutex_exit(&so->so_lock);
1565 1585 return (error);
1566 1586 }
1567 1587
1568 1588 /*
1569 1589 * Disconnect either a specified seqno or all (-1).
1570 1590 * The former is used on listening sockets only.
1571 1591 *
1572 1592 * When seqno == -1 sodisconnect could call sotpi_unbind. However,
1573 1593 * the current use of sodisconnect(seqno == -1) is only for shutdown
1574 1594 * so there is no point (and potentially incorrect) to unbind.
 *
 * seqno is passed to the transport as the SEQ_number of the T_DISCON_REQ.
 * If _SODISCONNECT_LOCK_HELD is set in flags, the caller already holds
 * so_lock with SOLOCKED set and keeps both on return; otherwise they are
 * acquired and released here. so_lock is dropped while the request is
 * sent.
 *
 * Returns 0 on success, EINVAL if the socket is neither connected,
 * connecting nor accepting connections, or the error from kstrputmsg()
 * or sowaitokack().
1575 1595 */
1576 1596 static int
1577 1597 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
1578 1598 {
1579 1599 struct T_discon_req discon_req;
1580 1600 int error = 0;
1581 1601 mblk_t *mp;
1582 1602
1583 1603 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n",
1584 1604 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode)));
1585 1605
1586 1606 if (!(flags & _SODISCONNECT_LOCK_HELD)) {
1587 1607 mutex_enter(&so->so_lock);
1588 1608 so_lock_single(so); /* Set SOLOCKED */
1589 1609 } else {
1590 1610 ASSERT(MUTEX_HELD(&so->so_lock));
1591 1611 ASSERT(so->so_flag & SOLOCKED);
1592 1612 }
1593 1613
1594 1614 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) {
1595 1615 error = EINVAL;
1596 1616 eprintsoline(so, error);
1597 1617 goto done;
1598 1618 }
1599 1619
1600 1620 mutex_exit(&so->so_lock);
1601 1621 /*
1602 1622 * Flush the write side (unless this is a listener)
1603 1623 * and then send down a T_DISCON_REQ.
1604 1624 * (Don't flush on listener since it could flush {O_}T_CONN_RES
1605 1625 * and other messages.)
 *
 * NOTE(review): so_state is tested below after so_lock was dropped;
 * presumably SOLOCKED keeps SS_ACCEPTCONN stable here - confirm.
1606 1626 */
1607 1627 if (!(so->so_state & SS_ACCEPTCONN))
1608 1628 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW);
1609 1629
1610 1630 discon_req.PRIM_type = T_DISCON_REQ;
1611 1631 discon_req.SEQ_number = seqno;
1612 1632 mp = soallocproto1(&discon_req, sizeof (discon_req),
1613 1633 0, _ALLOC_SLEEP, CRED());
1614 1634 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1615 1635 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
/* Retake so_lock first: every path through "done" expects it held. */
1616 1636 mutex_enter(&so->so_lock);
1617 1637 if (error) {
1618 1638 eprintsoline(so, error);
1619 1639 goto done;
1620 1640 }
1621 1641
1622 1642 error = sowaitokack(so, T_DISCON_REQ);
1623 1643 if (error) {
1624 1644 eprintsoline(so, error);
1625 1645 goto done;
1626 1646 }
1627 1647 /*
1628 1648 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1629 1649 * strsock_proto while the lock was dropped above, the disconnect
1630 1650 * is allowed to complete. However, it is not possible to
1631 1651 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
1632 1652 */
1633 1653 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING);
/* Mark the cached local and peer addresses as no longer valid. */
1634 1654 SOTOTPI(so)->sti_laddr_valid = 0;
1635 1655 SOTOTPI(so)->sti_faddr_valid = 0;
1636 1656 done:
1637 1657 if (!(flags & _SODISCONNECT_LOCK_HELD)) {
1638 1658 so_unlock_single(so, SOLOCKED);
1639 1659 mutex_exit(&so->so_lock);
1640 1660 } else {
1641 1661 /* If the caller held the lock don't release it here */
1642 1662 ASSERT(MUTEX_HELD(&so->so_lock));
1643 1663 ASSERT(so->so_flag & SOLOCKED);
1644 1664 }
1645 1665 return (error);
1646 1666 }
1647 1667
1648 1668 /* ARGSUSED */
1649 1669 int
1650 1670 sotpi_accept(struct sonode *so, int fflag, struct cred *cr,
1651 1671 struct sonode **nsop)
1652 1672 {
1653 1673 struct T_conn_ind *conn_ind;
1654 1674 struct T_conn_res *conn_res;
1655 1675 int error = 0;
1656 1676 mblk_t *mp, *ack_mp;
1657 1677 struct sonode *nso;
1658 1678 vnode_t *nvp;
1659 1679 void *src;
1660 1680 t_uscalar_t srclen;
1661 1681 void *opt;
1662 1682 t_uscalar_t optlen;
1663 1683 t_scalar_t PRIM_type;
1664 1684 t_scalar_t SEQ_number;
1665 1685 size_t sinlen;
1666 1686 sotpi_info_t *sti = SOTOTPI(so);
1667 1687 sotpi_info_t *nsti;
1668 1688
1669 1689 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
1670 1690 (void *)so, fflag, (void *)nsop,
1671 1691 pr_state(so->so_state, so->so_mode)));
1672 1692
1673 1693 /*
1674 1694 * Defer single-threading the accepting socket until
1675 1695 * the T_CONN_IND has been received and parsed and the
1676 1696 * new sonode has been opened.
1677 1697 */
1678 1698
1679 1699 /* Check that we are not already connected */
1680 1700 if ((so->so_state & SS_ACCEPTCONN) == 0)
1681 1701 goto conn_bad;
1682 1702 again:
1683 1703 if ((error = sowaitconnind(so, fflag, &mp)) != 0)
1684 1704 goto e_bad;
1685 1705
1686 1706 ASSERT(mp != NULL);
1687 1707 conn_ind = (struct T_conn_ind *)mp->b_rptr;
1688 1708
1689 1709 /*
1690 1710 * Save SEQ_number for error paths.
1691 1711 */
1692 1712 SEQ_number = conn_ind->SEQ_number;
1693 1713
1694 1714 srclen = conn_ind->SRC_length;
1695 1715 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1);
1696 1716 if (src == NULL) {
1697 1717 error = EPROTO;
1698 1718 freemsg(mp);
1699 1719 eprintsoline(so, error);
1700 1720 goto disconnect_unlocked;
1701 1721 }
1702 1722 optlen = conn_ind->OPT_length;
1703 1723 switch (so->so_family) {
1704 1724 case AF_INET:
1705 1725 case AF_INET6:
1706 1726 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) {
1707 1727 bcopy(mp->b_rptr + conn_ind->OPT_offset,
1708 1728 &opt, conn_ind->OPT_length);
1709 1729 } else {
1710 1730 /*
1711 1731 * The transport (in this case TCP) hasn't sent up
1712 1732 * a pointer to an instance for the accept fast-path.
1713 1733 * Disable fast-path completely because the call to
1714 1734 * sotpi_create() below would otherwise create an
1715 1735 * incomplete TCP instance, which would lead to
1716 1736 * problems when sockfs sends a normal T_CONN_RES
1717 1737 * message down the new stream.
1718 1738 */
1719 1739 if (sti->sti_direct) {
1720 1740 int rval;
1721 1741 /*
1722 1742 * For consistency we inform tcp to disable
1723 1743 * direct interface on the listener, though
1724 1744 * we can certainly live without doing this
1725 1745 * because no data will ever travel upstream
1726 1746 * on the listening socket.
1727 1747 */
1728 1748 sti->sti_direct = 0;
1729 1749 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK,
1730 1750 0, 0, K_TO_K, cr, &rval);
1731 1751 }
1732 1752 opt = NULL;
1733 1753 optlen = 0;
1734 1754 }
1735 1755 break;
1736 1756 case AF_UNIX:
1737 1757 default:
1738 1758 if (optlen != 0) {
1739 1759 opt = sogetoff(mp, conn_ind->OPT_offset, optlen,
1740 1760 __TPI_ALIGN_SIZE);
1741 1761 if (opt == NULL) {
1742 1762 error = EPROTO;
1743 1763 freemsg(mp);
1744 1764 eprintsoline(so, error);
1745 1765 goto disconnect_unlocked;
1746 1766 }
1747 1767 }
1748 1768 if (so->so_family == AF_UNIX) {
1749 1769 if (!sti->sti_faddr_noxlate) {
1750 1770 src = NULL;
1751 1771 srclen = 0;
1752 1772 }
1753 1773 /* Extract src address from options */
1754 1774 if (optlen != 0)
1755 1775 so_getopt_srcaddr(opt, optlen, &src, &srclen);
1756 1776 }
1757 1777 break;
1758 1778 }
1759 1779
1760 1780 /*
1761 1781 * Create the new socket.
1762 1782 */
1763 1783 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error);
1764 1784 if (nso == NULL) {
1765 1785 ASSERT(error != 0);
1766 1786 /*
1767 1787 * Accept can not fail with ENOBUFS. sotpi_create
1768 1788 * sleeps waiting for memory until a signal is caught
1769 1789 * so return EINTR.
1770 1790 */
1771 1791 freemsg(mp);
1772 1792 if (error == ENOBUFS)
1773 1793 error = EINTR;
1774 1794 goto e_disc_unl;
1775 1795 }
1776 1796 nvp = SOTOV(nso);
1777 1797 nsti = SOTOTPI(nso);
1778 1798
1779 1799 #ifdef DEBUG
1780 1800 /*
1781 1801 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus
1782 1802 * it's inherited early to allow debugging of the accept code itself.
1783 1803 */
1784 1804 nso->so_options |= so->so_options & SO_DEBUG;
1785 1805 #endif /* DEBUG */
1786 1806
1787 1807 /*
1788 1808 * Save the SRC address from the T_CONN_IND
1789 1809 * for getpeername to work on AF_UNIX and on transports that do not
1790 1810 * support TI_GETPEERNAME.
1791 1811 *
1792 1812 * NOTE: AF_UNIX NUL termination is ensured by the sender's
1793 1813 * copyin_name().
1794 1814 */
1795 1815 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) {
1796 1816 error = EINVAL;
1797 1817 freemsg(mp);
1798 1818 eprintsoline(so, error);
1799 1819 goto disconnect_vp_unlocked;
1800 1820 }
1801 1821 nsti->sti_faddr_len = (socklen_t)srclen;
1802 1822 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
1803 1823 bcopy(src, nsti->sti_faddr_sa, srclen);
1804 1824 nsti->sti_faddr_valid = 1;
1805 1825
1806 1826 /*
1807 1827 * Record so_peercred and so_cpid from a cred in the T_CONN_IND.
1808 1828 */
1809 1829 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) <
1810 1830 (sizeof (struct T_conn_res) + sizeof (intptr_t))) {
1811 1831 cred_t *cr;
1812 1832 pid_t cpid;
1813 1833
1814 1834 cr = msg_getcred(mp, &cpid);
1815 1835 if (cr != NULL) {
1816 1836 crhold(cr);
1817 1837 nso->so_peercred = cr;
1818 1838 nso->so_cpid = cpid;
1819 1839 }
1820 1840 freemsg(mp);
1821 1841
1822 1842 mp = soallocproto1(NULL, sizeof (struct T_conn_res) +
1823 1843 sizeof (intptr_t), 0, _ALLOC_INTR, cr);
1824 1844 if (mp == NULL) {
1825 1845 /*
1826 1846 * Accept can not fail with ENOBUFS.
1827 1847 * A signal was caught so return EINTR.
1828 1848 */
1829 1849 error = EINTR;
1830 1850 eprintsoline(so, error);
1831 1851 goto disconnect_vp_unlocked;
1832 1852 }
1833 1853 conn_res = (struct T_conn_res *)mp->b_rptr;
1834 1854 } else {
1835 1855 /*
1836 1856 * For efficiency reasons we use msg_extractcred; no crhold
1837 1857 * needed since db_credp is cleared (i.e., we move the cred
1838 1858 * from the message to so_peercred).
1839 1859 */
1840 1860 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid);
1841 1861
1842 1862 mp->b_rptr = DB_BASE(mp);
1843 1863 conn_res = (struct T_conn_res *)mp->b_rptr;
1844 1864 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res);
1845 1865
1846 1866 mblk_setcred(mp, cr, curproc->p_pid);
1847 1867 }
1848 1868
1849 1869 /*
1850 1870 * New socket must be bound at least in sockfs and, except for AF_INET,
1851 1871 * (or AF_INET6) it also has to be bound in the transport provider.
1852 1872 * We set the local address in the sonode from the T_OK_ACK of the
1853 1873 * T_CONN_RES. For this reason the address we bind to here isn't
1854 1874 * important.
1855 1875 */
1856 1876 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
1857 1877 /*CONSTCOND*/
1858 1878 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) {
1859 1879 /*
1860 1880 * Optimization for AF_INET{,6} transports
1861 1881 * that can handle a T_CONN_RES without being bound.
1862 1882 */
1863 1883 mutex_enter(&nso->so_lock);
1864 1884 so_automatic_bind(nso);
1865 1885 mutex_exit(&nso->so_lock);
1866 1886 } else {
1867 1887 /* Perform NULL bind with the transport provider. */
1868 1888 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC,
1869 1889 cr)) != 0) {
1870 1890 ASSERT(error != ENOBUFS);
1871 1891 freemsg(mp);
1872 1892 eprintsoline(nso, error);
1873 1893 goto disconnect_vp_unlocked;
1874 1894 }
1875 1895 }
1876 1896
1877 1897 /*
1878 1898 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
1879 1899 * so that any data arriving on the new socket will cause the
1880 1900 * appropriate signals to be delivered for the new socket.
1881 1901 *
1882 1902 * No other thread (except strsock_proto and strsock_misc)
1883 1903 * can access the new socket thus we relax the locking.
1884 1904 */
1885 1905 nso->so_pgrp = so->so_pgrp;
1886 1906 nso->so_state |= so->so_state & SS_ASYNC;
1887 1907 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate;
1888 1908
1889 1909 if (nso->so_pgrp != 0) {
1890 1910 if ((error = so_set_events(nso, nvp, cr)) != 0) {
1891 1911 eprintsoline(nso, error);
1892 1912 error = 0;
1893 1913 nso->so_pgrp = 0;
1894 1914 }
1895 1915 }
1896 1916
1897 1917 /*
1898 1918 * Make note of the socket level options. TCP and IP level options
1899 1919 * are already inherited. We could do all this after accept is
1900 1920 * successful but doing it here simplifies code and no harm done
1901 1921 * for error case.
1902 1922 */
1903 1923 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE|
1904 1924 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
1905 1925 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
1906 1926 nso->so_sndbuf = so->so_sndbuf;
1907 1927 nso->so_rcvbuf = so->so_rcvbuf;
1908 1928 if (nso->so_options & SO_LINGER)
1909 1929 nso->so_linger = so->so_linger;
1910 1930
1911 1931 /*
1912 1932 * Note that the following sti_direct code path should be
1913 1933 * removed once we are confident that the direct sockets
1914 1934 * do not result in any degradation.
1915 1935 */
1916 1936 if (sti->sti_direct) {
1917 1937
1918 1938 ASSERT(opt != NULL);
1919 1939
1920 1940 conn_res->OPT_length = optlen;
1921 1941 conn_res->OPT_offset = MBLKL(mp);
1922 1942 bcopy(&opt, mp->b_wptr, optlen);
1923 1943 mp->b_wptr += optlen;
1924 1944 conn_res->PRIM_type = T_CONN_RES;
1925 1945 conn_res->ACCEPTOR_id = 0;
1926 1946 PRIM_type = T_CONN_RES;
1927 1947
1928 1948 /* Send down the T_CONN_RES on acceptor STREAM */
1929 1949 error = kstrputmsg(SOTOV(nso), mp, NULL,
1930 1950 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
1931 1951 if (error) {
1932 1952 mutex_enter(&so->so_lock);
1933 1953 so_lock_single(so);
1934 1954 eprintsoline(so, error);
1935 1955 goto disconnect_vp;
1936 1956 }
1937 1957 mutex_enter(&nso->so_lock);
1938 1958 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK,
1939 1959 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
1940 1960 if (error) {
1941 1961 mutex_exit(&nso->so_lock);
1942 1962 mutex_enter(&so->so_lock);
1943 1963 so_lock_single(so);
1944 1964 eprintsoline(so, error);
1945 1965 goto disconnect_vp;
1946 1966 }
1947 1967 if (nso->so_family == AF_INET) {
1948 1968 sin_t *sin;
1949 1969
1950 1970 sin = (sin_t *)(ack_mp->b_rptr +
1951 1971 sizeof (struct T_ok_ack));
1952 1972 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t));
1953 1973 nsti->sti_laddr_len = sizeof (sin_t);
1954 1974 } else {
1955 1975 sin6_t *sin6;
1956 1976
1957 1977 sin6 = (sin6_t *)(ack_mp->b_rptr +
1958 1978 sizeof (struct T_ok_ack));
1959 1979 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t));
1960 1980 nsti->sti_laddr_len = sizeof (sin6_t);
1961 1981 }
1962 1982 freemsg(ack_mp);
1963 1983
1964 1984 nso->so_state |= SS_ISCONNECTED;
1965 1985 nso->so_proto_handle = (sock_lower_handle_t)opt;
1966 1986 nsti->sti_laddr_valid = 1;
1967 1987
1968 1988 if (sti->sti_nl7c_flags & NL7C_ENABLED) {
1969 1989 /*
1970 1990 * A NL7C marked listen()er so the new socket
1971 1991 * inherits the listen()er's NL7C state, except
1972 1992 * for NL7C_POLLIN.
1973 1993 *
1974 1994 * Only call NL7C to process the new socket if
1975 1995 * the listen socket allows blocking i/o.
1976 1996 */
1977 1997 nsti->sti_nl7c_flags =
1978 1998 sti->sti_nl7c_flags & (~NL7C_POLLIN);
1979 1999 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) {
1980 2000 /*
1981 2001 * Nonblocking accept() just make it
1982 2002 * persist to defer processing to the
1983 2003 * read-side syscall (e.g. read).
1984 2004 */
1985 2005 nsti->sti_nl7c_flags |= NL7C_SOPERSIST;
1986 2006 } else if (nl7c_process(nso, B_FALSE)) {
1987 2007 /*
1988 2008 * NL7C has completed processing on the
1989 2009 * socket, close the socket and back to
1990 2010 * the top to await the next T_CONN_IND.
1991 2011 */
1992 2012 mutex_exit(&nso->so_lock);
1993 2013 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0,
1994 2014 cr, NULL);
1995 2015 VN_RELE(nvp);
1996 2016 goto again;
1997 2017 }
1998 2018 /* Pass the new socket out */
1999 2019 }
2000 2020
2001 2021 mutex_exit(&nso->so_lock);
2002 2022
2003 2023 /*
2004 2024 * It's possible, through the use of autopush for example,
2005 2025 * that the acceptor stream may not support sti_direct
2006 2026 * semantics. If the new socket does not support sti_direct
2007 2027 * we issue a _SIOCSOCKFALLBACK to inform the transport
2008 2028 * as we would in the I_PUSH case.
2009 2029 */
2010 2030 if (nsti->sti_direct == 0) {
2011 2031 int rval;
2012 2032
2013 2033 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK,
2014 2034 0, 0, K_TO_K, cr, &rval)) != 0) {
2015 2035 mutex_enter(&so->so_lock);
2016 2036 so_lock_single(so);
2017 2037 eprintsoline(so, error);
2018 2038 goto disconnect_vp;
2019 2039 }
2020 2040 }
2021 2041
2022 2042 /*
2023 2043 * Pass out new socket.
2024 2044 */
2025 2045 if (nsop != NULL)
2026 2046 *nsop = nso;
2027 2047
2028 2048 return (0);
2029 2049 }
2030 2050
2031 2051 /*
2032 2052 * This is the non-performance case for sockets (e.g. AF_UNIX sockets)
2033 2053 * which don't support the FireEngine accept fast-path. It is also
2034 2054 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd
2035 2055 * again. Neither sockfs nor TCP attempt to find out if some other
2036 2056 * random module has been inserted in between (in which case we
2037 2057 * should follow TLI accept behaviour). We blindly assume the worst
2038 2058 * case and revert back to old behaviour i.e. TCP will not send us
2039 2059 * any option (eager) and the accept should happen on the listener
2040 2060 * queue. Any queued T_conn_ind have already got their options removed
2041 2061 * by so_sock2_stream() when "sockmod" was I_POP'd.
2042 2062 */
2043 2063 /*
2044 2064 * Fill in the {O_}T_CONN_RES before getting SOLOCKED.
2045 2065 */
2046 2066 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) {
2047 2067 #ifdef _ILP32
2048 2068 queue_t *q;
2049 2069
2050 2070 /*
2051 2071 * Find read queue in driver
2052 2072 * Can safely do this since we "own" nso/nvp.
2053 2073 */
2054 2074 q = strvp2wq(nvp)->q_next;
2055 2075 while (SAMESTR(q))
2056 2076 q = q->q_next;
2057 2077 q = RD(q);
2058 2078 conn_res->ACCEPTOR_id = (t_uscalar_t)q;
2059 2079 #else
2060 2080 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev);
2061 2081 #endif /* _ILP32 */
2062 2082 conn_res->PRIM_type = O_T_CONN_RES;
2063 2083 PRIM_type = O_T_CONN_RES;
2064 2084 } else {
2065 2085 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id;
2066 2086 conn_res->PRIM_type = T_CONN_RES;
2067 2087 PRIM_type = T_CONN_RES;
2068 2088 }
2069 2089 conn_res->SEQ_number = SEQ_number;
2070 2090 conn_res->OPT_length = 0;
2071 2091 conn_res->OPT_offset = 0;
2072 2092
2073 2093 mutex_enter(&so->so_lock);
2074 2094 so_lock_single(so); /* Set SOLOCKED */
2075 2095 mutex_exit(&so->so_lock);
2076 2096
2077 2097 error = kstrputmsg(SOTOV(so), mp, NULL,
2078 2098 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
2079 2099 mutex_enter(&so->so_lock);
2080 2100 if (error) {
2081 2101 eprintsoline(so, error);
2082 2102 goto disconnect_vp;
2083 2103 }
2084 2104 error = sowaitprim(so, PRIM_type, T_OK_ACK,
2085 2105 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
2086 2106 if (error) {
2087 2107 eprintsoline(so, error);
2088 2108 goto disconnect_vp;
2089 2109 }
2090 2110 mutex_exit(&so->so_lock);
2091 2111 /*
2092 2112 * If there is a sin/sin6 appended onto the T_OK_ACK use
2093 2113 * that to set the local address. If this is not present
2094 2114 * then we zero out the address and don't set the
2095 2115 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over
2096 2116 * the pathname from the listening socket.
2097 2117 * In the case where this is TCP or an AF_UNIX socket the
2098 2118 * client side may have queued data or a T_ORDREL in the
2099 2119 * transport. Having now sent the T_CONN_RES we may receive
2100 2120 * those queued messages at any time. Hold the acceptor
2101 2121 * so_lock until its state and laddr are finalized.
2102 2122 */
2103 2123 mutex_enter(&nso->so_lock);
2104 2124 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t);
2105 2125 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) &&
2106 2126 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) {
2107 2127 ack_mp->b_rptr += sizeof (struct T_ok_ack);
2108 2128 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen);
2109 2129 nsti->sti_laddr_len = sinlen;
2110 2130 nsti->sti_laddr_valid = 1;
2111 2131 } else if (nso->so_family == AF_UNIX) {
2112 2132 ASSERT(so->so_family == AF_UNIX);
2113 2133 nsti->sti_laddr_len = sti->sti_laddr_len;
2114 2134 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2115 2135 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa,
2116 2136 nsti->sti_laddr_len);
2117 2137 nsti->sti_laddr_valid = 1;
2118 2138 } else {
2119 2139 nsti->sti_laddr_len = sti->sti_laddr_len;
2120 2140 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2121 2141 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size);
2122 2142 nsti->sti_laddr_sa->sa_family = nso->so_family;
2123 2143 }
2124 2144 nso->so_state |= SS_ISCONNECTED;
2125 2145 mutex_exit(&nso->so_lock);
2126 2146
2127 2147 freemsg(ack_mp);
2128 2148
2129 2149 mutex_enter(&so->so_lock);
2130 2150 so_unlock_single(so, SOLOCKED);
2131 2151 mutex_exit(&so->so_lock);
2132 2152
2133 2153 /*
2134 2154 * Pass out new socket.
2135 2155 */
2136 2156 if (nsop != NULL)
2137 2157 *nsop = nso;
2138 2158
2139 2159 return (0);
2140 2160
2141 2161
2142 2162 eproto_disc_unl:
2143 2163 error = EPROTO;
2144 2164 e_disc_unl:
2145 2165 eprintsoline(so, error);
2146 2166 goto disconnect_unlocked;
2147 2167
2148 2168 pr_disc_vp_unl:
2149 2169 eprintsoline(so, error);
2150 2170 disconnect_vp_unlocked:
2151 2171 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
2152 2172 VN_RELE(nvp);
2153 2173 disconnect_unlocked:
2154 2174 (void) sodisconnect(so, SEQ_number, 0);
2155 2175 return (error);
2156 2176
2157 2177 pr_disc_vp:
2158 2178 eprintsoline(so, error);
2159 2179 disconnect_vp:
2160 2180 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
2161 2181 so_unlock_single(so, SOLOCKED);
2162 2182 mutex_exit(&so->so_lock);
2163 2183 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
2164 2184 VN_RELE(nvp);
2165 2185 return (error);
2166 2186
2167 2187 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */
2168 2188 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
2169 2189 ? EOPNOTSUPP : EINVAL;
2170 2190 e_bad:
2171 2191 eprintsoline(so, error);
2172 2192 return (error);
2173 2193 }
2174 2194
2175 2195 /*
2176 2196 * connect a socket.
2177 2197 *
2178 2198 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
2179 2199 * unconnect (by specifying a null address).
 *
 * Arguments, as they are used in the body below:
 *   so       - socket to connect; its TPI state is obtained with SOTOTPI(so).
 *   name,
 *   namelen  - destination address. A name whose sa_family is AF_UNSPEC is
 *              handled exactly like a NULL name (name/namelen are reset).
 *   fflag    - handed to sowaitconnected(); forced to 0 on the datagram
 *              T_CONN_REQ path to avoid returning EWOULDBLOCK.
 *   flags    - _SOCONNECT_NOXLATE and _SOCONNECT_XPG4_2 are examined here;
 *              _SOCONNECT_DID_BIND is or'ed in locally when this call had to
 *              bind the socket, so the bind can be undone on a fatal error.
 *   cr       - credential passed to soallocproto(), sotpi_bind() and
 *              sotpi_setsockopt().
 *
 * Returns 0 or an errno value. Values assigned directly in this function
 * are EINTR (message allocation returned NULL), EOPNOTSUPP (socket has
 * SS_ACCEPTCONN set), EISCONN/EALREADY (SM_CONNREQUIRED socket already
 * connected/connecting) and EINVAL (missing, mis-sized or over-long
 * address); other values come from the helpers called below.
 *
 * Locking: so_lock is taken after the T_CONN_REQ mblk has been allocated and
 * is dropped on every return path after that point. SOLOCKED is set with
 * so_lock_single(); need_unlock records whether it is still held when
 * control reaches the done: label. Both are dropped temporarily around the
 * sotpi_setsockopt() calls and around kstrputmsg() (so_lock only).
2180 2200 */
2181 2201 int
2182 2202 sotpi_connect(struct sonode *so,
2183 2203 struct sockaddr *name,
2184 2204 socklen_t namelen,
2185 2205 int fflag,
2186 2206 int flags,
2187 2207 struct cred *cr)
2188 2208 {
2189 2209 struct T_conn_req conn_req;
2190 2210 int error = 0;
2191 2211 mblk_t *mp;
2192 2212 void *src;
2193 2213 socklen_t srclen;
2194 2214 void *addr;
2195 2215 socklen_t addrlen;
2196 2216 boolean_t need_unlock;
2197 2217 sotpi_info_t *sti = SOTOTPI(so);
2198 2218
2199 2219 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
2200 2220 (void *)so, (void *)name, namelen, fflag, flags,
2201 2221 pr_state(so->so_state, so->so_mode)));
2202 2222
2203 2223 /*
2204 2224 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
2205 2225 * avoid sleeping for memory with SOLOCKED held.
2206 2226 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
2207 2227 * + sizeof (struct T_opthdr).
2208 2228 * (the AF_UNIX so_ux_addr_xlate() does not make the address
2209 2229 * exceed sti_faddr_maxlen).
2210 2230 */
2211 2231 mp = soallocproto(sizeof (struct T_conn_req) +
2212 2232 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR,
2213 2233 cr);
2214 2234 if (mp == NULL) {
2215 2235 /*
2216 2236 * Connect can not fail with ENOBUFS. A signal was
2217 2237 * caught so return EINTR.
2218 2238 */
2219 2239 error = EINTR;
2220 2240 eprintsoline(so, error);
2221 2241 return (error);
2222 2242 }
2223 2243
2224 2244 mutex_enter(&so->so_lock);
2225 2245 /*
2226 2246 * Make sure there is a preallocated T_unbind_req message
2227 2247 * before any binding. This message is allocated when the
2228 2248 * socket is created. Since another thread can consume
2229 2249 * so_unbind_mp by the time we return from so_lock_single(),
2230 2250 * we should check the availability of so_unbind_mp after
2231 2251 * we return from so_lock_single().
2232 2252 */
2233 2253
2234 2254 so_lock_single(so); /* Set SOLOCKED */
2235 2255 need_unlock = B_TRUE;
/* need_unlock tells the done: code whether SOLOCKED must still be dropped. */
2236 2256
2237 2257 if (sti->sti_unbind_mp == NULL) {
2238 2258 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
2239 2259 /* NOTE: holding so_lock while sleeping */
2240 2260 sti->sti_unbind_mp =
2241 2261 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr);
2242 2262 if (sti->sti_unbind_mp == NULL) {
2243 2263 error = EINTR;
2244 2264 goto done;
2245 2265 }
2246 2266 }
2247 2267
2248 2268 /*
2249 2269 * Can't have done a listen before connecting.
2250 2270 */
2251 2271 if (so->so_state & SS_ACCEPTCONN) {
2252 2272 error = EOPNOTSUPP;
2253 2273 goto done;
2254 2274 }
2255 2275
2256 2276 /*
2257 2277 * Must be bound with the transport
2258 2278 */
2259 2279 if (!(so->so_state & SS_ISBOUND)) {
2260 2280 if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
2261 2281 /*CONSTCOND*/
2262 2282 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) {
2263 2283 /*
2264 2284 * Optimization for AF_INET{,6} transports
2265 2285 * that can handle a T_CONN_REQ without being bound.
2266 2286 */
2267 2287 so_automatic_bind(so);
2268 2288 } else {
2269 2289 error = sotpi_bind(so, NULL, 0,
2270 2290 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
2271 2291 if (error)
2272 2292 goto done;
2273 2293 }
2274 2294 ASSERT(so->so_state & SS_ISBOUND);
2275 2295 flags |= _SOCONNECT_DID_BIND;
2276 2296 }
2277 2297
2278 2298 /*
2279 2299 * Handle a connect to a name parameter of type AF_UNSPEC like a
2280 2300 * connect to a null address. This is the portable method to
2281 2301 * unconnect a socket.
2282 2302 */
2283 2303 if ((namelen >= sizeof (sa_family_t)) &&
2284 2304 (name->sa_family == AF_UNSPEC)) {
2285 2305 name = NULL;
2286 2306 namelen = 0;
2287 2307 }
2288 2308
2289 2309 /*
2290 2310 * Check that we are not already connected.
2291 2311 * A connection-oriented socket cannot be reconnected.
2292 2312 * A connected connection-less socket can be
2293 2313 * - connected to a different address by a subsequent connect
2294 2314 * - "unconnected" by a connect to the NULL address
2295 2315 */
2296 2316 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
2297 2317 ASSERT(!(flags & _SOCONNECT_DID_BIND));
2298 2318 if (so->so_mode & SM_CONNREQUIRED) {
2299 2319 /* Connection-oriented socket */
2300 2320 error = so->so_state & SS_ISCONNECTED ?
2301 2321 EISCONN : EALREADY;
2302 2322 goto done;
2303 2323 }
2304 2324 /* Connection-less socket */
2305 2325 if (name == NULL) {
2306 2326 /*
2307 2327 * Remove the connected state and clear SO_DGRAM_ERRIND
2308 2328 * since it was set when the socket was connected.
2309 2329 * If this is UDP also send down a T_DISCON_REQ.
2310 2330 */
2311 2331 int val;
2312 2332
2313 2333 if ((so->so_family == AF_INET ||
2314 2334 so->so_family == AF_INET6) &&
2315 2335 (so->so_type == SOCK_DGRAM ||
2316 2336 so->so_type == SOCK_RAW) &&
2317 2337 /*CONSTCOND*/
2318 2338 !soconnect_tpi_udp) {
2319 2339 /* XXX What about implicitly unbinding here? */
2320 2340 error = sodisconnect(so, -1,
2321 2341 _SODISCONNECT_LOCK_HELD);
2322 2342 } else {
2323 2343 so->so_state &=
2324 2344 ~(SS_ISCONNECTED | SS_ISCONNECTING);
2325 2345 sti->sti_faddr_valid = 0;
2326 2346 sti->sti_faddr_len = 0;
2327 2347 }
2328 2348
2329 2349 /* Remove SOLOCKED since setsockopt will grab it */
2330 2350 so_unlock_single(so, SOLOCKED);
2331 2351 mutex_exit(&so->so_lock);
2332 2352
2333 2353 val = 0;
2334 2354 (void) sotpi_setsockopt(so, SOL_SOCKET,
2335 2355 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val),
2336 2356 cr);
2337 2357
2338 2358 mutex_enter(&so->so_lock);
2339 2359 so_lock_single(so); /* Set SOLOCKED */
/*
 * The setsockopt result is deliberately ignored; error is still 0
 * here unless the sodisconnect() call above returned a failure.
 */
2340 2360 goto done;
2341 2361 }
2342 2362 }
2343 2363 ASSERT(so->so_state & SS_ISBOUND);
2344 2364
2345 2365 if (name == NULL || namelen == 0) {
2346 2366 error = EINVAL;
2347 2367 goto done;
2348 2368 }
2349 2369 /*
2350 2370 * Mark the socket if sti_faddr_sa represents the transport level
2351 2371 * address.
2352 2372 */
2353 2373 if (flags & _SOCONNECT_NOXLATE) {
2354 2374 struct sockaddr_ux *soaddr_ux;
2355 2375
2356 2376 ASSERT(so->so_family == AF_UNIX);
2357 2377 if (namelen != sizeof (struct sockaddr_ux)) {
2358 2378 error = EINVAL;
2359 2379 goto done;
2360 2380 }
2361 2381 soaddr_ux = (struct sockaddr_ux *)name;
2362 2382 name = (struct sockaddr *)&soaddr_ux->sou_addr;
2363 2383 namelen = sizeof (soaddr_ux->sou_addr);
2364 2384 sti->sti_faddr_noxlate = 1;
2365 2385 }
2366 2386
2367 2387 /*
2368 2388 * Length and family checks.
2369 2389 */
2370 2390 error = so_addr_verify(so, name, namelen);
2371 2391 if (error)
2372 2392 goto bad;
2373 2393
2374 2394 /*
2375 2395 * Save foreign address. Needed for AF_UNIX as well as
2376 2396 * transport providers that do not support TI_GETPEERNAME.
2377 2397 * Also used for cached foreign address for TCP and UDP.
2378 2398 */
2379 2399 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) {
2380 2400 error = EINVAL;
2381 2401 goto done;
2382 2402 }
2383 2403 sti->sti_faddr_len = (socklen_t)namelen;
2384 2404 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
2385 2405 bcopy(name, sti->sti_faddr_sa, namelen);
2386 2406 sti->sti_faddr_valid = 1;
2387 2407
2388 2408 if (so->so_family == AF_UNIX) {
2389 2409 if (sti->sti_faddr_noxlate) {
2390 2410 /*
2391 2411 * sti_faddr is a transport-level address, so
2392 2412 * don't pass it as an option. Do save it in
2393 2413 * sti_ux_faddr, used for connected DG send.
2394 2414 */
2395 2415 src = NULL;
2396 2416 srclen = 0;
2397 2417 addr = sti->sti_faddr_sa;
2398 2418 addrlen = (t_uscalar_t)sti->sti_faddr_len;
2399 2419 bcopy(addr, &sti->sti_ux_faddr,
2400 2420 sizeof (sti->sti_ux_faddr));
2401 2421 } else {
2402 2422 /*
2403 2423 * Pass the sockaddr_un source address as an option
2404 2424 * and translate the remote address.
2405 2425 * Holding so_lock thus sti_laddr_sa can not change.
2406 2426 */
2407 2427 src = sti->sti_laddr_sa;
2408 2428 srclen = (t_uscalar_t)sti->sti_laddr_len;
2409 2429 dprintso(so, 1,
2410 2430 ("sotpi_connect UNIX: srclen %d, src %p\n",
2411 2431 srclen, src));
2412 2432 /*
2413 2433 * Translate the destination address into our
2414 2434 * internal form, and save it in sti_ux_faddr.
2415 2435 * After this call, addr==&sti->sti_ux_taddr,
2416 2436 * and we copy that to sti->sti_ux_faddr so
2417 2437 * we save the connected peer address.
2418 2438 */
2419 2439 error = so_ux_addr_xlate(so,
2420 2440 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len,
2421 2441 (flags & _SOCONNECT_XPG4_2),
2422 2442 &addr, &addrlen);
2423 2443 if (error)
2424 2444 goto bad;
2425 2445 bcopy(&sti->sti_ux_taddr, &sti->sti_ux_faddr,
2426 2446 sizeof (sti->sti_ux_faddr));
2427 2447 }
2428 2448 } else {
2429 2449 addr = sti->sti_faddr_sa;
2430 2450 addrlen = (t_uscalar_t)sti->sti_faddr_len;
2431 2451 src = NULL;
2432 2452 srclen = 0;
2433 2453 }
2434 2454 /*
2435 2455 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND
2436 2456 * option which asks the transport provider to send T_UDERR_IND
2437 2457 * messages. These T_UDERR_IND messages are used to return connected
2438 2458 * style errors (e.g. ECONNRESET) for connected datagram sockets.
2439 2459 *
2440 2460 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
2441 2461 * we send down a T_CONN_REQ. This is needed to let the
2442 2462 * transport assign a local address that is consistent with
2443 2463 * the remote address. Applications depend on a getsockname()
2444 2464 * after a connect() to retrieve the "source" IP address for
2445 2465 * the connected socket. Invalidate the cached local address
2446 2466 * to force getsockname() to enquire of the transport.
2447 2467 */
2448 2468 if (!(so->so_mode & SM_CONNREQUIRED)) {
2449 2469 /*
2450 2470 * Datagram socket.
2451 2471 */
2452 2472 int32_t val;
2453 2473
2454 2474 so_unlock_single(so, SOLOCKED);
2455 2475 mutex_exit(&so->so_lock);
2456 2476
2457 2477 val = 1;
2458 2478 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
2459 2479 &val, (t_uscalar_t)sizeof (val), cr);
2460 2480
2461 2481 mutex_enter(&so->so_lock);
2462 2482 so_lock_single(so); /* Set SOLOCKED */
2463 2483 if ((so->so_family != AF_INET && so->so_family != AF_INET6) ||
2464 2484 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) ||
2465 2485 soconnect_tpi_udp) {
2466 2486 soisconnected(so);
2467 2487 goto done;
2468 2488 }
2469 2489 /*
2470 2490 * Send down T_CONN_REQ etc.
2471 2491 * Clear fflag to avoid returning EWOULDBLOCK.
2472 2492 */
2473 2493 fflag = 0;
2474 2494 ASSERT(so->so_family != AF_UNIX);
2475 2495 sti->sti_laddr_valid = 0;
2476 2496 } else if (sti->sti_laddr_len != 0) {
2477 2497 /*
2478 2498 * If the local address or port was "any" then it may be
2479 2499 * changed by the transport as a result of the
2480 2500 * connect. Invalidate the cached version if we have one.
2481 2501 */
2482 2502 switch (so->so_family) {
2483 2503 case AF_INET:
2484 2504 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t));
2485 2505 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr ==
2486 2506 INADDR_ANY ||
2487 2507 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0)
2488 2508 sti->sti_laddr_valid = 0;
2489 2509 break;
2490 2510
2491 2511 case AF_INET6:
2492 2512 ASSERT(sti->sti_laddr_len ==
2493 2513 (socklen_t)sizeof (sin6_t));
2494 2514 if (IN6_IS_ADDR_UNSPECIFIED(
2495 2515 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) ||
2496 2516 IN6_IS_ADDR_V4MAPPED_ANY(
2497 2517 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) ||
2498 2518 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0)
2499 2519 sti->sti_laddr_valid = 0;
2500 2520 break;
2501 2521
2502 2522 default:
2503 2523 break;
2504 2524 }
2505 2525 }
2506 2526
2507 2527 /*
2508 2528 * Check for failure of an earlier call
2509 2529 */
2510 2530 if (so->so_error != 0)
2511 2531 goto so_bad;
2512 2532
2513 2533 /*
2514 2534 * Send down T_CONN_REQ. Message was allocated above.
2515 2535 */
2516 2536 conn_req.PRIM_type = T_CONN_REQ;
2517 2537 conn_req.DEST_length = addrlen;
2518 2538 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req);
2519 2539 if (srclen == 0) {
2520 2540 conn_req.OPT_length = 0;
2521 2541 conn_req.OPT_offset = 0;
2522 2542 soappendmsg(mp, &conn_req, sizeof (conn_req));
2523 2543 soappendmsg(mp, addr, addrlen);
2524 2544 } else {
2525 2545 /*
2526 2546 * There is an AF_UNIX sockaddr_un to include as a source
2527 2547 * address option.
2528 2548 */
2529 2549 struct T_opthdr toh;
2530 2550
2531 2551 toh.level = SOL_SOCKET;
2532 2552 toh.name = SO_SRCADDR;
2533 2553 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
2534 2554 toh.status = 0;
2535 2555 conn_req.OPT_length =
2536 2556 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen));
2537 2557 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) +
2538 2558 _TPI_ALIGN_TOPT(addrlen));
2539 2559
2540 2560 soappendmsg(mp, &conn_req, sizeof (conn_req));
2541 2561 soappendmsg(mp, addr, addrlen);
2542 2562 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
2543 2563 soappendmsg(mp, &toh, sizeof (toh));
2544 2564 soappendmsg(mp, src, srclen);
2545 2565 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
2546 2566 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
2547 2567 }
2548 2568 /*
2549 2569 * Set SS_ISCONNECTING before sending down the T_CONN_REQ
2550 2570 * in order to have the right state when the T_CONN_CON shows up.
2551 2571 */
2552 2572 soisconnecting(so);
2553 2573 mutex_exit(&so->so_lock);
2554 2574
2555 2575 if (AU_AUDITING())
2556 2576 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0);
2557 2577
2558 2578 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2559 2579 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
/*
 * Forget the message once it has been handed to kstrputmsg() so the
 * freemsg(mp) at done: does not act on it (presumably kstrputmsg()
 * takes ownership of mp even on failure - confirm against strsubr).
 */
2560 2580 mp = NULL;
2561 2581 mutex_enter(&so->so_lock);
2562 2582 if (error != 0)
2563 2583 goto bad;
2564 2584
2565 2585 if ((error = sowaitokack(so, T_CONN_REQ)) != 0)
2566 2586 goto bad;
2567 2587
2568 2588 /* Allow other threads to access the socket */
2569 2589 so_unlock_single(so, SOLOCKED);
2570 2590 need_unlock = B_FALSE;
2571 2591
2572 2592 /*
2573 2593 * Wait until we get a T_CONN_CON or an error
2574 2594 */
2575 2595 if ((error = sowaitconnected(so, fflag, 0)) != 0) {
2576 2596 so_lock_single(so); /* Set SOLOCKED */
2577 2597 need_unlock = B_TRUE;
2578 2598 }
2579 2599
/*
 * Common exit for every path that holds so_lock: free the T_CONN_REQ
 * mblk if it was never sent, classify the error, then drop SOLOCKED
 * (when need_unlock is set) and so_lock.
 */
2580 2600 done:
2581 2601 freemsg(mp);
2582 2602 switch (error) {
2583 2603 case EINPROGRESS:
2584 2604 case EALREADY:
2585 2605 case EISCONN:
2586 2606 case EINTR:
2587 2607 /* Non-fatal errors */
2588 2608 sti->sti_laddr_valid = 0;
2589 2609 /* FALLTHRU */
2590 2610 case 0:
2591 2611 break;
2592 2612 default:
2593 2613 ASSERT(need_unlock);
2594 2614 /*
2595 2615 * Fatal errors: clear SS_ISCONNECTING in case it was set,
2596 2616 * and invalidate local-address cache
2597 2617 */
2598 2618 so->so_state &= ~SS_ISCONNECTING;
2599 2619 sti->sti_laddr_valid = 0;
2600 2620 /* A discon_ind might have already unbound us */
2601 2621 if ((flags & _SOCONNECT_DID_BIND) &&
2602 2622 (so->so_state & SS_ISBOUND)) {
2603 2623 int err;
2604 2624
2605 2625 err = sotpi_unbind(so, 0);
2606 2626 /* LINTED - statement has no conseq */
2607 2627 if (err) {
2608 2628 eprintsoline(so, err);
2609 2629 }
2610 2630 }
2611 2631 break;
2612 2632 }
2613 2633 if (need_unlock)
2614 2634 so_unlock_single(so, SOLOCKED);
2615 2635 mutex_exit(&so->so_lock);
2616 2636 return (error);
2617 2637
/*
 * so_bad: fetch the pending socket error with sogeterr() and fall into
 * bad:, which logs the error and finishes through done:.
 */
2618 2638 so_bad: error = sogeterr(so, B_TRUE);
2619 2639 bad: eprintsoline(so, error);
2620 2640 goto done;
2621 2641 }
2622 2642
2623 2643 /* ARGSUSED */
2624 2644 int
2625 2645 sotpi_shutdown(struct sonode *so, int how, struct cred *cr)
2626 2646 {
2627 2647 struct T_ordrel_req ordrel_req;
2628 2648 mblk_t *mp;
2629 2649 uint_t old_state, state_change;
2630 2650 int error = 0;
2631 2651 sotpi_info_t *sti = SOTOTPI(so);
2632 2652
2633 2653 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n",
2634 2654 (void *)so, how, pr_state(so->so_state, so->so_mode)));
2635 2655
2636 2656 mutex_enter(&so->so_lock);
2637 2657 so_lock_single(so); /* Set SOLOCKED */
2638 2658
2639 2659 /*
2640 2660 * SunOS 4.X has no check for datagram sockets.
2641 2661 * 5.X checks that it is connected (ENOTCONN)
2642 2662 * X/Open requires that we check the connected state.
2643 2663 */
2644 2664 if (!(so->so_state & SS_ISCONNECTED)) {
2645 2665 if (!xnet_skip_checks) {
2646 2666 error = ENOTCONN;
2647 2667 if (xnet_check_print) {
2648 2668 printf("sockfs: X/Open shutdown check "
2649 2669 "caused ENOTCONN\n");
2650 2670 }
2651 2671 }
2652 2672 goto done;
2653 2673 }
2654 2674 /*
2655 2675 * Record the current state and then perform any state changes.
2656 2676 * Then use the difference between the old and new states to
2657 2677 * determine which messages need to be sent.
2658 2678 * This prevents e.g. duplicate T_ORDREL_REQ when there are
2659 2679 * duplicate calls to shutdown().
2660 2680 */
2661 2681 old_state = so->so_state;
2662 2682
2663 2683 switch (how) {
2664 2684 case 0:
2665 2685 socantrcvmore(so);
2666 2686 break;
2667 2687 case 1:
2668 2688 socantsendmore(so);
2669 2689 break;
2670 2690 case 2:
2671 2691 socantsendmore(so);
2672 2692 socantrcvmore(so);
2673 2693 break;
2674 2694 default:
2675 2695 error = EINVAL;
2676 2696 goto done;
2677 2697 }
2678 2698
2679 2699 /*
2680 2700 * Assumes that the SS_CANT* flags are never cleared in the above code.
2681 2701 */
2682 2702 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) -
2683 2703 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE));
2684 2704 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0);
2685 2705
2686 2706 switch (state_change) {
2687 2707 case 0:
2688 2708 dprintso(so, 1,
2689 2709 ("sotpi_shutdown: nothing to send in state 0x%x\n",
2690 2710 so->so_state));
2691 2711 goto done;
2692 2712
2693 2713 case SS_CANTRCVMORE:
2694 2714 mutex_exit(&so->so_lock);
2695 2715 strseteof(SOTOV(so), 1);
2696 2716 /*
2697 2717 * strseteof takes care of read side wakeups,
2698 2718 * pollwakeups, and signals.
2699 2719 */
2700 2720 /*
2701 2721 * Get the read lock before flushing data to avoid problems
2702 2722 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
2703 2723 */
2704 2724 mutex_enter(&so->so_lock);
2705 2725 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */
2706 2726 mutex_exit(&so->so_lock);
2707 2727
2708 2728 /* Flush read side queue */
2709 2729 strflushrq(SOTOV(so), FLUSHALL);
2710 2730
2711 2731 mutex_enter(&so->so_lock);
2712 2732 so_unlock_read(so); /* Clear SOREADLOCKED */
2713 2733 break;
2714 2734
2715 2735 case SS_CANTSENDMORE:
2716 2736 mutex_exit(&so->so_lock);
2717 2737 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2718 2738 mutex_enter(&so->so_lock);
2719 2739 break;
2720 2740
2721 2741 case SS_CANTSENDMORE|SS_CANTRCVMORE:
2722 2742 mutex_exit(&so->so_lock);
2723 2743 strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2724 2744 strseteof(SOTOV(so), 1);
2725 2745 /*
2726 2746 * strseteof takes care of read side wakeups,
2727 2747 * pollwakeups, and signals.
2728 2748 */
2729 2749 /*
2730 2750 * Get the read lock before flushing data to avoid problems
2731 2751 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
2732 2752 */
2733 2753 mutex_enter(&so->so_lock);
2734 2754 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */
2735 2755 mutex_exit(&so->so_lock);
2736 2756
2737 2757 /* Flush read side queue */
2738 2758 strflushrq(SOTOV(so), FLUSHALL);
2739 2759
2740 2760 mutex_enter(&so->so_lock);
2741 2761 so_unlock_read(so); /* Clear SOREADLOCKED */
2742 2762 break;
2743 2763 }
2744 2764
2745 2765 ASSERT(MUTEX_HELD(&so->so_lock));
2746 2766
2747 2767 /*
2748 2768 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them
2749 2769 * was set due to this call and the new state has both of them set:
2750 2770 * Send the AF_UNIX close indication
2751 2771 * For T_COTS send a discon_ind
2752 2772 *
2753 2773 * If cantsend was set due to this call:
2754 2774 * For T_COTSORD send an ordrel_ind
2755 2775 *
2756 2776 * Note that for T_CLTS there is no message sent here.
2757 2777 */
2758 2778 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) ==
2759 2779 (SS_CANTRCVMORE|SS_CANTSENDMORE)) {
2760 2780 /*
2761 2781 * For SunOS 4.X compatibility we tell the other end
2762 2782 * that we are unable to receive at this point.
2763 2783 */
2764 2784 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS)
2765 2785 so_unix_close(so);
2766 2786
2767 2787 if (sti->sti_serv_type == T_COTS)
2768 2788 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD);
2769 2789 }
2770 2790 if ((state_change & SS_CANTSENDMORE) &&
2771 2791 (sti->sti_serv_type == T_COTS_ORD)) {
2772 2792 /* Send an orderly release */
2773 2793 ordrel_req.PRIM_type = T_ORDREL_REQ;
2774 2794
2775 2795 mutex_exit(&so->so_lock);
2776 2796 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req),
2777 2797 0, _ALLOC_SLEEP, cr);
2778 2798 /*
2779 2799 * Send down the T_ORDREL_REQ even if there is flow control.
2780 2800 * This prevents shutdown from blocking.
2781 2801 * Note that there is no T_OK_ACK for ordrel_req.
2782 2802 */
2783 2803 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2784 2804 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
2785 2805 mutex_enter(&so->so_lock);
2786 2806 if (error) {
2787 2807 eprintsoline(so, error);
2788 2808 goto done;
2789 2809 }
2790 2810 }
2791 2811
2792 2812 done:
2793 2813 so_unlock_single(so, SOLOCKED);
2794 2814 mutex_exit(&so->so_lock);
2795 2815 return (error);
2796 2816 }
2797 2817
2798 2818 /*
2799 2819 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
2800 2820 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
2801 2821 * that we have closed.
2802 2822 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
2803 2823 * T_UNITDATA_REQ containing the same option.
2804 2824 *
2805 2825 * For SOCK_DGRAM half-connections (somebody connected to this end
2806 2826 * but this end is not connect) we don't know where to send any
2807 2827 * SO_UNIX_CLOSE.
2808 2828 *
2809 2829 * We have to ignore stream head errors just in case there has been
2810 2830 * a shutdown(output).
2811 2831 * Ignore any flow control to try to get the message more quickly to the peer.
2812 2832 * While locally ignoring flow control solves the problem when there
2813 2833 * is only the loopback transport on the stream it would not provide
2814 2834 * the correct AF_UNIX socket semantics when one or more modules have
2815 2835 * been pushed.
2816 2836 */
2817 2837 void
2818 2838 so_unix_close(struct sonode *so)
2819 2839 {
2820 2840 struct T_opthdr toh;
2821 2841 mblk_t *mp;
2822 2842 sotpi_info_t *sti = SOTOTPI(so);
2823 2843
2824 2844 ASSERT(MUTEX_HELD(&so->so_lock));
2825 2845
2826 2846 ASSERT(so->so_family == AF_UNIX);
2827 2847
2828 2848 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
2829 2849 (SS_ISCONNECTED|SS_ISBOUND))
2830 2850 return;
2831 2851
2832 2852 dprintso(so, 1, ("so_unix_close(%p) %s\n",
2833 2853 (void *)so, pr_state(so->so_state, so->so_mode)));
2834 2854
2835 2855 toh.level = SOL_SOCKET;
2836 2856 toh.name = SO_UNIX_CLOSE;
2837 2857
2838 2858 /* zero length + header */
2839 2859 toh.len = (t_uscalar_t)sizeof (struct T_opthdr);
2840 2860 toh.status = 0;
2841 2861
2842 2862 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) {
2843 2863 struct T_optdata_req tdr;
2844 2864
2845 2865 tdr.PRIM_type = T_OPTDATA_REQ;
2846 2866 tdr.DATA_flag = 0;
2847 2867
2848 2868 tdr.OPT_length = (t_scalar_t)sizeof (toh);
2849 2869 tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
2850 2870
2851 2871 /* NOTE: holding so_lock while sleeping */
2852 2872 mp = soallocproto2(&tdr, sizeof (tdr),
2853 2873 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED());
2854 2874 } else {
2855 2875 struct T_unitdata_req tudr;
2856 2876 void *addr;
2857 2877 socklen_t addrlen;
2858 2878 void *src;
2859 2879 socklen_t srclen;
2860 2880 struct T_opthdr toh2;
2861 2881 t_scalar_t size;
2862 2882
2863 2883 /*
2864 2884 * We know this is an AF_UNIX connected DGRAM socket.
2865 2885 * We therefore already have the destination address
2866 2886 * in the internal form needed for this send. This is
2867 2887 * similar to the sosend_dgram call later in this file
2868 2888 * when there's no user-specified destination address.
2869 2889 */
2870 2890 if (sti->sti_faddr_noxlate) {
2871 2891 /*
2872 2892 * Already have a transport internal address. Do not
2873 2893 * pass any (transport internal) source address.
2874 2894 */
2875 2895 addr = sti->sti_faddr_sa;
2876 2896 addrlen = (t_uscalar_t)sti->sti_faddr_len;
2877 2897 src = NULL;
2878 2898 srclen = 0;
2879 2899 } else {
2880 2900 /*
2881 2901 * Pass the sockaddr_un source address as an option
2882 2902 * and translate the remote address.
2883 2903 * Holding so_lock thus sti_laddr_sa can not change.
2884 2904 */
2885 2905 src = sti->sti_laddr_sa;
2886 2906 srclen = (socklen_t)sti->sti_laddr_len;
2887 2907 dprintso(so, 1,
2888 2908 ("so_ux_close: srclen %d, src %p\n",
2889 2909 srclen, src));
2890 2910 /*
2891 2911 * Use the destination address saved in connect.
2892 2912 */
2893 2913 addr = &sti->sti_ux_faddr;
2894 2914 addrlen = sizeof (sti->sti_ux_faddr);
2895 2915 }
2896 2916 tudr.PRIM_type = T_UNITDATA_REQ;
2897 2917 tudr.DEST_length = addrlen;
2898 2918 tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
2899 2919 if (srclen == 0) {
2900 2920 tudr.OPT_length = (t_scalar_t)sizeof (toh);
2901 2921 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
2902 2922 _TPI_ALIGN_TOPT(addrlen));
2903 2923
2904 2924 size = tudr.OPT_offset + tudr.OPT_length;
2905 2925 /* NOTE: holding so_lock while sleeping */
2906 2926 mp = soallocproto2(&tudr, sizeof (tudr),
2907 2927 addr, addrlen, size, _ALLOC_SLEEP, CRED());
2908 2928 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen);
2909 2929 soappendmsg(mp, &toh, sizeof (toh));
2910 2930 } else {
2911 2931 /*
2912 2932 * There is a AF_UNIX sockaddr_un to include as a
2913 2933 * source address option.
2914 2934 */
2915 2935 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) +
2916 2936 _TPI_ALIGN_TOPT(srclen));
2917 2937 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
2918 2938 _TPI_ALIGN_TOPT(addrlen));
2919 2939
2920 2940 toh2.level = SOL_SOCKET;
2921 2941 toh2.name = SO_SRCADDR;
2922 2942 toh2.len = (t_uscalar_t)(srclen +
2923 2943 sizeof (struct T_opthdr));
2924 2944 toh2.status = 0;
2925 2945
2926 2946 size = tudr.OPT_offset + tudr.OPT_length;
2927 2947
2928 2948 /* NOTE: holding so_lock while sleeping */
2929 2949 mp = soallocproto2(&tudr, sizeof (tudr),
2930 2950 addr, addrlen, size, _ALLOC_SLEEP, CRED());
2931 2951 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
2932 2952 soappendmsg(mp, &toh, sizeof (toh));
2933 2953 soappendmsg(mp, &toh2, sizeof (toh2));
2934 2954 soappendmsg(mp, src, srclen);
2935 2955 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
2936 2956 }
2937 2957 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
2938 2958 }
2939 2959 mutex_exit(&so->so_lock);
2940 2960 (void) kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2941 2961 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
2942 2962 mutex_enter(&so->so_lock);
2943 2963 }
2944 2964
2945 2965 /*
2946 2966 * Called by sotpi_recvmsg when reading a non-zero amount of data.
2947 2967 * In addition, the caller typically verifies that there is some
2948 2968 * potential state to clear by checking
2949 2969 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK))
2950 2970 * before calling this routine.
2951 2971 * Note that such a check can be made without holding so_lock since
2952 2972 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg
2953 2973 * decrements sti_oobsigcnt.
2954 2974 *
2955 2975 * When data is read *after* the point that all pending
2956 2976 * oob data has been consumed the oob indication is cleared.
2957 2977 *
2958 2978 * This logic keeps select/poll returning POLLRDBAND and
2959 2979 * SIOCATMARK returning true until we have read past
2960 2980 * the mark.
2961 2981 */
2962 2982 static void
2963 2983 sorecv_update_oobstate(struct sonode *so)
2964 2984 {
2965 2985 sotpi_info_t *sti = SOTOTPI(so);
2966 2986
2967 2987 mutex_enter(&so->so_lock);
2968 2988 ASSERT(so_verify_oobstate(so));
2969 2989 dprintso(so, 1,
2970 2990 ("sorecv_update_oobstate: counts %d/%d state %s\n",
2971 2991 sti->sti_oobsigcnt,
2972 2992 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode)));
2973 2993 if (sti->sti_oobsigcnt == 0) {
2974 2994 /* No more pending oob indications */
2975 2995 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
2976 2996 freemsg(so->so_oobmsg);
2977 2997 so->so_oobmsg = NULL;
2978 2998 }
2979 2999 ASSERT(so_verify_oobstate(so));
2980 3000 mutex_exit(&so->so_lock);
2981 3001 }
2982 3002
2983 3003 /*
2984 3004 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s).
2985 3005 */
2986 3006 static int
2987 3007 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
2988 3008 {
2989 3009 sotpi_info_t *sti = SOTOTPI(so);
2990 3010 int error = 0;
2991 3011 mblk_t *tmp = NULL;
2992 3012 mblk_t *pmp = NULL;
2993 3013 mblk_t *nmp = sti->sti_nl7c_rcv_mp;
2994 3014
2995 3015 ASSERT(nmp != NULL);
2996 3016
2997 3017 while (nmp != NULL && uiop->uio_resid > 0) {
2998 3018 ssize_t n;
2999 3019
3000 3020 if (DB_TYPE(nmp) == M_DATA) {
3001 3021 /*
3002 3022 * We have some data, uiomove up to resid bytes.
3003 3023 */
3004 3024 n = MIN(MBLKL(nmp), uiop->uio_resid);
3005 3025 if (n > 0)
3006 3026 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop);
3007 3027 nmp->b_rptr += n;
3008 3028 if (nmp->b_rptr == nmp->b_wptr) {
3009 3029 pmp = nmp;
3010 3030 nmp = nmp->b_cont;
3011 3031 }
3012 3032 if (error)
3013 3033 break;
3014 3034 } else {
3015 3035 /*
3016 3036 * We only handle data, save for caller to handle.
3017 3037 */
3018 3038 if (pmp != NULL) {
3019 3039 pmp->b_cont = nmp->b_cont;
3020 3040 }
3021 3041 nmp->b_cont = NULL;
3022 3042 if (*rmp == NULL) {
3023 3043 *rmp = nmp;
3024 3044 } else {
3025 3045 tmp->b_cont = nmp;
3026 3046 }
3027 3047 nmp = nmp->b_cont;
3028 3048 tmp = nmp;
3029 3049 }
3030 3050 }
3031 3051 if (pmp != NULL) {
3032 3052 /* Free any mblk_t(s) which we have consumed */
3033 3053 pmp->b_cont = NULL;
3034 3054 freemsg(sti->sti_nl7c_rcv_mp);
3035 3055 }
3036 3056 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) {
3037 3057 /* Last mblk_t so return the saved kstrgetmsg() rval/error */
3038 3058 if (error == 0) {
3039 3059 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval;
3040 3060
3041 3061 error = p->r_v.r_v2;
3042 3062 p->r_v.r_v2 = 0;
3043 3063 }
3044 3064 rp->r_vals = sti->sti_nl7c_rcv_rval;
3045 3065 sti->sti_nl7c_rcv_rval = 0;
3046 3066 } else {
3047 3067 /* More mblk_t(s) to process so no rval to return */
3048 3068 rp->r_vals = 0;
3049 3069 }
3050 3070 return (error);
3051 3071 }
3052 3072 /*
3053 3073 * Receive the next message on the queue.
3054 3074 * If msg_controllen is non-zero when called the caller is interested in
3055 3075 * any received control info (options).
3056 3076 * If msg_namelen is non-zero when called the caller is interested in
3057 3077 * any received source address.
3058 3078 * The routine returns with msg_control and msg_name pointing to
3059 3079 * kmem_alloc'ed memory which the caller has to free.
3060 3080 */
3061 3081 /* ARGSUSED */
3062 3082 int
3063 3083 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
3064 3084 struct cred *cr)
3065 3085 {
3066 3086 union T_primitives *tpr;
3067 3087 mblk_t *mp;
3068 3088 uchar_t pri;
3069 3089 int pflag, opflag;
3070 3090 void *control;
3071 3091 t_uscalar_t controllen;
3072 3092 t_uscalar_t namelen;
3073 3093 int so_state = so->so_state; /* Snapshot */
3074 3094 ssize_t saved_resid;
3075 3095 rval_t rval;
3076 3096 int flags;
3077 3097 clock_t timout;
3078 3098 int error = 0;
3079 3099 sotpi_info_t *sti = SOTOTPI(so);
3080 3100
3081 3101 flags = msg->msg_flags;
3082 3102 msg->msg_flags = 0;
3083 3103
3084 3104 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n",
3085 3105 (void *)so, (void *)msg, flags,
3086 3106 pr_state(so->so_state, so->so_mode), so->so_error));
3087 3107
3088 3108 if (so->so_version == SOV_STREAM) {
3089 3109 so_update_attrs(so, SOACC);
3090 3110 /* The imaginary "sockmod" has been popped - act as a stream */
3091 3111 return (strread(SOTOV(so), uiop, cr));
3092 3112 }
3093 3113
3094 3114 /*
3095 3115 * If we are not connected because we have never been connected
3096 3116 * we return ENOTCONN. If we have been connected (but are no longer
3097 3117 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return
3098 3118 * the EOF.
3099 3119 *
3100 3120 * An alternative would be to post an ENOTCONN error in stream head
3101 3121 * (read+write) and clear it when we're connected. However, that error
3102 3122 * would cause incorrect poll/select behavior!
3103 3123 */
3104 3124 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
3105 3125 (so->so_mode & SM_CONNREQUIRED)) {
3106 3126 return (ENOTCONN);
3107 3127 }
3108 3128
3109 3129 /*
3110 3130 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but
3111 3131 * after checking that the read queue is empty) and returns zero.
3112 3132 * This implementation will sleep (in kstrgetmsg) even if uio_resid
3113 3133 * is zero.
3114 3134 */
3115 3135
3116 3136 if (flags & MSG_OOB) {
3117 3137 /* Check that the transport supports OOB */
3118 3138 if (!(so->so_mode & SM_EXDATA))
3119 3139 return (EOPNOTSUPP);
3120 3140 so_update_attrs(so, SOACC);
3121 3141 return (sorecvoob(so, msg, uiop, flags,
3122 3142 (so->so_options & SO_OOBINLINE)));
3123 3143 }
3124 3144
3125 3145 so_update_attrs(so, SOACC);
3126 3146
3127 3147 /*
3128 3148 * Set msg_controllen and msg_namelen to zero here to make it
3129 3149 * simpler in the cases that no control or name is returned.
3130 3150 */
3131 3151 controllen = msg->msg_controllen;
3132 3152 namelen = msg->msg_namelen;
3133 3153 msg->msg_controllen = 0;
3134 3154 msg->msg_namelen = 0;
3135 3155
3136 3156 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n",
3137 3157 namelen, controllen));
3138 3158
3139 3159 mutex_enter(&so->so_lock);
3140 3160 /*
3141 3161 * If an NL7C enabled socket and not waiting for write data.
3142 3162 */
3143 3163 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) ==
3144 3164 NL7C_ENABLED) {
3145 3165 if (sti->sti_nl7c_uri) {
3146 3166 /* Close uri processing for a previous request */
3147 3167 nl7c_close(so);
3148 3168 }
3149 3169 if ((so_state & SS_CANTRCVMORE) &&
3150 3170 sti->sti_nl7c_rcv_mp == NULL) {
3151 3171 /* Nothing to process, EOF */
3152 3172 mutex_exit(&so->so_lock);
3153 3173 return (0);
3154 3174 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
3155 3175 /* Persistent NL7C socket, try to process request */
3156 3176 boolean_t ret;
3157 3177
3158 3178 ret = nl7c_process(so,
3159 3179 (so->so_state & (SS_NONBLOCK|SS_NDELAY)));
3160 3180 rval.r_vals = sti->sti_nl7c_rcv_rval;
3161 3181 error = rval.r_v.r_v2;
3162 3182 if (error) {
3163 3183 /* Error of some sort, return it */
3164 3184 mutex_exit(&so->so_lock);
3165 3185 return (error);
3166 3186 }
3167 3187 if (sti->sti_nl7c_flags &&
3168 3188 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) {
3169 3189 /*
3170 3190 * Still an NL7C socket and no data
3171 3191 * to pass up to the caller.
3172 3192 */
3173 3193 mutex_exit(&so->so_lock);
3174 3194 if (ret) {
3175 3195 /* EOF */
3176 3196 return (0);
3177 3197 } else {
3178 3198 /* Need more data */
3179 3199 return (EAGAIN);
3180 3200 }
3181 3201 }
3182 3202 } else {
3183 3203 /*
3184 3204 * Not persistent so no further NL7C processing.
3185 3205 */
3186 3206 sti->sti_nl7c_flags = 0;
3187 3207 }
3188 3208 }
3189 3209 /*
3190 3210 * Only one reader is allowed at any given time. This is needed
3191 3211 * for T_EXDATA handling and, in the future, MSG_WAITALL.
3192 3212 *
3193 3213 * This is slightly different that BSD behavior in that it fails with
3194 3214 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access
3195 3215 * is single-threaded using sblock(), which is dropped while waiting
3196 3216 * for data to appear. The difference shows up e.g. if one
3197 3217 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor
3198 3218 * does use nonblocking io and different threads are reading each
3199 3219 * file descriptor. In BSD there would never be an EWOULDBLOCK error
3200 3220 * in this case as long as the read queue doesn't get empty.
3201 3221 * In this implementation the thread using nonblocking io can
3202 3222 * get an EWOULDBLOCK error due to the blocking thread executing
3203 3223 * e.g. in the uiomove in kstrgetmsg.
3204 3224 * This difference is not believed to be significant.
3205 3225 */
3206 3226 /* Set SOREADLOCKED */
3207 3227 error = so_lock_read_intr(so,
3208 3228 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
3209 3229 mutex_exit(&so->so_lock);
3210 3230 if (error)
3211 3231 return (error);
3212 3232
3213 3233 /*
3214 3234 * Tell kstrgetmsg to not inspect the stream head errors until all
3215 3235 * queued data has been consumed.
3216 3236 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set.
3217 3237 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block.
3218 3238 *
3219 3239 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and
3220 3240 * to T_OPTDATA_IND that do not contain any user-visible control msg.
3221 3241 * Note that MSG_WAITALL set with MSG_PEEK is a noop.
3222 3242 */
3223 3243 pflag = MSG_ANY | MSG_DELAYERROR;
3224 3244 if (flags & MSG_PEEK) {
3225 3245 pflag |= MSG_IPEEK;
3226 3246 flags &= ~MSG_WAITALL;
3227 3247 }
3228 3248 if (so->so_mode & SM_ATOMIC)
3229 3249 pflag |= MSG_DISCARDTAIL;
3230 3250
3231 3251 if (flags & MSG_DONTWAIT)
3232 3252 timout = 0;
3233 3253 else if (so->so_rcvtimeo != 0)
3234 3254 timout = TICK_TO_MSEC(so->so_rcvtimeo);
3235 3255 else
3236 3256 timout = -1;
3237 3257 opflag = pflag;
3238 3258 retry:
3239 3259 saved_resid = uiop->uio_resid;
3240 3260 pri = 0;
3241 3261 mp = NULL;
3242 3262 if (sti->sti_nl7c_rcv_mp != NULL) {
3243 3263 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */
3244 3264 error = nl7c_sorecv(so, &mp, uiop, &rval);
3245 3265 } else {
3246 3266 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag,
3247 3267 timout, &rval);
3248 3268 }
3249 3269 if (error != 0) {
3250 3270 /* kstrgetmsg returns ETIME when timeout expires */
3251 3271 if (error == ETIME)
3252 3272 error = EWOULDBLOCK;
3253 3273 goto out;
3254 3274 }
3255 3275 /*
3256 3276 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
3257 3277 * For non-datagrams MOREDATA is used to set MSG_EOR.
3258 3278 */
3259 3279 ASSERT(!(rval.r_val1 & MORECTL));
3260 3280 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
3261 3281 msg->msg_flags |= MSG_TRUNC;
3262 3282
3263 3283 if (mp == NULL) {
3264 3284 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n"));
3265 3285 /*
3266 3286 * 4.3BSD and 4.4BSD clears the mark when peeking across it.
3267 3287 * The draft Posix socket spec states that the mark should
3268 3288 * not be cleared when peeking. We follow the latter.
3269 3289 */
3270 3290 if ((so->so_state &
3271 3291 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
3272 3292 (uiop->uio_resid != saved_resid) &&
3273 3293 !(flags & MSG_PEEK)) {
3274 3294 sorecv_update_oobstate(so);
3275 3295 }
3276 3296
3277 3297 mutex_enter(&so->so_lock);
3278 3298 /* Set MSG_EOR based on MOREDATA */
3279 3299 if (!(rval.r_val1 & MOREDATA)) {
3280 3300 if (so->so_state & SS_SAVEDEOR) {
3281 3301 msg->msg_flags |= MSG_EOR;
3282 3302 so->so_state &= ~SS_SAVEDEOR;
3283 3303 }
3284 3304 }
3285 3305 /*
3286 3306 * If some data was received (i.e. not EOF) and the
3287 3307 * read/recv* has not been satisfied wait for some more.
3288 3308 */
3289 3309 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
3290 3310 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
3291 3311 mutex_exit(&so->so_lock);
3292 3312 pflag = opflag | MSG_NOMARK;
3293 3313 goto retry;
3294 3314 }
3295 3315 goto out_locked;
3296 3316 }
3297 3317
3298 3318 /* strsock_proto has already verified length and alignment */
3299 3319 tpr = (union T_primitives *)mp->b_rptr;
3300 3320 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type));
3301 3321
3302 3322 switch (tpr->type) {
3303 3323 case T_DATA_IND: {
3304 3324 if ((so->so_state &
3305 3325 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
3306 3326 (uiop->uio_resid != saved_resid) &&
3307 3327 !(flags & MSG_PEEK)) {
3308 3328 sorecv_update_oobstate(so);
3309 3329 }
3310 3330
3311 3331 /*
3312 3332 * Set msg_flags to MSG_EOR based on
3313 3333 * MORE_flag and MOREDATA.
3314 3334 */
3315 3335 mutex_enter(&so->so_lock);
3316 3336 so->so_state &= ~SS_SAVEDEOR;
3317 3337 if (!(tpr->data_ind.MORE_flag & 1)) {
3318 3338 if (!(rval.r_val1 & MOREDATA))
3319 3339 msg->msg_flags |= MSG_EOR;
3320 3340 else
3321 3341 so->so_state |= SS_SAVEDEOR;
3322 3342 }
3323 3343 freemsg(mp);
3324 3344 /*
3325 3345 * If some data was received (i.e. not EOF) and the
3326 3346 * read/recv* has not been satisfied wait for some more.
3327 3347 */
3328 3348 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
3329 3349 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
3330 3350 mutex_exit(&so->so_lock);
3331 3351 pflag = opflag | MSG_NOMARK;
3332 3352 goto retry;
3333 3353 }
3334 3354 goto out_locked;
3335 3355 }
3336 3356 case T_UNITDATA_IND: {
3337 3357 void *addr;
3338 3358 t_uscalar_t addrlen;
3339 3359 void *abuf;
3340 3360 t_uscalar_t optlen;
3341 3361 void *opt;
3342 3362
3343 3363 if ((so->so_state &
3344 3364 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
3345 3365 (uiop->uio_resid != saved_resid) &&
3346 3366 !(flags & MSG_PEEK)) {
3347 3367 sorecv_update_oobstate(so);
3348 3368 }
3349 3369
3350 3370 if (namelen != 0) {
3351 3371 /* Caller wants source address */
3352 3372 addrlen = tpr->unitdata_ind.SRC_length;
3353 3373 addr = sogetoff(mp,
3354 3374 tpr->unitdata_ind.SRC_offset,
3355 3375 addrlen, 1);
3356 3376 if (addr == NULL) {
3357 3377 freemsg(mp);
3358 3378 error = EPROTO;
3359 3379 eprintsoline(so, error);
3360 3380 goto out;
3361 3381 }
3362 3382 if (so->so_family == AF_UNIX) {
3363 3383 /*
3364 3384 * Can not use the transport level address.
3365 3385 * If there is a SO_SRCADDR option carrying
3366 3386 * the socket level address it will be
3367 3387 * extracted below.
3368 3388 */
3369 3389 addr = NULL;
3370 3390 addrlen = 0;
3371 3391 }
3372 3392 }
3373 3393 optlen = tpr->unitdata_ind.OPT_length;
3374 3394 if (optlen != 0) {
3375 3395 t_uscalar_t ncontrollen;
3376 3396
3377 3397 /*
3378 3398 * Extract any source address option.
3379 3399 * Determine how large cmsg buffer is needed.
3380 3400 */
3381 3401 opt = sogetoff(mp,
3382 3402 tpr->unitdata_ind.OPT_offset,
3383 3403 optlen, __TPI_ALIGN_SIZE);
3384 3404
3385 3405 if (opt == NULL) {
3386 3406 freemsg(mp);
3387 3407 error = EPROTO;
3388 3408 eprintsoline(so, error);
3389 3409 goto out;
3390 3410 }
3391 3411 if (so->so_family == AF_UNIX)
3392 3412 so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
3393 3413 ncontrollen = so_cmsglen(mp, opt, optlen,
3394 3414 !(flags & MSG_XPG4_2));
3395 3415 if (controllen != 0)
3396 3416 controllen = ncontrollen;
3397 3417 else if (ncontrollen != 0)
3398 3418 msg->msg_flags |= MSG_CTRUNC;
3399 3419 } else {
3400 3420 controllen = 0;
3401 3421 }
3402 3422
3403 3423 if (namelen != 0) {
3404 3424 /*
3405 3425 * Return address to caller.
3406 3426 * Caller handles truncation if length
3407 3427 * exceeds msg_namelen.
3408 3428 * NOTE: AF_UNIX NUL termination is ensured by
3409 3429 * the sender's copyin_name().
3410 3430 */
3411 3431 abuf = kmem_alloc(addrlen, KM_SLEEP);
3412 3432
3413 3433 bcopy(addr, abuf, addrlen);
3414 3434 msg->msg_name = abuf;
3415 3435 msg->msg_namelen = addrlen;
3416 3436 }
3417 3437
3418 3438 if (controllen != 0) {
3419 3439 /*
3420 3440 * Return control msg to caller.
3421 3441 * Caller handles truncation if length
3422 3442 * exceeds msg_controllen.
3423 3443 */
3424 3444 control = kmem_zalloc(controllen, KM_SLEEP);
3425 3445
3426 3446 error = so_opt2cmsg(mp, opt, optlen,
3427 3447 !(flags & MSG_XPG4_2),
3428 3448 control, controllen);
3429 3449 if (error) {
3430 3450 freemsg(mp);
3431 3451 if (msg->msg_namelen != 0)
3432 3452 kmem_free(msg->msg_name,
3433 3453 msg->msg_namelen);
3434 3454 kmem_free(control, controllen);
3435 3455 eprintsoline(so, error);
3436 3456 goto out;
3437 3457 }
3438 3458 msg->msg_control = control;
3439 3459 msg->msg_controllen = controllen;
3440 3460 }
3441 3461
3442 3462 freemsg(mp);
3443 3463 goto out;
3444 3464 }
3445 3465 case T_OPTDATA_IND: {
3446 3466 struct T_optdata_req *tdr;
3447 3467 void *opt;
3448 3468 t_uscalar_t optlen;
3449 3469
3450 3470 if ((so->so_state &
3451 3471 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
3452 3472 (uiop->uio_resid != saved_resid) &&
3453 3473 !(flags & MSG_PEEK)) {
3454 3474 sorecv_update_oobstate(so);
3455 3475 }
3456 3476
3457 3477 tdr = (struct T_optdata_req *)mp->b_rptr;
3458 3478 optlen = tdr->OPT_length;
3459 3479 if (optlen != 0) {
3460 3480 t_uscalar_t ncontrollen;
3461 3481 /*
3462 3482 * Determine how large cmsg buffer is needed.
3463 3483 */
3464 3484 opt = sogetoff(mp,
3465 3485 tpr->optdata_ind.OPT_offset,
3466 3486 optlen, __TPI_ALIGN_SIZE);
3467 3487
3468 3488 if (opt == NULL) {
3469 3489 freemsg(mp);
3470 3490 error = EPROTO;
3471 3491 eprintsoline(so, error);
3472 3492 goto out;
3473 3493 }
3474 3494
3475 3495 ncontrollen = so_cmsglen(mp, opt, optlen,
3476 3496 !(flags & MSG_XPG4_2));
3477 3497 if (controllen != 0)
3478 3498 controllen = ncontrollen;
3479 3499 else if (ncontrollen != 0)
3480 3500 msg->msg_flags |= MSG_CTRUNC;
3481 3501 } else {
3482 3502 controllen = 0;
3483 3503 }
3484 3504
3485 3505 if (controllen != 0) {
3486 3506 /*
3487 3507 * Return control msg to caller.
3488 3508 * Caller handles truncation if length
3489 3509 * exceeds msg_controllen.
3490 3510 */
3491 3511 control = kmem_zalloc(controllen, KM_SLEEP);
3492 3512
3493 3513 error = so_opt2cmsg(mp, opt, optlen,
3494 3514 !(flags & MSG_XPG4_2),
3495 3515 control, controllen);
3496 3516 if (error) {
3497 3517 freemsg(mp);
3498 3518 kmem_free(control, controllen);
3499 3519 eprintsoline(so, error);
3500 3520 goto out;
3501 3521 }
3502 3522 msg->msg_control = control;
3503 3523 msg->msg_controllen = controllen;
3504 3524 }
3505 3525
3506 3526 /*
3507 3527 * Set msg_flags to MSG_EOR based on
3508 3528 * DATA_flag and MOREDATA.
3509 3529 */
3510 3530 mutex_enter(&so->so_lock);
3511 3531 so->so_state &= ~SS_SAVEDEOR;
3512 3532 if (!(tpr->data_ind.MORE_flag & 1)) {
3513 3533 if (!(rval.r_val1 & MOREDATA))
3514 3534 msg->msg_flags |= MSG_EOR;
3515 3535 else
3516 3536 so->so_state |= SS_SAVEDEOR;
3517 3537 }
3518 3538 freemsg(mp);
3519 3539 /*
3520 3540 * If some data was received (i.e. not EOF) and the
3521 3541 * read/recv* has not been satisfied wait for some more.
3522 3542 * Not possible to wait if control info was received.
3523 3543 */
3524 3544 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
3525 3545 controllen == 0 &&
3526 3546 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
3527 3547 mutex_exit(&so->so_lock);
3528 3548 pflag = opflag | MSG_NOMARK;
3529 3549 goto retry;
3530 3550 }
3531 3551 goto out_locked;
3532 3552 }
3533 3553 case T_EXDATA_IND: {
3534 3554 dprintso(so, 1,
3535 3555 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
3536 3556 "state %s\n",
3537 3557 sti->sti_oobsigcnt, sti->sti_oobcnt,
3538 3558 saved_resid - uiop->uio_resid,
3539 3559 pr_state(so->so_state, so->so_mode)));
3540 3560 /*
3541 3561 * kstrgetmsg handles MSGMARK so there is nothing to
3542 3562 * inspect in the T_EXDATA_IND.
3543 3563 * strsock_proto makes the stream head queue the T_EXDATA_IND
3544 3564 * as a separate message with no M_DATA component. Furthermore,
3545 3565 * the stream head does not consolidate M_DATA messages onto
3546 3566 * an MSGMARK'ed message ensuring that the T_EXDATA_IND
3547 3567 * remains a message by itself. This is needed since MSGMARK
3548 3568 * marks both the whole message as well as the last byte
3549 3569 * of the message.
3550 3570 */
3551 3571 freemsg(mp);
3552 3572 ASSERT(uiop->uio_resid == saved_resid); /* No data */
3553 3573 if (flags & MSG_PEEK) {
3554 3574 /*
3555 3575 * Even though we are peeking we consume the
3556 3576 * T_EXDATA_IND thereby moving the mark information
3557 3577 * to SS_RCVATMARK. Then the oob code below will
3558 3578 * retry the peeking kstrgetmsg.
3559 3579 * Note that the stream head read queue is
3560 3580 * never flushed without holding SOREADLOCKED
3561 3581 * thus the T_EXDATA_IND can not disappear
3562 3582 * underneath us.
3563 3583 */
3564 3584 dprintso(so, 1,
3565 3585 ("sotpi_recvmsg: consume EXDATA_IND "
3566 3586 "counts %d/%d state %s\n",
3567 3587 sti->sti_oobsigcnt,
3568 3588 sti->sti_oobcnt,
3569 3589 pr_state(so->so_state, so->so_mode)));
3570 3590
3571 3591 pflag = MSG_ANY | MSG_DELAYERROR;
3572 3592 if (so->so_mode & SM_ATOMIC)
3573 3593 pflag |= MSG_DISCARDTAIL;
3574 3594
3575 3595 pri = 0;
3576 3596 mp = NULL;
3577 3597
3578 3598 error = kstrgetmsg(SOTOV(so), &mp, uiop,
3579 3599 &pri, &pflag, (clock_t)-1, &rval);
3580 3600 ASSERT(uiop->uio_resid == saved_resid);
3581 3601
3582 3602 if (error) {
3583 3603 #ifdef SOCK_DEBUG
3584 3604 if (error != EWOULDBLOCK && error != EINTR) {
3585 3605 eprintsoline(so, error);
3586 3606 }
3587 3607 #endif /* SOCK_DEBUG */
3588 3608 goto out;
3589 3609 }
3590 3610 ASSERT(mp);
3591 3611 tpr = (union T_primitives *)mp->b_rptr;
3592 3612 ASSERT(tpr->type == T_EXDATA_IND);
3593 3613 freemsg(mp);
3594 3614 } /* end "if (flags & MSG_PEEK)" */
3595 3615
3596 3616 /*
3597 3617 * Decrement the number of queued and pending oob.
3598 3618 *
3599 3619 * SS_RCVATMARK is cleared when we read past a mark.
3600 3620 * SS_HAVEOOBDATA is cleared when we've read past the
3601 3621 * last mark.
3602 3622 * SS_OOBPEND is cleared if we've read past the last
3603 3623 * mark and no (new) SIGURG has been posted.
3604 3624 */
3605 3625 mutex_enter(&so->so_lock);
3606 3626 ASSERT(so_verify_oobstate(so));
3607 3627 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
3608 3628 ASSERT(sti->sti_oobsigcnt > 0);
3609 3629 sti->sti_oobsigcnt--;
3610 3630 ASSERT(sti->sti_oobcnt > 0);
3611 3631 sti->sti_oobcnt--;
3612 3632 /*
3613 3633 * Since the T_EXDATA_IND has been removed from the stream
3614 3634 * head, but we have not read data past the mark,
3615 3635 * sockfs needs to track that the socket is still at the mark.
3616 3636 *
3617 3637 * Since no data was received call kstrgetmsg again to wait
3618 3638 * for data.
3619 3639 */
3620 3640 so->so_state |= SS_RCVATMARK;
3621 3641 mutex_exit(&so->so_lock);
3622 3642 dprintso(so, 1,
3623 3643 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
3624 3644 sti->sti_oobsigcnt, sti->sti_oobcnt,
3625 3645 pr_state(so->so_state, so->so_mode)));
3626 3646 pflag = opflag;
3627 3647 goto retry;
3628 3648 }
3629 3649 default:
3630 3650 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n",
3631 3651 (void *)so, tpr->type, (void *)mp);
3632 3652 ASSERT(0);
3633 3653 freemsg(mp);
3634 3654 error = EPROTO;
3635 3655 eprintsoline(so, error);
3636 3656 goto out;
3637 3657 }
3638 3658 /* NOTREACHED */
3639 3659 out:
3640 3660 mutex_enter(&so->so_lock);
3641 3661 out_locked:
3642 3662 so_unlock_read(so); /* Clear SOREADLOCKED */
3643 3663 mutex_exit(&so->so_lock);
3644 3664 return (error);
3645 3665 }
3646 3666
3647 3667 /*
3648 3668 * Sending data with options on a datagram socket.
3649 3669 * Assumes caller has verified that SS_ISBOUND etc. are set.
3650 3670 *
3651 3671 * For AF_UNIX the destination address may be already in
3652 3672 * internal form, as indicated by sti->sti_faddr_noxlate
3653 3673 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to
3654 3674 * translate the destination address to internal form.
3655 3675 *
3656 3676 * The source address is passed as an option. If passing
3657 3677 * file descriptors, those are passed as file pointers in
3658 3678 * another option.
3659 3679 */
3660 3680 static int
3661 3681 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen,
3662 3682 struct uio *uiop, void *control, t_uscalar_t controllen, int flags)
3663 3683 {
3664 3684 struct T_unitdata_req tudr;
3665 3685 mblk_t *mp;
3666 3686 int error;
3667 3687 void *addr;
3668 3688 socklen_t addrlen;
3669 3689 void *src;
3670 3690 socklen_t srclen;
3671 3691 ssize_t len;
3672 3692 int size;
3673 3693 struct T_opthdr toh;
3674 3694 struct fdbuf *fdbuf;
3675 3695 t_uscalar_t optlen;
3676 3696 void *fds;
3677 3697 int fdlen;
3678 3698 sotpi_info_t *sti = SOTOTPI(so);
3679 3699
3680 3700 ASSERT(name && namelen);
3681 3701 ASSERT(control && controllen);
3682 3702
3683 3703 len = uiop->uio_resid;
3684 3704 if (len > (ssize_t)sti->sti_tidu_size) {
3685 3705 return (EMSGSIZE);
3686 3706 }
3687 3707
3688 3708 if (sti->sti_faddr_noxlate == 0 &&
3689 3709 (flags & MSG_SENDTO_NOXLATE) == 0) {
3690 3710 /*
3691 3711 * Length and family checks.
3692 3712 * Don't verify internal form.
3693 3713 */
3694 3714 error = so_addr_verify(so, name, namelen);
3695 3715 if (error) {
3696 3716 eprintsoline(so, error);
3697 3717 return (error);
3698 3718 }
3699 3719 }
3700 3720
3701 3721 if (so->so_family == AF_UNIX) {
3702 3722 if (sti->sti_faddr_noxlate) {
3703 3723 /*
3704 3724 * Already have a transport internal address. Do not
3705 3725 * pass any (transport internal) source address.
3706 3726 */
3707 3727 addr = name;
3708 3728 addrlen = namelen;
3709 3729 src = NULL;
3710 3730 srclen = 0;
3711 3731 } else if (flags & MSG_SENDTO_NOXLATE) {
3712 3732 /*
3713 3733 * Have an internal form dest. address.
3714 3734 * Pass the source address as usual.
3715 3735 */
3716 3736 addr = name;
3717 3737 addrlen = namelen;
3718 3738 src = sti->sti_laddr_sa;
3719 3739 srclen = (socklen_t)sti->sti_laddr_len;
3720 3740 } else {
3721 3741 /*
3722 3742 * Pass the sockaddr_un source address as an option
3723 3743 * and translate the remote address.
3724 3744 *
3725 3745 * Note that this code does not prevent sti_laddr_sa
3726 3746 * from changing while it is being used. Thus
3727 3747 * if an unbind+bind occurs concurrently with this
3728 3748 * send the peer might see a partially new and a
3729 3749 * partially old "from" address.
3730 3750 */
3731 3751 src = sti->sti_laddr_sa;
3732 3752 srclen = (socklen_t)sti->sti_laddr_len;
3733 3753 dprintso(so, 1,
3734 3754 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n",
3735 3755 srclen, src));
3736 3756 /*
3737 3757 * The sendmsg caller specified a destination
3738 3758 * address, which we must translate into our
3739 3759 * internal form. addr = &sti->sti_ux_taddr
3740 3760 */
3741 3761 error = so_ux_addr_xlate(so, name, namelen,
3742 3762 (flags & MSG_XPG4_2),
3743 3763 &addr, &addrlen);
3744 3764 if (error) {
3745 3765 eprintsoline(so, error);
3746 3766 return (error);
3747 3767 }
3748 3768 }
3749 3769 } else {
3750 3770 addr = name;
3751 3771 addrlen = namelen;
3752 3772 src = NULL;
3753 3773 srclen = 0;
3754 3774 }
3755 3775 optlen = so_optlen(control, controllen,
3756 3776 !(flags & MSG_XPG4_2));
3757 3777 tudr.PRIM_type = T_UNITDATA_REQ;
3758 3778 tudr.DEST_length = addrlen;
3759 3779 tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
3760 3780 if (srclen != 0)
3761 3781 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) +
3762 3782 _TPI_ALIGN_TOPT(srclen));
3763 3783 else
3764 3784 tudr.OPT_length = optlen;
3765 3785 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
3766 3786 _TPI_ALIGN_TOPT(addrlen));
3767 3787
3768 3788 size = tudr.OPT_offset + tudr.OPT_length;
3769 3789
3770 3790 /*
3771 3791 * File descriptors only when SM_FDPASSING set.
3772 3792 */
3773 3793 error = so_getfdopt(control, controllen,
3774 3794 !(flags & MSG_XPG4_2), &fds, &fdlen);
3775 3795 if (error)
3776 3796 return (error);
3777 3797 if (fdlen != -1) {
3778 3798 if (!(so->so_mode & SM_FDPASSING))
3779 3799 return (EOPNOTSUPP);
3780 3800
3781 3801 error = fdbuf_create(fds, fdlen, &fdbuf);
3782 3802 if (error)
3783 3803 return (error);
3784 3804
3785 3805 /*
3786 3806 * Pre-allocate enough additional space for lower level modules
3787 3807 * to append an option (e.g. see tl_unitdata). The following
3788 3808 * is enough extra space for the largest option we might append.
3789 3809 */
3790 3810 size += sizeof (struct T_opthdr) + ucredsize;
3791 3811 mp = fdbuf_allocmsg(size, fdbuf);
3792 3812 } else {
3793 3813 mp = soallocproto(size, _ALLOC_INTR, CRED());
3794 3814 if (mp == NULL) {
3795 3815 /*
3796 3816 * Caught a signal waiting for memory.
3797 3817 * Let send* return EINTR.
3798 3818 */
3799 3819 return (EINTR);
3800 3820 }
3801 3821 }
3802 3822 soappendmsg(mp, &tudr, sizeof (tudr));
3803 3823 soappendmsg(mp, addr, addrlen);
3804 3824 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
3805 3825
3806 3826 if (fdlen != -1) {
3807 3827 ASSERT(fdbuf != NULL);
3808 3828 toh.level = SOL_SOCKET;
3809 3829 toh.name = SO_FILEP;
3810 3830 toh.len = fdbuf->fd_size +
3811 3831 (t_uscalar_t)sizeof (struct T_opthdr);
3812 3832 toh.status = 0;
3813 3833 soappendmsg(mp, &toh, sizeof (toh));
3814 3834 soappendmsg(mp, fdbuf, fdbuf->fd_size);
3815 3835 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
3816 3836 }
3817 3837 if (srclen != 0) {
3818 3838 /*
3819 3839 * There is a AF_UNIX sockaddr_un to include as a source
3820 3840 * address option.
3821 3841 */
3822 3842 toh.level = SOL_SOCKET;
3823 3843 toh.name = SO_SRCADDR;
3824 3844 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
3825 3845 toh.status = 0;
3826 3846 soappendmsg(mp, &toh, sizeof (toh));
3827 3847 soappendmsg(mp, src, srclen);
3828 3848 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
3829 3849 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
3830 3850 }
3831 3851 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3832 3852 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
3833 3853 /*
3834 3854 * Normally at most 3 bytes left in the message, but we might have
3835 3855 * allowed for extra space if we're passing fd's through.
3836 3856 */
3837 3857 ASSERT(MBLKL(mp) <= (ssize_t)size);
3838 3858
3839 3859 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3840 3860 if (AU_AUDITING())
3841 3861 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
3842 3862
3843 3863 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
3844 3864 #ifdef SOCK_DEBUG
3845 3865 if (error) {
3846 3866 eprintsoline(so, error);
3847 3867 }
3848 3868 #endif /* SOCK_DEBUG */
3849 3869 return (error);
3850 3870 }
3851 3871
3852 3872 /*
3853 3873 * Sending data with options on a connected stream socket.
3854 3874 * Assumes caller has verified that SS_ISCONNECTED is set.
3855 3875 */
3856 3876 static int
3857 3877 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control,
3858 3878 t_uscalar_t controllen, int flags)
3859 3879 {
3860 3880 struct T_optdata_req tdr;
3861 3881 mblk_t *mp;
3862 3882 int error;
3863 3883 ssize_t iosize;
3864 3884 int size;
3865 3885 struct fdbuf *fdbuf;
3866 3886 t_uscalar_t optlen;
3867 3887 void *fds;
3868 3888 int fdlen;
3869 3889 struct T_opthdr toh;
3870 3890 sotpi_info_t *sti = SOTOTPI(so);
3871 3891
3872 3892 dprintso(so, 1,
3873 3893 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid));
3874 3894
3875 3895 /*
3876 3896 * Has to be bound and connected. However, since no locks are
3877 3897 * held the state could have changed after sotpi_sendmsg checked it
3878 3898 * thus it is not possible to ASSERT on the state.
3879 3899 */
3880 3900
3881 3901 /* Options on connection-oriented only when SM_OPTDATA set. */
3882 3902 if (!(so->so_mode & SM_OPTDATA))
3883 3903 return (EOPNOTSUPP);
3884 3904
3885 3905 do {
3886 3906 /*
3887 3907 * Set the MORE flag if uio_resid does not fit in this
3888 3908 * message or if the caller passed in "more".
3889 3909 * Error for transports with zero tidu_size.
3890 3910 */
3891 3911 tdr.PRIM_type = T_OPTDATA_REQ;
3892 3912 iosize = sti->sti_tidu_size;
3893 3913 if (iosize <= 0)
3894 3914 return (EMSGSIZE);
3895 3915 if (uiop->uio_resid > iosize) {
3896 3916 tdr.DATA_flag = 1;
3897 3917 } else {
3898 3918 if (more)
3899 3919 tdr.DATA_flag = 1;
3900 3920 else
3901 3921 tdr.DATA_flag = 0;
3902 3922 iosize = uiop->uio_resid;
3903 3923 }
3904 3924 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n",
3905 3925 tdr.DATA_flag, iosize));
3906 3926
3907 3927 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2));
3908 3928 tdr.OPT_length = optlen;
3909 3929 tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
3910 3930
3911 3931 size = (int)sizeof (tdr) + optlen;
3912 3932 /*
3913 3933 * File descriptors only when SM_FDPASSING set.
3914 3934 */
3915 3935 error = so_getfdopt(control, controllen,
3916 3936 !(flags & MSG_XPG4_2), &fds, &fdlen);
3917 3937 if (error)
3918 3938 return (error);
3919 3939 if (fdlen != -1) {
3920 3940 if (!(so->so_mode & SM_FDPASSING))
3921 3941 return (EOPNOTSUPP);
3922 3942
3923 3943 error = fdbuf_create(fds, fdlen, &fdbuf);
3924 3944 if (error)
3925 3945 return (error);
3926 3946
3927 3947 /*
3928 3948 * Pre-allocate enough additional space for lower level
3929 3949 * modules to append an option (e.g. see tl_unitdata).
3930 3950 * The following is enough extra space for the largest
3931 3951 * option we might append.
3932 3952 */
3933 3953 size += sizeof (struct T_opthdr) + ucredsize;
3934 3954 mp = fdbuf_allocmsg(size, fdbuf);
3935 3955 } else {
3936 3956 mp = soallocproto(size, _ALLOC_INTR, CRED());
3937 3957 if (mp == NULL) {
3938 3958 /*
3939 3959 * Caught a signal waiting for memory.
3940 3960 * Let send* return EINTR.
3941 3961 */
3942 3962 return (EINTR);
3943 3963 }
3944 3964 }
3945 3965 soappendmsg(mp, &tdr, sizeof (tdr));
3946 3966
3947 3967 if (fdlen != -1) {
3948 3968 ASSERT(fdbuf != NULL);
3949 3969 toh.level = SOL_SOCKET;
3950 3970 toh.name = SO_FILEP;
3951 3971 toh.len = fdbuf->fd_size +
3952 3972 (t_uscalar_t)sizeof (struct T_opthdr);
3953 3973 toh.status = 0;
3954 3974 soappendmsg(mp, &toh, sizeof (toh));
3955 3975 soappendmsg(mp, fdbuf, fdbuf->fd_size);
3956 3976 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
3957 3977 }
3958 3978 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
3959 3979 /*
3960 3980 * Normally at most 3 bytes left in the message, but we might
3961 3981 * have allowed for extra space if we're passing fd's through.
3962 3982 */
3963 3983 ASSERT(MBLKL(mp) <= (ssize_t)size);
3964 3984
3965 3985 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3966 3986
3967 3987 error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
3968 3988 0, MSG_BAND, 0);
3969 3989 if (error) {
3970 3990 eprintsoline(so, error);
3971 3991 return (error);
3972 3992 }
3973 3993 control = NULL;
3974 3994 if (uiop->uio_resid > 0) {
3975 3995 /*
3976 3996 * Recheck for fatal errors. Fail write even though
3977 3997 * some data have been written. This is consistent
3978 3998 * with strwrite semantics and BSD sockets semantics.
3979 3999 */
3980 4000 if (so->so_state & SS_CANTSENDMORE) {
3981 4001 eprintsoline(so, error);
3982 4002 return (EPIPE);
3983 4003 }
3984 4004 if (so->so_error != 0) {
3985 4005 mutex_enter(&so->so_lock);
3986 4006 error = sogeterr(so, B_TRUE);
3987 4007 mutex_exit(&so->so_lock);
3988 4008 if (error != 0) {
3989 4009 eprintsoline(so, error);
3990 4010 return (error);
3991 4011 }
3992 4012 }
3993 4013 }
3994 4014 } while (uiop->uio_resid > 0);
3995 4015 return (0);
3996 4016 }
3997 4017
3998 4018 /*
3999 4019 * Sending data on a datagram socket.
4000 4020 * Assumes caller has verified that SS_ISBOUND etc. are set.
4001 4021 *
4002 4022 * For AF_UNIX the destination address may be already in
4003 4023 * internal form, as indicated by sti->sti_faddr_noxlate
4004 4024 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to
4005 4025 * translate the destination address to internal form.
4006 4026 *
4007 4027 * The source address is passed as an option.
4008 4028 */
4009 4029 int
4010 4030 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen,
4011 4031 struct uio *uiop, int flags)
4012 4032 {
4013 4033 struct T_unitdata_req tudr;
4014 4034 mblk_t *mp;
4015 4035 int error;
4016 4036 void *addr;
4017 4037 socklen_t addrlen;
4018 4038 void *src;
4019 4039 socklen_t srclen;
4020 4040 ssize_t len;
4021 4041 sotpi_info_t *sti = SOTOTPI(so);
4022 4042
4023 4043 ASSERT(name != NULL && namelen != 0);
4024 4044
4025 4045 len = uiop->uio_resid;
4026 4046 if (len > sti->sti_tidu_size) {
4027 4047 error = EMSGSIZE;
4028 4048 goto done;
4029 4049 }
4030 4050
4031 4051 if (sti->sti_faddr_noxlate == 0 &&
4032 4052 (flags & MSG_SENDTO_NOXLATE) == 0) {
4033 4053 /*
4034 4054 * Length and family checks.
4035 4055 * Don't verify internal form.
4036 4056 */
4037 4057 error = so_addr_verify(so, name, namelen);
4038 4058 if (error != 0)
4039 4059 goto done;
4040 4060 }
4041 4061
4042 4062 if (sti->sti_direct) /* Never on AF_UNIX */
4043 4063 return (sodgram_direct(so, name, namelen, uiop, flags));
4044 4064
4045 4065 if (so->so_family == AF_UNIX) {
4046 4066 if (sti->sti_faddr_noxlate) {
4047 4067 /*
4048 4068 * Already have a transport internal address. Do not
4049 4069 * pass any (transport internal) source address.
4050 4070 */
4051 4071 addr = name;
4052 4072 addrlen = namelen;
4053 4073 src = NULL;
4054 4074 srclen = 0;
4055 4075 } else if (flags & MSG_SENDTO_NOXLATE) {
4056 4076 /*
4057 4077 * Have an internal form dest. address.
4058 4078 * Pass the source address as usual.
4059 4079 */
4060 4080 addr = name;
4061 4081 addrlen = namelen;
4062 4082 src = sti->sti_laddr_sa;
4063 4083 srclen = (socklen_t)sti->sti_laddr_len;
4064 4084 } else {
4065 4085 /*
4066 4086 * Pass the sockaddr_un source address as an option
4067 4087 * and translate the remote address.
4068 4088 *
4069 4089 * Note that this code does not prevent sti_laddr_sa
4070 4090 * from changing while it is being used. Thus
4071 4091 * if an unbind+bind occurs concurrently with this
4072 4092 * send the peer might see a partially new and a
4073 4093 * partially old "from" address.
4074 4094 */
4075 4095 src = sti->sti_laddr_sa;
4076 4096 srclen = (socklen_t)sti->sti_laddr_len;
4077 4097 dprintso(so, 1,
4078 4098 ("sosend_dgram UNIX: srclen %d, src %p\n",
4079 4099 srclen, src));
4080 4100 /*
4081 4101 * The sendmsg caller specified a destination
4082 4102 * address, which we must translate into our
4083 4103 * internal form. addr = &sti->sti_ux_taddr
4084 4104 */
4085 4105 error = so_ux_addr_xlate(so, name, namelen,
4086 4106 (flags & MSG_XPG4_2),
4087 4107 &addr, &addrlen);
4088 4108 if (error) {
4089 4109 eprintsoline(so, error);
4090 4110 goto done;
4091 4111 }
4092 4112 }
4093 4113 } else {
4094 4114 addr = name;
4095 4115 addrlen = namelen;
4096 4116 src = NULL;
4097 4117 srclen = 0;
4098 4118 }
4099 4119 tudr.PRIM_type = T_UNITDATA_REQ;
4100 4120 tudr.DEST_length = addrlen;
4101 4121 tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
4102 4122 if (srclen == 0) {
4103 4123 tudr.OPT_length = 0;
4104 4124 tudr.OPT_offset = 0;
4105 4125
4106 4126 mp = soallocproto2(&tudr, sizeof (tudr),
4107 4127 addr, addrlen, 0, _ALLOC_INTR, CRED());
4108 4128 if (mp == NULL) {
4109 4129 /*
4110 4130 * Caught a signal waiting for memory.
4111 4131 * Let send* return EINTR.
4112 4132 */
4113 4133 error = EINTR;
4114 4134 goto done;
4115 4135 }
4116 4136 } else {
4117 4137 /*
4118 4138 * There is a AF_UNIX sockaddr_un to include as a source
4119 4139 * address option.
4120 4140 */
4121 4141 struct T_opthdr toh;
4122 4142 ssize_t size;
4123 4143
4124 4144 tudr.OPT_length = (t_scalar_t)(sizeof (toh) +
4125 4145 _TPI_ALIGN_TOPT(srclen));
4126 4146 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
4127 4147 _TPI_ALIGN_TOPT(addrlen));
4128 4148
4129 4149 toh.level = SOL_SOCKET;
4130 4150 toh.name = SO_SRCADDR;
4131 4151 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
4132 4152 toh.status = 0;
4133 4153
4134 4154 size = tudr.OPT_offset + tudr.OPT_length;
4135 4155 mp = soallocproto2(&tudr, sizeof (tudr),
4136 4156 addr, addrlen, size, _ALLOC_INTR, CRED());
4137 4157 if (mp == NULL) {
4138 4158 /*
4139 4159 * Caught a signal waiting for memory.
4140 4160 * Let send* return EINTR.
4141 4161 */
4142 4162 error = EINTR;
4143 4163 goto done;
4144 4164 }
4145 4165 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
4146 4166 soappendmsg(mp, &toh, sizeof (toh));
4147 4167 soappendmsg(mp, src, srclen);
4148 4168 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
4149 4169 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
4150 4170 }
4151 4171
4152 4172 if (AU_AUDITING())
4153 4173 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
4154 4174
4155 4175 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
4156 4176 done:
4157 4177 #ifdef SOCK_DEBUG
4158 4178 if (error) {
4159 4179 eprintsoline(so, error);
4160 4180 }
4161 4181 #endif /* SOCK_DEBUG */
4162 4182 return (error);
4163 4183 }
4164 4184
4165 4185 /*
4166 4186 * Sending data on a connected stream socket.
4167 4187 * Assumes caller has verified that SS_ISCONNECTED is set.
4168 4188 */
4169 4189 int
4170 4190 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more,
4171 4191 int sflag)
4172 4192 {
4173 4193 struct T_data_req tdr;
4174 4194 mblk_t *mp;
4175 4195 int error;
4176 4196 ssize_t iosize;
4177 4197 sotpi_info_t *sti = SOTOTPI(so);
4178 4198
4179 4199 dprintso(so, 1,
4180 4200 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n",
4181 4201 (void *)so, uiop->uio_resid, prim, sflag));
4182 4202
4183 4203 /*
4184 4204 * Has to be bound and connected. However, since no locks are
4185 4205 * held the state could have changed after sotpi_sendmsg checked it
4186 4206 * thus it is not possible to ASSERT on the state.
4187 4207 */
4188 4208
4189 4209 do {
4190 4210 /*
4191 4211 * Set the MORE flag if uio_resid does not fit in this
4192 4212 * message or if the caller passed in "more".
4193 4213 * Error for transports with zero tidu_size.
4194 4214 */
4195 4215 tdr.PRIM_type = prim;
4196 4216 iosize = sti->sti_tidu_size;
4197 4217 if (iosize <= 0)
4198 4218 return (EMSGSIZE);
4199 4219 if (uiop->uio_resid > iosize) {
4200 4220 tdr.MORE_flag = 1;
4201 4221 } else {
4202 4222 if (more)
4203 4223 tdr.MORE_flag = 1;
4204 4224 else
4205 4225 tdr.MORE_flag = 0;
4206 4226 iosize = uiop->uio_resid;
4207 4227 }
4208 4228 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n",
4209 4229 prim, tdr.MORE_flag, iosize));
4210 4230 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED());
4211 4231 if (mp == NULL) {
4212 4232 /*
4213 4233 * Caught a signal waiting for memory.
4214 4234 * Let send* return EINTR.
4215 4235 */
4216 4236 return (EINTR);
4217 4237 }
4218 4238
4219 4239 error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
4220 4240 0, sflag | MSG_BAND, 0);
4221 4241 if (error) {
4222 4242 eprintsoline(so, error);
4223 4243 return (error);
4224 4244 }
4225 4245 if (uiop->uio_resid > 0) {
4226 4246 /*
4227 4247 * Recheck for fatal errors. Fail write even though
4228 4248 * some data have been written. This is consistent
4229 4249 * with strwrite semantics and BSD sockets semantics.
4230 4250 */
4231 4251 if (so->so_state & SS_CANTSENDMORE) {
4232 4252 eprintsoline(so, error);
4233 4253 return (EPIPE);
4234 4254 }
4235 4255 if (so->so_error != 0) {
4236 4256 mutex_enter(&so->so_lock);
4237 4257 error = sogeterr(so, B_TRUE);
4238 4258 mutex_exit(&so->so_lock);
4239 4259 if (error != 0) {
4240 4260 eprintsoline(so, error);
4241 4261 return (error);
4242 4262 }
4243 4263 }
4244 4264 }
4245 4265 } while (uiop->uio_resid > 0);
4246 4266 return (0);
4247 4267 }
4248 4268
4249 4269 /*
4250 4270 * Check the state for errors and call the appropriate send function.
4251 4271 *
4252 4272 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set)
4253 4273 * this function issues a setsockopt to toggle SO_DONTROUTE before and
4254 4274 * after sending the message.
4255 4275 *
4256 4276 * The caller may optionally specify a destination address, for either
4257 4277 * stream or datagram sockets. This table summarizes the cases:
4258 4278 *
4259 4279 * Socket type Dest. given Connected Result
4260 4280 * ----------- ----------- --------- --------------
4261 4281 * Stream * Yes send to conn. addr.
4262 4282 * Stream * No error ENOTCONN
4263 4283 * Dgram yes * send to given addr.
4264 4284 * Dgram no yes send to conn. addr.
4265 4285 * Dgram no no error EDESTADDRREQ
4266 4286 *
4267 4287 * There are subtleties around the destination address when using
4268 4288 * AF_UNIX datagram sockets. When the sendmsg call specifies the
4269 4289 * destination address, it's in (struct sockaddr_un) form and we
4270 4290 * need to translate it to our internal form (struct so_ux_addr).
4271 4291 *
4272 4292 * When the sendmsg call does not specify a destination address
4273 4293 * we're using the peer address saved during sotpi_connect, and
4274 4294 * that address is already in internal form. In this case, the
4275 4295 * (internal only) flag MSG_SENDTO_NOXLATE is set in the flags
4276 4296 * passed to sosend_dgram or sosend_dgramcmsg to indicate that
4277 4297 * those functions should skip translation to internal form.
4278 4298 * Avoiding that translation is not only more efficient, but it's
4279 4299 * also necessary when a process does a connect on an AF_UNIX
4280 4300 * datagram socket and then drops privileges. After the process
4281 4301 * has dropped privileges, it may no longer be able to lookup the
4282 4302 * the external name in the filesystem, but it should still be
4283 4303 * able to send messages on the connected socket by leaving the
4284 4304 * destination name unspecified.
4285 4305 *
4286 4306 * Yet more subtleties arise with sockets connected by socketpair(),
4287 4307 * which puts internal form addresses in the fields where normally
4288 4308 * the external form is found, and sets sti_faddr_noxlate=1, which
4289 4309 * (like flag MSG_SENDTO_NOXLATE) causes the sosend_dgram functions
4290 4310 * to skip translation of destination addresses to internal form.
4291 4311 * However, beware that the flag sti_faddr_noxlate=1 also triggers
4292 4312 * different behaviour almost everywhere AF_UNIX addresses appear.
4293 4313 */
4294 4314 static int
4295 4315 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
4296 4316 struct cred *cr)
4297 4317 {
4298 4318 int so_state;
4299 4319 int so_mode;
4300 4320 int error;
4301 4321 struct sockaddr *name;
4302 4322 t_uscalar_t namelen;
4303 4323 int dontroute;
4304 4324 int flags;
4305 4325 sotpi_info_t *sti = SOTOTPI(so);
4306 4326
4307 4327 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
4308 4328 (void *)so, (void *)msg, msg->msg_flags,
4309 4329 pr_state(so->so_state, so->so_mode), so->so_error));
4310 4330
4311 4331 if (so->so_version == SOV_STREAM) {
4312 4332 /* The imaginary "sockmod" has been popped - act as a stream */
4313 4333 so_update_attrs(so, SOMOD);
4314 4334 return (strwrite(SOTOV(so), uiop, cr));
4315 4335 }
4316 4336
4317 4337 mutex_enter(&so->so_lock);
4318 4338 so_state = so->so_state;
4319 4339
4320 4340 if (so_state & SS_CANTSENDMORE) {
4321 4341 mutex_exit(&so->so_lock);
4322 4342 return (EPIPE);
4323 4343 }
4324 4344
4325 4345 if (so->so_error != 0) {
4326 4346 error = sogeterr(so, B_TRUE);
4327 4347 if (error != 0) {
4328 4348 mutex_exit(&so->so_lock);
4329 4349 return (error);
4330 4350 }
4331 4351 }
4332 4352
4333 4353 name = (struct sockaddr *)msg->msg_name;
4334 4354 namelen = msg->msg_namelen;
4335 4355 flags = msg->msg_flags;
4336 4356
4337 4357 /*
4338 4358 * Historically, this function does not validate the flags
4339 4359 * passed in, and any errant bits are ignored. However,
4340 4360 * we would not want any such errant flag bits accidently
4341 4361 * being treated as one of the internal-only flags, so
4342 4362 * clear the internal-only flag bits.
4343 4363 */
4344 4364 flags &= ~MSG_SENDTO_NOXLATE;
4345 4365
4346 4366 so_mode = so->so_mode;
4347 4367
4348 4368 if (name == NULL) {
4349 4369 if (!(so_state & SS_ISCONNECTED)) {
4350 4370 mutex_exit(&so->so_lock);
4351 4371 if (so_mode & SM_CONNREQUIRED)
4352 4372 return (ENOTCONN);
4353 4373 else
4354 4374 return (EDESTADDRREQ);
4355 4375 }
4356 4376 /*
4357 4377 * This is a connected socket.
4358 4378 */
4359 4379 if (so_mode & SM_CONNREQUIRED) {
4360 4380 /*
4361 4381 * This is a connected STREAM socket,
4362 4382 * destination not specified.
4363 4383 */
4364 4384 name = NULL;
4365 4385 namelen = 0;
4366 4386 } else {
4367 4387 /*
4368 4388 * Datagram send on connected socket with
4369 4389 * the destination name not specified.
4370 4390 * Use the peer address from connect.
4371 4391 */
4372 4392 if (so->so_family == AF_UNIX) {
4373 4393 /*
4374 4394 * Use the (internal form) address saved
4375 4395 * in sotpi_connect. See above.
4376 4396 */
4377 4397 name = (void *)&sti->sti_ux_faddr;
4378 4398 namelen = sizeof (sti->sti_ux_faddr);
4379 4399 flags |= MSG_SENDTO_NOXLATE;
4380 4400 } else {
4381 4401 ASSERT(sti->sti_faddr_sa);
4382 4402 name = sti->sti_faddr_sa;
4383 4403 namelen = (t_uscalar_t)sti->sti_faddr_len;
4384 4404 }
4385 4405 }
4386 4406 } else {
4387 4407 /*
4388 4408 * Sendmsg specifies a destination name
4389 4409 */
4390 4410 if (!(so_state & SS_ISCONNECTED) &&
4391 4411 (so_mode & SM_CONNREQUIRED)) {
4392 4412 /* i.e. TCP not connected */
4393 4413 mutex_exit(&so->so_lock);
4394 4414 return (ENOTCONN);
4395 4415 }
4396 4416 /*
4397 4417 * Ignore the address on connection-oriented sockets.
4398 4418 * Just like BSD this code does not generate an error for
4399 4419 * TCP (a CONNREQUIRED socket) when sending to an address
4400 4420 * passed in with sendto/sendmsg. Instead the data is
4401 4421 * delivered on the connection as if no address had been
4402 4422 * supplied.
4403 4423 */
4404 4424 if ((so_state & SS_ISCONNECTED) &&
4405 4425 !(so_mode & SM_CONNREQUIRED)) {
4406 4426 mutex_exit(&so->so_lock);
4407 4427 return (EISCONN);
4408 4428 }
4409 4429 if (!(so_state & SS_ISBOUND)) {
4410 4430 so_lock_single(so); /* Set SOLOCKED */
4411 4431 error = sotpi_bind(so, NULL, 0,
4412 4432 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
4413 4433 so_unlock_single(so, SOLOCKED);
4414 4434 if (error) {
4415 4435 mutex_exit(&so->so_lock);
4416 4436 eprintsoline(so, error);
4417 4437 return (error);
4418 4438 }
4419 4439 }
4420 4440 /*
4421 4441 * Handle delayed datagram errors. These are only queued
4422 4442 * when the application sets SO_DGRAM_ERRIND.
4423 4443 * Return the error if we are sending to the address
4424 4444 * that was returned in the last T_UDERROR_IND.
4425 4445 * If sending to some other address discard the delayed
4426 4446 * error indication.
4427 4447 */
4428 4448 if (sti->sti_delayed_error) {
4429 4449 struct T_uderror_ind *tudi;
4430 4450 void *addr;
4431 4451 t_uscalar_t addrlen;
4432 4452 boolean_t match = B_FALSE;
4433 4453
4434 4454 ASSERT(sti->sti_eaddr_mp);
4435 4455 error = sti->sti_delayed_error;
4436 4456 sti->sti_delayed_error = 0;
4437 4457 tudi =
4438 4458 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr;
4439 4459 addrlen = tudi->DEST_length;
4440 4460 addr = sogetoff(sti->sti_eaddr_mp,
4441 4461 tudi->DEST_offset, addrlen, 1);
4442 4462 ASSERT(addr); /* Checked by strsock_proto */
4443 4463 switch (so->so_family) {
4444 4464 case AF_INET: {
4445 4465 /* Compare just IP address and port */
4446 4466 sin_t *sin1 = (sin_t *)name;
4447 4467 sin_t *sin2 = (sin_t *)addr;
4448 4468
4449 4469 if (addrlen == sizeof (sin_t) &&
4450 4470 namelen == addrlen &&
4451 4471 sin1->sin_port == sin2->sin_port &&
4452 4472 sin1->sin_addr.s_addr ==
4453 4473 sin2->sin_addr.s_addr)
4454 4474 match = B_TRUE;
4455 4475 break;
4456 4476 }
4457 4477 case AF_INET6: {
4458 4478 /* Compare just IP address and port. Not flow */
4459 4479 sin6_t *sin1 = (sin6_t *)name;
4460 4480 sin6_t *sin2 = (sin6_t *)addr;
4461 4481
4462 4482 if (addrlen == sizeof (sin6_t) &&
4463 4483 namelen == addrlen &&
4464 4484 sin1->sin6_port == sin2->sin6_port &&
4465 4485 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
4466 4486 &sin2->sin6_addr))
4467 4487 match = B_TRUE;
4468 4488 break;
4469 4489 }
4470 4490 case AF_UNIX:
4471 4491 default:
4472 4492 if (namelen == addrlen &&
4473 4493 bcmp(name, addr, namelen) == 0)
4474 4494 match = B_TRUE;
4475 4495 }
4476 4496 if (match) {
4477 4497 freemsg(sti->sti_eaddr_mp);
4478 4498 sti->sti_eaddr_mp = NULL;
4479 4499 mutex_exit(&so->so_lock);
4480 4500 #ifdef DEBUG
4481 4501 dprintso(so, 0,
4482 4502 ("sockfs delayed error %d for %s\n",
4483 4503 error,
4484 4504 pr_addr(so->so_family, name, namelen)));
4485 4505 #endif /* DEBUG */
4486 4506 return (error);
4487 4507 }
4488 4508 freemsg(sti->sti_eaddr_mp);
4489 4509 sti->sti_eaddr_mp = NULL;
4490 4510 }
4491 4511 }
4492 4512 mutex_exit(&so->so_lock);
4493 4513
4494 4514 dontroute = 0;
4495 4515 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) {
4496 4516 uint32_t val;
4497 4517
4498 4518 val = 1;
4499 4519 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
4500 4520 &val, (t_uscalar_t)sizeof (val), cr);
4501 4521 if (error)
4502 4522 return (error);
4503 4523 dontroute = 1;
4504 4524 }
4505 4525
4506 4526 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) {
4507 4527 error = EOPNOTSUPP;
4508 4528 goto done;
4509 4529 }
4510 4530 if (msg->msg_controllen != 0) {
4511 4531 if (!(so_mode & SM_CONNREQUIRED)) {
4512 4532 so_update_attrs(so, SOMOD);
4513 4533 error = sosend_dgramcmsg(so, name, namelen, uiop,
4514 4534 msg->msg_control, msg->msg_controllen, flags);
4515 4535 } else {
4516 4536 if (flags & MSG_OOB) {
4517 4537 /* Can't generate T_EXDATA_REQ with options */
4518 4538 error = EOPNOTSUPP;
4519 4539 goto done;
4520 4540 }
4521 4541 so_update_attrs(so, SOMOD);
4522 4542 error = sosend_svccmsg(so, uiop,
4523 4543 !(flags & MSG_EOR),
4524 4544 msg->msg_control, msg->msg_controllen,
4525 4545 flags);
4526 4546 }
4527 4547 goto done;
4528 4548 }
4529 4549
4530 4550 so_update_attrs(so, SOMOD);
4531 4551 if (!(so_mode & SM_CONNREQUIRED)) {
4532 4552 /*
4533 4553 * If there is no SO_DONTROUTE to turn off return immediately
4534 4554 * from send_dgram. This can allow tail-call optimizations.
4535 4555 */
4536 4556 if (!dontroute) {
4537 4557 return (sosend_dgram(so, name, namelen, uiop, flags));
4538 4558 }
4539 4559 error = sosend_dgram(so, name, namelen, uiop, flags);
4540 4560 } else {
4541 4561 t_scalar_t prim;
4542 4562 int sflag;
4543 4563
4544 4564 /* Ignore msg_name in the connected state */
4545 4565 if (flags & MSG_OOB) {
4546 4566 prim = T_EXDATA_REQ;
4547 4567 /*
4548 4568 * Send down T_EXDATA_REQ even if there is flow
4549 4569 * control for data.
4550 4570 */
4551 4571 sflag = MSG_IGNFLOW;
4552 4572 } else {
4553 4573 if (so_mode & SM_BYTESTREAM) {
4554 4574 /* Byte stream transport - use write */
4555 4575 dprintso(so, 1, ("sotpi_sendmsg: write\n"));
4556 4576
4557 4577 /* Send M_DATA messages */
4558 4578 if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
4559 4579 (error = nl7c_data(so, uiop)) >= 0) {
4560 4580 /* NL7C consumed the data */
4561 4581 return (error);
4562 4582 }
4563 4583 /*
4564 4584 * If there is no SO_DONTROUTE to turn off,
4565 4585 * sti_direct is on, and there is no flow
4566 4586 * control, we can take the fast path.
4567 4587 */
4568 4588 if (!dontroute && sti->sti_direct != 0 &&
4569 4589 canputnext(SOTOV(so)->v_stream->sd_wrq)) {
4570 4590 return (sostream_direct(so, uiop,
4571 4591 NULL, cr));
4572 4592 }
4573 4593 error = strwrite(SOTOV(so), uiop, cr);
4574 4594 goto done;
4575 4595 }
4576 4596 prim = T_DATA_REQ;
4577 4597 sflag = 0;
4578 4598 }
4579 4599 /*
4580 4600 * If there is no SO_DONTROUTE to turn off return immediately
4581 4601 * from sosend_svc. This can allow tail-call optimizations.
4582 4602 */
4583 4603 if (!dontroute)
4584 4604 return (sosend_svc(so, uiop, prim,
4585 4605 !(flags & MSG_EOR), sflag));
4586 4606 error = sosend_svc(so, uiop, prim,
4587 4607 !(flags & MSG_EOR), sflag);
4588 4608 }
4589 4609 ASSERT(dontroute);
4590 4610 done:
4591 4611 if (dontroute) {
4592 4612 uint32_t val;
4593 4613
4594 4614 val = 0;
4595 4615 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
4596 4616 &val, (t_uscalar_t)sizeof (val), cr);
4597 4617 }
4598 4618 return (error);
4599 4619 }
4600 4620
4601 4621 /*
4602 4622 * kstrwritemp() has semantics very similar to those of strwrite().
4603 4623 * The main difference is it obtains mblks from the caller and also
4604 4624 * does not do any copy as done in strwrite() from user buffers to
4605 4625 * kernel buffers.
4606 4626 *
4607 4627 * Currently, this routine is used by sendfile to send data allocated
4608 4628 * within the kernel without any copying. This interface does not use the
4609 4629 * synchronous stream interface as synch. stream interface implies
4610 4630 * copying.
4611 4631 */
4612 4632 int
4613 4633 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
4614 4634 {
4615 4635 struct stdata *stp;
4616 4636 struct queue *wqp;
4617 4637 mblk_t *newmp;
4618 4638 char waitflag;
4619 4639 int tempmode;
4620 4640 int error = 0;
4621 4641 int done = 0;
4622 4642 struct sonode *so;
4623 4643 boolean_t direct;
4624 4644
4625 4645 ASSERT(vp->v_stream);
4626 4646 stp = vp->v_stream;
4627 4647
4628 4648 so = VTOSO(vp);
4629 4649 direct = _SOTOTPI(so)->sti_direct;
4630 4650
4631 4651 /*
4632 4652 * This is the sockfs direct fast path. canputnext() need
4633 4653 * not be accurate so we don't grab the sd_lock here. If
4634 4654 * we get flow-controlled, we grab sd_lock just before the
4635 4655 * do..while loop below to emulate what strwrite() does.
4636 4656 */
4637 4657 wqp = stp->sd_wrq;
4638 4658 if (canputnext(wqp) && direct &&
4639 4659 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
4640 4660 return (sostream_direct(so, NULL, mp, CRED()));
4641 4661 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
4642 4662 /* Fast check of flags before acquiring the lock */
4643 4663 mutex_enter(&stp->sd_lock);
4644 4664 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
4645 4665 mutex_exit(&stp->sd_lock);
4646 4666 if (error != 0) {
4647 4667 if (!(stp->sd_flag & STPLEX) &&
4648 4668 (stp->sd_wput_opt & SW_SIGPIPE)) {
4649 4669 error = EPIPE;
4650 4670 }
4651 4671 return (error);
4652 4672 }
4653 4673 }
4654 4674
4655 4675 waitflag = WRITEWAIT;
4656 4676 if (stp->sd_flag & OLDNDELAY)
4657 4677 tempmode = fmode & ~FNDELAY;
4658 4678 else
4659 4679 tempmode = fmode;
4660 4680
4661 4681 mutex_enter(&stp->sd_lock);
4662 4682 do {
4663 4683 if (canputnext(wqp)) {
4664 4684 mutex_exit(&stp->sd_lock);
4665 4685 if (stp->sd_wputdatafunc != NULL) {
4666 4686 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL,
4667 4687 NULL, NULL, NULL);
4668 4688 if (newmp == NULL) {
4669 4689 /* The caller will free mp */
4670 4690 return (ECOMM);
4671 4691 }
4672 4692 mp = newmp;
4673 4693 }
4674 4694 putnext(wqp, mp);
4675 4695 return (0);
4676 4696 }
4677 4697 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
4678 4698 &done);
4679 4699 } while (error == 0 && !done);
4680 4700
4681 4701 mutex_exit(&stp->sd_lock);
4682 4702 /*
4683 4703 * EAGAIN tells the application to try again. ENOMEM
4684 4704 * is returned only if the memory allocation size
4685 4705 * exceeds the physical limits of the system. ENOMEM
4686 4706 * can't be true here.
4687 4707 */
4688 4708 if (error == ENOMEM)
4689 4709 error = EAGAIN;
4690 4710 return (error);
4691 4711 }
4692 4712
4693 4713 /* ARGSUSED */
4694 4714 static int
4695 4715 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
4696 4716 struct cred *cr, mblk_t **mpp)
4697 4717 {
4698 4718 int error;
4699 4719
4700 4720 switch (so->so_family) {
4701 4721 case AF_INET:
4702 4722 case AF_INET6:
4703 4723 case AF_UNIX:
4704 4724 break;
4705 4725 default:
4706 4726 return (EAFNOSUPPORT);
4707 4727
4708 4728 }
4709 4729
4710 4730 if (so->so_state & SS_CANTSENDMORE)
4711 4731 return (EPIPE);
4712 4732
4713 4733 if (so->so_type != SOCK_STREAM)
4714 4734 return (EOPNOTSUPP);
4715 4735
4716 4736 if ((so->so_state & SS_ISCONNECTED) == 0)
4717 4737 return (ENOTCONN);
4718 4738
4719 4739 error = kstrwritemp(so->so_vnode, *mpp, fflag);
4720 4740 if (error == 0)
4721 4741 *mpp = NULL;
4722 4742 return (error);
4723 4743 }
4724 4744
4725 4745 /*
4726 4746 * Sending data on a datagram socket.
4727 4747 * Assumes caller has verified that SS_ISBOUND etc. are set.
4728 4748 */
4729 4749 /* ARGSUSED */
4730 4750 static int
4731 4751 sodgram_direct(struct sonode *so, struct sockaddr *name,
4732 4752 socklen_t namelen, struct uio *uiop, int flags)
4733 4753 {
4734 4754 struct T_unitdata_req tudr;
4735 4755 mblk_t *mp = NULL;
4736 4756 int error = 0;
4737 4757 void *addr;
4738 4758 socklen_t addrlen;
4739 4759 ssize_t len;
4740 4760 struct stdata *stp = SOTOV(so)->v_stream;
4741 4761 int so_state;
4742 4762 queue_t *udp_wq;
4743 4763 boolean_t connected;
4744 4764 mblk_t *mpdata = NULL;
4745 4765 sotpi_info_t *sti = SOTOTPI(so);
4746 4766 uint32_t auditing = AU_AUDITING();
4747 4767
4748 4768 ASSERT(name != NULL && namelen != 0);
4749 4769 ASSERT(!(so->so_mode & SM_CONNREQUIRED));
4750 4770 ASSERT(!(so->so_mode & SM_EXDATA));
4751 4771 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
4752 4772 ASSERT(SOTOV(so)->v_type == VSOCK);
4753 4773
4754 4774 /* Caller checked for proper length */
4755 4775 len = uiop->uio_resid;
4756 4776 ASSERT(len <= sti->sti_tidu_size);
4757 4777
4758 4778 /* Length and family checks have been done by caller */
4759 4779 ASSERT(name->sa_family == so->so_family);
4760 4780 ASSERT(so->so_family == AF_INET ||
4761 4781 (namelen == (socklen_t)sizeof (struct sockaddr_in6)));
4762 4782 ASSERT(so->so_family == AF_INET6 ||
4763 4783 (namelen == (socklen_t)sizeof (struct sockaddr_in)));
4764 4784
4765 4785 addr = name;
4766 4786 addrlen = namelen;
4767 4787
4768 4788 if (stp->sd_sidp != NULL &&
4769 4789 (error = straccess(stp, JCWRITE)) != 0)
4770 4790 goto done;
4771 4791
4772 4792 so_state = so->so_state;
4773 4793
4774 4794 connected = so_state & SS_ISCONNECTED;
4775 4795 if (!connected) {
4776 4796 tudr.PRIM_type = T_UNITDATA_REQ;
4777 4797 tudr.DEST_length = addrlen;
4778 4798 tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
4779 4799 tudr.OPT_length = 0;
4780 4800 tudr.OPT_offset = 0;
4781 4801
4782 4802 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0,
4783 4803 _ALLOC_INTR, CRED());
4784 4804 if (mp == NULL) {
4785 4805 /*
4786 4806 * Caught a signal waiting for memory.
4787 4807 * Let send* return EINTR.
4788 4808 */
4789 4809 error = EINTR;
4790 4810 goto done;
4791 4811 }
4792 4812 }
4793 4813
4794 4814 /*
4795 4815 * For UDP we don't break up the copyin into smaller pieces
4796 4816 * as in the TCP case. That means if ENOMEM is returned by
4797 4817 * mcopyinuio() then the uio vector has not been modified at
4798 4818 * all and we fallback to either strwrite() or kstrputmsg()
4799 4819 * below. Note also that we never generate priority messages
4800 4820 * from here.
4801 4821 */
4802 4822 udp_wq = stp->sd_wrq->q_next;
4803 4823 if (canput(udp_wq) &&
4804 4824 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) {
4805 4825 ASSERT(DB_TYPE(mpdata) == M_DATA);
4806 4826 ASSERT(uiop->uio_resid == 0);
4807 4827 if (!connected)
4808 4828 linkb(mp, mpdata);
4809 4829 else
4810 4830 mp = mpdata;
4811 4831 if (auditing)
4812 4832 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
4813 4833
4814 4834 /* Always returns 0... */
4815 4835 return (udp_wput(udp_wq, mp));
4816 4836 }
4817 4837
4818 4838 ASSERT(mpdata == NULL);
4819 4839 if (error != 0 && error != ENOMEM) {
4820 4840 freemsg(mp);
4821 4841 return (error);
4822 4842 }
4823 4843
4824 4844 /*
4825 4845 * For connected, let strwrite() handle the blocking case.
4826 4846 * Otherwise we fall thru and use kstrputmsg().
4827 4847 */
4828 4848 if (connected)
4829 4849 return (strwrite(SOTOV(so), uiop, CRED()));
4830 4850
4831 4851 if (auditing)
4832 4852 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
4833 4853
4834 4854 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
4835 4855 done:
4836 4856 #ifdef SOCK_DEBUG
4837 4857 if (error != 0) {
4838 4858 eprintsoline(so, error);
4839 4859 }
4840 4860 #endif /* SOCK_DEBUG */
4841 4861 return (error);
4842 4862 }
4843 4863
4844 4864 int
4845 4865 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr)
4846 4866 {
4847 4867 struct stdata *stp = SOTOV(so)->v_stream;
4848 4868 ssize_t iosize, rmax, maxblk;
4849 4869 queue_t *tcp_wq = stp->sd_wrq->q_next;
4850 4870 mblk_t *newmp;
4851 4871 int error = 0, wflag = 0;
4852 4872
4853 4873 ASSERT(so->so_mode & SM_BYTESTREAM);
4854 4874 ASSERT(SOTOV(so)->v_type == VSOCK);
4855 4875
4856 4876 if (stp->sd_sidp != NULL &&
4857 4877 (error = straccess(stp, JCWRITE)) != 0)
4858 4878 return (error);
4859 4879
4860 4880 if (uiop == NULL) {
4861 4881 /*
4862 4882 * kstrwritemp() should have checked sd_flag and
4863 4883 * flow-control before coming here. If we end up
4864 4884 * here it means that we can simply pass down the
4865 4885 * data to tcp.
4866 4886 */
4867 4887 ASSERT(mp != NULL);
4868 4888 if (stp->sd_wputdatafunc != NULL) {
4869 4889 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
4870 4890 NULL, NULL, NULL);
4871 4891 if (newmp == NULL) {
4872 4892 /* The caller will free mp */
4873 4893 return (ECOMM);
4874 4894 }
4875 4895 mp = newmp;
4876 4896 }
4877 4897 /* Always returns 0... */
4878 4898 return (tcp_wput(tcp_wq, mp));
4879 4899 }
4880 4900
4881 4901 /* Fallback to strwrite() to do proper error handling */
4882 4902 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))
4883 4903 return (strwrite(SOTOV(so), uiop, cr));
4884 4904
4885 4905 rmax = stp->sd_qn_maxpsz;
4886 4906 ASSERT(rmax >= 0 || rmax == INFPSZ);
4887 4907 if (rmax == 0 || uiop->uio_resid <= 0)
4888 4908 return (0);
4889 4909
4890 4910 if (rmax == INFPSZ)
4891 4911 rmax = uiop->uio_resid;
4892 4912
4893 4913 maxblk = stp->sd_maxblk;
4894 4914
4895 4915 for (;;) {
4896 4916 iosize = MIN(uiop->uio_resid, rmax);
4897 4917
4898 4918 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error);
4899 4919 if (mp == NULL) {
4900 4920 /*
4901 4921 * Fallback to strwrite() for ENOMEM; if this
4902 4922 * is our first time in this routine and the uio
4903 4923 * vector has not been modified, we will end up
4904 4924 * calling strwrite() without any flag set.
4905 4925 */
4906 4926 if (error == ENOMEM)
4907 4927 goto slow_send;
4908 4928 else
4909 4929 return (error);
4910 4930 }
4911 4931 ASSERT(uiop->uio_resid >= 0);
4912 4932 /*
4913 4933 * If mp is non-NULL and ENOMEM is set, it means that
4914 4934 * mcopyinuio() was able to break down some of the user
4915 4935 * data into one or more mblks. Send the partial data
4916 4936 * to tcp and let the rest be handled in strwrite().
4917 4937 */
4918 4938 ASSERT(error == 0 || error == ENOMEM);
4919 4939 if (stp->sd_wputdatafunc != NULL) {
4920 4940 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
4921 4941 NULL, NULL, NULL);
4922 4942 if (newmp == NULL) {
4923 4943 /* The caller will free mp */
4924 4944 return (ECOMM);
4925 4945 }
4926 4946 mp = newmp;
4927 4947 }
4928 4948 (void) tcp_wput(tcp_wq, mp); /* Always returns 0 anyway. */
4929 4949
4930 4950 wflag |= NOINTR;
4931 4951
4932 4952 if (uiop->uio_resid == 0) { /* No more data; we're done */
4933 4953 ASSERT(error == 0);
4934 4954 break;
4935 4955 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag &
4936 4956 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) {
4937 4957 slow_send:
4938 4958 /*
4939 4959 * We were able to send down partial data using
4940 4960 * the direct call interface, but are now relying
4941 4961 * on strwrite() to handle the non-fastpath cases.
4942 4962 * If the socket is blocking we will sleep in
4943 4963 * strwaitq() until write is permitted, otherwise,
4944 4964 * we will need to return the amount of bytes
4945 4965 * written so far back to the app. This is the
4946 4966 * reason why we pass NOINTR flag to strwrite()
4947 4967 * for non-blocking socket, because we don't want
4948 4968 * to return EAGAIN when portion of the user data
4949 4969 * has actually been sent down.
4950 4970 */
4951 4971 return (strwrite_common(SOTOV(so), uiop, cr, wflag));
4952 4972 }
4953 4973 }
4954 4974 return (0);
4955 4975 }
4956 4976
4957 4977 /*
4958 4978 * Update sti_faddr by asking the transport (unless AF_UNIX).
4959 4979 */
4960 4980 /* ARGSUSED */
|
↓ open down ↓ |
3944 lines elided |
↑ open up ↑ |
4961 4981 int
4962 4982 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
4963 4983 boolean_t accept, struct cred *cr)
4964 4984 {
4965 4985 struct strbuf strbuf;
4966 4986 int error = 0, res;
4967 4987 void *addr;
4968 4988 t_uscalar_t addrlen;
4969 4989 k_sigset_t smask;
4970 4990 sotpi_info_t *sti = SOTOTPI(so);
4991 + vnode_t *vn;
4971 4992
4972 4993 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n",
4973 4994 (void *)so, pr_state(so->so_state, so->so_mode)));
4974 4995
4975 4996 ASSERT(*namelen > 0);
4976 4997 mutex_enter(&so->so_lock);
4977 4998 so_lock_single(so); /* Set SOLOCKED */
4999 + vn = SOTOV(so);
5000 + if (SOTPI_VN_NOSTREAM(vn)) {
5001 + error = EBADF;
5002 + goto done;
5003 + }
4978 5004
4979 5005 if (accept) {
4980 5006 bcopy(sti->sti_faddr_sa, name,
4981 5007 MIN(*namelen, sti->sti_faddr_len));
4982 5008 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
4983 5009 goto done;
4984 5010 }
4985 5011
4986 5012 if (!(so->so_state & SS_ISCONNECTED)) {
4987 5013 error = ENOTCONN;
4988 5014 goto done;
4989 5015 }
4990 5016 /* Added this check for X/Open */
4991 5017 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
4992 5018 error = EINVAL;
4993 5019 if (xnet_check_print) {
4994 5020 printf("sockfs: X/Open getpeername check => EINVAL\n");
4995 5021 }
4996 5022 goto done;
4997 5023 }
4998 5024
4999 5025 if (sti->sti_faddr_valid) {
5000 5026 bcopy(sti->sti_faddr_sa, name,
5001 5027 MIN(*namelen, sti->sti_faddr_len));
5002 5028 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
5003 5029 goto done;
5004 5030 }
5005 5031
5006 5032 #ifdef DEBUG
5007 5033 dprintso(so, 1, ("sotpi_getpeername (local): %s\n",
5008 5034 pr_addr(so->so_family, sti->sti_faddr_sa,
5009 5035 (t_uscalar_t)sti->sti_faddr_len)));
5010 5036 #endif /* DEBUG */
5011 5037
5012 5038 if (so->so_family == AF_UNIX) {
5013 5039 /* Transport has different name space - return local info */
5014 5040 if (sti->sti_faddr_noxlate)
5015 5041 *namelen = 0;
5016 5042 error = 0;
5017 5043 goto done;
5018 5044 }
5019 5045
5020 5046 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0);
5021 5047
5022 5048 ASSERT(sti->sti_faddr_sa);
5023 5049 /* Allocate local buffer to use with ioctl */
5024 5050 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen;
5025 5051 mutex_exit(&so->so_lock);
5026 5052 addr = kmem_alloc(addrlen, KM_SLEEP);
5027 5053
5028 5054 /*
5029 5055 * Issue TI_GETPEERNAME with signals masked.
5030 5056 * Put the result in sti_faddr_sa so that getpeername works after
5031 5057 * a shutdown(output).
|
↓ open down ↓ |
44 lines elided |
↑ open up ↑ |
5032 5058 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
5033 5059 * back to the socket.
5034 5060 */
5035 5061 strbuf.buf = addr;
5036 5062 strbuf.maxlen = addrlen;
5037 5063 strbuf.len = 0;
5038 5064
5039 5065 sigintr(&smask, 0);
5040 5066 res = 0;
5041 5067 ASSERT(cr);
5042 - error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf,
5068 + error = strioctl(vn, TI_GETPEERNAME, (intptr_t)&strbuf,
5043 5069 0, K_TO_K, cr, &res);
5044 5070 sigunintr(&smask);
5045 5071
5046 5072 mutex_enter(&so->so_lock);
5047 5073 /*
5048 5074 * If there is an error record the error in so_error but don't fail
5049 5075 * the getpeername. Instead fallback on the recorded
5050 5076 * sti->sti_faddr_sa.
5051 5077 */
5052 5078 if (error) {
5053 5079 /*
5054 5080 * Various stream head errors can be returned to the ioctl.
5055 5081 * However, it is impossible to determine which ones of
5056 5082 * these are really socket level errors that were incorrectly
5057 5083 * consumed by the ioctl. Thus this code silently ignores the
5058 5084 * error - the code explicitly does not reinstate the error
5059 5085 * using soseterror().
5060 5086 * Experiments have shown that at least this set of
5061 5087 * errors are reported and should not be reinstated on the
5062 5088 * socket:
5063 5089 * EINVAL E.g. if an I_LINK was in effect when
5064 5090 * getpeername was called.
5065 5091 * EPIPE The ioctl error semantics prefer the write
5066 5092 * side error over the read side error.
5067 5093 * ENOTCONN The transport just got disconnected but
5068 5094 * sockfs had not yet seen the T_DISCON_IND
5069 5095 * when issuing the ioctl.
5070 5096 */
5071 5097 error = 0;
5072 5098 } else if (res == 0 && strbuf.len > 0 &&
5073 5099 (so->so_state & SS_ISCONNECTED)) {
5074 5100 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen);
5075 5101 sti->sti_faddr_len = (socklen_t)strbuf.len;
5076 5102 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len);
5077 5103 sti->sti_faddr_valid = 1;
5078 5104
5079 5105 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len));
5080 5106 *namelen = sti->sti_faddr_len;
5081 5107 }
5082 5108 kmem_free(addr, addrlen);
5083 5109 #ifdef DEBUG
5084 5110 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n",
5085 5111 pr_addr(so->so_family, sti->sti_faddr_sa,
5086 5112 (t_uscalar_t)sti->sti_faddr_len)));
5087 5113 #endif /* DEBUG */
5088 5114 done:
5089 5115 so_unlock_single(so, SOLOCKED);
5090 5116 mutex_exit(&so->so_lock);
5091 5117 return (error);
5092 5118 }
5093 5119
5094 5120 /*
5095 5121 * Update sti_laddr by asking the transport (unless AF_UNIX).
5096 5122 */
|
↓ open down ↓ |
44 lines elided |
↑ open up ↑ |
5097 5123 int
5098 5124 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
5099 5125 struct cred *cr)
5100 5126 {
5101 5127 struct strbuf strbuf;
5102 5128 int error = 0, res;
5103 5129 void *addr;
5104 5130 t_uscalar_t addrlen;
5105 5131 k_sigset_t smask;
5106 5132 sotpi_info_t *sti = SOTOTPI(so);
5133 + vnode_t *vn;
5107 5134
5108 5135 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n",
5109 5136 (void *)so, pr_state(so->so_state, so->so_mode)));
5110 5137
5111 5138 ASSERT(*namelen > 0);
5112 5139 mutex_enter(&so->so_lock);
5113 5140 so_lock_single(so); /* Set SOLOCKED */
5141 + vn = SOTOV(so);
5142 + if (SOTPI_VN_NOSTREAM(vn)) {
5143 + error = EBADF;
5144 + goto done;
5145 + }
5114 5146
5115 5147 #ifdef DEBUG
5116 5148
5117 5149 dprintso(so, 1, ("sotpi_getsockname (local): %s\n",
5118 5150 pr_addr(so->so_family, sti->sti_laddr_sa,
5119 5151 (t_uscalar_t)sti->sti_laddr_len)));
5120 5152 #endif /* DEBUG */
5121 5153 if (sti->sti_laddr_valid) {
5122 5154 bcopy(sti->sti_laddr_sa, name,
5123 5155 MIN(*namelen, sti->sti_laddr_len));
5124 5156 *namelen = sti->sti_laddr_len;
5125 5157 goto done;
5126 5158 }
5127 5159
5128 5160 if (so->so_family == AF_UNIX) {
5129 5161 /*
5130 5162 * Transport has different name space - return local info. If we
5131 5163 * have enough space, let consumers know the family.
5132 5164 */
5133 5165 if (*namelen >= sizeof (sa_family_t)) {
5134 5166 name->sa_family = AF_UNIX;
5135 5167 *namelen = sizeof (sa_family_t);
5136 5168 } else {
5137 5169 *namelen = 0;
5138 5170 }
5139 5171 error = 0;
5140 5172 goto done;
5141 5173 }
5142 5174 if (!(so->so_state & SS_ISBOUND)) {
5143 5175 /* If not bound, then nothing to return. */
5144 5176 error = 0;
5145 5177 goto done;
5146 5178 }
5147 5179
5148 5180 /* Allocate local buffer to use with ioctl */
5149 5181 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen;
5150 5182 mutex_exit(&so->so_lock);
5151 5183 addr = kmem_alloc(addrlen, KM_SLEEP);
5152 5184
5153 5185 /*
5154 5186 * Issue TI_GETMYNAME with signals masked.
5155 5187 * Put the result in sti_laddr_sa so that getsockname works after
5156 5188 * a shutdown(output).
|
↓ open down ↓ |
33 lines elided |
↑ open up ↑ |
5157 5189 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
5158 5190 * back to the socket.
5159 5191 */
5160 5192 strbuf.buf = addr;
5161 5193 strbuf.maxlen = addrlen;
5162 5194 strbuf.len = 0;
5163 5195
5164 5196 sigintr(&smask, 0);
5165 5197 res = 0;
5166 5198 ASSERT(cr);
5167 - error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf,
5199 + error = strioctl(vn, TI_GETMYNAME, (intptr_t)&strbuf,
5168 5200 0, K_TO_K, cr, &res);
5169 5201 sigunintr(&smask);
5170 5202
5171 5203 mutex_enter(&so->so_lock);
5172 5204 /*
5173 5205 * If there is an error record the error in so_error but don't fail
5174 5206 * the getsockname. Instead fallback on the recorded
5175 5207 * sti->sti_laddr_sa.
5176 5208 */
5177 5209 if (error) {
5178 5210 /*
5179 5211 * Various stream head errors can be returned to the ioctl.
5180 5212 * However, it is impossible to determine which ones of
5181 5213 * these are really socket level errors that were incorrectly
5182 5214 * consumed by the ioctl. Thus this code silently ignores the
5183 5215 * error - the code explicitly does not reinstate the error
5184 5216 * using soseterror().
5185 5217 * Experiments have shown that at least this set of
5186 5218 * errors are reported and should not be reinstated on the
5187 5219 * socket:
5188 5220 * EINVAL E.g. if an I_LINK was in effect when
5189 5221 * getsockname was called.
5190 5222 * EPIPE The ioctl error semantics prefer the write
5191 5223 * side error over the read side error.
5192 5224 */
5193 5225 error = 0;
5194 5226 } else if (res == 0 && strbuf.len > 0 &&
5195 5227 (so->so_state & SS_ISBOUND)) {
5196 5228 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen);
5197 5229 sti->sti_laddr_len = (socklen_t)strbuf.len;
5198 5230 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
5199 5231 sti->sti_laddr_valid = 1;
5200 5232
5201 5233 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen));
5202 5234 *namelen = sti->sti_laddr_len;
5203 5235 }
5204 5236 kmem_free(addr, addrlen);
5205 5237 #ifdef DEBUG
5206 5238 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n",
5207 5239 pr_addr(so->so_family, sti->sti_laddr_sa,
5208 5240 (t_uscalar_t)sti->sti_laddr_len)));
5209 5241 #endif /* DEBUG */
5210 5242 done:
5211 5243 so_unlock_single(so, SOLOCKED);
5212 5244 mutex_exit(&so->so_lock);
5213 5245 return (error);
5214 5246 }
5215 5247
5216 5248 /*
5217 5249 * Get socket options. For SOL_SOCKET options some options are handled
5218 5250 * by the sockfs while others use the value recorded in the sonode as a
5219 5251 * fallback should the T_SVR4_OPTMGMT_REQ fail.
5220 5252 *
5221 5253 * On return, at most *optlenp bytes are copied to optval.
5222 5254 */
5223 5255 /* ARGSUSED */
5224 5256 int
5225 5257 sotpi_getsockopt(struct sonode *so, int level, int option_name,
5226 5258 void *optval, socklen_t *optlenp, int flags, struct cred *cr)
5227 5259 {
5228 5260 struct T_optmgmt_req optmgmt_req;
5229 5261 struct T_optmgmt_ack *optmgmt_ack;
5230 5262 struct opthdr oh;
|
↓ open down ↓ |
53 lines elided |
↑ open up ↑ |
5231 5263 struct opthdr *opt_res;
5232 5264 mblk_t *mp = NULL;
5233 5265 int error = 0;
5234 5266 void *option = NULL; /* Set if fallback value */
5235 5267 t_uscalar_t maxlen = *optlenp;
5236 5268 t_uscalar_t len;
5237 5269 uint32_t value;
5238 5270 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */
5239 5271 struct timeval32 tmo_val32;
5240 5272 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */
5273 + vnode_t *vn;
5241 5274
5242 5275 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n",
5243 5276 (void *)so, level, option_name, optval, (void *)optlenp,
5244 5277 pr_state(so->so_state, so->so_mode)));
5245 5278
5246 5279 mutex_enter(&so->so_lock);
5247 5280 so_lock_single(so); /* Set SOLOCKED */
5281 + vn = SOTOV(so);
5282 + if (SOTPI_VN_NOSTREAM(vn)) {
5283 + error = EBADF;
5284 + eprintsoline(so, error);
5285 + goto done2;
5286 + }
5248 5287
5249 5288 /*
5250 5289 * Check for SOL_SOCKET options.
5251 5290 * Certain SOL_SOCKET options are returned directly whereas
5252 5291 * others only provide a default (fallback) value should
5253 5292 * the T_SVR4_OPTMGMT_REQ fail.
5254 5293 */
5255 5294 if (level == SOL_SOCKET) {
5256 5295 /* Check parameters */
5257 5296 switch (option_name) {
5258 5297 case SO_TYPE:
5259 5298 case SO_ERROR:
5260 5299 case SO_DEBUG:
5261 5300 case SO_ACCEPTCONN:
5262 5301 case SO_REUSEADDR:
5263 5302 case SO_KEEPALIVE:
5264 5303 case SO_DONTROUTE:
5265 5304 case SO_BROADCAST:
5266 5305 case SO_USELOOPBACK:
5267 5306 case SO_OOBINLINE:
5268 5307 case SO_SNDBUF:
5269 5308 case SO_RCVBUF:
5270 5309 #ifdef notyet
5271 5310 case SO_SNDLOWAT:
5272 5311 case SO_RCVLOWAT:
5273 5312 #endif /* notyet */
5274 5313 case SO_DOMAIN:
5275 5314 case SO_DGRAM_ERRIND:
5276 5315 if (maxlen < (t_uscalar_t)sizeof (int32_t)) {
5277 5316 error = EINVAL;
5278 5317 eprintsoline(so, error);
5279 5318 goto done2;
5280 5319 }
5281 5320 break;
5282 5321 case SO_RCVTIMEO:
5283 5322 case SO_SNDTIMEO:
5284 5323 if (get_udatamodel() == DATAMODEL_NONE ||
5285 5324 get_udatamodel() == DATAMODEL_NATIVE) {
5286 5325 if (maxlen < sizeof (struct timeval)) {
5287 5326 error = EINVAL;
5288 5327 eprintsoline(so, error);
5289 5328 goto done2;
5290 5329 }
5291 5330 } else {
5292 5331 if (maxlen < sizeof (struct timeval32)) {
5293 5332 error = EINVAL;
5294 5333 eprintsoline(so, error);
5295 5334 goto done2;
5296 5335 }
5297 5336
5298 5337 }
5299 5338 break;
5300 5339 case SO_LINGER:
5301 5340 if (maxlen < (t_uscalar_t)sizeof (struct linger)) {
5302 5341 error = EINVAL;
5303 5342 eprintsoline(so, error);
5304 5343 goto done2;
5305 5344 }
5306 5345 break;
5307 5346 case SO_SND_BUFINFO:
5308 5347 if (maxlen < (t_uscalar_t)
5309 5348 sizeof (struct so_snd_bufinfo)) {
5310 5349 error = EINVAL;
5311 5350 eprintsoline(so, error);
5312 5351 goto done2;
5313 5352 }
5314 5353 break;
5315 5354 }
5316 5355
5317 5356 len = (t_uscalar_t)sizeof (uint32_t); /* Default */
5318 5357
5319 5358 switch (option_name) {
5320 5359 case SO_TYPE:
5321 5360 value = so->so_type;
5322 5361 option = &value;
5323 5362 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5324 5363
5325 5364 case SO_ERROR:
5326 5365 value = sogeterr(so, B_TRUE);
5327 5366 option = &value;
5328 5367 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5329 5368
5330 5369 case SO_ACCEPTCONN:
5331 5370 if (so->so_state & SS_ACCEPTCONN)
5332 5371 value = SO_ACCEPTCONN;
5333 5372 else
5334 5373 value = 0;
5335 5374 #ifdef DEBUG
5336 5375 if (value) {
5337 5376 dprintso(so, 1,
5338 5377 ("sotpi_getsockopt: 0x%x is set\n",
5339 5378 option_name));
5340 5379 } else {
5341 5380 dprintso(so, 1,
5342 5381 ("sotpi_getsockopt: 0x%x not set\n",
5343 5382 option_name));
5344 5383 }
5345 5384 #endif /* DEBUG */
5346 5385 option = &value;
5347 5386 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5348 5387
5349 5388 case SO_DEBUG:
5350 5389 case SO_REUSEADDR:
5351 5390 case SO_KEEPALIVE:
5352 5391 case SO_DONTROUTE:
5353 5392 case SO_BROADCAST:
5354 5393 case SO_USELOOPBACK:
5355 5394 case SO_OOBINLINE:
5356 5395 case SO_DGRAM_ERRIND:
5357 5396 value = (so->so_options & option_name);
5358 5397 #ifdef DEBUG
5359 5398 if (value) {
5360 5399 dprintso(so, 1,
5361 5400 ("sotpi_getsockopt: 0x%x is set\n",
5362 5401 option_name));
5363 5402 } else {
5364 5403 dprintso(so, 1,
5365 5404 ("sotpi_getsockopt: 0x%x not set\n",
5366 5405 option_name));
5367 5406 }
5368 5407 #endif /* DEBUG */
5369 5408 option = &value;
5370 5409 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5371 5410
5372 5411 /*
5373 5412 * The following options are only returned by sockfs when the
5374 5413 * T_SVR4_OPTMGMT_REQ fails.
5375 5414 */
5376 5415 case SO_LINGER:
5377 5416 option = &so->so_linger;
5378 5417 len = (t_uscalar_t)sizeof (struct linger);
5379 5418 break;
5380 5419 case SO_SNDBUF: {
5381 5420 ssize_t lvalue;
|
↓ open down ↓ |
124 lines elided |
↑ open up ↑ |
5382 5421
5383 5422 /*
5384 5423 * If the option has not been set then get a default
5385 5424 * value from the write queue. This value is
5386 5425 * returned if the transport fails
5387 5426 * the T_SVR4_OPTMGMT_REQ.
5388 5427 */
5389 5428 lvalue = so->so_sndbuf;
5390 5429 if (lvalue == 0) {
5391 5430 mutex_exit(&so->so_lock);
5392 - (void) strqget(strvp2wq(SOTOV(so))->q_next,
5431 + (void) strqget(strvp2wq(vn)->q_next,
5393 5432 QHIWAT, 0, &lvalue);
5394 5433 mutex_enter(&so->so_lock);
5395 5434 dprintso(so, 1,
5396 5435 ("got SO_SNDBUF %ld from q\n", lvalue));
5397 5436 }
5398 5437 value = (int)lvalue;
5399 5438 option = &value;
5400 5439 len = (t_uscalar_t)sizeof (so->so_sndbuf);
5401 5440 break;
5402 5441 }
5403 5442 case SO_RCVBUF: {
5404 5443 ssize_t lvalue;
5405 5444
5406 5445 /*
5407 5446 * If the option has not been set then get a default
5408 5447 * value from the read queue. This value is
5409 5448 * returned if the transport fails
5410 5449 * the T_SVR4_OPTMGMT_REQ.
5411 5450 *
|
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
5412 5451 * XXX If SO_RCVBUF has been set and this is an
5413 5452 * XPG 4.2 application then do not ask the transport
5414 5453 * since the transport might adjust the value and not
5415 5454 * return exactly what was set by the application.
5416 5455 * For non-XPG 4.2 application we return the value
5417 5456 * that the transport is actually using.
5418 5457 */
5419 5458 lvalue = so->so_rcvbuf;
5420 5459 if (lvalue == 0) {
5421 5460 mutex_exit(&so->so_lock);
5422 - (void) strqget(RD(strvp2wq(SOTOV(so))),
5461 + (void) strqget(RD(strvp2wq(vn)),
5423 5462 QHIWAT, 0, &lvalue);
5424 5463 mutex_enter(&so->so_lock);
5425 5464 dprintso(so, 1,
5426 5465 ("got SO_RCVBUF %ld from q\n", lvalue));
5427 5466 } else if (flags & _SOGETSOCKOPT_XPG4_2) {
5428 5467 value = (int)lvalue;
5429 5468 option = &value;
5430 5469 goto copyout; /* skip asking transport */
5431 5470 }
5432 5471 value = (int)lvalue;
5433 5472 option = &value;
5434 5473 len = (t_uscalar_t)sizeof (so->so_rcvbuf);
5435 5474 break;
5436 5475 }
5437 5476 case SO_DOMAIN:
5438 5477 value = so->so_family;
5439 5478 option = &value;
5440 5479 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
5441 5480
5442 5481 #ifdef notyet
5443 5482 /*
5444 5483 * We do not implement the semantics of these options
5445 5484 * thus we shouldn't implement the options either.
5446 5485 */
5447 5486 case SO_SNDLOWAT:
5448 5487 value = so->so_sndlowat;
5449 5488 option = &value;
5450 5489 break;
5451 5490 case SO_RCVLOWAT:
5452 5491 value = so->so_rcvlowat;
5453 5492 option = &value;
5454 5493 break;
5455 5494 #endif /* notyet */
5456 5495 case SO_SNDTIMEO:
5457 5496 case SO_RCVTIMEO: {
5458 5497 clock_t val;
5459 5498
5460 5499 if (option_name == SO_RCVTIMEO)
5461 5500 val = drv_hztousec(so->so_rcvtimeo);
5462 5501 else
5463 5502 val = drv_hztousec(so->so_sndtimeo);
5464 5503 tmo_val.tv_sec = val / (1000 * 1000);
5465 5504 tmo_val.tv_usec = val % (1000 * 1000);
5466 5505 if (get_udatamodel() == DATAMODEL_NONE ||
5467 5506 get_udatamodel() == DATAMODEL_NATIVE) {
5468 5507 option = &tmo_val;
5469 5508 len = sizeof (struct timeval);
5470 5509 } else {
5471 5510 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val);
5472 5511 option = &tmo_val32;
5473 5512 len = sizeof (struct timeval32);
5474 5513 }
5475 5514 break;
5476 5515 }
5477 5516 case SO_SND_BUFINFO: {
5478 5517 snd_bufinfo.sbi_wroff =
5479 5518 (so->so_proto_props).sopp_wroff;
5480 5519 snd_bufinfo.sbi_maxblk =
5481 5520 (so->so_proto_props).sopp_maxblk;
5482 5521 snd_bufinfo.sbi_maxpsz =
5483 5522 (so->so_proto_props).sopp_maxpsz;
5484 5523 snd_bufinfo.sbi_tail =
5485 5524 (so->so_proto_props).sopp_tail;
5486 5525 option = &snd_bufinfo;
5487 5526 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo);
5488 5527 break;
5489 5528 }
5490 5529 }
5491 5530 }
5492 5531
5493 5532 mutex_exit(&so->so_lock);
5494 5533
5495 5534 /* Send request */
5496 5535 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
5497 5536 optmgmt_req.MGMT_flags = T_CHECK;
|
↓ open down ↓ |
65 lines elided |
↑ open up ↑ |
5498 5537 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen);
5499 5538 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
5500 5539
5501 5540 oh.level = level;
5502 5541 oh.name = option_name;
5503 5542 oh.len = maxlen;
5504 5543
5505 5544 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
5506 5545 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr);
5507 5546 /* Let option management work in the presence of data flow control */
5508 - error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
5547 + error = kstrputmsg(vn, mp, NULL, 0, 0,
5509 5548 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
5510 5549 mp = NULL;
5511 5550 mutex_enter(&so->so_lock);
5512 5551 if (error) {
5513 5552 eprintsoline(so, error);
5514 5553 goto done2;
5515 5554 }
5516 5555 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
5517 5556 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0);
5518 5557 if (error) {
5519 5558 if (option != NULL) {
5520 5559 /* We have a fallback value */
5521 5560 error = 0;
5522 5561 goto copyout;
5523 5562 }
5524 5563 eprintsoline(so, error);
5525 5564 goto done2;
5526 5565 }
5527 5566 ASSERT(mp);
5528 5567 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr;
5529 5568 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset,
5530 5569 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE);
5531 5570 if (opt_res == NULL) {
5532 5571 if (option != NULL) {
5533 5572 /* We have a fallback value */
5534 5573 error = 0;
5535 5574 goto copyout;
5536 5575 }
5537 5576 error = EPROTO;
5538 5577 eprintsoline(so, error);
5539 5578 goto done;
5540 5579 }
5541 5580 option = &opt_res[1];
5542 5581
5543 5582 /* check to ensure that the option is within bounds */
5544 5583 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) ||
5545 5584 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) {
5546 5585 if (option != NULL) {
5547 5586 /* We have a fallback value */
5548 5587 error = 0;
5549 5588 goto copyout;
5550 5589 }
5551 5590 error = EPROTO;
5552 5591 eprintsoline(so, error);
5553 5592 goto done;
5554 5593 }
5555 5594
5556 5595 len = opt_res->len;
5557 5596
5558 5597 copyout: {
5559 5598 t_uscalar_t size = MIN(len, maxlen);
5560 5599 bcopy(option, optval, size);
5561 5600 bcopy(&size, optlenp, sizeof (size));
5562 5601 }
5563 5602 done:
5564 5603 freemsg(mp);
5565 5604 done2:
5566 5605 so_unlock_single(so, SOLOCKED);
5567 5606 mutex_exit(&so->so_lock);
5568 5607
5569 5608 return (error);
5570 5609 }
5571 5610
5572 5611 /*
5573 5612 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ.
5574 5613 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for
5575 5614 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails -
5576 5615 * setsockopt has to work even if the transport does not support the option.
5577 5616 */
5578 5617 /* ARGSUSED */
5579 5618 int
5580 5619 sotpi_setsockopt(struct sonode *so, int level, int option_name,
5581 5620 const void *optval, t_uscalar_t optlen, struct cred *cr)
5582 5621 {
5583 5622 struct T_optmgmt_req optmgmt_req;
5584 5623 struct opthdr oh;
5585 5624 mblk_t *mp;
5586 5625 int error = 0;
5587 5626 boolean_t handled = B_FALSE;
5588 5627
5589 5628 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n",
5590 5629 (void *)so, level, option_name, optval, optlen,
5591 5630 pr_state(so->so_state, so->so_mode)));
5592 5631
5593 5632 /* X/Open requires this check */
5594 5633 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
5595 5634 if (xnet_check_print)
5596 5635 printf("sockfs: X/Open setsockopt check => EINVAL\n");
5597 5636 return (EINVAL);
5598 5637 }
5599 5638
5600 5639 mutex_enter(&so->so_lock);
5601 5640 so_lock_single(so); /* Set SOLOCKED */
5602 5641 mutex_exit(&so->so_lock);
5603 5642
5604 5643 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
5605 5644 optmgmt_req.MGMT_flags = T_NEGOTIATE;
5606 5645 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen;
5607 5646 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
5608 5647
5609 5648 oh.level = level;
5610 5649 oh.name = option_name;
5611 5650 oh.len = optlen;
5612 5651
5613 5652 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
5614 5653 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr);
5615 5654 /* Let option management work in the presence of data flow control */
5616 5655 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
5617 5656 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
5618 5657 mp = NULL;
5619 5658 mutex_enter(&so->so_lock);
5620 5659 if (error) {
5621 5660 eprintsoline(so, error);
5622 5661 goto done2;
5623 5662 }
5624 5663 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
5625 5664 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0);
5626 5665 if (error) {
5627 5666 eprintsoline(so, error);
5628 5667 goto done;
5629 5668 }
5630 5669 ASSERT(mp);
5631 5670 /* No need to verify T_optmgmt_ack */
5632 5671 freemsg(mp);
5633 5672 done:
5634 5673 /*
5635 5674 * Check for SOL_SOCKET options and record their values.
5636 5675 * If we know about a SOL_SOCKET parameter and the transport
5637 5676 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or
5638 5677 * EPROTO) we let the setsockopt succeed.
5639 5678 */
5640 5679 if (level == SOL_SOCKET) {
5641 5680 /* Check parameters */
5642 5681 switch (option_name) {
5643 5682 case SO_DEBUG:
5644 5683 case SO_REUSEADDR:
5645 5684 case SO_KEEPALIVE:
5646 5685 case SO_DONTROUTE:
5647 5686 case SO_BROADCAST:
5648 5687 case SO_USELOOPBACK:
5649 5688 case SO_OOBINLINE:
5650 5689 case SO_SNDBUF:
5651 5690 case SO_RCVBUF:
5652 5691 #ifdef notyet
5653 5692 case SO_SNDLOWAT:
5654 5693 case SO_RCVLOWAT:
5655 5694 #endif /* notyet */
5656 5695 case SO_DGRAM_ERRIND:
5657 5696 if (optlen != (t_uscalar_t)sizeof (int32_t)) {
5658 5697 error = EINVAL;
5659 5698 eprintsoline(so, error);
5660 5699 goto done2;
5661 5700 }
5662 5701 ASSERT(optval);
5663 5702 handled = B_TRUE;
5664 5703 break;
5665 5704 case SO_SNDTIMEO:
5666 5705 case SO_RCVTIMEO:
5667 5706 if (get_udatamodel() == DATAMODEL_NONE ||
5668 5707 get_udatamodel() == DATAMODEL_NATIVE) {
5669 5708 if (optlen != sizeof (struct timeval)) {
5670 5709 error = EINVAL;
5671 5710 eprintsoline(so, error);
5672 5711 goto done2;
5673 5712 }
5674 5713 } else {
5675 5714 if (optlen != sizeof (struct timeval32)) {
5676 5715 error = EINVAL;
5677 5716 eprintsoline(so, error);
5678 5717 goto done2;
5679 5718 }
5680 5719 }
5681 5720 ASSERT(optval);
5682 5721 handled = B_TRUE;
5683 5722 break;
5684 5723 case SO_LINGER:
5685 5724 if (optlen != (t_uscalar_t)sizeof (struct linger)) {
5686 5725 error = EINVAL;
5687 5726 eprintsoline(so, error);
5688 5727 goto done2;
5689 5728 }
5690 5729 ASSERT(optval);
5691 5730 handled = B_TRUE;
5692 5731 break;
5693 5732 }
5694 5733
5695 5734 #define intvalue (*(int32_t *)optval)
5696 5735
5697 5736 switch (option_name) {
5698 5737 case SO_TYPE:
5699 5738 case SO_ERROR:
5700 5739 case SO_ACCEPTCONN:
5701 5740 /* Can't be set */
5702 5741 error = ENOPROTOOPT;
5703 5742 goto done2;
5704 5743 case SO_LINGER: {
5705 5744 struct linger *l = (struct linger *)optval;
5706 5745
5707 5746 so->so_linger.l_linger = l->l_linger;
5708 5747 if (l->l_onoff) {
5709 5748 so->so_linger.l_onoff = SO_LINGER;
5710 5749 so->so_options |= SO_LINGER;
5711 5750 } else {
5712 5751 so->so_linger.l_onoff = 0;
5713 5752 so->so_options &= ~SO_LINGER;
5714 5753 }
5715 5754 break;
5716 5755 }
5717 5756
5718 5757 case SO_DEBUG:
5719 5758 #ifdef SOCK_TEST
5720 5759 if (intvalue & 2)
5721 5760 sock_test_timelimit = 10 * hz;
5722 5761 else
5723 5762 sock_test_timelimit = 0;
5724 5763
5725 5764 if (intvalue & 4)
5726 5765 do_useracc = 0;
5727 5766 else
5728 5767 do_useracc = 1;
5729 5768 #endif /* SOCK_TEST */
5730 5769 /* FALLTHRU */
5731 5770 case SO_REUSEADDR:
5732 5771 case SO_KEEPALIVE:
5733 5772 case SO_DONTROUTE:
5734 5773 case SO_BROADCAST:
5735 5774 case SO_USELOOPBACK:
5736 5775 case SO_OOBINLINE:
5737 5776 case SO_DGRAM_ERRIND:
5738 5777 if (intvalue != 0) {
5739 5778 dprintso(so, 1,
5740 5779 ("socket_setsockopt: setting 0x%x\n",
5741 5780 option_name));
5742 5781 so->so_options |= option_name;
5743 5782 } else {
5744 5783 dprintso(so, 1,
5745 5784 ("socket_setsockopt: clearing 0x%x\n",
5746 5785 option_name));
5747 5786 so->so_options &= ~option_name;
5748 5787 }
5749 5788 break;
5750 5789 /*
5751 5790 * The following options are only returned by us when the
5752 5791 * transport layer fails.
5753 5792 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs
5754 5793 * since the transport might adjust the value and not
5755 5794 * return exactly what was set by the application.
5756 5795 */
5757 5796 case SO_SNDBUF:
5758 5797 so->so_sndbuf = intvalue;
5759 5798 break;
5760 5799 case SO_RCVBUF:
5761 5800 so->so_rcvbuf = intvalue;
5762 5801 break;
5763 5802 case SO_RCVPSH:
5764 5803 so->so_rcv_timer_interval = intvalue;
5765 5804 break;
5766 5805 #ifdef notyet
5767 5806 /*
5768 5807 * We do not implement the semantics of these options
5769 5808 * thus we shouldn't implement the options either.
5770 5809 */
5771 5810 case SO_SNDLOWAT:
5772 5811 so->so_sndlowat = intvalue;
5773 5812 break;
5774 5813 case SO_RCVLOWAT:
5775 5814 so->so_rcvlowat = intvalue;
5776 5815 break;
5777 5816 #endif /* notyet */
5778 5817 case SO_SNDTIMEO:
5779 5818 case SO_RCVTIMEO: {
5780 5819 struct timeval tl;
5781 5820 clock_t val;
5782 5821
5783 5822 if (get_udatamodel() == DATAMODEL_NONE ||
5784 5823 get_udatamodel() == DATAMODEL_NATIVE)
5785 5824 bcopy(&tl, (struct timeval *)optval,
5786 5825 sizeof (struct timeval));
5787 5826 else
5788 5827 TIMEVAL32_TO_TIMEVAL(&tl,
5789 5828 (struct timeval32 *)optval);
5790 5829 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
5791 5830 if (option_name == SO_RCVTIMEO)
5792 5831 so->so_rcvtimeo = drv_usectohz(val);
5793 5832 else
5794 5833 so->so_sndtimeo = drv_usectohz(val);
5795 5834 break;
5796 5835 }
5797 5836 }
5798 5837 #undef intvalue
5799 5838
5800 5839 if (error) {
5801 5840 if ((error == ENOPROTOOPT || error == EPROTO ||
5802 5841 error == EINVAL) && handled) {
5803 5842 dprintso(so, 1,
5804 5843 ("setsockopt: ignoring error %d for 0x%x\n",
5805 5844 error, option_name));
5806 5845 error = 0;
5807 5846 }
5808 5847 }
5809 5848 }
5810 5849 done2:
5811 5850 so_unlock_single(so, SOLOCKED);
5812 5851 mutex_exit(&so->so_lock);
5813 5852 return (error);
5814 5853 }
5815 5854
5816 5855 /*
5817 5856 * sotpi_close() is called when the last open reference goes away.
5818 5857 */
5819 5858 /* ARGSUSED */
5820 5859 int
5821 5860 sotpi_close(struct sonode *so, int flag, struct cred *cr)
5822 5861 {
5823 5862 struct vnode *vp = SOTOV(so);
5824 5863 dev_t dev;
5825 5864 int error = 0;
5826 5865 sotpi_info_t *sti = SOTOTPI(so);
5827 5866
5828 5867 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n",
5829 5868 (void *)vp, flag, pr_state(so->so_state, so->so_mode)));
5830 5869
5831 5870 dev = sti->sti_dev;
5832 5871
5833 5872 ASSERT(STREAMSTAB(getmajor(dev)));
5834 5873
5835 5874 mutex_enter(&so->so_lock);
5836 5875 so_lock_single(so); /* Set SOLOCKED */
5837 5876
5838 5877 ASSERT(so_verify_oobstate(so));
5839 5878
5840 5879 if (sti->sti_nl7c_flags & NL7C_ENABLED) {
5841 5880 sti->sti_nl7c_flags = 0;
5842 5881 nl7c_close(so);
5843 5882 }
5844 5883
5845 5884 if (vp->v_stream != NULL) {
5846 5885 vnode_t *ux_vp;
5847 5886
5848 5887 if (so->so_family == AF_UNIX) {
5849 5888 /* Could avoid this when CANTSENDMORE for !dgram */
5850 5889 so_unix_close(so);
5851 5890 }
5852 5891
5853 5892 mutex_exit(&so->so_lock);
5854 5893 /*
5855 5894 * Disassemble the linkage from the AF_UNIX underlying file
5856 5895 * system vnode to this socket (by atomically clearing
5857 5896 * v_stream in vn_rele_stream) before strclose clears sd_vnode
5858 5897 * and frees the stream head.
5859 5898 */
5860 5899 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) {
5861 5900 ASSERT(ux_vp->v_stream);
5862 5901 sti->sti_ux_bound_vp = NULL;
5863 5902 vn_rele_stream(ux_vp);
5864 5903 }
5865 5904 error = strclose(vp, flag, cr);
5866 5905 vp->v_stream = NULL;
5867 5906 mutex_enter(&so->so_lock);
5868 5907 }
5869 5908
5870 5909 /*
5871 5910 * Flush the T_DISCON_IND on sti_discon_ind_mp.
5872 5911 */
5873 5912 so_flush_discon_ind(so);
5874 5913
5875 5914 so_unlock_single(so, SOLOCKED);
5876 5915 mutex_exit(&so->so_lock);
5877 5916
5878 5917 /*
5879 5918 * Needed for STREAMs.
5880 5919 * Decrement the device driver's reference count for streams
5881 5920 * opened via the clone dip. The driver was held in clone_open().
5882 5921 * The absence of clone_close() forces this asymmetry.
5883 5922 */
5884 5923 if (so->so_flag & SOCLONE)
5885 5924 ddi_rele_driver(getmajor(dev));
5886 5925
5887 5926 return (error);
5888 5927 }
5889 5928
5890 5929 static int
5891 5930 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
5892 5931 struct cred *cr, int32_t *rvalp)
5893 5932 {
5894 5933 struct vnode *vp = SOTOV(so);
5895 5934 sotpi_info_t *sti = SOTOTPI(so);
5896 5935 int error = 0;
5897 5936
5898 5937 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
5899 5938 cmd, arg, pr_state(so->so_state, so->so_mode)));
5900 5939
5901 5940 switch (cmd) {
5902 5941 case SIOCSQPTR:
5903 5942 /*
5904 5943 * SIOCSQPTR is valid only when helper stream is created
5905 5944 * by the protocol.
5906 5945 */
5907 5946 case _I_INSERT:
5908 5947 case _I_REMOVE:
5909 5948 /*
5910 5949 * Since there's no compelling reason to support these ioctls
5911 5950 * on sockets, and doing so would increase the complexity
5912 5951 * markedly, prevent it.
5913 5952 */
5914 5953 return (EOPNOTSUPP);
5915 5954
5916 5955 case I_FIND:
5917 5956 case I_LIST:
5918 5957 case I_LOOK:
5919 5958 case I_POP:
5920 5959 case I_PUSH:
5921 5960 /*
5922 5961 * To prevent races and inconsistencies between the actual
5923 5962 * state of the stream and the state according to the sonode,
5924 5963 * we serialize all operations which modify or operate on the
5925 5964 * list of modules on the socket's stream.
5926 5965 */
5927 5966 mutex_enter(&sti->sti_plumb_lock);
5928 5967 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
5929 5968 mutex_exit(&sti->sti_plumb_lock);
5930 5969 return (error);
5931 5970
5932 5971 default:
5933 5972 if (so->so_version != SOV_STREAM)
5934 5973 break;
5935 5974
5936 5975 /*
5937 5976 * The imaginary "sockmod" has been popped; act as a stream.
5938 5977 */
5939 5978 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
5940 5979 }
5941 5980
5942 5981 ASSERT(so->so_version != SOV_STREAM);
5943 5982
5944 5983 /*
5945 5984 * Process socket-specific ioctls.
5946 5985 */
5947 5986 switch (cmd) {
5948 5987 case FIONBIO: {
5949 5988 int32_t value;
5950 5989
5951 5990 if (so_copyin((void *)arg, &value, sizeof (int32_t),
5952 5991 (mode & (int)FKIOCTL)))
5953 5992 return (EFAULT);
5954 5993
5955 5994 mutex_enter(&so->so_lock);
5956 5995 if (value) {
5957 5996 so->so_state |= SS_NDELAY;
5958 5997 } else {
5959 5998 so->so_state &= ~SS_NDELAY;
5960 5999 }
5961 6000 mutex_exit(&so->so_lock);
5962 6001 return (0);
5963 6002 }
5964 6003
5965 6004 case FIOASYNC: {
5966 6005 int32_t value;
5967 6006
5968 6007 if (so_copyin((void *)arg, &value, sizeof (int32_t),
5969 6008 (mode & (int)FKIOCTL)))
5970 6009 return (EFAULT);
5971 6010
5972 6011 mutex_enter(&so->so_lock);
5973 6012 /*
5974 6013 * SS_ASYNC flag not already set correctly?
5975 6014 * (!value != !(so->so_state & SS_ASYNC))
5976 6015 * but some engineers find that too hard to read.
5977 6016 */
5978 6017 if (value == 0 && (so->so_state & SS_ASYNC) != 0 ||
5979 6018 value != 0 && (so->so_state & SS_ASYNC) == 0)
5980 6019 error = so_flip_async(so, vp, mode, cr);
5981 6020 mutex_exit(&so->so_lock);
5982 6021 return (error);
5983 6022 }
5984 6023
5985 6024 case SIOCSPGRP:
5986 6025 case FIOSETOWN: {
5987 6026 pid_t pgrp;
5988 6027
5989 6028 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
5990 6029 (mode & (int)FKIOCTL)))
5991 6030 return (EFAULT);
5992 6031
5993 6032 mutex_enter(&so->so_lock);
5994 6033 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
5995 6034 /* Any change? */
5996 6035 if (pgrp != so->so_pgrp)
5997 6036 error = so_set_siggrp(so, vp, pgrp, mode, cr);
5998 6037 mutex_exit(&so->so_lock);
5999 6038 return (error);
6000 6039 }
6001 6040 case SIOCGPGRP:
6002 6041 case FIOGETOWN:
6003 6042 if (so_copyout(&so->so_pgrp, (void *)arg,
6004 6043 sizeof (pid_t), (mode & (int)FKIOCTL)))
6005 6044 return (EFAULT);
6006 6045 return (0);
6007 6046
6008 6047 case SIOCATMARK: {
6009 6048 int retval;
6010 6049 uint_t so_state;
6011 6050
6012 6051 /*
6013 6052 * strwaitmark has a finite timeout after which it
6014 6053 * returns -1 if the mark state is undetermined.
6015 6054 * In order to avoid any race between the mark state
6016 6055 * in sockfs and the mark state in the stream head this
6017 6056 * routine loops until the mark state can be determined
6018 6057 * (or the urgent data indication has been removed by some
6019 6058 * other thread).
6020 6059 */
6021 6060 do {
6022 6061 mutex_enter(&so->so_lock);
6023 6062 so_state = so->so_state;
6024 6063 mutex_exit(&so->so_lock);
6025 6064 if (so_state & SS_RCVATMARK) {
6026 6065 retval = 1;
6027 6066 } else if (!(so_state & SS_OOBPEND)) {
6028 6067 /*
6029 6068 * No SIGURG has been generated -- there is no
6030 6069 * pending or present urgent data. Thus can't
6031 6070 * possibly be at the mark.
6032 6071 */
6033 6072 retval = 0;
6034 6073 } else {
6035 6074 /*
6036 6075 * Have the stream head wait until there is
6037 6076 * either some messages on the read queue, or
6038 6077 * STRATMARK or STRNOTATMARK gets set. The
6039 6078 * STRNOTATMARK flag is used so that the
6040 6079 * transport can send up a MSGNOTMARKNEXT
6041 6080 * M_DATA to indicate that it is not
6042 6081 * at the mark and additional data is not about
6043 6082 * to be send upstream.
6044 6083 *
6045 6084 * If the mark state is undetermined this will
6046 6085 * return -1 and we will loop rechecking the
6047 6086 * socket state.
6048 6087 */
6049 6088 retval = strwaitmark(vp);
6050 6089 }
6051 6090 } while (retval == -1);
6052 6091
6053 6092 if (so_copyout(&retval, (void *)arg, sizeof (int),
6054 6093 (mode & (int)FKIOCTL)))
6055 6094 return (EFAULT);
6056 6095 return (0);
6057 6096 }
6058 6097
6059 6098 case I_FDINSERT:
6060 6099 case I_SENDFD:
6061 6100 case I_RECVFD:
6062 6101 case I_ATMARK:
6063 6102 case _SIOCSOCKFALLBACK:
6064 6103 /*
6065 6104 * These ioctls do not apply to sockets. I_FDINSERT can be
6066 6105 * used to send M_PROTO messages without modifying the socket
6067 6106 * state. I_SENDFD/RECVFD should not be used for socket file
6068 6107 * descriptor passing since they assume a twisted stream.
6069 6108 * SIOCATMARK must be used instead of I_ATMARK.
6070 6109 *
6071 6110 * _SIOCSOCKFALLBACK from an application should never be
6072 6111 * processed. It is only generated by socktpi_open() or
6073 6112 * in response to I_POP or I_PUSH.
6074 6113 */
6075 6114 #ifdef DEBUG
6076 6115 zcmn_err(getzoneid(), CE_WARN,
6077 6116 "Unsupported STREAMS ioctl 0x%x on socket. "
6078 6117 "Pid = %d\n", cmd, curproc->p_pid);
6079 6118 #endif /* DEBUG */
6080 6119 return (EOPNOTSUPP);
6081 6120
6082 6121 case _I_GETPEERCRED:
6083 6122 if ((mode & FKIOCTL) == 0)
6084 6123 return (EINVAL);
6085 6124
6086 6125 mutex_enter(&so->so_lock);
6087 6126 if ((so->so_mode & SM_CONNREQUIRED) == 0) {
6088 6127 error = ENOTSUP;
6089 6128 } else if ((so->so_state & SS_ISCONNECTED) == 0) {
6090 6129 error = ENOTCONN;
6091 6130 } else if (so->so_peercred != NULL) {
6092 6131 k_peercred_t *kp = (k_peercred_t *)arg;
6093 6132 kp->pc_cr = so->so_peercred;
6094 6133 kp->pc_cpid = so->so_cpid;
6095 6134 crhold(so->so_peercred);
6096 6135 } else {
6097 6136 error = EINVAL;
6098 6137 }
6099 6138 mutex_exit(&so->so_lock);
6100 6139 return (error);
6101 6140
6102 6141 default:
6103 6142 /*
6104 6143 * Do the higher-order bits of the ioctl cmd indicate
6105 6144 * that it is an I_* streams ioctl?
6106 6145 */
6107 6146 if ((cmd & 0xffffff00U) == STR &&
6108 6147 so->so_version == SOV_SOCKBSD) {
6109 6148 #ifdef DEBUG
6110 6149 zcmn_err(getzoneid(), CE_WARN,
6111 6150 "Unsupported STREAMS ioctl 0x%x on socket. "
6112 6151 "Pid = %d\n", cmd, curproc->p_pid);
6113 6152 #endif /* DEBUG */
6114 6153 return (EOPNOTSUPP);
6115 6154 }
6116 6155 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
6117 6156 }
6118 6157 }
6119 6158
6120 6159 /*
6121 6160 * Handle plumbing-related ioctls.
6122 6161 */
6123 6162 static int
6124 6163 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
6125 6164 struct cred *cr, int32_t *rvalp)
6126 6165 {
6127 6166 static const char sockmod_name[] = "sockmod";
6128 6167 struct sonode *so = VTOSO(vp);
6129 6168 char mname[FMNAMESZ + 1];
6130 6169 int error;
6131 6170 sotpi_info_t *sti = SOTOTPI(so);
6132 6171
6133 6172 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
6134 6173
6135 6174 if (so->so_version == SOV_SOCKBSD)
6136 6175 return (EOPNOTSUPP);
6137 6176
6138 6177 if (so->so_version == SOV_STREAM) {
6139 6178 /*
6140 6179 * The imaginary "sockmod" has been popped - act as a stream.
6141 6180 * If this is a push of sockmod then change back to a socket.
6142 6181 */
6143 6182 if (cmd == I_PUSH) {
6144 6183 error = ((mode & FKIOCTL) ? copystr : copyinstr)(
6145 6184 (void *)arg, mname, sizeof (mname), NULL);
6146 6185
6147 6186 if (error == 0 && strcmp(mname, sockmod_name) == 0) {
6148 6187 dprintso(so, 0, ("socktpi_ioctl: going to "
6149 6188 "socket version\n"));
6150 6189 so_stream2sock(so);
6151 6190 return (0);
6152 6191 }
6153 6192 }
6154 6193 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
6155 6194 }
6156 6195
6157 6196 switch (cmd) {
6158 6197 case I_PUSH:
6159 6198 if (sti->sti_direct) {
6160 6199 mutex_enter(&so->so_lock);
6161 6200 so_lock_single(so);
6162 6201 mutex_exit(&so->so_lock);
6163 6202
6164 6203 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
6165 6204 cr, rvalp);
6166 6205
6167 6206 mutex_enter(&so->so_lock);
6168 6207 if (error == 0)
6169 6208 sti->sti_direct = 0;
6170 6209 so_unlock_single(so, SOLOCKED);
6171 6210 mutex_exit(&so->so_lock);
6172 6211
6173 6212 if (error != 0)
6174 6213 return (error);
6175 6214 }
6176 6215
6177 6216 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6178 6217 if (error == 0)
6179 6218 sti->sti_pushcnt++;
6180 6219 return (error);
6181 6220
6182 6221 case I_POP:
6183 6222 if (sti->sti_pushcnt == 0) {
6184 6223 /* Emulate sockmod being popped */
6185 6224 dprintso(so, 0,
6186 6225 ("socktpi_ioctl: going to STREAMS version\n"));
6187 6226 return (so_sock2stream(so));
6188 6227 }
6189 6228
6190 6229 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6191 6230 if (error == 0)
6192 6231 sti->sti_pushcnt--;
6193 6232 return (error);
6194 6233
6195 6234 case I_LIST: {
6196 6235 struct str_mlist *kmlistp, *umlistp;
6197 6236 struct str_list kstrlist;
6198 6237 ssize_t kstrlistsize;
6199 6238 int i, nmods;
6200 6239
6201 6240 STRUCT_DECL(str_list, ustrlist);
6202 6241 STRUCT_INIT(ustrlist, mode);
6203 6242
6204 6243 if (arg == 0) {
6205 6244 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6206 6245 if (error == 0)
6207 6246 (*rvalp)++; /* Add one for sockmod */
6208 6247 return (error);
6209 6248 }
6210 6249
6211 6250 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
6212 6251 STRUCT_SIZE(ustrlist), mode & FKIOCTL);
6213 6252 if (error != 0)
6214 6253 return (error);
6215 6254
6216 6255 nmods = STRUCT_FGET(ustrlist, sl_nmods);
6217 6256 if (nmods <= 0)
6218 6257 return (EINVAL);
6219 6258 /*
6220 6259 * Ceiling nmods at nstrpush to prevent someone from
6221 6260 * maliciously consuming lots of kernel memory.
6222 6261 */
6223 6262 nmods = MIN(nmods, nstrpush);
6224 6263
6225 6264 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
6226 6265 kstrlist.sl_nmods = nmods;
6227 6266 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);
6228 6267
6229 6268 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
6230 6269 cr, rvalp);
6231 6270 if (error != 0)
6232 6271 goto done;
6233 6272
6234 6273 /*
6235 6274 * Considering the module list as a 0-based array of sl_nmods
6236 6275 * modules, sockmod should conceptually exist at slot
6237 6276 * sti_pushcnt. Insert sockmod at this location by sliding all
6238 6277 * of the module names after so_pushcnt over by one. We know
6239 6278 * that there will be room to do this since we allocated
6240 6279 * sl_modlist with an additional slot.
6241 6280 */
6242 6281 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
6243 6282 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];
6244 6283
6245 6284 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
6246 6285 kstrlist.sl_nmods++;
6247 6286
6248 6287 /*
6249 6288 * Copy all of the entries out to ustrlist.
6250 6289 */
6251 6290 kmlistp = kstrlist.sl_modlist;
6252 6291 umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
6253 6292 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
6254 6293 error = so_copyout(kmlistp++, umlistp++,
6255 6294 sizeof (struct str_mlist), mode & FKIOCTL);
6256 6295 if (error != 0)
6257 6296 goto done;
6258 6297 }
6259 6298
6260 6299 error = so_copyout(&i, (void *)arg, sizeof (int32_t),
6261 6300 mode & FKIOCTL);
6262 6301 if (error == 0)
6263 6302 *rvalp = 0;
6264 6303 done:
6265 6304 kmem_free(kstrlist.sl_modlist, kstrlistsize);
6266 6305 return (error);
6267 6306 }
6268 6307 case I_LOOK:
6269 6308 if (sti->sti_pushcnt == 0) {
6270 6309 return (so_copyout(sockmod_name, (void *)arg,
6271 6310 sizeof (sockmod_name), mode & FKIOCTL));
6272 6311 }
6273 6312 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
6274 6313
6275 6314 case I_FIND:
6276 6315 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6277 6316 if (error && error != EINVAL)
6278 6317 return (error);
6279 6318
6280 6319 /* if not found and string was sockmod return 1 */
6281 6320 if (*rvalp == 0 || error == EINVAL) {
6282 6321 error = ((mode & FKIOCTL) ? copystr : copyinstr)(
6283 6322 (void *)arg, mname, sizeof (mname), NULL);
6284 6323 if (error == ENAMETOOLONG)
6285 6324 error = EINVAL;
6286 6325
6287 6326 if (error == 0 && strcmp(mname, sockmod_name) == 0)
6288 6327 *rvalp = 1;
6289 6328 }
6290 6329 return (error);
6291 6330
6292 6331 default:
6293 6332 panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
6294 6333 break;
6295 6334 }
6296 6335
6297 6336 return (0);
6298 6337 }
6299 6338
6300 6339 /*
6301 6340 * Wrapper around the streams poll routine that implements socket poll
6302 6341 * semantics.
6303 6342 * The sockfs never calls pollwakeup itself - the stream head take care
6304 6343 * of all pollwakeups. Since sockfs never holds so_lock when calling the
6305 6344 * stream head there can never be a deadlock due to holding so_lock across
6306 6345 * pollwakeup and acquiring so_lock in this routine.
6307 6346 *
6308 6347 * However, since the performance of VOP_POLL is critical we avoid
6309 6348 * acquiring so_lock here. This is based on two assumptions:
6310 6349 * - The poll implementation holds locks to serialize the VOP_POLL call
6311 6350 * and a pollwakeup for the same pollhead. This ensures that should
6312 6351 * e.g. so_state change during a socktpi_poll call the pollwakeup
6313 6352 * (which strsock_* and strrput conspire to issue) is issued after
6314 6353 * the state change. Thus the pollwakeup will block until VOP_POLL has
6315 6354 * returned and then wake up poll and have it call VOP_POLL again.
6316 6355 * - The reading of so_state without holding so_lock does not result in
6317 6356 * stale data that is older than the latest state change that has dropped
6318 6357 * so_lock. This is ensured by the mutex_exit issuing the appropriate
6319 6358 * memory barrier to force the data into the coherency domain.
6320 6359 */
6321 6360 static int
6322 6361 sotpi_poll(
6323 6362 struct sonode *so,
6324 6363 short events,
6325 6364 int anyyet,
6326 6365 short *reventsp,
6327 6366 struct pollhead **phpp)
6328 6367 {
6329 6368 short origevents = events;
6330 6369 struct vnode *vp = SOTOV(so);
6331 6370 int error;
6332 6371 int so_state = so->so_state; /* snapshot */
6333 6372 sotpi_info_t *sti = SOTOTPI(so);
6334 6373
6335 6374 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
6336 6375 (void *)vp, pr_state(so_state, so->so_mode), so->so_error));
6337 6376
6338 6377 ASSERT(vp->v_type == VSOCK);
6339 6378 ASSERT(vp->v_stream != NULL);
6340 6379
6341 6380 if (so->so_version == SOV_STREAM) {
6342 6381 /* The imaginary "sockmod" has been popped - act as a stream */
6343 6382 return (strpoll(vp->v_stream, events, anyyet,
6344 6383 reventsp, phpp));
6345 6384 }
6346 6385
6347 6386 if (!(so_state & SS_ISCONNECTED) &&
6348 6387 (so->so_mode & SM_CONNREQUIRED)) {
6349 6388 /* Not connected yet - turn off write side events */
6350 6389 events &= ~(POLLOUT|POLLWRBAND);
6351 6390 }
6352 6391 /*
6353 6392 * Check for errors without calling strpoll if the caller wants them.
6354 6393 * In sockets the errors are represented as input/output events
6355 6394 * and there is no need to ask the stream head for this information.
6356 6395 */
6357 6396 if (so->so_error != 0 &&
6358 6397 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
6359 6398 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
6360 6399 return (0);
6361 6400 }
6362 6401 /*
6363 6402 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
6364 6403 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA
6365 6404 * will not trigger a POLLIN event with POLLRDDATA set.
6366 6405 * The handling of urgent data (causing POLLRDBAND) is done by
6367 6406 * inspecting SS_OOBPEND below.
6368 6407 */
6369 6408 events |= POLLRDDATA;
6370 6409
6371 6410 /*
6372 6411 * After shutdown(output) a stream head write error is set.
6373 6412 * However, we should not return output events.
6374 6413 */
6375 6414 events |= POLLNOERR;
6376 6415 error = strpoll(vp->v_stream, events, anyyet,
6377 6416 reventsp, phpp);
6378 6417 if (error)
6379 6418 return (error);
6380 6419
6381 6420 ASSERT(!(*reventsp & POLLERR));
6382 6421
6383 6422 /*
6384 6423 * Notes on T_CONN_IND handling for sockets.
6385 6424 *
6386 6425 * If strpoll() returned without events, SR_POLLIN is guaranteed
6387 6426 * to be set, ensuring any subsequent strrput() runs pollwakeup().
6388 6427 *
6389 6428 * Since the so_lock is not held, soqueueconnind() may have run
6390 6429 * and a T_CONN_IND may be waiting. We now check for any queued
6391 6430 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
6392 6431 * to ensure poll returns.
6393 6432 *
6394 6433 * However:
6395 6434 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
6396 6435 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
6397 6436 * the following actions will occur; taken together they ensure the
6398 6437 * syscall will return.
6399 6438 *
6400 6439 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if
6401 6440 * the accept() was run on a non-blocking socket sowaitconnind()
6402 6441 * may have already returned EWOULDBLOCK, so not be waiting to
6403 6442 * process the message. Additionally socktpi_poll() has probably
6404 6443 * proceeded past the sti_conn_ind_head check below.
6405 6444 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
6406 6445 * this thread, however that could occur before poll_common()
6407 6446 * has entered cv_wait.
6408 6447 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
6409 6448 *
6410 6449 * Before proceeding to cv_wait() in poll_common() for an event,
6411 6450 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
6412 6451 * and if set, re-calls strpoll() to ensure the late arriving
6413 6452 * T_CONN_IND is recognized, and pollsys() returns.
6414 6453 */
6415 6454
6416 6455 if (sti->sti_conn_ind_head != NULL)
6417 6456 *reventsp |= (POLLIN|POLLRDNORM) & events;
6418 6457
6419 6458 if (so->so_state & SS_CANTRCVMORE) {
6420 6459 *reventsp |= POLLRDHUP & events;
6421 6460
6422 6461 if (so->so_state & SS_CANTSENDMORE)
6423 6462 *reventsp |= POLLHUP;
6424 6463 }
6425 6464
6426 6465 if (so->so_state & SS_OOBPEND)
6427 6466 *reventsp |= POLLRDBAND & events;
6428 6467
6429 6468 if (sti->sti_nl7c_rcv_mp != NULL) {
6430 6469 *reventsp |= (POLLIN|POLLRDNORM) & events;
6431 6470 }
6432 6471 if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
6433 6472 ((POLLIN|POLLRDNORM) & *reventsp)) {
6434 6473 sti->sti_nl7c_flags |= NL7C_POLLIN;
6435 6474 }
6436 6475
6437 6476 return (0);
6438 6477 }
6439 6478
6440 6479 /*ARGSUSED*/
6441 6480 static int
6442 6481 socktpi_constructor(void *buf, void *cdrarg, int kmflags)
6443 6482 {
6444 6483 sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
6445 6484 int error = 0;
6446 6485
6447 6486 error = sonode_constructor(buf, cdrarg, kmflags);
6448 6487 if (error != 0)
6449 6488 return (error);
6450 6489
6451 6490 error = i_sotpi_info_constructor(&st->st_info);
6452 6491 if (error != 0)
6453 6492 sonode_destructor(buf, cdrarg);
6454 6493
6455 6494 st->st_sonode.so_priv = &st->st_info;
6456 6495
6457 6496 return (error);
6458 6497 }
6459 6498
6460 6499 /*ARGSUSED1*/
6461 6500 static void
6462 6501 socktpi_destructor(void *buf, void *cdrarg)
6463 6502 {
6464 6503 sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
6465 6504
6466 6505 ASSERT(st->st_sonode.so_priv == &st->st_info);
6467 6506 st->st_sonode.so_priv = NULL;
6468 6507
6469 6508 i_sotpi_info_destructor(&st->st_info);
6470 6509 sonode_destructor(buf, cdrarg);
6471 6510 }
6472 6511
6473 6512 static int
6474 6513 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags)
6475 6514 {
6476 6515 int retval;
6477 6516
6478 6517 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) {
6479 6518 struct sonode *so = (struct sonode *)buf;
6480 6519 sotpi_info_t *sti = SOTOTPI(so);
6481 6520
6482 6521 mutex_enter(&socklist.sl_lock);
6483 6522
6484 6523 sti->sti_next_so = socklist.sl_list;
6485 6524 sti->sti_prev_so = NULL;
6486 6525 if (sti->sti_next_so != NULL)
6487 6526 SOTOTPI(sti->sti_next_so)->sti_prev_so = so;
6488 6527 socklist.sl_list = so;
6489 6528
6490 6529 mutex_exit(&socklist.sl_lock);
6491 6530
6492 6531 }
6493 6532 return (retval);
6494 6533 }
6495 6534
6496 6535 static void
6497 6536 socktpi_unix_destructor(void *buf, void *cdrarg)
6498 6537 {
6499 6538 struct sonode *so = (struct sonode *)buf;
6500 6539 sotpi_info_t *sti = SOTOTPI(so);
6501 6540
6502 6541 mutex_enter(&socklist.sl_lock);
6503 6542
6504 6543 if (sti->sti_next_so != NULL)
6505 6544 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so;
6506 6545 if (sti->sti_prev_so != NULL)
6507 6546 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so;
6508 6547 else
6509 6548 socklist.sl_list = sti->sti_next_so;
6510 6549
6511 6550 mutex_exit(&socklist.sl_lock);
6512 6551
6513 6552 socktpi_destructor(buf, cdrarg);
6514 6553 }
6515 6554
6516 6555 int
6517 6556 socktpi_init(void)
6518 6557 {
6519 6558 /*
6520 6559 * Create sonode caches. We create a special one for AF_UNIX so
6521 6560 * that we can track them for netstat(8).
6522 6561 */
6523 6562 socktpi_cache = kmem_cache_create("socktpi_cache",
6524 6563 sizeof (struct sotpi_sonode), 0, socktpi_constructor,
6525 6564 socktpi_destructor, NULL, NULL, NULL, 0);
6526 6565
6527 6566 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache",
6528 6567 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor,
6529 6568 socktpi_unix_destructor, NULL, NULL, NULL, 0);
6530 6569
6531 6570 return (0);
6532 6571 }
6533 6572
6534 6573 /*
6535 6574 * Given a non-TPI sonode, allocate and prep it to be ready for TPI.
6536 6575 *
6537 6576 * Caller must still update state and mode using sotpi_update_state().
6538 6577 */
6539 6578 int
6540 6579 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp,
6541 6580 boolean_t *direct, queue_t **qp, struct cred *cr)
6542 6581 {
6543 6582 sotpi_info_t *sti;
6544 6583 struct sockparams *origsp = so->so_sockparams;
6545 6584 sock_lower_handle_t handle = so->so_proto_handle;
6546 6585 struct stdata *stp;
6547 6586 struct vnode *vp;
6548 6587 queue_t *q;
6549 6588 int error = 0;
6550 6589
6551 6590 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
6552 6591 SS_FALLBACK_PENDING);
6553 6592 ASSERT(SOCK_IS_NONSTR(so));
6554 6593
6555 6594 *qp = NULL;
6556 6595 *direct = B_FALSE;
6557 6596 so->so_sockparams = newsp;
6558 6597 /*
6559 6598 * Allocate and initalize fields required by TPI.
6560 6599 */
6561 6600 (void) sotpi_info_create(so, KM_SLEEP);
6562 6601 sotpi_info_init(so);
6563 6602
6564 6603 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) {
6565 6604 sotpi_info_fini(so);
6566 6605 sotpi_info_destroy(so);
6567 6606 return (error);
6568 6607 }
6569 6608 ASSERT(handle == so->so_proto_handle);
6570 6609 sti = SOTOTPI(so);
6571 6610 if (sti->sti_direct != 0)
6572 6611 *direct = B_TRUE;
6573 6612
6574 6613 /*
6575 6614 * Keep the original sp around so we can properly dispose of the
6576 6615 * sonode when the socket is being closed.
6577 6616 */
6578 6617 sti->sti_orig_sp = origsp;
6579 6618
6580 6619 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */
6581 6620 so_alloc_addr(so, so->so_max_addr_len);
6582 6621
6583 6622 /*
6584 6623 * If the application has done a SIOCSPGRP, make sure the
6585 6624 * STREAM head is aware. This needs to take place before
6586 6625 * the protocol start sending up messages. Otherwise we
6587 6626 * might miss to generate SIGPOLL.
6588 6627 *
6589 6628 * It is possible that the application will receive duplicate
6590 6629 * signals if some were already generated for either data or
6591 6630 * connection indications.
6592 6631 */
6593 6632 if (so->so_pgrp != 0) {
6594 6633 if (so_set_events(so, so->so_vnode, cr) != 0)
6595 6634 so->so_pgrp = 0;
6596 6635 }
6597 6636
6598 6637 /*
6599 6638 * Determine which queue to use.
6600 6639 */
6601 6640 vp = SOTOV(so);
6602 6641 stp = vp->v_stream;
6603 6642 ASSERT(stp != NULL);
6604 6643 q = stp->sd_wrq->q_next;
6605 6644
6606 6645 /*
6607 6646 * Skip any modules that may have been auto pushed when the device
6608 6647 * was opened
6609 6648 */
6610 6649 while (q->q_next != NULL)
6611 6650 q = q->q_next;
6612 6651 *qp = _RD(q);
6613 6652
6614 6653 /* This is now a STREAMS sockets */
6615 6654 so->so_not_str = B_FALSE;
6616 6655
6617 6656 return (error);
6618 6657 }
6619 6658
6620 6659 /*
6621 6660 * Revert a TPI sonode. It is only allowed to revert the sonode during
6622 6661 * the fallback process.
6623 6662 */
6624 6663 void
6625 6664 sotpi_revert_sonode(struct sonode *so, struct cred *cr)
6626 6665 {
6627 6666 vnode_t *vp = SOTOV(so);
6628 6667
6629 6668 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
6630 6669 SS_FALLBACK_PENDING);
6631 6670 ASSERT(!SOCK_IS_NONSTR(so));
6632 6671 ASSERT(vp->v_stream != NULL);
6633 6672
6634 6673 strclean(vp);
6635 6674 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr);
6636 6675
6637 6676 /*
6638 6677 * Restore the original sockparams. The caller is responsible for
6639 6678 * dropping the ref to the new sp.
6640 6679 */
6641 6680 so->so_sockparams = SOTOTPI(so)->sti_orig_sp;
6642 6681
6643 6682 sotpi_info_fini(so);
6644 6683 sotpi_info_destroy(so);
6645 6684
6646 6685 /* This is no longer a STREAMS sockets */
6647 6686 so->so_not_str = B_TRUE;
6648 6687 }
6649 6688
6650 6689 void
6651 6690 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap,
6652 6691 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr,
6653 6692 socklen_t faddrlen, short opts)
6654 6693 {
6655 6694 sotpi_info_t *sti = SOTOTPI(so);
6656 6695
6657 6696 so_proc_tcapability_ack(so, tcap);
6658 6697
6659 6698 so->so_options |= opts;
6660 6699
6661 6700 /*
6662 6701 * Determine whether the foreign and local address are valid
6663 6702 */
6664 6703 if (laddrlen != 0) {
6665 6704 ASSERT(laddrlen <= sti->sti_laddr_maxlen);
6666 6705 sti->sti_laddr_len = laddrlen;
6667 6706 bcopy(laddr, sti->sti_laddr_sa, laddrlen);
6668 6707 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND);
6669 6708 }
6670 6709
6671 6710 if (faddrlen != 0) {
6672 6711 ASSERT(faddrlen <= sti->sti_faddr_maxlen);
6673 6712 sti->sti_faddr_len = faddrlen;
6674 6713 bcopy(faddr, sti->sti_faddr_sa, faddrlen);
6675 6714 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED);
6676 6715 }
6677 6716
6678 6717 }
6679 6718
6680 6719 /*
6681 6720 * Allocate enough space to cache the local and foreign addresses.
6682 6721 */
6683 6722 void
6684 6723 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen)
6685 6724 {
6686 6725 sotpi_info_t *sti = SOTOTPI(so);
6687 6726
6688 6727 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
6689 6728 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0);
6690 6729 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen =
6691 6730 P2ROUNDUP(maxlen, KMEM_ALIGN);
6692 6731 so->so_max_addr_len = sti->sti_laddr_maxlen;
6693 6732 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP);
6694 6733 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa
6695 6734 + sti->sti_laddr_maxlen);
6696 6735
6697 6736 if (so->so_family == AF_UNIX) {
6698 6737 /*
6699 6738 * Initialize AF_UNIX related fields.
6700 6739 */
6701 6740 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr));
6702 6741 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr));
6703 6742 }
6704 6743 }
6705 6744
6706 6745
6707 6746 sotpi_info_t *
6708 6747 sotpi_sototpi(struct sonode *so)
6709 6748 {
6710 6749 sotpi_info_t *sti;
6711 6750
6712 6751 ASSERT(so != NULL);
6713 6752
6714 6753 sti = (sotpi_info_t *)so->so_priv;
6715 6754
6716 6755 ASSERT(sti != NULL);
6717 6756 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
6718 6757
6719 6758 return (sti);
6720 6759 }
6721 6760
6722 6761 static int
6723 6762 i_sotpi_info_constructor(sotpi_info_t *sti)
6724 6763 {
6725 6764 sti->sti_magic = SOTPI_INFO_MAGIC;
6726 6765 sti->sti_ack_mp = NULL;
6727 6766 sti->sti_discon_ind_mp = NULL;
6728 6767 sti->sti_ux_bound_vp = NULL;
6729 6768 sti->sti_unbind_mp = NULL;
6730 6769
6731 6770 sti->sti_conn_ind_head = NULL;
6732 6771 sti->sti_conn_ind_tail = NULL;
6733 6772
6734 6773 sti->sti_laddr_sa = NULL;
6735 6774 sti->sti_faddr_sa = NULL;
6736 6775
6737 6776 sti->sti_nl7c_flags = 0;
6738 6777 sti->sti_nl7c_uri = NULL;
6739 6778 sti->sti_nl7c_rcv_mp = NULL;
6740 6779
6741 6780 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL);
6742 6781 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL);
6743 6782
6744 6783 return (0);
6745 6784 }
6746 6785
6747 6786 static void
6748 6787 i_sotpi_info_destructor(sotpi_info_t *sti)
6749 6788 {
6750 6789 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
6751 6790 ASSERT(sti->sti_ack_mp == NULL);
6752 6791 ASSERT(sti->sti_discon_ind_mp == NULL);
6753 6792 ASSERT(sti->sti_ux_bound_vp == NULL);
6754 6793 ASSERT(sti->sti_unbind_mp == NULL);
6755 6794
6756 6795 ASSERT(sti->sti_conn_ind_head == NULL);
6757 6796 ASSERT(sti->sti_conn_ind_tail == NULL);
6758 6797
6759 6798 ASSERT(sti->sti_laddr_sa == NULL);
6760 6799 ASSERT(sti->sti_faddr_sa == NULL);
6761 6800
6762 6801 ASSERT(sti->sti_nl7c_flags == 0);
6763 6802 ASSERT(sti->sti_nl7c_uri == NULL);
6764 6803 ASSERT(sti->sti_nl7c_rcv_mp == NULL);
6765 6804
6766 6805 mutex_destroy(&sti->sti_plumb_lock);
6767 6806 cv_destroy(&sti->sti_ack_cv);
6768 6807 }
6769 6808
6770 6809 /*
6771 6810 * Creates and attaches TPI information to the given sonode
6772 6811 */
6773 6812 static boolean_t
6774 6813 sotpi_info_create(struct sonode *so, int kmflags)
6775 6814 {
6776 6815 sotpi_info_t *sti;
6777 6816
6778 6817 ASSERT(so->so_priv == NULL);
6779 6818
6780 6819 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL)
6781 6820 return (B_FALSE);
6782 6821
6783 6822 if (i_sotpi_info_constructor(sti) != 0) {
6784 6823 kmem_free(sti, sizeof (*sti));
6785 6824 return (B_FALSE);
6786 6825 }
6787 6826
6788 6827 so->so_priv = (void *)sti;
6789 6828 return (B_TRUE);
6790 6829 }
6791 6830
6792 6831 /*
6793 6832 * Initializes the TPI information.
6794 6833 */
6795 6834 static void
6796 6835 sotpi_info_init(struct sonode *so)
6797 6836 {
6798 6837 struct vnode *vp = SOTOV(so);
6799 6838 sotpi_info_t *sti = SOTOTPI(so);
6800 6839 time_t now;
6801 6840
6802 6841 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev;
6803 6842 vp->v_rdev = sti->sti_dev;
6804 6843
6805 6844 sti->sti_orig_sp = NULL;
6806 6845
6807 6846 sti->sti_pushcnt = 0;
6808 6847
6809 6848 now = gethrestime_sec();
6810 6849 sti->sti_atime = now;
6811 6850 sti->sti_mtime = now;
6812 6851 sti->sti_ctime = now;
6813 6852
6814 6853 sti->sti_eaddr_mp = NULL;
6815 6854 sti->sti_delayed_error = 0;
6816 6855
6817 6856 sti->sti_provinfo = NULL;
6818 6857
6819 6858 sti->sti_oobcnt = 0;
6820 6859 sti->sti_oobsigcnt = 0;
6821 6860
6822 6861 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
6823 6862
6824 6863 sti->sti_laddr_sa = 0;
6825 6864 sti->sti_faddr_sa = 0;
6826 6865 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0;
6827 6866 sti->sti_laddr_len = sti->sti_faddr_len = 0;
6828 6867
6829 6868 sti->sti_laddr_valid = 0;
6830 6869 sti->sti_faddr_valid = 0;
6831 6870 sti->sti_faddr_noxlate = 0;
6832 6871
6833 6872 sti->sti_direct = 0;
6834 6873
6835 6874 ASSERT(sti->sti_ack_mp == NULL);
6836 6875 ASSERT(sti->sti_ux_bound_vp == NULL);
6837 6876 ASSERT(sti->sti_unbind_mp == NULL);
6838 6877
6839 6878 ASSERT(sti->sti_conn_ind_head == NULL);
6840 6879 ASSERT(sti->sti_conn_ind_tail == NULL);
6841 6880 }
6842 6881
6843 6882 /*
6844 6883 * Given a sonode, grab the TPI info and free any data.
6845 6884 */
6846 6885 static void
6847 6886 sotpi_info_fini(struct sonode *so)
6848 6887 {
6849 6888 sotpi_info_t *sti = SOTOTPI(so);
6850 6889 mblk_t *mp;
6851 6890
6852 6891 ASSERT(sti->sti_discon_ind_mp == NULL);
6853 6892
6854 6893 if ((mp = sti->sti_conn_ind_head) != NULL) {
6855 6894 mblk_t *mp1;
6856 6895
6857 6896 while (mp) {
6858 6897 mp1 = mp->b_next;
6859 6898 mp->b_next = NULL;
6860 6899 freemsg(mp);
6861 6900 mp = mp1;
6862 6901 }
6863 6902 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL;
6864 6903 }
6865 6904
6866 6905 /*
6867 6906 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely
6868 6907 * indirect them. It also uses so_count as a validity test.
6869 6908 */
6870 6909 mutex_enter(&so->so_lock);
6871 6910
6872 6911 if (sti->sti_laddr_sa) {
6873 6912 ASSERT((caddr_t)sti->sti_faddr_sa ==
6874 6913 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen);
6875 6914 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen);
6876 6915 sti->sti_laddr_valid = 0;
6877 6916 sti->sti_faddr_valid = 0;
6878 6917 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2);
6879 6918 sti->sti_laddr_sa = NULL;
6880 6919 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0;
6881 6920 sti->sti_faddr_sa = NULL;
6882 6921 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0;
6883 6922 }
6884 6923
6885 6924 mutex_exit(&so->so_lock);
6886 6925
6887 6926 if ((mp = sti->sti_eaddr_mp) != NULL) {
6888 6927 freemsg(mp);
6889 6928 sti->sti_eaddr_mp = NULL;
6890 6929 sti->sti_delayed_error = 0;
6891 6930 }
6892 6931
6893 6932 if ((mp = sti->sti_ack_mp) != NULL) {
6894 6933 freemsg(mp);
6895 6934 sti->sti_ack_mp = NULL;
6896 6935 }
6897 6936
6898 6937 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) {
6899 6938 sti->sti_nl7c_rcv_mp = NULL;
6900 6939 freemsg(mp);
6901 6940 }
6902 6941 sti->sti_nl7c_rcv_rval = 0;
6903 6942 if (sti->sti_nl7c_uri != NULL) {
6904 6943 nl7c_urifree(so);
6905 6944 /* urifree() cleared nl7c_uri */
6906 6945 }
6907 6946 if (sti->sti_nl7c_flags) {
6908 6947 sti->sti_nl7c_flags = 0;
6909 6948 }
6910 6949
6911 6950 ASSERT(sti->sti_ux_bound_vp == NULL);
6912 6951 if ((mp = sti->sti_unbind_mp) != NULL) {
6913 6952 freemsg(mp);
6914 6953 sti->sti_unbind_mp = NULL;
6915 6954 }
6916 6955 }
6917 6956
6918 6957 /*
6919 6958 * Destroys the TPI information attached to a sonode.
6920 6959 */
6921 6960 static void
6922 6961 sotpi_info_destroy(struct sonode *so)
6923 6962 {
6924 6963 sotpi_info_t *sti = SOTOTPI(so);
6925 6964
6926 6965 i_sotpi_info_destructor(sti);
6927 6966 kmem_free(sti, sizeof (*sti));
6928 6967
6929 6968 so->so_priv = NULL;
6930 6969 }
6931 6970
6932 6971 /*
6933 6972 * Create the global sotpi socket module entry. It will never be freed.
6934 6973 */
6935 6974 smod_info_t *
6936 6975 sotpi_smod_create(void)
6937 6976 {
6938 6977 smod_info_t *smodp;
6939 6978
6940 6979 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP);
6941 6980 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP);
6942 6981 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME);
6943 6982 /*
6944 6983 * Initialize the smod_refcnt to 1 so it will never be freed.
6945 6984 */
6946 6985 smodp->smod_refcnt = 1;
6947 6986 smodp->smod_uc_version = SOCK_UC_VERSION;
6948 6987 smodp->smod_dc_version = SOCK_DC_VERSION;
6949 6988 smodp->smod_sock_create_func = &sotpi_create;
6950 6989 smodp->smod_sock_destroy_func = &sotpi_destroy;
6951 6990 return (smodp);
6952 6991 }
|
↓ open down ↓ |
1434 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX