1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2015 Joyent, Inc.
25 */
26
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 /*
31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 * The Regents of the University of California
33 * All Rights Reserved
34 *
35 * University Acknowledgment- Portions of this document are derived from
36 * software developed by the University of California, Berkeley, and its
37 * contributors.
38 */
39 /*
40 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
41 */
42
43 #ifndef _SYS_SOCKETVAR_H
44 #define _SYS_SOCKETVAR_H
45
46 #include <sys/types.h>
47 #include <sys/stream.h>
48 #include <sys/t_lock.h>
49 #include <sys/cred.h>
50 #include <sys/vnode.h>
51 #include <sys/file.h>
52 #include <sys/param.h>
53 #include <sys/zone.h>
54 #include <sys/sdt.h>
55 #include <sys/modctl.h>
56 #include <sys/atomic.h>
57 #include <sys/socket.h>
58 #include <sys/ksocket.h>
59 #include <sys/kstat.h>
60
61 #ifdef _KERNEL
62 #include <sys/vfs_opreg.h>
63 #endif
64
65 #ifdef __cplusplus
66 extern "C" {
67 #endif
68
69 /*
70 * Internal representation of the address used to represent addresses
71 * in the loopback transport for AF_UNIX. While the sockaddr_un is used
72 * as the sockfs layer address for AF_UNIX the pathnames contained in
73 * these addresses are not unique (due to relative pathnames) thus can not
74 * be used in the transport.
75 *
76 * The transport level address consists of a magic number (used to separate the
77 * name space for specific and implicit binds). For a specific bind
78 * this is followed by a "vnode *" which ensures that all specific binds
79 * have a unique transport level address. For implicit binds the latter
80 * part of the address is a byte string (of the same length as a pointer)
81 * that is assigned by the loopback transport.
82 *
83 * The uniqueness assumes that the loopback transport has a separate namespace
84 * for sockets in order to avoid name conflicts with e.g. TLI use of the
85 * same transport.
86 */
/*
 * Transport-level AF_UNIX address body: a pointer-sized identifier plus a
 * magic number taken from the SOU_MAGIC_* values defined right after it.
 */
struct so_ux_addr {
	void	*soua_vp;	/* vnode pointer or assigned by tl */
	uint_t	soua_magic;	/* one of the SOU_MAGIC_* values below */
};

/* soua_magic values; each constant spells the ASCII tag in its comment. */
#define	SOU_MAGIC_EXPLICIT	0x75787670	/* "uxvp" */
#define	SOU_MAGIC_IMPLICIT	0x616e6f6e	/* "anon" */
94
/* Family-tagged wrapper around a struct so_ux_addr. */
struct sockaddr_ux {
	sa_family_t		sou_family;	/* AF_UNIX */
	struct so_ux_addr	sou_addr;	/* magic + identifier */
};
99
100 #if defined(_KERNEL) || defined(_KMEMUSER)
101
102 #include <sys/socket_proto.h>
103
typedef struct sonodeops sonodeops_t;
typedef struct sonode sonode_t;
/*
 * Type of the kernel direct receive callback kept in so_krecv_cb; the
 * trailing void * corresponds to the argument kept in so_krecv_arg.
 * NOTE(review): the meaning of the size_t/int parameters and of the
 * boolean_t result is not visible in this header - confirm against the
 * implementation.
 */
typedef boolean_t (*so_krecv_f)(sonode_t *, mblk_t *, size_t, int, void *);

/* Forward declaration; this header only stores a pointer (so_direct). */
struct sodirect_s;
109
110 /*
 * The sonode represents a socket. A sonode never exists in the file system
 * name space and can not be opened using open() - only the socket, socketpair
 * and accept calls create sonodes.
114 *
115 * The locking of sockfs uses the so_lock mutex plus the SOLOCKED and
116 * SOREADLOCKED flags in so_flag. The mutex protects all the state in the
 * sonode. It is expected that the underlying transport protocol serializes
 * socket operations, so sockfs will normally not single-thread
 * operations. However, certain sockets, including TPI based ones, can only
120 * handle one control operation at a time. The SOLOCKED flag is used to
121 * single-thread operations from sockfs users to prevent e.g. multiple bind()
122 * calls to operate on the same sonode concurrently. The SOREADLOCKED flag is
123 * used to ensure that only one thread sleeps in kstrgetmsg for a given
124 * sonode. This is needed to ensure atomic operation for things like
125 * MSG_WAITALL.
126 *
127 * The so_fallback_rwlock is used to ensure that for sockets that can
128 * fall back to TPI, the fallback is not initiated until all pending
129 * operations have completed.
130 *
131 * Note that so_lock is sometimes held across calls that might go to sleep
132 * (kmem_alloc and soallocproto*). This implies that no other lock in
133 * the system should be held when calling into sockfs; from the system call
134 * side or from strrput (in case of TPI based sockets). If locks are held
135 * while calling into sockfs the system might hang when running low on memory.
136 */
/*
 * Per-socket state.  Field order and types define the in-memory layout and
 * must not be rearranged.  Several so_* names used by consumers are not
 * real members but #define aliases into so_proto_props (see the #define
 * lines embedded in the structure).
 */
struct sonode {
	struct vnode	*so_vnode;	/* vnode associated with this sonode */

	sonodeops_t	*so_ops;	/* operations vector for this sonode */
	void		*so_priv;	/* sonode private data */

	/* taken as reader by SO_BLOCK_FALLBACK*(), see below in this file */
	krwlock_t	so_fallback_rwlock;
	kmutex_t	so_lock;	/* protects sonode fields */

	kcondvar_t	so_state_cv;	/* synchronize state changes */
	kcondvar_t	so_single_cv;	/* wait due to SOLOCKED */
	kcondvar_t	so_read_cv;	/* wait due to SOREADLOCKED */

	/* These fields are protected by so_lock */

	uint_t		so_state;	/* internal state flags SS_*, below */
	uint_t		so_mode;	/* characteristics on socket. SM_* */
	ushort_t	so_flag;	/* flags, see below */
	int		so_count;	/* count of opened references */

	sock_connid_t	so_proto_connid; /* protocol generation number */

	ushort_t	so_error;	/* error affecting connection */

	struct sockparams *so_sockparams;	/* vnode or socket module */
	/* Needed to recreate the same socket for accept */
	short		so_family;
	short		so_type;
	short		so_protocol;
	short		so_version;	/* From so_socket call */

	/* Accept queue */
	kmutex_t	so_acceptq_lock;	/* protects accept queue */
	list_t		so_acceptq_list;	/* pending conns */
	list_t		so_acceptq_defer;	/* deferred conns */
	list_node_t	so_acceptq_node;	/* acceptq list node */
	unsigned int	so_acceptq_len;	/* # of conns (both lists) */
	unsigned int	so_backlog;	/* Listen backlog */
	kcondvar_t	so_acceptq_cv;	/* wait for new conn. */
	struct sonode	*so_listener;	/* parent socket */

	/* Options */
	short		so_options;	/* From socket call, see socket.h */
	struct linger	so_linger;	/* SO_LINGER value */
	/* The names below alias members of so_proto_props. */
#define	so_sndbuf	so_proto_props.sopp_txhiwat	/* SO_SNDBUF value */
#define	so_sndlowat	so_proto_props.sopp_txlowat	/* tx low water mark */
#define	so_rcvbuf	so_proto_props.sopp_rxhiwat	/* SO_RCVBUF value */
#define	so_rcvlowat	so_proto_props.sopp_rxlowat	/* rx low water mark */
#define	so_max_addr_len	so_proto_props.sopp_maxaddrlen
#define	so_minpsz	so_proto_props.sopp_minpsz
#define	so_maxpsz	so_proto_props.sopp_maxpsz

	int		so_xpg_rcvbuf;	/* SO_RCVBUF value for XPG4 socket */
	clock_t		so_sndtimeo;	/* send timeout */
	clock_t		so_rcvtimeo;	/* recv timeout */

	mblk_t		*so_oobmsg;	/* outofline oob data */
	ssize_t		so_oobmark;	/* offset of the oob data */

	pid_t		so_pgrp;	/* pgrp for signals */

	cred_t		*so_peercred;	/* connected socket peer cred */
	pid_t		so_cpid;	/* connected socket peer cached pid */
	zoneid_t	so_zoneid;	/* opener's zoneid */

	struct pollhead	so_poll_list;	/* common pollhead */
	short		so_pollev;	/* events that should be generated */

	/* Receive */
	unsigned int	so_rcv_queued;	/* # bytes on both rcv lists */
	mblk_t		*so_rcv_q_head;	/* processing/copyout rcv queue */
	mblk_t		*so_rcv_q_last_head;
	mblk_t		*so_rcv_head;	/* protocol prequeue */
	mblk_t		*so_rcv_last_head;	/* last mblk in b_next chain */
	kcondvar_t	so_rcv_cv;	/* wait for data */
	uint_t		so_rcv_wanted;	/* # of bytes wanted by app */
	/* SO_HAVE_DATA() treats a value of 0 as "no receive timer pending" */
	timeout_id_t	so_rcv_timer_tid;

#define	so_rcv_thresh	so_proto_props.sopp_rcvthresh
#define	so_rcv_timer_interval so_proto_props.sopp_rcvtimer

	kcondvar_t	so_snd_cv;	/* wait for snd buffers */
	uint32_t
		so_snd_qfull: 1,	/* Transmit full */
		so_rcv_wakeup: 1,
		so_snd_wakeup: 1,
		so_not_str: 1,	/* B_TRUE if not streams based socket */
		so_pad_to_bit_31: 28;	/* 4 + 28 bits fill the 32-bit word */

	/* Communication channel with protocol */
	sock_lower_handle_t	so_proto_handle;
	sock_downcalls_t	*so_downcalls;

	struct sock_proto_props	so_proto_props;	/* protocol settings */
	boolean_t		so_flowctrld;	/* Flow controlled */
	uint_t			so_copyflag;	/* Copy related flag */
	kcondvar_t		so_copy_cv;	/* Copy cond variable */

	/* kernel sockets */
	ksocket_callbacks_t	so_ksock_callbacks;
	void			*so_ksock_cb_arg;	/* callback argument */
	kcondvar_t		so_closing_cv;

	/* != NULL for sodirect enabled socket */
	struct sodirect_s	*so_direct;

	/* socket filters */
	uint_t			so_filter_active;	/* # of active fil */
	uint_t			so_filter_tx;		/* pending tx ops */
	struct sof_instance	*so_filter_top;		/* top of stack */
	struct sof_instance	*so_filter_bottom;	/* bottom of stack */
	clock_t			so_filter_defertime;	/* time when deferred */

	/* Kernel direct receive callbacks */
	so_krecv_f		so_krecv_cb;		/* recv callback */
	void			*so_krecv_arg;		/* recv cb arg */
};
254
/*
 * Evaluates to non-zero when either
 *   - no receive timer is pending (so_rcv_timer_tid == 0) and at least one
 *     of the two receive lists (so_rcv_head, so_rcv_q_head) is non-empty, or
 *   - SS_CANTRCVMORE is set in so_state.
 *
 * For the (tid == 0) case we must check so_rcv_{q_,}head rather than
 * (so_rcv_queued > 0), since the latter does not take into account mblks
 * with only control/name information.
 *
 * The whole expansion is parenthesized so that it composes correctly with
 * surrounding operators (e.g. !SO_HAVE_DATA(so)).  `so' is evaluated more
 * than once, so it must not have side effects.
 */
#define	SO_HAVE_DATA(so)						\
	(((so)->so_rcv_timer_tid == 0 &&				\
	((so)->so_rcv_head != NULL || (so)->so_rcv_q_head != NULL)) ||	\
	((so)->so_state & SS_CANTRCVMORE))
264
/*
 * Events handled by the protocol (in case sd_poll is set): the three
 * read-side poll events.
 */
#define	SO_PROTO_POLLEV	(POLLIN|POLLRDNORM|POLLRDBAND)
269
270
271 #endif /* _KERNEL || _KMEMUSER */
272
/*
 * flags
 * NOTE(review): presumably the bit values stored in sonode.so_flag (its
 * field comment says "flags, see below") - confirm.
 */
#define	SOMOD		0x0001		/* update socket modification time */
#define	SOACC		0x0002		/* update socket access time */

#define	SOLOCKED	0x0010		/* use to serialize open/closes */
#define	SOREADLOCKED	0x0020		/* serialize kstrgetmsg calls */
#define	SOCLONE		0x0040		/* child of clone driver */
#define	SOASYNC_UNBIND	0x0080		/* wait for ACK of async unbind */

/* Non-zero when the so_not_str bit is set (not a streams based socket). */
#define	SOCK_IS_NONSTR(so)	((so)->so_not_str)
283
/*
 * Socket state bits (sonode.so_state).  Each value is a distinct bit of a
 * 32-bit word; 0x00000400 is retired and 0x04000000-0x10000000 are not
 * assigned here.
 */
#define	SS_ISCONNECTED		0x00000001 /* socket connected to a peer */
#define	SS_ISCONNECTING		0x00000002 /* in process, connecting to peer */
#define	SS_ISDISCONNECTING	0x00000004 /* in process of disconnecting */
#define	SS_CANTSENDMORE		0x00000008 /* can't send more data to peer */

#define	SS_CANTRCVMORE		0x00000010 /* can't receive more data */
#define	SS_ISBOUND		0x00000020 /* socket is bound */
#define	SS_NDELAY		0x00000040 /* FNDELAY non-blocking */
#define	SS_NONBLOCK		0x00000080 /* O_NONBLOCK non-blocking */

#define	SS_ASYNC		0x00000100 /* async i/o notify */
#define	SS_ACCEPTCONN		0x00000200 /* listen done */
/*	unused			0x00000400 */	/* was SS_HASCONNIND */
#define	SS_SAVEDEOR		0x00000800 /* Saved MSG_EOR rcv side state */

#define	SS_RCVATMARK		0x00001000 /* at mark on input */
#define	SS_OOBPEND		0x00002000 /* OOB pending or present - poll */
#define	SS_HAVEOOBDATA		0x00004000 /* OOB data present */
#define	SS_HADOOBDATA		0x00008000 /* OOB data consumed */

#define	SS_CLOSING		0x00010000 /* in process of closing */
#define	SS_FIL_DEFER		0x00020000 /* filter deferred notification */
#define	SS_FILOP_OK		0x00040000 /* socket can attach filters */
#define	SS_FIL_RCV_FLOWCTRL	0x00080000 /* filter asserted rcv flow ctrl */

#define	SS_FIL_SND_FLOWCTRL	0x00100000 /* filter asserted snd flow ctrl */
#define	SS_FIL_STOP		0x00200000 /* no more filter actions */
#define	SS_SODIRECT		0x00400000 /* transport supports sodirect */
#define	SS_FILOP_UNSF		0x00800000 /* block attaching unsafe filters */

#define	SS_SENTLASTREADSIG	0x01000000 /* last rx signal has been sent */
#define	SS_SENTLASTWRITESIG	0x02000000 /* last tx signal has been sent */

#define	SS_FALLBACK_DRAIN	0x20000000 /* data was/is being drained */
#define	SS_FALLBACK_PENDING	0x40000000 /* fallback is pending */
#define	SS_FALLBACK_COMP	0x80000000 /* fallback has completed */


/* Set of states when the socket can't be rebound */
#define	SS_CANTREBIND	(SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING|\
			    SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ACCEPTCONN)
328
/*
 * Sockets that can fall back to TPI must ensure that fall back is not
 * initiated while a thread is using a socket.  SO_BLOCK_FALLBACK() enters
 * so_fallback_rwlock as a reader; the lock stays held on the normal path
 * and is released with SO_UNBLOCK_FALLBACK().
 *
 * If the fallback has already completed (SS_FALLBACK_COMP) the lock is
 * dropped and the ENCLOSING FUNCTION RETURNS the value of `fn'.
 * Otherwise, if SS_FILOP_OK is still set, it is cleared under so_lock;
 * this disables all future filter attachment.
 *
 * so_lock must not be held by the caller (asserted).  The body is enclosed
 * in braces so the macro forms a single statement and is fully guarded
 * when used as the body of an if/else/loop.
 */
#define	SO_BLOCK_FALLBACK(so, fn) {					\
	ASSERT(MUTEX_NOT_HELD(&(so)->so_lock));				\
	rw_enter(&(so)->so_fallback_rwlock, RW_READER);			\
	if ((so)->so_state & (SS_FALLBACK_COMP|SS_FILOP_OK)) {		\
		if ((so)->so_state & SS_FALLBACK_COMP) {		\
			rw_exit(&(so)->so_fallback_rwlock);		\
			return (fn);					\
		} else {						\
			mutex_enter(&(so)->so_lock);			\
			(so)->so_state &= ~SS_FILOP_OK;			\
			mutex_exit(&(so)->so_lock);			\
		}							\
	}								\
}
347
/*
 * Sockets that can fall back to TPI must ensure that fall back is not
 * initiated while a thread is using a socket.  Like SO_BLOCK_FALLBACK(),
 * this enters so_fallback_rwlock as a reader and, if the fallback has
 * already completed (SS_FALLBACK_COMP), drops the lock and makes the
 * ENCLOSING FUNCTION RETURN the value of `fn'.
 *
 * Otherwise it sets SS_FILOP_UNSF (under so_lock) if not yet set, which
 * disables all future unsafe filter attachment.  Safe filters can still
 * attach after we execute the function in which this macro is used.
 *
 * so_lock must not be held by the caller (asserted).  The body is enclosed
 * in braces so the macro forms a single statement and is fully guarded
 * when used as the body of an if/else/loop.
 */
#define	SO_BLOCK_FALLBACK_SAFE(so, fn) {				\
	ASSERT(MUTEX_NOT_HELD(&(so)->so_lock));				\
	rw_enter(&(so)->so_fallback_rwlock, RW_READER);			\
	if ((so)->so_state & SS_FALLBACK_COMP) {			\
		rw_exit(&(so)->so_fallback_rwlock);			\
		return (fn);						\
	} else if (((so)->so_state & SS_FILOP_UNSF) == 0) {		\
		mutex_enter(&(so)->so_lock);				\
		(so)->so_state |= SS_FILOP_UNSF;			\
		mutex_exit(&(so)->so_lock);				\
	}								\
}
365
/*
 * Release so_fallback_rwlock; the counterpart of the rw_enter() performed
 * by SO_BLOCK_FALLBACK() / SO_BLOCK_FALLBACK_SAFE().
 */
#define	SO_UNBLOCK_FALLBACK(so)	{			\
	rw_exit(&(so)->so_fallback_rwlock);		\
}
369
/*
 * Non-zero (1) when sending is flow controlled: either the so_snd_qfull
 * bit is set or SS_FIL_SND_FLOWCTRL is set in so_state.
 */
#define	SO_SND_FLOWCTRLD(so)						\
	(((so)->so_snd_qfull != 0) ||					\
	(((so)->so_state & SS_FIL_SND_FLOWCTRL) != 0))
372
/*
 * Poll events.
 * NOTE(review): presumably the bits kept in sonode.so_pollev ("events that
 * should be generated") - confirm against the users.
 */
#define	SO_POLLEV_IN		0x1	/* POLLIN wakeup needed */
#define	SO_POLLEV_ALWAYS	0x2	/* wakeups */
376
/*
 * Characteristics of sockets (sonode.so_mode).  Not changed after the
 * socket is created.
 */
#define	SM_PRIV			0x001	/* privileged for broadcast, raw... */
#define	SM_ATOMIC		0x002	/* atomic data transmission */
#define	SM_ADDR			0x004	/* addresses given with messages */
#define	SM_CONNREQUIRED		0x008	/* connection required by protocol */

#define	SM_FDPASSING		0x010	/* passes file descriptors */
#define	SM_EXDATA		0x020	/* Can handle T_EXDATA_REQ */
#define	SM_OPTDATA		0x040	/* Can handle T_OPTDATA_REQ */
#define	SM_BYTESTREAM		0x080	/* Byte stream - can use M_DATA */

#define	SM_ACCEPTOR_ID		0x100	/* so_acceptor_id is valid */

#define	SM_KERNEL		0x200	/* kernel socket */

/* The modes below are only for non-streams sockets */
#define	SM_ACCEPTSUPP		0x400	/* can handle accept() */
#define	SM_SENDFILESUPP		0x800	/* Private: proto supp sendfile */
#define	SM_DEFERERR		0x1000	/* Private: defer so_error delivery */
398
/*
 * Socket versions. Used by the socket library when calling _so_socket().
 * SOV_DEFAULT defers the choice to the so_default_version variable.
 */
#define	SOV_STREAM	0	/* Not a socket - just a stream */
#define	SOV_DEFAULT	1	/* Select based on so_default_version */
#define	SOV_SOCKSTREAM	2	/* Socket plus streams operations */
#define	SOV_SOCKBSD	3	/* Socket with no streams operations */
#define	SOV_XPG4_2	4	/* Xnet socket */
407
408 #if defined(_KERNEL) || defined(_KMEMUSER)
409
/*
 * sonode create and destroy functions.
 *
 * The create function receives the sockparams entry, five int arguments,
 * an int * and a credential, and returns the new sonode.
 * NOTE(review): the int arguments are presumably family, type, protocol,
 * version and flags, and the int * an error out-parameter - the parameter
 * names are not given here, so confirm against an implementation.
 */
typedef struct sonode *(*so_create_func_t)(struct sockparams *,
    int, int, int, int, int, int *, cred_t *);
typedef void (*so_destroy_func_t)(struct sonode *);
416
/* STREAM device information */
typedef struct sdev_info {
	char	*sd_devpath;	/* device path; NULL means "no device" */
	int	sd_devpathlen;	/* Is 0 if sd_devpath is a static string */
	vnode_t	*sd_vnode;
} sdev_info_t;
423
/*
 * Socket module interface versions.
 * NOTE(review): presumably compared against smod_reg_t.smod_version at
 * registration time - confirm in smod_register().
 */
#define	SOCKMOD_VERSION_1	1
#define	SOCKMOD_VERSION		2

/* name of the TPI pseudo socket module */
#define	SOTPI_SMOD_NAME		"socktpi"
429
/*
 * Private part of a socket module registration: sonode create/destroy
 * hooks, the protocol fallback hook and the v4/v6 fallback device paths.
 * The same five items appear as smod_* members of smod_info_t.
 */
typedef struct __smod_priv_s {
	so_create_func_t	smodp_sock_create_func;
	so_destroy_func_t	smodp_sock_destroy_func;
	so_proto_fallback_func_t smodp_proto_fallback_func;
	const char		*smodp_fallback_devpath_v4;
	const char		*smodp_fallback_devpath_v6;
} __smod_priv_t;
437
/*
 * Socket module register information; passed (const) to smod_register().
 */
typedef struct smod_reg_s {
	int		smod_version;
	char		*smod_name;
	size_t		smod_uc_version;	/* upcall version */
	size_t		smod_dc_version;	/* down call version */
	so_proto_create_func_t	smod_proto_create_func;

	/* __smod_priv must be NULL */
	__smod_priv_t	*__smod_priv;
} smod_reg_t;
451
/*
 * Socket module information.  smod_refcnt is adjusted atomically by
 * SMOD_INC_REF()/SMOD_DEC_REF(); smod_node is the list linkage for
 * whichever list holds these entries.
 */
typedef struct smod_info {
	int		smod_version;
	char		*smod_name;
	uint_t		smod_refcnt;		/* # of entries */
	size_t		smod_uc_version;	/* upcall version */
	size_t		smod_dc_version;	/* down call version */
	so_proto_create_func_t	smod_proto_create_func;
	so_proto_fallback_func_t smod_proto_fallback_func;
	const char		*smod_fallback_devpath_v4;
	const char		*smod_fallback_devpath_v6;
	so_create_func_t	smod_sock_create_func;
	so_destroy_func_t	smod_sock_destroy_func;
	list_node_t	smod_node;
} smod_info_t;
469
/* Named kstat counters kept per sockparams entry (sockparams.sp_stats). */
typedef struct sockparams_stats {
	kstat_named_t	sps_nfallback;	/* # of fallbacks to TPI */
	kstat_named_t	sps_nactive;	/* # of active sockets */
	kstat_named_t	sps_ncreate;	/* total # of created sockets */
} sockparams_stats_t;
475
/*
 * sockparams
 *
 * Used for mapping family/type/protocol to a socket module or STREAMS device
 *
 * References are counted in sp_refcnt under sp_lock; use
 * SOCKPARAMS_INC_REF()/SOCKPARAMS_DEC_REF() rather than touching the
 * count directly.
 */
struct sockparams {
	/*
	 * The family, type, protocol, sdev_info and smod_name are
	 * set when the entry is created, and they will never change
	 * thereafter.
	 */
	int		sp_family;
	int		sp_type;
	int		sp_protocol;

	sdev_info_t	sp_sdev_info;	/* STREAM device */
	char		*sp_smod_name;	/* socket module name */

	kmutex_t	sp_lock;	/* lock for refcnt and smod_info */
	uint64_t	sp_refcnt;	/* entry reference count */
	smod_info_t	*sp_smod_info;	/* socket module */

	sockparams_stats_t sp_stats;
	kstat_t		*sp_kstat;

	/*
	 * The entries below are only modified while holding
	 * sockconf_lock as a writer.
	 */
	int		sp_flags;	/* see below */
	list_node_t	sp_node;

	list_t		sp_auto_filters; /* list of automatic filters */
	list_t		sp_prog_filters; /* list of programmatic filters */
};
511
/* Forward declaration; only pointers to it are used in this header. */
struct sof_entry;

/*
 * List element that refers to one filter entry.
 * NOTE(review): presumably the element type of sp_auto_filters and
 * sp_prog_filters - confirm where those lists are created.
 */
typedef struct sp_filter {
	struct sof_entry *spf_filter;
	list_node_t	spf_node;
} sp_filter_t;
518
519
/*
 * sockparams flags (sockparams.sp_flags)
 */
#define	SOCKPARAMS_EPHEMERAL	0x1	/* temp. entry, not on global list */
524
/*
 * sockparams table management.
 * NOTE(review): parameter names are not given in these prototypes; the
 * leading (int, int, int) triples presumably are family, type, protocol,
 * and trailing int * arguments presumably return an error - confirm.
 */
extern void sockparams_init(void);
extern struct sockparams *sockparams_hold_ephemeral_bydev(int, int, int,
    const char *, int, int *);
extern struct sockparams *sockparams_hold_ephemeral_bymod(int, int, int,
    const char *, int, int *);
/* Called by SOCKPARAMS_DEC_REF() for the last ref of an ephemeral entry. */
extern void sockparams_ephemeral_drop_last_ref(struct sockparams *);

extern struct sockparams *sockparams_create(int, int, int, char *, char *, int,
    int, int, int *);
extern void	sockparams_destroy(struct sockparams *);
extern int	sockparams_add(struct sockparams *);
extern int	sockparams_delete(int, int, int);
extern int	sockparams_new_filter(struct sof_entry *);
extern void	sockparams_filter_cleanup(struct sof_entry *);
extern int	sockparams_copyout_socktable(uintptr_t);

/* Socket module (smod_info_t) registry. */
extern void smod_init(void);
extern void smod_add(smod_info_t *);
extern int smod_register(const smod_reg_t *);
extern int smod_unregister(const char *);
extern smod_info_t *smod_lookup_byname(const char *);
546
/* True when the entry has a STREAMS device path (sd_devpath is non-NULL). */
#define	SOCKPARAMS_HAS_DEVICE(sp)					\
	((sp)->sp_sdev_info.sd_devpath != NULL)
549
/*
 * Increase the smod_info_t reference count.
 * Fires the smodinfo__inc__ref DTrace probe, then increments smod_refcnt
 * atomically.  smodp must not be NULL (asserted).
 */
#define	SMOD_INC_REF(smodp) {						\
	ASSERT((smodp) != NULL);					\
	DTRACE_PROBE1(smodinfo__inc__ref, struct smod_info *, (smodp));	\
	atomic_inc_uint(&(smodp)->smod_refcnt);				\
}
556
/*
 * Decrease the socket module entry reference count.
 * When nothing maps to the entry any more, we try to unload the module
 * from the kernel. If the module can't unload, just leave the module entry
 * with a zero refcnt.
 *
 * smodp must be non-NULL with a non-zero count (both asserted).  The count
 * is decremented atomically and then re-read non-atomically; see the note
 * inside the macro for why that is acceptable.
 */
#define	SMOD_DEC_REF(smodp, modname) {					\
	ASSERT((smodp) != NULL);					\
	ASSERT((smodp)->smod_refcnt != 0);				\
	atomic_dec_uint(&(smodp)->smod_refcnt);				\
	/*								\
	 * No need to atomically check the return value because the	\
	 * socket module framework will verify that no one is using	\
	 * the module before unloading. Worst thing that can happen	\
	 * here is multiple calls to mod_remove_by_name(), which is OK.	\
	 */								\
	if ((smodp)->smod_refcnt == 0)					\
		(void) mod_remove_by_name(modname);			\
}
576
/*
 * Increase the reference count.
 * Fires the sockparams__inc__ref DTrace probe and increments sp_refcnt
 * while holding sp_lock.  The post-increment ASSERT catches the count
 * wrapping around to zero.  sp must not be NULL (asserted).
 */
#define	SOCKPARAMS_INC_REF(sp) {					\
	ASSERT((sp) != NULL);						\
	DTRACE_PROBE1(sockparams__inc__ref, struct sockparams *, (sp));	\
	mutex_enter(&(sp)->sp_lock);					\
	(sp)->sp_refcnt++;						\
	ASSERT((sp)->sp_refcnt != 0);					\
	mutex_exit(&(sp)->sp_lock);					\
}
586
/*
 * Decrease the reference count.
 *
 * If the sockparams is ephemeral, then the thread dropping the last ref
 * count will destroy the entry.
 *
 * In detail, with sp_lock held:
 *  - last reference, ephemeral entry: sp_lock is dropped WITHOUT
 *    decrementing and sockparams_ephemeral_drop_last_ref() is called;
 *  - last reference, non-ephemeral entry: the count goes to zero, the
 *    reference on the socket module (if any) is released with
 *    SMOD_DEC_REF() and sp_smod_info is cleared, all before sp_lock is
 *    dropped;
 *  - otherwise: the count is simply decremented.
 * sp must be non-NULL (asserted) and hold at least one reference
 * (asserted under the lock).
 */
#define	SOCKPARAMS_DEC_REF(sp) {					\
	ASSERT((sp) != NULL);						\
	DTRACE_PROBE1(sockparams__dec__ref, struct sockparams *, (sp));	\
	mutex_enter(&(sp)->sp_lock);					\
	ASSERT((sp)->sp_refcnt > 0);					\
	if ((sp)->sp_refcnt == 1) {					\
		if ((sp)->sp_flags & SOCKPARAMS_EPHEMERAL) {		\
			mutex_exit(&(sp)->sp_lock);			\
			sockparams_ephemeral_drop_last_ref((sp));	\
		} else {						\
			(sp)->sp_refcnt--;				\
			if ((sp)->sp_smod_info != NULL) {		\
				SMOD_DEC_REF((sp)->sp_smod_info,	\
				    (sp)->sp_smod_name);		\
			}						\
			(sp)->sp_smod_info = NULL;			\
			mutex_exit(&(sp)->sp_lock);			\
		}							\
	} else {							\
		(sp)->sp_refcnt--;					\
		mutex_exit(&(sp)->sp_lock);				\
	}								\
}
616
/*
 * Used to traverse the list of AF_UNIX sockets to construct the kstat
 * for netstat(1m).
 * NOTE(review): sl_lock presumably protects sl_list - confirm with the
 * code that links and unlinks sonodes.
 */
struct socklist {
	kmutex_t	sl_lock;
	struct sonode	*sl_list;	/* head of the sonode list */
};

/* The single global instance; defined elsewhere. */
extern struct socklist socklist;
/*
 * sendfile statistics counters.
 *
 * ss_full_waits is the number of times the reader thread
 * waits when the queue is full and ss_empty_waits is the number
 * of times the consumer thread waits when the queue is empty.
 * No locks for these as they are just indicators of whether
 * disk or network or both is slow or fast.
 */
struct sendfile_stats {
	uint32_t	ss_file_cached;
	uint32_t	ss_file_not_cached;
	uint32_t	ss_full_waits;
	uint32_t	ss_empty_waits;
	uint32_t	ss_file_segmap;
};
641
/*
 * A single sendfile request is represented by snf_req.
 * Requests are chained through sr_next; sr_mp_head/sr_mp_tail bracket a
 * chain of mblks belonging to the request.
 * NOTE(review): sr_lock/sr_cv presumably synchronize the reader and
 * consumer threads on that chain, with sr_qlen/sr_hiwat/sr_lowat as the
 * flow-control thresholds - confirm in the sendfile implementation.
 */
typedef struct snf_req {
	struct snf_req	*sr_next;
	mblk_t		*sr_mp_head;
	mblk_t		*sr_mp_tail;
	kmutex_t	sr_lock;
	kcondvar_t	sr_cv;
	uint_t		sr_qlen;
	int		sr_hiwat;
	int		sr_lowat;
	int		sr_operation;
	struct vnode	*sr_vp;
	file_t		*sr_fp;
	ssize_t		sr_maxpsz;
	u_offset_t	sr_file_off;
	u_offset_t	sr_file_size;
	/* NOTE(review): presumably a marker stored in sr_read_error */
#define	SR_READ_DONE	0x80000000
	int		sr_read_error;
	int		sr_write_error;
} snf_req_t;
664
/*
 * A queue of sendfile requests: head/tail of the snf_req_t chain plus
 * service-thread accounting.
 */
struct sendfile_queue {
	snf_req_t	*snfq_req_head;
	snf_req_t	*snfq_req_tail;
	kmutex_t	snfq_lock;
	kcondvar_t	snfq_cv;
	int		snfq_svc_threads;	/* # of service threads */
	int		snfq_idle_cnt;		/* # of idling threads */
	int		snfq_max_threads;
	int		snfq_req_cnt;		/* Number of requests */
};
676
/* NOTE(review): presumably a value for snf_req.sr_operation - confirm. */
#define	READ_OP		1
/* Expressed in units of `hz'; expands to an expression, not a constant. */
#define	SNFQ_TIMEOUT	(60 * 5 * hz)	/* 5 minutes */
679
/*
 * Socket network operations switch.
 *
 * One function pointer per socket operation; a sonode's vector is reached
 * through so_ops and is normally invoked via the SOP_*() wrapper macros.
 * Every entry takes the sonode as its first argument and returns an int.
 */
struct sonodeops {
	int	(*sop_init)(struct sonode *, struct sonode *, cred_t *,
		    int);
	int	(*sop_accept)(struct sonode *, int, cred_t *, struct sonode **);
	int	(*sop_bind)(struct sonode *, struct sockaddr *, socklen_t,
		    int, cred_t *);
	int	(*sop_listen)(struct sonode *, int, cred_t *);
	int	(*sop_connect)(struct sonode *, struct sockaddr *,
		    socklen_t, int, int, cred_t *);
	int	(*sop_recvmsg)(struct sonode *, struct msghdr *,
		    struct uio *, cred_t *);
	int	(*sop_sendmsg)(struct sonode *, struct msghdr *,
		    struct uio *, cred_t *);
	int	(*sop_sendmblk)(struct sonode *, struct msghdr *, int,
		    cred_t *, mblk_t **);
	int	(*sop_getpeername)(struct sonode *, struct sockaddr *,
		    socklen_t *, boolean_t, cred_t *);
	int	(*sop_getsockname)(struct sonode *, struct sockaddr *,
		    socklen_t *, cred_t *);
	int	(*sop_shutdown)(struct sonode *, int, cred_t *);
	int	(*sop_getsockopt)(struct sonode *, int, int, void *,
		    socklen_t *, int, cred_t *);
	int	(*sop_setsockopt)(struct sonode *, int, int, const void *,
		    socklen_t, cred_t *);
	int	(*sop_ioctl)(struct sonode *, int, intptr_t, int,
		    cred_t *, int32_t *);
	int	(*sop_poll)(struct sonode *, short, int, short *,
		    struct pollhead **);
	int	(*sop_close)(struct sonode *, int, cred_t *);
};
711
/*
 * Dispatch wrappers for struct sonodeops: each SOP_X(so, ...) calls the
 * sop_x entry of so's operations vector, passing `so' as first argument
 * and forwarding the remaining arguments unchanged.  `so' is evaluated
 * twice, so it must be free of side effects.
 */
#define	SOP_INIT(so, pso, cr, flags)					\
	((*(so)->so_ops->sop_init)((so), (pso), (cr), (flags)))
#define	SOP_ACCEPT(so, fflag, cr, newsop)				\
	((*(so)->so_ops->sop_accept)((so), (fflag), (cr), (newsop)))
#define	SOP_BIND(so, addr, addrlen, flags, cr)				\
	((*(so)->so_ops->sop_bind)((so), (addr), (addrlen), (flags), (cr)))
#define	SOP_LISTEN(so, backlog, cr)					\
	((*(so)->so_ops->sop_listen)((so), (backlog), (cr)))
#define	SOP_CONNECT(so, addr, addrlen, fflag, flags, cr)		\
	((*(so)->so_ops->sop_connect)((so), (addr), (addrlen), (fflag),	\
	(flags), (cr)))
#define	SOP_RECVMSG(so, msg, uiop, cr)					\
	((*(so)->so_ops->sop_recvmsg)((so), (msg), (uiop), (cr)))
#define	SOP_SENDMSG(so, msg, uiop, cr)					\
	((*(so)->so_ops->sop_sendmsg)((so), (msg), (uiop), (cr)))
#define	SOP_SENDMBLK(so, msg, size, cr, mpp)				\
	((*(so)->so_ops->sop_sendmblk)((so), (msg), (size), (cr), (mpp)))
#define	SOP_GETPEERNAME(so, addr, addrlenp, accept, cr)			\
	((*(so)->so_ops->sop_getpeername)((so), (addr), (addrlenp),	\
	(accept), (cr)))
#define	SOP_GETSOCKNAME(so, addr, addrlenp, cr)				\
	((*(so)->so_ops->sop_getsockname)((so), (addr), (addrlenp), (cr)))
#define	SOP_SHUTDOWN(so, how, cr)					\
	((*(so)->so_ops->sop_shutdown)((so), (how), (cr)))
#define	SOP_GETSOCKOPT(so, level, optname, optval, optlenp, flags, cr)	\
	((*(so)->so_ops->sop_getsockopt)((so), (level), (optname),	\
	(optval), (optlenp), (flags), (cr)))
#define	SOP_SETSOCKOPT(so, level, optname, optval, optlen, cr)		\
	((*(so)->so_ops->sop_setsockopt)((so), (level), (optname),	\
	(optval), (optlen), (cr)))
#define	SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)			\
	((*(so)->so_ops->sop_ioctl)((so), (cmd), (arg), (mode), (cr),	\
	(rvalp)))
#define	SOP_POLL(so, events, anyyet, reventsp, phpp)			\
	((*(so)->so_ops->sop_poll)((so), (events), (anyyet), (reventsp),	\
	(phpp)))
#define	SOP_CLOSE(so, flag, cr)						\
	((*(so)->so_ops->sop_close)((so), (flag), (cr)))
747
748 #endif /* defined(_KERNEL) || defined(_KMEMUSER) */
749
750 #ifdef _KERNEL
751
/* True when addr is a multiple of _CMSG_HDR_ALIGNMENT. */
#define	ISALIGNED_cmsghdr(addr) \
	(((uintptr_t)(addr) & (_CMSG_HDR_ALIGNMENT - 1)) == 0)

/*
 * Round len up to a multiple of _CMSG_HDR_ALIGNMENT.  The mask arithmetic
 * requires _CMSG_HDR_ALIGNMENT to be a power of two.
 */
#define	ROUNDUP_cmsglen(len) \
	(((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1))

/* True for a VSOCK vnode that has no stream attached. */
#define	IS_NON_STREAM_SOCK(vp) \
	((vp)->v_type == VSOCK && (vp)->v_stream == NULL)
/*
 * Macros that operate on struct cmsghdr.
 * Used in parsing msg_control.
 * The CMSG_VALID macro does not assume that the last option buffer is padded.
 *
 * CMSG_NEXT	 - header that follows cmsg, i.e. cmsg advanced by its
 *		   cmsg_len rounded up to the header alignment.  The
 *		   expansion is parenthesized so that postfix operators
 *		   (e.g. CMSG_NEXT(c)->cmsg_len) bind to the result.
 * CMSG_CONTENT	 - address of the data that follows the header.
 * CMSG_CONTENTLEN - cmsg_len minus the header size; only meaningful once
 *		   CMSG_VALID has established cmsg_len >= header size.
 * CMSG_VALID	 - cmsg is aligned, lies within [start, end), is at least a
 *		   header long and does not extend past end.  The final
 *		   bound is computed as a length comparison against
 *		   (end - cmsg), which cannot wrap because the preceding
 *		   term guarantees cmsg < end; an address sum could overflow
 *		   for a huge cmsg_len.
 */
#define	CMSG_NEXT(cmsg)						\
	((struct cmsghdr *)((uintptr_t)(cmsg) +			\
	    ROUNDUP_cmsglen((cmsg)->cmsg_len)))
#define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
#define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
#define	CMSG_VALID(cmsg, start, end)					\
	(ISALIGNED_cmsghdr(cmsg) &&					\
	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
	((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&	\
	((cmsg)->cmsg_len <= (uintptr_t)(end) - (uintptr_t)(cmsg)))
776
/*
 * Maximum size, in bytes, of any argument that is copied in (addresses,
 * options, access rights). MUST be at least MAXPATHLEN + 3.
 * BSD and SunOS 4.X limited this to MLEN or MCLBYTES.
 */
#define	SO_MAXARGSIZE	8192
783
/*
 * Convert between vnode and sonode.
 * VTOSO() reinterprets the vnode's v_data as a sonode pointer without any
 * type check; SOTOV() returns the sonode's so_vnode.
 */
#define	VTOSO(vp)	((struct sonode *)((vp)->v_data))
#define	SOTOV(sp)	((sp)->so_vnode)
789
/*
 * Internal flags for sobind() (bit mask)
 */
#define	_SOBIND_REBIND		0x01	/* Bind to existing local address */
#define	_SOBIND_UNSPEC		0x02	/* Bind to unspecified address */
#define	_SOBIND_LOCK_HELD	0x04	/* so_excl_lock held by caller */
#define	_SOBIND_NOXLATE		0x08	/* No addr translation for AF_UNIX */
#define	_SOBIND_XPG4_2		0x10	/* xpg4.2 semantics */
#define	_SOBIND_SOCKBSD		0x20	/* BSD semantics */
#define	_SOBIND_LISTEN		0x40	/* Make into SS_ACCEPTCONN */
#define	_SOBIND_SOCKETPAIR	0x80	/* Internal flag for so_socketpair() */
					/* to enable listen with backlog = 1 */

/*
 * Internal flags for sounbind()
 */
#define	_SOUNBIND_REBIND	0x01	/* Don't clear fields - will rebind */

/*
 * Internal flags for soconnect() (bit mask)
 */
#define	_SOCONNECT_NOXLATE	0x01	/* No addr translation for AF_UNIX */
#define	_SOCONNECT_DID_BIND	0x02	/* Unbind when connect fails */
#define	_SOCONNECT_XPG4_2	0x04	/* xpg4.2 semantics */

/*
 * Internal flags for sodisconnect()
 */
#define	_SODISCONNECT_LOCK_HELD	0x01	/* so_excl_lock held by caller */

/*
 * Internal flags for sotpi_getsockopt().
 */
#define	_SOGETSOCKOPT_XPG4_2	0x01	/* xpg4.2 semantics */

/*
 * Internal flags for soallocproto*().
 * Unlike the groups above these are mutually exclusive values, not bits.
 */
#define	_ALLOC_NOSLEEP		0	/* Don't sleep for memory */
#define	_ALLOC_INTR		1	/* Sleep until interrupt */
#define	_ALLOC_SLEEP		2	/* Sleep forever */
831
/*
 * Internal structure for handling AF_UNIX file descriptor passing.
 * fd_fds is declared with one element but holds fd_numfd entries, so the
 * structure is allocated larger than sizeof (struct fdbuf) when more than
 * one file is passed; fd_size records the allocation size for kmem_free.
 */
struct fdbuf {
	int		fd_size;	/* In bytes, for kmem_free */
	int		fd_numfd;	/* Number of elements below */
	char		*fd_ebuf;	/* Extra buffer to free */
	int		fd_ebuflen;	/* length of fd_ebuf */
	frtn_t		fd_frtn;
	struct file	*fd_fds[1];	/* One or more */
};
/* Size of a struct fdbuf without its single declared fd_fds slot. */
#define	FDBUF_HDRSIZE	(sizeof (struct fdbuf) - sizeof (struct file *))
844
/*
 * Variable that can be patched to set what version of socket socket()
 * will create (see SOV_DEFAULT).
 */
extern int so_default_version;

#ifdef DEBUG
/* Turn on extra testing capabilities */
#define	SOCK_TEST
#endif /* DEBUG */

#ifdef DEBUG
/* Helpers that are only declared in DEBUG builds. */
char	*pr_state(uint_t, uint_t);
char	*pr_addr(int, struct sockaddr *, t_uscalar_t);
int	so_verify_oobstate(struct sonode *);
#endif /* DEBUG */
861
/*
 * DEBUG macros
 *
 * In DEBUG builds these print through printf, gated by the sockprinterr
 * and sockdebug variables and, for the *so* variants, by SO_DEBUG being
 * set in the socket's so_options.  `args' must be a complete parenthesized
 * printf argument list, e.g. eprint(("x=%d\n", x)).  In non-DEBUG builds
 * every macro expands to an empty block and its arguments are not
 * evaluated.
 *
 * eprintline() stays silent for EINTR.  Its argument is parenthesized in
 * the comparison so that expressions with low-precedence operators (e.g.
 * a conditional expression) are compared as a whole.
 */
#if defined(DEBUG)
#define	SOCK_DEBUG

extern int	sockdebug;
extern int	sockprinterr;

#define	eprint(args)	printf args
#define	eprintso(so, args) \
{ if (sockprinterr && ((so)->so_options & SO_DEBUG)) printf args; }
#define	eprintline(error)						\
{									\
	if ((error) != EINTR && (sockprinterr || sockdebug > 0))	\
		printf("socket error %d: line %d file %s\n",		\
		    (error), __LINE__, __FILE__);			\
}

#define	eprintsoline(so, error)						\
{ if (sockprinterr && ((so)->so_options & SO_DEBUG))			\
	printf("socket(%p) error %d: line %d file %s\n",		\
	    (void *)(so), (error), __LINE__, __FILE__);			\
}
#define	dprint(level, args)	{ if (sockdebug > (level)) printf args; }
#define	dprintso(so, level, args) \
{ if (sockdebug > (level) && ((so)->so_options & SO_DEBUG)) printf args; }

#else /* defined(DEBUG) */

#define	eprint(args)		{}
#define	eprintso(so, args)	{}
#define	eprintline(error)	{}
#define	eprintsoline(so, error)	{}
#define	dprint(level, args)	{}
#define	dprintso(so, level, args) {}

#endif /* defined(DEBUG) */
900
901 extern struct vfsops sock_vfsops;
902 extern struct vnodeops *socket_vnodeops;
903 extern const struct fs_operation_def socket_vnodeops_template[];
904
905 extern dev_t sockdev;
906
907 extern krwlock_t sockconf_lock;
908
909 /*
910 * sockfs functions
911 */
912 extern int sock_getmsg(vnode_t *, struct strbuf *, struct strbuf *,
913 uchar_t *, int *, int, rval_t *);
914 extern int sock_putmsg(vnode_t *, struct strbuf *, struct strbuf *,
915 uchar_t, int, int);
916 extern int sogetvp(char *, vnode_t **, int);
917 extern int sockinit(int, char *);
918 extern int solookup(int, int, int, struct sockparams **);
919 extern void so_lock_single(struct sonode *);
920 extern void so_unlock_single(struct sonode *, int);
921 extern int so_lock_read(struct sonode *, int);
922 extern int so_lock_read_intr(struct sonode *, int);
923 extern void so_unlock_read(struct sonode *);
924 extern void *sogetoff(mblk_t *, t_uscalar_t, t_uscalar_t, uint_t);
925 extern void so_getopt_srcaddr(void *, t_uscalar_t,
926 void **, t_uscalar_t *);
927 extern int so_getopt_unix_close(void *, t_uscalar_t);
928 extern void fdbuf_free(struct fdbuf *);
929 extern mblk_t *fdbuf_allocmsg(int, struct fdbuf *);
930 extern int fdbuf_create(void *, int, struct fdbuf **);
931 extern void so_closefds(void *, t_uscalar_t, int, int);
932 extern int so_getfdopt(void *, t_uscalar_t, int, void **, int *);
933 t_uscalar_t so_optlen(void *, t_uscalar_t, int);
934 extern void so_cmsg2opt(void *, t_uscalar_t, int, mblk_t *);
935 extern t_uscalar_t
936 so_cmsglen(mblk_t *, void *, t_uscalar_t, int);
937 extern int so_opt2cmsg(mblk_t *, void *, t_uscalar_t, int,
938 void *, t_uscalar_t);
939 extern void soisconnecting(struct sonode *);
940 extern void soisconnected(struct sonode *);
941 extern void soisdisconnected(struct sonode *, int);
942 extern void socantsendmore(struct sonode *);
943 extern void socantrcvmore(struct sonode *);
944 extern void soseterror(struct sonode *, int);
945 extern int sogeterr(struct sonode *, boolean_t);
946 extern int sowaitconnected(struct sonode *, int, int);
947
948 extern ssize_t soreadfile(file_t *, uchar_t *, u_offset_t, int *, size_t);
949 extern void *sock_kstat_init(zoneid_t);
950 extern void sock_kstat_fini(zoneid_t, void *);
951 extern struct sonode *getsonode(int, int *, file_t **);
952 /*
953 * Function wrappers (mostly around the sonode switch) for
954 * backward compatibility.
955 */
956 extern int soaccept(struct sonode *, int, struct sonode **);
957 extern int sobind(struct sonode *, struct sockaddr *, socklen_t,
958 int, int);
959 extern int solisten(struct sonode *, int);
960 extern int soconnect(struct sonode *, struct sockaddr *, socklen_t,
961 int, int);
962 extern int sorecvmsg(struct sonode *, struct nmsghdr *, struct uio *);
963 extern int sosendmsg(struct sonode *, struct nmsghdr *, struct uio *);
964 extern int soshutdown(struct sonode *, int);
965 extern int sogetsockopt(struct sonode *, int, int, void *, socklen_t *,
966 int);
967 extern int sosetsockopt(struct sonode *, int, int, const void *,
968 t_uscalar_t);
969
970 extern struct sonode *socreate(struct sockparams *, int, int, int, int,
971 int *);
972
973 extern int so_copyin(const void *, void *, size_t, int);
974 extern int so_copyout(const void *, void *, size_t, int);
975
976 /*
977 * Functions to manipulate the use of direct receive callbacks. This should not
978 * be used outside of sockfs and ksocket. These are generally considered a use
979 * once interface for a socket and will cause all outstanding data on the socket
980 * to be flushed.
981 */
982 extern int so_krecv_set(sonode_t *, so_krecv_f, void *);
983 extern void so_krecv_unblock(sonode_t *);
984
985 #endif
986
987 /*
988 * Internal structure for obtaining sonode information from the socklist.
989 * These types match those corresponding in the sonode structure.
990 * This is not a published interface, and may change at any time.
991 */
992 struct sockinfo {
993 uint_t si_size; /* real length of this struct */
994 short si_family;
995 short si_type;
996 ushort_t si_flag;
997 uint_t si_state;
998 uint_t si_ux_laddr_sou_magic;
999 uint_t si_ux_faddr_sou_magic;
1000 t_scalar_t si_serv_type;
1001 t_uscalar_t si_laddr_soa_len;
1002 t_uscalar_t si_faddr_soa_len;
1003 uint16_t si_laddr_family;
1004 uint16_t si_faddr_family;
1005 char si_laddr_sun_path[MAXPATHLEN + 1]; /* NULL terminated */
1006 char si_faddr_sun_path[MAXPATHLEN + 1];
1007 boolean_t si_faddr_noxlate;
1008 zoneid_t si_szoneid;
1009 };
1010
/*
 * Subcodes accepted by the sockconfig() system call
 */
#define	SOCKCONFIG_ADD_SOCK		0
#define	SOCKCONFIG_REMOVE_SOCK		1
#define	SOCKCONFIG_ADD_FILTER		2
#define	SOCKCONFIG_REMOVE_FILTER	3
#define	SOCKCONFIG_GET_SOCKTABLE	4
1019
1020 /*
1021 * Data structures for configuring socket filters.
1022 */
1023
/*
 * Placement hint for automatic filters.
 * The numbering is written out; it is the same as the implicit one
 * (consecutive values starting at zero).
 */
typedef enum {
	SOF_HINT_NONE	= 0,
	SOF_HINT_TOP	= 1,
	SOF_HINT_BOTTOM	= 2,
	SOF_HINT_BEFORE	= 3,
	SOF_HINT_AFTER	= 4
} sof_hint_t;
1034
/*
 * Socket tuple: a (family, type, protocol) triple.
 * sockconfig_filter_props uses these to list the socket types of interest.
 */
typedef struct sof_socktuple {
	int	sofst_family;		/* family member of the triple */
	int	sofst_type;		/* type member of the triple */
	int	sofst_protocol;		/* protocol member of the triple */
} sof_socktuple_t;
1044
1045 /*
1046 * Socket filter properties used by sockconfig() system call.
1047 */
1048 struct sockconfig_filter_props {
1049 char *sfp_modname;
1050 boolean_t sfp_autoattach;
1051 sof_hint_t sfp_hint;
1052 char *sfp_hintarg;
1053 uint_t sfp_socktuple_cnt;
1054 sof_socktuple_t *sfp_socktuple;
1055 };
1056
1057 /*
1058 * Data structures for the in-kernel socket configuration table.
1059 */
1060 typedef struct sockconfig_socktable_entry {
1061 int se_family;
1062 int se_type;
1063 int se_protocol;
1064 int se_refcnt;
1065 int se_flags;
1066 char se_modname[MODMAXNAMELEN];
1067 char se_strdev[MAXPATHLEN];
1068 } sockconfig_socktable_entry_t;
1069
1070 typedef struct sockconfig_socktable {
1071 uint_t num_of_entries;
1072 sockconfig_socktable_entry_t *st_entries;
1073 } sockconfig_socktable_t;
1074
#ifdef _SYSCALL32

/*
 * 32-bit counterparts of the sockconfig structures, compiled only when
 * _SYSCALL32 is defined.  Members that are pointers in the native
 * structures are carried here as caddr32_t.
 */

typedef struct sof_socktuple32 {
	int32_t		sofst_family;
	int32_t		sofst_type;
	int32_t		sofst_protocol;
} sof_socktuple32_t;

struct sockconfig_filter_props32 {
	caddr32_t	sfp_modname;
	boolean_t	sfp_autoattach;
	sof_hint_t	sfp_hint;
	caddr32_t	sfp_hintarg;
	uint32_t	sfp_socktuple_cnt;
	caddr32_t	sfp_socktuple;
};

typedef struct sockconfig_socktable32 {
	uint_t		num_of_entries;
	caddr32_t	st_entries;
} sockconfig_socktable32_t;

#endif	/* _SYSCALL32 */
1098
/* Directory in which sockmods are stored */
#define	SOCKMOD_PATH	"socketmod"
1100
1101 #ifdef __cplusplus
1102 }
1103 #endif
1104
1105 #endif /* _SYS_SOCKETVAR_H */