Print this page
14025 ipnet sniffing leaks promisc mode
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/ipnet/ipnet.c
+++ new/usr/src/uts/common/inet/ipnet/ipnet.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
|
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 - * Copyright (c) 2016, Joyent, Inc. All rights reserved.
29 + * Copyright 2021 Joyent, Inc.
30 30 */
31 31
32 32 /*
33 33 * The ipnet device defined here provides access to packets at the IP layer. To
34 34 * provide access to packets at this layer it registers a callback function in
35 35 * the ip module and when there are open instances of the device ip will pass
36 36 * packets into the device. Packets from ip are passed on the input, output and
37 37 * loopback paths. Internally the module returns to ip as soon as possible by
38 38 * deferring processing using a taskq.
39 39 *
40 40 * Management of the devices in /dev/ipnet/ is handled by the devname
41 41 * filesystem and use of the neti interfaces. This module registers for NIC
42 42 * events using the neti framework so that when IP interfaces are brought up,
43 43 * taken down etc. the ipnet module is notified and its view of the interfaces
44 44 * configured on the system adjusted. On attach, the module gets an initial
45 45 * view of the system again using the neti framework but as it has already
46 46 * registered for IP interface events, it is still up-to-date with any changes.
47 47 */
48 48
49 49 #include <sys/types.h>
50 50 #include <sys/conf.h>
51 51 #include <sys/cred.h>
52 52 #include <sys/stat.h>
53 53 #include <sys/ddi.h>
54 54 #include <sys/sunddi.h>
55 55 #include <sys/modctl.h>
56 56 #include <sys/dlpi.h>
57 57 #include <sys/strsun.h>
58 58 #include <sys/id_space.h>
59 59 #include <sys/kmem.h>
60 60 #include <sys/mkdev.h>
61 61 #include <sys/neti.h>
62 62 #include <net/if.h>
63 63 #include <sys/errno.h>
64 64 #include <sys/list.h>
65 65 #include <sys/ksynch.h>
66 66 #include <sys/hook_event.h>
67 67 #include <sys/sdt.h>
68 68 #include <sys/stropts.h>
69 69 #include <sys/sysmacros.h>
70 70 #include <inet/ip.h>
71 71 #include <inet/ip_if.h>
72 72 #include <inet/ip_multi.h>
73 73 #include <inet/ip6.h>
74 74 #include <inet/ipnet.h>
75 75 #include <net/bpf.h>
76 76 #include <net/bpfdesc.h>
77 77 #include <net/dlt.h>
78 78
79 79 static struct module_info ipnet_minfo = {
80 80 1, /* mi_idnum */
81 81 "ipnet", /* mi_idname */
82 82 0, /* mi_minpsz */
83 83 INFPSZ, /* mi_maxpsz */
84 84 2048, /* mi_hiwat */
85 85 0 /* mi_lowat */
86 86 };
87 87
88 88 /*
89 89 * List to hold static view of ipnetif_t's on the system. This is needed to
90 90 * avoid holding the lock protecting the avl tree of ipnetif's over the
91 91 * callback into the dev filesystem.
92 92 */
93 93 typedef struct ipnetif_cbdata {
94 94 char ic_ifname[LIFNAMSIZ];
95 95 dev_t ic_dev;
96 96 list_node_t ic_next;
97 97 } ipnetif_cbdata_t;
98 98
/*
 * Classification used by ipnet_accept(): how a given ipnet_addrp_t
 * relates to one ipnet_t client stream.
 */
typedef enum {
	IPNETADDR_MYADDR = 0,	/* an address on my ipnetif_t. */
	IPNETADDR_MBCAST = 1,	/* a multicast or broadcast address. */
	IPNETADDR_UNKNOWN = 2	/* none of the above. */
} ipnet_addrtype_t;
109 109
110 110 /* Argument used for the ipnet_nicevent_taskq callback. */
111 111 typedef struct ipnet_nicevent_s {
112 112 nic_event_t ipne_event;
113 113 net_handle_t ipne_protocol;
114 114 netstackid_t ipne_stackid;
115 115 uint64_t ipne_ifindex;
116 116 uint64_t ipne_lifindex;
117 117 char ipne_ifname[LIFNAMSIZ];
118 118 } ipnet_nicevent_t;
119 119
120 120 static dev_info_t *ipnet_dip;
121 121 static major_t ipnet_major;
122 122 static ddi_taskq_t *ipnet_taskq; /* taskq for packets */
123 123 static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */
124 124 static id_space_t *ipnet_minor_space;
125 125 static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */
126 126 static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */
127 127 static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT;
128 128 static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept;
129 129 static bpf_itap_fn_t ipnet_itap;
130 130
131 131 static void ipnet_input(mblk_t *);
132 132 static int ipnet_wput(queue_t *, mblk_t *);
133 133 static int ipnet_rsrv(queue_t *);
134 134 static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
135 135 static int ipnet_close(queue_t *, int, cred_t *);
136 136 static void ipnet_ioctl(queue_t *, mblk_t *);
137 137 static void ipnet_iocdata(queue_t *, mblk_t *);
138 138 static void ipnet_wputnondata(queue_t *, mblk_t *);
139 139 static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
140 140 static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
141 141 static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
142 142 static void ipnet_inforeq(queue_t *q, mblk_t *mp);
143 143 static void ipnet_bindreq(queue_t *q, mblk_t *mp);
144 144 static void ipnet_unbindreq(queue_t *q, mblk_t *mp);
145 145 static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
146 146 static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
147 147 static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
148 148 static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
149 149 static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
150 150 static void ipnet_nicevent_task(void *);
151 151 static ipnetif_t *ipnetif_create(const char *, uint64_t, ipnet_stack_t *,
152 152 uint64_t);
153 153 static void ipnetif_remove(ipnetif_t *, ipnet_stack_t *);
154 154 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
155 155 static ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *);
156 156 static ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *);
157 157 static boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
158 158 static void ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *);
159 159 static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
160 160 static int ipnetif_compare_name(const void *, const void *);
161 161 static int ipnetif_compare_name_zone(const void *, const void *);
162 162 static int ipnetif_compare_index(const void *, const void *);
163 163 static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
164 164 static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
165 165 static void ipnetif_refhold(ipnetif_t *);
166 166 static void ipnetif_refrele(ipnetif_t *);
167 167 static void ipnet_walkers_inc(ipnet_stack_t *);
168 168 static void ipnet_walkers_dec(ipnet_stack_t *);
169 169 static void ipnet_register_netihook(ipnet_stack_t *);
170 170 static void *ipnet_stack_init(netstackid_t, netstack_t *);
171 171 static void ipnet_stack_fini(netstackid_t, void *);
172 172 static void ipnet_dispatch(void *);
173 173 static int ipobs_bounce_func(hook_event_token_t, hook_data_t, void *);
174 174 static int ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *);
175 175 static ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t);
176 176 static void ipnetif_clone_release(ipnetif_t *);
177 177
178 178 static struct qinit ipnet_rinit = {
179 179 NULL, /* qi_putp */
180 180 ipnet_rsrv, /* qi_srvp */
181 181 ipnet_open, /* qi_qopen */
182 182 ipnet_close, /* qi_qclose */
183 183 NULL, /* qi_qadmin */
184 184 &ipnet_minfo, /* qi_minfo */
185 185 };
186 186
187 187 static struct qinit ipnet_winit = {
188 188 ipnet_wput, /* qi_putp */
189 189 NULL, /* qi_srvp */
190 190 NULL, /* qi_qopen */
191 191 NULL, /* qi_qclose */
192 192 NULL, /* qi_qadmin */
193 193 &ipnet_minfo, /* qi_minfo */
194 194 };
195 195
196 196 static struct streamtab ipnet_info = {
197 197 &ipnet_rinit, &ipnet_winit
198 198 };
199 199
/*
 * Define ipnet_ops, the device operations referenced by modldrv below,
 * wiring in ipnet_attach(), ipnet_detach(), ipnet_devinfo() and the
 * ipnet_info stream table.
 */
DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
    ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
    ddi_quiesce_not_supported);
203 203
204 204 static struct modldrv modldrv = {
205 205 &mod_driverops,
206 206 "STREAMS ipnet driver",
207 207 &ipnet_ops
208 208 };
209 209
210 210 static struct modlinkage modlinkage = {
211 211 MODREV_1, &modldrv, NULL
212 212 };
213 213
214 214 /*
215 215 * This structure contains the template data (names and type) that is
216 216 * copied, in bulk, into the new kstats structure created by net_kstat_create.
217 217 * No actual statistical information is stored in this instance of the
218 218 * ipnet_kstats_t structure.
219 219 */
220 220 static ipnet_kstats_t stats_template = {
221 221 { "duplicationFail", KSTAT_DATA_UINT64 },
222 222 { "dispatchOk", KSTAT_DATA_UINT64 },
223 223 { "dispatchFail", KSTAT_DATA_UINT64 },
224 224 { "dispatchHeaderDrop", KSTAT_DATA_UINT64 },
225 225 { "dispatchDupDrop", KSTAT_DATA_UINT64 },
226 226 { "dispatchDeliver", KSTAT_DATA_UINT64 },
227 227 { "acceptOk", KSTAT_DATA_UINT64 },
228 228 { "acceptFail", KSTAT_DATA_UINT64 }
229 229 };
230 230
231 231 /*
232 232 * Walk the list of physical interfaces on the machine, for each
233 233 * interface create a new ipnetif_t and add any addresses to it. We
234 234 * need to do the walk twice, once for IPv4 and once for IPv6.
235 235 *
236 236 * The interfaces are destroyed as part of ipnet_stack_fini() for each
237 237 * stack. Note that we cannot do this initialization in
238 238 * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
239 239 */
240 240 static int
241 241 ipnetif_init(void)
242 242 {
243 243 netstack_handle_t nh;
244 244 netstack_t *ns;
245 245 ipnet_stack_t *ips;
246 246 int ret = 0;
247 247
248 248 netstack_next_init(&nh);
249 249 while ((ns = netstack_next(&nh)) != NULL) {
250 250 ips = ns->netstack_ipnet;
251 251 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0)
252 252 ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE);
253 253 netstack_rele(ns);
254 254 if (ret != 0)
255 255 break;
256 256 }
257 257 netstack_next_fini(&nh);
258 258 return (ret);
259 259 }
260 260
261 261 /*
262 262 * Standard module entry points.
263 263 */
264 264 int
265 265 _init(void)
266 266 {
267 267 int ret;
268 268 boolean_t netstack_registered = B_FALSE;
269 269
270 270 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
271 271 return (ENODEV);
272 272 ipnet_minor_space = id_space_create("ipnet_minor_space",
273 273 IPNET_MINOR_MIN, MAXMIN32);
274 274
275 275 /*
276 276 * We call ddi_taskq_create() with nthread == 1 to ensure in-order
277 277 * delivery of packets to clients. Note that we need to create the
278 278 * taskqs before calling netstack_register() since ipnet_stack_init()
279 279 * registers callbacks that use 'em.
280 280 */
281 281 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0);
282 282 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue",
283 283 1, TASKQ_DEFAULTPRI, 0);
284 284 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) {
285 285 ret = ENOMEM;
286 286 goto done;
287 287 }
288 288
289 289 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
290 290 netstack_registered = B_TRUE;
291 291
292 292 if ((ret = ipnetif_init()) == 0)
293 293 ret = mod_install(&modlinkage);
294 294 done:
295 295 if (ret != 0) {
296 296 if (ipnet_taskq != NULL)
297 297 ddi_taskq_destroy(ipnet_taskq);
298 298 if (ipnet_nicevent_taskq != NULL)
299 299 ddi_taskq_destroy(ipnet_nicevent_taskq);
300 300 if (netstack_registered)
301 301 netstack_unregister(NS_IPNET);
302 302 id_space_destroy(ipnet_minor_space);
303 303 }
304 304 return (ret);
305 305 }
306 306
307 307 int
308 308 _fini(void)
309 309 {
310 310 int err;
311 311
312 312 if ((err = mod_remove(&modlinkage)) != 0)
313 313 return (err);
314 314
315 315 netstack_unregister(NS_IPNET);
316 316 ddi_taskq_destroy(ipnet_nicevent_taskq);
317 317 ddi_taskq_destroy(ipnet_taskq);
318 318 id_space_destroy(ipnet_minor_space);
319 319 return (0);
320 320 }
321 321
322 322 int
323 323 _info(struct modinfo *modinfop)
324 324 {
325 325 return (mod_info(&modlinkage, modinfop));
326 326 }
327 327
328 328 static void
329 329 ipnet_register_netihook(ipnet_stack_t *ips)
330 330 {
331 331 int ret;
332 332 zoneid_t zoneid;
333 333 netid_t netid;
334 334
335 335 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents",
336 336 ips);
337 337
338 338 /*
339 339 * It is possible for an exclusive stack to be in the process of
340 340 * shutting down here, and the netid and protocol lookups could fail
341 341 * in that case.
342 342 */
343 343 zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid);
344 344 if ((netid = net_zoneidtonetid(zoneid)) == -1)
345 345 return;
346 346
347 347 if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) {
348 348 if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS,
349 349 ips->ips_nicevents)) != 0) {
350 350 VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
351 351 ips->ips_ndv4 = NULL;
352 352 cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks"
353 353 " in zone %d: %d", zoneid, ret);
354 354 }
355 355 }
356 356 if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) {
357 357 if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS,
358 358 ips->ips_nicevents)) != 0) {
359 359 VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
360 360 ips->ips_ndv6 = NULL;
361 361 cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks"
362 362 " in zone %d: %d", zoneid, ret);
363 363 }
364 364 }
365 365
366 366 /*
367 367 * Create a local set of kstats for each zone.
368 368 */
369 369 ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats",
370 370 "misc", KSTAT_TYPE_NAMED,
371 371 sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0);
372 372 if (ips->ips_kstatp != NULL) {
373 373 bcopy(&stats_template, &ips->ips_stats,
374 374 sizeof (ips->ips_stats));
375 375 ips->ips_kstatp->ks_data = &ips->ips_stats;
376 376 ips->ips_kstatp->ks_private =
377 377 (void *)(uintptr_t)ips->ips_netstack->netstack_stackid;
378 378 kstat_install(ips->ips_kstatp);
379 379 } else {
380 380 cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed",
381 381 "ipnet", "ipnet_stats", "misc");
382 382 }
383 383 }
384 384
385 385 /*
386 386 * This function is called on attach to build an initial view of the
387 387 * interfaces on the system. It will be called once for IPv4 and once
388 388 * for IPv6, although there is only one ipnet interface for both IPv4
389 389 * and IPv6 there are separate address lists.
390 390 */
391 391 static int
392 392 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
393 393 {
394 394 phy_if_t phyif;
395 395 lif_if_t lif;
396 396 ipnetif_t *ipnetif;
397 397 char name[LIFNAMSIZ];
398 398 boolean_t new_if = B_FALSE;
399 399 uint64_t ifflags;
400 400 int ret = 0;
401 401
402 402 /*
403 403 * If ipnet_register_netihook() was unable to initialize this
404 404 * stack's net_handle_t, then we cannot populate any interface
405 405 * information. This usually happens when we attempted to
406 406 * grab a net_handle_t as a stack was shutting down. We don't
407 407 * want to fail the entire _init() operation because of a
408 408 * stack shutdown (other stacks will continue to work just
409 409 * fine), so we silently return success here.
410 410 */
411 411 if (nd == NULL)
412 412 return (0);
413 413
414 414 /*
415 415 * Make sure we're not processing NIC events during the
416 416 * population of our interfaces and address lists.
417 417 */
418 418 mutex_enter(&ips->ips_event_lock);
419 419
420 420 for (phyif = net_phygetnext(nd, 0); phyif != 0;
421 421 phyif = net_phygetnext(nd, phyif)) {
422 422 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0)
423 423 continue;
424 424 ifflags = 0;
425 425 (void) net_getlifflags(nd, phyif, 0, &ifflags);
426 426 if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) {
427 427 ipnetif = ipnetif_create(name, phyif, ips, ifflags);
428 428 if (ipnetif == NULL) {
429 429 ret = ENOMEM;
430 430 goto done;
431 431 }
432 432 new_if = B_TRUE;
433 433 }
434 434 ipnetif->if_flags |=
435 435 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
436 436
437 437 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0;
438 438 lif = net_lifgetnext(nd, phyif, lif)) {
439 439 /*
440 440 * Skip addresses that aren't up. We'll add
441 441 * them when we receive an NE_LIF_UP event.
442 442 */
443 443 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 ||
444 444 !(ifflags & IFF_UP))
445 445 continue;
446 446 /* Don't add it if we already have it. */
447 447 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL)
448 448 continue;
449 449 ipnet_add_ifaddr(lif, ipnetif, nd);
450 450 }
451 451 if (!new_if)
452 452 ipnetif_refrele(ipnetif);
453 453 }
454 454
455 455 done:
456 456 mutex_exit(&ips->ips_event_lock);
457 457 return (ret);
458 458 }
459 459
460 460 static int
461 461 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
462 462 {
463 463 if (cmd != DDI_ATTACH)
464 464 return (DDI_FAILURE);
465 465
466 466 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO,
467 467 DDI_PSEUDO, 0) == DDI_FAILURE)
468 468 return (DDI_FAILURE);
469 469
470 470 ipnet_dip = dip;
471 471 return (DDI_SUCCESS);
472 472 }
473 473
474 474 static int
475 475 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
476 476 {
477 477 if (cmd != DDI_DETACH)
478 478 return (DDI_FAILURE);
479 479
480 480 ASSERT(dip == ipnet_dip);
481 481 ddi_remove_minor_node(ipnet_dip, NULL);
482 482 ipnet_dip = NULL;
483 483 return (DDI_SUCCESS);
484 484 }
485 485
486 486 /* ARGSUSED */
487 487 static int
488 488 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
489 489 {
490 490 int error = DDI_FAILURE;
491 491
492 492 switch (infocmd) {
493 493 case DDI_INFO_DEVT2INSTANCE:
494 494 *result = (void *)0;
495 495 error = DDI_SUCCESS;
496 496 break;
497 497 case DDI_INFO_DEVT2DEVINFO:
498 498 if (ipnet_dip != NULL) {
499 499 *result = ipnet_dip;
500 500 error = DDI_SUCCESS;
501 501 }
502 502 break;
503 503 }
504 504 return (error);
505 505 }
506 506
507 507 /* ARGSUSED */
508 508 static int
509 509 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
510 510 {
511 511 ipnet_t *ipnet;
512 512 netstack_t *ns = NULL;
513 513 ipnet_stack_t *ips;
514 514 int err = 0;
515 515 zoneid_t zoneid = crgetzoneid(crp);
516 516
517 517 /*
518 518 * If the system is labeled, only the global zone is allowed to open
519 519 * IP observability nodes.
520 520 */
521 521 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
522 522 return (EACCES);
523 523
524 524 /* We don't support open as a module */
525 525 if (sflag & MODOPEN)
526 526 return (ENOTSUP);
527 527
528 528 /* This driver is self-cloning, we don't support re-open. */
529 529 if (rq->q_ptr != NULL)
530 530 return (EBUSY);
531 531
532 532 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL)
533 533 return (ENOMEM);
534 534
535 535 VERIFY((ns = netstack_find_by_cred(crp)) != NULL);
536 536 ips = ns->netstack_ipnet;
537 537
538 538 rq->q_ptr = WR(rq)->q_ptr = ipnet;
539 539 ipnet->ipnet_rq = rq;
540 540 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
541 541 ipnet->ipnet_zoneid = zoneid;
542 542 ipnet->ipnet_dlstate = DL_UNBOUND;
543 543 ipnet->ipnet_ns = ns;
544 544
545 545 /*
546 546 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
547 547 * to be processed after ipnet_if is set and the ipnet_t has been
548 548 * inserted in the ips_str_list.
549 549 */
550 550 mutex_enter(&ips->ips_event_lock);
551 551 if (getminor(*dev) == IPNET_MINOR_LO) {
552 552 ipnet->ipnet_flags |= IPNET_LOMODE;
553 553 ipnet->ipnet_acceptfn = ipnet_loaccept;
554 554 } else {
555 555 ipnet->ipnet_acceptfn = ipnet_accept;
556 556 ipnet->ipnet_if = ipnetif_getby_dev(*dev, ips);
557 557 if (ipnet->ipnet_if == NULL ||
558 558 !ipnetif_in_zone(ipnet->ipnet_if, zoneid, ips)) {
559 559 err = ENODEV;
560 560 goto done;
561 561 }
562 562 }
563 563
564 564 mutex_enter(&ips->ips_walkers_lock);
565 565 while (ips->ips_walkers_cnt != 0)
566 566 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
567 567 list_insert_head(&ips->ips_str_list, ipnet);
568 568 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor);
569 569 qprocson(rq);
570 570
571 571 /*
572 572 * Only register our callback if we're the first open client; we call
573 573 * unregister in close() for the last open client.
574 574 */
575 575 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
576 576 ips->ips_hook = ipobs_register_hook(ns, ipnet_input);
577 577 mutex_exit(&ips->ips_walkers_lock);
578 578
579 579 done:
580 580 mutex_exit(&ips->ips_event_lock);
581 581 if (err != 0) {
582 582 netstack_rele(ns);
583 583 id_free(ipnet_minor_space, ipnet->ipnet_minor);
584 584 if (ipnet->ipnet_if != NULL)
585 585 ipnetif_refrele(ipnet->ipnet_if);
586 586 kmem_free(ipnet, sizeof (*ipnet));
587 587 }
588 588 return (err);
589 589 }
590 590
591 591 /* ARGSUSED */
592 592 static int
593 593 ipnet_close(queue_t *rq, int flags __unused, cred_t *credp __unused)
594 594 {
595 595 ipnet_t *ipnet = rq->q_ptr;
596 596 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet;
597 597
598 598 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
599 599 ipnet_leave_allmulti(ipnet->ipnet_if, ips);
600 600 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
601 601 ipnet_leave_allmulti(ipnet->ipnet_if, ips);
602 602
603 603 mutex_enter(&ips->ips_walkers_lock);
604 604 while (ips->ips_walkers_cnt != 0)
605 605 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
606 606
607 607 qprocsoff(rq);
608 608
609 609 list_remove(&ips->ips_str_list, ipnet);
610 610 if (ipnet->ipnet_if != NULL)
611 611 ipnetif_refrele(ipnet->ipnet_if);
612 612 id_free(ipnet_minor_space, ipnet->ipnet_minor);
613 613
614 614 if (list_is_empty(&ips->ips_str_list)) {
615 615 ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook);
616 616 ips->ips_hook = NULL;
617 617 }
618 618
619 619 kmem_free(ipnet, sizeof (*ipnet));
620 620
621 621 mutex_exit(&ips->ips_walkers_lock);
622 622 netstack_rele(ips->ips_netstack);
623 623 return (0);
624 624 }
625 625
626 626 static int
627 627 ipnet_wput(queue_t *q, mblk_t *mp)
628 628 {
629 629 switch (mp->b_datap->db_type) {
630 630 case M_FLUSH:
631 631 if (*mp->b_rptr & FLUSHW) {
632 632 flushq(q, FLUSHDATA);
633 633 *mp->b_rptr &= ~FLUSHW;
634 634 }
635 635 if (*mp->b_rptr & FLUSHR)
636 636 qreply(q, mp);
637 637 else
638 638 freemsg(mp);
639 639 break;
640 640 case M_PROTO:
641 641 case M_PCPROTO:
642 642 ipnet_wputnondata(q, mp);
643 643 break;
644 644 case M_IOCTL:
645 645 ipnet_ioctl(q, mp);
646 646 break;
647 647 case M_IOCDATA:
648 648 ipnet_iocdata(q, mp);
649 649 break;
650 650 default:
651 651 freemsg(mp);
652 652 break;
653 653 }
654 654 return (0);
655 655 }
656 656
657 657 static int
658 658 ipnet_rsrv(queue_t *q)
659 659 {
660 660 mblk_t *mp;
661 661
662 662 while ((mp = getq(q)) != NULL) {
663 663 ASSERT(DB_TYPE(mp) == M_DATA);
664 664 if (canputnext(q)) {
665 665 putnext(q, mp);
666 666 } else {
667 667 (void) putbq(q, mp);
668 668 break;
669 669 }
670 670 }
671 671 return (0);
672 672 }
673 673
674 674 static void
675 675 ipnet_ioctl(queue_t *q, mblk_t *mp)
676 676 {
677 677 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
678 678
679 679 switch (iocp->ioc_cmd) {
680 680 case DLIOCRAW:
681 681 miocack(q, mp, 0, 0);
682 682 break;
683 683 case DLIOCIPNETINFO:
684 684 if (iocp->ioc_count == TRANSPARENT) {
685 685 mcopyin(mp, NULL, sizeof (uint_t), NULL);
686 686 qreply(q, mp);
687 687 break;
688 688 }
689 689 /* We don't support I_STR with DLIOCIPNETINFO. */
690 690 /* FALLTHROUGH */
691 691 default:
692 692 miocnak(q, mp, 0, EINVAL);
693 693 break;
694 694 }
695 695 }
696 696
697 697 static void
698 698 ipnet_iocdata(queue_t *q, mblk_t *mp)
699 699 {
700 700 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
701 701 ipnet_t *ipnet = q->q_ptr;
702 702
703 703 switch (iocp->ioc_cmd) {
704 704 case DLIOCIPNETINFO:
705 705 if (*(int *)mp->b_cont->b_rptr == 1)
706 706 ipnet->ipnet_flags |= IPNET_INFO;
707 707 else if (*(int *)mp->b_cont->b_rptr == 0)
708 708 ipnet->ipnet_flags &= ~IPNET_INFO;
709 709 else
710 710 goto iocnak;
711 711 miocack(q, mp, 0, DL_IPNETINFO_VERSION);
712 712 break;
713 713 default:
714 714 iocnak:
715 715 miocnak(q, mp, 0, EINVAL);
716 716 break;
717 717 }
718 718 }
719 719
720 720 static void
721 721 ipnet_wputnondata(queue_t *q, mblk_t *mp)
722 722 {
723 723 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
724 724 t_uscalar_t prim = dlp->dl_primitive;
725 725
726 726 switch (prim) {
727 727 case DL_INFO_REQ:
728 728 ipnet_inforeq(q, mp);
729 729 break;
730 730 case DL_UNBIND_REQ:
731 731 ipnet_unbindreq(q, mp);
732 732 break;
733 733 case DL_BIND_REQ:
734 734 ipnet_bindreq(q, mp);
735 735 break;
736 736 case DL_PROMISCON_REQ:
737 737 ipnet_dlpromisconreq(q, mp);
738 738 break;
739 739 case DL_PROMISCOFF_REQ:
740 740 ipnet_dlpromiscoffreq(q, mp);
741 741 break;
742 742 case DL_UNITDATA_REQ:
743 743 case DL_DETACH_REQ:
744 744 case DL_PHYS_ADDR_REQ:
745 745 case DL_SET_PHYS_ADDR_REQ:
746 746 case DL_ENABMULTI_REQ:
747 747 case DL_DISABMULTI_REQ:
748 748 case DL_ATTACH_REQ:
749 749 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0);
750 750 break;
751 751 default:
752 752 dlerrorack(q, mp, prim, DL_BADPRIM, 0);
753 753 break;
754 754 }
755 755 }
756 756
757 757 static void
758 758 ipnet_inforeq(queue_t *q, mblk_t *mp)
759 759 {
760 760 dl_info_ack_t *dlip;
761 761 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t);
762 762
763 763 if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
764 764 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0);
765 765 return;
766 766 }
767 767
768 768 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL)
769 769 return;
770 770
771 771 dlip = (dl_info_ack_t *)mp->b_rptr;
772 772 *dlip = ipnet_infoack;
773 773 qreply(q, mp);
774 774 }
775 775
776 776 static void
777 777 ipnet_bindreq(queue_t *q, mblk_t *mp)
778 778 {
779 779 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
780 780 ipnet_t *ipnet = q->q_ptr;
781 781
782 782 if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
783 783 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0);
784 784 return;
785 785 }
786 786
787 787 switch (dlp->bind_req.dl_sap) {
788 788 case 0 :
789 789 ipnet->ipnet_family = AF_UNSPEC;
790 790 break;
791 791 case IPV4_VERSION :
792 792 ipnet->ipnet_family = AF_INET;
793 793 break;
794 794 case IPV6_VERSION :
795 795 ipnet->ipnet_family = AF_INET6;
796 796 break;
797 797 default :
798 798 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0);
799 799 return;
800 800 /*NOTREACHED*/
801 801 }
802 802
803 803 ipnet->ipnet_dlstate = DL_IDLE;
804 804 dlbindack(q, mp, dlp->bind_req.dl_sap, 0, 0, 0, 0);
805 805 }
806 806
807 807 static void
808 808 ipnet_unbindreq(queue_t *q, mblk_t *mp)
809 809 {
810 810 ipnet_t *ipnet = q->q_ptr;
811 811
812 812 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
813 813 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
814 814 return;
815 815 }
816 816
817 817 if (ipnet->ipnet_dlstate != DL_IDLE) {
818 818 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
819 819 } else {
820 820 ipnet->ipnet_dlstate = DL_UNBOUND;
821 821 ipnet->ipnet_family = AF_UNSPEC;
822 822 dlokack(q, mp, DL_UNBIND_REQ);
823 823 }
824 824 }
825 825
826 826 static void
827 827 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp)
828 828 {
829 829 ipnet_t *ipnet = q->q_ptr;
830 830 t_uscalar_t level;
831 831 int err;
832 832
833 833 if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
834 834 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
835 835 return;
836 836 }
837 837
838 838 if (ipnet->ipnet_flags & IPNET_LOMODE) {
839 839 dlokack(q, mp, DL_PROMISCON_REQ);
840 840 return;
841 841 }
842 842
843 843 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
844 844 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
845 845 if ((err = ipnet_join_allmulti(ipnet->ipnet_if,
846 846 ipnet->ipnet_ns->netstack_ipnet)) != 0) {
847 847 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err);
848 848 return;
849 849 }
850 850 }
851 851
852 852 switch (level) {
853 853 case DL_PROMISC_PHYS:
854 854 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS;
855 855 break;
856 856 case DL_PROMISC_SAP:
857 857 ipnet->ipnet_flags |= IPNET_PROMISC_SAP;
858 858 break;
859 859 case DL_PROMISC_MULTI:
860 860 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI;
861 861 break;
862 862 default:
863 863 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
864 864 return;
865 865 }
866 866
867 867 dlokack(q, mp, DL_PROMISCON_REQ);
868 868 }
869 869
870 870 static void
871 871 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp)
872 872 {
873 873 ipnet_t *ipnet = q->q_ptr;
874 874 t_uscalar_t level;
875 875 uint16_t orig_ipnet_flags = ipnet->ipnet_flags;
876 876
877 877 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
878 878 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
879 879 return;
880 880 }
881 881
882 882 if (ipnet->ipnet_flags & IPNET_LOMODE) {
883 883 dlokack(q, mp, DL_PROMISCOFF_REQ);
884 884 return;
885 885 }
886 886
887 887 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
888 888 switch (level) {
889 889 case DL_PROMISC_PHYS:
890 890 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
891 891 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS;
892 892 break;
893 893 case DL_PROMISC_SAP:
894 894 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP)
895 895 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP;
896 896 break;
897 897 case DL_PROMISC_MULTI:
898 898 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
899 899 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI;
900 900 break;
901 901 default:
902 902 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
903 903 return;
904 904 }
905 905
906 906 if (orig_ipnet_flags == ipnet->ipnet_flags) {
907 907 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
908 908 return;
909 909 }
910 910
911 911 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
912 912 ipnet_leave_allmulti(ipnet->ipnet_if,
913 913 ipnet->ipnet_ns->netstack_ipnet);
914 914 }
915 915
916 916 dlokack(q, mp, DL_PROMISCOFF_REQ);
917 917 }
918 918
919 919 static int
920 920 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
921 921 {
922 922 int err = 0;
923 923 ip_stack_t *ipst = ips->ips_netstack->netstack_ip;
924 924 uint64_t index = ipnetif->if_index;
925 925
926 926 mutex_enter(&ips->ips_event_lock);
927 927 if (ipnetif->if_multicnt == 0) {
928 928 ASSERT((ipnetif->if_flags &
929 929 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0);
930 930 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) {
931 931 err = ip_join_allmulti(index, B_FALSE, ipst);
932 932 if (err != 0)
933 933 goto done;
934 934 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI;
935 935 }
936 936 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) {
937 937 err = ip_join_allmulti(index, B_TRUE, ipst);
938 938 if (err != 0 &&
939 939 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) {
940 940 (void) ip_leave_allmulti(index, B_FALSE, ipst);
941 941 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
942 942 goto done;
943 943 }
944 944 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI;
945 945 }
946 946 }
947 947 ipnetif->if_multicnt++;
948 948
949 949 done:
950 950 mutex_exit(&ips->ips_event_lock);
951 951 return (err);
952 952 }
953 953
954 954 static void
955 955 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
956 956 {
957 957 int err;
958 958 ip_stack_t *ipst = ips->ips_netstack->netstack_ip;
959 959 uint64_t index = ipnetif->if_index;
960 960
961 961 mutex_enter(&ips->ips_event_lock);
962 962 ASSERT(ipnetif->if_multicnt != 0);
963 963 if (--ipnetif->if_multicnt == 0) {
964 964 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) {
965 965 err = ip_leave_allmulti(index, B_FALSE, ipst);
966 966 ASSERT(err == 0 || err == ENODEV);
967 967 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
968 968 }
969 969 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) {
970 970 err = ip_leave_allmulti(index, B_TRUE, ipst);
971 971 ASSERT(err == 0 || err == ENODEV);
972 972 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI;
973 973 }
974 974 }
975 975 mutex_exit(&ips->ips_event_lock);
976 976 }
977 977
978 978 /*
979 979 * Allocate a new mblk_t and put a dl_ipnetinfo_t in it.
980 980 * The structure it copies the header information from,
981 981 * hook_pkt_observe_t, is constructed using network byte
982 982 * order in ipobs_hook(), so there is no conversion here.
983 983 */
984 984 static mblk_t *
985 985 ipnet_addheader(hook_pkt_observe_t *hdr, mblk_t *mp)
986 986 {
987 987 mblk_t *dlhdr;
988 988 dl_ipnetinfo_t *dl;
989 989
990 990 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) {
991 991 freemsg(mp);
992 992 return (NULL);
993 993 }
994 994 dl = (dl_ipnetinfo_t *)dlhdr->b_rptr;
995 995 dl->dli_version = DL_IPNETINFO_VERSION;
996 996 dl->dli_family = hdr->hpo_family;
997 997 dl->dli_htype = hdr->hpo_htype;
998 998 dl->dli_pktlen = hdr->hpo_pktlen;
999 999 dl->dli_ifindex = hdr->hpo_ifindex;
1000 1000 dl->dli_grifindex = hdr->hpo_grifindex;
1001 1001 dl->dli_zsrc = hdr->hpo_zsrc;
1002 1002 dl->dli_zdst = hdr->hpo_zdst;
1003 1003 dlhdr->b_wptr += sizeof (*dl);
1004 1004 dlhdr->b_cont = mp;
1005 1005
1006 1006 return (dlhdr);
1007 1007 }
1008 1008
1009 1009 static ipnet_addrtype_t
1010 1010 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr)
1011 1011 {
1012 1012 list_t *list;
1013 1013 ipnetif_t *ipnetif = ipnet->ipnet_if;
1014 1014 ipnetif_addr_t *ifaddr;
1015 1015 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN;
1016 1016
1017 1017 /* First check if the address is multicast or limited broadcast. */
1018 1018 switch (addr->iap_family) {
1019 1019 case AF_INET:
1020 1020 if (CLASSD(*(addr->iap_addr4)) ||
1021 1021 *(addr->iap_addr4) == INADDR_BROADCAST)
1022 1022 return (IPNETADDR_MBCAST);
1023 1023 break;
1024 1024 case AF_INET6:
1025 1025 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6))
1026 1026 return (IPNETADDR_MBCAST);
1027 1027 break;
1028 1028 }
1029 1029
1030 1030 /*
1031 1031 * Walk the address list to see if the address belongs to our
1032 1032 * interface or is one of our subnet broadcast addresses.
1033 1033 */
1034 1034 mutex_enter(&ipnetif->if_addr_lock);
1035 1035 list = (addr->iap_family == AF_INET) ?
1036 1036 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list;
1037 1037 for (ifaddr = list_head(list);
1038 1038 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN;
1039 1039 ifaddr = list_next(list, ifaddr)) {
1040 1040 /*
1041 1041 * If we're not in the global zone, then only look at
1042 1042 * addresses in our zone.
1043 1043 */
1044 1044 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
1045 1045 ipnet->ipnet_zoneid != ifaddr->ifa_zone)
1046 1046 continue;
1047 1047 switch (addr->iap_family) {
1048 1048 case AF_INET:
1049 1049 if (ifaddr->ifa_ip4addr != INADDR_ANY &&
1050 1050 *(addr->iap_addr4) == ifaddr->ifa_ip4addr)
1051 1051 addrtype = IPNETADDR_MYADDR;
1052 1052 else if (ifaddr->ifa_brdaddr != INADDR_ANY &&
1053 1053 *(addr->iap_addr4) == ifaddr->ifa_brdaddr)
1054 1054 addrtype = IPNETADDR_MBCAST;
1055 1055 break;
1056 1056 case AF_INET6:
1057 1057 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6,
1058 1058 &ifaddr->ifa_ip6addr))
1059 1059 addrtype = IPNETADDR_MYADDR;
1060 1060 break;
1061 1061 }
1062 1062 }
1063 1063 mutex_exit(&ipnetif->if_addr_lock);
1064 1064
1065 1065 return (addrtype);
1066 1066 }
1067 1067
1068 1068 /*
1069 1069 * Verify if the packet contained in hdr should be passed up to the
1070 1070 * ipnet client stream.
1071 1071 */
1072 1072 static boolean_t
1073 1073 ipnet_accept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
1074 1074 ipnet_addrp_t *dst)
1075 1075 {
1076 1076 boolean_t obsif;
1077 1077 uint64_t ifindex = ipnet->ipnet_if->if_index;
1078 1078 ipnet_addrtype_t srctype;
1079 1079 ipnet_addrtype_t dsttype;
1080 1080
1081 1081 srctype = ipnet_get_addrtype(ipnet, src);
1082 1082 dsttype = ipnet_get_addrtype(ipnet, dst);
1083 1083
1084 1084 /*
1085 1085 * If the packet's ifindex matches ours, or the packet's group ifindex
1086 1086 * matches ours, it's on the interface we're observing. (Thus,
1087 1087 * observing on the group ifindex matches all ifindexes in the group.)
1088 1088 */
1089 1089 obsif = (ntohl(hdr->hpo_ifindex) == ifindex ||
1090 1090 ntohl(hdr->hpo_grifindex) == ifindex);
1091 1091
1092 1092 DTRACE_PROBE5(ipnet_accept__addr,
1093 1093 ipnet_addrtype_t, srctype, ipnet_addrp_t *, src,
1094 1094 ipnet_addrtype_t, dsttype, ipnet_addrp_t *, dst,
1095 1095 boolean_t, obsif);
1096 1096
1097 1097 /*
1098 1098 * Do not allow an ipnet stream to see packets that are not from or to
1099 1099 * its zone. The exception is when zones are using the shared stack
1100 1100 * model. In this case, streams in the global zone have visibility
1101 1101 * into other shared-stack zones, and broadcast and multicast traffic
1102 1102 * is visible by all zones in the stack.
1103 1103 */
1104 1104 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
1105 1105 dsttype != IPNETADDR_MBCAST) {
1106 1106 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
1107 1107 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
1108 1108 return (B_FALSE);
1109 1109 }
1110 1110
1111 1111 /*
1112 1112 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the
1113 1113 * packet's IP version.
1114 1114 */
1115 1115 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) &&
1116 1116 ipnet->ipnet_family != hdr->hpo_family)
1117 1117 return (B_FALSE);
1118 1118
1119 1119 /* If the destination address is ours, then accept the packet. */
1120 1120 if (dsttype == IPNETADDR_MYADDR)
1121 1121 return (B_TRUE);
1122 1122
1123 1123 /*
1124 1124 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are
1125 1125 * sent or received on the interface we're observing, or packets that
1126 1126 * have our source address (this allows us to see packets we send).
1127 1127 */
1128 1128 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) {
1129 1129 if (srctype == IPNETADDR_MYADDR || obsif)
1130 1130 return (B_TRUE);
1131 1131 }
1132 1132
1133 1133 /*
1134 1134 * We accept multicast and broadcast packets transmitted or received
1135 1135 * on the interface we're observing.
1136 1136 */
1137 1137 if (dsttype == IPNETADDR_MBCAST && obsif)
1138 1138 return (B_TRUE);
1139 1139
1140 1140 return (B_FALSE);
1141 1141 }
1142 1142
1143 1143 /*
1144 1144 * Verify if the packet contained in hdr should be passed up to the ipnet
1145 1145 * client stream that's in IPNET_LOMODE.
1146 1146 */
1147 1147 /* ARGSUSED */
1148 1148 static boolean_t
1149 1149 ipnet_loaccept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
1150 1150 ipnet_addrp_t *dst)
1151 1151 {
1152 1152 if (hdr->hpo_htype != htons(IPOBS_HOOK_LOCAL)) {
1153 1153 /*
1154 1154 * ipnet_if is only NULL for IPNET_MINOR_LO devices.
1155 1155 */
1156 1156 if (ipnet->ipnet_if == NULL)
1157 1157 return (B_FALSE);
1158 1158 }
1159 1159
1160 1160 /*
1161 1161 * An ipnet stream must not see packets that are not from/to its zone.
1162 1162 */
1163 1163 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) {
1164 1164 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
1165 1165 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
1166 1166 return (B_FALSE);
1167 1167 }
1168 1168
1169 1169 return (ipnet->ipnet_family == AF_UNSPEC ||
1170 1170 ipnet->ipnet_family == hdr->hpo_family);
1171 1171 }
1172 1172
1173 1173 static void
1174 1174 ipnet_dispatch(void *arg)
1175 1175 {
1176 1176 mblk_t *mp = arg;
1177 1177 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
1178 1178 ipnet_t *ipnet;
1179 1179 mblk_t *netmp;
1180 1180 list_t *list;
1181 1181 ipnet_stack_t *ips;
1182 1182 ipnet_addrp_t src;
1183 1183 ipnet_addrp_t dst;
1184 1184
1185 1185 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
1186 1186
1187 1187 netmp = hdr->hpo_pkt->b_cont;
1188 1188 src.iap_family = hdr->hpo_family;
1189 1189 dst.iap_family = hdr->hpo_family;
1190 1190
1191 1191 if (hdr->hpo_family == AF_INET) {
1192 1192 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
1193 1193 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
1194 1194 } else {
1195 1195 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
1196 1196 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
1197 1197 }
1198 1198
1199 1199 ipnet_walkers_inc(ips);
1200 1200
1201 1201 list = &ips->ips_str_list;
1202 1202 for (ipnet = list_head(list); ipnet != NULL;
1203 1203 ipnet = list_next(list, ipnet)) {
1204 1204 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
1205 1205 IPSK_BUMP(ips, ik_acceptFail);
1206 1206 continue;
1207 1207 }
1208 1208 IPSK_BUMP(ips, ik_acceptOk);
1209 1209
1210 1210 if (list_next(list, ipnet) == NULL) {
1211 1211 netmp = hdr->hpo_pkt->b_cont;
1212 1212 hdr->hpo_pkt->b_cont = NULL;
1213 1213 } else {
1214 1214 if ((netmp = dupmsg(hdr->hpo_pkt->b_cont)) == NULL &&
1215 1215 (netmp = copymsg(hdr->hpo_pkt->b_cont)) == NULL) {
1216 1216 IPSK_BUMP(ips, ik_duplicationFail);
1217 1217 continue;
1218 1218 }
1219 1219 }
1220 1220
1221 1221 if (ipnet->ipnet_flags & IPNET_INFO) {
1222 1222 if ((netmp = ipnet_addheader(hdr, netmp)) == NULL) {
1223 1223 IPSK_BUMP(ips, ik_dispatchHeaderDrop);
1224 1224 continue;
1225 1225 }
1226 1226 }
1227 1227
1228 1228 if (ipnet->ipnet_rq->q_first == NULL &&
1229 1229 canputnext(ipnet->ipnet_rq)) {
1230 1230 putnext(ipnet->ipnet_rq, netmp);
1231 1231 IPSK_BUMP(ips, ik_dispatchDeliver);
1232 1232 } else if (canput(ipnet->ipnet_rq)) {
1233 1233 (void) putq(ipnet->ipnet_rq, netmp);
1234 1234 IPSK_BUMP(ips, ik_dispatchDeliver);
1235 1235 } else {
1236 1236 freemsg(netmp);
1237 1237 IPSK_BUMP(ips, ik_dispatchPutDrop);
1238 1238 }
1239 1239 }
1240 1240
1241 1241 ipnet_walkers_dec(ips);
1242 1242
1243 1243 freemsg(mp);
1244 1244 }
1245 1245
1246 1246 static void
1247 1247 ipnet_input(mblk_t *mp)
1248 1248 {
1249 1249 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
1250 1250 ipnet_stack_t *ips;
1251 1251
1252 1252 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
1253 1253
1254 1254 if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) !=
1255 1255 DDI_SUCCESS) {
1256 1256 IPSK_BUMP(ips, ik_dispatchFail);
1257 1257 freemsg(mp);
1258 1258 } else {
1259 1259 IPSK_BUMP(ips, ik_dispatchOk);
1260 1260 }
1261 1261 }
1262 1262
1263 1263 static ipnetif_t *
1264 1264 ipnet_alloc_if(ipnet_stack_t *ips)
1265 1265 {
1266 1266 ipnetif_t *ipnetif;
1267 1267
1268 1268 if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL)
1269 1269 return (NULL);
1270 1270
1271 1271 mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
1272 1272 list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
1273 1273 offsetof(ipnetif_addr_t, ifa_link));
1274 1274 list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
1275 1275 offsetof(ipnetif_addr_t, ifa_link));
1276 1276 mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
1277 1277
1278 1278 ipnetif->if_stackp = ips;
1279 1279
1280 1280 return (ipnetif);
1281 1281 }
1282 1282
1283 1283 /*
1284 1284 * Create a new ipnetif_t and new minor node for it. If creation is
1285 1285 * successful the new ipnetif_t is inserted into an avl_tree
1286 1286 * containing ipnetif's for this stack instance.
1287 1287 */
1288 1288 static ipnetif_t *
1289 1289 ipnetif_create(const char *name, uint64_t index, ipnet_stack_t *ips,
1290 1290 uint64_t ifflags)
1291 1291 {
1292 1292 ipnetif_t *ipnetif;
1293 1293 avl_index_t where = 0;
1294 1294 minor_t ifminor;
1295 1295
1296 1296 /*
1297 1297 * Because ipnetif_create() can be called from a NIC event
1298 1298 * callback, it should not block.
1299 1299 */
1300 1300 ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
1301 1301 if (ifminor == (minor_t)-1)
1302 1302 return (NULL);
1303 1303 if ((ipnetif = ipnet_alloc_if(ips)) == NULL) {
1304 1304 id_free(ipnet_minor_space, ifminor);
1305 1305 return (NULL);
1306 1306 }
1307 1307
1308 1308 (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ);
1309 1309 ipnetif->if_index = (uint_t)index;
1310 1310 ipnetif->if_zoneid = netstack_get_zoneid(ips->ips_netstack);
1311 1311 ipnetif->if_dev = makedevice(ipnet_major, ifminor);
1312 1312
1313 1313 ipnetif->if_refcnt = 1;
1314 1314 if ((ifflags & IFF_LOOPBACK) != 0)
1315 1315 ipnetif->if_flags = IPNETIF_LOOPBACK;
1316 1316
1317 1317 mutex_enter(&ips->ips_avl_lock);
1318 1318 VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
1319 1319 avl_insert(&ips->ips_avl_by_index, ipnetif, where);
1320 1320 VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
1321 1321 avl_insert(&ips->ips_avl_by_name, ipnetif, where);
1322 1322 mutex_exit(&ips->ips_avl_lock);
1323 1323
1324 1324 return (ipnetif);
1325 1325 }
1326 1326
1327 1327 static void
1328 1328 ipnetif_remove(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1329 1329 {
1330 1330 ipnet_t *ipnet;
1331 1331
1332 1332 ipnet_walkers_inc(ips);
1333 1333 /* Send a SIGHUP to all open streams associated with this ipnetif. */
1334 1334 for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL;
1335 1335 ipnet = list_next(&ips->ips_str_list, ipnet)) {
1336 1336 if (ipnet->ipnet_if == ipnetif)
1337 1337 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1338 1338 }
1339 1339 ipnet_walkers_dec(ips);
1340 1340 mutex_enter(&ips->ips_avl_lock);
1341 1341 avl_remove(&ips->ips_avl_by_index, ipnetif);
1342 1342 avl_remove(&ips->ips_avl_by_name, ipnetif);
1343 1343 mutex_exit(&ips->ips_avl_lock);
1344 1344 /*
1345 1345 * Release the reference we implicitly held in ipnetif_create().
1346 1346 */
1347 1347 ipnetif_refrele(ipnetif);
1348 1348 }
1349 1349
1350 1350 static void
1351 1351 ipnet_purge_addrlist(list_t *addrlist)
1352 1352 {
1353 1353 ipnetif_addr_t *ifa;
1354 1354
1355 1355 while ((ifa = list_head(addrlist)) != NULL) {
1356 1356 list_remove(addrlist, ifa);
1357 1357 if (ifa->ifa_shared != NULL)
1358 1358 ipnetif_clone_release(ifa->ifa_shared);
1359 1359 kmem_free(ifa, sizeof (*ifa));
1360 1360 }
1361 1361 }
1362 1362
1363 1363 static void
1364 1364 ipnetif_free(ipnetif_t *ipnetif)
1365 1365 {
1366 1366 ASSERT(ipnetif->if_refcnt == 0);
1367 1367 ASSERT(ipnetif->if_sharecnt == 0);
1368 1368
1369 1369 /* Remove IPv4/v6 address lists from the ipnetif */
1370 1370 ipnet_purge_addrlist(&ipnetif->if_ip4addr_list);
1371 1371 list_destroy(&ipnetif->if_ip4addr_list);
1372 1372 ipnet_purge_addrlist(&ipnetif->if_ip6addr_list);
1373 1373 list_destroy(&ipnetif->if_ip6addr_list);
1374 1374 mutex_destroy(&ipnetif->if_addr_lock);
1375 1375 mutex_destroy(&ipnetif->if_reflock);
1376 1376 if (ipnetif->if_dev != 0)
1377 1377 id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
1378 1378 kmem_free(ipnetif, sizeof (*ipnetif));
1379 1379 }
1380 1380
1381 1381 /*
1382 1382 * Create an ipnetif_addr_t with the given logical interface id (lif)
1383 1383 * and add it to the supplied ipnetif. The lif is the netinfo
1384 1384 * representation of logical interface id, and we use this id to match
1385 1385 * incoming netinfo events against our lists of addresses.
1386 1386 */
1387 1387 static void
1388 1388 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd)
1389 1389 {
1390 1390 ipnetif_addr_t *ifaddr;
1391 1391 zoneid_t zoneid;
1392 1392 struct sockaddr_in bcast;
1393 1393 struct sockaddr_storage addr;
1394 1394 net_ifaddr_t type = NA_ADDRESS;
1395 1395 uint64_t phyif = ipnetif->if_index;
1396 1396
1397 1397 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 ||
1398 1398 net_getlifzone(nd, phyif, lif, &zoneid) != 0)
1399 1399 return;
1400 1400
1401 1401 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL)
1402 1402 return;
1403 1403 ifaddr->ifa_zone = zoneid;
1404 1404 ifaddr->ifa_id = lif;
1405 1405 ifaddr->ifa_shared = NULL;
1406 1406
1407 1407 switch (addr.ss_family) {
1408 1408 case AF_INET:
1409 1409 ifaddr->ifa_ip4addr =
1410 1410 ((struct sockaddr_in *)&addr)->sin_addr.s_addr;
1411 1411 /*
1412 1412 * Try and get the broadcast address. Note that it's okay for
1413 1413 * an interface to not have a broadcast address, so we don't
1414 1414 * fail the entire operation if net_getlifaddr() fails here.
1415 1415 */
1416 1416 type = NA_BROADCAST;
1417 1417 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0)
1418 1418 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr;
1419 1419 break;
1420 1420 case AF_INET6:
1421 1421 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr;
1422 1422 break;
1423 1423 }
1424 1424
1425 1425 /*
1426 1426 * The zoneid stored in ipnetif_t needs to correspond to the actual
1427 1427 * zone the address is being used in. This facilitates finding the
1428 1428 * correct netstack_t pointer, amongst other things, later.
1429 1429 */
1430 1430 if (zoneid == ALL_ZONES)
1431 1431 zoneid = GLOBAL_ZONEID;
1432 1432
1433 1433 mutex_enter(&ipnetif->if_addr_lock);
1434 1434 if (zoneid != ipnetif->if_zoneid) {
1435 1435 ipnetif_t *ifp2;
1436 1436
1437 1437 ifp2 = ipnetif_clone_create(ipnetif, zoneid);
1438 1438 ifaddr->ifa_shared = ifp2;
1439 1439 }
1440 1440 list_insert_tail(addr.ss_family == AF_INET ?
1441 1441 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr);
1442 1442 mutex_exit(&ipnetif->if_addr_lock);
1443 1443 }
1444 1444
1445 1445 static void
1446 1446 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
1447 1447 {
1448 1448 mutex_enter(&ipnetif->if_addr_lock);
1449 1449 if (ifaddr->ifa_shared != NULL)
1450 1450 ipnetif_clone_release(ifaddr->ifa_shared);
1451 1451
1452 1452 list_remove(isv6 ?
1453 1453 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr);
1454 1454 mutex_exit(&ipnetif->if_addr_lock);
1455 1455 kmem_free(ifaddr, sizeof (*ifaddr));
1456 1456 }
1457 1457
1458 1458 static void
1459 1459 ipnet_plumb_ev(ipnet_nicevent_t *ipne, ipnet_stack_t *ips, boolean_t isv6)
1460 1460 {
1461 1461 ipnetif_t *ipnetif;
1462 1462 boolean_t refrele_needed = B_TRUE;
1463 1463 uint64_t ifflags;
1464 1464 uint64_t ifindex;
1465 1465 char *ifname;
1466 1466
1467 1467 ifflags = 0;
1468 1468 ifname = ipne->ipne_ifname;
1469 1469 ifindex = ipne->ipne_ifindex;
1470 1470
1471 1471 (void) net_getlifflags(ipne->ipne_protocol, ifindex, 0, &ifflags);
1472 1472
1473 1473 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) {
1474 1474 ipnetif = ipnetif_create(ifname, ifindex, ips, ifflags);
1475 1475 refrele_needed = B_FALSE;
1476 1476 }
1477 1477 if (ipnetif != NULL) {
1478 1478 ipnetif->if_flags |=
1479 1479 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
1480 1480 }
1481 1481
1482 1482 if (ipnetif->if_multicnt != 0) {
1483 1483 if (ip_join_allmulti(ifindex, isv6,
1484 1484 ips->ips_netstack->netstack_ip) == 0) {
1485 1485 ipnetif->if_flags |=
1486 1486 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI;
1487 1487 }
1488 1488 }
1489 1489
1490 1490 if (refrele_needed)
1491 1491 ipnetif_refrele(ipnetif);
1492 1492 }
1493 1493
1494 1494 static void
1495 1495 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6)
1496 1496 {
1497 1497 ipnetif_t *ipnetif;
1498 1498
1499 1499 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1500 1500 return;
1501 1501
1502 1502 mutex_enter(&ipnetif->if_addr_lock);
1503 1503 ipnet_purge_addrlist(isv6 ?
1504 1504 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list);
1505 1505 mutex_exit(&ipnetif->if_addr_lock);
1506 1506
1507 1507 /*
1508 1508 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive
1509 1509 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif
1510 1510 * if both IPv4 and IPv6 interfaces have been unplumbed.
1511 1511 */
1512 1512 ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED;
1513 1513 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED)))
1514 1514 ipnetif_remove(ipnetif, ips);
1515 1515 ipnetif_refrele(ipnetif);
1516 1516 }
1517 1517
1518 1518 static void
1519 1519 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd,
1520 1520 ipnet_stack_t *ips, boolean_t isv6)
1521 1521 {
1522 1522 ipnetif_t *ipnetif;
1523 1523 ipnetif_addr_t *ifaddr;
1524 1524
1525 1525 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1526 1526 return;
1527 1527 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) {
1528 1528 /*
1529 1529 * We must have missed a NE_LIF_DOWN event. Delete this
1530 1530 * ifaddr and re-create it.
1531 1531 */
1532 1532 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1533 1533 }
1534 1534
1535 1535 ipnet_add_ifaddr(lifindex, ipnetif, nd);
1536 1536 ipnetif_refrele(ipnetif);
1537 1537 }
1538 1538
1539 1539 static void
1540 1540 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips,
1541 1541 boolean_t isv6)
1542 1542 {
1543 1543 ipnetif_t *ipnetif;
1544 1544 ipnetif_addr_t *ifaddr;
1545 1545
1546 1546 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1547 1547 return;
1548 1548 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL)
1549 1549 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1550 1550 ipnetif_refrele(ipnetif);
1551 1551 /*
1552 1552 * Make sure that open streams on this ipnetif are still allowed to
1553 1553 * have it open.
1554 1554 */
1555 1555 ipnetif_zonecheck(ipnetif, ips);
1556 1556 }
1557 1557
1558 1558 /*
1559 1559 * This callback from the NIC event framework dispatches a taskq as the event
1560 1560 * handlers may block.
1561 1561 */
1562 1562 /* ARGSUSED */
1563 1563 static int
1564 1564 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg)
1565 1565 {
1566 1566 ipnet_stack_t *ips = arg;
1567 1567 hook_nic_event_t *hn = (hook_nic_event_t *)info;
1568 1568 ipnet_nicevent_t *ipne;
1569 1569
1570 1570 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL)
1571 1571 return (0);
1572 1572 ipne->ipne_event = hn->hne_event;
1573 1573 ipne->ipne_protocol = hn->hne_protocol;
1574 1574 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid;
1575 1575 ipne->ipne_ifindex = hn->hne_nic;
1576 1576 ipne->ipne_lifindex = hn->hne_lif;
1577 1577 if (hn->hne_datalen != 0) {
1578 1578 (void) strlcpy(ipne->ipne_ifname, hn->hne_data,
1579 1579 sizeof (ipne->ipne_ifname));
1580 1580 }
1581 1581 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task,
1582 1582 ipne, DDI_NOSLEEP);
1583 1583 return (0);
1584 1584 }
1585 1585
1586 1586 static void
1587 1587 ipnet_nicevent_task(void *arg)
1588 1588 {
1589 1589 ipnet_nicevent_t *ipne = arg;
1590 1590 netstack_t *ns;
1591 1591 ipnet_stack_t *ips;
1592 1592 boolean_t isv6;
1593 1593
1594 1594 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL)
1595 1595 goto done;
1596 1596 ips = ns->netstack_ipnet;
1597 1597 isv6 = (ipne->ipne_protocol == ips->ips_ndv6);
1598 1598
1599 1599 mutex_enter(&ips->ips_event_lock);
1600 1600 switch (ipne->ipne_event) {
1601 1601 case NE_PLUMB:
1602 1602 ipnet_plumb_ev(ipne, ips, isv6);
1603 1603 break;
1604 1604 case NE_UNPLUMB:
1605 1605 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6);
1606 1606 break;
1607 1607 case NE_LIF_UP:
1608 1608 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex,
1609 1609 ipne->ipne_protocol, ips, isv6);
1610 1610 break;
1611 1611 case NE_LIF_DOWN:
1612 1612 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips,
1613 1613 isv6);
1614 1614 break;
1615 1615 default:
1616 1616 break;
1617 1617 }
1618 1618 mutex_exit(&ips->ips_event_lock);
1619 1619 done:
1620 1620 if (ns != NULL)
1621 1621 netstack_rele(ns);
1622 1622 kmem_free(ipne, sizeof (ipnet_nicevent_t));
1623 1623 }
1624 1624
1625 1625 dev_t
1626 1626 ipnet_if_getdev(char *name, zoneid_t zoneid)
1627 1627 {
1628 1628 netstack_t *ns;
1629 1629 ipnet_stack_t *ips;
1630 1630 ipnetif_t *ipnetif;
1631 1631 dev_t dev = (dev_t)-1;
1632 1632
1633 1633 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1634 1634 return (dev);
1635 1635 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1636 1636 return (dev);
1637 1637
1638 1638 ips = ns->netstack_ipnet;
1639 1639 mutex_enter(&ips->ips_avl_lock);
1640 1640 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) {
1641 1641 if (ipnetif_in_zone(ipnetif, zoneid, ips))
1642 1642 dev = ipnetif->if_dev;
1643 1643 }
1644 1644 mutex_exit(&ips->ips_avl_lock);
1645 1645 netstack_rele(ns);
1646 1646
1647 1647 return (dev);
1648 1648 }
1649 1649
1650 1650 static ipnetif_t *
1651 1651 ipnetif_getby_index(uint64_t id, ipnet_stack_t *ips)
1652 1652 {
1653 1653 ipnetif_t *ipnetif;
1654 1654
1655 1655 mutex_enter(&ips->ips_avl_lock);
1656 1656 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL)
1657 1657 ipnetif_refhold(ipnetif);
1658 1658 mutex_exit(&ips->ips_avl_lock);
1659 1659 return (ipnetif);
1660 1660 }
1661 1661
1662 1662 static ipnetif_t *
1663 1663 ipnetif_getby_dev(dev_t dev, ipnet_stack_t *ips)
1664 1664 {
1665 1665 ipnetif_t *ipnetif;
1666 1666 avl_tree_t *tree;
1667 1667
1668 1668 mutex_enter(&ips->ips_avl_lock);
1669 1669 tree = &ips->ips_avl_by_index;
1670 1670 for (ipnetif = avl_first(tree); ipnetif != NULL;
1671 1671 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) {
1672 1672 if (ipnetif->if_dev == dev) {
1673 1673 ipnetif_refhold(ipnetif);
1674 1674 break;
1675 1675 }
1676 1676 }
1677 1677 mutex_exit(&ips->ips_avl_lock);
1678 1678 return (ipnetif);
1679 1679 }
1680 1680
1681 1681 static ipnetif_addr_t *
1682 1682 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6)
1683 1683 {
1684 1684 ipnetif_addr_t *ifaddr;
1685 1685 list_t *list;
1686 1686
1687 1687 mutex_enter(&ipnetif->if_addr_lock);
1688 1688 list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list;
1689 1689 for (ifaddr = list_head(list); ifaddr != NULL;
1690 1690 ifaddr = list_next(list, ifaddr)) {
1691 1691 if (lid == ifaddr->ifa_id)
1692 1692 break;
1693 1693 }
1694 1694 mutex_exit(&ipnetif->if_addr_lock);
1695 1695 return (ifaddr);
1696 1696 }
1697 1697
1698 1698 /* ARGSUSED */
1699 1699 static void *
1700 1700 ipnet_stack_init(netstackid_t stackid, netstack_t *ns)
1701 1701 {
1702 1702 ipnet_stack_t *ips;
1703 1703
1704 1704 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP);
1705 1705 ips->ips_netstack = ns;
1706 1706 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0);
1707 1707 avl_create(&ips->ips_avl_by_index, ipnetif_compare_index,
1708 1708 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index));
1709 1709 avl_create(&ips->ips_avl_by_name, ipnetif_compare_name,
1710 1710 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name));
1711 1711 avl_create(&ips->ips_avl_by_shared, ipnetif_compare_name_zone,
1712 1712 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_shared));
1713 1713 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL);
1714 1714 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL);
1715 1715 list_create(&ips->ips_str_list, sizeof (ipnet_t),
1716 1716 offsetof(ipnet_t, ipnet_next));
1717 1717 ipnet_register_netihook(ips);
1718 1718 return (ips);
1719 1719 }
1720 1720
1721 1721 /* ARGSUSED */
1722 1722 static void
1723 1723 ipnet_stack_fini(netstackid_t stackid, void *arg)
1724 1724 {
1725 1725 ipnet_stack_t *ips = arg;
1726 1726 ipnetif_t *ipnetif, *nipnetif;
1727 1727
1728 1728 if (ips->ips_kstatp != NULL) {
1729 1729 zoneid_t zoneid;
1730 1730
1731 1731 zoneid = netstackid_to_zoneid(stackid);
1732 1732 net_kstat_delete(net_zoneidtonetid(zoneid), ips->ips_kstatp);
1733 1733 }
1734 1734 if (ips->ips_ndv4 != NULL) {
1735 1735 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS,
1736 1736 ips->ips_nicevents) == 0);
1737 1737 VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
1738 1738 }
1739 1739 if (ips->ips_ndv6 != NULL) {
1740 1740 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS,
1741 1741 ips->ips_nicevents) == 0);
1742 1742 VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
1743 1743 }
1744 1744 hook_free(ips->ips_nicevents);
1745 1745
1746 1746 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1747 1747 ipnetif = nipnetif) {
1748 1748 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif);
1749 1749 ipnetif_remove(ipnetif, ips);
1750 1750 }
1751 1751 avl_destroy(&ips->ips_avl_by_shared);
1752 1752 avl_destroy(&ips->ips_avl_by_index);
1753 1753 avl_destroy(&ips->ips_avl_by_name);
1754 1754 mutex_destroy(&ips->ips_avl_lock);
1755 1755 mutex_destroy(&ips->ips_walkers_lock);
1756 1756 cv_destroy(&ips->ips_walkers_cv);
1757 1757 list_destroy(&ips->ips_str_list);
1758 1758 kmem_free(ips, sizeof (*ips));
1759 1759 }
1760 1760
1761 1761 /* Do any of the addresses in addrlist belong the supplied zoneid? */
1762 1762 static boolean_t
1763 1763 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
1764 1764 {
1765 1765 ipnetif_addr_t *ifa;
1766 1766
1767 1767 for (ifa = list_head(addrlist); ifa != NULL;
1768 1768 ifa = list_next(addrlist, ifa)) {
1769 1769 if (ifa->ifa_zone == zoneid)
1770 1770 return (B_TRUE);
1771 1771 }
1772 1772 return (B_FALSE);
1773 1773 }
1774 1774
1775 1775 /* Should the supplied ipnetif be visible from the supplied zoneid? */
1776 1776 static boolean_t
1777 1777 ipnetif_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
1778 1778 {
1779 1779 int ret;
1780 1780
1781 1781 /*
1782 1782 * The global zone has visibility into all interfaces in the global
1783 1783 * stack, and exclusive stack zones have visibility into all
1784 1784 * interfaces in their stack.
1785 1785 */
1786 1786 if (zoneid == GLOBAL_ZONEID ||
1787 1787 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1788 1788 return (B_TRUE);
1789 1789
1790 1790 /*
1791 1791 * Shared-stack zones only have visibility for interfaces that have
1792 1792 * addresses in their zone.
1793 1793 */
1794 1794 mutex_enter(&ipnetif->if_addr_lock);
1795 1795 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) ||
1796 1796 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid);
1797 1797 mutex_exit(&ipnetif->if_addr_lock);
1798 1798 return (ret);
1799 1799 }
1800 1800
1801 1801 /*
1802 1802 * Verify that any ipnet_t that has a reference to the supplied ipnetif should
1803 1803 * still be allowed to have it open. A given ipnet_t may no longer be allowed
1804 1804 * to have an ipnetif open if there are no longer any addresses that belong to
1805 1805 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the
1806 1806 * case, send the ipnet_t an M_HANGUP.
1807 1807 */
1808 1808 static void
1809 1809 ipnetif_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1810 1810 {
1811 1811 list_t *strlist = &ips->ips_str_list;
1812 1812 ipnet_t *ipnet;
1813 1813
1814 1814 ipnet_walkers_inc(ips);
1815 1815 for (ipnet = list_head(strlist); ipnet != NULL;
1816 1816 ipnet = list_next(strlist, ipnet)) {
1817 1817 if (ipnet->ipnet_if != ipnetif)
1818 1818 continue;
1819 1819 if (!ipnetif_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
1820 1820 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1821 1821 }
1822 1822 ipnet_walkers_dec(ips);
1823 1823 }
1824 1824
1825 1825 void
1826 1826 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
1827 1827 {
1828 1828 ipnetif_t *ipnetif;
1829 1829 list_t cbdata;
1830 1830 ipnetif_cbdata_t *cbnode;
1831 1831 netstack_t *ns;
1832 1832 ipnet_stack_t *ips;
1833 1833
1834 1834 /*
1835 1835 * On labeled systems, non-global zones shouldn't see anything
1836 1836 * in /dev/ipnet.
1837 1837 */
1838 1838 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1839 1839 return;
1840 1840
1841 1841 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1842 1842 return;
1843 1843
1844 1844 ips = ns->netstack_ipnet;
1845 1845 list_create(&cbdata, sizeof (ipnetif_cbdata_t),
1846 1846 offsetof(ipnetif_cbdata_t, ic_next));
1847 1847
1848 1848 mutex_enter(&ips->ips_avl_lock);
1849 1849 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1850 1850 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) {
1851 1851 if (!ipnetif_in_zone(ipnetif, zoneid, ips))
1852 1852 continue;
1853 1853 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP);
1854 1854 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ);
1855 1855 cbnode->ic_dev = ipnetif->if_dev;
1856 1856 list_insert_head(&cbdata, cbnode);
1857 1857 }
1858 1858 mutex_exit(&ips->ips_avl_lock);
1859 1859
1860 1860 while ((cbnode = list_head(&cbdata)) != NULL) {
1861 1861 cb(cbnode->ic_ifname, arg, cbnode->ic_dev);
1862 1862 list_remove(&cbdata, cbnode);
1863 1863 kmem_free(cbnode, sizeof (ipnetif_cbdata_t));
1864 1864 }
1865 1865 list_destroy(&cbdata);
1866 1866 netstack_rele(ns);
1867 1867 }
1868 1868
1869 1869 static int
1870 1870 ipnetif_compare_index(const void *index_ptr, const void *ipnetifp)
1871 1871 {
1872 1872 int64_t index1 = *((int64_t *)index_ptr);
1873 1873 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;
1874 1874
1875 1875 return (SIGNOF(index2 - index1));
1876 1876 }
1877 1877
1878 1878 static int
1879 1879 ipnetif_compare_name(const void *name_ptr, const void *ipnetifp)
1880 1880 {
1881 1881 int res;
1882 1882
1883 1883 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr);
1884 1884 return (SIGNOF(res));
1885 1885 }
1886 1886
1887 1887 static int
1888 1888 ipnetif_compare_name_zone(const void *key_ptr, const void *ipnetifp)
1889 1889 {
1890 1890 const uintptr_t *ptr = key_ptr;
1891 1891 const ipnetif_t *ifp;
1892 1892 int res;
1893 1893
1894 1894 ifp = ipnetifp;
1895 1895 res = ifp->if_zoneid - ptr[0];
1896 1896 if (res != 0)
1897 1897 return (SIGNOF(res));
1898 1898 res = strcmp(ifp->if_name, (char *)ptr[1]);
1899 1899 return (SIGNOF(res));
1900 1900 }
1901 1901
1902 1902 static void
1903 1903 ipnetif_refhold(ipnetif_t *ipnetif)
1904 1904 {
1905 1905 mutex_enter(&ipnetif->if_reflock);
1906 1906 ipnetif->if_refcnt++;
1907 1907 mutex_exit(&ipnetif->if_reflock);
1908 1908 }
1909 1909
1910 1910 static void
1911 1911 ipnetif_refrele(ipnetif_t *ipnetif)
1912 1912 {
1913 1913 mutex_enter(&ipnetif->if_reflock);
1914 1914 ASSERT(ipnetif->if_refcnt > 0);
1915 1915 if (--ipnetif->if_refcnt == 0)
1916 1916 ipnetif_free(ipnetif);
1917 1917 else
1918 1918 mutex_exit(&ipnetif->if_reflock);
1919 1919 }
1920 1920
1921 1921 static void
1922 1922 ipnet_walkers_inc(ipnet_stack_t *ips)
1923 1923 {
1924 1924 mutex_enter(&ips->ips_walkers_lock);
1925 1925 ips->ips_walkers_cnt++;
1926 1926 mutex_exit(&ips->ips_walkers_lock);
1927 1927 }
1928 1928
1929 1929 static void
1930 1930 ipnet_walkers_dec(ipnet_stack_t *ips)
1931 1931 {
1932 1932 mutex_enter(&ips->ips_walkers_lock);
1933 1933 ASSERT(ips->ips_walkers_cnt != 0);
1934 1934 if (--ips->ips_walkers_cnt == 0)
1935 1935 cv_broadcast(&ips->ips_walkers_cv);
1936 1936 mutex_exit(&ips->ips_walkers_lock);
1937 1937 }
1938 1938
1939 1939 /*ARGSUSED*/
1940 1940 static int
1941 1941 ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg)
1942 1942 {
1943 1943 hook_pkt_observe_t *hdr;
1944 1944 pfv_t func = (pfv_t)arg;
1945 1945 mblk_t *mp;
1946 1946
1947 1947 hdr = (hook_pkt_observe_t *)info;
1948 1948 /*
1949 1949 * Code in ip_input() expects that it is the only one accessing the
1950 1950 * packet.
1951 1951 */
1952 1952 mp = copymsg(hdr->hpo_pkt);
1953 1953 if (mp == NULL) {
1954 1954 netstack_t *ns = hdr->hpo_ctx;
1955 1955 ipnet_stack_t *ips = ns->netstack_ipnet;
1956 1956
1957 1957 IPSK_BUMP(ips, ik_dispatchDupDrop);
1958 1958 return (0);
1959 1959 }
1960 1960
1961 1961 hdr = (hook_pkt_observe_t *)mp->b_rptr;
1962 1962 hdr->hpo_pkt = mp;
1963 1963
1964 1964 func(mp);
1965 1965
1966 1966 return (0);
1967 1967 }
1968 1968
1969 1969 hook_t *
1970 1970 ipobs_register_hook(netstack_t *ns, pfv_t func)
1971 1971 {
1972 1972 ip_stack_t *ipst = ns->netstack_ip;
1973 1973 char name[32];
1974 1974 hook_t *hook;
1975 1975
1976 1976 HOOK_INIT(hook, ipobs_bounce_func, "", (void *)func);
1977 1977 VERIFY(hook != NULL);
1978 1978
1979 1979 /*
1980 1980 * To register multiple hooks with the same callback function,
1981 1981 * a unique name is needed.
1982 1982 */
1983 1983 (void) snprintf(name, sizeof (name), "ipobserve_%p", (void *)hook);
1984 1984 hook->h_name = strdup(name);
1985 1985
1986 1986 (void) net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
1987 1987 (void) net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
1988 1988
1989 1989 return (hook);
1990 1990 }
1991 1991
1992 1992 void
1993 1993 ipobs_unregister_hook(netstack_t *ns, hook_t *hook)
1994 1994 {
1995 1995 ip_stack_t *ipst = ns->netstack_ip;
1996 1996
1997 1997 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
1998 1998
1999 1999 (void) net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
2000 2000
2001 2001 strfree(hook->h_name);
2002 2002
2003 2003 hook_free(hook);
2004 2004 }
2005 2005
2006 2006 /* ******************************************************************** */
2007 2007 /* BPF Functions below */
2008 2008 /* ******************************************************************** */
2009 2009
2010 2010 /*
2011 2011 * Convenience function to make mapping a zoneid to an ipnet_stack_t easy.
2012 2012 */
2013 2013 ipnet_stack_t *
2014 2014 ipnet_find_by_zoneid(zoneid_t zoneid)
2015 2015 {
2016 2016 netstack_t *ns;
2017 2017
2018 2018 VERIFY((ns = netstack_find_by_zoneid(zoneid)) != NULL);
2019 2019 return (ns->netstack_ipnet);
2020 2020 }
2021 2021
2022 2022 /*
2023 2023 * Functions, such as the above ipnet_find_by_zoneid(), will return a
2024 2024 * pointer to ipnet_stack_t by calling a netstack lookup function.
2025 2025 * The netstack_find_*() functions return a pointer after doing a "hold"
2026 2026 * on the data structure and thereby require a "release" when the caller
2027 2027 * is finished with it. We need to mirror that API here and thus a caller
2028 2028 * of ipnet_find_by_zoneid() is required to call ipnet_rele().
2029 2029 */
2030 2030 void
2031 2031 ipnet_rele(ipnet_stack_t *ips)
2032 2032 {
2033 2033 netstack_rele(ips->ips_netstack);
2034 2034 }
2035 2035
2036 2036 /*
2037 2037 */
2038 2038 void
2039 2039 ipnet_set_itap(bpf_itap_fn_t tapfunc)
2040 2040 {
2041 2041 ipnet_itap = tapfunc;
2042 2042 }
2043 2043
2044 2044 /*
2045 2045 * The list of interfaces available via ipnet is private for each zone,
2046 2046 * so the AVL tree of each zone must be searched for a given name, even
2047 2047 * if all names are unique.
2048 2048 */
2049 2049 int
2050 2050 ipnet_open_byname(const char *name, ipnetif_t **ptr, zoneid_t zoneid)
2051 2051 {
2052 2052 ipnet_stack_t *ips;
2053 2053 ipnetif_t *ipnetif;
2054 2054
2055 2055 ASSERT(ptr != NULL);
2056 2056 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
2057 2057
2058 2058 mutex_enter(&ips->ips_avl_lock);
2059 2059
2060 2060 /*
2061 2061 * Shared instance zone?
2062 2062 */
2063 2063 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
2064 2064 uintptr_t key[2] = { zoneid, (uintptr_t)name };
2065 2065
2066 2066 ipnetif = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
2067 2067 } else {
2068 2068 ipnetif = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
2069 2069 }
2070 2070 if (ipnetif != NULL)
2071 2071 ipnetif_refhold(ipnetif);
2072 2072 mutex_exit(&ips->ips_avl_lock);
2073 2073
2074 2074 *ptr = ipnetif;
2075 2075 ipnet_rele(ips);
2076 2076
2077 2077 if (ipnetif == NULL)
2078 2078 return (ESRCH);
2079 2079 return (0);
2080 2080 }
2081 2081
2082 2082 void
2083 2083 ipnet_close_byhandle(ipnetif_t *ifp)
2084 2084 {
2085 2085 ASSERT(ifp != NULL);
2086 2086 ipnetif_refrele(ifp);
2087 2087 }
2088 2088
2089 2089 const char *
2090 2090 ipnet_name(ipnetif_t *ifp)
2091 2091 {
2092 2092 ASSERT(ifp != NULL);
2093 2093 return (ifp->if_name);
2094 2094 }
2095 2095
2096 2096 /*
2097 2097 * To find the linkid for a given name, it is necessary to know which zone
2098 2098 * the interface name belongs to and to search the avl tree for that zone
2099 2099 * as there is no master list of all interfaces and which zone they belong
2100 2100 * to. It is assumed that the caller of this function is somehow already
2101 2101 * working with the ipnet interfaces and hence the ips_event_lock is held.
2102 2102 * When BPF calls into this function, it is doing so because of an event
2103 2103 * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id
2104 2104 * value returned has meaning without the need for grabbing a hold on the
2105 2105 * owning structure.
2106 2106 */
2107 2107 int
2108 2108 ipnet_get_linkid_byname(const char *name, uint_t *idp, zoneid_t zoneid)
2109 2109 {
2110 2110 ipnet_stack_t *ips;
2111 2111 ipnetif_t *ifp;
2112 2112
2113 2113 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
2114 2114 ASSERT(mutex_owned(&ips->ips_event_lock));
2115 2115
2116 2116 mutex_enter(&ips->ips_avl_lock);
2117 2117 ifp = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
2118 2118 if (ifp != NULL)
2119 2119 *idp = (uint_t)ifp->if_index;
2120 2120
2121 2121 /*
2122 2122 * Shared instance zone?
2123 2123 */
2124 2124 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
2125 2125 uintptr_t key[2] = { zoneid, (uintptr_t)name };
2126 2126
2127 2127 ifp = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
2128 2128 if (ifp != NULL)
2129 2129 *idp = (uint_t)ifp->if_index;
2130 2130 }
2131 2131
2132 2132 mutex_exit(&ips->ips_avl_lock);
2133 2133 ipnet_rele(ips);
2134 2134
2135 2135 if (ifp == NULL)
2136 2136 return (ESRCH);
2137 2137 return (0);
2138 2138 }
2139 2139
2140 2140 /*
2141 2141 * Strictly speaking, there is no such thing as a "client" in ipnet, like
2142 2142 * there is in mac. BPF only needs to have this because it is required as
2143 2143 * part of interfacing correctly with mac. The reuse of the original
2144 2144 * ipnetif_t as a client poses no danger, so long as it is done with its
2145 2145 * own ref-count'd hold that is given up on close.
2146 2146 */
2147 2147 int
2148 2148 ipnet_client_open(ipnetif_t *ptr, ipnetif_t **result)
2149 2149 {
2150 2150 ASSERT(ptr != NULL);
2151 2151 ASSERT(result != NULL);
2152 2152 ipnetif_refhold(ptr);
2153 2153 *result = ptr;
2154 2154
2155 2155 return (0);
2156 2156 }
2157 2157
2158 2158 void
2159 2159 ipnet_client_close(ipnetif_t *ptr)
2160 2160 {
2161 2161 ASSERT(ptr != NULL);
2162 2162 ipnetif_refrele(ptr);
2163 2163 }
2164 2164
2165 2165 /*
2166 2166 * This is called from BPF when it needs to start receiving packets
2167 2167 * from ipnet.
2168 2168 *
2169 2169 * The use of the ipnet_t structure here is somewhat lightweight when
2170 2170 * compared to how it is used elsewhere but it already has all of the
2171 2171 * right fields in it, so reuse here doesn't seem out of order. Its
2172 2172 * primary purpose here is to provide the means to store pointers for
2173 2173 * use when ipnet_promisc_remove() needs to be called.
2174 2174 *
2175 2175 * This should never be called for the IPNET_MINOR_LO device as it is
2176 2176 * never created via ipnetif_create.
2177 2177 */
2178 2178 /*ARGSUSED*/
2179 2179 int
2180 2180 ipnet_promisc_add(void *handle, uint_t how, void *data, uintptr_t *mhandle,
2181 2181 int flags)
2182 2182 {
2183 2183 ip_stack_t *ipst;
2184 2184 netstack_t *ns;
2185 2185 ipnetif_t *ifp;
2186 2186 ipnet_t *ipnet;
2187 2187 char name[32];
2188 2188 int error;
2189 2189
2190 2190 ifp = (ipnetif_t *)handle;
2191 2191
2192 2192 if (how != DL_PROMISC_PHYS && how != DL_PROMISC_MULTI)
2193 2193 return (EINVAL);
2194 2194
2195 2195 ns = netstack_find_by_zoneid(ifp->if_zoneid);
2196 2196
2197 2197 if ((error = ipnet_join_allmulti(ifp, ns->netstack_ipnet)) != 0) {
2198 2198 netstack_rele(ns);
2199 2199 return (error);
2200 2200 }
2201 2201
2202 2202 ipnet = kmem_zalloc(sizeof (*ipnet), KM_SLEEP);
2203 2203 ipnet->ipnet_if = ifp;
2204 2204 ipnet->ipnet_ns = ns;
2205 2205 ipnet->ipnet_flags = flags;
2206 2206
2207 2207 if ((ifp->if_flags & IPNETIF_LOOPBACK) != 0) {
2208 2208 ipnet->ipnet_acceptfn = ipnet_loaccept;
2209 2209 } else {
2210 2210 ipnet->ipnet_acceptfn = ipnet_accept;
2211 2211 }
2212 2212
2213 2213 /*
2214 2214 * To register multiple hooks with the same callback function,
2215 2215 * a unique name is needed.
2216 2216 */
2217 2217 HOOK_INIT(ipnet->ipnet_hook, ipnet_bpf_bounce, "", ipnet);
2218 2218 (void) snprintf(name, sizeof (name), "ipnet_promisc_%p",
2219 2219 (void *)ipnet->ipnet_hook);
2220 2220 ipnet->ipnet_hook->h_name = strdup(name);
2221 2221 ipnet->ipnet_data = data;
2222 2222 ipnet->ipnet_zoneid = ifp->if_zoneid;
2223 2223
2224 2224 ipst = ns->netstack_ip;
2225 2225
2226 2226 error = net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE,
2227 2227 ipnet->ipnet_hook);
2228 2228 if (error != 0)
2229 2229 goto regfail;
2230 2230
2231 2231 error = net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE,
2232 2232 ipnet->ipnet_hook);
2233 2233 if (error != 0) {
2234 2234 (void) net_hook_unregister(ipst->ips_ip4_observe_pr,
2235 2235 NH_OBSERVE, ipnet->ipnet_hook);
2236 2236 goto regfail;
2237 2237 }
|
↓ open down ↓ |
2198 lines elided |
↑ open up ↑ |
2238 2238
2239 2239 *mhandle = (uintptr_t)ipnet;
2240 2240 netstack_rele(ns);
2241 2241
2242 2242 return (0);
2243 2243
2244 2244 regfail:
2245 2245 cmn_err(CE_WARN, "net_hook_register failed: %d", error);
2246 2246 strfree(ipnet->ipnet_hook->h_name);
2247 2247 hook_free(ipnet->ipnet_hook);
2248 + ipnet_leave_allmulti(ifp, ns->netstack_ipnet);
2248 2249 netstack_rele(ns);
2249 2250 return (error);
2250 2251 }
2251 2252
2252 2253 void
2253 2254 ipnet_promisc_remove(void *data)
2254 2255 {
2255 2256 ip_stack_t *ipst;
2256 2257 ipnet_t *ipnet;
2257 2258 hook_t *hook;
2258 2259
2259 2260 ipnet = data;
2260 2261 ipst = ipnet->ipnet_ns->netstack_ip;
2261 2262 hook = ipnet->ipnet_hook;
2262 2263
|
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
2263 2264 VERIFY(net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE,
2264 2265 hook) == 0);
2265 2266
2266 2267 VERIFY(net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE,
2267 2268 hook) == 0);
2268 2269
2269 2270 strfree(hook->h_name);
2270 2271
2271 2272 hook_free(hook);
2272 2273
2274 + ipnet_leave_allmulti(ipnet->ipnet_if, ipnet->ipnet_ns->netstack_ipnet);
2275 +
2273 2276 kmem_free(ipnet, sizeof (*ipnet));
2274 2277 }
2275 2278
2276 2279 /*
2277 2280 * arg here comes from the ipnet_t allocated in ipnet_promisc_add.
2278 2281 * An important field from that structure is "ipnet_data" that
2279 2282 * contains the "data" pointer passed into ipnet_promisc_add: it needs
2280 2283 * to be passed back to bpf when we call into ipnet_itap.
2281 2284 *
2282 2285 * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called
2283 2286 * from BPF.
2284 2287 */
2285 2288 /*ARGSUSED*/
2286 2289 static int
2287 2290 ipnet_bpf_bounce(hook_event_token_t token, hook_data_t info, void *arg)
2288 2291 {
2289 2292 hook_pkt_observe_t *hdr;
2290 2293 ipnet_addrp_t src;
2291 2294 ipnet_addrp_t dst;
2292 2295 ipnet_stack_t *ips;
2293 2296 ipnet_t *ipnet;
2294 2297 mblk_t *netmp;
2295 2298 mblk_t *mp;
2296 2299
2297 2300 hdr = (hook_pkt_observe_t *)info;
2298 2301 mp = hdr->hpo_pkt;
2299 2302 ipnet = (ipnet_t *)arg;
2300 2303 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
2301 2304
2302 2305 netmp = hdr->hpo_pkt->b_cont;
2303 2306 src.iap_family = hdr->hpo_family;
2304 2307 dst.iap_family = hdr->hpo_family;
2305 2308
2306 2309 if (hdr->hpo_family == AF_INET) {
2307 2310 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
2308 2311 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
2309 2312 } else {
2310 2313 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
2311 2314 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
2312 2315 }
2313 2316
2314 2317 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
2315 2318 IPSK_BUMP(ips, ik_acceptFail);
2316 2319 return (0);
2317 2320 }
2318 2321 IPSK_BUMP(ips, ik_acceptOk);
2319 2322
2320 2323 ipnet_itap(ipnet->ipnet_data, mp,
2321 2324 hdr->hpo_htype == htons(IPOBS_HOOK_OUTBOUND),
2322 2325 ntohl(hdr->hpo_pktlen) + MBLKL(mp));
2323 2326
2324 2327 return (0);
2325 2328 }
2326 2329
2327 2330 /*
2328 2331 * clone'd ipnetif_t's are created when a shared IP instance zone comes
2329 2332 * to life and configures an IP address. The model that BPF uses is that
2330 2333 * each interface must have a unique pointer and each interface must be
2331 2334 * representative of what it can capture. They are limited to one DLT
2332 2335 * per interface and one zone per interface. Thus every interface that
2333 2336 * can be seen in a zone must be announced via an attach to bpf. For
2334 2337 * shared instance zones, this means the ipnet driver needs to detect
2335 2338 * when an address is added to an interface in a zone for the first
2336 2339 * time (and also when the last address is removed.)
2337 2340 */
2338 2341 static ipnetif_t *
2339 2342 ipnetif_clone_create(ipnetif_t *ifp, zoneid_t zoneid)
2340 2343 {
2341 2344 uintptr_t key[2] = { zoneid, (uintptr_t)ifp->if_name };
2342 2345 ipnet_stack_t *ips = ifp->if_stackp;
2343 2346 avl_index_t where = 0;
2344 2347 ipnetif_t *newif;
2345 2348
2346 2349 mutex_enter(&ips->ips_avl_lock);
2347 2350 newif = avl_find(&ips->ips_avl_by_shared, (void *)key, &where);
2348 2351 if (newif != NULL) {
2349 2352 ipnetif_refhold(newif);
2350 2353 newif->if_sharecnt++;
2351 2354 mutex_exit(&ips->ips_avl_lock);
2352 2355 return (newif);
2353 2356 }
2354 2357
2355 2358 newif = ipnet_alloc_if(ips);
2356 2359 if (newif == NULL) {
2357 2360 mutex_exit(&ips->ips_avl_lock);
2358 2361 return (NULL);
2359 2362 }
2360 2363
2361 2364 newif->if_refcnt = 1;
2362 2365 newif->if_sharecnt = 1;
2363 2366 newif->if_zoneid = zoneid;
2364 2367 (void) strlcpy(newif->if_name, ifp->if_name, LIFNAMSIZ);
2365 2368 newif->if_flags = ifp->if_flags & IPNETIF_LOOPBACK;
2366 2369 newif->if_index = ifp->if_index;
2367 2370
2368 2371 avl_insert(&ips->ips_avl_by_shared, newif, where);
2369 2372 mutex_exit(&ips->ips_avl_lock);
2370 2373
2371 2374 return (newif);
2372 2375 }
2373 2376
2374 2377 static void
2375 2378 ipnetif_clone_release(ipnetif_t *ipnetif)
2376 2379 {
2377 2380 boolean_t dofree = B_FALSE;
2378 2381 boolean_t doremove = B_FALSE;
2379 2382 ipnet_stack_t *ips = ipnetif->if_stackp;
2380 2383
2381 2384 mutex_enter(&ipnetif->if_reflock);
2382 2385 ASSERT(ipnetif->if_refcnt > 0);
2383 2386 if (--ipnetif->if_refcnt == 0)
2384 2387 dofree = B_TRUE;
2385 2388 ASSERT(ipnetif->if_sharecnt > 0);
2386 2389 if (--ipnetif->if_sharecnt == 0)
2387 2390 doremove = B_TRUE;
2388 2391 mutex_exit(&ipnetif->if_reflock);
2389 2392 if (doremove) {
2390 2393 mutex_enter(&ips->ips_avl_lock);
2391 2394 avl_remove(&ips->ips_avl_by_shared, ipnetif);
2392 2395 mutex_exit(&ips->ips_avl_lock);
2393 2396 }
2394 2397 if (dofree) {
2395 2398 ASSERT(ipnetif->if_sharecnt == 0);
2396 2399 ipnetif_free(ipnetif);
2397 2400 }
2398 2401 }
|
↓ open down ↓ |
116 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX