Print this page
OS-5549 move bpf filter functions into ip module
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/sockmods/sockmod_pfp.c
+++ new/usr/src/uts/common/inet/sockmods/sockmod_pfp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 - * Copyright 2015 Joyent, Inc. All rights reserved.
24 + * Copyright 2016 Joyent, Inc.
25 25 */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/param.h>
29 29 #include <sys/systm.h>
30 30 #include <sys/stropts.h>
31 31 #include <sys/socket.h>
32 32 #include <sys/socketvar.h>
33 33 #include <sys/socket_proto.h>
34 34 #include <sys/sockio.h>
35 35 #include <sys/strsun.h>
36 36 #include <sys/kstat.h>
37 37 #include <sys/modctl.h>
38 38 #include <sys/policy.h>
39 39 #include <sys/priv_const.h>
40 40 #include <sys/tihdr.h>
41 41 #include <sys/zone.h>
42 42 #include <sys/time.h>
43 43 #include <sys/ethernet.h>
|
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
44 44 #include <sys/llc1.h>
45 45 #include <fs/sockfs/sockcommon.h>
46 46 #include <net/if.h>
47 47 #include <inet/ip_arp.h>
48 48
49 49 #include <sys/dls.h>
50 50 #include <sys/mac.h>
51 51 #include <sys/mac_client.h>
52 52 #include <sys/mac_provider.h>
53 53 #include <sys/mac_client_priv.h>
54 +#include <inet/bpf.h>
54 55
55 56 #include <netpacket/packet.h>
56 57
57 58 static void pfp_close(mac_handle_t, mac_client_handle_t);
58 59 static int pfp_dl_to_arphrd(int);
59 60 static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
60 61 socklen_t *);
61 62 static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
62 63 static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
63 64 int);
64 65 static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
65 66 cred_t *);
66 67 static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
67 68 static void pfp_release_bpf(struct pfpsock *);
68 69 static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
69 70 static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
70 71 socklen_t);
71 72 static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
72 73 socklen_t);
73 74
74 75 /*
75 76 * PFP sockfs operations
76 77 * Most are currently no-ops because they have no meaning for a connectionless
77 78 * socket.
78 79 */
79 80 static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
80 81 sock_upcalls_t *, int, struct cred *);
81 82 static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
82 83 struct cred *);
83 84 static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
84 85 static void sdpfp_clr_flowctrl(sock_lower_handle_t);
85 86 static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
86 87 socklen_t *, struct cred *);
87 88 static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
88 89 struct cred *);
89 90 static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
90 91 struct cred *);
91 92 static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
92 93 socklen_t, struct cred *);
93 94
94 95 static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
95 96 uint_t *, int *, int, cred_t *);
96 97
97 98 static int sockpfp_init(void);
98 99 static void sockpfp_fini(void);
99 100
100 101 static kstat_t *pfp_ksp;
101 102 static pfp_kstats_t ks_stats;
102 103 static pfp_kstats_t pfp_kstats = {
103 104 /*
104 105 * Each one of these kstats is a different return path in handling
105 106 * a packet received from the mac layer.
106 107 */
107 108 { "recvMacHeaderFail", KSTAT_DATA_UINT64 },
108 109 { "recvBadProtocol", KSTAT_DATA_UINT64 },
109 110 { "recvAllocbFail", KSTAT_DATA_UINT64 },
110 111 { "recvOk", KSTAT_DATA_UINT64 },
111 112 { "recvFail", KSTAT_DATA_UINT64 },
112 113 { "recvFiltered", KSTAT_DATA_UINT64 },
113 114 { "recvFlowControl", KSTAT_DATA_UINT64 },
114 115 /*
115 116 * A global set of counters is maintained to track the behaviour
116 117 * of the system (kernel & applications) in sending packets.
117 118 */
118 119 { "sendUnbound", KSTAT_DATA_UINT64 },
119 120 { "sendFailed", KSTAT_DATA_UINT64 },
120 121 { "sendTooBig", KSTAT_DATA_UINT64 },
121 122 { "sendAllocFail", KSTAT_DATA_UINT64 },
122 123 { "sendUiomoveFail", KSTAT_DATA_UINT64 },
123 124 { "sendNoMemory", KSTAT_DATA_UINT64 },
124 125 { "sendOpenFail", KSTAT_DATA_UINT64 },
125 126 { "sendWrongFamily", KSTAT_DATA_UINT64 },
126 127 { "sendShortMsg", KSTAT_DATA_UINT64 },
127 128 { "sendOk", KSTAT_DATA_UINT64 }
128 129 };
129 130
130 131 sock_downcalls_t pfp_downcalls = {
131 132 sdpfp_activate,
132 133 sock_accept_notsupp,
133 134 sdpfp_bind,
134 135 sock_listen_notsupp,
135 136 sock_connect_notsupp,
136 137 sock_getpeername_notsupp,
137 138 sock_getsockname_notsupp,
138 139 sdpfp_getsockopt,
139 140 sdpfp_setsockopt,
140 141 sock_send_notsupp,
141 142 sdpfp_senduio,
142 143 NULL,
143 144 sock_poll_notsupp,
144 145 sock_shutdown_notsupp,
145 146 sdpfp_clr_flowctrl,
146 147 sdpfp_ioctl,
147 148 sdpfp_close,
148 149 };
149 150
150 151 static smod_reg_t sinfo = {
151 152 SOCKMOD_VERSION,
152 153 "sockpfp",
153 154 SOCK_UC_VERSION,
154 155 SOCK_DC_VERSION,
155 156 sockpfp_create,
156 157 NULL
157 158 };
158 159
159 160 static int accepted_protos[3][2] = {
160 161 { ETH_P_ALL, 0 },
161 162 { ETH_P_802_2, LLC_SNAP_SAP },
162 163 { ETH_P_803_3, 0 },
163 164 };
164 165
/*
 * Upper bound on the receive buffer size of a PF_PACKET socket. Ideally
 * this would be managed through ipadm, as is done for TCP, UDP, SCTP and
 * friends. Until then this global imposes a hard limit of 4 MB while
 * still leaving a knob that can be changed if that proves necessary.
 */
int sockmod_pfp_rcvbuf_max = 4 * 1024 * 1024;
172 173
173 174 /*
174 175 * Module linkage information for the kernel.
175 176 */
176 177 static struct modlsockmod modlsockmod = {
177 178 &mod_sockmodops, "PF Packet socket module", &sinfo
178 179 };
179 180
180 181 static struct modlinkage modlinkage = {
181 182 MODREV_1,
182 183 &modlsockmod,
183 184 NULL
184 185 };
185 186
186 187 int
187 188 _init(void)
188 189 {
189 190 int error;
190 191
191 192 error = sockpfp_init();
192 193 if (error != 0)
193 194 return (error);
194 195
195 196 error = mod_install(&modlinkage);
196 197 if (error != 0)
197 198 sockpfp_fini();
198 199
199 200 return (error);
200 201 }
201 202
202 203 int
203 204 _fini(void)
204 205 {
205 206 int error;
206 207
207 208 error = mod_remove(&modlinkage);
208 209 if (error == 0)
209 210 sockpfp_fini();
210 211
211 212 return (error);
212 213 }
213 214
214 215 int
215 216 _info(struct modinfo *modinfop)
216 217 {
217 218 return (mod_info(&modlinkage, modinfop));
218 219 }
219 220
220 221 /*
221 222 * sockpfp_init: called as part of the initialisation of the module when
222 223 * loaded into the kernel.
223 224 *
224 225 * Being able to create and record the kstats data in the kernel is not
225 226 * considered to be vital to the operation of this kernel module, thus
226 227 * its failure is tolerated.
227 228 */
228 229 static int
229 230 sockpfp_init(void)
230 231 {
231 232 (void) memset(&ks_stats, 0, sizeof (ks_stats));
232 233
233 234 (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
234 235
235 236 pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
236 237 KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
237 238 KSTAT_FLAG_VIRTUAL);
238 239 if (pfp_ksp != NULL) {
239 240 pfp_ksp->ks_data = &ks_stats;
240 241 kstat_install(pfp_ksp);
241 242 }
242 243
243 244 return (0);
244 245 }
245 246
246 247 /*
247 248 * sockpfp_fini: called when the operating system wants to unload the
248 249 * socket module from the kernel.
249 250 */
250 251 static void
251 252 sockpfp_fini(void)
252 253 {
253 254 if (pfp_ksp != NULL)
254 255 kstat_delete(pfp_ksp);
255 256 }
256 257
257 258 /*
258 259 * Due to sockets being created read-write by default, all PF_PACKET sockets
259 260 * therefore require the NET_RAWACCESS priviliege, even if the socket is only
260 261 * being used for reading packets from.
261 262 *
262 263 * This create function enforces this module only being used with PF_PACKET
263 264 * sockets and the policy that we support via the config file in sock2path.d:
264 265 * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
265 266 */
266 267 /* ARGSUSED */
267 268 static sock_lower_handle_t
268 269 sockpfp_create(int family, int type, int proto,
269 270 sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
270 271 int sflags, cred_t *cred)
271 272 {
272 273 struct pfpsock *ps;
273 274 int kmflags;
274 275 int newproto;
275 276 int i;
276 277
277 278 if (secpolicy_net_rawaccess(cred) != 0) {
278 279 *errorp = EACCES;
279 280 return (NULL);
280 281 }
281 282
282 283 if (family != AF_PACKET) {
283 284 *errorp = EAFNOSUPPORT;
284 285 return (NULL);
285 286 }
286 287
287 288 if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
288 289 *errorp = ESOCKTNOSUPPORT;
289 290 return (NULL);
290 291 }
291 292
292 293 /*
293 294 * First check to see if the protocol number passed in via the socket
294 295 * creation should be mapped to a different number for internal use.
295 296 */
296 297 for (i = 0, newproto = -1;
297 298 i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
298 299 if (accepted_protos[i][0] == proto) {
299 300 newproto = accepted_protos[i][1];
300 301 break;
301 302 }
302 303 }
303 304
304 305 /*
305 306 * If the mapping of the protocol that was under 0x800 failed to find
306 307 * a local equivalent then fail the socket creation. If the protocol
307 308 * for the socket is over 0x800 and it was not found in the mapping
308 309 * table above, then use the value as is.
309 310 */
310 311 if (newproto == -1) {
311 312 if (proto < 0x800) {
312 313 *errorp = ENOPROTOOPT;
313 314 return (NULL);
314 315 }
315 316 newproto = proto;
316 317 }
317 318 proto = newproto;
318 319
319 320 kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
320 321 ps = kmem_zalloc(sizeof (*ps), kmflags);
321 322 if (ps == NULL) {
322 323 *errorp = ENOMEM;
323 324 return (NULL);
324 325 }
325 326
326 327 ps->ps_type = type;
327 328 ps->ps_proto = proto;
328 329 rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
329 330 mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
330 331
331 332 *sock_downcalls = &pfp_downcalls;
332 333 /*
333 334 * Setting this causes bytes from a packet that do not fit into the
334 335 * destination user buffer to be discarded. Thus the API is one
335 336 * packet per receive and callers are required to use a buffer large
336 337 * enough for the biggest packet that the interface can provide.
337 338 */
338 339 *smodep = SM_ATOMIC;
339 340
340 341 return ((sock_lower_handle_t)ps);
341 342 }
342 343
343 344 /* ************************************************************************* */
344 345
345 346 /*
346 347 * pfp_packet is the callback function that is given to the mac layer for
347 348 * PF_PACKET to receive packets with. One packet at a time is passed into
348 349 * this function from the mac layer. Each packet is a private copy given
349 350 * to PF_PACKET to modify or free as it wishes and does not harm the original
350 351 * packet from which it was cloned.
351 352 */
352 353 /* ARGSUSED */
353 354 static void
354 355 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
355 356 {
356 357 struct T_unitdata_ind *tunit;
357 358 struct sockaddr_ll *sll;
358 359 struct sockaddr_ll *sol;
359 360 mac_header_info_t hdr;
360 361 struct pfpsock *ps;
361 362 size_t tusz;
362 363 mblk_t *mp0;
363 364 int error;
364 365
365 366 if (mp == NULL)
366 367 return;
367 368
368 369 ps = arg;
369 370 if (ps->ps_flow_ctrld) {
370 371 ps->ps_flow_ctrl_drops++;
371 372 ps->ps_stats.tp_drops++;
372 373 ks_stats.kp_recv_flow_cntrld.value.ui64++;
373 374 freemsg(mp);
374 375 return;
375 376 }
376 377
377 378 if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
378 379 /*
379 380 * Can't decode the packet header information so drop it.
380 381 */
381 382 ps->ps_stats.tp_drops++;
382 383 ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
383 384 freemsg(mp);
384 385 return;
385 386 }
386 387
387 388 if (mac_type(ps->ps_mh) == DL_ETHER &&
388 389 hdr.mhi_bindsap == ETHERTYPE_VLAN) {
389 390 struct ether_vlan_header *evhp;
390 391 struct ether_vlan_header evh;
391 392
392 393 hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
393 394 hdr.mhi_istagged = B_TRUE;
394 395
395 396 if (MBLKL(mp) >= sizeof (*evhp)) {
396 397 evhp = (struct ether_vlan_header *)mp->b_rptr;
397 398 } else {
398 399 int sz = sizeof (*evhp);
399 400 char *s = (char *)&evh;
400 401 mblk_t *tmp;
401 402 int len;
402 403
403 404 for (tmp = mp; sz > 0 && tmp != NULL;
404 405 tmp = tmp->b_cont) {
405 406 len = min(sz, MBLKL(tmp));
406 407 bcopy(tmp->b_rptr, s, len);
407 408 sz -= len;
408 409 }
409 410 evhp = &evh;
410 411 }
411 412 hdr.mhi_tci = ntohs(evhp->ether_tci);
412 413 hdr.mhi_bindsap = ntohs(evhp->ether_type);
413 414 }
414 415
415 416 if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
416 417 /*
417 418 * The packet is not of interest to this socket so
418 419 * drop it on the floor. Here the SAP is being used
419 420 * as a very course filter.
420 421 */
421 422 ps->ps_stats.tp_drops++;
422 423 ks_stats.kp_recv_bad_proto.value.ui64++;
423 424 freemsg(mp);
424 425 return;
425 426 }
426 427
427 428 /*
428 429 * This field is not often set, even for ethernet,
429 430 * by mac_header_info, so compute it if it is 0.
430 431 */
431 432 if (hdr.mhi_pktsize == 0)
432 433 hdr.mhi_pktsize = msgdsize(mp);
433 434
434 435 /*
435 436 * If a BPF filter is present, pass the raw packet into that.
436 437 * A failed match will result in zero being returned, indicating
437 438 * that this socket is not interested in the packet.
438 439 */
439 440 if (ps->ps_bpf.bf_len != 0) {
440 441 uchar_t *buffer;
|
↓ open down ↓ |
377 lines elided |
↑ open up ↑ |
441 442 int buflen;
442 443
443 444 buflen = MBLKL(mp);
444 445 if (hdr.mhi_pktsize == buflen) {
445 446 buffer = mp->b_rptr;
446 447 } else {
447 448 buflen = 0;
448 449 buffer = (uchar_t *)mp;
449 450 }
450 451 rw_enter(&ps->ps_bpflock, RW_READER);
451 - if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
452 + if (ip_bpf_filter((ip_bpf_insn_t *)ps->ps_bpf.bf_insns, buffer,
452 453 hdr.mhi_pktsize, buflen) == 0) {
453 454 rw_exit(&ps->ps_bpflock);
454 455 ps->ps_stats.tp_drops++;
455 456 ks_stats.kp_recv_filtered.value.ui64++;
456 457 freemsg(mp);
457 458 return;
458 459 }
459 460 rw_exit(&ps->ps_bpflock);
460 461 }
461 462
462 463 if (ps->ps_type == SOCK_DGRAM) {
463 464 /*
464 465 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
465 466 * past the link layer header.
466 467 */
467 468 mp->b_rptr += hdr.mhi_hdrsize;
468 469 hdr.mhi_pktsize -= hdr.mhi_hdrsize;
469 470 }
470 471
471 472 tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
472 473 if (ps->ps_auxdata) {
473 474 tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
474 475 tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
475 476 }
476 477
477 478 /*
478 479 * It is tempting to think that this could be optimised by having
479 480 * the base mblk_t allocated and hung off the pfpsock structure,
480 481 * except that then another one would need to be allocated for the
481 482 * sockaddr_ll that is included. Even creating a template to copy
482 483 * from is of questionable value, as read-write from one structure
483 484 * to the other is going to be slower than all of the initialisation.
484 485 */
485 486 mp0 = allocb(tusz, BPRI_HI);
486 487 if (mp0 == NULL) {
487 488 ps->ps_stats.tp_drops++;
488 489 ks_stats.kp_recv_alloc_fail.value.ui64++;
489 490 freemsg(mp);
490 491 return;
491 492 }
492 493
493 494 (void) memset(mp0->b_rptr, 0, tusz);
494 495
495 496 mp0->b_datap->db_type = M_PROTO;
496 497 mp0->b_wptr = mp0->b_rptr + tusz;
497 498
498 499 tunit = (struct T_unitdata_ind *)mp0->b_rptr;
499 500 tunit->PRIM_type = T_UNITDATA_IND;
500 501 tunit->SRC_length = sizeof (struct sockaddr);
501 502 tunit->SRC_offset = sizeof (*tunit);
502 503
503 504 sol = &ps->ps_sock;
504 505 sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
505 506 sll->sll_ifindex = sol->sll_ifindex;
506 507 sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
507 508 sll->sll_halen = sol->sll_halen;
508 509 if (hdr.mhi_saddr != NULL)
509 510 (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
510 511
511 512 switch (hdr.mhi_dsttype) {
512 513 case MAC_ADDRTYPE_MULTICAST :
513 514 sll->sll_pkttype = PACKET_MULTICAST;
514 515 break;
515 516 case MAC_ADDRTYPE_BROADCAST :
516 517 sll->sll_pkttype = PACKET_BROADCAST;
517 518 break;
518 519 case MAC_ADDRTYPE_UNICAST :
519 520 if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
520 521 sll->sll_pkttype = PACKET_HOST;
521 522 else
522 523 sll->sll_pkttype = PACKET_OTHERHOST;
523 524 break;
524 525 }
525 526
526 527 if (ps->ps_auxdata) {
527 528 struct tpacket_auxdata *aux;
528 529 struct T_opthdr *topt;
529 530
530 531 tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
531 532 sizeof (struct sockaddr_ll));
532 533 tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
533 534 _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
534 535
535 536 topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
536 537 aux = (struct tpacket_auxdata *)
537 538 ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
538 539
539 540 topt->len = tunit->OPT_length;
540 541 topt->level = SOL_PACKET;
541 542 topt->name = PACKET_AUXDATA;
542 543 topt->status = 0;
543 544 /*
544 545 * libpcap doesn't seem to use any other field,
545 546 * so it isn't clear how they should be filled in.
546 547 */
547 548 aux->tp_vlan_vci = hdr.mhi_tci;
548 549 }
549 550
550 551 linkb(mp0, mp);
551 552
552 553 (void) gethrestime(&ps->ps_timestamp);
553 554
554 555 ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
555 556 &error, NULL);
556 557
557 558 if (error == 0) {
558 559 ps->ps_stats.tp_packets++;
559 560 ks_stats.kp_recv_ok.value.ui64++;
560 561 } else {
561 562 mutex_enter(&ps->ps_lock);
562 563 if (error == ENOSPC) {
563 564 ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
564 565 &error, NULL);
565 566 if (error == ENOSPC)
566 567 ps->ps_flow_ctrld = B_TRUE;
567 568 }
568 569 mutex_exit(&ps->ps_lock);
569 570 ps->ps_stats.tp_drops++;
570 571 ks_stats.kp_recv_fail.value.ui64++;
571 572 }
572 573 }
573 574
574 575 /*
575 576 * Bind a PF_PACKET socket to a network interface.
576 577 *
577 578 * The default operation of this bind() is to place the socket (and thus the
578 579 * network interface) into promiscuous mode. It is then up to the application
579 580 * to turn that down by issuing the relevant ioctls, if desired.
580 581 */
581 582 static int
582 583 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
583 584 socklen_t addrlen, struct cred *cred)
584 585 {
585 586 struct sockaddr_ll *addr_ll, *sol;
586 587 mac_client_handle_t mch;
587 588 struct pfpsock *ps;
588 589 mac_handle_t mh;
589 590 int error;
590 591
591 592 ps = (struct pfpsock *)handle;
592 593 if (ps->ps_bound)
593 594 return (EINVAL);
594 595
595 596 if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
596 597 return (EINVAL);
597 598
598 599 addr_ll = (struct sockaddr_ll *)addr;
599 600
600 601 error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
601 602 if (error != 0)
602 603 return (error);
603 604 /*
604 605 * Ensure that each socket is only bound once.
605 606 */
606 607 mutex_enter(&ps->ps_lock);
607 608 if (ps->ps_mh != 0) {
608 609 mutex_exit(&ps->ps_lock);
609 610 pfp_close(mh, mch);
610 611 return (EADDRINUSE);
611 612 }
612 613 ps->ps_mh = mh;
613 614 ps->ps_mch = mch;
614 615 mutex_exit(&ps->ps_lock);
615 616
616 617 /*
617 618 * Cache all of the information from bind so that it's in an easy
618 619 * place to get at when packets are received.
619 620 */
620 621 sol = &ps->ps_sock;
621 622 sol->sll_family = AF_PACKET;
622 623 sol->sll_ifindex = addr_ll->sll_ifindex;
623 624 sol->sll_protocol = addr_ll->sll_protocol;
624 625 sol->sll_halen = mac_addr_len(ps->ps_mh);
625 626 mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
626 627 mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
627 628 ps->ps_linkid = addr_ll->sll_ifindex;
628 629
629 630 error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
630 631 pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
631 632 if (error == 0) {
632 633 ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
633 634 ps->ps_bound = B_TRUE;
634 635 }
635 636
636 637 return (error);
637 638 }
638 639
639 640 /* ARGSUSED */
640 641 static void
641 642 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
642 643 sock_upcalls_t *upcalls, int flags, cred_t *cred)
643 644 {
644 645 struct pfpsock *ps;
645 646
646 647 ps = (struct pfpsock *)lower;
647 648 ps->ps_upper = upper;
648 649 ps->ps_upcalls = upcalls;
649 650 }
650 651
651 652 /*
652 653 * This module only implements getting socket options for the new socket
653 654 * option level (SOL_PACKET) that it introduces. All other requests are
654 655 * passed back to the sockfs layer.
655 656 */
656 657 /* ARGSUSED */
657 658 static int
658 659 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
659 660 void *optval, socklen_t *optlenp, struct cred *cred)
660 661 {
661 662 struct pfpsock *ps;
662 663 int error = 0;
663 664
664 665 ps = (struct pfpsock *)handle;
665 666
666 667 switch (level) {
667 668 case SOL_PACKET :
668 669 error = pfp_getpacket_sockopt(handle, option_name, optval,
669 670 optlenp);
670 671 break;
671 672
672 673 case SOL_SOCKET :
673 674 if (option_name == SO_RCVBUF) {
674 675 if (*optlenp < sizeof (int32_t))
675 676 return (EINVAL);
676 677 *((int32_t *)optval) = ps->ps_rcvbuf;
677 678 *optlenp = sizeof (int32_t);
678 679 } else {
679 680 error = ENOPROTOOPT;
680 681 }
681 682 break;
682 683
683 684 default :
684 685 /*
685 686 * If sockfs code receives this error in return from the
686 687 * getsockopt downcall it handles the option locally, if
687 688 * it can.
688 689 */
689 690 error = ENOPROTOOPT;
690 691 break;
691 692 }
692 693
693 694 return (error);
694 695 }
695 696
696 697 /*
697 698 * PF_PACKET supports setting socket options at only two levels:
698 699 * SOL_SOCKET and SOL_PACKET.
699 700 */
700 701 /* ARGSUSED */
701 702 static int
702 703 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
703 704 const void *optval, socklen_t optlen, struct cred *cred)
704 705 {
705 706 int error = 0;
706 707
707 708 switch (level) {
708 709 case SOL_SOCKET :
709 710 error = pfp_setsocket_sockopt(handle, option_name, optval,
710 711 optlen);
711 712 break;
712 713 case SOL_PACKET :
713 714 error = pfp_setpacket_sockopt(handle, option_name, optval,
714 715 optlen);
715 716 break;
716 717 default :
717 718 error = EINVAL;
718 719 break;
719 720 }
720 721
721 722 return (error);
722 723 }
723 724
724 725 /*
725 726 * This function is incredibly inefficient for sending any packet that
726 727 * comes with a msghdr asking to be sent to an interface to which the
727 728 * socket has not been bound. Some possibilities here are keeping a
728 729 * cache of all open mac's and mac_client's, for the purpose of sending,
729 730 * and closing them after some amount of inactivity. Clearly, applications
730 731 * should not be written to use one socket for multiple interfaces if
731 732 * performance is desired with the code as is.
732 733 */
733 734 /* ARGSUSED */
734 735 static int
735 736 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
736 737 struct nmsghdr *msg, struct cred *cred)
737 738 {
738 739 struct sockaddr_ll *sol;
739 740 mac_client_handle_t mch;
740 741 struct pfpsock *ps;
741 742 boolean_t new_open;
742 743 mac_handle_t mh;
743 744 size_t mpsize;
744 745 uint_t maxsdu;
745 746 mblk_t *mp0;
746 747 mblk_t *mp;
747 748 int error;
748 749
749 750 mp = NULL;
750 751 mp0 = NULL;
751 752 new_open = B_FALSE;
752 753 ps = (struct pfpsock *)handle;
753 754 mh = ps->ps_mh;
754 755 mch = ps->ps_mch;
755 756 maxsdu = ps->ps_max_sdu;
756 757
757 758 sol = (struct sockaddr_ll *)msg->msg_name;
758 759 if (sol == NULL) {
759 760 /*
760 761 * If no sockaddr_ll has been provided with the send call,
761 762 * use the one constructed when the socket was bound to an
762 763 * interface and fail if it hasn't been bound.
763 764 */
764 765 if (!ps->ps_bound) {
765 766 ks_stats.kp_send_unbound.value.ui64++;
766 767 return (EPROTO);
767 768 }
768 769 sol = &ps->ps_sock;
769 770 } else {
770 771 /*
771 772 * Verify the sockaddr_ll message passed down before using
772 773 * it to send a packet out with. If it refers to an interface
773 774 * that has not been bound, it is necessary to open it.
774 775 */
775 776 struct sockaddr_ll *sll;
776 777
777 778 if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
778 779 ks_stats.kp_send_short_msg.value.ui64++;
779 780 return (EINVAL);
780 781 }
781 782
782 783 if (sol->sll_family != AF_PACKET) {
783 784 ks_stats.kp_send_wrong_family.value.ui64++;
784 785 return (EAFNOSUPPORT);
785 786 }
786 787
787 788 sll = &ps->ps_sock;
788 789 if (sol->sll_ifindex != sll->sll_ifindex) {
789 790 error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
790 791 cred);
791 792 if (error != 0) {
792 793 ks_stats.kp_send_open_fail.value.ui64++;
793 794 return (error);
794 795 }
795 796 mac_sdu_get(mh, NULL, &maxsdu);
796 797 new_open = B_TRUE;
797 798 }
798 799 }
799 800
800 801 mpsize = uiop->uio_resid;
801 802 if (mpsize > maxsdu) {
802 803 ks_stats.kp_send_too_big.value.ui64++;
803 804 error = EMSGSIZE;
804 805 goto done;
805 806 }
806 807
807 808 if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
808 809 ks_stats.kp_send_alloc_fail.value.ui64++;
809 810 error = ENOBUFS;
810 811 goto done;
811 812 }
812 813
813 814 mp->b_wptr = mp->b_rptr + mpsize;
814 815 error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
815 816 if (error != 0) {
816 817 ks_stats.kp_send_uiomove_fail.value.ui64++;
817 818 goto done;
818 819 }
819 820
820 821 if (ps->ps_type == SOCK_DGRAM) {
821 822 mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
822 823 if (mp0 == NULL) {
823 824 ks_stats.kp_send_no_memory.value.ui64++;
824 825 error = ENOBUFS;
825 826 goto done;
826 827 }
827 828 linkb(mp0, mp);
828 829 mp = mp0;
829 830 }
830 831
831 832 /*
832 833 * As this is sending datagrams and no promise is made about
833 834 * how or if a packet will be sent/delivered, no effort is to
834 835 * be expended in recovering from a situation where the packet
835 836 * cannot be sent - it is just dropped.
836 837 */
837 838 error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
838 839 if (error == 0) {
839 840 mp = NULL;
840 841 ks_stats.kp_send_ok.value.ui64++;
841 842 } else {
842 843 ks_stats.kp_send_failed.value.ui64++;
843 844 }
844 845
845 846 done:
846 847
847 848 if (new_open) {
848 849 ASSERT(mch != ps->ps_mch);
849 850 ASSERT(mh != ps->ps_mh);
850 851 pfp_close(mh, mch);
851 852 }
852 853 if (mp != NULL)
853 854 freemsg(mp);
854 855
855 856 return (error);
856 857
857 858 }
858 859
859 860 /*
860 861 * There's no use of a lock here, or at the bottom of pfp_packet() where
861 862 * ps_flow_ctrld is set to true, because in a situation where these two
862 863 * are racing to set the flag one way or the other, the end result is
863 864 * going to be ultimately determined by the scheduler anyway - which of
864 865 * the two threads gets the lock first? In such an operational environment,
865 866 * we've got packets arriving too fast to be delt with so packets are going
866 867 * to be dropped. Grabbing a lock just makes the drop more expensive.
867 868 */
868 869 static void
869 870 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
870 871 {
871 872 struct pfpsock *ps;
872 873
873 874 ps = (struct pfpsock *)handle;
874 875
875 876 mutex_enter(&ps->ps_lock);
876 877 ps->ps_flow_ctrld = B_FALSE;
877 878 mutex_exit(&ps->ps_lock);
878 879 }
879 880
880 881 /*
881 882 * The implementation of this ioctl() handler is intended to function
882 883 * in the absence of a bind() being made before it is called. Thus the
883 884 * function calls mac_open_by_linkid() itself to provide a handle.
884 885 * This function is structured like this:
885 886 * - determine the linkid for the interface being targeted
886 887 * - open the interface with said linkid
887 888 * - perform ioctl
888 889 * - copy results back to caller
889 890 *
890 891 * The ioctls that interact with interface flags have been implemented below
891 892 * to assume that the interface is always up and running (IFF_RUNNING) and
892 893 * to use the state of this socket to determine whether or not the network
893 894 * interface is in promiscuous mode. Thus an ioctl to get the interface flags
894 895 * of an interface that has been put in promiscuous mode by another socket
895 896 * (in the same program or different), will not report that status.
896 897 */
897 898 /* ARGSUSED */
898 899 static int
899 900 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
900 901 int32_t *rval, struct cred *cr)
901 902 {
902 903 struct timeval tival;
903 904 mac_client_promisc_type_t mtype;
904 905 struct sockaddr_dl *sock;
905 906 datalink_id_t linkid;
906 907 struct lifreq lifreq;
907 908 struct ifreq ifreq;
908 909 struct pfpsock *ps;
909 910 mac_handle_t mh;
910 911 int error;
911 912
912 913 ps = (struct pfpsock *)handle;
913 914
914 915 switch (cmd) {
915 916 /*
916 917 * ioctls that work on "struct lifreq"
917 918 */
918 919 case SIOCSLIFFLAGS :
919 920 case SIOCGLIFINDEX :
920 921 case SIOCGLIFFLAGS :
921 922 case SIOCGLIFMTU :
922 923 case SIOCGLIFHWADDR :
923 924 error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
924 925 if (error != 0)
925 926 return (error);
926 927 break;
927 928
928 929 /*
929 930 * ioctls that work on "struct ifreq".
930 931 * Not all of these have a "struct lifreq" partner, for example
931 932 * SIOCGIFHWADDR, for the simple reason that the logical interface
932 933 * does not have a hardware address.
933 934 */
934 935 case SIOCSIFFLAGS :
935 936 case SIOCGIFINDEX :
936 937 case SIOCGIFFLAGS :
937 938 case SIOCGIFMTU :
938 939 case SIOCGIFHWADDR :
939 940 error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
940 941 if (error != 0)
941 942 return (error);
942 943 break;
943 944
944 945 case SIOCGSTAMP :
945 946 tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
946 947 tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
947 948 if (get_udatamodel() == DATAMODEL_NATIVE) {
948 949 error = ddi_copyout(&tival, (void *)arg,
949 950 sizeof (tival), mod);
950 951 }
951 952 #ifdef _SYSCALL32_IMPL
952 953 else {
953 954 struct timeval32 tv32;
954 955 TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
955 956 error = ddi_copyout(&tv32, (void *)arg,
956 957 sizeof (tv32), mod);
957 958 }
958 959 #endif
959 960 return (error);
960 961 }
961 962
962 963 error = mac_open_by_linkid(linkid, &mh);
963 964 if (error != 0)
964 965 return (error);
965 966
966 967 switch (cmd) {
967 968 case SIOCGLIFINDEX :
968 969 lifreq.lifr_index = linkid;
969 970 break;
970 971
971 972 case SIOCGIFINDEX :
972 973 ifreq.ifr_index = linkid;
973 974 break;
974 975
975 976 case SIOCGIFFLAGS :
976 977 ifreq.ifr_flags = IFF_RUNNING;
977 978 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
978 979 ifreq.ifr_flags |= IFF_PROMISC;
979 980 break;
980 981
981 982 case SIOCGLIFFLAGS :
982 983 lifreq.lifr_flags = IFF_RUNNING;
983 984 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
984 985 lifreq.lifr_flags |= IFF_PROMISC;
985 986 break;
986 987
987 988 case SIOCSIFFLAGS :
988 989 if (linkid != ps->ps_linkid) {
989 990 error = EINVAL;
990 991 } else {
991 992 if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
992 993 mtype = MAC_CLIENT_PROMISC_ALL;
993 994 else
994 995 mtype = MAC_CLIENT_PROMISC_FILTERED;
995 996 error = pfp_set_promisc(ps, mtype);
996 997 }
997 998 break;
998 999
999 1000 case SIOCSLIFFLAGS :
1000 1001 if (linkid != ps->ps_linkid) {
1001 1002 error = EINVAL;
1002 1003 } else {
1003 1004 if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1004 1005 mtype = MAC_CLIENT_PROMISC_ALL;
1005 1006 else
1006 1007 mtype = MAC_CLIENT_PROMISC_FILTERED;
1007 1008 error = pfp_set_promisc(ps, mtype);
1008 1009 }
1009 1010 break;
1010 1011
1011 1012 case SIOCGIFMTU :
1012 1013 mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1013 1014 break;
1014 1015
1015 1016 case SIOCGLIFMTU :
1016 1017 mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1017 1018 break;
1018 1019
1019 1020 case SIOCGIFHWADDR :
1020 1021 if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1021 1022 error = EPFNOSUPPORT;
1022 1023 break;
1023 1024 }
1024 1025
1025 1026 if (mac_addr_len(mh) == 0) {
1026 1027 (void) memset(ifreq.ifr_addr.sa_data, 0,
1027 1028 sizeof (ifreq.ifr_addr.sa_data));
1028 1029 } else {
1029 1030 mac_unicast_primary_get(mh,
1030 1031 (uint8_t *)ifreq.ifr_addr.sa_data);
1031 1032 }
1032 1033
1033 1034 /*
1034 1035 * The behaviour here in setting sa_family is consistent
1035 1036 * with what applications such as tcpdump would expect
1036 1037 * for a Linux PF_PACKET socket.
1037 1038 */
1038 1039 ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1039 1040 break;
1040 1041
1041 1042 case SIOCGLIFHWADDR :
1042 1043 lifreq.lifr_type = 0;
1043 1044 sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1044 1045
1045 1046 if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1046 1047 error = EPFNOSUPPORT;
1047 1048 break;
1048 1049 }
1049 1050
1050 1051 /*
1051 1052 * Fill in the sockaddr_dl with link layer details. Of note,
1052 1053 * the index is returned as 0 for a couple of reasons:
1053 1054 * (1) there is no public API that uses or requires it
1054 1055 * (2) the MAC index is currently 32bits and sdl_index is 16.
1055 1056 */
1056 1057 sock->sdl_family = AF_LINK;
1057 1058 sock->sdl_index = 0;
1058 1059 sock->sdl_type = mac_type(mh);
1059 1060 sock->sdl_nlen = 0;
1060 1061 sock->sdl_alen = mac_addr_len(mh);
1061 1062 sock->sdl_slen = 0;
1062 1063 if (mac_addr_len(mh) == 0) {
1063 1064 (void) memset(sock->sdl_data, 0,
1064 1065 sizeof (sock->sdl_data));
1065 1066 } else {
1066 1067 mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1067 1068 }
1068 1069 break;
1069 1070
1070 1071 default :
1071 1072 break;
1072 1073 }
1073 1074
1074 1075 mac_close(mh);
1075 1076
1076 1077 if (error == 0) {
1077 1078 /*
1078 1079 * Only the "GET" ioctls need to copy data back to userace.
1079 1080 */
1080 1081 switch (cmd) {
1081 1082 case SIOCGLIFINDEX :
1082 1083 case SIOCGLIFFLAGS :
1083 1084 case SIOCGLIFMTU :
1084 1085 case SIOCGLIFHWADDR :
1085 1086 error = ddi_copyout(&lifreq, (void *)arg,
1086 1087 sizeof (lifreq), mod);
1087 1088 break;
1088 1089
1089 1090 case SIOCGIFINDEX :
1090 1091 case SIOCGIFFLAGS :
1091 1092 case SIOCGIFMTU :
1092 1093 case SIOCGIFHWADDR :
1093 1094 error = ddi_copyout(&ifreq, (void *)arg,
1094 1095 sizeof (ifreq), mod);
1095 1096 break;
1096 1097 default :
1097 1098 break;
1098 1099 }
1099 1100 }
1100 1101
1101 1102 return (error);
1102 1103 }
1103 1104
1104 1105 /*
1105 1106 * Closing the socket requires that all open references to network
1106 1107 * interfaces be closed.
1107 1108 */
1108 1109 /* ARGSUSED */
1109 1110 static int
1110 1111 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1111 1112 {
1112 1113 struct pfpsock *ps = (struct pfpsock *)handle;
1113 1114
1114 1115 if (ps->ps_phd != 0) {
1115 1116 mac_promisc_remove(ps->ps_phd);
1116 1117 ps->ps_phd = 0;
1117 1118 }
1118 1119
1119 1120 if (ps->ps_mch != 0) {
1120 1121 mac_client_close(ps->ps_mch, 0);
1121 1122 ps->ps_mch = 0;
1122 1123 }
1123 1124
1124 1125 if (ps->ps_mh != 0) {
1125 1126 mac_close(ps->ps_mh);
1126 1127 ps->ps_mh = 0;
1127 1128 }
1128 1129
1129 1130 kmem_free(ps, sizeof (*ps));
1130 1131
1131 1132 return (0);
1132 1133 }
1133 1134
1134 1135 /* ************************************************************************* */
1135 1136
1136 1137 /*
1137 1138 * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1138 1139 * determine the linkid for the interface name stored in that structure.
1139 1140 * name is used as a buffer so that we can ensure a trailing \0 is appended
1140 1141 * to the name safely.
1141 1142 */
1142 1143 static int
1143 1144 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1144 1145 datalink_id_t *linkidp, int mode)
1145 1146 {
1146 1147 char name[IFNAMSIZ + 1];
1147 1148 int error;
1148 1149
1149 1150 if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1150 1151 return (EFAULT);
1151 1152
1152 1153 (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1153 1154
1154 1155 error = dls_mgmt_get_linkid(name, linkidp);
1155 1156 if (error != 0)
1156 1157 error = dls_devnet_macname2linkid(name, linkidp);
1157 1158
1158 1159 return (error);
1159 1160 }
1160 1161
1161 1162 /*
1162 1163 * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1163 1164 * determine the linkid for the interface name stored in that structure.
1164 1165 * name is used as a buffer so that we can ensure a trailing \0 is appended
1165 1166 * to the name safely.
1166 1167 */
1167 1168 static int
1168 1169 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1169 1170 datalink_id_t *linkidp, int mode)
1170 1171 {
1171 1172 char name[LIFNAMSIZ + 1];
1172 1173 int error;
1173 1174
1174 1175 if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1175 1176 return (EFAULT);
1176 1177
1177 1178 (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1178 1179
1179 1180 error = dls_mgmt_get_linkid(name, linkidp);
1180 1181 if (error != 0)
1181 1182 error = dls_devnet_macname2linkid(name, linkidp);
1182 1183
1183 1184 return (error);
1184 1185 }
1185 1186
1186 1187 /*
1187 1188 * Although there are several new SOL_PACKET options that can be set and
1188 1189 * are specific to this implementation of PF_PACKET, the current API does
1189 1190 * not support doing a get on them to retrieve accompanying status. Thus
1190 1191 * it is only currently possible to use SOL_PACKET with getsockopt to
1191 1192 * retrieve statistical information. This remains consistant with the
1192 1193 * Linux API at the time of writing.
1193 1194 */
1194 1195 static int
1195 1196 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1196 1197 void *optval, socklen_t *optlenp)
1197 1198 {
1198 1199 struct pfpsock *ps;
1199 1200 struct tpacket_stats_short tpss;
1200 1201 int error = 0;
1201 1202
1202 1203 ps = (struct pfpsock *)handle;
1203 1204
1204 1205 switch (option_name) {
1205 1206 case PACKET_STATISTICS :
1206 1207 if (*optlenp < sizeof (ps->ps_stats)) {
1207 1208 error = EINVAL;
1208 1209 break;
1209 1210 }
1210 1211 *optlenp = sizeof (ps->ps_stats);
1211 1212 bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1212 1213 break;
1213 1214 case PACKET_STATISTICS_SHORT :
1214 1215 if (*optlenp < sizeof (tpss)) {
1215 1216 error = EINVAL;
1216 1217 break;
1217 1218 }
1218 1219 *optlenp = sizeof (tpss);
1219 1220 tpss.tp_packets = ps->ps_stats.tp_packets;
1220 1221 tpss.tp_drops = ps->ps_stats.tp_drops;
1221 1222 bcopy(&tpss, optval, sizeof (tpss));
1222 1223 break;
1223 1224 default :
1224 1225 error = EINVAL;
1225 1226 break;
1226 1227 }
1227 1228
1228 1229 return (error);
1229 1230 }
1230 1231
1231 1232 /*
1232 1233 * The SOL_PACKET level for socket options supports three options,
1233 1234 * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1234 1235 * This function is responsible for mapping the two socket options
1235 1236 * that manage multicast membership into the appropriate internal
1236 1237 * function calls to bring the option into effect. Whilst direct
1237 1238 * changes to the multicast membership (ADD/DROP) groups is handled
1238 1239 * by calls directly into the mac module, changes to the promiscuos
1239 1240 * mode are vectored through pfp_set_promisc() so that the logic for
1240 1241 * managing the promiscuous mode is in one place.
1241 1242 */
1242 1243 /* ARGSUSED */
1243 1244 static int
1244 1245 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1245 1246 const void *optval, socklen_t optlen)
1246 1247 {
1247 1248 struct packet_mreq mreq;
1248 1249 struct pfpsock *ps;
1249 1250 int error = 0;
1250 1251 int opt;
1251 1252
1252 1253 ps = (struct pfpsock *)handle;
1253 1254 if (!ps->ps_bound)
1254 1255 return (EPROTO);
1255 1256
1256 1257 if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1257 1258 (option_name == PACKET_DROP_MEMBERSHIP)) {
1258 1259 if (!ps->ps_bound)
1259 1260 return (EPROTO);
1260 1261 bcopy(optval, &mreq, sizeof (mreq));
1261 1262 if (ps->ps_linkid != mreq.mr_ifindex)
1262 1263 return (EINVAL);
1263 1264 }
1264 1265
1265 1266 switch (option_name) {
1266 1267 case PACKET_ADD_MEMBERSHIP :
1267 1268 switch (mreq.mr_type) {
1268 1269 case PACKET_MR_MULTICAST :
1269 1270 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1270 1271 return (EINVAL);
1271 1272
1272 1273 error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1273 1274 break;
1274 1275
1275 1276 case PACKET_MR_PROMISC :
1276 1277 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1277 1278 break;
1278 1279
1279 1280 case PACKET_MR_ALLMULTI :
1280 1281 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1281 1282 break;
1282 1283 }
1283 1284 break;
1284 1285
1285 1286 case PACKET_DROP_MEMBERSHIP :
1286 1287 switch (mreq.mr_type) {
1287 1288 case PACKET_MR_MULTICAST :
1288 1289 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1289 1290 return (EINVAL);
1290 1291
1291 1292 mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1292 1293 break;
1293 1294
1294 1295 case PACKET_MR_PROMISC :
1295 1296 if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1296 1297 return (EINVAL);
1297 1298 error = pfp_set_promisc(ps,
1298 1299 MAC_CLIENT_PROMISC_FILTERED);
1299 1300 break;
1300 1301
1301 1302 case PACKET_MR_ALLMULTI :
1302 1303 if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1303 1304 return (EINVAL);
1304 1305 error = pfp_set_promisc(ps,
1305 1306 MAC_CLIENT_PROMISC_FILTERED);
1306 1307 break;
1307 1308 }
1308 1309 break;
1309 1310
1310 1311 case PACKET_AUXDATA :
1311 1312 if (optlen == sizeof (int)) {
1312 1313 opt = *(int *)optval;
1313 1314 ps->ps_auxdata = (opt != 0);
1314 1315 } else {
1315 1316 error = EINVAL;
1316 1317 }
1317 1318 break;
1318 1319 default :
1319 1320 error = EINVAL;
1320 1321 break;
1321 1322 }
1322 1323
1323 1324 return (error);
1324 1325 }
1325 1326
1326 1327 /*
1327 1328 * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1328 1329 * SO_ATTACH_FILTER and SO_DETACH_FILTER.
|
↓ open down ↓ |
867 lines elided |
↑ open up ↑ |
1329 1330 *
1330 1331 * Both of these setsockopt values are candidates for being handled by the
1331 1332 * socket layer itself in future, however this requires understanding how
1332 1333 * they would interact with all other sockets.
1333 1334 */
1334 1335 static int
1335 1336 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1336 1337 const void *optval, socklen_t optlen)
1337 1338 {
1338 1339 struct bpf_program prog;
1339 - struct bpf_insn *fcode;
1340 + ip_bpf_insn_t *fcode;
1340 1341 struct pfpsock *ps;
1341 1342 struct sock_proto_props sopp;
1342 1343 int error = 0;
1343 1344 int size;
1344 1345
1345 1346 ps = (struct pfpsock *)handle;
1346 1347
1347 1348 switch (option_name) {
1348 1349 case SO_ATTACH_FILTER :
1349 1350 #ifdef _LP64
1350 1351 if (optlen == sizeof (struct bpf_program32)) {
1351 1352 struct bpf_program32 prog32;
1352 1353
1353 1354 bcopy(optval, &prog32, sizeof (prog32));
1354 1355 prog.bf_len = prog32.bf_len;
1355 1356 prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1356 1357 } else
1357 1358 #endif
1358 1359 if (optlen == sizeof (struct bpf_program)) {
1359 1360 bcopy(optval, &prog, sizeof (prog));
1360 1361 } else if (optlen != sizeof (struct bpf_program)) {
1361 1362 return (EINVAL);
1362 1363 }
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
1363 1364 if (prog.bf_len > BPF_MAXINSNS)
1364 1365 return (EINVAL);
1365 1366
1366 1367 size = prog.bf_len * sizeof (*prog.bf_insns);
1367 1368 fcode = kmem_alloc(size, KM_SLEEP);
1368 1369 if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1369 1370 kmem_free(fcode, size);
1370 1371 return (EFAULT);
1371 1372 }
1372 1373
1373 - if (bpf_validate(fcode, (int)prog.bf_len)) {
1374 + if (ip_bpf_validate(fcode, prog.bf_len)) {
1374 1375 rw_enter(&ps->ps_bpflock, RW_WRITER);
1375 1376 pfp_release_bpf(ps);
1376 - ps->ps_bpf.bf_insns = fcode;
1377 + ps->ps_bpf.bf_insns = (struct bpf_insn *)fcode;
1377 1378 ps->ps_bpf.bf_len = size;
1378 1379 rw_exit(&ps->ps_bpflock);
1379 1380
1380 1381 return (0);
1381 1382 }
1382 1383 kmem_free(fcode, size);
1383 1384 error = EINVAL;
1384 1385 break;
1385 1386
1386 1387 case SO_DETACH_FILTER :
1387 1388 pfp_release_bpf(ps);
1388 1389 break;
1389 1390
1390 1391 case SO_RCVBUF :
1391 1392 size = *(int32_t *)optval;
1392 1393 if (size > sockmod_pfp_rcvbuf_max || size < 0)
1393 1394 return (ENOBUFS);
1394 1395 sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1395 1396 sopp.sopp_rxhiwat = size;
1396 1397 ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1397 1398 ps->ps_rcvbuf = size;
1398 1399 break;
1399 1400
1400 1401 default :
1401 1402 error = ENOPROTOOPT;
1402 1403 break;
1403 1404 }
1404 1405
1405 1406 return (error);
1406 1407 }
1407 1408
1408 1409 /*
1409 1410 * pfp_open_index is an internal function used to open a MAC device by
1410 1411 * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1411 1412 * because some of the interfaces provided by the mac layer require either
1412 1413 * only the mac_handle_t or both it and mac_handle_t.
1413 1414 *
1414 1415 * Whilst inside the kernel we can access data structures supporting any
1415 1416 * zone, access to interfaces from non-global zones is restricted to those
1416 1417 * interfaces (if any) that are exclusively assigned to a zone.
1417 1418 */
1418 1419 static int
1419 1420 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1420 1421 cred_t *cred)
1421 1422 {
1422 1423 mac_client_handle_t mch;
1423 1424 zoneid_t ifzoneid;
1424 1425 mac_handle_t mh;
1425 1426 zoneid_t zoneid;
1426 1427 int error;
1427 1428
1428 1429 mh = 0;
1429 1430 mch = 0;
1430 1431 error = mac_open_by_linkid(index, &mh);
1431 1432 if (error != 0)
1432 1433 goto bad_open;
1433 1434
1434 1435 error = mac_client_open(mh, &mch, NULL,
1435 1436 MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1436 1437 if (error != 0)
1437 1438 goto bad_open;
1438 1439
1439 1440 zoneid = crgetzoneid(cred);
1440 1441 if (zoneid != GLOBAL_ZONEID) {
1441 1442 mac_perim_handle_t perim;
1442 1443
1443 1444 mac_perim_enter_by_mh(mh, &perim);
1444 1445 error = dls_link_getzid(mac_name(mh), &ifzoneid);
1445 1446 mac_perim_exit(perim);
1446 1447 if (error != 0)
1447 1448 goto bad_open;
1448 1449 if (ifzoneid != zoneid) {
1449 1450 error = EACCES;
1450 1451 goto bad_open;
1451 1452 }
1452 1453 }
1453 1454
1454 1455 *mcip = mch;
1455 1456 *mhp = mh;
1456 1457
1457 1458 return (0);
1458 1459 bad_open:
1459 1460 if (mch != 0)
1460 1461 mac_client_close(mch, 0);
1461 1462 if (mh != 0)
1462 1463 mac_close(mh);
1463 1464 return (error);
1464 1465 }
1465 1466
1466 1467 static void
1467 1468 pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1468 1469 {
1469 1470 mac_client_close(mch, 0);
1470 1471 mac_close(mh);
1471 1472 }
1472 1473
1473 1474 /*
1474 1475 * The purpose of this function is to provide a single place where we free
1475 1476 * the loaded BPF program and reset all pointers/counters associated with
1476 1477 * it.
1477 1478 */
1478 1479 static void
1479 1480 pfp_release_bpf(struct pfpsock *ps)
1480 1481 {
1481 1482 if (ps->ps_bpf.bf_len != 0) {
1482 1483 kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1483 1484 ps->ps_bpf.bf_len = 0;
1484 1485 ps->ps_bpf.bf_insns = NULL;
1485 1486 }
1486 1487 }
1487 1488
1488 1489 /*
1489 1490 * Set the promiscuous mode of a network interface.
1490 1491 * This function only calls the mac layer when there is a change to the
1491 1492 * status of a network interface's promiscous mode. Tracking of how many
1492 1493 * sockets have the network interface in promiscuous mode, and thus the
1493 1494 * control over the physical device's status, is left to the mac layer.
1494 1495 */
1495 1496 static int
1496 1497 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1497 1498 {
1498 1499 int error = 0;
1499 1500 int flags;
1500 1501
1501 1502 /*
1502 1503 * There are 4 combinations of turnon/ps_promisc.
1503 1504 * This if handles 2 (both false, both true) and the if() below
1504 1505 * handles the remaining one - when change is required.
1505 1506 */
1506 1507 if (turnon == ps->ps_promisc)
1507 1508 return (error);
1508 1509
1509 1510 if (ps->ps_phd != 0) {
1510 1511 mac_promisc_remove(ps->ps_phd);
1511 1512 ps->ps_phd = 0;
1512 1513
1513 1514 /*
1514 1515 * ps_promisc is set here in case the call to mac_promisc_add
1515 1516 * fails: leaving it to indicate that the interface is still
1516 1517 * in some sort of promiscuous mode is false.
1517 1518 */
1518 1519 if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1519 1520 ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1520 1521 flags = MAC_PROMISC_FLAGS_NO_PHYS;
1521 1522 } else {
1522 1523 flags = 0;
1523 1524 }
1524 1525 flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1525 1526 }
1526 1527
1527 1528 error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1528 1529 &ps->ps_phd, flags);
1529 1530 if (error == 0)
1530 1531 ps->ps_promisc = turnon;
1531 1532
1532 1533 return (error);
1533 1534 }
1534 1535
1535 1536 /*
1536 1537 * This table maps the MAC types in Solaris to the ARPHRD_* values used
1537 1538 * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1538 1539 *
1539 1540 * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1540 1541 * they are pulled from <netpacket/packet.h>, thus it acts as a source
1541 1542 * of supplementary information to the ARP table.
1542 1543 */
1543 1544 static uint_t arphrd_to_dl[][2] = {
1544 1545 { ARPHRD_IEEE80211, DL_WIFI },
1545 1546 { ARPHRD_TUNNEL, DL_IPV4 },
1546 1547 { ARPHRD_TUNNEL, DL_IPV6 },
1547 1548 { ARPHRD_TUNNEL, DL_6TO4 },
1548 1549 { ARPHRD_AX25, DL_X25 },
1549 1550 { ARPHRD_ATM, DL_ATM },
1550 1551 { 0, 0 }
1551 1552 };
1552 1553
1553 1554 static int
1554 1555 pfp_dl_to_arphrd(int dltype)
1555 1556 {
1556 1557 int i;
1557 1558
1558 1559 for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1559 1560 if (arphrd_to_dl[i][1] == dltype)
1560 1561 return (arphrd_to_dl[i][0]);
1561 1562 return (arp_hw_type(dltype));
1562 1563 }
|
↓ open down ↓ |
176 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX