1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Joyent, Inc.
25 */
26
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/stropts.h>
31 #include <sys/socket.h>
32 #include <sys/socketvar.h>
33 #include <sys/socket_proto.h>
34 #include <sys/sockio.h>
35 #include <sys/strsun.h>
36 #include <sys/kstat.h>
37 #include <sys/modctl.h>
38 #include <sys/policy.h>
39 #include <sys/priv_const.h>
40 #include <sys/tihdr.h>
41 #include <sys/zone.h>
42 #include <sys/time.h>
43 #include <sys/ethernet.h>
44 #include <sys/llc1.h>
45 #include <fs/sockfs/sockcommon.h>
46 #include <net/if.h>
47 #include <inet/ip_arp.h>
48
49 #include <sys/dls.h>
50 #include <sys/mac.h>
51 #include <sys/mac_client.h>
52 #include <sys/mac_provider.h>
53 #include <sys/mac_client_priv.h>
54 #include <inet/bpf.h>
55
56 #include <netpacket/packet.h>
57
/*
 * Forward declarations of the file-local helper routines.
 */
static void pfp_close(mac_handle_t, mac_client_handle_t);
static int pfp_dl_to_arphrd(int);
static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
    socklen_t *);
static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
    int);
static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
    cred_t *);
/* Receive callback handed to mac_promisc_add() by sdpfp_bind(). */
static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
static void pfp_release_bpf(struct pfpsock *);
static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);
static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);

/*
 * PFP sockfs operations
 * Most are currently no-ops because they have no meaning for a connectionless
 * socket.
 */
static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
    sock_upcalls_t *, int, struct cred *);
static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
    struct cred *);
static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
static void sdpfp_clr_flowctrl(sock_lower_handle_t);
static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
    socklen_t *, struct cred *);
static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
    struct cred *);
static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
    struct cred *);
static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
    socklen_t, struct cred *);

/* Socket creation entry point registered through the smod_reg_t below. */
static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
    uint_t *, int *, int, cred_t *);

/* Module-wide set up and tear down, driven from _init() and _fini(). */
static int sockpfp_init(void);
static void sockpfp_fini(void);
100
/*
 * pfp_ksp holds the kstat handle created in sockpfp_init() (it stays NULL if
 * creation failed). pfp_kstats is the template naming every counter, and
 * ks_stats is the working copy, seeded from the template in sockpfp_init(),
 * that the receive and send paths increment.
 */
static kstat_t *pfp_ksp;
static pfp_kstats_t ks_stats;
static pfp_kstats_t pfp_kstats = {
	/*
	 * Each one of these kstats is a different return path in handling
	 * a packet received from the mac layer.
	 */
	{ "recvMacHeaderFail",	KSTAT_DATA_UINT64 },
	{ "recvBadProtocol",	KSTAT_DATA_UINT64 },
	{ "recvAllocbFail",	KSTAT_DATA_UINT64 },
	{ "recvOk",		KSTAT_DATA_UINT64 },
	{ "recvFail",		KSTAT_DATA_UINT64 },
	{ "recvFiltered",	KSTAT_DATA_UINT64 },
	{ "recvFlowControl",	KSTAT_DATA_UINT64 },
	/*
	 * A global set of counters is maintained to track the behaviour
	 * of the system (kernel & applications) in sending packets.
	 */
	{ "sendUnbound",	KSTAT_DATA_UINT64 },
	{ "sendFailed",		KSTAT_DATA_UINT64 },
	{ "sendTooBig",		KSTAT_DATA_UINT64 },
	{ "sendAllocFail",	KSTAT_DATA_UINT64 },
	{ "sendUiomoveFail",	KSTAT_DATA_UINT64 },
	{ "sendNoMemory",	KSTAT_DATA_UINT64 },
	{ "sendOpenFail",	KSTAT_DATA_UINT64 },
	{ "sendWrongFamily",	KSTAT_DATA_UINT64 },
	{ "sendShortMsg",	KSTAT_DATA_UINT64 },
	{ "sendOk",		KSTAT_DATA_UINT64 }
};
130
/*
 * Downcall vector handed back to the caller of sockpfp_create(). The
 * initialiser is positional. Operations this module does not implement are
 * filled with the generic sock_*_notsupp handlers, and one slot is left NULL
 * (no handler supplied).
 */
sock_downcalls_t pfp_downcalls = {
	sdpfp_activate,
	sock_accept_notsupp,
	sdpfp_bind,
	sock_listen_notsupp,
	sock_connect_notsupp,
	sock_getpeername_notsupp,
	sock_getsockname_notsupp,
	sdpfp_getsockopt,
	sdpfp_setsockopt,
	sock_send_notsupp,
	sdpfp_senduio,
	NULL,
	sock_poll_notsupp,
	sock_shutdown_notsupp,
	sdpfp_clr_flowctrl,
	sdpfp_ioctl,
	sdpfp_close,
};
150
/*
 * Socket module registration record: version constants, the module name
 * and the socket creation entry point. It is referenced from modlsockmod
 * below. The final slot is left NULL (no handler supplied).
 */
static smod_reg_t sinfo = {
	SOCKMOD_VERSION,
	"sockpfp",
	SOCK_UC_VERSION,
	SOCK_DC_VERSION,
	sockpfp_create,
	NULL
};
159
/*
 * Protocol translation table consulted by sockpfp_create(). Column 0 is the
 * protocol number passed to socket(); column 1 is the value stored in
 * ps_proto in its place. A stored value of 0 makes pfp_packet() skip its
 * SAP comparison for that socket.
 */
static int accepted_protos[3][2] = {
	{ ETH_P_ALL,	0 },
	{ ETH_P_802_2,	LLC_SNAP_SAP },
	{ ETH_P_803_3,	0 },
};
165
/*
 * This sets an upper bound on the size of the receive buffer for a PF_PACKET
 * socket. More properly, this should be controlled through ipadm, as is done
 * for TCP, UDP, SCTP, etc. Until that's done, this provides a hard cap of
 * 4 MB. It is a non-static global, which leaves an opportunity for it to be
 * changed should that be needed.
 */
int sockmod_pfp_rcvbuf_max = 1024 * 1024 * 4;
173
/*
 * Module linkage information for the kernel.
 *
 * modlsockmod describes this module as a socket module and points at the
 * registration record (sinfo). modlinkage is the structure passed to
 * mod_install(), mod_remove() and mod_info() by _init(), _fini() and _info().
 */
static struct modlsockmod modlsockmod = {
	&mod_sockmodops, "PF Packet socket module", &sinfo
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsockmod,
	NULL
};
186
187 int
188 _init(void)
189 {
190 int error;
191
192 error = sockpfp_init();
193 if (error != 0)
194 return (error);
195
196 error = mod_install(&modlinkage);
197 if (error != 0)
198 sockpfp_fini();
199
200 return (error);
201 }
202
203 int
204 _fini(void)
205 {
206 int error;
207
208 error = mod_remove(&modlinkage);
209 if (error == 0)
210 sockpfp_fini();
211
212 return (error);
213 }
214
215 int
216 _info(struct modinfo *modinfop)
217 {
218 return (mod_info(&modlinkage, modinfop));
219 }
220
221 /*
222 * sockpfp_init: called as part of the initialisation of the module when
223 * loaded into the kernel.
224 *
225 * Being able to create and record the kstats data in the kernel is not
226 * considered to be vital to the operation of this kernel module, thus
227 * its failure is tolerated.
228 */
229 static int
230 sockpfp_init(void)
231 {
232 (void) memset(&ks_stats, 0, sizeof (ks_stats));
233
234 (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
235
236 pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
237 KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
238 KSTAT_FLAG_VIRTUAL);
239 if (pfp_ksp != NULL) {
240 pfp_ksp->ks_data = &ks_stats;
241 kstat_install(pfp_ksp);
242 }
243
244 return (0);
245 }
246
247 /*
248 * sockpfp_fini: called when the operating system wants to unload the
249 * socket module from the kernel.
250 */
251 static void
252 sockpfp_fini(void)
253 {
254 if (pfp_ksp != NULL)
255 kstat_delete(pfp_ksp);
256 }
257
258 /*
259 * Due to sockets being created read-write by default, all PF_PACKET sockets
260 * therefore require the NET_RAWACCESS priviliege, even if the socket is only
261 * being used for reading packets from.
262 *
263 * This create function enforces this module only being used with PF_PACKET
264 * sockets and the policy that we support via the config file in sock2path.d:
265 * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
266 */
267 /* ARGSUSED */
268 static sock_lower_handle_t
269 sockpfp_create(int family, int type, int proto,
270 sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
271 int sflags, cred_t *cred)
272 {
273 struct pfpsock *ps;
274 int kmflags;
275 int newproto;
276 int i;
277
278 if (secpolicy_net_rawaccess(cred) != 0) {
279 *errorp = EACCES;
280 return (NULL);
281 }
282
283 if (family != AF_PACKET) {
284 *errorp = EAFNOSUPPORT;
285 return (NULL);
286 }
287
288 if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
289 *errorp = ESOCKTNOSUPPORT;
290 return (NULL);
291 }
292
293 /*
294 * First check to see if the protocol number passed in via the socket
295 * creation should be mapped to a different number for internal use.
296 */
297 for (i = 0, newproto = -1;
298 i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
299 if (accepted_protos[i][0] == proto) {
300 newproto = accepted_protos[i][1];
301 break;
302 }
303 }
304
305 /*
306 * If the mapping of the protocol that was under 0x800 failed to find
307 * a local equivalent then fail the socket creation. If the protocol
308 * for the socket is over 0x800 and it was not found in the mapping
309 * table above, then use the value as is.
310 */
311 if (newproto == -1) {
312 if (proto < 0x800) {
313 *errorp = ENOPROTOOPT;
314 return (NULL);
315 }
316 newproto = proto;
317 }
318 proto = newproto;
319
320 kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
321 ps = kmem_zalloc(sizeof (*ps), kmflags);
322 if (ps == NULL) {
323 *errorp = ENOMEM;
324 return (NULL);
325 }
326
327 ps->ps_type = type;
328 ps->ps_proto = proto;
329 rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
330 mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
331
332 *sock_downcalls = &pfp_downcalls;
333 /*
334 * Setting this causes bytes from a packet that do not fit into the
335 * destination user buffer to be discarded. Thus the API is one
336 * packet per receive and callers are required to use a buffer large
337 * enough for the biggest packet that the interface can provide.
338 */
339 *smodep = SM_ATOMIC;
340
341 return ((sock_lower_handle_t)ps);
342 }
343
344 /* ************************************************************************* */
345
346 /*
347 * pfp_packet is the callback function that is given to the mac layer for
348 * PF_PACKET to receive packets with. One packet at a time is passed into
349 * this function from the mac layer. Each packet is a private copy given
350 * to PF_PACKET to modify or free as it wishes and does not harm the original
351 * packet from which it was cloned.
352 */
353 /* ARGSUSED */
354 static void
355 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
356 {
357 struct T_unitdata_ind *tunit;
358 struct sockaddr_ll *sll;
359 struct sockaddr_ll *sol;
360 mac_header_info_t hdr;
361 struct pfpsock *ps;
362 size_t tusz;
363 mblk_t *mp0;
364 int error;
365
366 if (mp == NULL)
367 return;
368
369 ps = arg;
370 if (ps->ps_flow_ctrld) {
371 ps->ps_flow_ctrl_drops++;
372 ps->ps_stats.tp_drops++;
373 ks_stats.kp_recv_flow_cntrld.value.ui64++;
374 freemsg(mp);
375 return;
376 }
377
378 if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
379 /*
380 * Can't decode the packet header information so drop it.
381 */
382 ps->ps_stats.tp_drops++;
383 ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
384 freemsg(mp);
385 return;
386 }
387
388 if (mac_type(ps->ps_mh) == DL_ETHER &&
389 hdr.mhi_bindsap == ETHERTYPE_VLAN) {
390 struct ether_vlan_header *evhp;
391 struct ether_vlan_header evh;
392
393 hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
394 hdr.mhi_istagged = B_TRUE;
395
396 if (MBLKL(mp) >= sizeof (*evhp)) {
397 evhp = (struct ether_vlan_header *)mp->b_rptr;
398 } else {
399 int sz = sizeof (*evhp);
400 char *s = (char *)&evh;
401 mblk_t *tmp;
402 int len;
403
404 for (tmp = mp; sz > 0 && tmp != NULL;
405 tmp = tmp->b_cont) {
406 len = min(sz, MBLKL(tmp));
407 bcopy(tmp->b_rptr, s, len);
408 sz -= len;
409 }
410 evhp = &evh;
411 }
412 hdr.mhi_tci = ntohs(evhp->ether_tci);
413 hdr.mhi_bindsap = ntohs(evhp->ether_type);
414 }
415
416 if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
417 /*
418 * The packet is not of interest to this socket so
419 * drop it on the floor. Here the SAP is being used
420 * as a very course filter.
421 */
422 ps->ps_stats.tp_drops++;
423 ks_stats.kp_recv_bad_proto.value.ui64++;
424 freemsg(mp);
425 return;
426 }
427
428 /*
429 * This field is not often set, even for ethernet,
430 * by mac_header_info, so compute it if it is 0.
431 */
432 if (hdr.mhi_pktsize == 0)
433 hdr.mhi_pktsize = msgdsize(mp);
434
435 /*
436 * If a BPF filter is present, pass the raw packet into that.
437 * A failed match will result in zero being returned, indicating
438 * that this socket is not interested in the packet.
439 */
440 if (ps->ps_bpf.bf_len != 0) {
441 uchar_t *buffer;
442 int buflen;
443
444 buflen = MBLKL(mp);
445 if (hdr.mhi_pktsize == buflen) {
446 buffer = mp->b_rptr;
447 } else {
448 buflen = 0;
449 buffer = (uchar_t *)mp;
450 }
451 rw_enter(&ps->ps_bpflock, RW_READER);
452 if (ip_bpf_filter((ip_bpf_insn_t *)ps->ps_bpf.bf_insns, buffer,
453 hdr.mhi_pktsize, buflen) == 0) {
454 rw_exit(&ps->ps_bpflock);
455 ps->ps_stats.tp_drops++;
456 ks_stats.kp_recv_filtered.value.ui64++;
457 freemsg(mp);
458 return;
459 }
460 rw_exit(&ps->ps_bpflock);
461 }
462
463 if (ps->ps_type == SOCK_DGRAM) {
464 /*
465 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
466 * past the link layer header.
467 */
468 mp->b_rptr += hdr.mhi_hdrsize;
469 hdr.mhi_pktsize -= hdr.mhi_hdrsize;
470 }
471
472 tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
473 if (ps->ps_auxdata) {
474 tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
475 tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
476 }
477
478 /*
479 * It is tempting to think that this could be optimised by having
480 * the base mblk_t allocated and hung off the pfpsock structure,
481 * except that then another one would need to be allocated for the
482 * sockaddr_ll that is included. Even creating a template to copy
483 * from is of questionable value, as read-write from one structure
484 * to the other is going to be slower than all of the initialisation.
485 */
486 mp0 = allocb(tusz, BPRI_HI);
487 if (mp0 == NULL) {
488 ps->ps_stats.tp_drops++;
489 ks_stats.kp_recv_alloc_fail.value.ui64++;
490 freemsg(mp);
491 return;
492 }
493
494 (void) memset(mp0->b_rptr, 0, tusz);
495
496 mp0->b_datap->db_type = M_PROTO;
497 mp0->b_wptr = mp0->b_rptr + tusz;
498
499 tunit = (struct T_unitdata_ind *)mp0->b_rptr;
500 tunit->PRIM_type = T_UNITDATA_IND;
501 tunit->SRC_length = sizeof (struct sockaddr);
502 tunit->SRC_offset = sizeof (*tunit);
503
504 sol = &ps->ps_sock;
505 sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
506 sll->sll_ifindex = sol->sll_ifindex;
507 sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
508 sll->sll_halen = sol->sll_halen;
509 if (hdr.mhi_saddr != NULL)
510 (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
511
512 switch (hdr.mhi_dsttype) {
513 case MAC_ADDRTYPE_MULTICAST :
514 sll->sll_pkttype = PACKET_MULTICAST;
515 break;
516 case MAC_ADDRTYPE_BROADCAST :
517 sll->sll_pkttype = PACKET_BROADCAST;
518 break;
519 case MAC_ADDRTYPE_UNICAST :
520 if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
521 sll->sll_pkttype = PACKET_HOST;
522 else
523 sll->sll_pkttype = PACKET_OTHERHOST;
524 break;
525 }
526
527 if (ps->ps_auxdata) {
528 struct tpacket_auxdata *aux;
529 struct T_opthdr *topt;
530
531 tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
532 sizeof (struct sockaddr_ll));
533 tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
534 _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
535
536 topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
537 aux = (struct tpacket_auxdata *)
538 ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
539
540 topt->len = tunit->OPT_length;
541 topt->level = SOL_PACKET;
542 topt->name = PACKET_AUXDATA;
543 topt->status = 0;
544 /*
545 * libpcap doesn't seem to use any other field,
546 * so it isn't clear how they should be filled in.
547 */
548 aux->tp_vlan_vci = hdr.mhi_tci;
549 }
550
551 linkb(mp0, mp);
552
553 (void) gethrestime(&ps->ps_timestamp);
554
555 ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
556 &error, NULL);
557
558 if (error == 0) {
559 ps->ps_stats.tp_packets++;
560 ks_stats.kp_recv_ok.value.ui64++;
561 } else {
562 mutex_enter(&ps->ps_lock);
563 if (error == ENOSPC) {
564 ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
565 &error, NULL);
566 if (error == ENOSPC)
567 ps->ps_flow_ctrld = B_TRUE;
568 }
569 mutex_exit(&ps->ps_lock);
570 ps->ps_stats.tp_drops++;
571 ks_stats.kp_recv_fail.value.ui64++;
572 }
573 }
574
575 /*
576 * Bind a PF_PACKET socket to a network interface.
577 *
578 * The default operation of this bind() is to place the socket (and thus the
579 * network interface) into promiscuous mode. It is then up to the application
580 * to turn that down by issuing the relevant ioctls, if desired.
581 */
582 static int
583 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
584 socklen_t addrlen, struct cred *cred)
585 {
586 struct sockaddr_ll *addr_ll, *sol;
587 mac_client_handle_t mch;
588 struct pfpsock *ps;
589 mac_handle_t mh;
590 int error;
591
592 ps = (struct pfpsock *)handle;
593 if (ps->ps_bound)
594 return (EINVAL);
595
596 if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
597 return (EINVAL);
598
599 addr_ll = (struct sockaddr_ll *)addr;
600
601 error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
602 if (error != 0)
603 return (error);
604 /*
605 * Ensure that each socket is only bound once.
606 */
607 mutex_enter(&ps->ps_lock);
608 if (ps->ps_mh != 0) {
609 mutex_exit(&ps->ps_lock);
610 pfp_close(mh, mch);
611 return (EADDRINUSE);
612 }
613 ps->ps_mh = mh;
614 ps->ps_mch = mch;
615 mutex_exit(&ps->ps_lock);
616
617 /*
618 * Cache all of the information from bind so that it's in an easy
619 * place to get at when packets are received.
620 */
621 sol = &ps->ps_sock;
622 sol->sll_family = AF_PACKET;
623 sol->sll_ifindex = addr_ll->sll_ifindex;
624 sol->sll_protocol = addr_ll->sll_protocol;
625 sol->sll_halen = mac_addr_len(ps->ps_mh);
626 mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
627 mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
628 ps->ps_linkid = addr_ll->sll_ifindex;
629
630 error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
631 pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
632 if (error == 0) {
633 ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
634 ps->ps_bound = B_TRUE;
635 }
636
637 return (error);
638 }
639
640 /* ARGSUSED */
641 static void
642 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
643 sock_upcalls_t *upcalls, int flags, cred_t *cred)
644 {
645 struct pfpsock *ps;
646
647 ps = (struct pfpsock *)lower;
648 ps->ps_upper = upper;
649 ps->ps_upcalls = upcalls;
650 }
651
652 /*
653 * This module only implements getting socket options for the new socket
654 * option level (SOL_PACKET) that it introduces. All other requests are
655 * passed back to the sockfs layer.
656 */
657 /* ARGSUSED */
658 static int
659 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
660 void *optval, socklen_t *optlenp, struct cred *cred)
661 {
662 struct pfpsock *ps;
663 int error = 0;
664
665 ps = (struct pfpsock *)handle;
666
667 switch (level) {
668 case SOL_PACKET :
669 error = pfp_getpacket_sockopt(handle, option_name, optval,
670 optlenp);
671 break;
672
673 case SOL_SOCKET :
674 if (option_name == SO_RCVBUF) {
675 if (*optlenp < sizeof (int32_t))
676 return (EINVAL);
677 *((int32_t *)optval) = ps->ps_rcvbuf;
678 *optlenp = sizeof (int32_t);
679 } else {
680 error = ENOPROTOOPT;
681 }
682 break;
683
684 default :
685 /*
686 * If sockfs code receives this error in return from the
687 * getsockopt downcall it handles the option locally, if
688 * it can.
689 */
690 error = ENOPROTOOPT;
691 break;
692 }
693
694 return (error);
695 }
696
697 /*
698 * PF_PACKET supports setting socket options at only two levels:
699 * SOL_SOCKET and SOL_PACKET.
700 */
701 /* ARGSUSED */
702 static int
703 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
704 const void *optval, socklen_t optlen, struct cred *cred)
705 {
706 int error = 0;
707
708 switch (level) {
709 case SOL_SOCKET :
710 error = pfp_setsocket_sockopt(handle, option_name, optval,
711 optlen);
712 break;
713 case SOL_PACKET :
714 error = pfp_setpacket_sockopt(handle, option_name, optval,
715 optlen);
716 break;
717 default :
718 error = EINVAL;
719 break;
720 }
721
722 return (error);
723 }
724
725 /*
726 * This function is incredibly inefficient for sending any packet that
727 * comes with a msghdr asking to be sent to an interface to which the
728 * socket has not been bound. Some possibilities here are keeping a
729 * cache of all open mac's and mac_client's, for the purpose of sending,
730 * and closing them after some amount of inactivity. Clearly, applications
731 * should not be written to use one socket for multiple interfaces if
732 * performance is desired with the code as is.
733 */
734 /* ARGSUSED */
735 static int
736 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
737 struct nmsghdr *msg, struct cred *cred)
738 {
739 struct sockaddr_ll *sol;
740 mac_client_handle_t mch;
741 struct pfpsock *ps;
742 boolean_t new_open;
743 mac_handle_t mh;
744 size_t mpsize;
745 uint_t maxsdu;
746 mblk_t *mp0;
747 mblk_t *mp;
748 int error;
749
750 mp = NULL;
751 mp0 = NULL;
752 new_open = B_FALSE;
753 ps = (struct pfpsock *)handle;
754 mh = ps->ps_mh;
755 mch = ps->ps_mch;
756 maxsdu = ps->ps_max_sdu;
757
758 sol = (struct sockaddr_ll *)msg->msg_name;
759 if (sol == NULL) {
760 /*
761 * If no sockaddr_ll has been provided with the send call,
762 * use the one constructed when the socket was bound to an
763 * interface and fail if it hasn't been bound.
764 */
765 if (!ps->ps_bound) {
766 ks_stats.kp_send_unbound.value.ui64++;
767 return (EPROTO);
768 }
769 sol = &ps->ps_sock;
770 } else {
771 /*
772 * Verify the sockaddr_ll message passed down before using
773 * it to send a packet out with. If it refers to an interface
774 * that has not been bound, it is necessary to open it.
775 */
776 struct sockaddr_ll *sll;
777
778 if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
779 ks_stats.kp_send_short_msg.value.ui64++;
780 return (EINVAL);
781 }
782
783 if (sol->sll_family != AF_PACKET) {
784 ks_stats.kp_send_wrong_family.value.ui64++;
785 return (EAFNOSUPPORT);
786 }
787
788 sll = &ps->ps_sock;
789 if (sol->sll_ifindex != sll->sll_ifindex) {
790 error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
791 cred);
792 if (error != 0) {
793 ks_stats.kp_send_open_fail.value.ui64++;
794 return (error);
795 }
796 mac_sdu_get(mh, NULL, &maxsdu);
797 new_open = B_TRUE;
798 }
799 }
800
801 mpsize = uiop->uio_resid;
802 if (mpsize > maxsdu) {
803 ks_stats.kp_send_too_big.value.ui64++;
804 error = EMSGSIZE;
805 goto done;
806 }
807
808 if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
809 ks_stats.kp_send_alloc_fail.value.ui64++;
810 error = ENOBUFS;
811 goto done;
812 }
813
814 mp->b_wptr = mp->b_rptr + mpsize;
815 error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
816 if (error != 0) {
817 ks_stats.kp_send_uiomove_fail.value.ui64++;
818 goto done;
819 }
820
821 if (ps->ps_type == SOCK_DGRAM) {
822 mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
823 if (mp0 == NULL) {
824 ks_stats.kp_send_no_memory.value.ui64++;
825 error = ENOBUFS;
826 goto done;
827 }
828 linkb(mp0, mp);
829 mp = mp0;
830 }
831
832 /*
833 * As this is sending datagrams and no promise is made about
834 * how or if a packet will be sent/delivered, no effort is to
835 * be expended in recovering from a situation where the packet
836 * cannot be sent - it is just dropped.
837 */
838 error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
839 if (error == 0) {
840 mp = NULL;
841 ks_stats.kp_send_ok.value.ui64++;
842 } else {
843 ks_stats.kp_send_failed.value.ui64++;
844 }
845
846 done:
847
848 if (new_open) {
849 ASSERT(mch != ps->ps_mch);
850 ASSERT(mh != ps->ps_mh);
851 pfp_close(mh, mch);
852 }
853 if (mp != NULL)
854 freemsg(mp);
855
856 return (error);
857
858 }
859
860 /*
861 * There's no use of a lock here, or at the bottom of pfp_packet() where
862 * ps_flow_ctrld is set to true, because in a situation where these two
863 * are racing to set the flag one way or the other, the end result is
864 * going to be ultimately determined by the scheduler anyway - which of
865 * the two threads gets the lock first? In such an operational environment,
866 * we've got packets arriving too fast to be delt with so packets are going
867 * to be dropped. Grabbing a lock just makes the drop more expensive.
868 */
869 static void
870 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
871 {
872 struct pfpsock *ps;
873
874 ps = (struct pfpsock *)handle;
875
876 mutex_enter(&ps->ps_lock);
877 ps->ps_flow_ctrld = B_FALSE;
878 mutex_exit(&ps->ps_lock);
879 }
880
881 /*
882 * The implementation of this ioctl() handler is intended to function
883 * in the absence of a bind() being made before it is called. Thus the
884 * function calls mac_open() itself to provide a handle
885 * This function is structured like this:
886 * - determine the linkid for the interface being targetted
887 * - open the interface with said linkid
888 * - perform ioctl
889 * - copy results back to caller
890 *
891 * The ioctls that interact with interface flags have been implented below
892 * to assume that the interface is always up and running (IFF_RUNNING) and
893 * to use the state of this socket to determine whether or not the network
894 * interface is in promiscuous mode. Thus an ioctl to get the interface flags
895 * of an interface that has been put in promiscuous mode by another socket
896 * (in the same program or different), will not report that status.
897 */
898 /* ARGSUSED */
899 static int
900 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
901 int32_t *rval, struct cred *cr)
902 {
903 struct timeval tival;
904 mac_client_promisc_type_t mtype;
905 struct sockaddr_dl *sock;
906 datalink_id_t linkid;
907 struct lifreq lifreq;
908 struct ifreq ifreq;
909 struct pfpsock *ps;
910 mac_handle_t mh;
911 int error;
912
913 ps = (struct pfpsock *)handle;
914
915 switch (cmd) {
916 /*
917 * ioctls that work on "struct lifreq"
918 */
919 case SIOCSLIFFLAGS :
920 case SIOCGLIFINDEX :
921 case SIOCGLIFFLAGS :
922 case SIOCGLIFMTU :
923 case SIOCGLIFHWADDR :
924 error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
925 if (error != 0)
926 return (error);
927 break;
928
929 /*
930 * ioctls that work on "struct ifreq".
931 * Not all of these have a "struct lifreq" partner, for example
932 * SIOCGIFHWADDR, for the simple reason that the logical interface
933 * does not have a hardware address.
934 */
935 case SIOCSIFFLAGS :
936 case SIOCGIFINDEX :
937 case SIOCGIFFLAGS :
938 case SIOCGIFMTU :
939 case SIOCGIFHWADDR :
940 error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
941 if (error != 0)
942 return (error);
943 break;
944
945 case SIOCGSTAMP :
946 tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
947 tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
948 if (get_udatamodel() == DATAMODEL_NATIVE) {
949 error = ddi_copyout(&tival, (void *)arg,
950 sizeof (tival), mod);
951 }
952 #ifdef _SYSCALL32_IMPL
953 else {
954 struct timeval32 tv32;
955 TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
956 error = ddi_copyout(&tv32, (void *)arg,
957 sizeof (tv32), mod);
958 }
959 #endif
960 return (error);
961 }
962
963 error = mac_open_by_linkid(linkid, &mh);
964 if (error != 0)
965 return (error);
966
967 switch (cmd) {
968 case SIOCGLIFINDEX :
969 lifreq.lifr_index = linkid;
970 break;
971
972 case SIOCGIFINDEX :
973 ifreq.ifr_index = linkid;
974 break;
975
976 case SIOCGIFFLAGS :
977 ifreq.ifr_flags = IFF_RUNNING;
978 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
979 ifreq.ifr_flags |= IFF_PROMISC;
980 break;
981
982 case SIOCGLIFFLAGS :
983 lifreq.lifr_flags = IFF_RUNNING;
984 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
985 lifreq.lifr_flags |= IFF_PROMISC;
986 break;
987
988 case SIOCSIFFLAGS :
989 if (linkid != ps->ps_linkid) {
990 error = EINVAL;
991 } else {
992 if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
993 mtype = MAC_CLIENT_PROMISC_ALL;
994 else
995 mtype = MAC_CLIENT_PROMISC_FILTERED;
996 error = pfp_set_promisc(ps, mtype);
997 }
998 break;
999
1000 case SIOCSLIFFLAGS :
1001 if (linkid != ps->ps_linkid) {
1002 error = EINVAL;
1003 } else {
1004 if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1005 mtype = MAC_CLIENT_PROMISC_ALL;
1006 else
1007 mtype = MAC_CLIENT_PROMISC_FILTERED;
1008 error = pfp_set_promisc(ps, mtype);
1009 }
1010 break;
1011
1012 case SIOCGIFMTU :
1013 mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1014 break;
1015
1016 case SIOCGLIFMTU :
1017 mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1018 break;
1019
1020 case SIOCGIFHWADDR :
1021 if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1022 error = EPFNOSUPPORT;
1023 break;
1024 }
1025
1026 if (mac_addr_len(mh) == 0) {
1027 (void) memset(ifreq.ifr_addr.sa_data, 0,
1028 sizeof (ifreq.ifr_addr.sa_data));
1029 } else {
1030 mac_unicast_primary_get(mh,
1031 (uint8_t *)ifreq.ifr_addr.sa_data);
1032 }
1033
1034 /*
1035 * The behaviour here in setting sa_family is consistent
1036 * with what applications such as tcpdump would expect
1037 * for a Linux PF_PACKET socket.
1038 */
1039 ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1040 break;
1041
1042 case SIOCGLIFHWADDR :
1043 lifreq.lifr_type = 0;
1044 sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1045
1046 if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1047 error = EPFNOSUPPORT;
1048 break;
1049 }
1050
1051 /*
1052 * Fill in the sockaddr_dl with link layer details. Of note,
1053 * the index is returned as 0 for a couple of reasons:
1054 * (1) there is no public API that uses or requires it
1055 * (2) the MAC index is currently 32bits and sdl_index is 16.
1056 */
1057 sock->sdl_family = AF_LINK;
1058 sock->sdl_index = 0;
1059 sock->sdl_type = mac_type(mh);
1060 sock->sdl_nlen = 0;
1061 sock->sdl_alen = mac_addr_len(mh);
1062 sock->sdl_slen = 0;
1063 if (mac_addr_len(mh) == 0) {
1064 (void) memset(sock->sdl_data, 0,
1065 sizeof (sock->sdl_data));
1066 } else {
1067 mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1068 }
1069 break;
1070
1071 default :
1072 break;
1073 }
1074
1075 mac_close(mh);
1076
1077 if (error == 0) {
1078 /*
1079 * Only the "GET" ioctls need to copy data back to userace.
1080 */
1081 switch (cmd) {
1082 case SIOCGLIFINDEX :
1083 case SIOCGLIFFLAGS :
1084 case SIOCGLIFMTU :
1085 case SIOCGLIFHWADDR :
1086 error = ddi_copyout(&lifreq, (void *)arg,
1087 sizeof (lifreq), mod);
1088 break;
1089
1090 case SIOCGIFINDEX :
1091 case SIOCGIFFLAGS :
1092 case SIOCGIFMTU :
1093 case SIOCGIFHWADDR :
1094 error = ddi_copyout(&ifreq, (void *)arg,
1095 sizeof (ifreq), mod);
1096 break;
1097 default :
1098 break;
1099 }
1100 }
1101
1102 return (error);
1103 }
1104
1105 /*
1106 * Closing the socket requires that all open references to network
1107 * interfaces be closed.
1108 */
1109 /* ARGSUSED */
1110 static int
1111 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1112 {
1113 struct pfpsock *ps = (struct pfpsock *)handle;
1114
1115 if (ps->ps_phd != 0) {
1116 mac_promisc_remove(ps->ps_phd);
1117 ps->ps_phd = 0;
1118 }
1119
1120 if (ps->ps_mch != 0) {
1121 mac_client_close(ps->ps_mch, 0);
1122 ps->ps_mch = 0;
1123 }
1124
1125 if (ps->ps_mh != 0) {
1126 mac_close(ps->ps_mh);
1127 ps->ps_mh = 0;
1128 }
1129
1130 kmem_free(ps, sizeof (*ps));
1131
1132 return (0);
1133 }
1134
1135 /* ************************************************************************* */
1136
1137 /*
1138 * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1139 * determine the linkid for the interface name stored in that structure.
1140 * name is used as a buffer so that we can ensure a trailing \0 is appended
1141 * to the name safely.
1142 */
1143 static int
1144 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1145 datalink_id_t *linkidp, int mode)
1146 {
1147 char name[IFNAMSIZ + 1];
1148 int error;
1149
1150 if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1151 return (EFAULT);
1152
1153 (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1154
1155 error = dls_mgmt_get_linkid(name, linkidp);
1156 if (error != 0)
1157 error = dls_devnet_macname2linkid(name, linkidp);
1158
1159 return (error);
1160 }
1161
1162 /*
1163 * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1164 * determine the linkid for the interface name stored in that structure.
1165 * name is used as a buffer so that we can ensure a trailing \0 is appended
1166 * to the name safely.
1167 */
1168 static int
1169 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1170 datalink_id_t *linkidp, int mode)
1171 {
1172 char name[LIFNAMSIZ + 1];
1173 int error;
1174
1175 if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1176 return (EFAULT);
1177
1178 (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1179
1180 error = dls_mgmt_get_linkid(name, linkidp);
1181 if (error != 0)
1182 error = dls_devnet_macname2linkid(name, linkidp);
1183
1184 return (error);
1185 }
1186
1187 /*
1188 * Although there are several new SOL_PACKET options that can be set and
1189 * are specific to this implementation of PF_PACKET, the current API does
1190 * not support doing a get on them to retrieve accompanying status. Thus
1191 * it is only currently possible to use SOL_PACKET with getsockopt to
1192 * retrieve statistical information. This remains consistant with the
1193 * Linux API at the time of writing.
1194 */
1195 static int
1196 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1197 void *optval, socklen_t *optlenp)
1198 {
1199 struct pfpsock *ps;
1200 struct tpacket_stats_short tpss;
1201 int error = 0;
1202
1203 ps = (struct pfpsock *)handle;
1204
1205 switch (option_name) {
1206 case PACKET_STATISTICS :
1207 if (*optlenp < sizeof (ps->ps_stats)) {
1208 error = EINVAL;
1209 break;
1210 }
1211 *optlenp = sizeof (ps->ps_stats);
1212 bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1213 break;
1214 case PACKET_STATISTICS_SHORT :
1215 if (*optlenp < sizeof (tpss)) {
1216 error = EINVAL;
1217 break;
1218 }
1219 *optlenp = sizeof (tpss);
1220 tpss.tp_packets = ps->ps_stats.tp_packets;
1221 tpss.tp_drops = ps->ps_stats.tp_drops;
1222 bcopy(&tpss, optval, sizeof (tpss));
1223 break;
1224 default :
1225 error = EINVAL;
1226 break;
1227 }
1228
1229 return (error);
1230 }
1231
1232 /*
1233 * The SOL_PACKET level for socket options supports three options,
1234 * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1235 * This function is responsible for mapping the two socket options
1236 * that manage multicast membership into the appropriate internal
1237 * function calls to bring the option into effect. Whilst direct
1238 * changes to the multicast membership (ADD/DROP) groups is handled
1239 * by calls directly into the mac module, changes to the promiscuos
1240 * mode are vectored through pfp_set_promisc() so that the logic for
1241 * managing the promiscuous mode is in one place.
1242 */
1243 /* ARGSUSED */
1244 static int
1245 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1246 const void *optval, socklen_t optlen)
1247 {
1248 struct packet_mreq mreq;
1249 struct pfpsock *ps;
1250 int error = 0;
1251 int opt;
1252
1253 ps = (struct pfpsock *)handle;
1254 if (!ps->ps_bound)
1255 return (EPROTO);
1256
1257 if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1258 (option_name == PACKET_DROP_MEMBERSHIP)) {
1259 if (!ps->ps_bound)
1260 return (EPROTO);
1261 bcopy(optval, &mreq, sizeof (mreq));
1262 if (ps->ps_linkid != mreq.mr_ifindex)
1263 return (EINVAL);
1264 }
1265
1266 switch (option_name) {
1267 case PACKET_ADD_MEMBERSHIP :
1268 switch (mreq.mr_type) {
1269 case PACKET_MR_MULTICAST :
1270 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1271 return (EINVAL);
1272
1273 error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1274 break;
1275
1276 case PACKET_MR_PROMISC :
1277 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1278 break;
1279
1280 case PACKET_MR_ALLMULTI :
1281 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1282 break;
1283 }
1284 break;
1285
1286 case PACKET_DROP_MEMBERSHIP :
1287 switch (mreq.mr_type) {
1288 case PACKET_MR_MULTICAST :
1289 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1290 return (EINVAL);
1291
1292 mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1293 break;
1294
1295 case PACKET_MR_PROMISC :
1296 if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1297 return (EINVAL);
1298 error = pfp_set_promisc(ps,
1299 MAC_CLIENT_PROMISC_FILTERED);
1300 break;
1301
1302 case PACKET_MR_ALLMULTI :
1303 if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1304 return (EINVAL);
1305 error = pfp_set_promisc(ps,
1306 MAC_CLIENT_PROMISC_FILTERED);
1307 break;
1308 }
1309 break;
1310
1311 case PACKET_AUXDATA :
1312 if (optlen == sizeof (int)) {
1313 opt = *(int *)optval;
1314 ps->ps_auxdata = (opt != 0);
1315 } else {
1316 error = EINVAL;
1317 }
1318 break;
1319 default :
1320 error = EINVAL;
1321 break;
1322 }
1323
1324 return (error);
1325 }
1326
1327 /*
1328 * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1329 * SO_ATTACH_FILTER and SO_DETACH_FILTER.
1330 *
1331 * Both of these setsockopt values are candidates for being handled by the
1332 * socket layer itself in future, however this requires understanding how
1333 * they would interact with all other sockets.
1334 */
1335 static int
1336 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1337 const void *optval, socklen_t optlen)
1338 {
1339 struct bpf_program prog;
1340 ip_bpf_insn_t *fcode;
1341 struct pfpsock *ps;
1342 struct sock_proto_props sopp;
1343 int error = 0;
1344 int size;
1345
1346 ps = (struct pfpsock *)handle;
1347
1348 switch (option_name) {
1349 case SO_ATTACH_FILTER :
1350 #ifdef _LP64
1351 if (optlen == sizeof (struct bpf_program32)) {
1352 struct bpf_program32 prog32;
1353
1354 bcopy(optval, &prog32, sizeof (prog32));
1355 prog.bf_len = prog32.bf_len;
1356 prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1357 } else
1358 #endif
1359 if (optlen == sizeof (struct bpf_program)) {
1360 bcopy(optval, &prog, sizeof (prog));
1361 } else if (optlen != sizeof (struct bpf_program)) {
1362 return (EINVAL);
1363 }
1364 if (prog.bf_len > BPF_MAXINSNS)
1365 return (EINVAL);
1366
1367 size = prog.bf_len * sizeof (*prog.bf_insns);
1368 fcode = kmem_alloc(size, KM_SLEEP);
1369 if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1370 kmem_free(fcode, size);
1371 return (EFAULT);
1372 }
1373
1374 if (ip_bpf_validate(fcode, prog.bf_len)) {
1375 rw_enter(&ps->ps_bpflock, RW_WRITER);
1376 pfp_release_bpf(ps);
1377 ps->ps_bpf.bf_insns = (struct bpf_insn *)fcode;
1378 ps->ps_bpf.bf_len = size;
1379 rw_exit(&ps->ps_bpflock);
1380
1381 return (0);
1382 }
1383 kmem_free(fcode, size);
1384 error = EINVAL;
1385 break;
1386
1387 case SO_DETACH_FILTER :
1388 pfp_release_bpf(ps);
1389 break;
1390
1391 case SO_RCVBUF :
1392 size = *(int32_t *)optval;
1393 if (size > sockmod_pfp_rcvbuf_max || size < 0)
1394 return (ENOBUFS);
1395 sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1396 sopp.sopp_rxhiwat = size;
1397 ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1398 ps->ps_rcvbuf = size;
1399 break;
1400
1401 default :
1402 error = ENOPROTOOPT;
1403 break;
1404 }
1405
1406 return (error);
1407 }
1408
1409 /*
1410 * pfp_open_index is an internal function used to open a MAC device by
1411 * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1412 * because some of the interfaces provided by the mac layer require either
1413 * only the mac_handle_t or both it and mac_handle_t.
1414 *
1415 * Whilst inside the kernel we can access data structures supporting any
1416 * zone, access to interfaces from non-global zones is restricted to those
1417 * interfaces (if any) that are exclusively assigned to a zone.
1418 */
1419 static int
1420 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1421 cred_t *cred)
1422 {
1423 mac_client_handle_t mch;
1424 zoneid_t ifzoneid;
1425 mac_handle_t mh;
1426 zoneid_t zoneid;
1427 int error;
1428
1429 mh = 0;
1430 mch = 0;
1431 error = mac_open_by_linkid(index, &mh);
1432 if (error != 0)
1433 goto bad_open;
1434
1435 error = mac_client_open(mh, &mch, NULL,
1436 MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1437 if (error != 0)
1438 goto bad_open;
1439
1440 zoneid = crgetzoneid(cred);
1441 if (zoneid != GLOBAL_ZONEID) {
1442 mac_perim_handle_t perim;
1443
1444 mac_perim_enter_by_mh(mh, &perim);
1445 error = dls_link_getzid(mac_name(mh), &ifzoneid);
1446 mac_perim_exit(perim);
1447 if (error != 0)
1448 goto bad_open;
1449 if (ifzoneid != zoneid) {
1450 error = EACCES;
1451 goto bad_open;
1452 }
1453 }
1454
1455 *mcip = mch;
1456 *mhp = mh;
1457
1458 return (0);
1459 bad_open:
1460 if (mch != 0)
1461 mac_client_close(mch, 0);
1462 if (mh != 0)
1463 mac_close(mh);
1464 return (error);
1465 }
1466
1467 static void
1468 pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1469 {
1470 mac_client_close(mch, 0);
1471 mac_close(mh);
1472 }
1473
1474 /*
1475 * The purpose of this function is to provide a single place where we free
1476 * the loaded BPF program and reset all pointers/counters associated with
1477 * it.
1478 */
1479 static void
1480 pfp_release_bpf(struct pfpsock *ps)
1481 {
1482 if (ps->ps_bpf.bf_len != 0) {
1483 kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1484 ps->ps_bpf.bf_len = 0;
1485 ps->ps_bpf.bf_insns = NULL;
1486 }
1487 }
1488
1489 /*
1490 * Set the promiscuous mode of a network interface.
1491 * This function only calls the mac layer when there is a change to the
1492 * status of a network interface's promiscous mode. Tracking of how many
1493 * sockets have the network interface in promiscuous mode, and thus the
1494 * control over the physical device's status, is left to the mac layer.
1495 */
1496 static int
1497 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1498 {
1499 int error = 0;
1500 int flags;
1501
1502 /*
1503 * There are 4 combinations of turnon/ps_promisc.
1504 * This if handles 2 (both false, both true) and the if() below
1505 * handles the remaining one - when change is required.
1506 */
1507 if (turnon == ps->ps_promisc)
1508 return (error);
1509
1510 if (ps->ps_phd != 0) {
1511 mac_promisc_remove(ps->ps_phd);
1512 ps->ps_phd = 0;
1513
1514 /*
1515 * ps_promisc is set here in case the call to mac_promisc_add
1516 * fails: leaving it to indicate that the interface is still
1517 * in some sort of promiscuous mode is false.
1518 */
1519 if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1520 ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1521 flags = MAC_PROMISC_FLAGS_NO_PHYS;
1522 } else {
1523 flags = 0;
1524 }
1525 flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1526 }
1527
1528 error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1529 &ps->ps_phd, flags);
1530 if (error == 0)
1531 ps->ps_promisc = turnon;
1532
1533 return (error);
1534 }
1535
1536 /*
1537 * This table maps the MAC types in Solaris to the ARPHRD_* values used
1538 * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1539 *
1540 * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1541 * they are pulled from <netpacket/packet.h>, thus it acts as a source
1542 * of supplementary information to the ARP table.
1543 */
1544 static uint_t arphrd_to_dl[][2] = {
1545 { ARPHRD_IEEE80211, DL_WIFI },
1546 { ARPHRD_TUNNEL, DL_IPV4 },
1547 { ARPHRD_TUNNEL, DL_IPV6 },
1548 { ARPHRD_TUNNEL, DL_6TO4 },
1549 { ARPHRD_AX25, DL_X25 },
1550 { ARPHRD_ATM, DL_ATM },
1551 { 0, 0 }
1552 };
1553
1554 static int
1555 pfp_dl_to_arphrd(int dltype)
1556 {
1557 int i;
1558
1559 for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1560 if (arphrd_to_dl[i][1] == dltype)
1561 return (arphrd_to_dl[i][0]);
1562 return (arp_hw_type(dltype));
1563 }