io-lx-public-vs-joyent New usr/src/uts/common/inet/sockmods/sockmod

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2016 Joyent, Inc.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/stropts.h>
  31 #include <sys/socket.h>
  32 #include <sys/socketvar.h>
  33 #include <sys/socket_proto.h>
  34 #include <sys/sockio.h>
  35 #include <sys/strsun.h>
  36 #include <sys/kstat.h>
  37 #include <sys/modctl.h>
  38 #include <sys/policy.h>
  39 #include <sys/priv_const.h>
  40 #include <sys/tihdr.h>
  41 #include <sys/zone.h>
  42 #include <sys/time.h>
  43 #include <sys/ethernet.h>
  44 #include <sys/llc1.h>
  45 #include <fs/sockfs/sockcommon.h>
  46 #include <net/if.h>
  47 #include <inet/ip_arp.h>
  48 
  49 #include <sys/dls.h>
  50 #include <sys/mac.h>
  51 #include <sys/mac_client.h>
  52 #include <sys/mac_provider.h>
  53 #include <sys/mac_client_priv.h>
  54 #include <inet/bpf.h>
  55 
  56 #include <netpacket/packet.h>
  57 
  58 static void pfp_close(mac_handle_t, mac_client_handle_t);
  59 static int pfp_dl_to_arphrd(int);
  60 static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
  61     socklen_t *);
  62 static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
  63 static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
  64     int);
  65 static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
  66     cred_t *);
  67 static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
  68 static void pfp_release_bpf(struct pfpsock *);
  69 static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
  70 static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
  71     socklen_t);
  72 static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
  73     socklen_t);
  74 
  75 /*
  76  * PFP sockfs operations
  77  * Most are currently no-ops because they have no meaning for a connectionless
  78  * socket.
  79  */
  80 static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
  81     sock_upcalls_t *, int, struct cred *);
  82 static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
  83     struct cred *);
  84 static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
  85 static void sdpfp_clr_flowctrl(sock_lower_handle_t);
  86 static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
  87     socklen_t *, struct cred *);
  88 static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
  89     struct cred *);
  90 static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
  91     struct cred *);
  92 static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
  93     socklen_t, struct cred *);
  94 
  95 static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
  96     uint_t *, int *, int, cred_t *);
  97 
  98 static int sockpfp_init(void);
  99 static void sockpfp_fini(void);
 100 
 101 static kstat_t *pfp_ksp;
 102 static pfp_kstats_t ks_stats;
 103 static pfp_kstats_t pfp_kstats = {
 104         /*
 105          * Each one of these kstats is a different return path in handling
 106          * a packet received from the mac layer.
 107          */
 108         { "recvMacHeaderFail",  KSTAT_DATA_UINT64 },
 109         { "recvBadProtocol",    KSTAT_DATA_UINT64 },
 110         { "recvAllocbFail",     KSTAT_DATA_UINT64 },
 111         { "recvOk",             KSTAT_DATA_UINT64 },
 112         { "recvFail",           KSTAT_DATA_UINT64 },
 113         { "recvFiltered",       KSTAT_DATA_UINT64 },
 114         { "recvFlowControl",    KSTAT_DATA_UINT64 },
 115         /*
 116          * A global set of counters is maintained to track the behaviour
 117          * of the system (kernel & applications) in sending packets.
 118          */
 119         { "sendUnbound",        KSTAT_DATA_UINT64 },
 120         { "sendFailed",         KSTAT_DATA_UINT64 },
 121         { "sendTooBig",         KSTAT_DATA_UINT64 },
 122         { "sendAllocFail",      KSTAT_DATA_UINT64 },
 123         { "sendUiomoveFail",    KSTAT_DATA_UINT64 },
 124         { "sendNoMemory",       KSTAT_DATA_UINT64 },
 125         { "sendOpenFail",       KSTAT_DATA_UINT64 },
 126         { "sendWrongFamily",    KSTAT_DATA_UINT64 },
 127         { "sendShortMsg",       KSTAT_DATA_UINT64 },
 128         { "sendOk",             KSTAT_DATA_UINT64 }
 129 };
 130 
 131 sock_downcalls_t pfp_downcalls = {
 132         sdpfp_activate,
 133         sock_accept_notsupp,
 134         sdpfp_bind,
 135         sock_listen_notsupp,
 136         sock_connect_notsupp,
 137         sock_getpeername_notsupp,
 138         sock_getsockname_notsupp,
 139         sdpfp_getsockopt,
 140         sdpfp_setsockopt,
 141         sock_send_notsupp,
 142         sdpfp_senduio,
 143         NULL,
 144         sock_poll_notsupp,
 145         sock_shutdown_notsupp,
 146         sdpfp_clr_flowctrl,
 147         sdpfp_ioctl,
 148         sdpfp_close,
 149 };
 150 
 151 static smod_reg_t sinfo = {
 152         SOCKMOD_VERSION,
 153         "sockpfp",
 154         SOCK_UC_VERSION,
 155         SOCK_DC_VERSION,
 156         sockpfp_create,
 157         NULL
 158 };
 159 
 160 static int accepted_protos[3][2] = {
 161         { ETH_P_ALL,    0 },
 162         { ETH_P_802_2,  LLC_SNAP_SAP },
 163         { ETH_P_803_3,  0 },
 164 };
 165 
 166 /*
 167  * This sets an upper bound on the size of the receive buffer for a PF_PACKET
 168  * socket. More properly, this should be controlled through ipadm, ala TCP, UDP,
 169  * SCTP, etc. Until that's done, this provides a hard cap of 4 MB and allows an
 170  * opportunity for it to be changed, should it be needed.
 171  */
 172 int sockmod_pfp_rcvbuf_max = 1024 * 1024 * 4;
 173 
 174 /*
 175  * Module linkage information for the kernel.
 176  */
 177 static struct modlsockmod modlsockmod = {
 178         &mod_sockmodops, "PF Packet socket module", &sinfo
 179 };
 180 
 181 static struct modlinkage modlinkage = {
 182         MODREV_1,
 183         &modlsockmod,
 184         NULL
 185 };
 186 
 187 int
 188 _init(void)
 189 {
 190         int error;
 191 
 192         error = sockpfp_init();
 193         if (error != 0)
 194                 return (error);
 195 
 196         error = mod_install(&modlinkage);
 197         if (error != 0)
 198                 sockpfp_fini();
 199 
 200         return (error);
 201 }
 202 
 203 int
 204 _fini(void)
 205 {
 206         int error;
 207 
 208         error = mod_remove(&modlinkage);
 209         if (error == 0)
 210                 sockpfp_fini();
 211 
 212         return (error);
 213 }
 214 
 215 int
 216 _info(struct modinfo *modinfop)
 217 {
 218         return (mod_info(&modlinkage, modinfop));
 219 }
 220 
 221 /*
 222  * sockpfp_init: called as part of the initialisation of the module when
 223  * loaded into the kernel.
 224  *
 225  * Being able to create and record the kstats data in the kernel is not
 226  * considered to be vital to the operation of this kernel module, thus
 227  * its failure is tolerated.
 228  */
 229 static int
 230 sockpfp_init(void)
 231 {
 232         (void) memset(&ks_stats, 0, sizeof (ks_stats));
 233 
 234         (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
 235 
 236         pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
 237             KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
 238             KSTAT_FLAG_VIRTUAL);
 239         if (pfp_ksp != NULL) {
 240                 pfp_ksp->ks_data = &ks_stats;
 241                 kstat_install(pfp_ksp);
 242         }
 243 
 244         return (0);
 245 }
 246 
 247 /*
 248  * sockpfp_fini: called when the operating system wants to unload the
 249  * socket module from the kernel.
 250  */
 251 static void
 252 sockpfp_fini(void)
 253 {
 254         if (pfp_ksp != NULL)
 255                 kstat_delete(pfp_ksp);
 256 }
 257 
 258 /*
 259  * Due to sockets being created read-write by default, all PF_PACKET sockets
 260  * therefore require the NET_RAWACCESS priviliege, even if the socket is only
 261  * being used for reading packets from.
 262  *
 263  * This create function enforces this module only being used with PF_PACKET
 264  * sockets and the policy that we support via the config file in sock2path.d:
 265  * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
 266  */
 267 /* ARGSUSED */
 268 static sock_lower_handle_t
 269 sockpfp_create(int family, int type, int proto,
 270     sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
 271     int sflags, cred_t *cred)
 272 {
 273         struct pfpsock *ps;
 274         int kmflags;
 275         int newproto;
 276         int i;
 277 
 278         if (secpolicy_net_rawaccess(cred) != 0) {
 279                 *errorp = EACCES;
 280                 return (NULL);
 281         }
 282 
 283         if (family != AF_PACKET) {
 284                 *errorp = EAFNOSUPPORT;
 285                 return (NULL);
 286         }
 287 
 288         if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
 289                 *errorp = ESOCKTNOSUPPORT;
 290                 return (NULL);
 291         }
 292 
 293         /*
 294          * First check to see if the protocol number passed in via the socket
 295          * creation should be mapped to a different number for internal use.
 296          */
 297         for (i = 0, newproto = -1;
 298             i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
 299                 if (accepted_protos[i][0] == proto) {
 300                         newproto = accepted_protos[i][1];
 301                         break;
 302                 }
 303         }
 304 
 305         /*
 306          * If the mapping of the protocol that was under 0x800 failed to find
 307          * a local equivalent then fail the socket creation. If the protocol
 308          * for the socket is over 0x800 and it was not found in the mapping
 309          * table above, then use the value as is.
 310          */
 311         if (newproto == -1) {
 312                 if (proto < 0x800) {
 313                         *errorp = ENOPROTOOPT;
 314                         return (NULL);
 315                 }
 316                 newproto = proto;
 317         }
 318         proto = newproto;
 319 
 320         kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 321         ps = kmem_zalloc(sizeof (*ps), kmflags);
 322         if (ps == NULL) {
 323                 *errorp = ENOMEM;
 324                 return (NULL);
 325         }
 326 
 327         ps->ps_type = type;
 328         ps->ps_proto = proto;
 329         rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
 330         mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
 331 
 332         *sock_downcalls = &pfp_downcalls;
 333         /*
 334          * Setting this causes bytes from a packet that do not fit into the
 335          * destination user buffer to be discarded. Thus the API is one
 336          * packet per receive and callers are required to use a buffer large
 337          * enough for the biggest packet that the interface can provide.
 338          */
 339         *smodep = SM_ATOMIC;
 340 
 341         return ((sock_lower_handle_t)ps);
 342 }
 343 
 344 /* ************************************************************************* */
 345 
 346 /*
 347  * pfp_packet is the callback function that is given to the mac layer for
 348  * PF_PACKET to receive packets with. One packet at a time is passed into
 349  * this function from the mac layer. Each packet is a private copy given
 350  * to PF_PACKET to modify or free as it wishes and does not harm the original
 351  * packet from which it was cloned.
 352  */
 353 /* ARGSUSED */
 354 static void
 355 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
 356 {
 357         struct T_unitdata_ind *tunit;
 358         struct sockaddr_ll *sll;
 359         struct sockaddr_ll *sol;
 360         mac_header_info_t hdr;
 361         struct pfpsock *ps;
 362         size_t tusz;
 363         mblk_t *mp0;
 364         int error;
 365 
 366         if (mp == NULL)
 367                 return;
 368 
 369         ps = arg;
 370         if (ps->ps_flow_ctrld) {
 371                 ps->ps_flow_ctrl_drops++;
 372                 ps->ps_stats.tp_drops++;
 373                 ks_stats.kp_recv_flow_cntrld.value.ui64++;
 374                 freemsg(mp);
 375                 return;
 376         }
 377 
 378         if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
 379                 /*
 380                  * Can't decode the packet header information so drop it.
 381                  */
 382                 ps->ps_stats.tp_drops++;
 383                 ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
 384                 freemsg(mp);
 385                 return;
 386         }
 387 
 388         if (mac_type(ps->ps_mh) == DL_ETHER &&
 389             hdr.mhi_bindsap == ETHERTYPE_VLAN) {
 390                 struct ether_vlan_header *evhp;
 391                 struct ether_vlan_header evh;
 392 
 393                 hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
 394                 hdr.mhi_istagged = B_TRUE;
 395 
 396                 if (MBLKL(mp) >= sizeof (*evhp)) {
 397                         evhp = (struct ether_vlan_header *)mp->b_rptr;
 398                 } else {
 399                         int sz = sizeof (*evhp);
 400                         char *s = (char *)&evh;
 401                         mblk_t *tmp;
 402                         int len;
 403 
 404                         for (tmp = mp; sz > 0 && tmp != NULL;
 405                             tmp = tmp->b_cont) {
 406                                 len = min(sz, MBLKL(tmp));
 407                                 bcopy(tmp->b_rptr, s, len);
 408                                 sz -= len;
 409                         }
 410                         evhp = &evh;
 411                 }
 412                 hdr.mhi_tci = ntohs(evhp->ether_tci);
 413                 hdr.mhi_bindsap = ntohs(evhp->ether_type);
 414         }
 415 
 416         if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
 417                 /*
 418                  * The packet is not of interest to this socket so
 419                  * drop it on the floor. Here the SAP is being used
 420                  * as a very course filter.
 421                  */
 422                 ps->ps_stats.tp_drops++;
 423                 ks_stats.kp_recv_bad_proto.value.ui64++;
 424                 freemsg(mp);
 425                 return;
 426         }
 427 
 428         /*
 429          * This field is not often set, even for ethernet,
 430          * by mac_header_info, so compute it if it is 0.
 431          */
 432         if (hdr.mhi_pktsize == 0)
 433                 hdr.mhi_pktsize = msgdsize(mp);
 434 
 435         /*
 436          * If a BPF filter is present, pass the raw packet into that.
 437          * A failed match will result in zero being returned, indicating
 438          * that this socket is not interested in the packet.
 439          */
 440         if (ps->ps_bpf.bf_len != 0) {
 441                 uchar_t *buffer;
 442                 int buflen;
 443 
 444                 buflen = MBLKL(mp);
 445                 if (hdr.mhi_pktsize == buflen) {
 446                         buffer = mp->b_rptr;
 447                 } else {
 448                         buflen = 0;
 449                         buffer = (uchar_t *)mp;
 450                 }
 451                 rw_enter(&ps->ps_bpflock, RW_READER);
 452                 if (ip_bpf_filter((ip_bpf_insn_t *)ps->ps_bpf.bf_insns, buffer,
 453                     hdr.mhi_pktsize, buflen) == 0) {
 454                         rw_exit(&ps->ps_bpflock);
 455                         ps->ps_stats.tp_drops++;
 456                         ks_stats.kp_recv_filtered.value.ui64++;
 457                         freemsg(mp);
 458                         return;
 459                 }
 460                 rw_exit(&ps->ps_bpflock);
 461         }
 462 
 463         if (ps->ps_type == SOCK_DGRAM) {
 464                 /*
 465                  * SOCK_DGRAM socket expect a "layer 3" packet, so advance
 466                  * past the link layer header.
 467                  */
 468                 mp->b_rptr += hdr.mhi_hdrsize;
 469                 hdr.mhi_pktsize -= hdr.mhi_hdrsize;
 470         }
 471 
 472         tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
 473         if (ps->ps_auxdata) {
 474                 tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 475                 tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
 476         }
 477 
 478         /*
 479          * It is tempting to think that this could be optimised by having
 480          * the base mblk_t allocated and hung off the pfpsock structure,
 481          * except that then another one would need to be allocated for the
 482          * sockaddr_ll that is included. Even creating a template to copy
 483          * from is of questionable value, as read-write from one structure
 484          * to the other is going to be slower than all of the initialisation.
 485          */
 486         mp0 = allocb(tusz, BPRI_HI);
 487         if (mp0 == NULL) {
 488                 ps->ps_stats.tp_drops++;
 489                 ks_stats.kp_recv_alloc_fail.value.ui64++;
 490                 freemsg(mp);
 491                 return;
 492         }
 493 
 494         (void) memset(mp0->b_rptr, 0, tusz);
 495 
 496         mp0->b_datap->db_type = M_PROTO;
 497         mp0->b_wptr = mp0->b_rptr + tusz;
 498 
 499         tunit = (struct T_unitdata_ind *)mp0->b_rptr;
 500         tunit->PRIM_type = T_UNITDATA_IND;
 501         tunit->SRC_length = sizeof (struct sockaddr);
 502         tunit->SRC_offset = sizeof (*tunit);
 503 
 504         sol = &ps->ps_sock;
 505         sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
 506         sll->sll_ifindex = sol->sll_ifindex;
 507         sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
 508         sll->sll_halen = sol->sll_halen;
 509         if (hdr.mhi_saddr != NULL)
 510                 (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
 511 
 512         switch (hdr.mhi_dsttype) {
 513         case MAC_ADDRTYPE_MULTICAST :
 514                 sll->sll_pkttype = PACKET_MULTICAST;
 515                 break;
 516         case MAC_ADDRTYPE_BROADCAST :
 517                 sll->sll_pkttype = PACKET_BROADCAST;
 518                 break;
 519         case MAC_ADDRTYPE_UNICAST :
 520                 if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
 521                         sll->sll_pkttype = PACKET_HOST;
 522                 else
 523                         sll->sll_pkttype = PACKET_OTHERHOST;
 524                 break;
 525         }
 526 
 527         if (ps->ps_auxdata) {
 528                 struct tpacket_auxdata *aux;
 529                 struct T_opthdr *topt;
 530 
 531                 tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
 532                     sizeof (struct sockaddr_ll));
 533                 tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
 534                     _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 535 
 536                 topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
 537                 aux = (struct tpacket_auxdata *)
 538                     ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
 539 
 540                 topt->len = tunit->OPT_length;
 541                 topt->level = SOL_PACKET;
 542                 topt->name = PACKET_AUXDATA;
 543                 topt->status = 0;
 544                 /*
 545                  * libpcap doesn't seem to use any other field,
 546                  * so it isn't clear how they should be filled in.
 547                  */
 548                 aux->tp_vlan_vci = hdr.mhi_tci;
 549         }
 550 
 551         linkb(mp0, mp);
 552 
 553         (void) gethrestime(&ps->ps_timestamp);
 554 
 555         ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
 556             &error, NULL);
 557 
 558         if (error == 0) {
 559                 ps->ps_stats.tp_packets++;
 560                 ks_stats.kp_recv_ok.value.ui64++;
 561         } else {
 562                 mutex_enter(&ps->ps_lock);
 563                 if (error == ENOSPC) {
 564                         ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
 565                             &error, NULL);
 566                         if (error == ENOSPC)
 567                                 ps->ps_flow_ctrld = B_TRUE;
 568                 }
 569                 mutex_exit(&ps->ps_lock);
 570                 ps->ps_stats.tp_drops++;
 571                 ks_stats.kp_recv_fail.value.ui64++;
 572         }
 573 }
 574 
 575 /*
 576  * Bind a PF_PACKET socket to a network interface.
 577  *
 578  * The default operation of this bind() is to place the socket (and thus the
 579  * network interface) into promiscuous mode. It is then up to the application
 580  * to turn that down by issuing the relevant ioctls, if desired.
 581  */
 582 static int
 583 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
 584     socklen_t addrlen, struct cred *cred)
 585 {
 586         struct sockaddr_ll *addr_ll, *sol;
 587         mac_client_handle_t mch;
 588         struct pfpsock *ps;
 589         mac_handle_t mh;
 590         int error;
 591 
 592         ps = (struct pfpsock *)handle;
 593         if (ps->ps_bound)
 594                 return (EINVAL);
 595 
 596         if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
 597                 return (EINVAL);
 598 
 599         addr_ll = (struct sockaddr_ll *)addr;
 600 
 601         error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
 602         if (error != 0)
 603                 return (error);
 604         /*
 605          * Ensure that each socket is only bound once.
 606          */
 607         mutex_enter(&ps->ps_lock);
 608         if (ps->ps_mh != 0) {
 609                 mutex_exit(&ps->ps_lock);
 610                 pfp_close(mh, mch);
 611                 return (EADDRINUSE);
 612         }
 613         ps->ps_mh = mh;
 614         ps->ps_mch = mch;
 615         mutex_exit(&ps->ps_lock);
 616 
 617         /*
 618          * Cache all of the information from bind so that it's in an easy
 619          * place to get at when packets are received.
 620          */
 621         sol = &ps->ps_sock;
 622         sol->sll_family = AF_PACKET;
 623         sol->sll_ifindex = addr_ll->sll_ifindex;
 624         sol->sll_protocol = addr_ll->sll_protocol;
 625         sol->sll_halen = mac_addr_len(ps->ps_mh);
 626         mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
 627         mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
 628         ps->ps_linkid = addr_ll->sll_ifindex;
 629 
 630         error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
 631             pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
 632         if (error == 0) {
 633                 ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
 634                 ps->ps_bound = B_TRUE;
 635         }
 636 
 637         return (error);
 638 }
 639 
 640 /* ARGSUSED */
 641 static void
 642 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
 643     sock_upcalls_t *upcalls, int flags, cred_t *cred)
 644 {
 645         struct pfpsock *ps;
 646 
 647         ps = (struct pfpsock *)lower;
 648         ps->ps_upper = upper;
 649         ps->ps_upcalls = upcalls;
 650 }
 651 
 652 /*
 653  * This module only implements getting socket options for the new socket
 654  * option level (SOL_PACKET) that it introduces. All other requests are
 655  * passed back to the sockfs layer.
 656  */
 657 /* ARGSUSED */
 658 static int
 659 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
 660     void *optval, socklen_t *optlenp, struct cred *cred)
 661 {
 662         struct pfpsock *ps;
 663         int error = 0;
 664 
 665         ps = (struct pfpsock *)handle;
 666 
 667         switch (level) {
 668         case SOL_PACKET :
 669                 error = pfp_getpacket_sockopt(handle, option_name, optval,
 670                     optlenp);
 671                 break;
 672 
 673         case SOL_SOCKET :
 674                 if (option_name == SO_RCVBUF) {
 675                         if (*optlenp < sizeof (int32_t))
 676                                 return (EINVAL);
 677                         *((int32_t *)optval) = ps->ps_rcvbuf;
 678                         *optlenp = sizeof (int32_t);
 679                 } else {
 680                         error = ENOPROTOOPT;
 681                 }
 682                 break;
 683 
 684         default :
 685                 /*
 686                  * If sockfs code receives this error in return from the
 687                  * getsockopt downcall it handles the option locally, if
 688                  * it can.
 689                  */
 690                 error = ENOPROTOOPT;
 691                 break;
 692         }
 693 
 694         return (error);
 695 }
 696 
 697 /*
 698  * PF_PACKET supports setting socket options at only two levels:
 699  * SOL_SOCKET and SOL_PACKET.
 700  */
 701 /* ARGSUSED */
 702 static int
 703 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
 704     const void *optval, socklen_t optlen, struct cred *cred)
 705 {
 706         int error = 0;
 707 
 708         switch (level) {
 709         case SOL_SOCKET :
 710                 error = pfp_setsocket_sockopt(handle, option_name, optval,
 711                     optlen);
 712                 break;
 713         case SOL_PACKET :
 714                 error = pfp_setpacket_sockopt(handle, option_name, optval,
 715                     optlen);
 716                 break;
 717         default :
 718                 error = EINVAL;
 719                 break;
 720         }
 721 
 722         return (error);
 723 }
 724 
 725 /*
 726  * This function is incredibly inefficient for sending any packet that
 727  * comes with a msghdr asking to be sent to an interface to which the
 728  * socket has not been bound. Some possibilities here are keeping a
 729  * cache of all open mac's and mac_client's, for the purpose of sending,
 730  * and closing them after some amount of inactivity. Clearly, applications
 731  * should not be written to use one socket for multiple interfaces if
 732  * performance is desired with the code as is.
 733  */
 734 /* ARGSUSED */
 735 static int
 736 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
 737     struct nmsghdr *msg, struct cred *cred)
 738 {
 739         struct sockaddr_ll *sol;
 740         mac_client_handle_t mch;
 741         struct pfpsock *ps;
 742         boolean_t new_open;
 743         mac_handle_t mh;
 744         size_t mpsize;
 745         uint_t maxsdu;
 746         mblk_t *mp0;
 747         mblk_t *mp;
 748         int error;
 749 
 750         mp = NULL;
 751         mp0 = NULL;
 752         new_open = B_FALSE;
 753         ps = (struct pfpsock *)handle;
 754         mh = ps->ps_mh;
 755         mch = ps->ps_mch;
 756         maxsdu = ps->ps_max_sdu;
 757 
 758         sol = (struct sockaddr_ll *)msg->msg_name;
 759         if (sol == NULL) {
 760                 /*
 761                  * If no sockaddr_ll has been provided with the send call,
 762                  * use the one constructed when the socket was bound to an
 763                  * interface and fail if it hasn't been bound.
 764                  */
 765                 if (!ps->ps_bound) {
 766                         ks_stats.kp_send_unbound.value.ui64++;
 767                         return (EPROTO);
 768                 }
 769                 sol = &ps->ps_sock;
 770         } else {
 771                 /*
 772                  * Verify the sockaddr_ll message passed down before using
 773                  * it to send a packet out with. If it refers to an interface
 774                  * that has not been bound, it is necessary to open it.
 775                  */
 776                 struct sockaddr_ll *sll;
 777 
 778                 if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
 779                         ks_stats.kp_send_short_msg.value.ui64++;
 780                         return (EINVAL);
 781                 }
 782 
 783                 if (sol->sll_family != AF_PACKET) {
 784                         ks_stats.kp_send_wrong_family.value.ui64++;
 785                         return (EAFNOSUPPORT);
 786                 }
 787 
 788                 sll = &ps->ps_sock;
 789                 if (sol->sll_ifindex != sll->sll_ifindex) {
 790                         error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
 791                             cred);
 792                         if (error != 0) {
 793                                 ks_stats.kp_send_open_fail.value.ui64++;
 794                                 return (error);
 795                         }
 796                         mac_sdu_get(mh, NULL, &maxsdu);
 797                         new_open = B_TRUE;
 798                 }
 799         }
 800 
 801         mpsize = uiop->uio_resid;
 802         if (mpsize > maxsdu) {
 803                 ks_stats.kp_send_too_big.value.ui64++;
 804                 error = EMSGSIZE;
 805                 goto done;
 806         }
 807 
 808         if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
 809                 ks_stats.kp_send_alloc_fail.value.ui64++;
 810                 error = ENOBUFS;
 811                 goto done;
 812         }
 813 
 814         mp->b_wptr = mp->b_rptr + mpsize;
 815         error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
 816         if (error != 0) {
 817                 ks_stats.kp_send_uiomove_fail.value.ui64++;
 818                 goto done;
 819         }
 820 
 821         if (ps->ps_type == SOCK_DGRAM) {
 822                 mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
 823                 if (mp0 == NULL) {
 824                         ks_stats.kp_send_no_memory.value.ui64++;
 825                         error = ENOBUFS;
 826                         goto done;
 827                 }
 828                 linkb(mp0, mp);
 829                 mp = mp0;
 830         }
 831 
 832         /*
 833          * As this is sending datagrams and no promise is made about
 834          * how or if a packet will be sent/delivered, no effort is to
 835          * be expended in recovering from a situation where the packet
 836          * cannot be sent - it is just dropped.
 837          */
 838         error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
 839         if (error == 0) {
 840                 mp = NULL;
 841                 ks_stats.kp_send_ok.value.ui64++;
 842         } else {
 843                 ks_stats.kp_send_failed.value.ui64++;
 844         }
 845 
 846 done:
 847 
 848         if (new_open) {
 849                 ASSERT(mch != ps->ps_mch);
 850                 ASSERT(mh != ps->ps_mh);
 851                 pfp_close(mh, mch);
 852         }
 853         if (mp != NULL)
 854                 freemsg(mp);
 855 
 856         return (error);
 857 
 858 }
 859 
 860 /*
 861  * There's no use of a lock here, or at the bottom of pfp_packet() where
 862  * ps_flow_ctrld is set to true, because in a situation where these two
 863  * are racing to set the flag one way or the other, the end result is
 864  * going to be ultimately determined by the scheduler anyway - which of
 865  * the two threads gets the lock first? In such an operational environment,
 866  * we've got packets arriving too fast to be delt with so packets are going
 867  * to be dropped. Grabbing a lock just makes the drop more expensive.
 868  */
 869 static void
 870 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
 871 {
 872         struct pfpsock *ps;
 873 
 874         ps = (struct pfpsock *)handle;
 875 
 876         mutex_enter(&ps->ps_lock);
 877         ps->ps_flow_ctrld = B_FALSE;
 878         mutex_exit(&ps->ps_lock);
 879 }
 880 
 881 /*
 882  * The implementation of this ioctl() handler is intended to function
 883  * in the absence of a bind() being made before it is called. Thus the
 884  * function calls mac_open() itself to provide a handle
 885  * This function is structured like this:
 886  * - determine the linkid for the interface being targetted
 887  * - open the interface with said linkid
 888  * - perform ioctl
 889  * - copy results back to caller
 890  *
 891  * The ioctls that interact with interface flags have been implented below
 892  * to assume that the interface is always up and running (IFF_RUNNING) and
 893  * to use the state of this socket to determine whether or not the network
 894  * interface is in promiscuous mode. Thus an ioctl to get the interface flags
 895  * of an interface that has been put in promiscuous mode by another socket
 896  * (in the same program or different), will not report that status.
 897  */
 898 /* ARGSUSED */
 899 static int
 900 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
 901     int32_t *rval, struct cred *cr)
 902 {
 903         struct timeval tival;
 904         mac_client_promisc_type_t mtype;
 905         struct sockaddr_dl *sock;
 906         datalink_id_t linkid;
 907         struct lifreq lifreq;
 908         struct ifreq ifreq;
 909         struct pfpsock *ps;
 910         mac_handle_t mh;
 911         int error;
 912 
 913         ps = (struct pfpsock *)handle;
 914 
 915         switch (cmd) {
 916         /*
 917          * ioctls that work on "struct lifreq"
 918          */
 919         case SIOCSLIFFLAGS :
 920         case SIOCGLIFINDEX :
 921         case SIOCGLIFFLAGS :
 922         case SIOCGLIFMTU :
 923         case SIOCGLIFHWADDR :
 924                 error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
 925                 if (error != 0)
 926                         return (error);
 927                 break;
 928 
 929         /*
 930          * ioctls that work on "struct ifreq".
 931          * Not all of these have a "struct lifreq" partner, for example
 932          * SIOCGIFHWADDR, for the simple reason that the logical interface
 933          * does not have a hardware address.
 934          */
 935         case SIOCSIFFLAGS :
 936         case SIOCGIFINDEX :
 937         case SIOCGIFFLAGS :
 938         case SIOCGIFMTU :
 939         case SIOCGIFHWADDR :
 940                 error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
 941                 if (error != 0)
 942                         return (error);
 943                 break;
 944 
 945         case SIOCGSTAMP :
 946                 tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
 947                 tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
 948                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 949                         error = ddi_copyout(&tival, (void *)arg,
 950                             sizeof (tival), mod);
 951                 }
 952 #ifdef _SYSCALL32_IMPL
 953                 else {
 954                         struct timeval32 tv32;
 955                         TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
 956                         error = ddi_copyout(&tv32, (void *)arg,
 957                             sizeof (tv32), mod);
 958                 }
 959 #endif
 960                 return (error);
 961         }
 962 
 963         error =  mac_open_by_linkid(linkid, &mh);
 964         if (error != 0)
 965                 return (error);
 966 
 967         switch (cmd) {
 968         case SIOCGLIFINDEX :
 969                 lifreq.lifr_index = linkid;
 970                 break;
 971 
 972         case SIOCGIFINDEX :
 973                 ifreq.ifr_index = linkid;
 974                 break;
 975 
 976         case SIOCGIFFLAGS :
 977                 ifreq.ifr_flags = IFF_RUNNING;
 978                 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 979                         ifreq.ifr_flags |= IFF_PROMISC;
 980                 break;
 981 
 982         case SIOCGLIFFLAGS :
 983                 lifreq.lifr_flags = IFF_RUNNING;
 984                 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 985                         lifreq.lifr_flags |= IFF_PROMISC;
 986                 break;
 987 
 988         case SIOCSIFFLAGS :
 989                 if (linkid != ps->ps_linkid) {
 990                         error = EINVAL;
 991                 } else {
 992                         if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
 993                                 mtype = MAC_CLIENT_PROMISC_ALL;
 994                         else
 995                                 mtype = MAC_CLIENT_PROMISC_FILTERED;
 996                         error = pfp_set_promisc(ps, mtype);
 997                 }
 998                 break;
 999 
1000         case SIOCSLIFFLAGS :
1001                 if (linkid != ps->ps_linkid) {
1002                         error = EINVAL;
1003                 } else {
1004                         if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1005                                 mtype = MAC_CLIENT_PROMISC_ALL;
1006                         else
1007                                 mtype = MAC_CLIENT_PROMISC_FILTERED;
1008                         error = pfp_set_promisc(ps, mtype);
1009                 }
1010                 break;
1011 
1012         case SIOCGIFMTU :
1013                 mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1014                 break;
1015 
1016         case SIOCGLIFMTU :
1017                 mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1018                 break;
1019 
1020         case SIOCGIFHWADDR :
1021                 if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1022                         error = EPFNOSUPPORT;
1023                         break;
1024                 }
1025 
1026                 if (mac_addr_len(mh) == 0) {
1027                         (void) memset(ifreq.ifr_addr.sa_data, 0,
1028                             sizeof (ifreq.ifr_addr.sa_data));
1029                 } else {
1030                         mac_unicast_primary_get(mh,
1031                             (uint8_t *)ifreq.ifr_addr.sa_data);
1032                 }
1033 
1034                 /*
1035                  * The behaviour here in setting sa_family is consistent
1036                  * with what applications such as tcpdump would expect
1037                  * for a Linux PF_PACKET socket.
1038                  */
1039                 ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1040                 break;
1041 
1042         case SIOCGLIFHWADDR :
1043                 lifreq.lifr_type = 0;
1044                 sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1045 
1046                 if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1047                         error = EPFNOSUPPORT;
1048                         break;
1049                 }
1050 
1051                 /*
1052                  * Fill in the sockaddr_dl with link layer details. Of note,
1053                  * the index is returned as 0 for a couple of reasons:
1054                  * (1) there is no public API that uses or requires it
1055                  * (2) the MAC index is currently 32bits and sdl_index is 16.
1056                  */
1057                 sock->sdl_family = AF_LINK;
1058                 sock->sdl_index = 0;
1059                 sock->sdl_type = mac_type(mh);
1060                 sock->sdl_nlen = 0;
1061                 sock->sdl_alen = mac_addr_len(mh);
1062                 sock->sdl_slen = 0;
1063                 if (mac_addr_len(mh) == 0) {
1064                         (void) memset(sock->sdl_data, 0,
1065                             sizeof (sock->sdl_data));
1066                 } else {
1067                         mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1068                 }
1069                 break;
1070 
1071         default :
1072                 break;
1073         }
1074 
1075         mac_close(mh);
1076 
1077         if (error == 0) {
1078                 /*
1079                  * Only the "GET" ioctls need to copy data back to userace.
1080                  */
1081                 switch (cmd) {
1082                 case SIOCGLIFINDEX :
1083                 case SIOCGLIFFLAGS :
1084                 case SIOCGLIFMTU :
1085                 case SIOCGLIFHWADDR :
1086                         error = ddi_copyout(&lifreq, (void *)arg,
1087                             sizeof (lifreq), mod);
1088                         break;
1089 
1090                 case SIOCGIFINDEX :
1091                 case SIOCGIFFLAGS :
1092                 case SIOCGIFMTU :
1093                 case SIOCGIFHWADDR :
1094                         error = ddi_copyout(&ifreq, (void *)arg,
1095                             sizeof (ifreq), mod);
1096                         break;
1097                 default :
1098                         break;
1099                 }
1100         }
1101 
1102         return (error);
1103 }
1104 
1105 /*
1106  * Closing the socket requires that all open references to network
1107  * interfaces be closed.
1108  */
1109 /* ARGSUSED */
1110 static int
1111 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1112 {
1113         struct pfpsock *ps = (struct pfpsock *)handle;
1114 
1115         if (ps->ps_phd != 0) {
1116                 mac_promisc_remove(ps->ps_phd);
1117                 ps->ps_phd = 0;
1118         }
1119 
1120         if (ps->ps_mch != 0) {
1121                 mac_client_close(ps->ps_mch, 0);
1122                 ps->ps_mch = 0;
1123         }
1124 
1125         if (ps->ps_mh != 0) {
1126                 mac_close(ps->ps_mh);
1127                 ps->ps_mh = 0;
1128         }
1129 
1130         kmem_free(ps, sizeof (*ps));
1131 
1132         return (0);
1133 }
1134 
1135 /* ************************************************************************* */
1136 
1137 /*
1138  * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1139  * determine the linkid for the interface name stored in that structure.
1140  * name is used as a buffer so that we can ensure a trailing \0 is appended
1141  * to the name safely.
1142  */
1143 static int
1144 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1145     datalink_id_t *linkidp, int mode)
1146 {
1147         char name[IFNAMSIZ + 1];
1148         int error;
1149 
1150         if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1151                 return (EFAULT);
1152 
1153         (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1154 
1155         error = dls_mgmt_get_linkid(name, linkidp);
1156         if (error != 0)
1157                 error = dls_devnet_macname2linkid(name, linkidp);
1158 
1159         return (error);
1160 }
1161 
1162 /*
1163  * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1164  * determine the linkid for the interface name stored in that structure.
1165  * name is used as a buffer so that we can ensure a trailing \0 is appended
1166  * to the name safely.
1167  */
1168 static int
1169 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1170     datalink_id_t *linkidp, int mode)
1171 {
1172         char name[LIFNAMSIZ + 1];
1173         int error;
1174 
1175         if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1176                 return (EFAULT);
1177 
1178         (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1179 
1180         error = dls_mgmt_get_linkid(name, linkidp);
1181         if (error != 0)
1182                 error = dls_devnet_macname2linkid(name, linkidp);
1183 
1184         return (error);
1185 }
1186 
1187 /*
1188  * Although there are several new SOL_PACKET options that can be set and
1189  * are specific to this implementation of PF_PACKET, the current API does
1190  * not support doing a get on them to retrieve accompanying status. Thus
1191  * it is only currently possible to use SOL_PACKET with getsockopt to
1192  * retrieve statistical information. This remains consistant with the
1193  * Linux API at the time of writing.
1194  */
1195 static int
1196 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1197     void *optval, socklen_t *optlenp)
1198 {
1199         struct pfpsock *ps;
1200         struct tpacket_stats_short tpss;
1201         int error = 0;
1202 
1203         ps = (struct pfpsock *)handle;
1204 
1205         switch (option_name) {
1206         case PACKET_STATISTICS :
1207                 if (*optlenp < sizeof (ps->ps_stats)) {
1208                         error = EINVAL;
1209                         break;
1210                 }
1211                 *optlenp = sizeof (ps->ps_stats);
1212                 bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1213                 break;
1214         case PACKET_STATISTICS_SHORT :
1215                 if (*optlenp < sizeof (tpss)) {
1216                         error = EINVAL;
1217                         break;
1218                 }
1219                 *optlenp = sizeof (tpss);
1220                 tpss.tp_packets = ps->ps_stats.tp_packets;
1221                 tpss.tp_drops = ps->ps_stats.tp_drops;
1222                 bcopy(&tpss, optval, sizeof (tpss));
1223                 break;
1224         default :
1225                 error = EINVAL;
1226                 break;
1227         }
1228 
1229         return (error);
1230 }
1231 
1232 /*
1233  * The SOL_PACKET level for socket options supports three options,
1234  * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1235  * This function is responsible for mapping the two socket options
1236  * that manage multicast membership into the appropriate internal
1237  * function calls to bring the option into effect. Whilst direct
1238  * changes to the multicast membership (ADD/DROP) groups is handled
1239  * by calls directly into the mac module, changes to the promiscuos
1240  * mode are vectored through pfp_set_promisc() so that the logic for
1241  * managing the promiscuous mode is in one place.
1242  */
1243 /* ARGSUSED */
1244 static int
1245 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1246     const void *optval, socklen_t optlen)
1247 {
1248         struct packet_mreq mreq;
1249         struct pfpsock *ps;
1250         int error = 0;
1251         int opt;
1252 
1253         ps = (struct pfpsock *)handle;
1254         if (!ps->ps_bound)
1255                 return (EPROTO);
1256 
1257         if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1258             (option_name == PACKET_DROP_MEMBERSHIP)) {
1259                 if (!ps->ps_bound)
1260                         return (EPROTO);
1261                 bcopy(optval, &mreq, sizeof (mreq));
1262                 if (ps->ps_linkid != mreq.mr_ifindex)
1263                         return (EINVAL);
1264         }
1265 
1266         switch (option_name) {
1267         case PACKET_ADD_MEMBERSHIP :
1268                 switch (mreq.mr_type) {
1269                 case PACKET_MR_MULTICAST :
1270                         if (mreq.mr_alen != ps->ps_sock.sll_halen)
1271                                 return (EINVAL);
1272 
1273                         error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1274                         break;
1275 
1276                 case PACKET_MR_PROMISC :
1277                         error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1278                         break;
1279 
1280                 case PACKET_MR_ALLMULTI :
1281                         error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1282                         break;
1283                 }
1284                 break;
1285 
1286         case PACKET_DROP_MEMBERSHIP :
1287                 switch (mreq.mr_type) {
1288                 case PACKET_MR_MULTICAST :
1289                         if (mreq.mr_alen != ps->ps_sock.sll_halen)
1290                                 return (EINVAL);
1291 
1292                         mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1293                         break;
1294 
1295                 case PACKET_MR_PROMISC :
1296                         if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1297                                 return (EINVAL);
1298                         error = pfp_set_promisc(ps,
1299                             MAC_CLIENT_PROMISC_FILTERED);
1300                         break;
1301 
1302                 case PACKET_MR_ALLMULTI :
1303                         if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1304                                 return (EINVAL);
1305                         error = pfp_set_promisc(ps,
1306                             MAC_CLIENT_PROMISC_FILTERED);
1307                         break;
1308                 }
1309                 break;
1310 
1311         case PACKET_AUXDATA :
1312                 if (optlen == sizeof (int)) {
1313                         opt = *(int *)optval;
1314                         ps->ps_auxdata = (opt != 0);
1315                 } else {
1316                         error = EINVAL;
1317                 }
1318                 break;
1319         default :
1320                 error = EINVAL;
1321                 break;
1322         }
1323 
1324         return (error);
1325 }
1326 
1327 /*
1328  * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1329  * SO_ATTACH_FILTER and SO_DETACH_FILTER.
1330  *
1331  * Both of these setsockopt values are candidates for being handled by the
1332  * socket layer itself in future, however this requires understanding how
1333  * they would interact with all other sockets.
1334  */
1335 static int
1336 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1337     const void *optval, socklen_t optlen)
1338 {
1339         struct bpf_program prog;
1340         ip_bpf_insn_t *fcode;
1341         struct pfpsock *ps;
1342         struct sock_proto_props sopp;
1343         int error = 0;
1344         int size;
1345 
1346         ps = (struct pfpsock *)handle;
1347 
1348         switch (option_name) {
1349         case SO_ATTACH_FILTER :
1350 #ifdef _LP64
1351                 if (optlen == sizeof (struct bpf_program32)) {
1352                         struct bpf_program32 prog32;
1353 
1354                         bcopy(optval, &prog32, sizeof (prog32));
1355                         prog.bf_len = prog32.bf_len;
1356                         prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1357                 } else
1358 #endif
1359                 if (optlen == sizeof (struct bpf_program)) {
1360                         bcopy(optval, &prog, sizeof (prog));
1361                 } else if (optlen != sizeof (struct bpf_program)) {
1362                         return (EINVAL);
1363                 }
1364                 if (prog.bf_len > BPF_MAXINSNS)
1365                         return (EINVAL);
1366 
1367                 size = prog.bf_len * sizeof (*prog.bf_insns);
1368                 fcode = kmem_alloc(size, KM_SLEEP);
1369                 if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1370                         kmem_free(fcode, size);
1371                         return (EFAULT);
1372                 }
1373 
1374                 if (ip_bpf_validate(fcode, prog.bf_len)) {
1375                         rw_enter(&ps->ps_bpflock, RW_WRITER);
1376                         pfp_release_bpf(ps);
1377                         ps->ps_bpf.bf_insns = (struct bpf_insn *)fcode;
1378                         ps->ps_bpf.bf_len = size;
1379                         rw_exit(&ps->ps_bpflock);
1380 
1381                         return (0);
1382                 }
1383                 kmem_free(fcode, size);
1384                 error = EINVAL;
1385                 break;
1386 
1387         case SO_DETACH_FILTER :
1388                 pfp_release_bpf(ps);
1389                 break;
1390 
1391         case SO_RCVBUF :
1392                 size = *(int32_t *)optval;
1393                 if (size > sockmod_pfp_rcvbuf_max || size < 0)
1394                         return (ENOBUFS);
1395                 sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1396                 sopp.sopp_rxhiwat = size;
1397                 ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1398                 ps->ps_rcvbuf = size;
1399                 break;
1400 
1401         default :
1402                 error = ENOPROTOOPT;
1403                 break;
1404         }
1405 
1406         return (error);
1407 }
1408 
1409 /*
1410  * pfp_open_index is an internal function used to open a MAC device by
1411  * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1412  * because some of the interfaces provided by the mac layer require either
1413  * only the mac_handle_t or both it and mac_handle_t.
1414  *
1415  * Whilst inside the kernel we can access data structures supporting any
1416  * zone, access to interfaces from non-global zones is restricted to those
1417  * interfaces (if any) that are exclusively assigned to a zone.
1418  */
1419 static int
1420 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1421     cred_t *cred)
1422 {
1423         mac_client_handle_t mch;
1424         zoneid_t ifzoneid;
1425         mac_handle_t mh;
1426         zoneid_t zoneid;
1427         int error;
1428 
1429         mh = 0;
1430         mch = 0;
1431         error = mac_open_by_linkid(index, &mh);
1432         if (error != 0)
1433                 goto bad_open;
1434 
1435         error = mac_client_open(mh, &mch, NULL,
1436             MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1437         if (error != 0)
1438                 goto bad_open;
1439 
1440         zoneid = crgetzoneid(cred);
1441         if (zoneid != GLOBAL_ZONEID) {
1442                 mac_perim_handle_t perim;
1443 
1444                 mac_perim_enter_by_mh(mh, &perim);
1445                 error = dls_link_getzid(mac_name(mh), &ifzoneid);
1446                 mac_perim_exit(perim);
1447                 if (error != 0)
1448                         goto bad_open;
1449                 if (ifzoneid != zoneid) {
1450                         error = EACCES;
1451                         goto bad_open;
1452                 }
1453         }
1454 
1455         *mcip = mch;
1456         *mhp = mh;
1457 
1458         return (0);
1459 bad_open:
1460         if (mch != 0)
1461                 mac_client_close(mch, 0);
1462         if (mh != 0)
1463                 mac_close(mh);
1464         return (error);
1465 }
1466 
1467 static void
1468 pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1469 {
1470         mac_client_close(mch, 0);
1471         mac_close(mh);
1472 }
1473 
1474 /*
1475  * The purpose of this function is to provide a single place where we free
1476  * the loaded BPF program and reset all pointers/counters associated with
1477  * it.
1478  */
1479 static void
1480 pfp_release_bpf(struct pfpsock *ps)
1481 {
1482         if (ps->ps_bpf.bf_len != 0) {
1483                 kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1484                 ps->ps_bpf.bf_len = 0;
1485                 ps->ps_bpf.bf_insns = NULL;
1486         }
1487 }
1488 
1489 /*
1490  * Set the promiscuous mode of a network interface.
1491  * This function only calls the mac layer when there is a change to the
1492  * status of a network interface's promiscous mode. Tracking of how many
1493  * sockets have the network interface in promiscuous mode, and thus the
1494  * control over the physical device's status, is left to the mac layer.
1495  */
1496 static int
1497 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1498 {
1499         int error = 0;
1500         int flags;
1501 
1502         /*
1503          * There are 4 combinations of turnon/ps_promisc.
1504          * This if handles 2 (both false, both true) and the if() below
1505          * handles the remaining one - when change is required.
1506          */
1507         if (turnon == ps->ps_promisc)
1508                 return (error);
1509 
1510         if (ps->ps_phd != 0) {
1511                 mac_promisc_remove(ps->ps_phd);
1512                 ps->ps_phd = 0;
1513 
1514                 /*
1515                  * ps_promisc is set here in case the call to mac_promisc_add
1516                  * fails: leaving it to indicate that the interface is still
1517                  * in some sort of promiscuous mode is false.
1518                  */
1519                 if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1520                         ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1521                         flags = MAC_PROMISC_FLAGS_NO_PHYS;
1522                 } else {
1523                         flags = 0;
1524                 }
1525                 flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1526         }
1527 
1528         error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1529             &ps->ps_phd, flags);
1530         if (error == 0)
1531                 ps->ps_promisc = turnon;
1532 
1533         return (error);
1534 }
1535 
1536 /*
1537  * This table maps the MAC types in Solaris to the ARPHRD_* values used
1538  * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1539  *
1540  * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1541  * they are pulled from <netpacket/packet.h>, thus it acts as a source
1542  * of supplementary information to the ARP table.
1543  */
1544 static uint_t arphrd_to_dl[][2] = {
1545         { ARPHRD_IEEE80211,     DL_WIFI },
1546         { ARPHRD_TUNNEL,        DL_IPV4 },
1547         { ARPHRD_TUNNEL,        DL_IPV6 },
1548         { ARPHRD_TUNNEL,        DL_6TO4 },
1549         { ARPHRD_AX25,          DL_X25 },
1550         { ARPHRD_ATM,           DL_ATM },
1551         { 0,                    0 }
1552 };
1553 
1554 static int
1555 pfp_dl_to_arphrd(int dltype)
1556 {
1557         int i;
1558 
1559         for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1560                 if (arphrd_to_dl[i][1] == dltype)
1561                         return (arphrd_to_dl[i][0]);
1562         return (arp_hw_type(dltype));
1563 }