io-lx-public-vs-joyent Wdiff usr/src/uts/common/inet/sockmods/sockmod_pfp.c

Print this page

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/inet/sockmods/sockmod_pfp.c
          +++ new/usr/src/uts/common/inet/sockmods/sockmod_pfp.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2016 Joyent, Inc.
  25   25   */
  26   26  
  27   27  #include <sys/types.h>
  28   28  #include <sys/param.h>
  29   29  #include <sys/systm.h>
  30   30  #include <sys/stropts.h>
  31   31  #include <sys/socket.h>
  32   32  #include <sys/socketvar.h>
  33   33  #include <sys/socket_proto.h>
  34   34  #include <sys/sockio.h>
  35   35  #include <sys/strsun.h>
  36   36  #include <sys/kstat.h>
  37   37  #include <sys/modctl.h>
  38   38  #include <sys/policy.h>
  39   39  #include <sys/priv_const.h>
  40   40  #include <sys/tihdr.h>
  41   41  #include <sys/zone.h>
  42   42  #include <sys/time.h>
  43   43  #include <sys/ethernet.h>
  44   44  #include <sys/llc1.h>
  45   45  #include <fs/sockfs/sockcommon.h>
  46   46  #include <net/if.h>
  47   47  #include <inet/ip_arp.h>
  48   48  
  49   49  #include <sys/dls.h>
  50   50  #include <sys/mac.h>
  51   51  #include <sys/mac_client.h>
  52   52  #include <sys/mac_provider.h>
  53   53  #include <sys/mac_client_priv.h>
  54   54  #include <inet/bpf.h>
  55   55  
  56   56  #include <netpacket/packet.h>
  57   57  
  58   58  static void pfp_close(mac_handle_t, mac_client_handle_t);
  59   59  static int pfp_dl_to_arphrd(int);
  60   60  static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
  61   61      socklen_t *);
  62   62  static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
  63   63  static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
  64   64      int);
  65   65  static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
  66   66      cred_t *);
  67   67  static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
  68   68  static void pfp_release_bpf(struct pfpsock *);
  69   69  static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
  70   70  static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
  71   71      socklen_t);
  72   72  static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
  73   73      socklen_t);
  74   74  
  75   75  /*
  76   76   * PFP sockfs operations
  77   77   * Most are currently no-ops because they have no meaning for a connectionless
  78   78   * socket.
  79   79   */
  80   80  static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
  81   81      sock_upcalls_t *, int, struct cred *);
  82   82  static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
  83   83      struct cred *);
  84   84  static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
  85   85  static void sdpfp_clr_flowctrl(sock_lower_handle_t);
  86   86  static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
  87   87      socklen_t *, struct cred *);
  88   88  static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
  89   89      struct cred *);
  90   90  static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
  91   91      struct cred *);
  92   92  static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
  93   93      socklen_t, struct cred *);
  94   94  
  95   95  static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
  96   96      uint_t *, int *, int, cred_t *);
  97   97  
  98   98  static int sockpfp_init(void);
  99   99  static void sockpfp_fini(void);
 100  100  
 101  101  static kstat_t *pfp_ksp;
 102  102  static pfp_kstats_t ks_stats;
 103  103  static pfp_kstats_t pfp_kstats = {
 104  104          /*
 105  105           * Each one of these kstats is a different return path in handling
 106  106           * a packet received from the mac layer.
 107  107           */
 108  108          { "recvMacHeaderFail",  KSTAT_DATA_UINT64 },
 109  109          { "recvBadProtocol",    KSTAT_DATA_UINT64 },
 110  110          { "recvAllocbFail",     KSTAT_DATA_UINT64 },
 111  111          { "recvOk",             KSTAT_DATA_UINT64 },
 112  112          { "recvFail",           KSTAT_DATA_UINT64 },
 113  113          { "recvFiltered",       KSTAT_DATA_UINT64 },
 114  114          { "recvFlowControl",    KSTAT_DATA_UINT64 },
 115  115          /*
 116  116           * A global set of counters is maintained to track the behaviour
 117  117           * of the system (kernel & applications) in sending packets.
 118  118           */
 119  119          { "sendUnbound",        KSTAT_DATA_UINT64 },
 120  120          { "sendFailed",         KSTAT_DATA_UINT64 },
 121  121          { "sendTooBig",         KSTAT_DATA_UINT64 },
 122  122          { "sendAllocFail",      KSTAT_DATA_UINT64 },
 123  123          { "sendUiomoveFail",    KSTAT_DATA_UINT64 },
 124  124          { "sendNoMemory",       KSTAT_DATA_UINT64 },
 125  125          { "sendOpenFail",       KSTAT_DATA_UINT64 },
 126  126          { "sendWrongFamily",    KSTAT_DATA_UINT64 },
 127  127          { "sendShortMsg",       KSTAT_DATA_UINT64 },
 128  128          { "sendOk",             KSTAT_DATA_UINT64 }
 129  129  };
 130  130  
 131  131  sock_downcalls_t pfp_downcalls = {
 132  132          sdpfp_activate,
 133  133          sock_accept_notsupp,
 134  134          sdpfp_bind,
 135  135          sock_listen_notsupp,
 136  136          sock_connect_notsupp,
 137  137          sock_getpeername_notsupp,
 138  138          sock_getsockname_notsupp,
 139  139          sdpfp_getsockopt,
 140  140          sdpfp_setsockopt,
 141  141          sock_send_notsupp,
 142  142          sdpfp_senduio,
 143  143          NULL,
 144  144          sock_poll_notsupp,
 145  145          sock_shutdown_notsupp,
 146  146          sdpfp_clr_flowctrl,
 147  147          sdpfp_ioctl,
 148  148          sdpfp_close,
 149  149  };
 150  150  
 151  151  static smod_reg_t sinfo = {
 152  152          SOCKMOD_VERSION,
 153  153          "sockpfp",
 154  154          SOCK_UC_VERSION,
 155  155          SOCK_DC_VERSION,
 156  156          sockpfp_create,
 157  157          NULL
 158  158  };
 159  159  
 160  160  static int accepted_protos[3][2] = {
 161  161          { ETH_P_ALL,    0 },
 162  162          { ETH_P_802_2,  LLC_SNAP_SAP },
 163  163          { ETH_P_803_3,  0 },
 164  164  };
 165  165  
 166  166  /*
 167  167   * This sets an upper bound on the size of the receive buffer for a PF_PACKET
 168  168   * socket. More properly, this should be controlled through ipadm, ala TCP, UDP,
 169  169   * SCTP, etc. Until that's done, this provides a hard cap of 4 MB and allows an
 170  170   * opportunity for it to be changed, should it be needed.
 171  171   */
 172  172  int sockmod_pfp_rcvbuf_max = 1024 * 1024 * 4;
 173  173  
 174  174  /*
 175  175   * Module linkage information for the kernel.
 176  176   */
 177  177  static struct modlsockmod modlsockmod = {
 178  178          &mod_sockmodops, "PF Packet socket module", &sinfo
 179  179  };
 180  180  
 181  181  static struct modlinkage modlinkage = {
 182  182          MODREV_1,
 183  183          &modlsockmod,
 184  184          NULL
 185  185  };
 186  186  
 187  187  int
 188  188  _init(void)
 189  189  {
 190  190          int error;
 191  191  
 192  192          error = sockpfp_init();
 193  193          if (error != 0)
 194  194                  return (error);
 195  195  
 196  196          error = mod_install(&modlinkage);
 197  197          if (error != 0)
 198  198                  sockpfp_fini();
 199  199  
 200  200          return (error);
 201  201  }
 202  202  
 203  203  int
 204  204  _fini(void)
 205  205  {
 206  206          int error;
 207  207  
 208  208          error = mod_remove(&modlinkage);
 209  209          if (error == 0)
 210  210                  sockpfp_fini();
 211  211  
 212  212          return (error);
 213  213  }
 214  214  
 215  215  int
 216  216  _info(struct modinfo *modinfop)
 217  217  {
 218  218          return (mod_info(&modlinkage, modinfop));
 219  219  }
 220  220  
 221  221  /*
 222  222   * sockpfp_init: called as part of the initialisation of the module when
 223  223   * loaded into the kernel.
 224  224   *
 225  225   * Being able to create and record the kstats data in the kernel is not
 226  226   * considered to be vital to the operation of this kernel module, thus
 227  227   * its failure is tolerated.
 228  228   */
 229  229  static int
 230  230  sockpfp_init(void)
 231  231  {
 232  232          (void) memset(&ks_stats, 0, sizeof (ks_stats));
 233  233  
 234  234          (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
 235  235  
 236  236          pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
 237  237              KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
 238  238              KSTAT_FLAG_VIRTUAL);
 239  239          if (pfp_ksp != NULL) {
 240  240                  pfp_ksp->ks_data = &ks_stats;
 241  241                  kstat_install(pfp_ksp);
 242  242          }
 243  243  
 244  244          return (0);
 245  245  }
 246  246  
 247  247  /*
 248  248   * sockpfp_fini: called when the operating system wants to unload the
 249  249   * socket module from the kernel.
 250  250   */
 251  251  static void
 252  252  sockpfp_fini(void)
 253  253  {
 254  254          if (pfp_ksp != NULL)
 255  255                  kstat_delete(pfp_ksp);
 256  256  }
 257  257  
 258  258  /*
 259  259   * Due to sockets being created read-write by default, all PF_PACKET sockets
 260  260   * therefore require the NET_RAWACCESS priviliege, even if the socket is only
 261  261   * being used for reading packets from.
 262  262   *
 263  263   * This create function enforces this module only being used with PF_PACKET
 264  264   * sockets and the policy that we support via the config file in sock2path.d:
 265  265   * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
 266  266   */
 267  267  /* ARGSUSED */
 268  268  static sock_lower_handle_t
 269  269  sockpfp_create(int family, int type, int proto,
 270  270      sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
 271  271      int sflags, cred_t *cred)
 272  272  {
 273  273          struct pfpsock *ps;
 274  274          int kmflags;
 275  275          int newproto;
 276  276          int i;
 277  277  
 278  278          if (secpolicy_net_rawaccess(cred) != 0) {
 279  279                  *errorp = EACCES;
 280  280                  return (NULL);
 281  281          }
 282  282  
 283  283          if (family != AF_PACKET) {
 284  284                  *errorp = EAFNOSUPPORT;
 285  285                  return (NULL);
 286  286          }
 287  287  
 288  288          if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
 289  289                  *errorp = ESOCKTNOSUPPORT;
 290  290                  return (NULL);
 291  291          }
 292  292  
 293  293          /*
 294  294           * First check to see if the protocol number passed in via the socket
 295  295           * creation should be mapped to a different number for internal use.
 296  296           */
 297  297          for (i = 0, newproto = -1;
 298  298              i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
 299  299                  if (accepted_protos[i][0] == proto) {
 300  300                          newproto = accepted_protos[i][1];
 301  301                          break;
 302  302                  }
 303  303          }
 304  304  
 305  305          /*
 306  306           * If the mapping of the protocol that was under 0x800 failed to find
 307  307           * a local equivalent then fail the socket creation. If the protocol
 308  308           * for the socket is over 0x800 and it was not found in the mapping
 309  309           * table above, then use the value as is.
 310  310           */
 311  311          if (newproto == -1) {
 312  312                  if (proto < 0x800) {
 313  313                          *errorp = ENOPROTOOPT;
 314  314                          return (NULL);
 315  315                  }
 316  316                  newproto = proto;
 317  317          }
 318  318          proto = newproto;
 319  319  
 320  320          kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 321  321          ps = kmem_zalloc(sizeof (*ps), kmflags);
 322  322          if (ps == NULL) {
 323  323                  *errorp = ENOMEM;
 324  324                  return (NULL);
 325  325          }
 326  326  
 327  327          ps->ps_type = type;
 328  328          ps->ps_proto = proto;
 329  329          rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
 330  330          mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
 331  331  
 332  332          *sock_downcalls = &pfp_downcalls;
 333  333          /*
 334  334           * Setting this causes bytes from a packet that do not fit into the
 335  335           * destination user buffer to be discarded. Thus the API is one
 336  336           * packet per receive and callers are required to use a buffer large
 337  337           * enough for the biggest packet that the interface can provide.
 338  338           */
 339  339          *smodep = SM_ATOMIC;
 340  340  
 341  341          return ((sock_lower_handle_t)ps);
 342  342  }
 343  343  
 344  344  /* ************************************************************************* */
 345  345  
 346  346  /*
 347  347   * pfp_packet is the callback function that is given to the mac layer for
 348  348   * PF_PACKET to receive packets with. One packet at a time is passed into
 349  349   * this function from the mac layer. Each packet is a private copy given
 350  350   * to PF_PACKET to modify or free as it wishes and does not harm the original
 351  351   * packet from which it was cloned.
 352  352   */
 353  353  /* ARGSUSED */
 354  354  static void
 355  355  pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
 356  356  {
 357  357          struct T_unitdata_ind *tunit;
 358  358          struct sockaddr_ll *sll;
 359  359          struct sockaddr_ll *sol;
 360  360          mac_header_info_t hdr;
 361  361          struct pfpsock *ps;
 362  362          size_t tusz;
 363  363          mblk_t *mp0;
 364  364          int error;
 365  365  
 366  366          if (mp == NULL)
 367  367                  return;
 368  368  
 369  369          ps = arg;
 370  370          if (ps->ps_flow_ctrld) {
 371  371                  ps->ps_flow_ctrl_drops++;
 372  372                  ps->ps_stats.tp_drops++;
 373  373                  ks_stats.kp_recv_flow_cntrld.value.ui64++;
 374  374                  freemsg(mp);
 375  375                  return;
 376  376          }
 377  377  
 378  378          if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
 379  379                  /*
 380  380                   * Can't decode the packet header information so drop it.
 381  381                   */
 382  382                  ps->ps_stats.tp_drops++;
 383  383                  ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
 384  384                  freemsg(mp);
 385  385                  return;
 386  386          }
 387  387  
 388  388          if (mac_type(ps->ps_mh) == DL_ETHER &&
 389  389              hdr.mhi_bindsap == ETHERTYPE_VLAN) {
 390  390                  struct ether_vlan_header *evhp;
 391  391                  struct ether_vlan_header evh;
 392  392  
 393  393                  hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
 394  394                  hdr.mhi_istagged = B_TRUE;
 395  395  
 396  396                  if (MBLKL(mp) >= sizeof (*evhp)) {
 397  397                          evhp = (struct ether_vlan_header *)mp->b_rptr;
 398  398                  } else {
 399  399                          int sz = sizeof (*evhp);
 400  400                          char *s = (char *)&evh;
 401  401                          mblk_t *tmp;
 402  402                          int len;
 403  403  
 404  404                          for (tmp = mp; sz > 0 && tmp != NULL;
 405  405                              tmp = tmp->b_cont) {
 406  406                                  len = min(sz, MBLKL(tmp));
 407  407                                  bcopy(tmp->b_rptr, s, len);
 408  408                                  sz -= len;
 409  409                          }
 410  410                          evhp = &evh;
 411  411                  }
 412  412                  hdr.mhi_tci = ntohs(evhp->ether_tci);
 413  413                  hdr.mhi_bindsap = ntohs(evhp->ether_type);
 414  414          }
 415  415  
 416  416          if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
 417  417                  /*
 418  418                   * The packet is not of interest to this socket so
 419  419                   * drop it on the floor. Here the SAP is being used
 420  420                   * as a very course filter.
 421  421                   */
 422  422                  ps->ps_stats.tp_drops++;
 423  423                  ks_stats.kp_recv_bad_proto.value.ui64++;
 424  424                  freemsg(mp);
 425  425                  return;
 426  426          }
 427  427  
 428  428          /*
 429  429           * This field is not often set, even for ethernet,
 430  430           * by mac_header_info, so compute it if it is 0.
 431  431           */
 432  432          if (hdr.mhi_pktsize == 0)
 433  433                  hdr.mhi_pktsize = msgdsize(mp);
 434  434  
 435  435          /*
 436  436           * If a BPF filter is present, pass the raw packet into that.
 437  437           * A failed match will result in zero being returned, indicating
 438  438           * that this socket is not interested in the packet.
 439  439           */
 440  440          if (ps->ps_bpf.bf_len != 0) {
 441  441                  uchar_t *buffer;
 442  442                  int buflen;
 443  443  
 444  444                  buflen = MBLKL(mp);
 445  445                  if (hdr.mhi_pktsize == buflen) {
 446  446                          buffer = mp->b_rptr;
 447  447                  } else {
 448  448                          buflen = 0;
 449  449                          buffer = (uchar_t *)mp;
 450  450                  }
 451  451                  rw_enter(&ps->ps_bpflock, RW_READER);
 452  452                  if (ip_bpf_filter((ip_bpf_insn_t *)ps->ps_bpf.bf_insns, buffer,
 453  453                      hdr.mhi_pktsize, buflen) == 0) {
 454  454                          rw_exit(&ps->ps_bpflock);
 455  455                          ps->ps_stats.tp_drops++;
 456  456                          ks_stats.kp_recv_filtered.value.ui64++;
 457  457                          freemsg(mp);
 458  458                          return;
 459  459                  }
 460  460                  rw_exit(&ps->ps_bpflock);
 461  461          }
 462  462  
 463  463          if (ps->ps_type == SOCK_DGRAM) {
 464  464                  /*
 465  465                   * SOCK_DGRAM socket expect a "layer 3" packet, so advance
 466  466                   * past the link layer header.
 467  467                   */
 468  468                  mp->b_rptr += hdr.mhi_hdrsize;
 469  469                  hdr.mhi_pktsize -= hdr.mhi_hdrsize;
 470  470          }
 471  471  
 472  472          tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
 473  473          if (ps->ps_auxdata) {
 474  474                  tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 475  475                  tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
 476  476          }
 477  477  
 478  478          /*
 479  479           * It is tempting to think that this could be optimised by having
 480  480           * the base mblk_t allocated and hung off the pfpsock structure,
 481  481           * except that then another one would need to be allocated for the
 482  482           * sockaddr_ll that is included. Even creating a template to copy
 483  483           * from is of questionable value, as read-write from one structure
 484  484           * to the other is going to be slower than all of the initialisation.
 485  485           */
 486  486          mp0 = allocb(tusz, BPRI_HI);
 487  487          if (mp0 == NULL) {
 488  488                  ps->ps_stats.tp_drops++;
 489  489                  ks_stats.kp_recv_alloc_fail.value.ui64++;
 490  490                  freemsg(mp);
 491  491                  return;
 492  492          }
 493  493  
 494  494          (void) memset(mp0->b_rptr, 0, tusz);
 495  495  
 496  496          mp0->b_datap->db_type = M_PROTO;
 497  497          mp0->b_wptr = mp0->b_rptr + tusz;
 498  498  
 499  499          tunit = (struct T_unitdata_ind *)mp0->b_rptr;
 500  500          tunit->PRIM_type = T_UNITDATA_IND;
 501  501          tunit->SRC_length = sizeof (struct sockaddr);
 502  502          tunit->SRC_offset = sizeof (*tunit);
 503  503  
 504  504          sol = &ps->ps_sock;
 505  505          sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
 506  506          sll->sll_ifindex = sol->sll_ifindex;
 507  507          sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
 508  508          sll->sll_halen = sol->sll_halen;
 509  509          if (hdr.mhi_saddr != NULL)
 510  510                  (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
 511  511  
 512  512          switch (hdr.mhi_dsttype) {
 513  513          case MAC_ADDRTYPE_MULTICAST :
 514  514                  sll->sll_pkttype = PACKET_MULTICAST;
 515  515                  break;
 516  516          case MAC_ADDRTYPE_BROADCAST :
 517  517                  sll->sll_pkttype = PACKET_BROADCAST;
 518  518                  break;
 519  519          case MAC_ADDRTYPE_UNICAST :
 520  520                  if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
 521  521                          sll->sll_pkttype = PACKET_HOST;
 522  522                  else
 523  523                          sll->sll_pkttype = PACKET_OTHERHOST;
 524  524                  break;
 525  525          }
 526  526  
 527  527          if (ps->ps_auxdata) {
 528  528                  struct tpacket_auxdata *aux;
 529  529                  struct T_opthdr *topt;
 530  530  
 531  531                  tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
 532  532                      sizeof (struct sockaddr_ll));
 533  533                  tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
 534  534                      _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 535  535  
 536  536                  topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
 537  537                  aux = (struct tpacket_auxdata *)
 538  538                      ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
 539  539  
 540  540                  topt->len = tunit->OPT_length;
 541  541                  topt->level = SOL_PACKET;
 542  542                  topt->name = PACKET_AUXDATA;
 543  543                  topt->status = 0;
 544  544                  /*
 545  545                   * libpcap doesn't seem to use any other field,
 546  546                   * so it isn't clear how they should be filled in.
 547  547                   */
 548  548                  aux->tp_vlan_vci = hdr.mhi_tci;
 549  549          }
 550  550  
 551  551          linkb(mp0, mp);
 552  552  
 553  553          (void) gethrestime(&ps->ps_timestamp);
 554  554  
 555  555          ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
 556  556              &error, NULL);
 557  557  
 558  558          if (error == 0) {
 559  559                  ps->ps_stats.tp_packets++;
 560  560                  ks_stats.kp_recv_ok.value.ui64++;
 561  561          } else {
 562  562                  mutex_enter(&ps->ps_lock);
 563  563                  if (error == ENOSPC) {
 564  564                          ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
 565  565                              &error, NULL);
 566  566                          if (error == ENOSPC)
 567  567                                  ps->ps_flow_ctrld = B_TRUE;
 568  568                  }
 569  569                  mutex_exit(&ps->ps_lock);
 570  570                  ps->ps_stats.tp_drops++;
 571  571                  ks_stats.kp_recv_fail.value.ui64++;
 572  572          }
 573  573  }
 574  574  
 575  575  /*
 576  576   * Bind a PF_PACKET socket to a network interface.
 577  577   *
 578  578   * The default operation of this bind() is to place the socket (and thus the
 579  579   * network interface) into promiscuous mode. It is then up to the application
 580  580   * to turn that down by issuing the relevant ioctls, if desired.
 581  581   */
 582  582  static int
 583  583  sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
 584  584      socklen_t addrlen, struct cred *cred)
 585  585  {
 586  586          struct sockaddr_ll *addr_ll, *sol;
 587  587          mac_client_handle_t mch;
 588  588          struct pfpsock *ps;
 589  589          mac_handle_t mh;
 590  590          int error;
 591  591  
 592  592          ps = (struct pfpsock *)handle;
 593  593          if (ps->ps_bound)
 594  594                  return (EINVAL);
 595  595  
 596  596          if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
 597  597                  return (EINVAL);
 598  598  
 599  599          addr_ll = (struct sockaddr_ll *)addr;
 600  600  
 601  601          error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
 602  602          if (error != 0)
 603  603                  return (error);
 604  604          /*
 605  605           * Ensure that each socket is only bound once.
 606  606           */
 607  607          mutex_enter(&ps->ps_lock);
 608  608          if (ps->ps_mh != 0) {
 609  609                  mutex_exit(&ps->ps_lock);
 610  610                  pfp_close(mh, mch);
 611  611                  return (EADDRINUSE);
 612  612          }
 613  613          ps->ps_mh = mh;
 614  614          ps->ps_mch = mch;
 615  615          mutex_exit(&ps->ps_lock);
 616  616  
 617  617          /*
 618  618           * Cache all of the information from bind so that it's in an easy
 619  619           * place to get at when packets are received.
 620  620           */
 621  621          sol = &ps->ps_sock;
 622  622          sol->sll_family = AF_PACKET;
 623  623          sol->sll_ifindex = addr_ll->sll_ifindex;
 624  624          sol->sll_protocol = addr_ll->sll_protocol;
 625  625          sol->sll_halen = mac_addr_len(ps->ps_mh);
 626  626          mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
 627  627          mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
 628  628          ps->ps_linkid = addr_ll->sll_ifindex;
 629  629  
 630  630          error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
 631  631              pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
 632  632          if (error == 0) {
 633  633                  ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
 634  634                  ps->ps_bound = B_TRUE;
 635  635          }
 636  636  
 637  637          return (error);
 638  638  }
 639  639  
 640  640  /* ARGSUSED */
 641  641  static void
 642  642  sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
 643  643      sock_upcalls_t *upcalls, int flags, cred_t *cred)
 644  644  {
 645  645          struct pfpsock *ps;
 646  646  
 647  647          ps = (struct pfpsock *)lower;
 648  648          ps->ps_upper = upper;
 649  649          ps->ps_upcalls = upcalls;
 650  650  }
 651  651  
 652  652  /*
 653  653   * This module only implements getting socket options for the new socket
 654  654   * option level (SOL_PACKET) that it introduces. All other requests are
 655  655   * passed back to the sockfs layer.
 656  656   */
 657  657  /* ARGSUSED */
 658  658  static int
 659  659  sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
 660  660      void *optval, socklen_t *optlenp, struct cred *cred)
 661  661  {
 662  662          struct pfpsock *ps;
 663  663          int error = 0;
 664  664  
 665  665          ps = (struct pfpsock *)handle;
 666  666  
 667  667          switch (level) {
 668  668          case SOL_PACKET :
 669  669                  error = pfp_getpacket_sockopt(handle, option_name, optval,
 670  670                      optlenp);
 671  671                  break;
 672  672  
 673  673          case SOL_SOCKET :
 674  674                  if (option_name == SO_RCVBUF) {
 675  675                          if (*optlenp < sizeof (int32_t))
 676  676                                  return (EINVAL);
 677  677                          *((int32_t *)optval) = ps->ps_rcvbuf;
 678  678                          *optlenp = sizeof (int32_t);
 679  679                  } else {
 680  680                          error = ENOPROTOOPT;
 681  681                  }
 682  682                  break;
 683  683  
 684  684          default :
 685  685                  /*
 686  686                   * If sockfs code receives this error in return from the
 687  687                   * getsockopt downcall it handles the option locally, if
 688  688                   * it can.
 689  689                   */
 690  690                  error = ENOPROTOOPT;
 691  691                  break;
 692  692          }
 693  693  
 694  694          return (error);
 695  695  }
 696  696  
 697  697  /*
 698  698   * PF_PACKET supports setting socket options at only two levels:
 699  699   * SOL_SOCKET and SOL_PACKET.
 700  700   */
 701  701  /* ARGSUSED */
 702  702  static int
 703  703  sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
 704  704      const void *optval, socklen_t optlen, struct cred *cred)
 705  705  {
 706  706          int error = 0;
 707  707  
 708  708          switch (level) {
 709  709          case SOL_SOCKET :
 710  710                  error = pfp_setsocket_sockopt(handle, option_name, optval,
 711  711                      optlen);
 712  712                  break;
 713  713          case SOL_PACKET :
 714  714                  error = pfp_setpacket_sockopt(handle, option_name, optval,
 715  715                      optlen);
 716  716                  break;
 717  717          default :
 718  718                  error = EINVAL;
 719  719                  break;
 720  720          }
 721  721  
 722  722          return (error);
 723  723  }
 724  724  
 725  725  /*
 726  726   * This function is incredibly inefficient for sending any packet that
 727  727   * comes with a msghdr asking to be sent to an interface to which the
 728  728   * socket has not been bound. Some possibilities here are keeping a
 729  729   * cache of all open mac's and mac_client's, for the purpose of sending,
 730  730   * and closing them after some amount of inactivity. Clearly, applications
 731  731   * should not be written to use one socket for multiple interfaces if
 732  732   * performance is desired with the code as is.
 733  733   */
 734  734  /* ARGSUSED */
 735  735  static int
 736  736  sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
 737  737      struct nmsghdr *msg, struct cred *cred)
 738  738  {
 739  739          struct sockaddr_ll *sol;
 740  740          mac_client_handle_t mch;
 741  741          struct pfpsock *ps;
 742  742          boolean_t new_open;
 743  743          mac_handle_t mh;
 744  744          size_t mpsize;
 745  745          uint_t maxsdu;
 746  746          mblk_t *mp0;
 747  747          mblk_t *mp;
 748  748          int error;
 749  749  
 750  750          mp = NULL;
 751  751          mp0 = NULL;
 752  752          new_open = B_FALSE;
 753  753          ps = (struct pfpsock *)handle;
 754  754          mh = ps->ps_mh;
 755  755          mch = ps->ps_mch;
 756  756          maxsdu = ps->ps_max_sdu;
 757  757  
 758  758          sol = (struct sockaddr_ll *)msg->msg_name;
 759  759          if (sol == NULL) {
 760  760                  /*
 761  761                   * If no sockaddr_ll has been provided with the send call,
 762  762                   * use the one constructed when the socket was bound to an
 763  763                   * interface and fail if it hasn't been bound.
 764  764                   */
 765  765                  if (!ps->ps_bound) {
 766  766                          ks_stats.kp_send_unbound.value.ui64++;
 767  767                          return (EPROTO);
 768  768                  }
 769  769                  sol = &ps->ps_sock;
 770  770          } else {
 771  771                  /*
 772  772                   * Verify the sockaddr_ll message passed down before using
 773  773                   * it to send a packet out with. If it refers to an interface
 774  774                   * that has not been bound, it is necessary to open it.
 775  775                   */
 776  776                  struct sockaddr_ll *sll;
 777  777  
 778  778                  if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
 779  779                          ks_stats.kp_send_short_msg.value.ui64++;
 780  780                          return (EINVAL);
 781  781                  }
 782  782  
 783  783                  if (sol->sll_family != AF_PACKET) {
 784  784                          ks_stats.kp_send_wrong_family.value.ui64++;
 785  785                          return (EAFNOSUPPORT);
 786  786                  }
 787  787  
 788  788                  sll = &ps->ps_sock;
 789  789                  if (sol->sll_ifindex != sll->sll_ifindex) {
 790  790                          error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
 791  791                              cred);
 792  792                          if (error != 0) {
 793  793                                  ks_stats.kp_send_open_fail.value.ui64++;
 794  794                                  return (error);
 795  795                          }
 796  796                          mac_sdu_get(mh, NULL, &maxsdu);
 797  797                          new_open = B_TRUE;
 798  798                  }
 799  799          }
 800  800  
 801  801          mpsize = uiop->uio_resid;
 802  802          if (mpsize > maxsdu) {
 803  803                  ks_stats.kp_send_too_big.value.ui64++;
 804  804                  error = EMSGSIZE;
 805  805                  goto done;
 806  806          }
 807  807  
 808  808          if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
 809  809                  ks_stats.kp_send_alloc_fail.value.ui64++;
 810  810                  error = ENOBUFS;
 811  811                  goto done;
 812  812          }
 813  813  
 814  814          mp->b_wptr = mp->b_rptr + mpsize;
 815  815          error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
 816  816          if (error != 0) {
 817  817                  ks_stats.kp_send_uiomove_fail.value.ui64++;
 818  818                  goto done;
 819  819          }
 820  820  
 821  821          if (ps->ps_type == SOCK_DGRAM) {
 822  822                  mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
 823  823                  if (mp0 == NULL) {
 824  824                          ks_stats.kp_send_no_memory.value.ui64++;
 825  825                          error = ENOBUFS;
 826  826                          goto done;
 827  827                  }
 828  828                  linkb(mp0, mp);
 829  829                  mp = mp0;
 830  830          }
 831  831  
 832  832          /*
 833  833           * As this is sending datagrams and no promise is made about
 834  834           * how or if a packet will be sent/delivered, no effort is to
 835  835           * be expended in recovering from a situation where the packet
 836  836           * cannot be sent - it is just dropped.
 837  837           */
 838  838          error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
 839  839          if (error == 0) {
 840  840                  mp = NULL;
 841  841                  ks_stats.kp_send_ok.value.ui64++;
 842  842          } else {
 843  843                  ks_stats.kp_send_failed.value.ui64++;
 844  844          }
 845  845  
 846  846  done:
 847  847  
 848  848          if (new_open) {
 849  849                  ASSERT(mch != ps->ps_mch);
 850  850                  ASSERT(mh != ps->ps_mh);
 851  851                  pfp_close(mh, mch);
 852  852          }
 853  853          if (mp != NULL)
 854  854                  freemsg(mp);
 855  855  
 856  856          return (error);
 857  857  
 858  858  }
 859  859  
 860  860  /*
 861  861   * There's no use of a lock here, or at the bottom of pfp_packet() where
 862  862   * ps_flow_ctrld is set to true, because in a situation where these two
 863  863   * are racing to set the flag one way or the other, the end result is
 864  864   * going to be ultimately determined by the scheduler anyway - which of
 865  865   * the two threads gets the lock first? In such an operational environment,
 866  866   * we've got packets arriving too fast to be delt with so packets are going
 867  867   * to be dropped. Grabbing a lock just makes the drop more expensive.
 868  868   */
 869  869  static void
 870  870  sdpfp_clr_flowctrl(sock_lower_handle_t handle)
 871  871  {
 872  872          struct pfpsock *ps;
 873  873  
 874  874          ps = (struct pfpsock *)handle;
 875  875  
 876  876          mutex_enter(&ps->ps_lock);
 877  877          ps->ps_flow_ctrld = B_FALSE;
 878  878          mutex_exit(&ps->ps_lock);
 879  879  }
 880  880  
 881  881  /*
 882  882   * The implementation of this ioctl() handler is intended to function
 883  883   * in the absence of a bind() being made before it is called. Thus the
 884  884   * function calls mac_open() itself to provide a handle
 885  885   * This function is structured like this:
 886  886   * - determine the linkid for the interface being targetted
 887  887   * - open the interface with said linkid
 888  888   * - perform ioctl
 889  889   * - copy results back to caller
 890  890   *
 891  891   * The ioctls that interact with interface flags have been implented below
 892  892   * to assume that the interface is always up and running (IFF_RUNNING) and
 893  893   * to use the state of this socket to determine whether or not the network
 894  894   * interface is in promiscuous mode. Thus an ioctl to get the interface flags
 895  895   * of an interface that has been put in promiscuous mode by another socket
 896  896   * (in the same program or different), will not report that status.
 897  897   */
 898  898  /* ARGSUSED */
 899  899  static int
 900  900  sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
 901  901      int32_t *rval, struct cred *cr)
 902  902  {
 903  903          struct timeval tival;
 904  904          mac_client_promisc_type_t mtype;
 905  905          struct sockaddr_dl *sock;
 906  906          datalink_id_t linkid;
 907  907          struct lifreq lifreq;
 908  908          struct ifreq ifreq;
 909  909          struct pfpsock *ps;
 910  910          mac_handle_t mh;
 911  911          int error;
 912  912  
 913  913          ps = (struct pfpsock *)handle;
 914  914  
 915  915          switch (cmd) {
 916  916          /*
 917  917           * ioctls that work on "struct lifreq"
 918  918           */
 919  919          case SIOCSLIFFLAGS :
 920  920          case SIOCGLIFINDEX :
 921  921          case SIOCGLIFFLAGS :
 922  922          case SIOCGLIFMTU :
 923  923          case SIOCGLIFHWADDR :
 924  924                  error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
 925  925                  if (error != 0)
 926  926                          return (error);
 927  927                  break;
 928  928  
 929  929          /*
 930  930           * ioctls that work on "struct ifreq".
 931  931           * Not all of these have a "struct lifreq" partner, for example
 932  932           * SIOCGIFHWADDR, for the simple reason that the logical interface
 933  933           * does not have a hardware address.
 934  934           */
 935  935          case SIOCSIFFLAGS :
 936  936          case SIOCGIFINDEX :
 937  937          case SIOCGIFFLAGS :
 938  938          case SIOCGIFMTU :
 939  939          case SIOCGIFHWADDR :
 940  940                  error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
 941  941                  if (error != 0)
 942  942                          return (error);
 943  943                  break;
 944  944  
 945  945          case SIOCGSTAMP :
 946  946                  tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
 947  947                  tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
 948  948                  if (get_udatamodel() == DATAMODEL_NATIVE) {
 949  949                          error = ddi_copyout(&tival, (void *)arg,
 950  950                              sizeof (tival), mod);
 951  951                  }
 952  952  #ifdef _SYSCALL32_IMPL
 953  953                  else {
 954  954                          struct timeval32 tv32;
 955  955                          TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
 956  956                          error = ddi_copyout(&tv32, (void *)arg,
 957  957                              sizeof (tv32), mod);
 958  958                  }
 959  959  #endif
 960  960                  return (error);
 961  961          }
 962  962  
 963  963          error =  mac_open_by_linkid(linkid, &mh);
 964  964          if (error != 0)
 965  965                  return (error);
 966  966  
 967  967          switch (cmd) {
 968  968          case SIOCGLIFINDEX :
 969  969                  lifreq.lifr_index = linkid;
 970  970                  break;
 971  971  
 972  972          case SIOCGIFINDEX :
 973  973                  ifreq.ifr_index = linkid;
 974  974                  break;
 975  975  
 976  976          case SIOCGIFFLAGS :
 977  977                  ifreq.ifr_flags = IFF_RUNNING;
 978  978                  if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 979  979                          ifreq.ifr_flags |= IFF_PROMISC;
 980  980                  break;
 981  981  
 982  982          case SIOCGLIFFLAGS :
 983  983                  lifreq.lifr_flags = IFF_RUNNING;
 984  984                  if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 985  985                          lifreq.lifr_flags |= IFF_PROMISC;
 986  986                  break;
 987  987  
 988  988          case SIOCSIFFLAGS :
 989  989                  if (linkid != ps->ps_linkid) {
 990  990                          error = EINVAL;
 991  991                  } else {
 992  992                          if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
 993  993                                  mtype = MAC_CLIENT_PROMISC_ALL;
 994  994                          else
 995  995                                  mtype = MAC_CLIENT_PROMISC_FILTERED;
 996  996                          error = pfp_set_promisc(ps, mtype);
 997  997                  }
 998  998                  break;
 999  999  
1000 1000          case SIOCSLIFFLAGS :
1001 1001                  if (linkid != ps->ps_linkid) {
1002 1002                          error = EINVAL;
1003 1003                  } else {
1004 1004                          if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1005 1005                                  mtype = MAC_CLIENT_PROMISC_ALL;
1006 1006                          else
1007 1007                                  mtype = MAC_CLIENT_PROMISC_FILTERED;
1008 1008                          error = pfp_set_promisc(ps, mtype);
1009 1009                  }
1010 1010                  break;
1011 1011  
1012 1012          case SIOCGIFMTU :
1013 1013                  mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1014 1014                  break;
1015 1015  
1016 1016          case SIOCGLIFMTU :
1017 1017                  mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1018 1018                  break;
1019 1019  
1020 1020          case SIOCGIFHWADDR :
1021 1021                  if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1022 1022                          error = EPFNOSUPPORT;
1023 1023                          break;
1024 1024                  }
1025 1025  
1026 1026                  if (mac_addr_len(mh) == 0) {
1027 1027                          (void) memset(ifreq.ifr_addr.sa_data, 0,
1028 1028                              sizeof (ifreq.ifr_addr.sa_data));
1029 1029                  } else {
1030 1030                          mac_unicast_primary_get(mh,
1031 1031                              (uint8_t *)ifreq.ifr_addr.sa_data);
1032 1032                  }
1033 1033  
1034 1034                  /*
1035 1035                   * The behaviour here in setting sa_family is consistent
1036 1036                   * with what applications such as tcpdump would expect
1037 1037                   * for a Linux PF_PACKET socket.
1038 1038                   */
1039 1039                  ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1040 1040                  break;
1041 1041  
1042 1042          case SIOCGLIFHWADDR :
1043 1043                  lifreq.lifr_type = 0;
1044 1044                  sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1045 1045  
1046 1046                  if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1047 1047                          error = EPFNOSUPPORT;
1048 1048                          break;
1049 1049                  }
1050 1050  
1051 1051                  /*
1052 1052                   * Fill in the sockaddr_dl with link layer details. Of note,
1053 1053                   * the index is returned as 0 for a couple of reasons:
1054 1054                   * (1) there is no public API that uses or requires it
1055 1055                   * (2) the MAC index is currently 32bits and sdl_index is 16.
1056 1056                   */
1057 1057                  sock->sdl_family = AF_LINK;
1058 1058                  sock->sdl_index = 0;
1059 1059                  sock->sdl_type = mac_type(mh);
1060 1060                  sock->sdl_nlen = 0;
1061 1061                  sock->sdl_alen = mac_addr_len(mh);
1062 1062                  sock->sdl_slen = 0;
1063 1063                  if (mac_addr_len(mh) == 0) {
1064 1064                          (void) memset(sock->sdl_data, 0,
1065 1065                              sizeof (sock->sdl_data));
1066 1066                  } else {
1067 1067                          mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1068 1068                  }
1069 1069                  break;
1070 1070  
1071 1071          default :
1072 1072                  break;
1073 1073          }
1074 1074  
1075 1075          mac_close(mh);
1076 1076  
1077 1077          if (error == 0) {
1078 1078                  /*
1079 1079                   * Only the "GET" ioctls need to copy data back to userace.
1080 1080                   */
1081 1081                  switch (cmd) {
1082 1082                  case SIOCGLIFINDEX :
1083 1083                  case SIOCGLIFFLAGS :
1084 1084                  case SIOCGLIFMTU :
1085 1085                  case SIOCGLIFHWADDR :
1086 1086                          error = ddi_copyout(&lifreq, (void *)arg,
1087 1087                              sizeof (lifreq), mod);
1088 1088                          break;
1089 1089  
1090 1090                  case SIOCGIFINDEX :
1091 1091                  case SIOCGIFFLAGS :
1092 1092                  case SIOCGIFMTU :
1093 1093                  case SIOCGIFHWADDR :
1094 1094                          error = ddi_copyout(&ifreq, (void *)arg,
1095 1095                              sizeof (ifreq), mod);
1096 1096                          break;
1097 1097                  default :
1098 1098                          break;
1099 1099                  }
1100 1100          }
1101 1101  
1102 1102          return (error);
1103 1103  }
1104 1104  
1105 1105  /*
1106 1106   * Closing the socket requires that all open references to network
1107 1107   * interfaces be closed.
1108 1108   */
1109 1109  /* ARGSUSED */
1110 1110  static int
1111 1111  sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1112 1112  {
1113 1113          struct pfpsock *ps = (struct pfpsock *)handle;
1114 1114  
1115 1115          if (ps->ps_phd != 0) {
1116 1116                  mac_promisc_remove(ps->ps_phd);
1117 1117                  ps->ps_phd = 0;
1118 1118          }
1119 1119  
1120 1120          if (ps->ps_mch != 0) {
1121 1121                  mac_client_close(ps->ps_mch, 0);
1122 1122                  ps->ps_mch = 0;
1123 1123          }
1124 1124  
1125 1125          if (ps->ps_mh != 0) {
1126 1126                  mac_close(ps->ps_mh);
1127 1127                  ps->ps_mh = 0;
1128 1128          }
1129 1129  
1130 1130          kmem_free(ps, sizeof (*ps));
1131 1131  
1132 1132          return (0);
1133 1133  }
1134 1134  
1135 1135  /* ************************************************************************* */
1136 1136  
1137 1137  /*
1138 1138   * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1139 1139   * determine the linkid for the interface name stored in that structure.
1140 1140   * name is used as a buffer so that we can ensure a trailing \0 is appended
1141 1141   * to the name safely.
1142 1142   */
1143 1143  static int
1144 1144  pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1145 1145      datalink_id_t *linkidp, int mode)
1146 1146  {
1147 1147          char name[IFNAMSIZ + 1];
1148 1148          int error;
1149 1149  
1150 1150          if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1151 1151                  return (EFAULT);
1152 1152  
1153 1153          (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1154 1154  
1155 1155          error = dls_mgmt_get_linkid(name, linkidp);
1156 1156          if (error != 0)
1157 1157                  error = dls_devnet_macname2linkid(name, linkidp);
1158 1158  
1159 1159          return (error);
1160 1160  }
1161 1161  
1162 1162  /*
1163 1163   * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1164 1164   * determine the linkid for the interface name stored in that structure.
1165 1165   * name is used as a buffer so that we can ensure a trailing \0 is appended
1166 1166   * to the name safely.
1167 1167   */
1168 1168  static int
1169 1169  pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1170 1170      datalink_id_t *linkidp, int mode)
1171 1171  {
1172 1172          char name[LIFNAMSIZ + 1];
1173 1173          int error;
1174 1174  
1175 1175          if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1176 1176                  return (EFAULT);
1177 1177  
1178 1178          (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1179 1179  
1180 1180          error = dls_mgmt_get_linkid(name, linkidp);
1181 1181          if (error != 0)
1182 1182                  error = dls_devnet_macname2linkid(name, linkidp);
1183 1183  
1184 1184          return (error);
1185 1185  }
1186 1186  
1187 1187  /*
1188 1188   * Although there are several new SOL_PACKET options that can be set and
1189 1189   * are specific to this implementation of PF_PACKET, the current API does
1190 1190   * not support doing a get on them to retrieve accompanying status. Thus
1191 1191   * it is only currently possible to use SOL_PACKET with getsockopt to
1192 1192   * retrieve statistical information. This remains consistant with the
1193 1193   * Linux API at the time of writing.
1194 1194   */
1195 1195  static int
1196 1196  pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1197 1197      void *optval, socklen_t *optlenp)
1198 1198  {
1199 1199          struct pfpsock *ps;
1200 1200          struct tpacket_stats_short tpss;
1201 1201          int error = 0;
1202 1202  
1203 1203          ps = (struct pfpsock *)handle;
1204 1204  
1205 1205          switch (option_name) {
1206 1206          case PACKET_STATISTICS :
1207 1207                  if (*optlenp < sizeof (ps->ps_stats)) {
1208 1208                          error = EINVAL;
1209 1209                          break;
1210 1210                  }
1211 1211                  *optlenp = sizeof (ps->ps_stats);
1212 1212                  bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1213 1213                  break;
1214 1214          case PACKET_STATISTICS_SHORT :
1215 1215                  if (*optlenp < sizeof (tpss)) {
1216 1216                          error = EINVAL;
1217 1217                          break;
1218 1218                  }
1219 1219                  *optlenp = sizeof (tpss);
1220 1220                  tpss.tp_packets = ps->ps_stats.tp_packets;
1221 1221                  tpss.tp_drops = ps->ps_stats.tp_drops;
1222 1222                  bcopy(&tpss, optval, sizeof (tpss));
1223 1223                  break;
1224 1224          default :
1225 1225                  error = EINVAL;
1226 1226                  break;
1227 1227          }
1228 1228  
1229 1229          return (error);
1230 1230  }
1231 1231  
1232 1232  /*
1233 1233   * The SOL_PACKET level for socket options supports three options,
1234 1234   * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1235 1235   * This function is responsible for mapping the two socket options
1236 1236   * that manage multicast membership into the appropriate internal
1237 1237   * function calls to bring the option into effect. Whilst direct
1238 1238   * changes to the multicast membership (ADD/DROP) groups is handled
1239 1239   * by calls directly into the mac module, changes to the promiscuos
1240 1240   * mode are vectored through pfp_set_promisc() so that the logic for
1241 1241   * managing the promiscuous mode is in one place.
1242 1242   */
1243 1243  /* ARGSUSED */
1244 1244  static int
1245 1245  pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1246 1246      const void *optval, socklen_t optlen)
1247 1247  {
1248 1248          struct packet_mreq mreq;
1249 1249          struct pfpsock *ps;
1250 1250          int error = 0;
1251 1251          int opt;
1252 1252  
1253 1253          ps = (struct pfpsock *)handle;
1254 1254          if (!ps->ps_bound)
1255 1255                  return (EPROTO);
1256 1256  
1257 1257          if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1258 1258              (option_name == PACKET_DROP_MEMBERSHIP)) {
1259 1259                  if (!ps->ps_bound)
1260 1260                          return (EPROTO);
1261 1261                  bcopy(optval, &mreq, sizeof (mreq));
1262 1262                  if (ps->ps_linkid != mreq.mr_ifindex)
1263 1263                          return (EINVAL);
1264 1264          }
1265 1265  
1266 1266          switch (option_name) {
1267 1267          case PACKET_ADD_MEMBERSHIP :
1268 1268                  switch (mreq.mr_type) {
1269 1269                  case PACKET_MR_MULTICAST :
1270 1270                          if (mreq.mr_alen != ps->ps_sock.sll_halen)
1271 1271                                  return (EINVAL);
1272 1272  
1273 1273                          error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1274 1274                          break;
1275 1275  
1276 1276                  case PACKET_MR_PROMISC :
1277 1277                          error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1278 1278                          break;
1279 1279  
1280 1280                  case PACKET_MR_ALLMULTI :
1281 1281                          error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1282 1282                          break;
1283 1283                  }
1284 1284                  break;
1285 1285  
1286 1286          case PACKET_DROP_MEMBERSHIP :
1287 1287                  switch (mreq.mr_type) {
1288 1288                  case PACKET_MR_MULTICAST :
1289 1289                          if (mreq.mr_alen != ps->ps_sock.sll_halen)
1290 1290                                  return (EINVAL);
1291 1291  
1292 1292                          mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1293 1293                          break;
1294 1294  
1295 1295                  case PACKET_MR_PROMISC :
1296 1296                          if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1297 1297                                  return (EINVAL);
1298 1298                          error = pfp_set_promisc(ps,
1299 1299                              MAC_CLIENT_PROMISC_FILTERED);
1300 1300                          break;
1301 1301  
1302 1302                  case PACKET_MR_ALLMULTI :
1303 1303                          if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1304 1304                                  return (EINVAL);
1305 1305                          error = pfp_set_promisc(ps,
1306 1306                              MAC_CLIENT_PROMISC_FILTERED);
1307 1307                          break;
1308 1308                  }
1309 1309                  break;
1310 1310  
1311 1311          case PACKET_AUXDATA :
1312 1312                  if (optlen == sizeof (int)) {
1313 1313                          opt = *(int *)optval;
1314 1314                          ps->ps_auxdata = (opt != 0);
1315 1315                  } else {
1316 1316                          error = EINVAL;
1317 1317                  }
1318 1318                  break;
1319 1319          default :
1320 1320                  error = EINVAL;
1321 1321                  break;
1322 1322          }
1323 1323  
1324 1324          return (error);
1325 1325  }
1326 1326  
1327 1327  /*
1328 1328   * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1329 1329   * SO_ATTACH_FILTER and SO_DETACH_FILTER.
1330 1330   *
1331 1331   * Both of these setsockopt values are candidates for being handled by the
1332 1332   * socket layer itself in future, however this requires understanding how
1333 1333   * they would interact with all other sockets.
1334 1334   */
1335 1335  static int
1336 1336  pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1337 1337      const void *optval, socklen_t optlen)
1338 1338  {
1339 1339          struct bpf_program prog;
1340 1340          ip_bpf_insn_t *fcode;
1341 1341          struct pfpsock *ps;
1342 1342          struct sock_proto_props sopp;
1343 1343          int error = 0;
1344 1344          int size;
1345 1345  
1346 1346          ps = (struct pfpsock *)handle;
1347 1347  
1348 1348          switch (option_name) {
1349 1349          case SO_ATTACH_FILTER :
1350 1350  #ifdef _LP64
1351 1351                  if (optlen == sizeof (struct bpf_program32)) {
1352 1352                          struct bpf_program32 prog32;
1353 1353  
1354 1354                          bcopy(optval, &prog32, sizeof (prog32));
1355 1355                          prog.bf_len = prog32.bf_len;
1356 1356                          prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1357 1357                  } else
1358 1358  #endif
1359 1359                  if (optlen == sizeof (struct bpf_program)) {
1360 1360                          bcopy(optval, &prog, sizeof (prog));
1361 1361                  } else if (optlen != sizeof (struct bpf_program)) {
1362 1362                          return (EINVAL);
1363 1363                  }
1364 1364                  if (prog.bf_len > BPF_MAXINSNS)
1365 1365                          return (EINVAL);
1366 1366  
1367 1367                  size = prog.bf_len * sizeof (*prog.bf_insns);
1368 1368                  fcode = kmem_alloc(size, KM_SLEEP);
1369 1369                  if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1370 1370                          kmem_free(fcode, size);
1371 1371                          return (EFAULT);
1372 1372                  }
1373 1373  
1374 1374                  if (ip_bpf_validate(fcode, prog.bf_len)) {
1375 1375                          rw_enter(&ps->ps_bpflock, RW_WRITER);
1376 1376                          pfp_release_bpf(ps);
1377 1377                          ps->ps_bpf.bf_insns = (struct bpf_insn *)fcode;
1378 1378                          ps->ps_bpf.bf_len = size;
1379 1379                          rw_exit(&ps->ps_bpflock);
1380 1380  
1381 1381                          return (0);
1382 1382                  }
1383 1383                  kmem_free(fcode, size);
1384 1384                  error = EINVAL;
1385 1385                  break;
1386 1386  
1387 1387          case SO_DETACH_FILTER :
1388 1388                  pfp_release_bpf(ps);
1389 1389                  break;
1390 1390  
1391 1391          case SO_RCVBUF :
1392 1392                  size = *(int32_t *)optval;
1393 1393                  if (size > sockmod_pfp_rcvbuf_max || size < 0)
1394 1394                          return (ENOBUFS);
1395 1395                  sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1396 1396                  sopp.sopp_rxhiwat = size;
1397 1397                  ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1398 1398                  ps->ps_rcvbuf = size;
1399 1399                  break;
1400 1400  
1401 1401          default :
1402 1402                  error = ENOPROTOOPT;
1403 1403                  break;
1404 1404          }
1405 1405  
1406 1406          return (error);
1407 1407  }
1408 1408  
1409 1409  /*
1410 1410   * pfp_open_index is an internal function used to open a MAC device by
1411 1411   * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1412 1412   * because some of the interfaces provided by the mac layer require either
1413 1413   * only the mac_handle_t or both it and mac_handle_t.
1414 1414   *
1415 1415   * Whilst inside the kernel we can access data structures supporting any
1416 1416   * zone, access to interfaces from non-global zones is restricted to those
1417 1417   * interfaces (if any) that are exclusively assigned to a zone.
1418 1418   */
1419 1419  static int
1420 1420  pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1421 1421      cred_t *cred)
1422 1422  {
1423 1423          mac_client_handle_t mch;
1424 1424          zoneid_t ifzoneid;
1425 1425          mac_handle_t mh;
1426 1426          zoneid_t zoneid;
1427 1427          int error;
1428 1428  
1429 1429          mh = 0;
1430 1430          mch = 0;
1431 1431          error = mac_open_by_linkid(index, &mh);
1432 1432          if (error != 0)
1433 1433                  goto bad_open;
1434 1434  
1435 1435          error = mac_client_open(mh, &mch, NULL,
1436 1436              MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1437 1437          if (error != 0)
1438 1438                  goto bad_open;
1439 1439  
1440 1440          zoneid = crgetzoneid(cred);
1441 1441          if (zoneid != GLOBAL_ZONEID) {
1442 1442                  mac_perim_handle_t perim;
1443 1443  
1444 1444                  mac_perim_enter_by_mh(mh, &perim);
1445 1445                  error = dls_link_getzid(mac_name(mh), &ifzoneid);
1446 1446                  mac_perim_exit(perim);
1447 1447                  if (error != 0)
1448 1448                          goto bad_open;
1449 1449                  if (ifzoneid != zoneid) {
1450 1450                          error = EACCES;
1451 1451                          goto bad_open;
1452 1452                  }
1453 1453          }
1454 1454  
1455 1455          *mcip = mch;
1456 1456          *mhp = mh;
1457 1457  
1458 1458          return (0);
1459 1459  bad_open:
1460 1460          if (mch != 0)
1461 1461                  mac_client_close(mch, 0);
1462 1462          if (mh != 0)
1463 1463                  mac_close(mh);
1464 1464          return (error);
1465 1465  }
1466 1466  
1467 1467  static void
1468 1468  pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1469 1469  {
1470 1470          mac_client_close(mch, 0);
1471 1471          mac_close(mh);
1472 1472  }
1473 1473  
1474 1474  /*
1475 1475   * The purpose of this function is to provide a single place where we free
1476 1476   * the loaded BPF program and reset all pointers/counters associated with
1477 1477   * it.
1478 1478   */
1479 1479  static void
1480 1480  pfp_release_bpf(struct pfpsock *ps)
1481 1481  {
1482 1482          if (ps->ps_bpf.bf_len != 0) {
1483 1483                  kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1484 1484                  ps->ps_bpf.bf_len = 0;
1485 1485                  ps->ps_bpf.bf_insns = NULL;
1486 1486          }
1487 1487  }
1488 1488  
1489 1489  /*
1490 1490   * Set the promiscuous mode of a network interface.
1491 1491   * This function only calls the mac layer when there is a change to the
1492 1492   * status of a network interface's promiscuous mode. Tracking of how many
1493 1493   * sockets have the network interface in promiscuous mode, and thus the
1494 1494   * control over the physical device's status, is left to the mac layer.
1495 1495   */
1496 1496  static int
1497 1497  pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1498 1498  {
1499 1499          int error = 0;
1500 1500          int flags;
1501 1501  
1502 1502          /*
1503 1503           * There are 4 combinations of turnon/ps_promisc.
1504 1504           * This if handles 2 (both false, both true) and the if() below
1505 1505           * handles the remaining one - when change is required.
1506 1506           */
1507 1507          if (turnon == ps->ps_promisc)
1508 1508                  return (error);
1509 1509  
1510 1510          if (ps->ps_phd != 0) {
1511 1511                  mac_promisc_remove(ps->ps_phd);
1512 1512                  ps->ps_phd = 0;
1513 1513  
1514 1514                  /*
1515 1515                   * ps_promisc is set here in case the call to mac_promisc_add
1516 1516                   * fails: leaving it to indicate that the interface is still
1517 1517                   * in some sort of promiscuous mode is false.
1518 1518                   */
1519 1519                  if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1520 1520                          ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1521 1521                          flags = MAC_PROMISC_FLAGS_NO_PHYS;
1522 1522                  } else {
1523 1523                          flags = 0;
1524 1524                  }
1525 1525                  flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1526 1526          }
1527 1527  
1528 1528          error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1529 1529              &ps->ps_phd, flags);
1530 1530          if (error == 0)
1531 1531                  ps->ps_promisc = turnon;
1532 1532  
1533 1533          return (error);
1534 1534  }
1535 1535  
1536 1536  /*
1537 1537   * This table maps the MAC types in Solaris to the ARPHRD_* values used
1538 1538   * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1539 1539   *
1540 1540   * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1541 1541   * they are pulled from <netpacket/packet.h>, thus it acts as a source
1542 1542   * of supplementary information to the ARP table.
1543 1543   */
1544 1544  static uint_t arphrd_to_dl[][2] = {
1545 1545          { ARPHRD_IEEE80211,     DL_WIFI },
1546 1546          { ARPHRD_TUNNEL,        DL_IPV4 },
1547 1547          { ARPHRD_TUNNEL,        DL_IPV6 },
1548 1548          { ARPHRD_TUNNEL,        DL_6TO4 },
1549 1549          { ARPHRD_AX25,          DL_X25 },
1550 1550          { ARPHRD_ATM,           DL_ATM },
1551 1551          { 0,                    0 }
1552 1552  };
1553 1553  
1554 1554  static int
1555 1555  pfp_dl_to_arphrd(int dltype)
1556 1556  {
1557 1557          int i;
1558 1558  
1559 1559          for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1560 1560                  if (arphrd_to_dl[i][1] == dltype)
1561 1561                          return (arphrd_to_dl[i][0]);
1562 1562          return (arp_hw_type(dltype));
1563 1563  }

↓ open down ↓

1563 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX