Print this page
    
6470 mac_unregister() needs to mod_hash_remove() BEFORE holding the perimeter.
Reviewed by: Ryan Zezeski <ryan@zinascii.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/io/mac/mac_provider.c
          +++ new/usr/src/uts/common/io/mac/mac_provider.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
  24   25   */
  25   26  
  26   27  #include <sys/types.h>
  27   28  #include <sys/conf.h>
  28   29  #include <sys/id_space.h>
  29   30  #include <sys/esunddi.h>
  30   31  #include <sys/stat.h>
  31   32  #include <sys/mkdev.h>
  32   33  #include <sys/stream.h>
  33   34  #include <sys/strsubr.h>
  34   35  #include <sys/dlpi.h>
  35   36  #include <sys/modhash.h>
  36   37  #include <sys/mac.h>
  37   38  #include <sys/mac_provider.h>
  38   39  #include <sys/mac_impl.h>
  39   40  #include <sys/mac_client_impl.h>
  40   41  #include <sys/mac_client_priv.h>
  41   42  #include <sys/mac_soft_ring.h>
  42   43  #include <sys/mac_stat.h>
  43   44  #include <sys/dld.h>
  44   45  #include <sys/modctl.h>
  45   46  #include <sys/fs/dv_node.h>
  46   47  #include <sys/thread.h>
  47   48  #include <sys/proc.h>
  48   49  #include <sys/callb.h>
  49   50  #include <sys/cpuvar.h>
  50   51  #include <sys/atomic.h>
  51   52  #include <sys/sdt.h>
  52   53  #include <sys/mac_flow.h>
  53   54  #include <sys/ddi_intr_impl.h>
  54   55  #include <sys/disp.h>
  55   56  #include <sys/sdt.h>
  56   57  #include <sys/pattr.h>
  57   58  #include <sys/strsun.h>
  58   59  
  59   60  /*
  60   61   * MAC Provider Interface.
  61   62   *
  62   63   * Interface for GLDv3 compatible NIC drivers.
  63   64   */
  64   65  
  65   66  static void i_mac_notify_thread(void *);
  66   67  
  67   68  typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);
  68   69  
           /*
            * Table of default notification callbacks, MAC_NNOTE slots long.  Each
            * slot is annotated with the MAC_NOTE_* type it is listed for.  Only
            * MAC_NOTE_LINK has a default callback (mac_fanout_recompute); every
            * other slot is NULL.
            * NOTE(review): presumably indexed by notification type from the
            * notify thread -- confirm against i_mac_notify_thread().
            */
  69   70  static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
  70   71          mac_fanout_recompute,   /* MAC_NOTE_LINK */
  71   72          NULL,           /* MAC_NOTE_UNICST */
  72   73          NULL,           /* MAC_NOTE_TX */
  73   74          NULL,           /* MAC_NOTE_DEVPROMISC */
  74   75          NULL,           /* MAC_NOTE_FASTPATH_FLUSH */
  75   76          NULL,           /* MAC_NOTE_SDU_SIZE */
  76   77          NULL,           /* MAC_NOTE_MARGIN */
  77   78          NULL,           /* MAC_NOTE_CAPAB_CHG */
  78   79          NULL            /* MAC_NOTE_LOWLINK */
  79   80  };
  80   81  
  81   82  /*
  82   83   * Driver support functions.
  83   84   */
  84   85  
  85   86  /* REGISTRATION */
  86   87  
           /*
            * Allocate a zero-filled mac_register_t for a driver to fill in and
            * pass to mac_register().  Returns NULL when mac_version does not equal
            * MAC_VERSION; otherwise returns the new structure with m_version set
            * to mac_version.  The allocation uses KM_SLEEP.
            */
  87   88  mac_register_t *
  88   89  mac_alloc(uint_t mac_version)
  89   90  {
  90   91          mac_register_t *mregp;
  91   92  
  92   93          /*
  93   94           * Make sure there isn't a version mismatch between the driver and
  94   95           * the framework.  In the future, if multiple versions are
  95   96           * supported, this check could become more sophisticated.
  96   97           */
  97   98          if (mac_version != MAC_VERSION)
  98   99                  return (NULL);
  99  100  
 100  101          mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
 101  102          mregp->m_version = mac_version;
 102  103          return (mregp);
 103  104  }
 104  105  
           /*
            * Release a mac_register_t by freeing sizeof (mac_register_t) bytes
            * at mregp.  mregp is presumably a structure obtained from mac_alloc();
            * it is not checked for NULL here.
            */
 105  106  void
 106  107  mac_free(mac_register_t *mregp)
 107  108  {
 108  109          kmem_free(mregp, sizeof (mac_register_t));
 109  110  }
 110  111  
 111  112  /*
 112  113   * mac_register() is how drivers register new MACs with the GLDv3
 113  114   * framework.  The mregp argument is allocated by drivers using the
 114  115   * mac_alloc() function, and can be freed using mac_free() immediately upon
 115  116   * return from mac_register().  Upon success (0 return value), the mhp
 116  117   * opaque pointer becomes the driver's handle to its MAC interface, and is
 117  118   * the argument to all other mac module entry points.
            *
            * On failure a non-zero errno value is returned and *mhp is not
            * written: EINVAL for most validation failures (it is the initial
            * value of err), ENOSPC when mac_minor_hold() cannot supply a minor
            * number, and EEXIST when the MAC name is already present in
            * i_mac_impl_hash.  Failures after the mac_impl_t has been allocated
            * go through the "fail" label, which tears down the partially built
            * state and frees the mac_impl_t.
 118  119   */
 119  120  /* ARGSUSED */
 120  121  int
 121  122  mac_register(mac_register_t *mregp, mac_handle_t *mhp)
 122  123  {
 123  124          mac_impl_t              *mip;
 124  125          mactype_t               *mtype;
 125  126          int                     err = EINVAL;
 126  127          struct devnames         *dnp = NULL;
 127  128          uint_t                  instance;
 128  129          boolean_t               style1_created = B_FALSE;
 129  130          boolean_t               style2_created = B_FALSE;
 130  131          char                    *driver;
 131  132          minor_t                 minor = 0;
 132  133  
 133  134          /* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
 134  135          if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
 135  136                  return (EINVAL);
 136  137  
 137  138          /* Find the required MAC-Type plugin. */
 138  139          if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
 139  140                  return (EINVAL);
 140  141  
 141  142          /* Create a mac_impl_t to represent this MAC. */
 142  143          mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
 143  144  
 144  145          /*
 145  146           * The mac is not ready for open yet.
 146  147           */
 147  148          mip->mi_state_flags |= MIS_DISABLED;
 148  149  
 149  150          /*
 150  151           * When a mac is registered, the m_instance field can be set to:
 151  152           *
 152  153           *  0:  Get the mac's instance number from m_dip.
 153  154           *      This is usually used for physical device dips.
 154  155           *
 155  156           *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
 156  157           *      For example, when an aggregation is created with the key option,
 157  158           *      "key" will be used as the instance number.
 158  159           *
 159  160           *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
 160  161           *      This is often used when a MAC of a virtual link is registered
 161  162           *      (e.g., aggregation when "key" is not specified, or vnic).
 162  163           *
 163  164           * Note that the instance number is used to derive the mi_minor field
 164  165           * of mac_impl_t, which will then be used to derive the name of kstats
 165  166           * and the devfs nodes.  The first 2 cases are needed to preserve
 166  167           * backward compatibility.
 167  168           */
 168  169          switch (mregp->m_instance) {
 169  170          case 0:
 170  171                  instance = ddi_get_instance(mregp->m_dip);
 171  172                  break;
 172  173          case ((uint_t)-1):
 173  174                  minor = mac_minor_hold(B_TRUE);
 174  175                  if (minor == 0) {
 175  176                          err = ENOSPC;
 176  177                          goto fail;
 177  178                  }
 178  179                  instance = minor - 1;
 179  180                  break;
 180  181          default:
 181  182                  instance = mregp->m_instance;
 182  183                  if (instance >= MAC_MAX_MINOR) {
 183  184                          err = EINVAL;
 184  185                          goto fail;
 185  186                  }
 186  187                  break;
 187  188          }
 188  189  
 189  190          mip->mi_minor = (minor_t)(instance + 1);
 190  191          mip->mi_dip = mregp->m_dip;
 191  192          mip->mi_clients_list = NULL;
 192  193          mip->mi_nclients = 0;
 193  194  
 194  195          /* Set the default IEEE Port VLAN Identifier */
 195  196          mip->mi_pvid = 1;
 196  197  
 197  198          /* Default bridge link learning protection values */
 198  199          mip->mi_llimit = 1000;
 199  200          mip->mi_ldecay = 200;
 200  201  
 201  202          driver = (char *)ddi_driver_name(mip->mi_dip);
 202  203  
 203  204          /* Construct the MAC name as <drvname><instance> */
 204  205          (void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
 205  206              driver, instance);
 206  207  
 207  208          mip->mi_driver = mregp->m_driver;
 208  209  
 209  210          mip->mi_type = mtype;
 210  211          mip->mi_margin = mregp->m_margin;
 211  212          mip->mi_info.mi_media = mtype->mt_type;
 212  213          mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
 213  214          if (mregp->m_max_sdu <= mregp->m_min_sdu)
 214  215                  goto fail;
 215  216          if (mregp->m_multicast_sdu == 0)
 216  217                  mregp->m_multicast_sdu = mregp->m_max_sdu;
 217  218          if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
 218  219              mregp->m_multicast_sdu > mregp->m_max_sdu)
 219  220                  goto fail;
 220  221          mip->mi_sdu_min = mregp->m_min_sdu;
 221  222          mip->mi_sdu_max = mregp->m_max_sdu;
 222  223          mip->mi_sdu_multicast = mregp->m_multicast_sdu;
 223  224          mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
 224  225          /*
 225  226           * If the media supports a broadcast address, cache a pointer to it
 226  227           * in the mac_info_t so that upper layers can use it.
 227  228           */
 228  229          mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
 229  230  
 230  231          mip->mi_v12n_level = mregp->m_v12n;
 231  232  
 232  233          /*
 233  234           * Copy the unicast source address into the mac_info_t, but only if
 234  235           * the MAC-Type defines a non-zero address length.  We need to
 235  236           * handle MAC-Types that have an address length of 0
 236  237           * (point-to-point protocol MACs for example).
 237  238           */
 238  239          if (mip->mi_type->mt_addr_length > 0) {
 239  240                  if (mregp->m_src_addr == NULL)
 240  241                          goto fail;
 241  242                  mip->mi_info.mi_unicst_addr =
 242  243                      kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
 243  244                  bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
 244  245                      mip->mi_type->mt_addr_length);
 245  246  
 246  247                  /*
 247  248                   * Copy the fixed 'factory' MAC address from the immutable
 248  249                   * info.  This is taken to be the MAC address currently in
 249  250                   * use.
 250  251                   */
 251  252                  bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
 252  253                      mip->mi_type->mt_addr_length);
 253  254  
 254  255                  /*
 255  256                   * At this point, we should set up the classification
 256  257                   * rules etc but we delay it till mac_open() so that
 257  258                   * the resource discovery has taken place and we
 258  259                   * know someone wants to use the device. Otherwise
 259  260                   * memory gets allocated for Rx ring structures even
 260  261                   * during probe.
 261  262                   */
 262  263  
 263  264                  /* Copy the destination address if one is provided. */
 264  265                  if (mregp->m_dst_addr != NULL) {
 265  266                          bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
 266  267                              mip->mi_type->mt_addr_length);
 267  268                          mip->mi_dstaddr_set = B_TRUE;
 268  269                  }
 269  270          } else if (mregp->m_src_addr != NULL) {
 270  271                  goto fail;
 271  272          }
 272  273  
 273  274          /*
 274  275           * The format of the m_pdata is specific to the plugin.  It is
 275  276           * passed in as an argument to all of the plugin callbacks.  The
 276  277           * driver can update this information by calling
 277  278           * mac_pdata_update().
 278  279           */
 279  280          if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
 280  281                  /*
 281  282                   * Verify if the supplied plugin data is valid.  Note that
 282  283                   * even if the caller passed in a NULL pointer as plugin data,
 283  284                   * we still need to verify if that's valid as the plugin may
 284  285                   * require plugin data to function.
 285  286                   */
 286  287                  if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
 287  288                      mregp->m_pdata_size)) {
 288  289                          goto fail;
 289  290                  }
 290  291                  if (mregp->m_pdata != NULL) {
 291  292                          mip->mi_pdata =
 292  293                              kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
 293  294                          bcopy(mregp->m_pdata, mip->mi_pdata,
 294  295                              mregp->m_pdata_size);
 295  296                          mip->mi_pdata_size = mregp->m_pdata_size;
 296  297                  }
 297  298          } else if (mregp->m_pdata != NULL) {
 298  299                  /*
 299  300                   * The caller supplied non-NULL plugin data, but the plugin
 300  301                   * does not recognize plugin data.
 301  302                   */
 302  303                  err = EINVAL;
 303  304                  goto fail;
 304  305          }
 305  306  
 306  307          /*
 307  308           * Register the private properties.
 308  309           */
 309  310          mac_register_priv_prop(mip, mregp->m_priv_props);
 310  311  
 311  312          /*
 312  313           * Stash the driver callbacks into the mac_impl_t, but first sanity
 313  314           * check to make sure all mandatory callbacks are set.
 314  315           */
 315  316          if (mregp->m_callbacks->mc_getstat == NULL ||
 316  317              mregp->m_callbacks->mc_start == NULL ||
 317  318              mregp->m_callbacks->mc_stop == NULL ||
 318  319              mregp->m_callbacks->mc_setpromisc == NULL ||
 319  320              mregp->m_callbacks->mc_multicst == NULL) {
 320  321                  goto fail;
 321  322          }
 322  323          mip->mi_callbacks = mregp->m_callbacks;
 323  324  
 324  325          if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
 325  326              &mip->mi_capab_legacy)) {
 326  327                  mip->mi_state_flags |= MIS_LEGACY;
 327  328                  mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
 328  329          } else {
 329  330                  mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
 330  331                      mip->mi_minor);
 331  332          }
 332  333  
 333  334          /*
 334  335           * Allocate a notification thread. thread_create blocks for memory
 335  336           * if needed, it never fails.
 336  337           */
 337  338          mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
 338  339              mip, 0, &p0, TS_RUN, minclsyspri);
 339  340  
 340  341          /*
 341  342           * Initialize the capabilities
 342  343           */
 343  344  
 344  345          bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
 345  346          bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));
 346  347  
 347  348          if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
 348  349                  mip->mi_state_flags |= MIS_IS_VNIC;
 349  350  
 350  351          if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
 351  352                  mip->mi_state_flags |= MIS_IS_AGGR;
 352  353  
 353  354          mac_addr_factory_init(mip);
 354  355  
 355  356          /*
 356  357           * Enforce the virtualization level registered.
 357  358           */
 358  359          if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
 359  360                  if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
 360  361                      mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
 361  362                          goto fail;
 362  363  
 363  364                  /*
 364  365                   * The driver needs to register at least rx rings for this
 365  366                   * virtualization level.
 366  367                   */
 367  368                  if (mip->mi_rx_groups == NULL)
 368  369                          goto fail;
 369  370          }
 370  371  
 371  372          /*
 372  373           * The driver must set mc_unicst entry point to NULL when it advertises
 373  374           * CAP_RINGS for rx groups.
 374  375           */
 375  376          if (mip->mi_rx_groups != NULL) {
 376  377                  if (mregp->m_callbacks->mc_unicst != NULL)
 377  378                          goto fail;
 378  379          } else {
 379  380                  if (mregp->m_callbacks->mc_unicst == NULL)
 380  381                          goto fail;
 381  382          }
 382  383  
 383  384          /*
 384  385           * Initialize MAC addresses. Must be called after mac_init_rings().
 385  386           */
 386  387          mac_init_macaddr(mip);
 387  388  
 388  389          mip->mi_share_capab.ms_snum = 0;
 389  390          if (mip->mi_v12n_level & MAC_VIRT_HIO) {
 390  391                  (void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
 391  392                      &mip->mi_share_capab);
 392  393          }
 393  394  
 394  395          /*
 395  396           * Initialize the kstats for this device.
 396  397           */
 397  398          mac_driver_stat_create(mip);
 398  399  
 399  400          /* Zero out any properties. */
 400  401          bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
 401  402  
 402  403          if (mip->mi_minor <= MAC_MAX_MINOR) {
 403  404                  /* Create a style-2 DLPI device */
 404  405                  if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
 405  406                      DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
 406  407                          goto fail;
 407  408                  style2_created = B_TRUE;
 408  409  
 409  410                  /* Create a style-1 DLPI device */
 410  411                  if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
 411  412                      mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
 412  413                          goto fail;
 413  414                  style1_created = B_TRUE;
 414  415          }
 415  416  
 416  417          mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
 417  418  
 418  419          rw_enter(&i_mac_impl_lock, RW_WRITER);
 419  420          if (mod_hash_insert(i_mac_impl_hash,
 420  421              (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
 421  422                  rw_exit(&i_mac_impl_lock);
 422  423                  err = EEXIST;
 423  424                  goto fail;
 424  425          }
 425  426  
 426  427          DTRACE_PROBE2(mac__register, struct devnames *, dnp,
 427  428              (mac_impl_t *), mip);
 428  429  
 429  430          /*
 430  431           * Mark the MAC to be ready for open.
 431  432           */
 432  433          mip->mi_state_flags &= ~MIS_DISABLED;
 433  434          rw_exit(&i_mac_impl_lock);
 434  435  
 435  436          atomic_inc_32(&i_mac_impl_count);
 436  437  
 437  438          cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
 438  439          *mhp = (mac_handle_t)mip;
 439  440          return (0);
 440  441  
 441  442  fail:
                   /*
                    * Common error exit: undo whatever was set up before the failure.
                    *
                    * NOTE(review): the mt_ref drop below only happens when mi_type
                    * was assigned.  Presumably mactype_getplugin() took that
                    * reference, in which case a failure in the m_instance switch
                    * (before mi_type is set) would not release it -- confirm.
                    * Likewise nothing here tears down mi_flow_tab when
                    * mod_hash_insert() fails after mac_flow_l2tab_create() --
                    * confirm whether that is needed.
                    */
 442  443          if (style1_created)
 443  444                  ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
 444  445  
 445  446          if (style2_created)
 446  447                  ddi_remove_minor_node(mip->mi_dip, driver);
 447  448  
 448  449          mac_addr_factory_fini(mip);
 449  450  
 450  451          /* Clean up registered MAC addresses */
 451  452          mac_fini_macaddr(mip);
 452  453  
 453  454          /* Clean up registered rings */
 454  455          mac_free_rings(mip, MAC_RING_TYPE_RX);
 455  456          mac_free_rings(mip, MAC_RING_TYPE_TX);
 456  457  
 457  458          /* Clean up notification thread */
 458  459          if (mip->mi_notify_thread != NULL)
 459  460                  i_mac_notify_exit(mip);
 460  461  
 461  462          if (mip->mi_info.mi_unicst_addr != NULL) {
 462  463                  kmem_free(mip->mi_info.mi_unicst_addr,
 463  464                      mip->mi_type->mt_addr_length);
 464  465                  mip->mi_info.mi_unicst_addr = NULL;
 465  466          }
 466  467  
 467  468          mac_driver_stat_delete(mip);
 468  469  
 469  470          if (mip->mi_type != NULL) {
 470  471                  atomic_dec_32(&mip->mi_type->mt_ref);
 471  472                  mip->mi_type = NULL;
 472  473          }
 473  474  
 474  475          if (mip->mi_pdata != NULL) {
 475  476                  kmem_free(mip->mi_pdata, mip->mi_pdata_size);
 476  477                  mip->mi_pdata = NULL;
 477  478                  mip->mi_pdata_size = 0;
 478  479          }
 479  480  
 480  481          if (minor != 0) {
 481  482                  ASSERT(minor > MAC_MAX_MINOR);
 482  483                  mac_minor_rele(minor);
 483  484          }
 484  485  
 485  486          mip->mi_state_flags = 0;
 486  487          mac_unregister_priv_prop(mip);
 487  488  
 488  489          /*
 489  490           * Clear the state before destroying the mac_impl_t
 490  491           */
 491  492          mip->mi_state_flags = 0;
 492  493  
 493  494          kmem_cache_free(i_mac_impl_cachep, mip);
 494  495          return (err);
 495  496  }
 496  497  
 497  498  /*
 498  499   * Unregister from the GLDv3 framework
 499  500   */
 500  501  int
 501  502  mac_unregister(mac_handle_t mh)
 502  503  {
 503  504          int                     err;
 504  505          mac_impl_t              *mip = (mac_impl_t *)mh;
 505  506          mod_hash_val_t          val;
 506  507          mac_margin_req_t        *mmr, *nextmmr;
  
    | 
      ↓ open down ↓ | 
    473 lines elided | 
    
      ↑ open up ↑ | 
  
 507  508  
 508  509          /* Fail the unregister if there are any open references to this mac. */
 509  510          if ((err = mac_disable_nowait(mh)) != 0)
 510  511                  return (err);
 511  512  
 512  513          /*
 513  514           * Clean up notification thread and wait for it to exit.
 514  515           */
 515  516          i_mac_notify_exit(mip);
 516  517  
      518 +        /*
      519 +         * Prior to acquiring the MAC perimeter, remove the MAC instance from
      520 +         * the internal hash table. Such removal means table-walkers that
      521 +         * acquire the perimeter will not do so on behalf of what we are
      522 +         * unregistering, which prevents a deadlock.
      523 +         */
      524 +        rw_enter(&i_mac_impl_lock, RW_WRITER);
      525 +        (void) mod_hash_remove(i_mac_impl_hash,
      526 +            (mod_hash_key_t)mip->mi_name, &val);
      527 +        rw_exit(&i_mac_impl_lock);
      528 +        ASSERT(mip == (mac_impl_t *)val);
      529 +
 517  530          i_mac_perim_enter(mip);
 518  531  
 519  532          /*
 520  533           * There are still resource properties configured over this mac.
 521  534           */
 522  535          if (mip->mi_resource_props.mrp_mask != 0)
 523  536                  mac_fastpath_enable((mac_handle_t)mip);
 524  537  
 525  538          if (mip->mi_minor < MAC_MAX_MINOR + 1) {
 526  539                  ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
 527  540                  ddi_remove_minor_node(mip->mi_dip,
 528  541                      (char *)ddi_driver_name(mip->mi_dip));
 529  542          }
 530  543  
 531  544          ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
 532  545              MIS_EXCLUSIVE));
 533  546  
 534  547          mac_driver_stat_delete(mip);
 535  548  
 536      -        (void) mod_hash_remove(i_mac_impl_hash,
 537      -            (mod_hash_key_t)mip->mi_name, &val);
 538      -        ASSERT(mip == (mac_impl_t *)val);
 539      -
 540  549          ASSERT(i_mac_impl_count > 0);
 541  550          atomic_dec_32(&i_mac_impl_count);
 542  551  
 543  552          if (mip->mi_pdata != NULL)
 544  553                  kmem_free(mip->mi_pdata, mip->mi_pdata_size);
 545  554          mip->mi_pdata = NULL;
 546  555          mip->mi_pdata_size = 0;
 547  556  
 548  557          /*
 549  558           * Free the list of margin requests.
 550  559           */
 551  560          for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
 552  561                  nextmmr = mmr->mmr_nextp;
 553  562                  kmem_free(mmr, sizeof (mac_margin_req_t));
 554  563          }
 555  564          mip->mi_mmrp = NULL;
 556  565  
 557  566          mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
 558  567          kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
 559  568          mip->mi_info.mi_unicst_addr = NULL;
 560  569  
 561  570          atomic_dec_32(&mip->mi_type->mt_ref);
 562  571          mip->mi_type = NULL;
 563  572  
 564  573          /*
 565  574           * Free the primary MAC address.
 566  575           */
 567  576          mac_fini_macaddr(mip);
 568  577  
 569  578          /*
 570  579           * free all rings
 571  580           */
 572  581          mac_free_rings(mip, MAC_RING_TYPE_RX);
 573  582          mac_free_rings(mip, MAC_RING_TYPE_TX);
 574  583  
 575  584          mac_addr_factory_fini(mip);
 576  585  
 577  586          bzero(mip->mi_addr, MAXMACADDRLEN);
 578  587          bzero(mip->mi_dstaddr, MAXMACADDRLEN);
 579  588          mip->mi_dstaddr_set = B_FALSE;
 580  589  
 581  590          /* and the flows */
 582  591          mac_flow_tab_destroy(mip->mi_flow_tab);
 583  592          mip->mi_flow_tab = NULL;
 584  593  
 585  594          if (mip->mi_minor > MAC_MAX_MINOR)
 586  595                  mac_minor_rele(mip->mi_minor);
 587  596  
 588  597          cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
 589  598  
 590  599          /*
 591  600           * Reset the perim related fields to default values before
 592  601           * kmem_cache_free
 593  602           */
 594  603          i_mac_perim_exit(mip);
 595  604          mip->mi_state_flags = 0;
 596  605  
 597  606          mac_unregister_priv_prop(mip);
 598  607  
 599  608          ASSERT(mip->mi_bridge_link == NULL);
 600  609          kmem_cache_free(i_mac_impl_cachep, mip);
 601  610  
 602  611          return (0);
 603  612  }
 604  613  
 605  614  /* DATA RECEPTION */
 606  615  
 607  616  /*
 608  617   * This function is invoked for packets received by the MAC driver in
 609  618   * interrupt context. The ring generation number provided by the driver
 610  619   * is matched with the ring generation number held in MAC. If they do not
 611  620   * match, received packets are considered stale packets coming from an older
 612  621   * assignment of the ring. Drop them.
            *
            * A NULL mrh skips the generation check entirely.  On a mismatch a
            * DTrace probe records both generation numbers and the whole chain is
            * freed with freemsgchain(); otherwise the chain is passed to mac_rx()
            * with mrh cast to a mac_resource_handle_t.
 613  622   */
 614  623  void
 615  624  mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
 616  625      uint64_t mr_gen_num)
 617  626  {
 618  627          mac_ring_t              *mr = (mac_ring_t *)mrh;
 619  628  
 620  629          if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
 621  630                  DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
 622  631                      mr->mr_gen_num, uint64_t, mr_gen_num);
 623  632                  freemsgchain(mp_chain);
 624  633                  return;
 625  634          }
 626  635          mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
 627  636  }
 628  637  
 629  638  /*
 630  639   * This function is invoked for each packet received by the underlying driver.
            *
            * When mi_bridge_link is NULL the chain goes straight to
            * mac_rx_common().  Otherwise the link is re-read under mi_bridge_lock
            * and referenced through mac_bridge_ref_cb(); the chain is then handed
            * to mac_bridge_rx_cb() and the reference is dropped.  If the link has
            * gone away by the time the lock is held, delivery falls back to
            * mac_rx_common() on the original MAC.
 631  640   */
 632  641  void
 633  642  mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
 634  643  {
 635  644          mac_impl_t *mip = (mac_impl_t *)mh;
 636  645  
 637  646          /*
 638  647           * Check if the link is part of a bridge.  If not, then we don't need
 639  648           * to take the lock to remain consistent.  Make this common case
 640  649           * lock-free and tail-call optimized.
 641  650           */
 642  651          if (mip->mi_bridge_link == NULL) {
 643  652                  mac_rx_common(mh, mrh, mp_chain);
 644  653          } else {
 645  654                  /*
 646  655                   * Once we take a reference on the bridge link, the bridge
 647  656                   * module itself can't unload, so the callback pointers are
 648  657                   * stable.
 649  658                   */
 650  659                  mutex_enter(&mip->mi_bridge_lock);
                           /* mh is reused here to hold the bridge link handle. */
 651  660                  if ((mh = mip->mi_bridge_link) != NULL)
 652  661                          mac_bridge_ref_cb(mh, B_TRUE);
 653  662                  mutex_exit(&mip->mi_bridge_lock);
 654  663                  if (mh == NULL) {
                                   /* Link went away after the unlocked check. */
 655  664                          mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
 656  665                  } else {
 657  666                          mac_bridge_rx_cb(mh, mrh, mp_chain);
 658  667                          mac_bridge_ref_cb(mh, B_FALSE);
 659  668                  }
 660  669          }
 661  670  }
 662  671  
 663  672  /*
 664  673   * Special case function: this allows snooping of packets transmitted and
 665  674   * received by TRILL. By design, they go directly into the TRILL module.
            *
            * The message is passed to mac_promisc_dispatch() (with a NULL third
            * argument) only when the MAC's mi_promisc_list is non-NULL; otherwise
            * this function does nothing.
 666  675   */
 667  676  void
 668  677  mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
 669  678  {
 670  679          mac_impl_t *mip = (mac_impl_t *)mh;
 671  680  
 672  681          if (mip->mi_promisc_list != NULL)
 673  682                  mac_promisc_dispatch(mip, mp, NULL);
 674  683  }
 675  684  
 676  685  /*
 677  686   * This is the upward reentry point for packets arriving from the bridging
 678  687   * module and from mac_rx for links not part of a bridge.
            *
            * mh       - handle of the MAC the packets were received on
            * mrh      - receive ring the packets came from, or NULL
            * mp_chain - chain of received packets
            *
            * The chain is first offered to any promiscuous callbacks.  It is then
            * delivered by the first of these that applies:
            *   - ring supplied and hardware classified: the ring's SRS lower
            *     processing routine;
            *   - no ring supplied and a single active client: that client's flow
            *     entry callback;
            *   - otherwise: software classification through mac_rx_flow().
            * Whatever is left over is freed here, as is the whole chain when the
            * supplied ring is not in use or is being torn down.
 679  688   */
 680  689  void
 681  690  mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
 682  691  {
 683  692          mac_impl_t              *mip = (mac_impl_t *)mh;
 684  693          mac_ring_t              *mr = (mac_ring_t *)mrh;
 685  694          mac_soft_ring_set_t     *mac_srs;
 686  695          mblk_t                  *bp = mp_chain;
 687  696          boolean_t               hw_classified = B_FALSE;
 688  697  
 689  698          /*
 690  699           * If there are any promiscuous mode callbacks defined for
 691  700           * this MAC, pass them a copy if appropriate.
 692  701           */
 693  702          if (mip->mi_promisc_list != NULL)
 694  703                  mac_promisc_dispatch(mip, mp_chain, NULL);
 695  704  
 696  705          if (mr != NULL) {
 697  706                  /*
 698  707                   * If the SRS teardown has started, just return. The 'mr'
 699  708                   * continues to be valid until the driver unregisters the mac.
 700  709                   * Hardware classified packets will not make their way up
 701  710                   * beyond this point once the teardown has started. The driver
 702  711                   * is never passed a pointer to a flow entry or SRS or any
 703  712                   * structure that can be freed much before mac_unregister.
 704  713                   */
 705  714                  mutex_enter(&mr->mr_lock);
 706  715                  if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
 707  716                      (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
 708  717                          mutex_exit(&mr->mr_lock);
 709  718                          freemsgchain(mp_chain);
 710  719                          return;
 711  720                  }
                           /*
                            * Take the ring reference while mr_lock is still held; it is
                            * dropped with MR_REFRELE() after the SRS has been called.
                            */
 712  721                  if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
 713  722                          hw_classified = B_TRUE;
 714  723                          MR_REFHOLD_LOCKED(mr);
 715  724                  }
 716  725                  mutex_exit(&mr->mr_lock);
 717  726  
 718  727                  /*
 719  728                   * We check if an SRS is controlling this ring.
 720  729                   * If so, we can directly call the srs_lower_proc
 721  730                   * routine otherwise we need to go through mac_rx_classify
 722  731                   * to reach the right place.
 723  732                   */
 724  733                  if (hw_classified) {
 725  734                          mac_srs = mr->mr_srs;
 726  735                          /*
 727  736                           * This is supposed to be the fast path.
 728  737                           * All packets received through here were steered by
 729  738                           * the hardware classifier, and share the same
 730  739                           * MAC header info.
 731  740                           */
 732  741                          mac_srs->srs_rx.sr_lower_proc(mh,
 733  742                              (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
 734  743                          MR_REFRELE(mr);
 735  744                          return;
 736  745                  }
 737  746                  /* We'll fall through to software classification */
 738  747          } else {
 739  748                  flow_entry_t *flent;
 740  749                  int err;
 741  750  
                           /*
                            * No ring was supplied.  If the MAC has a single active
                            * client, try to hold its flow entry and hand the whole
                            * chain to that entry's callback.  mi_rw_lock is held as
                            * reader only while the client is looked up and the hold
                            * is attempted.
                            */
 742  751                  rw_enter(&mip->mi_rw_lock, RW_READER);
 743  752                  if (mip->mi_single_active_client != NULL) {
 744  753                          flent = mip->mi_single_active_client->mci_flent_list;
 745  754                          FLOW_TRY_REFHOLD(flent, err);
 746  755                          rw_exit(&mip->mi_rw_lock);
                                   /* err == 0: the hold succeeded, deliver and finish */
 747  756                          if (err == 0) {
 748  757                                  (flent->fe_cb_fn)(flent->fe_cb_arg1,
 749  758                                      flent->fe_cb_arg2, mp_chain, B_FALSE);
 750  759                                  FLOW_REFRELE(flent);
 751  760                                  return;
 752  761                          }
 753  762                  } else {
 754  763                          rw_exit(&mip->mi_rw_lock);
 755  764                  }
 756  765          }
 757  766  
                   /*
                    * Software classification.  mac_rx_flow() returns NULL when
                    * nothing is left for us to dispose of.
                    */
 758  767          if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
 759  768                  if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
 760  769                          return;
 761  770          }
 762  771  
                   /*
                    * bp is the whole chain when the flow table is empty, otherwise
                    * whatever mac_rx_flow() handed back.
                    */
 763  772          freemsgchain(bp);
 764  773  }
 765  774  
 766  775  /* DATA TRANSMISSION */
 767  776  
 768  777  /*
 769  778   * A driver's notification to resume transmission, in case of a provider
 770  779   * without TX rings.
 771  780   */
 772  781  void
 773  782  mac_tx_update(mac_handle_t mh)
 774  783  {
 775  784          mac_tx_ring_update(mh, NULL);
 776  785  }
 777  786  
 778  787  /*
 779  788   * A driver's notification to resume transmission on the specified TX ring.
 780  789   */
 781  790  void
 782  791  mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
 783  792  {
 784  793          i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
 785  794  }
 786  795  
 787  796  /* LINK STATE */
 788  797  /*
 789  798   * Notify the MAC layer about a link state change
 790  799   */
 791  800  void
 792  801  mac_link_update(mac_handle_t mh, link_state_t link)
 793  802  {
 794  803          mac_impl_t      *mip = (mac_impl_t *)mh;
 795  804  
 796  805          /*
 797  806           * Save the link state.
 798  807           */
 799  808          mip->mi_lowlinkstate = link;
 800  809  
 801  810          /*
 802  811           * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
 803  812           * thread to deliver both lower and upper notifications.
 804  813           */
 805  814          i_mac_notify(mip, MAC_NOTE_LOWLINK);
 806  815  }
 807  816  
 808  817  /*
 809  818   * Notify the MAC layer about a link state change due to bridging.
 810  819   */
 811  820  void
 812  821  mac_link_redo(mac_handle_t mh, link_state_t link)
 813  822  {
 814  823          mac_impl_t      *mip = (mac_impl_t *)mh;
 815  824  
 816  825          /*
 817  826           * Save the link state.
 818  827           */
 819  828          mip->mi_linkstate = link;
 820  829  
 821  830          /*
 822  831           * Send a MAC_NOTE_LINK notification.  Only upper notifications are
 823  832           * made.
 824  833           */
 825  834          i_mac_notify(mip, MAC_NOTE_LINK);
 826  835  }
 827  836  
 828  837  /* MINOR NODE HANDLING */
 829  838  
 830  839  /*
 831  840   * Given a dev_t, return the instance number (PPA) associated with it.
 832  841   * Drivers can use this in their getinfo(9e) implementation to lookup
 833  842   * the instance number (i.e. PPA) of the device, to use as an index to
 834  843   * their own array of soft state structures.
 835  844   *
 836  845   * Returns -1 on error.
 837  846   */
 838  847  int
 839  848  mac_devt_to_instance(dev_t devt)
 840  849  {
 841  850          return (dld_devt_to_instance(devt));
 842  851  }
 843  852  
 844  853  /*
 845  854   * This function returns the first minor number that is available for
 846  855   * driver private use.  All minor numbers smaller than this are
 847  856   * reserved for GLDv3 use.
 848  857   */
 849  858  minor_t
 850  859  mac_private_minor(void)
 851  860  {
 852  861          return (MAC_PRIVATE_MINOR);
 853  862  }
 854  863  
 855  864  /* OTHER CONTROL INFORMATION */
 856  865  
 857  866  /*
 858  867   * A driver notified us that its primary MAC address has changed.
 859  868   */
 860  869  void
 861  870  mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
 862  871  {
 863  872          mac_impl_t      *mip = (mac_impl_t *)mh;
 864  873  
 865  874          if (mip->mi_type->mt_addr_length == 0)
 866  875                  return;
 867  876  
 868  877          i_mac_perim_enter(mip);
 869  878  
 870  879          /*
 871  880           * If address changes, freshen the MAC address value and update
 872  881           * all MAC clients that share this MAC address.
 873  882           */
 874  883          if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
 875  884                  mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
 876  885                      (uint8_t *)addr);
 877  886          }
 878  887  
 879  888          i_mac_perim_exit(mip);
 880  889  
 881  890          /*
 882  891           * Send a MAC_NOTE_UNICST notification.
 883  892           */
 884  893          i_mac_notify(mip, MAC_NOTE_UNICST);
 885  894  }
 886  895  
 887  896  void
 888  897  mac_dst_update(mac_handle_t mh, const uint8_t *addr)
 889  898  {
 890  899          mac_impl_t      *mip = (mac_impl_t *)mh;
 891  900  
 892  901          if (mip->mi_type->mt_addr_length == 0)
 893  902                  return;
 894  903  
 895  904          i_mac_perim_enter(mip);
 896  905          bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
 897  906          i_mac_perim_exit(mip);
 898  907          i_mac_notify(mip, MAC_NOTE_DEST);
 899  908  }
 900  909  
 901  910  /*
 902  911   * MAC plugin information changed.
 903  912   */
 904  913  int
 905  914  mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
 906  915  {
 907  916          mac_impl_t      *mip = (mac_impl_t *)mh;
 908  917  
 909  918          /*
 910  919           * Verify that the plugin supports MAC plugin data and that the
 911  920           * supplied data is valid.
 912  921           */
 913  922          if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
 914  923                  return (EINVAL);
 915  924          if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
 916  925                  return (EINVAL);
 917  926  
 918  927          if (mip->mi_pdata != NULL)
 919  928                  kmem_free(mip->mi_pdata, mip->mi_pdata_size);
 920  929  
 921  930          mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
 922  931          bcopy(mac_pdata, mip->mi_pdata, dsize);
 923  932          mip->mi_pdata_size = dsize;
 924  933  
 925  934          /*
 926  935           * Since the MAC plugin data is used to construct MAC headers that
 927  936           * were cached in fast-path headers, we need to flush fast-path
 928  937           * information for links associated with this mac.
 929  938           */
 930  939          i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
 931  940          return (0);
 932  941  }
 933  942  
 934  943  /*
 935  944   * Invoked by driver as well as the framework to notify its capability change.
 936  945   */
 937  946  void
 938  947  mac_capab_update(mac_handle_t mh)
 939  948  {
 940  949          /* Send MAC_NOTE_CAPAB_CHG notification */
 941  950          i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
 942  951  }
 943  952  
 944  953  /*
 945  954   * Used by normal drivers to update the max sdu size.
 946  955   * We need to handle the case of a smaller mi_sdu_multicast
 947  956   * since this is called by mac_set_mtu() even for drivers that
 948  957   * have differing unicast and multicast mtu and we don't want to
 949  958   * increase the multicast mtu by accident in that case.
 950  959   */
 951  960  int
 952  961  mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
 953  962  {
 954  963          mac_impl_t      *mip = (mac_impl_t *)mh;
 955  964  
 956  965          if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
 957  966                  return (EINVAL);
 958  967          mip->mi_sdu_max = sdu_max;
 959  968          if (mip->mi_sdu_multicast > mip->mi_sdu_max)
 960  969                  mip->mi_sdu_multicast = mip->mi_sdu_max;
 961  970  
 962  971          /* Send a MAC_NOTE_SDU_SIZE notification. */
 963  972          i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
 964  973          return (0);
 965  974  }
 966  975  
 967  976  /*
 968  977   * Version of the above function that is used by drivers that have a different
 969  978   * max sdu size for multicast/broadcast vs. unicast.
 970  979   */
 971  980  int
 972  981  mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
 973  982  {
 974  983          mac_impl_t      *mip = (mac_impl_t *)mh;
 975  984  
 976  985          if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
 977  986                  return (EINVAL);
 978  987          if (sdu_multicast == 0)
 979  988                  sdu_multicast = sdu_max;
 980  989          if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
 981  990                  return (EINVAL);
 982  991          mip->mi_sdu_max = sdu_max;
 983  992          mip->mi_sdu_multicast = sdu_multicast;
 984  993  
 985  994          /* Send a MAC_NOTE_SDU_SIZE notification. */
 986  995          i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
 987  996          return (0);
 988  997  }
 989  998  
 990  999  static void
 991 1000  mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
 992 1001  {
 993 1002          mac_client_impl_t *mcip;
 994 1003          flow_entry_t *flent;
 995 1004          mac_soft_ring_set_t *mac_rx_srs;
 996 1005          mac_cpus_t *srs_cpu;
 997 1006          int i;
 998 1007  
 999 1008          if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
1000 1009              (!ring->mr_info.mri_intr.mi_ddi_shared)) {
1001 1010                  /* interrupt can be re-targeted */
1002 1011                  ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
1003 1012                  flent = mcip->mci_flent;
1004 1013                  if (ring->mr_type == MAC_RING_TYPE_RX) {
1005 1014                          for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
1006 1015                                  mac_rx_srs = flent->fe_rx_srs[i];
1007 1016                                  if (mac_rx_srs->srs_ring != ring)
1008 1017                                          continue;
1009 1018                                  srs_cpu = &mac_rx_srs->srs_cpu;
1010 1019                                  mutex_enter(&cpu_lock);
1011 1020                                  mac_rx_srs_retarget_intr(mac_rx_srs,
1012 1021                                      srs_cpu->mc_rx_intr_cpu);
1013 1022                                  mutex_exit(&cpu_lock);
1014 1023                                  break;
1015 1024                          }
1016 1025                  } else {
1017 1026                          if (flent->fe_tx_srs != NULL) {
1018 1027                                  mutex_enter(&cpu_lock);
1019 1028                                  mac_tx_srs_retarget_intr(
1020 1029                                      flent->fe_tx_srs);
1021 1030                                  mutex_exit(&cpu_lock);
1022 1031                          }
1023 1032                  }
1024 1033          }
1025 1034  }
1026 1035  
1027 1036  /*
1028 1037   * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
1029 1038   * their clients. There is a 1-1 mapping pseudo ring and the hardware
1030 1039   * ring. ddi interrupt handles are exported from the hardware ring to
1031 1040   * the pseudo ring. Thus when the interrupt handle changes, clients of
1032 1041   * aggr that are using the handle need to use the new handle and
1033 1042   * re-target their interrupts.
            *
            * mip  - the port's mac; this routine starts by exiting its perimeter
            *        and has exited it again on every return path
            * ring - the hardware ring; ring->mr_prh refers to its pseudo ring
            * ddh  - the new interrupt handle; when NULL the handle is stored in
            *        the pseudo ring but no re-targeting is done
1034 1043   */
1035 1044  static void
1036 1045  mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
1037 1046      ddi_intr_handle_t ddh)
1038 1047  {
1039 1048          mac_ring_t *pring;
1040 1049          mac_group_t *pgroup;
1041 1050          mac_impl_t *pmip;
1042 1051          char macname[MAXNAMELEN];
1043 1052          mac_perim_handle_t p_mph;
1044 1053          uint64_t saved_gen_num;
1045 1054  
1046 1055  again:
                   /*
                    * Snapshot everything that is needed from the ring while the
                    * port's perimeter is still held: the pseudo ring, its group,
                    * the owning mac and its name, and the ring's generation number.
                    */
1047 1056          pring = (mac_ring_t *)ring->mr_prh;
1048 1057          pgroup = (mac_group_t *)pring->mr_gh;
1049 1058          pmip = (mac_impl_t *)pgroup->mrg_mh;
1050 1059          saved_gen_num = ring->mr_gen_num;
1051 1060          (void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
1052 1061          /*
1053 1062           * We need to enter aggr's perimeter. The locking hierarchy
1054 1063           * dictates that aggr's perimeter should be entered first
1055 1064           * and then the port's perimeter. So drop the port's
1056 1065           * perimeter, enter aggr's and then re-enter port's
1057 1066           * perimeter.
1058 1067           */
1059 1068          i_mac_perim_exit(mip);
1060 1069          /*
1061 1070           * While we know pmip is the aggr's mip, there is a
1062 1071           * possibility that aggr could have unregistered by
1063 1072           * the time we exit port's perimeter (mip) and
1064 1073           * enter aggr's perimeter (pmip). To avoid that
1065 1074           * scenario, enter aggr's perimeter using its name.
1066 1075           */
                   /* On failure we return with neither perimeter held. */
1067 1076          if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
1068 1077                  return;
1069 1078          i_mac_perim_enter(mip);
1070 1079          /*
1071 1080           * Check if the ring got assigned to another aggregation before
1072 1081           * we could enter aggr's and the port's perimeter. When a ring
1073 1082           * gets deleted from an aggregation, it calls mac_stop_ring()
1074 1083           * which increments the generation number. So checking
1075 1084           * generation number will be enough.
1076 1085           */
1077 1086          if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
                           /*
                            * Drop both perimeters, then re-enter only the port's so
                            * that the retry starts from the same state as the first
                            * pass.
                            */
1078 1087                  i_mac_perim_exit(mip);
1079 1088                  mac_perim_exit(p_mph);
1080 1089                  i_mac_perim_enter(mip);
1081 1090                  goto again;
1082 1091          }
1083 1092  
1084 1093          /* Check if pseudo ring is still present */
1085 1094          if (ring->mr_prh != NULL) {
1086 1095                  pring->mr_info.mri_intr.mi_ddi_handle = ddh;
1087 1096                  pring->mr_info.mri_intr.mi_ddi_shared =
1088 1097                      ring->mr_info.mri_intr.mi_ddi_shared;
1089 1098                  if (ddh != NULL)
1090 1099                          mac_ring_intr_retarget(pgroup, pring);
1091 1100          }
1092 1101          i_mac_perim_exit(mip);
1093 1102          mac_perim_exit(p_mph);
1094 1103  }
1095 1104  /*
1096 1105   * API called by driver to provide new interrupt handle for TX/RX rings.
1097 1106   * This usually happens when IRM (Interrupt Resource Manangement)
1098 1107   * framework either gives the driver more MSI-x interrupts or takes
1099 1108   * away MSI-x interrupts from the driver.
1100 1109   */
1101 1110  void
1102 1111  mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
1103 1112  {
1104 1113          mac_ring_t      *ring = (mac_ring_t *)mrh;
1105 1114          mac_group_t     *group = (mac_group_t *)ring->mr_gh;
1106 1115          mac_impl_t      *mip = (mac_impl_t *)group->mrg_mh;
1107 1116  
1108 1117          i_mac_perim_enter(mip);
1109 1118          ring->mr_info.mri_intr.mi_ddi_handle = ddh;
1110 1119          if (ddh == NULL) {
1111 1120                  /* Interrupts being reset */
1112 1121                  ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
1113 1122                  if (ring->mr_prh != NULL) {
1114 1123                          mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1115 1124                          return;
1116 1125                  }
1117 1126          } else {
1118 1127                  /* New interrupt handle */
1119 1128                  mac_compare_ddi_handle(mip->mi_rx_groups,
1120 1129                      mip->mi_rx_group_count, ring);
1121 1130                  if (!ring->mr_info.mri_intr.mi_ddi_shared) {
1122 1131                          mac_compare_ddi_handle(mip->mi_tx_groups,
1123 1132                              mip->mi_tx_group_count, ring);
1124 1133                  }
1125 1134                  if (ring->mr_prh != NULL) {
1126 1135                          mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1127 1136                          return;
1128 1137                  } else {
1129 1138                          mac_ring_intr_retarget(group, ring);
1130 1139                  }
1131 1140          }
1132 1141          i_mac_perim_exit(mip);
1133 1142  }
1134 1143  
1135 1144  /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
1136 1145  
1137 1146  /*
1138 1147   * Updates the mac_impl structure with the current state of the link
1139 1148   */
1140 1149  static void
1141 1150  i_mac_log_link_state(mac_impl_t *mip)
1142 1151  {
1143 1152          /*
1144 1153           * If no change, then it is not interesting.
1145 1154           */
1146 1155          if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
1147 1156                  return;
1148 1157  
1149 1158          switch (mip->mi_lowlinkstate) {
1150 1159          case LINK_STATE_UP:
1151 1160                  if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
1152 1161                          char det[200];
1153 1162  
1154 1163                          mip->mi_type->mt_ops.mtops_link_details(det,
1155 1164                              sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
1156 1165  
1157 1166                          cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
1158 1167                  } else {
1159 1168                          cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
1160 1169                  }
1161 1170                  break;
1162 1171  
1163 1172          case LINK_STATE_DOWN:
1164 1173                  /*
1165 1174                   * Only transitions from UP to DOWN are interesting
1166 1175                   */
1167 1176                  if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
1168 1177                          cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
1169 1178                  break;
1170 1179  
1171 1180          case LINK_STATE_UNKNOWN:
1172 1181                  /*
1173 1182                   * This case is normally not interesting.
1174 1183                   */
1175 1184                  break;
1176 1185          }
1177 1186          mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1178 1187  }
1179 1188  
1180 1189  /*
1181 1190   * Main routine for the callbacks notifications thread
            *
            * arg is the mac_impl_t this thread serves.  The thread sleeps until
            * bits are set in mi_notify_bits, then calls the notification callbacks
            * for every notification type whose bit is set.  Bit MAC_NNOTE is the
            * request to quit (i_mac_notify_exit() sets it); on seeing it the thread
            * sets MIS_NOTIFY_DONE, wakes up the waiters and exits.
1182 1191   */
1183 1192  static void
1184 1193  i_mac_notify_thread(void *arg)
1185 1194  {
1186 1195          mac_impl_t      *mip = arg;
1187 1196          callb_cpr_t     cprinfo;
1188 1197          mac_cb_t        *mcb;
1189 1198          mac_cb_info_t   *mcbi;
1190 1199          mac_notify_cb_t *mncb;
1191 1200  
1192 1201          mcbi = &mip->mi_notify_cb_info;
1193 1202          CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
1194 1203              "i_mac_notify_thread");
1195 1204  
1196 1205          mutex_enter(mcbi->mcbi_lockp);
1197 1206  
                   /* The lock is held at the top of every iteration. */
1198 1207          for (;;) {
1199 1208                  uint32_t        bits;
1200 1209                  uint32_t        type;
1201 1210  
1202 1211                  bits = mip->mi_notify_bits;
1203 1212                  if (bits == 0) {
                                   /* Nothing pending: sleep until signalled. */
1204 1213                          CALLB_CPR_SAFE_BEGIN(&cprinfo);
1205 1214                          cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1206 1215                          CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
1207 1216                          continue;
1208 1217                  }
                           /*
                            * Take over the pending bits; bits set from now on are
                            * handled by a later iteration.
                            */
1209 1218                  mip->mi_notify_bits = 0;
1210 1219                  if ((bits & (1 << MAC_NNOTE)) != 0) {
1211 1220                          /* request to quit */
1212 1221                          ASSERT(mip->mi_state_flags & MIS_DISABLED);
1213 1222                          break;
1214 1223                  }
1215 1224  
                           /* The callbacks below run without the lock held. */
1216 1225                  mutex_exit(mcbi->mcbi_lockp);
1217 1226  
1218 1227                  /*
1219 1228                   * Log link changes on the actual link, but then do reports on
1220 1229                   * synthetic state (if part of a bridge).
1221 1230                   */
1222 1231                  if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
1223 1232                          link_state_t newstate;
1224 1233                          mac_handle_t mh;
1225 1234  
1226 1235                          i_mac_log_link_state(mip);
1227 1236                          newstate = mip->mi_lowlinkstate;
                                   /*
                                    * mi_bridge_link is tested once without the lock
                                    * and again under mi_bridge_lock before it is used.
                                    */
1228 1237                          if (mip->mi_bridge_link != NULL) {
1229 1238                                  mutex_enter(&mip->mi_bridge_lock);
1230 1239                                  if ((mh = mip->mi_bridge_link) != NULL) {
1231 1240                                          newstate = mac_bridge_ls_cb(mh,
1232 1241                                              newstate);
1233 1242                                  }
1234 1243                                  mutex_exit(&mip->mi_bridge_lock);
1235 1244                          }
                                   /*
                                    * A change of the reported state is turned into a
                                    * MAC_NOTE_LINK notification in this same pass.
                                    */
1236 1245                          if (newstate != mip->mi_linkstate) {
1237 1246                                  mip->mi_linkstate = newstate;
1238 1247                                  bits |= 1 << MAC_NOTE_LINK;
1239 1248                          }
1240 1249                  }
1241 1250  
1242 1251                  /*
1243 1252                   * Do notification callbacks for each notification type.
1244 1253                   */
1245 1254                  for (type = 0; type < MAC_NNOTE; type++) {
1246 1255                          if ((bits & (1 << type)) == 0) {
1247 1256                                  continue;
1248 1257                          }
1249 1258  
                                   /* First the entry in mac_notify_cb_list, if set. */
1250 1259                          if (mac_notify_cb_list[type] != NULL)
1251 1260                                  (*mac_notify_cb_list[type])(mip);
1252 1261  
1253 1262                          /*
1254 1263                           * Walk the list of notifications.
1255 1264                           */
1256 1265                          MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
1257 1266                          for (mcb = mip->mi_notify_cb_list; mcb != NULL;
1258 1267                              mcb = mcb->mcb_nextp) {
1259 1268                                  mncb = (mac_notify_cb_t *)mcb->mcb_objp;
1260 1269                                  mncb->mncb_fn(mncb->mncb_arg, type);
1261 1270                          }
1262 1271                          MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
1263 1272                              &mip->mi_notify_cb_list);
1264 1273                  }
1265 1274  
                           /* Re-take the lock before looking at the bits again. */
1266 1275                  mutex_enter(mcbi->mcbi_lockp);
1267 1276          }
1268 1277  
                   /* Quit requested; the lock is held here.  Tell the waiters. */
1269 1278          mip->mi_state_flags |= MIS_NOTIFY_DONE;
1270 1279          cv_broadcast(&mcbi->mcbi_cv);
1271 1280  
1272 1281          /* CALLB_CPR_EXIT drops the lock */
1273 1282          CALLB_CPR_EXIT(&cprinfo);
1274 1283          thread_exit();
1275 1284  }
1276 1285  
1277 1286  /*
1278 1287   * Signal the i_mac_notify_thread asking it to quit.
1279 1288   * Then wait till it is done.
1280 1289   */
1281 1290  void
1282 1291  i_mac_notify_exit(mac_impl_t *mip)
1283 1292  {
1284 1293          mac_cb_info_t   *mcbi;
1285 1294  
1286 1295          mcbi = &mip->mi_notify_cb_info;
1287 1296  
1288 1297          mutex_enter(mcbi->mcbi_lockp);
1289 1298          mip->mi_notify_bits = (1 << MAC_NNOTE);
1290 1299          cv_broadcast(&mcbi->mcbi_cv);
1291 1300  
1292 1301  
1293 1302          while ((mip->mi_notify_thread != NULL) &&
1294 1303              !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1295 1304                  cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1296 1305          }
1297 1306  
1298 1307          /* Necessary clean up before doing kmem_cache_free */
1299 1308          mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1300 1309          mip->mi_notify_bits = 0;
1301 1310          mip->mi_notify_thread = NULL;
1302 1311          mutex_exit(mcbi->mcbi_lockp);
1303 1312  }
1304 1313  
1305 1314  /*
1306 1315   * Entry point invoked by drivers to dynamically add a ring to an
1307 1316   * existing group.
1308 1317   */
1309 1318  int
1310 1319  mac_group_add_ring(mac_group_handle_t gh, int index)
1311 1320  {
1312 1321          mac_group_t *group = (mac_group_t *)gh;
1313 1322          mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1314 1323          int ret;
1315 1324  
1316 1325          i_mac_perim_enter(mip);
1317 1326          ret = i_mac_group_add_ring(group, NULL, index);
1318 1327          i_mac_perim_exit(mip);
1319 1328          return (ret);
1320 1329  }
1321 1330  
1322 1331  /*
1323 1332   * Entry point invoked by drivers to dynamically remove a ring
1324 1333   * from an existing group. The specified ring handle must no longer
1325 1334   * be used by the driver after a call to this function.
1326 1335   */
1327 1336  void
1328 1337  mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1329 1338  {
1330 1339          mac_group_t *group = (mac_group_t *)gh;
1331 1340          mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1332 1341  
1333 1342          i_mac_perim_enter(mip);
1334 1343          i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1335 1344          i_mac_perim_exit(mip);
1336 1345  }
1337 1346  
1338 1347  /*
1339 1348   * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
1340 1349   * entry points.
1341 1350   */
1342 1351  
1343 1352  void
1344 1353  mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
1345 1354  {
1346 1355          mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1347 1356  
1348 1357          /* nothing to do if the caller doesn't want the default value */
1349 1358          if (pr->pr_default == NULL)
1350 1359                  return;
1351 1360  
1352 1361          ASSERT(pr->pr_default_size >= sizeof (uint8_t));
1353 1362  
1354 1363          *(uint8_t *)(pr->pr_default) = val;
1355 1364          pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1356 1365  }
1357 1366  
1358 1367  void
1359 1368  mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
1360 1369  {
1361 1370          mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1362 1371  
1363 1372          /* nothing to do if the caller doesn't want the default value */
1364 1373          if (pr->pr_default == NULL)
1365 1374                  return;
1366 1375  
1367 1376          ASSERT(pr->pr_default_size >= sizeof (uint64_t));
1368 1377  
1369 1378          bcopy(&val, pr->pr_default, sizeof (val));
1370 1379  
1371 1380          pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1372 1381  }
1373 1382  
1374 1383  void
1375 1384  mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
1376 1385  {
1377 1386          mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1378 1387  
1379 1388          /* nothing to do if the caller doesn't want the default value */
1380 1389          if (pr->pr_default == NULL)
1381 1390                  return;
1382 1391  
1383 1392          ASSERT(pr->pr_default_size >= sizeof (uint32_t));
1384 1393  
1385 1394          bcopy(&val, pr->pr_default, sizeof (val));
1386 1395  
1387 1396          pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1388 1397  }
1389 1398  
1390 1399  void
1391 1400  mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
1392 1401  {
1393 1402          mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1394 1403  
1395 1404          /* nothing to do if the caller doesn't want the default value */
1396 1405          if (pr->pr_default == NULL)
1397 1406                  return;
1398 1407  
1399 1408          if (strlen(str) >= pr->pr_default_size)
1400 1409                  pr->pr_errno = ENOBUFS;
1401 1410          else
1402 1411                  (void) strlcpy(pr->pr_default, str, pr->pr_default_size);
1403 1412          pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1404 1413  }
1405 1414  
1406 1415  void
1407 1416  mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
1408 1417      link_flowctrl_t val)
1409 1418  {
1410 1419          mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1411 1420  
1412 1421          /* nothing to do if the caller doesn't want the default value */
1413 1422          if (pr->pr_default == NULL)
1414 1423                  return;
1415 1424  
1416 1425          ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));
1417 1426  
1418 1427          bcopy(&val, pr->pr_default, sizeof (val));
1419 1428  
1420 1429          pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1421 1430  }
1422 1431  
1423 1432  void
1424 1433  mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
1425 1434      uint32_t max)
1426 1435  {
1427 1436          mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1428 1437          mac_propval_range_t *range = pr->pr_range;
1429 1438          mac_propval_uint32_range_t *range32;
1430 1439  
1431 1440          /* nothing to do if the caller doesn't want the range info */
1432 1441          if (range == NULL)
1433 1442                  return;
1434 1443  
1435 1444          if (pr->pr_range_cur_count++ == 0) {
1436 1445                  /* first range */
1437 1446                  pr->pr_flags |= MAC_PROP_INFO_RANGE;
1438 1447                  range->mpr_type = MAC_PROPVAL_UINT32;
1439 1448          } else {
1440 1449                  /* all ranges of a property should be of the same type */
1441 1450                  ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
1442 1451                  if (pr->pr_range_cur_count > range->mpr_count) {
1443 1452                          pr->pr_errno = ENOSPC;
1444 1453                          return;
1445 1454                  }
1446 1455          }
1447 1456  
1448 1457          range32 = range->mpr_range_uint32;
1449 1458          range32[pr->pr_range_cur_count - 1].mpur_min = min;
1450 1459          range32[pr->pr_range_cur_count - 1].mpur_max = max;
1451 1460  }
1452 1461  
1453 1462  void
1454 1463  mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
1455 1464  {
1456 1465          mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1457 1466  
1458 1467          pr->pr_perm = perm;
1459 1468          pr->pr_flags |= MAC_PROP_INFO_PERM;
1460 1469  }
1461 1470  
1462 1471  void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff,
1463 1472      uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
1464 1473  {
1465 1474          uint32_t flags;
1466 1475  
1467 1476          ASSERT(DB_TYPE(mp) == M_DATA);
1468 1477  
1469 1478          flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
1470 1479          if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
1471 1480                  if (value != NULL)
1472 1481                          *value = (uint32_t)DB_CKSUM16(mp);
1473 1482                  if ((flags & HCK_PARTIALCKSUM) != 0) {
1474 1483                          if (start != NULL)
1475 1484                                  *start = (uint32_t)DB_CKSUMSTART(mp);
1476 1485                          if (stuff != NULL)
1477 1486                                  *stuff = (uint32_t)DB_CKSUMSTUFF(mp);
1478 1487                          if (end != NULL)
1479 1488                                  *end = (uint32_t)DB_CKSUMEND(mp);
1480 1489                  }
1481 1490          }
1482 1491  
1483 1492          if (flags_ptr != NULL)
1484 1493                  *flags_ptr = flags;
1485 1494  }
1486 1495  
1487 1496  void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff,
1488 1497      uint32_t end, uint32_t value, uint32_t flags)
1489 1498  {
1490 1499          ASSERT(DB_TYPE(mp) == M_DATA);
1491 1500  
1492 1501          DB_CKSUMSTART(mp) = (intptr_t)start;
1493 1502          DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
1494 1503          DB_CKSUMEND(mp) = (intptr_t)end;
1495 1504          DB_CKSUMFLAGS(mp) = (uint16_t)flags;
1496 1505          DB_CKSUM16(mp) = (uint16_t)value;
1497 1506  }
1498 1507  
1499 1508  void
1500 1509  mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1501 1510  {
1502 1511          ASSERT(DB_TYPE(mp) == M_DATA);
1503 1512  
1504 1513          if (flags != NULL) {
1505 1514                  *flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1506 1515                  if ((*flags != 0) && (mss != NULL))
1507 1516                          *mss = (uint32_t)DB_LSOMSS(mp);
1508 1517          }
1509 1518  }
  
    | 
      ↓ open down ↓ | 
    960 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX