1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/conf.h>
  28 #include <sys/id_space.h>
  29 #include <sys/esunddi.h>
  30 #include <sys/stat.h>
  31 #include <sys/mkdev.h>
  32 #include <sys/stream.h>
  33 #include <sys/strsubr.h>
  34 #include <sys/dlpi.h>
  35 #include <sys/modhash.h>
  36 #include <sys/mac.h>
  37 #include <sys/mac_provider.h>
  38 #include <sys/mac_impl.h>
  39 #include <sys/mac_client_impl.h>
  40 #include <sys/mac_client_priv.h>
  41 #include <sys/mac_soft_ring.h>
  42 #include <sys/mac_stat.h>
  43 #include <sys/dld.h>
  44 #include <sys/modctl.h>
  45 #include <sys/fs/dv_node.h>
  46 #include <sys/thread.h>
  47 #include <sys/proc.h>
  48 #include <sys/callb.h>
  49 #include <sys/cpuvar.h>
  50 #include <sys/atomic.h>
  51 #include <sys/sdt.h>
  52 #include <sys/mac_flow.h>
  53 #include <sys/ddi_intr_impl.h>
  54 #include <sys/disp.h>
  55 #include <sys/sdt.h>
  56 #include <sys/pattr.h>
  57 #include <sys/strsun.h>
  58 
  59 /*
  60  * MAC Provider Interface.
  61  *
  62  * Interface for GLDv3 compatible NIC drivers.
  63  */
  64 
  65 static void i_mac_notify_thread(void *);
  66 
  67 typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);
  68 
  69 static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
  70         mac_fanout_recompute,   /* MAC_NOTE_LINK */
  71         NULL,           /* MAC_NOTE_UNICST */
  72         NULL,           /* MAC_NOTE_TX */
  73         NULL,           /* MAC_NOTE_DEVPROMISC */
  74         NULL,           /* MAC_NOTE_FASTPATH_FLUSH */
  75         NULL,           /* MAC_NOTE_SDU_SIZE */
  76         NULL,           /* MAC_NOTE_MARGIN */
  77         NULL,           /* MAC_NOTE_CAPAB_CHG */
  78         NULL            /* MAC_NOTE_LOWLINK */
  79 };
  80 
  81 /*
  82  * Driver support functions.
  83  */
  84 
  85 /* REGISTRATION */
  86 
  87 mac_register_t *
  88 mac_alloc(uint_t mac_version)
  89 {
  90         mac_register_t *mregp;
  91 
  92         /*
  93          * Make sure there isn't a version mismatch between the driver and
  94          * the framework.  In the future, if multiple versions are
  95          * supported, this check could become more sophisticated.
  96          */
  97         if (mac_version != MAC_VERSION)
  98                 return (NULL);
  99 
 100         mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
 101         mregp->m_version = mac_version;
 102         return (mregp);
 103 }
 104 
 105 void
 106 mac_free(mac_register_t *mregp)
 107 {
 108         kmem_free(mregp, sizeof (mac_register_t));
 109 }
 110 
 111 /*
 112  * mac_register() is how drivers register new MACs with the GLDv3
 113  * framework.  The mregp argument is allocated by drivers using the
 114  * mac_alloc() function, and can be freed using mac_free() immediately upon
 115  * return from mac_register().  Upon success (0 return value), the mhp
 116  * opaque pointer becomes the driver's handle to its MAC interface, and is
 117  * the argument to all other mac module entry points.
 118  */
 119 /* ARGSUSED */
 120 int
 121 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
 122 {
 123         mac_impl_t              *mip;
 124         mactype_t               *mtype;
 125         int                     err = EINVAL;
 126         struct devnames         *dnp = NULL;
 127         uint_t                  instance;
 128         boolean_t               style1_created = B_FALSE;
 129         boolean_t               style2_created = B_FALSE;
 130         char                    *driver;
 131         minor_t                 minor = 0;
 132 
 133         /* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
 134         if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
 135                 return (EINVAL);
 136 
 137         /* Find the required MAC-Type plugin. */
 138         if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
 139                 return (EINVAL);
 140 
 141         /* Create a mac_impl_t to represent this MAC. */
 142         mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
 143 
 144         /*
 145          * The mac is not ready for open yet.
 146          */
 147         mip->mi_state_flags |= MIS_DISABLED;
 148 
 149         /*
 150          * When a mac is registered, the m_instance field can be set to:
 151          *
 152          *  0:  Get the mac's instance number from m_dip.
 153          *      This is usually used for physical device dips.
 154          *
 155          *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
 156          *      For example, when an aggregation is created with the key option,
 157          *      "key" will be used as the instance number.
 158          *
 159          *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
 160          *      This is often used when a MAC of a virtual link is registered
 161          *      (e.g., aggregation when "key" is not specified, or vnic).
 162          *
 163          * Note that the instance number is used to derive the mi_minor field
 164          * of mac_impl_t, which will then be used to derive the name of kstats
 165          * and the devfs nodes.  The first 2 cases are needed to preserve
 166          * backward compatibility.
 167          */
 168         switch (mregp->m_instance) {
 169         case 0:
 170                 instance = ddi_get_instance(mregp->m_dip);
 171                 break;
 172         case ((uint_t)-1):
 173                 minor = mac_minor_hold(B_TRUE);
 174                 if (minor == 0) {
 175                         err = ENOSPC;
 176                         goto fail;
 177                 }
 178                 instance = minor - 1;
 179                 break;
 180         default:
 181                 instance = mregp->m_instance;
 182                 if (instance >= MAC_MAX_MINOR) {
 183                         err = EINVAL;
 184                         goto fail;
 185                 }
 186                 break;
 187         }
 188 
 189         mip->mi_minor = (minor_t)(instance + 1);
 190         mip->mi_dip = mregp->m_dip;
 191         mip->mi_clients_list = NULL;
 192         mip->mi_nclients = 0;
 193 
 194         /* Set the default IEEE Port VLAN Identifier */
 195         mip->mi_pvid = 1;
 196 
 197         /* Default bridge link learning protection values */
 198         mip->mi_llimit = 1000;
 199         mip->mi_ldecay = 200;
 200 
 201         driver = (char *)ddi_driver_name(mip->mi_dip);
 202 
 203         /* Construct the MAC name as <drvname><instance> */
 204         (void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
 205             driver, instance);
 206 
 207         mip->mi_driver = mregp->m_driver;
 208 
 209         mip->mi_type = mtype;
 210         mip->mi_margin = mregp->m_margin;
 211         mip->mi_info.mi_media = mtype->mt_type;
 212         mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
 213         if (mregp->m_max_sdu <= mregp->m_min_sdu)
 214                 goto fail;
 215         if (mregp->m_multicast_sdu == 0)
 216                 mregp->m_multicast_sdu = mregp->m_max_sdu;
 217         if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
 218             mregp->m_multicast_sdu > mregp->m_max_sdu)
 219                 goto fail;
 220         mip->mi_sdu_min = mregp->m_min_sdu;
 221         mip->mi_sdu_max = mregp->m_max_sdu;
 222         mip->mi_sdu_multicast = mregp->m_multicast_sdu;
 223         mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
 224         /*
 225          * If the media supports a broadcast address, cache a pointer to it
 226          * in the mac_info_t so that upper layers can use it.
 227          */
 228         mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
 229 
 230         mip->mi_v12n_level = mregp->m_v12n;
 231 
 232         /*
 233          * Copy the unicast source address into the mac_info_t, but only if
 234          * the MAC-Type defines a non-zero address length.  We need to
 235          * handle MAC-Types that have an address length of 0
 236          * (point-to-point protocol MACs for example).
 237          */
 238         if (mip->mi_type->mt_addr_length > 0) {
 239                 if (mregp->m_src_addr == NULL)
 240                         goto fail;
 241                 mip->mi_info.mi_unicst_addr =
 242                     kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
 243                 bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
 244                     mip->mi_type->mt_addr_length);
 245 
 246                 /*
 247                  * Copy the fixed 'factory' MAC address from the immutable
 248                  * info.  This is taken to be the MAC address currently in
 249                  * use.
 250                  */
 251                 bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
 252                     mip->mi_type->mt_addr_length);
 253 
 254                 /*
 255                  * At this point, we should set up the classification
 256                  * rules etc but we delay it till mac_open() so that
 257                  * the resource discovery has taken place and we
 258                  * know someone wants to use the device. Otherwise
 259                  * memory gets allocated for Rx ring structures even
 260                  * during probe.
 261                  */
 262 
 263                 /* Copy the destination address if one is provided. */
 264                 if (mregp->m_dst_addr != NULL) {
 265                         bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
 266                             mip->mi_type->mt_addr_length);
 267                         mip->mi_dstaddr_set = B_TRUE;
 268                 }
 269         } else if (mregp->m_src_addr != NULL) {
 270                 goto fail;
 271         }
 272 
 273         /*
 274          * The format of the m_pdata is specific to the plugin.  It is
 275          * passed in as an argument to all of the plugin callbacks.  The
 276          * driver can update this information by calling
 277          * mac_pdata_update().
 278          */
 279         if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
 280                 /*
 281                  * Verify if the supplied plugin data is valid.  Note that
 282                  * even if the caller passed in a NULL pointer as plugin data,
 283                  * we still need to verify if that's valid as the plugin may
 284                  * require plugin data to function.
 285                  */
 286                 if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
 287                     mregp->m_pdata_size)) {
 288                         goto fail;
 289                 }
 290                 if (mregp->m_pdata != NULL) {
 291                         mip->mi_pdata =
 292                             kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
 293                         bcopy(mregp->m_pdata, mip->mi_pdata,
 294                             mregp->m_pdata_size);
 295                         mip->mi_pdata_size = mregp->m_pdata_size;
 296                 }
 297         } else if (mregp->m_pdata != NULL) {
 298                 /*
 299                  * The caller supplied non-NULL plugin data, but the plugin
 300                  * does not recognize plugin data.
 301                  */
 302                 err = EINVAL;
 303                 goto fail;
 304         }
 305 
 306         /*
 307          * Register the private properties.
 308          */
 309         mac_register_priv_prop(mip, mregp->m_priv_props);
 310 
 311         /*
 312          * Stash the driver callbacks into the mac_impl_t, but first sanity
 313          * check to make sure all mandatory callbacks are set.
 314          */
 315         if (mregp->m_callbacks->mc_getstat == NULL ||
 316             mregp->m_callbacks->mc_start == NULL ||
 317             mregp->m_callbacks->mc_stop == NULL ||
 318             mregp->m_callbacks->mc_setpromisc == NULL ||
 319             mregp->m_callbacks->mc_multicst == NULL) {
 320                 goto fail;
 321         }
 322         mip->mi_callbacks = mregp->m_callbacks;
 323 
 324         if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
 325             &mip->mi_capab_legacy)) {
 326                 mip->mi_state_flags |= MIS_LEGACY;
 327                 mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
 328         } else {
 329                 mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
 330                     mip->mi_minor);
 331         }
 332 
 333         /*
 334          * Allocate a notification thread. thread_create blocks for memory
 335          * if needed, it never fails.
 336          */
 337         mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
 338             mip, 0, &p0, TS_RUN, minclsyspri);
 339 
 340         /*
 341          * Initialize the capabilities
 342          */
 343 
 344         bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
 345         bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));
 346 
 347         if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
 348                 mip->mi_state_flags |= MIS_IS_VNIC;
 349 
 350         if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
 351                 mip->mi_state_flags |= MIS_IS_AGGR;
 352 
 353         mac_addr_factory_init(mip);
 354 
 355         /*
 356          * Enforce the virtrualization level registered.
 357          */
 358         if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
 359                 if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
 360                     mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
 361                         goto fail;
 362 
 363                 /*
 364                  * The driver needs to register at least rx rings for this
 365                  * virtualization level.
 366                  */
 367                 if (mip->mi_rx_groups == NULL)
 368                         goto fail;
 369         }
 370 
 371         /*
 372          * The driver must set mc_unicst entry point to NULL when it advertises
 373          * CAP_RINGS for rx groups.
 374          */
 375         if (mip->mi_rx_groups != NULL) {
 376                 if (mregp->m_callbacks->mc_unicst != NULL)
 377                         goto fail;
 378         } else {
 379                 if (mregp->m_callbacks->mc_unicst == NULL)
 380                         goto fail;
 381         }
 382 
 383         /*
 384          * Initialize MAC addresses. Must be called after mac_init_rings().
 385          */
 386         mac_init_macaddr(mip);
 387 
 388         mip->mi_share_capab.ms_snum = 0;
 389         if (mip->mi_v12n_level & MAC_VIRT_HIO) {
 390                 (void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
 391                     &mip->mi_share_capab);
 392         }
 393 
 394         /*
 395          * Initialize the kstats for this device.
 396          */
 397         mac_driver_stat_create(mip);
 398 
 399         /* Zero out any properties. */
 400         bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
 401 
 402         if (mip->mi_minor <= MAC_MAX_MINOR) {
 403                 /* Create a style-2 DLPI device */
 404                 if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
 405                     DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
 406                         goto fail;
 407                 style2_created = B_TRUE;
 408 
 409                 /* Create a style-1 DLPI device */
 410                 if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
 411                     mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
 412                         goto fail;
 413                 style1_created = B_TRUE;
 414         }
 415 
 416         mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
 417 
 418         rw_enter(&i_mac_impl_lock, RW_WRITER);
 419         if (mod_hash_insert(i_mac_impl_hash,
 420             (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
 421                 rw_exit(&i_mac_impl_lock);
 422                 err = EEXIST;
 423                 goto fail;
 424         }
 425 
 426         DTRACE_PROBE2(mac__register, struct devnames *, dnp,
 427             (mac_impl_t *), mip);
 428 
 429         /*
 430          * Mark the MAC to be ready for open.
 431          */
 432         mip->mi_state_flags &= ~MIS_DISABLED;
 433         rw_exit(&i_mac_impl_lock);
 434 
 435         atomic_inc_32(&i_mac_impl_count);
 436 
 437         cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
 438         *mhp = (mac_handle_t)mip;
 439         return (0);
 440 
 441 fail:
 442         if (style1_created)
 443                 ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
 444 
 445         if (style2_created)
 446                 ddi_remove_minor_node(mip->mi_dip, driver);
 447 
 448         mac_addr_factory_fini(mip);
 449 
 450         /* Clean up registered MAC addresses */
 451         mac_fini_macaddr(mip);
 452 
 453         /* Clean up registered rings */
 454         mac_free_rings(mip, MAC_RING_TYPE_RX);
 455         mac_free_rings(mip, MAC_RING_TYPE_TX);
 456 
 457         /* Clean up notification thread */
 458         if (mip->mi_notify_thread != NULL)
 459                 i_mac_notify_exit(mip);
 460 
 461         if (mip->mi_info.mi_unicst_addr != NULL) {
 462                 kmem_free(mip->mi_info.mi_unicst_addr,
 463                     mip->mi_type->mt_addr_length);
 464                 mip->mi_info.mi_unicst_addr = NULL;
 465         }
 466 
 467         mac_driver_stat_delete(mip);
 468 
 469         if (mip->mi_type != NULL) {
 470                 atomic_dec_32(&mip->mi_type->mt_ref);
 471                 mip->mi_type = NULL;
 472         }
 473 
 474         if (mip->mi_pdata != NULL) {
 475                 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
 476                 mip->mi_pdata = NULL;
 477                 mip->mi_pdata_size = 0;
 478         }
 479 
 480         if (minor != 0) {
 481                 ASSERT(minor > MAC_MAX_MINOR);
 482                 mac_minor_rele(minor);
 483         }
 484 
 485         mip->mi_state_flags = 0;
 486         mac_unregister_priv_prop(mip);
 487 
 488         /*
 489          * Clear the state before destroying the mac_impl_t
 490          */
 491         mip->mi_state_flags = 0;
 492 
 493         kmem_cache_free(i_mac_impl_cachep, mip);
 494         return (err);
 495 }
 496 
 497 /*
 498  * Unregister from the GLDv3 framework
 499  */
 500 int
 501 mac_unregister(mac_handle_t mh)
 502 {
 503         int                     err;
 504         mac_impl_t              *mip = (mac_impl_t *)mh;
 505         mod_hash_val_t          val;
 506         mac_margin_req_t        *mmr, *nextmmr;
 507 
 508         /* Fail the unregister if there are any open references to this mac. */
 509         if ((err = mac_disable_nowait(mh)) != 0)
 510                 return (err);
 511 
 512         /*
 513          * Clean up notification thread and wait for it to exit.
 514          */
 515         i_mac_notify_exit(mip);
 516 
 517         i_mac_perim_enter(mip);
 518 
 519         /*
 520          * There is still resource properties configured over this mac.
 521          */
 522         if (mip->mi_resource_props.mrp_mask != 0)
 523                 mac_fastpath_enable((mac_handle_t)mip);
 524 
 525         if (mip->mi_minor < MAC_MAX_MINOR + 1) {
 526                 ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
 527                 ddi_remove_minor_node(mip->mi_dip,
 528                     (char *)ddi_driver_name(mip->mi_dip));
 529         }
 530 
 531         ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
 532             MIS_EXCLUSIVE));
 533 
 534         mac_driver_stat_delete(mip);
 535 
 536         (void) mod_hash_remove(i_mac_impl_hash,
 537             (mod_hash_key_t)mip->mi_name, &val);
 538         ASSERT(mip == (mac_impl_t *)val);
 539 
 540         ASSERT(i_mac_impl_count > 0);
 541         atomic_dec_32(&i_mac_impl_count);
 542 
 543         if (mip->mi_pdata != NULL)
 544                 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
 545         mip->mi_pdata = NULL;
 546         mip->mi_pdata_size = 0;
 547 
 548         /*
 549          * Free the list of margin request.
 550          */
 551         for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
 552                 nextmmr = mmr->mmr_nextp;
 553                 kmem_free(mmr, sizeof (mac_margin_req_t));
 554         }
 555         mip->mi_mmrp = NULL;
 556 
 557         mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
 558         kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
 559         mip->mi_info.mi_unicst_addr = NULL;
 560 
 561         atomic_dec_32(&mip->mi_type->mt_ref);
 562         mip->mi_type = NULL;
 563 
 564         /*
 565          * Free the primary MAC address.
 566          */
 567         mac_fini_macaddr(mip);
 568 
 569         /*
 570          * free all rings
 571          */
 572         mac_free_rings(mip, MAC_RING_TYPE_RX);
 573         mac_free_rings(mip, MAC_RING_TYPE_TX);
 574 
 575         mac_addr_factory_fini(mip);
 576 
 577         bzero(mip->mi_addr, MAXMACADDRLEN);
 578         bzero(mip->mi_dstaddr, MAXMACADDRLEN);
 579         mip->mi_dstaddr_set = B_FALSE;
 580 
 581         /* and the flows */
 582         mac_flow_tab_destroy(mip->mi_flow_tab);
 583         mip->mi_flow_tab = NULL;
 584 
 585         if (mip->mi_minor > MAC_MAX_MINOR)
 586                 mac_minor_rele(mip->mi_minor);
 587 
 588         cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
 589 
 590         /*
 591          * Reset the perim related fields to default values before
 592          * kmem_cache_free
 593          */
 594         i_mac_perim_exit(mip);
 595         mip->mi_state_flags = 0;
 596 
 597         mac_unregister_priv_prop(mip);
 598 
 599         ASSERT(mip->mi_bridge_link == NULL);
 600         kmem_cache_free(i_mac_impl_cachep, mip);
 601 
 602         return (0);
 603 }
 604 
 605 /* DATA RECEPTION */
 606 
 607 /*
 608  * This function is invoked for packets received by the MAC driver in
 609  * interrupt context. The ring generation number provided by the driver
 610  * is matched with the ring generation number held in MAC. If they do not
 611  * match, received packets are considered stale packets coming from an older
 612  * assignment of the ring. Drop them.
 613  */
 614 void
 615 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
 616     uint64_t mr_gen_num)
 617 {
 618         mac_ring_t              *mr = (mac_ring_t *)mrh;
 619 
 620         if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
 621                 DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
 622                     mr->mr_gen_num, uint64_t, mr_gen_num);
 623                 freemsgchain(mp_chain);
 624                 return;
 625         }
 626         mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
 627 }
 628 
 629 /*
 630  * This function is invoked for each packet received by the underlying driver.
 631  */
 632 void
 633 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
 634 {
 635         mac_impl_t *mip = (mac_impl_t *)mh;
 636 
 637         /*
 638          * Check if the link is part of a bridge.  If not, then we don't need
 639          * to take the lock to remain consistent.  Make this common case
 640          * lock-free and tail-call optimized.
 641          */
 642         if (mip->mi_bridge_link == NULL) {
 643                 mac_rx_common(mh, mrh, mp_chain);
 644         } else {
 645                 /*
 646                  * Once we take a reference on the bridge link, the bridge
 647                  * module itself can't unload, so the callback pointers are
 648                  * stable.
 649                  */
 650                 mutex_enter(&mip->mi_bridge_lock);
 651                 if ((mh = mip->mi_bridge_link) != NULL)
 652                         mac_bridge_ref_cb(mh, B_TRUE);
 653                 mutex_exit(&mip->mi_bridge_lock);
 654                 if (mh == NULL) {
 655                         mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
 656                 } else {
 657                         mac_bridge_rx_cb(mh, mrh, mp_chain);
 658                         mac_bridge_ref_cb(mh, B_FALSE);
 659                 }
 660         }
 661 }
 662 
 663 /*
 664  * Special case function: this allows snooping of packets transmitted and
 665  * received by TRILL. By design, they go directly into the TRILL module.
 666  */
 667 void
 668 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
 669 {
 670         mac_impl_t *mip = (mac_impl_t *)mh;
 671 
 672         if (mip->mi_promisc_list != NULL)
 673                 mac_promisc_dispatch(mip, mp, NULL);
 674 }
 675 
 676 /*
 677  * This is the upward reentry point for packets arriving from the bridging
 678  * module and from mac_rx for links not part of a bridge.
 679  */
 680 void
 681 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
 682 {
 683         mac_impl_t              *mip = (mac_impl_t *)mh;
 684         mac_ring_t              *mr = (mac_ring_t *)mrh;
 685         mac_soft_ring_set_t     *mac_srs;
 686         mblk_t                  *bp = mp_chain;
 687         boolean_t               hw_classified = B_FALSE;
 688 
 689         /*
 690          * If there are any promiscuous mode callbacks defined for
 691          * this MAC, pass them a copy if appropriate.
 692          */
 693         if (mip->mi_promisc_list != NULL)
 694                 mac_promisc_dispatch(mip, mp_chain, NULL);
 695 
 696         if (mr != NULL) {
 697                 /*
 698                  * If the SRS teardown has started, just return. The 'mr'
 699                  * continues to be valid until the driver unregisters the mac.
 700                  * Hardware classified packets will not make their way up
 701                  * beyond this point once the teardown has started. The driver
 702                  * is never passed a pointer to a flow entry or SRS or any
 703                  * structure that can be freed much before mac_unregister.
 704                  */
 705                 mutex_enter(&mr->mr_lock);
 706                 if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
 707                     (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
 708                         mutex_exit(&mr->mr_lock);
 709                         freemsgchain(mp_chain);
 710                         return;
 711                 }
 712                 if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
 713                         hw_classified = B_TRUE;
 714                         MR_REFHOLD_LOCKED(mr);
 715                 }
 716                 mutex_exit(&mr->mr_lock);
 717 
 718                 /*
 719                  * We check if an SRS is controlling this ring.
 720                  * If so, we can directly call the srs_lower_proc
 721                  * routine otherwise we need to go through mac_rx_classify
 722                  * to reach the right place.
 723                  */
 724                 if (hw_classified) {
 725                         mac_srs = mr->mr_srs;
 726                         /*
 727                          * This is supposed to be the fast path.
 728                          * All packets received though here were steered by
 729                          * the hardware classifier, and share the same
 730                          * MAC header info.
 731                          */
 732                         mac_srs->srs_rx.sr_lower_proc(mh,
 733                             (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
 734                         MR_REFRELE(mr);
 735                         return;
 736                 }
 737                 /* We'll fall through to software classification */
 738         } else {
 739                 flow_entry_t *flent;
 740                 int err;
 741 
 742                 rw_enter(&mip->mi_rw_lock, RW_READER);
 743                 if (mip->mi_single_active_client != NULL) {
 744                         flent = mip->mi_single_active_client->mci_flent_list;
 745                         FLOW_TRY_REFHOLD(flent, err);
 746                         rw_exit(&mip->mi_rw_lock);
 747                         if (err == 0) {
 748                                 (flent->fe_cb_fn)(flent->fe_cb_arg1,
 749                                     flent->fe_cb_arg2, mp_chain, B_FALSE);
 750                                 FLOW_REFRELE(flent);
 751                                 return;
 752                         }
 753                 } else {
 754                         rw_exit(&mip->mi_rw_lock);
 755                 }
 756         }
 757 
 758         if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
 759                 if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
 760                         return;
 761         }
 762 
 763         freemsgchain(bp);
 764 }
 765 
 766 /* DATA TRANSMISSION */
 767 
 768 /*
 769  * A driver's notification to resume transmission, in case of a provider
 770  * without TX rings.
 771  */
 772 void
 773 mac_tx_update(mac_handle_t mh)
 774 {
 775         mac_tx_ring_update(mh, NULL);
 776 }
 777 
 778 /*
 779  * A driver's notification to resume transmission on the specified TX ring.
 780  */
 781 void
 782 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
 783 {
 784         i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
 785 }
 786 
 787 /* LINK STATE */
 788 /*
 789  * Notify the MAC layer about a link state change
 790  */
 791 void
 792 mac_link_update(mac_handle_t mh, link_state_t link)
 793 {
 794         mac_impl_t      *mip = (mac_impl_t *)mh;
 795 
 796         /*
 797          * Save the link state.
 798          */
 799         mip->mi_lowlinkstate = link;
 800 
 801         /*
 802          * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
 803          * thread to deliver both lower and upper notifications.
 804          */
 805         i_mac_notify(mip, MAC_NOTE_LOWLINK);
 806 }
 807 
 808 /*
 809  * Notify the MAC layer about a link state change due to bridging.
 810  */
 811 void
 812 mac_link_redo(mac_handle_t mh, link_state_t link)
 813 {
 814         mac_impl_t      *mip = (mac_impl_t *)mh;
 815 
 816         /*
 817          * Save the link state.
 818          */
 819         mip->mi_linkstate = link;
 820 
 821         /*
 822          * Send a MAC_NOTE_LINK notification.  Only upper notifications are
 823          * made.
 824          */
 825         i_mac_notify(mip, MAC_NOTE_LINK);
 826 }
 827 
 828 /* MINOR NODE HANDLING */
 829 
 830 /*
 831  * Given a dev_t, return the instance number (PPA) associated with it.
 832  * Drivers can use this in their getinfo(9e) implementation to lookup
 833  * the instance number (i.e. PPA) of the device, to use as an index to
 834  * their own array of soft state structures.
 835  *
 836  * Returns -1 on error.
 837  */
 838 int
 839 mac_devt_to_instance(dev_t devt)
 840 {
 841         return (dld_devt_to_instance(devt));
 842 }
 843 
 844 /*
 845  * This function returns the first minor number that is available for
 846  * driver private use.  All minor numbers smaller than this are
 847  * reserved for GLDv3 use.
 848  */
 849 minor_t
 850 mac_private_minor(void)
 851 {
 852         return (MAC_PRIVATE_MINOR);
 853 }
 854 
 855 /* OTHER CONTROL INFORMATION */
 856 
 857 /*
 858  * A driver notified us that its primary MAC address has changed.
 859  */
 860 void
 861 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
 862 {
 863         mac_impl_t      *mip = (mac_impl_t *)mh;
 864 
 865         if (mip->mi_type->mt_addr_length == 0)
 866                 return;
 867 
 868         i_mac_perim_enter(mip);
 869 
 870         /*
 871          * If address changes, freshen the MAC address value and update
 872          * all MAC clients that share this MAC address.
 873          */
 874         if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
 875                 mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
 876                     (uint8_t *)addr);
 877         }
 878 
 879         i_mac_perim_exit(mip);
 880 
 881         /*
 882          * Send a MAC_NOTE_UNICST notification.
 883          */
 884         i_mac_notify(mip, MAC_NOTE_UNICST);
 885 }
 886 
 887 void
 888 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
 889 {
 890         mac_impl_t      *mip = (mac_impl_t *)mh;
 891 
 892         if (mip->mi_type->mt_addr_length == 0)
 893                 return;
 894 
 895         i_mac_perim_enter(mip);
 896         bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
 897         i_mac_perim_exit(mip);
 898         i_mac_notify(mip, MAC_NOTE_DEST);
 899 }
 900 
 901 /*
 902  * MAC plugin information changed.
 903  */
 904 int
 905 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
 906 {
 907         mac_impl_t      *mip = (mac_impl_t *)mh;
 908 
 909         /*
 910          * Verify that the plugin supports MAC plugin data and that the
 911          * supplied data is valid.
 912          */
 913         if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
 914                 return (EINVAL);
 915         if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
 916                 return (EINVAL);
 917 
 918         if (mip->mi_pdata != NULL)
 919                 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
 920 
 921         mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
 922         bcopy(mac_pdata, mip->mi_pdata, dsize);
 923         mip->mi_pdata_size = dsize;
 924 
 925         /*
 926          * Since the MAC plugin data is used to construct MAC headers that
 927          * were cached in fast-path headers, we need to flush fast-path
 928          * information for links associated with this mac.
 929          */
 930         i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
 931         return (0);
 932 }
 933 
 934 /*
 935  * Invoked by driver as well as the framework to notify its capability change.
 936  */
 937 void
 938 mac_capab_update(mac_handle_t mh)
 939 {
 940         /* Send MAC_NOTE_CAPAB_CHG notification */
 941         i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
 942 }
 943 
 944 /*
 945  * Used by normal drivers to update the max sdu size.
 946  * We need to handle the case of a smaller mi_sdu_multicast
 947  * since this is called by mac_set_mtu() even for drivers that
 948  * have differing unicast and multicast mtu and we don't want to
 949  * increase the multicast mtu by accident in that case.
 950  */
 951 int
 952 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
 953 {
 954         mac_impl_t      *mip = (mac_impl_t *)mh;
 955 
 956         if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
 957                 return (EINVAL);
 958         mip->mi_sdu_max = sdu_max;
 959         if (mip->mi_sdu_multicast > mip->mi_sdu_max)
 960                 mip->mi_sdu_multicast = mip->mi_sdu_max;
 961 
 962         /* Send a MAC_NOTE_SDU_SIZE notification. */
 963         i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
 964         return (0);
 965 }
 966 
 967 /*
 968  * Version of the above function that is used by drivers that have a different
 969  * max sdu size for multicast/broadcast vs. unicast.
 970  */
 971 int
 972 mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
 973 {
 974         mac_impl_t      *mip = (mac_impl_t *)mh;
 975 
 976         if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
 977                 return (EINVAL);
 978         if (sdu_multicast == 0)
 979                 sdu_multicast = sdu_max;
 980         if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
 981                 return (EINVAL);
 982         mip->mi_sdu_max = sdu_max;
 983         mip->mi_sdu_multicast = sdu_multicast;
 984 
 985         /* Send a MAC_NOTE_SDU_SIZE notification. */
 986         i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
 987         return (0);
 988 }
 989 
 990 static void
 991 mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
 992 {
 993         mac_client_impl_t *mcip;
 994         flow_entry_t *flent;
 995         mac_soft_ring_set_t *mac_rx_srs;
 996         mac_cpus_t *srs_cpu;
 997         int i;
 998 
 999         if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
1000             (!ring->mr_info.mri_intr.mi_ddi_shared)) {
1001                 /* interrupt can be re-targeted */
1002                 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
1003                 flent = mcip->mci_flent;
1004                 if (ring->mr_type == MAC_RING_TYPE_RX) {
1005                         for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
1006                                 mac_rx_srs = flent->fe_rx_srs[i];
1007                                 if (mac_rx_srs->srs_ring != ring)
1008                                         continue;
1009                                 srs_cpu = &mac_rx_srs->srs_cpu;
1010                                 mutex_enter(&cpu_lock);
1011                                 mac_rx_srs_retarget_intr(mac_rx_srs,
1012                                     srs_cpu->mc_rx_intr_cpu);
1013                                 mutex_exit(&cpu_lock);
1014                                 break;
1015                         }
1016                 } else {
1017                         if (flent->fe_tx_srs != NULL) {
1018                                 mutex_enter(&cpu_lock);
1019                                 mac_tx_srs_retarget_intr(
1020                                     flent->fe_tx_srs);
1021                                 mutex_exit(&cpu_lock);
1022                         }
1023                 }
1024         }
1025 }
1026 
1027 /*
1028  * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
1029  * their clients. There is a 1-1 mapping pseudo ring and the hardware
1030  * ring. ddi interrupt handles are exported from the hardware ring to
1031  * the pseudo ring. Thus when the interrupt handle changes, clients of
1032  * aggr that are using the handle need to use the new handle and
1033  * re-target their interrupts.
1034  */
1035 static void
1036 mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
1037     ddi_intr_handle_t ddh)
1038 {
1039         mac_ring_t *pring;
1040         mac_group_t *pgroup;
1041         mac_impl_t *pmip;
1042         char macname[MAXNAMELEN];
1043         mac_perim_handle_t p_mph;
1044         uint64_t saved_gen_num;
1045 
1046 again:
1047         pring = (mac_ring_t *)ring->mr_prh;
1048         pgroup = (mac_group_t *)pring->mr_gh;
1049         pmip = (mac_impl_t *)pgroup->mrg_mh;
1050         saved_gen_num = ring->mr_gen_num;
1051         (void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
1052         /*
1053          * We need to enter aggr's perimeter. The locking hierarchy
1054          * dictates that aggr's perimeter should be entered first
1055          * and then the port's perimeter. So drop the port's
1056          * perimeter, enter aggr's and then re-enter port's
1057          * perimeter.
1058          */
1059         i_mac_perim_exit(mip);
1060         /*
1061          * While we know pmip is the aggr's mip, there is a
1062          * possibility that aggr could have unregistered by
1063          * the time we exit port's perimeter (mip) and
1064          * enter aggr's perimeter (pmip). To avoid that
1065          * scenario, enter aggr's perimeter using its name.
1066          */
1067         if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
1068                 return;
1069         i_mac_perim_enter(mip);
1070         /*
1071          * Check if the ring got assigned to another aggregation before
1072          * be could enter aggr's and the port's perimeter. When a ring
1073          * gets deleted from an aggregation, it calls mac_stop_ring()
1074          * which increments the generation number. So checking
1075          * generation number will be enough.
1076          */
1077         if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
1078                 i_mac_perim_exit(mip);
1079                 mac_perim_exit(p_mph);
1080                 i_mac_perim_enter(mip);
1081                 goto again;
1082         }
1083 
1084         /* Check if pseudo ring is still present */
1085         if (ring->mr_prh != NULL) {
1086                 pring->mr_info.mri_intr.mi_ddi_handle = ddh;
1087                 pring->mr_info.mri_intr.mi_ddi_shared =
1088                     ring->mr_info.mri_intr.mi_ddi_shared;
1089                 if (ddh != NULL)
1090                         mac_ring_intr_retarget(pgroup, pring);
1091         }
1092         i_mac_perim_exit(mip);
1093         mac_perim_exit(p_mph);
1094 }
1095 /*
1096  * API called by driver to provide new interrupt handle for TX/RX rings.
1097  * This usually happens when IRM (Interrupt Resource Manangement)
1098  * framework either gives the driver more MSI-x interrupts or takes
1099  * away MSI-x interrupts from the driver.
1100  */
1101 void
1102 mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
1103 {
1104         mac_ring_t      *ring = (mac_ring_t *)mrh;
1105         mac_group_t     *group = (mac_group_t *)ring->mr_gh;
1106         mac_impl_t      *mip = (mac_impl_t *)group->mrg_mh;
1107 
1108         i_mac_perim_enter(mip);
1109         ring->mr_info.mri_intr.mi_ddi_handle = ddh;
1110         if (ddh == NULL) {
1111                 /* Interrupts being reset */
1112                 ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
1113                 if (ring->mr_prh != NULL) {
1114                         mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1115                         return;
1116                 }
1117         } else {
1118                 /* New interrupt handle */
1119                 mac_compare_ddi_handle(mip->mi_rx_groups,
1120                     mip->mi_rx_group_count, ring);
1121                 if (!ring->mr_info.mri_intr.mi_ddi_shared) {
1122                         mac_compare_ddi_handle(mip->mi_tx_groups,
1123                             mip->mi_tx_group_count, ring);
1124                 }
1125                 if (ring->mr_prh != NULL) {
1126                         mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1127                         return;
1128                 } else {
1129                         mac_ring_intr_retarget(group, ring);
1130                 }
1131         }
1132         i_mac_perim_exit(mip);
1133 }
1134 
1135 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
1136 
1137 /*
1138  * Updates the mac_impl structure with the current state of the link
1139  */
1140 static void
1141 i_mac_log_link_state(mac_impl_t *mip)
1142 {
1143         /*
1144          * If no change, then it is not interesting.
1145          */
1146         if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
1147                 return;
1148 
1149         switch (mip->mi_lowlinkstate) {
1150         case LINK_STATE_UP:
1151                 if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
1152                         char det[200];
1153 
1154                         mip->mi_type->mt_ops.mtops_link_details(det,
1155                             sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
1156 
1157                         cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
1158                 } else {
1159                         cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
1160                 }
1161                 break;
1162 
1163         case LINK_STATE_DOWN:
1164                 /*
1165                  * Only transitions from UP to DOWN are interesting
1166                  */
1167                 if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
1168                         cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
1169                 break;
1170 
1171         case LINK_STATE_UNKNOWN:
1172                 /*
1173                  * This case is normally not interesting.
1174                  */
1175                 break;
1176         }
1177         mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1178 }
1179 
1180 /*
1181  * Main routine for the callbacks notifications thread
1182  */
1183 static void
1184 i_mac_notify_thread(void *arg)
1185 {
1186         mac_impl_t      *mip = arg;
1187         callb_cpr_t     cprinfo;
1188         mac_cb_t        *mcb;
1189         mac_cb_info_t   *mcbi;
1190         mac_notify_cb_t *mncb;
1191 
1192         mcbi = &mip->mi_notify_cb_info;
1193         CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
1194             "i_mac_notify_thread");
1195 
1196         mutex_enter(mcbi->mcbi_lockp);
1197 
1198         for (;;) {
1199                 uint32_t        bits;
1200                 uint32_t        type;
1201 
1202                 bits = mip->mi_notify_bits;
1203                 if (bits == 0) {
1204                         CALLB_CPR_SAFE_BEGIN(&cprinfo);
1205                         cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1206                         CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
1207                         continue;
1208                 }
1209                 mip->mi_notify_bits = 0;
1210                 if ((bits & (1 << MAC_NNOTE)) != 0) {
1211                         /* request to quit */
1212                         ASSERT(mip->mi_state_flags & MIS_DISABLED);
1213                         break;
1214                 }
1215 
1216                 mutex_exit(mcbi->mcbi_lockp);
1217 
1218                 /*
1219                  * Log link changes on the actual link, but then do reports on
1220                  * synthetic state (if part of a bridge).
1221                  */
1222                 if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
1223                         link_state_t newstate;
1224                         mac_handle_t mh;
1225 
1226                         i_mac_log_link_state(mip);
1227                         newstate = mip->mi_lowlinkstate;
1228                         if (mip->mi_bridge_link != NULL) {
1229                                 mutex_enter(&mip->mi_bridge_lock);
1230                                 if ((mh = mip->mi_bridge_link) != NULL) {
1231                                         newstate = mac_bridge_ls_cb(mh,
1232                                             newstate);
1233                                 }
1234                                 mutex_exit(&mip->mi_bridge_lock);
1235                         }
1236                         if (newstate != mip->mi_linkstate) {
1237                                 mip->mi_linkstate = newstate;
1238                                 bits |= 1 << MAC_NOTE_LINK;
1239                         }
1240                 }
1241 
1242                 /*
1243                  * Do notification callbacks for each notification type.
1244                  */
1245                 for (type = 0; type < MAC_NNOTE; type++) {
1246                         if ((bits & (1 << type)) == 0) {
1247                                 continue;
1248                         }
1249 
1250                         if (mac_notify_cb_list[type] != NULL)
1251                                 (*mac_notify_cb_list[type])(mip);
1252 
1253                         /*
1254                          * Walk the list of notifications.
1255                          */
1256                         MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
1257                         for (mcb = mip->mi_notify_cb_list; mcb != NULL;
1258                             mcb = mcb->mcb_nextp) {
1259                                 mncb = (mac_notify_cb_t *)mcb->mcb_objp;
1260                                 mncb->mncb_fn(mncb->mncb_arg, type);
1261                         }
1262                         MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
1263                             &mip->mi_notify_cb_list);
1264                 }
1265 
1266                 mutex_enter(mcbi->mcbi_lockp);
1267         }
1268 
1269         mip->mi_state_flags |= MIS_NOTIFY_DONE;
1270         cv_broadcast(&mcbi->mcbi_cv);
1271 
1272         /* CALLB_CPR_EXIT drops the lock */
1273         CALLB_CPR_EXIT(&cprinfo);
1274         thread_exit();
1275 }
1276 
1277 /*
1278  * Signal the i_mac_notify_thread asking it to quit.
1279  * Then wait till it is done.
1280  */
1281 void
1282 i_mac_notify_exit(mac_impl_t *mip)
1283 {
1284         mac_cb_info_t   *mcbi;
1285 
1286         mcbi = &mip->mi_notify_cb_info;
1287 
1288         mutex_enter(mcbi->mcbi_lockp);
1289         mip->mi_notify_bits = (1 << MAC_NNOTE);
1290         cv_broadcast(&mcbi->mcbi_cv);
1291 
1292 
1293         while ((mip->mi_notify_thread != NULL) &&
1294             !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1295                 cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1296         }
1297 
1298         /* Necessary clean up before doing kmem_cache_free */
1299         mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1300         mip->mi_notify_bits = 0;
1301         mip->mi_notify_thread = NULL;
1302         mutex_exit(mcbi->mcbi_lockp);
1303 }
1304 
1305 /*
1306  * Entry point invoked by drivers to dynamically add a ring to an
1307  * existing group.
1308  */
1309 int
1310 mac_group_add_ring(mac_group_handle_t gh, int index)
1311 {
1312         mac_group_t *group = (mac_group_t *)gh;
1313         mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1314         int ret;
1315 
1316         i_mac_perim_enter(mip);
1317         ret = i_mac_group_add_ring(group, NULL, index);
1318         i_mac_perim_exit(mip);
1319         return (ret);
1320 }
1321 
1322 /*
1323  * Entry point invoked by drivers to dynamically remove a ring
1324  * from an existing group. The specified ring handle must no longer
1325  * be used by the driver after a call to this function.
1326  */
1327 void
1328 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1329 {
1330         mac_group_t *group = (mac_group_t *)gh;
1331         mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1332 
1333         i_mac_perim_enter(mip);
1334         i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1335         i_mac_perim_exit(mip);
1336 }
1337 
1338 /*
1339  * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
1340  * entry points.
1341  */
1342 
1343 void
1344 mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
1345 {
1346         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1347 
1348         /* nothing to do if the caller doesn't want the default value */
1349         if (pr->pr_default == NULL)
1350                 return;
1351 
1352         ASSERT(pr->pr_default_size >= sizeof (uint8_t));
1353 
1354         *(uint8_t *)(pr->pr_default) = val;
1355         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1356 }
1357 
1358 void
1359 mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
1360 {
1361         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1362 
1363         /* nothing to do if the caller doesn't want the default value */
1364         if (pr->pr_default == NULL)
1365                 return;
1366 
1367         ASSERT(pr->pr_default_size >= sizeof (uint64_t));
1368 
1369         bcopy(&val, pr->pr_default, sizeof (val));
1370 
1371         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1372 }
1373 
1374 void
1375 mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
1376 {
1377         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1378 
1379         /* nothing to do if the caller doesn't want the default value */
1380         if (pr->pr_default == NULL)
1381                 return;
1382 
1383         ASSERT(pr->pr_default_size >= sizeof (uint32_t));
1384 
1385         bcopy(&val, pr->pr_default, sizeof (val));
1386 
1387         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1388 }
1389 
1390 void
1391 mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
1392 {
1393         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1394 
1395         /* nothing to do if the caller doesn't want the default value */
1396         if (pr->pr_default == NULL)
1397                 return;
1398 
1399         if (strlen(str) >= pr->pr_default_size)
1400                 pr->pr_errno = ENOBUFS;
1401         else
1402                 (void) strlcpy(pr->pr_default, str, pr->pr_default_size);
1403         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1404 }
1405 
1406 void
1407 mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
1408     link_flowctrl_t val)
1409 {
1410         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1411 
1412         /* nothing to do if the caller doesn't want the default value */
1413         if (pr->pr_default == NULL)
1414                 return;
1415 
1416         ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));
1417 
1418         bcopy(&val, pr->pr_default, sizeof (val));
1419 
1420         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1421 }
1422 
1423 void
1424 mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
1425     uint32_t max)
1426 {
1427         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1428         mac_propval_range_t *range = pr->pr_range;
1429         mac_propval_uint32_range_t *range32;
1430 
1431         /* nothing to do if the caller doesn't want the range info */
1432         if (range == NULL)
1433                 return;
1434 
1435         if (pr->pr_range_cur_count++ == 0) {
1436                 /* first range */
1437                 pr->pr_flags |= MAC_PROP_INFO_RANGE;
1438                 range->mpr_type = MAC_PROPVAL_UINT32;
1439         } else {
1440                 /* all ranges of a property should be of the same type */
1441                 ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
1442                 if (pr->pr_range_cur_count > range->mpr_count) {
1443                         pr->pr_errno = ENOSPC;
1444                         return;
1445                 }
1446         }
1447 
1448         range32 = range->mpr_range_uint32;
1449         range32[pr->pr_range_cur_count - 1].mpur_min = min;
1450         range32[pr->pr_range_cur_count - 1].mpur_max = max;
1451 }
1452 
1453 void
1454 mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
1455 {
1456         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1457 
1458         pr->pr_perm = perm;
1459         pr->pr_flags |= MAC_PROP_INFO_PERM;
1460 }
1461 
1462 void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff,
1463     uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
1464 {
1465         uint32_t flags;
1466 
1467         ASSERT(DB_TYPE(mp) == M_DATA);
1468 
1469         flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
1470         if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
1471                 if (value != NULL)
1472                         *value = (uint32_t)DB_CKSUM16(mp);
1473                 if ((flags & HCK_PARTIALCKSUM) != 0) {
1474                         if (start != NULL)
1475                                 *start = (uint32_t)DB_CKSUMSTART(mp);
1476                         if (stuff != NULL)
1477                                 *stuff = (uint32_t)DB_CKSUMSTUFF(mp);
1478                         if (end != NULL)
1479                                 *end = (uint32_t)DB_CKSUMEND(mp);
1480                 }
1481         }
1482 
1483         if (flags_ptr != NULL)
1484                 *flags_ptr = flags;
1485 }
1486 
1487 void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff,
1488     uint32_t end, uint32_t value, uint32_t flags)
1489 {
1490         ASSERT(DB_TYPE(mp) == M_DATA);
1491 
1492         DB_CKSUMSTART(mp) = (intptr_t)start;
1493         DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
1494         DB_CKSUMEND(mp) = (intptr_t)end;
1495         DB_CKSUMFLAGS(mp) = (uint16_t)flags;
1496         DB_CKSUM16(mp) = (uint16_t)value;
1497 }
1498 
1499 void
1500 mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1501 {
1502         ASSERT(DB_TYPE(mp) == M_DATA);
1503 
1504         if (flags != NULL) {
1505                 *flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1506                 if ((*flags != 0) && (mss != NULL))
1507                         *mss = (uint32_t)DB_LSOMSS(mp);
1508         }
1509 }