1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2015 Joyent, Inc.
  24  */
  25 
  26 /*
  27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
  28  *
  29  * An instance of the structure aggr_grp_t is allocated for each
  30  * link aggregation group. When created, aggr_grp_t objects are
  31  * entered into the aggr_grp_hash hash table maintained by the modhash
  32  * module. The hash key is the linkid associated with the link
  33  * aggregation group.
  34  *
 * A set of MAC ports is associated with each aggregation group.
  36  *
  37  * Aggr pseudo TX rings
  38  * --------------------
  39  * The underlying ports (NICs) in an aggregation can have TX rings. To
  40  * enhance aggr's performance, these TX rings are made available to the
 * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
 * they are already present and implemented on the RX side, where they are
 * called pseudo RX rings. The same concept is extended to the TX side where
  44  * each TX ring of an underlying port is reflected in aggr as a pseudo
  45  * TX ring. Thus each pseudo TX ring will map to a specific hardware TX
  46  * ring. Even in the case of a NIC that does not have a TX ring, a pseudo
  47  * TX ring is given to the aggregation layer.
  48  *
  49  * With this change, the outgoing stack depth looks much better:
  50  *
  51  * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
 * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx()
  53  *
  54  * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings:
  55  * SRS_TX_AGGR and SRS_TX_BW_AGGR.
  56  *
  57  * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine
  58  * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX
  59  * ring belonging to a port on which the packet has to be sent.
  60  * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
  61  * policy and then uses the fanout_hint passed to it to pick a TX ring from
  62  * the selected port.
  63  *
  64  * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where
  65  * bandwidth limit is applied first on the outgoing packet and the packets
  66  * allowed to go out would call mac_tx_aggr_mode() to send the packet on a
  67  * particular TX ring.
  68  */
  69 
  70 #include <sys/types.h>
  71 #include <sys/sysmacros.h>
  72 #include <sys/conf.h>
  73 #include <sys/cmn_err.h>
  74 #include <sys/disp.h>
  75 #include <sys/list.h>
  76 #include <sys/ksynch.h>
  77 #include <sys/kmem.h>
  78 #include <sys/stream.h>
  79 #include <sys/modctl.h>
  80 #include <sys/ddi.h>
  81 #include <sys/sunddi.h>
  82 #include <sys/atomic.h>
  83 #include <sys/stat.h>
  84 #include <sys/modhash.h>
  85 #include <sys/id_space.h>
  86 #include <sys/strsun.h>
  87 #include <sys/cred.h>
  88 #include <sys/dlpi.h>
  89 #include <sys/zone.h>
  90 #include <sys/mac_provider.h>
  91 #include <sys/dls.h>
  92 #include <sys/vlan.h>
  93 #include <sys/aggr.h>
  94 #include <sys/aggr_impl.h>
  95 
/*
 * MAC entry points of the aggregation. All of them except aggr_m_unicst()
 * are referenced by the aggr_m_callbacks table in this file.
 */
static int aggr_m_start(void *);
static void aggr_m_stop(void *);
static int aggr_m_promisc(void *, boolean_t);
static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
static int aggr_m_unicst(void *, const uint8_t *);
static int aggr_m_stat(void *, uint_t, uint64_t *);
static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static void aggr_m_propinfo(void *, const char *, mac_prop_id_t,
    mac_prop_info_handle_t);

/* Port lookup and removal helpers for a group. */
static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
    boolean_t *);

/* Group capability, SDU and margin helpers. */
static void aggr_grp_capab_set(aggr_grp_t *);
static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
static uint_t aggr_grp_max_sdu(aggr_grp_t *);
static uint32_t aggr_grp_max_margin(aggr_grp_t *);
static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);

/* Pseudo RX group/ring management and ring/group callbacks. */
static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
static int aggr_pseudo_disable_intr(mac_intr_handle_t);
static int aggr_pseudo_enable_intr(mac_intr_handle_t);
static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
static void aggr_pseudo_stop_ring(mac_ring_driver_t);
static int aggr_addmac(void *, const uint8_t *);
static int aggr_remmac(void *, const uint8_t *);
static mblk_t *aggr_rx_poll(void *, int);
static void aggr_fill_ring(void *, mac_ring_type_t, const int,
    const int, mac_ring_info_t *, mac_ring_handle_t);
static void aggr_fill_group(void *, mac_ring_type_t, const int,
    mac_group_info_t *, mac_group_handle_t);
 133 
/*
 * Module-wide state, set up by aggr_grp_init() and torn down by
 * aggr_grp_fini():
 *
 * aggr_grp_cache	kmem cache from which aggr_grp_t objects are allocated.
 * aggr_grp_hash	id hash of the existing groups.
 * aggr_grp_lock	reader/writer lock; held as reader by aggr_grp_count()
 *			while it samples aggr_grp_cnt.
 * aggr_grp_cnt		number of groups, reset to 0 by aggr_grp_init().
 * key_ids		id space used to hand out key values in the range
 *			(AGGR_MAX_KEY + 1) to UINT16_MAX.
 */
static kmem_cache_t     *aggr_grp_cache;
static mod_hash_t       *aggr_grp_hash;
static krwlock_t        aggr_grp_lock;
static uint_t           aggr_grp_cnt;
static id_space_t       *key_ids;
 139 
 140 #define GRP_HASHSZ              64
 141 #define GRP_HASH_KEY(linkid)    ((mod_hash_key_t)(uintptr_t)linkid)
 142 #define AGGR_PORT_NAME_DELIMIT '-'
 143 
/*
 * Six zero bytes.
 * NOTE(review): presumably used to recognize an unset MAC address; its
 * users are outside the visible part of this file.
 */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};

/*
 * Flags stored in the first member of aggr_m_callbacks. The flags name the
 * ioctl, getcapab, setprop and propinfo entry points, which match the
 * aggr_m_ioctl, aggr_m_capab_get, aggr_m_setprop and aggr_m_propinfo
 * entries of the table.
 */
#define AGGR_M_CALLBACK_FLAGS   \
        (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)

/*
 * MAC callback table of the aggregation. The initializer is positional, so
 * the order of the entries (including the NULL placeholders) must follow
 * the member order of mac_callbacks_t.
 */
static mac_callbacks_t aggr_m_callbacks = {
        AGGR_M_CALLBACK_FLAGS,
        aggr_m_stat,
        aggr_m_start,
        aggr_m_stop,
        aggr_m_promisc,
        aggr_m_multicst,
        NULL,
        NULL,
        NULL,
        aggr_m_ioctl,
        aggr_m_capab_get,
        NULL,
        NULL,
        aggr_m_setprop,
        NULL,
        aggr_m_propinfo
};
 167 
 168 /*ARGSUSED*/
 169 static int
 170 aggr_grp_constructor(void *buf, void *arg, int kmflag)
 171 {
 172         aggr_grp_t *grp = buf;
 173 
 174         bzero(grp, sizeof (*grp));
 175         mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
 176         cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
 177         rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
 178         mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
 179         cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
 180         mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL);
 181         cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL);
 182         grp->lg_link_state = LINK_STATE_UNKNOWN;
 183         return (0);
 184 }
 185 
 186 /*ARGSUSED*/
 187 static void
 188 aggr_grp_destructor(void *buf, void *arg)
 189 {
 190         aggr_grp_t *grp = buf;
 191 
 192         if (grp->lg_tx_ports != NULL) {
 193                 kmem_free(grp->lg_tx_ports,
 194                     grp->lg_tx_ports_size * sizeof (aggr_port_t *));
 195         }
 196 
 197         mutex_destroy(&grp->lg_lacp_lock);
 198         cv_destroy(&grp->lg_lacp_cv);
 199         mutex_destroy(&grp->lg_port_lock);
 200         cv_destroy(&grp->lg_port_cv);
 201         rw_destroy(&grp->lg_tx_lock);
 202         mutex_destroy(&grp->lg_tx_flowctl_lock);
 203         cv_destroy(&grp->lg_tx_flowctl_cv);
 204 }
 205 
 206 void
 207 aggr_grp_init(void)
 208 {
 209         aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
 210             sizeof (aggr_grp_t), 0, aggr_grp_constructor,
 211             aggr_grp_destructor, NULL, NULL, NULL, 0);
 212 
 213         aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
 214             GRP_HASHSZ, mod_hash_null_valdtor);
 215         rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
 216         aggr_grp_cnt = 0;
 217 
 218         /*
 219          * Allocate an id space to manage key values (when key is not
 220          * specified). The range of the id space will be from
 221          * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
 222          * uses a 16-bit key.
 223          */
 224         key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
 225         ASSERT(key_ids != NULL);
 226 }
 227 
/*
 * Release the module-wide group state created by aggr_grp_init(). The
 * resources are destroyed in the reverse order of their creation: key id
 * space, global lock, group hash table, then the aggr_grp_t kmem cache.
 */
void
aggr_grp_fini(void)
{
        id_space_destroy(key_ids);
        rw_destroy(&aggr_grp_lock);
        mod_hash_destroy_idhash(aggr_grp_hash);
        kmem_cache_destroy(aggr_grp_cache);
}
 236 
 237 uint_t
 238 aggr_grp_count(void)
 239 {
 240         uint_t  count;
 241 
 242         rw_enter(&aggr_grp_lock, RW_READER);
 243         count = aggr_grp_cnt;
 244         rw_exit(&aggr_grp_lock);
 245         return (count);
 246 }
 247 
 248 /*
 249  * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
 250  * requires the mac perimeter, this function holds a reference of the aggr
 251  * and aggr won't call mac_unregister() until this reference drops to 0.
 252  */
 253 void
 254 aggr_grp_port_hold(aggr_port_t *port)
 255 {
 256         aggr_grp_t      *grp = port->lp_grp;
 257 
 258         AGGR_PORT_REFHOLD(port);
 259         mutex_enter(&grp->lg_port_lock);
 260         grp->lg_port_ref++;
 261         mutex_exit(&grp->lg_port_lock);
 262 }
 263 
 264 /*
 265  * Release the reference of the grp and inform aggr_grp_delete() calling
 266  * mac_unregister() is now safe.
 267  */
 268 void
 269 aggr_grp_port_rele(aggr_port_t *port)
 270 {
 271         aggr_grp_t      *grp = port->lp_grp;
 272 
 273         mutex_enter(&grp->lg_port_lock);
 274         if (--grp->lg_port_ref == 0)
 275                 cv_signal(&grp->lg_port_cv);
 276         mutex_exit(&grp->lg_port_lock);
 277         AGGR_PORT_REFRELE(port);
 278 }
 279 
 280 /*
 281  * Wait for the port's lacp timer thread and the port's notification callback
 282  * to exit.
 283  */
 284 void
 285 aggr_grp_port_wait(aggr_grp_t *grp)
 286 {
 287         mutex_enter(&grp->lg_port_lock);
 288         if (grp->lg_port_ref != 0)
 289                 cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
 290         mutex_exit(&grp->lg_port_lock);
 291 }
 292 
 293 /*
 294  * Attach a port to a link aggregation group.
 295  *
 296  * A port is attached to a link aggregation group once its speed
 297  * and link state have been verified.
 298  *
 299  * Returns B_TRUE if the group link state or speed has changed. If
 300  * it's the case, the caller must notify the MAC layer via a call
 301  * to mac_link().
 302  */
 303 boolean_t
 304 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
 305 {
 306         boolean_t link_state_changed = B_FALSE;
 307 
 308         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 309         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 310 
 311         if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
 312                 return (B_FALSE);
 313 
 314         /*
 315          * Validate the MAC port link speed and update the group
 316          * link speed if needed.
 317          */
 318         if (port->lp_ifspeed == 0 ||
 319             port->lp_link_state != LINK_STATE_UP ||
 320             port->lp_link_duplex != LINK_DUPLEX_FULL) {
 321                 /*
 322                  * Can't attach a MAC port with unknown link speed,
 323                  * down link, or not in full duplex mode.
 324                  */
 325                 return (B_FALSE);
 326         }
 327 
 328         if (grp->lg_ifspeed == 0) {
 329                 /*
 330                  * The group inherits the speed of the first link being
 331                  * attached.
 332                  */
 333                 grp->lg_ifspeed = port->lp_ifspeed;
 334                 link_state_changed = B_TRUE;
 335         } else if (grp->lg_ifspeed != port->lp_ifspeed) {
 336                 /*
 337                  * The link speed of the MAC port must be the same as
 338                  * the group link speed, as per 802.3ad. Since it is
 339                  * not, the attach is cancelled.
 340                  */
 341                 return (B_FALSE);
 342         }
 343 
 344         grp->lg_nattached_ports++;
 345 
 346         /*
 347          * Update the group link state.
 348          */
 349         if (grp->lg_link_state != LINK_STATE_UP) {
 350                 grp->lg_link_state = LINK_STATE_UP;
 351                 grp->lg_link_duplex = LINK_DUPLEX_FULL;
 352                 link_state_changed = B_TRUE;
 353         }
 354 
 355         /*
 356          * Update port's state.
 357          */
 358         port->lp_state = AGGR_PORT_STATE_ATTACHED;
 359 
 360         aggr_grp_multicst_port(port, B_TRUE);
 361 
 362         /*
 363          * Set port's receive callback
 364          */
 365         mac_rx_set(port->lp_mch, aggr_recv_cb, port);
 366 
 367         /*
 368          * If LACP is OFF, the port can be used to send data as soon
 369          * as its link is up and verified to be compatible with the
 370          * aggregation.
 371          *
 372          * If LACP is active or passive, notify the LACP subsystem, which
 373          * will enable sending on the port following the LACP protocol.
 374          */
 375         if (grp->lg_lacp_mode == AGGR_LACP_OFF)
 376                 aggr_send_port_enable(port);
 377         else
 378                 aggr_lacp_port_attached(port);
 379 
 380         return (link_state_changed);
 381 }
 382 
 383 boolean_t
 384 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
 385 {
 386         boolean_t link_state_changed = B_FALSE;
 387 
 388         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 389         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 390 
 391         /* update state */
 392         if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
 393                 return (B_FALSE);
 394 
 395         mac_rx_clear(port->lp_mch);
 396 
 397         aggr_grp_multicst_port(port, B_FALSE);
 398 
 399         if (grp->lg_lacp_mode == AGGR_LACP_OFF)
 400                 aggr_send_port_disable(port);
 401         else
 402                 aggr_lacp_port_detached(port);
 403 
 404         port->lp_state = AGGR_PORT_STATE_STANDBY;
 405 
 406         grp->lg_nattached_ports--;
 407         if (grp->lg_nattached_ports == 0) {
 408                 /* the last attached MAC port of the group is being detached */
 409                 grp->lg_ifspeed = 0;
 410                 grp->lg_link_state = LINK_STATE_DOWN;
 411                 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
 412                 link_state_changed = B_TRUE;
 413         }
 414 
 415         return (link_state_changed);
 416 }
 417 
 418 /*
 419  * Update the MAC addresses of the constituent ports of the specified
 420  * group. This function is invoked:
 421  * - after creating a new aggregation group.
 422  * - after adding new ports to an aggregation group.
 423  * - after removing a port from a group when the MAC address of
 424  *   that port was used for the MAC address of the group.
 425  * - after the MAC address of a port changed when the MAC address
 426  *   of that port was used for the MAC address of the group.
 427  *
 428  * Return true if the link state of the aggregation changed, for example
 429  * as a result of a failure changing the MAC address of one of the
 430  * constituent ports.
 431  */
 432 boolean_t
 433 aggr_grp_update_ports_mac(aggr_grp_t *grp)
 434 {
 435         aggr_port_t *cport;
 436         boolean_t link_state_changed = B_FALSE;
 437         mac_perim_handle_t mph;
 438 
 439         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 440 
 441         for (cport = grp->lg_ports; cport != NULL;
 442             cport = cport->lp_next) {
 443                 mac_perim_enter_by_mh(cport->lp_mh, &mph);
 444                 if (aggr_port_unicst(cport) != 0) {
 445                         if (aggr_grp_detach_port(grp, cport))
 446                                 link_state_changed = B_TRUE;
 447                 } else {
 448                         /*
 449                          * If a port was detached because of a previous
 450                          * failure changing the MAC address, the port is
 451                          * reattached when it successfully changes the MAC
 452                          * address now, and this might cause the link state
 453                          * of the aggregation to change.
 454                          */
 455                         if (aggr_grp_attach_port(grp, cport))
 456                                 link_state_changed = B_TRUE;
 457                 }
 458                 mac_perim_exit(mph);
 459         }
 460         return (link_state_changed);
 461 }
 462 
 463 /*
 464  * Invoked when the MAC address of a port has changed. If the port's
 465  * MAC address was used for the group MAC address, set mac_addr_changedp
 466  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
 467  * notification. If the link state changes due to detach/attach of
 468  * the constituent port, set link_state_changedp to B_TRUE to indicate
 469  * to the caller that it should send a MAC_NOTE_LINK notification. In both
 470  * cases, it is the responsibility of the caller to invoke notification
 471  * functions after releasing the the port lock.
 472  */
 473 void
 474 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
 475     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
 476 {
 477         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 478         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 479         ASSERT(mac_addr_changedp != NULL);
 480         ASSERT(link_state_changedp != NULL);
 481 
 482         *mac_addr_changedp = B_FALSE;
 483         *link_state_changedp = B_FALSE;
 484 
 485         if (grp->lg_addr_fixed) {
 486                 /*
 487                  * The group is using a fixed MAC address or an automatic
 488                  * MAC address has not been set.
 489                  */
 490                 return;
 491         }
 492 
 493         if (grp->lg_mac_addr_port == port) {
 494                 /*
 495                  * The MAC address of the port was assigned to the group
 496                  * MAC address. Update the group MAC address.
 497                  */
 498                 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
 499                 *mac_addr_changedp = B_TRUE;
 500         } else {
 501                 /*
 502                  * Update the actual port MAC address to the MAC address
 503                  * of the group.
 504                  */
 505                 if (aggr_port_unicst(port) != 0) {
 506                         *link_state_changedp = aggr_grp_detach_port(grp, port);
 507                 } else {
 508                         /*
 509                          * If a port was detached because of a previous
 510                          * failure changing the MAC address, the port is
 511                          * reattached when it successfully changes the MAC
 512                          * address now, and this might cause the link state
 513                          * of the aggregation to change.
 514                          */
 515                         *link_state_changedp = aggr_grp_attach_port(grp, port);
 516                 }
 517         }
 518 }
 519 
 520 /*
 521  * Add a port to a link aggregation group.
 522  */
 523 static int
 524 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
 525     aggr_port_t **pp)
 526 {
 527         aggr_port_t *port, **cport;
 528         mac_perim_handle_t mph;
 529         zoneid_t port_zoneid = ALL_ZONES;
 530         int err;
 531 
 532         /* The port must be int the same zone as the aggregation. */
 533         if (zone_check_datalink(&port_zoneid, port_linkid) != 0)
 534                 port_zoneid = GLOBAL_ZONEID;
 535         if (grp->lg_zoneid != port_zoneid)
 536                 return (EBUSY);
 537 
 538         /*
 539          * lg_mh could be NULL when the function is called during the creation
 540          * of the aggregation.
 541          */
 542         ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
 543 
 544         /* create new port */
 545         err = aggr_port_create(grp, port_linkid, force, &port);
 546         if (err != 0)
 547                 return (err);
 548 
 549         mac_perim_enter_by_mh(port->lp_mh, &mph);
 550 
 551         /* add port to list of group constituent ports */
 552         cport = &grp->lg_ports;
 553         while (*cport != NULL)
 554                 cport = &((*cport)->lp_next);
 555         *cport = port;
 556 
 557         /*
 558          * Back reference to the group it is member of. A port always
 559          * holds a reference to its group to ensure that the back
 560          * reference is always valid.
 561          */
 562         port->lp_grp = grp;
 563         AGGR_GRP_REFHOLD(grp);
 564         grp->lg_nports++;
 565 
 566         aggr_lacp_init_port(port);
 567         mac_perim_exit(mph);
 568 
 569         if (pp != NULL)
 570                 *pp = port;
 571 
 572         return (0);
 573 }
 574 
 575 /*
 576  * This is called in response to either our LACP state machine or a MAC
 577  * notification that the link has gone down via aggr_send_port_disable(). At
 578  * this point, we may need to update our default ring. To that end, we go
 579  * through the set of ports (underlying datalinks in an aggregation) that are
 580  * currently enabled to transmit data. If all our links have been disabled for
 581  * transmit, then we don't do anything.
 582  *
 583  * Note, because we only have a single TX group, we don't have to worry about
 584  * the rings moving between groups and the chance that mac will reassign it
 585  * unless someone removes a port, at which point, we play it safe and call this
 586  * again.
 587  */
 588 void
 589 aggr_grp_update_default(aggr_grp_t *grp)
 590 {
 591         aggr_port_t *port;
 592         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 593 
 594         rw_enter(&grp->lg_tx_lock, RW_WRITER);
 595 
 596         if (grp->lg_ntx_ports == 0) {
 597                 rw_exit(&grp->lg_tx_lock);
 598                 return;
 599         }
 600 
 601         port = grp->lg_tx_ports[0];
 602         ASSERT(port->lp_tx_ring_cnt > 0);
 603         mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]);
 604         rw_exit(&grp->lg_tx_lock);
 605 }
 606 
 607 /*
 608  * Add a pseudo RX ring for the given HW ring handle.
 609  */
 610 static int
 611 aggr_add_pseudo_rx_ring(aggr_port_t *port,
 612     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
 613 {
 614         aggr_pseudo_rx_ring_t   *ring;
 615         int                     err;
 616         int                     j;
 617 
 618         for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
 619                 ring = rx_grp->arg_rings + j;
 620                 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
 621                         break;
 622         }
 623 
 624         /*
 625          * No slot for this new RX ring.
 626          */
 627         if (j == MAX_RINGS_PER_GROUP)
 628                 return (EIO);
 629 
 630         ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
 631         ring->arr_hw_rh = hw_rh;
 632         ring->arr_port = port;
 633         rx_grp->arg_ring_cnt++;
 634 
 635         /*
 636          * The group is already registered, dynamically add a new ring to the
 637          * mac group.
 638          */
 639         if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
 640                 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
 641                 ring->arr_hw_rh = NULL;
 642                 ring->arr_port = NULL;
 643                 rx_grp->arg_ring_cnt--;
 644         } else {
 645                 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
 646                     mac_find_ring(rx_grp->arg_gh, j));
 647         }
 648         return (err);
 649 }
 650 
 651 /*
 652  * Remove the pseudo RX ring of the given HW ring handle.
 653  */
 654 static void
 655 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
 656 {
 657         aggr_pseudo_rx_ring_t   *ring;
 658         int                     j;
 659 
 660         for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
 661                 ring = rx_grp->arg_rings + j;
 662                 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
 663                     ring->arr_hw_rh != hw_rh) {
 664                         continue;
 665                 }
 666 
 667                 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
 668 
 669                 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
 670                 ring->arr_hw_rh = NULL;
 671                 ring->arr_port = NULL;
 672                 rx_grp->arg_ring_cnt--;
 673                 mac_hwring_teardown(hw_rh);
 674                 break;
 675         }
 676 }
 677 
 678 /*
 679  * This function is called to create pseudo rings over the hardware rings of
 680  * the underlying device. Note that there is a 1:1 mapping between the pseudo
 681  * RX rings of the aggr and the hardware rings of the underlying port.
 682  */
 683 static int
 684 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
 685 {
 686         aggr_grp_t              *grp = port->lp_grp;
 687         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP];
 688         aggr_unicst_addr_t      *addr, *a;
 689         mac_perim_handle_t      pmph;
 690         int                     hw_rh_cnt, i = 0, j;
 691         int                     err = 0;
 692 
 693         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 694         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 695 
 696         /*
 697          * This function must be called after the aggr registers its mac
 698          * and its RX group has been initialized.
 699          */
 700         ASSERT(rx_grp->arg_gh != NULL);
 701 
 702         /*
 703          * Get the list the the underlying HW rings.
 704          */
 705         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 706             &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
 707 
 708         if (port->lp_hwgh != NULL) {
 709                 /*
 710                  * Quiesce the HW ring and the mac srs on the ring. Note
 711                  * that the HW ring will be restarted when the pseudo ring
 712                  * is started. At that time all the packets will be
 713                  * directly passed up to the pseudo RX ring and handled
 714                  * by mac srs created over the pseudo RX ring.
 715                  */
 716                 mac_rx_client_quiesce(port->lp_mch);
 717                 mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
 718         }
 719 
 720         /*
 721          * Add all the unicast addresses to the newly added port.
 722          */
 723         for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
 724                 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
 725                         break;
 726         }
 727 
 728         for (i = 0; err == 0 && i < hw_rh_cnt; i++)
 729                 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
 730 
 731         if (err != 0) {
 732                 for (j = 0; j < i; j++)
 733                         aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
 734 
 735                 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
 736                         aggr_port_remmac(port, a->aua_addr);
 737 
 738                 if (port->lp_hwgh != NULL) {
 739                         mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
 740                         mac_rx_client_restart(port->lp_mch);
 741                         port->lp_hwgh = NULL;
 742                 }
 743         } else {
 744                 port->lp_rx_grp_added = B_TRUE;
 745         }
 746 done:
 747         mac_perim_exit(pmph);
 748         return (err);
 749 }
 750 
 751 /*
 752  * This function is called by aggr to remove pseudo RX rings over the
 753  * HW rings of the underlying port.
 754  */
 755 static void
 756 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
 757 {
 758         aggr_grp_t              *grp = port->lp_grp;
 759         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP];
 760         aggr_unicst_addr_t      *addr;
 761         mac_group_handle_t      hwgh;
 762         mac_perim_handle_t      pmph;
 763         int                     hw_rh_cnt, i;
 764 
 765         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 766         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 767 
 768         if (!port->lp_rx_grp_added)
 769                 goto done;
 770 
 771         ASSERT(rx_grp->arg_gh != NULL);
 772         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 773             &hwgh, hw_rh, MAC_RING_TYPE_RX);
 774 
 775         /*
 776          * If hw_rh_cnt is 0, it means that the underlying port does not
 777          * support RX rings. Directly return in this case.
 778          */
 779         for (i = 0; i < hw_rh_cnt; i++)
 780                 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
 781 
 782         for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
 783                 aggr_port_remmac(port, addr->aua_addr);
 784 
 785         if (port->lp_hwgh != NULL) {
 786                 port->lp_hwgh = NULL;
 787 
 788                 /*
 789                  * First clear the permanent-quiesced flag of the RX srs then
 790                  * restart the HW ring and the mac srs on the ring. Note that
 791                  * the HW ring and associated SRS will soon been removed when
 792                  * the port is removed from the aggr.
 793                  */
 794                 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
 795                 mac_rx_client_restart(port->lp_mch);
 796         }
 797 
 798         port->lp_rx_grp_added = B_FALSE;
 799 done:
 800         mac_perim_exit(pmph);
 801 }
 802 
 803 /*
 804  * Add a pseudo TX ring for the given HW ring handle.
 805  */
 806 static int
 807 aggr_add_pseudo_tx_ring(aggr_port_t *port,
 808     aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh,
 809     mac_ring_handle_t *pseudo_rh)
 810 {
 811         aggr_pseudo_tx_ring_t   *ring;
 812         int                     err;
 813         int                     i;
 814 
 815         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 816         for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
 817                 ring = tx_grp->atg_rings + i;
 818                 if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE))
 819                         break;
 820         }
 821         /*
 822          * No slot for this new TX ring.
 823          */
 824         if (i == MAX_RINGS_PER_GROUP)
 825                 return (EIO);
 826         /*
 827          * The following 4 statements needs to be done before
 828          * calling mac_group_add_ring(). Otherwise it will
 829          * result in an assertion failure in mac_init_ring().
 830          */
 831         ring->atr_flags |= MAC_PSEUDO_RING_INUSE;
 832         ring->atr_hw_rh = hw_rh;
 833         ring->atr_port = port;
 834         tx_grp->atg_ring_cnt++;
 835 
 836         /*
 837          * The TX side has no concept of ring groups unlike RX groups.
 838          * There is just a single group which stores all the TX rings.
 839          * This group will be used to store aggr's pseudo TX rings.
 840          */
 841         if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) {
 842                 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
 843                 ring->atr_hw_rh = NULL;
 844                 ring->atr_port = NULL;
 845                 tx_grp->atg_ring_cnt--;
 846         } else {
 847                 *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i);
 848                 if (hw_rh != NULL) {
 849                         mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
 850                             mac_find_ring(tx_grp->atg_gh, i));
 851                 }
 852         }
 853 
 854         return (err);
 855 }
 856 
 857 /*
 858  * Remove the pseudo TX ring of the given HW ring handle.
 859  */
 860 static void
 861 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp,
 862     mac_ring_handle_t pseudo_hw_rh)
 863 {
 864         aggr_pseudo_tx_ring_t   *ring;
 865         int                     i;
 866 
 867         for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
 868                 ring = tx_grp->atg_rings + i;
 869                 if (ring->atr_rh != pseudo_hw_rh)
 870                         continue;
 871 
 872                 ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE);
 873                 mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh);
 874                 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
 875                 mac_hwring_teardown(ring->atr_hw_rh);
 876                 ring->atr_hw_rh = NULL;
 877                 ring->atr_port = NULL;
 878                 tx_grp->atg_ring_cnt--;
 879                 break;
 880         }
 881 }
 882 
 883 /*
 884  * This function is called to create pseudo rings over hardware rings of
 885  * the underlying device. There is a 1:1 mapping between the pseudo TX
 886  * rings of the aggr and the hardware rings of the underlying port.
 887  */
 888 static int
 889 aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
 890 {
 891         aggr_grp_t              *grp = port->lp_grp;
 892         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh;
 893         mac_perim_handle_t      pmph;
 894         int                     hw_rh_cnt, i = 0, j;
 895         int                     err = 0;
 896 
 897         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 898         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 899 
 900         /*
 901          * Get the list the the underlying HW rings.
 902          */
 903         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 904             NULL, hw_rh, MAC_RING_TYPE_TX);
 905 
 906         /*
 907          * Even if the underlying NIC does not have TX rings, we
 908          * still make a psuedo TX ring for that NIC with NULL as
 909          * the ring handle.
 910          */
 911         if (hw_rh_cnt == 0)
 912                 port->lp_tx_ring_cnt = 1;
 913         else
 914                 port->lp_tx_ring_cnt = hw_rh_cnt;
 915 
 916         port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
 917             port->lp_tx_ring_cnt), KM_SLEEP);
 918         port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
 919             port->lp_tx_ring_cnt), KM_SLEEP);
 920 
 921         if (hw_rh_cnt == 0) {
 922                 if ((err = aggr_add_pseudo_tx_ring(port, tx_grp,
 923                     NULL, &pseudo_rh)) == 0) {
 924                         port->lp_tx_rings[0] = NULL;
 925                         port->lp_pseudo_tx_rings[0] = pseudo_rh;
 926                 }
 927         } else {
 928                 for (i = 0; err == 0 && i < hw_rh_cnt; i++) {
 929                         err = aggr_add_pseudo_tx_ring(port,
 930                             tx_grp, hw_rh[i], &pseudo_rh);
 931                         if (err != 0)
 932                                 break;
 933                         port->lp_tx_rings[i] = hw_rh[i];
 934                         port->lp_pseudo_tx_rings[i] = pseudo_rh;
 935                 }
 936         }
 937 
 938         if (err != 0) {
 939                 if (hw_rh_cnt != 0) {
 940                         for (j = 0; j < i; j++) {
 941                                 aggr_rem_pseudo_tx_ring(tx_grp,
 942                                     port->lp_pseudo_tx_rings[j]);
 943                         }
 944                 }
 945                 kmem_free(port->lp_tx_rings,
 946                     (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 947                 kmem_free(port->lp_pseudo_tx_rings,
 948                     (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 949                 port->lp_tx_ring_cnt = 0;
 950         } else {
 951                 port->lp_tx_grp_added = B_TRUE;
 952                 port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch,
 953                     aggr_tx_ring_update, port);
 954         }
 955         mac_perim_exit(pmph);
 956         aggr_grp_update_default(grp);
 957         return (err);
 958 }
 959 
 960 /*
 961  * This function is called by aggr to remove pseudo TX rings over the
 962  * HW rings of the underlying port.
 963  */
 964 static void
 965 aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
 966 {
 967         aggr_grp_t              *grp = port->lp_grp;
 968         mac_perim_handle_t      pmph;
 969         int                     i;
 970 
 971         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 972         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 973 
 974         if (!port->lp_tx_grp_added)
 975                 goto done;
 976 
 977         ASSERT(tx_grp->atg_gh != NULL);
 978 
 979         for (i = 0; i < port->lp_tx_ring_cnt; i++)
 980                 aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]);
 981 
 982         kmem_free(port->lp_tx_rings,
 983             (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 984         kmem_free(port->lp_pseudo_tx_rings,
 985             (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 986 
 987         port->lp_tx_ring_cnt = 0;
 988         (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh);
 989         port->lp_tx_grp_added = B_FALSE;
 990         aggr_grp_update_default(grp);
 991 done:
 992         mac_perim_exit(pmph);
 993 }
 994 
 995 static int
 996 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
 997 {
 998         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
 999         return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
1000 }
1001 
1002 static int
1003 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
1004 {
1005         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1006         return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
1007 }
1008 
1009 static int
1010 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
1011 {
1012         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1013         int err;
1014 
1015         err = mac_hwring_start(rr_ring->arr_hw_rh);
1016         if (err == 0)
1017                 rr_ring->arr_gen = mr_gen;
1018         return (err);
1019 }
1020 
1021 static void
1022 aggr_pseudo_stop_ring(mac_ring_driver_t arg)
1023 {
1024         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1025         mac_hwring_stop(rr_ring->arr_hw_rh);
1026 }
1027 
1028 /*
1029  * Add one or more ports to an existing link aggregation group.
1030  */
1031 int
1032 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
1033     laioc_port_t *ports)
1034 {
1035         int rc, i, nadded = 0;
1036         aggr_grp_t *grp = NULL;
1037         aggr_port_t *port;
1038         boolean_t link_state_changed = B_FALSE;
1039         mac_perim_handle_t mph, pmph;
1040 
1041         /* get group corresponding to linkid */
1042         rw_enter(&aggr_grp_lock, RW_READER);
1043         if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1044             (mod_hash_val_t *)&grp) != 0) {
1045                 rw_exit(&aggr_grp_lock);
1046                 return (ENOENT);
1047         }
1048         AGGR_GRP_REFHOLD(grp);
1049 
1050         /*
1051          * Hold the perimeter so that the aggregation won't be destroyed.
1052          */
1053         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1054         rw_exit(&aggr_grp_lock);
1055 
1056         /* add the specified ports to group */
1057         for (i = 0; i < nports; i++) {
1058                 /* add port to group */
1059                 if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
1060                     force, &port)) != 0) {
1061                         goto bail;
1062                 }
1063                 ASSERT(port != NULL);
1064                 nadded++;
1065 
1066                 /* check capabilities */
1067                 if (!aggr_grp_capab_check(grp, port) ||
1068                     !aggr_grp_sdu_check(grp, port) ||
1069                     !aggr_grp_margin_check(grp, port)) {
1070                         rc = ENOTSUP;
1071                         goto bail;
1072                 }
1073 
1074                 /*
1075                  * Create the pseudo ring for each HW ring of the underlying
1076                  * port.
1077                  */
1078                 rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group);
1079                 if (rc != 0)
1080                         goto bail;
1081                 rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
1082                 if (rc != 0)
1083                         goto bail;
1084 
1085                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1086 
1087                 /* set LACP mode */
1088                 aggr_port_lacp_set_mode(grp, port);
1089 
1090                 /* start port if group has already been started */
1091                 if (grp->lg_started) {
1092                         rc = aggr_port_start(port);
1093                         if (rc != 0) {
1094                                 mac_perim_exit(pmph);
1095                                 goto bail;
1096                         }
1097 
1098                         /*
1099                          * Turn on the promiscuous mode over the port when it
1100                          * is requested to be turned on to receive the
1101                          * non-primary address over a port, or the promiscous
1102                          * mode is enabled over the aggr.
1103                          */
1104                         if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1105                                 rc = aggr_port_promisc(port, B_TRUE);
1106                                 if (rc != 0) {
1107                                         mac_perim_exit(pmph);
1108                                         goto bail;
1109                                 }
1110                         }
1111                 }
1112                 mac_perim_exit(pmph);
1113 
1114                 /*
1115                  * Attach each port if necessary.
1116                  */
1117                 if (aggr_port_notify_link(grp, port))
1118                         link_state_changed = B_TRUE;
1119 
1120                 /*
1121                  * Initialize the callback functions for this port.
1122                  */
1123                 aggr_port_init_callbacks(port);
1124         }
1125 
1126         /* update the MAC address of the constituent ports */
1127         if (aggr_grp_update_ports_mac(grp))
1128                 link_state_changed = B_TRUE;
1129 
1130         if (link_state_changed)
1131                 mac_link_update(grp->lg_mh, grp->lg_link_state);
1132 
1133 bail:
1134         if (rc != 0) {
1135                 /* stop and remove ports that have been added */
1136                 for (i = 0; i < nadded; i++) {
1137                         port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1138                         ASSERT(port != NULL);
1139                         if (grp->lg_started) {
1140                                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1141                                 (void) aggr_port_promisc(port, B_FALSE);
1142                                 aggr_port_stop(port);
1143                                 mac_perim_exit(pmph);
1144                         }
1145                         aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1146                         aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1147                         (void) aggr_grp_rem_port(grp, port, NULL, NULL);
1148                 }
1149         }
1150 
1151         mac_perim_exit(mph);
1152         AGGR_GRP_REFRELE(grp);
1153         return (rc);
1154 }
1155 
1156 static int
1157 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
1158     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1159     aggr_lacp_timer_t lacp_timer)
1160 {
1161         boolean_t mac_addr_changed = B_FALSE;
1162         boolean_t link_state_changed = B_FALSE;
1163         mac_perim_handle_t pmph;
1164 
1165         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1166 
1167         /* validate fixed address if specified */
1168         if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
1169             ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
1170             (mac_addr[0] & 0x01))) {
1171                 return (EINVAL);
1172         }
1173 
1174         /* update policy if requested */
1175         if (update_mask & AGGR_MODIFY_POLICY)
1176                 aggr_send_update_policy(grp, policy);
1177 
1178         /* update unicast MAC address if requested */
1179         if (update_mask & AGGR_MODIFY_MAC) {
1180                 if (mac_fixed) {
1181                         /* user-supplied MAC address */
1182                         grp->lg_mac_addr_port = NULL;
1183                         if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
1184                                 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1185                                 mac_addr_changed = B_TRUE;
1186                         }
1187                 } else if (grp->lg_addr_fixed) {
1188                         /* switch from user-supplied to automatic */
1189                         aggr_port_t *port = grp->lg_ports;
1190 
1191                         mac_perim_enter_by_mh(port->lp_mh, &pmph);
1192                         bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
1193                         grp->lg_mac_addr_port = port;
1194                         mac_addr_changed = B_TRUE;
1195                         mac_perim_exit(pmph);
1196                 }
1197                 grp->lg_addr_fixed = mac_fixed;
1198         }
1199 
1200         if (mac_addr_changed)
1201                 link_state_changed = aggr_grp_update_ports_mac(grp);
1202 
1203         if (update_mask & AGGR_MODIFY_LACP_MODE)
1204                 aggr_lacp_update_mode(grp, lacp_mode);
1205 
1206         if (update_mask & AGGR_MODIFY_LACP_TIMER)
1207                 aggr_lacp_update_timer(grp, lacp_timer);
1208 
1209         if (link_state_changed)
1210                 mac_link_update(grp->lg_mh, grp->lg_link_state);
1211 
1212         if (mac_addr_changed)
1213                 mac_unicst_update(grp->lg_mh, grp->lg_addr);
1214 
1215         return (0);
1216 }
1217 
1218 /*
1219  * Update properties of an existing link aggregation group.
1220  */
1221 int
1222 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
1223     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1224     aggr_lacp_timer_t lacp_timer)
1225 {
1226         aggr_grp_t *grp = NULL;
1227         mac_perim_handle_t mph;
1228         int err;
1229 
1230         /* get group corresponding to linkid */
1231         rw_enter(&aggr_grp_lock, RW_READER);
1232         if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1233             (mod_hash_val_t *)&grp) != 0) {
1234                 rw_exit(&aggr_grp_lock);
1235                 return (ENOENT);
1236         }
1237         AGGR_GRP_REFHOLD(grp);
1238 
1239         /*
1240          * Hold the perimeter so that the aggregation won't be destroyed.
1241          */
1242         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1243         rw_exit(&aggr_grp_lock);
1244 
1245         err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
1246             mac_addr, lacp_mode, lacp_timer);
1247 
1248         mac_perim_exit(mph);
1249         AGGR_GRP_REFRELE(grp);
1250         return (err);
1251 }
1252 
/*
 * Create a new link aggregation group upon request from administrator.
 * Returns 0 on success, an errno on failure.
 *
 * linkid	link id of the new aggregation; also its hash key.
 * key		aggregation key; 0 means one is allocated from key_ids.
 * nports	number of entries in 'ports'; must be at least 1.
 * ports	ports to aggregate, identified by their lp_linkid.
 * policy	outbound load balancing policy.
 * mac_fixed	B_TRUE if mac_addr is to be used as the group address,
 *		otherwise the address of the first port is used.
 * force	stored in the group and passed to aggr_grp_add_port().
 * mac_addr	user-supplied MAC address; read only when mac_fixed is set.
 * lacp_mode, lacp_timer
 *		initial LACP settings.
 * credp	credentials of the caller; supply the zone id.
 *
 * Errors returned directly by this function: EINVAL (no ports, or an
 * all-zero fixed MAC address), EEXIST (a group with this link id already
 * exists), ENOMEM (key or mac_register_t allocation failed). Errors from
 * aggr_grp_add_port(), mac_register() and dls_devnet_create() are passed
 * through.
 *
 * aggr_grp_lock is held as writer from the existence check until the
 * function returns.
 */
int
aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
    laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
    uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer,
    cred_t *credp)
{
        aggr_grp_t *grp = NULL;
        aggr_port_t *port;
        mac_register_t *mac;
        boolean_t link_state_changed;
        mac_perim_handle_t mph;
        int err;
        int i;
        kt_did_t tid = 0;

        /* need at least one port */
        if (nports == 0)
                return (EINVAL);

        rw_enter(&aggr_grp_lock, RW_WRITER);

        /* does a group with the same linkid already exist? */
        err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
            (mod_hash_val_t *)&grp);
        if (err == 0) {
                rw_exit(&aggr_grp_lock);
                return (EEXIST);
        }

        grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);

        /*
         * Initialize the group. The initial reference taken here is the
         * one dropped by AGGR_GRP_REFRELE() on the failure path below.
         */
        grp->lg_refs = 1;
        grp->lg_closing = B_FALSE;
        grp->lg_force = force;
        grp->lg_linkid = linkid;
        grp->lg_zoneid = crgetzoneid(credp);
        grp->lg_ifspeed = 0;
        grp->lg_link_state = LINK_STATE_UNKNOWN;
        grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
        grp->lg_started = B_FALSE;
        grp->lg_promisc = B_FALSE;
        grp->lg_lacp_done = B_FALSE;
        grp->lg_tx_notify_done = B_FALSE;
        grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
        /*
         * Start the two per-group worker threads. Both must be told to
         * exit again if creation fails (see the bail path).
         */
        grp->lg_lacp_rx_thread = thread_create(NULL, 0,
            aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
        grp->lg_tx_notify_thread = thread_create(NULL, 0,
            aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
        grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
            MAX_RINGS_PER_GROUP), KM_SLEEP);
        grp->lg_tx_blocked_cnt = 0;
        bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
        bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
        aggr_lacp_init_grp(grp);

        /* add MAC ports to group */
        grp->lg_ports = NULL;
        grp->lg_nports = 0;
        grp->lg_nattached_ports = 0;
        grp->lg_ntx_ports = 0;

        /*
         * If key is not specified by the user, allocate the key.
         */
        if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
                err = ENOMEM;
                goto bail;
        }
        grp->lg_key = key;

        for (i = 0; i < nports; i++) {
                err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
                if (err != 0)
                        goto bail;
        }

        /*
         * If no explicit MAC address was specified by the administrator,
         * set it to the MAC address of the first port.
         */
        grp->lg_addr_fixed = mac_fixed;
        if (grp->lg_addr_fixed) {
                /* validate specified address */
                if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
                        err = EINVAL;
                        goto bail;
                }
                bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
        } else {
                bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
                grp->lg_mac_addr_port = grp->lg_ports;
        }

        /* set the initial group capabilities */
        aggr_grp_capab_set(grp);

        /* Register the aggregation with the MAC layer. */
        if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
                err = ENOMEM;
                goto bail;
        }
        mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
        mac->m_driver = grp;
        mac->m_dip = aggr_dip;
        /* keys above AGGR_MAX_KEY are not used as the MAC instance number */
        mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
        mac->m_src_addr = grp->lg_addr;
        mac->m_callbacks = &aggr_m_callbacks;
        mac->m_min_sdu = 0;
        mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
        mac->m_margin = aggr_grp_max_margin(grp);
        mac->m_v12n = MAC_VIRT_LEVEL1;
        err = mac_register(mac, &grp->lg_mh);
        /* the registration structure is released whether or not it worked */
        mac_free(mac);
        if (err != 0)
                goto bail;

        err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp));
        if (err != 0) {
                /* undo the MAC registration before the common cleanup */
                (void) mac_unregister(grp->lg_mh);
                grp->lg_mh = NULL;
                goto bail;
        }

        /*
         * From here on there is no failure path: the remaining steps
         * either cannot fail or are enforced with VERIFY().
         */
        mac_perim_enter_by_mh(grp->lg_mh, &mph);

        /*
         * Update the MAC address of the constituent ports.
         * None of the ports is attached at this time, so the link state
         * of the aggregation will not change.
         */
        link_state_changed = aggr_grp_update_ports_mac(grp);
        ASSERT(!link_state_changed);

        /* update outbound load balancing policy */
        aggr_send_update_policy(grp, policy);

        /* set LACP mode */
        aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);

        /*
         * Attach each port if necessary.
         */
        for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
                /*
                 * Create the pseudo ring for each HW ring of the underlying
                 * port. Note that this is done after the aggr registers the
                 * mac.
                 */
                VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0);
                VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
                if (aggr_port_notify_link(grp, port))
                        link_state_changed = B_TRUE;

                /*
                 * Initialize the callback functions for this port.
                 */
                aggr_port_init_callbacks(port);
        }

        if (link_state_changed)
                mac_link_update(grp->lg_mh, grp->lg_link_state);

        /* add new group to hash table */
        err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
            (mod_hash_val_t)grp);
        ASSERT(err == 0);
        aggr_grp_cnt++;

        mac_perim_exit(mph);
        rw_exit(&aggr_grp_lock);
        return (0);

bail:

        /*
         * Failure path: 'err' holds the error to return. Delete the ports
         * added so far, stop both worker threads, free the blocked-ring
         * array and drop the initial reference on the group.
         */
        grp->lg_closing = B_TRUE;

        port = grp->lg_ports;
        while (port != NULL) {
                aggr_port_t *cport;

                /* fetch the successor first; the port is deleted next */
                cport = port->lp_next;
                aggr_port_delete(port);
                port = cport;
        }

        /*
         * Inform the lacp_rx thread to exit and wait until
         * lg_lacp_rx_thread has been cleared.
         */
        mutex_enter(&grp->lg_lacp_lock);
        grp->lg_lacp_done = B_TRUE;
        cv_signal(&grp->lg_lacp_cv);
        while (grp->lg_lacp_rx_thread != NULL)
                cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
        mutex_exit(&grp->lg_lacp_lock);
        /*
         * Inform the tx_notify thread to exit. Its thread id is captured
         * under the lock and the thread is joined after the lock has been
         * dropped.
         */
        mutex_enter(&grp->lg_tx_flowctl_lock);
        if (grp->lg_tx_notify_thread != NULL) {
                tid = grp->lg_tx_notify_thread->t_did;
                grp->lg_tx_notify_done = B_TRUE;
                cv_signal(&grp->lg_tx_flowctl_cv);
        }
        mutex_exit(&grp->lg_tx_flowctl_lock);
        if (tid != 0)
                thread_join(tid);

        kmem_free(grp->lg_tx_blocked_rings,
            (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
        rw_exit(&aggr_grp_lock);
        AGGR_GRP_REFRELE(grp);
        return (err);
}
1469 
1470 /*
1471  * Return a pointer to the member of a group with specified linkid.
1472  */
1473 static aggr_port_t *
1474 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
1475 {
1476         aggr_port_t *port;
1477 
1478         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1479 
1480         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1481                 if (port->lp_linkid == linkid)
1482                         break;
1483         }
1484 
1485         return (port);
1486 }
1487 
1488 /*
1489  * Stop, detach and remove a port from a link aggregation group.
1490  */
1491 static int
1492 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
1493     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
1494 {
1495         int rc = 0;
1496         aggr_port_t **pport;
1497         boolean_t mac_addr_changed = B_FALSE;
1498         boolean_t link_state_changed = B_FALSE;
1499         mac_perim_handle_t mph;
1500         uint64_t val;
1501         uint_t i;
1502         uint_t stat;
1503 
1504         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1505         ASSERT(grp->lg_nports > 1);
1506         ASSERT(!grp->lg_closing);
1507 
1508         /* unlink port */
1509         for (pport = &grp->lg_ports; *pport != port;
1510             pport = &(*pport)->lp_next) {
1511                 if (*pport == NULL) {
1512                         rc = ENOENT;
1513                         goto done;
1514                 }
1515         }
1516         *pport = port->lp_next;
1517 
1518         mac_perim_enter_by_mh(port->lp_mh, &mph);
1519 
1520         /*
1521          * If the MAC address of the port being removed was assigned
1522          * to the group, update the group MAC address
1523          * using the MAC address of a different port.
1524          */
1525         if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
1526                 /*
1527                  * Set the MAC address of the group to the
1528                  * MAC address of its first port.
1529                  */
1530                 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1531                 grp->lg_mac_addr_port = grp->lg_ports;
1532                 mac_addr_changed = B_TRUE;
1533         }
1534 
1535         link_state_changed = aggr_grp_detach_port(grp, port);
1536 
1537         /*
1538          * Add the counter statistics of the ports while it was aggregated
1539          * to the group's residual statistics.  This is done by obtaining
1540          * the current counter from the underlying MAC then subtracting the
1541          * value of the counter at the moment it was added to the
1542          * aggregation.
1543          */
1544         for (i = 0; i < MAC_NSTAT; i++) {
1545                 stat = i + MAC_STAT_MIN;
1546                 if (!MAC_STAT_ISACOUNTER(stat))
1547                         continue;
1548                 val = aggr_port_stat(port, stat);
1549                 val -= port->lp_stat[i];
1550                 grp->lg_stat[i] += val;
1551         }
1552         for (i = 0; i < ETHER_NSTAT; i++) {
1553                 stat = i + MACTYPE_STAT_MIN;
1554                 if (!ETHER_STAT_ISACOUNTER(stat))
1555                         continue;
1556                 val = aggr_port_stat(port, stat);
1557                 val -= port->lp_ether_stat[i];
1558                 grp->lg_ether_stat[i] += val;
1559         }
1560 
1561         grp->lg_nports--;
1562         mac_perim_exit(mph);
1563 
1564         aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1565         aggr_port_delete(port);
1566 
1567         /*
1568          * If the group MAC address has changed, update the MAC address of
1569          * the remaining constituent ports according to the new MAC
1570          * address of the group.
1571          */
1572         if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1573                 link_state_changed = B_TRUE;
1574 
1575 done:
1576         if (mac_addr_changedp != NULL)
1577                 *mac_addr_changedp = mac_addr_changed;
1578         if (link_state_changedp != NULL)
1579                 *link_state_changedp = link_state_changed;
1580 
1581         return (rc);
1582 }
1583 
1584 /*
1585  * Remove one or more ports from an existing link aggregation group.
1586  */
1587 int
1588 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
1589 {
1590         int rc = 0, i;
1591         aggr_grp_t *grp = NULL;
1592         aggr_port_t *port;
1593         boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
1594         boolean_t link_state_update = B_FALSE, link_state_changed;
1595         mac_perim_handle_t mph, pmph;
1596 
1597         /* get group corresponding to linkid */
1598         rw_enter(&aggr_grp_lock, RW_READER);
1599         if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1600             (mod_hash_val_t *)&grp) != 0) {
1601                 rw_exit(&aggr_grp_lock);
1602                 return (ENOENT);
1603         }
1604         AGGR_GRP_REFHOLD(grp);
1605 
1606         /*
1607          * Hold the perimeter so that the aggregation won't be destroyed.
1608          */
1609         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1610         rw_exit(&aggr_grp_lock);
1611 
1612         /* we need to keep at least one port per group */
1613         if (nports >= grp->lg_nports) {
1614                 rc = EINVAL;
1615                 goto bail;
1616         }
1617 
1618         /* first verify that all the groups are valid */
1619         for (i = 0; i < nports; i++) {
1620                 if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
1621                         /* port not found */
1622                         rc = ENOENT;
1623                         goto bail;
1624                 }
1625         }
1626 
1627         /* clear the promiscous mode for the specified ports */
1628         for (i = 0; i < nports && rc == 0; i++) {
1629                 /* lookup port */
1630                 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1631                 ASSERT(port != NULL);
1632 
1633                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1634                 rc = aggr_port_promisc(port, B_FALSE);
1635                 mac_perim_exit(pmph);
1636         }
1637         if (rc != 0) {
1638                 for (i = 0; i < nports; i++) {
1639                         port = aggr_grp_port_lookup(grp,
1640                             ports[i].lp_linkid);
1641                         ASSERT(port != NULL);
1642 
1643                         /*
1644                          * Turn the promiscuous mode back on if it is required
1645                          * to receive the non-primary address over a port, or
1646                          * the promiscous mode is enabled over the aggr.
1647                          */
1648                         mac_perim_enter_by_mh(port->lp_mh, &pmph);
1649                         if (port->lp_started && (grp->lg_promisc ||
1650                             port->lp_prom_addr != NULL)) {
1651                                 (void) aggr_port_promisc(port, B_TRUE);
1652                         }
1653                         mac_perim_exit(pmph);
1654                 }
1655                 goto bail;
1656         }
1657 
1658         /* remove the specified ports from group */
1659         for (i = 0; i < nports; i++) {
1660                 /* lookup port */
1661                 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1662                 ASSERT(port != NULL);
1663 
1664                 /* stop port if group has already been started */
1665                 if (grp->lg_started) {
1666                         mac_perim_enter_by_mh(port->lp_mh, &pmph);
1667                         aggr_port_stop(port);
1668                         mac_perim_exit(pmph);
1669                 }
1670 
1671                 /*
1672                  * aggr_rem_pseudo_tx_group() is not called here. Instead
1673                  * it is called from inside aggr_grp_rem_port() after the
1674                  * port has been detached. The reason is that
1675                  * aggr_rem_pseudo_tx_group() removes one ring at a time
1676                  * and if there is still traffic going on, then there
1677                  * is the possibility of aggr_find_tx_ring() returning a
1678                  * removed ring for transmission. Once the port has been
1679                  * detached, that port will not be used and
1680                  * aggr_find_tx_ring() will not return any rings
1681                  * belonging to it.
1682                  */
1683                 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1684 
1685                 /* remove port from group */
1686                 rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1687                     &link_state_changed);
1688                 ASSERT(rc == 0);
1689                 mac_addr_update = mac_addr_update || mac_addr_changed;
1690                 link_state_update = link_state_update || link_state_changed;
1691         }
1692 
1693 bail:
1694         if (mac_addr_update)
1695                 mac_unicst_update(grp->lg_mh, grp->lg_addr);
1696         if (link_state_update)
1697                 mac_link_update(grp->lg_mh, grp->lg_link_state);
1698 
1699         mac_perim_exit(mph);
1700         AGGR_GRP_REFRELE(grp);
1701 
1702         return (rc);
1703 }
1704 
/*
 * Destroy the link aggregation group identified by linkid.
 *
 * The group is looked up in aggr_grp_hash, its devnet link is destroyed
 * and its MAC is disabled.  Once those steps have succeeded the group is
 * removed from the hash, its helper threads are told to exit, every port
 * is stopped (if the group was started), detached and deleted, and the
 * MAC is unregistered.
 *
 * Returns 0 on success, ENOENT if no group is hashed under linkid, or the
 * error returned by dls_devnet_destroy() or mac_disable().  cred is only
 * used to obtain the zone id when the devnet link has to be re-created
 * after a mac_disable() failure.
 */
int
aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
{
        aggr_grp_t *grp = NULL;
        aggr_port_t *port, *cport;
        datalink_id_t tmpid;
        mod_hash_val_t val;
        mac_perim_handle_t mph, pmph;
        int err;
        kt_did_t tid = 0;

        /* Taken as writer: the group is removed from aggr_grp_hash below. */
        rw_enter(&aggr_grp_lock, RW_WRITER);

        if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
            (mod_hash_val_t *)&grp) != 0) {
                rw_exit(&aggr_grp_lock);
                return (ENOENT);
        }

        /*
         * Note that dls_devnet_destroy() must be called before lg_lock is
         * held. Otherwise, it will deadlock if another thread is in
         * aggr_m_stat() and thus has a kstat_hold() on the kstats that
         * dls_devnet_destroy() needs to delete.
         */
        if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
                rw_exit(&aggr_grp_lock);
                return (err);
        }
        ASSERT(linkid == tmpid);

        /*
         * Unregister from the MAC service module. Since this can
         * fail if a client hasn't closed the MAC port, we gracefully
         * fail the operation.
         */
        if ((err = mac_disable(grp->lg_mh)) != 0) {
                /*
                 * Undo the dls_devnet_destroy() above.  A failure to
                 * re-create the devnet link is deliberately ignored; the
                 * mac_disable() error is what the caller gets back.
                 */
                (void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred));
                rw_exit(&aggr_grp_lock);
                return (err);
        }
        /* Past this point the deletion can no longer fail. */
        (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
        ASSERT(grp == (aggr_grp_t *)val);

        ASSERT(aggr_grp_cnt > 0);
        aggr_grp_cnt--;
        rw_exit(&aggr_grp_lock);

        /*
         * Inform the lacp_rx thread to exit.
         *
         * We block on lg_lacp_cv until lg_lacp_rx_thread reads NULL
         * (presumably cleared by the thread itself as it exits).
         */
        mutex_enter(&grp->lg_lacp_lock);
        grp->lg_lacp_done = B_TRUE;
        cv_signal(&grp->lg_lacp_cv);
        while (grp->lg_lacp_rx_thread != NULL)
                cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
        mutex_exit(&grp->lg_lacp_lock);
        /*
         * Inform the tx_notify_thread to exit.
         *
         * The thread id is captured under lg_tx_flowctl_lock so that the
         * thread_join() can be issued after the lock has been dropped.
         */
        mutex_enter(&grp->lg_tx_flowctl_lock);
        if (grp->lg_tx_notify_thread != NULL) {
                tid = grp->lg_tx_notify_thread->t_did;
                grp->lg_tx_notify_done = B_TRUE;
                cv_signal(&grp->lg_tx_flowctl_cv);
        }
        mutex_exit(&grp->lg_tx_flowctl_lock);
        if (tid != 0)
                thread_join(tid);

        mac_perim_enter_by_mh(grp->lg_mh, &mph);

        grp->lg_closing = B_TRUE;
        /* detach and free MAC ports associated with group */
        port = grp->lg_ports;
        while (port != NULL) {
                /* Save the successor: port is deleted before we advance. */
                cport = port->lp_next;
                mac_perim_enter_by_mh(port->lp_mh, &pmph);
                if (grp->lg_started)
                        aggr_port_stop(port);
                (void) aggr_grp_detach_port(grp, port);
                mac_perim_exit(pmph);
                /* The pseudo rings are removed only after the detach. */
                aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
                aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
                aggr_port_delete(port);
                port = cport;
        }

        mac_perim_exit(mph);

        kmem_free(grp->lg_tx_blocked_rings,
            (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
        /*
         * Wait for the port's lacp timer thread and its notification callback
         * to exit before calling mac_unregister() since both needs to access
         * the mac perimeter of the grp.
         */
        aggr_grp_port_wait(grp);

        VERIFY(mac_unregister(grp->lg_mh) == 0);
        grp->lg_mh = NULL;

        /*
         * Drop a reference on the group.
         * NOTE(review): presumably the reference held on behalf of the
         * hash table entry removed above -- confirm against the create path.
         */
        AGGR_GRP_REFRELE(grp);
        return (0);
}
1810 
1811 void
1812 aggr_grp_free(aggr_grp_t *grp)
1813 {
1814         ASSERT(grp->lg_refs == 0);
1815         ASSERT(grp->lg_port_ref == 0);
1816         if (grp->lg_key > AGGR_MAX_KEY) {
1817                 id_free(key_ids, grp->lg_key);
1818                 grp->lg_key = 0;
1819         }
1820         kmem_cache_free(aggr_grp_cache, grp);
1821 }
1822 
1823 int
1824 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1825     aggr_grp_info_new_grp_fn_t new_grp_fn,
1826     aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
1827 {
1828         aggr_grp_t      *grp;
1829         aggr_port_t     *port;
1830         mac_perim_handle_t mph, pmph;
1831         int             rc = 0;
1832 
1833         /*
1834          * Make sure that the aggregation link is visible from the caller's
1835          * zone.
1836          */
1837         if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred)))
1838                 return (ENOENT);
1839 
1840         rw_enter(&aggr_grp_lock, RW_READER);
1841 
1842         if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1843             (mod_hash_val_t *)&grp) != 0) {
1844                 rw_exit(&aggr_grp_lock);
1845                 return (ENOENT);
1846         }
1847         AGGR_GRP_REFHOLD(grp);
1848 
1849         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1850         rw_exit(&aggr_grp_lock);
1851 
1852         rc = new_grp_fn(fn_arg, grp->lg_linkid,
1853             (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1854             grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1855             grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1856 
1857         if (rc != 0)
1858                 goto bail;
1859 
1860         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1861                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1862                 rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1863                     port->lp_state, &port->lp_lacp.ActorOperPortState);
1864                 mac_perim_exit(pmph);
1865 
1866                 if (rc != 0)
1867                         goto bail;
1868         }
1869 
1870 bail:
1871         mac_perim_exit(mph);
1872         AGGR_GRP_REFRELE(grp);
1873         return (rc);
1874 }
1875 
/*
 * ioctl entry point for the aggregation.  No ioctl is handled here: every
 * message is negatively acknowledged with ENOTSUP.  arg is unused.
 */
/*ARGSUSED*/
static void
aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
        miocnak(q, mp, 0, ENOTSUP);
}
1882 
1883 static int
1884 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1885 {
1886         aggr_port_t     *port;
1887         uint_t          stat_index;
1888 
1889         /* We only aggregate counter statistics. */
1890         if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1891             IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1892                 return (ENOTSUP);
1893         }
1894 
1895         /*
1896          * Counter statistics for a group are computed by aggregating the
1897          * counters of the members MACs while they were aggregated, plus
1898          * the residual counter of the group itself, which is updated each
1899          * time a MAC is removed from the group.
1900          */
1901         *val = 0;
1902         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1903                 /* actual port statistic */
1904                 *val += aggr_port_stat(port, stat);
1905                 /*
1906                  * minus the port stat when it was added, plus any residual
1907                  * amount for the group.
1908                  */
1909                 if (IS_MAC_STAT(stat)) {
1910                         stat_index = stat - MAC_STAT_MIN;
1911                         *val -= port->lp_stat[stat_index];
1912                         *val += grp->lg_stat[stat_index];
1913                 } else if (IS_MACTYPE_STAT(stat)) {
1914                         stat_index = stat - MACTYPE_STAT_MIN;
1915                         *val -= port->lp_ether_stat[stat_index];
1916                         *val += grp->lg_ether_stat[stat_index];
1917                 }
1918         }
1919         return (0);
1920 }
1921 
1922 int
1923 aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1924 {
1925         aggr_pseudo_rx_ring_t   *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver;
1926 
1927         if (rx_ring->arr_hw_rh != NULL) {
1928                 *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat);
1929         } else {
1930                 aggr_port_t     *port = rx_ring->arr_port;
1931 
1932                 *val = mac_stat_get(port->lp_mh, stat);
1933 
1934         }
1935         return (0);
1936 }
1937 
1938 int
1939 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1940 {
1941         aggr_pseudo_tx_ring_t   *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
1942 
1943         if (tx_ring->atr_hw_rh != NULL) {
1944                 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
1945         } else {
1946                 aggr_port_t     *port = tx_ring->atr_port;
1947 
1948                 *val = mac_stat_get(port->lp_mh, stat);
1949         }
1950         return (0);
1951 }
1952 
1953 static int
1954 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1955 {
1956         aggr_grp_t              *grp = arg;
1957         mac_perim_handle_t      mph;
1958         int                     rval = 0;
1959 
1960         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1961 
1962         switch (stat) {
1963         case MAC_STAT_IFSPEED:
1964                 *val = grp->lg_ifspeed;
1965                 break;
1966 
1967         case ETHER_STAT_LINK_DUPLEX:
1968                 *val = grp->lg_link_duplex;
1969                 break;
1970 
1971         default:
1972                 /*
1973                  * For all other statistics, we return the aggregated stat
1974                  * from the underlying ports.  aggr_grp_stat() will set
1975                  * rval appropriately if the statistic isn't a counter.
1976                  */
1977                 rval = aggr_grp_stat(grp, stat, val);
1978         }
1979 
1980         mac_perim_exit(mph);
1981         return (rval);
1982 }
1983 
1984 static int
1985 aggr_m_start(void *arg)
1986 {
1987         aggr_grp_t *grp = arg;
1988         aggr_port_t *port;
1989         mac_perim_handle_t mph, pmph;
1990 
1991         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1992 
1993         /*
1994          * Attempts to start all configured members of the group.
1995          * Group members will be attached when their link-up notification
1996          * is received.
1997          */
1998         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1999                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2000                 if (aggr_port_start(port) != 0) {
2001                         mac_perim_exit(pmph);
2002                         continue;
2003                 }
2004 
2005                 /*
2006                  * Turn on the promiscuous mode if it is required to receive
2007                  * the non-primary address over a port, or the promiscous
2008                  * mode is enabled over the aggr.
2009                  */
2010                 if (grp->lg_promisc || port->lp_prom_addr != NULL) {
2011                         if (aggr_port_promisc(port, B_TRUE) != 0)
2012                                 aggr_port_stop(port);
2013                 }
2014                 mac_perim_exit(pmph);
2015         }
2016 
2017         grp->lg_started = B_TRUE;
2018 
2019         mac_perim_exit(mph);
2020         return (0);
2021 }
2022 
2023 static void
2024 aggr_m_stop(void *arg)
2025 {
2026         aggr_grp_t *grp = arg;
2027         aggr_port_t *port;
2028         mac_perim_handle_t mph, pmph;
2029 
2030         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2031 
2032         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2033                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2034 
2035                 /* reset port promiscuous mode */
2036                 (void) aggr_port_promisc(port, B_FALSE);
2037 
2038                 aggr_port_stop(port);
2039                 mac_perim_exit(pmph);
2040         }
2041 
2042         grp->lg_started = B_FALSE;
2043         mac_perim_exit(mph);
2044 }
2045 
2046 static int
2047 aggr_m_promisc(void *arg, boolean_t on)
2048 {
2049         aggr_grp_t *grp = arg;
2050         aggr_port_t *port;
2051         boolean_t link_state_changed = B_FALSE;
2052         mac_perim_handle_t mph, pmph;
2053 
2054         AGGR_GRP_REFHOLD(grp);
2055         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2056 
2057         ASSERT(!grp->lg_closing);
2058 
2059         if (on == grp->lg_promisc)
2060                 goto bail;
2061 
2062         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2063                 int     err = 0;
2064 
2065                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2066                 AGGR_PORT_REFHOLD(port);
2067                 if (!on && (port->lp_prom_addr == NULL))
2068                         err = aggr_port_promisc(port, B_FALSE);
2069                 else if (on && port->lp_started)
2070                         err = aggr_port_promisc(port, B_TRUE);
2071 
2072                 if (err != 0) {
2073                         if (aggr_grp_detach_port(grp, port))
2074                                 link_state_changed = B_TRUE;
2075                 } else {
2076                         /*
2077                          * If a port was detached because of a previous
2078                          * failure changing the promiscuity, the port
2079                          * is reattached when it successfully changes
2080                          * the promiscuity now, and this might cause
2081                          * the link state of the aggregation to change.
2082                          */
2083                         if (aggr_grp_attach_port(grp, port))
2084                                 link_state_changed = B_TRUE;
2085                 }
2086                 mac_perim_exit(pmph);
2087                 AGGR_PORT_REFRELE(port);
2088         }
2089 
2090         grp->lg_promisc = on;
2091 
2092         if (link_state_changed)
2093                 mac_link_update(grp->lg_mh, grp->lg_link_state);
2094 
2095 bail:
2096         mac_perim_exit(mph);
2097         AGGR_GRP_REFRELE(grp);
2098 
2099         return (0);
2100 }
2101 
2102 static void
2103 aggr_grp_port_rename(const char *new_name, void *arg)
2104 {
2105         /*
2106          * aggr port's mac client name is the format of "aggr link name" plus
2107          * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
2108          */
2109         int aggr_len, link_len, clnt_name_len, i;
2110         char *str_end, *str_st, *str_del;
2111         char aggr_name[MAXNAMELEN];
2112         char link_name[MAXNAMELEN];
2113         char *clnt_name;
2114         aggr_grp_t *aggr_grp = arg;
2115         aggr_port_t *aggr_port = aggr_grp->lg_ports;
2116 
2117         for (i = 0; i < aggr_grp->lg_nports; i++) {
2118                 clnt_name = mac_client_name(aggr_port->lp_mch);
2119                 clnt_name_len = strlen(clnt_name);
2120                 str_st = clnt_name;
2121                 str_end = &(clnt_name[clnt_name_len]);
2122                 str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
2123                 ASSERT(str_del != NULL);
2124                 aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
2125                 link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
2126                 bzero(aggr_name, MAXNAMELEN);
2127                 bzero(link_name, MAXNAMELEN);
2128                 bcopy(clnt_name, aggr_name, aggr_len);
2129                 bcopy(str_del, link_name, link_len + 1);
2130                 bzero(clnt_name, MAXNAMELEN);
2131                 (void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
2132                     link_name);
2133 
2134                 (void) mac_rename_primary(aggr_port->lp_mh, NULL);
2135                 aggr_port = aggr_port->lp_next;
2136         }
2137 }
2138 
2139 /*
2140  * Initialize the capabilities that are advertised for the group
2141  * according to the capabilities of the constituent ports.
2142  */
2143 static boolean_t
2144 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
2145 {
2146         aggr_grp_t *grp = arg;
2147 
2148         switch (cap) {
2149         case MAC_CAPAB_HCKSUM: {
2150                 uint32_t *hcksum_txflags = cap_data;
2151                 *hcksum_txflags = grp->lg_hcksum_txflags;
2152                 break;
2153         }
2154         case MAC_CAPAB_LSO: {
2155                 mac_capab_lso_t *cap_lso = cap_data;
2156 
2157                 if (grp->lg_lso) {
2158                         *cap_lso = grp->lg_cap_lso;
2159                         break;
2160                 } else {
2161                         return (B_FALSE);
2162                 }
2163         }
2164         case MAC_CAPAB_NO_NATIVEVLAN:
2165                 return (!grp->lg_vlan);
2166         case MAC_CAPAB_NO_ZCOPY:
2167                 return (!grp->lg_zcopy);
2168         case MAC_CAPAB_RINGS: {
2169                 mac_capab_rings_t *cap_rings = cap_data;
2170 
2171                 if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2172                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2173                         cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
2174 
2175                         /*
2176                          * An aggregation advertises only one (pseudo) RX
2177                          * group, which virtualizes the main/primary group of
2178                          * the underlying devices.
2179                          */
2180                         cap_rings->mr_gnum = 1;
2181                         cap_rings->mr_gaddring = NULL;
2182                         cap_rings->mr_gremring = NULL;
2183                 } else {
2184                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2185                         cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt;
2186                         cap_rings->mr_gnum = 0;
2187                 }
2188                 cap_rings->mr_rget = aggr_fill_ring;
2189                 cap_rings->mr_gget = aggr_fill_group;
2190                 break;
2191         }
2192         case MAC_CAPAB_AGGR:
2193         {
2194                 mac_capab_aggr_t *aggr_cap;
2195 
2196                 if (cap_data != NULL) {
2197                         aggr_cap = cap_data;
2198                         aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2199                         aggr_cap->mca_unicst = aggr_m_unicst;
2200                         aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
2201                         aggr_cap->mca_arg = arg;
2202                 }
2203                 return (B_TRUE);
2204         }
2205         default:
2206                 return (B_FALSE);
2207         }
2208         return (B_TRUE);
2209 }
2210 
2211 /*
2212  * Callback funtion for MAC layer to register groups.
2213  */
2214 static void
2215 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2216     mac_group_info_t *infop, mac_group_handle_t gh)
2217 {
2218         aggr_grp_t *grp = arg;
2219         aggr_pseudo_rx_group_t *rx_group;
2220         aggr_pseudo_tx_group_t *tx_group;
2221 
2222         ASSERT(index == 0);
2223         if (rtype == MAC_RING_TYPE_RX) {
2224                 rx_group = &grp->lg_rx_group;
2225                 rx_group->arg_gh = gh;
2226                 rx_group->arg_grp = grp;
2227 
2228                 infop->mgi_driver = (mac_group_driver_t)rx_group;
2229                 infop->mgi_start = NULL;
2230                 infop->mgi_stop = NULL;
2231                 infop->mgi_addmac = aggr_addmac;
2232                 infop->mgi_remmac = aggr_remmac;
2233                 infop->mgi_count = rx_group->arg_ring_cnt;
2234         } else {
2235                 tx_group = &grp->lg_tx_group;
2236                 tx_group->atg_gh = gh;
2237         }
2238 }
2239 
2240 /*
2241  * Callback funtion for MAC layer to register all rings.
2242  */
2243 static void
2244 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2245     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2246 {
2247         aggr_grp_t      *grp = arg;
2248 
2249         switch (rtype) {
2250         case MAC_RING_TYPE_RX: {
2251                 aggr_pseudo_rx_group_t  *rx_group = &grp->lg_rx_group;
2252                 aggr_pseudo_rx_ring_t   *rx_ring;
2253                 mac_intr_t              aggr_mac_intr;
2254 
2255                 ASSERT(rg_index == 0);
2256 
2257                 ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
2258                 rx_ring = rx_group->arg_rings + index;
2259                 rx_ring->arr_rh = rh;
2260 
2261                 /*
2262                  * Entrypoint to enable interrupt (disable poll) and
2263                  * disable interrupt (enable poll).
2264                  */
2265                 aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
2266                 aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
2267                 aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
2268                 aggr_mac_intr.mi_ddi_handle = NULL;
2269 
2270                 infop->mri_driver = (mac_ring_driver_t)rx_ring;
2271                 infop->mri_start = aggr_pseudo_start_ring;
2272                 infop->mri_stop = aggr_pseudo_stop_ring;
2273 
2274                 infop->mri_intr = aggr_mac_intr;
2275                 infop->mri_poll = aggr_rx_poll;
2276 
2277                 infop->mri_stat = aggr_rx_ring_stat;
2278                 break;
2279         }
2280         case MAC_RING_TYPE_TX: {
2281                 aggr_pseudo_tx_group_t  *tx_group = &grp->lg_tx_group;
2282                 aggr_pseudo_tx_ring_t   *tx_ring;
2283 
2284                 ASSERT(rg_index == -1);
2285                 ASSERT(index < tx_group->atg_ring_cnt);
2286 
2287                 tx_ring = &tx_group->atg_rings[index];
2288                 tx_ring->atr_rh = rh;
2289 
2290                 infop->mri_driver = (mac_ring_driver_t)tx_ring;
2291                 infop->mri_start = NULL;
2292                 infop->mri_stop = NULL;
2293                 infop->mri_tx = aggr_ring_tx;
2294                 infop->mri_stat = aggr_tx_ring_stat;
2295                 /*
2296                  * Use the hw TX ring handle to find if the ring needs
2297                  * serialization or not. For NICs that do not expose
2298                  * Tx rings, atr_hw_rh will be NULL.
2299                  */
2300                 if (tx_ring->atr_hw_rh != NULL) {
2301                         infop->mri_flags =
2302                             mac_hwring_getinfo(tx_ring->atr_hw_rh);
2303                 }
2304                 break;
2305         }
2306         default:
2307                 break;
2308         }
2309 }
2310 
2311 static mblk_t *
2312 aggr_rx_poll(void *arg, int bytes_to_pickup)
2313 {
2314         aggr_pseudo_rx_ring_t *rr_ring = arg;
2315         aggr_port_t *port = rr_ring->arr_port;
2316         aggr_grp_t *grp = port->lp_grp;
2317         mblk_t *mp_chain, *mp, **mpp;
2318 
2319         mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
2320 
2321         if (grp->lg_lacp_mode == AGGR_LACP_OFF)
2322                 return (mp_chain);
2323 
2324         mpp = &mp_chain;
2325         while ((mp = *mpp) != NULL) {
2326                 if (MBLKL(mp) >= sizeof (struct ether_header)) {
2327                         struct ether_header *ehp;
2328 
2329                         ehp = (struct ether_header *)mp->b_rptr;
2330                         if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
2331                                 *mpp = mp->b_next;
2332                                 mp->b_next = NULL;
2333                                 aggr_recv_lacp(port,
2334                                     (mac_resource_handle_t)rr_ring, mp);
2335                                 continue;
2336                         }
2337                 }
2338 
2339                 if (!port->lp_collector_enabled) {
2340                         *mpp = mp->b_next;
2341                         mp->b_next = NULL;
2342                         freemsg(mp);
2343                         continue;
2344                 }
2345                 mpp = &mp->b_next;
2346         }
2347         return (mp_chain);
2348 }
2349 
2350 static int
2351 aggr_addmac(void *arg, const uint8_t *mac_addr)
2352 {
2353         aggr_pseudo_rx_group_t  *rx_group = (aggr_pseudo_rx_group_t *)arg;
2354         aggr_unicst_addr_t      *addr, **pprev;
2355         aggr_grp_t              *grp = rx_group->arg_grp;
2356         aggr_port_t             *port, *p;
2357         mac_perim_handle_t      mph;
2358         int                     err = 0;
2359 
2360         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2361 
2362         if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2363                 mac_perim_exit(mph);
2364                 return (0);
2365         }
2366 
2367         /*
2368          * Insert this mac address into the list of mac addresses owned by
2369          * the aggregation pseudo group.
2370          */
2371         pprev = &rx_group->arg_macaddr;
2372         while ((addr = *pprev) != NULL) {
2373                 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
2374                         mac_perim_exit(mph);
2375                         return (EEXIST);
2376                 }
2377                 pprev = &addr->aua_next;
2378         }
2379         addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
2380         bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
2381         addr->aua_next = NULL;
2382         *pprev = addr;
2383 
2384         for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2385                 if ((err = aggr_port_addmac(port, mac_addr)) != 0)
2386                         break;
2387 
2388         if (err != 0) {
2389                 for (p = grp->lg_ports; p != port; p = p->lp_next)
2390                         aggr_port_remmac(p, mac_addr);
2391 
2392                 *pprev = NULL;
2393                 kmem_free(addr, sizeof (aggr_unicst_addr_t));
2394         }
2395 
2396         mac_perim_exit(mph);
2397         return (err);
2398 }
2399 
2400 static int
2401 aggr_remmac(void *arg, const uint8_t *mac_addr)
2402 {
2403         aggr_pseudo_rx_group_t  *rx_group = (aggr_pseudo_rx_group_t *)arg;
2404         aggr_unicst_addr_t      *addr, **pprev;
2405         aggr_grp_t              *grp = rx_group->arg_grp;
2406         aggr_port_t             *port;
2407         mac_perim_handle_t      mph;
2408         int                     err = 0;
2409 
2410         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2411 
2412         if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2413                 mac_perim_exit(mph);
2414                 return (0);
2415         }
2416 
2417         /*
2418          * Insert this mac address into the list of mac addresses owned by
2419          * the aggregation pseudo group.
2420          */
2421         pprev = &rx_group->arg_macaddr;
2422         while ((addr = *pprev) != NULL) {
2423                 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2424                         pprev = &addr->aua_next;
2425                         continue;
2426                 }
2427                 break;
2428         }
2429         if (addr == NULL) {
2430                 mac_perim_exit(mph);
2431                 return (EINVAL);
2432         }
2433 
2434         for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2435                 aggr_port_remmac(port, mac_addr);
2436 
2437         *pprev = addr->aua_next;
2438         kmem_free(addr, sizeof (aggr_unicst_addr_t));
2439 
2440         mac_perim_exit(mph);
2441         return (err);
2442 }
2443 
2444 /*
2445  * Add or remove the multicast addresses that are defined for the group
2446  * to or from the specified port.
2447  *
2448  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2449  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2450  * called when the port is either stopped or detached.
2451  */
2452 void
2453 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2454 {
2455         aggr_grp_t *grp = port->lp_grp;
2456 
2457         ASSERT(MAC_PERIM_HELD(port->lp_mh));
2458         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2459 
2460         if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2461                 return;
2462 
2463         mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
2464 }
2465 
2466 static int
2467 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
2468 {
2469         aggr_grp_t *grp = arg;
2470         aggr_port_t *port = NULL, *errport = NULL;
2471         mac_perim_handle_t mph;
2472         int err = 0;
2473 
2474         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2475         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2476                 if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2477                     !port->lp_started) {
2478                         continue;
2479                 }
2480                 err = aggr_port_multicst(port, add, addrp);
2481                 if (err != 0) {
2482                         errport = port;
2483                         break;
2484                 }
2485         }
2486 
2487         /*
2488          * At least one port caused error return and this error is returned to
2489          * mac, eventually a NAK would be sent upwards.
2490          * Some ports have this multicast address listed now, and some don't.
2491          * Treat this error as a whole aggr failure not individual port failure.
2492          * Therefore remove this multicast address from other ports.
2493          */
2494         if ((err != 0) && add) {
2495                 for (port = grp->lg_ports; port != errport;
2496                     port = port->lp_next) {
2497                         if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2498                             !port->lp_started) {
2499                                 continue;
2500                         }
2501                         (void) aggr_port_multicst(port, B_FALSE, addrp);
2502                 }
2503         }
2504         mac_perim_exit(mph);
2505         return (err);
2506 }
2507 
2508 static int
2509 aggr_m_unicst(void *arg, const uint8_t *macaddr)
2510 {
2511         aggr_grp_t *grp = arg;
2512         mac_perim_handle_t mph;
2513         int err;
2514 
2515         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2516         err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
2517             0, 0);
2518         mac_perim_exit(mph);
2519         return (err);
2520 }
2521 
2522 /*
2523  * Initialize the capabilities that are advertised for the group
2524  * according to the capabilities of the constituent ports.
2525  */
2526 static void
2527 aggr_grp_capab_set(aggr_grp_t *grp)
2528 {
2529         uint32_t cksum;
2530         aggr_port_t *port;
2531         mac_capab_lso_t cap_lso;
2532 
2533         ASSERT(grp->lg_mh == NULL);
2534         ASSERT(grp->lg_ports != NULL);
2535 
2536         grp->lg_hcksum_txflags = (uint32_t)-1;
2537         grp->lg_zcopy = B_TRUE;
2538         grp->lg_vlan = B_TRUE;
2539 
2540         grp->lg_lso = B_TRUE;
2541         grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
2542         grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;
2543 
2544         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2545                 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
2546                         cksum = 0;
2547                 grp->lg_hcksum_txflags &= cksum;
2548 
2549                 grp->lg_vlan &=
2550                     !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
2551 
2552                 grp->lg_zcopy &=
2553                     !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
2554 
2555                 grp->lg_lso &=
2556                     mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
2557                 if (grp->lg_lso) {
2558                         grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
2559                         if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2560                             cap_lso.lso_basic_tcp_ipv4.lso_max)
2561                                 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
2562                                     cap_lso.lso_basic_tcp_ipv4.lso_max;
2563                 }
2564         }
2565 }
2566 
2567 /*
2568  * Checks whether the capabilities of the port being added are compatible
2569  * with the current capabilities of the aggregation.
2570  */
2571 static boolean_t
2572 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
2573 {
2574         uint32_t hcksum_txflags;
2575 
2576         ASSERT(grp->lg_ports != NULL);
2577 
2578         if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
2579             grp->lg_vlan) != grp->lg_vlan) {
2580                 return (B_FALSE);
2581         }
2582 
2583         if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
2584             grp->lg_zcopy) != grp->lg_zcopy) {
2585                 return (B_FALSE);
2586         }
2587 
2588         if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
2589                 if (grp->lg_hcksum_txflags != 0)
2590                         return (B_FALSE);
2591         } else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
2592             grp->lg_hcksum_txflags) {
2593                 return (B_FALSE);
2594         }
2595 
2596         if (grp->lg_lso) {
2597                 mac_capab_lso_t cap_lso;
2598 
2599                 if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
2600                         if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
2601                             grp->lg_cap_lso.lso_flags)
2602                                 return (B_FALSE);
2603                         if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2604                             cap_lso.lso_basic_tcp_ipv4.lso_max)
2605                                 return (B_FALSE);
2606                 } else {
2607                         return (B_FALSE);
2608                 }
2609         }
2610 
2611         return (B_TRUE);
2612 }
2613 
2614 /*
2615  * Returns the maximum SDU according to the SDU of the constituent ports.
2616  */
2617 static uint_t
2618 aggr_grp_max_sdu(aggr_grp_t *grp)
2619 {
2620         uint_t max_sdu = (uint_t)-1;
2621         aggr_port_t *port;
2622 
2623         ASSERT(grp->lg_ports != NULL);
2624 
2625         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2626                 uint_t port_sdu_max;
2627 
2628                 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2629                 if (max_sdu > port_sdu_max)
2630                         max_sdu = port_sdu_max;
2631         }
2632 
2633         return (max_sdu);
2634 }
2635 
2636 /*
2637  * Checks if the maximum SDU of the specified port is compatible
2638  * with the maximum SDU of the specified aggregation group, returns
2639  * B_TRUE if it is, B_FALSE otherwise.
2640  */
2641 static boolean_t
2642 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
2643 {
2644         uint_t port_sdu_max;
2645 
2646         mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2647         return (port_sdu_max >= grp->lg_max_sdu);
2648 }
2649 
2650 /*
2651  * Returns the maximum margin according to the margin of the constituent ports.
2652  */
2653 static uint32_t
2654 aggr_grp_max_margin(aggr_grp_t *grp)
2655 {
2656         uint32_t margin = UINT32_MAX;
2657         aggr_port_t *port;
2658 
2659         ASSERT(grp->lg_mh == NULL);
2660         ASSERT(grp->lg_ports != NULL);
2661 
2662         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2663                 if (margin > port->lp_margin)
2664                         margin = port->lp_margin;
2665         }
2666 
2667         grp->lg_margin = margin;
2668         return (margin);
2669 }
2670 
2671 /*
2672  * Checks if the maximum margin of the specified port is compatible
2673  * with the maximum margin of the specified aggregation group, returns
2674  * B_TRUE if it is, B_FALSE otherwise.
2675  */
2676 static boolean_t
2677 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
2678 {
2679         if (port->lp_margin >= grp->lg_margin)
2680                 return (B_TRUE);
2681 
2682         /*
2683          * See whether the current margin value is allowed to be changed to
2684          * the new value.
2685          */
2686         if (!mac_margin_update(grp->lg_mh, port->lp_margin))
2687                 return (B_FALSE);
2688 
2689         grp->lg_margin = port->lp_margin;
2690         return (B_TRUE);
2691 }
2692 
2693 /*
2694  * Set MTU on individual ports of an aggregation group
2695  */
2696 static int
2697 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
2698     uint32_t *old_mtu)
2699 {
2700         boolean_t               removed = B_FALSE;
2701         mac_perim_handle_t      mph;
2702         mac_diag_t              diag;
2703         int                     err, rv, retry = 0;
2704 
2705         if (port->lp_mah != NULL) {
2706                 (void) mac_unicast_remove(port->lp_mch, port->lp_mah);
2707                 port->lp_mah = NULL;
2708                 removed = B_TRUE;
2709         }
2710         err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
2711 try_again:
2712         if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
2713             MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
2714             &port->lp_mah, 0, &diag)) != 0) {
2715                 /*
2716                  * following is a workaround for a bug in 'bge' driver.
2717                  * See CR 6794654 for more information and this work around
2718                  * will be removed once the CR is fixed.
2719                  */
2720                 if (rv == EIO && retry++ < 3) {
2721                         delay(2 * hz);
2722                         goto try_again;
2723                 }
2724                 /*
2725                  * if mac_unicast_add() failed while setting the MTU,
2726                  * detach the port from the group.
2727                  */
2728                 mac_perim_enter_by_mh(port->lp_mh, &mph);
2729                 (void) aggr_grp_detach_port(grp, port);
2730                 mac_perim_exit(mph);
2731                 cmn_err(CE_WARN, "Unable to restart the port %s while "
2732                     "setting MTU. Detaching the port from the aggregation.",
2733                     mac_client_name(port->lp_mch));
2734         }
2735         return (err);
2736 }
2737 
2738 static int
2739 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
2740 {
2741         int                     err = 0, i, rv;
2742         aggr_port_t             *port;
2743         uint32_t                *mtu;
2744 
2745         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2746 
2747         /*
2748          * If the MTU being set is equal to aggr group's maximum
2749          * allowable value, then there is nothing to change
2750          */
2751         if (sdu == grp->lg_max_sdu)
2752                 return (0);
2753 
2754         /* 0 is aggr group's min sdu */
2755         if (sdu == 0)
2756                 return (EINVAL);
2757 
2758         mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
2759         for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
2760             port = port->lp_next, i++) {
2761                 err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
2762         }
2763         if (err != 0) {
2764                 /* recover from error: reset the mtus of the ports */
2765                 aggr_port_t *tmp;
2766 
2767                 for (tmp = grp->lg_ports, i = 0; tmp != port;
2768                     tmp = tmp->lp_next, i++) {
2769                         (void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
2770                 }
2771                 goto bail;
2772         }
2773         grp->lg_max_sdu = aggr_grp_max_sdu(grp);
2774         rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
2775         ASSERT(rv == 0);
2776 bail:
2777         kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
2778         return (err);
2779 }
2780 
2781 /*
2782  * Callback functions for set/get of properties
2783  */
2784 /*ARGSUSED*/
2785 static int
2786 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2787     uint_t pr_valsize, const void *pr_val)
2788 {
2789         int             err = ENOTSUP;
2790         aggr_grp_t      *grp = m_driver;
2791 
2792         switch (pr_num) {
2793         case MAC_PROP_MTU: {
2794                 uint32_t        mtu;
2795 
2796                 if (pr_valsize < sizeof (mtu)) {
2797                         err = EINVAL;
2798                         break;
2799                 }
2800                 bcopy(pr_val, &mtu, sizeof (mtu));
2801                 err = aggr_sdu_update(grp, mtu);
2802                 break;
2803         }
2804         default:
2805                 break;
2806         }
2807         return (err);
2808 }
2809 
/*
 * One endpoint of an MTU range, as collected and sorted by
 * aggr_mtu_range_intersection().
 */
typedef struct rboundary {
        uint32_t        bval;   /* boundary value: a range's min or max */
        int             btype;  /* +1 for a min boundary, -1 for a max */
} rboundary_t;
2814 
2815 /*
2816  * This function finds the intersection of mtu ranges stored in arrays -
2817  * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval.
2818  * Individual arrays are assumed to contain non-overlapping ranges.
2819  * Algorithm:
2820  *   A range has two boundaries - min and max. We scan all arrays and store
2821  * each boundary as a separate element in a temporary array. We also store
2822  * the boundary types, min or max, as +1 or -1 respectively in the temporary
2823  * array. Then we sort the temporary array in ascending order. We scan the
2824  * sorted array from lower to higher values and keep a cumulative sum of
2825  * boundary types. Element in the temporary array for which the sum reaches
2826  * mcount is a min boundary of a range in the result and next element will be
2827  * max boundary.
2828  *
2829  * Example for mcount = 3,
2830  *
2831  *  ----|_________|-------|_______|----|__|------ mrange[0]
2832  *
2833  *  -------|________|--|____________|-----|___|-- mrange[1]
2834  *
2835  *  --------|________________|-------|____|------ mrange[2]
2836  *
2837  *                                      3 2 1
2838  *                                       \|/
2839  *      1  23     2 1  2  3  2    1 01 2  V   0  <- the sum
2840  *  ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array
2841  *
2842  *                                 same min and max
2843  *                                        V
2844  *  --------|_____|-------|__|------------|------ intersecting ranges
2845  */
2846 void
2847 aggr_mtu_range_intersection(mac_propval_range_t **mrange, int mcount,
2848     mac_propval_uint32_range_t **prval, int *prmaxcnt, int *prcount)
2849 {
2850         mac_propval_uint32_range_t      *rval, *ur;
2851         int                             rmaxcnt, rcount;
2852         size_t                          sz_range32;
2853         rboundary_t                     *ta; /* temporary array */
2854         rboundary_t                     temp;
2855         boolean_t                       range_started = B_FALSE;
2856         int                             i, j, m, sum;
2857 
2858         sz_range32 = sizeof (mac_propval_uint32_range_t);
2859 
2860         for (i = 0, rmaxcnt = 0; i < mcount; i++)
2861                 rmaxcnt += mrange[i]->mpr_count;
2862 
2863         /* Allocate enough space to store the results */
2864         rval = kmem_alloc(rmaxcnt * sz_range32, KM_SLEEP);
2865 
2866         /* Number of boundaries are twice as many as ranges */
2867         ta = kmem_alloc(2 * rmaxcnt * sizeof (rboundary_t), KM_SLEEP);
2868 
2869         for (i = 0, m = 0; i < mcount; i++) {
2870                 ur = &(mrange[i]->mpr_range_uint32[0]);
2871                 for (j = 0; j < mrange[i]->mpr_count; j++) {
2872                         ta[m].bval = ur[j].mpur_min;
2873                         ta[m++].btype = 1;
2874                         ta[m].bval = ur[j].mpur_max;
2875                         ta[m++].btype = -1;
2876                 }
2877         }
2878 
2879         /*
2880          * Sort the temporary array in ascending order of bval;
2881          * if boundary values are same then sort on btype.
2882          */
2883         for (i = 0; i < m-1; i++) {
2884                 for (j = i+1; j < m; j++) {
2885                         if ((ta[i].bval > ta[j].bval) ||
2886                             ((ta[i].bval == ta[j].bval) &&
2887                             (ta[i].btype < ta[j].btype))) {
2888                                 temp = ta[i];
2889                                 ta[i] = ta[j];
2890                                 ta[j] = temp;
2891                         }
2892                 }
2893         }
2894 
2895         /* Walk through temporary array to find all ranges in the results */
2896         for (i = 0, sum = 0, rcount = 0; i < m; i++) {
2897                 sum += ta[i].btype;
2898                 if (sum == mcount) {
2899                         rval[rcount].mpur_min = ta[i].bval;
2900                         range_started = B_TRUE;
2901                 } else if (sum < mcount && range_started) {
2902                         rval[rcount++].mpur_max = ta[i].bval;
2903                         range_started = B_FALSE;
2904                 }
2905         }
2906 
2907         *prval = rval;
2908         *prmaxcnt = rmaxcnt;
2909         *prcount = rcount;
2910 
2911         kmem_free(ta, 2 * rmaxcnt * sizeof (rboundary_t));
2912 }
2913 
2914 /*
2915  * Returns the mtu ranges which could be supported by aggr group.
2916  * prmaxcnt returns the size of the buffer prval, prcount returns
2917  * the number of valid entries in prval. Caller is responsible
2918  * for freeing up prval.
2919  */
2920 int
2921 aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_uint32_range_t **prval,
2922     int *prmaxcnt, int *prcount)
2923 {
2924         mac_propval_range_t             **vals;
2925         aggr_port_t                     *port;
2926         mac_perim_handle_t              mph;
2927         uint_t                          i, numr;
2928         int                             err = 0;
2929         size_t                          sz_propval, sz_range32;
2930         size_t                          size;
2931 
2932         sz_propval = sizeof (mac_propval_range_t);
2933         sz_range32 = sizeof (mac_propval_uint32_range_t);
2934 
2935         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2936 
2937         vals = kmem_zalloc(sizeof (mac_propval_range_t *) * grp->lg_nports,
2938             KM_SLEEP);
2939 
2940         for (port = grp->lg_ports, i = 0; port != NULL;
2941             port = port->lp_next, i++) {
2942 
2943                 size = sz_propval;
2944                 vals[i] = kmem_alloc(size, KM_SLEEP);
2945                 vals[i]->mpr_count = 1;
2946 
2947                 mac_perim_enter_by_mh(port->lp_mh, &mph);
2948 
2949                 err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
2950                     NULL, 0, vals[i], NULL);
2951                 if (err == ENOSPC) {
2952                         /*
2953                          * Not enough space to hold all ranges.
2954                          * Allocate extra space as indicated and retry.
2955                          */
2956                         numr = vals[i]->mpr_count;
2957                         kmem_free(vals[i], sz_propval);
2958                         size = sz_propval + (numr - 1) * sz_range32;
2959                         vals[i] = kmem_alloc(size, KM_SLEEP);
2960                         vals[i]->mpr_count = numr;
2961                         err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
2962                             NULL, 0, vals[i], NULL);
2963                         ASSERT(err != ENOSPC);
2964                 }
2965                 mac_perim_exit(mph);
2966                 if (err != 0) {
2967                         kmem_free(vals[i], size);
2968                         vals[i] = NULL;
2969                         break;
2970                 }
2971         }
2972 
2973         /*
2974          * if any of the underlying ports does not support changing MTU then
2975          * just return ENOTSUP
2976          */
2977         if (port != NULL) {
2978                 ASSERT(err != 0);
2979                 goto done;
2980         }
2981 
2982         aggr_mtu_range_intersection(vals, grp->lg_nports, prval, prmaxcnt,
2983             prcount);
2984 
2985 done:
2986         for (i = 0; i < grp->lg_nports; i++) {
2987                 if (vals[i] != NULL) {
2988                         numr = vals[i]->mpr_count;
2989                         size = sz_propval + (numr - 1) * sz_range32;
2990                         kmem_free(vals[i], size);
2991                 }
2992         }
2993 
2994         kmem_free(vals, sizeof (mac_propval_range_t *) * grp->lg_nports);
2995         return (err);
2996 }
2997 
2998 static void
2999 aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
3000     mac_prop_info_handle_t prh)
3001 {
3002         aggr_grp_t                      *grp = m_driver;
3003         mac_propval_uint32_range_t      *rval = NULL;
3004         int                             i, rcount, rmaxcnt;
3005         int                             err = 0;
3006 
3007         _NOTE(ARGUNUSED(pr_name));
3008 
3009         switch (pr_num) {
3010         case MAC_PROP_MTU:
3011 
3012                 err = aggr_grp_possible_mtu_range(grp, &rval, &rmaxcnt,
3013                     &rcount);
3014                 if (err != 0) {
3015                         ASSERT(rval == NULL);
3016                         return;
3017                 }
3018                 for (i = 0; i < rcount; i++) {
3019                         mac_prop_info_set_range_uint32(prh,
3020                             rval[i].mpur_min, rval[i].mpur_max);
3021                 }
3022                 kmem_free(rval, sizeof (mac_propval_uint32_range_t) * rmaxcnt);
3023                 break;
3024         }
3025 }