Print this page
6274 MAC tries to use aggr rings from downed links
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Richard Lowe <richlowe@richlowe.net>


   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.

  23  */
  24 
  25 /*
  26  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
  27  *
  28  * An instance of the structure aggr_grp_t is allocated for each
  29  * link aggregation group. When created, aggr_grp_t objects are
  30  * entered into the aggr_grp_hash hash table maintained by the modhash
  31  * module. The hash key is the linkid associated with the link
  32  * aggregation group.
  33  *
  34  * A set of MAC ports is associated with each link aggregation group.
  35  *
  36  * Aggr pseudo TX rings
  37  * --------------------
  38  * The underlying ports (NICs) in an aggregation can have TX rings. To
  39  * enhance aggr's performance, these TX rings are made available to the
  40  * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
  41  * it is already implemented on the RX side, where they are known as
  42  * pseudo RX rings. The same concept is extended to the TX side where


 555 
 556         /*
 557          * Back reference to the group it is member of. A port always
 558          * holds a reference to its group to ensure that the back
 559          * reference is always valid.
 560          */
 561         port->lp_grp = grp;
 562         AGGR_GRP_REFHOLD(grp);
 563         grp->lg_nports++;
 564 
 565         aggr_lacp_init_port(port);
 566         mac_perim_exit(mph);
 567 
 568         if (pp != NULL)
 569                 *pp = port;
 570 
 571         return (0);
 572 }
 573 
 574 /*
































 575  * Add a pseudo RX ring for the given HW ring handle.
 576  */
 577 static int
 578 aggr_add_pseudo_rx_ring(aggr_port_t *port,
 579     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
 580 {
 581         aggr_pseudo_rx_ring_t   *ring;
 582         int                     err;
 583         int                     j;
 584 
 585         for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
 586                 ring = rx_grp->arg_rings + j;
 587                 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
 588                         break;
 589         }
 590 
 591         /*
 592          * No slot for this new RX ring.
 593          */
 594         if (j == MAX_RINGS_PER_GROUP)


 800         ring->atr_port = port;
 801         tx_grp->atg_ring_cnt++;
 802 
 803         /*
 804          * The TX side has no concept of ring groups unlike RX groups.
 805          * There is just a single group which stores all the TX rings.
 806          * This group will be used to store aggr's pseudo TX rings.
 807          */
 808         if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) {
 809                 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
 810                 ring->atr_hw_rh = NULL;
 811                 ring->atr_port = NULL;
 812                 tx_grp->atg_ring_cnt--;
 813         } else {
 814                 *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i);
 815                 if (hw_rh != NULL) {
 816                         mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
 817                             mac_find_ring(tx_grp->atg_gh, i));
 818                 }
 819         }

 820         return (err);
 821 }
 822 
 823 /*
 824  * Remove the pseudo TX ring of the given HW ring handle.
 825  */
 826 static void
 827 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp,
 828     mac_ring_handle_t pseudo_hw_rh)
 829 {
 830         aggr_pseudo_tx_ring_t   *ring;
 831         int                     i;
 832 
 833         for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
 834                 ring = tx_grp->atg_rings + i;
 835                 if (ring->atr_rh != pseudo_hw_rh)
 836                         continue;
 837 
 838                 ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE);
 839                 mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh);


 902         }
 903 
 904         if (err != 0) {
 905                 if (hw_rh_cnt != 0) {
 906                         for (j = 0; j < i; j++) {
 907                                 aggr_rem_pseudo_tx_ring(tx_grp,
 908                                     port->lp_pseudo_tx_rings[j]);
 909                         }
 910                 }
 911                 kmem_free(port->lp_tx_rings,
 912                     (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 913                 kmem_free(port->lp_pseudo_tx_rings,
 914                     (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 915                 port->lp_tx_ring_cnt = 0;
 916         } else {
 917                 port->lp_tx_grp_added = B_TRUE;
 918                 port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch,
 919                     aggr_tx_ring_update, port);
 920         }
 921         mac_perim_exit(pmph);

 922         return (err);
 923 }
 924 
 925 /*
 926  * This function is called by aggr to remove pseudo TX rings over the
 927  * HW rings of the underlying port.
      *
      * The caller must already hold the aggregation group's MAC perimeter
      * (asserted below); the port's own perimeter is entered for the duration
      * of the teardown. If no pseudo TX group was added for this port
      * (lp_tx_grp_added is clear) the function does nothing.
 928  */
 929 static void
 930 aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
 931 {
 932         aggr_grp_t              *grp = port->lp_grp;
 933         mac_perim_handle_t      pmph;
 934         int                     i;
 935 
 936         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 937         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 938 
             /* Nothing was set up for this port, so there is nothing to undo. */
 939         if (!port->lp_tx_grp_added)
 940                 goto done;
 941 
 942         ASSERT(tx_grp->atg_gh != NULL);
 943 
             /* Detach every pseudo TX ring this port contributed to tx_grp. */
 944         for (i = 0; i < port->lp_tx_ring_cnt; i++)
 945                 aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]);
 946 
             /*
              * Both arrays are sized from lp_tx_ring_cnt, so they must be freed
              * before the count is reset below.
              * NOTE(review): the element size here must match the allocation,
              * which is not visible in this excerpt -- confirm.
              */
 947         kmem_free(port->lp_tx_rings,
 948             (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 949         kmem_free(port->lp_pseudo_tx_rings,
 950             (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 951 
 952         port->lp_tx_ring_cnt = 0;
             /*
              * Pass a NULL callback together with the handle saved when
              * aggr_tx_ring_update was registered; presumably this unregisters
              * the TX notification.
              */
 953         (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh);
 954         port->lp_tx_grp_added = B_FALSE;

 955 done:
 956         mac_perim_exit(pmph);
 957 }
 958 
 959 static int
 960 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
 961 {
             /*
              * The interrupt handle is treated as the aggr pseudo RX ring; the
              * request is forwarded to the ring handle recorded in arr_hw_rh
              * and the result of mac_hwring_disable_intr() is returned as is.
              */
 962         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
 963         return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
 964 }
 965 
 966 static int
 967 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
 968 {
             /*
              * The interrupt handle is treated as the aggr pseudo RX ring; the
              * request is forwarded to the ring handle recorded in arr_hw_rh
              * and the result of mac_hwring_enable_intr() is returned as is.
              */
 969         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
 970         return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
 971 }
 972 
 973 static int
 974 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)




   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2015 Joyent, Inc.
  24  */
  25 
  26 /*
  27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
  28  *
  29  * An instance of the structure aggr_grp_t is allocated for each
  30  * link aggregation group. When created, aggr_grp_t objects are
  31  * entered into the aggr_grp_hash hash table maintained by the modhash
  32  * module. The hash key is the linkid associated with the link
  33  * aggregation group.
  34  *
  35  * A set of MAC ports is associated with each link aggregation group.
  36  *
  37  * Aggr pseudo TX rings
  38  * --------------------
  39  * The underlying ports (NICs) in an aggregation can have TX rings. To
  40  * enhance aggr's performance, these TX rings are made available to the
  41  * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
  42  * it is already implemented on the RX side, where they are known as
  43  * pseudo RX rings. The same concept is extended to the TX side where


 556 
 557         /*
 558          * Back reference to the group it is member of. A port always
 559          * holds a reference to its group to ensure that the back
 560          * reference is always valid.
 561          */
 562         port->lp_grp = grp;
 563         AGGR_GRP_REFHOLD(grp);
 564         grp->lg_nports++;
 565 
 566         aggr_lacp_init_port(port);
 567         mac_perim_exit(mph);
 568 
 569         if (pp != NULL)
 570                 *pp = port;
 571 
 572         return (0);
 573 }
 574 
 575 /*
 576  * This is called in response to either our LACP state machine or a MAC
 577  * notification that the link has gone down via aggr_send_port_disable(). At
 578  * this point, we may need to update our default ring. To that end, we go
 579  * through the set of ports (underlying datalinks in an aggregation) that are
 580  * currently enabled to transmit data. If all our links have been disabled for
 581  * transmit, then we don't do anything.
 582  *
 583  * Note, because we only have a single TX group, we don't have to worry about
 584  * the rings moving between groups and the chance that mac will reassign it
 585  * unless someone removes a port, at which point, we play it safe and call this
 586  * again.
      *
      * The caller must hold the aggregation's MAC perimeter (asserted below).
      * The new default is chosen with lg_tx_lock held as writer and is always
      * the first pseudo TX ring of the first entry in lg_tx_ports.
 587  */
 588 void
 589 aggr_grp_update_default(aggr_grp_t *grp)
 590 {
 591         aggr_port_t *port;
 592         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 593 
 594         rw_enter(&grp->lg_tx_lock, RW_WRITER);
 595 
             /* No port can transmit: leave the current default untouched. */
 596         if (grp->lg_ntx_ports == 0) {
 597                 rw_exit(&grp->lg_tx_lock);
 598                 return;
 599         }
 600 
             /*
              * NOTE(review): the ring-count check below is only an ASSERT. If it
              * is compiled out, lp_pseudo_tx_rings[0] is still read for a port
              * with no pseudo TX rings -- confirm callers rule that out.
              */
 601         port = grp->lg_tx_ports[0];
 602         ASSERT(port->lp_tx_ring_cnt > 0);
 603         mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]);
 604         rw_exit(&grp->lg_tx_lock);
 605 }
 606 
 607 /*
 608  * Add a pseudo RX ring for the given HW ring handle.
 609  */
 610 static int
 611 aggr_add_pseudo_rx_ring(aggr_port_t *port,
 612     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
 613 {
 614         aggr_pseudo_rx_ring_t   *ring;
 615         int                     err;
 616         int                     j;
 617 
 618         for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
 619                 ring = rx_grp->arg_rings + j;
 620                 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
 621                         break;
 622         }
 623 
 624         /*
 625          * No slot for this new RX ring.
 626          */
 627         if (j == MAX_RINGS_PER_GROUP)


 833         ring->atr_port = port;
 834         tx_grp->atg_ring_cnt++;
 835 
 836         /*
 837          * The TX side has no concept of ring groups unlike RX groups.
 838          * There is just a single group which stores all the TX rings.
 839          * This group will be used to store aggr's pseudo TX rings.
 840          */
 841         if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) {
 842                 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
 843                 ring->atr_hw_rh = NULL;
 844                 ring->atr_port = NULL;
 845                 tx_grp->atg_ring_cnt--;
 846         } else {
 847                 *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i);
 848                 if (hw_rh != NULL) {
 849                         mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
 850                             mac_find_ring(tx_grp->atg_gh, i));
 851                 }
 852         }
 853 
 854         return (err);
 855 }
 856 
 857 /*
 858  * Remove the pseudo TX ring of the given HW ring handle.
 859  */
 860 static void
 861 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp,
 862     mac_ring_handle_t pseudo_hw_rh)
 863 {
 864         aggr_pseudo_tx_ring_t   *ring;
 865         int                     i;
 866 
 867         for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
 868                 ring = tx_grp->atg_rings + i;
 869                 if (ring->atr_rh != pseudo_hw_rh)
 870                         continue;
 871 
 872                 ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE);
 873                 mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh);


 936         }
 937 
 938         if (err != 0) {
 939                 if (hw_rh_cnt != 0) {
 940                         for (j = 0; j < i; j++) {
 941                                 aggr_rem_pseudo_tx_ring(tx_grp,
 942                                     port->lp_pseudo_tx_rings[j]);
 943                         }
 944                 }
 945                 kmem_free(port->lp_tx_rings,
 946                     (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 947                 kmem_free(port->lp_pseudo_tx_rings,
 948                     (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 949                 port->lp_tx_ring_cnt = 0;
 950         } else {
 951                 port->lp_tx_grp_added = B_TRUE;
 952                 port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch,
 953                     aggr_tx_ring_update, port);
 954         }
 955         mac_perim_exit(pmph);
 956         aggr_grp_update_default(grp);
 957         return (err);
 958 }
 959 
 960 /*
 961  * This function is called by aggr to remove pseudo TX rings over the
 962  * HW rings of the underlying port.
      *
      * The caller must already hold the aggregation group's MAC perimeter
      * (asserted below); the port's own perimeter is entered for the duration
      * of the teardown. If no pseudo TX group was added for this port
      * (lp_tx_grp_added is clear) the function does nothing. Otherwise, once
      * the rings are gone, the group's default TX ring is re-evaluated.
 963  */
 964 static void
 965 aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
 966 {
 967         aggr_grp_t              *grp = port->lp_grp;
 968         mac_perim_handle_t      pmph;
 969         int                     i;
 970 
 971         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 972         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 973 
             /* Nothing was set up for this port, so there is nothing to undo. */
 974         if (!port->lp_tx_grp_added)
 975                 goto done;
 976 
 977         ASSERT(tx_grp->atg_gh != NULL);
 978 
             /* Detach every pseudo TX ring this port contributed to tx_grp. */
 979         for (i = 0; i < port->lp_tx_ring_cnt; i++)
 980                 aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]);
 981 
             /*
              * Both arrays are sized from lp_tx_ring_cnt, so they must be freed
              * before the count is reset below.
              * NOTE(review): the element size here must match the allocation,
              * which is not visible in this excerpt -- confirm.
              */
 982         kmem_free(port->lp_tx_rings,
 983             (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 984         kmem_free(port->lp_pseudo_tx_rings,
 985             (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 986 
 987         port->lp_tx_ring_cnt = 0;
             /*
              * Pass a NULL callback together with the handle saved when
              * aggr_tx_ring_update was registered; presumably this unregisters
              * the TX notification.
              */
 988         (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh);
 989         port->lp_tx_grp_added = B_FALSE;
             /*
              * A port's rings were just removed, so have the group pick its
              * default TX ring again.
              * NOTE(review): aggr_grp_update_default() asserts that
              * lg_tx_ports[0] still has a non-zero lp_tx_ring_cnt, so this port
              * is presumably expected to be out of lg_tx_ports by now --
              * confirm against callers.
              */
 990         aggr_grp_update_default(grp);
 991 done:
 992         mac_perim_exit(pmph);
 993 }
 994 
 995 static int
 996 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
 997 {
             /*
              * The interrupt handle is treated as the aggr pseudo RX ring; the
              * request is forwarded to the ring handle recorded in arr_hw_rh
              * and the result of mac_hwring_disable_intr() is returned as is.
              */
 998         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
 999         return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
1000 }
1001 
1002 static int
1003 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
1004 {
             /*
              * The interrupt handle is treated as the aggr pseudo RX ring; the
              * request is forwarded to the ring handle recorded in arr_hw_rh
              * and the result of mac_hwring_enable_intr() is returned as is.
              */
1005         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1006         return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
1007 }
1008 
1009 static int
1010 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)