/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * IP interface to squeues.
 *
 * IP uses squeues to force serialization of packets, both incoming and
 * outgoing. Each squeue is associated with a connection instance (conn_t)
 * above, and a soft ring (if enabled) below. Each CPU will have a default
 * squeue for outbound connections, and each soft ring of an interface will
 * have an squeue to which it sends incoming packets. squeues are never
 * destroyed, and if they become unused they are kept around against future
 * needs.
 *
 * IP organizes its squeues using squeue sets (squeue_set_t). For each CPU
 * in the system there will be one squeue set, all of whose squeues will be
 * bound to that CPU, plus one additional set known as the unbound set. Sets
 * associated with CPUs will have one default squeue, for outbound
 * connections, and a linked list of squeues used by various NICs for inbound
 * packets. The unbound set also has a linked list of squeues, but no default
 * squeue.
 *
 * When a CPU goes offline its squeue set is destroyed, and all its squeues
 * are moved to the unbound set. When a CPU comes online, a new squeue set is
 * created and the unbound set is searched for a default squeue formerly bound
 * to this CPU. If no default squeue is found, a new one is created.
 *
 * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP
 * and not the squeue code. squeue.c will not touch them, and because squeues
 * are never destroyed IP can modify them without holding the squeue lock;
 * the sqset_lock must be held, however.
 *
 * All the squeue sets are protected by a single lock, the sqset_lock. This
 * is also used to protect the sq_next and sq_set fields of an squeue_t.
 *
 * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock
 *
 * There are two modes of associating connections with squeues. The first mode
 * associates each connection with the CPU that creates the connection (either
 * during open time or during accept time). The second mode associates each
 * connection with a random CPU, effectively distributing load over all CPUs
 * and all squeues in the system. The mode is controlled by the
 * ip_squeue_fanout variable.
 *
 * NOTE: The fact that there is an association between each connection and an
 * squeue, and between each squeue and a CPU, does not mean that each
 * connection is always processed on that CPU and on that CPU only. Any thread
 * calling squeue_enter() may process the connection on whichever CPU it is
 * currently scheduled. The squeue to CPU binding is only relevant for the
 * worker thread.
 *
 * INTERFACE:
 *
 * squeue_t *ip_squeue_get(ill_rx_ring_t)
 *
 * Returns the squeue associated with an ill receive ring. If the ring is
 * not bound to a CPU, and we're currently servicing the interrupt which
 * generated the packet, then bind the squeue to that CPU.
 *
 *
 * DR Notes
 * ========
 *
 * ip_squeue_init() registers a callback function with the CPU DR
 * subsystem using register_cpu_setup_func(). The callback function does two
 * things:
 *
 * o When the CPU is going off-line or unconfigured, the worker thread is
 *	unbound from the CPU. This allows the CPU unconfig code to move it to
 *	another CPU.
 *
 * o When the CPU is coming online, it creates a new squeue for this CPU if
 *	necessary and binds the squeue worker thread to this CPU.
 *
 * TUNABLES:
 *
 * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
 * pick the default squeue from a random CPU, otherwise use our CPU's default
 * squeue.
 *
 * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
 * /dev/ip.
 *
 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
 * created. This is the time squeue code waits before waking up the worker
 * thread after queuing a request.
 */
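
/*
 * Illustrative only: a minimal sketch of how a consumer such as TCP might
 * pick a default squeue for a new outbound connection using IP_SQUEUE_GET()
 * as described above. The conn_t's conn_sqp field is assumed here purely for
 * illustration; see the ipclassifier/TCP code for the real connection setup
 * path. With ip_squeue_fanout == 0 the call resolves to the current CPU's
 * default squeue; with ip_squeue_fanout == 1 it picks the default squeue of
 * a pseudo-randomly chosen CPU's set (presumably via ip_squeue_random()
 * below).
 *
 *	conn_t	*connp;
 *
 *	connp = <allocate and classify the new connection>;
 *	connp->conn_sqp = IP_SQUEUE_GET(CPU_PSEUDO_RANDOM());
 */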

#include <sys/types.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <netinet/ip6.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/nd.h>
#include <inet/ipclassifier.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/sunddi.h>
#include <sys/dlpi.h>
#include <sys/squeue_impl.h>
#include <sys/tihdr.h>
#include <inet/udp_impl.h>
#include <sys/strsubr.h>
#include <sys/zone.h>
#include <sys/dld.h>
#include <sys/atomic.h>

/*
 * List of all created squeue sets. The list and its size are protected by
 * sqset_lock.
 */
static squeue_set_t	**sqset_global_list; /* list 0 is the unbound list */
static uint_t		sqset_global_size;
kmutex_t		sqset_lock;

static void (*ip_squeue_create_callback)(squeue_t *) = NULL;

/*
 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
 *	created. This is the time squeue code waits before waking up the worker
 *	thread after queuing a request.
 */
uint_t ip_squeue_worker_wait = 10;

static squeue_t *ip_squeue_create(pri_t);
static squeue_set_t *ip_squeue_set_create(processorid_t);
static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
static void ip_squeue_set_destroy(cpu_t *);
static void ip_squeue_clean(void *, mblk_t *, void *);

#define	CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))

static squeue_t *
ip_squeue_create(pri_t pri)
{
	squeue_t *sqp;

	sqp = squeue_create(ip_squeue_worker_wait, pri);
	ASSERT(sqp != NULL);
	if (ip_squeue_create_callback != NULL)
		ip_squeue_create_callback(sqp);
	return (sqp);
}

/*
 * Create a new squeue_set. If id == -1, then we're creating the unbound set,
 * which should only happen once when we are first initialized. Otherwise id
 * is the id of the CPU that needs a set, either because we are initializing
 * or because the CPU has come online.
 *
 * If id != -1, then we need at a minimum to provide a default squeue for the
 * new set. We search the unbound set for candidates, and if none are found we
 * create a new one.
 */
static squeue_set_t *
ip_squeue_set_create(processorid_t id)
{
	squeue_set_t	*sqs;
	squeue_set_t	*src = sqset_global_list[0];
	squeue_t	**lastsqp, *sq;
	squeue_t	**defaultq_lastp = NULL;

	sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
	sqs->sqs_cpuid = id;

	if (id == -1) {
		ASSERT(sqset_global_size == 0);
		sqset_global_list[0] = sqs;
		sqset_global_size = 1;
		return (sqs);
	}

	/*
	 * When we create an squeue set with id != -1, we need to give it a
	 * default squeue, in order to support fanout of conns across
	 * CPUs. Try to find a former default squeue that matches this
	 * cpu id on the unbound squeue set. If no such squeue is found,
	 * find some non-default TCP squeue that is free. If still no such
	 * candidate is found, create a new squeue.
	 */

	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&sqset_lock);
	lastsqp = &src->sqs_head;

	while (*lastsqp) {
		if ((*lastsqp)->sq_bind == id &&
		    (*lastsqp)->sq_state & SQS_DEFAULT) {
			/*
			 * Exact match. Former default squeue of cpu 'id'
			 */
			ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
			defaultq_lastp = lastsqp;
			break;
		}
		if (defaultq_lastp == NULL &&
		    !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
			/*
			 * A free non-default TCP squeue
			 */
			defaultq_lastp = lastsqp;
		}
		lastsqp = &(*lastsqp)->sq_next;
	}

	if (defaultq_lastp != NULL) {
		/* Remove from src set and set SQS_DEFAULT */
		sq = *defaultq_lastp;
		*defaultq_lastp = sq->sq_next;
		sq->sq_next = NULL;
		if (!(sq->sq_state & SQS_DEFAULT)) {
			mutex_enter(&sq->sq_lock);
			sq->sq_state |= SQS_DEFAULT;
			mutex_exit(&sq->sq_lock);
		}
	} else {
		sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
		sq->sq_state |= SQS_DEFAULT;
	}

	sq->sq_set = sqs;
	sqs->sqs_default = sq;
	squeue_bind(sq, id); /* this locks squeue mutex */

	ASSERT(sqset_global_size <= NCPU);
	sqset_global_list[sqset_global_size++] = sqs;
	mutex_exit(&sqset_lock);
	return (sqs);
}
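
/*
 * Illustrative only: a hypothetical helper (not present in this file) showing
 * how the squeue-set organization described above can be walked. Every set
 * lives in sqset_global_list[], entry 0 being the unbound set; each set has a
 * default squeue (sqs_default, NULL for the unbound set) plus a singly linked
 * list of ill-bound squeues chained through sq_next. sqset_lock must be held
 * while walking either structure.
 *
 *	static uint_t
 *	ip_squeue_count_all(void)
 *	{
 *		squeue_set_t *sqs;
 *		squeue_t *sq;
 *		uint_t i, cnt = 0;
 *
 *		mutex_enter(&sqset_lock);
 *		for (i = 0; i < sqset_global_size; i++) {
 *			sqs = sqset_global_list[i];
 *			if (sqs->sqs_default != NULL)
 *				cnt++;
 *			for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next)
 *				cnt++;
 *		}
 *		mutex_exit(&sqset_lock);
 *		return (cnt);
 *	}
 */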

/*
 * Called by ill_ring_add() to find an squeue to associate with a new ring.
 */

squeue_t *
ip_squeue_getfree(pri_t pri)
{
	squeue_set_t	*sqs = sqset_global_list[0];
	squeue_t	*sq;

	mutex_enter(&sqset_lock);
	for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
		/*
		 * Select a non-default TCP squeue that is free i.e. not
		 * bound to any ill.
		 */
		if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
			break;
	}

	if (sq == NULL) {
		sq = ip_squeue_create(pri);
		sq->sq_set = sqs;
		sq->sq_next = sqs->sqs_head;
		sqs->sqs_head = sq;
	}

	ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
	    SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
	    SQS_POLL_THR_QUIESCED)));

	mutex_enter(&sq->sq_lock);
	sq->sq_state |= SQS_ILL_BOUND;
	mutex_exit(&sq->sq_lock);
	mutex_exit(&sqset_lock);

	if (sq->sq_priority != pri) {
		thread_lock(sq->sq_worker);
		(void) thread_change_pri(sq->sq_worker, pri, 0);
		thread_unlock(sq->sq_worker);

		thread_lock(sq->sq_poll_thr);
		(void) thread_change_pri(sq->sq_poll_thr, pri, 0);
		thread_unlock(sq->sq_poll_thr);

		sq->sq_priority = pri;
	}
	return (sq);
}

/*
 * Initialize IP squeues.
 */
void
ip_squeue_init(void (*callback)(squeue_t *))
{
	int i;
	squeue_set_t	*sqs;

	ASSERT(sqset_global_list == NULL);

	ip_squeue_create_callback = callback;
	squeue_init();
	mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);
	sqset_global_list =
	    kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
	sqset_global_size = 0;
	/*
	 * We are called at system boot time and we don't
	 * expect memory allocation failure.
	 */
	sqs = ip_squeue_set_create(-1);
	ASSERT(sqs != NULL);

	mutex_enter(&cpu_lock);
	/* Create squeue for each active CPU available */
	for (i = 0; i < NCPU; i++) {
		cpu_t *cp = cpu_get(i);
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
			/*
			 * We are called at system boot time and we don't
			 * expect memory allocation failure then
			 */
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
			ASSERT(cp->cpu_squeue_set != NULL);
		}
	}

	register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
	mutex_exit(&cpu_lock);
}
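
/*
 * Illustrative only: a hypothetical sketch of how the subsystem that consumes
 * squeues (TCP, for instance) might initialize this module. The callback is
 * invoked once for every squeue this file ever creates, which is the hook a
 * consumer can use to attach its own per-squeue state. The hook name below is
 * made up for the example.
 *
 *	static void
 *	my_squeue_setup(squeue_t *sqp)
 *	{
 *		... attach consumer-private state to sqp ...
 *	}
 *
 *	ip_squeue_init(my_squeue_setup);
 */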

/*
 * Get a default squeue, either from the current CPU or a CPU derived by hash
 * from the index argument, depending upon the setting of ip_squeue_fanout.
 */
squeue_t *
ip_squeue_random(uint_t index)
{
	squeue_set_t *sqs = NULL;
	squeue_t *sq;

	/*
	 * The minimum value of sqset_global_size is 2, one for the unbound
	 * squeue set and another for the squeue set of the zeroth CPU.
	 * Even though the value could be changing, it can never go below 2,
	 * so the assert does not need the lock protection.
	 */
	ASSERT(sqset_global_size > 1);

	/* Protect against changes to sqset_global_list */
	mutex_enter(&sqset_lock);

	if (!ip_squeue_fanout)
		sqs = CPU->cpu_squeue_set;

	/*
	 * sqset_global_list[0] corresponds to the unbound squeue set.
	 * The computation below picks a set other than the unbound set.
	 */
	if (sqs == NULL)
		sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1];
	sq = sqs->sqs_default;

	mutex_exit(&sqset_lock);
	ASSERT(sq);
	return (sq);
}

/*
 * Move squeue from its current set to newset. Not used for default squeues.
 * Bind or unbind the worker thread as appropriate.
 */

static void
ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
{
	squeue_set_t	*set;
	squeue_t	**lastsqp;
	processorid_t	cpuid = newset->sqs_cpuid;

	ASSERT(!(sq->sq_state & SQS_DEFAULT));
	ASSERT(!MUTEX_HELD(&sq->sq_lock));
	ASSERT(MUTEX_HELD(&sqset_lock));

	set = sq->sq_set;
	if (set == newset)
		return;

	lastsqp = &set->sqs_head;
	while (*lastsqp != sq)
		lastsqp = &(*lastsqp)->sq_next;

	*lastsqp = sq->sq_next;
	sq->sq_next = newset->sqs_head;
	newset->sqs_head = sq;
	sq->sq_set = newset;
	if (cpuid == -1)
		squeue_unbind(sq);
	else
		squeue_bind(sq, cpuid);
}

/*
 * Move squeue from its current set to cpuid's set and bind to cpuid.
 */

int
ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
{
	cpu_t *cpu;
	squeue_set_t *set;

	if (sq->sq_state & SQS_DEFAULT)
		return (-1);

	ASSERT(MUTEX_HELD(&cpu_lock));

	cpu = cpu_get(cpuid);
	if (!CPU_ISON(cpu))
		return (-1);

	mutex_enter(&sqset_lock);
	set = cpu->cpu_squeue_set;
	if (set != NULL)
		ip_squeue_set_move(sq, set);
	mutex_exit(&sqset_lock);
	return ((set == NULL) ? -1 : 0);
}

/*
 * The mac layer is calling, asking us to move an squeue to a
 * new CPU. This routine is called with cpu_lock held.
 */
void
ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
{
	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring->rr_ill == ill);

	mutex_enter(&ill->ill_lock);
	if (rx_ring->rr_ring_state == RR_FREE ||
	    rx_ring->rr_ring_state == RR_FREE_INPROG) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
		rx_ring->rr_ring_state = RR_SQUEUE_BOUND;

	mutex_exit(&ill->ill_lock);
}

void *
ip_squeue_add_ring(ill_t *ill, void *mrp)
{
	mac_rx_fifo_t		*mrfp = (mac_rx_fifo_t *)mrp;
	ill_rx_ring_t		*rx_ring, *ring_tbl;
	int			ip_rx_index;
	squeue_t		*sq = NULL;
	pri_t			pri;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
	ASSERT(ill->ill_dld_capab != NULL);

	ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;

	mutex_enter(&ill->ill_lock);
	for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
		rx_ring = &ring_tbl[ip_rx_index];
		if (rx_ring->rr_ring_state == RR_FREE)
			break;
	}

	if (ip_rx_index == ILL_MAX_RINGS) {
		/*
		 * We ran out of ILL_MAX_RINGS worth of rx_ring structures. If
		 * we have devices which can overwhelm this limit,
		 * ILL_MAX_RINGS should be made configurable. Meanwhile it
		 * causes no panic because the driver will pass ip_input a
		 * NULL handle, which will make IP allocate the default squeue
		 * and polling mode will not be used for this ring.
		 */
		cmn_err(CE_NOTE,
		    "Reached maximum number of receiving rings (%d) for %s\n",
		    ILL_MAX_RINGS, ill->ill_name);
		mutex_exit(&ill->ill_lock);
		return (NULL);
	}

	bzero(rx_ring, sizeof (ill_rx_ring_t));
	rx_ring->rr_rx = (ip_mac_rx_t)mrfp->mrf_receive;
	/* XXX: Hard code it to tcp accept for now */
	rx_ring->rr_ip_accept = (ip_accept_t)ip_accept_tcp;

	rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
	rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
	rx_ring->rr_intr_disable =
	    (ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
	rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
	rx_ring->rr_ill = ill;

	pri = mrfp->mrf_flow_priority;

	sq = ip_squeue_getfree(pri);

	mutex_enter(&sq->sq_lock);
	sq->sq_rx_ring = rx_ring;
	rx_ring->rr_sqp = sq;

	sq->sq_state |= SQS_POLL_CAPAB;

	rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
	sq->sq_ill = ill;
	mutex_exit(&sq->sq_lock);
	mutex_exit(&ill->ill_lock);

	DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
	    ip_rx_index, void *, mrfp->mrf_rx_arg);

	/* Assign the squeue to the specified CPU as well */
	mutex_enter(&cpu_lock);
	(void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
	mutex_exit(&cpu_lock);

	return (rx_ring);
}

/*
 * Clean up the squeue's association with the ring. Some of the processing
 * needs to be done from inside the perimeter.
 */
void
ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	/* Just clean one squeue */
	mutex_enter(&ill->ill_lock);
	if (rx_ring->rr_ring_state == RR_FREE) {
		mutex_exit(&ill->ill_lock);
		return;
	}
	rx_ring->rr_ring_state = RR_FREE_INPROG;
	sqp = rx_ring->rr_sqp;

	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_POLL_CLEANUP;
	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&ill->ill_lock);
	while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
	sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;

	ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
	    SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
	    SQS_POLL_THR_QUIESCED)));

	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&sqp->sq_lock);

	/*
	 * Move the squeue to sqset_global_list[0] which holds the set of
	 * squeues not bound to any cpu. Note that the squeue is still
	 * considered bound to an ill as long as SQS_ILL_BOUND is set.
	 */
	mutex_enter(&sqset_lock);
	ip_squeue_set_move(sqp, sqset_global_list[0]);
	mutex_exit(&sqset_lock);

	/*
	 * CPU going offline can also trigger a move of the squeue to the
	 * unbound set sqset_global_list[0]. However the squeue won't be
	 * recycled for the next use as long as the SQS_ILL_BOUND flag
	 * is set. Hence we clear the SQS_ILL_BOUND flag only towards the
	 * end after the move.
	 */
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state &= ~SQS_ILL_BOUND;
	mutex_exit(&sqp->sq_lock);

	mutex_enter(&ill->ill_lock);
	rx_ring->rr_ring_state = RR_FREE;
	mutex_exit(&ill->ill_lock);
}

/*
 * Stop the squeue from polling. This needs to be done
 * from inside the perimeter.
 */
void
ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	sqp = rx_ring->rr_sqp;
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_POLL_QUIESCE;
	cv_signal(&sqp->sq_worker_cv);
	while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);

	mutex_exit(&sqp->sq_lock);
}

/*
 * Restart polling etc. Needs to be inside the perimeter to
 * prevent races.
 */
void
ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	sqp = rx_ring->rr_sqp;
	mutex_enter(&sqp->sq_lock);
	/*
	 * Handle change in number of rings between the quiesce and
	 * restart operations by checking for a previous quiesce before
	 * attempting a restart.
	 */
	if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
		mutex_exit(&sqp->sq_lock);
		return;
	}
	sqp->sq_state |= SQS_POLL_RESTART;
	cv_signal(&sqp->sq_worker_cv);
	while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
	sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
	mutex_exit(&sqp->sq_lock);
}
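
/*
 * Illustrative only: the quiesce/restart pair above is intended to bracket a
 * reconfiguration of the underlying ring while inside the mac perimeter. A
 * hypothetical caller would look like:
 *
 *	ASSERT(ILL_MAC_PERIM_HELD(ill));
 *	ip_squeue_quiesce_ring(ill, rx_ring);
 *	... reconfigure the ring / driver state ...
 *	ip_squeue_restart_ring(ill, rx_ring);
 */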

/*
 * Clean up all squeues associated with the ill.
 */
void
ip_squeue_clean_all(ill_t *ill)
{
	int idx;
	ill_rx_ring_t	*rx_ring;

	for (idx = 0; idx < ILL_MAX_RINGS; idx++) {
		rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx];
		ip_squeue_clean_ring(ill, rx_ring);
	}
}

/*
 * Used by IP to get the squeue associated with a ring. If the ring has no
 * squeue assigned (or no ring was passed in at all), fall back to a default
 * squeue chosen via IP_SQUEUE_GET().
 */
squeue_t *
ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
{
	squeue_t	*sqp;

	if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL))
		return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));

	return (sqp);
}
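
/*
 * Illustrative only: a hypothetical fragment of the receive path. The ring
 * handle returned by ip_squeue_add_ring() flows back to IP with each inbound
 * chain, and ip_squeue_get() recovers the squeue (or a default one) on which
 * the chain will be processed. The variable names are made up for the
 * example.
 *
 *	ill_rx_ring_t	*rx_ring = (ill_rx_ring_t *)rx_handle;
 *	squeue_t	*sqp;
 *
 *	sqp = ip_squeue_get(rx_ring);
 *	... hand the mblk chain to sqp via the entry points in squeue.c ...
 */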

/*
 * Called when a CPU goes offline. Its squeue_set_t is destroyed, and all
 * squeues are unbound and moved to the unbound set.
 */
static void
ip_squeue_set_destroy(cpu_t *cpu)
{
	int i;
	squeue_t *sqp, *lastsqp = NULL;
	squeue_set_t *sqs, *unbound = sqset_global_list[0];

	mutex_enter(&sqset_lock);
	if ((sqs = cpu->cpu_squeue_set) == NULL) {
		mutex_exit(&sqset_lock);
		return;
	}

	/* Move all squeues to unbound set */

	for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
		squeue_unbind(sqp);
		sqp->sq_set = unbound;
	}
	if (sqs->sqs_head) {
		lastsqp->sq_next = unbound->sqs_head;
		unbound->sqs_head = sqs->sqs_head;
	}

	/* Also move default squeue to unbound set */

	sqp = sqs->sqs_default;
	ASSERT(sqp != NULL);
	ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);

	sqp->sq_next = unbound->sqs_head;
	unbound->sqs_head = sqp;
	squeue_unbind(sqp);
	sqp->sq_set = unbound;

	for (i = 1; i < sqset_global_size; i++)
		if (sqset_global_list[i] == sqs)
			break;

	ASSERT(i < sqset_global_size);
	sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
	sqset_global_list[sqset_global_size - 1] = NULL;
	sqset_global_size--;

	mutex_exit(&sqset_lock);
	kmem_free(sqs, sizeof (*sqs));
}

/*
 * Reconfiguration callback
 */
/* ARGSUSED */
static int
ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	cpu_t *cp = cpu_get(id);

	ASSERT(MUTEX_HELD(&cpu_lock));
	switch (what) {
	case CPU_CONFIG:
	case CPU_ON:
	case CPU_INIT:
	case CPU_CPUPART_IN:
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
		break;
	case CPU_UNCONFIG:
	case CPU_OFF:
	case CPU_CPUPART_OUT:
		if (cp->cpu_squeue_set != NULL) {
			ip_squeue_set_destroy(cp);
			cp->cpu_squeue_set = NULL;
		}
		break;
	default:
		break;
	}
	return (0);
}