/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 */

/*
 * IP interface to squeues.
 *
 * IP uses squeues to force serialization of packets, both incoming and
 * outgoing. Each squeue is associated with a connection instance (conn_t)
 * above, and a soft ring (if enabled) below. Each CPU will have a default
 * squeue for outbound connections, and each soft ring of an interface will
 * have an squeue to which it sends incoming packets. squeues are never
 * destroyed, and if they become unused they are kept around against future
 * needs.
 *
 * IP organizes its squeues using squeue sets (squeue_set_t). For each CPU
 * in the system there will be one squeue set, all of whose squeues will be
 * bound to that CPU, plus one additional set known as the unbound set. Sets
 * associated with CPUs will have one default squeue, for outbound
 * connections, and a linked list of squeues used by various NICs for inbound
 * packets. The unbound set also has a linked list of squeues, but no default
 * squeue.
 *
 * When a CPU goes offline, its squeue set is destroyed, and all its squeues
 * are moved to the unbound set. When a CPU comes online, a new squeue set is
 * created and the unbound set is searched for a default squeue formerly bound
 * to this CPU. If no default squeue is found, a new one is created.
 *
 * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP
 * and not the squeue code. squeue.c will not touch them, and we can modify
 * them without holding the squeue lock because of the guarantee that squeues
 * are never destroyed. ip_squeue locks must be held, however.
 *
 * All the squeue sets are protected by a single lock, the sqset_lock. This
 * is also used to protect the sq_next and sq_set fields of an squeue_t.
 *
 * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock
 *
 * There are two modes of associating a connection with squeues. The first
 * mode associates each connection with the CPU that creates the connection
 * (either at open time or at accept time). The second mode associates each
 * connection with a random CPU, effectively distributing load over all CPUs
 * and all squeues in the system. The mode is controlled by the
 * ip_squeue_fanout variable.
 *
 * NOTE: The fact that there is an association between each connection and
 * squeue, and between each squeue and CPU, does not mean that the connection
 * is always processed on that CPU and on that CPU only. Any thread calling
 * squeue_enter() may process the connection on whichever CPU it is currently
 * scheduled. The squeue to CPU binding is only relevant for the worker
 * thread.
 *
 * INTERFACE:
 *
 * squeue_t *ip_squeue_get(ill_rx_ring_t)
 *
 * Returns the squeue associated with an ill receive ring. If the ring is
 * not bound to a CPU, and we're currently servicing the interrupt which
 * generated the packet, then bind the squeue to the CPU.
 *
 *
 * DR Notes
 * ========
 *
 * ip_squeue_init() registers a call-back function with the CPU DR
 * subsystem using register_cpu_setup_func(). The call-back function does two
 * things:
 *
 * o When the CPU is going off-line or unconfigured, the worker thread is
 *   unbound from the CPU. This allows the CPU unconfig code to move it to
 *   another CPU.
 *
 * o When the CPU is going online, it creates a new squeue for this CPU if
 *   necessary and binds the squeue worker thread to this CPU.
 *
 * TUNABLES:
 *
 * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
 * pick the default squeue from a random CPU, otherwise use our CPU's default
 * squeue.
 *
 * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
 * /dev/ip.
 *
 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
 * created. This is the time squeue code waits before waking up the worker
 * thread after queuing a request.
 */
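
/*
 * Illustrative sketch of the two selection paths described above, assuming
 * a hypothetical receive ring pointer `rx_ring' and using the same calls
 * that ip_squeue_get() below relies on:
 *
 *	squeue_t *sqp;
 *
 *	// Inbound: use the squeue associated with the receive ring.
 *	sqp = ip_squeue_get(rx_ring);
 *
 *	// Outbound/fanout: pick a default squeue, subject to
 *	// ip_squeue_fanout.
 *	sqp = IP_SQUEUE_GET(CPU_PSEUDO_RANDOM());
 */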

#include <sys/types.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <netinet/ip6.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/nd.h>
#include <inet/ipclassifier.h>
#include <sys/conf.h>
#include <sys/sunddi.h>
#include <sys/dlpi.h>
#include <sys/squeue_impl.h>
#include <sys/tihdr.h>
#include <inet/udp_impl.h>
#include <sys/strsubr.h>
#include <sys/zone.h>
#include <sys/dld.h>
#include <sys/atomic.h>

/*
 * List of all created squeue sets. The list and its size are protected by
 * sqset_lock.
 */
static squeue_set_t **sqset_global_list; /* list 0 is the unbound list */
static uint_t sqset_global_size;
kmutex_t sqset_lock;

static void (*ip_squeue_create_callback)(squeue_t *) = NULL;

/*
 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
 * created. This is the time squeue code waits before waking up the worker
 * thread after queuing a request.
 */
volatile uint_t ip_squeue_worker_wait = 10;

static squeue_t *ip_squeue_create(pri_t);
static squeue_set_t *ip_squeue_set_create(processorid_t);
static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
static void ip_squeue_set_destroy(cpu_t *);
static void ip_squeue_clean(void *, mblk_t *, void *);

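/* A CPU is usable for squeue binding only if it exists and is active. */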
#define	CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))

static squeue_t *
ip_squeue_create(pri_t pri)
{
	squeue_t *sqp;

	sqp = squeue_create(ip_squeue_worker_wait, pri);
	ASSERT(sqp != NULL);
	if (ip_squeue_create_callback != NULL)
		ip_squeue_create_callback(sqp);
	return (sqp);
}

/*
 * Create a new squeue_set. If id == -1, then we're creating the unbound set,
 * which should only happen once when we are first initialized. Otherwise id
 * is the id of the CPU that needs a set, either because we are initializing
 * or because the CPU has come online.
 *
 * If id != -1, then we need at a minimum to provide a default squeue for the
 * new set. We search the unbound set for candidates, and if none are found we
 * create a new one.
 */
static squeue_set_t *
ip_squeue_set_create(processorid_t id)
{
	squeue_set_t *sqs;
	squeue_set_t *src = sqset_global_list[0];
	squeue_t **lastsqp, *sq;
	squeue_t **defaultq_lastp = NULL;

	sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
	sqs->sqs_cpuid = id;

	if (id == -1) {
		ASSERT(sqset_global_size == 0);
		sqset_global_list[0] = sqs;
		sqset_global_size = 1;
		return (sqs);
	}

	/*
	 * When we create an squeue set with id != -1, we need to give it a
	 * default squeue, in order to support fanout of conns across
	 * CPUs. Try to find a former default squeue that matches this
	 * cpu id on the unbound squeue set. If no such squeue is found,
	 * find some non-default TCP squeue that is free. If still no such
	 * candidate is found, create a new squeue.
	 */

	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&sqset_lock);
	lastsqp = &src->sqs_head;

	while (*lastsqp) {
		if ((*lastsqp)->sq_bind == id &&
		    (*lastsqp)->sq_state & SQS_DEFAULT) {
			/*
			 * Exact match. Former default squeue of cpu 'id'
			 */
			ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
			defaultq_lastp = lastsqp;
			break;
		}
		if (defaultq_lastp == NULL &&
		    !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
			/*
			 * A free non-default TCP squeue
			 */
			defaultq_lastp = lastsqp;
		}
		lastsqp = &(*lastsqp)->sq_next;
	}

	if (defaultq_lastp != NULL) {
		/* Remove from src set and set SQS_DEFAULT */
		sq = *defaultq_lastp;
		*defaultq_lastp = sq->sq_next;
		sq->sq_next = NULL;
		if (!(sq->sq_state & SQS_DEFAULT)) {
			mutex_enter(&sq->sq_lock);
			sq->sq_state |= SQS_DEFAULT;
			mutex_exit(&sq->sq_lock);
		}
	} else {
		sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
		sq->sq_state |= SQS_DEFAULT;
	}

	sq->sq_set = sqs;
	sqs->sqs_default = sq;
	squeue_bind(sq, id); /* this locks squeue mutex */

	ASSERT(sqset_global_size <= NCPU);
	sqset_global_list[sqset_global_size++] = sqs;
	mutex_exit(&sqset_lock);
	return (sqs);
}

/*
 * Called by ip_squeue_add_ring() to find an squeue to associate with a new
 * ring.
 */

squeue_t *
ip_squeue_getfree(pri_t pri)
{
	squeue_set_t *sqs = sqset_global_list[0];
	squeue_t *sq;

	mutex_enter(&sqset_lock);
	for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
		/*
		 * Select a non-default TCP squeue that is free, i.e. not
		 * bound to any ill.
		 */
		if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
			break;
	}

	if (sq == NULL) {
		sq = ip_squeue_create(pri);
		sq->sq_set = sqs;
		sq->sq_next = sqs->sqs_head;
		sqs->sqs_head = sq;
	}

	ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
	    SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
	    SQS_POLL_THR_QUIESCED)));

	mutex_enter(&sq->sq_lock);
	sq->sq_state |= SQS_ILL_BOUND;
	mutex_exit(&sq->sq_lock);
	mutex_exit(&sqset_lock);

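	/*
	 * If the caller asked for a different priority than the squeue
	 * currently has, re-prioritize both the worker and the poll thread
	 * to match before handing the squeue out.
	 */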
	if (sq->sq_priority != pri) {
		thread_lock(sq->sq_worker);
		(void) thread_change_pri(sq->sq_worker, pri, 0);
		thread_unlock(sq->sq_worker);

		thread_lock(sq->sq_poll_thr);
		(void) thread_change_pri(sq->sq_poll_thr, pri, 0);
		thread_unlock(sq->sq_poll_thr);

		sq->sq_priority = pri;
	}
	return (sq);
}

/*
 * Initialize IP squeues.
 */
void
ip_squeue_init(void (*callback)(squeue_t *))
{
	int i;
	squeue_set_t *sqs;

	ASSERT(sqset_global_list == NULL);

	ip_squeue_create_callback = callback;
	squeue_init();
	mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);
	sqset_global_list =
	    kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
	sqset_global_size = 0;
	/*
	 * We are called at system boot time and we don't
	 * expect memory allocation failure.
	 */
	sqs = ip_squeue_set_create(-1);
	ASSERT(sqs != NULL);

	mutex_enter(&cpu_lock);
	/* Create a squeue set for each active CPU available */
	for (i = 0; i < NCPU; i++) {
		cpu_t *cp = cpu_get(i);
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
			/*
			 * We are called at system boot time and we don't
			 * expect memory allocation failure.
			 */
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
			ASSERT(cp->cpu_squeue_set != NULL);
		}
	}

	register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
	mutex_exit(&cpu_lock);
}

/*
 * Get a default squeue, either from the current CPU or a CPU derived by hash
 * from the index argument, depending upon the setting of ip_squeue_fanout.
 */
squeue_t *
ip_squeue_random(uint_t index)
{
	squeue_set_t *sqs = NULL;
	squeue_t *sq;

	/*
	 * The minimum value of sqset_global_size is 2, one for the unbound
	 * squeue set and another for the squeue set of the zeroth CPU.
	 * Even though the value could be changing, it can never go below 2,
	 * so the assert does not need the lock protection.
	 */
	ASSERT(sqset_global_size > 1);

	/* Protect against changes to sqset_global_list */
	mutex_enter(&sqset_lock);

	if (!ip_squeue_fanout)
		sqs = CPU->cpu_squeue_set;

	/*
	 * sqset_global_list[0] corresponds to the unbound squeue set.
	 * The computation below picks a set other than the unbound set.
	 */
	if (sqs == NULL)
		sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1];
	sq = sqs->sqs_default;

	mutex_exit(&sqset_lock);
	ASSERT(sq);
	return (sq);
}

/*
 * Move squeue from its current set to newset. Not used for default squeues.
 * Bind or unbind the worker thread as appropriate.
 */

static void
ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
{
	squeue_set_t *set;
	squeue_t **lastsqp;
	processorid_t cpuid = newset->sqs_cpuid;

	ASSERT(!(sq->sq_state & SQS_DEFAULT));
	ASSERT(!MUTEX_HELD(&sq->sq_lock));
	ASSERT(MUTEX_HELD(&sqset_lock));

	set = sq->sq_set;
	if (set == newset)
		return;

	lastsqp = &set->sqs_head;
	while (*lastsqp != sq)
		lastsqp = &(*lastsqp)->sq_next;

	*lastsqp = sq->sq_next;
	sq->sq_next = newset->sqs_head;
	newset->sqs_head = sq;
	sq->sq_set = newset;
	if (cpuid == -1)
		squeue_unbind(sq);
	else
		squeue_bind(sq, cpuid);
}

/*
 * Move squeue from its current set to cpuid's set and bind to cpuid.
 */

int
ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
{
	cpu_t *cpu;
	squeue_set_t *set;

	if (sq->sq_state & SQS_DEFAULT)
		return (-1);

	ASSERT(MUTEX_HELD(&cpu_lock));

	cpu = cpu_get(cpuid);
	if (!CPU_ISON(cpu))
		return (-1);

	mutex_enter(&sqset_lock);
	set = cpu->cpu_squeue_set;
	if (set != NULL)
		ip_squeue_set_move(sq, set);
	mutex_exit(&sqset_lock);
	return ((set == NULL) ? -1 : 0);
}

/*
 * The mac layer is calling, asking us to move an squeue to a
 * new CPU. This routine is called with cpu_lock held.
 */
void
ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
{
	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring->rr_ill == ill);

	mutex_enter(&ill->ill_lock);
	if (rx_ring->rr_ring_state == RR_FREE ||
	    rx_ring->rr_ring_state == RR_FREE_INPROG) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
		rx_ring->rr_ring_state = RR_SQUEUE_BOUND;

	mutex_exit(&ill->ill_lock);
}

void *
ip_squeue_add_ring(ill_t *ill, void *mrp)
{
	mac_rx_fifo_t *mrfp = (mac_rx_fifo_t *)mrp;
	ill_rx_ring_t *rx_ring, *ring_tbl;
	int ip_rx_index;
	squeue_t *sq = NULL;
	pri_t pri;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
	ASSERT(ill->ill_dld_capab != NULL);

	ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;

	mutex_enter(&ill->ill_lock);
	for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
		rx_ring = &ring_tbl[ip_rx_index];
		if (rx_ring->rr_ring_state == RR_FREE)
			break;
	}

	if (ip_rx_index == ILL_MAX_RINGS) {
		/*
		 * We ran out of ILL_MAX_RINGS worth of rx_ring structures. If
		 * we have devices which can overwhelm this limit,
		 * ILL_MAX_RINGS should be made configurable. Meanwhile this
		 * causes no panic because the driver will pass ip_input a
		 * NULL handle, which will make IP allocate the default squeue
		 * and polling mode will not be used for this ring.
		 */
		cmn_err(CE_NOTE,
		    "Reached maximum number of receiving rings (%d) for %s\n",
		    ILL_MAX_RINGS, ill->ill_name);
		mutex_exit(&ill->ill_lock);
		return (NULL);
	}

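	/*
	 * Initialize the free slot and record the mac entry points (receive
	 * and interrupt enable/disable callbacks) that IP will use to drive
	 * this ring.
	 */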
	bzero(rx_ring, sizeof (ill_rx_ring_t));
	rx_ring->rr_rx = (ip_mac_rx_t)mrfp->mrf_receive;
	/* XXX: Hard code it to tcp accept for now */
	rx_ring->rr_ip_accept = (ip_accept_t)ip_accept_tcp;

	rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
	rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
	rx_ring->rr_intr_disable =
	    (ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
	rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
	rx_ring->rr_ill = ill;

	pri = mrfp->mrf_flow_priority;

	sq = ip_squeue_getfree(pri);

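	/*
	 * Cross-link the squeue and the ring, mark the squeue as capable of
	 * polling, and leave the ring unbound until a CPU is assigned below.
	 */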
	mutex_enter(&sq->sq_lock);
	sq->sq_rx_ring = rx_ring;
	rx_ring->rr_sqp = sq;

	sq->sq_state |= SQS_POLL_CAPAB;

	rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
	sq->sq_ill = ill;
	mutex_exit(&sq->sq_lock);
	mutex_exit(&ill->ill_lock);

	DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
	    ip_rx_index, void *, mrfp->mrf_rx_arg);

	/* Assign the squeue to the specified CPU as well */
	mutex_enter(&cpu_lock);
	(void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
	mutex_exit(&cpu_lock);

	return (rx_ring);
}

/*
 * Sanitize the squeue, etc. Some of the processing
 * needs to be done from inside the perimeter.
 */
void
ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	/* Just clean one squeue */
	mutex_enter(&ill->ill_lock);
	if (rx_ring->rr_ring_state == RR_FREE) {
		mutex_exit(&ill->ill_lock);
		return;
	}
	rx_ring->rr_ring_state = RR_FREE_INPROG;
	sqp = rx_ring->rr_sqp;

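	/*
	 * Ask the squeue worker to perform the poll cleanup: set
	 * SQS_POLL_CLEANUP, wake the worker, and wait until it reports
	 * SQS_POLL_CLEANUP_DONE. Note that ill_lock is dropped before
	 * sleeping on sq_ctrlop_done_cv.
	 */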
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_POLL_CLEANUP;
	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&ill->ill_lock);
	while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
	sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;

	ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
	    SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
	    SQS_POLL_THR_QUIESCED)));

	cv_signal(&sqp->sq_worker_cv);
	mutex_exit(&sqp->sq_lock);

	/*
	 * Move the squeue to sqset_global_list[0] which holds the set of
	 * squeues not bound to any cpu. Note that the squeue is still
	 * considered bound to an ill as long as SQS_ILL_BOUND is set.
	 */
	mutex_enter(&sqset_lock);
	ip_squeue_set_move(sqp, sqset_global_list[0]);
	mutex_exit(&sqset_lock);

	/*
	 * CPU going offline can also trigger a move of the squeue to the
	 * unbound set sqset_global_list[0]. However the squeue won't be
	 * recycled for the next use as long as the SQS_ILL_BOUND flag
	 * is set. Hence we clear the SQS_ILL_BOUND flag only towards the
	 * end after the move.
	 */
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state &= ~SQS_ILL_BOUND;
	mutex_exit(&sqp->sq_lock);

	mutex_enter(&ill->ill_lock);
	rx_ring->rr_ring_state = RR_FREE;
	mutex_exit(&ill->ill_lock);
}

/*
 * Stop the squeue from polling. This needs to be done
 * from inside the perimeter.
 */
void
ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	sqp = rx_ring->rr_sqp;
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_POLL_QUIESCE;
	cv_signal(&sqp->sq_worker_cv);
	while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);

	mutex_exit(&sqp->sq_lock);
}

/*
 * Restart polling etc. Needs to be inside the perimeter to
 * prevent races.
 */
void
ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
	squeue_t *sqp;

	ASSERT(ILL_MAC_PERIM_HELD(ill));
	ASSERT(rx_ring != NULL);

	sqp = rx_ring->rr_sqp;
	mutex_enter(&sqp->sq_lock);
	/*
	 * Handle change in number of rings between the quiesce and
	 * restart operations by checking for a previous quiesce before
	 * attempting a restart.
	 */
	if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
		mutex_exit(&sqp->sq_lock);
		return;
	}
	sqp->sq_state |= SQS_POLL_RESTART;
	cv_signal(&sqp->sq_worker_cv);
	while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
		cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
	sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
	mutex_exit(&sqp->sq_lock);
}

/*
 * Sanitize all squeues associated with the ill.
 */
void
ip_squeue_clean_all(ill_t *ill)
{
	int idx;
	ill_rx_ring_t *rx_ring;

	for (idx = 0; idx < ILL_MAX_RINGS; idx++) {
		rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx];
		ip_squeue_clean_ring(ill, rx_ring);
	}
}

/*
 * Used by IP to get the squeue associated with a ring. If the squeue isn't
 * yet bound to a CPU, and we're being called directly from the NIC's
 * interrupt, then we know what CPU we want to assign the squeue to, so
 * dispatch that task to a taskq.
 */
squeue_t *
ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
{
	squeue_t *sqp;

	if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL))
		return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));

	return (sqp);
}

/*
 * Called when a CPU goes offline. Its squeue_set_t is destroyed, and all
 * squeues are unbound and moved to the unbound set.
 */
static void
ip_squeue_set_destroy(cpu_t *cpu)
{
	int i;
	squeue_t *sqp, *lastsqp = NULL;
	squeue_set_t *sqs, *unbound = sqset_global_list[0];

	mutex_enter(&sqset_lock);
	if ((sqs = cpu->cpu_squeue_set) == NULL) {
		mutex_exit(&sqset_lock);
		return;
	}

	/* Move all squeues to unbound set */

	for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
		squeue_unbind(sqp);
		sqp->sq_set = unbound;
	}
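	/* Splice the whole list onto the head of the unbound set. */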
	if (sqs->sqs_head) {
		lastsqp->sq_next = unbound->sqs_head;
		unbound->sqs_head = sqs->sqs_head;
	}

	/* Also move default squeue to unbound set */

	sqp = sqs->sqs_default;
	ASSERT(sqp != NULL);
	ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);

	sqp->sq_next = unbound->sqs_head;
	unbound->sqs_head = sqp;
	squeue_unbind(sqp);
	sqp->sq_set = unbound;

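	/*
	 * Remove this set from the global list by replacing it with the
	 * last entry and shrinking the list.
	 */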
	for (i = 1; i < sqset_global_size; i++)
		if (sqset_global_list[i] == sqs)
			break;

	ASSERT(i < sqset_global_size);
	sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
	sqset_global_list[sqset_global_size - 1] = NULL;
	sqset_global_size--;

	mutex_exit(&sqset_lock);
	kmem_free(sqs, sizeof (*sqs));
}

/*
 * Reconfiguration callback
 */
/* ARGSUSED */
static int
ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	cpu_t *cp = cpu_get(id);

	ASSERT(MUTEX_HELD(&cpu_lock));
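	/*
	 * Online-style events get a squeue set if the CPU doesn't have one
	 * yet; offline-style events tear the set down and move its squeues
	 * to the unbound set (see ip_squeue_set_destroy()).
	 */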
	switch (what) {
	case CPU_CONFIG:
	case CPU_ON:
	case CPU_INIT:
	case CPU_CPUPART_IN:
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
		break;
	case CPU_UNCONFIG:
	case CPU_OFF:
	case CPU_CPUPART_OUT:
		if (cp->cpu_squeue_set != NULL) {
			ip_squeue_set_destroy(cp);
			cp->cpu_squeue_set = NULL;
		}
		break;
	default:
		break;
	}
	return (0);
}