re #13613 rb4516 Tunables needs volatile keyword
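The functional change in the portion of the diff shown here is the volatile qualifier added to ip_squeue_worker_wait. Tunables like this are meant to be patched in a live system (for example with mdb -kw), so every consumer has to re-read the current value from memory; without volatile the compiler is free to cache the initial value in a register or fold the constant into the generated code. Below is a minimal user-land sketch of the effect, using a hypothetical demo_worker_wait rather than the kernel symbol, not the reviewed code itself:

#include <stdio.h>
#include <unistd.h>

/*
 * Hypothetical stand-in for the kernel tunable ip_squeue_worker_wait.
 * The volatile qualifier is the point of the change under review: it
 * forces every use to re-load the current value from memory.
 */
volatile unsigned int demo_worker_wait = 10;

int
main(void)
{
	int i;

	for (i = 0; i < 30; i++) {
		/*
		 * Re-read the tunable on each pass.  Without volatile, an
		 * optimizing compiler could legally hoist this load out of
		 * the loop (or fold in the constant 10), so a value patched
		 * into memory by a debugger would never be noticed.
		 */
		unsigned int wait = demo_worker_wait;

		printf("current wait: %u\n", wait);
		sleep(1);
	}
	return (0);
}

The same reasoning would apply to the other squeue tunables the header comment documents (such as ip_squeue_fanout), but in the hunks shown here only ip_squeue_worker_wait is touched.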
--- old/usr/src/uts/common/inet/ip/ip_squeue.c
+++ new/usr/src/uts/common/inet/ip/ip_squeue.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 +/*
26 + * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
27 + */
25 28
26 29 /*
27 30 * IP interface to squeues.
28 31 *
29 32 * IP uses squeues to force serialization of packets, both incoming and
30 33 * outgoing. Each squeue is associated with a connection instance (conn_t)
31 34 * above, and a soft ring (if enabled) below. Each CPU will have a default
32 35 * squeue for outbound connections, and each soft ring of an interface will
33 36 * have an squeue to which it sends incoming packets. squeues are never
34 37 * destroyed, and if they become unused they are kept around against future
35 38 * needs.
36 39 *
37 40 * IP organizes its squeues using squeue sets (squeue_set_t). For each CPU
38 41 * in the system there will be one squeue set, all of whose squeues will be
39 42 * bound to that CPU, plus one additional set known as the unbound set. Sets
40 43 * associated with CPUs will have one default squeue, for outbound
41 44 * connections, and a linked list of squeues used by various NICs for inbound
42 45 * packets. The unbound set also has a linked list of squeues, but no default
43 46 * squeue.
44 47 *
45 48 * When a CPU goes offline its squeue set is destroyed, and all its squeues
46 49 * are moved to the unbound set. When a CPU comes online, a new squeue set is
47 50 * created and the default set is searched for a default squeue formerly bound
48 51 * to this CPU. If no default squeue is found, a new one is created.
49 52 *
50 53 * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP
51 54 * and not the squeue code. squeue.c will not touch them, and we can modify
52 55 * them without holding the squeue lock because of the guarantee that squeues
53 56 * are never destroyed. ip_squeue locks must be held, however.
54 57 *
55 58 * All the squeue sets are protected by a single lock, the sqset_lock. This
56 59 * is also used to protect the sq_next and sq_set fields of an squeue_t.
57 60 *
58 61 * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock
59 62 *
60 63 * There are two modes of associating connection with squeues. The first mode
61 64 * associates each connection with the CPU that creates the connection (either
62 65 * during open time or during accept time). The second mode associates each
63 66 * connection with a random CPU, effectively distributing load over all CPUs
64 67 * and all squeues in the system. The mode is controlled by the
65 68 * ip_squeue_fanout variable.
66 69 *
67 70 * NOTE: The fact that there is an association between each connection and
68 71 * squeue and squeue and CPU does not mean that each connection is always
69 72 * processed on this CPU and on this CPU only. Any thread calling squeue_enter()
70 73 * may process the connection on whatever CPU it is scheduled. The squeue to CPU
71 74 * binding is only relevant for the worker thread.
72 75 *
73 76 * INTERFACE:
74 77 *
75 78 * squeue_t *ip_squeue_get(ill_rx_ring_t)
76 79 *
77 80 * Returns the squeue associated with an ill receive ring. If the ring is
78 81 * not bound to a CPU, and we're currently servicing the interrupt which
79 82 * generated the packet, then bind the squeue to CPU.
80 83 *
81 84 *
82 85 * DR Notes
83 86 * ========
84 87 *
85 88 * The ip_squeue_init() registers a call-back function with the CPU DR
86 89 * subsystem using register_cpu_setup_func(). The call-back function does two
87 90 * things:
88 91 *
89 92 * o When the CPU is going off-line or unconfigured, the worker thread is
90 93 * unbound from the CPU. This allows the CPU unconfig code to move it to
91 94 * another CPU.
92 95 *
93 96 * o When the CPU is going online, it creates a new squeue for this CPU if
94 97 * necessary and binds the squeue worker thread to this CPU.
95 98 *
96 99 * TUNABLES:
97 100 *
98 101 * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
99 102 * pick the default squeue from a random CPU, otherwise use our CPU's default
100 103 * squeue.
101 104 *
102 105 * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
103 106 * /dev/ip.
104 107 *
105 108 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
106 109 * created. This is the time squeue code waits before waking up the worker
107 110 * thread after queuing a request.
108 111 */
109 112
110 113 #include <sys/types.h>
111 114 #include <sys/debug.h>
112 115 #include <sys/kmem.h>
113 116 #include <sys/cpuvar.h>
114 117 #include <sys/cmn_err.h>
115 118
116 119 #include <inet/common.h>
117 120 #include <inet/ip.h>
118 121 #include <netinet/ip6.h>
119 122 #include <inet/ip_if.h>
120 123 #include <inet/ip_ire.h>
121 124 #include <inet/nd.h>
122 125 #include <inet/ipclassifier.h>
123 126 #include <sys/types.h>
124 127 #include <sys/conf.h>
125 128 #include <sys/sunddi.h>
126 129 #include <sys/dlpi.h>
127 130 #include <sys/squeue_impl.h>
128 131 #include <sys/tihdr.h>
129 132 #include <inet/udp_impl.h>
130 133 #include <sys/strsubr.h>
131 134 #include <sys/zone.h>
132 135 #include <sys/dld.h>
133 136 #include <sys/atomic.h>
134 137
135 138 /*
136 139 * List of all created squeue sets. The list and its size are protected by
137 140 * sqset_lock.
138 141 */
139 142 static squeue_set_t **sqset_global_list; /* list 0 is the unbound list */
140 143 static uint_t sqset_global_size;
141 144 kmutex_t sqset_lock;
142 145
143 146 static void (*ip_squeue_create_callback)(squeue_t *) = NULL;
144 147
145 148 /*
146 149 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
147 150 * created. This is the time squeue code waits before waking up the worker
148 151 * thread after queuing a request.
149 152 */
150 -uint_t ip_squeue_worker_wait = 10;
153 +volatile uint_t ip_squeue_worker_wait = 10;
151 154
152 155 static squeue_t *ip_squeue_create(pri_t);
153 156 static squeue_set_t *ip_squeue_set_create(processorid_t);
154 157 static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
155 158 static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
156 159 static void ip_squeue_set_destroy(cpu_t *);
157 160 static void ip_squeue_clean(void *, mblk_t *, void *);
158 161
159 162 #define CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))
160 163
161 164 static squeue_t *
162 165 ip_squeue_create(pri_t pri)
163 166 {
164 167 squeue_t *sqp;
165 168
166 169 sqp = squeue_create(ip_squeue_worker_wait, pri);
167 170 ASSERT(sqp != NULL);
168 171 if (ip_squeue_create_callback != NULL)
169 172 ip_squeue_create_callback(sqp);
170 173 return (sqp);
171 174 }
172 175
173 176 /*
174 177 * Create a new squeue_set. If id == -1, then we're creating the unbound set,
175 178 * which should only happen once when we are first initialized. Otherwise id
176 179 * is the id of the CPU that needs a set, either because we are initializing
177 180 * or because the CPU has come online.
178 181 *
179 182 * If id != -1, then we need at a minimum to provide a default squeue for the
180 183 * new set. We search the unbound set for candidates, and if none are found we
181 184 * create a new one.
182 185 */
183 186 static squeue_set_t *
184 187 ip_squeue_set_create(processorid_t id)
185 188 {
186 189 squeue_set_t *sqs;
187 190 squeue_set_t *src = sqset_global_list[0];
188 191 squeue_t **lastsqp, *sq;
189 192 squeue_t **defaultq_lastp = NULL;
190 193
191 194 sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
192 195 sqs->sqs_cpuid = id;
193 196
194 197 if (id == -1) {
195 198 ASSERT(sqset_global_size == 0);
196 199 sqset_global_list[0] = sqs;
197 200 sqset_global_size = 1;
198 201 return (sqs);
199 202 }
200 203
201 204 /*
202 205 * When we create an squeue set id != -1, we need to give it a
203 206 * default squeue, in order to support fanout of conns across
204 207 * CPUs. Try to find a former default squeue that matches this
205 208 * cpu id on the unbound squeue set. If no such squeue is found,
206 209 * find some non-default TCP squeue that is free. If still no such
207 210 * candidate is found, create a new squeue.
208 211 */
209 212
210 213 ASSERT(MUTEX_HELD(&cpu_lock));
211 214 mutex_enter(&sqset_lock);
212 215 lastsqp = &src->sqs_head;
213 216
214 217 while (*lastsqp) {
215 218 if ((*lastsqp)->sq_bind == id &&
216 219 (*lastsqp)->sq_state & SQS_DEFAULT) {
217 220 /*
218 221 * Exact match. Former default squeue of cpu 'id'
219 222 */
220 223 ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
221 224 defaultq_lastp = lastsqp;
222 225 break;
223 226 }
224 227 if (defaultq_lastp == NULL &&
225 228 !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
226 229 /*
227 230 * A free non-default TCP squeue
228 231 */
229 232 defaultq_lastp = lastsqp;
230 233 }
231 234 lastsqp = &(*lastsqp)->sq_next;
232 235 }
233 236
234 237 if (defaultq_lastp != NULL) {
235 238 /* Remove from src set and set SQS_DEFAULT */
236 239 sq = *defaultq_lastp;
237 240 *defaultq_lastp = sq->sq_next;
238 241 sq->sq_next = NULL;
239 242 if (!(sq->sq_state & SQS_DEFAULT)) {
240 243 mutex_enter(&sq->sq_lock);
241 244 sq->sq_state |= SQS_DEFAULT;
242 245 mutex_exit(&sq->sq_lock);
243 246 }
244 247 } else {
245 248 sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
246 249 sq->sq_state |= SQS_DEFAULT;
247 250 }
248 251
249 252 sq->sq_set = sqs;
250 253 sqs->sqs_default = sq;
251 254 squeue_bind(sq, id); /* this locks squeue mutex */
252 255
253 256 ASSERT(sqset_global_size <= NCPU);
254 257 sqset_global_list[sqset_global_size++] = sqs;
255 258 mutex_exit(&sqset_lock);
256 259 return (sqs);
257 260 }
258 261
259 262 /*
260 263 * Called by ill_ring_add() to find an squeue to associate with a new ring.
261 264 */
262 265
263 266 squeue_t *
264 267 ip_squeue_getfree(pri_t pri)
265 268 {
266 269 squeue_set_t *sqs = sqset_global_list[0];
267 270 squeue_t *sq;
268 271
269 272 mutex_enter(&sqset_lock);
270 273 for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
271 274 /*
272 275 * Select a non-default TCP squeue that is free i.e. not
273 276 * bound to any ill.
274 277 */
275 278 if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
276 279 break;
277 280 }
278 281
279 282 if (sq == NULL) {
280 283 sq = ip_squeue_create(pri);
281 284 sq->sq_set = sqs;
282 285 sq->sq_next = sqs->sqs_head;
283 286 sqs->sqs_head = sq;
284 287 }
285 288
286 289 ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
287 290 SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
288 291 SQS_POLL_THR_QUIESCED)));
289 292
290 293 mutex_enter(&sq->sq_lock);
291 294 sq->sq_state |= SQS_ILL_BOUND;
292 295 mutex_exit(&sq->sq_lock);
293 296 mutex_exit(&sqset_lock);
294 297
295 298 if (sq->sq_priority != pri) {
296 299 thread_lock(sq->sq_worker);
297 300 (void) thread_change_pri(sq->sq_worker, pri, 0);
298 301 thread_unlock(sq->sq_worker);
299 302
300 303 thread_lock(sq->sq_poll_thr);
301 304 (void) thread_change_pri(sq->sq_poll_thr, pri, 0);
302 305 thread_unlock(sq->sq_poll_thr);
303 306
304 307 sq->sq_priority = pri;
305 308 }
306 309 return (sq);
307 310 }
308 311
309 312 /*
310 313 * Initialize IP squeues.
311 314 */
312 315 void
313 316 ip_squeue_init(void (*callback)(squeue_t *))
314 317 {
315 318 int i;
316 319 squeue_set_t *sqs;
317 320
318 321 ASSERT(sqset_global_list == NULL);
319 322
320 323 ip_squeue_create_callback = callback;
321 324 squeue_init();
322 325 mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);
323 326 sqset_global_list =
324 327 kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
325 328 sqset_global_size = 0;
326 329 /*
327 330 * We are called at system boot time and we don't
328 331 * expect memory allocation failure.
329 332 */
330 333 sqs = ip_squeue_set_create(-1);
331 334 ASSERT(sqs != NULL);
332 335
333 336 mutex_enter(&cpu_lock);
334 337 /* Create squeue for each active CPU available */
335 338 for (i = 0; i < NCPU; i++) {
336 339 cpu_t *cp = cpu_get(i);
337 340 if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
338 341 /*
339 342 * We are called at system boot time and we don't
340 343 * expect memory allocation failure then
341 344 */
342 345 cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
343 346 ASSERT(cp->cpu_squeue_set != NULL);
344 347 }
345 348 }
346 349
347 350 register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
348 351 mutex_exit(&cpu_lock);
349 352 }
350 353
351 354 /*
352 355 * Get a default squeue, either from the current CPU or a CPU derived by hash
353 356 * from the index argument, depending upon the setting of ip_squeue_fanout.
354 357 */
355 358 squeue_t *
356 359 ip_squeue_random(uint_t index)
357 360 {
358 361 squeue_set_t *sqs = NULL;
359 362 squeue_t *sq;
360 363
361 364 /*
362 365 * The minimum value of sqset_global_size is 2, one for the unbound
363 366 * squeue set and another for the squeue set of the zeroth CPU.
364 367 * Even though the value could be changing, it can never go below 2,
365 368 * so the assert does not need the lock protection.
366 369 */
367 370 ASSERT(sqset_global_size > 1);
368 371
369 372 /* Protect against changes to sqset_global_list */
370 373 mutex_enter(&sqset_lock);
371 374
372 375 if (!ip_squeue_fanout)
373 376 sqs = CPU->cpu_squeue_set;
374 377
375 378 /*
376 379 * sqset_global_list[0] corresponds to the unbound squeue set.
377 380 * The computation below picks a set other than the unbound set.
378 381 */
379 382 if (sqs == NULL)
380 383 sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1];
381 384 sq = sqs->sqs_default;
382 385
383 386 mutex_exit(&sqset_lock);
384 387 ASSERT(sq);
385 388 return (sq);
386 389 }
387 390
388 391 /*
389 392 * Move squeue from its current set to newset. Not used for default squeues.
390 393 * Bind or unbind the worker thread as appropriate.
391 394 */
392 395
393 396 static void
394 397 ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
395 398 {
396 399 squeue_set_t *set;
397 400 squeue_t **lastsqp;
398 401 processorid_t cpuid = newset->sqs_cpuid;
399 402
400 403 ASSERT(!(sq->sq_state & SQS_DEFAULT));
401 404 ASSERT(!MUTEX_HELD(&sq->sq_lock));
402 405 ASSERT(MUTEX_HELD(&sqset_lock));
403 406
404 407 set = sq->sq_set;
405 408 if (set == newset)
406 409 return;
407 410
408 411 lastsqp = &set->sqs_head;
409 412 while (*lastsqp != sq)
410 413 lastsqp = &(*lastsqp)->sq_next;
411 414
412 415 *lastsqp = sq->sq_next;
413 416 sq->sq_next = newset->sqs_head;
414 417 newset->sqs_head = sq;
415 418 sq->sq_set = newset;
416 419 if (cpuid == -1)
417 420 squeue_unbind(sq);
418 421 else
419 422 squeue_bind(sq, cpuid);
420 423 }
421 424
422 425 /*
423 426 * Move squeue from its current set to cpuid's set and bind to cpuid.
424 427 */
425 428
426 429 int
427 430 ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
428 431 {
429 432 cpu_t *cpu;
430 433 squeue_set_t *set;
431 434
432 435 if (sq->sq_state & SQS_DEFAULT)
433 436 return (-1);
434 437
435 438 ASSERT(MUTEX_HELD(&cpu_lock));
436 439
437 440 cpu = cpu_get(cpuid);
438 441 if (!CPU_ISON(cpu))
439 442 return (-1);
440 443
441 444 mutex_enter(&sqset_lock);
442 445 set = cpu->cpu_squeue_set;
443 446 if (set != NULL)
444 447 ip_squeue_set_move(sq, set);
445 448 mutex_exit(&sqset_lock);
446 449 return ((set == NULL) ? -1 : 0);
447 450 }
448 451
449 452 /*
450 453 * The mac layer is calling, asking us to move an squeue to a
451 454 * new CPU. This routine is called with cpu_lock held.
452 455 */
453 456 void
454 457 ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
455 458 {
456 459 ASSERT(ILL_MAC_PERIM_HELD(ill));
457 460 ASSERT(rx_ring->rr_ill == ill);
458 461
459 462 mutex_enter(&ill->ill_lock);
460 463 if (rx_ring->rr_ring_state == RR_FREE ||
461 464 rx_ring->rr_ring_state == RR_FREE_INPROG) {
462 465 mutex_exit(&ill->ill_lock);
463 466 return;
464 467 }
465 468
466 469 if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
467 470 rx_ring->rr_ring_state = RR_SQUEUE_BOUND;
468 471
469 472 mutex_exit(&ill->ill_lock);
470 473 }
471 474
472 475 void *
473 476 ip_squeue_add_ring(ill_t *ill, void *mrp)
474 477 {
475 478 mac_rx_fifo_t *mrfp = (mac_rx_fifo_t *)mrp;
476 479 ill_rx_ring_t *rx_ring, *ring_tbl;
477 480 int ip_rx_index;
478 481 squeue_t *sq = NULL;
479 482 pri_t pri;
480 483
481 484 ASSERT(ILL_MAC_PERIM_HELD(ill));
482 485 ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
483 486 ASSERT(ill->ill_dld_capab != NULL);
484 487
485 488 ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;
486 489
487 490 mutex_enter(&ill->ill_lock);
488 491 for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
489 492 rx_ring = &ring_tbl[ip_rx_index];
490 493 if (rx_ring->rr_ring_state == RR_FREE)
491 494 break;
492 495 }
493 496
494 497 if (ip_rx_index == ILL_MAX_RINGS) {
495 498 /*
496 499 * We ran out of ILL_MAX_RINGS worth of rx_ring structures. If
497 500 * we have devices which can overwhelm this limit,
498 501 * ILL_MAX_RINGS should be made configurable. Meanwhile this
499 502 * causes no panic because the driver will pass ip_input a NULL
500 503 * handle, which will make IP allocate the default squeue, and
501 504 * polling mode will not be used for this ring.
502 505 */
503 506 cmn_err(CE_NOTE,
504 507 "Reached maximum number of receiving rings (%d) for %s\n",
505 508 ILL_MAX_RINGS, ill->ill_name);
506 509 mutex_exit(&ill->ill_lock);
507 510 return (NULL);
508 511 }
509 512
510 513 bzero(rx_ring, sizeof (ill_rx_ring_t));
511 514 rx_ring->rr_rx = (ip_mac_rx_t)mrfp->mrf_receive;
512 515 /* XXX: Hard code it to tcp accept for now */
513 516 rx_ring->rr_ip_accept = (ip_accept_t)ip_accept_tcp;
514 517
515 518 rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
516 519 rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
517 520 rx_ring->rr_intr_disable =
518 521 (ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
519 522 rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
520 523 rx_ring->rr_ill = ill;
521 524
522 525 pri = mrfp->mrf_flow_priority;
523 526
524 527 sq = ip_squeue_getfree(pri);
525 528
526 529 mutex_enter(&sq->sq_lock);
527 530 sq->sq_rx_ring = rx_ring;
528 531 rx_ring->rr_sqp = sq;
529 532
530 533 sq->sq_state |= SQS_POLL_CAPAB;
531 534
532 535 rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
533 536 sq->sq_ill = ill;
534 537 mutex_exit(&sq->sq_lock);
535 538 mutex_exit(&ill->ill_lock);
536 539
537 540 DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
538 541 ip_rx_index, void *, mrfp->mrf_rx_arg);
539 542
540 543 /* Assign the squeue to the specified CPU as well */
541 544 mutex_enter(&cpu_lock);
542 545 (void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
543 546 mutex_exit(&cpu_lock);
544 547
545 548 return (rx_ring);
546 549 }
547 550
548 551 /*
549 552 * sanitize the squeue etc. Some of the processing
550 553 * needs to be done from inside the perimeter.
551 554 */
552 555 void
553 556 ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
554 557 {
555 558 squeue_t *sqp;
556 559
557 560 ASSERT(ILL_MAC_PERIM_HELD(ill));
558 561 ASSERT(rx_ring != NULL);
559 562
560 563 /* Just clean one squeue */
561 564 mutex_enter(&ill->ill_lock);
562 565 if (rx_ring->rr_ring_state == RR_FREE) {
563 566 mutex_exit(&ill->ill_lock);
564 567 return;
565 568 }
566 569 rx_ring->rr_ring_state = RR_FREE_INPROG;
567 570 sqp = rx_ring->rr_sqp;
568 571
569 572 mutex_enter(&sqp->sq_lock);
570 573 sqp->sq_state |= SQS_POLL_CLEANUP;
571 574 cv_signal(&sqp->sq_worker_cv);
572 575 mutex_exit(&ill->ill_lock);
573 576 while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
574 577 cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
575 578 sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;
576 579
577 580 ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
578 581 SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
579 582 SQS_POLL_THR_QUIESCED)));
580 583
581 584 cv_signal(&sqp->sq_worker_cv);
582 585 mutex_exit(&sqp->sq_lock);
583 586
584 587 /*
585 588 * Move the squeue to sqset_global_list[0] which holds the set of
586 589 * squeues not bound to any cpu. Note that the squeue is still
587 590 * considered bound to an ill as long as SQS_ILL_BOUND is set.
588 591 */
589 592 mutex_enter(&sqset_lock);
590 593 ip_squeue_set_move(sqp, sqset_global_list[0]);
591 594 mutex_exit(&sqset_lock);
592 595
593 596 /*
594 597 * CPU going offline can also trigger a move of the squeue to the
595 598 * unbound set sqset_global_list[0]. However the squeue won't be
596 599 * recycled for the next use as long as the SQS_ILL_BOUND flag
597 600 * is set. Hence we clear the SQS_ILL_BOUND flag only towards the
598 601 * end after the move.
599 602 */
600 603 mutex_enter(&sqp->sq_lock);
601 604 sqp->sq_state &= ~SQS_ILL_BOUND;
602 605 mutex_exit(&sqp->sq_lock);
603 606
604 607 mutex_enter(&ill->ill_lock);
605 608 rx_ring->rr_ring_state = RR_FREE;
606 609 mutex_exit(&ill->ill_lock);
607 610 }
608 611
609 612 /*
610 613 * Stop the squeue from polling. This needs to be done
611 614 * from inside the perimeter.
612 615 */
613 616 void
614 617 ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
615 618 {
616 619 squeue_t *sqp;
617 620
618 621 ASSERT(ILL_MAC_PERIM_HELD(ill));
619 622 ASSERT(rx_ring != NULL);
620 623
621 624 sqp = rx_ring->rr_sqp;
622 625 mutex_enter(&sqp->sq_lock);
623 626 sqp->sq_state |= SQS_POLL_QUIESCE;
624 627 cv_signal(&sqp->sq_worker_cv);
625 628 while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
626 629 cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
627 630
628 631 mutex_exit(&sqp->sq_lock);
629 632 }
630 633
631 634 /*
632 635 * Restart polling etc. Needs to be inside the perimeter to
633 636 * prevent races.
634 637 */
635 638 void
636 639 ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
637 640 {
638 641 squeue_t *sqp;
639 642
640 643 ASSERT(ILL_MAC_PERIM_HELD(ill));
641 644 ASSERT(rx_ring != NULL);
642 645
643 646 sqp = rx_ring->rr_sqp;
644 647 mutex_enter(&sqp->sq_lock);
645 648 /*
646 649 * Handle change in number of rings between the quiesce and
647 650 * restart operations by checking for a previous quiesce before
648 651 * attempting a restart.
649 652 */
650 653 if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
651 654 mutex_exit(&sqp->sq_lock);
652 655 return;
653 656 }
654 657 sqp->sq_state |= SQS_POLL_RESTART;
655 658 cv_signal(&sqp->sq_worker_cv);
656 659 while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
657 660 cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
658 661 sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
659 662 mutex_exit(&sqp->sq_lock);
660 663 }
661 664
662 665 /*
663 666 * sanitize all squeues associated with the ill.
664 667 */
665 668 void
666 669 ip_squeue_clean_all(ill_t *ill)
667 670 {
668 671 int idx;
669 672 ill_rx_ring_t *rx_ring;
670 673
671 674 for (idx = 0; idx < ILL_MAX_RINGS; idx++) {
672 675 rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx];
673 676 ip_squeue_clean_ring(ill, rx_ring);
674 677 }
675 678 }
676 679
677 680 /*
678 681 * Used by IP to get the squeue associated with a ring. If the squeue isn't
679 682 * yet bound to a CPU, and we're being called directly from the NIC's
680 683 * interrupt, then we know what CPU we want to assign the squeue to, so
681 684 * dispatch that task to a taskq.
682 685 */
683 686 squeue_t *
684 687 ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
685 688 {
686 689 squeue_t *sqp;
687 690
688 691 if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL))
689 692 return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));
690 693
691 694 return (sqp);
692 695 }
693 696
694 697 /*
694 697 * Called when a CPU goes offline. Its squeue_set_t is destroyed, and all
695 698 * squeues are unbound and moved to the unbound set.
697 700 */
698 701 static void
699 702 ip_squeue_set_destroy(cpu_t *cpu)
700 703 {
701 704 int i;
702 705 squeue_t *sqp, *lastsqp = NULL;
703 706 squeue_set_t *sqs, *unbound = sqset_global_list[0];
704 707
705 708 mutex_enter(&sqset_lock);
706 709 if ((sqs = cpu->cpu_squeue_set) == NULL) {
707 710 mutex_exit(&sqset_lock);
708 711 return;
709 712 }
710 713
711 714 /* Move all squeues to unbound set */
712 715
713 716 for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
714 717 squeue_unbind(sqp);
715 718 sqp->sq_set = unbound;
716 719 }
717 720 if (sqs->sqs_head) {
718 721 lastsqp->sq_next = unbound->sqs_head;
719 722 unbound->sqs_head = sqs->sqs_head;
720 723 }
721 724
722 725 /* Also move default squeue to unbound set */
723 726
724 727 sqp = sqs->sqs_default;
725 728 ASSERT(sqp != NULL);
726 729 ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);
727 730
728 731 sqp->sq_next = unbound->sqs_head;
729 732 unbound->sqs_head = sqp;
730 733 squeue_unbind(sqp);
731 734 sqp->sq_set = unbound;
732 735
733 736 for (i = 1; i < sqset_global_size; i++)
734 737 if (sqset_global_list[i] == sqs)
735 738 break;
736 739
737 740 ASSERT(i < sqset_global_size);
738 741 sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
739 742 sqset_global_list[sqset_global_size - 1] = NULL;
740 743 sqset_global_size--;
741 744
742 745 mutex_exit(&sqset_lock);
743 746 kmem_free(sqs, sizeof (*sqs));
744 747 }
745 748
746 749 /*
747 750 * Reconfiguration callback
748 751 */
749 752 /* ARGSUSED */
750 753 static int
751 754 ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
752 755 {
753 756 cpu_t *cp = cpu_get(id);
754 757
755 758 ASSERT(MUTEX_HELD(&cpu_lock));
756 759 switch (what) {
757 760 case CPU_CONFIG:
758 761 case CPU_ON:
759 762 case CPU_INIT:
760 763 case CPU_CPUPART_IN:
761 764 if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
762 765 cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
763 766 break;
764 767 case CPU_UNCONFIG:
765 768 case CPU_OFF:
766 769 case CPU_CPUPART_OUT:
767 770 if (cp->cpu_squeue_set != NULL) {
768 771 ip_squeue_set_destroy(cp);
769 772 cp->cpu_squeue_set = NULL;
770 773 }
771 774 break;
772 775 default:
773 776 break;
774 777 }
775 778 return (0);
776 779 }