9832 Original bug discovered as 9560 has friends: IPv4 packets coming in as IPv6 creating chaos
Reviewed by: Robert Mustacchi <rm@joyent.com>
--- old/usr/src/uts/common/io/mac/mac_flow.c
+++ new/usr/src/uts/common/io/mac/mac_flow.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 + *
26 + * Copyright 2019, Joyent, Inc.
25 27 */
26 28
27 29 #include <sys/strsun.h>
28 30 #include <sys/sdt.h>
29 31 #include <sys/mac.h>
30 32 #include <sys/mac_impl.h>
31 33 #include <sys/mac_client_impl.h>
32 34 #include <sys/mac_stat.h>
33 35 #include <sys/dls.h>
34 36 #include <sys/dls_impl.h>
35 37 #include <sys/mac_soft_ring.h>
36 38 #include <sys/ethernet.h>
37 39 #include <sys/cpupart.h>
38 40 #include <sys/pool.h>
39 41 #include <sys/pool_pset.h>
40 42 #include <sys/vlan.h>
41 43 #include <inet/ip.h>
42 44 #include <inet/ip6.h>
43 45 #include <netinet/tcp.h>
44 46 #include <netinet/udp.h>
45 47 #include <netinet/sctp.h>
46 48
47 49 typedef struct flow_stats_s {
48 50 uint64_t fs_obytes;
49 51 uint64_t fs_opackets;
50 52 uint64_t fs_oerrors;
51 53 uint64_t fs_ibytes;
52 54 uint64_t fs_ipackets;
53 55 uint64_t fs_ierrors;
54 56 } flow_stats_t;
55 57
56 58
57 59 /* global flow table, will be a per exclusive-zone table later */
58 60 static mod_hash_t *flow_hash;
59 61 static krwlock_t flow_tab_lock;
60 62
61 63 static kmem_cache_t *flow_cache;
62 64 static kmem_cache_t *flow_tab_cache;
63 65 static flow_ops_t flow_l2_ops;
64 66
65 67 typedef struct {
66 68 const char *fs_name;
67 69 uint_t fs_offset;
68 70 } flow_stats_info_t;
69 71
70 72 #define FS_OFF(f) (offsetof(flow_stats_t, f))
71 73 static flow_stats_info_t flow_stats_list[] = {
72 74 {"rbytes", FS_OFF(fs_ibytes)},
73 75 {"ipackets", FS_OFF(fs_ipackets)},
74 76 {"ierrors", FS_OFF(fs_ierrors)},
75 77 {"obytes", FS_OFF(fs_obytes)},
76 78 {"opackets", FS_OFF(fs_opackets)},
77 79 {"oerrors", FS_OFF(fs_oerrors)}
78 80 };
79 81 #define FS_SIZE (sizeof (flow_stats_list) / sizeof (flow_stats_info_t))
80 82
81 83 /*
82 84 * Checks whether a flow mask is legal.
83 85 */
84 86 static flow_tab_info_t *mac_flow_tab_info_get(flow_mask_t);
85 87
86 88 static void
87 89 flow_stat_init(kstat_named_t *knp)
88 90 {
89 91 int i;
90 92
91 93 for (i = 0; i < FS_SIZE; i++, knp++) {
92 94 kstat_named_init(knp, flow_stats_list[i].fs_name,
93 95 KSTAT_DATA_UINT64);
94 96 }
95 97 }
96 98
97 99 static int
98 100 flow_stat_update(kstat_t *ksp, int rw)
99 101 {
100 102 flow_entry_t *fep = ksp->ks_private;
101 103 kstat_named_t *knp = ksp->ks_data;
102 104 uint64_t *statp;
103 105 int i;
104 106 mac_rx_stats_t *mac_rx_stat;
105 107 mac_tx_stats_t *mac_tx_stat;
106 108 flow_stats_t flow_stats;
107 109 mac_soft_ring_set_t *mac_srs;
108 110
109 111 if (rw != KSTAT_READ)
110 112 return (EACCES);
111 113
112 114 bzero(&flow_stats, sizeof (flow_stats_t));
113 115
114 116 for (i = 0; i < fep->fe_rx_srs_cnt; i++) {
115 117 mac_srs = (mac_soft_ring_set_t *)fep->fe_rx_srs[i];
116 - if (mac_srs == NULL) /* Multicast flow */
118 + if (mac_srs == NULL) /* Multicast flow */
117 119 break;
118 120 mac_rx_stat = &mac_srs->srs_rx.sr_stat;
119 121
120 122 flow_stats.fs_ibytes += mac_rx_stat->mrs_intrbytes +
121 123 mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes;
122 124
123 125 flow_stats.fs_ipackets += mac_rx_stat->mrs_intrcnt +
124 126 mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
125 127
126 128 flow_stats.fs_ierrors += mac_rx_stat->mrs_ierrors;
127 129 }
128 130
129 131 mac_srs = (mac_soft_ring_set_t *)fep->fe_tx_srs;
130 - if (mac_srs == NULL) /* Multicast flow */
132 + if (mac_srs == NULL) /* Multicast flow */
131 133 goto done;
132 134 mac_tx_stat = &mac_srs->srs_tx.st_stat;
133 135
134 136 flow_stats.fs_obytes = mac_tx_stat->mts_obytes;
135 137 flow_stats.fs_opackets = mac_tx_stat->mts_opackets;
136 138 flow_stats.fs_oerrors = mac_tx_stat->mts_oerrors;
137 139
138 140 done:
139 141 for (i = 0; i < FS_SIZE; i++, knp++) {
140 142 statp = (uint64_t *)
141 143 ((uchar_t *)&flow_stats + flow_stats_list[i].fs_offset);
142 144 knp->value.ui64 = *statp;
143 145 }
144 146 return (0);
145 147 }
146 148
147 149 static void
148 150 flow_stat_create(flow_entry_t *fep)
149 151 {
150 152 kstat_t *ksp;
151 153 kstat_named_t *knp;
152 154 uint_t nstats = FS_SIZE;
153 155
154 156 /*
155 157	 * For now, flow entries are only manipulated and visible from the
156 158 * global zone.
157 159 */
158 160 ksp = kstat_create_zone("unix", 0, (char *)fep->fe_flow_name, "flow",
159 161 KSTAT_TYPE_NAMED, nstats, 0, GLOBAL_ZONEID);
160 162 if (ksp == NULL)
161 163 return;
162 164
163 165 ksp->ks_update = flow_stat_update;
164 166 ksp->ks_private = fep;
165 167 fep->fe_ksp = ksp;
166 168
167 169 knp = (kstat_named_t *)ksp->ks_data;
168 170 flow_stat_init(knp);
169 171 kstat_install(ksp);
170 172 }
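Since each flow's kstat is created under module "unix", instance 0, class "flow", with the flow name as the kstat name, the counters become visible from userland as soon as the flow exists. A quick sanity check (the flow name here is illustrative):

    # kstat -m unix -c flow -n https-flow

This should list the rbytes/ipackets/ierrors/obytes/opackets/oerrors counters defined in flow_stats_list above.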
171 173
172 174 void
173 175 flow_stat_destroy(flow_entry_t *fep)
174 176 {
175 177 if (fep->fe_ksp != NULL) {
176 178 kstat_delete(fep->fe_ksp);
177 179 fep->fe_ksp = NULL;
178 180 }
179 181 }
180 182
181 183 /*
182 184 * Initialize the flow table
183 185 */
184 186 void
185 187 mac_flow_init()
186 188 {
187 189 flow_cache = kmem_cache_create("flow_entry_cache",
188 190 sizeof (flow_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
189 191 flow_tab_cache = kmem_cache_create("flow_tab_cache",
190 192 sizeof (flow_tab_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
191 193 flow_hash = mod_hash_create_extended("flow_hash",
192 194 100, mod_hash_null_keydtor, mod_hash_null_valdtor,
193 195 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
194 196 rw_init(&flow_tab_lock, NULL, RW_DEFAULT, NULL);
195 197 }
196 198
197 199 /*
198 200 * Cleanup and release the flow table
199 201 */
200 202 void
201 203 mac_flow_fini()
202 204 {
203 205 kmem_cache_destroy(flow_cache);
204 206 kmem_cache_destroy(flow_tab_cache);
205 207 mod_hash_destroy_hash(flow_hash);
206 208 rw_destroy(&flow_tab_lock);
207 209 }
208 210
209 211 /*
210 212 * mac_create_flow(): create a flow_entry_t.
211 213 */
212 214 int
213 215 mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name,
214 216 void *client_cookie, uint_t type, flow_entry_t **flentp)
215 217 {
216 218 flow_entry_t *flent = *flentp;
217 219 int err = 0;
218 220
219 221 if (mrp != NULL) {
220 222 err = mac_validate_props(NULL, mrp);
221 223 if (err != 0)
222 224 return (err);
223 225 }
224 226
225 227 if (flent == NULL) {
226 228 flent = kmem_cache_alloc(flow_cache, KM_SLEEP);
227 229 bzero(flent, sizeof (*flent));
228 230 mutex_init(&flent->fe_lock, NULL, MUTEX_DEFAULT, NULL);
229 231 cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL);
230 232
231 233 /* Initialize the receiver function to a safe routine */
232 234 flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop;
233 235 flent->fe_index = -1;
234 236 }
235 237 (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
236 238
237 239	/* This is an initial flow; it will be configured later */
238 240 if (fd == NULL) {
239 241 *flentp = flent;
240 242 return (0);
241 243 }
242 244
243 245 flent->fe_client_cookie = client_cookie;
244 246 flent->fe_type = type;
245 247
246 248 /* Save flow desc */
247 249 bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
248 250
249 251 if (mrp != NULL) {
250 252 /*
251 253 * We have already set fe_resource_props for a Link.
252 254 */
253 255 if (type & FLOW_USER) {
254 256 bcopy(mrp, &flent->fe_resource_props,
255 257 sizeof (mac_resource_props_t));
256 258 }
257 259 /*
258 260 * The effective resource list should reflect the priority
259 261 * that we set implicitly.
260 262 */
261 263 if (!(mrp->mrp_mask & MRP_PRIORITY))
262 264 mrp->mrp_mask |= MRP_PRIORITY;
263 265 if (type & FLOW_USER)
264 266 mrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
265 267 else
266 268 mrp->mrp_priority = MPL_LINK_DEFAULT;
267 269 bzero(mrp->mrp_pool, MAXPATHLEN);
268 270 bzero(&mrp->mrp_cpus, sizeof (mac_cpus_t));
269 271 bcopy(mrp, &flent->fe_effective_props,
270 272 sizeof (mac_resource_props_t));
271 273 }
272 274 flow_stat_create(flent);
273 275
274 276 *flentp = flent;
275 277 return (0);
276 278 }
277 279
278 280 /*
279 281 * Validate flow entry and add it to a flow table.
280 282 */
281 283 int
282 284 mac_flow_add(flow_tab_t *ft, flow_entry_t *flent)
283 285 {
284 286 flow_entry_t **headp, **p;
285 287 flow_ops_t *ops = &ft->ft_ops;
286 288 flow_mask_t mask;
287 289 uint32_t index;
288 290 int err;
289 291
290 292 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
291 293
292 294 /*
293 295 * Check for invalid bits in mask.
294 296 */
295 297 mask = flent->fe_flow_desc.fd_mask;
296 298 if ((mask & ft->ft_mask) == 0 || (mask & ~ft->ft_mask) != 0)
297 299 return (EOPNOTSUPP);
298 300
299 301 /*
300 302 * Validate flent.
301 303 */
302 304 if ((err = ops->fo_accept_fe(ft, flent)) != 0) {
303 305 DTRACE_PROBE3(accept_failed, flow_tab_t *, ft,
304 306 flow_entry_t *, flent, int, err);
305 307 return (err);
306 308 }
307 309
308 310 /*
309 311	 * Flent is valid. Now calculate its hash and insert it
310 312	 * into the hash table.
311 313 */
312 314 index = ops->fo_hash_fe(ft, flent);
313 315
314 316 /*
315 317 * We do not need a lock up until now because we were
316 318 * not accessing the flow table.
317 319 */
318 320 rw_enter(&ft->ft_lock, RW_WRITER);
319 321 headp = &ft->ft_table[index];
320 322
321 323 /*
322 324 * Check for duplicate flow.
323 325 */
324 326 for (p = headp; *p != NULL; p = &(*p)->fe_next) {
325 327 if ((*p)->fe_flow_desc.fd_mask !=
326 328 flent->fe_flow_desc.fd_mask)
327 329 continue;
328 330
329 331 if (ft->ft_ops.fo_match_fe(ft, *p, flent)) {
330 332 rw_exit(&ft->ft_lock);
331 333 DTRACE_PROBE3(dup_flow, flow_tab_t *, ft,
332 334 flow_entry_t *, flent, int, err);
333 335 return (EALREADY);
334 336 }
335 337 }
336 338
337 339 /*
338 340 * Insert flow to hash list.
339 341 */
340 342 err = ops->fo_insert_fe(ft, headp, flent);
341 343 if (err != 0) {
342 344 rw_exit(&ft->ft_lock);
343 345 DTRACE_PROBE3(insert_failed, flow_tab_t *, ft,
344 346 flow_entry_t *, flent, int, err);
345 347 return (err);
346 348 }
347 349
348 350 /*
349 351 * Save the hash index so it can be used by mac_flow_remove().
350 352 */
351 353 flent->fe_index = (int)index;
352 354
353 355 /*
354 356 * Save the flow tab back reference.
355 357 */
356 358 flent->fe_flow_tab = ft;
357 359 FLOW_MARK(flent, FE_FLOW_TAB);
358 360 ft->ft_flow_count++;
359 361 rw_exit(&ft->ft_lock);
360 362 return (0);
361 363 }
362 364
363 365 /*
364 366 * Remove a flow from a mac client's subflow table
365 367 */
366 368 void
367 369 mac_flow_rem_subflow(flow_entry_t *flent)
368 370 {
369 371 flow_tab_t *ft = flent->fe_flow_tab;
370 372 mac_client_impl_t *mcip = ft->ft_mcip;
371 373 mac_handle_t mh = (mac_handle_t)ft->ft_mip;
372 374
373 375 ASSERT(MAC_PERIM_HELD(mh));
374 376
375 377 mac_flow_remove(ft, flent, B_FALSE);
376 378 if (flent->fe_mcip == NULL) {
377 379 /*
378 380 * The interface is not yet plumbed and mac_client_flow_add
379 381 * was not done.
380 382 */
381 383 if (FLOW_TAB_EMPTY(ft)) {
382 384 mac_flow_tab_destroy(ft);
383 385 mcip->mci_subflow_tab = NULL;
384 386 }
385 387 } else {
386 388 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
387 389 mac_link_flow_clean((mac_client_handle_t)mcip, flent);
388 390 }
389 391 mac_fastpath_enable(mh);
390 392 }
391 393
392 394 /*
393 395 * Add a flow to a mac client's subflow table and instantiate the flow
394 396 * in the mac by creating the associated SRSs etc.
395 397 */
396 398 int
397 399 mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent,
398 400 boolean_t instantiate_flow)
399 401 {
400 402 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
401 403 mac_handle_t mh = (mac_handle_t)mcip->mci_mip;
402 404 flow_tab_info_t *ftinfo;
403 405 flow_mask_t mask;
404 406 flow_tab_t *ft;
405 407 int err;
406 408 boolean_t ft_created = B_FALSE;
407 409
408 410 ASSERT(MAC_PERIM_HELD(mh));
409 411
410 412 if ((err = mac_fastpath_disable(mh)) != 0)
411 413 return (err);
412 414
413 415 /*
414 416	 * If the subflow table exists already, just add the new subflow
415 417 * to the existing table, else we create a new subflow table below.
416 418 */
417 419 ft = mcip->mci_subflow_tab;
418 420 if (ft == NULL) {
419 421 mask = flent->fe_flow_desc.fd_mask;
420 422 /*
421 423 * Try to create a new table and then add the subflow to the
422 424 * newly created subflow table
423 425 */
424 426 if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) {
425 427 mac_fastpath_enable(mh);
426 428 return (EOPNOTSUPP);
427 429 }
428 430
429 431 mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size,
430 432 mcip->mci_mip, &ft);
431 433 ft_created = B_TRUE;
432 434 }
433 435
434 436 err = mac_flow_add(ft, flent);
435 437 if (err != 0) {
436 438 if (ft_created)
437 439 mac_flow_tab_destroy(ft);
438 440 mac_fastpath_enable(mh);
439 441 return (err);
440 442 }
441 443
442 444 if (instantiate_flow) {
443 445 /* Now activate the flow by creating its SRSs */
444 446 ASSERT(MCIP_DATAPATH_SETUP(mcip));
445 447 err = mac_link_flow_init((mac_client_handle_t)mcip, flent);
446 448 if (err != 0) {
447 449 mac_flow_remove(ft, flent, B_FALSE);
448 450 if (ft_created)
449 451 mac_flow_tab_destroy(ft);
450 452 mac_fastpath_enable(mh);
451 453 return (err);
452 454 }
453 455 } else {
454 456 FLOW_MARK(flent, FE_UF_NO_DATAPATH);
455 457 }
456 458 if (ft_created) {
457 459 ASSERT(mcip->mci_subflow_tab == NULL);
458 460 ft->ft_mcip = mcip;
459 461 mcip->mci_subflow_tab = ft;
460 462 if (instantiate_flow)
461 463 mac_client_update_classifier(mcip, B_TRUE);
462 464 }
463 465 return (0);
464 466 }
465 467
466 468 /*
467 469 * Remove flow entry from flow table.
468 470 */
469 471 void
470 472 mac_flow_remove(flow_tab_t *ft, flow_entry_t *flent, boolean_t temp)
471 473 {
472 474 flow_entry_t **fp;
473 475
474 476 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
475 477 if (!(flent->fe_flags & FE_FLOW_TAB))
476 478 return;
477 479
478 480 rw_enter(&ft->ft_lock, RW_WRITER);
479 481 /*
480 482 * If this is a permanent removal from the flow table, mark it
481 483 * CONDEMNED to prevent future references. If this is a temporary
482 484	 * removal from the table, say to update the flow descriptor, then
483 485	 * we don't mark it CONDEMNED.
484 486 */
485 487 if (!temp)
486 488 FLOW_MARK(flent, FE_CONDEMNED);
487 489 /*
488 490 * Locate the specified flent.
489 491 */
490 492 fp = &ft->ft_table[flent->fe_index];
491 493 while (*fp != flent)
492 494 fp = &(*fp)->fe_next;
493 495
494 496 /*
495 497 * The flent must exist. Otherwise it's a bug.
496 498 */
497 499 ASSERT(fp != NULL);
498 500 *fp = flent->fe_next;
499 501 flent->fe_next = NULL;
500 502
501 503 /*
502 504 * Reset fe_index to -1 so any attempt to call mac_flow_remove()
503 505 * on a flent that is supposed to be in the table (FE_FLOW_TAB)
504 506 * will panic.
505 507 */
506 508 flent->fe_index = -1;
507 509 FLOW_UNMARK(flent, FE_FLOW_TAB);
508 510 ft->ft_flow_count--;
509 511 rw_exit(&ft->ft_lock);
510 512 }
511 513
512 514 /*
513 515 * This is the flow lookup routine used by the mac sw classifier engine.
514 516 */
515 517 int
516 518 mac_flow_lookup(flow_tab_t *ft, mblk_t *mp, uint_t flags, flow_entry_t **flentp)
517 519 {
518 520 flow_state_t s;
519 521 flow_entry_t *flent;
520 522 flow_ops_t *ops = &ft->ft_ops;
521 523 boolean_t retried = B_FALSE;
522 524 int i, err;
523 525
524 526 s.fs_flags = flags;
525 527 retry:
526 528 s.fs_mp = mp;
527 529
528 530 /*
529 531 * Walk the list of predeclared accept functions.
530 532 * Each of these would accumulate enough state to allow the next
531 533 * accept routine to make progress.
532 534 */
533 535 for (i = 0; i < FLOW_MAX_ACCEPT && ops->fo_accept[i] != NULL; i++) {
534 536 if ((err = (ops->fo_accept[i])(ft, &s)) != 0) {
535 537 mblk_t *last;
536 538
537 539 /*
538 540 * ENOBUFS indicates that the mp could be too short
539 541 * and may need a pullup.
540 542 */
541 543 if (err != ENOBUFS || retried)
542 544 return (err);
543 545
544 546 /*
545 547 * The pullup is done on the last processed mblk, not
546 548	 * the starting one. A pullup is not done if the mblk
547 549 * has references or if b_cont is NULL.
548 550 */
549 551 last = s.fs_mp;
550 552 if (DB_REF(last) > 1 || last->b_cont == NULL ||
551 553 pullupmsg(last, -1) == 0)
552 554 return (EINVAL);
553 555
554 556 retried = B_TRUE;
555 557 DTRACE_PROBE2(need_pullup, flow_tab_t *, ft,
556 558 flow_state_t *, &s);
557 559 goto retry;
558 560 }
559 561 }
560 562
561 563 /*
562 564 * The packet is considered sane. We may now attempt to
563 565 * find the corresponding flent.
564 566 */
565 567 rw_enter(&ft->ft_lock, RW_READER);
566 568 flent = ft->ft_table[ops->fo_hash(ft, &s)];
567 569 for (; flent != NULL; flent = flent->fe_next) {
568 570 if (flent->fe_match(ft, flent, &s)) {
569 571 FLOW_TRY_REFHOLD(flent, err);
570 572 if (err != 0)
571 573 continue;
572 574 *flentp = flent;
573 575 rw_exit(&ft->ft_lock);
574 576 return (0);
575 577 }
576 578 }
577 579 rw_exit(&ft->ft_lock);
578 580 return (ENOENT);
579 581 }
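For reference, the caller side of this lookup looks roughly as follows (a minimal sketch, assuming the FLOW_INBOUND flag and FLOW_REFRELE macro from the mac flow headers). On success the flent is returned with a reference held, so the caller must drop it when done:

    flow_entry_t *flent;

    /* sketch of a datapath consumer; mcip and mp come from the caller */
    if (mac_flow_lookup(mcip->mci_subflow_tab, mp, FLOW_INBOUND,
        &flent) == 0) {
            /* ... deliver the packet through flent->fe_cb_fn ... */
            FLOW_REFRELE(flent);
    }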
580 582
581 583 /*
582 584 * Walk flow table.
583 585 * The caller is assumed to have proper perimeter protection.
584 586 */
585 587 int
586 588 mac_flow_walk_nolock(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
587 589 void *arg)
588 590 {
589 591 int err, i, cnt = 0;
590 592 flow_entry_t *flent;
591 593
592 594 if (ft == NULL)
593 595 return (0);
594 596
595 597 for (i = 0; i < ft->ft_size; i++) {
596 598 for (flent = ft->ft_table[i]; flent != NULL;
597 599 flent = flent->fe_next) {
598 600 cnt++;
599 601 err = (*fn)(flent, arg);
600 602 if (err != 0)
601 603 return (err);
602 604 }
603 605 }
604 606 VERIFY(cnt == ft->ft_flow_count);
605 607 return (0);
606 608 }
607 609
608 610 /*
609 611	 * Same as the above except the flow table lock is held for protection here.
610 612 */
611 613 int
612 614 mac_flow_walk(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
613 615 void *arg)
614 616 {
615 617 int err;
616 618
617 619 if (ft == NULL)
618 620 return (0);
619 621
620 622 rw_enter(&ft->ft_lock, RW_WRITER);
621 623 err = mac_flow_walk_nolock(ft, fn, arg);
622 624 rw_exit(&ft->ft_lock);
623 625 return (err);
624 626 }
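As a usage illustration, a walker callback returns non-zero to abort the walk early; the hypothetical helper below (not part of this change) counts subflows whose datapath is not set up:

    /* Count subflows that have no datapath set up yet. */
    static int
    count_no_datapath_cb(flow_entry_t *flent, void *arg)
    {
            uint_t *cntp = arg;

            if (flent->fe_flags & FE_UF_NO_DATAPATH)
                    (*cntp)++;
            return (0);     /* non-zero would stop the walk */
    }

    uint_t cnt = 0;
    (void) mac_flow_walk(mcip->mci_subflow_tab, count_no_datapath_cb, &cnt);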
625 627
626 628 static boolean_t mac_flow_clean(flow_entry_t *);
627 629
628 630 /*
629 631 * Destroy a flow entry. Called when the last reference on a flow is released.
630 632 */
631 633 void
632 634 mac_flow_destroy(flow_entry_t *flent)
633 635 {
634 636 ASSERT(flent->fe_refcnt == 0);
635 637
636 638 if ((flent->fe_type & FLOW_USER) != 0) {
637 639 ASSERT(mac_flow_clean(flent));
638 640 } else {
639 641 mac_flow_cleanup(flent);
640 642 }
641 643 mac_misc_stat_delete(flent);
642 644 mutex_destroy(&flent->fe_lock);
643 645 cv_destroy(&flent->fe_cv);
644 646 flow_stat_destroy(flent);
645 647 kmem_cache_free(flow_cache, flent);
646 648 }
647 649
648 650 /*
649 651 * XXX eric
650 652 * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and
651 653 * mac_link_flow_modify() should really be moved/reworked into the
652 654 * two functions below. This would consolidate all the mac property
653 655 * checking in one place. I'm leaving this alone for now since it's
654 656 * out of scope of the new flows work.
655 657 */
656 658 /* ARGSUSED */
657 659 uint32_t
658 660 mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp)
659 661 {
660 662 uint32_t changed_mask = 0;
661 663 mac_resource_props_t *fmrp = &flent->fe_effective_props;
662 664 int i;
663 665
664 666 if ((mrp->mrp_mask & MRP_MAXBW) != 0 &&
665 667 (!(fmrp->mrp_mask & MRP_MAXBW) ||
666 668 (fmrp->mrp_maxbw != mrp->mrp_maxbw))) {
667 669 changed_mask |= MRP_MAXBW;
668 670 if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) {
669 671 fmrp->mrp_mask &= ~MRP_MAXBW;
670 672 fmrp->mrp_maxbw = 0;
671 673 } else {
672 674 fmrp->mrp_mask |= MRP_MAXBW;
673 675 fmrp->mrp_maxbw = mrp->mrp_maxbw;
674 676 }
675 677 }
676 678
677 679 if ((mrp->mrp_mask & MRP_PRIORITY) != 0) {
678 680 if (fmrp->mrp_priority != mrp->mrp_priority)
679 681 changed_mask |= MRP_PRIORITY;
680 682 if (mrp->mrp_priority == MPL_RESET) {
681 683 fmrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
682 684 fmrp->mrp_mask &= ~MRP_PRIORITY;
683 685 } else {
684 686 fmrp->mrp_priority = mrp->mrp_priority;
685 687 fmrp->mrp_mask |= MRP_PRIORITY;
686 688 }
687 689 }
688 690
689 691 /* modify fanout */
690 692 if ((mrp->mrp_mask & MRP_CPUS) != 0) {
691 693 if ((fmrp->mrp_ncpus == mrp->mrp_ncpus) &&
692 694 (fmrp->mrp_fanout_mode == mrp->mrp_fanout_mode)) {
693 695 for (i = 0; i < mrp->mrp_ncpus; i++) {
694 696 if (mrp->mrp_cpu[i] != fmrp->mrp_cpu[i])
695 697 break;
696 698 }
697 699 if (i == mrp->mrp_ncpus) {
698 700 /*
699 701 * The new set of cpus passed is exactly
700 702 * the same as the existing set.
701 703 */
702 704 return (changed_mask);
703 705 }
704 706 }
705 707 changed_mask |= MRP_CPUS;
706 708 MAC_COPY_CPUS(mrp, fmrp);
707 709 }
708 710
709 711 /*
710 712 * Modify the rings property.
711 713 */
712 714 if (mrp->mrp_mask & MRP_RX_RINGS || mrp->mrp_mask & MRP_TX_RINGS)
713 715 mac_set_rings_effective(flent->fe_mcip);
714 716
715 717 if ((mrp->mrp_mask & MRP_POOL) != 0) {
716 718 if (strcmp(fmrp->mrp_pool, mrp->mrp_pool) != 0)
717 719 changed_mask |= MRP_POOL;
718 720 if (strlen(mrp->mrp_pool) == 0)
719 721 fmrp->mrp_mask &= ~MRP_POOL;
720 722 else
721 723 fmrp->mrp_mask |= MRP_POOL;
722 724 (void) strncpy(fmrp->mrp_pool, mrp->mrp_pool, MAXPATHLEN);
723 725 }
724 726 return (changed_mask);
725 727 }
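For illustration, a caller clears a bandwidth cap through this path roughly as follows (a sketch; only the MRP_MAXBW handling above is exercised):

    mac_resource_props_t mrp;

    bzero(&mrp, sizeof (mrp));
    mrp.mrp_mask = MRP_MAXBW;
    mrp.mrp_maxbw = MRP_MAXBW_RESETVAL;     /* remove the maxbw limit */
    (void) mac_flow_modify_props(flent, &mrp);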
726 728
727 729 void
728 730 mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp)
729 731 {
730 732 uint32_t changed_mask;
731 733 mac_client_impl_t *mcip = flent->fe_mcip;
732 734 mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
733 735 mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
734 736 cpupart_t *cpupart = NULL;
735 737 boolean_t use_default = B_FALSE;
736 738
737 739 ASSERT(flent != NULL);
738 740 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
739 741
740 742 rw_enter(&ft->ft_lock, RW_WRITER);
741 743
742 744 /* Update the cached values inside the subflow entry */
743 745 changed_mask = mac_flow_modify_props(flent, mrp);
744 746 rw_exit(&ft->ft_lock);
745 747 /*
746 748 * Push the changed parameters to the scheduling code in the
747 749 * SRS's, to take effect right away.
748 750 */
749 751 if (changed_mask & MRP_MAXBW) {
750 752 mac_srs_update_bwlimit(flent, mrp);
751 753 /*
752 754 * If bandwidth is changed, we may have to change
753 755 * the number of soft ring to be used for fanout.
754 756 * Call mac_flow_update_fanout() if MAC_BIND_CPU
755 757 * is not set and there is no user supplied cpu
756 758 * info. This applies only to link at this time.
757 759 */
758 760 if (!(flent->fe_type & FLOW_USER) &&
759 761 !(changed_mask & MRP_CPUS) &&
760 762 !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) {
761 763 mac_fanout_setup(mcip, flent, mcip_mrp,
762 764 mac_rx_deliver, mcip, NULL, NULL);
763 765 }
764 766 }
765 767 if (mrp->mrp_mask & MRP_PRIORITY)
766 768 mac_flow_update_priority(mcip, flent);
767 769
768 770 if (changed_mask & MRP_CPUS)
769 771 mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL,
770 772 NULL);
771 773
772 774 if (mrp->mrp_mask & MRP_POOL) {
773 775 pool_lock();
774 776 cpupart = mac_pset_find(mrp, &use_default);
775 777 mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL,
776 778 cpupart);
777 779 mac_set_pool_effective(use_default, cpupart, mrp, emrp);
778 780 pool_unlock();
779 781 }
780 782 }
781 783
782 784 /*
783 785 * This function waits for a certain condition to be met and is generally
784 786 * used before a destructive or quiescing operation.
785 787 */
786 788 void
787 789 mac_flow_wait(flow_entry_t *flent, mac_flow_state_t event)
788 790 {
789 791 mutex_enter(&flent->fe_lock);
790 792 flent->fe_flags |= FE_WAITER;
791 793
792 794 switch (event) {
793 795 case FLOW_DRIVER_UPCALL:
794 796 /*
795 797 * We want to make sure the driver upcalls have finished before
796 798 * we signal the Rx SRS worker to quit.
797 799 */
798 800 while (flent->fe_refcnt != 1)
799 801 cv_wait(&flent->fe_cv, &flent->fe_lock);
800 802 break;
801 803
802 804 case FLOW_USER_REF:
803 805 /*
804 806 * Wait for the fe_user_refcnt to drop to 0. The flow has
805 807 * been removed from the global flow hash.
806 808 */
807 809 ASSERT(!(flent->fe_flags & FE_G_FLOW_HASH));
808 810 while (flent->fe_user_refcnt != 0)
809 811 cv_wait(&flent->fe_cv, &flent->fe_lock);
810 812 break;
811 813
812 814 default:
813 815 ASSERT(0);
814 816 }
815 817
816 818 flent->fe_flags &= ~FE_WAITER;
817 819 mutex_exit(&flent->fe_lock);
818 820 }
819 821
820 822 static boolean_t
821 823 mac_flow_clean(flow_entry_t *flent)
822 824 {
823 825 ASSERT(flent->fe_next == NULL);
824 826 ASSERT(flent->fe_tx_srs == NULL);
825 827 ASSERT(flent->fe_rx_srs_cnt == 0 && flent->fe_rx_srs[0] == NULL);
826 828 ASSERT(flent->fe_mbg == NULL);
827 829
828 830 return (B_TRUE);
829 831 }
830 832
831 833 void
832 834 mac_flow_cleanup(flow_entry_t *flent)
833 835 {
834 836 if ((flent->fe_type & FLOW_USER) == 0) {
835 837 ASSERT((flent->fe_mbg == NULL && flent->fe_mcip != NULL) ||
836 838 (flent->fe_mbg != NULL && flent->fe_mcip == NULL));
837 839 ASSERT(flent->fe_refcnt == 0);
838 840 } else {
839 841 ASSERT(flent->fe_refcnt == 1);
840 842 }
841 843
842 844 if (flent->fe_mbg != NULL) {
843 845 ASSERT(flent->fe_tx_srs == NULL);
844 846 /* This is a multicast or broadcast flow entry */
845 847 mac_bcast_grp_free(flent->fe_mbg);
846 848 flent->fe_mbg = NULL;
847 849 }
848 850
849 851 if (flent->fe_tx_srs != NULL) {
850 852 ASSERT(flent->fe_mbg == NULL);
851 853 mac_srs_free(flent->fe_tx_srs);
852 854 flent->fe_tx_srs = NULL;
853 855 }
854 856
855 857 /*
856 858	 * In the normal case fe_rx_srs_cnt is 1. However, in the error case
857 859	 * when mac_unicast_add fails, we may not have set up any SRS,
858 860	 * in which case fe_rx_srs_cnt will be zero.
859 861 */
860 862 if (flent->fe_rx_srs_cnt != 0) {
861 863 ASSERT(flent->fe_rx_srs_cnt == 1);
862 864 mac_srs_free(flent->fe_rx_srs[0]);
863 865 flent->fe_rx_srs[0] = NULL;
864 866 flent->fe_rx_srs_cnt = 0;
865 867 }
866 868 ASSERT(flent->fe_rx_srs[0] == NULL);
867 869 }
868 870
869 871 void
870 872 mac_flow_get_desc(flow_entry_t *flent, flow_desc_t *fd)
871 873 {
872 874 /*
873 875 * Grab the fe_lock to see a self-consistent fe_flow_desc.
874 876 * Updates to the fe_flow_desc happen under the fe_lock
875 877	 * after removing the flent from the flow table.
876 878 */
877 879 mutex_enter(&flent->fe_lock);
878 880 bcopy(&flent->fe_flow_desc, fd, sizeof (*fd));
879 881 mutex_exit(&flent->fe_lock);
880 882 }
881 883
882 884 /*
883 885 * Update a field of a flow entry. The mac perimeter ensures that
884 886 * this is the only thread doing a modify operation on this mac end point.
885 887 * So the flow table can't change or disappear. The ft_lock protects access
886 888 * to the flow entry, and holding the lock ensures that there isn't any thread
887 889 * accessing the flow entry or attempting a flow table lookup. However
888 890 * data threads that are using the flow entry based on the old descriptor
889 891 * will continue to use the flow entry. If strong coherence is required
890 892 * then the flow will have to be quiesced before the descriptor can be
891 893 * changed.
892 894 */
893 895 void
894 896 mac_flow_set_desc(flow_entry_t *flent, flow_desc_t *fd)
895 897 {
896 898 flow_tab_t *ft = flent->fe_flow_tab;
897 899 flow_desc_t old_desc;
898 900 int err;
899 901
900 902 if (ft == NULL) {
901 903 /*
902 904 * The flow hasn't yet been inserted into the table,
903 905	 * so only the caller knows about this flow; however, for
904 906 * uniformity we grab the fe_lock here.
905 907 */
906 908 mutex_enter(&flent->fe_lock);
907 909 bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
908 910 mutex_exit(&flent->fe_lock);
909 911		return;
910 912	}
911 913 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
912 914
913 915 /*
914 916	 * Need to remove the flow entry from the table and reinsert it
915 917	 * into a potentially different hash line. The hash depends on
916 918 * the new descriptor fields. However access to fe_desc itself
917 919 * is always under the fe_lock. This helps log and stat functions
918 920 * see a self-consistent fe_flow_desc.
919 921 */
920 922 mac_flow_remove(ft, flent, B_TRUE);
921 923 old_desc = flent->fe_flow_desc;
922 924
923 925 mutex_enter(&flent->fe_lock);
924 926 bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
925 927 mutex_exit(&flent->fe_lock);
926 928
927 929 if (mac_flow_add(ft, flent) != 0) {
928 930 /*
929 931 * The add failed say due to an invalid flow descriptor.
930 932 * Undo the update
931 933 */
932 934 flent->fe_flow_desc = old_desc;
933 935 err = mac_flow_add(ft, flent);
934 936 ASSERT(err == 0);
935 937 }
936 938 }
937 939
938 940 void
939 941 mac_flow_set_name(flow_entry_t *flent, const char *name)
940 942 {
941 943 flow_tab_t *ft = flent->fe_flow_tab;
942 944
943 945 if (ft == NULL) {
944 946 /*
945 947 * The flow hasn't yet been inserted into the table,
946 948 * so only the caller knows about this flow
947 949 */
948 950 (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
949 951 } else {
950 952 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
951 953 }
952 954
953 955 mutex_enter(&flent->fe_lock);
954 956 (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
955 957 mutex_exit(&flent->fe_lock);
956 958 }
957 959
958 960 /*
959 961 * Return the client-private cookie that was associated with
960 962 * the flow when it was created.
961 963 */
962 964 void *
963 965 mac_flow_get_client_cookie(flow_entry_t *flent)
964 966 {
965 967 return (flent->fe_client_cookie);
966 968 }
967 969
968 970 /*
969 971 * Forward declarations.
970 972 */
971 973 static uint32_t flow_l2_hash(flow_tab_t *, flow_state_t *);
972 974 static uint32_t flow_l2_hash_fe(flow_tab_t *, flow_entry_t *);
973 975 static int flow_l2_accept(flow_tab_t *, flow_state_t *);
974 976 static uint32_t flow_ether_hash(flow_tab_t *, flow_state_t *);
975 977 static uint32_t flow_ether_hash_fe(flow_tab_t *, flow_entry_t *);
976 978 static int flow_ether_accept(flow_tab_t *, flow_state_t *);
977 979
978 980 /*
979 981 * Create flow table.
980 982 */
981 983 void
982 984 mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size,
983 985 mac_impl_t *mip, flow_tab_t **ftp)
984 986 {
985 987 flow_tab_t *ft;
986 988 flow_ops_t *new_ops;
987 989
988 990 ft = kmem_cache_alloc(flow_tab_cache, KM_SLEEP);
989 991 bzero(ft, sizeof (*ft));
990 992
991 993 ft->ft_table = kmem_zalloc(size * sizeof (flow_entry_t *), KM_SLEEP);
992 994
993 995 /*
994 996 * We make a copy of the ops vector instead of just pointing to it
995 997 * because we might want to customize the ops vector on a per table
996 998 * basis (e.g. for optimization).
997 999 */
998 1000 new_ops = &ft->ft_ops;
999 1001 bcopy(ops, new_ops, sizeof (*ops));
1000 1002 ft->ft_mask = mask;
1001 1003 ft->ft_size = size;
1002 1004 ft->ft_mip = mip;
1003 1005
1004 1006 /*
1005 1007 * Optimizations for DL_ETHER media.
1006 1008 */
1007 1009 if (mip->mi_info.mi_nativemedia == DL_ETHER) {
1008 1010 if (new_ops->fo_hash == flow_l2_hash)
1009 1011 new_ops->fo_hash = flow_ether_hash;
1010 1012 if (new_ops->fo_hash_fe == flow_l2_hash_fe)
1011 1013 new_ops->fo_hash_fe = flow_ether_hash_fe;
1012 1014 if (new_ops->fo_accept[0] == flow_l2_accept)
1013 1015 new_ops->fo_accept[0] = flow_ether_accept;
1014 1016 }
1015 1017 *ftp = ft;
1016 1018 }
1017 1019
1018 1020 void
1019 1021 mac_flow_l2tab_create(mac_impl_t *mip, flow_tab_t **ftp)
1020 1022 {
1021 1023 mac_flow_tab_create(&flow_l2_ops, FLOW_LINK_DST | FLOW_LINK_VID,
1022 1024 1024, mip, ftp);
1023 1025 }
1024 1026
1025 1027 /*
1026 1028 * Destroy flow table.
1027 1029 */
1028 1030 void
1029 1031 mac_flow_tab_destroy(flow_tab_t *ft)
1030 1032 {
1031 1033 if (ft == NULL)
1032 1034 return;
1033 1035
1034 1036 ASSERT(ft->ft_flow_count == 0);
1035 1037 kmem_free(ft->ft_table, ft->ft_size * sizeof (flow_entry_t *));
1036 1038 bzero(ft, sizeof (*ft));
1037 1039 kmem_cache_free(flow_tab_cache, ft);
1038 1040 }
1039 1041
1040 1042 /*
1041 1043 * Add a new flow entry to the global flow hash table
1042 1044 */
1043 1045 int
1044 1046 mac_flow_hash_add(flow_entry_t *flent)
1045 1047 {
1046 1048 int err;
1047 1049
1048 1050 rw_enter(&flow_tab_lock, RW_WRITER);
1049 1051 err = mod_hash_insert(flow_hash,
1050 1052 (mod_hash_key_t)flent->fe_flow_name, (mod_hash_val_t)flent);
1051 1053 if (err != 0) {
1052 1054 rw_exit(&flow_tab_lock);
1053 1055 return (EEXIST);
1054 1056 }
1055 1057 /* Mark as inserted into the global flow hash table */
1056 1058 FLOW_MARK(flent, FE_G_FLOW_HASH);
1057 1059 rw_exit(&flow_tab_lock);
1058 1060 return (err);
1059 1061 }
1060 1062
1061 1063 /*
1062 1064 * Remove a flow entry from the global flow hash table
1063 1065 */
1064 1066 void
1065 1067 mac_flow_hash_remove(flow_entry_t *flent)
1066 1068 {
1067 1069 mod_hash_val_t val;
1068 1070
1069 1071 rw_enter(&flow_tab_lock, RW_WRITER);
1070 1072 VERIFY(mod_hash_remove(flow_hash,
1071 1073 (mod_hash_key_t)flent->fe_flow_name, &val) == 0);
1072 1074
1073 1075 /* Clear the mark that says inserted into the global flow hash table */
1074 1076 FLOW_UNMARK(flent, FE_G_FLOW_HASH);
1075 1077 rw_exit(&flow_tab_lock);
1076 1078 }
1077 1079
1078 1080 /*
1079 1081 * Retrieve a flow entry from the global flow hash table.
1080 1082 */
1081 1083 int
1082 1084 mac_flow_lookup_byname(char *name, flow_entry_t **flentp)
1083 1085 {
1084 1086 int err;
1085 1087 flow_entry_t *flent;
1086 1088
1087 1089 rw_enter(&flow_tab_lock, RW_READER);
1088 1090 err = mod_hash_find(flow_hash, (mod_hash_key_t)name,
1089 1091 (mod_hash_val_t *)&flent);
1090 1092 if (err != 0) {
1091 1093 rw_exit(&flow_tab_lock);
1092 1094 return (ENOENT);
1093 1095 }
1094 1096 ASSERT(flent != NULL);
1095 1097 FLOW_USER_REFHOLD(flent);
1096 1098 rw_exit(&flow_tab_lock);
1097 1099
1098 1100 *flentp = flent;
1099 1101 return (0);
1100 1102 }
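On success the flent is returned with a user reference held; callers must release it with FLOW_USER_REFRELE(), as the mac_link_flow_* functions below do. A minimal caller sketch (the flow name is illustrative):

    flow_entry_t *flent;

    if (mac_flow_lookup_byname("https-flow", &flent) == 0) {
            /* ... inspect flent ... */
            FLOW_USER_REFRELE(flent);
    }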
1101 1103
1102 1104 /*
1103 1105 * Initialize or release mac client flows by walking the subflow table.
1104 1106 * These are typically invoked during plumb/unplumb of links.
1105 1107 */
1106 1108
1107 1109 static int
1108 1110 mac_link_init_flows_cb(flow_entry_t *flent, void *arg)
1109 1111 {
1110 1112 mac_client_impl_t *mcip = arg;
1111 1113
1112 1114 if (mac_link_flow_init(arg, flent) != 0) {
1113 1115 cmn_err(CE_WARN, "Failed to initialize flow '%s' on link '%s'",
1114 1116 flent->fe_flow_name, mcip->mci_name);
1115 1117 } else {
1116 1118 FLOW_UNMARK(flent, FE_UF_NO_DATAPATH);
1117 1119 }
1118 1120 return (0);
1119 1121 }
1120 1122
1121 1123 void
1122 1124 mac_link_init_flows(mac_client_handle_t mch)
1123 1125 {
1124 1126 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1125 1127
1126 1128 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
1127 1129 mac_link_init_flows_cb, mcip);
1128 1130 /*
1129 1131	 * If the mac client had subflow(s) configured before plumb, change
1130 1132	 * the rx function to mac_rx_srs_subflow_process, and in case of hardware
1131 1133 * classification, disable polling.
1132 1134 */
1133 1135 mac_client_update_classifier(mcip, B_TRUE);
1134 1136
1135 1137 }
1136 1138
1137 1139 boolean_t
1138 1140 mac_link_has_flows(mac_client_handle_t mch)
1139 1141 {
1140 1142 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1141 1143
1142 1144 if (!FLOW_TAB_EMPTY(mcip->mci_subflow_tab))
1143 1145 return (B_TRUE);
1144 1146
1145 1147 return (B_FALSE);
1146 1148 }
1147 1149
1148 1150 static int
1149 1151 mac_link_release_flows_cb(flow_entry_t *flent, void *arg)
1150 1152 {
1151 1153 FLOW_MARK(flent, FE_UF_NO_DATAPATH);
1152 1154 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
1153 1155 mac_link_flow_clean(arg, flent);
1154 1156 return (0);
1155 1157 }
1156 1158
1157 1159 void
1158 1160 mac_link_release_flows(mac_client_handle_t mch)
1159 1161 {
1160 1162 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1161 1163
1162 1164 /*
1163 1165 * Change the mci_flent callback back to mac_rx_srs_process()
1164 1166 * because flows are about to be deactivated.
1165 1167 */
1166 1168 mac_client_update_classifier(mcip, B_FALSE);
1167 1169 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
1168 1170 mac_link_release_flows_cb, mcip);
1169 1171 }
1170 1172
1171 1173 void
1172 1174 mac_rename_flow(flow_entry_t *fep, const char *new_name)
1173 1175 {
1174 1176 mac_flow_set_name(fep, new_name);
1175 1177 if (fep->fe_ksp != NULL) {
1176 1178 flow_stat_destroy(fep);
1177 1179 flow_stat_create(fep);
1178 1180 }
1179 1181 }
1180 1182
1181 1183 /*
1182 1184 * mac_link_flow_init()
1183 1185 * Internal flow interface used for allocating SRSs and related
1184 1186 * data structures. Not meant to be used by mac clients.
1185 1187 */
1186 1188 int
1187 1189 mac_link_flow_init(mac_client_handle_t mch, flow_entry_t *sub_flow)
1188 1190 {
1189 - mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1191 + mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1190 1192 mac_impl_t *mip = mcip->mci_mip;
1191 1193 int err;
1192 1194
1193 1195 ASSERT(mch != NULL);
1194 1196 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1195 1197
1196 1198 if ((err = mac_datapath_setup(mcip, sub_flow, SRST_FLOW)) != 0)
1197 1199 return (err);
1198 1200
1199 1201 sub_flow->fe_mcip = mcip;
1200 1202
1201 1203 return (0);
1202 1204 }
1203 1205
1204 1206 /*
1205 1207 * mac_link_flow_add()
1206 1208	 * Used by flowadm(1M) or kernel mac clients for creating flows.
1207 1209 */
1208 1210 int
1209 1211 mac_link_flow_add(datalink_id_t linkid, char *flow_name,
1210 1212 flow_desc_t *flow_desc, mac_resource_props_t *mrp)
1211 1213 {
1212 1214 flow_entry_t *flent = NULL;
1213 1215 int err;
1214 1216 dls_dl_handle_t dlh;
1215 1217 dls_link_t *dlp;
1216 1218 boolean_t link_held = B_FALSE;
1217 1219 boolean_t hash_added = B_FALSE;
1218 1220 mac_perim_handle_t mph;
1219 1221
1220 1222 err = mac_flow_lookup_byname(flow_name, &flent);
1221 1223 if (err == 0) {
1222 1224 FLOW_USER_REFRELE(flent);
1223 1225 return (EEXIST);
1224 1226 }
1225 1227
1226 1228 /*
1227 1229 * First create a flow entry given the description provided
1228 1230 * by the caller.
1229 1231 */
1230 1232 err = mac_flow_create(flow_desc, mrp, flow_name, NULL,
1231 1233 FLOW_USER | FLOW_OTHER, &flent);
1232 1234
1233 1235 if (err != 0)
1234 1236 return (err);
1235 1237
1236 1238 /*
1237 1239 * We've got a local variable referencing this flow now, so we need
1238 1240 * to hold it. We'll release this flow before returning.
1239 1241	 * All failures until we return will undo any action that may have
1240 1242	 * internally held the flow, so the last REFRELE will ensure a clean freeing
1241 1243 * of resources.
1242 1244 */
1243 1245 FLOW_REFHOLD(flent);
1244 1246
1245 1247 flent->fe_link_id = linkid;
1246 1248 FLOW_MARK(flent, FE_INCIPIENT);
1247 1249
1248 1250 err = mac_perim_enter_by_linkid(linkid, &mph);
1249 1251 if (err != 0) {
1250 1252 FLOW_FINAL_REFRELE(flent);
1251 1253 return (err);
1252 1254 }
1253 1255
1254 1256 /*
1255 1257 * dls will eventually be merged with mac so it's ok
1256 1258 * to call dls' internal functions.
1257 1259 */
1258 1260 err = dls_devnet_hold_link(linkid, &dlh, &dlp);
1259 1261 if (err != 0)
1260 1262 goto bail;
1261 1263
1262 1264 link_held = B_TRUE;
1263 1265
1264 1266 /*
1265 1267	 * Add the flow to the global flow table; this table will be per
1266 1268	 * exclusive zone so each zone can have its own flow namespace.
1267 1269 * RFE 6625651 will fix this.
1268 1270 *
1269 1271 */
1270 1272 if ((err = mac_flow_hash_add(flent)) != 0)
1271 1273 goto bail;
1272 1274
1273 1275 hash_added = B_TRUE;
1274 1276
1275 1277 /*
1276 1278 * do not allow flows to be configured on an anchor VNIC
1277 1279 */
1278 1280 if (mac_capab_get(dlp->dl_mh, MAC_CAPAB_ANCHOR_VNIC, NULL)) {
1279 1281 err = ENOTSUP;
1280 1282 goto bail;
1281 1283 }
1282 1284
1283 1285 /*
1284 1286 * Add the subflow to the subflow table. Also instantiate the flow
1285 1287 * in the mac if there is an active user (we check if the MAC client's
1286 1288 * datapath has been setup).
1287 1289 */
1288 1290 err = mac_flow_add_subflow(dlp->dl_mch, flent,
1289 1291 MCIP_DATAPATH_SETUP((mac_client_impl_t *)dlp->dl_mch));
1290 1292 if (err != 0)
1291 1293 goto bail;
1292 1294
1293 1295 FLOW_UNMARK(flent, FE_INCIPIENT);
1294 1296 dls_devnet_rele_link(dlh, dlp);
1295 1297 mac_perim_exit(mph);
1296 1298 return (0);
1297 1299
1298 1300 bail:
1299 1301 if (hash_added)
1300 1302 mac_flow_hash_remove(flent);
1301 1303
1302 1304 if (link_held)
1303 1305 dls_devnet_rele_link(dlh, dlp);
1304 1306
1305 1307 /*
1306 1308 * Wait for any transient global flow hash refs to clear
1307 1309 * and then release the creation reference on the flow
1308 1310 */
1309 1311 mac_flow_wait(flent, FLOW_USER_REF);
1310 1312 FLOW_FINAL_REFRELE(flent);
1311 1313 mac_perim_exit(mph);
1312 1314 return (err);
1313 1315 }
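This is the kernel entry point behind flowadm(1M) add-flow. An illustrative invocation that reaches it (link and flow names are examples):

    # flowadm add-flow -l net0 -a transport=tcp,local_port=443 https-flow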
1314 1316
1315 1317 /*
1316 1318 * mac_link_flow_clean()
1317 1319 * Internal flow interface used for freeing SRSs and related
1318 1320 * data structures. Not meant to be used by mac clients.
1319 1321 */
1320 1322 void
1321 1323 mac_link_flow_clean(mac_client_handle_t mch, flow_entry_t *sub_flow)
1322 1324 {
1323 - mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1325 + mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1324 1326 mac_impl_t *mip = mcip->mci_mip;
1325 1327 boolean_t last_subflow;
1326 1328
1327 1329 ASSERT(mch != NULL);
1328 1330 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1329 1331
1330 1332 /*
1331 1333 * This sub flow entry may fail to be fully initialized by
1332 1334 * mac_link_flow_init(). If so, simply return.
1333 1335 */
1334 1336 if (sub_flow->fe_mcip == NULL)
1335 1337 return;
1336 1338
1337 1339 last_subflow = FLOW_TAB_EMPTY(mcip->mci_subflow_tab);
1338 1340 /*
1339 1341 * Tear down the data path
1340 1342 */
1341 1343 mac_datapath_teardown(mcip, sub_flow, SRST_FLOW);
1342 1344 sub_flow->fe_mcip = NULL;
1343 1345
1344 1346 /*
1345 1347 * Delete the SRSs associated with this subflow. If this is being
1346 1348 * driven by flowadm(1M) then the subflow will be deleted by
1347 1349 * dls_rem_flow. However if this is a result of the interface being
1348 1350 * unplumbed then the subflow itself won't be deleted.
1349 1351 */
1350 1352 mac_flow_cleanup(sub_flow);
1351 1353
1352 1354 /*
1353 1355	 * If all the subflows are gone, re-enable some of the functionality
1354 1356	 * we disabled when adding a subflow, such as polling.
1355 1357 */
1356 1358 if (last_subflow) {
1357 1359 /*
1358 1360 * The subflow table itself is not protected by any locks or
1359 1361 * refcnts. Hence quiesce the client upfront before clearing
1360 1362 * mci_subflow_tab.
1361 1363 */
1362 1364 mac_client_quiesce(mcip);
1363 1365 mac_client_update_classifier(mcip, B_FALSE);
1364 1366 mac_flow_tab_destroy(mcip->mci_subflow_tab);
1365 1367 mcip->mci_subflow_tab = NULL;
1366 1368 mac_client_restart(mcip);
1367 1369 }
1368 1370 }
1369 1371
1370 1372 /*
1371 1373 * mac_link_flow_remove()
1372 1374	 * Used by flowadm(1M) or kernel mac clients for removing flows.
1373 1375 */
1374 1376 int
1375 1377 mac_link_flow_remove(char *flow_name)
1376 1378 {
1377 1379 flow_entry_t *flent;
1378 1380 mac_perim_handle_t mph;
1379 1381 int err;
1380 1382 datalink_id_t linkid;
1381 1383
1382 1384 err = mac_flow_lookup_byname(flow_name, &flent);
1383 1385 if (err != 0)
1384 1386 return (err);
1385 1387
1386 1388 linkid = flent->fe_link_id;
1387 1389 FLOW_USER_REFRELE(flent);
1388 1390
1389 1391 /*
1390 1392 * The perim must be acquired before acquiring any other references
1391 1393 * to maintain the lock and perimeter hierarchy. Please note the
1392 1394 * FLOW_REFRELE above.
1393 1395 */
1394 1396 err = mac_perim_enter_by_linkid(linkid, &mph);
1395 1397 if (err != 0)
1396 1398 return (err);
1397 1399
1398 1400 /*
1399 1401 * Note the second lookup of the flow, because a concurrent thread
1400 1402 * may have removed it already while we were waiting to enter the
1401 1403 * link's perimeter.
1402 1404 */
1403 1405 err = mac_flow_lookup_byname(flow_name, &flent);
1404 1406 if (err != 0) {
1405 1407 mac_perim_exit(mph);
1406 1408 return (err);
1407 1409 }
1408 1410 FLOW_USER_REFRELE(flent);
1409 1411
1410 1412 /*
1411 1413 * Remove the flow from the subflow table and deactivate the flow
1412 1414	 * by quiescing and removing its SRSs.
1413 1415 */
1414 1416 mac_flow_rem_subflow(flent);
1415 1417
1416 1418 /*
1417 1419 * Finally, remove the flow from the global table.
1418 1420 */
1419 1421 mac_flow_hash_remove(flent);
1420 1422
1421 1423 /*
1422 1424 * Wait for any transient global flow hash refs to clear
1423 1425 * and then release the creation reference on the flow
1424 1426 */
1425 1427 mac_flow_wait(flent, FLOW_USER_REF);
1426 1428 FLOW_FINAL_REFRELE(flent);
1427 1429
1428 1430 mac_perim_exit(mph);
1429 1431
1430 1432 return (0);
1431 1433 }
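The corresponding administrative command that drives this path (flow name illustrative):

    # flowadm remove-flow https-flow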
1432 1434
1433 1435 /*
1434 1436 * mac_link_flow_modify()
1435 1437 * Modifies the properties of a flow identified by its name.
1436 1438 */
1437 1439 int
1438 1440 mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp)
1439 1441 {
1440 1442 flow_entry_t *flent;
1441 - mac_client_impl_t *mcip;
1443 + mac_client_impl_t *mcip;
1442 1444 int err = 0;
1443 1445 mac_perim_handle_t mph;
1444 1446 datalink_id_t linkid;
1445 1447 flow_tab_t *flow_tab;
1446 1448
1447 1449 err = mac_validate_props(NULL, mrp);
1448 1450 if (err != 0)
1449 1451 return (err);
1450 1452
1451 1453 err = mac_flow_lookup_byname(flow_name, &flent);
1452 1454 if (err != 0)
1453 1455 return (err);
1454 1456
1455 1457 linkid = flent->fe_link_id;
1456 1458 FLOW_USER_REFRELE(flent);
1457 1459
1458 1460 /*
1459 1461 * The perim must be acquired before acquiring any other references
1460 1462 * to maintain the lock and perimeter hierarchy. Please note the
1461 1463 * FLOW_REFRELE above.
1462 1464 */
1463 1465 err = mac_perim_enter_by_linkid(linkid, &mph);
1464 1466 if (err != 0)
1465 1467 return (err);
1466 1468
1467 1469 /*
1468 1470 * Note the second lookup of the flow, because a concurrent thread
1469 1471 * may have removed it already while we were waiting to enter the
1470 1472 * link's perimeter.
1471 1473 */
1472 1474 err = mac_flow_lookup_byname(flow_name, &flent);
1473 1475 if (err != 0) {
1474 1476 mac_perim_exit(mph);
1475 1477 return (err);
1476 1478 }
1477 1479 FLOW_USER_REFRELE(flent);
1478 1480
1479 1481 /*
1480 1482 * If this flow is attached to a MAC client, then pass the request
1481 1483 * along to the client.
1482 1484 * Otherwise, just update the cached values.
1483 1485 */
1484 1486 mcip = flent->fe_mcip;
1485 1487 mac_update_resources(mrp, &flent->fe_resource_props, B_TRUE);
1486 1488 if (mcip != NULL) {
1487 1489 if ((flow_tab = mcip->mci_subflow_tab) == NULL) {
1488 1490 err = ENOENT;
1489 1491 } else {
1490 1492 mac_flow_modify(flow_tab, flent, mrp);
1491 1493 }
1492 1494 } else {
1493 1495 (void) mac_flow_modify_props(flent, mrp);
1494 1496 }
1495 1497
1496 1498 done:
1497 1499 mac_perim_exit(mph);
1498 1500 return (err);
1499 1501 }
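This is the path behind flowadm(1M) set-flowprop; an illustrative invocation (property value and flow name are examples):

    # flowadm set-flowprop -p maxbw=50M https-flow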
1500 1502
1501 1503
1502 1504 /*
1503 1505 * State structure and misc functions used by mac_link_flow_walk().
1504 1506 */
1505 1507 typedef struct {
1506 1508 int (*ws_func)(mac_flowinfo_t *, void *);
1507 1509 void *ws_arg;
1508 1510 } flow_walk_state_t;
1509 1511
1510 1512 static void
1511 1513 mac_link_flowinfo_copy(mac_flowinfo_t *finfop, flow_entry_t *flent)
1512 1514 {
1513 1515 (void) strlcpy(finfop->fi_flow_name, flent->fe_flow_name,
1514 1516 MAXFLOWNAMELEN);
1515 1517 finfop->fi_link_id = flent->fe_link_id;
1516 1518 finfop->fi_flow_desc = flent->fe_flow_desc;
1517 1519 finfop->fi_resource_props = flent->fe_resource_props;
1518 1520 }
1519 1521
1520 1522 static int
1521 1523 mac_link_flow_walk_cb(flow_entry_t *flent, void *arg)
1522 1524 {
1523 1525 flow_walk_state_t *statep = arg;
1524 1526 mac_flowinfo_t *finfo;
1525 1527 int err;
1526 1528
1527 1529 finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP);
1528 1530 mac_link_flowinfo_copy(finfo, flent);
1529 1531 err = statep->ws_func(finfo, statep->ws_arg);
1530 1532 kmem_free(finfo, sizeof (*finfo));
1531 1533 return (err);
1532 1534 }
1533 1535
1534 1536 /*
1535 1537 * mac_link_flow_walk()
1536 1538 * Invokes callback 'func' for all flows belonging to the specified link.
1537 1539 */
1538 1540 int
1539 1541 mac_link_flow_walk(datalink_id_t linkid,
1540 1542 int (*func)(mac_flowinfo_t *, void *), void *arg)
1541 1543 {
1542 1544 mac_client_impl_t *mcip;
1543 1545 mac_perim_handle_t mph;
1544 1546 flow_walk_state_t state;
1545 1547 dls_dl_handle_t dlh;
1546 1548 dls_link_t *dlp;
1547 1549 int err;
1548 1550
1549 1551 err = mac_perim_enter_by_linkid(linkid, &mph);
1550 1552 if (err != 0)
1551 1553 return (err);
1552 1554
1553 1555 err = dls_devnet_hold_link(linkid, &dlh, &dlp);
1554 1556 if (err != 0) {
1555 1557 mac_perim_exit(mph);
1556 1558 return (err);
1557 1559 }
1558 1560
1559 1561 mcip = (mac_client_impl_t *)dlp->dl_mch;
1560 1562 state.ws_func = func;
1561 1563 state.ws_arg = arg;
1562 1564
1563 1565 err = mac_flow_walk_nolock(mcip->mci_subflow_tab,
1564 1566 mac_link_flow_walk_cb, &state);
1565 1567
1566 1568 dls_devnet_rele_link(dlh, dlp);
1567 1569 mac_perim_exit(mph);
1568 1570 return (err);
1569 1571 }
1570 1572
1571 1573 /*
1572 1574 * mac_link_flow_info()
1573 1575 * Retrieves information about a specific flow.
1574 1576 */
1575 1577 int
1576 1578 mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo)
1577 1579 {
1578 1580 flow_entry_t *flent;
1579 1581 int err;
1580 1582
1581 1583 err = mac_flow_lookup_byname(flow_name, &flent);
1582 1584 if (err != 0)
1583 1585 return (err);
1584 1586
1585 1587 mac_link_flowinfo_copy(finfo, flent);
1586 1588 FLOW_USER_REFRELE(flent);
1587 1589 return (0);
1588 1590 }
1589 1591
1590 1592 /*
1591 1593 * Hash function macro that takes an Ethernet address and VLAN id as input.
1592 1594 */
1593 1595 #define HASH_ETHER_VID(a, v, s) \
1594 1596 ((((uint32_t)(a)[3] + (a)[4] + (a)[5]) ^ (v)) % (s))
1595 1597
1596 1598 /*
1597 1599 * Generic layer-2 address hashing function that takes an address and address
1598 1600 * length as input. This is the DJB hash function.
1599 1601 */
1600 1602 static uint32_t
1601 1603 flow_l2_addrhash(uint8_t *addr, size_t addrlen, size_t htsize)
1602 1604 {
1603 1605 uint32_t hash = 5381;
1604 1606 size_t i;
1605 1607
1606 1608 for (i = 0; i < addrlen; i++)
1607 1609 hash = ((hash << 5) + hash) + addr[i];
1608 1610 return (hash % htsize);
1609 1611 }
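To make the hashing above concrete, here is a standalone user-level sketch (not kernel code) that computes both the DJB index and the HASH_ETHER_VID index for an example destination MAC over a 1024-bucket table:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    int
    main(void)
    {
            uint8_t a[6] = { 0x00, 0x0c, 0x29, 0x3e, 0x4f, 0x50 };
            uint32_t hash = 5381;
            size_t i;

            /* DJB: hash = hash * 33 + byte, then fold into the table size */
            for (i = 0; i < sizeof (a); i++)
                    hash = ((hash << 5) + hash) + a[i];
            (void) printf("djb index = %u\n", hash % 1024);

            /* HASH_ETHER_VID: sum of the last three octets, xor'ed with VID 0 */
            (void) printf("ether/vid index = %u\n",
                (((uint32_t)a[3] + a[4] + a[5]) ^ 0) % 1024);
            return (0);
    }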
1610 1612
1611 1613 #define PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end))
1612 1614
1613 1615 #define CHECK_AND_ADJUST_START_PTR(s, start) { \
1614 1616 if ((s)->fs_mp->b_wptr == (start)) { \
1615 1617 mblk_t *next = (s)->fs_mp->b_cont; \
1616 1618 if (next == NULL) \
1617 1619 return (EINVAL); \
1618 1620 \
1619 1621 (s)->fs_mp = next; \
1620 1622 (start) = next->b_rptr; \
1621 1623 } \
1622 1624 }
1623 1625
1624 1626 /* ARGSUSED */
1625 1627 static boolean_t
1626 1628 flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1627 1629 {
1628 1630 flow_l2info_t *l2 = &s->fs_l2info;
1629 1631 flow_desc_t *fd = &flent->fe_flow_desc;
1630 1632
1631 1633 return (l2->l2_vid == fd->fd_vid &&
1632 1634 bcmp(l2->l2_daddr, fd->fd_dst_mac, fd->fd_mac_len) == 0);
1633 1635 }
1634 1636
1635 1637 /*
1636 1638 * Layer 2 hash function.
1637 1639 * Must be paired with flow_l2_accept() within a set of flow_ops
1638 1640 * because it assumes the dest address is already extracted.
1639 1641 */
1640 1642 static uint32_t
1641 1643 flow_l2_hash(flow_tab_t *ft, flow_state_t *s)
1642 1644 {
1643 1645 return (flow_l2_addrhash(s->fs_l2info.l2_daddr,
1644 1646 ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
1645 1647 }
1646 1648
1647 1649 /*
1648 1650 * This is the generic layer 2 accept function.
1649 1651 * It makes use of mac_header_info() to extract the header length,
1650 1652 * sap, vlan ID and destination address.
1651 1653 */
1652 1654 static int
1653 1655 flow_l2_accept(flow_tab_t *ft, flow_state_t *s)
1654 1656 {
1655 1657 boolean_t is_ether;
1656 1658 flow_l2info_t *l2 = &s->fs_l2info;
1657 1659 mac_header_info_t mhi;
1658 1660 int err;
1659 1661
1660 1662 is_ether = (ft->ft_mip->mi_info.mi_nativemedia == DL_ETHER);
1661 1663 if ((err = mac_header_info((mac_handle_t)ft->ft_mip,
1662 1664 s->fs_mp, &mhi)) != 0) {
1663 1665 if (err == EINVAL)
1664 1666 err = ENOBUFS;
1665 1667
1666 1668 return (err);
1667 1669 }
1668 1670
1669 1671 l2->l2_start = s->fs_mp->b_rptr;
1670 1672 l2->l2_daddr = (uint8_t *)mhi.mhi_daddr;
1671 1673
1672 1674 if (is_ether && mhi.mhi_bindsap == ETHERTYPE_VLAN &&
1673 1675 ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
1674 1676 struct ether_vlan_header *evhp =
1675 1677 (struct ether_vlan_header *)l2->l2_start;
1676 1678
1677 1679 if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
1678 1680 return (ENOBUFS);
1679 1681
1680 1682 l2->l2_sap = ntohs(evhp->ether_type);
1681 1683 l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
1682 1684 l2->l2_hdrsize = sizeof (*evhp);
1683 1685 } else {
1684 1686 l2->l2_sap = mhi.mhi_bindsap;
1685 1687 l2->l2_vid = 0;
1686 1688 l2->l2_hdrsize = (uint32_t)mhi.mhi_hdrsize;
1687 1689 }
1688 1690 return (0);
1689 1691 }
1690 1692
1691 1693 /*
1692 1694 * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/
1693 1695 * accept(). The notable difference is that dest address is now extracted
1694 1696 * by hash() rather than by accept(). This saves a few memory references
1695 1697 * for flow tables that do not care about mac addresses.
1696 1698 */
1697 1699 static uint32_t
1698 1700 flow_ether_hash(flow_tab_t *ft, flow_state_t *s)
1699 1701 {
1700 1702 flow_l2info_t *l2 = &s->fs_l2info;
1701 1703 struct ether_vlan_header *evhp;
1702 1704
1703 1705 evhp = (struct ether_vlan_header *)l2->l2_start;
1704 1706 l2->l2_daddr = evhp->ether_dhost.ether_addr_octet;
1705 1707 return (HASH_ETHER_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size));
1706 1708 }
1707 1709
1708 1710 static uint32_t
1709 1711 flow_ether_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
1710 1712 {
1711 1713 flow_desc_t *fd = &flent->fe_flow_desc;
1712 1714
1713 1715 ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0);
1714 1716 return (HASH_ETHER_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size));
1715 1717 }
1716 1718
1717 1719 /* ARGSUSED */
1718 1720 static int
1719 1721 flow_ether_accept(flow_tab_t *ft, flow_state_t *s)
1720 1722 {
1721 1723 flow_l2info_t *l2 = &s->fs_l2info;
1722 1724 struct ether_vlan_header *evhp;
1723 1725 uint16_t sap;
1724 1726
1725 1727 evhp = (struct ether_vlan_header *)s->fs_mp->b_rptr;
1726 1728 l2->l2_start = (uchar_t *)evhp;
1727 1729
1728 1730 if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (struct ether_header)))
1729 1731 return (ENOBUFS);
1730 1732
1731 1733 if ((sap = ntohs(evhp->ether_tpid)) == ETHERTYPE_VLAN &&
1732 1734 ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
1733 1735 if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
1734 1736 return (ENOBUFS);
1735 1737
1736 1738 l2->l2_sap = ntohs(evhp->ether_type);
1737 1739 l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
1738 1740 l2->l2_hdrsize = sizeof (struct ether_vlan_header);
1739 1741 } else {
1740 1742 l2->l2_sap = sap;
1741 1743 l2->l2_vid = 0;
1742 1744 l2->l2_hdrsize = sizeof (struct ether_header);
1743 1745 }
1744 1746 return (0);
1745 1747 }
1746 1748
1747 1749 /*
1748 1750 * Validates a layer 2 flow entry.
1749 1751 */
1750 1752 static int
1751 1753 flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
1752 1754 {
1753 1755 flow_desc_t *fd = &flent->fe_flow_desc;
1754 1756
1755 1757 /*
1756 1758 * Dest address is mandatory, and 0 length addresses are not yet
1757 1759 * supported.
1758 1760 */
1759 1761 if ((fd->fd_mask & FLOW_LINK_DST) == 0 || fd->fd_mac_len == 0)
1760 1762 return (EINVAL);
1761 1763
1762 1764 if ((fd->fd_mask & FLOW_LINK_VID) != 0) {
1763 1765 /*
1764 1766 * VLAN flows are only supported over ethernet macs.
1765 1767 */
1766 1768 if (ft->ft_mip->mi_info.mi_nativemedia != DL_ETHER)
1767 1769 return (EINVAL);
1768 1770
1769 1771 if (fd->fd_vid == 0)
1770 1772 return (EINVAL);
1771 1773
1772 1774 }
1773 1775 flent->fe_match = flow_l2_match;
1774 1776 return (0);
1775 1777 }
1776 1778
1777 1779 /*
1778 1780 * Calculates hash index of flow entry.
1779 1781 */
1780 1782 static uint32_t
1781 1783 flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
1782 1784 {
1783 1785 flow_desc_t *fd = &flent->fe_flow_desc;
1784 1786
1785 1787 ASSERT((fd->fd_mask & FLOW_LINK_VID) == 0 && fd->fd_vid == 0);
1786 1788 return (flow_l2_addrhash(fd->fd_dst_mac,
1787 1789 ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
1788 1790 }
1789 1791
1790 1792 /*
1791 1793 * This is used for duplicate flow checking.
1792 1794 */
1793 1795 /* ARGSUSED */
1794 1796 static boolean_t
1795 1797 flow_l2_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
1796 1798 {
1797 1799 flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
1798 1800
1799 1801 ASSERT(fd1->fd_mac_len == fd2->fd_mac_len && fd1->fd_mac_len != 0);
1800 1802 return (bcmp(&fd1->fd_dst_mac, &fd2->fd_dst_mac,
1801 1803 fd1->fd_mac_len) == 0 && fd1->fd_vid == fd2->fd_vid);
1802 1804 }
1803 1805
1804 1806 /*
1805 1807 * Generic flow entry insertion function.
1806 1808 * Used by flow tables that do not have ordering requirements.
1807 1809 */
1808 1810 /* ARGSUSED */
1809 1811 static int
1810 1812 flow_generic_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
1811 1813 flow_entry_t *flent)
1812 1814 {
1813 1815 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
1814 1816
1815 1817 if (*headp != NULL) {
1816 1818 ASSERT(flent->fe_next == NULL);
1817 1819 flent->fe_next = *headp;
1818 1820 }
1819 1821 *headp = flent;
1820 1822 return (0);
1821 1823 }
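
flow_generic_insert_fe() is plain head insertion into a singly linked bucket list; that is why it needs no ordering logic at all. The same pattern in miniature, with illustrative names:

#include <assert.h>
#include <stddef.h>

typedef struct entry {
	struct entry	*e_next;
	int		e_key;
} entry_t;

/* Push a detached entry onto the head of the bucket list. */
static void
insert_head(entry_t **headp, entry_t *ep)
{
	assert(ep->e_next == NULL);	/* must not already be linked */
	ep->e_next = *headp;		/* NULL-safe on an empty list */
	*headp = ep;
}
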
1822 1824
1823 1825 /*
1824 1826 * IP version independent DSField matching function.
1825 1827 */
1826 1828 /* ARGSUSED */
1827 1829 static boolean_t
1828 1830 flow_ip_dsfield_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1829 1831 {
1830 1832 flow_l3info_t *l3info = &s->fs_l3info;
1831 1833 flow_desc_t *fd = &flent->fe_flow_desc;
1832 1834
1833 1835 switch (l3info->l3_version) {
1834 1836 case IPV4_VERSION: {
1835 1837 ipha_t *ipha = (ipha_t *)l3info->l3_start;
1836 1838
1837 1839 return ((ipha->ipha_type_of_service &
1838 1840 fd->fd_dsfield_mask) == fd->fd_dsfield);
1839 1841 }
1840 1842 case IPV6_VERSION: {
1841 1843 ip6_t *ip6h = (ip6_t *)l3info->l3_start;
1842 1844
1843 1845 return ((IPV6_FLOW_TCLASS(ip6h->ip6_vcf) &
1844 1846 fd->fd_dsfield_mask) == fd->fd_dsfield);
1845 1847 }
1846 1848 default:
1847 1849 return (B_FALSE);
1848 1850 }
1849 1851 }
1850 1852
1851 1853 /*
1852 1854 * IP v4 and v6 address matching.
1853 1855  * The netmask only needs to be applied to the packet address, not to the
1854 1856  * flow_desc, since fd_local_addr/fd_remote_addr are premasked subnets.
1855 1857 */
1856 1858
1857 1859 /* ARGSUSED */
1858 1860 static boolean_t
1859 1861 flow_ip_v4_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1860 1862 {
1861 1863 flow_l3info_t *l3info = &s->fs_l3info;
1862 1864 flow_desc_t *fd = &flent->fe_flow_desc;
1863 1865 ipha_t *ipha = (ipha_t *)l3info->l3_start;
1864 1866 in_addr_t addr;
1865 1867
1866 1868 addr = (l3info->l3_dst_or_src ? ipha->ipha_dst : ipha->ipha_src);
1867 1869 if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
1868 1870 return ((addr & V4_PART_OF_V6(fd->fd_local_netmask)) ==
1869 1871 V4_PART_OF_V6(fd->fd_local_addr));
1870 1872 }
1871 1873 return ((addr & V4_PART_OF_V6(fd->fd_remote_netmask)) ==
1872 1874 V4_PART_OF_V6(fd->fd_remote_addr));
1873 1875 }
1874 1876
1875 1877 /* ARGSUSED */
1876 1878 static boolean_t
1877 1879 flow_ip_v6_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1878 1880 {
1879 1881 flow_l3info_t *l3info = &s->fs_l3info;
1880 1882 flow_desc_t *fd = &flent->fe_flow_desc;
1881 1883 ip6_t *ip6h = (ip6_t *)l3info->l3_start;
1882 1884 in6_addr_t *addrp;
1883 1885
1884 1886 addrp = (l3info->l3_dst_or_src ? &ip6h->ip6_dst : &ip6h->ip6_src);
1885 1887 if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
1886 1888 return (V6_MASK_EQ(*addrp, fd->fd_local_netmask,
1887 1889 fd->fd_local_addr));
1888 1890 }
1889 1891 return (V6_MASK_EQ(*addrp, fd->fd_remote_netmask, fd->fd_remote_addr));
1890 1892 }
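
Because flow_ip_accept_fe() below premasks the descriptor address with V6_MASK_COPY(), both matchers above reduce to one AND plus one compare against the packet address. A minimal sketch of the v4 case, with illustrative names:

#include <stdbool.h>
#include <stdint.h>

/*
 * The descriptor side stores subnet = addr & mask once, at accept
 * time, so the per-packet match applies the mask to the packet only.
 */
static bool
v4_subnet_match(uint32_t pkt_addr, uint32_t subnet, uint32_t mask)
{
	return ((pkt_addr & mask) == subnet);
}
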
1891 1893
1892 1894 /* ARGSUSED */
1893 1895 static boolean_t
1894 1896 flow_ip_proto_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1895 1897 {
1896 1898 flow_l3info_t *l3info = &s->fs_l3info;
1897 1899 flow_desc_t *fd = &flent->fe_flow_desc;
1898 1900
1899 1901 return (l3info->l3_protocol == fd->fd_protocol);
1900 1902 }
1901 1903
1902 1904 static uint32_t
1903 1905 flow_ip_hash(flow_tab_t *ft, flow_state_t *s)
1904 1906 {
1905 1907 flow_l3info_t *l3info = &s->fs_l3info;
1906 1908 flow_mask_t mask = ft->ft_mask;
1907 1909
1908 1910 if ((mask & FLOW_IP_LOCAL) != 0) {
1909 1911 l3info->l3_dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
1910 1912 } else if ((mask & FLOW_IP_REMOTE) != 0) {
1911 1913 l3info->l3_dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
1912 1914 } else if ((mask & FLOW_IP_DSFIELD) != 0) {
1913 1915 /*
1914 1916 * DSField flents are arranged as a single list.
1915 1917 */
1916 1918 return (0);
1917 1919 }
1918 1920 /*
1919 1921 * IP addr flents are hashed into two lists, v4 or v6.
1920 1922 */
1921 1923 ASSERT(ft->ft_size >= 2);
1922 1924 return ((l3info->l3_version == IPV4_VERSION) ? 0 : 1);
1923 1925 }
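
The l3_dst_or_src assignment above encodes a small truth table: a local-address flow compares the packet's destination on inbound traffic and its source on outbound traffic, while a remote-address flow does the opposite. Just that selection, as a sketch with hypothetical names:

#include <stdbool.h>

/* Which packet address to compare: true = destination, false = source. */
static bool
pick_dst(bool match_local, bool inbound)
{
	/* Local flows: dst inbound, src outbound; remote flows: inverse. */
	return (match_local ? inbound : !inbound);
}
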
1924 1926
1925 1927 static uint32_t
1926 1928 flow_ip_proto_hash(flow_tab_t *ft, flow_state_t *s)
1927 1929 {
1928 1930 flow_l3info_t *l3info = &s->fs_l3info;
1929 1931
1930 1932 return (l3info->l3_protocol % ft->ft_size);
1931 1933 }
1932 1934
1933 1935 /* ARGSUSED */
1934 1936 static int
1935 1937 flow_ip_accept(flow_tab_t *ft, flow_state_t *s)
1936 1938 {
1937 1939 flow_l2info_t *l2info = &s->fs_l2info;
1938 1940 flow_l3info_t *l3info = &s->fs_l3info;
1939 1941 uint16_t sap = l2info->l2_sap;
1940 1942 uchar_t *l3_start;
1941 1943
1942 1944 l3_start = l2info->l2_start + l2info->l2_hdrsize;
1943 1945
1944 1946 /*
1945 1947 * Adjust start pointer if we're at the end of an mblk.
1946 1948 */
1947 1949 CHECK_AND_ADJUST_START_PTR(s, l3_start);
1948 1950
1949 1951 l3info->l3_start = l3_start;
1950 1952 if (!OK_32PTR(l3_start))
1951 1953 return (EINVAL);
1952 1954
1953 1955 switch (sap) {
1954 1956 case ETHERTYPE_IP: {
1955 1957 ipha_t *ipha = (ipha_t *)l3_start;
1956 1958
1959 + if (IPH_HDR_VERSION(ipha) != IPV4_VERSION)
1960 + return (EINVAL);
1957 1961 if (PKT_TOO_SMALL(s, l3_start + IP_SIMPLE_HDR_LENGTH))
1958 1962 return (ENOBUFS);
1959 1963
1960 1964 l3info->l3_hdrsize = IPH_HDR_LENGTH(ipha);
1961 1965 l3info->l3_protocol = ipha->ipha_protocol;
1962 1966 l3info->l3_version = IPV4_VERSION;
1963 1967 l3info->l3_fragmented =
1964 1968 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags);
1965 1969 break;
1966 1970 }
1967 1971 case ETHERTYPE_IPV6: {
1968 1972 ip6_t *ip6h = (ip6_t *)l3_start;
1969 1973 ip6_frag_t *frag = NULL;
1970 1974 uint16_t ip6_hdrlen;
1971 1975 uint8_t nexthdr;
1976 + int errno;
1972 1977
1973 - if (!mac_ip_hdr_length_v6(ip6h, s->fs_mp->b_wptr, &ip6_hdrlen,
1974 - &nexthdr, &frag)) {
1975 - return (ENOBUFS);
1976 - }
1978 + errno = mac_ip_hdr_length_v6(ip6h, s->fs_mp->b_wptr,
1979 + &ip6_hdrlen, &nexthdr, &frag);
1980 + /*
1981 + * ENOBUFS is not ENOSPC, but the semantics are the
1982 + * same for this caller.
1983 + */
1984 + if (errno != 0)
1985 + return (errno == ENOSPC ? ENOBUFS : errno);
1977 1986 l3info->l3_hdrsize = ip6_hdrlen;
1978 1987 l3info->l3_protocol = nexthdr;
1979 1988 l3info->l3_version = IPV6_VERSION;
1980 1989 l3info->l3_fragmented = (frag != NULL);
1981 1990 break;
1982 1991 }
1983 1992 default:
1984 1993 return (EINVAL);
1985 1994 }
1986 1995 return (0);
1987 1996 }
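
The IPH_HDR_VERSION() check added to the ETHERTYPE_IP arm is the heart of this change: the link-layer SAP may claim IPv4 while the header is actually something else, and the version nibble (the high four bits of the first header byte, in both IPv4 and IPv6) is the cheap way to cross-check before reading any IPv4-specific field. On the v6 arm the equivalent validation happens inside mac_ip_hdr_length_v6(), whose errno-style return this diff adopts. A user-space sketch of the cross-check, generalized to both directions for illustration; the helper names are hypothetical:

#include <errno.h>
#include <stdint.h>

#define ETHERTYPE_IP	0x0800
#define ETHERTYPE_IPV6	0x86dd

/* IPv4 and IPv6 both keep the version in the top nibble of byte 0. */
static int
ip_hdr_version(const uint8_t *l3_start)
{
	return (l3_start[0] >> 4);
}

/* Reject packets whose L2 SAP and L3 version nibble disagree. */
static int
check_sap_matches_header(uint16_t sap, const uint8_t *l3_start)
{
	if (sap == ETHERTYPE_IP && ip_hdr_version(l3_start) != 4)
		return (EINVAL);
	if (sap == ETHERTYPE_IPV6 && ip_hdr_version(l3_start) != 6)
		return (EINVAL);
	return (0);
}
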
1988 1997
1989 1998 /* ARGSUSED */
1990 1999 static int
1991 2000 flow_ip_proto_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
1992 2001 {
1993 2002 flow_desc_t *fd = &flent->fe_flow_desc;
1994 2003
1995 2004 switch (fd->fd_protocol) {
1996 2005 case IPPROTO_TCP:
1997 2006 case IPPROTO_UDP:
1998 2007 case IPPROTO_SCTP:
1999 2008 case IPPROTO_ICMP:
2000 2009 case IPPROTO_ICMPV6:
2001 2010 flent->fe_match = flow_ip_proto_match;
2002 2011 return (0);
2003 2012 default:
2004 2013 return (EINVAL);
2005 2014 }
2006 2015 }
2007 2016
2008 2017 /* ARGSUSED */
2009 2018 static int
2010 2019 flow_ip_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
2011 2020 {
2012 2021 flow_desc_t *fd = &flent->fe_flow_desc;
2013 2022 flow_mask_t mask;
2014 2023 uint8_t version;
2015 2024 in6_addr_t *addr, *netmask;
2016 2025
2017 2026 /*
2018 2027  * DSField does not require an IP version.
2019 2028 */
2020 2029 if (fd->fd_mask == FLOW_IP_DSFIELD) {
2021 2030 if (fd->fd_dsfield_mask == 0)
2022 2031 return (EINVAL);
2023 2032
2024 2033 flent->fe_match = flow_ip_dsfield_match;
2025 2034 return (0);
2026 2035 }
2027 2036
2028 2037 /*
2029 2038 * IP addresses must come with a version to avoid ambiguity.
2030 2039 */
2031 2040 if ((fd->fd_mask & FLOW_IP_VERSION) == 0)
2032 2041 return (EINVAL);
2033 2042
2034 2043 version = fd->fd_ipversion;
2035 2044 if (version != IPV4_VERSION && version != IPV6_VERSION)
2036 2045 return (EINVAL);
2037 2046
2038 2047 mask = fd->fd_mask & ~FLOW_IP_VERSION;
2039 2048 switch (mask) {
2040 2049 case FLOW_IP_LOCAL:
2041 2050 addr = &fd->fd_local_addr;
2042 2051 netmask = &fd->fd_local_netmask;
2043 2052 break;
2044 2053 case FLOW_IP_REMOTE:
2045 2054 addr = &fd->fd_remote_addr;
2046 2055 netmask = &fd->fd_remote_netmask;
2047 2056 break;
2048 2057 default:
2049 2058 return (EINVAL);
2050 2059 }
2051 2060
2052 2061 /*
2053 2062  * Apply the netmask to the specified address.
2054 2063 */
2055 2064 V6_MASK_COPY(*addr, *netmask, *addr);
2056 2065 if (version == IPV4_VERSION) {
2057 2066 ipaddr_t v4addr = V4_PART_OF_V6((*addr));
2058 2067 ipaddr_t v4mask = V4_PART_OF_V6((*netmask));
2059 2068
2060 2069 if (v4addr == 0 || v4mask == 0)
2061 2070 return (EINVAL);
2062 2071 flent->fe_match = flow_ip_v4_match;
2063 2072 } else {
2064 2073 if (IN6_IS_ADDR_UNSPECIFIED(addr) ||
2065 2074 IN6_IS_ADDR_UNSPECIFIED(netmask))
2066 2075 return (EINVAL);
2067 2076 flent->fe_match = flow_ip_v6_match;
2068 2077 }
2069 2078 return (0);
2070 2079 }
2071 2080
2072 2081 static uint32_t
2073 2082 flow_ip_proto_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
2074 2083 {
2075 2084 flow_desc_t *fd = &flent->fe_flow_desc;
2076 2085
2077 2086 return (fd->fd_protocol % ft->ft_size);
2078 2087 }
2079 2088
2080 2089 static uint32_t
2081 2090 flow_ip_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
2082 2091 {
2083 2092 flow_desc_t *fd = &flent->fe_flow_desc;
2084 2093
2085 2094 /*
2086 2095 * DSField flents are arranged as a single list.
2087 2096 */
2088 2097 if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
2089 2098 return (0);
2090 2099
2091 2100 /*
2092 2101 * IP addr flents are hashed into two lists, v4 or v6.
2093 2102 */
2094 2103 ASSERT(ft->ft_size >= 2);
2095 2104 return ((fd->fd_ipversion == IPV4_VERSION) ? 0 : 1);
2096 2105 }
2097 2106
2098 2107 /* ARGSUSED */
2099 2108 static boolean_t
2100 2109 flow_ip_proto_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
2101 2110 {
2102 2111 flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
2103 2112
2104 2113 return (fd1->fd_protocol == fd2->fd_protocol);
2105 2114 }
2106 2115
2107 2116 /* ARGSUSED */
2108 2117 static boolean_t
2109 2118 flow_ip_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
2110 2119 {
2111 2120 flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
2112 2121 in6_addr_t *a1, *m1, *a2, *m2;
2113 2122
2114 2123 ASSERT(fd1->fd_mask == fd2->fd_mask);
2115 2124 if (fd1->fd_mask == FLOW_IP_DSFIELD) {
2116 2125 return (fd1->fd_dsfield == fd2->fd_dsfield &&
2117 2126 fd1->fd_dsfield_mask == fd2->fd_dsfield_mask);
2118 2127 }
2119 2128
2120 2129 /*
2121 2130 * flow_ip_accept_fe() already validated the version.
2122 2131 */
2123 2132 ASSERT((fd1->fd_mask & FLOW_IP_VERSION) != 0);
2124 2133 if (fd1->fd_ipversion != fd2->fd_ipversion)
2125 2134 return (B_FALSE);
2126 2135
2127 2136 switch (fd1->fd_mask & ~FLOW_IP_VERSION) {
2128 2137 case FLOW_IP_LOCAL:
2129 2138 a1 = &fd1->fd_local_addr;
2130 2139 m1 = &fd1->fd_local_netmask;
2131 2140 a2 = &fd2->fd_local_addr;
2132 2141 m2 = &fd2->fd_local_netmask;
2133 2142 break;
2134 2143 case FLOW_IP_REMOTE:
2135 2144 a1 = &fd1->fd_remote_addr;
2136 2145 m1 = &fd1->fd_remote_netmask;
2137 2146 a2 = &fd2->fd_remote_addr;
2138 2147 m2 = &fd2->fd_remote_netmask;
2139 2148 break;
2140 2149 default:
2141 2150 /*
2142 2151 * This is unreachable given the checks in
2143 2152 * flow_ip_accept_fe().
2144 2153 */
2145 2154 return (B_FALSE);
2146 2155 }
2147 2156
2148 2157 if (fd1->fd_ipversion == IPV4_VERSION) {
2149 2158 return (V4_PART_OF_V6((*a1)) == V4_PART_OF_V6((*a2)) &&
2150 2159 V4_PART_OF_V6((*m1)) == V4_PART_OF_V6((*m2)));
2151 2160
2152 2161 } else {
2153 2162 return (IN6_ARE_ADDR_EQUAL(a1, a2) &&
2154 2163 IN6_ARE_ADDR_EQUAL(m1, m2));
2155 2164 }
2156 2165 }
2157 2166
2158 2167 static int
2159 2168 flow_ip_mask2plen(in6_addr_t *v6mask)
2160 2169 {
2161 2170 int bits;
2162 2171 int plen = IPV6_ABITS;
2163 2172 int i;
2164 2173
2165 2174 for (i = 3; i >= 0; i--) {
2166 2175 if (v6mask->s6_addr32[i] == 0) {
2167 2176 plen -= 32;
2168 2177 continue;
2169 2178 }
2170 2179 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
2171 2180 if (bits == 0)
2172 2181 break;
2173 2182 plen -= bits;
2174 2183 }
2175 2184 return (plen);
2176 2185 }
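
flow_ip_mask2plen() walks the mask's four 32-bit words from least to most significant: an all-zero word subtracts 32 bits, a partially-set word subtracts its count of trailing zero bits via ffs(), and the first word that ends in a one bit stops the walk. For a /64 mask, the two low words remove 64 bits and the loop breaks on the first all-ones word. A self-contained restatement that compiles in user space; the in6-like struct is local to the sketch:

#include <arpa/inet.h>	/* ntohl(), htonl() */
#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

typedef struct {
	uint32_t w[4];	/* stand-in for in6_addr_t's 32-bit words */
} mask6_t;

static int
mask2plen(const mask6_t *m)
{
	int plen = 128;
	int bits, i;

	for (i = 3; i >= 0; i--) {
		if (m->w[i] == 0) {		/* whole word of zeros */
			plen -= 32;
			continue;
		}
		bits = ffs(ntohl(m->w[i])) - 1;	/* trailing zero count */
		if (bits == 0)			/* word ends in a one: done */
			break;
		plen -= bits;
	}
	return (plen);
}

int
main(void)
{
	/* ffff:ffff:ffff:ffff:: is a /64 prefix. */
	mask6_t m = { { htonl(0xffffffff), htonl(0xffffffff), 0, 0 } };

	printf("plen = %d\n", mask2plen(&m));	/* prints plen = 64 */
	return (0);
}
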
2177 2186
2178 2187 /* ARGSUSED */
2179 2188 static int
2180 2189 flow_ip_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
2181 2190 flow_entry_t *flent)
2182 2191 {
2183 2192 flow_entry_t **p = headp;
2184 2193 flow_desc_t *fd0, *fd;
2185 2194 in6_addr_t *m0, *m;
2186 2195 int plen0, plen;
2187 2196
2188 2197 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
2189 2198
2190 2199 /*
2191 2200 * No special ordering needed for dsfield.
2192 2201 */
2193 2202 fd0 = &flent->fe_flow_desc;
2194 2203 if ((fd0->fd_mask & FLOW_IP_DSFIELD) != 0) {
2195 2204 if (*p != NULL) {
2196 2205 ASSERT(flent->fe_next == NULL);
2197 2206 flent->fe_next = *p;
2198 2207 }
2199 2208 *p = flent;
2200 2209 return (0);
2201 2210 }
2202 2211
2203 2212 /*
2204 2213 * IP address flows are arranged in descending prefix length order.
2205 2214 */
2206 2215 m0 = ((fd0->fd_mask & FLOW_IP_LOCAL) != 0) ?
2207 2216 &fd0->fd_local_netmask : &fd0->fd_remote_netmask;
2208 2217 plen0 = flow_ip_mask2plen(m0);
2209 2218 ASSERT(plen0 != 0);
2210 2219
2211 2220 for (; *p != NULL; p = &(*p)->fe_next) {
2212 2221 fd = &(*p)->fe_flow_desc;
2213 2222
2214 2223 /*
2215 2224 * Normally a dsfield flent shouldn't end up on the same
2216 2225 * list as an IP address because flow tables are (for now)
2217 2226 * disjoint. If we decide to support both IP and dsfield
2218 2227 * in the same table in the future, this check will allow
2219 2228 * for that.
2220 2229 */
2221 2230 if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
2222 2231 continue;
2223 2232
2224 2233 /*
2225 2234 * We also allow for the mixing of local and remote address
2226 2235 * flents within one list.
2227 2236 */
2228 2237 m = ((fd->fd_mask & FLOW_IP_LOCAL) != 0) ?
2229 2238 &fd->fd_local_netmask : &fd->fd_remote_netmask;
2230 2239 plen = flow_ip_mask2plen(m);
2231 2240
2232 2241 if (plen <= plen0)
2233 2242 break;
2234 2243 }
2235 2244 if (*p != NULL) {
2236 2245 ASSERT(flent->fe_next == NULL);
2237 2246 flent->fe_next = *p;
2238 2247 }
2239 2248 *p = flent;
2240 2249 return (0);
2241 2250 }
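
Stripped of the dsfield and local/remote bookkeeping, the ordering invariant above is simply "insert before the first entry whose prefix is no longer than ours", keeping each bucket in descending prefix-length order so the most specific subnet matches first. A minimal sketch of that insertion, using integer prefix lengths directly:

#include <stddef.h>

typedef struct pfx {
	struct pfx	*p_next;
	int		p_plen;		/* prefix length, 1..128 */
} pfx_t;

/* Keep the list sorted longest-prefix-first. */
static void
insert_by_plen(pfx_t **headp, pfx_t *np)
{
	pfx_t **p;

	for (p = headp; *p != NULL; p = &(*p)->p_next) {
		if ((*p)->p_plen <= np->p_plen)
			break;		/* insert before shorter/equal */
	}
	np->p_next = *p;		/* NULL at the tail is fine */
	*p = np;
}
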
2242 2251
2243 2252 /*
2244 2253 * Transport layer protocol and port matching functions.
2245 2254 */
2246 2255
2247 2256 /* ARGSUSED */
2248 2257 static boolean_t
2249 2258 flow_transport_lport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
2250 2259 {
2251 2260 flow_l3info_t *l3info = &s->fs_l3info;
2252 2261 flow_l4info_t *l4info = &s->fs_l4info;
2253 2262 flow_desc_t *fd = &flent->fe_flow_desc;
2254 2263
2255 2264 return (fd->fd_protocol == l3info->l3_protocol &&
2256 2265 fd->fd_local_port == l4info->l4_hash_port);
2257 2266 }
2258 2267
2259 2268 /* ARGSUSED */
2260 2269 static boolean_t
2261 2270 flow_transport_rport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
2262 2271 {
2263 2272 flow_l3info_t *l3info = &s->fs_l3info;
2264 2273 flow_l4info_t *l4info = &s->fs_l4info;
2265 2274 flow_desc_t *fd = &flent->fe_flow_desc;
2266 2275
2267 2276 return (fd->fd_protocol == l3info->l3_protocol &&
2268 2277 fd->fd_remote_port == l4info->l4_hash_port);
2269 2278 }
2270 2279
2271 2280 /*
2272 2281 * Transport hash function.
2273 2282 * Since we only support either local or remote port flows,
2274 2283 * we only need to extract one of the ports to be used for
2275 2284 * matching.
2276 2285 */
2277 2286 static uint32_t
2278 2287 flow_transport_hash(flow_tab_t *ft, flow_state_t *s)
2279 2288 {
2280 2289 flow_l3info_t *l3info = &s->fs_l3info;
2281 2290 flow_l4info_t *l4info = &s->fs_l4info;
2282 2291 uint8_t proto = l3info->l3_protocol;
2283 2292 boolean_t dst_or_src;
2284 2293
2285 2294 if ((ft->ft_mask & FLOW_ULP_PORT_LOCAL) != 0) {
2286 2295 dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
2287 2296 } else {
2288 2297 dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
2289 2298 }
2290 2299
2291 2300 l4info->l4_hash_port = dst_or_src ? l4info->l4_dst_port :
2292 2301 l4info->l4_src_port;
2293 2302
2294 2303 return ((l4info->l4_hash_port ^ (proto << 4)) % ft->ft_size);
2295 2304 }
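
The same mix is used on this packet path and on the flow-entry path in flow_transport_hash_fe() below; the two must agree, or a packet would hash to a different bucket than the flow entry meant to match it. The computation itself is one line (the function name is illustrative):

#include <stdint.h>

/* Shared bucket mix for packets and flow entries. */
static uint32_t
transport_bucket(uint16_t port, uint8_t proto, uint32_t ft_size)
{
	return (((uint32_t)port ^ ((uint32_t)proto << 4)) % ft_size);
}
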
2296 2305
2297 2306 /*
2298 2307 * Unlike other accept() functions above, we do not need to get the header
2299 2308  * size because this is our highest layer so far. If we want to support
2300 2309  * other higher-layer protocols, we would need to save the l4_hdrsize
2301 2310 * in the code below.
2302 2311 */
2303 2312
2304 2313 /* ARGSUSED */
2305 2314 static int
2306 2315 flow_transport_accept(flow_tab_t *ft, flow_state_t *s)
2307 2316 {
2308 2317 flow_l3info_t *l3info = &s->fs_l3info;
2309 2318 flow_l4info_t *l4info = &s->fs_l4info;
2310 2319 uint8_t proto = l3info->l3_protocol;
2311 2320 uchar_t *l4_start;
2312 2321
2313 2322 l4_start = l3info->l3_start + l3info->l3_hdrsize;
2314 2323
2315 2324 /*
2316 2325 * Adjust start pointer if we're at the end of an mblk.
2317 2326 */
2318 2327 CHECK_AND_ADJUST_START_PTR(s, l4_start);
2319 2328
2320 2329 l4info->l4_start = l4_start;
2321 2330 if (!OK_32PTR(l4_start))
2322 2331 return (EINVAL);
2323 2332
2324 2333 if (l3info->l3_fragmented == B_TRUE)
2325 2334 return (EINVAL);
2326 2335
2327 2336 switch (proto) {
2328 2337 case IPPROTO_TCP: {
2329 2338 struct tcphdr *tcph = (struct tcphdr *)l4_start;
2330 2339
2331 2340 if (PKT_TOO_SMALL(s, l4_start + sizeof (*tcph)))
2332 2341 return (ENOBUFS);
2333 2342
2334 2343 l4info->l4_src_port = tcph->th_sport;
2335 2344 l4info->l4_dst_port = tcph->th_dport;
2336 2345 break;
2337 2346 }
2338 2347 case IPPROTO_UDP: {
2339 2348 struct udphdr *udph = (struct udphdr *)l4_start;
2340 2349
2341 2350 if (PKT_TOO_SMALL(s, l4_start + sizeof (*udph)))
2342 2351 return (ENOBUFS);
2343 2352
2344 2353 l4info->l4_src_port = udph->uh_sport;
2345 2354 l4info->l4_dst_port = udph->uh_dport;
2346 2355 break;
2347 2356 }
2348 2357 case IPPROTO_SCTP: {
2349 2358 sctp_hdr_t *sctph = (sctp_hdr_t *)l4_start;
2350 2359
2351 2360 if (PKT_TOO_SMALL(s, l4_start + sizeof (*sctph)))
2352 2361 return (ENOBUFS);
2353 2362
2354 2363 l4info->l4_src_port = sctph->sh_sport;
2355 2364 l4info->l4_dst_port = sctph->sh_dport;
2356 2365 break;
2357 2366 }
2358 2367 default:
2359 2368 return (EINVAL);
2360 2369 }
2361 2370
2362 2371 return (0);
2363 2372 }
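
A layout property the switch above relies on is that TCP, UDP, and SCTP all put the 16-bit source and destination ports in the first four bytes of their headers; only the minimum-length checks differ by protocol. A sketch that exploits that layout (names hypothetical; ports stay in network byte order, just as l4_src_port/l4_dst_port do):

#include <stdint.h>
#include <string.h>

/* TCP, UDP, and SCTP headers all begin: src port, then dst port. */
static void
extract_ports(const uint8_t *l4_start, uint16_t *sport, uint16_t *dport)
{
	(void) memcpy(sport, l4_start, sizeof (*sport));
	(void) memcpy(dport, l4_start + 2, sizeof (*dport));
	/* Values remain in network byte order, like the kernel fields. */
}
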
2364 2373
2365 2374 /*
2366 2375 * Validates transport flow entry.
2367 2376 * The protocol field must be present.
2368 2377 */
2369 2378
2370 2379 /* ARGSUSED */
2371 2380 static int
2372 2381 flow_transport_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
2373 2382 {
2374 2383 flow_desc_t *fd = &flent->fe_flow_desc;
2375 2384 flow_mask_t mask = fd->fd_mask;
2376 2385
2377 2386 if ((mask & FLOW_IP_PROTOCOL) == 0)
2378 2387 return (EINVAL);
2379 2388
2380 2389 switch (fd->fd_protocol) {
2381 2390 case IPPROTO_TCP:
2382 2391 case IPPROTO_UDP:
2383 2392 case IPPROTO_SCTP:
2384 2393 break;
2385 2394 default:
2386 2395 return (EINVAL);
2387 2396 }
2388 2397
2389 2398 switch (mask & ~FLOW_IP_PROTOCOL) {
2390 2399 case FLOW_ULP_PORT_LOCAL:
2391 2400 if (fd->fd_local_port == 0)
2392 2401 return (EINVAL);
2393 2402
2394 2403 flent->fe_match = flow_transport_lport_match;
2395 2404 break;
2396 2405 case FLOW_ULP_PORT_REMOTE:
2397 2406 if (fd->fd_remote_port == 0)
2398 2407 return (EINVAL);
2399 2408
2400 2409 flent->fe_match = flow_transport_rport_match;
2401 2410 break;
2402 2411 case 0:
2403 2412 /*
2404 2413  * Transport-only flows conflict with our table type.
2405 2414 */
2406 2415 return (EOPNOTSUPP);
2407 2416 default:
2408 2417 return (EINVAL);
2409 2418 }
2410 2419
2411 2420 return (0);
2412 2421 }
2413 2422
2414 2423 static uint32_t
2415 2424 flow_transport_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
2416 2425 {
2417 2426 flow_desc_t *fd = &flent->fe_flow_desc;
2418 2427 uint16_t port = 0;
2419 2428
2420 2429 port = ((fd->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) ?
2421 2430 fd->fd_local_port : fd->fd_remote_port;
2422 2431
2423 2432 return ((port ^ (fd->fd_protocol << 4)) % ft->ft_size);
2424 2433 }
2425 2434
2426 2435 /* ARGSUSED */
2427 2436 static boolean_t
2428 2437 flow_transport_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
2429 2438 {
2430 2439 flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
2431 2440
2432 2441 if (fd1->fd_protocol != fd2->fd_protocol)
2433 2442 return (B_FALSE);
2434 2443
2435 2444 if ((fd1->fd_mask & FLOW_ULP_PORT_LOCAL) != 0)
2436 2445 return (fd1->fd_local_port == fd2->fd_local_port);
2437 2446
2438 2447 if ((fd1->fd_mask & FLOW_ULP_PORT_REMOTE) != 0)
2439 2448 return (fd1->fd_remote_port == fd2->fd_remote_port);
2440 2449
2441 2450 return (B_TRUE);
2442 2451 }
2443 2452
2444 2453 static flow_ops_t flow_l2_ops = {
2445 2454 flow_l2_accept_fe,
2446 2455 flow_l2_hash_fe,
2447 2456 flow_l2_match_fe,
2448 2457 flow_generic_insert_fe,
2449 2458 flow_l2_hash,
2450 2459 {flow_l2_accept}
2451 2460 };
2452 2461
2453 2462 static flow_ops_t flow_ip_ops = {
2454 2463 flow_ip_accept_fe,
2455 2464 flow_ip_hash_fe,
2456 2465 flow_ip_match_fe,
2457 2466 flow_ip_insert_fe,
2458 2467 flow_ip_hash,
2459 2468 {flow_l2_accept, flow_ip_accept}
2460 2469 };
2461 2470
2462 2471 static flow_ops_t flow_ip_proto_ops = {
2463 2472 flow_ip_proto_accept_fe,
2464 2473 flow_ip_proto_hash_fe,
2465 2474 flow_ip_proto_match_fe,
2466 2475 flow_generic_insert_fe,
2467 2476 flow_ip_proto_hash,
2468 2477 {flow_l2_accept, flow_ip_accept}
2469 2478 };
2470 2479
2471 2480 static flow_ops_t flow_transport_ops = {
2472 2481 flow_transport_accept_fe,
2473 2482 flow_transport_hash_fe,
2474 2483 flow_transport_match_fe,
2475 2484 flow_generic_insert_fe,
2476 2485 flow_transport_hash,
2477 2486 {flow_l2_accept, flow_ip_accept, flow_transport_accept}
2478 2487 };
2479 2488
2480 2489 static flow_tab_info_t flow_tab_info_list[] = {
2481 2490 {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_LOCAL, 2},
2482 2491 {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_REMOTE, 2},
2483 2492 {&flow_ip_ops, FLOW_IP_DSFIELD, 1},
2484 2493 {&flow_ip_proto_ops, FLOW_IP_PROTOCOL, 256},
2485 2494 {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_LOCAL, 1024},
2486 2495 {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_REMOTE, 1024}
2487 2496 };
2488 2497
2489 2498 #define FLOW_MAX_TAB_INFO \
2490 2499 ((sizeof (flow_tab_info_list)) / sizeof (flow_tab_info_t))
2491 2500
2492 2501 static flow_tab_info_t *
2493 2502 mac_flow_tab_info_get(flow_mask_t mask)
2494 2503 {
2495 2504 int i;
2496 2505
2497 2506 for (i = 0; i < FLOW_MAX_TAB_INFO; i++) {
2498 2507 if (mask == flow_tab_info_list[i].fti_mask)
2499 2508 return (&flow_tab_info_list[i]);
2500 2509 }
2501 2510 return (NULL);
2502 2511 }
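
mac_flow_tab_info_get() is an exact-match linear scan: a mask is legal only if it equals one of the six combinations enumerated in flow_tab_info_list, and anything else is rejected with NULL. The same shape in miniature, with simplified types and made-up mask bits:

#include <stddef.h>
#include <stdint.h>

typedef uint64_t mask_t;

typedef struct tab_info {
	mask_t	ti_mask;	/* exact mask this table type serves */
	size_t	ti_size;	/* hash table size for that type */
} tab_info_t;

/* Made-up mask bits, standing in for FLOW_* flags. */
static tab_info_t tab_list[] = {
	{ 0x1 | 0x2, 2 },
	{ 0x1 | 0x4, 2 },
};

static tab_info_t *
tab_info_get(mask_t mask)
{
	size_t i;

	/* Exact match only: a superset or subset mask is not legal. */
	for (i = 0; i < sizeof (tab_list) / sizeof (tab_list[0]); i++) {
		if (tab_list[i].ti_mask == mask)
			return (&tab_list[i]);
	}
	return (NULL);
}
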