63 list_node_t oth_link; /* overlay_target_lock */
64 kmutex_t oth_lock;
65 list_t oth_outstanding; /* oth_lock */
66 } overlay_target_hdl_t;
67
/*
 * Callback types stored in each overlay_target_ioctl_t below. The handlers in
 * this file (e.g. overlay_target_info()) have the overlay_target_ioctl_f
 * shape: a target handle plus an opaque argument buffer, returning an errno.
 */
68 typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
69 typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
70 typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);
71
/*
 * Describes one ioctl: its id, whether it needs FWRITE, the copyin, handler
 * and copyout callbacks, and the size of the user-level structure.
 * NOTE(review): the struct tag is spelled "overaly"; left untouched here.
 */
72 typedef struct overaly_target_ioctl {
73 int oti_cmd; /* ioctl id */
74 boolean_t oti_write; /* ioctl requires FWRITE */
75 boolean_t oti_ncopyout; /* copyout data? */
76 overlay_target_copyin_f oti_copyin; /* copyin func */
77 overlay_target_ioctl_f oti_func; /* function to call */
78 overlay_target_copyout_f oti_copyout; /* copyout func */
79 size_t oti_size; /* size of user level structure */
80 } overlay_target_ioctl_t;
81
/*
 * overlay_target_cache backs overlay_target_t allocations and
 * overlay_entry_cache backs overlay_target_entry_t allocations.
 */
82 static kmem_cache_t *overlay_target_cache;
83 static kmem_cache_t *overlay_entry_cache;
84 static id_space_t *overlay_thdl_idspace;
85 static void *overlay_thdl_state;
86
87 /*
88 * When we support overlay devices in the NGZ, then all of these need to become
89 * zone aware, by plugging into the netstack engine and becoming per-netstack
90 * data.
91 */
/*
 * overlay_target_lock is held around accesses to overlay_thdl_list and
 * overlay_target_list. overlay_target_queue() appends entries to
 * overlay_target_list and signals overlay_target_condvar.
 */
92 static list_t overlay_thdl_list;
93 static kmutex_t overlay_target_lock;
94 static kcondvar_t overlay_target_condvar;
95 static list_t overlay_target_list;
96 static boolean_t overlay_target_excl;
97
98 /*
99 * Outstanding data per hash table entry.
100 */
/*
 * overlay_target_lookup() drops a packet rather than queue it when the bytes
 * already queued on the entry plus the new packet would exceed this value.
 */
101 static int overlay_ent_size = 128 * 1024;
102
103 /* ARGSUSED */
104 static int
105 overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
106 {
107 overlay_target_t *ott = buf;
108
109 mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
110 cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
111 return (0);
112 }
113
114 /* ARGSUSED */
115 static void
116 overlay_target_cache_destructor(void *buf, void *arg)
117 {
118 overlay_target_t *ott = buf;
119
120 cv_destroy(&ott->ott_cond);
121 mutex_destroy(&ott->ott_lock);
124 /* ARGSUSED */
125 static int
126 overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
127 {
128 overlay_target_entry_t *ote = buf;
129
130 bzero(ote, sizeof (overlay_target_entry_t));
131 mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
132 return (0);
133 }
134
135 /* ARGSUSED */
136 static void
137 overlay_entry_cache_destructor(void *buf, void *arg)
138 {
139 overlay_target_entry_t *ote = buf;
140
141 mutex_destroy(&ote->ote_lock);
142 }
143
144 /* TODO: we will need to modify these to hash/cmp DCID + MAC */
145
146 static uint64_t
147 overlay_mac_hash(const void *v)
148 {
149 uint32_t crc;
150 CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
151 return (crc);
152 }
153
154 static int
155 overlay_mac_cmp(const void *a, const void *b)
156 {
157 return (bcmp(a, b, ETHERADDRL));
158 }
159
160 /* ARGSUSED */
161 static void
162 overlay_target_entry_dtor(void *arg)
163 {
164 overlay_target_entry_t *ote = arg;
165
166 ote->ote_flags = 0;
167 bzero(ote->ote_addr, ETHERADDRL);
168 ote->ote_ott = NULL;
169 ote->ote_odd = NULL;
170 freemsgchain(ote->ote_chead);
171 ote->ote_chead = ote->ote_ctail = NULL;
172 ote->ote_mbsize = 0;
173 ote->ote_vtime = 0;
174 kmem_cache_free(overlay_entry_cache, ote);
175 }
176
177 static int
178 overlay_mac_avl(const void *a, const void *b)
179 {
180 int i;
181 const overlay_target_entry_t *l, *r;
182 l = a;
183 r = b;
184
185 for (i = 0; i < ETHERADDRL; i++) {
186 if (l->ote_addr[i] > r->ote_addr[i])
187 return (1);
188 else if (l->ote_addr[i] < r->ote_addr[i])
189 return (-1);
219 overlay_target_fini(void)
220 {
221 id_space_destroy(overlay_thdl_idspace);
222 list_destroy(&overlay_thdl_list);
223 list_destroy(&overlay_target_list);
224 cv_destroy(&overlay_target_condvar);
225 mutex_destroy(&overlay_target_lock);
226 kmem_cache_destroy(overlay_entry_cache);
227 kmem_cache_destroy(overlay_target_cache);
228 ddi_soft_state_fini(&overlay_thdl_state);
229 }
230
231 void
232 overlay_target_free(overlay_dev_t *odd)
233 {
234 if (odd->odd_target == NULL)
235 return;
236
237 if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
238 refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
239 avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
240 overlay_target_entry_t *ote;
241
242 /* TODO: remove from L3 trees */
243
244 /*
245 * Our AVL tree and hashtable contain the same elements,
246 * therefore we should just remove it from the tree, but then
247 * delete the entries when we remove them from the hash table
248 * (which happens through the refhash dtor).
249 */
250 while ((ote = avl_first(ap)) != NULL)
251 avl_remove(ap, ote);
252
253 avl_destroy(ap);
254 for (ote = refhash_first(rp); ote != NULL;
255 ote = refhash_next(rp, ote)) {
256 refhash_remove(rp, ote);
257 }
258 refhash_destroy(rp);
259 }
260
261 ASSERT(odd->odd_target->ott_ocount == 0);
262 kmem_cache_free(overlay_target_cache, odd->odd_target);
263 }
264
265 int
266 overlay_target_busy()
267 {
268 int ret;
269
270 mutex_enter(&overlay_target_lock);
271 ret = !list_is_empty(&overlay_thdl_list);
272 mutex_exit(&overlay_target_lock);
273
274 return (ret);
275 }
276
277 static void
278 overlay_target_queue(overlay_target_entry_t *entry)
279 {
280 mutex_enter(&overlay_target_lock);
281 mutex_enter(&entry->ote_ott->ott_lock);
282 if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
283 mutex_exit(&entry->ote_ott->ott_lock);
284 mutex_exit(&overlay_target_lock);
285 return;
286 }
287 entry->ote_ott->ott_ocount++;
288 mutex_exit(&entry->ote_ott->ott_lock);
289 list_insert_tail(&overlay_target_list, entry);
290 cv_signal(&overlay_target_condvar);
291 mutex_exit(&overlay_target_lock);
292 }
293
294 void
295 overlay_target_quiesce(overlay_target_t *ott)
296 {
297 if (ott == NULL)
298 return;
299 mutex_enter(&ott->ott_lock);
300 ott->ott_flags |= OVERLAY_T_TEARDOWN;
301 while (ott->ott_ocount != 0)
302 cv_wait(&ott->ott_cond, &ott->ott_lock);
303 mutex_exit(&ott->ott_lock);
304 }
305
306 /*
307 * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
308 * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at
309 * this time, say for NVGRE, we drop all packets that match this.
310 *
311 * XXX: It might be better to replace the 'sock' argument with
312 * overlay_target_entry_t** and set it with the found entry in the case
313 * of OVERLAY_TARGET_OK.
314 */
315 int
316 overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
317 socklen_t *slenp)
318 {
319 int ret;
320 struct sockaddr_in6 *v6;
321 overlay_target_t *ott;
322 mac_header_info_t mhi;
323 overlay_target_entry_t *entry;
324
325 ASSERT(odd->odd_target != NULL);
326
327 /*
328 * At this point, the overlay device is in a mux which means that it's
329 * been activated. At this point, parts of the target, such as the mode
330 * and the destination are now read-only and we don't have to worry
331 * about synchronization for them.
332 */
333 ott = odd->odd_target;
334 if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
335 return (OVERLAY_TARGET_DROP);
336
337 v6 = (struct sockaddr_in6 *)sock;
338 bzero(v6, sizeof (struct sockaddr_in6));
339 v6->sin6_family = AF_INET6;
340
341 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
342 mutex_enter(&ott->ott_lock);
343 bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
344 sizeof (struct in6_addr));
345 v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
346 mutex_exit(&ott->ott_lock);
347 *slenp = sizeof (struct sockaddr_in6);
348
349 return (OVERLAY_TARGET_OK);
350 }
351
352 ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
353
354 /*
355 * Note we only want the MAC address here, therefore we won't bother
356 * using mac_vlan_header_info(). If any caller needs the vlan info at
357 * this point, this should change to a call to mac_vlan_header_info().
358 */
359 if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
360 return (OVERLAY_TARGET_DROP);
361
362 /*
363 * TODO: compare mhi.mhi_daddr with odd->macaddr.
364 * If match,
365 * get VL3 dest from mp
366 * lookup target using VL3 dest
367 * otherwise,
368 * lookup target using VL2 dest (existing refhash_lookup() call
369 * below)
370 */
371 mutex_enter(&ott->ott_lock);
372 entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
373 mhi.mhi_daddr);
374 if (entry == NULL) {
375 entry = kmem_cache_alloc(overlay_entry_cache,
376 KM_NOSLEEP | KM_NORMALPRI);
377 if (entry == NULL) {
378 mutex_exit(&ott->ott_lock);
379 return (OVERLAY_TARGET_DROP);
380 }
381 /*
382 * TODO: set entry->ote_dcid, if VL3 lookup, copy dst addr
383 * into entry->ote_ip. Probably zero out the address we're
384 * not lookup up (VL2 or VL3) as well.
385 */
386 bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
387 entry->ote_chead = entry->ote_ctail = mp;
388 entry->ote_mbsize = msgsize(mp);
389 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
390 entry->ote_ott = ott;
391 entry->ote_odd = odd;
392 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
393 avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
394 mutex_exit(&ott->ott_lock);
395 overlay_target_queue(entry);
396 return (OVERLAY_TARGET_ASYNC);
397 }
398 refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
399 mutex_exit(&ott->ott_lock);
400
401 mutex_enter(&entry->ote_lock);
402 if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
403 ret = OVERLAY_TARGET_DROP;
404 } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
405 bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
406 sizeof (struct in6_addr));
407 v6->sin6_port = htons(entry->ote_dest.otp_port);
408 *slenp = sizeof (struct sockaddr_in6);
409 ret = OVERLAY_TARGET_OK;
410 } else {
411 size_t mlen = msgsize(mp);
412
413 if (mlen + entry->ote_mbsize > overlay_ent_size) {
414 ret = OVERLAY_TARGET_DROP;
415 } else {
416 if (entry->ote_ctail != NULL) {
417 ASSERT(entry->ote_ctail->b_next ==
418 NULL);
419 entry->ote_ctail->b_next = mp;
420 entry->ote_ctail = mp;
421 } else {
422 entry->ote_chead = mp;
423 entry->ote_ctail = mp;
424 }
425 entry->ote_mbsize += mlen;
426 if ((entry->ote_flags &
427 OVERLAY_ENTRY_F_PENDING) == 0) {
428 entry->ote_flags |=
429 OVERLAY_ENTRY_F_PENDING;
430 overlay_target_queue(entry);
431 }
432 ret = OVERLAY_TARGET_ASYNC;
433 }
434 }
435 mutex_exit(&entry->ote_lock);
436
437 mutex_enter(&ott->ott_lock);
438 refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
439 mutex_exit(&ott->ott_lock);
440
441 return (ret);
442 }
443
444 /* ARGSUSED */
445 static int
446 overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
447 {
448 overlay_dev_t *odd;
449 overlay_targ_info_t *oti = arg;
450
451 odd = overlay_hold_by_dlid(oti->oti_linkid);
452 if (odd == NULL)
453 return (ENOENT);
454
455 mutex_enter(&odd->odd_lock);
456 oti->oti_flags = 0;
457 oti->oti_needs = odd->odd_plugin->ovp_dest;
458 if (odd->odd_flags & OVERLAY_F_DEGRADED)
459 oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
460 if (odd->odd_flags & OVERLAY_F_ACTIVATED)
461 oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
462 oti->oti_vnetid = odd->odd_vid;
463 oti->oti_dcid = odd->odd_dcid;
464 mutex_exit(&odd->odd_lock);
465 overlay_hold_rele(odd);
466 return (0);
467 }
468
469 /* ARGSUSED */
470 static int
471 overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
472 {
473 overlay_dev_t *odd;
474 overlay_target_t *ott;
475 overlay_targ_associate_t *ota = arg;
476
477 odd = overlay_hold_by_dlid(ota->ota_linkid);
478 if (odd == NULL)
479 return (ENOENT);
480
481 if (ota->ota_id == 0) {
482 overlay_hold_rele(odd);
483 return (EINVAL);
484 }
485
486 if (ota->ota_mode != OVERLAY_TARGET_POINT &&
487 ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
488 overlay_hold_rele(odd);
489 return (EINVAL);
490 }
491
492 if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
493 overlay_hold_rele(odd);
494 return (EINVAL);
495 }
508 if (ota->ota_point.otp_port == 0) {
509 overlay_hold_rele(odd);
510 return (EINVAL);
511 }
512 }
513 }
514
515 ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
516 ott->ott_flags = 0;
517 ott->ott_ocount = 0;
518 ott->ott_mode = ota->ota_mode;
519 ott->ott_dest = ota->ota_provides;
520 ott->ott_id = ota->ota_id;
521
522 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
523 bcopy(&ota->ota_point, &ott->ott_u.ott_point,
524 sizeof (overlay_target_point_t));
525 } else {
526 ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
527 overlay_mac_hash, overlay_mac_cmp,
528 overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
529 offsetof(overlay_target_entry_t, ote_reflink),
530 offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
531 avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
532 sizeof (overlay_target_entry_t),
533 offsetof(overlay_target_entry_t, ote_avllink));
534 }
535 mutex_enter(&odd->odd_lock);
536 if (odd->odd_flags & OVERLAY_F_VARPD) {
537 mutex_exit(&odd->odd_lock);
538 kmem_cache_free(overlay_target_cache, ott);
539 overlay_hold_rele(odd);
540 return (EEXIST);
541 }
542
543 odd->odd_flags |= OVERLAY_F_VARPD;
544 odd->odd_target = ott;
545 mutex_exit(&odd->odd_lock);
546
547 overlay_hold_rele(odd);
548
549
550 return (0);
551 }
552
553
554 /* ARGSUSED */
555 static int
556 overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
557 {
558 overlay_dev_t *odd;
559 overlay_targ_degrade_t *otd = arg;
560
561 odd = overlay_hold_by_dlid(otd->otd_linkid);
562 if (odd == NULL)
563 return (ENOENT);
564
565 overlay_fm_degrade(odd, otd->otd_buf);
566 overlay_hold_rele(odd);
567 return (0);
568 }
569
591 overlay_targ_id_t *otid = arg;
592
593 odd = overlay_hold_by_dlid(otid->otid_linkid);
594 if (odd == NULL)
595 return (ENOENT);
596
597 mutex_enter(&odd->odd_lock);
598 odd->odd_flags &= ~OVERLAY_F_VARPD;
599 mutex_exit(&odd->odd_lock);
600
601 overlay_hold_rele(odd);
602 return (0);
603
604 }
605
606 static int
607 overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
608 {
609 overlay_targ_lookup_t *otl = arg;
610 overlay_target_entry_t *entry;
611 clock_t ret, timeout;
612 mac_header_info_t mhi;
613
614 timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
615 again:
616 mutex_enter(&overlay_target_lock);
617 while (list_is_empty(&overlay_target_list)) {
618 ret = cv_timedwait(&overlay_target_condvar,
619 &overlay_target_lock, timeout);
620 if (ret == -1) {
621 mutex_exit(&overlay_target_lock);
622 return (ETIME);
623 }
624 }
625 entry = list_remove_head(&overlay_target_list);
626 mutex_exit(&overlay_target_lock);
627 mutex_enter(&entry->ote_lock);
628 if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
629 ASSERT(entry->ote_chead == NULL);
630 mutex_exit(&entry->ote_lock);
631 goto again;
632 }
633 ASSERT(entry->ote_chead != NULL);
634
635 /*
636 * If we have a bogon that doesn't have a valid mac header, drop it and
637 * try again.
638 */
639 if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
640 &mhi) != 0) {
641 boolean_t queue = B_FALSE;
642 mblk_t *mp = entry->ote_chead;
643 entry->ote_chead = mp->b_next;
644 mp->b_next = NULL;
645 if (entry->ote_ctail == mp)
646 entry->ote_ctail = entry->ote_chead;
647 entry->ote_mbsize -= msgsize(mp);
648 if (entry->ote_chead != NULL)
649 queue = B_TRUE;
650 mutex_exit(&entry->ote_lock);
651 if (queue == B_TRUE)
652 overlay_target_queue(entry);
653 freemsg(mp);
654 goto again;
655 }
656
657 /*
658 * TODO: If VL3 request,
659 * set otl->otl_l3req
660 * Fill in otl_{src,dst}ip
661 * Else
662 * clear otl->otl_l3req
663 */
664 otl->otl_dlid = entry->ote_odd->odd_linkid;
665 otl->otl_reqid = (uintptr_t)entry;
666 otl->otl_varpdid = entry->ote_ott->ott_id;
667 otl->otl_vnetid = entry->ote_odd->odd_vid;
668
669 otl->otl_hdrsize = mhi.mhi_hdrsize;
670 otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
671 bcopy(mhi.mhi_daddr, otl->otl_addru.otlu_l2.otl2_dstaddr, ETHERADDRL);
672 bcopy(mhi.mhi_saddr, otl->otl_addru.otlu_l2.otl2_srcaddr, ETHERADDRL);
673 otl->otl_addru.otlu_l2.otl2_dsttype = mhi.mhi_dsttype;
674 otl->otl_addru.otlu_l2.otl2_sap = mhi.mhi_bindsap;
675 otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
676 mutex_exit(&entry->ote_lock);
677
678 mutex_enter(&thdl->oth_lock);
679 list_insert_tail(&thdl->oth_outstanding, entry);
680 mutex_exit(&thdl->oth_lock);
681
682 return (0);
683 }
684
685 static int
686 overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
687 {
688 const overlay_targ_resp_t *otr = arg;
689 overlay_target_entry_t *entry;
690 mblk_t *mp;
691
692 mutex_enter(&thdl->oth_lock);
693 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
694 entry = list_next(&thdl->oth_outstanding, entry)) {
695 if ((uintptr_t)entry == otr->otr_reqid)
696 break;
697 }
698
699 if (entry == NULL) {
700 mutex_exit(&thdl->oth_lock);
701 return (EINVAL);
702 }
703 list_remove(&thdl->oth_outstanding, entry);
704 mutex_exit(&thdl->oth_lock);
705
706 mutex_enter(&entry->ote_lock);
707 bcopy(&otr->otr_answer, &entry->ote_dest,
708 sizeof (overlay_target_point_t));
709 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
710 entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
711 mp = entry->ote_chead;
712 entry->ote_chead = NULL;
713 entry->ote_ctail = NULL;
714 entry->ote_mbsize = 0;
715 entry->ote_vtime = gethrtime();
716 mutex_exit(&entry->ote_lock);
717
718 /*
719 * For now do an in-situ drain.
720 *
721 * TODO: overlay_m_tx() will need to perform remote fabric attachment
722 * checks, which may leave mblk_t's left in the msg chain for
723 * mblk_t's whose connectivity with the target entry are unknown.
724 * This will then need to deal with the leftovers.
725 */
726 mp = overlay_m_tx(entry->ote_odd, mp);
727 freemsgchain(mp);
728
729 mutex_enter(&entry->ote_ott->ott_lock);
730 entry->ote_ott->ott_ocount--;
1113 return (ENXIO);
1114 }
1115 ott = odd->odd_target;
1116 if (ott->ott_mode != OVERLAY_TARGET_POINT &&
1117 ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1118 mutex_exit(&odd->odd_lock);
1119 overlay_hold_rele(odd);
1120 return (ENOTSUP);
1121 }
1122 mutex_enter(&ott->ott_lock);
1123 mutex_exit(&odd->odd_lock);
1124
1125 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1126 otc->otc_entry.otce_flags = 0;
1127 bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
1128 sizeof (overlay_target_point_t));
1129 } else {
1130 overlay_target_entry_t *ote;
1131 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1132 otc->otc_entry.otce_mac);
1133 if (ote != NULL) {
1134 mutex_enter(&ote->ote_lock);
1135 if ((ote->ote_flags &
1136 OVERLAY_ENTRY_F_VALID_MASK) != 0) {
1137 if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
1138 otc->otc_entry.otce_flags =
1139 OVERLAY_TARGET_CACHE_DROP;
1140 } else {
1141 otc->otc_entry.otce_flags = 0;
1142 bcopy(&ote->ote_dest,
1143 &otc->otc_entry.otce_dest,
1144 sizeof (overlay_target_point_t));
1145 }
1146 ret = 0;
1147 } else {
1148 ret = ENOENT;
1149 }
1150 mutex_exit(&ote->ote_lock);
1151 } else {
1152 ret = ENOENT;
1153 }
1154 }
1155
1156 mutex_exit(&ott->ott_lock);
1157 overlay_hold_rele(odd);
1158
1159 return (ret);
1160 }
1161
1162 /* ARGSUSED */
1163 static int
1164 overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
1165 {
1166 overlay_dev_t *odd;
1167 overlay_target_t *ott;
1168 overlay_target_entry_t *ote;
1169 overlay_targ_cache_t *otc = arg;
1170 mblk_t *mp = NULL;
1171
1172 if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
1173 return (EINVAL);
1174
1175 odd = overlay_hold_by_dlid(otc->otc_linkid);
1176 if (odd == NULL)
1177 return (ENOENT);
1178
1179 mutex_enter(&odd->odd_lock);
1180 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1181 mutex_exit(&odd->odd_lock);
1182 overlay_hold_rele(odd);
1183 return (ENXIO);
1184 }
1185 ott = odd->odd_target;
1186 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1187 mutex_exit(&odd->odd_lock);
1188 overlay_hold_rele(odd);
1189 return (ENOTSUP);
1190 }
1191 mutex_enter(&ott->ott_lock);
1192 mutex_exit(&odd->odd_lock);
1193
1194 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1195 otc->otc_entry.otce_mac);
1196 if (ote == NULL) {
1197 ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
1198 bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
1199 ote->ote_chead = ote->ote_ctail = NULL;
1200 ote->ote_mbsize = 0;
1201 ote->ote_ott = ott;
1202 ote->ote_odd = odd;
1203 mutex_enter(&ote->ote_lock);
1204 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
1205 avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
1206 } else {
1207 mutex_enter(&ote->ote_lock);
1208 }
1209
1210 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
1211 ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
1212 } else {
1213 ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
1214 bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
1215 sizeof (overlay_target_point_t));
1216 mp = ote->ote_chead;
1217 ote->ote_chead = NULL;
1218 ote->ote_ctail = NULL;
1219 ote->ote_mbsize = 0;
1220 ote->ote_vtime = gethrtime();
1221 }
1222
1223 mutex_exit(&ote->ote_lock);
1224 mutex_exit(&ott->ott_lock);
1225
1226 if (mp != NULL) {
1227 mp = overlay_m_tx(ote->ote_odd, mp);
1228 freemsgchain(mp);
1229 }
1230
1231 overlay_hold_rele(odd);
1232
1233 return (0);
|
63 list_node_t oth_link; /* overlay_target_lock */
64 kmutex_t oth_lock;
65 list_t oth_outstanding; /* oth_lock */
66 } overlay_target_hdl_t;
67
/*
 * Callback types stored in each overlay_target_ioctl_t below. The handlers in
 * this file (e.g. overlay_target_info()) have the overlay_target_ioctl_f
 * shape: a target handle plus an opaque argument buffer, returning an errno.
 */
68 typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
69 typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
70 typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);
71
/*
 * Describes one ioctl: its id, whether it needs FWRITE, the copyin, handler
 * and copyout callbacks, and the size of the user-level structure.
 * NOTE(review): the struct tag is spelled "overaly"; left untouched here.
 */
72 typedef struct overaly_target_ioctl {
73 int oti_cmd; /* ioctl id */
74 boolean_t oti_write; /* ioctl requires FWRITE */
75 boolean_t oti_ncopyout; /* copyout data? */
76 overlay_target_copyin_f oti_copyin; /* copyin func */
77 overlay_target_ioctl_f oti_func; /* function to call */
78 overlay_target_copyout_f oti_copyout; /* copyout func */
79 size_t oti_size; /* size of user level structure */
80 } overlay_target_ioctl_t;
81
/*
 * overlay_target_cache backs overlay_target_t allocations and
 * overlay_entry_cache backs overlay_target_entry_t allocations.
 */
82 static kmem_cache_t *overlay_target_cache;
83 kmem_cache_t *overlay_entry_cache;
84 static id_space_t *overlay_thdl_idspace;
85 static void *overlay_thdl_state;
86
87 /*
88 * When we support overlay devices in the NGZ, then all of these need to become
89 * zone aware, by plugging into the netstack engine and becoming per-netstack
90 * data.
91 */
/*
 * overlay_target_lock is held around accesses to overlay_thdl_list and
 * overlay_target_list. overlay_target_queue() appends entries to
 * overlay_target_list and signals overlay_target_condvar.
 */
92 static list_t overlay_thdl_list;
93 static kmutex_t overlay_target_lock;
94 static kcondvar_t overlay_target_condvar;
95 static list_t overlay_target_list;
96 static boolean_t overlay_target_excl;
97
98 /*
99 * Outstanding data per hash table entry.
100 */
/*
 * overlay_target_lookup() drops a packet rather than queue it when the bytes
 * already queued on the entry plus the new packet would exceed this value.
 */
101 int overlay_ent_size = 128 * 1024;
102
103 /* ARGSUSED */
104 static int
105 overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
106 {
107 overlay_target_t *ott = buf;
108
109 mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
110 cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
111 return (0);
112 }
113
114 /* ARGSUSED */
115 static void
116 overlay_target_cache_destructor(void *buf, void *arg)
117 {
118 overlay_target_t *ott = buf;
119
120 cv_destroy(&ott->ott_cond);
121 mutex_destroy(&ott->ott_lock);
124 /* ARGSUSED */
125 static int
126 overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
127 {
128 overlay_target_entry_t *ote = buf;
129
130 bzero(ote, sizeof (overlay_target_entry_t));
131 mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
132 return (0);
133 }
134
135 /* ARGSUSED */
136 static void
137 overlay_entry_cache_destructor(void *buf, void *arg)
138 {
139 overlay_target_entry_t *ote = buf;
140
141 mutex_destroy(&ote->ote_lock);
142 }
143
144 static uint64_t
145 overlay_mac_hash(const void *v)
146 {
147 uint32_t crc;
148 CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
149 return (crc);
150 }
151
152 static int
153 overlay_mac_cmp(const void *a, const void *b)
154 {
155 return (bcmp(a, b, ETHERADDRL));
156 }
157
158 static uint64_t
159 overlay_vl3_hash(const void *v)
160 {
161 const overlay_target_entry_t *ote = v;
162 uint32_t crc;
163
164 CRC32(crc, &ote->ote_ip, sizeof (ote->ote_ip), -1U, crc32_table);
165 CRC32(crc, &ote->ote_fab, sizeof (ote->ote_fab), crc, crc32_table);
166 return (crc);
167 }
168
169 static int
170 overlay_vl3_cmp(const void *a, const void *b)
171 {
172 const overlay_target_entry_t *l = a;
173 const overlay_target_entry_t *r = b;
174
175 if (l->ote_fab != r->ote_fab ||
176 bcmp(&l->ote_ip, &r->ote_ip, sizeof (struct in6_addr)) != 0)
177 return (1);
178 return (0);
179 }
180
181 static int
182 overlay_vl3_avl(const void *a, const void *b)
183 {
184 const overlay_target_entry_t *l = a;
185 const overlay_target_entry_t *r = b;
186
187 if (l->ote_fab < r->ote_fab)
188 return (-1);
189 if (l->ote_fab > r->ote_fab)
190 return (1);
191 return (memcmp(&l->ote_ip, &r->ote_ip, sizeof (struct in6_addr)));
192 }
193
/*
 * Destructor callback that deliberately does nothing.
 * NOTE(review): presumably meant for a container whose entries are released
 * through another path; confirm against its callers.
 */
/* ARGSUSED */
void
overlay_target_entry_null_dtor(void *arg)
{
	/* Intentionally empty. */
}
199
200 /* ARGSUSED */
201 void
202 overlay_target_entry_dtor(void *arg)
203 {
204 overlay_target_entry_t *ote = arg;
205
206 ASSERT3U(ote->ote_refcnt, ==, 0);
207
208 ote->ote_flags = 0;
209 bzero(ote->ote_addr, ETHERADDRL);
210 bzero(&ote->ote_ip, sizeof (ote->ote_ip));
211 ote->ote_ott = NULL;
212 ote->ote_odd = NULL;
213 ote->ote_fab = NULL;
214 freemsgchain(ote->ote_chead);
215 ote->ote_chead = ote->ote_ctail = NULL;
216 ote->ote_mbsize = 0;
217 ote->ote_vtime = 0;
218 kmem_cache_free(overlay_entry_cache, ote);
219 }
220
221 static int
222 overlay_mac_avl(const void *a, const void *b)
223 {
224 int i;
225 const overlay_target_entry_t *l, *r;
226 l = a;
227 r = b;
228
229 for (i = 0; i < ETHERADDRL; i++) {
230 if (l->ote_addr[i] > r->ote_addr[i])
231 return (1);
232 else if (l->ote_addr[i] < r->ote_addr[i])
233 return (-1);
263 overlay_target_fini(void)
264 {
265 id_space_destroy(overlay_thdl_idspace);
266 list_destroy(&overlay_thdl_list);
267 list_destroy(&overlay_target_list);
268 cv_destroy(&overlay_target_condvar);
269 mutex_destroy(&overlay_target_lock);
270 kmem_cache_destroy(overlay_entry_cache);
271 kmem_cache_destroy(overlay_target_cache);
272 ddi_soft_state_fini(&overlay_thdl_state);
273 }
274
275 void
276 overlay_target_free(overlay_dev_t *odd)
277 {
278 if (odd->odd_target == NULL)
279 return;
280
281 if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
282 refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
283 refhash_t *r3p = odd->odd_target->ott_u.ott_dyn.ott_l3dhash;
284 avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
285 avl_tree_t *a3p = &odd->odd_target->ott_u.ott_dyn.ott_l3tree;
286 overlay_target_entry_t *ote;
287
288 /*
289 * Our AVL tree and hashtable contain the same elements,
290 * therefore we should just remove it from the tree, but then
291 * delete the entries when we remove them from the hash table
292 * (which happens through the refhash dtor).
293 */
294 while ((ote = avl_first(ap)) != NULL) {
295 avl_remove(ap, ote);
296 OVERLAY_TARG_ENTRY_REFRELE(ote);
297 }
298 avl_destroy(ap);
299
300 while ((ote = avl_first(a3p)) != NULL) {
301 avl_remove(a3p, ote);
302 OVERLAY_TARG_ENTRY_REFRELE(ote);
303 }
304 avl_destroy(a3p);
305
306 for (ote = refhash_first(rp); ote != NULL;
307 ote = refhash_next(rp, ote)) {
308 refhash_remove(rp, ote);
309 OVERLAY_TARG_ENTRY_REFRELE(ote);
310 }
311 refhash_destroy(rp);
312
313 for (ote = refhash_first(r3p); ote != NULL;
314 ote = refhash_next(r3p, ote)) {
315 refhash_remove(r3p, ote);
316 OVERLAY_TARG_ENTRY_REFRELE(ote);
317 }
318 refhash_destroy(r3p);
319 }
320
321 ASSERT(odd->odd_target->ott_ocount == 0);
322 bzero(&odd->odd_target->ott_u, sizeof (odd->odd_target->ott_u));
323 kmem_cache_free(overlay_target_cache, odd->odd_target);
324 odd->odd_target = NULL;
325 }
326
327 int
328 overlay_target_busy()
329 {
330 int ret;
331
332 mutex_enter(&overlay_target_lock);
333 ret = !list_is_empty(&overlay_thdl_list);
334 mutex_exit(&overlay_target_lock);
335
336 return (ret);
337 }
338
339 void
340 overlay_target_queue(overlay_target_entry_t *entry)
341 {
342 mutex_enter(&overlay_target_lock);
343 mutex_enter(&entry->ote_ott->ott_lock);
344 if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
345 mutex_exit(&entry->ote_ott->ott_lock);
346 mutex_exit(&overlay_target_lock);
347 return;
348 }
349 entry->ote_ott->ott_ocount++;
350 mutex_exit(&entry->ote_ott->ott_lock);
351 list_insert_tail(&overlay_target_list, entry);
352 cv_signal(&overlay_target_condvar);
353 mutex_exit(&overlay_target_lock);
354 }
355
356 void
357 overlay_target_quiesce(overlay_target_t *ott)
358 {
359 if (ott == NULL)
360 return;
361 mutex_enter(&ott->ott_lock);
362 ott->ott_flags |= OVERLAY_T_TEARDOWN;
363 while (ott->ott_ocount != 0)
364 cv_wait(&ott->ott_cond, &ott->ott_lock);
365 mutex_exit(&ott->ott_lock);
366 }
367
368 /*
369 * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
370 * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at
371 * this time, say for NVGRE, we drop all packets that match this.
372 */
373 int
374 overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
375 socklen_t *slenp, uint64_t *vidp)
376 {
377 int ret;
378 struct sockaddr_in6 *v6;
379 overlay_target_t *ott;
380 mac_header_info_t mhi;
381 overlay_target_entry_t *entry;
382
383 ASSERT(odd->odd_target != NULL);
384
385 *vidp = odd->odd_vid;
386
387 /*
388 * At this point, the overlay device is in a mux which means that it's
389 * been activated. At this point, parts of the target, such as the mode
390 * and the destination are now read-only and we don't have to worry
391 * about synchronization for them.
392 */
393 ott = odd->odd_target;
394 if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
395 return (OVERLAY_TARGET_DROP);
396
397 v6 = (struct sockaddr_in6 *)sock;
398 bzero(v6, sizeof (struct sockaddr_in6));
399 v6->sin6_family = AF_INET6;
400
401 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
402 mutex_enter(&ott->ott_lock);
403 bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
404 sizeof (struct in6_addr));
405 v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
406 mutex_exit(&ott->ott_lock);
407 *slenp = sizeof (struct sockaddr_in6);
408
409 return (OVERLAY_TARGET_OK);
410 }
411
412 ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
413
414 /*
415 * VL2 -> UL3 lookups only need the destination VL2 mac address,
416 * however, if we end up having to route the packet, we will need
417 * the source vlan as part of the destination selection.
418 */
419 if (mac_vlan_header_info(odd->odd_mh, mp, &mhi) != 0)
420 return (OVERLAY_TARGET_DROP);
421
422 mutex_enter(&ott->ott_lock);
423 entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
424 mhi.mhi_daddr);
425 if (entry == NULL) {
426 entry = kmem_cache_alloc(overlay_entry_cache,
427 KM_NOSLEEP | KM_NORMALPRI);
428 if (entry == NULL) {
429 mutex_exit(&ott->ott_lock);
430 return (OVERLAY_TARGET_DROP);
431 }
432 bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
433 entry->ote_chead = entry->ote_ctail = mp;
434 entry->ote_mbsize = msgsize(mp);
435 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
436 entry->ote_ott = ott;
437 entry->ote_odd = odd;
438
439 OVERLAY_TARG_ENTRY_REFHOLD(entry);
440 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
441
442 OVERLAY_TARG_ENTRY_REFHOLD(entry);
443 avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
444
445 mutex_exit(&ott->ott_lock);
446 overlay_target_queue(entry);
447 return (OVERLAY_TARGET_ASYNC);
448 }
449 OVERLAY_TARG_ENTRY_REFHOLD(entry);
450 mutex_exit(&ott->ott_lock);
451
452 mutex_enter(&entry->ote_lock);
453 if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
454 ret = OVERLAY_TARGET_DROP;
455 } else if (entry->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
456 ret = overlay_route_lookup(odd, mp, &mhi, sock, slenp, vidp);
457 } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
458 bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
459 sizeof (struct in6_addr));
460 v6->sin6_port = htons(entry->ote_dest.otp_port);
461 *slenp = sizeof (struct sockaddr_in6);
462 ret = OVERLAY_TARGET_OK;
463 } else {
464 size_t mlen = msgsize(mp);
465
466 if (mlen + entry->ote_mbsize > overlay_ent_size) {
467 ret = OVERLAY_TARGET_DROP;
468 } else {
469 if (entry->ote_ctail != NULL) {
470 ASSERT(entry->ote_ctail->b_next ==
471 NULL);
472 entry->ote_ctail->b_next = mp;
473 entry->ote_ctail = mp;
474 } else {
475 entry->ote_chead = mp;
476 entry->ote_ctail = mp;
477 }
478 entry->ote_mbsize += mlen;
479 if ((entry->ote_flags &
480 OVERLAY_ENTRY_F_PENDING) == 0) {
481 entry->ote_flags |=
482 OVERLAY_ENTRY_F_PENDING;
483 overlay_target_queue(entry);
484 }
485 ret = OVERLAY_TARGET_ASYNC;
486 }
487 }
488 mutex_exit(&entry->ote_lock);
489
490 mutex_enter(&ott->ott_lock);
491 OVERLAY_TARG_ENTRY_REFRELE(entry);
492 mutex_exit(&ott->ott_lock);
493
494 return (ret);
495 }
496
497 /* ARGSUSED */
498 static int
499 overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
500 {
501 overlay_dev_t *odd;
502 overlay_targ_info_t *oti = arg;
503
504 odd = overlay_hold_by_dlid(oti->oti_linkid);
505 if (odd == NULL)
506 return (ENOENT);
507
508 mutex_enter(&odd->odd_lock);
509 oti->oti_flags = 0;
510 oti->oti_needs = odd->odd_plugin->ovp_dest;
511 if (odd->odd_flags & OVERLAY_F_DEGRADED)
512 oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
513 if (odd->odd_flags & OVERLAY_F_ACTIVATED)
514 oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
515 oti->oti_vnetid = odd->odd_vid;
516 oti->oti_dcid = odd->odd_dcid;
517 mutex_exit(&odd->odd_lock);
518 overlay_hold_rele(odd);
519 return (0);
520 }
521
522 /* ARGSUSED */
523 static int
524 overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
525 {
526 overlay_dev_t *odd;
527 overlay_target_t *ott;
528 overlay_targ_associate_t *ota = arg;
529 overlay_router_t *ort;
530
531 odd = overlay_hold_by_dlid(ota->ota_linkid);
532 if (odd == NULL)
533 return (ENOENT);
534
535 if (ota->ota_id == 0) {
536 overlay_hold_rele(odd);
537 return (EINVAL);
538 }
539
540 if (ota->ota_mode != OVERLAY_TARGET_POINT &&
541 ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
542 overlay_hold_rele(odd);
543 return (EINVAL);
544 }
545
546 if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
547 overlay_hold_rele(odd);
548 return (EINVAL);
549 }
562 if (ota->ota_point.otp_port == 0) {
563 overlay_hold_rele(odd);
564 return (EINVAL);
565 }
566 }
567 }
568
569 ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
570 ott->ott_flags = 0;
571 ott->ott_ocount = 0;
572 ott->ott_mode = ota->ota_mode;
573 ott->ott_dest = ota->ota_provides;
574 ott->ott_id = ota->ota_id;
575
576 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
577 bcopy(&ota->ota_point, &ott->ott_u.ott_point,
578 sizeof (overlay_target_point_t));
579 } else {
580 ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
581 overlay_mac_hash, overlay_mac_cmp,
582 overlay_target_entry_null_dtor,
583 sizeof (overlay_target_entry_t),
584 offsetof(overlay_target_entry_t, ote_reflink),
585 offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
586 ott->ott_u.ott_dyn.ott_l3dhash = refhash_create(OVERLAY_HSIZE,
587 overlay_vl3_hash, overlay_vl3_cmp,
588 overlay_target_entry_null_dtor,
589 sizeof (overlay_target_entry_t),
590 offsetof(overlay_target_entry_t, ote_l3_reflink), 0,
591 KM_SLEEP);
592 avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
593 sizeof (overlay_target_entry_t),
594 offsetof(overlay_target_entry_t, ote_avllink));
595 avl_create(&ott->ott_u.ott_dyn.ott_l3tree, overlay_vl3_avl,
596 sizeof (overlay_target_entry_t),
597 offsetof(overlay_target_entry_t, ote_l3_avllink));
598
599 ort = kmem_zalloc(sizeof (*ort), KM_SLEEP);
600 mutex_init(&ort->otr_lock, NULL, MUTEX_DRIVER, NULL);
601 list_create(&ort->otr_tables, sizeof (overlay_route_table_t),
602 offsetof(overlay_route_table_t, ort_link));
603 avl_create(&ort->otr_tree, overlay_fabric_avl,
604 sizeof (overlay_fabric_entry_t),
605 offsetof(overlay_fabric_entry_t, ofe_avllink));
606 }
607 mutex_enter(&odd->odd_lock);
608 if (odd->odd_flags & OVERLAY_F_VARPD) {
609 mutex_exit(&odd->odd_lock);
610 kmem_cache_free(overlay_target_cache, ott);
611 overlay_hold_rele(odd);
612 return (EEXIST);
613 }
614
615 odd->odd_flags |= OVERLAY_F_VARPD;
616 odd->odd_target = ott;
617 mutex_exit(&odd->odd_lock);
618
619 overlay_hold_rele(odd);
620
621 return (0);
622 }
623
624
625 /* ARGSUSED */
626 static int
627 overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
628 {
629 overlay_dev_t *odd;
630 overlay_targ_degrade_t *otd = arg;
631
632 odd = overlay_hold_by_dlid(otd->otd_linkid);
633 if (odd == NULL)
634 return (ENOENT);
635
636 overlay_fm_degrade(odd, otd->otd_buf);
637 overlay_hold_rele(odd);
638 return (0);
639 }
640
662 overlay_targ_id_t *otid = arg;
663
664 odd = overlay_hold_by_dlid(otid->otid_linkid);
665 if (odd == NULL)
666 return (ENOENT);
667
668 mutex_enter(&odd->odd_lock);
669 odd->odd_flags &= ~OVERLAY_F_VARPD;
670 mutex_exit(&odd->odd_lock);
671
672 overlay_hold_rele(odd);
673 return (0);
674
675 }
676
677 static int
678 overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
679 {
680 overlay_targ_lookup_t *otl = arg;
681 overlay_target_entry_t *entry;
682 void *src, *dst;
683 clock_t ret, timeout;
684 mac_header_info_t mhi;
685 timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
686 again:
687 mutex_enter(&overlay_target_lock);
688 while (list_is_empty(&overlay_target_list)) {
689 ret = cv_timedwait(&overlay_target_condvar,
690 &overlay_target_lock, timeout);
691 if (ret == -1) {
692 mutex_exit(&overlay_target_lock);
693 return (ETIME);
694 }
695 }
696 entry = list_remove_head(&overlay_target_list);
697 mutex_exit(&overlay_target_lock);
698 mutex_enter(&entry->ote_lock);
699 if (entry->ote_flags &
700 (OVERLAY_ENTRY_F_PENDING | OVERLAY_ENTRY_F_VL3_PENDING)) {
701 ASSERT(entry->ote_chead == NULL);
702 mutex_exit(&entry->ote_lock);
703 goto again;
704 }
705 ASSERT(entry->ote_chead != NULL);
706
707
708 otl->otl_l3req = (entry->ote_flags & OVERLAY_ENTRY_F_VL3_PENDING) ?
709 B_TRUE : B_FALSE;
710
711 if (otl->otl_l3req) {
712 src = &otl->otl_addru.otlu_l3.otl3_srcip;
713 dst = &otl->otl_addru.otlu_l3.otl3_dstip;
714 } else {
715 src = &otl->otl_addru.otlu_l2.otl2_srcaddr;
716 dst = &otl->otl_addru.otlu_l2.otl2_dstaddr;
717 }
718
719 /*
720 * If we have a bogon that doesn't have a valid mac header, or an
721 * invalid IP header for IP requests, drop it and try again.
722 */
723 if ((mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
724 &mhi) != 0) ||
725 (otl->otl_l3req && overlay_mblk_vl3ip(entry->ote_chead, src,
726 dst) != 0)) {
727 boolean_t queue = B_FALSE;
728 mblk_t *mp = entry->ote_chead;
729 entry->ote_chead = mp->b_next;
730 mp->b_next = NULL;
731 if (entry->ote_ctail == mp)
732 entry->ote_ctail = entry->ote_chead;
733 entry->ote_mbsize -= msgsize(mp);
734 if (entry->ote_chead != NULL)
735 queue = B_TRUE;
736 mutex_exit(&entry->ote_lock);
737 if (queue == B_TRUE)
738 overlay_target_queue(entry);
739 freemsg(mp);
740 goto again;
741 }
742
743 otl->otl_dlid = entry->ote_odd->odd_linkid;
744 otl->otl_reqid = (uintptr_t)entry;
745 otl->otl_varpdid = entry->ote_ott->ott_id;
746 otl->otl_vnetid = entry->ote_odd->odd_vid;
747
748 otl->otl_hdrsize = mhi.mhi_hdrsize;
749 otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
750 otl->otl_addru.otlu_l2.otl2_dsttype = mhi.mhi_dsttype;
751 otl->otl_addru.otlu_l2.otl2_sap = mhi.mhi_bindsap;
752 otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
753
754 /*
755 * The overlay_mblk_vl3ip() call above fills in dst & src for
756 * VL3->UL3 requests, so only need to care about VL2->UL3 here.
757 */
758 if (!otl->otl_l3req) {
759 bcopy(mhi.mhi_daddr, dst, ETHERADDRL);
760 bcopy(mhi.mhi_saddr, src, ETHERADDRL);
761 }
762 mutex_exit(&entry->ote_lock);
763
764 mutex_enter(&thdl->oth_lock);
765 list_insert_tail(&thdl->oth_outstanding, entry);
766 mutex_exit(&thdl->oth_lock);
767
768 return (0);
769 }
770
771 static int
772 overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
773 {
774 const overlay_targ_resp_t *otr = arg;
775 overlay_target_entry_t *entry;
776 mblk_t *mp;
777 boolean_t is_router = B_FALSE;
778
779 /*
780 * If we ever support a protocol that uses MAC addresses for the UL
781 * destination addr, we probably should expand this to check that
782 * all of otr is zero.
783 */
784 if (IN6_IS_ADDR_UNSPECIFIED(&otr->otr_answer.otp_ip) &&
785 otr->otr_answer.otp_port == 0)
786 is_router = B_TRUE;
787
788 mutex_enter(&thdl->oth_lock);
789 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
790 entry = list_next(&thdl->oth_outstanding, entry)) {
791 if ((uintptr_t)entry == otr->otr_reqid)
792 break;
793 }
794
795 if (entry == NULL) {
796 mutex_exit(&thdl->oth_lock);
797 return (EINVAL);
798 }
799 list_remove(&thdl->oth_outstanding, entry);
800 mutex_exit(&thdl->oth_lock);
801
802 mutex_enter(&entry->ote_lock);
803 bcopy(&otr->otr_answer, &entry->ote_dest,
804 sizeof (overlay_target_point_t));
805 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
806 entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
807 if (is_router)
808 entry->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
809 mp = entry->ote_chead;
810 entry->ote_chead = NULL;
811 entry->ote_ctail = NULL;
812 entry->ote_mbsize = 0;
813 entry->ote_vtime = gethrtime();
814 mutex_exit(&entry->ote_lock);
815
816 /*
817 * For now do an in-situ drain.
818 *
819 * TODO: overlay_m_tx() will need to perform remote fabric attachment
820 * checks, which may leave mblk_t's left in the msg chain for
821 * mblk_t's whose connectivity with the target entry are unknown.
822 * This will then need to deal with the leftovers.
823 */
824 mp = overlay_m_tx(entry->ote_odd, mp);
825 freemsgchain(mp);
826
827 mutex_enter(&entry->ote_ott->ott_lock);
828 entry->ote_ott->ott_ocount--;
1211 return (ENXIO);
1212 }
1213 ott = odd->odd_target;
1214 if (ott->ott_mode != OVERLAY_TARGET_POINT &&
1215 ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1216 mutex_exit(&odd->odd_lock);
1217 overlay_hold_rele(odd);
1218 return (ENOTSUP);
1219 }
1220 mutex_enter(&ott->ott_lock);
1221 mutex_exit(&odd->odd_lock);
1222
1223 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1224 otc->otc_entry.otce_flags = 0;
1225 bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
1226 sizeof (overlay_target_point_t));
1227 } else {
1228 overlay_target_entry_t *ote;
1229 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1230 otc->otc_entry.otce_mac);
1231 if (ote == NULL) {
1232 ret = ENOENT;
1233 goto done;
1234 }
1235
1236 mutex_enter(&ote->ote_lock);
1237 if ((ote->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) != 0) {
1238 if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
1239 otc->otc_entry.otce_flags =
1240 OVERLAY_TARGET_CACHE_DROP;
1241 } else if (ote->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
1242 otc->otc_entry.otce_flags =
1243 OVERLAY_TARGET_CACHE_ROUTER;
1244 } else {
1245 otc->otc_entry.otce_flags = 0;
1246 bcopy(&ote->ote_dest, &otc->otc_entry.otce_dest,
1247 sizeof (overlay_target_point_t));
1248 }
1249 ret = 0;
1250 } else {
1251 ret = ENOENT;
1252 }
1253 mutex_exit(&ote->ote_lock);
1254 }
1255
1256 done:
1257 mutex_exit(&ott->ott_lock);
1258 overlay_hold_rele(odd);
1259
1260 return (ret);
1261 }
1262
1263 /* ARGSUSED */
1264 static int
1265 overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
1266 {
1267 overlay_dev_t *odd;
1268 overlay_target_t *ott;
1269 overlay_target_entry_t *ote;
1270 overlay_targ_cache_t *otc = arg;
1271 mblk_t *mp = NULL;
1272
1273 if (otc->otc_entry.otce_flags &
1274 ~(OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
1275 return (EINVAL);
1276
1277 if (otc->otc_entry.otce_flags ==
1278 (OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
1279 return (EINVAL);
1280
1281 odd = overlay_hold_by_dlid(otc->otc_linkid);
1282 if (odd == NULL)
1283 return (ENOENT);
1284
1285 mutex_enter(&odd->odd_lock);
1286 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1287 mutex_exit(&odd->odd_lock);
1288 overlay_hold_rele(odd);
1289 return (ENXIO);
1290 }
1291 ott = odd->odd_target;
1292 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1293 mutex_exit(&odd->odd_lock);
1294 overlay_hold_rele(odd);
1295 return (ENOTSUP);
1296 }
1297 mutex_enter(&ott->ott_lock);
1298 mutex_exit(&odd->odd_lock);
1299
1300 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1301 otc->otc_entry.otce_mac);
1302 if (ote == NULL) {
1303 ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
1304 bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
1305 ote->ote_chead = ote->ote_ctail = NULL;
1306 ote->ote_mbsize = 0;
1307 ote->ote_ott = ott;
1308 ote->ote_odd = odd;
1309 mutex_enter(&ote->ote_lock);
1310 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
1311 avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
1312 } else {
1313 mutex_enter(&ote->ote_lock);
1314 }
1315
1316 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
1317 ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
1318 } else {
1319 ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
1320 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_ROUTER)
1321 ote->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
1322 bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
1323 sizeof (overlay_target_point_t));
1324 mp = ote->ote_chead;
1325 ote->ote_chead = NULL;
1326 ote->ote_ctail = NULL;
1327 ote->ote_mbsize = 0;
1328 ote->ote_vtime = gethrtime();
1329 }
1330
1331 mutex_exit(&ote->ote_lock);
1332 mutex_exit(&ott->ott_lock);
1333
1334 if (mp != NULL) {
1335 mp = overlay_m_tx(ote->ote_odd, mp);
1336 freemsgchain(mp);
1337 }
1338
1339 overlay_hold_rele(odd);
1340
1341 return (0);
|