Print this page
Overlay fabric router


  63         list_node_t oth_link;           /* overlay_target_lock */
  64         kmutex_t oth_lock;
  65         list_t  oth_outstanding;        /* oth_lock */
  66 } overlay_target_hdl_t;
  67 
  68 typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
  69 typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
  70 typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);
  71 
  72 typedef struct overaly_target_ioctl {
             /*
              * Descriptor for a single target ioctl: the command id, whether
              * FWRITE is required, the copyin / handler / copyout callbacks and
              * the size of the user-level structure.
              * NOTE(review): the struct tag is spelled "overaly" (sic).
              * NOTE(review): oti_ncopyout reads as "no copyout" while its
              * comment asks "copyout data?" -- confirm the polarity.
              */
  73         int             oti_cmd;        /* ioctl id */
  74         boolean_t       oti_write;      /* ioctl requires FWRITE */
  75         boolean_t       oti_ncopyout;   /* copyout data? */
  76         overlay_target_copyin_f oti_copyin;     /* copyin func */
  77         overlay_target_ioctl_f oti_func; /* function to call */
  78         overlay_target_copyout_f oti_copyout;   /* copyout func */
  79         size_t          oti_size;       /* size of user level structure */
  80 } overlay_target_ioctl_t;
  81 
  82 static kmem_cache_t *overlay_target_cache;      /* overlay_target_t buffers */
  83 static kmem_cache_t *overlay_entry_cache;       /* overlay_target_entry_t buffers */
  84 static id_space_t *overlay_thdl_idspace;
  85 static void *overlay_thdl_state;
  86 
  87 /*
  88  * When we support overlay devices in the NGZ, then all of these need to become
  89  * zone aware, by plugging into the netstack engine and becoming per-netstack
  90  * data.
  91  */
  92 static list_t overlay_thdl_list;        /* overlay_target_lock */
  93 static kmutex_t overlay_target_lock;
  94 static kcondvar_t overlay_target_condvar; /* signalled when an entry is queued */
  95 static list_t overlay_target_list;      /* entries awaiting lookup; overlay_target_lock */
  96 static boolean_t overlay_target_excl;
  97 
  98 /*
  99  * Limit, in bytes, on the outstanding (queued) packet data per hash table
     * entry. overlay_target_lookup() drops a packet rather than queue it when
     * adding it would exceed this value.
 100  */
 101 static int overlay_ent_size = 128 * 1024;
 102 
 103 /* ARGSUSED */
     /*
      * Cache constructor for overlay_target_t buffers (presumably registered
      * with overlay_target_cache; the registration is not visible here).
      * Initializes the target's lock and condition variable so they persist
      * across allocations. arg and kmflgs are unused. Always returns 0.
      */
 104 static int
 105 overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
 106 {
 107         overlay_target_t *ott = buf;
 108 
 109         mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
 110         cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
 111         return (0);
 112 }
 113 
 114 /* ARGSUSED */
 115 static void
 116 overlay_target_cache_destructor(void *buf, void *arg)
 117 {
 118         overlay_target_t *ott = buf;
 119 
 120         cv_destroy(&ott->ott_cond);
 121         mutex_destroy(&ott->ott_lock);


 124 /* ARGSUSED */
     /*
      * Cache constructor for overlay_target_entry_t buffers (presumably
      * registered with overlay_entry_cache; the registration is not visible
      * here). Zeroes the whole entry and then initializes its lock. arg and
      * kmflgs are unused. Always returns 0.
      */
 125 static int
 126 overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
 127 {
 128         overlay_target_entry_t *ote = buf;
 129 
 130         bzero(ote, sizeof (overlay_target_entry_t));
 131         mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
 132         return (0);
 133 }
 134 
 135 /* ARGSUSED */
     /*
      * Cache destructor counterpart of overlay_entry_cache_constructor():
      * tears down the entry's lock. arg is unused.
      */
 136 static void
 137 overlay_entry_cache_destructor(void *buf, void *arg)
 138 {
 139         overlay_target_entry_t *ote = buf;
 140 
 141         mutex_destroy(&ote->ote_lock);
 142 }
 143 
 144 /* TODO: we will need to modify these to hash/cmp DCID + MAC */
 145 
 146 static uint64_t
 147 overlay_mac_hash(const void *v)
 148 {
             /*
              * Hash callback handed to refhash_create() in
              * overlay_target_associate(): CRC32 over the ETHERADDRL bytes at v,
              * seeded with -1U, returned widened to uint64_t.
              */
 149         uint32_t crc;
 150         CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
 151         return (crc);
 152 }
 153 
 154 static int
 155 overlay_mac_cmp(const void *a, const void *b)
 156 {
             /*
              * Equality callback handed to refhash_create(): returns 0 when the
              * two ETHERADDRL-byte addresses are identical, non-zero otherwise.
              * This is not an ordering; see overlay_mac_avl() for that.
              */
 157         return (bcmp(a, b, ETHERADDRL));
 158 }
 159 




































 160 /* ARGSUSED */
     /*
      * Destructor handed to refhash_create() in overlay_target_associate().
      * Resets the entry's flags, address and back pointers, frees any message
      * chain still queued on it, clears the queue bookkeeping and returns the
      * entry to overlay_entry_cache.
      */
 161 static void






 162 overlay_target_entry_dtor(void *arg)
 163 {
 164         overlay_target_entry_t *ote = arg;
 165 


 166         ote->ote_flags = 0;
 167         bzero(ote->ote_addr, ETHERADDRL);

 168         ote->ote_ott = NULL;
 169         ote->ote_odd = NULL;

             /* Packets still waiting on a lookup are discarded with the entry. */
 170         freemsgchain(ote->ote_chead);
 171         ote->ote_chead = ote->ote_ctail = NULL;
 172         ote->ote_mbsize = 0;
 173         ote->ote_vtime = 0;
 174         kmem_cache_free(overlay_entry_cache, ote);
 175 }
 176 
 177 static int
 178 overlay_mac_avl(const void *a, const void *b)
 179 {
 180         int i;
 181         const overlay_target_entry_t *l, *r;
 182         l = a;
 183         r = b;
 184 
 185         for (i = 0; i < ETHERADDRL; i++) {
 186                 if (l->ote_addr[i] > r->ote_addr[i])
 187                         return (1);
 188                 else if (l->ote_addr[i] < r->ote_addr[i])
 189                         return (-1);


 219 overlay_target_fini(void)
 220 {
 221         id_space_destroy(overlay_thdl_idspace);
 222         list_destroy(&overlay_thdl_list);
 223         list_destroy(&overlay_target_list);
 224         cv_destroy(&overlay_target_condvar);
 225         mutex_destroy(&overlay_target_lock);
 226         kmem_cache_destroy(overlay_entry_cache);
 227         kmem_cache_destroy(overlay_target_cache);
 228         ddi_soft_state_fini(&overlay_thdl_state);
 229 }
 230 
 231 void
 232 overlay_target_free(overlay_dev_t *odd)
 233 {
             /*
              * Releases the target attached to odd, if any. For a dynamic
              * target every entry is removed from the AVL tree and the refhash
              * (whose dtor frees the entries) before both containers are
              * destroyed. The target is then returned to overlay_target_cache
              * and odd->odd_target is cleared so that a later call, or any other
              * NULL check on the field, does not touch freed memory.
              */
 234         if (odd->odd_target == NULL)
 235                 return;
 236 
 237         if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
 238                 refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;

 239                 avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;

 240                 overlay_target_entry_t *ote;
 241 
 242                 /* TODO: remove from L3 trees */
 243 
 244                 /*
 245                  * Our AVL tree and hashtable contain the same elements,
 246                  * therefore we should just remove it from the tree, but then
 247                  * delete the entries when we remove them from the hash table
 248                  * (which happens through the refhash dtor).
 249                  */
 250                 while ((ote = avl_first(ap)) != NULL)
 251                         avl_remove(ap, ote);
 252 

 253                 avl_destroy(ap);







 254                 for (ote = refhash_first(rp); ote != NULL;
 255                     ote = refhash_next(rp, ote)) {
 256                         refhash_remove(rp, ote);

 257                 }
 258                 refhash_destroy(rp);





 259         }


 260 
             /* No lookups may still be outstanding against this target. */
 261         ASSERT(odd->odd_target->ott_ocount == 0);

 262         kmem_cache_free(overlay_target_cache, odd->odd_target);
             /* Do not leave a dangling pointer behind the NULL guard above. */
             odd->odd_target = NULL;

 263 }
 264 
 265 int
 266 overlay_target_busy(void)
 267 {
             /*
              * Reports whether any target handle is currently open: returns
              * non-zero when overlay_thdl_list is non-empty, 0 otherwise. The
              * list is sampled under overlay_target_lock. Declared with an
              * explicit (void) parameter list so the definition is a prototype.
              */
 268         int ret;
 269 
 270         mutex_enter(&overlay_target_lock);
 271         ret = !list_is_empty(&overlay_thdl_list);
 272         mutex_exit(&overlay_target_lock);
 273 
 274         return (ret);
 275 }
 276 
 277 static void
 278 overlay_target_queue(overlay_target_entry_t *entry)
 279 {
             /*
              * Appends entry to the global overlay_target_list and signals
              * overlay_target_condvar so a waiter in
              * overlay_target_lookup_request() can pick it up. Nothing is queued
              * once the owning target has OVERLAY_T_TEARDOWN set. Each
              * successful queueing increments the target's ott_ocount.
              * Lock order: overlay_target_lock, then ott_lock.
              */
 280         mutex_enter(&overlay_target_lock);
 281         mutex_enter(&entry->ote_ott->ott_lock);
 282         if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
 283                 mutex_exit(&entry->ote_ott->ott_lock);
 284                 mutex_exit(&overlay_target_lock);
 285                 return;
 286         }
 287         entry->ote_ott->ott_ocount++;
 288         mutex_exit(&entry->ote_ott->ott_lock);
 289         list_insert_tail(&overlay_target_list, entry);
 290         cv_signal(&overlay_target_condvar);
 291         mutex_exit(&overlay_target_lock);
 292 }
 293 
 294 void
 295 overlay_target_quiesce(overlay_target_t *ott)
 296 {
             /*
              * Marks the target as tearing down, which stops
              * overlay_target_queue() from queueing further entries, and then
              * blocks on ott_cond until ott_ocount has dropped to zero. A NULL
              * ott is a no-op.
              */
 297         if (ott == NULL)
 298                 return;
 299         mutex_enter(&ott->ott_lock);
 300         ott->ott_flags |= OVERLAY_T_TEARDOWN;
 301         while (ott->ott_ocount != 0)
 302                 cv_wait(&ott->ott_cond, &ott->ott_lock);
 303         mutex_exit(&ott->ott_lock);
 304 }
 305 
 306 /*
 307  * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
 308  * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at
 309  * this time, say for NVGRE, we drop all packets that don't match this.
 310  *
 311  * XXX: It might be better to replace the 'sock' argument with
 312  * overlay_target_entry_t** and set it with the found entry in the case
 313  * of OVERLAY_TARGET_OK.
     *
     * Returns OVERLAY_TARGET_OK with 'sock' (written as a sockaddr_in6) and
     * '*slenp' filled in, OVERLAY_TARGET_DROP when the packet cannot be sent,
     * or OVERLAY_TARGET_ASYNC when 'mp' has been appended to an entry's queue
     * to await a lookup answer.
 314  */
 315 int
 316 overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
 317     socklen_t *slenp)
 318 {
 319         int ret;
 320         struct sockaddr_in6 *v6;
 321         overlay_target_t *ott;
 322         mac_header_info_t mhi;
 323         overlay_target_entry_t *entry;
 324 
 325         ASSERT(odd->odd_target != NULL);
 326 


 327         /*
 328          * At this point, the overlay device is in a mux which means that it's
 329          * been activated. At this point, parts of the target, such as the mode
 330          * and the destination are now read-only and we don't have to worry
 331          * about synchronization for them.
 332          */
 333         ott = odd->odd_target;
 334         if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
 335                 return (OVERLAY_TARGET_DROP);
 336 
 337         v6 = (struct sockaddr_in6 *)sock;
 338         bzero(v6, sizeof (struct sockaddr_in6));
 339         v6->sin6_family = AF_INET6;
 340 
             /* Point-to-point target: every packet goes to the one destination. */
 341         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
 342                 mutex_enter(&ott->ott_lock);
 343                 bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
 344                     sizeof (struct in6_addr));
 345                 v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
 346                 mutex_exit(&ott->ott_lock);
 347                 *slenp = sizeof (struct sockaddr_in6);
 348 
 349                 return (OVERLAY_TARGET_OK);
 350         }
 351 
 352         ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
 353 
 354         /*
 355          * Note we only want the MAC address here, therefore we won't bother
 356          * using mac_vlan_header_info(). If any caller needs the vlan info at
 357          * this point, this should change to a call to mac_vlan_header_info().
 358          */
 359         if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
 360                 return (OVERLAY_TARGET_DROP);
 361 
 362         /*
 363          * TODO: compare mhi.mhi_daddr with odd->macaddr.
 364          * If match,
 365          *      get VL3 dest from mp
 366          *      lookup target using VL3 dest
 367          * otherwise,
 368          *      lookup target using VL2 dest (existing refhash_lookup() call
 369          *      below)
 370          */
 371         mutex_enter(&ott->ott_lock);
 372         entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
 373             mhi.mhi_daddr);
 374         if (entry == NULL) {
                     /*
                      * First packet for this destination MAC: allocate an entry
                      * without sleeping, start its queue with mp, publish it in
                      * both the hash and the tree, and queue it for lookup.
                      */
 375                 entry = kmem_cache_alloc(overlay_entry_cache,
 376                     KM_NOSLEEP | KM_NORMALPRI);
 377                 if (entry == NULL) {
 378                         mutex_exit(&ott->ott_lock);
 379                         return (OVERLAY_TARGET_DROP);
 380                 }
 381                 /*
 382                  * TODO: set entry->ote_dcid, if VL3 lookup, copy dst addr
 383                  * into entry->ote_ip.  Probably zero out the address we're
 384                  * not looking up (VL2 or VL3) as well.
 385                  */
 386                 bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
 387                 entry->ote_chead = entry->ote_ctail = mp;
 388                 entry->ote_mbsize = msgsize(mp);
 389                 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
 390                 entry->ote_ott = ott;
 391                 entry->ote_odd = odd;


 392                 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);


 393                 avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);

 394                 mutex_exit(&ott->ott_lock);
 395                 overlay_target_queue(entry);
 396                 return (OVERLAY_TARGET_ASYNC);
 397         }
             /* Take a hold on the entry before dropping ott_lock; released below. */
 398         refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
 399         mutex_exit(&ott->ott_lock);
 400 
 401         mutex_enter(&entry->ote_lock);
 402         if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
 403                 ret = OVERLAY_TARGET_DROP;


 404         } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
 405                 bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
 406                     sizeof (struct in6_addr));
 407                 v6->sin6_port = htons(entry->ote_dest.otp_port);
 408                 *slenp = sizeof (struct sockaddr_in6);
 409                 ret = OVERLAY_TARGET_OK;
 410         } else {
                     /*
                      * No answer yet: queue mp behind the packets already
                      * waiting, unless that would exceed overlay_ent_size.
                      */
 411                 size_t mlen = msgsize(mp);
 412 
 413                 if (mlen + entry->ote_mbsize > overlay_ent_size) {
 414                         ret = OVERLAY_TARGET_DROP;
 415                 } else {
 416                         if (entry->ote_ctail != NULL) {
 417                                 ASSERT(entry->ote_ctail->b_next ==
 418                                     NULL);
 419                                 entry->ote_ctail->b_next = mp;
 420                                 entry->ote_ctail = mp;
 421                         } else {
 422                                 entry->ote_chead = mp;
 423                                 entry->ote_ctail = mp;
 424                         }
 425                         entry->ote_mbsize += mlen;
                             /* Only request a lookup if one is not already pending. */
 426                         if ((entry->ote_flags &
 427                             OVERLAY_ENTRY_F_PENDING) == 0) {
 428                                 entry->ote_flags |=
 429                                     OVERLAY_ENTRY_F_PENDING;
 430                                 overlay_target_queue(entry);
 431                         }
 432                         ret = OVERLAY_TARGET_ASYNC;
 433                 }
 434         }
 435         mutex_exit(&entry->ote_lock);
 436 
 437         mutex_enter(&ott->ott_lock);
 438         refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
 439         mutex_exit(&ott->ott_lock);
 440 
 441         return (ret);
 442 }
 443 
 444 /* ARGSUSED */
     /*
      * ioctl handler: fills the overlay_targ_info_t in arg for the device
      * named by oti_linkid. Reports the plugin's destination needs, the
      * degraded / activated state, the virtual network id and the dcid, all
      * sampled under odd_lock. Returns ENOENT when no device matches the link
      * id, 0 otherwise. thdl is unused.
      */
 445 static int
 446 overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
 447 {
 448         overlay_dev_t *odd;
 449         overlay_targ_info_t *oti = arg;
 450 
 451         odd = overlay_hold_by_dlid(oti->oti_linkid);
 452         if (odd == NULL)
 453                 return (ENOENT);
 454 
 455         mutex_enter(&odd->odd_lock);
 456         oti->oti_flags = 0;
 457         oti->oti_needs = odd->odd_plugin->ovp_dest;
 458         if (odd->odd_flags & OVERLAY_F_DEGRADED)
 459                 oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
 460         if (odd->odd_flags & OVERLAY_F_ACTIVATED)
 461                 oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
 462         oti->oti_vnetid = odd->odd_vid;
 463         oti->oti_dcid = odd->odd_dcid;
 464         mutex_exit(&odd->odd_lock);
 465         overlay_hold_rele(odd);
 466         return (0);
 467 }
 468 
 469 /* ARGSUSED */
 470 static int
 471 overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
 472 {
 473         overlay_dev_t *odd;
 474         overlay_target_t *ott;
 475         overlay_targ_associate_t *ota = arg;

 476 
 477         odd = overlay_hold_by_dlid(ota->ota_linkid);
 478         if (odd == NULL)
 479                 return (ENOENT);
 480 
 481         if (ota->ota_id == 0) {
 482                 overlay_hold_rele(odd);
 483                 return (EINVAL);
 484         }
 485 
 486         if (ota->ota_mode != OVERLAY_TARGET_POINT &&
 487             ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
 488                 overlay_hold_rele(odd);
 489                 return (EINVAL);
 490         }
 491 
 492         if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
 493                 overlay_hold_rele(odd);
 494                 return (EINVAL);
 495         }


 508                         if (ota->ota_point.otp_port == 0) {
 509                                 overlay_hold_rele(odd);
 510                                 return (EINVAL);
 511                         }
 512                 }
 513         }
 514 
 515         ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
 516         ott->ott_flags = 0;
 517         ott->ott_ocount = 0;
 518         ott->ott_mode = ota->ota_mode;
 519         ott->ott_dest = ota->ota_provides;
 520         ott->ott_id = ota->ota_id;
 521 
 522         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
 523                 bcopy(&ota->ota_point, &ott->ott_u.ott_point,
 524                     sizeof (overlay_target_point_t));
 525         } else {
 526                 ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
 527                     overlay_mac_hash, overlay_mac_cmp,
 528                     overlay_target_entry_dtor, sizeof (overlay_target_entry_t),

 529                     offsetof(overlay_target_entry_t, ote_reflink),
 530                     offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);






 531                 avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
 532                     sizeof (overlay_target_entry_t),
 533                     offsetof(overlay_target_entry_t, ote_avllink));











 534         }
 535         mutex_enter(&odd->odd_lock);
 536         if (odd->odd_flags & OVERLAY_F_VARPD) {
 537                 mutex_exit(&odd->odd_lock);
 538                 kmem_cache_free(overlay_target_cache, ott);
 539                 overlay_hold_rele(odd);
 540                 return (EEXIST);
 541         }
 542 
 543         odd->odd_flags |= OVERLAY_F_VARPD;
 544         odd->odd_target = ott;
 545         mutex_exit(&odd->odd_lock);
 546 
 547         overlay_hold_rele(odd);
 548 
 549 
 550         return (0);
 551 }
 552 
 553 
 554 /* ARGSUSED */
     /*
      * ioctl handler: looks up the device named by otd_linkid and passes it,
      * together with the caller-supplied otd_buf, to overlay_fm_degrade().
      * Returns ENOENT when no device matches the link id, 0 otherwise. thdl
      * is unused.
      */
 555 static int
 556 overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
 557 {
 558         overlay_dev_t *odd;
 559         overlay_targ_degrade_t *otd = arg;
 560 
 561         odd = overlay_hold_by_dlid(otd->otd_linkid);
 562         if (odd == NULL)
 563                 return (ENOENT);
 564 
 565         overlay_fm_degrade(odd, otd->otd_buf);
 566         overlay_hold_rele(odd);
 567         return (0);
 568 }
 569 


 591         overlay_targ_id_t *otid = arg;
 592 
 593         odd = overlay_hold_by_dlid(otid->otid_linkid);
 594         if (odd == NULL)
 595                 return (ENOENT);
 596 
 597         mutex_enter(&odd->odd_lock);
 598         odd->odd_flags &= ~OVERLAY_F_VARPD;
 599         mutex_exit(&odd->odd_lock);
 600 
 601         overlay_hold_rele(odd);
 602         return (0);
 603 
 604 }
 605 
 606 static int
 607 overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
 608 {
             /*
              * ioctl handler: hands the next pending lookup to the caller. Waits
              * on overlay_target_condvar, until a deadline of MICROSEC
              * microseconds from entry, for an entry to appear on
              * overlay_target_list, and returns ETIME if the wait times out.
              * On success the overlay_targ_lookup_t in arg describes the first
              * packet queued on the entry, and the entry is parked on
              * thdl->oth_outstanding until it is answered.
              *
              * NOTE(review): overlay_target_queue() increments ott_ocount every
              * time it queues an entry. The two "goto again" paths below
              * dequeue an entry (and one re-queues it) without a visible
              * matching decrement -- confirm the ott_ocount accounting.
              */
 609         overlay_targ_lookup_t *otl = arg;
 610         overlay_target_entry_t *entry;

 611         clock_t ret, timeout;
 612         mac_header_info_t mhi;
 613 
             /* Absolute deadline; it is not extended when we loop via "again". */
 614         timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
 615 again:
 616         mutex_enter(&overlay_target_lock);
 617         while (list_is_empty(&overlay_target_list)) {
 618                 ret = cv_timedwait(&overlay_target_condvar,
 619                     &overlay_target_lock, timeout);
 620                 if (ret == -1) {
 621                         mutex_exit(&overlay_target_lock);
 622                         return (ETIME);
 623                 }
 624         }
 625         entry = list_remove_head(&overlay_target_list);
 626         mutex_exit(&overlay_target_lock);
 627         mutex_enter(&entry->ote_lock);
             /* Already answered while it sat on the list: nothing to ask. */
 628         if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {

 629                 ASSERT(entry->ote_chead == NULL);
 630                 mutex_exit(&entry->ote_lock);
 631                 goto again;
 632         }
 633         ASSERT(entry->ote_chead != NULL);
 634 












 635         /*
 636          * If we have a bogon that doesn't have a valid mac header, drop it and
 637          * try again.
 638          */
 639         if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
 640             &mhi) != 0) {


 641                 boolean_t queue = B_FALSE;
 642                 mblk_t *mp = entry->ote_chead;
                     /* Unlink the bad packet from the head of the entry's chain. */
 643                 entry->ote_chead = mp->b_next;
 644                 mp->b_next = NULL;
 645                 if (entry->ote_ctail == mp)
 646                         entry->ote_ctail = entry->ote_chead;
 647                 entry->ote_mbsize -= msgsize(mp);
                     /* Re-queue the entry only if packets remain behind it. */
 648                 if (entry->ote_chead != NULL)
 649                         queue = B_TRUE;
 650                 mutex_exit(&entry->ote_lock);
 651                 if (queue == B_TRUE)
 652                         overlay_target_queue(entry);
 653                 freemsg(mp);
 654                 goto again;
 655         }
 656 
 657         /*
 658          * TODO: If VL3 request,
 659          *      set otl->otl_l3req
 660          *      Fill in otl_{src,dst}ip
 661          * Else
 662          *      clear otl->otl_l3req
 663          */
 664         otl->otl_dlid = entry->ote_odd->odd_linkid;
             /* The entry's address doubles as the request id matched on respond. */
 665         otl->otl_reqid = (uintptr_t)entry;
 666         otl->otl_varpdid = entry->ote_ott->ott_id;
 667         otl->otl_vnetid = entry->ote_odd->odd_vid;
 668 
 669         otl->otl_hdrsize = mhi.mhi_hdrsize;
 670         otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
 671         bcopy(mhi.mhi_daddr, otl->otl_addru.otlu_l2.otl2_dstaddr, ETHERADDRL);
 672         bcopy(mhi.mhi_saddr, otl->otl_addru.otlu_l2.otl2_srcaddr, ETHERADDRL);
 673         otl->otl_addru.otlu_l2.otl2_dsttype = mhi.mhi_dsttype;
 674         otl->otl_addru.otlu_l2.otl2_sap = mhi.mhi_bindsap;
 675         otl->otl_vlan = VLAN_ID(mhi.mhi_tci);









 676         mutex_exit(&entry->ote_lock);
 677 
 678         mutex_enter(&thdl->oth_lock);
 679         list_insert_tail(&thdl->oth_outstanding, entry);
 680         mutex_exit(&thdl->oth_lock);
 681 
 682         return (0);
 683 }
 684 
 685 static int
 686 overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
 687 {
 688         const overlay_targ_resp_t *otr = arg;
 689         overlay_target_entry_t *entry;
 690         mblk_t *mp;

 691 









 692         mutex_enter(&thdl->oth_lock);
 693         for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
 694             entry = list_next(&thdl->oth_outstanding, entry)) {
 695                 if ((uintptr_t)entry == otr->otr_reqid)
 696                         break;
 697         }
 698 
 699         if (entry == NULL) {
 700                 mutex_exit(&thdl->oth_lock);
 701                 return (EINVAL);
 702         }
 703         list_remove(&thdl->oth_outstanding, entry);
 704         mutex_exit(&thdl->oth_lock);
 705 
 706         mutex_enter(&entry->ote_lock);
 707         bcopy(&otr->otr_answer, &entry->ote_dest,
 708             sizeof (overlay_target_point_t));
 709         entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
 710         entry->ote_flags |= OVERLAY_ENTRY_F_VALID;


 711         mp = entry->ote_chead;
 712         entry->ote_chead = NULL;
 713         entry->ote_ctail = NULL;
 714         entry->ote_mbsize = 0;
 715         entry->ote_vtime = gethrtime();
 716         mutex_exit(&entry->ote_lock);
 717 
 718         /*
 719          * For now do an in-situ drain.
 720          *
 721          * TODO: overlay_m_tx() will need to perform remote fabric attachment
 722          * checks, which may leave mblk_t's left in the msg chain for
 723          * mblk_t's whose connectivity with the target entry are unknown.
 724          * This will then need to deal with the leftovers.
 725          */
 726         mp = overlay_m_tx(entry->ote_odd, mp);
 727         freemsgchain(mp);
 728 
 729         mutex_enter(&entry->ote_ott->ott_lock);
 730         entry->ote_ott->ott_ocount--;


1113                 return (ENXIO);
1114         }
1115         ott = odd->odd_target;
1116         if (ott->ott_mode != OVERLAY_TARGET_POINT &&
1117             ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1118                 mutex_exit(&odd->odd_lock);
1119                 overlay_hold_rele(odd);
1120                 return (ENOTSUP);
1121         }
1122         mutex_enter(&ott->ott_lock);
1123         mutex_exit(&odd->odd_lock);
1124 
1125         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1126                 otc->otc_entry.otce_flags = 0;
1127                 bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
1128                     sizeof (overlay_target_point_t));
1129         } else {
1130                 overlay_target_entry_t *ote;
1131                 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1132                     otc->otc_entry.otce_mac);
1133                 if (ote != NULL) {




1134                         mutex_enter(&ote->ote_lock);
1135                         if ((ote->ote_flags &
1136                             OVERLAY_ENTRY_F_VALID_MASK) != 0) {
1137                                 if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
1138                                         otc->otc_entry.otce_flags =
1139                                             OVERLAY_TARGET_CACHE_DROP;



1140                                 } else {
1141                                         otc->otc_entry.otce_flags = 0;
1142                                         bcopy(&ote->ote_dest,
1143                                             &otc->otc_entry.otce_dest,
1144                                             sizeof (overlay_target_point_t));
1145                                 }
1146                                 ret = 0;
1147                         } else {
1148                                 ret = ENOENT;
1149                         }
1150                         mutex_exit(&ote->ote_lock);
1151                 } else {
1152                         ret = ENOENT;
1153                 }
1154         }
1155 

1156         mutex_exit(&ott->ott_lock);
1157         overlay_hold_rele(odd);
1158 
1159         return (ret);
1160 }
1161 
1162 /* ARGSUSED */
1163 static int
1164 overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
1165 {
1166         overlay_dev_t *odd;
1167         overlay_target_t *ott;
1168         overlay_target_entry_t *ote;
1169         overlay_targ_cache_t *otc = arg;
1170         mblk_t *mp = NULL;
1171 
1172         if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)

1173                 return (EINVAL);
1174 




1175         odd = overlay_hold_by_dlid(otc->otc_linkid);
1176         if (odd == NULL)
1177                 return (ENOENT);
1178 
1179         mutex_enter(&odd->odd_lock);
1180         if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1181                 mutex_exit(&odd->odd_lock);
1182                 overlay_hold_rele(odd);
1183                 return (ENXIO);
1184         }
1185         ott = odd->odd_target;
1186         if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1187                 mutex_exit(&odd->odd_lock);
1188                 overlay_hold_rele(odd);
1189                 return (ENOTSUP);
1190         }
1191         mutex_enter(&ott->ott_lock);
1192         mutex_exit(&odd->odd_lock);
1193 
1194         ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1195             otc->otc_entry.otce_mac);
1196         if (ote == NULL) {
1197                 ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
1198                 bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
1199                 ote->ote_chead = ote->ote_ctail = NULL;
1200                 ote->ote_mbsize = 0;
1201                 ote->ote_ott = ott;
1202                 ote->ote_odd = odd;
1203                 mutex_enter(&ote->ote_lock);
1204                 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
1205                 avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
1206         } else {
1207                 mutex_enter(&ote->ote_lock);
1208         }
1209 
1210         if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
1211                 ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
1212         } else {
1213                 ote->ote_flags |= OVERLAY_ENTRY_F_VALID;


1214                 bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
1215                     sizeof (overlay_target_point_t));
1216                 mp = ote->ote_chead;
1217                 ote->ote_chead = NULL;
1218                 ote->ote_ctail = NULL;
1219                 ote->ote_mbsize = 0;
1220                 ote->ote_vtime = gethrtime();
1221         }
1222 
1223         mutex_exit(&ote->ote_lock);
1224         mutex_exit(&ott->ott_lock);
1225 
1226         if (mp != NULL) {
1227                 mp = overlay_m_tx(ote->ote_odd, mp);
1228                 freemsgchain(mp);
1229         }
1230 
1231         overlay_hold_rele(odd);
1232 
1233         return (0);




  63         list_node_t oth_link;           /* overlay_target_lock */
  64         kmutex_t oth_lock;
  65         list_t  oth_outstanding;        /* oth_lock */
  66 } overlay_target_hdl_t;
  67 
  68 typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
  69 typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
  70 typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);
  71 
  72 typedef struct overaly_target_ioctl {
             /*
              * Descriptor for a single target ioctl: the command id, whether
              * FWRITE is required, the copyin / handler / copyout callbacks and
              * the size of the user-level structure.
              * NOTE(review): the struct tag is spelled "overaly" (sic).
              * NOTE(review): oti_ncopyout reads as "no copyout" while its
              * comment asks "copyout data?" -- confirm the polarity.
              */
  73         int             oti_cmd;        /* ioctl id */
  74         boolean_t       oti_write;      /* ioctl requires FWRITE */
  75         boolean_t       oti_ncopyout;   /* copyout data? */
  76         overlay_target_copyin_f oti_copyin;     /* copyin func */
  77         overlay_target_ioctl_f oti_func; /* function to call */
  78         overlay_target_copyout_f oti_copyout;   /* copyout func */
  79         size_t          oti_size;       /* size of user level structure */
  80 } overlay_target_ioctl_t;
  81 
  82 static kmem_cache_t *overlay_target_cache;
     /*
      * Cache of overlay_target_entry_t buffers. No longer static in this
      * revision, presumably so other overlay source files can use it --
      * confirm against the matching extern declaration.
      */
  83 kmem_cache_t *overlay_entry_cache;
  84 static id_space_t *overlay_thdl_idspace;
  85 static void *overlay_thdl_state;
  86 
  87 /*
  88  * When we support overlay devices in the NGZ, then all of these need to become
  89  * zone aware, by plugging into the netstack engine and becoming per-netstack
  90  * data.
  91  */
  92 static list_t overlay_thdl_list;
  93 static kmutex_t overlay_target_lock;
  94 static kcondvar_t overlay_target_condvar;
  95 static list_t overlay_target_list;
  96 static boolean_t overlay_target_excl;
  97 
  98 /*
  99  * Limit, in bytes, on the outstanding data per hash table entry. No longer
     * static in this revision, presumably so other overlay source files can
     * read it -- confirm against the matching extern declaration.
 100  */
 101 int overlay_ent_size = 128 * 1024;
 102 
 103 /* ARGSUSED */
     /*
      * Cache constructor for overlay_target_t buffers (presumably registered
      * with overlay_target_cache; the registration is not visible here).
      * Initializes the target's lock and condition variable so they persist
      * across allocations. arg and kmflgs are unused. Always returns 0.
      */
 104 static int
 105 overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
 106 {
 107         overlay_target_t *ott = buf;
 108 
 109         mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
 110         cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
 111         return (0);
 112 }
 113 
 114 /* ARGSUSED */
 115 static void
 116 overlay_target_cache_destructor(void *buf, void *arg)
 117 {
 118         overlay_target_t *ott = buf;
 119 
 120         cv_destroy(&ott->ott_cond);
 121         mutex_destroy(&ott->ott_lock);


 124 /* ARGSUSED */
     /*
      * Cache constructor for overlay_target_entry_t buffers (presumably
      * registered with overlay_entry_cache; the registration is not visible
      * here). Zeroes the whole entry and then initializes its lock. arg and
      * kmflgs are unused. Always returns 0.
      */
 125 static int
 126 overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
 127 {
 128         overlay_target_entry_t *ote = buf;
 129 
 130         bzero(ote, sizeof (overlay_target_entry_t));
 131         mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
 132         return (0);
 133 }
 134 
 135 /* ARGSUSED */
     /*
      * Cache destructor counterpart of overlay_entry_cache_constructor():
      * tears down the entry's lock. arg is unused.
      */
 136 static void
 137 overlay_entry_cache_destructor(void *buf, void *arg)
 138 {
 139         overlay_target_entry_t *ote = buf;
 140 
 141         mutex_destroy(&ote->ote_lock);
 142 }
 143 


 144 static uint64_t
 145 overlay_mac_hash(const void *v)
 146 {
             /*
              * Hash callback for MAC-keyed lookups: CRC32 over the ETHERADDRL
              * bytes at v, seeded with -1U, returned widened to uint64_t.
              */
 147         uint32_t crc;
 148         CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
 149         return (crc);
 150 }
 151 
 152 static int
 153 overlay_mac_cmp(const void *a, const void *b)
 154 {
             /*
              * Equality callback for MAC-keyed lookups: returns 0 when the two
              * ETHERADDRL-byte addresses are identical, non-zero otherwise.
              * This is not an ordering; see overlay_mac_avl() for that.
              */
 155         return (bcmp(a, b, ETHERADDRL));
 156 }
 157 
 158 static uint64_t
 159 overlay_vl3_hash(const void *v)
 160 {
             /*
              * Hash callback for VL3 lookups. v is interpreted as a whole
              * overlay_target_entry_t, not a bare key. The CRC32 is computed
              * over ote_ip (seeded with -1U) and then continued over the bytes
              * of the ote_fab member itself, so entries hash alike only when
              * both the address and the ote_fab value agree.
              */
 161         const overlay_target_entry_t *ote = v;
 162         uint32_t crc;
 163 
 164         CRC32(crc, &ote->ote_ip, sizeof (ote->ote_ip), -1U, crc32_table);
 165         CRC32(crc, &ote->ote_fab, sizeof (ote->ote_fab), crc, crc32_table);
 166         return (crc);
 167 }
 168 
 169 static int
 170 overlay_vl3_cmp(const void *a, const void *b)
 171 {
             /*
              * Equality callback for VL3 lookups: both arguments are
              * overlay_target_entry_t pointers. Returns 0 when ote_fab and
              * ote_ip both match, 1 otherwise. Not an ordering; see
              * overlay_vl3_avl() for that.
              */
 172         const overlay_target_entry_t *l = a;
 173         const overlay_target_entry_t *r = b;
 174 
 175         if (l->ote_fab != r->ote_fab ||
 176             bcmp(&l->ote_ip, &r->ote_ip, sizeof (struct in6_addr)) != 0)
 177                 return (1);
 178         return (0);
 179 }
 180 
 /*
  * Ordering callback for the VL3 AVL tree: entries sort by ote_fab first and
  * then by the raw bytes of ote_ip.
  *
  * AVL comparators must return exactly -1, 0 or 1. memcmp() only guarantees
  * the sign of its result, so that result is clamped here instead of being
  * returned directly.
  */
 181 static int
 182 overlay_vl3_avl(const void *a, const void *b)
 183 {
 184         const overlay_target_entry_t *l = a, *r = b;
 185         int cmp;
 186 
 187         if (l->ote_fab != r->ote_fab)
 188                 return (l->ote_fab < r->ote_fab ? -1 : 1);
 189 
 190         cmp = memcmp(&l->ote_ip, &r->ote_ip, sizeof (struct in6_addr));
 191         return ((cmp > 0) - (cmp < 0));
 192 }
 193 
 /*
  * Destructor callback that intentionally does nothing.
  * overlay_target_associate() hands it to refhash_create() for both the
  * MAC and the VL3 hash.
  */
 194 /* ARGSUSED */
 195 void
 196 overlay_target_entry_null_dtor(void *arg)
 197 {
 198 }
 199 
 /*
  * Final teardown of a target entry. The entry must no longer be referenced
  * (ote_refcnt is asserted to be zero). Any packets still chained on it are
  * freed, its fields are reset, and the buffer goes back to
  * overlay_entry_cache.
  */
 200 /* ARGSUSED */
 201 void
 202 overlay_target_entry_dtor(void *arg)
 203 {
 204         overlay_target_entry_t *entry = arg;
 205 
 206         ASSERT3U(entry->ote_refcnt, ==, 0);
 207 
 208         freemsgchain(entry->ote_chead);
 209         entry->ote_chead = entry->ote_ctail = NULL;
 210         entry->ote_mbsize = 0;
 211         entry->ote_vtime = 0;
 212         entry->ote_flags = 0;
 213         entry->ote_ott = NULL;
 214         entry->ote_odd = NULL;
 215         entry->ote_fab = NULL;
 216         bzero(&entry->ote_ip, sizeof (entry->ote_ip));
 217         bzero(entry->ote_addr, ETHERADDRL);
 218         kmem_cache_free(overlay_entry_cache, entry);
 219 }
 220 
 221 static int
 222 overlay_mac_avl(const void *a, const void *b)
 223 {
 224         int i;
 225         const overlay_target_entry_t *l, *r;
 226         l = a;
 227         r = b;
 228 
 229         for (i = 0; i < ETHERADDRL; i++) {
 230                 if (l->ote_addr[i] > r->ote_addr[i])
 231                         return (1);
 232                 else if (l->ote_addr[i] < r->ote_addr[i])
 233                         return (-1);


 263 overlay_target_fini(void)
 264 {
 265         id_space_destroy(overlay_thdl_idspace);
 266         list_destroy(&overlay_thdl_list);
 267         list_destroy(&overlay_target_list);
 268         cv_destroy(&overlay_target_condvar);
 269         mutex_destroy(&overlay_target_lock);
 270         kmem_cache_destroy(overlay_entry_cache);
 271         kmem_cache_destroy(overlay_target_cache);
 272         ddi_soft_state_fini(&overlay_thdl_state);
 273 }
 274 
 /*
  * Release the target attached to 'odd', if any, and clear odd->odd_target.
  *
  * For a DYNAMIC target, every entry is removed from both AVL trees and both
  * refhashes, dropping one reference per removal, and the containers are
  * destroyed. The target's union is then zeroed and the target is returned
  * to overlay_target_cache. The target must have no outstanding lookups
  * (ott_ocount is asserted to be zero).
  */
 275 void
 276 overlay_target_free(overlay_dev_t *odd)
 277 {
 278         if (odd->odd_target == NULL)
 279                 return;
 280 
 281         if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
 282                 refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
 283                 refhash_t *r3p = odd->odd_target->ott_u.ott_dyn.ott_l3dhash;
 284                 avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
 285                 avl_tree_t *a3p = &odd->odd_target->ott_u.ott_dyn.ott_l3tree;
 286                 overlay_target_entry_t *ote;
 287 


 288                 /*
 289                  * Our AVL tree and hashtable contain the same elements,
 290                  * therefore we should just remove it from the tree, but then
 291                  * delete the entries when we remove them from the hash table
 292                  * (which happens through the refhash dtor).
 293                  */
                     /*
                      * NOTE(review): overlay_target_associate() creates both
                      * hashes with overlay_target_entry_null_dtor, so the
                      * actual free presumably happens through
                      * OVERLAY_TARG_ENTRY_REFRELE rather than the refhash
                      * dtor -- confirm and update the comment above.
                      */
 294                 while ((ote = avl_first(ap)) != NULL) {
 295                         avl_remove(ap, ote);
 296                         OVERLAY_TARG_ENTRY_REFRELE(ote);
 297                 }
 298                 avl_destroy(ap);
 299 
 300                 while ((ote = avl_first(a3p)) != NULL) {
 301                         avl_remove(a3p, ote);
 302                         OVERLAY_TARG_ENTRY_REFRELE(ote);
 303                 }
 304                 avl_destroy(a3p);
 305 
                     /*
                      * NOTE(review): in both loops below the entry is
                      * released in the loop body and then passed to
                      * refhash_next(). If that release can free the entry,
                      * the iterator would touch freed memory -- confirm
                      * against OVERLAY_TARG_ENTRY_REFRELE and refhash_next().
                      */
 306                 for (ote = refhash_first(rp); ote != NULL;
 307                     ote = refhash_next(rp, ote)) {
 308                         refhash_remove(rp, ote);
 309                         OVERLAY_TARG_ENTRY_REFRELE(ote);
 310                 }
 311                 refhash_destroy(rp);
 312 
 313                 for (ote = refhash_first(r3p); ote != NULL;
 314                     ote = refhash_next(r3p, ote)) {
 315                         refhash_remove(r3p, ote);
 316                         OVERLAY_TARG_ENTRY_REFRELE(ote);
 317                 }
 318                 refhash_destroy(r3p);
 319         }
 320 
 321         ASSERT(odd->odd_target->ott_ocount == 0);
 322         bzero(&odd->odd_target->ott_u, sizeof (odd->odd_target->ott_u));
 323         kmem_cache_free(overlay_target_cache, odd->odd_target);
 324         odd->odd_target = NULL;
 325 }
 326 
 /*
  * Report whether any target handle is currently on overlay_thdl_list.
  * The list is examined under overlay_target_lock. Returns non-zero when the
  * list is non-empty and 0 when it is empty.
  */
 327 int
 328 overlay_target_busy(void)
 329 {
 330         int ret;
 331 
 332         mutex_enter(&overlay_target_lock);
 333         ret = !list_is_empty(&overlay_thdl_list);
 334         mutex_exit(&overlay_target_lock);
 335 
 336         return (ret);
 337 }
 338 
 /*
  * Queue 'entry' on overlay_target_list and wake one waiter on
  * overlay_target_condvar.
  *
  * If the entry's target is flagged OVERLAY_T_TEARDOWN, nothing is queued
  * and the function returns silently. Otherwise the target's count of
  * outstanding requests (ott_ocount) is incremented. Locks are taken in the
  * order overlay_target_lock, then the target's ott_lock.
  */
 339 void
 340 overlay_target_queue(overlay_target_entry_t *entry)
 341 {
 342         mutex_enter(&overlay_target_lock);
 343         mutex_enter(&entry->ote_ott->ott_lock);
 344         if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
 345                 mutex_exit(&entry->ote_ott->ott_lock);
 346                 mutex_exit(&overlay_target_lock);
 347                 return;
 348         }
 349         entry->ote_ott->ott_ocount++;
 350         mutex_exit(&entry->ote_ott->ott_lock);
 351         list_insert_tail(&overlay_target_list, entry);
 352         cv_signal(&overlay_target_condvar);
 353         mutex_exit(&overlay_target_lock);
 354 }
 355 
 /*
  * Mark 'ott' as being torn down and block until it has no outstanding
  * requests. Setting OVERLAY_T_TEARDOWN makes overlay_target_queue() refuse
  * new entries for this target. The wait sleeps on ott_cond until ott_ocount
  * reaches zero. A NULL target is a no-op.
  */
 356 void
 357 overlay_target_quiesce(overlay_target_t *ott)
 358 {
 359         if (ott == NULL)
 360                 return;
 361         mutex_enter(&ott->ott_lock);
 362         ott->ott_flags |= OVERLAY_T_TEARDOWN;
 363         while (ott->ott_ocount != 0)
 364                 cv_wait(&ott->ott_cond, &ott->ott_lock);
 365         mutex_exit(&ott->ott_lock);
 366 }
 367 
 368 /*
 369  * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
 370  * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at
 371  * this time, say for NVGRE, we drop all packets for any other destination mode.




 372  */
 /*
  * Resolve the underlay destination for packet 'mp' sent on 'odd'.
  *
  * *vidp is always set to odd->odd_vid (overlay_route_lookup() is passed
  * vidp on the router path). On OVERLAY_TARGET_OK, 'sock' holds an AF_INET6
  * sockaddr with the destination address and port, and *slenp holds its
  * length.
  *
  * Return values:
  *   OVERLAY_TARGET_OK    - destination filled in (POINT mode, or a VALID
  *                          dynamic entry).
  *   OVERLAY_TARGET_ASYNC - 'mp' was chained onto a target entry pending a
  *                          lookup; the entry now refers to the packet.
  *   OVERLAY_TARGET_DROP  - unsupported destination mode, unparseable MAC
  *                          header, entry allocation failure, an entry
  *                          flagged DROP, or the per-entry queue limit
  *                          (overlay_ent_size) would be exceeded.
  * For an entry flagged ROUTER, the result of overlay_route_lookup() is
  * returned.
  */
 373 int
 374 overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
 375     socklen_t *slenp, uint64_t *vidp)
 376 {
 377         int ret;
 378         struct sockaddr_in6 *v6;
 379         overlay_target_t *ott;
 380         mac_header_info_t mhi;
 381         overlay_target_entry_t *entry;
 382 
 383         ASSERT(odd->odd_target != NULL);
 384 
 385         *vidp = odd->odd_vid;
 386 
 387         /*
 388          * At this point, the overlay device is in a mux which means that it's
 389          * been activated. At this point, parts of the target, such as the mode
 390          * and the destination are now read-only and we don't have to worry
 391          * about synchronization for them.
 392          */
 393         ott = odd->odd_target;
 394         if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
 395                 return (OVERLAY_TARGET_DROP);
 396 
 397         v6 = (struct sockaddr_in6 *)sock;
 398         bzero(v6, sizeof (struct sockaddr_in6));
 399         v6->sin6_family = AF_INET6;
 400 
 401         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
 402                 mutex_enter(&ott->ott_lock);
 403                 bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
 404                     sizeof (struct in6_addr));
 405                 v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
 406                 mutex_exit(&ott->ott_lock);
 407                 *slenp = sizeof (struct sockaddr_in6);
 408 
 409                 return (OVERLAY_TARGET_OK);
 410         }
 411 
 412         ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
 413 
 414         /*
 415          * VL2 -> UL3 lookups only need the destination VL2 mac address,
 416          * however, if we end up having to route the packet, we will need
 417          * the source vlan as part of the destination selection.
 418          */
 419         if (mac_vlan_header_info(odd->odd_mh, mp, &mhi) != 0)
 420                 return (OVERLAY_TARGET_DROP);
 421 









 422         mutex_enter(&ott->ott_lock);
 423         entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
 424             mhi.mhi_daddr);
 425         if (entry == NULL) {
                     /*
                      * First packet for this destination MAC: create an entry
                      * that carries the packet, publish it in the hash and
                      * the tree (one hold each), and queue it for lookup.
                      * The allocation must not sleep because ott_lock is
                      * held.
                      */
 426                 entry = kmem_cache_alloc(overlay_entry_cache,
 427                     KM_NOSLEEP | KM_NORMALPRI);
 428                 if (entry == NULL) {
 429                         mutex_exit(&ott->ott_lock);
 430                         return (OVERLAY_TARGET_DROP);
 431                 }





 432                 bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
 433                 entry->ote_chead = entry->ote_ctail = mp;
 434                 entry->ote_mbsize = msgsize(mp);
 435                 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
 436                 entry->ote_ott = ott;
 437                 entry->ote_odd = odd;
 438 
 439                 OVERLAY_TARG_ENTRY_REFHOLD(entry);
 440                 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
 441 
 442                 OVERLAY_TARG_ENTRY_REFHOLD(entry);
 443                 avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
 444 
 445                 mutex_exit(&ott->ott_lock);
 446                 overlay_target_queue(entry);
 447                 return (OVERLAY_TARGET_ASYNC);
 448         }
             /* Hold the entry so it survives once ott_lock is dropped. */
 449         OVERLAY_TARG_ENTRY_REFHOLD(entry);
 450         mutex_exit(&ott->ott_lock);
 451 
 452         mutex_enter(&entry->ote_lock);
 453         if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
 454                 ret = OVERLAY_TARGET_DROP;
 455         } else if (entry->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
 456                 ret = overlay_route_lookup(odd, mp, &mhi, sock, slenp, vidp);
 457         } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
 458                 bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
 459                     sizeof (struct in6_addr));
 460                 v6->sin6_port = htons(entry->ote_dest.otp_port);
 461                 *slenp = sizeof (struct sockaddr_in6);
 462                 ret = OVERLAY_TARGET_OK;
 463         } else {
                     /*
                      * No answer yet: append the packet to the entry's chain,
                      * subject to the overlay_ent_size byte limit. The entry
                      * is queued for lookup only if it is not already
                      * pending.
                      */
 464                 size_t mlen = msgsize(mp);
 465 
 466                 if (mlen + entry->ote_mbsize > overlay_ent_size) {
 467                         ret = OVERLAY_TARGET_DROP;
 468                 } else {
 469                         if (entry->ote_ctail != NULL) {
 470                                 ASSERT(entry->ote_ctail->b_next ==
 471                                     NULL);
 472                                 entry->ote_ctail->b_next = mp;
 473                                 entry->ote_ctail = mp;
 474                         } else {
 475                                 entry->ote_chead = mp;
 476                                 entry->ote_ctail = mp;
 477                         }
 478                         entry->ote_mbsize += mlen;
 479                         if ((entry->ote_flags &
 480                             OVERLAY_ENTRY_F_PENDING) == 0) {
 481                                 entry->ote_flags |=
 482                                     OVERLAY_ENTRY_F_PENDING;
 483                                 overlay_target_queue(entry);
 484                         }
 485                         ret = OVERLAY_TARGET_ASYNC;
 486                 }
 487         }
 488         mutex_exit(&entry->ote_lock);
 489 
             /* Drop the hold taken above; this is done under ott_lock. */
 490         mutex_enter(&ott->ott_lock);
 491         OVERLAY_TARG_ENTRY_REFRELE(entry);
 492         mutex_exit(&ott->ott_lock);
 493 
 494         return (ret);
 495 }
 496 
 /*
  * Fill in the overlay_targ_info_t at 'arg' for the device named by its
  * oti_linkid. Under the device lock it copies out the plugin's destination
  * requirements, the virtual network id, the dcid, and flags reflecting
  * whether the device is degraded and/or activated. Returns ENOENT if the
  * link cannot be found and 0 otherwise. 'thdl' is unused.
  */
 497 /* ARGSUSED */
 498 static int
 499 overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
 500 {
 501         overlay_targ_info_t *info = arg;
 502         overlay_dev_t *dev = overlay_hold_by_dlid(info->oti_linkid);
 503 
 504         if (dev == NULL)
 505                 return (ENOENT);
 506 
 507         mutex_enter(&dev->odd_lock);
 508         info->oti_needs = dev->odd_plugin->ovp_dest;
 509         info->oti_vnetid = dev->odd_vid;
 510         info->oti_dcid = dev->odd_dcid;
 511         info->oti_flags = 0;
 512         if ((dev->odd_flags & OVERLAY_F_ACTIVATED) != 0)
 513                 info->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
 514         if ((dev->odd_flags & OVERLAY_F_DEGRADED) != 0)
 515                 info->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
 516         mutex_exit(&dev->odd_lock);
 517 
 518         overlay_hold_rele(dev);
 519         return (0);
 520 }
 521 
 /*
  * Create a target of the requested mode and attach it to the device named
  * by ota_linkid.
  *
  * Returns ENOENT if the link is unknown. Returns EINVAL for a zero ota_id,
  * a mode other than POINT or DYNAMIC, an ota_provides that differs from the
  * plugin's ovp_dest, or a zero point port in the validation below. Returns
  * EEXIST if the device already has OVERLAY_F_VARPD set, and 0 on success.
  * 'thdl' is unused.
  */
 522 /* ARGSUSED */
 523 static int
 524 overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
 525 {
 526         overlay_dev_t *odd;
 527         overlay_target_t *ott;
 528         overlay_targ_associate_t *ota = arg;
 529         overlay_router_t *ort;
 530 
 531         odd = overlay_hold_by_dlid(ota->ota_linkid);
 532         if (odd == NULL)
 533                 return (ENOENT);
 534 
 535         if (ota->ota_id == 0) {
 536                 overlay_hold_rele(odd);
 537                 return (EINVAL);
 538         }
 539 
 540         if (ota->ota_mode != OVERLAY_TARGET_POINT &&
 541             ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
 542                 overlay_hold_rele(odd);
 543                 return (EINVAL);
 544         }
 545 
 546         if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
 547                 overlay_hold_rele(odd);
 548                 return (EINVAL);
 549         }


 562                         if (ota->ota_point.otp_port == 0) {
 563                                 overlay_hold_rele(odd);
 564                                 return (EINVAL);
 565                         }
 566                 }
 567         }
 568 
 569         ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
 570         ott->ott_flags = 0;
 571         ott->ott_ocount = 0;
 572         ott->ott_mode = ota->ota_mode;
 573         ott->ott_dest = ota->ota_provides;
 574         ott->ott_id = ota->ota_id;
 575 
 576         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
 577                 bcopy(&ota->ota_point, &ott->ott_u.ott_point,
 578                     sizeof (overlay_target_point_t));
 579         } else {
                     /*
                      * A DYNAMIC target gets a MAC-keyed hash and tree plus
                      * a VL3 hash and tree. The VL3 hash is created with a
                      * key offset of 0, i.e. its hash and compare callbacks
                      * receive the entry itself.
                      */
 580                 ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
 581                     overlay_mac_hash, overlay_mac_cmp,
 582                     overlay_target_entry_null_dtor,
 583                     sizeof (overlay_target_entry_t),
 584                     offsetof(overlay_target_entry_t, ote_reflink),
 585                     offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
 586                 ott->ott_u.ott_dyn.ott_l3dhash = refhash_create(OVERLAY_HSIZE,
 587                     overlay_vl3_hash, overlay_vl3_cmp,
 588                     overlay_target_entry_null_dtor,
 589                     sizeof (overlay_target_entry_t),
 590                     offsetof(overlay_target_entry_t, ote_l3_reflink), 0,
 591                     KM_SLEEP);
 592                 avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
 593                     sizeof (overlay_target_entry_t),
 594                     offsetof(overlay_target_entry_t, ote_avllink));
 595                 avl_create(&ott->ott_u.ott_dyn.ott_l3tree, overlay_vl3_avl,
 596                     sizeof (overlay_target_entry_t),
 597                     offsetof(overlay_target_entry_t, ote_l3_avllink));
 598 
                     /*
                      * NOTE(review): in the code visible here 'ort' is
                      * allocated and initialised but never stored anywhere
                      * or freed before the function returns -- confirm where
                      * the router state is meant to be attached.
                      */
 599                 ort = kmem_zalloc(sizeof (*ort), KM_SLEEP);
 600                 mutex_init(&ort->otr_lock, NULL, MUTEX_DRIVER, NULL);
 601                 list_create(&ort->otr_tables, sizeof (overlay_route_table_t),
 602                     offsetof(overlay_route_table_t, ort_link));
 603                 avl_create(&ort->otr_tree, overlay_fabric_avl,
 604                     sizeof (overlay_fabric_entry_t),
 605                     offsetof(overlay_fabric_entry_t, ofe_avllink));
 606         }
 607         mutex_enter(&odd->odd_lock);
 608         if (odd->odd_flags & OVERLAY_F_VARPD) {
                     /*
                      * NOTE(review): only the overlay_target_t is returned
                      * to its cache here. For a DYNAMIC target the hashes
                      * and trees created above are not destroyed on this
                      * path -- confirm whether that is a leak.
                      */
 609                 mutex_exit(&odd->odd_lock);
 610                 kmem_cache_free(overlay_target_cache, ott);
 611                 overlay_hold_rele(odd);
 612                 return (EEXIST);
 613         }
 614 
 615         odd->odd_flags |= OVERLAY_F_VARPD;
 616         odd->odd_target = ott;
 617         mutex_exit(&odd->odd_lock);
 618 
 619         overlay_hold_rele(odd);
 620 

 621         return (0);
 622 }
 623 
 624 
 /*
  * Mark the device named by otd_linkid as degraded by passing the
  * caller-supplied otd_buf to overlay_fm_degrade(). Returns ENOENT if the
  * link cannot be found and 0 otherwise. 'thdl' is unused.
  */
 625 /* ARGSUSED */
 626 static int
 627 overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
 628 {
 629         overlay_targ_degrade_t *req = arg;
 630         overlay_dev_t *dev = overlay_hold_by_dlid(req->otd_linkid);
 631 
 632         if (dev != NULL) {
 633                 overlay_fm_degrade(dev, req->otd_buf);
 634                 overlay_hold_rele(dev);
 635                 return (0);
 636         }
 637 
 638         return (ENOENT);
 639 }
 640 


 662         overlay_targ_id_t *otid = arg;
 663 
 664         odd = overlay_hold_by_dlid(otid->otid_linkid);
 665         if (odd == NULL)
 666                 return (ENOENT);
 667 
 668         mutex_enter(&odd->odd_lock);
 669         odd->odd_flags &= ~OVERLAY_F_VARPD;
 670         mutex_exit(&odd->odd_lock);
 671 
 672         overlay_hold_rele(odd);
 673         return (0);
 674 
 675 }
 676 
 /*
  * Hand the next queued lookup request to the caller.
  *
  * Waits for an entry to appear on overlay_target_list, giving up with ETIME
  * once the deadline computed on entry has passed. The deadline is the
  * current lbolt plus drv_usectohz(MICROSEC) ticks. Entries that have
  * already been answered are skipped, as are leading packets whose headers
  * cannot be parsed.
  *
  * On success the overlay_targ_lookup_t at 'arg' describes the packet at the
  * head of the entry's chain, the entry is placed on the handle's
  * oth_outstanding list, and 0 is returned. otl_reqid is the entry's
  * address.
  */
 677 static int
 678 overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
 679 {
 680         overlay_targ_lookup_t *otl = arg;
 681         overlay_target_entry_t *entry;
 682         void *src, *dst;
 683         clock_t ret, timeout;
 684         mac_header_info_t mhi;

 685         timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
 686 again:
 687         mutex_enter(&overlay_target_lock);
 688         while (list_is_empty(&overlay_target_list)) {
 689                 ret = cv_timedwait(&overlay_target_condvar,
 690                     &overlay_target_lock, timeout);
 691                 if (ret == -1) {
 692                         mutex_exit(&overlay_target_lock);
 693                         return (ETIME);
 694                 }
 695         }
 696         entry = list_remove_head(&overlay_target_list);
 697         mutex_exit(&overlay_target_lock);
 698         mutex_enter(&entry->ote_lock);
             /*
              * Skip entries that were answered while they sat on the queue.
              * Both places that set OVERLAY_ENTRY_F_VALID also drain the
              * packet chain, which is what the assertion checks. The test
              * must not be on the PENDING flags: every entry is queued with
              * one of them set, so that would discard every request.
              *
              * NOTE(review): confirm that VL3 requests are never queued on
              * an entry that is already VALID.
              */
 699         if (entry->ote_flags &
 700             OVERLAY_ENTRY_F_VALID) {
 701                 ASSERT(entry->ote_chead == NULL);
 702                 mutex_exit(&entry->ote_lock);
 703                 goto again;
 704         }
 705         ASSERT(entry->ote_chead != NULL);
 706 
 707 
 708         otl->otl_l3req = (entry->ote_flags & OVERLAY_ENTRY_F_VL3_PENDING) ?
 709             B_TRUE : B_FALSE;
 710 
 711         if (otl->otl_l3req) {
 712                 src = &otl->otl_addru.otlu_l3.otl3_srcip;
 713                 dst = &otl->otl_addru.otlu_l3.otl3_dstip;
 714         } else {
 715                 src = &otl->otl_addru.otlu_l2.otl2_srcaddr;
 716                 dst = &otl->otl_addru.otlu_l2.otl2_dstaddr;
 717         }
 718 
 719         /*
 720          * If we have a bogon that doesn't have a valid mac header, or an
 721          * invalid IP header for IP requests, drop it and try again.
 722          */
 723         if ((mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
 724             &mhi) != 0) ||
 725             (otl->otl_l3req && overlay_mblk_vl3ip(entry->ote_chead, src,
 726             dst) != 0)) {
 727                 boolean_t queue = B_FALSE;
 728                 mblk_t *mp = entry->ote_chead;
                     /*
                      * Unlink the bad packet from the head of the chain. If
                      * more packets remain, the entry is put back on the
                      * queue so that the next one gets a chance.
                      */
 729                 entry->ote_chead = mp->b_next;
 730                 mp->b_next = NULL;
 731                 if (entry->ote_ctail == mp)
 732                         entry->ote_ctail = entry->ote_chead;
 733                 entry->ote_mbsize -= msgsize(mp);
 734                 if (entry->ote_chead != NULL)
 735                         queue = B_TRUE;
 736                 mutex_exit(&entry->ote_lock);
 737                 if (queue == B_TRUE)
 738                         overlay_target_queue(entry);
 739                 freemsg(mp);
 740                 goto again;
 741         }
 742 







 743         otl->otl_dlid = entry->ote_odd->odd_linkid;
 744         otl->otl_reqid = (uintptr_t)entry;
 745         otl->otl_varpdid = entry->ote_ott->ott_id;
 746         otl->otl_vnetid = entry->ote_odd->odd_vid;
 747 
 748         otl->otl_hdrsize = mhi.mhi_hdrsize;
 749         otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;


             /*
              * NOTE(review): these otlu_l2 fields are written for VL3
              * requests too. If otl_addru is a union, that may overwrite
              * the addresses overlay_mblk_vl3ip() stored through
              * otlu_l3 -- confirm against the structure layout.
              */
 750         otl->otl_addru.otlu_l2.otl2_dsttype = mhi.mhi_dsttype;
 751         otl->otl_addru.otlu_l2.otl2_sap = mhi.mhi_bindsap;
 752         otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
 753 
 754         /*
 755          * The overlay_mblk_vl3ip() call above fills in dst & src for
 756          * VL3->UL3 requests, so only need to care about VL2->UL3 here.
 757          */
 758         if (!otl->otl_l3req) {
 759                 bcopy(mhi.mhi_daddr, dst, ETHERADDRL);
 760                 bcopy(mhi.mhi_saddr, src, ETHERADDRL);
 761         }
 762         mutex_exit(&entry->ote_lock);
 763 
 764         mutex_enter(&thdl->oth_lock);
 765         list_insert_tail(&thdl->oth_outstanding, entry);
 766         mutex_exit(&thdl->oth_lock);
 767 
 768         return (0);
 769 }
 770 
 771 static int
 772 overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
 773 {
 774         const overlay_targ_resp_t *otr = arg;
 775         overlay_target_entry_t *entry;
 776         mblk_t *mp;
 777         boolean_t is_router = B_FALSE;
 778 
 779         /*
 780          * If we ever support a protocol that uses MAC addresses for the UL
 781          * destination addr, we probably should expand this to check that
 782          * all of otr is zero.
 783          */
 784         if (IN6_IS_ADDR_UNSPECIFIED(&otr->otr_answer.otp_ip) &&
 785             otr->otr_answer.otp_port == 0)
 786                 is_router = B_TRUE;
 787 
 788         mutex_enter(&thdl->oth_lock);
 789         for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
 790             entry = list_next(&thdl->oth_outstanding, entry)) {
 791                 if ((uintptr_t)entry == otr->otr_reqid)
 792                         break;
 793         }
 794 
 795         if (entry == NULL) {
 796                 mutex_exit(&thdl->oth_lock);
 797                 return (EINVAL);
 798         }
 799         list_remove(&thdl->oth_outstanding, entry);
 800         mutex_exit(&thdl->oth_lock);
 801 
 802         mutex_enter(&entry->ote_lock);
 803         bcopy(&otr->otr_answer, &entry->ote_dest,
 804             sizeof (overlay_target_point_t));
 805         entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
 806         entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
 807         if (is_router)
 808                 entry->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
 809         mp = entry->ote_chead;
 810         entry->ote_chead = NULL;
 811         entry->ote_ctail = NULL;
 812         entry->ote_mbsize = 0;
 813         entry->ote_vtime = gethrtime();
 814         mutex_exit(&entry->ote_lock);
 815 
 816         /*
 817          * For now do an in-situ drain.
 818          *
 819          * TODO: overlay_m_tx() will need to perform remote fabric attachment
 820          * checks, which may leave mblk_t's left in the msg chain for
 821          * mblk_t's whose connectivity with the target entry are unknown.
 822          * This will then need to deal with the leftovers.
 823          */
 824         mp = overlay_m_tx(entry->ote_odd, mp);
 825         freemsgchain(mp);
 826 
 827         mutex_enter(&entry->ote_ott->ott_lock);
 828         entry->ote_ott->ott_ocount--;


1211                 return (ENXIO);
1212         }
1213         ott = odd->odd_target;
1214         if (ott->ott_mode != OVERLAY_TARGET_POINT &&
1215             ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1216                 mutex_exit(&odd->odd_lock);
1217                 overlay_hold_rele(odd);
1218                 return (ENOTSUP);
1219         }
1220         mutex_enter(&ott->ott_lock);
1221         mutex_exit(&odd->odd_lock);
1222 
1223         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1224                 otc->otc_entry.otce_flags = 0;
1225                 bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
1226                     sizeof (overlay_target_point_t));
1227         } else {
1228                 overlay_target_entry_t *ote;
1229                 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1230                     otc->otc_entry.otce_mac);
1231                 if (ote == NULL) {
1232                         ret = ENOENT;
1233                         goto done;
1234                 }
1235 
1236                 mutex_enter(&ote->ote_lock);
1237                 if ((ote->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) != 0) {

1238                         if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
1239                                 otc->otc_entry.otce_flags =
1240                                     OVERLAY_TARGET_CACHE_DROP;
1241                         } else if (ote->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
1242                                 otc->otc_entry.otce_flags =
1243                                     OVERLAY_TARGET_CACHE_ROUTER;
1244                         } else {
1245                                 otc->otc_entry.otce_flags = 0;
1246                                 bcopy(&ote->ote_dest, &otc->otc_entry.otce_dest,

1247                                     sizeof (overlay_target_point_t));
1248                         }
1249                         ret = 0;
1250                 } else {
1251                         ret = ENOENT;
1252                 }
1253                 mutex_exit(&ote->ote_lock);


1254         }

1255 
1256 done:
1257         mutex_exit(&ott->ott_lock);
1258         overlay_hold_rele(odd);
1259 
1260         return (ret);
1261 }
1262 
1263 /* ARGSUSED */
1264 static int
1265 overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
1266 {
1267         overlay_dev_t *odd;
1268         overlay_target_t *ott;
1269         overlay_target_entry_t *ote;
1270         overlay_targ_cache_t *otc = arg;
1271         mblk_t *mp = NULL;
1272 
1273         if (otc->otc_entry.otce_flags &
1274             ~(OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
1275                 return (EINVAL);
1276 
1277         if (otc->otc_entry.otce_flags ==
1278             (OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
1279                 return (EINVAL);
1280 
1281         odd = overlay_hold_by_dlid(otc->otc_linkid);
1282         if (odd == NULL)
1283                 return (ENOENT);
1284 
1285         mutex_enter(&odd->odd_lock);
1286         if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1287                 mutex_exit(&odd->odd_lock);
1288                 overlay_hold_rele(odd);
1289                 return (ENXIO);
1290         }
1291         ott = odd->odd_target;
1292         if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1293                 mutex_exit(&odd->odd_lock);
1294                 overlay_hold_rele(odd);
1295                 return (ENOTSUP);
1296         }
1297         mutex_enter(&ott->ott_lock);
1298         mutex_exit(&odd->odd_lock);
1299 
1300         ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1301             otc->otc_entry.otce_mac);
1302         if (ote == NULL) {
1303                 ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
1304                 bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
1305                 ote->ote_chead = ote->ote_ctail = NULL;
1306                 ote->ote_mbsize = 0;
1307                 ote->ote_ott = ott;
1308                 ote->ote_odd = odd;
1309                 mutex_enter(&ote->ote_lock);
1310                 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
1311                 avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
1312         } else {
1313                 mutex_enter(&ote->ote_lock);
1314         }
1315 
1316         if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
1317                 ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
1318         } else {
1319                 ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
1320                 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_ROUTER)
1321                         ote->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
1322                 bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
1323                     sizeof (overlay_target_point_t));
1324                 mp = ote->ote_chead;
1325                 ote->ote_chead = NULL;
1326                 ote->ote_ctail = NULL;
1327                 ote->ote_mbsize = 0;
1328                 ote->ote_vtime = gethrtime();
1329         }
1330 
1331         mutex_exit(&ote->ote_lock);
1332         mutex_exit(&ott->ott_lock);
1333 
1334         if (mp != NULL) {
1335                 mp = overlay_m_tx(ote->ote_odd, mp);
1336                 freemsgchain(mp);
1337         }
1338 
1339         overlay_hold_rele(odd);
1340 
1341         return (0);