Print this page
Overlay fabric router
        
*** 78,88 ****
          overlay_target_copyout_f oti_copyout;   /* copyin func */
          size_t          oti_size;       /* size of user level structure */
  } overlay_target_ioctl_t;
  
  static kmem_cache_t *overlay_target_cache;
! static kmem_cache_t *overlay_entry_cache;
  static id_space_t *overlay_thdl_idspace;
  static void *overlay_thdl_state;
  
  /*
   * When we support overlay devices in the NGZ, then all of these need to become
--- 78,88 ----
          overlay_target_copyout_f oti_copyout;   /* copyin func */
          size_t          oti_size;       /* size of user level structure */
  } overlay_target_ioctl_t;
  
  static kmem_cache_t *overlay_target_cache;
! kmem_cache_t *overlay_entry_cache;
  static id_space_t *overlay_thdl_idspace;
  static void *overlay_thdl_state;
  
  /*
   * When we support overlay devices in the NGZ, then all of these need to become
*** 96,106 ****
  static boolean_t overlay_target_excl;
  
  /*
   * Outstanding data per hash table entry.
   */
! static int overlay_ent_size = 128 * 1024;
  
  /* ARGSUSED */
  static int
  overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
  {
--- 96,106 ----
  static boolean_t overlay_target_excl;
  
  /*
   * Outstanding data per hash table entry.
   */
! int overlay_ent_size = 128 * 1024;
  
  /* ARGSUSED */
  static int
  overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
  {
*** 139,150 ****
          overlay_target_entry_t *ote = buf;
  
          mutex_destroy(&ote->ote_lock);
  }
  
- /* TODO: we will need to modify these to hash/cmp DCID + MAC */
- 
  static uint64_t
  overlay_mac_hash(const void *v)
  {
          uint32_t crc;
          CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
--- 139,148 ----
*** 155,174 ****
  overlay_mac_cmp(const void *a, const void *b)
  {
          return (bcmp(a, b, ETHERADDRL));
  }
  
  /* ARGSUSED */
! static void
  overlay_target_entry_dtor(void *arg)
  {
          overlay_target_entry_t *ote = arg;
  
          ote->ote_flags = 0;
          bzero(ote->ote_addr, ETHERADDRL);
          ote->ote_ott = NULL;
          ote->ote_odd = NULL;
          freemsgchain(ote->ote_chead);
          ote->ote_chead = ote->ote_ctail = NULL;
          ote->ote_mbsize = 0;
          ote->ote_vtime = 0;
          kmem_cache_free(overlay_entry_cache, ote);
--- 153,218 ----
  overlay_mac_cmp(const void *a, const void *b)
  {
          return (bcmp(a, b, ETHERADDRL));
  }
  
+ /*
+  * Hash callback for the VL3 refhash (ott_l3dhash).  That hash is created
+  * with a tag offset of 0, so 'v' is the overlay_target_entry_t itself.
+  * The key is the pair (ote_ip, ote_fab): the IP is hashed first and the
+  * ote_fab member is then folded into the same running CRC.
+  */
+ static uint64_t
+ overlay_vl3_hash(const void *v)
+ {
+         const overlay_target_entry_t *entry = v;
+         uint32_t crc;
+ 
+         /* First pass starts from -1U; the second continues from 'crc'. */
+         CRC32(crc, &entry->ote_ip, sizeof (entry->ote_ip), -1U, crc32_table);
+         CRC32(crc, &entry->ote_fab, sizeof (entry->ote_fab), crc, crc32_table);
+ 
+         return ((uint64_t)crc);
+ }
+ 
+ /*
+  * Equality callback for the VL3 refhash (ott_l3dhash).  Two entries match
+  * when they reference the same fabric and carry the same VL3 address.
+  * Returns 0 on a match and 1 otherwise.
+  */
+ static int
+ overlay_vl3_cmp(const void *a, const void *b)
+ {
+         const overlay_target_entry_t *lhs = a;
+         const overlay_target_entry_t *rhs = b;
+ 
+         /* Cheap fabric check first; only compare addresses if it matches. */
+         if (lhs->ote_fab == rhs->ote_fab &&
+             bcmp(&lhs->ote_ip, &rhs->ote_ip, sizeof (struct in6_addr)) == 0)
+                 return (0);
+ 
+         return (1);
+ }
+ 
+ /*
+  * Ordering callback for the VL3 AVL tree (ott_l3tree).  Entries are sorted
+  * by ote_fab first and then by the bytes of the VL3 address.
+  *
+  * Returns exactly -1, 0 or 1.  The AVL code requires that range from its
+  * comparators, so the memcmp() result, which only guarantees a sign, is
+  * clamped before it is returned.
+  */
+ static int
+ overlay_vl3_avl(const void *a, const void *b)
+ {
+         const overlay_target_entry_t *l = a;
+         const overlay_target_entry_t *r = b;
+         int ret;
+ 
+         if (l->ote_fab < r->ote_fab)
+                 return (-1);
+         if (l->ote_fab > r->ote_fab)
+                 return (1);
+ 
+         ret = memcmp(&l->ote_ip, &r->ote_ip, sizeof (struct in6_addr));
+         if (ret < 0)
+                 return (-1);
+         if (ret > 0)
+                 return (1);
+         return (0);
+ }
+ 
  /*
   * Intentionally empty destructor.  It is handed to refhash_create() for
   * both the MAC (ott_dhash) and VL3 (ott_l3dhash) hashes, so removing an
   * entry from a hash does not by itself free the entry.
   * NOTE(review): entries are presumably freed by overlay_target_entry_dtor()
   * once OVERLAY_TARG_ENTRY_REFRELE() drops the last reference -- confirm
   * against the macro definition.
   */
  /* ARGSUSED */
! void
! overlay_target_entry_null_dtor(void *arg)
! {
! }
! 
! /* ARGSUSED */
! void
  overlay_target_entry_dtor(void *arg)
  {
          overlay_target_entry_t *ote = arg;
  
+         ASSERT3U(ote->ote_refcnt, ==, 0);
+ 
          ote->ote_flags = 0;
          bzero(ote->ote_addr, ETHERADDRL);
+         bzero(&ote->ote_ip, sizeof (ote->ote_ip));
          ote->ote_ott = NULL;
          ote->ote_odd = NULL;
+         ote->ote_fab = NULL;
          freemsgchain(ote->ote_chead);
          ote->ote_chead = ote->ote_ctail = NULL;
          ote->ote_mbsize = 0;
          ote->ote_vtime = 0;
          kmem_cache_free(overlay_entry_cache, ote);
*** 234,267 ****
          if (odd->odd_target == NULL)
                  return;
  
          if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
                  refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
                  avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
                  overlay_target_entry_t *ote;
  
-                 /* TODO: remove from L3 trees */
- 
                  /*
                   * Our AVL tree and hashtable contain the same elements,
                   * therefore we should just remove it from the tree, but then
                   * delete the entries when we remove them from the hash table
                   * (which happens through the refhash dtor).
                   */
!                 while ((ote = avl_first(ap)) != NULL)
                          avl_remove(ap, ote);
! 
                  avl_destroy(ap);
                  for (ote = refhash_first(rp); ote != NULL;
                      ote = refhash_next(rp, ote)) {
                          refhash_remove(rp, ote);
                  }
                  refhash_destroy(rp);
          }
  
          ASSERT(odd->odd_target->ott_ocount == 0);
          kmem_cache_free(overlay_target_cache, odd->odd_target);
  }
  
  int
  overlay_target_busy()
  {
--- 278,329 ----
          if (odd->odd_target == NULL)
                  return;
  
          if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
                  refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
+                 refhash_t *r3p = odd->odd_target->ott_u.ott_dyn.ott_l3dhash;
                  avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
+                 avl_tree_t *a3p = &odd->odd_target->ott_u.ott_dyn.ott_l3tree;
                  overlay_target_entry_t *ote;
  
                  /*
                   * Our AVL tree and hashtable contain the same elements,
                   * therefore we should just remove it from the tree, but then
                   * delete the entries when we remove them from the hash table
                   * (which happens through the refhash dtor).
                   */
!                 while ((ote = avl_first(ap)) != NULL) {
                          avl_remove(ap, ote);
!                         OVERLAY_TARG_ENTRY_REFRELE(ote);
!                 }
                  avl_destroy(ap);
+ 
+                 while ((ote = avl_first(a3p)) != NULL) {
+                         avl_remove(a3p, ote);
+                         OVERLAY_TARG_ENTRY_REFRELE(ote);
+                 }
+                 avl_destroy(a3p);
+ 
                  for (ote = refhash_first(rp); ote != NULL;
                      ote = refhash_next(rp, ote)) {
                          refhash_remove(rp, ote);
+                         OVERLAY_TARG_ENTRY_REFRELE(ote);
                  }
                  refhash_destroy(rp);
+ 
+                 for (ote = refhash_first(r3p); ote != NULL;
+                     ote = refhash_next(r3p, ote)) {
+                         refhash_remove(r3p, ote);
+                         OVERLAY_TARG_ENTRY_REFRELE(ote);
                  }
+                 refhash_destroy(r3p);
+         }
  
          ASSERT(odd->odd_target->ott_ocount == 0);
+         bzero(&odd->odd_target->ott_u, sizeof (odd->odd_target->ott_u));
          kmem_cache_free(overlay_target_cache, odd->odd_target);
+         odd->odd_target = NULL;
  }
  
  int
  overlay_target_busy()
  {
*** 272,282 ****
          mutex_exit(&overlay_target_lock);
  
          return (ret);
  }
  
! static void
  overlay_target_queue(overlay_target_entry_t *entry)
  {
          mutex_enter(&overlay_target_lock);
          mutex_enter(&entry->ote_ott->ott_lock);
          if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
--- 334,344 ----
          mutex_exit(&overlay_target_lock);
  
          return (ret);
  }
  
! void
  overlay_target_queue(overlay_target_entry_t *entry)
  {
          mutex_enter(&overlay_target_lock);
          mutex_enter(&entry->ote_ott->ott_lock);
          if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
*** 304,331 ****
  }
  
  /*
   * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
   * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at
!  * this time, say for NVGRE, we drop all packets that mcuh this.
!  *
!  * XXX: It might be better to replace the 'sock' argument with
!  * overlay_target_entry_t** and set it with the found entry in the case
!  * of OVERLAY_TARGET_OK.
   */
  int
  overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
!     socklen_t *slenp)
  {
          int ret;
          struct sockaddr_in6 *v6;
          overlay_target_t *ott;
          mac_header_info_t mhi;
          overlay_target_entry_t *entry;
  
          ASSERT(odd->odd_target != NULL);
  
          /*
           * At this point, the overlay device is in a mux which means that it's
           * been activated. At this point, parts of the target, such as the mode
           * and the destination are now read-only and we don't have to worry
           * about synchronization for them.
--- 366,391 ----
  }
  
  /*
   * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
   * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at
!  * this time, say for NVGRE, we drop all packets that match this.
   */
  int
  overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
!     socklen_t *slenp, uint64_t *vidp)
  {
          int ret;
          struct sockaddr_in6 *v6;
          overlay_target_t *ott;
          mac_header_info_t mhi;
          overlay_target_entry_t *entry;
  
          ASSERT(odd->odd_target != NULL);
  
+         *vidp = odd->odd_vid;
+ 
          /*
           * At this point, the overlay device is in a mux which means that it's
           * been activated. At this point, parts of the target, such as the mode
           * and the destination are now read-only and we don't have to worry
           * about synchronization for them.
*** 350,375 ****
          }
  
          ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
  
          /*
!          * Note we only want the MAC address here, therefore we won't bother
!          * using mac_vlan_header_info(). If any caller needs the vlan info at
!          * this point, this should change to a call to mac_vlan_header_info().
           */
!         if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
                  return (OVERLAY_TARGET_DROP);
  
-         /*
-          * TODO: compare mhi.mhi_daddr with odd->macaddr.
-          * If match,
-          *      get VL3 dest from mp
-          *      lookup target using VL3 dest
-          * otherwise,
-          *      lookup target using VL2 dest (existing refhash_lookup() call
-          *      below)
-          */
          mutex_enter(&ott->ott_lock);
          entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
              mhi.mhi_daddr);
          if (entry == NULL) {
                  entry = kmem_cache_alloc(overlay_entry_cache,
--- 410,426 ----
          }
  
          ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
  
          /*
!          * VL2 -> UL3 lookups only need the destination VL2 mac address,
!          * however, if we end up having to route the packet, we will need
!          * the source vlan as part of the destination selection.
           */
!         if (mac_vlan_header_info(odd->odd_mh, mp, &mhi) != 0)
                  return (OVERLAY_TARGET_DROP);
  
          mutex_enter(&ott->ott_lock);
          entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
              mhi.mhi_daddr);
          if (entry == NULL) {
                  entry = kmem_cache_alloc(overlay_entry_cache,
*** 376,408 ****
                      KM_NOSLEEP | KM_NORMALPRI);
                  if (entry == NULL) {
                          mutex_exit(&ott->ott_lock);
                          return (OVERLAY_TARGET_DROP);
                  }
-                 /*
-                  * TODO: set entry->ote_dcid, if VL3 lookup, copy dst addr
-                  * into entry->ote_ip.  Probably zero out the address we're
-                  * not lookup up (VL2 or VL3) as well.
-                  */
                  bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
                  entry->ote_chead = entry->ote_ctail = mp;
                  entry->ote_mbsize = msgsize(mp);
                  entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
                  entry->ote_ott = ott;
                  entry->ote_odd = odd;
                  refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
                  avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
                  mutex_exit(&ott->ott_lock);
                  overlay_target_queue(entry);
                  return (OVERLAY_TARGET_ASYNC);
          }
!         refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
          mutex_exit(&ott->ott_lock);
  
          mutex_enter(&entry->ote_lock);
          if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
                  ret = OVERLAY_TARGET_DROP;
          } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
                  bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
                      sizeof (struct in6_addr));
                  v6->sin6_port = htons(entry->ote_dest.otp_port);
                  *slenp = sizeof (struct sockaddr_in6);
--- 427,461 ----
                      KM_NOSLEEP | KM_NORMALPRI);
                  if (entry == NULL) {
                          mutex_exit(&ott->ott_lock);
                          return (OVERLAY_TARGET_DROP);
                  }
                  bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
                  entry->ote_chead = entry->ote_ctail = mp;
                  entry->ote_mbsize = msgsize(mp);
                  entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
                  entry->ote_ott = ott;
                  entry->ote_odd = odd;
+ 
+                 OVERLAY_TARG_ENTRY_REFHOLD(entry);
                  refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
+ 
+                 OVERLAY_TARG_ENTRY_REFHOLD(entry);
                  avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
+ 
                  mutex_exit(&ott->ott_lock);
                  overlay_target_queue(entry);
                  return (OVERLAY_TARGET_ASYNC);
          }
!         OVERLAY_TARG_ENTRY_REFHOLD(entry);
          mutex_exit(&ott->ott_lock);
  
          mutex_enter(&entry->ote_lock);
          if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
                  ret = OVERLAY_TARGET_DROP;
+         } else if (entry->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
+                 ret = overlay_route_lookup(odd, mp, &mhi, sock, slenp, vidp);
          } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
                  bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
                      sizeof (struct in6_addr));
                  v6->sin6_port = htons(entry->ote_dest.otp_port);
                  *slenp = sizeof (struct sockaddr_in6);
*** 433,443 ****
                  }
          }
          mutex_exit(&entry->ote_lock);
  
          mutex_enter(&ott->ott_lock);
!         refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
          mutex_exit(&ott->ott_lock);
  
          return (ret);
  }
  
--- 486,496 ----
                  }
          }
          mutex_exit(&entry->ote_lock);
  
          mutex_enter(&ott->ott_lock);
!         OVERLAY_TARG_ENTRY_REFRELE(entry);
          mutex_exit(&ott->ott_lock);
  
          return (ret);
  }
  
*** 471,480 ****
--- 524,534 ----
  overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
  {
          overlay_dev_t *odd;
          overlay_target_t *ott;
          overlay_targ_associate_t *ota = arg;
+         overlay_router_t *ort;
  
          odd = overlay_hold_by_dlid(ota->ota_linkid);
          if (odd == NULL)
                  return (ENOENT);
  
*** 523,538 ****
                  bcopy(&ota->ota_point, &ott->ott_u.ott_point,
                      sizeof (overlay_target_point_t));
          } else {
                  ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
                      overlay_mac_hash, overlay_mac_cmp,
!                     overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
                      offsetof(overlay_target_entry_t, ote_reflink),
                      offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
                  avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
                      sizeof (overlay_target_entry_t),
                      offsetof(overlay_target_entry_t, ote_avllink));
          }
          mutex_enter(&odd->odd_lock);
          if (odd->odd_flags & OVERLAY_F_VARPD) {
                  mutex_exit(&odd->odd_lock);
                  kmem_cache_free(overlay_target_cache, ott);
--- 577,610 ----
                  bcopy(&ota->ota_point, &ott->ott_u.ott_point,
                      sizeof (overlay_target_point_t));
          } else {
                  ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
                      overlay_mac_hash, overlay_mac_cmp,
!                     overlay_target_entry_null_dtor,
!                     sizeof (overlay_target_entry_t),
                      offsetof(overlay_target_entry_t, ote_reflink),
                      offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
+                 ott->ott_u.ott_dyn.ott_l3dhash = refhash_create(OVERLAY_HSIZE,
+                     overlay_vl3_hash, overlay_vl3_cmp,
+                     overlay_target_entry_null_dtor,
+                     sizeof (overlay_target_entry_t),
+                     offsetof(overlay_target_entry_t, ote_l3_reflink), 0,
+                     KM_SLEEP);
                  avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
                      sizeof (overlay_target_entry_t),
                      offsetof(overlay_target_entry_t, ote_avllink));
+                 avl_create(&ott->ott_u.ott_dyn.ott_l3tree, overlay_vl3_avl,
+                     sizeof (overlay_target_entry_t),
+                     offsetof(overlay_target_entry_t, ote_l3_avllink));
+ 
+                 ort = kmem_zalloc(sizeof (*ort), KM_SLEEP);
+                 mutex_init(&ort->otr_lock, NULL, MUTEX_DRIVER, NULL);
+                 list_create(&ort->otr_tables, sizeof (overlay_route_table_t),
+                     offsetof(overlay_route_table_t, ort_link));
+                 avl_create(&ort->otr_tree, overlay_fabric_avl,
+                     sizeof (overlay_fabric_entry_t),
+                     offsetof(overlay_fabric_entry_t, ofe_avllink));
          }
          mutex_enter(&odd->odd_lock);
          if (odd->odd_flags & OVERLAY_F_VARPD) {
                  mutex_exit(&odd->odd_lock);
                  kmem_cache_free(overlay_target_cache, ott);
*** 544,554 ****
          odd->odd_target = ott;
          mutex_exit(&odd->odd_lock);
  
          overlay_hold_rele(odd);
  
- 
          return (0);
  }
  
  
  /* ARGSUSED */
--- 616,625 ----
*** 606,618 ****
  static int
  overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
  {
          overlay_targ_lookup_t *otl = arg;
          overlay_target_entry_t *entry;
          clock_t ret, timeout;
          mac_header_info_t mhi;
- 
          timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
  again:
          mutex_enter(&overlay_target_lock);
          while (list_is_empty(&overlay_target_list)) {
                  ret = cv_timedwait(&overlay_target_condvar,
--- 677,689 ----
  static int
  overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
  {
          overlay_targ_lookup_t *otl = arg;
          overlay_target_entry_t *entry;
+         void *src, *dst;
          clock_t ret, timeout;
          mac_header_info_t mhi;
          timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
  again:
          mutex_enter(&overlay_target_lock);
          while (list_is_empty(&overlay_target_list)) {
                  ret = cv_timedwait(&overlay_target_condvar,
*** 623,645 ****
                  }
          }
          entry = list_remove_head(&overlay_target_list);
          mutex_exit(&overlay_target_lock);
          mutex_enter(&entry->ote_lock);
!         if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
                  ASSERT(entry->ote_chead == NULL);
                  mutex_exit(&entry->ote_lock);
                  goto again;
          }
          ASSERT(entry->ote_chead != NULL);
  
          /*
!          * If we have a bogon that doesn't have a valid mac header, drop it and
!          * try again.
           */
!         if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
!             &mhi) != 0) {
                  boolean_t queue = B_FALSE;
                  mblk_t *mp = entry->ote_chead;
                  entry->ote_chead = mp->b_next;
                  mp->b_next = NULL;
                  if (entry->ote_ctail == mp)
--- 694,731 ----
                  }
          }
          entry = list_remove_head(&overlay_target_list);
          mutex_exit(&overlay_target_lock);
          mutex_enter(&entry->ote_lock);
!         if (entry->ote_flags &
!             (OVERLAY_ENTRY_F_PENDING | OVERLAY_ENTRY_F_VL3_PENDING)) {
                  ASSERT(entry->ote_chead == NULL);
                  mutex_exit(&entry->ote_lock);
                  goto again;
          }
          ASSERT(entry->ote_chead != NULL);
  
+ 
+         otl->otl_l3req = (entry->ote_flags & OVERLAY_ENTRY_F_VL3_PENDING) ?
+             B_TRUE : B_FALSE;
+ 
+         if (otl->otl_l3req) {
+                 src = &otl->otl_addru.otlu_l3.otl3_srcip;
+                 dst = &otl->otl_addru.otlu_l3.otl3_dstip;
+         } else {
+                 src = &otl->otl_addru.otlu_l2.otl2_srcaddr;
+                 dst = &otl->otl_addru.otlu_l2.otl2_dstaddr;
+         }
+ 
          /*
!          * If we have a bogon that doesn't have a valid mac header, or an
!          * invalid IP header for IP requests, drop it and try again.
           */
!         if ((mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
!             &mhi) != 0) ||
!             (otl->otl_l3req && overlay_mblk_vl3ip(entry->ote_chead, src,
!             dst) != 0)) {
                  boolean_t queue = B_FALSE;
                  mblk_t *mp = entry->ote_chead;
                  entry->ote_chead = mp->b_next;
                  mp->b_next = NULL;
                  if (entry->ote_ctail == mp)
*** 652,680 ****
                          overlay_target_queue(entry);
                  freemsg(mp);
                  goto again;
          }
  
-         /*
-          * TODO: If VL3 request,
-          *      set otl->otl_l3req
-          *      Fill in otl_{src,dst}ip
-          * Else
-          *      clear otl->otl_l3req
-          */
          otl->otl_dlid = entry->ote_odd->odd_linkid;
          otl->otl_reqid = (uintptr_t)entry;
          otl->otl_varpdid = entry->ote_ott->ott_id;
          otl->otl_vnetid = entry->ote_odd->odd_vid;
  
          otl->otl_hdrsize = mhi.mhi_hdrsize;
          otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
-         bcopy(mhi.mhi_daddr, otl->otl_addru.otlu_l2.otl2_dstaddr, ETHERADDRL);
-         bcopy(mhi.mhi_saddr, otl->otl_addru.otlu_l2.otl2_srcaddr, ETHERADDRL);
          otl->otl_addru.otlu_l2.otl2_dsttype = mhi.mhi_dsttype;
          otl->otl_addru.otlu_l2.otl2_sap = mhi.mhi_bindsap;
          otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
          mutex_exit(&entry->ote_lock);
  
          mutex_enter(&thdl->oth_lock);
          list_insert_tail(&thdl->oth_outstanding, entry);
          mutex_exit(&thdl->oth_lock);
--- 738,766 ----
                          overlay_target_queue(entry);
                  freemsg(mp);
                  goto again;
          }
  
          otl->otl_dlid = entry->ote_odd->odd_linkid;
          otl->otl_reqid = (uintptr_t)entry;
          otl->otl_varpdid = entry->ote_ott->ott_id;
          otl->otl_vnetid = entry->ote_odd->odd_vid;
  
          otl->otl_hdrsize = mhi.mhi_hdrsize;
          otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
          otl->otl_addru.otlu_l2.otl2_dsttype = mhi.mhi_dsttype;
          otl->otl_addru.otlu_l2.otl2_sap = mhi.mhi_bindsap;
          otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
+ 
+         /*
+          * The overlay_mblk_vl3ip() call above fills in dst & src for
+          * VL3->UL3 requests, so only need to care about VL2->UL3 here.
+          */
+         if (!otl->otl_l3req) {
+                 bcopy(mhi.mhi_daddr, dst, ETHERADDRL);
+                 bcopy(mhi.mhi_saddr, src, ETHERADDRL);
+         }
          mutex_exit(&entry->ote_lock);
  
          mutex_enter(&thdl->oth_lock);
          list_insert_tail(&thdl->oth_outstanding, entry);
          mutex_exit(&thdl->oth_lock);
*** 686,696 ****
--- 772,792 ----
  overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
  {
          const overlay_targ_resp_t *otr = arg;
          overlay_target_entry_t *entry;
          mblk_t *mp;
+         boolean_t is_router = B_FALSE;
  
+         /*
+          * If we ever support a protocol that uses MAC addresses for the UL
+          * destination addr, we probably should expand this to check that
+          * all of otr is zero.
+          */
+         if (IN6_IS_ADDR_UNSPECIFIED(&otr->otr_answer.otp_ip) &&
+             otr->otr_answer.otp_port == 0)
+                 is_router = B_TRUE;
+ 
          mutex_enter(&thdl->oth_lock);
          for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
              entry = list_next(&thdl->oth_outstanding, entry)) {
                  if ((uintptr_t)entry == otr->otr_reqid)
                          break;
*** 706,715 ****
--- 802,813 ----
          mutex_enter(&entry->ote_lock);
          bcopy(&otr->otr_answer, &entry->ote_dest,
              sizeof (overlay_target_point_t));
          entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
          entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
+         if (is_router)
+                 entry->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
          mp = entry->ote_chead;
          entry->ote_chead = NULL;
          entry->ote_ctail = NULL;
          entry->ote_mbsize = 0;
          entry->ote_vtime = gethrtime();
*** 1128,1160 ****
                      sizeof (overlay_target_point_t));
          } else {
                  overlay_target_entry_t *ote;
                  ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
                      otc->otc_entry.otce_mac);
!                 if (ote != NULL) {
                          mutex_enter(&ote->ote_lock);
!                         if ((ote->ote_flags &
!                             OVERLAY_ENTRY_F_VALID_MASK) != 0) {
                                  if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
                                          otc->otc_entry.otce_flags =
                                              OVERLAY_TARGET_CACHE_DROP;
                                  } else {
                                          otc->otc_entry.otce_flags = 0;
!                                         bcopy(&ote->ote_dest,
!                                             &otc->otc_entry.otce_dest,
                                              sizeof (overlay_target_point_t));
                                  }
                                  ret = 0;
                          } else {
                                  ret = ENOENT;
                          }
                          mutex_exit(&ote->ote_lock);
-                 } else {
-                         ret = ENOENT;
                  }
-         }
  
          mutex_exit(&ott->ott_lock);
          overlay_hold_rele(odd);
  
          return (ret);
  }
--- 1226,1261 ----
                      sizeof (overlay_target_point_t));
          } else {
                  overlay_target_entry_t *ote;
                  ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
                      otc->otc_entry.otce_mac);
!                 if (ote == NULL) {
!                         ret = ENOENT;
!                         goto done;
!                 }
! 
                  mutex_enter(&ote->ote_lock);
!                 if ((ote->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) != 0) {
                          if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
                                  otc->otc_entry.otce_flags =
                                      OVERLAY_TARGET_CACHE_DROP;
+                         } else if (ote->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
+                                 otc->otc_entry.otce_flags =
+                                     OVERLAY_TARGET_CACHE_ROUTER;
                          } else {
                                  otc->otc_entry.otce_flags = 0;
!                                 bcopy(&ote->ote_dest, &otc->otc_entry.otce_dest,
                                      sizeof (overlay_target_point_t));
                          }
                          ret = 0;
                  } else {
                          ret = ENOENT;
                  }
                  mutex_exit(&ote->ote_lock);
          }
  
+ done:
          mutex_exit(&ott->ott_lock);
          overlay_hold_rele(odd);
  
          return (ret);
  }
*** 1167,1179 ****
          overlay_target_t *ott;
          overlay_target_entry_t *ote;
          overlay_targ_cache_t *otc = arg;
          mblk_t *mp = NULL;
  
!         if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
                  return (EINVAL);
  
          odd = overlay_hold_by_dlid(otc->otc_linkid);
          if (odd == NULL)
                  return (ENOENT);
  
          mutex_enter(&odd->odd_lock);
--- 1268,1285 ----
          overlay_target_t *ott;
          overlay_target_entry_t *ote;
          overlay_targ_cache_t *otc = arg;
          mblk_t *mp = NULL;
  
!         if (otc->otc_entry.otce_flags &
!             ~(OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
                  return (EINVAL);
  
+         if (otc->otc_entry.otce_flags ==
+             (OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
+                 return (EINVAL);
+ 
          odd = overlay_hold_by_dlid(otc->otc_linkid);
          if (odd == NULL)
                  return (ENOENT);
  
          mutex_enter(&odd->odd_lock);
*** 1209,1218 ****
--- 1315,1326 ----
  
          if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
                  ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
          } else {
                  ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
+                 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_ROUTER)
+                         ote->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
                  bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
                      sizeof (overlay_target_point_t));
                  mp = ote->ote_chead;
                  ote->ote_chead = NULL;
                  ote->ote_ctail = NULL;