Print this page
Overlay fabric router

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/overlay/overlay_target.c
          +++ new/usr/src/uts/common/io/overlay/overlay_target.c
↓ open down ↓ 72 lines elided ↑ open up ↑
  73   73          int             oti_cmd;        /* ioctl id */
  74   74          boolean_t       oti_write;      /* ioctl requires FWRITE */
  75   75          boolean_t       oti_ncopyout;   /* copyout data? */
  76   76          overlay_target_copyin_f oti_copyin;     /* copyin func */
  77   77          overlay_target_ioctl_f oti_func; /* function to call */
  78   78          overlay_target_copyout_f oti_copyout;   /* copyout func */
  79   79          size_t          oti_size;       /* size of user level structure */
  80   80  } overlay_target_ioctl_t;
  81   81  
  82   82  static kmem_cache_t *overlay_target_cache;
  83      -static kmem_cache_t *overlay_entry_cache;
       83 +kmem_cache_t *overlay_entry_cache;
  84   84  static id_space_t *overlay_thdl_idspace;
  85   85  static void *overlay_thdl_state;
  86   86  
  87   87  /*
  88   88   * When we support overlay devices in the NGZ, then all of these need to become
  89   89   * zone aware, by plugging into the netstack engine and becoming per-netstack
  90   90   * data.
  91   91   */
  92   92  static list_t overlay_thdl_list;
  93   93  static kmutex_t overlay_target_lock;
  94   94  static kcondvar_t overlay_target_condvar;
  95   95  static list_t overlay_target_list;
  96   96  static boolean_t overlay_target_excl;
  97   97  
  98   98  /*
  99   99   * Outstanding data per hash table entry.
 100  100   */
 101      -static int overlay_ent_size = 128 * 1024;
      101 +int overlay_ent_size = 128 * 1024;
 102  102  
 103  103  /* ARGSUSED */
 104  104  static int
 105  105  overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
 106  106  {
 107  107          overlay_target_t *ott = buf;
 108  108  
 109  109          mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
 110  110          cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
 111  111          return (0);
↓ open down ↓ 22 lines elided ↑ open up ↑
 134  134  
 135  135  /* ARGSUSED */
 136  136  static void
 137  137  overlay_entry_cache_destructor(void *buf, void *arg)
 138  138  {
 139  139          overlay_target_entry_t *ote = buf;
 140  140  
 141  141          mutex_destroy(&ote->ote_lock);
 142  142  }
 143  143  
 144      -/* TODO: we will need to modify these to hash/cmp DCID + MAC */
 145      -
 146  144  static uint64_t
 147  145  overlay_mac_hash(const void *v)
 148  146  {
 149  147          uint32_t crc;
 150  148          CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
 151  149          return (crc);
 152  150  }
 153  151  
 154  152  static int
 155  153  overlay_mac_cmp(const void *a, const void *b)
 156  154  {
 157  155          return (bcmp(a, b, ETHERADDRL));
 158  156  }
 159  157  
      158 +static uint64_t
      159 +overlay_vl3_hash(const void *v)
      160 +{
      161 +        const overlay_target_entry_t *ote = v;
      162 +        uint32_t crc;
      163 +
      164 +        CRC32(crc, &ote->ote_ip, sizeof (ote->ote_ip), -1U, crc32_table);
      165 +        CRC32(crc, &ote->ote_fab, sizeof (ote->ote_fab), crc, crc32_table);
      166 +        return (crc);
      167 +}
      168 +
      169 +static int
      170 +overlay_vl3_cmp(const void *a, const void *b)
      171 +{
      172 +        const overlay_target_entry_t *l = a;
      173 +        const overlay_target_entry_t *r = b;
      174 +
      175 +        if (l->ote_fab != r->ote_fab ||
      176 +            bcmp(&l->ote_ip, &r->ote_ip, sizeof (struct in6_addr)) != 0)
      177 +                return (1);
      178 +        return (0);
      179 +}
      180 +
      181 +static int
      182 +overlay_vl3_avl(const void *a, const void *b)
      183 +{
      184 +        const overlay_target_entry_t *l = a;
      185 +        const overlay_target_entry_t *r = b;
      186 +
      187 +        if (l->ote_fab < r->ote_fab)
      188 +                return (-1);
      189 +        if (l->ote_fab > r->ote_fab)
      190 +                return (1);
      191 +        return (memcmp(&l->ote_ip, &r->ote_ip, sizeof (struct in6_addr)));
      192 +}
      193 +
 160  194  /* ARGSUSED */
 161      -static void
      195 +void
      196 +overlay_target_entry_null_dtor(void *arg)
      197 +{
      198 +}
      199 +
      200 +/* ARGSUSED */
      201 +void
 162  202  overlay_target_entry_dtor(void *arg)
 163  203  {
 164  204          overlay_target_entry_t *ote = arg;
 165  205  
      206 +        ASSERT3U(ote->ote_refcnt, ==, 0);
      207 +
 166  208          ote->ote_flags = 0;
 167  209          bzero(ote->ote_addr, ETHERADDRL);
      210 +        bzero(&ote->ote_ip, sizeof (ote->ote_ip));
 168  211          ote->ote_ott = NULL;
 169  212          ote->ote_odd = NULL;
      213 +        ote->ote_fab = NULL;
 170  214          freemsgchain(ote->ote_chead);
 171  215          ote->ote_chead = ote->ote_ctail = NULL;
 172  216          ote->ote_mbsize = 0;
 173  217          ote->ote_vtime = 0;
 174  218          kmem_cache_free(overlay_entry_cache, ote);
 175  219  }
 176  220  
 177  221  static int
 178  222  overlay_mac_avl(const void *a, const void *b)
 179  223  {
↓ open down ↓ 49 lines elided ↑ open up ↑
 229  273  }
 230  274  
 231  275  void
 232  276  overlay_target_free(overlay_dev_t *odd)
 233  277  {
 234  278          if (odd->odd_target == NULL)
 235  279                  return;
 236  280  
 237  281          if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
 238  282                  refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
      283 +                refhash_t *r3p = odd->odd_target->ott_u.ott_dyn.ott_l3dhash;
 239  284                  avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
      285 +                avl_tree_t *a3p = &odd->odd_target->ott_u.ott_dyn.ott_l3tree;
 240  286                  overlay_target_entry_t *ote;
 241  287  
 242      -                /* TODO: remove from L3 trees */
 243      -
 244  288                  /*
 245  289                   * Our AVL tree and hashtable contain the same elements,
 246  290                   * therefore we should just remove it from the tree, but then
 247  291                   * delete the entries when we remove them from the hash table
 248  292                   * (which happens through the refhash dtor).
 249  293                   */
 250      -                while ((ote = avl_first(ap)) != NULL)
      294 +                while ((ote = avl_first(ap)) != NULL) {
 251  295                          avl_remove(ap, ote);
 252      -
      296 +                        OVERLAY_TARG_ENTRY_REFRELE(ote);
      297 +                }
 253  298                  avl_destroy(ap);
      299 +
      300 +                while ((ote = avl_first(a3p)) != NULL) {
      301 +                        avl_remove(a3p, ote);
      302 +                        OVERLAY_TARG_ENTRY_REFRELE(ote);
      303 +                }
      304 +                avl_destroy(a3p);
      305 +
 254  306                  for (ote = refhash_first(rp); ote != NULL;
 255  307                      ote = refhash_next(rp, ote)) {
 256  308                          refhash_remove(rp, ote);
      309 +                        OVERLAY_TARG_ENTRY_REFRELE(ote);
 257  310                  }
 258  311                  refhash_destroy(rp);
      312 +
      313 +                for (ote = refhash_first(r3p); ote != NULL;
      314 +                    ote = refhash_next(r3p, ote)) {
      315 +                        refhash_remove(r3p, ote);
      316 +                        OVERLAY_TARG_ENTRY_REFRELE(ote);
      317 +                }
      318 +                refhash_destroy(r3p);
 259  319          }
 260  320  
 261  321          ASSERT(odd->odd_target->ott_ocount == 0);
      322 +        bzero(&odd->odd_target->ott_u, sizeof (odd->odd_target->ott_u));
 262  323          kmem_cache_free(overlay_target_cache, odd->odd_target);
      324 +        odd->odd_target = NULL;
 263  325  }
 264  326  
 265  327  int
 266  328  overlay_target_busy()
 267  329  {
 268  330          int ret;
 269  331  
 270  332          mutex_enter(&overlay_target_lock);
 271  333          ret = !list_is_empty(&overlay_thdl_list);
 272  334          mutex_exit(&overlay_target_lock);
 273  335  
 274  336          return (ret);
 275  337  }
 276  338  
 277      -static void
      339 +void
 278  340  overlay_target_queue(overlay_target_entry_t *entry)
 279  341  {
 280  342          mutex_enter(&overlay_target_lock);
 281  343          mutex_enter(&entry->ote_ott->ott_lock);
 282  344          if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
 283  345                  mutex_exit(&entry->ote_ott->ott_lock);
 284  346                  mutex_exit(&overlay_target_lock);
 285  347                  return;
 286  348          }
 287  349          entry->ote_ott->ott_ocount++;
↓ open down ↓ 11 lines elided ↑ open up ↑
 299  361          mutex_enter(&ott->ott_lock);
 300  362          ott->ott_flags |= OVERLAY_T_TEARDOWN;
 301  363          while (ott->ott_ocount != 0)
 302  364                  cv_wait(&ott->ott_cond, &ott->ott_lock);
 303  365          mutex_exit(&ott->ott_lock);
 304  366  }
 305  367  
 306  368  /*
 307  369   * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
 308  370   * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at
 309      - * this time, say for NVGRE, we drop all packets that mcuh this.
 310      - *
 311      - * XXX: It might be better to replace the 'sock' argument with
 312      - * overlay_target_entry_t** and set it with the found entry in the case
 313      - * of OVERLAY_TARGET_OK.
      371 + * this time, say for NVGRE, we drop all packets that match this.
 314  372   */
 315  373  int
 316  374  overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
 317      -    socklen_t *slenp)
      375 +    socklen_t *slenp, uint64_t *vidp)
 318  376  {
 319  377          int ret;
 320  378          struct sockaddr_in6 *v6;
 321  379          overlay_target_t *ott;
 322  380          mac_header_info_t mhi;
 323  381          overlay_target_entry_t *entry;
 324  382  
 325  383          ASSERT(odd->odd_target != NULL);
 326  384  
      385 +        *vidp = odd->odd_vid;
      386 +
 327  387          /*
 328  388           * At this point, the overlay device is in a mux which means that it's
 329  389           * been activated. At this point, parts of the target, such as the mode
 330  390           * and the destination are now read-only and we don't have to worry
 331  391           * about synchronization for them.
 332  392           */
 333  393          ott = odd->odd_target;
 334  394          if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
 335  395                  return (OVERLAY_TARGET_DROP);
 336  396  
↓ open down ↓ 8 lines elided ↑ open up ↑
 345  405                  v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
 346  406                  mutex_exit(&ott->ott_lock);
 347  407                  *slenp = sizeof (struct sockaddr_in6);
 348  408  
 349  409                  return (OVERLAY_TARGET_OK);
 350  410          }
 351  411  
 352  412          ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
 353  413  
 354  414          /*
 355      -         * Note we only want the MAC address here, therefore we won't bother
 356      -         * using mac_vlan_header_info(). If any caller needs the vlan info at
 357      -         * this point, this should change to a call to mac_vlan_header_info().
      415 +         * VL2 -> UL3 lookups only need the destination VL2 mac address,
      416 +         * however, if we end up having to route the packet, we will need
      417 +         * the source vlan as part of the destination selection.
 358  418           */
 359      -        if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
      419 +        if (mac_vlan_header_info(odd->odd_mh, mp, &mhi) != 0)
 360  420                  return (OVERLAY_TARGET_DROP);
 361  421  
 362      -        /*
 363      -         * TODO: compare mhi.mhi_daddr with odd->macaddr.
 364      -         * If match,
 365      -         *      get VL3 dest from mp
 366      -         *      lookup target using VL3 dest
 367      -         * otherwise,
 368      -         *      lookup target using VL2 dest (existing refhash_lookup() call
 369      -         *      below)
 370      -         */
 371  422          mutex_enter(&ott->ott_lock);
 372  423          entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
 373  424              mhi.mhi_daddr);
 374  425          if (entry == NULL) {
 375  426                  entry = kmem_cache_alloc(overlay_entry_cache,
 376  427                      KM_NOSLEEP | KM_NORMALPRI);
 377  428                  if (entry == NULL) {
 378  429                          mutex_exit(&ott->ott_lock);
 379  430                          return (OVERLAY_TARGET_DROP);
 380  431                  }
 381      -                /*
 382      -                 * TODO: set entry->ote_dcid, if VL3 lookup, copy dst addr
 383      -                 * into entry->ote_ip.  Probably zero out the address we're
 384      -                 * not lookup up (VL2 or VL3) as well.
 385      -                 */
 386  432                  bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
 387  433                  entry->ote_chead = entry->ote_ctail = mp;
 388  434                  entry->ote_mbsize = msgsize(mp);
 389  435                  entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
 390  436                  entry->ote_ott = ott;
 391  437                  entry->ote_odd = odd;
      438 +
      439 +                OVERLAY_TARG_ENTRY_REFHOLD(entry);
 392  440                  refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
      441 +
      442 +                OVERLAY_TARG_ENTRY_REFHOLD(entry);
 393  443                  avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
      444 +
 394  445                  mutex_exit(&ott->ott_lock);
 395  446                  overlay_target_queue(entry);
 396  447                  return (OVERLAY_TARGET_ASYNC);
 397  448          }
 398      -        refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
      449 +        OVERLAY_TARG_ENTRY_REFHOLD(entry);
 399  450          mutex_exit(&ott->ott_lock);
 400  451  
 401  452          mutex_enter(&entry->ote_lock);
 402  453          if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
 403  454                  ret = OVERLAY_TARGET_DROP;
      455 +        } else if (entry->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
      456 +                ret = overlay_route_lookup(odd, mp, &mhi, sock, slenp, vidp);
 404  457          } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
 405  458                  bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
 406  459                      sizeof (struct in6_addr));
 407  460                  v6->sin6_port = htons(entry->ote_dest.otp_port);
 408  461                  *slenp = sizeof (struct sockaddr_in6);
 409  462                  ret = OVERLAY_TARGET_OK;
 410  463          } else {
 411  464                  size_t mlen = msgsize(mp);
 412  465  
 413  466                  if (mlen + entry->ote_mbsize > overlay_ent_size) {
↓ open down ↓ 14 lines elided ↑ open up ↑
 428  481                                  entry->ote_flags |=
 429  482                                      OVERLAY_ENTRY_F_PENDING;
 430  483                                  overlay_target_queue(entry);
 431  484                          }
 432  485                          ret = OVERLAY_TARGET_ASYNC;
 433  486                  }
 434  487          }
 435  488          mutex_exit(&entry->ote_lock);
 436  489  
 437  490          mutex_enter(&ott->ott_lock);
 438      -        refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
      491 +        OVERLAY_TARG_ENTRY_REFRELE(entry);
 439  492          mutex_exit(&ott->ott_lock);
 440  493  
 441  494          return (ret);
 442  495  }
 443  496  
 444  497  /* ARGSUSED */
 445  498  static int
 446  499  overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
 447  500  {
 448  501          overlay_dev_t *odd;
↓ open down ↓ 17 lines elided ↑ open up ↑
 466  519          return (0);
 467  520  }
 468  521  
 469  522  /* ARGSUSED */
 470  523  static int
 471  524  overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
 472  525  {
 473  526          overlay_dev_t *odd;
 474  527          overlay_target_t *ott;
 475  528          overlay_targ_associate_t *ota = arg;
      529 +        overlay_router_t *ort;
 476  530  
 477  531          odd = overlay_hold_by_dlid(ota->ota_linkid);
 478  532          if (odd == NULL)
 479  533                  return (ENOENT);
 480  534  
 481  535          if (ota->ota_id == 0) {
 482  536                  overlay_hold_rele(odd);
 483  537                  return (EINVAL);
 484  538          }
 485  539  
↓ open down ↓ 32 lines elided ↑ open up ↑
 518  572          ott->ott_mode = ota->ota_mode;
 519  573          ott->ott_dest = ota->ota_provides;
 520  574          ott->ott_id = ota->ota_id;
 521  575  
 522  576          if (ott->ott_mode == OVERLAY_TARGET_POINT) {
 523  577                  bcopy(&ota->ota_point, &ott->ott_u.ott_point,
 524  578                      sizeof (overlay_target_point_t));
 525  579          } else {
 526  580                  ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
 527  581                      overlay_mac_hash, overlay_mac_cmp,
 528      -                    overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
      582 +                    overlay_target_entry_null_dtor,
      583 +                    sizeof (overlay_target_entry_t),
 529  584                      offsetof(overlay_target_entry_t, ote_reflink),
 530  585                      offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
      586 +                ott->ott_u.ott_dyn.ott_l3dhash = refhash_create(OVERLAY_HSIZE,
      587 +                    overlay_vl3_hash, overlay_vl3_cmp,
      588 +                    overlay_target_entry_null_dtor,
      589 +                    sizeof (overlay_target_entry_t),
      590 +                    offsetof(overlay_target_entry_t, ote_l3_reflink), 0,
      591 +                    KM_SLEEP);
 531  592                  avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
 532  593                      sizeof (overlay_target_entry_t),
 533  594                      offsetof(overlay_target_entry_t, ote_avllink));
      595 +                avl_create(&ott->ott_u.ott_dyn.ott_l3tree, overlay_vl3_avl,
      596 +                    sizeof (overlay_target_entry_t),
      597 +                    offsetof(overlay_target_entry_t, ote_l3_avllink));
      598 +
      599 +                ort = kmem_zalloc(sizeof (*ort), KM_SLEEP);
      600 +                mutex_init(&ort->otr_lock, NULL, MUTEX_DRIVER, NULL);
      601 +                list_create(&ort->otr_tables, sizeof (overlay_route_table_t),
      602 +                    offsetof(overlay_route_table_t, ort_link));
      603 +                avl_create(&ort->otr_tree, overlay_fabric_avl,
      604 +                    sizeof (overlay_fabric_entry_t),
      605 +                    offsetof(overlay_fabric_entry_t, ofe_avllink));
 534  606          }
 535  607          mutex_enter(&odd->odd_lock);
 536  608          if (odd->odd_flags & OVERLAY_F_VARPD) {
 537  609                  mutex_exit(&odd->odd_lock);
 538  610                  kmem_cache_free(overlay_target_cache, ott);
 539  611                  overlay_hold_rele(odd);
 540  612                  return (EEXIST);
 541  613          }
 542  614  
 543  615          odd->odd_flags |= OVERLAY_F_VARPD;
 544  616          odd->odd_target = ott;
 545  617          mutex_exit(&odd->odd_lock);
 546  618  
 547  619          overlay_hold_rele(odd);
 548  620  
 549      -
 550  621          return (0);
 551  622  }
 552  623  
 553  624  
 554  625  /* ARGSUSED */
 555  626  static int
 556  627  overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
 557  628  {
 558  629          overlay_dev_t *odd;
 559  630          overlay_targ_degrade_t *otd = arg;
↓ open down ↓ 41 lines elided ↑ open up ↑
 601  672          overlay_hold_rele(odd);
 602  673          return (0);
 603  674  
 604  675  }
 605  676  
 606  677  static int
 607  678  overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
 608  679  {
 609  680          overlay_targ_lookup_t *otl = arg;
 610  681          overlay_target_entry_t *entry;
      682 +        void *src, *dst;
 611  683          clock_t ret, timeout;
 612  684          mac_header_info_t mhi;
 613      -
 614  685          timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
 615  686  again:
 616  687          mutex_enter(&overlay_target_lock);
 617  688          while (list_is_empty(&overlay_target_list)) {
 618  689                  ret = cv_timedwait(&overlay_target_condvar,
 619  690                      &overlay_target_lock, timeout);
 620  691                  if (ret == -1) {
 621  692                          mutex_exit(&overlay_target_lock);
 622  693                          return (ETIME);
 623  694                  }
 624  695          }
 625  696          entry = list_remove_head(&overlay_target_list);
 626  697          mutex_exit(&overlay_target_lock);
 627  698          mutex_enter(&entry->ote_lock);
 628      -        if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
      699 +        if (entry->ote_flags &
      700 +            (OVERLAY_ENTRY_F_PENDING | OVERLAY_ENTRY_F_VL3_PENDING)) {
 629  701                  ASSERT(entry->ote_chead == NULL);
 630  702                  mutex_exit(&entry->ote_lock);
 631  703                  goto again;
 632  704          }
 633  705          ASSERT(entry->ote_chead != NULL);
 634  706  
      707 +
      708 +        otl->otl_l3req = (entry->ote_flags & OVERLAY_ENTRY_F_VL3_PENDING) ?
      709 +            B_TRUE : B_FALSE;
      710 +
      711 +        if (otl->otl_l3req) {
      712 +                src = &otl->otl_addru.otlu_l3.otl3_srcip;
      713 +                dst = &otl->otl_addru.otlu_l3.otl3_dstip;
      714 +        } else {
      715 +                src = &otl->otl_addru.otlu_l2.otl2_srcaddr;
      716 +                dst = &otl->otl_addru.otlu_l2.otl2_dstaddr;
      717 +        }
      718 +
 635  719          /*
 636      -         * If we have a bogon that doesn't have a valid mac header, drop it and
 637      -         * try again.
      720 +         * If we have a bogon that doesn't have a valid mac header, or an
      721 +         * invalid IP header for IP requests, drop it and try again.
 638  722           */
 639      -        if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
 640      -            &mhi) != 0) {
      723 +        if ((mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
      724 +            &mhi) != 0) ||
      725 +            (otl->otl_l3req && overlay_mblk_vl3ip(entry->ote_chead, src,
      726 +            dst) != 0)) {
 641  727                  boolean_t queue = B_FALSE;
 642  728                  mblk_t *mp = entry->ote_chead;
 643  729                  entry->ote_chead = mp->b_next;
 644  730                  mp->b_next = NULL;
 645  731                  if (entry->ote_ctail == mp)
 646  732                          entry->ote_ctail = entry->ote_chead;
 647  733                  entry->ote_mbsize -= msgsize(mp);
 648  734                  if (entry->ote_chead != NULL)
 649  735                          queue = B_TRUE;
 650  736                  mutex_exit(&entry->ote_lock);
 651  737                  if (queue == B_TRUE)
 652  738                          overlay_target_queue(entry);
 653  739                  freemsg(mp);
 654  740                  goto again;
 655  741          }
 656  742  
 657      -        /*
 658      -         * TODO: If VL3 request,
 659      -         *      set otl->otl_l3req
 660      -         *      Fill in otl_{src,dst}ip
 661      -         * Else
 662      -         *      clear otl->otl_l3req
 663      -         */
 664  743          otl->otl_dlid = entry->ote_odd->odd_linkid;
 665  744          otl->otl_reqid = (uintptr_t)entry;
 666  745          otl->otl_varpdid = entry->ote_ott->ott_id;
 667  746          otl->otl_vnetid = entry->ote_odd->odd_vid;
 668  747  
 669  748          otl->otl_hdrsize = mhi.mhi_hdrsize;
 670  749          otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
 671      -        bcopy(mhi.mhi_daddr, otl->otl_addru.otlu_l2.otl2_dstaddr, ETHERADDRL);
 672      -        bcopy(mhi.mhi_saddr, otl->otl_addru.otlu_l2.otl2_srcaddr, ETHERADDRL);
 673  750          otl->otl_addru.otlu_l2.otl2_dsttype = mhi.mhi_dsttype;
 674  751          otl->otl_addru.otlu_l2.otl2_sap = mhi.mhi_bindsap;
 675  752          otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
      753 +
      754 +        /*
      755 +         * The overlay_mblk_vl3ip() call above fills in dst & src for
      756 +         * VL3->UL3 requests, so only need to care about VL2->UL3 here.
      757 +         */
      758 +        if (!otl->otl_l3req) {
      759 +                bcopy(mhi.mhi_daddr, dst, ETHERADDRL);
      760 +                bcopy(mhi.mhi_saddr, src, ETHERADDRL);
      761 +        }
 676  762          mutex_exit(&entry->ote_lock);
 677  763  
 678  764          mutex_enter(&thdl->oth_lock);
 679  765          list_insert_tail(&thdl->oth_outstanding, entry);
 680  766          mutex_exit(&thdl->oth_lock);
 681  767  
 682  768          return (0);
 683  769  }
 684  770  
 685  771  static int
 686  772  overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
 687  773  {
 688  774          const overlay_targ_resp_t *otr = arg;
 689  775          overlay_target_entry_t *entry;
 690  776          mblk_t *mp;
      777 +        boolean_t is_router = B_FALSE;
 691  778  
      779 +        /*
      780 +         * If we ever support a protocol that uses MAC addresses for the UL
      781 +         * destination addr, we probably should expand this to check that
      782 +         * all of otr is zero.
      783 +         */
      784 +        if (IN6_IS_ADDR_UNSPECIFIED(&otr->otr_answer.otp_ip) &&
      785 +            otr->otr_answer.otp_port == 0)
      786 +                is_router = B_TRUE;
      787 +
 692  788          mutex_enter(&thdl->oth_lock);
 693  789          for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
 694  790              entry = list_next(&thdl->oth_outstanding, entry)) {
 695  791                  if ((uintptr_t)entry == otr->otr_reqid)
 696  792                          break;
 697  793          }
 698  794  
 699  795          if (entry == NULL) {
 700  796                  mutex_exit(&thdl->oth_lock);
 701  797                  return (EINVAL);
 702  798          }
 703  799          list_remove(&thdl->oth_outstanding, entry);
 704  800          mutex_exit(&thdl->oth_lock);
 705  801  
 706  802          mutex_enter(&entry->ote_lock);
 707  803          bcopy(&otr->otr_answer, &entry->ote_dest,
 708  804              sizeof (overlay_target_point_t));
 709  805          entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
 710  806          entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
      807 +        if (is_router)
      808 +                entry->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
 711  809          mp = entry->ote_chead;
 712  810          entry->ote_chead = NULL;
 713  811          entry->ote_ctail = NULL;
 714  812          entry->ote_mbsize = 0;
 715  813          entry->ote_vtime = gethrtime();
 716  814          mutex_exit(&entry->ote_lock);
 717  815  
 718  816          /*
 719  817           * For now do an in-situ drain.
 720  818           *
↓ open down ↓ 402 lines elided ↑ open up ↑
1123 1221          mutex_exit(&odd->odd_lock);
1124 1222  
1125 1223          if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1126 1224                  otc->otc_entry.otce_flags = 0;
1127 1225                  bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
1128 1226                      sizeof (overlay_target_point_t));
1129 1227          } else {
1130 1228                  overlay_target_entry_t *ote;
1131 1229                  ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1132 1230                      otc->otc_entry.otce_mac);
1133      -                if (ote != NULL) {
1134      -                        mutex_enter(&ote->ote_lock);
1135      -                        if ((ote->ote_flags &
1136      -                            OVERLAY_ENTRY_F_VALID_MASK) != 0) {
1137      -                                if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
1138      -                                        otc->otc_entry.otce_flags =
1139      -                                            OVERLAY_TARGET_CACHE_DROP;
1140      -                                } else {
1141      -                                        otc->otc_entry.otce_flags = 0;
1142      -                                        bcopy(&ote->ote_dest,
1143      -                                            &otc->otc_entry.otce_dest,
1144      -                                            sizeof (overlay_target_point_t));
1145      -                                }
1146      -                                ret = 0;
     1231 +                if (ote == NULL) {
     1232 +                        ret = ENOENT;
     1233 +                        goto done;
     1234 +                }
     1235 +
     1236 +                mutex_enter(&ote->ote_lock);
     1237 +                if ((ote->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) != 0) {
     1238 +                        if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
     1239 +                                otc->otc_entry.otce_flags =
     1240 +                                    OVERLAY_TARGET_CACHE_DROP;
     1241 +                        } else if (ote->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
     1242 +                                otc->otc_entry.otce_flags =
     1243 +                                    OVERLAY_TARGET_CACHE_ROUTER;
1147 1244                          } else {
1148      -                                ret = ENOENT;
     1245 +                                otc->otc_entry.otce_flags = 0;
     1246 +                                bcopy(&ote->ote_dest, &otc->otc_entry.otce_dest,
     1247 +                                    sizeof (overlay_target_point_t));
1149 1248                          }
1150      -                        mutex_exit(&ote->ote_lock);
     1249 +                        ret = 0;
1151 1250                  } else {
1152 1251                          ret = ENOENT;
1153 1252                  }
     1253 +                mutex_exit(&ote->ote_lock);
1154 1254          }
1155 1255  
     1256 +done:
1156 1257          mutex_exit(&ott->ott_lock);
1157 1258          overlay_hold_rele(odd);
1158 1259  
1159 1260          return (ret);
1160 1261  }
1161 1262  
1162 1263  /* ARGSUSED */
1163 1264  static int
1164 1265  overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
1165 1266  {
1166 1267          overlay_dev_t *odd;
1167 1268          overlay_target_t *ott;
1168 1269          overlay_target_entry_t *ote;
1169 1270          overlay_targ_cache_t *otc = arg;
1170 1271          mblk_t *mp = NULL;
1171 1272  
1172      -        if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
     1273 +        if (otc->otc_entry.otce_flags &
     1274 +            ~(OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
1173 1275                  return (EINVAL);
1174 1276  
     1277 +        if (otc->otc_entry.otce_flags ==
     1278 +            (OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
     1279 +                return (EINVAL);
     1280 +
1175 1281          odd = overlay_hold_by_dlid(otc->otc_linkid);
1176 1282          if (odd == NULL)
1177 1283                  return (ENOENT);
1178 1284  
1179 1285          mutex_enter(&odd->odd_lock);
1180 1286          if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1181 1287                  mutex_exit(&odd->odd_lock);
1182 1288                  overlay_hold_rele(odd);
1183 1289                  return (ENXIO);
1184 1290          }
↓ open down ↓ 19 lines elided ↑ open up ↑
1204 1310                  refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
1205 1311                  avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
1206 1312          } else {
1207 1313                  mutex_enter(&ote->ote_lock);
1208 1314          }
1209 1315  
1210 1316          if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
1211 1317                  ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
1212 1318          } else {
1213 1319                  ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
     1320 +                if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_ROUTER)
     1321 +                        ote->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
1214 1322                  bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
1215 1323                      sizeof (overlay_target_point_t));
1216 1324                  mp = ote->ote_chead;
1217 1325                  ote->ote_chead = NULL;
1218 1326                  ote->ote_ctail = NULL;
1219 1327                  ote->ote_mbsize = 0;
1220 1328                  ote->ote_vtime = gethrtime();
1221 1329          }
1222 1330  
1223 1331          mutex_exit(&ote->ote_lock);
↓ open down ↓ 473 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX