Print this page
Factor out fixed/1-1 processing from vxlnat_one_vxlan(), paving the way for
future processing types.
Initial definitions of NAT flows.


 332                         remote->vxnrem_uaddr = underlay_src->sin6_addr;
 333                 }
 334                 /* Replace the vlan ID. Maintain network order... */
 335                 if (remote->vxnrem_vlan != vlan)
 336                         remote->vxnrem_vlan = vlan;
 337         }
 338         /*
 339          * Else just continue and pray for better luck on another packet or
 340          * on the return flight.  It is IP, we can Just Drop It (TM)...
 341          */
 342 
 343         /* We're done with the remote entry now. */
 344         VXNREM_REFRELE(remote);
 345 
 346         /* Advance rptr to the inner IP header and proceed. */
 347         mp->b_rptr = (uint8_t *)ipha;
 348         return (mp);
 349 }
 350 
 351 /*
































































































 352  * Process exactly one VXLAN packet.
 353  */
 354 static void
 355 vxlnat_one_vxlan(mblk_t *mp, struct sockaddr_in6 *underlay_src)
 356 {
 357         vxlan_hdr_t *vxh;
 358         vxlnat_vnet_t *vnet;
 359         ipha_t *ipha;
 360         ip6_t *ip6h;
 361         vxlnat_fixed_t *fixed, fsearch;
 362 
 363         if (MBLKL(mp) < sizeof (*vxh)) {
 364                 /* XXX KEBE ASKS -- should we be more forgiving? */
 365                 DTRACE_PROBE1(vxlnat__in__drop__vxlsize, mblk_t *, mp);
 366                 freemsg(mp);
 367                 return;
 368         }
 369         vxh = (vxlan_hdr_t *)mp->b_rptr;
 370 
 371         /* If we start using more than just the one flag, fix it. */
 372         if (vxh->vxlan_flags != VXLAN_F_VDI_WIRE) {
 373                 DTRACE_PROBE1(vxlnat__in__drop__VDI, mblk_t *, mp);
 374                 freemsg(mp);
 375                 return;
 376         }
 377 
 378         /* Remember, we key off of what's on the wire. */
 379         vnet = vxlnat_get_vnet(VXLAN_ID_WIRE32(vxh->vxlan_id), B_FALSE);
 380         if (vnet == NULL) {
 381                 DTRACE_PROBE1(vxlnat__in__drop__vnetid, uint32_t,


 389             vxlnat_vnet_t, vnet);
 390 
 391         /*
 392          * Off-vxlan processing steps:
 393          * 1.) Locate the ethernet header and check/update/add-into remotes.
 394          * 2.) Search 1-1s, process if hit.
 395          * 3.) Search flows, process if hit.
 396          * 4.) Search rules, create new flow (or not) if hit.
 397          * 5.) Drop the packets.
 398          */
 399 
 400         /* 1.) Locate the ethernet header and check/update/add-into remotes. */
 401         mp->b_rptr += sizeof (*vxh);
 402         while (MBLKL(mp) == 0) {
 403                 mblk_t *oldmp = mp;
 404 
 405                 mp = mp->b_cont;
 406                 freeb(oldmp);
 407         }
 408         mp = vxlnat_cache_remote(mp, underlay_src, vnet);
 409         if (mp == NULL) {
 410                 VXNV_REFRELE(vnet);
 411                 return;
 412         }
 413 
 414         /* 2.) Search 1-1s, process if hit. */
 415         ipha = (ipha_t *)mp->b_rptr;
 416         if (IPH_HDR_VERSION(ipha) == IPV4_VERSION) {

 417                 ip6h = NULL;
 418                 IN6_INADDR_TO_V4MAPPED((struct in_addr *)(&ipha->ipha_src),
 419                     &fsearch.vxnf_addr);
 420         } else {
 421                 /* vxlnat_cache_remote() did reality checks... */
 422                 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
 423                 ip6h = (ip6_t *)ipha;
 424                 ipha = NULL;
 425                 fsearch.vxnf_addr = ip6h->ip6_src;




 426         }
 427         rw_enter(&vnet->vxnv_fixed_lock, RW_READER);
 428         fixed = avl_find(&vnet->vxnv_fixed_ips, &fsearch, NULL);
 429         if (fixed != NULL)
 430                 VXNF_REFHOLD(fixed);
 431         rw_exit(&vnet->vxnv_fixed_lock);
 432         if (fixed != NULL) {
 433                 mblk_t *newmp = NULL;
 434 
 435                 /*
 436                  * XXX KEBE ASKS --> Do MTU check NOW?!  That way, we have
 437                  * pre-natted data.  One gotcha, external dests may have
 438                  * different PathMTUs so see below about EMSGSIZE...
 439                  */
 440 
 441                 /* XXX KEBE SAYS -- FILL ME IN... but for now: */
 442                 if (ipha != NULL)
 443                         newmp = vxlnat_fixed_fixv4(mp, fixed, B_FALSE);
 444                 else
 445                         freemsg(mp); /* XXX handle ip6h */
 446 
 447                 if (newmp != NULL) {
 448                         ire_t *outbound_ire;
 449                         /* Use C99's initializers for fun & profit. */
 450                         ip_recv_attr_t iras =
 451                             { IRAF_IS_IPV4 | IRAF_VERIFIED_SRC };
 452 
 453                         ASSERT3P(ipha, !=, NULL);
 454                         ASSERT3P(ipha, ==, newmp->b_rptr);
 455                         /* XXX KEBE ASKS, IRR_ALLOCATE okay?!? */
 456                         outbound_ire = ire_route_recursive_dstonly_v4(
 457                             ipha->ipha_dst, IRR_ALLOCATE,
 458                             0 /* XXX KEBE SAYS XMIT HINT! */,
 459                             vxlnat_netstack->netstack_ip);
 460                         VERIFY3P(outbound_ire, !=, NULL);
 461                         if (outbound_ire->ire_type == IRE_NOROUTE) {
 462                                 /* Bail! */
 463                                 VXNF_REFRELE(fixed);
 464                                 VXNV_REFRELE(vnet);
 465                                 return;
 466                         }
 467 
 468                         iras.ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
 469                         if (iras.ira_ip_hdr_length > sizeof (ipha_t))
 470                                 iras.ira_flags |= IRAF_IPV4_OPTIONS;
 471                         iras.ira_xmit_hint = 0; /* XXX KEBE SAYS FIX ME! */
 472                         iras.ira_zoneid = outbound_ire->ire_zoneid;
 473                         iras.ira_pktlen = ntohs(ipha->ipha_length);
 474                         iras.ira_protocol = ipha->ipha_protocol;
 475                         /* XXX KEBE ASKS rifindex & ruifindex ?!? */
 476                         /*
 477                          * NOTE: AT LEAST ira_ill needs ILLF_ROUTER set, as
 478                          * well as the ill for the external NIC (where
 479                          * off-link destinations live).  For fixed, ira_ill
 480                          * should be the ill of the external source.
 481                          */
 482                         iras.ira_rill = vxlnat_underlay_ire->ire_ill;
 483                         iras.ira_ill = fixed->vxnf_ire->ire_ill;
 484                         /* XXX KEBE ASKS cred & cpid ? */
 485                         iras.ira_verified_src = ipha->ipha_src;
 486                         /* XXX KEBE SAYS don't sweat IPsec stuff. */
 487                         /* XXX KEBE SAYS ALSO don't sweat l2src & mhip */
 488 
 489                         /* Okay, we're good! Let's pretend we're forwarding. */
 490                         ire_recv_forward_v4(outbound_ire, mp, ipha, &iras);
 491                         ire_refrele(outbound_ire);
 492                 }
 493 
 494                 /* All done... */
 495                 VXNF_REFRELE(fixed);
 496                 VXNV_REFRELE(vnet);
 497                 return;
 498         }
 499 
 500         /* XXX KEBE SAYS BUILD STEPS 3-4. */
 501 
 502         /* 5.) Nothing, drop the packet. */
 503         /* XXX KEBE ASKS DIAGNOSTIC? */
 504         VXNV_REFRELE(vnet);
 505         freemsg(mp);


 506 }
 507 /*
 508  * ONLY return B_FALSE if we get a packet-clogging event.
 509  */
 510 /* ARGSUSED */
 511 static boolean_t
 512 vxlnat_vxlan_input(ksocket_t insock, mblk_t *chain, size_t msgsize, int oob,
 513     void *ignored)
 514 {
 515         mblk_t *mp, *nextmp;
 516 
 517         /*
 518          * XXX KEBE ASKS --> move hold & release outside of loop?
 519          * If so, hold rwlock here.
 520          */
 521 
 522         for (mp = chain; mp != NULL; mp = nextmp) {
 523                 struct T_unitdata_ind *tudi;
 524                 struct sockaddr_in6 *sin6;
 525 


 621                     old_one, ipaddr_t, *new_ones_place);
 622                 freeb(mp);
 623                 return (NULL);
 624         }
 625         *new_ones_place = new_one;
 626 
 627         /* Adjust ICMP checksum... */
 628         icmph->icmph_checksum = vxlnat_cksum_adjust(icmph->icmph_checksum,
 629             (uint16_t *)&old_one, (uint16_t *)&new_one, sizeof (ipaddr_t));
 630 
 631         /*
 632          * XXX KEBE ASKS, recompute *inner-packet* checksums?  Let's not for
 633          * now, but consider this Fair Warning (or some other VH album...).
 634          */
 635         return (mp);
 636 }
 637 
 638 /*
 639  * Take a 1-1/fixed IPv4 packet and convert it for transmission out the
 640  * appropriate end. "to_private" is what it says on the tin.

 641  */
 642 static mblk_t *
 643 vxlnat_fixed_fixv4(mblk_t *mp, vxlnat_fixed_t *fixed, boolean_t to_private)
 644 {
 645         ipaddr_t new_one, old_one;
 646         ipaddr_t *new_ones_place;
 647         ipha_t *ipha = (ipha_t *)mp->b_rptr;
 648         uint8_t *nexthdr, *end_wptr;
 649 
 650         if (to_private) {
 651                 IN6_V4MAPPED_TO_IPADDR(&fixed->vxnf_addr, new_one);
 652                 new_ones_place = &ipha->ipha_dst;
 653         } else {
 654                 IN6_V4MAPPED_TO_IPADDR(&fixed->vxnf_pubaddr, new_one);
 655                 new_ones_place = &ipha->ipha_src;
 656         }
 657 
 658         old_one = *new_ones_place;
 659         *new_ones_place = new_one;
 660 




 332                         remote->vxnrem_uaddr = underlay_src->sin6_addr;
 333                 }
 334                 /* Replace the vlan ID. Maintain network order... */
 335                 if (remote->vxnrem_vlan != vlan)
 336                         remote->vxnrem_vlan = vlan;
 337         }
 338         /*
 339          * Else just continue and pray for better luck on another packet or
 340          * on the return flight.  It is IP, we can Just Drop It (TM)...
 341          */
 342 
 343         /* We're done with the remote entry now. */
 344         VXNREM_REFRELE(remote);
 345 
 346         /* Advance rptr to the inner IP header and proceed. */
 347         mp->b_rptr = (uint8_t *)ipha;
 348         return (mp);
 349 }
 350 
 351 /*
 352  * See if the inbound VXLAN packet hits a 1-1/fixed mapping, and process if it
 353  * does.  B_TRUE means the packet was handled, and we shouldn't continue
 354  * processing (even if "was handled" means droppage).
 355  */
 356 static boolean_t
 357 vxlnat_one_vxlan_fixed(vxlnat_vnet_t *vnet, mblk_t *mp, ipha_t *ipha,
 358     ip6_t *ip6h)
 359 {
 360         vxlnat_fixed_t *fixed, fsearch;
 361         mblk_t *newmp;
 362         ire_t *outbound_ire;
 363         /* Use C99's initializers for fun & profit. */
 364         ip_recv_attr_t iras = { IRAF_IS_IPV4 | IRAF_VERIFIED_SRC };
 365 
 366         if (ipha != NULL) {
 367                 IN6_INADDR_TO_V4MAPPED((struct in_addr *)(&ipha->ipha_src),
 368                     &fsearch.vxnf_addr);
 369         } else {
 370                 /* vxlnat_cache_remote() did reality checks... */
 371                 ASSERT(ipha == NULL && ip6h != NULL);
 372                 fsearch.vxnf_addr = ip6h->ip6_src;
 373         }
 374 
 375         rw_enter(&vnet->vxnv_fixed_lock, RW_READER);
 376         fixed = avl_find(&vnet->vxnv_fixed_ips, &fsearch, NULL);
 377         if (fixed != NULL)
 378                 VXNF_REFHOLD(fixed);
 379         rw_exit(&vnet->vxnv_fixed_lock);
 380         if (fixed == NULL)
 381                 return (B_FALSE);       /* Try another method of processing. */
 382 
 383         newmp = NULL;
 384         /*
 385          * XXX KEBE ASKS --> Do an MTU check NOW?!  That way, we have
 386          * pre-natted data.  One gotcha, external dests may have
 387          * different PathMTUs so see below about EMSGSIZE...
 388          *
 389          * For now, let the post-NAT crunch through
 390          * ire_recv_forward_v4() take care of all of that.
 391          */
 392 
 393         if (ipha != NULL)
 394                 newmp = vxlnat_fixed_fixv4(mp, fixed, B_FALSE);
 395         else {
 396                 freemsg(mp); /* XXX handle ip6h */
 397                 return (B_TRUE);
 398         }
 399 
 400         if (newmp == NULL)
 401                 return (B_TRUE);        /* mp eaten by vxlnat_fixed_fixv4() */
 402 
 403 
 404         ASSERT3P(ipha, ==, newmp->b_rptr);
 405         /* XXX KEBE ASKS, IRR_ALLOCATE okay?!? */
 406         /* XXX KEBE SAYS XMIT HINT! */
 407         outbound_ire = ire_route_recursive_dstonly_v4(ipha->ipha_dst,
 408             IRR_ALLOCATE, 0, vxlnat_netstack->netstack_ip);
 409         VERIFY3P(outbound_ire, !=, NULL);
 410         if (outbound_ire->ire_type == IRE_NOROUTE) {
 411                 /* Bail! */
 412                 DTRACE_PROBE2(vxlnat__in__drop__fixedire, ipaddr_t,
 413                     ipha->ipha_dst, mblk_t *, mp);
 414                 VXNF_REFRELE(fixed);
 415                 freemsg(mp);
 416                 return (B_TRUE);
 417         }
 418 
 419         iras.ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
 420         if (iras.ira_ip_hdr_length > sizeof (ipha_t))
 421                 iras.ira_flags |= IRAF_IPV4_OPTIONS;
 422         iras.ira_xmit_hint = 0; /* XXX KEBE SAYS FIX ME! */
 423         iras.ira_zoneid = outbound_ire->ire_zoneid;
 424         iras.ira_pktlen = ntohs(ipha->ipha_length);
 425         iras.ira_protocol = ipha->ipha_protocol;
 426         /* XXX KEBE ASKS rifindex & ruifindex ?!? */
 427         /*
 428          * NOTE: AT LEAST ira_ill needs ILLF_ROUTER set, as
 429          * well as the ill for the external NIC (where
 430          * off-link destinations live).  For fixed, ira_ill
 431          * should be the ill of the external source.
 432          */
 433         iras.ira_rill = vxlnat_underlay_ire->ire_ill;
 434         iras.ira_ill = fixed->vxnf_ire->ire_ill;
 435         /* XXX KEBE ASKS cred & cpid ? */
 436         iras.ira_verified_src = ipha->ipha_src;
 437         /* XXX KEBE SAYS don't sweat IPsec stuff. */
 438         /* XXX KEBE SAYS ALSO don't sweat l2src & mhip */
 439 
 440         /* Okay, we're good! Let's pretend we're forwarding. */
 441         ire_recv_forward_v4(outbound_ire, mp, ipha, &iras);
 442         ire_refrele(outbound_ire);
 443 
 444         return (B_TRUE);
 445 }
 446 
 447 /*
 448  * Process exactly one VXLAN packet.
 449  */
 450 static void
 451 vxlnat_one_vxlan(mblk_t *mp, struct sockaddr_in6 *underlay_src)
 452 {
 453         vxlan_hdr_t *vxh;
 454         vxlnat_vnet_t *vnet;
 455         ipha_t *ipha;
 456         ip6_t *ip6h;

 457 
 458         if (MBLKL(mp) < sizeof (*vxh)) {
 459                 /* XXX KEBE ASKS -- should we be more forgiving? */
 460                 DTRACE_PROBE1(vxlnat__in__drop__vxlsize, mblk_t *, mp);
 461                 freemsg(mp);
 462                 return;
 463         }
 464         vxh = (vxlan_hdr_t *)mp->b_rptr;
 465 
 466         /* If we start using more than just the one flag, fix it. */
 467         if (vxh->vxlan_flags != VXLAN_F_VDI_WIRE) {
 468                 DTRACE_PROBE1(vxlnat__in__drop__VDI, mblk_t *, mp);
 469                 freemsg(mp);
 470                 return;
 471         }
 472 
 473         /* Remember, we key off of what's on the wire. */
 474         vnet = vxlnat_get_vnet(VXLAN_ID_WIRE32(vxh->vxlan_id), B_FALSE);
 475         if (vnet == NULL) {
 476                 DTRACE_PROBE1(vxlnat__in__drop__vnetid, uint32_t,


 484             vxlnat_vnet_t, vnet);
 485 
 486         /*
 487          * Off-vxlan processing steps:
 488          * 1.) Locate the ethernet header and check/update/add-into remotes.
 489          * 2.) Search 1-1s, process if hit.
 490          * 3.) Search flows, process if hit.
 491          * 4.) Search rules, create new flow (or not) if hit.
 492          * 5.) Drop the packets.
 493          */
 494 
 495         /* 1.) Locate the ethernet header and check/update/add-into remotes. */
 496         mp->b_rptr += sizeof (*vxh);
 497         while (MBLKL(mp) == 0) {
 498                 mblk_t *oldmp = mp;
 499 
 500                 mp = mp->b_cont;
 501                 freeb(oldmp);
 502         }
 503         mp = vxlnat_cache_remote(mp, underlay_src, vnet);
 504         if (mp == NULL)
 505                 goto bail_no_free;


 506 
 507         /* Let's cache the IP header here... */
 508         ipha = (ipha_t *)mp->b_rptr;
 509         switch (IPH_HDR_VERSION(ipha)) {
 510         case IPV4_VERSION:
 511                 ip6h = NULL;
 512                 break;
 513         case IPV6_VERSION:



 514                 ip6h = (ip6_t *)ipha;
 515                 ipha = NULL;
 516                 break;
 517         default:
 518                 DTRACE_PROBE2(vxlnat__in__drop__ipvers, int,
 519                     IPH_HDR_VERSION(ipha), mblk_t *, mp);
 520                 goto bail_and_free;
 521         }







 522 
 523         /* 2.) Search 1-1s, process if hit. */
 524         if (vxlnat_one_vxlan_fixed(vnet, mp, ipha, ip6h))
 525                 goto bail_no_free;      /* Success means mp was consumed. */


 526 
 527 #ifdef notyet
 528         /* 3.) Search flows, process if hit. */
 529         if (vxlnat_one_vxlan_flow(vnet, mp, ipha, ip6h))
 530                 goto bail_no_free;      /* Success means mp was consumed. */

 531 
 532         /* 4.) Search rules, create new flow (or not) if hit. */
 533         if (vxlnat_one_vxlan_rule(vnet, mp, ipha, ip6h))
 534                 goto bail_no_free;      /* Success means mp was consumed. */
 535 #endif

 536 
 537         /* 5.) Nothing, drop the packet. */













 538 
 539         DTRACE_PROBE2(vxlnat__in___drop__nohits, vxlnat_vnet_t *, vnet,
 540             mblk_t *, mp);


















 541 
 542 bail_and_free:















 543         freemsg(mp);
 544 bail_no_free:
 545         VXNV_REFRELE(vnet);
 546 }
 547 /*
 548  * ONLY return B_FALSE if we get a packet-clogging event.
 549  */
 550 /* ARGSUSED */
 551 static boolean_t
 552 vxlnat_vxlan_input(ksocket_t insock, mblk_t *chain, size_t msgsize, int oob,
 553     void *ignored)
 554 {
 555         mblk_t *mp, *nextmp;
 556 
 557         /*
 558          * XXX KEBE ASKS --> move hold & release outside of loop?
 559          * If so, hold rwlock here.
 560          */
 561 
 562         for (mp = chain; mp != NULL; mp = nextmp) {
 563                 struct T_unitdata_ind *tudi;
 564                 struct sockaddr_in6 *sin6;
 565 


 661                     old_one, ipaddr_t, *new_ones_place);
 662                 freeb(mp);
 663                 return (NULL);
 664         }
 665         *new_ones_place = new_one;
 666 
 667         /* Adjust ICMP checksum... */
 668         icmph->icmph_checksum = vxlnat_cksum_adjust(icmph->icmph_checksum,
 669             (uint16_t *)&old_one, (uint16_t *)&new_one, sizeof (ipaddr_t));
 670 
 671         /*
 672          * XXX KEBE ASKS, recompute *inner-packet* checksums?  Let's not for
 673          * now, but consider this Fair Warning (or some other VH album...).
 674          */
 675         return (mp);
 676 }
 677 
 678 /*
 679  * Take a 1-1/fixed IPv4 packet and convert it for transmission out the
 680  * appropriate end. "to_private" is what it says on the tin.
 681  * ALWAYS consumes "mp", regardless of return value.
 682  */
 683 static mblk_t *
 684 vxlnat_fixed_fixv4(mblk_t *mp, vxlnat_fixed_t *fixed, boolean_t to_private)
 685 {
 686         ipaddr_t new_one, old_one;
 687         ipaddr_t *new_ones_place;
 688         ipha_t *ipha = (ipha_t *)mp->b_rptr;
 689         uint8_t *nexthdr, *end_wptr;
 690 
 691         if (to_private) {
 692                 IN6_V4MAPPED_TO_IPADDR(&fixed->vxnf_addr, new_one);
 693                 new_ones_place = &ipha->ipha_dst;
 694         } else {
 695                 IN6_V4MAPPED_TO_IPADDR(&fixed->vxnf_pubaddr, new_one);
 696                 new_ones_place = &ipha->ipha_src;
 697         }
 698 
 699         old_one = *new_ones_place;
 700         *new_ones_place = new_one;
 701