Print this page
WIP to help bringup NAT flows
*** 37,46 ****
--- 37,47 ----
#include <sys/tihdr.h>
#include <netinet/in.h>
#include <netinet/udp.h>
#include <inet/ip.h>
#include <inet/ip6.h>
+ #include <inet/tcp_impl.h>
#include <inet/udp_impl.h>
#include <inet/tcp.h>
#include <inet/vxlnat_impl.h>
*** 347,356 ****
--- 348,461 ----
mp->b_rptr = (uint8_t *)ipha;
return (mp);
}
/*
+ * Extract transport-level information to find a NAT flow.
+ * Consume mp and return B_FALSE if there's a problem. Fill in "ports"
+ * and "protocol" and return B_TRUE if there's not.
+ *
+ * Only IPv4 is handled: a non-NULL ip6h counts as a problem (mp is freed
+ * and B_FALSE returned) before any output argument is written.
+ *
+ * "*nexthdr_ptr" is set to ipha + IPH_HDR_LENGTH(ipha) BEFORE that
+ * location is bounds-checked against mp->b_wptr, so it is only meaningful
+ * to the caller when B_TRUE is returned.
+ *
+ * "*ports" is filled in per protocol:
+ *   TCP, UDP - the first 32 bits of the transport header, copied as-is.
+ *   ICMP     - icmph_echo_ident, and only for the message types accepted
+ *              in the switch below; any other ICMP type is a problem.
+ *   others   - 0.
+ *
+ * All length checks are made against mp->b_wptr of the first mblk only;
+ * b_cont is never followed.
+ */
+ static boolean_t
+ vxlnat_grab_transport(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, uint32_t *ports,
+ uint8_t *protocol, uint8_t **nexthdr_ptr)
+ {
+ uint8_t *nexthdr;
+
+ /* Punt on IPv6 for now... */
+ if (ip6h != NULL) {
+ freemsg(mp);
+ return (B_FALSE);
+ }
+
+ ASSERT(ipha != NULL);
+ *protocol = ipha->ipha_protocol;
+ nexthdr = ((uint8_t *)ipha + IPH_HDR_LENGTH(ipha));
+ *nexthdr_ptr = nexthdr; /* Get this out of the way now. */
+ if (nexthdr > mp->b_wptr) {
+ DTRACE_PROBE1(vxlnat__in__drop__trnexthdr, mblk_t *, mp);
+ freemsg(mp);
+ return (B_FALSE);
+ }
+ switch (*protocol) {
+ case IPPROTO_TCP: {
+ tcpha_t *tcph = (tcpha_t *)nexthdr;
+
+ if (nexthdr + sizeof (*tcph) > mp->b_wptr) {
+ DTRACE_PROBE1(vxlnat__in__drop__tcpnexthdr, mblk_t *,
+ mp);
+ freemsg(mp);
+ return (B_FALSE);
+ }
+ *ports = *((uint32_t *)tcph);
+ /* XXX KEBE SAYS - grab other metadata here NOW? */
+ break;
+ }
+ case IPPROTO_UDP: {
+ udpha_t *udph = (udpha_t *)nexthdr;
+
+ if (nexthdr + sizeof (*udph) > mp->b_wptr) {
+ DTRACE_PROBE1(vxlnat__in__drop__udpnexthdr, mblk_t *,
+ mp);
+ freemsg(mp);
+ return (B_FALSE);
+ }
+ *ports = *((uint32_t *)udph);
+ /*
+ * XXX KEBE SAYS - not as much as TCP, but grab other metadata
+ * here NOW?
+ */
+ break;
+ }
+ case IPPROTO_ICMP: {
+ icmph_t *icmph = (icmph_t *)nexthdr;
+
+ if (nexthdr + sizeof (*icmph) > mp->b_wptr) {
+ DTRACE_PROBE1(vxlnat__in__drop__icmpnexthdr, mblk_t *,
+ mp);
+ freemsg(mp);
+ return (B_FALSE);
+ }
+ /* XXX KEBE SAYS sort out ICMP header... */
+ switch (icmph->icmph_type) {
+ case ICMP_ECHO_REQUEST:
+ case ICMP_TIME_STAMP_REQUEST:
+ case ICMP_TIME_EXCEEDED:
+ case ICMP_INFO_REQUEST:
+ case ICMP_ADDRESS_MASK_REPLY:
+ /*
+ * All ones we can sorta cope with...
+ * NOTE(review): TIME_EXCEEDED and ADDRESS_MASK_REPLY are not
+ * request types like the rest of this list; confirm they are
+ * intended and that icmph_echo_ident means something for them.
+ */
+ break;
+ default:
+ DTRACE_PROBE2(vxlnat__in__drop__icmptype, int,
+ icmph->icmph_type, mblk_t *, mp);
+ freemsg(mp);
+ return (B_FALSE);
+ }
+ /*
+ * NOTE: as of now, will switch position depending on endian.
+ * (Presumably because the ident is narrower than the 32-bit
+ * "*ports" it is widened into - confirm against icmph_t.)
+ */
+ *ports = icmph->icmph_echo_ident;
+ break;
+ }
+ default:
+ *ports = 0;
+ break;
+ }
+
+ return (B_TRUE);
+ }
+
+ /*
+ * This is the evaluate-packet vs. NAT flow state function.
+ * This function does NOT alter "mp".
+ *
+ * Currently a stub: it ignores every argument and always returns B_FALSE.
+ * The meaning of the eventual B_TRUE/B_FALSE result is decided by the
+ * caller; nothing here frees or modifies mp.
+ */
+ static boolean_t
+ vxlnat_verify_natstate(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h,
+ vxlnat_flow_t *flow, uint8_t *nexthdr)
+ {
+ /* XXX KEBE SAYS FILL ME IN! */
+ return (B_FALSE);
+ }
+
+ /*
* Inspect the packet and find ports & protos (or ICMP types & codes)
* and see if we have an established NAT flow.
*
* XXX KEBE WONDERS if the transmission path will more closely resemble
* vxlnat_one_vxlan_fixed() because of ipha_ident issues or not...
*** 360,371 ****
*/
static boolean_t
vxlnat_one_vxlan_flow(vxlnat_vnet_t *vnet, mblk_t *mp, ipha_t *ipha,
ip6_t *ip6h)
{
! /* XXX KEBE SAYS FILL ME IN. */
! /* For now... */
return (B_FALSE);
}
/*
* If we reach here, we need to find a NAT rule, and see if we can/should
--- 465,623 ----
*/
static boolean_t
vxlnat_one_vxlan_flow(vxlnat_vnet_t *vnet, mblk_t *mp, ipha_t *ipha,
ip6_t *ip6h)
{
! vxlnat_flow_t *flow, searcher;
! uint8_t *nexthdr;
!
! /*
! * Look up an established NAT flow for this packet in the vnet's IPv4
! * flow tree, keyed on the addresses, ports and protocol gathered into
! * "searcher".
! *
! * Returns B_TRUE when mp has been consumed here, and B_FALSE when no
! * flow was found (or the packet is IPv6), in which case mp is left
! * untouched for the caller to keep working on.
! *
! * XXX KEBE WONDERS, should we return vxlnat_flow_t instead if we
! * miss? That way, we only need to find the ports/protocol ONCE.
! */
!
! if (ip6h != NULL) {
! /* Eventually, grab addresses for "searcher". */
! return (B_FALSE); /* Bail on IPv6 for now... */
! } else {
! ASSERT(ipha != NULL);
! searcher.vxnfl_isv4 = B_TRUE; /* Required? */
! IN6_INADDR_TO_V4MAPPED((struct in_addr *)(&ipha->ipha_src),
! &searcher.vxnfl_src);
! IN6_INADDR_TO_V4MAPPED((struct in_addr *)(&ipha->ipha_dst),
! &searcher.vxnfl_dst);
! }
!
! /*
! * vxlnat_grab_transport() frees mp itself on every failure path (and
! * fires its own drop probes for IPv4 packets).  By the time it returns
! * B_FALSE, mp is a dangling pointer: it must not be handed to a probe
! * and must not be freed again, as that would be a double free.  Just
! * report the packet as consumed.
! */
! if (!vxlnat_grab_transport(mp, ipha, ip6h, &searcher.vxnfl_ports,
! &searcher.vxnfl_protocol, &nexthdr)) {
! DTRACE_PROBE1(vxlnat__in__flowgrab, vxlnat_vnet_t *, vnet);
! return (B_TRUE);
! }
!
! /*
! * XXX KEBE SAYS Eventually put the rw&find in an IPv4-only block,
! * because IPv6 (if we NAT it like IPv4) will have its own table/tree.
! */
! rw_enter(&vnet->vxnv_flowv4_lock, RW_READER);
! flow = avl_find(&vnet->vxnv_flows_v4, &searcher, NULL);
! if (flow != NULL)
! VXNFL_REFHOLD(flow); /* Keep it alive once the lock is dropped. */
! rw_exit(&vnet->vxnv_flowv4_lock);
!
! if (flow == NULL)
! return (B_FALSE); /* Let caller handle things. */
!
! if (!vxlnat_verify_natstate(mp, ipha, ip6h, flow, nexthdr)) {
! freemsg(mp); /* XXX KEBE SAYS FOR NOW... */
! } else {
! /*
! * XXX KEBE SAYS PROCESS...
! * Until this is filled in, nothing on this path transmits or
! * frees mp even though B_TRUE (consumed) is returned below.
! */
! }
!
! VXNFL_REFRELE(flow);
! return (B_TRUE);
! }
!
! /*
! * We have a new packet that seems to require a new NAT flow. Construct that
! * flow now, and intern it as both a conn_t in IP *and* in the vnet's
! * appropriate vxnv_flows* tree. Return NULL if we have a problem.
! *
! * On success the returned flow carries a reference for the caller (the
! * refcount starts at 2: one for the tree, one for the caller).  If an
! * equal flow is already in the tree, the freshly built one is discarded
! * and the existing flow is returned with an extra hold instead.  A newly
! * interned flow also takes its own hold on "rule".
! *
! * Problems that yield NULL: allocation failure (the allocation is
! * KM_NOSLEEP), a non-V4MAPPED inner_src (IPv6 is not done yet), or
! * vxlnat_new_conn() failing.
! *
! * vxlnat_activate_conn() is only called for a flow created here, and only
! * after the flow lock has been dropped.
! *
! * NOTE(review): the flow comes from kmem_alloc(), not kmem_zalloc(), and
! * vxnfl_connp is never assigned in this function, so the ASSERT in the
! * vxlnat_new_conn() failure path relies on vxlnat_new_conn() having
! * stored NULL there - confirm.
! */
! static vxlnat_flow_t *
! vxlnat_new_flow(vxlnat_rule_t *rule, in6_addr_t *inner_src, in6_addr_t *dst,
! uint32_t ports, uint8_t protocol)
! {
! vxlnat_vnet_t *vnet = rule->vxnr_vnet;
! vxlnat_flow_t *flow, *oldflow;
! avl_tree_t *flowtree;
! krwlock_t *flowlock;
! avl_index_t where;
!
! flow = kmem_alloc(sizeof (*flow), KM_NOSLEEP | KM_NORMALPRI);
! if (flow == NULL)
! return (NULL);
!
! flow->vxnfl_dst = *dst;
! flow->vxnfl_src = *inner_src;
! flow->vxnfl_ports = ports;
! flow->vxnfl_protocol = protocol;
! flow->vxnfl_refcount = 2; /* One for internment, one for caller. */
! /* Assume no mixed-IP-version mappings for now. */
! if (IN6_IS_ADDR_V4MAPPED(inner_src)) {
! ASSERT(IN6_IS_ADDR_V4MAPPED(dst));
! flow->vxnfl_isv4 = B_TRUE;
! flowtree = &vnet->vxnv_flows_v4;
! flowlock = &vnet->vxnv_flowv4_lock;
! } else {
! ASSERT(!IN6_IS_ADDR_V4MAPPED(dst));
! flow->vxnfl_isv4 = B_FALSE;
! /* XXX KEBE SAYS we don't do IPv6 for now. */
! DTRACE_PROBE2(vxlnat__flow__newv6, in6_addr_t *, inner_src,
! in6_addr_t *, dst);
! kmem_free(flow, sizeof (*flow));
! return (NULL);
! }
! VXNR_REFHOLD(rule); /* For the flow itself... */
! flow->vxnfl_rule = rule;
!
! rw_enter(flowlock, RW_WRITER);
! oldflow = (vxlnat_flow_t *)avl_find(flowtree, flow, &where);
! if (oldflow != NULL) {
! /*
! * Hmmm, someone put one in while we were dinking around.
! * XXX KEBE SAYS return the old one, refheld, for now.
! */
! VXNR_REFRELE(rule);
! kmem_free(flow, sizeof (*flow));
! VXNFL_REFHOLD(oldflow);
! flow = oldflow;
! } else {
! avl_insert(flowtree, flow, where);
! /*
! * Do conn_t magic here, except for the conn_t activation. I
! * am aware of holding the rwlock-as-write here. We may need
! * to move this outside the rwlock hold, and
! * reacquire-on-failure.
! */
! if (!vxlnat_new_conn(flow)) {
! ASSERT(flow->vxnfl_connp == NULL);
! avl_remove(flowtree, flow);
! VXNR_REFRELE(flow->vxnfl_rule);
! kmem_free(flow, sizeof (*flow));
! flow = NULL;
! }
! }
! rw_exit(flowlock);
!
! /* We just created this one, activate it. */
! if (oldflow == NULL && flow != NULL)
! vxlnat_activate_conn(flow);
!
! return (flow);
! }
!
! void
! vxlnat_flow_free(vxlnat_flow_t *flow)
! {
! ASSERT(flow->vxnfl_refcount == 0);
!
! /*
! * Final teardown of a flow whose reference count has reached zero
! * (asserted above): clear the conn_t's back-pointer (conn_priv), drop
! * the reference on the conn_t and the hold on the rule, then free
! * the flow itself.
! *
! * NOTE(review): nothing here removes the flow from the vnet's flow
! * tree; presumably that has already happened by the time the count
! * hits zero - confirm.
! *
! * XXX KEBE SAYS FILL ME IN?!
! */
! /* XXX KEBE ASKS ipcl_hash_remove()? */
!
! flow->vxnfl_connp->conn_priv = NULL; /* Sufficient? */
! CONN_DEC_REF(flow->vxnfl_connp);
! VXNR_REFRELE(flow->vxnfl_rule);
! kmem_free(flow, sizeof (*flow));
! }
!
! static boolean_t
! vxlnat_verify_initial(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h,
! uint32_t ports, uint8_t protocol, uint8_t *nexthdr)
! {
! /*
! * XXX KEBE SAYS FILL ME IN!
! *
! * Placeholder: every packet is rejected for now.  mp is freed here
! * and B_FALSE returned, so a caller that sees B_FALSE must treat mp
! * as already consumed.  None of the other arguments are examined
! * yet.
! */
! freemsg(mp);
return (B_FALSE);
}
/*
* If we reach here, we need to find a NAT rule, and see if we can/should
*** 378,400 ****
static boolean_t
vxlnat_one_vxlan_rule(vxlnat_vnet_t *vnet, mblk_t *mp, ipha_t *ipha,
ip6_t *ip6h)
{
vxlnat_rule_t *rule;
! /* XXX handle IPv6 later */
if (ip6h != NULL)
return (B_FALSE);
ASSERT3P(ipha, !=, NULL);
mutex_enter(&vnet->vxnv_rule_lock);
rule = list_head(&vnet->vxnv_rules);
/*
* search for a match in the nat rules
* XXX investigate perf issues with with respect to list_t size
*/
while (rule != NULL) {
ipaddr_t ipaddr;
uint32_t netmask = 0xffffffff;
uint8_t prefix = rule->vxnr_prefix - 96;
--- 630,663 ----
static boolean_t
vxlnat_one_vxlan_rule(vxlnat_vnet_t *vnet, mblk_t *mp, ipha_t *ipha,
ip6_t *ip6h)
{
vxlnat_rule_t *rule;
+ vxlnat_flow_t *flow;
+ in6_addr_t v4m_src, v4m_dst, *inner_src, *dst;
+ uint32_t ports;
+ uint8_t protocol;
+ uint8_t *nexthdr;
! /* XXX handle IPv6 later, assigning inner_src and dst to ip6_t addrs. */
if (ip6h != NULL)
return (B_FALSE);
ASSERT3P(ipha, !=, NULL);
+ inner_src = &v4m_src;
+ dst = &v4m_dst;
+ IN6_INADDR_TO_V4MAPPED((struct in_addr *)(&ipha->ipha_src), inner_src);
+ IN6_INADDR_TO_V4MAPPED((struct in_addr *)(&ipha->ipha_dst), dst);
mutex_enter(&vnet->vxnv_rule_lock);
rule = list_head(&vnet->vxnv_rules);
/*
* search for a match in the nat rules
* XXX investigate perf issues with with respect to list_t size
+ * XXX KEBE SAYS rewrite when we start doing IPv6 to use "inner_src"
+ * and "dst".
*/
while (rule != NULL) {
ipaddr_t ipaddr;
uint32_t netmask = 0xffffffff;
uint8_t prefix = rule->vxnr_prefix - 96;
*** 417,432 ****
if (rule == NULL)
return (B_FALSE);
/* process packet */
/*
! static vxlnat_flow_t *
! vxlnat_new_flow(vxlnat_rule_t *rule, in6_addr_t *inner_src, in6_addr_t *dst,
! uint32_t ports, uint8_t protocol)
*/
return (B_FALSE);
}
/*
* See if the inbound VXLAN packet hits a 1-1/fixed mapping, and process if it
--- 680,721 ----
if (rule == NULL)
return (B_FALSE);
/* process packet */
+
/*
! * Grab transport header, and figure out if we can proceed.
! *
! * NOTE: vxlnat_grab_transport() will free/consume mp if it fails,
! * because we want to isolate non-flow-starters without having them
! * create new flows. This means we return B_TRUE (consumed mp) on
! * failure.
*/
+ if (!vxlnat_grab_transport(mp, ipha, ip6h, &ports, &protocol, &nexthdr))
+ return (B_TRUE); /* see above... */
+ if (!vxlnat_verify_initial(mp, ipha, ip6h, ports, protocol, nexthdr))
+ return (B_TRUE);
+
+ flow = vxlnat_new_flow(rule, inner_src, dst, ports, protocol);
+ if (flow != NULL) {
+ /*
+ * Call same function that vxlnat_one_vxlan_flow() uses
+ * to remap & transmit the packet out the external side.
+ *
+ * NOTE: We've already checked the initial-packet-
+ * qualification, so unlike the main datapath, we don't
+ * need to call vxlnat_verify_natstate()
+ */
+
+ /* XXX KEBE SAYS PROCESS... */
+
+ VXNFL_REFRELE(flow);
+ return (B_TRUE);
+ }
+
return (B_FALSE);
}
/*
* See if the inbound VXLAN packet hits a 1-1/fixed mapping, and process if it
*** 562,577 ****
DTRACE_PROBE2(vxlnat__in__vnet, uint32_t,
VXLAN_ID_HTON(VXLAN_ID_WIRE32(vxh->vxlan_id)),
vxlnat_vnet_t, vnet);
/*
! * Off-vxlan processing steps:
* 1.) Locate the ethernet header and check/update/add-into remotes.
* 2.) Search 1-1s, process if hit.
* 3.) Search flows, process if hit.
* 4.) Search rules, create new flow (or not) if hit.
! * 5.) Drop the packets.
*/
/* 1.) Locate the ethernet header and check/update/add-into remotes. */
mp->b_rptr += sizeof (*vxh);
while (MBLKL(mp) == 0) {
--- 851,866 ----
DTRACE_PROBE2(vxlnat__in__vnet, uint32_t,
VXLAN_ID_HTON(VXLAN_ID_WIRE32(vxh->vxlan_id)),
vxlnat_vnet_t, vnet);
/*
! * Arrived-from-vxlan processing steps:
* 1.) Locate the ethernet header and check/update/add-into remotes.
* 2.) Search 1-1s, process if hit.
* 3.) Search flows, process if hit.
* 4.) Search rules, create new flow (or not) if hit.
! * 5.) Drop the packet.
*/
/* 1.) Locate the ethernet header and check/update/add-into remotes. */
mp->b_rptr += sizeof (*vxh);
while (MBLKL(mp) == 0) {