1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018, Joyent, Inc.
  14  */
  15 
  16 #ifndef _INET_VXLNAT_IMPL_H
  17 #define _INET_VXLNAT_IMPL_H
  18 
  19 #include <inet/vxlnat.h>
  20 #include <inet/ip.h>
  21 #include <inet/ip6.h>
  22 #include <inet/ip_ire.h>
  23 #include <sys/clock_impl.h>
  24 #include <sys/avl.h>
  25 #include <sys/uio.h>
  26 #include <sys/list.h>
  27 #include <sys/byteorder.h>
  28 #include <sys/vxlan.h>
  29 
  30 /*
  31  * XXX KEBE ASKS --> do we assume port IPPORT_VXLAN all the time?
  32  * IF NOT, then we need to add ports to various things here that deal
  33  * with the underlay network.
  34  *
  35  * NOTE:  All reference counts *include* table/tree/list/whatever internment.
  36  * Once an entry is removed, *_REFRELE() must be invoked, and it may or may
  37  * not free something.
  38  */
  39 
  40 #ifdef __cplusplus
  41 extern "C" {
  42 #endif
  43 
  44 /*
  45  * NAT RULES.  Instantiated per-vnet, write-once/read-only entries,
  46  * linkage/entries protected by "rule lock" outside this structure.
  47  */
  48 typedef struct vxlnat_rule_s {
  49         list_node_t vxnr_link;
  50         /* refheld link, or if NULL, this rule is "condemned" and no good. */
  51         struct vxlnat_vnet_s *vxnr_vnet;
  52         in6_addr_t vxnr_myaddr;
  53         in6_addr_t vxnr_pubaddr;
  54         uint8_t vxnr_myether[ETHERADDRL];
  55         uint16_t vxnr_vlanid;   /* Fabrics use this too. */
  56         uint32_t vxnr_refcount;
  57         uint8_t vxnr_prefix;
  58 } vxlnat_rule_t;
  59 #define VXNR_REFHOLD(vxnr) {                    \
  60         atomic_inc_32(&(vxnr)->vxnr_refcount);   \
  61         ASSERT((vxnr)->vxnr_refcount > 0);        \
  62 }
  63 #define VXNR_REFRELE(vxnr) {                                    \
  64         ASSERT((vxnr)->vxnr_refcount > 0);                        \
  65         membar_exit();                                          \
  66         if (atomic_dec_32_nv(&(vxnr)->vxnr_refcount) == 0)       \
  67                 vxlnat_rule_free(vxnr);                         \
  68 }
  69 extern void vxlnat_rule_free(vxlnat_rule_t *);
  70 
  71 /*
  72  * 1-1 IP mapping.
  73  */
  74 typedef struct vxlnat_fixed_s {
  75         avl_node_t vxnf_treenode;
  76         in6_addr_t vxnf_addr;   /* XXX KEBE ASKS - must it match to a rule? */
  77         in6_addr_t vxnf_pubaddr; /* External IP. */
  78         struct vxlnat_vnet_s *vxnf_vnet;
  79         ire_t *vxnf_ire;        /* Should be a local IRE from the ftable. */
  80         struct vxlnat_remote_s *vxnf_remote;
  81         uint8_t vxnf_myether[ETHERADDRL];
  82         uint16_t vxnf_vlanid;   /* Stored in network order for quick xmit. */
  83         uint32_t vxnf_refcount;
  84         boolean_t vxnf_clear_router;    /* XXX KEBE SAYS CHEESY HACK */
  85 } vxlnat_fixed_t;
  86 #define VXNF_REFHOLD(vxnf) {                    \
  87         atomic_inc_32(&(vxnf)->vxnf_refcount);   \
  88         ASSERT((vxnf)->vxnf_refcount > 0);        \
  89 }
  90 #define VXNF_REFRELE(vxnf) {                                    \
  91         ASSERT((vxnf)->vxnf_refcount > 0);                        \
  92         membar_exit();                                          \
  93         if (atomic_dec_32_nv(&(vxnf)->vxnf_refcount) == 0)       \
  94                 vxlnat_fixed_free(vxnf);                        \
  95 }
  96 extern void vxlnat_fixed_free(vxlnat_fixed_t *);
  97 
  98 /*
  99  * REMOTE VXLAN destinations.
 100  */
 101 typedef struct vxlnat_remote_s {
 102         avl_node_t vxnrem_treenode;
 103         in6_addr_t vxnrem_addr; /* Same prefix as one in rule. */
 104         in6_addr_t vxnrem_uaddr; /* Underlay VXLAN destination. */
 105         struct vxlnat_vnet_s *vxnrem_vnet;      /* Reference-held. */
 106         uint32_t vxnrem_refcount;
 107         uint8_t vxnrem_ether[ETHERADDRL];
 108         uint16_t vxnrem_vlan;
 109         /*
 110          * XXX KEBE SAYS put some lifetime/usetime/etc. here
 111          * so we don't keep too many of these.  Either that, or maybe
 112          * convert to a qqcache or (patents expiring) ARC.
 113          */
 114 } vxlnat_remote_t;
 115 #define VXNREM_REFHOLD(vxnrem) {                        \
 116         atomic_inc_32(&(vxnrem)->vxnrem_refcount);       \
 117         ASSERT((vxnrem)->vxnrem_refcount > 0);            \
 118 }
 119 #define VXNREM_REFRELE(vxnrem) {                                \
 120         ASSERT((vxnrem)->vxnrem_refcount > 0);                    \
 121         membar_exit();                                          \
 122         if (atomic_dec_32_nv(&(vxnrem)->vxnrem_refcount) == 0)   \
 123                 vxlnat_remote_free(vxnrem);                     \
 124 }
 125 extern void vxlnat_remote_free(vxlnat_remote_t *);
 126 
 127 /*
 128  * per-vnetid overarching structure.  AVL tree keyed by vnetid.
 129  * NOTE:  Could be split into vnetid-hashed buckets to split any
 130  * locks.
 131  */
 132 typedef struct vxlnat_vnet_s {
 133         avl_node_t vxnv_treenode;
 134         /*
 135          * 1-1 IP mappings. (1st lookup for an in-to-out packet.)
 136          * Will map to SOMETHING in IP.
 137          * XXX KEBE ASKS - conn_t or something else TBD?!
 138          */
 139         krwlock_t vxnv_fixed_lock;
 140         avl_tree_t vxnv_fixed_ips;
 141         /*
 142          * NAT flows. (2nd lookup for an in-to-out packet.)
 143          * These are also conn_ts with outer-packet fields for out-to-in
 144          * matches against a conn_t.
 145          */
 146         krwlock_t vxnv_flow_lock;
 147         avl_tree_t vxnv_flows;
 148         /* NAT rules. (3rd lookup for an in-to-out packet.) */
 149         kmutex_t vxnv_rule_lock;
 150         list_t vxnv_rules;
 151         /*
 152          * Internal-network remote-nodes. (only lookup for out-to-in packet.)
 153          * Entries here are also refheld by 1-1s or NAT flows.
 154          */
 155         kmutex_t vxnv_remote_lock;
 156         avl_tree_t vxnv_remotes;
 157 
 158         uint32_t vxnv_refcount;
 159         uint32_t vxnv_vnetid;   /* Wire byteorder for less swapping on LE */
 160 } vxlnat_vnet_t;
 161 #define VXNV_REFHOLD(vxnv) {                    \
 162         atomic_inc_32(&(vxnv)->vxnv_refcount);   \
 163         ASSERT((vxnv)->vxnv_refcount > 0);        \
 164 }
 165 #define VXNV_REFRELE(vxnv) {                                    \
 166         ASSERT((vxnv)->vxnv_refcount > 0);                        \
 167         membar_exit();                                          \
 168         if (atomic_dec_32_nv(&(vxnv)->vxnv_refcount) == 0)       \
 169                 vxlnat_vnet_free(vxnv);                         \
 170 }
 171 extern void vxlnat_vnet_free(vxlnat_vnet_t *);
 172 
 173 /*
 174  * Endian-independent macros for rapid off-wire header reading. i.e. avoid
 175  * [nh]to[hn]*()
 176  *
 177  * VXLAN_ID_WIRE32(id) ==> Zero-out "reserved" bits, preserve wire-order
 178  * and position of vnetid.
 * VXLAN_FLAGS_WIRE32(flags) ==> Zero-out reserved bits, preserve wire-order
 180  * and position of flags.
 181  * VXLAN_F_VDI_WIRE ==> VXLAN_F_VDI, but w/o needing to swap.
 182  *
 183  * ALSO:  HTON/NTOH for kernel-makes-right interactions with userland, which
 184  * means shifting actual ID to/from low-24-bits of 32-bit word.
 185  * VXLAN_ID_HTON(id)
 186  * VXLAN_ID_NTOH(id)
 187  *
 188  * XXX KEBE ASKS ==> If not confusing to folks, move into sys/vxlan.h and
 189  * have overlay's VXLAN encap adopt them?
 190  */
 191 #ifdef _BIG_ENDIAN
 192 #define VXLAN_ID_WIRE32(id) ((id) & 0xFFFFFF00)
 193 #define VXLAN_F_VDI_WIRE VXLAN_F_VDI
 194 /* XXX KEBE ASKS, do masking here? */
 195 #define VXLAN_ID_HTON(id) ((id) << VXLAN_ID_SHIFT)
 196 #define VXLAN_ID_NTOH(id) ((id) >> VXLAN_ID_SHIFT)
 197 #else   /* i.e. _LITTLE_ENDIAN */
 198 #define VXLAN_ID_WIRE32(id) ((id) & 0xFFFFFF)
 199 #define VXLAN_F_VDI_WIRE 0x08
 200 #define VXLAN_ID_HTON(id) htonl((id) << VXLAN_ID_SHIFT)
 201 #define VXLAN_ID_NTOH(id) (ntohl(id) >> VXLAN_ID_SHIFT)
 202 #endif  /* _BIG_ENDIAN */
 203 #define VXLAN_FLAGS_WIRE32(flags) ((flags) & VXLAN_F_VDI_WIRE)
 204 
 205 extern kmutex_t vxlnat_mutex;
 206 extern netstack_t *vxlnat_netstack;
 207 extern int vxlnat_command(vxn_msg_t *);
 208 extern int vxlnat_read_dump(struct uio *);
 209 extern int vxlnat_vxlan_addr(in6_addr_t *);
 210 extern void vxlnat_closesock(void);
 211 extern void vxlnat_state_init(void);
 212 extern void vxlnat_state_fini(void);
 213 
 214 extern void vxlnat_public_init(void);
 215 extern void vxlnat_public_fini(void);
 216 extern boolean_t vxlnat_public_hold(in6_addr_t *, boolean_t);
 217 extern void vxlnat_public_rele(in6_addr_t *);
 218 
 219 extern int vxlnat_tree_plus_in6_cmp(const void *, const void *);
 220 
 221 /* ire_recvfn & ire_sendfn functions for 1-1/fixed maps. */
 222 extern void vxlnat_fixed_ire_recv_v4(ire_t *, mblk_t *, void *,
 223     ip_recv_attr_t *);
 224 extern void vxlnat_fixed_ire_recv_v6(ire_t *, mblk_t *, void *,
 225     ip_recv_attr_t *);
 226 extern int vxlnat_fixed_ire_send_v4(ire_t *, mblk_t *, void *,
 227     ip_xmit_attr_t *, uint32_t *);
 228 extern int vxlnat_fixed_ire_send_v6(ire_t *, mblk_t *, void *,
 229     ip_xmit_attr_t *, uint32_t *);
 230 
 231 
 232 extern vxlnat_vnet_t *vxlnat_get_vnet(uint32_t, boolean_t);
 233 
 234 #ifdef __cplusplus
 235 }
 236 #endif
 237 
 238 #endif /* _INET_VXLNAT_IMPL_H */