1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018, Joyent, Inc.
  14  */
  15 
  16 #ifndef _INET_VXLNAT_IMPL_H
  17 #define _INET_VXLNAT_IMPL_H
  18 
  19 #include <inet/vxlnat.h>
  20 #include <inet/ip.h>
  21 #include <inet/ip6.h>
  22 #include <inet/ip_ire.h>
  23 #include <sys/clock_impl.h>
  24 #include <sys/avl.h>
  25 #include <sys/uio.h>
  26 #include <sys/list.h>
  27 #include <sys/byteorder.h>
  28 #include <sys/vxlan.h>
  29 
  30 /*
  31  * XXX KEBE ASKS --> do we assume port IPPORT_VXLAN all the time?
  32  * IF NOT, then we need to add ports to various things here that deal
  33  * with the underlay network.
  34  *
  35  * NOTE:  All reference counts *include* table/tree/list/whatever internment.
  36  * Once an entry is removed, *_REFRELE() must be invoked, and it may or may
  37  * not free something.
  38  */
  39 
  40 #ifdef __cplusplus
  41 extern "C" {
  42 #endif
  43 
  44 /*
  45  * NAT RULES.  Instantiated per-vnet, write-once/read-only entries,
  46  * linkage/entries protected by "rule lock" outside this structure.
  47  */
/*
 * One NAT rule.  Reference-counted; see VXNR_REFHOLD()/VXNR_REFRELE().
 */
typedef struct vxlnat_rule_s {
	/* NOTE(review): presumably linkage on a vnet's vxnv_rules list. */
	list_node_t vxnr_link;
	/* refheld link, or if NULL, this rule is "condemned" and no good. */
	struct vxlnat_vnet_s *vxnr_vnet;
	in6_addr_t vxnr_myaddr;
	in6_addr_t vxnr_pubaddr;
	/* XXX KEBE ASKS, ire? */
	uint8_t vxnr_myether[ETHERADDRL];
	uint16_t vxnr_vlanid;	/* Fabrics use this too. */
	uint32_t vxnr_refcount;	/* See VXNR_REFHOLD()/VXNR_REFRELE(). */
	/* NOTE(review): looks like a prefix length for vxnr_myaddr; confirm. */
	uint8_t vxnr_prefix;
} vxlnat_rule_t;
  60 #define VXNR_REFHOLD(vxnr) {                    \
  61         atomic_inc_32(&(vxnr)->vxnr_refcount);   \
  62         ASSERT((vxnr)->vxnr_refcount > 0);        \
  63 }
  64 #define VXNR_REFRELE(vxnr) {                                    \
  65         ASSERT((vxnr)->vxnr_refcount > 0);                        \
  66         membar_exit();                                          \
  67         if (atomic_dec_32_nv(&(vxnr)->vxnr_refcount) == 0)       \
  68                 vxlnat_rule_free(vxnr);                         \
  69 }
/* Invoked by VXNR_REFRELE() when a rule's reference count drops to zero. */
extern void vxlnat_rule_free(vxlnat_rule_t *);
  71 
  72 /*
  73  * NAT FLOWS.  These are per-vnet, and keyed/searched by:
  74  * <inner-IP-source,IP-dest,inner-source-port,dest-port,protocol>.
  75  * They will be tied-to/part-of a conn_t.
  76  */
/*
 * One NAT flow.  Reference-counted; see VXNFL_REFHOLD()/VXNFL_REFRELE().
 */
typedef struct vxlnat_flow_s {
	avl_node_t vxnfl_treenode;
	/*
	 * I'm guessing that dst varies more than src.  Also
	 * the plan is for the comparator function to bcmp() both
	 * of these as one call for IPv6 (if we ever get to that..).
	 * (That plan relies on vxnfl_dst and vxnfl_src staying adjacent.)
	 */
	in6_addr_t vxnfl_dst;
	in6_addr_t vxnfl_src;	/* INNER source address. */
	uint32_t vxnfl_ports;	/* See VXNFL_SPORT()/VXNFL_DPORT(). */
	uint8_t vxnfl_protocol;
	/*
	 * NOTE(review): the reserved bits are spelled "vxlfl_" rather than
	 * the "vxnfl_" prefix used by every other member -- likely a typo,
	 * left alone here in case anything references it.
	 */
	uint8_t vxnfl_isv4 : 1,	/* Will save us 12 bytes of compares... */
		vxlfl_reserved1 : 7;
	/* Theoretically 16 bits lies where this comment is. */
	uint32_t vxnfl_refcount;	/* See VXNFL_REFHOLD()/VXNFL_REFRELE(). */
	conn_t *vxnfl_connp;	/* Question - embed instead? */
	vxlnat_rule_t *vxnfl_rule; /* Refhold to rule that generated me. */
	/*
	 * XXX KEBE SAYS Other NAT-state belongs here too.  Like time-values
	 * for timeouts, and more!
	 */
} vxlnat_flow_t;
  99 /* Exploit endianisms, maintain network order... */
 100 #ifdef _BIG_ENDIAN
 101 #define VXNFL_SPORT(ports) (uint16_t)((ports) >> 16) /* Unsigned all around. */
 102 #define VXNFL_DPORT(ports) ((ports) & 0xFFFF)
 103 #else
 104 #define VXNFL_SPORT(ports) ((ports) & 0xFFFF)
 105 #define VXNFL_DPORT(ports) (uint16_t)((ports) >> 16) /* Unsigned all around. */
 106 #endif
 107 #define VXNFL_REFHOLD(vxnfl) {                  \
 108         atomic_inc_32(&(vxnfl)->vxnfl_refcount); \
 109         ASSERT((vxnfl)->vxnfl_refcount > 0);      \
 110 }
 111 #define VXNFL_REFRELE(vxnfl) {                                  \
 112         ASSERT((vxnfl)->vxnfl_refcount > 0);                      \
 113         membar_exit();                                          \
 114         if (atomic_dec_32_nv(&(vxnfl)->vxnfl_refcount) == 0)     \
 115                 vxlnat_flow_free(vxnfl);                        \
 116 }
/* Invoked by VXNFL_REFRELE() when a flow's reference count drops to zero. */
extern void vxlnat_flow_free(vxlnat_flow_t *);
 118 
 119 /*
 120  * 1-1 IP mapping.
 121  */
/*
 * One 1-1 (fixed) IP mapping.  Reference-counted; see
 * VXNF_REFHOLD()/VXNF_REFRELE().
 */
typedef struct vxlnat_fixed_s {
	/* NOTE(review): presumably linkage in a vnet's vxnv_fixed_ips tree. */
	avl_node_t vxnf_treenode;
	in6_addr_t vxnf_addr;	/* For now it needn't match to a rule. */
	in6_addr_t vxnf_pubaddr; /* External IP. */
	struct vxlnat_vnet_s *vxnf_vnet;
	ire_t *vxnf_ire;	/* Should be an IRE_LOCAL from the ftable. */
	struct vxlnat_remote_s *vxnf_remote;
	uint8_t vxnf_myether[ETHERADDRL];
	uint16_t vxnf_vlanid;	/* Stored in network order for quick xmit. */
	uint32_t vxnf_refcount;	/* See VXNF_REFHOLD()/VXNF_REFRELE(). */
	boolean_t vxnf_clear_router;	/* XXX KEBE SAYS CHEESY HACK */
} vxlnat_fixed_t;
 134 #define VXNF_REFHOLD(vxnf) {                    \
 135         atomic_inc_32(&(vxnf)->vxnf_refcount);   \
 136         ASSERT((vxnf)->vxnf_refcount > 0);        \
 137 }
 138 #define VXNF_REFRELE(vxnf) {                                    \
 139         ASSERT((vxnf)->vxnf_refcount > 0);                        \
 140         membar_exit();                                          \
 141         if (atomic_dec_32_nv(&(vxnf)->vxnf_refcount) == 0)       \
 142                 vxlnat_fixed_free(vxnf);                        \
 143 }
/* Invoked by VXNF_REFRELE() when a mapping's reference count drops to zero. */
extern void vxlnat_fixed_free(vxlnat_fixed_t *);
 145 
 146 /*
 147  * REMOTE VXLAN destinations.
 148  */
/*
 * One remote VXLAN destination.  Reference-counted; see
 * VXNREM_REFHOLD()/VXNREM_REFRELE().
 */
typedef struct vxlnat_remote_s {
	/* NOTE(review): presumably linkage in a vnet's vxnv_remotes tree. */
	avl_node_t vxnrem_treenode;
	in6_addr_t vxnrem_addr;	/* Same prefix as one in rule, or fixed addr. */
	in6_addr_t vxnrem_uaddr; /* Underlay VXLAN destination. */
	struct vxlnat_vnet_s *vxnrem_vnet;	/* Reference-held. */
	uint32_t vxnrem_refcount; /* See VXNREM_REFHOLD()/VXNREM_REFRELE(). */
	uint8_t vxnrem_ether[ETHERADDRL];
	uint16_t vxnrem_vlan;
	/*
	 * XXX KEBE SAYS put some lifetime/usetime/etc. here
	 * so we don't keep too many of these.  Either that, or maybe
	 * convert to a qqcache or (patents expiring) ARC.
	 */
} vxlnat_remote_t;
 163 #define VXNREM_REFHOLD(vxnrem) {                        \
 164         atomic_inc_32(&(vxnrem)->vxnrem_refcount);       \
 165         ASSERT((vxnrem)->vxnrem_refcount > 0);            \
 166 }
 167 #define VXNREM_REFRELE(vxnrem) {                                \
 168         ASSERT((vxnrem)->vxnrem_refcount > 0);                    \
 169         membar_exit();                                          \
 170         if (atomic_dec_32_nv(&(vxnrem)->vxnrem_refcount) == 0)   \
 171                 vxlnat_remote_free(vxnrem);                     \
 172 }
/* Invoked by VXNREM_REFRELE() when a remote's reference count drops to zero. */
extern void vxlnat_remote_free(vxlnat_remote_t *);
 174 
 175 /*
 176  * per-vnetid overarching structure.  AVL tree keyed by vnetid.
 177  * NOTE:  Could be split into vnetid-hashed buckets to split any
 178  * locks.
 179  */
/*
 * Per-vnetid state: one lock plus one container for each of the fixed
 * mappings, IPv4 NAT flows, NAT rules, and remotes.  Reference-counted; see
 * VXNV_REFHOLD()/VXNV_REFRELE().
 */
typedef struct vxlnat_vnet_s {
	avl_node_t vxnv_treenode;
	/*
	 * 1-1 IP mappings. (1st lookup for an in-to-out packet.)
	 * Will map to an IRE_LOCAL in IP.
	 */
	krwlock_t vxnv_fixed_lock;
	avl_tree_t vxnv_fixed_ips;

	/*
	 * NAT flows. (2nd lookup for an in-to-out packet.)
	 * These are also conn_ts with outer-packet fields for out-to-in
	 * matches against a conn_t.
	 *
	 * NOTE: We're going to keep a separate tree for inner IPv6 NAT, if
	 * we ever need it.
	 */
	krwlock_t vxnv_flowv4_lock;
	avl_tree_t vxnv_flows_v4;

	/* NAT rules. (3rd lookup for an in-to-out packet.) */
	kmutex_t vxnv_rule_lock;
	list_t vxnv_rules;

	/*
	 * Internal-network remote-nodes. (only lookup for out-to-in packet.)
	 * Entries here are also refheld by 1-1s or NAT flows.
	 */
	kmutex_t vxnv_remote_lock;
	avl_tree_t vxnv_remotes;

	uint32_t vxnv_refcount;	/* See VXNV_REFHOLD()/VXNV_REFRELE(). */
	uint32_t vxnv_vnetid;	/* Wire byteorder for less swapping on LE */
} vxlnat_vnet_t;
 214 #define VXNV_REFHOLD(vxnv) {                    \
 215         atomic_inc_32(&(vxnv)->vxnv_refcount);   \
 216         ASSERT((vxnv)->vxnv_refcount > 0);        \
 217 }
 218 #define VXNV_REFRELE(vxnv) {                                    \
 219         ASSERT((vxnv)->vxnv_refcount > 0);                        \
 220         membar_exit();                                          \
 221         if (atomic_dec_32_nv(&(vxnv)->vxnv_refcount) == 0)       \
 222                 vxlnat_vnet_free(vxnv);                         \
 223 }
/* Invoked by VXNV_REFRELE() when a vnet's reference count drops to zero. */
extern void vxlnat_vnet_free(vxlnat_vnet_t *);
 225 
 226 /*
 227  * Endian-independent macros for rapid off-wire header reading. i.e. avoid
 228  * [nh]to[hn]*()
 229  *
 230  * VXLAN_ID_WIRE32(id) ==> Zero-out "reserved" bits, preserve wire-order
 231  * and position of vnetid.
 * VXLAN_FLAGS_WIRE32(flags) ==> Zero-out reserved bits, preserve wire-order
 233  * and position of flags.
 234  * VXLAN_F_VDI_WIRE ==> VXLAN_F_VDI, but w/o needing to swap.
 235  *
 236  * ALSO:  HTON/NTOH for kernel-makes-right interactions with userland, which
 237  * means shifting actual ID to/from low-24-bits of 32-bit word.
 238  * VXLAN_ID_HTON(id)
 239  * VXLAN_ID_NTOH(id)
 240  *
 241  * XXX KEBE ASKS ==> If not confusing to folks, move into sys/vxlan.h and
 242  * have overlay's VXLAN encap adopt them?
 243  */
 244 #ifdef _BIG_ENDIAN
 245 #define VXLAN_ID_WIRE32(id) ((id) & 0xFFFFFF00)
 246 #define VXLAN_F_VDI_WIRE VXLAN_F_VDI
 247 /* XXX KEBE ASKS, do masking here? */
 248 #define VXLAN_ID_HTON(id) ((id) << VXLAN_ID_SHIFT)
 249 #define VXLAN_ID_NTOH(id) ((id) >> VXLAN_ID_SHIFT)
 250 #else   /* i.e. _LITTLE_ENDIAN */
 251 #define VXLAN_ID_WIRE32(id) ((id) & 0xFFFFFF)
 252 #define VXLAN_F_VDI_WIRE 0x08
 253 #define VXLAN_ID_HTON(id) htonl((id) << VXLAN_ID_SHIFT)
 254 #define VXLAN_ID_NTOH(id) (ntohl(id) >> VXLAN_ID_SHIFT)
 255 #endif  /* _BIG_ENDIAN */
 256 #define VXLAN_FLAGS_WIRE32(flags) ((flags) & VXLAN_F_VDI_WIRE)
 257 
/*
 * Module-wide globals and entry points.
 * NOTE(review): what vxlnat_mutex protects is not visible in this header;
 * confirm against the defining .c file.
 */
extern kmutex_t vxlnat_mutex;
extern netstack_t *vxlnat_netstack;
extern int vxlnat_command(vxn_msg_t *);
extern int vxlnat_read_dump(struct uio *);
extern int vxlnat_vxlan_addr(in6_addr_t *);
extern void vxlnat_closesock(void);
extern void vxlnat_state_init(void);
extern void vxlnat_state_fini(void);

/*
 * Public (external) address bookkeeping.
 * NOTE(review): the meaning of vxlnat_public_hold()'s boolean_t argument
 * and of its return value is not visible here; confirm against the
 * implementation.
 */
extern void vxlnat_public_init(void);
extern void vxlnat_public_fini(void);
extern boolean_t vxlnat_public_hold(in6_addr_t *, boolean_t);
extern void vxlnat_public_rele(in6_addr_t *);

/*
 * NOTE(review): by name, a comparison callback for the AVL trees declared
 * above; the exact node layout it expects should be confirmed.
 */
extern int vxlnat_tree_plus_in6_cmp(const void *, const void *);

/* ire_recvfn & ire_sendfn functions for 1-1/fixed maps. */
extern void vxlnat_fixed_ire_recv_v4(ire_t *, mblk_t *, void *,
    ip_recv_attr_t *);
extern void vxlnat_fixed_ire_recv_v6(ire_t *, mblk_t *, void *,
    ip_recv_attr_t *);
extern int vxlnat_fixed_ire_send_v4(ire_t *, mblk_t *, void *,
    ip_xmit_attr_t *, uint32_t *);
extern int vxlnat_fixed_ire_send_v6(ire_t *, mblk_t *, void *,
    ip_xmit_attr_t *, uint32_t *);

/* conn_t management for NAT flows; deactivation is compiled out for now. */
extern boolean_t vxlnat_new_conn(vxlnat_flow_t *);
extern void vxlnat_activate_conn(vxlnat_flow_t *);
#ifdef notyet
extern void vxlnat_deactivate_conn(vxlnat_flow_t *);
#endif

/*
 * NOTE(review): presumably looks up a vnet by its 32-bit vnetid; the
 * meaning of the boolean_t argument is not visible here.
 */
extern vxlnat_vnet_t *vxlnat_get_vnet(uint32_t, boolean_t);
 291 
 292 #ifdef __cplusplus
 293 }
 294 #endif
 295 
 296 #endif /* _INET_VXLNAT_IMPL_H */