1  /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 #include <sys/types.h>
  17 #include <sys/socket.h>
  18 #include <netinet/in.h>
  19 #include <inet/ip.h>
  20 #include <inet/tcp_impl.h>
  21 #include <inet/udp_impl.h>
  22 
  23 #include <inet/vxlnat_impl.h>
  24 
/*
 * Functions for creating and managing the conn_t of a NAT flow, plus the
 * receive-side (conn_recv/conn_recvicmp) functions installed on those
 * conn_ts, so we can exploit ipclassifier for NAT flows.
 */
  29 
  30 static void
  31 vxlnat_external_tcp_v4(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
  32 {
  33         /* XXX KEBE SAYS FOR NOW, drop. */
  34         freemsg(mp);
  35 }
  36 
  37 static void
  38 vxlnat_external_tcp_v6(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
  39 {
  40         /* XXX KEBE SAYS FOR NOW, drop. */
  41         freemsg(mp);
  42 }
  43 
  44 static void
  45 vxlnat_external_tcp_icmp_v4(void *arg, mblk_t *mp, void *arg2,
  46     ip_recv_attr_t *ira)
  47 {
  48         /* XXX KEBE SAYS FOR NOW, drop. */
  49         freemsg(mp);
  50 }
  51 
  52 static void
  53 vxlnat_external_tcp_icmp_v6(void *arg, mblk_t *mp, void *arg2,
  54     ip_recv_attr_t *ira)
  55 {
  56         /* XXX KEBE SAYS FOR NOW, drop. */
  57         freemsg(mp);
  58 }
  59 
  60 static void
  61 vxlnat_external_udp_v4(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
  62 {
  63         /* XXX KEBE SAYS FOR NOW, drop. */
  64         freemsg(mp);
  65 }
  66 
  67 static void
  68 vxlnat_external_udp_v6(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
  69 {
  70         /* XXX KEBE SAYS FOR NOW, drop. */
  71         freemsg(mp);
  72 }
  73 
  74 static void
  75 vxlnat_external_udp_icmp_v4(void *arg, mblk_t *mp, void *arg2,
  76     ip_recv_attr_t *ira)
  77 {
  78         /* XXX KEBE SAYS FOR NOW, drop. */
  79         freemsg(mp);
  80 }
  81 
  82 static void
  83 vxlnat_external_udp_icmp_v6(void *arg, mblk_t *mp, void *arg2,
  84     ip_recv_attr_t *ira)
  85 {
  86         /* XXX KEBE SAYS FOR NOW, drop. */
  87         freemsg(mp);
  88 }
  89 
  90 static void
  91 vxlnat_external_icmp_v4(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
  92 {
  93         /* XXX KEBE SAYS FOR NOW, drop. */
  94         freemsg(mp);
  95 }
  96 
  97 static void
  98 vxlnat_external_icmp_icmp_v4(void *arg, mblk_t *mp, void *arg2,
  99     ip_recv_attr_t *ira)
 100 {
 101         /* XXX KEBE SAYS FOR NOW, drop. */
 102         freemsg(mp);
 103 }
 104 
 105 boolean_t
 106 vxlnat_new_conn(vxlnat_flow_t *flow)
 107 {
 108         conn_t *connp;
 109         uint16_t new_lport;
 110         uint8_t protocol = flow->vxnfl_protocol;
 111         int rc, ntries = 3;
 112 
 113         /*
 114          * XXX KEBE SAYS -- Use KM_NORMALPRI because we're likely in interrupt
 115          * context when we call this function.  If ipcl_conn_create() becomes
 116          * a problem even with these flags, we may need to go asynchronous.
 117          * XXX KEBE ALSO SAYS -- See TCP's handling of new inbound
 118          * connections.
 119          */
 120         switch (protocol) {
 121         case IPPROTO_TCP:
 122         case IPPROTO_UDP:
 123         case IPPROTO_ICMP:
 124                 /* case IPPROTO_ICMP6: */
 125                 break;
 126         default:
 127                 return (B_FALSE);
 128         }
 129         connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP | KM_NORMALPRI,
 130             vxlnat_netstack);
 131         if (connp == NULL)
 132                 return (B_FALSE);
 133 
 134         /*
 135          * XXX KEBE SAYS FILL IN ALL SORTS OF conn_t STUFF HERE.
 136          * Draw inspiration from iptun_conn_create, but also include
 137          * protocol-specific thingies.
 138          *
 139          * NOTE: As of right this moment, I'm imagining that for
 140          * inside-to-outside, conn_ip_output() will NOT be used, but rather
 141          * ire_forward_recv_v*() will be, not unlike the fixed/1-1 path, and
 142          * that the conn_t's *receive-side* features are the only ones to be
 143          * used.
 144          *
 145          * Also, like UDP, there will be no verifyicmp method assigned.
 146          * (Oddly, iptun does this, but it always returns true. Maybe that's a
 147          * bug in iptun?)
 148          */
 149 
 150         /* connp->conn_flags |= .... */
 151         connp->conn_priv = flow;  /* XXX is this a problem for freeing? */
 152 
 153         /*
 154          * XXX KEBE SAYS Don't worry about conn_ixa FOR NOW, but maybe
 155          * fill it in for use later?
 156          */
 157 
 158         /*
 159          * ALWAYS set this to GLOBAL_ZONEID.  We check at open() for
 160          * a non-exclusive zone open (we disallow it), and for exclusive-
 161          * stack zones, we want IP thinking (correctly) we own the netstack.
 162          */
 163         connp->conn_zoneid = GLOBAL_ZONEID;
 164         /*
 165          * cred_t dance is because we may be getting this straight from
 166          * interrupt context.
 167          */
 168         connp->conn_cred = zone_get_kcred(netstack_get_zoneid(vxlnat_netstack));
 169         connp->conn_cpid = NOPID;
 170 
 171         ASSERT(connp->conn_ref == 1);
 172 
 173         connp->conn_family = flow->vxnfl_isv4 ? AF_INET : AF_INET6;
 174 
 175         CONN_INC_REF(connp);    /* For the following... */
 176         flow->vxnfl_connp = connp;
 177 
 178         /* XXX KEBE SAYS Assume the right thing v4/v6-wise happens for now. */
 179         connp->conn_laddr_v6 = flow->vxnfl_rule->vxnr_pubaddr;
 180         connp->conn_faddr_v6 = flow->vxnfl_dst;
 181 
 182         /* XXX KEBE SAYS REMAP PORTS HERE ... */
 183         connp->conn_ports = flow->vxnfl_ports;
 184         connp->conn_proto = protocol;
 185 
 186         /* XXX KEBE ASKS INSERT HERE? */
 187         do {
 188 
 189                 switch (protocol) {
 190                 case IPPROTO_TCP: {
 191                         tcp_stack_t *tcps = vxlnat_netstack->netstack_tcp;
 192                         tcp_t dummy = {.tcp_tcps = tcps, .tcp_connp = connp};
 193 
 194                         /* Fill in with TCP-specific recv/recvicmp. */
 195                         if (flow->vxnfl_isv4) {
 196                                 connp->conn_recv = vxlnat_external_tcp_v4;
 197                                 connp->conn_recvicmp =
 198                                     vxlnat_external_tcp_icmp_v4;
 199                         } else {
 200                                 connp->conn_recv = vxlnat_external_tcp_v6;
 201                                 connp->conn_recvicmp =
 202                                     vxlnat_external_tcp_icmp_v6;
 203                         }
 204                         /* And set new_lport. */
 205                         new_lport = tcp_update_next_port(
 206                             tcps->tcps_next_port_to_try, &dummy, B_TRUE);
 207                         break;
 208                 }
 209                 case IPPROTO_UDP: {
 210                         udp_stack_t *udps = vxlnat_netstack->netstack_udp;
 211                         udp_t dummy = {.udp_us = udps, .udp_connp = connp };
 212 
 213                         /* Fill in with UDP-specific recv/recvicmp. */
 214                         if (flow->vxnfl_isv4) {
 215                                 connp->conn_recv = vxlnat_external_udp_v4;
 216                                 connp->conn_recvicmp =
 217                                     vxlnat_external_udp_icmp_v4;
 218                         } else {
 219                                 connp->conn_recv = vxlnat_external_udp_v6;
 220                                 connp->conn_recvicmp =
 221                                     vxlnat_external_udp_icmp_v6;
 222                         }
 223                         /* And set new_lport. */
 224                         new_lport = udp_update_next_port(&dummy,
 225                             udps->us_next_port_to_try, B_TRUE);
 226                         break;
 227                 }
 228                 case IPPROTO_ICMP: {
 229                         /* NOTE:  Only an IPv4 version of this is needed. */
 230                         connp->conn_recv = vxlnat_external_icmp_v4;
 231                         connp->conn_recv = vxlnat_external_icmp_icmp_v4;
 232                         /*
 233                          * XXX KEBE SAYS -- I don't think we can tell the real
 234                          * IP code to bind an ICMP socket to anything beyond
 235                          * the addresses.  But also we allow multiple ICMP
 236                          * conn_ts, which could mean duplicate packets.  :-/
 237                          */
 238                         new_lport = 0;
 239                         break;
 240                 }
 241                 default:
 242                         /* Should never reach here... */
 243                         cmn_err(CE_PANIC, "vxnfl_protocol corruption!");
 244                         return (B_FALSE);
 245                 }
 246                 connp->conn_lport = new_lport;
 247 
 248                 rc = ipcl_conn_insert(connp);
 249                 switch (rc) {
 250                 case 0:
 251                         break;
 252                 case EADDRINUSE:
 253                         /* Try rewhacking the ports if we can. */
 254                         switch (protocol) {
 255                         case IPPROTO_TCP:
 256                         case IPPROTO_UDP:
 257                                 /* Try again... */
 258                                 break;
 259                         default:
 260                                 /* Give up now. */
 261                                 ntries = 1;
 262                                 break;
 263                         }
 264                         break;
 265                 default:
 266                         /* GET OUT, NOW! */
 267                         DTRACE_PROBE1(vxlnat__new__conn__badins, int, rc);
 268                         ntries = 1;
 269                         break;
 270                 }
 271         } while (rc != 0 && --ntries > 0);
 272 
 273         if (rc != 0) {
 274                 /* Trash this conn. */
 275                 DTRACE_PROBE3(vxlnat__new__conn__collision, int, rc,
 276                     conn_t *, connp, vxlnat_flow_t *, flow);
 277                 CONN_DEC_REF(connp);
 278                 CONN_DEC_REF(connp);
 279                 /*
 280                  * XXX KEBE ASKS Anything else?  Last CONN_DEC_REF should
 281                  * trigger destroy.
 282                  */
 283                 flow->vxnfl_connp = NULL;
 284                 return (B_FALSE);
 285         }
 286 
 287         return (B_TRUE);
 288 }
 289 
 290 void
 291 vxlnat_activate_conn(vxlnat_flow_t *flow)
 292 {
 293         conn_t *connp = flow->vxnfl_connp;
 294 
 295         mutex_enter(&connp->conn_lock);
 296         connp->conn_state_flags &= ~CONN_INCIPIENT;
 297         mutex_exit(&connp->conn_lock);
 298         /* XXX KEBE ASKS OR INSERT HERE? */
 299 }
 300 
 301 #ifdef notyet
 302 void
 303 vxlnat_deactivate_conn(vxlnat_flow_t *flow)
 304 {
 305         conn_t *connp = flow->vxnfl_connp;
 306 
 307         ip_quiesce_conn(connp);
 308         /* XXX KEBE ASKS ipcl_hash_remove()? */
 309 }
 310 #endif