Print this page
    
OS-4018 lxbrand support TCP SO_REUSEPORT
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ipclassifier.h
          +++ new/usr/src/uts/common/inet/ipclassifier.h
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
       24 + * Copyright 2015 Joyent, Inc.
  24   25   */
  25   26  
  26   27  #ifndef _INET_IPCLASSIFIER_H
  27   28  #define _INET_IPCLASSIFIER_H
  28   29  
  29   30  #ifdef  __cplusplus
  30   31  extern "C" {
  31   32  #endif
  32   33  
  33   34  #include <inet/common.h>
  34   35  #include <inet/ip.h>
  35   36  #include <inet/mi.h>
  36   37  #include <inet/tcp.h>
  37   38  #include <inet/ip6.h>
  38   39  #include <netinet/in.h>         /* for IPPROTO_* constants */
  39   40  #include <sys/sdt.h>
  40   41  #include <sys/socket_proto.h>
  41   42  #include <sys/sunddi.h>
  42   43  #include <sys/sunldi.h>
  43   44  
  44   45  typedef void (*edesc_rpf)(void *, mblk_t *, void *, ip_recv_attr_t *);
  45   46  struct icmph_s;
  46   47  struct icmp6_hdr;
  47   48  typedef boolean_t (*edesc_vpf)(conn_t *, void *, struct icmph_s *,
  48   49      struct icmp6_hdr *, ip_recv_attr_t *);
  49   50  
  50   51  /*
  51   52   * ==============================
  52   53   * =    The CONNECTION          =
  53   54   * ==============================
  54   55   */
  55   56  
  56   57  /*
  57   58   * The connection structure contains the common information/flags/ref needed.
  58   59   * Implementation will keep the connection struct, the layers (with their
  59   60   * respective data for event i.e. tcp_t if event was tcp_input_data) all in one
  60   61   * contiguous memory location.
  61   62   */
  62   63  
  63   64  /* Conn Flags */
  64   65  /* Unused                       0x00020000 */
  65   66  /* Unused                       0x00040000 */
  66   67  #define IPCL_FULLY_BOUND        0x00080000      /* Bound to correct squeue */
  67   68  /* Unused                       0x00100000 */
  68   69  /* Unused                       0x00200000 */
  69   70  /* Unused                       0x00400000 */
  70   71  #define IPCL_CL_LISTENER        0x00800000      /* Cluster listener */
  71   72  /* Unused                       0x01000000 */
  72   73  /* Unused                       0x02000000 */
  73   74  /* Unused                       0x04000000 */
  74   75  /* Unused                       0x08000000 */
  75   76  /* Unused                       0x10000000 */
  76   77  /* Unused                       0x20000000 */
  77   78  #define IPCL_CONNECTED          0x40000000      /* Conn in connected table */
  78   79  #define IPCL_BOUND              0x80000000      /* Conn in bind table */
  79   80  
  80   81  /* Flags identifying the type of conn */
  81   82  #define IPCL_TCPCONN            0x00000001      /* From tcp_conn_cache */
  82   83  #define IPCL_SCTPCONN           0x00000002      /* From sctp_conn_cache */
  83   84  #define IPCL_IPCCONN            0x00000004      /* From ip_conn_cache */
  84   85  #define IPCL_UDPCONN            0x00000008      /* From udp_conn_cache */
  85   86  #define IPCL_RAWIPCONN          0x00000010      /* From rawip_conn_cache */
  86   87  #define IPCL_RTSCONN            0x00000020      /* From rts_conn_cache */
  87   88  /* Unused                       0x00000040 */
  88   89  #define IPCL_IPTUN              0x00000080      /* iptun module above us */
  89   90  
  90   91  #define IPCL_NONSTR             0x00001000      /* A non-STREAMS socket */
  91   92  /* Unused                       0x10000000 */
  92   93  
  93   94  #define IPCL_REMOVED            0x00000100
  94   95  #define IPCL_REUSED             0x00000200
  95   96  
  96   97  #define IPCL_IS_CONNECTED(connp)                                        \
  97   98          ((connp)->conn_flags & IPCL_CONNECTED)
  98   99  
  99  100  #define IPCL_IS_BOUND(connp)                                            \
 100  101          ((connp)->conn_flags & IPCL_BOUND)
 101  102  
 102  103  /*
 103  104   * Can't use conn_proto since we need to tell difference
 104  105   * between a real TCP socket and a SOCK_RAW, IPPROTO_TCP.
 105  106   */
 106  107  #define IPCL_IS_TCP(connp)                                              \
 107  108          ((connp)->conn_flags & IPCL_TCPCONN)
 108  109  
 109  110  #define IPCL_IS_SCTP(connp)                                             \
 110  111          ((connp)->conn_flags & IPCL_SCTPCONN)
 111  112  
 112  113  #define IPCL_IS_UDP(connp)                                              \
 113  114          ((connp)->conn_flags & IPCL_UDPCONN)
 114  115  
 115  116  #define IPCL_IS_RAWIP(connp)                                            \
 116  117          ((connp)->conn_flags & IPCL_RAWIPCONN)
 117  118  
 118  119  #define IPCL_IS_RTS(connp)                                              \
 119  120          ((connp)->conn_flags & IPCL_RTSCONN)
 120  121  
 121  122  #define IPCL_IS_IPTUN(connp)                                            \
 122  123          ((connp)->conn_flags & IPCL_IPTUN)
 123  124  
 124  125  #define IPCL_IS_NONSTR(connp)   ((connp)->conn_flags & IPCL_NONSTR)
 125  126  
 126  127  typedef struct connf_s connf_t;
 127  128  
 128  129  typedef struct
 129  130  {
 130  131          int     ctb_depth;
 131  132  #define CONN_STACK_DEPTH        15
 132  133          pc_t    ctb_stack[CONN_STACK_DEPTH];
 133  134  } conn_trace_t;
 134  135  
 135  136  typedef struct ip_helper_minor_info_s {
 136  137          dev_t   ip_minfo_dev;           /* Device */
 137  138          vmem_t  *ip_minfo_arena;        /* Arena */
 138  139  } ip_helper_minfo_t;
 139  140  
 140  141  /*
 141  142   * ip helper stream info
 142  143   */
 143  144  typedef struct ip_helper_stream_info_s {
 144  145          ldi_handle_t            iphs_handle;
 145  146          queue_t                 *iphs_rq;
 146  147          queue_t                 *iphs_wq;
 147  148          ip_helper_minfo_t       *iphs_minfo;
 148  149  } ip_helper_stream_info_t;
 149  150  
 150  151  /*
 151  152   * Mandatory Access Control mode, in conn_t's conn_mac_mode field.
 152  153   *      CONN_MAC_DEFAULT: strict enforcement of MAC.
 153  154   *      CONN_MAC_AWARE:   allows communications between unlabeled systems
 154  155   *                        and privileged daemons
 155  156   *      CONN_MAC_IMPLICIT: allows communications without explicit labels
 156  157   *                         on the wire with privileged daemons.
 157  158   *
 158  159   * CONN_MAC_IMPLICIT is intended specifically for labeled IPsec key management
 159  160   * in networks which don't pass CIPSO-labeled packets.
 160  161   */
 161  162  #define CONN_MAC_DEFAULT 0
 162  163  #define CONN_MAC_AWARE 1
 163  164  #define CONN_MAC_IMPLICIT 2
 164  165  
 165  166  /*
 166  167   * conn receive ancillary definition.
 167  168   *
 168  169   * These are the set of socket options that make the receive side
 169  170   * potentially pass up ancillary data items.
 170  171   * We have a union with an integer so that we can quickly check whether
 171  172   * any ancillary data items need to be added.
 172  173   */
 173  174  typedef struct crb_s {
 174  175          union {
 175  176                  uint32_t        crbu_all;
 176  177                  struct {
 177  178                          uint32_t
 178  179          crbb_recvdstaddr : 1,           /* IP_RECVDSTADDR option */
 179  180          crbb_recvopts : 1,              /* IP_RECVOPTS option */
 180  181          crbb_recvif : 1,                /* IP_RECVIF option */
 181  182          crbb_recvslla : 1,              /* IP_RECVSLLA option */
 182  183  
 183  184          crbb_recvttl : 1,               /* IP_RECVTTL option */
 184  185          crbb_ip_recvpktinfo : 1,        /* IP*_RECVPKTINFO option  */
 185  186          crbb_ipv6_recvhoplimit : 1,     /* IPV6_RECVHOPLIMIT option */
 186  187          crbb_ipv6_recvhopopts : 1,      /* IPV6_RECVHOPOPTS option */
 187  188  
 188  189          crbb_ipv6_recvdstopts : 1,      /* IPV6_RECVDSTOPTS option */
 189  190          crbb_ipv6_recvrthdr : 1,        /* IPV6_RECVRTHDR option */
 190  191          crbb_old_ipv6_recvdstopts : 1,  /* old form of IPV6_DSTOPTS */
 191  192          crbb_ipv6_recvrthdrdstopts : 1, /* IPV6_RECVRTHDRDSTOPTS */
 192  193  
 193  194          crbb_ipv6_recvtclass : 1,       /* IPV6_RECVTCLASS */
 194  195          crbb_recvucred : 1,             /* IP_RECVUCRED option */
 195  196          crbb_timestamp : 1;             /* SO_TIMESTAMP "socket" option */
 196  197  
 197  198                  } crbb;
 198  199          } crbu;
 199  200  } crb_t;
 200  201  
 201  202  #define crb_all                         crbu.crbu_all
 202  203  #define crb_recvdstaddr                 crbu.crbb.crbb_recvdstaddr
 203  204  #define crb_recvopts                    crbu.crbb.crbb_recvopts
 204  205  #define crb_recvif                      crbu.crbb.crbb_recvif
 205  206  #define crb_recvslla                    crbu.crbb.crbb_recvslla
 206  207  #define crb_recvttl                     crbu.crbb.crbb_recvttl
 207  208  #define crb_ip_recvpktinfo              crbu.crbb.crbb_ip_recvpktinfo
 208  209  #define crb_ipv6_recvhoplimit           crbu.crbb.crbb_ipv6_recvhoplimit
 209  210  #define crb_ipv6_recvhopopts            crbu.crbb.crbb_ipv6_recvhopopts
 210  211  #define crb_ipv6_recvdstopts            crbu.crbb.crbb_ipv6_recvdstopts
 211  212  #define crb_ipv6_recvrthdr              crbu.crbb.crbb_ipv6_recvrthdr
 212  213  #define crb_old_ipv6_recvdstopts        crbu.crbb.crbb_old_ipv6_recvdstopts
 213  214  #define crb_ipv6_recvrthdrdstopts       crbu.crbb.crbb_ipv6_recvrthdrdstopts
 214  215  #define crb_ipv6_recvtclass             crbu.crbb.crbb_ipv6_recvtclass
 215  216  #define crb_recvucred                   crbu.crbb.crbb_recvucred
 216  217  #define crb_timestamp                   crbu.crbb.crbb_timestamp
 217  218  
 218  219  /*
 219  220   * The initial fields in the conn_t are set up by the kmem_cache constructor,
 220  221   * and are preserved when it is freed. Fields after that are bzero'ed when
 221  222   * the conn_t is freed.
 222  223   *
 223  224   * Much of the conn_t is protected by conn_lock.
 224  225   *
 225  226   * conn_lock is also used by some ULPs (like UDP and RAWIP) to protect
 226  227   * their state.
 227  228   */
 228  229  struct conn_s {
 229  230          kmutex_t        conn_lock;
 230  231          uint32_t        conn_ref;               /* Reference counter */
 231  232          uint32_t        conn_flags;             /* Conn Flags */
 232  233  
 233  234          union {
 234  235                  tcp_t           *cp_tcp;        /* Pointer to the tcp struct */
 235  236                  struct udp_s    *cp_udp;        /* Pointer to the udp struct */
 236  237                  struct icmp_s   *cp_icmp;       /* Pointer to rawip struct */
 237  238                  struct rts_s    *cp_rts;        /* Pointer to rts struct */
 238  239                  struct iptun_s  *cp_iptun;      /* Pointer to iptun_t */
 239  240                  struct sctp_s   *cp_sctp;       /* For IPCL_SCTPCONN */
 240  241                  void            *cp_priv;
 241  242          } conn_proto_priv;
 242  243  #define conn_tcp        conn_proto_priv.cp_tcp
 243  244  #define conn_udp        conn_proto_priv.cp_udp
 244  245  #define conn_icmp       conn_proto_priv.cp_icmp
 245  246  #define conn_rts        conn_proto_priv.cp_rts
 246  247  #define conn_iptun      conn_proto_priv.cp_iptun
 247  248  #define conn_sctp       conn_proto_priv.cp_sctp
 248  249  #define conn_priv       conn_proto_priv.cp_priv
 249  250  
 250  251          kcondvar_t      conn_cv;
 251  252          uint8_t         conn_proto;             /* protocol type */
 252  253  
 253  254          edesc_rpf       conn_recv;              /* Pointer to recv routine */
 254  255          edesc_rpf       conn_recvicmp;          /* For ICMP error */
 255  256          edesc_vpf       conn_verifyicmp;        /* Verify ICMP error */
 256  257  
 257  258          ip_xmit_attr_t  *conn_ixa;              /* Options if no ancil data */
 258  259  
 259  260          /* Fields after this are bzero'ed when the conn_t is freed. */
 260  261  #define conn_start_clr  conn_recv_ancillary
 261  262  
 262  263          /* Options for receive-side ancillary data */
 263  264          crb_t           conn_recv_ancillary;
 264  265  
 265  266          squeue_t        *conn_sqp;              /* Squeue for processing */
 266  267          uint_t          conn_state_flags;       /* IP state flags */
 267  268  
 268  269          int             conn_lingertime;        /* linger time (in seconds) */
 269  270  
 270  271          unsigned int
 271  272                  conn_on_sqp : 1,                /* Conn is being processed */
 272  273                  conn_linger : 1,                /* SO_LINGER state */
 273  274                  conn_useloopback : 1,           /* SO_USELOOPBACK state */
 274  275                  conn_broadcast : 1,             /* SO_BROADCAST state */
 275  276  
 276  277                  conn_reuseaddr : 1,             /* SO_REUSEADDR state */
 277  278                  conn_keepalive : 1,             /* SO_KEEPALIVE state */
 278  279                  conn_multi_router : 1,          /* Wants all multicast pkts */
 279  280                  conn_unspec_src : 1,            /* IP_UNSPEC_SRC */
 280  281  
 281  282                  conn_policy_cached : 1,         /* Is policy cached/latched ? */
 282  283                  conn_in_enforce_policy : 1,     /* Enforce Policy on inbound */
 283  284                  conn_out_enforce_policy : 1,    /* Enforce Policy on outbound */
 284  285                  conn_debug : 1,                 /* SO_DEBUG */
 285  286  
  
    | 
      ↓ open down ↓ | 
    252 lines elided | 
    
      ↑ open up ↑ | 
  
 286  287                  conn_ipv6_v6only : 1,           /* IPV6_V6ONLY */
 287  288                  conn_oobinline : 1,             /* SO_OOBINLINE state */
 288  289                  conn_dgram_errind : 1,          /* SO_DGRAM_ERRIND state */
 289  290                  conn_exclbind : 1,              /* SO_EXCLBIND state */
 290  291  
 291  292                  conn_mdt_ok : 1,                /* MDT is permitted */
 292  293                  conn_allzones : 1,              /* SO_ALLZONES */
 293  294                  conn_ipv6_recvpathmtu : 1,      /* IPV6_RECVPATHMTU */
 294  295                  conn_mcbc_bind : 1,             /* Bound to multi/broadcast */
 295  296  
 296      -                conn_pad_to_bit_31 : 12;
      297 +                conn_reuseport : 1,             /* SO_REUSEPORT state */
      298 +                conn_pad_to_bit_31 : 11;
 297  299  
 298  300          boolean_t       conn_blocked;           /* conn is flow-controlled */
 299  301  
 300  302          squeue_t        *conn_initial_sqp;      /* Squeue at open time */
 301  303          squeue_t        *conn_final_sqp;        /* Squeue after connect */
 302  304          ill_t           *conn_dhcpinit_ill;     /* IP_DHCPINIT_IF */
 303  305          ipsec_latch_t   *conn_latch;            /* latched IDS */
 304  306          struct ipsec_policy_s   *conn_latch_in_policy; /* latched policy (in) */
 305  307          struct ipsec_action_s   *conn_latch_in_action; /* latched action (in) */
 306  308          uint_t          conn_bound_if;          /* IP*_BOUND_IF */
 307  309          queue_t         *conn_rq;               /* Read queue */
 308  310          queue_t         *conn_wq;               /* Write queue */
 309  311          dev_t           conn_dev;               /* Minor number */
 310  312          vmem_t          *conn_minor_arena;      /* Minor arena */
 311  313          ip_helper_stream_info_t *conn_helper_info;
 312  314  
 313  315          cred_t          *conn_cred;             /* Credentials */
 314  316          pid_t           conn_cpid;              /* pid from open/connect */
 315  317          uint64_t        conn_open_time;         /* time when this was opened */
 316  318  
 317  319          connf_t         *conn_g_fanout;         /* Global Hash bucket head */
 318  320          struct conn_s   *conn_g_next;           /* Global Hash chain next */
 319  321          struct conn_s   *conn_g_prev;           /* Global Hash chain prev */
 320  322          struct ipsec_policy_head_s *conn_policy; /* Configured policy */
 321  323          in6_addr_t      conn_bound_addr_v6;     /* Address in bind() */
 322  324  #define conn_bound_addr_v4      V4_PART_OF_V6(conn_bound_addr_v6)
 323  325          connf_t         *conn_fanout;           /* Hash bucket we're part of */
 324  326          struct conn_s   *conn_next;             /* Hash chain next */
 325  327          struct conn_s   *conn_prev;             /* Hash chain prev */
 326  328  
 327  329          struct {
 328  330                  in6_addr_t connua_laddr;        /* Local address - match */
 329  331                  in6_addr_t connua_faddr;        /* Remote address */
 330  332          } connua_v6addr;
 331  333  #define conn_laddr_v4   V4_PART_OF_V6(connua_v6addr.connua_laddr)
 332  334  #define conn_faddr_v4   V4_PART_OF_V6(connua_v6addr.connua_faddr)
 333  335  #define conn_laddr_v6   connua_v6addr.connua_laddr
 334  336  #define conn_faddr_v6   connua_v6addr.connua_faddr
 335  337          in6_addr_t      conn_saddr_v6;          /* Local address - source */
 336  338  #define conn_saddr_v4   V4_PART_OF_V6(conn_saddr_v6)
 337  339  
 338  340          union {
 339  341                  /* Used for classifier match performance */
 340  342                  uint32_t                connu_ports2;
 341  343                  struct {
 342  344                          in_port_t       connu_fport;    /* Remote port */
 343  345                          in_port_t       connu_lport;    /* Local port */
 344  346                  } connu_ports;
 345  347          } u_port;
 346  348  #define conn_fport      u_port.connu_ports.connu_fport
 347  349  #define conn_lport      u_port.connu_ports.connu_lport
 348  350  #define conn_ports      u_port.connu_ports2
 349  351  
 350  352          uint_t          conn_incoming_ifindex;  /* IP{,V6}_BOUND_IF, scopeid */
 351  353          ill_t           *conn_oper_pending_ill; /* pending shared ioctl */
 352  354  
 353  355          krwlock_t       conn_ilg_lock;          /* Protects conn_ilg_* */
 354  356          ilg_t           *conn_ilg;              /* Group memberships */
 355  357  
 356  358          kcondvar_t      conn_refcv;             /* For conn_oper_pending_ill */
 357  359  
 358  360          struct conn_s   *conn_drain_next;       /* Next conn in drain list */
 359  361          struct conn_s   *conn_drain_prev;       /* Prev conn in drain list */
 360  362          idl_t           *conn_idl;              /* Ptr to the drain list head */
 361  363          mblk_t          *conn_ipsec_opt_mp;     /* ipsec option mblk */
 362  364          zoneid_t        conn_zoneid;            /* zone connection is in */
 363  365          int             conn_rtaware;           /* RT_AWARE sockopt value */
 364  366          kcondvar_t      conn_sq_cv;             /* For non-STREAMS socket IO */
 365  367          sock_upcalls_t  *conn_upcalls;          /* Upcalls to sockfs */
 366  368          sock_upper_handle_t conn_upper_handle;  /* Upper handle: sonode * */
 367  369  
 368  370          unsigned int
 369  371                  conn_mlp_type : 2,              /* mlp_type_t; tsol/tndb.h */
 370  372                  conn_anon_mlp : 1,              /* user wants anon MLP */
 371  373                  conn_anon_port : 1,             /* user bound anonymously */
 372  374  
 373  375                  conn_mac_mode : 2,              /* normal/loose/implicit MAC */
 374  376                  conn_anon_priv_bind : 1,        /* *_ANON_PRIV_BIND state */
 375  377                  conn_zone_is_global : 1,        /* GLOBAL_ZONEID */
 376  378                  conn_isvrrp : 1,                /* VRRP control socket */
 377  379                  conn_spare : 23;
 378  380  
 379  381          boolean_t       conn_flow_cntrld;
 380  382          netstack_t      *conn_netstack; /* Corresponds to a netstack_hold */
 381  383  
 382  384          /*
 383  385           * IP format that packets received for this struct should use.
 384  386           * Value can be IP4_VERSION or IPV6_VERSION.
 385  387           * The sending version is encoded using IXAF_IS_IPV4.
 386  388           */
 387  389          ushort_t        conn_ipversion;
 388  390  
 389  391          /* Written to only once at the time of opening the endpoint */
 390  392          sa_family_t     conn_family;            /* Family from socket() call */
 391  393          uint_t          conn_so_type;           /* Type from socket() call */
 392  394  
 393  395          uint_t          conn_sndbuf;            /* SO_SNDBUF state */
 394  396          uint_t          conn_rcvbuf;            /* SO_RCVBUF state */
 395  397          uint_t          conn_wroff;             /* Current write offset */
 396  398  
 397  399          uint_t          conn_sndlowat;          /* Send buffer low water mark */
 398  400          uint_t          conn_rcvlowat;          /* Recv buffer low water mark */
 399  401  
 400  402          uint8_t         conn_default_ttl;       /* Default TTL/hoplimit */
 401  403  
 402  404          uint32_t        conn_flowinfo;  /* Connected flow id and tclass */
 403  405  
 404  406          /*
 405  407           * The most recent address for sendto. Initially set to zero
 406  408           * which is always different than the destination address
 407  409           * since the send interprets zero as the loopback address.
 408  410           */
 409  411          in6_addr_t      conn_v6lastdst;
 410  412  #define conn_v4lastdst  V4_PART_OF_V6(conn_v6lastdst)
 411  413          ushort_t        conn_lastipversion;
 412  414          in_port_t       conn_lastdstport;
 413  415          uint32_t        conn_lastflowinfo;      /* IPv6-only */
 414  416          uint_t          conn_lastscopeid;       /* IPv6-only */
 415  417          uint_t          conn_lastsrcid;         /* Only for AF_INET6 */
 416  418          /*
 417  419           * When we are not connected conn_saddr might be unspecified.
 418  420           * We track the source that was used with conn_v6lastdst here.
 419  421           */
 420  422          in6_addr_t      conn_v6lastsrc;
 421  423  #define conn_v4lastsrc  V4_PART_OF_V6(conn_v6lastsrc)
 422  424  
 423  425          /* Templates for transmitting packets */
 424  426          ip_pkt_t        conn_xmit_ipp;          /* Options if no ancil data */
 425  427  
 426  428          /*
 427  429           * Header template - conn_ht_ulp is a pointer into conn_ht_iphc.
 428  430           * Note that ixa_ip_hdr_length indicates the offset of ht_ulp in
 429  431           * ht_iphc
 430  432           *
 431  433           * The header template is maintained for connected endpoints (and
 432  434           * updated when sticky options are changed) and also for the lastdst.
 433  435           * There is no conflict between those usages since SOCK_DGRAM and
 434  436           * SOCK_RAW can not be used to specify a destination address (with
 435  437           * sendto/sendmsg) if the socket has been connected.
 436  438           */
 437  439          uint8_t         *conn_ht_iphc;          /* Start of IP header */
 438  440          uint_t          conn_ht_iphc_allocated; /* Allocated buffer size */
 439  441          uint_t          conn_ht_iphc_len;       /* IP+ULP size */
 440  442          uint8_t         *conn_ht_ulp;           /* Upper-layer header */
 441  443          uint_t          conn_ht_ulp_len;        /* ULP header len */
 442  444  
 443  445          /* Checksum to compensate for source routed packets. Host byte order */
 444  446          uint32_t        conn_sum;
 445  447  
 446  448          uint32_t        conn_ioctlref;          /* ioctl ref count */
 447  449  #ifdef CONN_DEBUG
 448  450  #define CONN_TRACE_MAX  10
 449  451          int             conn_trace_last;        /* ndx of last used tracebuf */
 450  452          conn_trace_t    conn_trace_buf[CONN_TRACE_MAX];
 451  453  #endif
 452  454  };
 453  455  
 454  456  /*
 455  457   * connf_t - connection fanout data.
 456  458   *
 457  459   * The hash tables and their linkage (conn_t.{hashnextp, hashprevp}) are
 458  460   * protected by the per-bucket lock. Each conn_t inserted in the list
 459  461   * points back at the connf_t that heads the bucket.
 460  462   */
 461  463  struct connf_s {
 462  464          struct conn_s   *connf_head;
 463  465          kmutex_t        connf_lock;
 464  466  };
 465  467  
 466  468  #define CONN_INC_REF(connp)     {                               \
 467  469          mutex_enter(&(connp)->conn_lock);                       \
 468  470          DTRACE_PROBE1(conn__inc__ref, conn_t *, connp);         \
 469  471          ASSERT(conn_trace_ref(connp));                          \
 470  472          (connp)->conn_ref++;                                    \
 471  473          ASSERT((connp)->conn_ref != 0);                         \
 472  474          mutex_exit(&(connp)->conn_lock);                        \
 473  475  }
 474  476  
 475  477  #define CONN_INC_REF_LOCKED(connp)      {                       \
 476  478          DTRACE_PROBE1(conn__inc__ref, conn_t *, connp);         \
 477  479          ASSERT(MUTEX_HELD(&(connp)->conn_lock));                \
 478  480          ASSERT(conn_trace_ref(connp));                          \
 479  481          (connp)->conn_ref++;                                    \
 480  482          ASSERT((connp)->conn_ref != 0);                         \
 481  483  }
 482  484  
 483  485  #define CONN_DEC_REF(connp)     {                                       \
 484  486          mutex_enter(&(connp)->conn_lock);                               \
 485  487          DTRACE_PROBE1(conn__dec__ref, conn_t *, connp);                 \
 486  488          /*                                                              \
 487  489           * The squeue framework always does a CONN_DEC_REF after return \
 488  490           * from TCP. Hence the refcnt must be at least 2 if conn_on_sqp \
 489  491           * is B_TRUE and conn_ref is being decremented. This is to      \
 490  492           * account for the mblk being currently processed.              \
 491  493           */                                                             \
 492  494          if ((connp)->conn_ref == 0 ||                                   \
 493  495              ((connp)->conn_ref == 1 && (connp)->conn_on_sqp))           \
 494  496                  cmn_err(CE_PANIC, "CONN_DEC_REF: connp(%p) has ref "    \
 495  497                          "= %d\n", (void *)(connp), (connp)->conn_ref);  \
 496  498          ASSERT(conn_untrace_ref(connp));                                \
 497  499          (connp)->conn_ref--;                                            \
 498  500          if ((connp)->conn_ref == 0) {                                   \
 499  501                  /* Refcnt can't increase again, safe to drop lock */    \
 500  502                  mutex_exit(&(connp)->conn_lock);                        \
 501  503                  ipcl_conn_destroy(connp);                               \
 502  504          } else {                                                        \
 503  505                  cv_broadcast(&(connp)->conn_cv);                        \
 504  506                  mutex_exit(&(connp)->conn_lock);                        \
 505  507          }                                                               \
 506  508  }
 507  509  
 508  510  /*
 509  511   * For use with subsystems within ip which use ALL_ZONES as a wildcard
 510  512   */
 511  513  #define IPCL_ZONEID(connp)                                              \
 512  514          ((connp)->conn_allzones ? ALL_ZONES : (connp)->conn_zoneid)
 513  515  
 514  516  /*
 515  517   * For matching between a conn_t and a zoneid.
 516  518   */
 517  519  #define IPCL_ZONE_MATCH(connp, zoneid)                                  \
 518  520          (((connp)->conn_allzones) ||                                    \
 519  521              ((zoneid) == ALL_ZONES) ||                                  \
 520  522              (connp)->conn_zoneid == (zoneid))
 521  523  
 522  524  /*
 523  525   * On a labeled system, we must treat bindings to ports
 524  526   * on shared IP addresses by sockets with MAC exemption
 525  527   * privilege as being in all zones, as there's
 526  528   * otherwise no way to identify the right receiver.
 527  529   */
 528  530  
 529  531  #define IPCL_CONNS_MAC(conn1, conn2)                                    \
 530  532          (((conn1)->conn_mac_mode != CONN_MAC_DEFAULT) ||                \
 531  533          ((conn2)->conn_mac_mode != CONN_MAC_DEFAULT))
 532  534  
 533  535  #define IPCL_BIND_ZONE_MATCH(conn1, conn2)                              \
 534  536          (IPCL_CONNS_MAC(conn1, conn2) ||                                \
 535  537          IPCL_ZONE_MATCH(conn1, conn2->conn_zoneid) ||                   \
 536  538          IPCL_ZONE_MATCH(conn2, conn1->conn_zoneid))
 537  539  
 538  540  
 539  541  #define _IPCL_V4_MATCH(v6addr, v4addr)  \
 540  542          (V4_PART_OF_V6((v6addr)) == (v4addr) && IN6_IS_ADDR_V4MAPPED(&(v6addr)))
 541  543  
 542  544  #define _IPCL_V4_MATCH_ANY(addr)        \
 543  545          (IN6_IS_ADDR_V4MAPPED_ANY(&(addr)) || IN6_IS_ADDR_UNSPECIFIED(&(addr)))
 544  546  
 545  547  
 546  548  /*
 547  549   * IPCL_PROTO_MATCH() and IPCL_PROTO_MATCH_V6() only match conns with
 548  550   * the specified ira_zoneid or conn_allzones by calling conn_wantpacket.
 549  551   */
 550  552  #define IPCL_PROTO_MATCH(connp, ira, ipha)                              \
 551  553          ((((connp)->conn_laddr_v4 == INADDR_ANY) ||                     \
 552  554          (((connp)->conn_laddr_v4 == ((ipha)->ipha_dst)) &&              \
 553  555              (((connp)->conn_faddr_v4 == INADDR_ANY) ||                  \
 554  556          ((connp)->conn_faddr_v4 == ((ipha)->ipha_src))))) &&            \
 555  557          conn_wantpacket((connp), (ira), (ipha)))
 556  558  
           /*
            * IPv6 form of IPCL_PROTO_MATCH: conn_laddr_v6 must be unspecified, or
            * equal to ip6_dst with conn_faddr_v6 either unspecified or equal to
            * ip6_src.  Only when that address test passes is conn_wantpacket_v6()
            * called, and it must also accept the packet.
            */
 557  559  #define IPCL_PROTO_MATCH_V6(connp, ira, ip6h)                           \
 558  560          ((IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6) ||           \
 559  561          (IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &((ip6h)->ip6_dst)) &&   \
 560  562          (IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_faddr_v6) ||                  \
 561  563          IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, &((ip6h)->ip6_src))))) && \
 562  564          (conn_wantpacket_v6((connp), (ira), (ip6h))))
 563  565  
           /*
            * Hash for a connection lookup: XORs ntohl(src) with ports shifted
            * right by 24, 16, 8 and 0 bits, casts the result to unsigned, and
            * reduces it modulo (ipst)->ips_ipcl_conn_fanout_size.
            */
 564  566  #define IPCL_CONN_HASH(src, ports, ipst)                                \
 565  567          ((unsigned)(ntohl((src)) ^ ((ports) >> 24) ^ ((ports) >> 16) ^  \
 566  568          ((ports) >> 8) ^ (ports)) % (ipst)->ips_ipcl_conn_fanout_size)
 567  569  
           /*
            * IPv6 form of IPCL_CONN_HASH.  Only V4_PART_OF_V6(src) of the source
            * address is fed into the hash.
            */
 568  570  #define IPCL_CONN_HASH_V6(src, ports, ipst)                             \
 569  571          IPCL_CONN_HASH(V4_PART_OF_V6((src)), (ports), (ipst))
 570  572  
           /*
            * True when conn_proto and conn_ports equal proto and ports, the conn's
            * foreign address matches src and its local address matches dst (both
            * under _IPCL_V4_MATCH), and conn_ipv6_v6only is not set.
            */
 571  573  #define IPCL_CONN_MATCH(connp, proto, src, dst, ports)                  \
 572  574          ((connp)->conn_proto == (proto) &&                              \
 573  575                  (connp)->conn_ports == (ports) &&                       \
 574  576                  _IPCL_V4_MATCH((connp)->conn_faddr_v6, (src)) &&        \
 575  577                  _IPCL_V4_MATCH((connp)->conn_laddr_v6, (dst)) &&        \
 576  578                  !(connp)->conn_ipv6_v6only)
 577  579  
           /*
            * IPv6 form of IPCL_CONN_MATCH: conn_proto and conn_ports must equal
            * proto and ports, conn_faddr_v6 must equal src and conn_laddr_v6 must
            * equal dst.  src and dst are in6_addr lvalues (their addresses are
            * taken); conn_ipv6_v6only is not consulted.
            */
 578  580  #define IPCL_CONN_MATCH_V6(connp, proto, src, dst, ports)               \
 579  581          ((connp)->conn_proto == (proto) &&                              \
 580  582                  (connp)->conn_ports == (ports) &&                       \
 581  583                  IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, &(src)) &&  \
 582  584                  IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(dst)))
 583  585  
           /*
            * XORs port with (port >> 8) and masks the result with (size - 1).
            * NOTE(review): masking instead of a modulo assumes size is a power
            * of two -- confirm against where the fanout sizes are set.
            */
 584  586  #define IPCL_PORT_HASH(port, size) \
 585  587          ((((port) >> 8) ^ (port)) & ((size) - 1))
 586  588  
           /*
            * XORs lport with (lport >> 8), casts to unsigned, and reduces the
            * result modulo (ipst)->ips_ipcl_bind_fanout_size.
            */
 587  589  #define IPCL_BIND_HASH(lport, ipst)                                     \
 588  590          ((unsigned)(((lport) >> 8) ^ (lport)) % \
 589  591              (ipst)->ips_ipcl_bind_fanout_size)
 590  592  
           /*
            * True when conn_proto and conn_lport equal proto and lport, the conn's
            * local address either passes _IPCL_V4_MATCH_ANY or matches laddr under
            * _IPCL_V4_MATCH, and conn_ipv6_v6only is not set.
            */
 591  593  #define IPCL_BIND_MATCH(connp, proto, laddr, lport)                     \
 592  594          ((connp)->conn_proto == (proto) &&                              \
 593  595                  (connp)->conn_lport == (lport) &&                       \
 594  596                  (_IPCL_V4_MATCH_ANY((connp)->conn_laddr_v6) ||          \
 595  597                  _IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr))) &&     \
 596  598                  !(connp)->conn_ipv6_v6only)
 597  599  
           /*
            * IPv6 form of IPCL_BIND_MATCH: conn_proto and conn_lport must equal
            * proto and lport, and conn_laddr_v6 must either equal laddr or be
            * unspecified.  laddr is an in6_addr lvalue (its address is taken).
            */
 598  600  #define IPCL_BIND_MATCH_V6(connp, proto, laddr, lport)                  \
 599  601          ((connp)->conn_proto == (proto) &&                              \
 600  602                  (connp)->conn_lport == (lport) &&                       \
 601  603                  (IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(laddr)) || \
 602  604                  IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6)))
 603  605  
 604  606  /*
 605  607   * We compare conn_laddr since it captures both connected and a bind to
 606  608   * a multicast or broadcast address.
 607  609   * The caller needs to match the zoneid and also call conn_wantpacket
 608  610   * for multicast, broadcast, or when conn_incoming_ifindex is set.
            *
            * The match requires conn_lport == lport, conn_ipv6_v6only clear, and
            * one of:
            *   - conn_laddr_v6 passes _IPCL_V4_MATCH_ANY; or
            *   - conn_laddr_v6 matches laddr under _IPCL_V4_MATCH and
            *     conn_faddr_v6 either passes _IPCL_V4_MATCH_ANY or matches faddr
            *     with conn_fport == fport.
 609  611   */
 610  612  #define IPCL_UDP_MATCH(connp, lport, laddr, fport, faddr)               \
 611  613          (((connp)->conn_lport == (lport)) &&                            \
 612  614          ((_IPCL_V4_MATCH_ANY((connp)->conn_laddr_v6) ||                 \
 613  615          (_IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr)) &&             \
 614  616          (_IPCL_V4_MATCH_ANY((connp)->conn_faddr_v6) ||                  \
 615  617          (_IPCL_V4_MATCH((connp)->conn_faddr_v6, (faddr)) &&             \
 616  618          (connp)->conn_fport == (fport)))))) &&                          \
 617  619          !(connp)->conn_ipv6_v6only)
 618  620  
 619  621  /*
 620  622   * We compare conn_laddr since it captures both connected and a bind to
 621  623   * a multicast or broadcast address.
 622  624   * The caller needs to match the zoneid and also call conn_wantpacket_v6
 623  625   * for multicast or when conn_incoming_ifindex is set.
            *
            * The match requires conn_lport == lport and one of:
            *   - conn_laddr_v6 is unspecified; or
            *   - conn_laddr_v6 equals laddr and conn_faddr_v6 is either
            *     unspecified or equal to faddr with conn_fport == fport.
            * laddr and faddr are in6_addr lvalues (their addresses are taken).
 624  626   */
 625  627  #define IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)    \
 626  628          (((connp)->conn_lport == (lport)) &&                    \
 627  629          (IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6) ||    \
 628  630          (IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(laddr)) &&       \
 629  631          (IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_faddr_v6) ||    \
 630  632          (IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, &(faddr)) &&       \
 631  633          (connp)->conn_fport == (fport))))))
 632  634  
           /*
            * XORs ntohl(laddr) with ntohl(faddr) whose bits have been moved by
            * (x << 24) | (x >> 8) -- a rotate right by 8 if ntohl() yields a
            * 32-bit value -- and reduces the result modulo ipcl_iptun_fanout_size.
            * Unlike the other hash macros here, the table size is referenced
            * directly by name rather than through an ip_stack_t.
            */
 633  635  #define IPCL_IPTUN_HASH(laddr, faddr)                                   \
 634  636          ((ntohl(laddr) ^ ((ntohl(faddr) << 24) | (ntohl(faddr) >> 8))) % \
 635  637          ipcl_iptun_fanout_size)
 636  638  
           /*
            * IPv6 form of IPCL_IPTUN_HASH.  laddr and faddr are pointers.  The
            * first hash input XORs s6_addr32 words 0 and 1 of laddr with words 2
            * and 3 of faddr; the second XORs words 0 and 1 of faddr with words 2
            * and 3 of laddr.
            */
 637  639  #define IPCL_IPTUN_HASH_V6(laddr, faddr)                                \
 638  640          IPCL_IPTUN_HASH((laddr)->s6_addr32[0] ^ (laddr)->s6_addr32[1] ^ \
 639  641              (faddr)->s6_addr32[2] ^ (faddr)->s6_addr32[3],              \
 640  642              (faddr)->s6_addr32[0] ^ (faddr)->s6_addr32[1] ^             \
 641  643              (laddr)->s6_addr32[2] ^ (laddr)->s6_addr32[3])
 642  644  
           /*
            * True when conn_laddr_v6 matches laddr and conn_faddr_v6 matches
            * faddr, both under _IPCL_V4_MATCH.
            */
 643  645  #define IPCL_IPTUN_MATCH(connp, laddr, faddr)                   \
 644  646          (_IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr)) &&     \
 645  647          _IPCL_V4_MATCH((connp)->conn_faddr_v6, (faddr)))
 646  648  
           /*
            * IPv6 form of IPCL_IPTUN_MATCH.  Note that laddr and faddr are passed
            * to IN6_ARE_ADDR_EQUAL as-is, so callers supply pointers here, whereas
            * IPCL_CONN_MATCH_V6 and IPCL_BIND_MATCH_V6 take the address of their
            * arguments themselves.
            */
 647  649  #define IPCL_IPTUN_MATCH_V6(connp, laddr, faddr)                \
 648  650          (IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, (laddr)) &&        \
 649  651          IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, (faddr)))
 650  652  
           /* IPCL_PORT_HASH of lport over (ipst)->ips_ipcl_udp_fanout_size. */
 651  653  #define IPCL_UDP_HASH(lport, ipst)      \
 652  654          IPCL_PORT_HASH(lport, (ipst)->ips_ipcl_udp_fanout_size)
 653  655  
           /*
            * NOTE(review): presumably the bucket count of the global conn hash
            * used by ipcl_globalhash_insert/remove -- confirm in ipclassifier.c.
            */
 654  656  #define CONN_G_HASH_SIZE        1024
 655  657  
 656  658  /* Raw socket hash function. */
           /* IPCL_PORT_HASH of lport over (ipst)->ips_ipcl_raw_fanout_size. */
 657  659  #define IPCL_RAW_HASH(lport, ipst)      \
 658  660          IPCL_PORT_HASH(lport, (ipst)->ips_ipcl_raw_fanout_size)
 659  661  
 660  662  /*
 661  663   * This is similar to IPCL_BIND_MATCH except that the local port check
 662  664   * is changed to a wildcard port check.
 663  665   * We compare conn_laddr since it captures both connected and a bind to
 664  666   * a multicast or broadcast address.
            *
            * Concretely: conn_proto must equal proto, conn_lport must be 0, and
            * conn_laddr_v6 must either pass _IPCL_V4_MATCH_ANY or match laddr
            * under _IPCL_V4_MATCH.  Unlike IPCL_BIND_MATCH, conn_ipv6_v6only is
            * not tested here.
 665  667   */
 666  668  #define IPCL_RAW_MATCH(connp, proto, laddr)                     \
 667  669          ((connp)->conn_proto == (proto) &&                      \
 668  670          (connp)->conn_lport == 0 &&                             \
 669  671          (_IPCL_V4_MATCH_ANY((connp)->conn_laddr_v6) ||          \
 670  672          _IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr))))
 671  673  
           /*
            * IPv6 form of IPCL_RAW_MATCH: conn_proto must equal proto, conn_lport
            * must be 0, and conn_laddr_v6 must be unspecified or equal to laddr.
            * laddr is an in6_addr lvalue (its address is taken).
            */
 672  674  #define IPCL_RAW_MATCH_V6(connp, proto, laddr)                  \
 673  675          ((connp)->conn_proto == (proto) &&                      \
 674  676          (connp)->conn_lport == 0 &&                             \
 675  677          (IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6) ||    \
 676  678          IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(laddr))))
 677  679  
 678  680  /* Function prototypes */
           /*
            * Classifier setup/teardown and conn_t creation/destruction.
            * NOTE(review): the ipcl_g_* pair takes no arguments while ipcl_init()
            * and ipcl_destroy() take an ip_stack_t, presumably global versus
            * per-stack state -- confirm in ipclassifier.c.
            */
 679  681  extern void ipcl_g_init(void);
 680  682  extern void ipcl_init(ip_stack_t *);
 681  683  extern void ipcl_g_destroy(void);
 682  684  extern void ipcl_destroy(ip_stack_t *);
 683  685  extern conn_t *ipcl_conn_create(uint32_t, int, netstack_t *);
 684  686  extern void ipcl_conn_destroy(conn_t *);
 685  687  
           /*
            * Insertion into and removal from a connf_t fanout bucket.
            * NOTE(review): ipcl_hash_remove_locked() presumably expects the caller
            * to already hold the bucket's lock -- confirm in ipclassifier.c.
            */
 686  688  void ipcl_hash_insert_wildcard(connf_t *, conn_t *);
 687  689  void ipcl_hash_remove(conn_t *);
 688  690  void ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp);
 689  691  
           /*
            * Bind-time and connect-time insertion of a conn, each with v4 and v6
            * variants, plus an iterator over a fanout bucket.
            * NOTE(review): the int results are presumably 0 or an errno value --
            * confirm in ipclassifier.c.
            */
 690  692  extern int      ipcl_bind_insert(conn_t *);
 691  693  extern int      ipcl_bind_insert_v4(conn_t *);
 692  694  extern int      ipcl_bind_insert_v6(conn_t *);
 693  695  extern int      ipcl_conn_insert(conn_t *);
 694  696  extern int      ipcl_conn_insert_v4(conn_t *);
 695  697  extern int      ipcl_conn_insert_v6(conn_t *);
 696  698  extern conn_t   *ipcl_get_next_conn(connf_t *, conn_t *, uint32_t);
 697  699  
           /*
            * Classification and lookup routines (most return a conn_t pointer),
            * together with global-hash maintenance, conn walking, reference
            * tracing, cleanup and ancillary-data helpers.
            * NOTE(review): whether a returned conn_t carries a reference that the
            * caller must release is not visible from this header -- confirm in
            * ipclassifier.c.
            */
 698  700  conn_t *ipcl_classify_v4(mblk_t *, uint8_t, uint_t, ip_recv_attr_t *,
 699  701              ip_stack_t *);
 700  702  conn_t *ipcl_classify_v6(mblk_t *, uint8_t, uint_t, ip_recv_attr_t *,
 701  703              ip_stack_t *);
 702  704  conn_t *ipcl_classify(mblk_t *, ip_recv_attr_t *, ip_stack_t *);
 703  705  conn_t *ipcl_classify_raw(mblk_t *, uint8_t, uint32_t, ipha_t *,
 704  706      ip6_t *, ip_recv_attr_t *, ip_stack_t *);
 705  707  conn_t *ipcl_iptun_classify_v4(ipaddr_t *, ipaddr_t *, ip_stack_t *);
 706  708  conn_t *ipcl_iptun_classify_v6(in6_addr_t *, in6_addr_t *, ip_stack_t *);
 707  709  void    ipcl_globalhash_insert(conn_t *);
 708  710  void    ipcl_globalhash_remove(conn_t *);
 709  711  void    ipcl_walk(pfv_t, void *, ip_stack_t *);
 710  712  conn_t  *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack_t *);
 711  713  conn_t  *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 712  714              ip_stack_t *);
 713  715  conn_t  *ipcl_lookup_listener_v4(uint16_t, ipaddr_t, zoneid_t, ip_stack_t *);
 714  716  conn_t  *ipcl_lookup_listener_v6(uint16_t, in6_addr_t *, uint_t, zoneid_t,
 715  717              ip_stack_t *);
 716  718  int     conn_trace_ref(conn_t *);
 717  719  int     conn_untrace_ref(conn_t *);
 718  720  void    ipcl_conn_cleanup(conn_t *);
 719  721  extern uint_t   conn_recvancillary_size(conn_t *, crb_t, ip_recv_attr_t *,
 720  722      mblk_t *, ip_pkt_t *);
 721  723  extern void     conn_recvancillary_add(conn_t *, crb_t, ip_recv_attr_t *,
 722  724      ip_pkt_t *, uchar_t *, uint_t);
 723  725  conn_t *ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *, ipha_t *, tcpha_t *,
 724  726              ip_stack_t *);
 725  727  conn_t *ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *, ip6_t *, tcpha_t *,
 726  728              ip_stack_t *);
 727  729  
           /*
            * Helper-stream management for a conn.
            * NOTE(review): the int results are presumably 0 or an errno value --
            * confirm against the definitions.
            */
 728  730  extern int ip_create_helper_stream(conn_t *, ldi_ident_t);
 729  731  extern void ip_free_helper_stream(conn_t *);
 730  732  extern int      ip_helper_stream_setup(queue_t *, dev_t *, int, int,
 731  733      cred_t *, boolean_t);
 732  734  
 733  735  #ifdef  __cplusplus
 734  736  }
 735  737  #endif
 736  738  
 737  739  #endif  /* _INET_IPCLASSIFIER_H */
  
    | 
      ↓ open down ↓ | 
    431 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX