Print this page
Reduce lint
OS-5007 support SO_ATTACH_FILTER on ICMP sockets
Reviewed by: Cody Mello <melloc@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4018 lxbrand support TCP SO_REUSEPORT
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>

@@ -18,10 +18,11 @@
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
  */
 
 /*
  * IP PACKET CLASSIFIER
  *

@@ -866,71 +867,94 @@
         mutex_enter(&(connfp)->connf_lock);                             \
         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);               \
         mutex_exit(&(connfp)->connf_lock);                              \
 }
 
-#define IPCL_HASH_INSERT_BOUND(connfp, connp) {                         \
-        conn_t *pconnp = NULL, *nconnp;                                 \
-        IPCL_HASH_REMOVE((connp));                                      \
-        mutex_enter(&(connfp)->connf_lock);                             \
-        nconnp = (connfp)->connf_head;                                  \
-        while (nconnp != NULL &&                                        \
-            !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) {               \
-                pconnp = nconnp;                                        \
-                nconnp = nconnp->conn_next;                             \
-        }                                                               \
-        if (pconnp != NULL) {                                           \
-                pconnp->conn_next = (connp);                            \
-                (connp)->conn_prev = pconnp;                            \
-        } else {                                                        \
-                (connfp)->connf_head = (connp);                         \
-        }                                                               \
-        if (nconnp != NULL) {                                           \
-                (connp)->conn_next = nconnp;                            \
-                nconnp->conn_prev = (connp);                            \
-        }                                                               \
-        (connp)->conn_fanout = (connfp);                                \
-        (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |   \
-            IPCL_BOUND;                                                 \
-        CONN_INC_REF(connp);                                            \
-        mutex_exit(&(connfp)->connf_lock);                              \
-}
+/*
+ * When inserting bound or wildcard entries into the hash, ordering rules are
+ * used to facilitate timely and correct lookups.  The order is as follows:
+ * 1. Entries bound to a specific address
+ * 2. Entries bound to INADDR_ANY
+ * 3. Entries bound to ADDR_UNSPECIFIED
+ * Entries in a category which share conn_lport (such as those using
+ * SO_REUSEPORT) will be ordered such that the newest inserted is first.
+ */
 
-#define IPCL_HASH_INSERT_WILDCARD(connfp, connp) {                      \
-        conn_t **list, *prev, *next;                                    \
-        boolean_t isv4mapped =                                          \
-            IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);              \
-        IPCL_HASH_REMOVE((connp));                                      \
-        mutex_enter(&(connfp)->connf_lock);                             \
-        list = &(connfp)->connf_head;                                   \
-        prev = NULL;                                                    \
-        while ((next = *list) != NULL) {                                \
-                if (isv4mapped &&                                       \
-                    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&    \
-                    connp->conn_zoneid == next->conn_zoneid) {          \
-                        (connp)->conn_next = next;                      \
-                        if (prev != NULL)                               \
-                                prev = next->conn_prev;                 \
-                        next->conn_prev = (connp);                      \
-                        break;                                          \
-                }                                                       \
-                list = &next->conn_next;                                \
-                prev = next;                                            \
-        }                                                               \
-        (connp)->conn_prev = prev;                                      \
-        *list = (connp);                                                \
-        (connp)->conn_fanout = (connfp);                                \
-        (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |   \
-            IPCL_BOUND;                                                 \
-        CONN_INC_REF((connp));                                          \
-        mutex_exit(&(connfp)->connf_lock);                              \
+void
+ipcl_hash_insert_bound(connf_t *connfp, conn_t *connp)
+{
+        conn_t *pconnp, *nconnp;
+
+        IPCL_HASH_REMOVE(connp);
+        mutex_enter(&connfp->connf_lock);
+        nconnp = connfp->connf_head;
+        pconnp = NULL;
+        while (nconnp != NULL) {
+                /*
+                 * Walk through entries associated with the fanout until one is
+                 * found which fulfills any of these conditions:
+                 * 1. Listen address of ADDR_ANY/ADDR_UNSPECIFIED
+                 * 2. Listen port the same as connp
+                 */
+                if (_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6) ||
+                    connp->conn_lport == nconnp->conn_lport)
+                        break;
+                pconnp = nconnp;
+                nconnp = nconnp->conn_next;
+        }
+        if (pconnp != NULL) {
+                pconnp->conn_next = connp;
+                connp->conn_prev = pconnp;
+        } else {
+                connfp->connf_head = connp;
+        }
+        if (nconnp != NULL) {
+                connp->conn_next = nconnp;
+                nconnp->conn_prev = connp;
+        }
+        connp->conn_fanout = connfp;
+        connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
+        CONN_INC_REF(connp);
+        mutex_exit(&connfp->connf_lock);
 }
 
 void
 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
 {
-        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+        conn_t *pconnp = NULL, *nconnp;
+        boolean_t isv4mapped = IN6_IS_ADDR_V4MAPPED(&connp->conn_laddr_v6);
+
+        IPCL_HASH_REMOVE(connp);
+        mutex_enter(&connfp->connf_lock);
+        nconnp = connfp->connf_head;
+        pconnp = NULL;
+        while (nconnp != NULL) {
+                if (IN6_IS_ADDR_V4MAPPED_ANY(&nconnp->conn_laddr_v6) &&
+                    isv4mapped && connp->conn_lport == nconnp->conn_lport)
+                        break;
+                if (IN6_IS_ADDR_UNSPECIFIED(&nconnp->conn_laddr_v6) &&
+                    (isv4mapped ||
+                    connp->conn_lport == nconnp->conn_lport))
+                        break;
+
+                pconnp = nconnp;
+                nconnp = nconnp->conn_next;
+        }
+        if (pconnp != NULL) {
+                pconnp->conn_next = connp;
+                connp->conn_prev = pconnp;
+        } else {
+                connfp->connf_head = connp;
+        }
+        if (nconnp != NULL) {
+                connp->conn_next = nconnp;
+                nconnp->conn_prev = connp;
+        }
+        connp->conn_fanout = connfp;
+        connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
+        CONN_INC_REF(connp);
+        mutex_exit(&connfp->connf_lock);
 }
 
 /*
  * Because the classifier is used to classify inbound packets, the destination
  * address is meant to be our local tunnel address (tunnel source), and the

@@ -1032,13 +1056,13 @@
 
         if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
             IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
                 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
                     IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
-                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+                        ipcl_hash_insert_wildcard(connfp, connp);
                 } else {
-                        IPCL_HASH_INSERT_BOUND(connfp, connp);
+                        ipcl_hash_insert_bound(connfp, connp);
                 }
         } else {
                 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
         }
         return (0);

@@ -1203,13 +1227,13 @@
                 }
 
                 if (connp->conn_faddr_v4 != INADDR_ANY) {
                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
                 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
-                        IPCL_HASH_INSERT_BOUND(connfp, connp);
+                        ipcl_hash_insert_bound(connfp, connp);
                 } else {
-                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+                        ipcl_hash_insert_wildcard(connfp, connp);
                 }
                 if (protocol == IPPROTO_RSVP)
                         ill_set_inputfn_all(ipst);
                 break;
 

@@ -1217,13 +1241,13 @@
                 /* Insert it in the Bind Hash */
                 ASSERT(connp->conn_zoneid != ALL_ZONES);
                 connfp = &ipst->ips_ipcl_bind_fanout[
                     IPCL_BIND_HASH(lport, ipst)];
                 if (connp->conn_laddr_v4 != INADDR_ANY) {
-                        IPCL_HASH_INSERT_BOUND(connfp, connp);
+                        ipcl_hash_insert_bound(connfp, connp);
                 } else {
-                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+                        ipcl_hash_insert_wildcard(connfp, connp);
                 }
                 if (cl_inet_listen != NULL) {
                         ASSERT(connp->conn_ipversion == IPV4_VERSION);
                         connp->conn_flags |= IPCL_CL_LISTENER;
                         (*cl_inet_listen)(

@@ -1269,25 +1293,25 @@
                 }
 
                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
-                        IPCL_HASH_INSERT_BOUND(connfp, connp);
+                        ipcl_hash_insert_bound(connfp, connp);
                 } else {
-                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+                        ipcl_hash_insert_wildcard(connfp, connp);
                 }
                 break;
 
         case IPPROTO_TCP:
                 /* Insert it in the Bind Hash */
                 ASSERT(connp->conn_zoneid != ALL_ZONES);
                 connfp = &ipst->ips_ipcl_bind_fanout[
                     IPCL_BIND_HASH(lport, ipst)];
                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
-                        IPCL_HASH_INSERT_BOUND(connfp, connp);
+                        ipcl_hash_insert_bound(connfp, connp);
                 } else {
-                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+                        ipcl_hash_insert_wildcard(connfp, connp);
                 }
                 if (cl_inet_listen != NULL) {
                         sa_family_t     addr_family;
                         uint8_t         *laddrp;
 

@@ -1414,13 +1438,13 @@
                 }
 
                 if (connp->conn_faddr_v4 != INADDR_ANY) {
                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
                 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
-                        IPCL_HASH_INSERT_BOUND(connfp, connp);
+                        ipcl_hash_insert_bound(connfp, connp);
                 } else {
-                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+                        ipcl_hash_insert_wildcard(connfp, connp);
                 }
                 break;
         }
 
         return (ret);

@@ -1502,13 +1526,13 @@
                 }
 
                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
-                        IPCL_HASH_INSERT_BOUND(connfp, connp);
+                        ipcl_hash_insert_bound(connfp, connp);
                 } else {
-                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+                        ipcl_hash_insert_wildcard(connfp, connp);
                 }
                 break;
         }
 
         return (ret);

@@ -2090,10 +2114,11 @@
         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
         connp->conn_icmp = icmp;
         connp->conn_flags = IPCL_RAWIPCONN;
         connp->conn_proto = IPPROTO_ICMP;
         icmp->icmp_connp = connp;
+        rw_init(&icmp->icmp_bpf_lock, NULL, RW_DEFAULT, NULL);
         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
         if (connp->conn_ixa == NULL)
                 return (ENOMEM);
         connp->conn_ixa->ixa_refcnt = 1;

@@ -2114,10 +2139,11 @@
         ASSERT(icmp->icmp_connp == connp);
         ASSERT(connp->conn_icmp == icmp);
         mutex_destroy(&connp->conn_lock);
         cv_destroy(&connp->conn_cv);
         rw_destroy(&connp->conn_ilg_lock);
+        rw_destroy(&icmp->icmp_bpf_lock);
 
         /* Can be NULL if constructor failed */
         if (connp->conn_ixa != NULL) {
                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
                 ASSERT(connp->conn_ixa->ixa_ire == NULL);