Print this page
OS-5007 support SO_ATTACH_FILTER on ICMP sockets
Reviewed by: Cody Mello <melloc@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
        
@@ -20,10 +20,11 @@
  */
 /*
  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
  */
 /* Copyright (c) 1990 Mentat Inc. */
 
 #include <sys/types.h>
 #include <sys/stream.h>
@@ -78,10 +79,11 @@
 
 #include <sys/tsol/label.h>
 #include <sys/tsol/tnet.h>
 
 #include <inet/rawip_impl.h>
+#include <net/bpf.h>
 
 #include <sys/disp.h>
 
 /*
  * Synchronization notes:
@@ -1009,10 +1011,16 @@
         if (icmp->icmp_filter != NULL) {
                 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
                 icmp->icmp_filter = NULL;
         }
 
+        if (icmp->icmp_bpf_len != 0) {
+                kmem_free(icmp->icmp_bpf_prog, icmp->icmp_bpf_len);
+                icmp->icmp_bpf_len = 0;
+                icmp->icmp_bpf_prog = NULL;
+        }
+
         /*
          * Clear any fields which the kmem_cache constructor clears.
          * Only icmp_connp needs to be preserved.
          * TBD: We should make this more efficient to avoid clearing
          * everything.
@@ -1962,10 +1970,108 @@
 
         err = icmp_opt_get(connp, level, name, ptr);
         return (err);
 }
 
+/*
+ * Install a BPF socket filter on an ICMP (raw IP) socket, replacing any
+ * program that is already attached.
+ *
+ * invalp/inlen carry the SO_ATTACH_FILTER option value: a struct
+ * bpf_program, or a struct bpf_program32 when the caller is not using the
+ * native data model on an LP64 kernel.  The instruction array that the
+ * option value points at is copied in and validated before it is
+ * published under icmp_bpf_lock.
+ *
+ * Returns 0 on success; EINVAL if the option value has the wrong size, the
+ * program is empty or longer than BPF_MAXINSNS, or ip_bpf_validate()
+ * rejects it; EFAULT if the instructions cannot be copied in.
+ */
+static int
+icmp_attach_filter(icmp_t *icmp, uint_t inlen, const uchar_t *invalp)
+{
+        struct bpf_program prog;
+        ip_bpf_insn_t *insns = NULL;
+        unsigned int size;
+
+#ifdef _LP64
+        if (get_udatamodel() != DATAMODEL_NATIVE) {
+                struct bpf_program32 prog32;
+
+                if (inlen != sizeof (prog32)) {
+                        return (EINVAL);
+                }
+                /*
+                 * Copy the option value into a local rather than casting
+                 * invalp: the buffer is a const byte array whose alignment
+                 * is not established here, and the native path below
+                 * already copies for the same reason.
+                 */
+                bcopy(invalp, &prog32, sizeof (prog32));
+                prog.bf_len = prog32.bf_len;
+                prog.bf_insns = (void *)(uintptr_t)prog32.bf_insns;
+        } else
+#endif
+        if (inlen == sizeof (struct bpf_program)) {
+                bcopy(invalp, &prog, sizeof (prog));
+        } else {
+                return (EINVAL);
+        }
+
+        /* Bounding bf_len also keeps the size computation from wrapping. */
+        if (prog.bf_len > BPF_MAXINSNS || prog.bf_len == 0) {
+                return (EINVAL);
+        }
+        size = prog.bf_len * sizeof (struct bpf_insn);
+        insns = kmem_alloc(size, KM_SLEEP);
+        if (copyin(prog.bf_insns, insns, size) != 0) {
+                kmem_free(insns, size);
+                return (EFAULT);
+        }
+        if (!ip_bpf_validate(insns, prog.bf_len)) {
+                kmem_free(insns, size);
+                return (EINVAL);
+        }
+
+        /*
+         * Swap the program in as writer so that readers holding
+         * icmp_bpf_lock never observe a freed or half-updated filter.
+         * icmp_bpf_len holds the allocation size in bytes, which is what
+         * kmem_free() needs for the previous program.
+         */
+        rw_enter(&icmp->icmp_bpf_lock, RW_WRITER);
+        if (icmp->icmp_bpf_len != 0) {
+                ASSERT(icmp->icmp_bpf_prog != NULL);
+
+                kmem_free(icmp->icmp_bpf_prog, icmp->icmp_bpf_len);
+        }
+        icmp->icmp_bpf_len = size;
+        icmp->icmp_bpf_prog = insns;
+        rw_exit(&icmp->icmp_bpf_lock);
+        return (0);
+}
+
+/*
+ * Remove the BPF socket filter attached to an ICMP (raw IP) socket and
+ * free its program.
+ *
+ * Returns 0 if a filter was removed, or ENOENT if none was attached.
+ */
+static int
+icmp_detach_filter(icmp_t *icmp)
+{
+        int error = ENOENT;
+
+        rw_enter(&icmp->icmp_bpf_lock, RW_WRITER);
+        if (icmp->icmp_bpf_len != 0) {
+                /* icmp_bpf_len is the byte size of the allocation. */
+                kmem_free(icmp->icmp_bpf_prog, icmp->icmp_bpf_len);
+                icmp->icmp_bpf_prog = NULL;
+                icmp->icmp_bpf_len = 0;
+                error = 0;
+        } else {
+                ASSERT(icmp->icmp_bpf_prog == NULL);
+        }
+        rw_exit(&icmp->icmp_bpf_lock);
+
+        return (error);
+}
+
+/*
+ * Run the socket's attached BPF program, if any, over an inbound packet.
+ *
+ * mp is expected to begin with the IP header; ira_flags selects whether it
+ * is read as IPv4 or IPv6.  Only the bytes in the first mblk are handed to
+ * the filter, together with the total length taken from the IP header.
+ *
+ * Returns B_TRUE when a filter is attached and ip_bpf_filter() returns
+ * zero for the packet; returns B_FALSE otherwise, including when no filter
+ * is attached.
+ */
+static boolean_t
+icmp_eval_filter(icmp_t *icmp, mblk_t *mp, ip_recv_attr_t *ira)
+{
+        boolean_t rejected = B_FALSE;
+        uchar_t *pkt = mp->b_rptr;
+        uint_t pktlen = MBLKL(mp);
+        uint_t wirelen;
+
+        rw_enter(&icmp->icmp_bpf_lock, RW_READER);
+        /* Re-check under the lock; the filter may have been detached. */
+        if (icmp->icmp_bpf_len != 0) {
+                if (ira->ira_flags & IRAF_IS_IPV4) {
+                        ipha_t *v4hdr = (ipha_t *)pkt;
+
+                        wirelen = ntohs(v4hdr->ipha_length);
+                } else {
+                        ip6_t *v6hdr = (ip6_t *)pkt;
+
+                        /* ip6_plen does not count the fixed header. */
+                        wirelen = ntohs(v6hdr->ip6_plen) + IPV6_HDR_LEN;
+                }
+                rejected = !ip_bpf_filter(icmp->icmp_bpf_prog, pkt, wirelen,
+                    pktlen);
+        }
+        rw_exit(&icmp->icmp_bpf_lock);
+
+        return (rejected);
+}
+
 /*
  * This routine sets socket options.
  */
 int
 icmp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
@@ -2051,10 +2157,14 @@
                 case SO_RCVBUF:
                         if (*i1 > is->is_max_buf) {
                                 return (ENOBUFS);
                         }
                         break;
+                case SO_ATTACH_FILTER:
+                        return (icmp_attach_filter(icmp, inlen, invalp));
+                case SO_DETACH_FILTER:
+                        return (icmp_detach_filter(icmp));
                 }
                 break;
 
         case IPPROTO_IP:
                 /*
@@ -2596,10 +2706,18 @@
         ASSERT(MBLKL(mp) >= ip_hdr_length);     /* IP did a pullup */
 
         /* Initialize regardless of IP version */
         ipps.ipp_fields = 0;
 
+        /* Apply socket filter, if needed */
+        if (icmp->icmp_bpf_len != 0) {
+                if (icmp_eval_filter(icmp, mp, ira)) {
+                        freemsg(mp);
+                        return;
+                }
+        }
+
         if (ira->ira_flags & IRAF_IS_IPV4) {
                 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
                 ASSERT(MBLKL(mp) >= sizeof (ipha_t));
                 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
 
@@ -5025,11 +5143,12 @@
         ldi_ident_release(is->is_ldi_ident);
         kmem_free(is, sizeof (*is));
 }
 
 static void *
-rawip_kstat_init(netstackid_t stackid) {
+rawip_kstat_init(netstackid_t stackid)
+{
         kstat_t *ksp;
 
         rawip_named_kstat_t template = {
                 { "inDatagrams",        KSTAT_DATA_UINT32, 0 },
                 { "inCksumErrs",        KSTAT_DATA_UINT32, 0 },
@@ -5037,13 +5156,11 @@
                 { "outDatagrams",       KSTAT_DATA_UINT32, 0 },
                 { "outErrors",          KSTAT_DATA_UINT32, 0 },
         };
 
         ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2",
-                                        KSTAT_TYPE_NAMED,
-                                        NUM_OF_FIELDS(rawip_named_kstat_t),
-                                        0, stackid);
+            KSTAT_TYPE_NAMED, NUM_OF_FIELDS(rawip_named_kstat_t), 0, stackid);
         if (ksp == NULL || ksp->ks_data == NULL)
                 return (NULL);
 
         bcopy(&template, ksp->ks_data, sizeof (template));
         ksp->ks_update = rawip_kstat_update;