/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019, Joyent, Inc.
 */

/* IPF oddness for compilation in userland for IPF tests. */
#if defined(KERNEL) || defined(_KERNEL)
#undef KERNEL
#undef _KERNEL
#define KERNEL  1
#define _KERNEL 1
#endif

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include "netinet/ip_compat.h"
#ifdef  USE_INET6
#include <netinet/icmp6.h>
#endif
#include <netinet/tcpip.h>
#include "netinet/ip_fil.h"
#include "netinet/ip_nat.h"
#include "netinet/ip_frag.h"
#include "netinet/ip_state.h"
#include "netinet/ip_proxy.h"
#include "netinet/ip_auth.h"
#include "netinet/ipf_stack.h"
#ifdef IPFILTER_SCAN
#include "netinet/ip_scan.h"
#endif
#ifdef IPFILTER_SYNC
#include "netinet/ip_sync.h"
#endif
#include "netinet/ip_pool.h"
#include "netinet/ip_htable.h"
#ifdef IPFILTER_COMPILED
#include "netinet/ip_rules.h"
#endif
#if defined(_KERNEL)
#include <sys/sunddi.h>
#endif

#include "netinet/ipf_cfw.h"
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/cred.h>
#include <sys/ddi.h>

/*
 * cfw == Cloud Firewall ==> routines for a global-zone data collector for
 * ipf events on SmartOS.  The only events CFW cares about are ones
 * enforced by global-zone-controlled rulesets.
 *
 * The variable below is tied into a new (GZ-only) ipf device, /dev/ipfev,
 * which flips it on while there is an open instance.  This feature will also
 * consume an fr_flag to provide per-rule granularity.
 */
boolean_t ipf_cfwlog_enabled;

/*
 * Because ipf's test tools in $SRC/cmd insert all of these files, we need to
 * stub out what we can vs. drag in even more headers and who knows what else.
 */
#ifdef _KERNEL

/*
 * CFW event ring buffer.  Remember, this is for ALL ZONES because only a
 * global-zone event-reader will be consuming these.  In other words, it's
 * not something to instantiate per-netstack.
 *
 * We may want to get more sophisticated and performant (e.g. per-processor),
 * but for now keep the ring buffer simple and stupid.
 * The ring size must be a power of 2 (so indices can be bitmasked) and must
 * be countable by a uint_t.  (A wraparound example follows the defines
 * below.)
 *
 * Resizable, see ipf_cfw_ring_resize() below.
 */
#define IPF_CFW_DEFAULT_RING_BUFS       1024
#define IPF_CFW_MIN_RING_BUFS           8
#define IPF_CFW_MAX_RING_BUFS           (1U << 31U)
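
/*
 * Illustration (not compiled): with a power-of-2 ring size, index advance
 * and wraparound reduce to a bitwise AND.  For example, with
 * cfw_ringsize == 8, cfw_ringmask == 0x7:
 *
 *      index = (index + 1) & cfw_ringmask;     7 wraps to 0, 3 goes to 4
 *
 * This is why both the resize path and the ioctl handler below reject any
 * size that fails ISP2().
 */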

/* Assume C's init-to-zero is sufficient for these types... */
static kmutex_t cfw_ringlock;
static kcondvar_t cfw_ringcv;

static cfwev_t *cfw_ring;       /* NULL by default. */
static uint32_t cfw_ringsize;   /* 0 by default, number of array elements. */
static uint32_t cfw_ringmask;   /* 0 by default. */

/* If these are equal, we're either empty or full. */
static uint_t cfw_ringstart, cfw_ringend;
static boolean_t cfw_ringfull;  /* Tell the difference here! */
/* Bean-counters. */
static uint64_t cfw_evreports;
static uint64_t cfw_evdrops;

/*
 * Place an event in the CFW event ring buffer.
 *
 * For now, be simple and drop the oldest event if we overflow.  We may wish
 * to selectively drop older events based on type in the future.
 */
static void
ipf_cfwev_report(cfwev_t *event)
{
        mutex_enter(&cfw_ringlock);
        if (cfw_ringfull) {
                /*
                 * Full ring: start == end, and the oldest event lives at
                 * "end".  Overwrite the oldest event, then advance both
                 * indices together; the ring stays full.
                 */
                cfw_ring[cfw_ringend] = *event;
                cfw_ringend++;
                cfw_ringend &= cfw_ringmask;
                cfw_ringstart = cfw_ringend;
                DTRACE_PROBE(ipf__cfw__evdrop);
                cfw_evdrops++;
        } else {
                cfw_ring[cfw_ringend] = *event;
                cfw_ringend++;
                cfw_ringend &= cfw_ringmask;
                cfw_ringfull = (cfw_ringend == cfw_ringstart);
        }
        cfw_evreports++;
        cv_broadcast(&cfw_ringcv);
        mutex_exit(&cfw_ringlock);
}

#if 0
/*
 * Simple event consumer which copies one event from the ring buffer into
 * what's provided.  Superseded by ipf_cfwev_consume_many() below.  Kept here
 * for reference.
 *
 * If there are no events, either cv_wait_sig() or return B_FALSE, depending
 * on "block".
 */
boolean_t
ipf_cfwev_consume(cfwev_t *event, boolean_t block)
{
        mutex_enter(&cfw_ringlock);

        /*
         * Return B_FALSE if non-blocking and there is no data, OR if we
         * receive a signal.
         */
        while ((cfw_ringstart == cfw_ringend) && !cfw_ringfull) {
                if (!block || !cv_wait_sig(&cfw_ringcv, &cfw_ringlock)) {
                        mutex_exit(&cfw_ringlock);
                        return (B_FALSE);
                }
        }

        *event = cfw_ring[cfw_ringstart];
        cfw_ringstart++;
        cfw_ringstart &= cfw_ringmask;
        cfw_ringfull = B_FALSE;
        mutex_exit(&cfw_ringlock);
        return (B_TRUE);
}
#endif

/*
 * More sophisticated access to multiple CFW events that can allow copying
 * straight from the ring buffer up to userland.  Requires a callback (which
 * could call uiomove() directly, OR copy to a local still-in-kernel buffer)
 * that must do the data copying-out.
 *
 * The callback function is of the form:
 *
 *      uint_t cfw_many_cb(cfwev_t *evptr, uint_t num_avail, void *cbarg);
 *
 * The function must return how many events got consumed, which MUST be <= the
 * number available.  The function must ALSO UNDERSTAND that cfw_ringlock is
 * held during this time.  The function may be called more than once, if the
 * available buffers wrap around OR "block" is set and we don't have enough
 * buffers.  If any callback returns 0, exit the function with however many
 * were consumed.  (An illustrative callback sketch follows the tunables
 * below.)
 *
 * This function, like the callback, returns the number of events *CONSUMED*.
 *
 * .  .  .
 *
 * Tunables for ipf_cfwev_consume_many().
 *
 * If you wish to attempt to coalesce reads (to reduce the likelihood of one
 * event at a time during high load), change the number of tries below to
 * something not 0.  Early experiments set this to 10.
 *
 * The wait between tries is in usecs in cfw_timeout_wait.  The pessimal
 * case for this is a timeout_wait-spaced trickle of one event at a time.
 */
int cfw_timeout_tries = 0;
int cfw_timeout_wait = 10000;   /* 10ms wait. */
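
/*
 * For illustration only (not compiled): a minimal callback that obeys the
 * contract above by copying events into a flat in-kernel buffer passed via
 * cbarg.  The struct and names here are hypothetical, not part of this file.
 *
 *      typedef struct example_sink {
 *              cfwev_t *es_buf;        destination array
 *              uint_t es_off;          events already copied
 *      } example_sink_t;
 *
 *      static uint_t
 *      example_many_cb(cfwev_t *evptr, uint_t num_avail, void *cbarg)
 *      {
 *              example_sink_t *es = cbarg;
 *
 *              cfw_ringlock is held here, so just copy and count.
 *              bcopy(evptr, es->es_buf + es->es_off,
 *                  num_avail * sizeof (cfwev_t));
 *              es->es_off += num_avail;
 *              return (num_avail);     MUST be <= num_avail.
 *      }
 *
 * The real consumer in this file, cfwlog_read_manycb(), instead uiomove()s
 * events straight to userland.
 */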

uint_t
ipf_cfwev_consume_many(uint_t num_requested, boolean_t block,
    cfwmanycb_t cfw_many_cb, void *cbarg)
{
        uint_t consumed = 0, cb_consumed, contig_size;
        int timeout_tries = cfw_timeout_tries;

        mutex_enter(&cfw_ringlock);

        /* Silly reality checks */
        ASSERT3U(cfw_ringstart, <, cfw_ringsize);
        ASSERT3U(cfw_ringend, <, cfw_ringsize);

        /*
         * Can goto here again if caller wants blocking.  NOTE that
         * num_requested may have been decremented and consumed may have been
         * incremented if we arrive here via a goto after a cv_wait.
         */
from_the_top:
        if (cfw_ringstart > cfw_ringend || cfw_ringfull)
                contig_size = cfw_ringsize - cfw_ringstart;
        else if (cfw_ringstart < cfw_ringend)
                contig_size = cfw_ringend - cfw_ringstart;
        else if (block && cv_wait_sig(&cfw_ringcv, &cfw_ringlock)) {
                /* Maybe something to consume now, try again. */
                goto from_the_top;
        } else {
                /* Nothing (more) to consume, return! */
                goto bail;
        }

        ASSERT(contig_size + cfw_ringstart == cfw_ringend ||
            contig_size + cfw_ringstart == cfw_ringsize);

        if (num_requested < contig_size)
                contig_size = num_requested;

        cb_consumed = cfw_many_cb(&(cfw_ring[cfw_ringstart]), contig_size,
            cbarg);
        ASSERT(cb_consumed <= contig_size);
        cfw_ringstart += cb_consumed;
        consumed += cb_consumed;
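        /* Consuming anything at all un-fulls the ring. */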
        cfw_ringfull = (cfw_ringfull && cb_consumed == 0);
        if (cb_consumed < contig_size) {
                /* Caller clearly had a problem.  Reality check and bail. */
                ASSERT((cfw_ringstart & cfw_ringmask) == cfw_ringstart);
                goto bail;
        }
        ASSERT(cb_consumed == contig_size);
        cfw_ringstart &= cfw_ringmask;  /* In case of wraparound. */
        num_requested -= contig_size;

        if (num_requested > 0 && cfw_ringstart != cfw_ringend) {
                /* We must have wrapped around the end of the buffer! */
                ASSERT(cfw_ringstart == 0);
                ASSERT(!cfw_ringfull);
                contig_size = cfw_ringend;
                if (num_requested < contig_size)
                        contig_size = num_requested;
                cb_consumed = cfw_many_cb(&(cfw_ring[cfw_ringstart]),
                    contig_size, cbarg);
                cfw_ringstart += cb_consumed;
                consumed += cb_consumed;
                if (cb_consumed < contig_size) {
                        /*
                         * Caller clearly had a problem.  Reality check and
                         * bail.
                         */
                        ASSERT(cfw_ringend > cfw_ringstart);
                        goto bail;
                }
                ASSERT(cb_consumed == contig_size);
                num_requested -= contig_size;
        }

        ASSERT(consumed > 0);

        if (num_requested > 0 && block && timeout_tries > 0) {
                clock_t delta = drv_usectohz(cfw_timeout_wait);

                timeout_tries--;

                /*
                 * We obtained some of the events we requested, but not all
                 * of them.  The ring is now empty, so wait *a little bit*
                 * longer in case more arrive.
                 */
                switch (cv_reltimedwait_sig(&cfw_ringcv, &cfw_ringlock, delta,
                    TR_CLOCK_TICK)) {
                case 0:
                        /* Received signal!  Throw out what we have. */
                        DTRACE_PROBE1(ipf__cfw__sigdiscard, int, consumed);
                        cfw_evdrops += consumed;
                        consumed = 0;
                        break;
                case -1:
                        /* Timeout reached!  Bail with what we got. */
                        DTRACE_PROBE(ipf__cfw__timedexpired);
                        break;
                default:
                        /* Aha!  We've got more! */
                        DTRACE_PROBE(ipf__cfw__moredata);
                        goto from_the_top;
                }
        }

bail:
        mutex_exit(&cfw_ringlock);
        return (consumed);
}

/*
 * SmartOS likes using the zone's did.  Make sure we squirrel that away in
 * the ipf netstack instance if it's not there already.
 */
static inline zoneid_t
ifs_to_did(ipf_stack_t *ifs)
{
        if (ifs->ifs_zone_did == 0) {
                zone_t *zone;

                /*
                 * We can't get the zone_did at initialization time because
                 * most zone data isn't readily available then, so cement the
                 * did in place now.
                 */
                ASSERT(ifs->ifs_zone != GLOBAL_ZONEID);
                zone = zone_find_by_id(ifs->ifs_zone);
                if (zone != NULL) {
                        ifs->ifs_zone_did = zone->zone_did;
                        zone_rele(zone);
                }
                /* Else we are either in shutdown or something weirder. */
        }
        return (ifs->ifs_zone_did);
}

/*
 * ipf_block_cfwlog()
 *
 * Called by fr_check().  Record drop events for the global-zone data
 * collector.  Use rest-of-ipf-style names for the parameters.
 */
void
ipf_block_cfwlog(frentry_t *fr, fr_info_t *fin, ipf_stack_t *ifs)
{
        cfwev_t event = {0};

        /*
         * We need a rule.
         * Capture failure by using dtrace on this function's entry.
         * 'ipf_block_cfwlog:entry /arg0 == NULL/ { printf("GOTCHA!\n"); }'
         */
        if (fr == NULL)
                return;

        event.cfwev_type = CFWEV_BLOCK;
        event.cfwev_length = sizeof (event);
        /*
         * IPF code elsewhere does the cheesy single-flag check, even though
         * there are two flags in a rule (one for in, one for out).
         */
        event.cfwev_direction = (fr->fr_flags & FR_INQUE) ?
            CFWDIR_IN : CFWDIR_OUT;

        event.cfwev_protocol = fin->fin_p;
        /*
         * NOTE: fin_*port is in host/native order, and ICMP info is here too.
         */
        event.cfwev_sport = htons(fin->fin_sport);
        event.cfwev_dport = htons(fin->fin_dport);

        if (fin->fin_v == IPV4_VERSION) {
                IN6_INADDR_TO_V4MAPPED(&fin->fin_src, &event.cfwev_saddr);
                IN6_INADDR_TO_V4MAPPED(&fin->fin_dst, &event.cfwev_daddr);
        } else {
                ASSERT3U(fin->fin_v, ==, IPV6_VERSION);
                event.cfwev_saddr = fin->fin_src6.in6;
                event.cfwev_daddr = fin->fin_dst6.in6;
        }

        /*
         * uniqtime() is what ipf's GETKTIME() uses.
         * If cfwev_tstamp needs to be sourced from elsewhere, fix that here.
         */
        uniqtime(&event.cfwev_tstamp);
        event.cfwev_zonedid = ifs_to_did(ifs);
        ASSERT(fin->fin_rule <= 0xffff);  /* Must fit in uint16_t... */
        event.cfwev_ruleid = fin->fin_rule;
        memcpy(event.cfwev_ruleuuid, fr->fr_uuid, sizeof (uuid_t));

        ipf_cfwev_report(&event);
}

/*
 * ipf_log_cfwlog()
 *
 * Twin of ipstate_log(), but records state events for the global-zone data
 * collector.
 */
void
ipf_log_cfwlog(struct ipstate *is, uint_t type, ipf_stack_t *ifs)
{
        cfwev_t event = {0};

        switch (type) {
        case ISL_NEW:
        case ISL_CLONE:
                event.cfwev_type = CFWEV_BEGIN;
                break;
        case ISL_EXPIRE:
        case ISL_FLUSH:
        case ISL_REMOVE:
        case ISL_KILLED:
        case ISL_ORPHAN:
#if 0
                event.cfwev_type = CFWEV_END;
                break;
#else
                /*
                 * We don't care about session disappearances in CFW logging
                 * for now.
                 */
                return;
#endif
        default:
                event.cfwev_type = CFWEV_BLOCK;
                break;
        }

        /*
         * IPF code elsewhere does the cheesy single-flag check, even though
         * there are two flags in a rule (one for in, one for out).  Follow
         * suit here.
         */
        event.cfwev_length = sizeof (event);
        ASSERT(is->is_rule != NULL);
        event.cfwev_direction = (is->is_rule->fr_flags & FR_INQUE) ?
            CFWDIR_IN : CFWDIR_OUT;
        event.cfwev_protocol = is->is_p;
        switch (is->is_p) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
                /* NOTE: is_*port is in network order. */
                event.cfwev_sport = is->is_sport;
                event.cfwev_dport = is->is_dport;
                break;
        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
                /* Scribble the ICMP type in sport... */
                event.cfwev_sport = is->is_icmp.ici_type;
                break;
        }

        if (is->is_v == IPV4_VERSION) {
                IN6_INADDR_TO_V4MAPPED(&is->is_src.in4, &event.cfwev_saddr);
                IN6_INADDR_TO_V4MAPPED(&is->is_dst.in4, &event.cfwev_daddr);
        } else {
                ASSERT3U(is->is_v, ==, IPV6_VERSION);
                event.cfwev_saddr = is->is_src.in6;
                event.cfwev_daddr = is->is_dst.in6;
        }

        /*
         * uniqtime() is what ipf's GETKTIME() uses.
         * If cfwev_tstamp needs to be sourced from elsewhere, fix that here.
         */
        uniqtime(&event.cfwev_tstamp);
        event.cfwev_zonedid = ifs_to_did(ifs);
        ASSERT(is->is_rulen <= 0xffff);   /* Must fit in uint16_t... */
        event.cfwev_ruleid = is->is_rulen;
        memcpy(event.cfwev_ruleuuid, is->is_uuid, sizeof (uuid_t));

        ipf_cfwev_report(&event);
}

typedef struct uio_error_s {
        struct uio *ue_uio;
        int ue_error;
} uio_error_t;

/*
 * Callback routine we use for ipf_cfwev_consume_many().
 * Returning 0 indicates an error.
 */
static uint_t
cfwlog_read_manycb(cfwev_t *evptr, uint_t num_avail, void *cbarg)
{
        uio_error_t *ue = (uio_error_t *)cbarg;

        ASSERT(MUTEX_HELD(&cfw_ringlock));

        if (ue->ue_error != 0)
                return (0);

        ue->ue_error = uiomove((caddr_t)evptr, num_avail * sizeof (*evptr),
            UIO_READ, ue->ue_uio);
        if (ue->ue_error != 0)
                return (0);

        return (num_avail);
}

/*
 * Resize the CFW event ring buffer.
 *
 * The caller must ensure the new size is a power of 2 between
 * IPF_CFW_{MIN,MAX}_RING_BUFS (inclusive) or one of the special values
 * IPF_CFW_RING_ALLOCATE (first-time creation) or IPF_CFW_RING_DESTROY
 * (netstack-unload destruction).
 */
int
ipf_cfw_ring_resize(uint32_t newsize)
{
        ASSERT(MUTEX_HELD(&cfw_ringlock) || newsize == IPF_CFW_RING_ALLOCATE ||
            newsize == IPF_CFW_RING_DESTROY);

        if (newsize == IPF_CFW_RING_ALLOCATE) {
                if (cfw_ring != NULL)
                        return (EBUSY);
                newsize = IPF_CFW_DEFAULT_RING_BUFS;
                /* Fall through to allocating a new ring buffer. */
        } else {
                /* We may be called during error cleanup, so be liberal here. */
                if ((cfw_ring == NULL && newsize == IPF_CFW_RING_DESTROY) ||
                    newsize == cfw_ringsize) {
                        return (0);
                }
                kmem_free(cfw_ring, cfw_ringsize * sizeof (cfwev_t));
                cfw_ring = NULL;
                if (cfw_ringfull) {
                        cfw_evdrops += cfw_ringsize;
                } else if (cfw_ringstart > cfw_ringend) {
                        cfw_evdrops += cfw_ringend +
                            (cfw_ringsize - cfw_ringstart);
                } else {
                        cfw_evdrops += cfw_ringend - cfw_ringstart;
                }
                cfw_ringsize = cfw_ringmask = cfw_ringstart = cfw_ringend = 0;
                cfw_ringfull = B_FALSE;

                if (newsize == IPF_CFW_RING_DESTROY)
                        return (0);
                /*
                 * Keep the reports & drops around because if we're just
                 * resizing, we need to know what we lost.
                 */
        }

        ASSERT(ISP2(newsize));
        cfw_ring = kmem_alloc(newsize * sizeof (cfwev_t), KM_SLEEP);
        /* KM_SLEEP means we always succeed. */
        cfw_ringsize = newsize;
        cfw_ringmask = cfw_ringsize - 1;

        return (0);
}
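
/*
 * A sketch of presumed call sites (illustrative; the attach/detach hooks
 * live outside this file):
 *
 *      error = ipf_cfw_ring_resize(IPF_CFW_RING_ALLOCATE);     GZ attach
 *      (void) ipf_cfw_ring_resize(IPF_CFW_RING_DESTROY);       GZ detach
 *
 * Administrative resizing arrives via the SIOCIPFCFWNEWSZ ioctl handled
 * below, which validates the size and holds cfw_ringlock around the call.
 */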

/*
 * ioctl handler for /dev/ipfev.  Only supports SIOCIPFCFWCFG (get data
 * collector statistics and configuration), and SIOCIPFCFWNEWSZ (resize the
 * event ring buffer).
 */
/* ARGSUSED */
int
ipf_cfwlog_ioctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *cp,
    int *rp)
{
        ipfcfwcfg_t cfginfo;
        int error;

        if (cmd != SIOCIPFCFWCFG && cmd != SIOCIPFCFWNEWSZ)
                return (EIO);

        if (crgetzoneid(cp) != GLOBAL_ZONEID)
                return (EACCES);

        error = COPYIN((caddr_t)data, (caddr_t)&cfginfo, sizeof (cfginfo));
        if (error != 0)
                return (EFAULT);

        cfginfo.ipfcfwc_maxevsize = sizeof (cfwev_t);
        mutex_enter(&cfw_ringlock);
        cfginfo.ipfcfwc_evreports = cfw_evreports;
        if (cmd == SIOCIPFCFWNEWSZ) {
                uint32_t newsize = cfginfo.ipfcfwc_evringsize;

                /* Do ioctl parameter checking here, then call the resizer. */
                if (newsize < IPF_CFW_MIN_RING_BUFS ||
                    newsize > IPF_CFW_MAX_RING_BUFS || !ISP2(newsize)) {
                        error = EINVAL;
                } else {
                        error = ipf_cfw_ring_resize(cfginfo.ipfcfwc_evringsize);
                }
        } else {
                error = 0;
        }
        /* Both cfw_evdrops and cfw_ringsize are affected by resize. */
        cfginfo.ipfcfwc_evdrops = cfw_evdrops;
        cfginfo.ipfcfwc_evringsize = cfw_ringsize;
        mutex_exit(&cfw_ringlock);

        if (error != 0)
                return (error);

        error = COPYOUT((caddr_t)&cfginfo, (caddr_t)data, sizeof (cfginfo));
        if (error != 0)
                return (EFAULT);

        return (0);
}

/*
 * Send events up via /dev/ipfev reads.  Will return only complete events.
 */
/* ARGSUSED */
int
ipf_cfwlog_read(dev_t dev, struct uio *uio, cred_t *cp)
{
        uint_t requested, consumed;
        uio_error_t ue = {uio, 0};
        boolean_t block;

        if (uio->uio_resid == 0)
                return (0);
        if (uio->uio_resid < sizeof (cfwev_t))
                return (EINVAL);
        /* XXX KEBE ASKS: Check for resid being too big?!? */

        block = ((uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0);
        requested = uio->uio_resid / sizeof (cfwev_t);
        ASSERT(requested > 0);

        /*
         * As stated earlier, ipf_cfwev_consume_many() takes a callback.
         * The callback may be called multiple times before we return.
         * The callback will execute uiomove().
         */
        consumed = ipf_cfwev_consume_many(requested, block, cfwlog_read_manycb,
            &ue);
        ASSERT3U(consumed, <=, requested);
        if (!block && consumed == 0 && ue.ue_error == 0) {
                /* No data available. */
                ue.ue_error = EWOULDBLOCK;
        } else if (ue.ue_error != 0 || (block && consumed == 0)) {
                /* We had a problem... */
                if (ue.ue_error == 0) {
                        /* Cover case of cv_wait_sig() receiving a signal. */
                        ue.ue_error = EINTR;
                }
                mutex_enter(&cfw_ringlock);
                DTRACE_PROBE1(ipf__cfw__uiodiscard, int, consumed);
                cfw_evdrops += consumed;
                mutex_exit(&cfw_ringlock);
        }
        return (ue.ue_error);
}
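
/*
 * For illustration only (not compiled): a minimal global-zone userland
 * consumer of the above, reading whole cfwev_t records from /dev/ipfev.
 * process_event() is hypothetical and error handling is elided.
 *
 *      int fd = open("/dev/ipfev", O_RDONLY);
 *      cfwev_t evbuf[32];
 *      ssize_t n;
 *
 *      Blocking read of up to 32 complete events.
 *      n = read(fd, evbuf, sizeof (evbuf));
 *      for (int i = 0; i < n / sizeof (cfwev_t); i++)
 *              process_event(&evbuf[i]);
 */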

#else   /* _KERNEL */

/* Blank stubs to satisfy userland's test compilations. */

int
ipf_cfw_ring_resize(uint32_t a)
{
        return (0);
}

void
ipf_log_cfwlog(struct ipstate *a, uint_t b, ipf_stack_t *c)
{
}

void
ipf_block_cfwlog(frentry_t *a, fr_info_t *b, ipf_stack_t *c)
{
}

#endif  /* _KERNEL */