Print this page
    
Add more statistics to SIOCIPFCFWCFG, and add SIOCIPFCFWNEWSZ to change the ring-buffer size.
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ipf/cfw.c
          +++ new/usr/src/uts/common/inet/ipf/cfw.c
   1    1  /*
   2    2   * This file and its contents are supplied under the terms of the
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13   13   * Copyright 2019, Joyent, Inc.
  14   14   */
  15   15  
  16   16  /* IPF oddness for compilation in userland for IPF tests. */
  17   17  #if defined(KERNEL) || defined(_KERNEL)
  18   18  #undef KERNEL
  19   19  #undef _KERNEL
  20   20  #define KERNEL  1
  21   21  #define _KERNEL 1
  22   22  #endif
  23   23  
  24   24  #include <sys/errno.h>
  25   25  #include <sys/types.h>
  26   26  #include <sys/param.h>
  27   27  #include <sys/time.h>
  28   28  #include <sys/socket.h>
  29   29  #include <net/if.h>
  30   30  #include <net/route.h>
  31   31  #include <netinet/in.h>
  32   32  #include <netinet/in_systm.h>
  33   33  #include <netinet/ip.h>
  34   34  #include <netinet/ip_var.h>
  35   35  #include <netinet/tcp.h>
  36   36  #include "netinet/ip_compat.h"
  37   37  #ifdef  USE_INET6
  38   38  #include <netinet/icmp6.h>
  39   39  #endif
  40   40  #include <netinet/tcpip.h>
  41   41  #include "netinet/ip_fil.h"
  42   42  #include "netinet/ip_nat.h"
  43   43  #include "netinet/ip_frag.h"
  44   44  #include "netinet/ip_state.h"
  45   45  #include "netinet/ip_proxy.h"
  46   46  #include "netinet/ip_auth.h"
  47   47  #include "netinet/ipf_stack.h"
  48   48  #ifdef IPFILTER_SCAN
  49   49  #include "netinet/ip_scan.h"
  50   50  #endif
  51   51  #ifdef IPFILTER_SYNC
  52   52  #include "netinet/ip_sync.h"
  53   53  #endif
  54   54  #include "netinet/ip_pool.h"
  55   55  #include "netinet/ip_htable.h"
  56   56  #ifdef IPFILTER_COMPILED
  57   57  #include "netinet/ip_rules.h"
  58   58  #endif
  59   59  #if defined(_KERNEL)
  60   60  #include <sys/sunddi.h>
  61   61  #endif
  62   62  
  63   63  #include "netinet/ipf_cfw.h"
  64   64  #include <sys/file.h>
  65   65  #include <sys/uio.h>
  66   66  #include <sys/cred.h>
  67   67  #include <sys/ddi.h>
  68   68  
/*
 * cfw == Cloud Firewall ==> routines for a global-zone data collector about
 * ipf events for SmartOS.  The only ones that CFW cares about are ones
 * enforced by global-zone-controlled rulesets.
 *
 * The variable below is mdb-hackable to experiment with turning it on and
 * off. Eventually this will tie into a new ipf (GZ-only) device that flips
 * this on when there is an open instance.  It may also consume an fr_flag
 * to have per-rule granularity.
 */
boolean_t ipf_cfwlog_enabled;	/* Master on/off switch for CFW event logging. */
  80   80  
  81   81  /*
  82   82   * Because ipf's test tools in $SRC/cmd insert all of these files, we need to
  83   83   * stub out what we can vs. drag in even more headers and who knows what else.
  84   84   */
  85   85  #ifdef _KERNEL
  86   86  
/*
 * CFW event ring buffer.  Remember, this is for ALL ZONES because only a
 * global-zone event-reader will be consuming these.  In other words, it's
 * not something to instantiate per-netstack.
 */

/*
 * We may want to get more sophisticated and performant (e.g. per-processor),
 * but for now keep the ring buffer simple and stupid.
 */

/* Must be a power of 2, to be bitmaskable, and must be countable by a uint_t */

#define	IPF_CFW_DEFAULT_RING_BUFS	1024
#define	IPF_CFW_MIN_RING_BUFS		8
#define	IPF_CFW_MAX_RING_BUFS		(1U << 31U)

/* Assume C's init-to-zero is sufficient for these types... */
static kmutex_t cfw_ringlock;	/* Protects every cfw_ring* variable below. */
static kcondvar_t cfw_ringcv;	/* Broadcast when new events arrive. */

static cfwev_t *cfw_ring;	/* NULL by default. */
static uint32_t cfw_ringsize;	/* 0 by default, number of array elements. */
static uint32_t cfw_ringmask;	/* 0 by default. */

/* If these are equal, we're either empty or full. */
static uint_t cfw_ringstart, cfw_ringend;
static boolean_t cfw_ringfull;	/* Tell the difference here! */
static uint64_t cfw_evreports;	/* Lifetime count of reported events. */
static uint64_t cfw_evdrops;	/* Lifetime count of dropped/discarded events. */
 114  118  /*
 115  119   * Place an event in the CFW event ring buffer.
 116  120   *
 117  121   * For now, be simple and drop the oldest event if we overflow. We may wish to
 118  122   * selectively drop older events based on type in the future.
 119  123   */
 120  124  static void
 121  125  ipf_cfwev_report(cfwev_t *event)
 122  126  {
 123  127          mutex_enter(&cfw_ringlock);
 124  128          if (cfw_ringfull) {
 125  129                  cfw_ringstart++;
 126      -                cfw_ringstart &= IPF_CFW_RING_MASK;
      130 +                cfw_ringstart &= cfw_ringmask;
 127  131                  cfw_ringend++;
 128      -                cfw_ringend &= IPF_CFW_RING_MASK;
      132 +                cfw_ringend &= cfw_ringmask;
 129  133                  DTRACE_PROBE(ipf__cfw__evdrop);
 130  134                  cfw_evdrops++;
 131      -                cfw_evring[cfw_ringend] = *event;
      135 +                cfw_ring[cfw_ringend] = *event;
 132  136          } else {
 133      -                cfw_evring[cfw_ringend] = *event;
      137 +                cfw_ring[cfw_ringend] = *event;
 134  138                  cfw_ringend++;
 135      -                cfw_ringend &= IPF_CFW_RING_MASK;
      139 +                cfw_ringend &= cfw_ringmask;
 136  140                  cfw_ringfull = (cfw_ringend == cfw_ringstart);
 137  141          }
 138  142          cfw_evreports++;
 139  143          cv_broadcast(&cfw_ringcv);
 140  144          mutex_exit(&cfw_ringlock);
 141  145  }
 142  146  
 143  147  #if 0
 144  148  /*
 145  149   * Simple event consumer which copies one event from the ring buffer into
 146  150   * what's provided.  In the future, maybe lock-then-callback, even with a
 147  151   * request for multiple events?
 148  152   *
 149  153   * If there are no events, either cv_wait() or return B_FALSE, depending on
 150  154   * "block".
 151  155   */
 152  156  boolean_t
 153  157  ipf_cfwev_consume(cfwev_t *event, boolean_t block)
 154  158  {
 155  159          mutex_enter(&cfw_ringlock);
 156  160  
  
    | 
      ↓ open down ↓ | 
    11 lines elided | 
    
      ↑ open up ↑ | 
  
 157  161          /*
 158  162           * Return B_FALSE if non-block and no data, OR if we receive a signal.
 159  163           */
 160  164          while ((cfw_ringstart == cfw_ringend) && !cfw_ringfull) {
 161  165                  if (!block || !cv_wait_sig(&cfw_ringcv, &cfw_ringlock)) {
 162  166                          mutex_exit(&cfw_ringlock);
 163  167                          return (B_FALSE);
 164  168                  }
 165  169          }
 166  170  
 167      -        *event = cfw_evring[cfw_ringstart];
      171 +        *event = cfw_ring[cfw_ringstart];
 168  172          cfw_ringstart++;
 169  173          cfw_ringstart &= IPF_CFW_RING_MASK;
 170  174          cfw_ringfull = B_FALSE;
 171  175          mutex_exit(&cfw_ringlock);
 172  176          return (B_TRUE);
 173  177  }
 174  178  #endif
 175  179  
 176  180  /*
 177  181   * More sophisticated access to multiple CFW events that can allow copying
 178  182   * straight from the ring buffer up to userland.  Requires a callback (which
 179  183   * could call uiomove() directly, OR to a local still-in-kernel buffer) that
 180  184   * must do the data copying-out.
 181  185   *
 182  186   * Callback function is of the form:
 183  187   *
 184  188   *      uint_t cfw_many_cb(cfwev_t *evptr, int num_avail, void *cbarg);
 185  189   *
 186  190   * The function must return how many events got consumed, which MUST be <= the
 187  191   * number available.  The function must ALSO UNDERSTAND that cfw_ringlock is
 188  192   * held during this time.  The function may be called more than once, if the
 189  193   * available buffers wrap-around OR "block" is set and we don't have enough
 190  194   * buffers.  If any callback returns 0, exit the function with however many
 191  195   * were consumed.
 192  196   *
 193  197   * This function, like the callback, returns the number of events *CONSUMED*.
 194  198   */
 195  199  
/*
 * If you wish to attempt to coalesce reads (to reduce the likelihood of one
 * event at a time during high load) change the number of tries below to
 * something not 0. Early experiments set this to 10.
 *
 * The wait between tries is in usecs in cfw_timeout_wait. The pessimal
 * case for this is a timeout_wait-spaced trickle of one event at a time.
 */
int cfw_timeout_tries = 0;	/* Coalescing retries; 0 disables coalescing. */
int cfw_timeout_wait = 10000;	/* 10ms wait. */
 206  210  
  
    | 
      ↓ open down ↓ | 
    29 lines elided | 
    
      ↑ open up ↑ | 
  
/*
 * Consume up to num_requested events by handing contiguous chunks of the
 * ring to cfw_many_cb (at most two chunks per pass, since the ring can wrap
 * at most once).  cfw_ringlock is held across the callback calls.  Returns
 * the number of events actually consumed, which may be 0.
 */
uint_t
ipf_cfwev_consume_many(uint_t num_requested, boolean_t block,
    cfwmanycb_t cfw_many_cb, void *cbarg)
{
	uint_t consumed = 0, cb_consumed, contig_size;
	int timeout_tries = cfw_timeout_tries;

	mutex_enter(&cfw_ringlock);

	/* Silly reality checks */
	ASSERT3U(cfw_ringstart, <, cfw_ringsize);
	ASSERT3U(cfw_ringend, <, cfw_ringsize);

	/*
	 * Can goto here again if caller wants blocking. NOTE that
	 * num_requested may have been decremented and consumed may have been
	 * incremented if we arrive here via a goto after a cv_wait.
	 */
from_the_top:
	/*
	 * start > end (or start == end while full) means the live data wraps;
	 * the first contiguous chunk runs from start to the physical end of
	 * the array.  start < end means one contiguous chunk.  start == end
	 * and not full means empty: wait (if blocking) or bail.
	 */
	if (cfw_ringstart > cfw_ringend || cfw_ringfull)
		contig_size = cfw_ringsize - cfw_ringstart;
	else if (cfw_ringstart < cfw_ringend)
		contig_size = cfw_ringend - cfw_ringstart;
	else if (block && cv_wait_sig(&cfw_ringcv, &cfw_ringlock)) {
		/* Maybe something to consume now, try again. */
		goto from_the_top;
	} else {
		/* Nothing (more) to consume, return! */
		goto bail;
	}

	ASSERT(contig_size + cfw_ringstart == cfw_ringend ||
	    contig_size + cfw_ringstart == cfw_ringsize);

	if (num_requested < contig_size)
		contig_size = num_requested;

	cb_consumed = cfw_many_cb(&(cfw_ring[cfw_ringstart]), contig_size,
	    cbarg);
	ASSERT(cb_consumed <= contig_size);
	cfw_ringstart += cb_consumed;
	consumed += cb_consumed;
	/* Any consumption at all means the ring is no longer full. */
	cfw_ringfull = (cfw_ringfull && cb_consumed == 0);
	if (cb_consumed < contig_size) {
		/* Caller clearly had a problem. Reality check and bail. */
		ASSERT((cfw_ringstart & cfw_ringmask) == cfw_ringstart);
		goto bail;
	}
	ASSERT(cb_consumed == contig_size);
	cfw_ringstart &= cfw_ringmask;	/* In case of wraparound. */
	num_requested -= contig_size;

	if (num_requested > 0 && cfw_ringstart != cfw_ringend) {
		/* We must have wrapped around the end of the buffer! */
		ASSERT(cfw_ringstart == 0);
		ASSERT(!cfw_ringfull);
		/* Second chunk: from the physical start of the array to end. */
		contig_size = cfw_ringend;
		if (num_requested < contig_size)
			contig_size = num_requested;
		cb_consumed = cfw_many_cb(&(cfw_ring[cfw_ringstart]),
		    contig_size, cbarg);
		cfw_ringstart += cb_consumed;
		consumed += cb_consumed;
		if (cb_consumed < contig_size) {
			/*
			 * Caller clearly had a problem. Reality check and
			 * bail.
			 */
			ASSERT(cfw_ringend > cfw_ringstart);
			goto bail;
		}
		ASSERT(cb_consumed == contig_size);
		num_requested -= contig_size;
	}

	ASSERT(consumed > 0);

	if (num_requested > 0 && block && timeout_tries > 0) {
		clock_t delta = drv_usectohz(cfw_timeout_wait);

		timeout_tries--;

		/*
		 * We obtained some of the events we requested, but not all.
		 * Since we have nothing to consume, wait *a little bit*
		 * longer.
		 */
		switch (cv_reltimedwait_sig(&cfw_ringcv, &cfw_ringlock, delta,
		    TR_CLOCK_TICK)) {
		case 0:
			/* Received signal! Throw out what we have. */
			DTRACE_PROBE1(ipf__cfw__sigdiscard, int, consumed);
			cfw_evdrops += consumed;
			consumed = 0;
			break;
		case -1:
			/* Time reached! Bail with what we got. */
			DTRACE_PROBE(ipf__cfw__timedexpired);
			break;
		default:
			/* Aha! We've got more! */
			DTRACE_PROBE(ipf__cfw__moredata);
			goto from_the_top;
		}
	}

bail:
	mutex_exit(&cfw_ringlock);
	return (consumed);
}
 317  321  
 318  322  static inline zoneid_t
 319  323  ifs_to_did(ipf_stack_t *ifs)
 320  324  {
 321  325          if (ifs->ifs_zone_did == 0) {
 322  326                  zone_t *zone;
 323  327  
 324  328                  /*
 325  329                   * Because we can't get the zone_did at initialization time
 326  330                   * because most zone data isn't readily available then,
 327  331                   * cement the did in place now.
 328  332                   */
 329  333                  ASSERT(ifs->ifs_zone != GLOBAL_ZONEID);
 330  334                  zone = zone_find_by_id(ifs->ifs_zone);
 331  335                  if (zone != NULL) {
 332  336                          ifs->ifs_zone_did = zone->zone_did;
 333  337                          zone_rele(zone);
 334  338                  }
 335  339                  /* Else we are either in shutdown or something weirder. */
 336  340          }
 337  341          return (ifs->ifs_zone_did);
 338  342  }
 339  343  
/*
 * ipf_block_cfwlog()
 *
 * Called by fr_check().  Record drop events for a global-zone data collector.
 * Use rest-of-ipf-style names for the parameters.
 */
void
ipf_block_cfwlog(frentry_t *fr, fr_info_t *fin, ipf_stack_t *ifs)
{
	cfwev_t event = {0};

	/*
	 * We need a rule.
	 * Capture failure by using dtrace on this function's entry.
	 * 'ipf_block_cfwlog:entry /arg0 == NULL/ { printf("GOTCHA!\n"); }'
	 */
	if (fr == NULL)
		return;

	event.cfwev_type = CFWEV_BLOCK;
	event.cfwev_length = sizeof (event);
	/*
	 * IPF code elsewhere does the cheesy single-flag check, even though
	 * there are two flags in a rule (one for in, one for out).
	 */
	event.cfwev_direction = (fr->fr_flags & FR_INQUE) ?
	    CFWDIR_IN : CFWDIR_OUT;

	event.cfwev_protocol = fin->fin_p;
	/*
	 * NOTE: fin_*port is in host/native order, and ICMP info is here too.
	 */
	event.cfwev_sport = htons(fin->fin_sport);
	event.cfwev_dport = htons(fin->fin_dport);

	if (fin->fin_v == IPV4_VERSION) {
		/* Record v4 addresses as v4-mapped IPv6 for a uniform layout. */
		IN6_INADDR_TO_V4MAPPED(&fin->fin_src, &event.cfwev_saddr);
		IN6_INADDR_TO_V4MAPPED(&fin->fin_dst, &event.cfwev_daddr);
	} else {
		ASSERT3U(fin->fin_v, ==, IPV6_VERSION);
		event.cfwev_saddr = fin->fin_src6.in6;
		event.cfwev_daddr = fin->fin_dst6.in6;
	}

	/*
	 * uniqtime() is what ipf's GETKTIME() uses.
	 * If cfwev_tstamp needs to be sourced from elsewhere, fix that here.
	 */
	uniqtime(&event.cfwev_tstamp);
	event.cfwev_zonedid = ifs_to_did(ifs);
	ASSERT(fin->fin_rule <= 0xffff);	/* Must fit in uint16_t... */
	event.cfwev_ruleid = fin->fin_rule;
	memcpy(event.cfwev_ruleuuid, fr->fr_uuid, sizeof (uuid_t));

	ipf_cfwev_report(&event);
}
 396  400  
/*
 * ipf_log_cfwlog()
 *
 * Twin of ipstate_log(), but records state events for a global-zone data
 * collector.
 */
void
ipf_log_cfwlog(struct ipstate *is, uint_t type, ipf_stack_t *ifs)
{
	cfwev_t event = {0};

	switch (type) {
	case ISL_NEW:
	case ISL_CLONE:
		event.cfwev_type = CFWEV_BEGIN;
		break;
	case ISL_EXPIRE:
	case ISL_FLUSH:
	case ISL_REMOVE:
	case ISL_KILLED:
	case ISL_ORPHAN:
#if 0
		event.cfwev_type = CFWEV_END;
		break;
#else
		/*
		 * We don't care about session disappearances in CFW logging
		 * for now.
		 */
		return;
#endif
	default:
		event.cfwev_type = CFWEV_BLOCK;
		break;
	}

	/*
	 * IPF code elsewhere does the cheesy single-flag check, even though
	 * there are two flags in a rule (one for in, one for out).
	 */
	event.cfwev_length = sizeof (event);
	ASSERT(is->is_rule != NULL);
	event.cfwev_direction = (is->is_rule->fr_flags & FR_INQUE) ?
	    CFWDIR_IN : CFWDIR_OUT;
	event.cfwev_protocol = is->is_p;
	switch (is->is_p) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* NOTE: is_*port is in network order. */
		event.cfwev_sport = is->is_sport;
		event.cfwev_dport = is->is_dport;
		break;
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* Scribble the ICMP type in sport... */
		event.cfwev_sport = is->is_icmp.ici_type;
		break;
	}

	if (is->is_v == IPV4_VERSION) {
		/* Record v4 addresses as v4-mapped IPv6 for a uniform layout. */
		IN6_INADDR_TO_V4MAPPED(&is->is_src.in4, &event.cfwev_saddr);
		IN6_INADDR_TO_V4MAPPED(&is->is_dst.in4, &event.cfwev_daddr);
	} else {
		ASSERT3U(is->is_v, ==, IPV6_VERSION);
		event.cfwev_saddr = is->is_src.in6;
		event.cfwev_daddr = is->is_dst.in6;
	}

	/*
	 * uniqtime() is what ipf's GETKTIME() uses.
	 * If cfwev_tstamp needs to be sourced from elsewhere, fix that here.
	 */
	uniqtime(&event.cfwev_tstamp);
	event.cfwev_zonedid = ifs_to_did(ifs);
	ASSERT(is->is_rulen <= 0xffff);	/* Must fit in uint16_t... */
	event.cfwev_ruleid = is->is_rulen;
	memcpy(event.cfwev_ruleuuid, is->is_uuid, sizeof (uuid_t));

	ipf_cfwev_report(&event);
}
 477  481  
/*
 * Pairs the destination uio with the first error hit while filling it, so
 * the consume-many callback can stop copying after a uiomove() failure.
 */
typedef struct uio_error_s {
	struct uio *ue_uio;	/* Destination of uiomove(). */
	int ue_error;		/* First uiomove() errno, or 0 if none. */
} uio_error_t;
 482  486  
/*
 * cfwmanycb_t callback for ipf_cfwlog_read(): uiomove() "num_avail" events
 * straight from the ring buffer to the user buffer.  cfw_ringlock is held
 * by the caller.  Returning 0 means error indication.
 */
static uint_t
cfwlog_read_manycb(cfwev_t *evptr, uint_t num_avail, void *cbarg)
{
	uio_error_t *ue = (uio_error_t *)cbarg;

	ASSERT(MUTEX_HELD(&cfw_ringlock));

	/* A previous chunk already failed; consume nothing further. */
	if (ue->ue_error != 0)
		return (0);

	ue->ue_error = uiomove((caddr_t)evptr, num_avail * sizeof (*evptr),
	    UIO_READ, ue->ue_uio);
	if (ue->ue_error != 0)
		return (0);

	return (num_avail);
}
 501  505  
/*
 * Allocate, destroy, or resize the global CFW event ring buffer.
 *
 * "newsize" is either a power-of-two element count, or one of the sentinels
 * IPF_CFW_RING_ALLOCATE (first-time allocation at the default size) or
 * IPF_CFW_RING_DESTROY (tear the ring down).  Plain resizes must arrive
 * with cfw_ringlock held; the sentinels may be used lockless per the ASSERT.
 * Any events still in the ring when it is freed are counted as drops.
 * Returns 0 or an errno.
 */
int
ipf_cfw_ring_resize(uint32_t newsize)
{
	ASSERT(MUTEX_HELD(&cfw_ringlock) || newsize == IPF_CFW_RING_ALLOCATE ||
	    newsize == IPF_CFW_RING_DESTROY);

	if (newsize == IPF_CFW_RING_ALLOCATE) {
		if (cfw_ring != NULL)
			return (EBUSY);
		newsize = IPF_CFW_DEFAULT_RING_BUFS;
		/* Fall through to allocating a new ring buffer. */
	} else {
		/* We may be called during error cleanup, so be liberal here. */
		if (cfw_ring == NULL && newsize == IPF_CFW_RING_DESTROY)
			return (0);
		/*
		 * NOTE(review): a plain resize appears to assume cfw_ring is
		 * non-NULL here (kmem_free of a NULL ring otherwise) --
		 * confirm all resize callers allocate first.
		 */
		kmem_free(cfw_ring, cfw_ringsize * sizeof (cfwev_t));
		cfw_ring = NULL;
		/* Count every not-yet-consumed event as a drop. */
		if (cfw_ringfull) {
			cfw_evdrops += cfw_ringsize;
		} else if (cfw_ringstart > cfw_ringend) {
			cfw_evdrops += cfw_ringend +
			    (cfw_ringsize - cfw_ringstart);
		} else {
			cfw_evdrops += cfw_ringend - cfw_ringstart;
		}
		cfw_ringsize = cfw_ringmask = cfw_ringstart = cfw_ringend = 0;
		cfw_ringfull = B_FALSE;

		if (newsize == IPF_CFW_RING_DESTROY)
			return (0);
		/*
		 * Keep the reports & drops around because if we're just
		 * resizing, we need to know what we lost.
		 */
	}

	ASSERT(ISP2(newsize));
	cfw_ring = kmem_alloc(newsize * sizeof (cfwev_t), KM_SLEEP);
	/* KM_SLEEP means we always succeed. */
	cfw_ringsize = newsize;
	cfw_ringmask = cfw_ringsize - 1;

	return (0);
}
      550 +
/*
 * Handle the GZ-only CFW ioctls.  SIOCIPFCFWCFG reports the event size,
 * ring size, and report/drop statistics; SIOCIPFCFWNEWSZ additionally
 * resizes the ring to the caller-supplied power-of-two element count.
 * Both copy an ipfcfwcfg_t in and back out.
 */
/* ARGSUSED */
int
ipf_cfwlog_ioctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *cp,
    int *rp)
{
	ipfcfwcfg_t cfginfo;
	int error;

	if (cmd != SIOCIPFCFWCFG && cmd != SIOCIPFCFWNEWSZ)
		return (EIO);

	/* Only the global zone may inspect or resize the ring. */
	if (crgetzoneid(cp) != GLOBAL_ZONEID)
		return (EACCES);

	error = COPYIN((caddr_t)data, (caddr_t)&cfginfo, sizeof (cfginfo));
	if (error != 0)
		return (EFAULT);

	cfginfo.ipfcfwc_maxevsize = sizeof (cfwev_t);
	mutex_enter(&cfw_ringlock);
	cfginfo.ipfcfwc_evreports = cfw_evreports;
	cfginfo.ipfcfwc_evdrops = cfw_evdrops;
	if (cmd == SIOCIPFCFWNEWSZ) {
		uint32_t newsize = cfginfo.ipfcfwc_evringsize;

		/* Do ioctl parameter checking here, then call the resizer. */
		if (newsize < IPF_CFW_MIN_RING_BUFS ||
		    newsize > IPF_CFW_MAX_RING_BUFS || !ISP2(newsize)) {
			error = EINVAL;
		} else {
			error = ipf_cfw_ring_resize(cfginfo.ipfcfwc_evringsize);
		}
	} else {
		error = 0;
	}
	/* Report the actual (possibly just-changed) ring size back. */
	cfginfo.ipfcfwc_evringsize = cfw_ringsize;
	mutex_exit(&cfw_ringlock);

	if (error != 0)
		return (error);

	error = COPYOUT((caddr_t)&cfginfo, (caddr_t)data, sizeof (cfginfo));
	if (error != 0)
		return (EFAULT);

	return (0);
}
 532  598  
/*
 * read(2) entry point for the CFW event device.  Copies as many whole
 * cfwev_t records as fit in the caller's buffer, blocking only when the
 * descriptor was opened without FNDELAY/FNONBLOCK.
 */
/* ARGSUSED */
int
ipf_cfwlog_read(dev_t dev, struct uio *uio, cred_t *cp)
{
	uint_t requested, consumed;
	uio_error_t ue = {uio, 0};
	boolean_t block;

	if (uio->uio_resid == 0)
		return (0);
	/* Refuse buffers too small to hold even one whole event. */
	if (uio->uio_resid < sizeof (cfwev_t))
		return (EINVAL);
	/* XXX KEBE ASKS: Check for resid being too big?!? */

	block = ((uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0);
	requested = uio->uio_resid / sizeof (cfwev_t);
	ASSERT(requested > 0);

	/*
	 * As stated earlier, ipf_cfwev_consume_many() takes a callback.
	 * The callback may be called multiple times before we return.
	 * The callback will execute uiomove().
	 */
	consumed = ipf_cfwev_consume_many(requested, block, cfwlog_read_manycb,
	    &ue);
	ASSERT3U(consumed, <=, requested);
	if (!block && consumed == 0 && ue.ue_error == 0) {
		/* No data available. */
		ue.ue_error = EWOULDBLOCK;
	} else if (ue.ue_error != 0 || (block && consumed == 0)) {
		/* We had a problem... */
		if (ue.ue_error == 0) {
			/* Cover case of cv_wait_sig() receiving a signal. */
			ue.ue_error = EINTR;
		}
		/* Events already pulled off the ring are lost: count them. */
		mutex_enter(&cfw_ringlock);
		DTRACE_PROBE1(ipf__cfw__uiodiscard, int, consumed);
		cfw_evdrops += consumed;
		mutex_exit(&cfw_ringlock);
	}
	return (ue.ue_error);
}
 575  641  
 576  642  #else
 577  643  
 578  644  /* Blank stubs to satisfy userland's test compilations. */
 579  645  
/* Userland test stub: no ring buffer exists outside the kernel. */
int
ipf_cfw_ring_resize(uint32_t a)
{
	return (0);
}
      651 +
/* Userland test stub: CFW state-event logging is kernel-only. */
void
ipf_log_cfwlog(struct ipstate *a, uint_t b, ipf_stack_t *c)
{
}
 584  656  
/* Userland test stub: CFW block-event logging is kernel-only. */
void
ipf_block_cfwlog(frentry_t *a, fr_info_t *b, ipf_stack_t *c)
{
}
 589  661  
 590  662  #endif  /* _KERNEL */
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX