1 /*
   2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
   3  *
   4  * See the IPFILTER.LICENCE file for details on licencing.
   5  *
   6  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   7  *
   8  * Copyright 2019 Joyent, Inc.
   9  */
  10 
  11 #if !defined(lint)
  12 static const char sccsid[] = "@(#)ip_fil_solaris.c      1.7 07/22/06 (C) 1993-2000 Darren Reed";
  13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
  14 #endif
  15 
  16 #include <sys/types.h>
  17 #include <sys/errno.h>
  18 #include <sys/param.h>
  19 #include <sys/cpuvar.h>
  20 #include <sys/open.h>
  21 #include <sys/ioctl.h>
  22 #include <sys/filio.h>
  23 #include <sys/systm.h>
  24 #include <sys/strsubr.h>
  25 #include <sys/strsun.h>
  26 #include <sys/cred.h>
  27 #include <sys/ddi.h>
  28 #include <sys/sunddi.h>
  29 #include <sys/ksynch.h>
  30 #include <sys/kmem.h>
  31 #include <sys/mac_provider.h>
  32 #include <sys/mkdev.h>
  33 #include <sys/protosw.h>
  34 #include <sys/socket.h>
  35 #include <sys/dditypes.h>
  36 #include <sys/cmn_err.h>
  37 #include <sys/zone.h>
  38 #include <net/if.h>
  39 #include <net/af.h>
  40 #include <net/route.h>
  41 #include <netinet/in.h>
  42 #include <netinet/in_systm.h>
  43 #include <netinet/ip.h>
  44 #include <netinet/ip_var.h>
  45 #include <netinet/tcp.h>
  46 #include <netinet/udp.h>
  47 #include <netinet/tcpip.h>
  48 #include <netinet/ip_icmp.h>
  49 #include "netinet/ip_compat.h"
  50 #ifdef  USE_INET6
  51 # include <netinet/icmp6.h>
  52 #endif
  53 #include "netinet/ip_fil.h"
  54 #include "netinet/ip_nat.h"
  55 #include "netinet/ip_frag.h"
  56 #include "netinet/ip_state.h"
  57 #include "netinet/ip_auth.h"
  58 #include "netinet/ip_proxy.h"
  59 #include "netinet/ipf_stack.h"
  60 #ifdef  IPFILTER_LOOKUP
  61 # include "netinet/ip_lookup.h"
  62 #endif
  63 #include <inet/ip_ire.h>
  64 
  65 #include <sys/md5.h>
  66 #include <sys/neti.h>
  67 
/*
 * Prototypes for functions that are private to this file (all static).
 */
static  int     frzerostats __P((caddr_t, ipf_stack_t *));
static  int     fr_setipfloopback __P((int, ipf_stack_t *));
static  int     fr_enableipf __P((ipf_stack_t *, int));
static  int     fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
/* Callbacks that iplattach() installs for NH_NIC_EVENTS. */
static  int     ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
/*
 * Packet hook callbacks.  The ones taking (hook_event_token_t, hook_data_t,
 * void *) are installed by iplattach(); ipf_hook, ipf_hook4 and ipf_hook6
 * are presumably the shared workers behind them -- their bodies are not in
 * this part of the file, so confirm there.
 */
static  int     ipf_hook __P((hook_data_t, int, int, void *));
static  int     ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static  int     ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook4 __P((hook_data_t, int, int, void *));
static  int     ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static  int     ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
    void *));
static  int     ipf_hook6 __P((hook_data_t, int, int, void *));
static  int     ipf_hookvndl3v4_in __P((hook_event_token_t, hook_data_t,
    void *));
static  int     ipf_hookvndl3v6_in __P((hook_event_token_t, hook_data_t,
    void *));
static  int     ipf_hookvndl3v4_out __P((hook_event_token_t, hook_data_t,
    void *));
static  int     ipf_hookvndl3v6_out __P((hook_event_token_t, hook_data_t,
    void *));

/* viona packet callbacks; installed by ipf_hook_protocol_notify(). */
static  int     ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *));
static  int     ipf_hookviona_out __P((hook_event_token_t, hook_data_t,
    void *));

/* Declared extern here; the definitions are not in this part of the file. */
extern  int     ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
extern  int     ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));

/*
 * nethook notification callbacks.  The instance callback is registered by
 * iplattach(); both are also called directly by ipldetach().
 */
static int      ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *,
    const char *, const char *, const char *));
static int      ipf_hook_instance_notify __P((hook_notify_cmd_t, void *,
    const char *, const char *, const char *));
 108 
/*
 * Only built when SOLARIS2 < 10: pointers to IP tunables.  iplattach() aims
 * them at the matching ip_param_arr[] entries (or at ip_g_forward), and
 * iplattach()/ipldetach() write through the forwarding pointers when
 * ifs_fr_control_forwarding asks for it.  The pointed-to integer type
 * differs between Solaris releases, hence the nested conditionals.
 */
#if SOLARIS2 < 10
#if SOLARIS2 >= 7
u_int           *ip_ttl_ptr = NULL;
u_int           *ip_mtudisc = NULL;
# if SOLARIS2 >= 8
int             *ip_forwarding = NULL;
u_int           *ip6_forwarding = NULL;
# else
u_int           *ip_forwarding = NULL;
# endif
#else
u_long          *ip_ttl_ptr = NULL;
u_long          *ip_mtudisc = NULL;
u_long          *ip_forwarding = NULL;
#endif
#endif

vmem_t  *ipf_minor;     /* minor number arena */
void    *ipf_state;     /* DDI state */
 128 
 129 /*
 130  * GZ-controlled and per-zone stacks:
 131  *
 132  * For each non-global zone, we create two ipf stacks: the per-zone stack and
 133  * the GZ-controlled stack.  The per-zone stack can be controlled and observed
 134  * from inside the zone or from the global zone.  The GZ-controlled stack can
 135  * only be controlled and observed from the global zone (though the rules
 136  * still only affect that non-global zone).
 137  *
 138  * The two hooks are always arranged so that the GZ-controlled stack is always
 139  * "outermost" with respect to the zone.  The traffic flow then looks like
 140  * this:
 141  *
 142  * Inbound:
 143  *
 144  *     nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
 145  *
 146  * Outbound:
 147  *
 148  *     nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
 149  */
 150 
/*
 * Names given to the hooks this module registers.  Every hook has a plain
 * name and a "_gz" name: a stack with ifs_gz_controlled set registers under
 * the "_gz" name and passes the plain name as its ordering hint value, while
 * a stack without it does the reverse.
 */

/* IPv4 hook names */
char *hook4_nicevents =         "ipfilter_hook4_nicevents";
char *hook4_nicevents_gz =      "ipfilter_hook4_nicevents_gz";
char *hook4_in =                "ipfilter_hook4_in";
char *hook4_in_gz =             "ipfilter_hook4_in_gz";
char *hook4_out =               "ipfilter_hook4_out";
char *hook4_out_gz =            "ipfilter_hook4_out_gz";
char *hook4_loop_in =           "ipfilter_hook4_loop_in";
char *hook4_loop_in_gz =        "ipfilter_hook4_loop_in_gz";
char *hook4_loop_out =          "ipfilter_hook4_loop_out";
char *hook4_loop_out_gz =       "ipfilter_hook4_loop_out_gz";

/* IPv6 hook names */
char *hook6_nicevents =         "ipfilter_hook6_nicevents";
char *hook6_nicevents_gz =      "ipfilter_hook6_nicevents_gz";
char *hook6_in =                "ipfilter_hook6_in";
char *hook6_in_gz =             "ipfilter_hook6_in_gz";
char *hook6_out =               "ipfilter_hook6_out";
char *hook6_out_gz =            "ipfilter_hook6_out_gz";
char *hook6_loop_in =           "ipfilter_hook6_loop_in";
char *hook6_loop_in_gz =        "ipfilter_hook6_loop_in_gz";
char *hook6_loop_out =          "ipfilter_hook6_loop_out";
char *hook6_loop_out_gz =       "ipfilter_hook6_loop_out_gz";

/* vnd IPv4/v6 hook names */
char *hook4_vnd_in =            "ipfilter_hookvndl3v4_in";
char *hook4_vnd_in_gz =         "ipfilter_hookvndl3v4_in_gz";
char *hook6_vnd_in =            "ipfilter_hookvndl3v6_in";
char *hook6_vnd_in_gz =         "ipfilter_hookvndl3v6_in_gz";
char *hook4_vnd_out =           "ipfilter_hookvndl3v4_out";
char *hook4_vnd_out_gz =        "ipfilter_hookvndl3v4_out_gz";
char *hook6_vnd_out =           "ipfilter_hookvndl3v6_out";
char *hook6_vnd_out_gz =        "ipfilter_hookvndl3v6_out_gz";

/* viona hook names; used by ipf_hook_protocol_notify() */
char *hook_viona_in =           "ipfilter_hookviona_in";
char *hook_viona_in_gz =        "ipfilter_hookviona_in_gz";
char *hook_viona_out =          "ipfilter_hookviona_out";
char *hook_viona_out_gz =       "ipfilter_hookviona_out_gz";
 190 
 191 /* ------------------------------------------------------------------------ */
 192 /* Function:    ipldetach                                                   */
 193 /* Returns:     int - 0 == success, else error.                             */
 194 /* Parameters:  Nil                                                         */
 195 /*                                                                          */
 196 /* This function is responsible for undoing anything that might have been   */
 197 /* done in a call to iplattach().  It must be able to clean up from a call  */
 198 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
 199 /* configures a table to be so large that we cannot allocate enough memory  */
 200 /* for it.                                                                  */
 201 /* ------------------------------------------------------------------------ */
 202 int ipldetach(ifs)
 203 ipf_stack_t *ifs;
 204 {
 205 
 206         ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
 207 
 208 #if SOLARIS2 < 10
 209 
 210         if (ifs->ifs_fr_control_forwarding & 2) {
 211                 if (ip_forwarding != NULL)
 212                         *ip_forwarding = 0;
 213 #if SOLARIS2 >= 8
 214                 if (ip6_forwarding != NULL)
 215                         *ip6_forwarding = 0;
 216 #endif
 217         }
 218 #endif
 219 
 220         /*
 221          * This lock needs to be dropped around the net_hook_unregister calls
 222          * because we can deadlock here with:
 223          * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
 224          * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
 225          */
 226         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 227 
 228 #define UNDO_HOOK(_f, _b, _e, _h)                                       \
 229         do {                                                            \
 230                 if (ifs->_f != NULL) {                                       \
 231                         if (ifs->_b) {                                       \
 232                                 int tmp = net_hook_unregister(ifs->_f,       \
 233                                            _e, ifs->_h);             \
 234                                 ifs->_b = (tmp != 0 && tmp != ENXIO);        \
 235                                 if (!ifs->_b && ifs->_h != NULL) {        \
 236                                         hook_free(ifs->_h);          \
 237                                         ifs->_h = NULL;                      \
 238                                 }                                       \
 239                         } else if (ifs->_h != NULL) {                        \
 240                                 hook_free(ifs->_h);                  \
 241                                 ifs->_h = NULL;                              \
 242                         }                                               \
 243                 }                                                       \
 244                 _NOTE(CONSTCOND)                                        \
 245         } while (0)
 246 
 247         /*
 248          * Remove IPv6 Hooks
 249          */
 250         if (ifs->ifs_ipf_ipv6 != NULL) {
 251                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
 252                           NH_PHYSICAL_IN, ifs_ipfhook6_in);
 253                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
 254                           NH_PHYSICAL_OUT, ifs_ipfhook6_out);
 255                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
 256                           NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
 257                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
 258                           NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
 259                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
 260                           NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
 261 
 262                 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
 263                         goto detach_failed;
 264                 ifs->ifs_ipf_ipv6 = NULL;
 265         }
 266 
 267         /*
 268          * Remove IPv4 Hooks
 269          */
 270         if (ifs->ifs_ipf_ipv4 != NULL) {
 271                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
 272                           NH_PHYSICAL_IN, ifs_ipfhook4_in);
 273                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
 274                           NH_PHYSICAL_OUT, ifs_ipfhook4_out);
 275                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
 276                           NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
 277                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
 278                           NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
 279                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
 280                           NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
 281 
 282                 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
 283                         goto detach_failed;
 284                 ifs->ifs_ipf_ipv4 = NULL;
 285         }
 286 
 287         /*
 288          * Remove VND hooks
 289          */
 290         if (ifs->ifs_ipf_vndl3v4 != NULL) {
 291                 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_in,
 292                     NH_PHYSICAL_IN, ifs_ipfhookvndl3v4_in);
 293                 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_out,
 294                     NH_PHYSICAL_OUT, ifs_ipfhookvndl3v4_out);
 295 
 296                 if (net_protocol_release(ifs->ifs_ipf_vndl3v4) != 0)
 297                         goto detach_failed;
 298                 ifs->ifs_ipf_vndl3v4 = NULL;
 299         }
 300 
 301         if (ifs->ifs_ipf_vndl3v6 != NULL) {
 302                 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_in,
 303                     NH_PHYSICAL_IN, ifs_ipfhookvndl3v6_in);
 304                 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_out,
 305                     NH_PHYSICAL_OUT, ifs_ipfhookvndl3v6_out);
 306 
 307                 if (net_protocol_release(ifs->ifs_ipf_vndl3v6) != 0)
 308                         goto detach_failed;
 309                 ifs->ifs_ipf_vndl3v6 = NULL;
 310         }
 311 
 312         /*
 313          * Remove notification of viona hooks
 314          */
 315         net_instance_notify_unregister(ifs->ifs_netid,
 316             ipf_hook_instance_notify);
 317 
 318 #undef UNDO_HOOK
 319 
 320         /*
 321          * Normally, viona will unregister itself before ipldetach() is called,
 322          * so these will be no-ops, but out of caution, we try to make sure
 323          * we've removed any of our references.
 324          */
 325         (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
 326             NH_PHYSICAL_IN);
 327         (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
 328             NH_PHYSICAL_OUT);
 329 
 330         {
 331                 char netidstr[12]; /* Large enough for INT_MAX + NUL */
 332                 (void) snprintf(netidstr, sizeof (netidstr), "%d",
 333                     ifs->ifs_netid);
 334 
 335                 /*
 336                  * The notify callbacks expect the netid value passed as a
 337                  * string in the third argument.  To prevent confusion if
 338                  * traced, we pass the same value the nethook framework would
 339                  * pass, even though the callback does not currently use the
 340                  * value.
 341                  */
 342                 (void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr,
 343                     NULL, Hn_VIONA);
 344         }
 345 
 346 #ifdef  IPFDEBUG
 347         cmn_err(CE_CONT, "ipldetach()\n");
 348 #endif
 349 
 350         WRITE_ENTER(&ifs->ifs_ipf_global);
 351         fr_deinitialise(ifs);
 352 
 353         (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
 354         (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
 355 
 356         if (ifs->ifs_ipf_locks_done == 1) {
 357                 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
 358                 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
 359                 RW_DESTROY(&ifs->ifs_ipf_tokens);
 360                 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
 361                 ifs->ifs_ipf_locks_done = 0;
 362         }
 363 
 364         if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
 365             ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
 366             ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
 367             ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
 368             ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
 369                 return -1;
 370 
 371         return 0;
 372 
 373 detach_failed:
 374         WRITE_ENTER(&ifs->ifs_ipf_global);
 375         return -1;
 376 }
 377 
 378 int iplattach(ifs)
 379 ipf_stack_t *ifs;
 380 {
 381 #if SOLARIS2 < 10
 382         int i;
 383 #endif
 384         netid_t id = ifs->ifs_netid;
 385 
 386 #ifdef  IPFDEBUG
 387         cmn_err(CE_CONT, "iplattach()\n");
 388 #endif
 389 
 390         ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
 391         ifs->ifs_fr_flags = IPF_LOGGING;
 392 #ifdef _KERNEL
 393         ifs->ifs_fr_update_ipid = 0;
 394 #else
 395         ifs->ifs_fr_update_ipid = 1;
 396 #endif
 397         ifs->ifs_fr_minttl = 4;
 398         ifs->ifs_fr_icmpminfragmtu = 68;
 399 #if defined(IPFILTER_DEFAULT_BLOCK)
 400         ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
 401 #else
 402         ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
 403 #endif
 404 
 405         bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
 406         MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
 407         MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
 408         RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
 409         RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
 410         ifs->ifs_ipf_locks_done = 1;
 411 
 412         if (fr_initialise(ifs) < 0)
 413                 return -1;
 414 
 415         /*
 416          * For incoming packets, we want the GZ-controlled hooks to run before
 417          * the per-zone hooks, regardless of what order they're are installed.
 418          * See the "GZ-controlled and per-zone stacks" comment block at the top
 419          * of this file.
 420          */
 421 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a)                           \
 422         HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);     \
 423         (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER;      \
 424         (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
 425 
 426         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
 427                   hook4_nicevents, hook4_nicevents_gz, ifs);
 428         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
 429                   hook4_in, hook4_in_gz, ifs);
 430         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
 431                   hook4_loop_in, hook4_loop_in_gz, ifs);
 432 
 433         /*
 434          * For outgoing packets, we want the GZ-controlled hooks to run after
 435          * the per-zone hooks, regardless of what order they're are installed.
 436          * See the "GZ-controlled and per-zone stacks" comment block at the top
 437          * of this file.
 438          */
 439 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a)                            \
 440         HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);     \
 441         (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE;      \
 442         (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
 443 
 444         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
 445                   hook4_out, hook4_out_gz, ifs);
 446         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
 447                   hook4_loop_out, hook4_loop_out_gz, ifs);
 448 
 449         /*
 450          * If we hold this lock over all of the net_hook_register calls, we
 451          * can cause a deadlock to occur with the following lock ordering:
 452          * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
 453          * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
 454          */
 455         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 456 
 457         /*
 458          * Add IPv4 hooks
 459          */
 460         ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
 461         if (ifs->ifs_ipf_ipv4 == NULL)
 462                 goto hookup_failed;
 463 
 464         ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
 465             NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
 466         if (!ifs->ifs_hook4_nic_events)
 467                 goto hookup_failed;
 468 
 469         ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
 470             NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
 471         if (!ifs->ifs_hook4_physical_in)
 472                 goto hookup_failed;
 473 
 474         ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
 475             NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
 476         if (!ifs->ifs_hook4_physical_out)
 477                 goto hookup_failed;
 478 
 479         if (ifs->ifs_ipf_loopback) {
 480                 ifs->ifs_hook4_loopback_in = (net_hook_register(
 481                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
 482                     ifs->ifs_ipfhook4_loop_in) == 0);
 483                 if (!ifs->ifs_hook4_loopback_in)
 484                         goto hookup_failed;
 485 
 486                 ifs->ifs_hook4_loopback_out = (net_hook_register(
 487                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
 488                     ifs->ifs_ipfhook4_loop_out) == 0);
 489                 if (!ifs->ifs_hook4_loopback_out)
 490                         goto hookup_failed;
 491         }
 492 
 493         /*
 494          * Add IPv6 hooks
 495          */
 496         ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
 497         if (ifs->ifs_ipf_ipv6 == NULL)
 498                 goto hookup_failed;
 499 
 500         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
 501                   hook6_nicevents, hook6_nicevents_gz, ifs);
 502         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
 503                   hook6_in, hook6_in_gz, ifs);
 504         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
 505                   hook6_loop_in, hook6_loop_in_gz, ifs);
 506         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
 507                   hook6_out, hook6_out_gz, ifs);
 508         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
 509                   hook6_loop_out, hook6_loop_out_gz, ifs);
 510 
 511         ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
 512             NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
 513         if (!ifs->ifs_hook6_nic_events)
 514                 goto hookup_failed;
 515 
 516         ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
 517             NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
 518         if (!ifs->ifs_hook6_physical_in)
 519                 goto hookup_failed;
 520 
 521         ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
 522             NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
 523         if (!ifs->ifs_hook6_physical_out)
 524                 goto hookup_failed;
 525 
 526         if (ifs->ifs_ipf_loopback) {
 527                 ifs->ifs_hook6_loopback_in = (net_hook_register(
 528                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
 529                     ifs->ifs_ipfhook6_loop_in) == 0);
 530                 if (!ifs->ifs_hook6_loopback_in)
 531                         goto hookup_failed;
 532 
 533                 ifs->ifs_hook6_loopback_out = (net_hook_register(
 534                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
 535                     ifs->ifs_ipfhook6_loop_out) == 0);
 536                 if (!ifs->ifs_hook6_loopback_out)
 537                         goto hookup_failed;
 538         }
 539 
 540         /*
 541          * Add VND INET hooks
 542          */
 543         ifs->ifs_ipf_vndl3v4 = net_protocol_lookup(id, NHF_VND_INET);
 544         if (ifs->ifs_ipf_vndl3v4 == NULL)
 545                 goto hookup_failed;
 546 
 547         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v4_in, ipf_hookvndl3v4_in,
 548             hook4_vnd_in, hook4_vnd_in_gz, ifs);
 549         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v4_out, ipf_hookvndl3v4_out,
 550             hook4_vnd_out, hook4_vnd_out_gz, ifs);
 551         ifs->ifs_hookvndl3v4_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v4,
 552             NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v4_in) == 0);
 553         if (!ifs->ifs_hookvndl3v4_physical_in)
 554                 goto hookup_failed;
 555 
 556         ifs->ifs_hookvndl3v4_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v4,
 557             NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v4_out) == 0);
 558         if (!ifs->ifs_hookvndl3v4_physical_out)
 559                 goto hookup_failed;
 560 
 561 
 562         /*
 563          * VND INET6 hooks
 564          */
 565         ifs->ifs_ipf_vndl3v6 = net_protocol_lookup(id, NHF_VND_INET6);
 566         if (ifs->ifs_ipf_vndl3v6 == NULL)
 567                 goto hookup_failed;
 568 
 569         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v6_in, ipf_hookvndl3v6_in,
 570             hook6_vnd_in, hook6_vnd_in_gz, ifs);
 571         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v6_out, ipf_hookvndl3v6_out,
 572             hook6_vnd_out, hook6_vnd_out_gz, ifs);
 573         ifs->ifs_hookvndl3v6_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v6,
 574             NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v6_in) == 0);
 575         if (!ifs->ifs_hookvndl3v6_physical_in)
 576                 goto hookup_failed;
 577 
 578         ifs->ifs_hookvndl3v6_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v6,
 579             NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v6_out) == 0);
 580         if (!ifs->ifs_hookvndl3v6_physical_out)
 581                 goto hookup_failed;
 582 
 583         /*
 584          * VIONA INET hooks.  While the nethook framework allows us to register
 585          * hooks for events that haven't been registered yet, we instead
 586          * register and unregister our hooks in response to notifications
 587          * about the viona hooks from the nethook framework.  This prevents
 588          * problems when the viona module gets unloaded while the ipf module
 589          * does not.  If we do not unregister our hooks after the viona module
 590          * is unloaded, the viona module cannot later re-register them if it
 591          * gets reloaded.  As the ip, vnd, and ipf modules are rarely unloaded
 592          * even on DEBUG kernels, they do not experience this issue.
 593          */
 594         if (net_instance_notify_register(id, ipf_hook_instance_notify,
 595             ifs) != 0)
 596                 goto hookup_failed;
 597 
 598         /*
 599          * Reacquire ipf_global, now it is safe.
 600          */
 601         WRITE_ENTER(&ifs->ifs_ipf_global);
 602 
 603 /* Do not use private interface ip_params_arr[] in Solaris 10 */
 604 #if SOLARIS2 < 10
 605 
 606 #if SOLARIS2 >= 8
 607         ip_forwarding = &ip_g_forward;
 608 #endif
 609         /*
 610          * XXX - There is no terminator for this array, so it is not possible
 611          * to tell if what we are looking for is missing and go off the end
 612          * of the array.
 613          */
 614 
 615 #if SOLARIS2 <= 8
 616         for (i = 0; ; i++) {
 617                 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
 618                         ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
 619                 } else if (!strcmp(ip_param_arr[i].ip_param_name,
 620                             "ip_path_mtu_discovery")) {
 621                         ip_mtudisc = &ip_param_arr[i].ip_param_value;
 622                 }
 623 #if SOLARIS2 < 8
 624                 else if (!strcmp(ip_param_arr[i].ip_param_name,
 625                             "ip_forwarding")) {
 626                         ip_forwarding = &ip_param_arr[i].ip_param_value;
 627                 }
 628 #else
 629                 else if (!strcmp(ip_param_arr[i].ip_param_name,
 630                             "ip6_forwarding")) {
 631                         ip6_forwarding = &ip_param_arr[i].ip_param_value;
 632                 }
 633 #endif
 634 
 635                 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
 636 #if SOLARIS2 >= 8
 637                     ip6_forwarding != NULL &&
 638 #endif
 639                     ip_forwarding != NULL)
 640                         break;
 641         }
 642 #endif
 643 
 644         if (ifs->ifs_fr_control_forwarding & 1) {
 645                 if (ip_forwarding != NULL)
 646                         *ip_forwarding = 1;
 647 #if SOLARIS2 >= 8
 648                 if (ip6_forwarding != NULL)
 649                         *ip6_forwarding = 1;
 650 #endif
 651         }
 652 
 653 #endif
 654 
 655         return 0;
 656 hookup_failed:
 657         WRITE_ENTER(&ifs->ifs_ipf_global);
 658         return -1;
 659 }
 660 
 661 /* ------------------------------------------------------------------------ */
/*
 * Called whenever a nethook protocol is registered or unregistered.  Currently
 * only used to add or remove the hooks for viona.
 *
 * While the function signature requires returning int, nothing
 * in usr/src/uts/common/io/hook.c that invokes the callbacks
 * captures the return value (nor is there currently any documentation
 * on what return values should be).  For now at least, we'll return 0
 * on success (or 'not applicable') or an error value.  Even if the
 * nethook framework doesn't use the return value, it can be observed via
 * dtrace if needed.
 */
 674 static int
 675 ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
 676     const char *name, const char *dummy __unused, const char *he_name)
 677 {
 678         ipf_stack_t *ifs = arg;
 679         hook_t **hookpp;
 680         char *hook_name, *hint_name;
 681         hook_func_t hookfn;
 682         boolean_t *hookedp;
 683         hook_hint_t hint;
 684         boolean_t out;
 685         int ret = 0;
 686 
 687         const boolean_t gz = ifs->ifs_gz_controlled;
 688 
 689         /* We currently only care about viona hooks notifications */
 690         if (strcmp(name, Hn_VIONA) != 0)
 691                 return (0);
 692 
 693         if (strcmp(he_name, NH_PHYSICAL_IN) == 0) {
 694                 out = B_FALSE;
 695         } else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) {
 696                 out = B_TRUE;
 697         } else {
 698                 /*
 699                  * If we've added more hook events to viona, we must add
 700                  * the corresponding handling here (even if it's just to
 701                  * ignore it) to prevent the firewall from not working as
 702                  * intended.
 703                  */
 704                 cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__,
 705                     he_name);
 706 
 707                 return (0);
 708         }
 709 
 710         if (out) {
 711                 hookpp = &ifs->ifs_ipfhookviona_out;
 712                 hookfn = ipf_hookviona_out;
 713                 hookedp = &ifs->ifs_hookviona_physical_out;
 714                 name = gz ? hook_viona_out_gz : hook_viona_out;
 715                 hint = gz ? HH_AFTER : HH_BEFORE;
 716                 hint_name = gz ? hook_viona_out : hook_viona_out_gz;
 717         } else {
 718                 hookpp = &ifs->ifs_ipfhookviona_in;
 719                 hookfn = ipf_hookviona_in;
 720                 hookedp = &ifs->ifs_hookviona_physical_in;
 721                 name = gz ? hook_viona_in_gz : hook_viona_in;
 722                 hint = gz ? HH_BEFORE : HH_AFTER;
 723                 hint_name = gz ? hook_viona_in : hook_viona_in_gz;
 724         }
 725 
 726         switch (command) {
 727         default:
 728         case HN_NONE:
 729                 break;
 730         case HN_REGISTER:
 731                 HOOK_INIT(*hookpp, hookfn, (char *)name, ifs);
 732                 (*hookpp)->h_hint = hint;
 733                 (*hookpp)->h_hintvalue = (uintptr_t)hint_name;
 734                 ret = net_hook_register(ifs->ifs_ipf_viona,
 735                     (char *)he_name, *hookpp);
 736                 if (ret != 0) {
 737                         cmn_err(CE_NOTE, "%s: could not register hook "
 738                             "(hook family=%s hook=%s) err=%d", __func__,
 739                             name, he_name, ret);
 740                         *hookedp = B_FALSE;
 741                         return (ret);
 742                 }
 743                 *hookedp = B_TRUE;
 744                 break;
 745         case HN_UNREGISTER:
 746                 if (ifs->ifs_ipf_viona == NULL)
 747                         break;
 748 
 749                 ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona,
 750                     (char *)he_name, *hookpp) : 0;
 751                 if ((ret == 0 || ret == ENXIO)) {
 752                         if (*hookpp != NULL) {
 753                                 hook_free(*hookpp);
 754                                 *hookpp = NULL;
 755                         }
 756                         *hookedp = B_FALSE;
 757                 }
 758                 break;
 759         }
 760 
 761         return (ret);
 762 }
 763 
 764 /*
 765  * Called whenever a new nethook instance is created.  Currently only used
 766  * with the Hn_VIONA nethooks.  Similar to ipf_hook_protocol_notify, the out
 767  * function signature must return an int, though the result is never used.
 768  * We elect to return 0 on success (or not applicable) or a non-zero value
 769  * on error.
 770  */
 771 static int
 772 ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg,
 773     const char *netid, const char *dummy __unused, const char *instance)
 774 {
 775         ipf_stack_t *ifs = arg;
 776         int ret = 0;
 777 
 778         /* We currently only care about viona hooks */
 779         if (strcmp(instance, Hn_VIONA) != 0)
 780                 return (0);
 781 
 782         switch (command) {
 783         case HN_NONE:
 784         default:
 785                 return (0);
 786         case HN_REGISTER:
 787                 ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid,
 788                     NHF_VIONA);
 789 
 790                 if (ifs->ifs_ipf_viona == NULL)
 791                         return (EPROTONOSUPPORT);
 792 
 793                 ret = net_protocol_notify_register(ifs->ifs_ipf_viona,
 794                     ipf_hook_protocol_notify, ifs);
 795                 VERIFY(ret == 0 || ret == ESHUTDOWN);
 796                 break;
 797         case HN_UNREGISTER:
 798                 if (ifs->ifs_ipf_viona == NULL)
 799                         break;
 800                 VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona,
 801                     ipf_hook_protocol_notify));
 802                 VERIFY0(net_protocol_release(ifs->ifs_ipf_viona));
 803                 ifs->ifs_ipf_viona = NULL;
 804                 break;
 805         }
 806 
 807         return (ret);
 808 }
 809 
 810 static  int     fr_setipfloopback(set, ifs)
 811 int set;
 812 ipf_stack_t *ifs;
 813 {
 814         if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
 815                 return EFAULT;
 816 
 817         if (set && !ifs->ifs_ipf_loopback) {
 818                 ifs->ifs_ipf_loopback = 1;
 819 
 820                 ifs->ifs_hook4_loopback_in = (net_hook_register(
 821                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
 822                     ifs->ifs_ipfhook4_loop_in) == 0);
 823                 if (!ifs->ifs_hook4_loopback_in)
 824                         return EINVAL;
 825 
 826                 ifs->ifs_hook4_loopback_out = (net_hook_register(
 827                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
 828                     ifs->ifs_ipfhook4_loop_out) == 0);
 829                 if (!ifs->ifs_hook4_loopback_out)
 830                         return EINVAL;
 831 
 832                 ifs->ifs_hook6_loopback_in = (net_hook_register(
 833                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
 834                     ifs->ifs_ipfhook6_loop_in) == 0);
 835                 if (!ifs->ifs_hook6_loopback_in)
 836                         return EINVAL;
 837 
 838                 ifs->ifs_hook6_loopback_out = (net_hook_register(
 839                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
 840                     ifs->ifs_ipfhook6_loop_out) == 0);
 841                 if (!ifs->ifs_hook6_loopback_out)
 842                         return EINVAL;
 843 
 844         } else if (!set && ifs->ifs_ipf_loopback) {
 845                 ifs->ifs_ipf_loopback = 0;
 846 
 847                 ifs->ifs_hook4_loopback_in =
 848                     (net_hook_unregister(ifs->ifs_ipf_ipv4,
 849                     NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
 850                 if (ifs->ifs_hook4_loopback_in)
 851                         return EBUSY;
 852 
 853                 ifs->ifs_hook4_loopback_out =
 854                     (net_hook_unregister(ifs->ifs_ipf_ipv4,
 855                     NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
 856                 if (ifs->ifs_hook4_loopback_out)
 857                         return EBUSY;
 858 
 859                 ifs->ifs_hook6_loopback_in =
 860                     (net_hook_unregister(ifs->ifs_ipf_ipv6,
 861                     NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
 862                 if (ifs->ifs_hook6_loopback_in)
 863                         return EBUSY;
 864 
 865                 ifs->ifs_hook6_loopback_out =
 866                     (net_hook_unregister(ifs->ifs_ipf_ipv6,
 867                     NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
 868                 if (ifs->ifs_hook6_loopback_out)
 869                         return EBUSY;
 870         }
 871         return 0;
 872 }
 873 
 874 
 875 /*
 876  * Filter ioctl interface.
 877  */
 878 /*ARGSUSED*/
 879 int iplioctl(dev, cmd, data, mode, cp, rp)
 880 dev_t dev;
 881 int cmd;
 882 #if SOLARIS2 >= 7
 883 intptr_t data;
 884 #else
 885 int *data;
 886 #endif
 887 int mode;
 888 cred_t *cp;
 889 int *rp;
 890 {
 891         int error = 0, tmp;
 892         friostat_t fio;
 893         minor_t unit;
 894         u_int enable;
 895         ipf_stack_t *ifs;
 896         zoneid_t zid;
 897         ipf_devstate_t *isp;
 898 
 899 #ifdef  IPFDEBUG
 900         cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
 901                 dev, cmd, data, mode, cp, rp);
 902 #endif
 903         unit = getminor(dev);
 904 
 905         isp = ddi_get_soft_state(ipf_state, unit);
 906         if (isp == NULL)
 907                 return ENXIO;
 908         unit = isp->ipfs_minor;
 909 
 910         if (unit == IPL_LOGEV)
 911                 return (ipf_cfwlog_ioctl(dev, cmd, data, mode, cp, rp));
 912 
 913         zid = crgetzoneid(cp);
 914         if (cmd == SIOCIPFZONESET) {
 915                 if (zid == GLOBAL_ZONEID)
 916                         return fr_setzoneid(isp, (caddr_t) data);
 917                 return EACCES;
 918         }
 919 
 920         /*
 921          * ipf_find_stack returns with a read lock on ifs_ipf_global
 922          */
 923         ifs = ipf_find_stack(zid, isp);
 924         if (ifs == NULL)
 925                 return ENXIO;
 926 
 927         if (ifs->ifs_fr_running <= 0) {
 928                 if (unit != IPL_LOGIPF) {
 929                         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 930                         return EIO;
 931                 }
 932                 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
 933                     cmd != SIOCIPFSET && cmd != SIOCFRENB &&
 934                     cmd != SIOCGETFS && cmd != SIOCGETFF) {
 935                         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 936                         return EIO;
 937                 }
 938         }
 939 
 940         if (ifs->ifs_fr_enable_active != 0) {
 941                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
 942                 return EBUSY;
 943         }
 944 
 945         error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
 946                                curproc, ifs);
 947         if (error != -1) {
 948                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
 949                 return error;
 950         }
 951         error = 0;
 952 
 953         switch (cmd)
 954         {
 955         case SIOCFRENB :
 956                 if (!(mode & FWRITE))
 957                         error = EPERM;
 958                 else {
 959                         error = COPYIN((caddr_t)data, (caddr_t)&enable,
 960                                        sizeof(enable));
 961                         if (error != 0) {
 962                                 error = EFAULT;
 963                                 break;
 964                         }
 965 
 966                         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 967                         WRITE_ENTER(&ifs->ifs_ipf_global);
 968 
 969                         /*
 970                          * We must recheck fr_enable_active here, since we've
 971                          * dropped ifs_ipf_global from R in order to get it
 972                          * exclusively.
 973                          */
 974                         if (ifs->ifs_fr_enable_active == 0) {
 975                                 ifs->ifs_fr_enable_active = 1;
 976                                 error = fr_enableipf(ifs, enable);
 977                                 ifs->ifs_fr_enable_active = 0;
 978                         }
 979                 }
 980                 break;
 981         case SIOCIPFSET :
 982                 if (!(mode & FWRITE)) {
 983                         error = EPERM;
 984                         break;
 985                 }
 986                 /* FALLTHRU */
 987         case SIOCIPFGETNEXT :
 988         case SIOCIPFGET :
 989                 error = fr_ipftune(cmd, (void *)data, ifs);
 990                 break;
 991         case SIOCSETFF :
 992                 if (!(mode & FWRITE))
 993                         error = EPERM;
 994                 else {
 995                         error = COPYIN((caddr_t)data,
 996                                        (caddr_t)&ifs->ifs_fr_flags,
 997                                        sizeof(ifs->ifs_fr_flags));
 998                         if (error != 0)
 999                                 error = EFAULT;
1000                 }
1001                 break;
1002         case SIOCIPFLP :
1003                 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1004                                sizeof(tmp));
1005                 if (error != 0)
1006                         error = EFAULT;
1007                 else
1008                         error = fr_setipfloopback(tmp, ifs);
1009                 break;
1010         case SIOCGETFF :
1011                 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
1012                                 sizeof(ifs->ifs_fr_flags));
1013                 if (error != 0)
1014                         error = EFAULT;
1015                 break;
1016         case SIOCFUNCL :
1017                 error = fr_resolvefunc((void *)data);
1018                 break;
1019         case SIOCINAFR :
1020         case SIOCRMAFR :
1021         case SIOCADAFR :
1022         case SIOCZRLST :
1023                 if (!(mode & FWRITE))
1024                         error = EPERM;
1025                 else
1026                         error = frrequest(unit, cmd, (caddr_t)data,
1027                                           ifs->ifs_fr_active, 1, ifs);
1028                 break;
1029         case SIOCINIFR :
1030         case SIOCRMIFR :
1031         case SIOCADIFR :
1032                 if (!(mode & FWRITE))
1033                         error = EPERM;
1034                 else
1035                         error = frrequest(unit, cmd, (caddr_t)data,
1036                                           1 - ifs->ifs_fr_active, 1, ifs);
1037                 break;
1038         case SIOCSWAPA :
1039                 if (!(mode & FWRITE))
1040                         error = EPERM;
1041                 else {
1042                         WRITE_ENTER(&ifs->ifs_ipf_mutex);
1043                         bzero((char *)ifs->ifs_frcache,
1044                             sizeof (ifs->ifs_frcache));
1045                         error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
1046                                         (caddr_t)data,
1047                                         sizeof(ifs->ifs_fr_active));
1048                         if (error != 0)
1049                                 error = EFAULT;
1050                         else
1051                                 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
1052                         RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
1053                 }
1054                 break;
1055         case SIOCGETFS :
1056                 fr_getstat(&fio, ifs);
1057                 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
1058                 break;
1059         case SIOCFRZST :
1060                 if (!(mode & FWRITE))
1061                         error = EPERM;
1062                 else
1063                         error = fr_zerostats((caddr_t)data, ifs);
1064                 break;
1065         case    SIOCIPFFL :
1066                 if (!(mode & FWRITE))
1067                         error = EPERM;
1068                 else {
1069                         error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1070                                        sizeof(tmp));
1071                         if (!error) {
1072                                 tmp = frflush(unit, 4, tmp, ifs);
1073                                 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1074                                                 sizeof(tmp));
1075                                 if (error != 0)
1076                                         error = EFAULT;
1077                         } else
1078                                 error = EFAULT;
1079                 }
1080                 break;
1081 #ifdef USE_INET6
1082         case    SIOCIPFL6 :
1083                 if (!(mode & FWRITE))
1084                         error = EPERM;
1085                 else {
1086                         error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1087                                        sizeof(tmp));
1088                         if (!error) {
1089                                 tmp = frflush(unit, 6, tmp, ifs);
1090                                 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1091                                                 sizeof(tmp));
1092                                 if (error != 0)
1093                                         error = EFAULT;
1094                         } else
1095                                 error = EFAULT;
1096                 }
1097                 break;
1098 #endif
1099         case SIOCSTLCK :
1100                 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1101                 if (error == 0) {
1102                         ifs->ifs_fr_state_lock = tmp;
1103                         ifs->ifs_fr_nat_lock = tmp;
1104                         ifs->ifs_fr_frag_lock = tmp;
1105                         ifs->ifs_fr_auth_lock = tmp;
1106                 } else
1107                         error = EFAULT;
1108         break;
1109 #ifdef  IPFILTER_LOG
1110         case    SIOCIPFFB :
1111                 if (!(mode & FWRITE))
1112                         error = EPERM;
1113                 else {
1114                         tmp = ipflog_clear(unit, ifs);
1115                         error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1116                                        sizeof(tmp));
1117                         if (error)
1118                                 error = EFAULT;
1119                 }
1120                 break;
1121 #endif /* IPFILTER_LOG */
1122         case SIOCFRSYN :
1123                 if (!(mode & FWRITE))
1124                         error = EPERM;
1125                 else {
1126                         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1127                         WRITE_ENTER(&ifs->ifs_ipf_global);
1128 
1129                         frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1130                         fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1131                         fr_nataddrsync(0, NULL, NULL, ifs);
1132                         fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1133                         error = 0;
1134                 }
1135                 break;
1136         case SIOCGFRST :
1137                 error = fr_outobj((void *)data, fr_fragstats(ifs),
1138                                   IPFOBJ_FRAGSTAT);
1139                 break;
1140         case FIONREAD :
1141 #ifdef  IPFILTER_LOG
1142                 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
1143 
1144                 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
1145                 if (error != 0)
1146                         error = EFAULT;
1147 #endif
1148                 break;
1149         case SIOCIPFITER :
1150                 error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
1151                                        curproc, ifs);
1152                 break;
1153 
1154         case SIOCGENITER :
1155                 error = ipf_genericiter((caddr_t)data, crgetuid(cp),
1156                                         curproc, ifs);
1157                 break;
1158 
1159         case SIOCIPFDELTOK :
1160                 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1161                 if (error != 0) {
1162                         error = EFAULT;
1163                 } else {
1164                         error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
1165                 }
1166                 break;
1167 
1168         default :
1169 #ifdef  IPFDEBUG
1170                 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
1171                         cmd, (void *)data);
1172 #endif
1173                 error = EINVAL;
1174                 break;
1175         }
1176         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1177         return error;
1178 }
1179 
1180 
1181 static int fr_enableipf(ifs, enable)
1182 ipf_stack_t *ifs;
1183 int enable;
1184 {
1185         int error;
1186 
1187         if (!enable) {
1188                 error = ipldetach(ifs);
1189                 if (error == 0)
1190                         ifs->ifs_fr_running = -1;
1191                 return error;
1192         }
1193 
1194         if (ifs->ifs_fr_running > 0)
1195                 return 0;
1196 
1197         error = iplattach(ifs);
1198         if (error == 0) {
1199                 if (ifs->ifs_fr_timer_id == NULL) {
1200                         int hz = drv_usectohz(500000);
1201 
1202                         ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
1203                                                        (void *)ifs,
1204                                                        hz);
1205                 }
1206                 ifs->ifs_fr_running = 1;
1207         } else {
1208                 (void) ipldetach(ifs);
1209         }
1210         return error;
1211 }
1212 
1213 
1214 phy_if_t get_unit(name, v, ifs)
1215 char *name;
1216 int v;
1217 ipf_stack_t *ifs;
1218 {
1219         net_handle_t nif;
1220  
1221         if (v == 4)
1222                 nif = ifs->ifs_ipf_ipv4;
1223         else if (v == 6)
1224                 nif = ifs->ifs_ipf_ipv6;
1225         else
1226                 return 0;
1227 
1228         return (net_phylookup(nif, name));
1229 }
1230 
1231 /*
1232  * routines below for saving IP headers to buffer
1233  */
1234 /*ARGSUSED*/
1235 int iplopen(devp, flags, otype, cred)
1236 dev_t *devp;
1237 int flags, otype;
1238 cred_t *cred;
1239 {
1240         ipf_devstate_t *isp;
1241         minor_t min = getminor(*devp);
1242         minor_t minor;
1243 
1244 #ifdef  IPFDEBUG
1245         cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
1246 #endif
1247         if (!(otype & OTYP_CHR))
1248                 return ENXIO;
1249 
1250         if (IPL_LOGMAX < min)
1251                 return ENXIO;
1252 
1253         /* Special-case ipfev: global-zone-open only. */
1254         if (min == IPL_LOGEV) {
1255                 if (crgetzoneid(cred) != GLOBAL_ZONEID)
1256                         return (ENXIO);
1257                 /*
1258                  * Else enable the CFW logging of events.
1259                  * NOTE: For now, we only allow one open at a time.
1260                  * Use atomic_add to confirm/deny. And also for now,
1261                  * assume sizeof (boolean_t) == sizeof (int).
1262                  */
1263                 if (atomic_inc_uint_nv(&ipf_cfwlog_enabled) > 1) {
1264                         atomic_dec_uint(&ipf_cfwlog_enabled);
1265                         return (EBUSY);
1266                 }
1267         }
1268 
1269         minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
1270             VM_BESTFIT | VM_SLEEP);
1271 
1272         if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
1273                 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
1274                 if (min == IPL_LOGEV)
1275                         atomic_dec_uint(&ipf_cfwlog_enabled);
1276                 return ENXIO;
1277         }
1278 
1279         *devp = makedevice(getmajor(*devp), minor);
1280         isp = ddi_get_soft_state(ipf_state, minor);
1281         VERIFY(isp != NULL);
1282 
1283         isp->ipfs_minor = min;
1284         isp->ipfs_zoneid = IPFS_ZONE_UNSET;
1285 
1286         return 0;
1287 }
1288 
1289 
1290 /*ARGSUSED*/
1291 int iplclose(dev, flags, otype, cred)
1292 dev_t dev;
1293 int flags, otype;
1294 cred_t *cred;
1295 {
1296         minor_t min = getminor(dev);
1297         ipf_devstate_t *isp;
1298 
1299 #ifdef  IPFDEBUG
1300         cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
1301 #endif
1302 
1303         if (IPL_LOGMAX < min)
1304                 return ENXIO;
1305 
1306         isp = ddi_get_soft_state(ipf_state, min);
1307         if (isp != NULL && isp->ipfs_minor == IPL_LOGEV) {
1308                 /* Disable CFW logging. */
1309                 membar_exit();
1310                 atomic_dec_uint(&ipf_cfwlog_enabled);
1311         }
1312 
1313         ddi_soft_state_free(ipf_state, min);
1314         vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
1315 
1316         return 0;
1317 }
1318 
1319 #ifdef  IPFILTER_LOG
1320 /*
1321  * iplread/ipllog
1322  * both of these must operate with at least splnet() lest they be
1323  * called during packet processing and cause an inconsistency to appear in
1324  * the filter lists.
1325  */
1326 /*ARGSUSED*/
1327 int iplread(dev, uio, cp)
1328 dev_t dev;
1329 register struct uio *uio;
1330 cred_t *cp;
1331 {
1332         ipf_stack_t *ifs;
1333         int ret;
1334         minor_t unit;
1335         ipf_devstate_t *isp;
1336 
1337         unit = getminor(dev);
1338         isp = ddi_get_soft_state(ipf_state, unit);
1339         if (isp == NULL)
1340                 return ENXIO;
1341         unit = isp->ipfs_minor;
1342 
1343         if (unit == IPL_LOGEV)
1344                 return (ipf_cfwlog_read(dev, uio, cp));
1345 
1346         /*
1347          * ipf_find_stack returns with a read lock on ifs_ipf_global
1348          */
1349         ifs = ipf_find_stack(crgetzoneid(cp), isp);
1350         if (ifs == NULL)
1351                 return ENXIO;
1352 
1353 # ifdef IPFDEBUG
1354         cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
1355 # endif
1356 
1357         if (ifs->ifs_fr_running < 1) {
1358                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1359                 return EIO;
1360         }
1361 
1362 # ifdef IPFILTER_SYNC
1363         if (unit == IPL_LOGSYNC) {
1364                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1365                 return ipfsync_read(uio);
1366         }
1367 # endif
1368 
1369         ret = ipflog_read(unit, uio, ifs);
1370         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1371         return ret;
1372 }
1373 #endif /* IPFILTER_LOG */
1374 
1375 
1376 /*
1377  * iplread/ipllog
1378  * both of these must operate with at least splnet() lest they be
1379  * called during packet processing and cause an inconsistancy to appear in
1380  * the filter lists.
1381  */
1382 int iplwrite(dev, uio, cp)
1383 dev_t dev;
1384 register struct uio *uio;
1385 cred_t *cp;
1386 {
1387         ipf_stack_t *ifs;
1388         minor_t unit;
1389         ipf_devstate_t *isp;
1390 
1391         unit = getminor(dev);
1392         isp = ddi_get_soft_state(ipf_state, unit);
1393         if (isp == NULL)
1394                 return ENXIO;
1395         unit = isp->ipfs_minor;
1396 
1397         if (unit == IPL_LOGEV)
1398                 return (EIO);   /* ipfev doesn't support write yet. */
1399 
1400         /*
1401          * ipf_find_stack returns with a read lock on ifs_ipf_global
1402          */
1403         ifs = ipf_find_stack(crgetzoneid(cp), isp);
1404         if (ifs == NULL)
1405                 return ENXIO;
1406 
1407 #ifdef  IPFDEBUG
1408         cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
1409 #endif
1410 
1411         if (ifs->ifs_fr_running < 1) {
1412                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1413                 return EIO;
1414         }
1415 
1416 #ifdef  IPFILTER_SYNC
1417         if (getminor(dev) == IPL_LOGSYNC) {
1418                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1419                 return ipfsync_write(uio);
1420         }
1421 #endif /* IPFILTER_SYNC */
1422         dev = dev;      /* LINT */
1423         uio = uio;      /* LINT */
1424         cp = cp;        /* LINT */
1425         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1426         return ENXIO;
1427 }
1428 
1429 
1430 /*
1431  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
1432  * requires a large amount of setting up and isn't any more efficient.
1433  */
1434 int fr_send_reset(fin)
1435 fr_info_t *fin;
1436 {
1437         tcphdr_t *tcp, *tcp2;
1438         int tlen, hlen;
1439         mblk_t *m;
1440 #ifdef  USE_INET6
1441         ip6_t *ip6;
1442 #endif
1443         ip_t *ip;
1444 
1445         tcp = fin->fin_dp;
1446         if (tcp->th_flags & TH_RST)
1447                 return -1;
1448 
1449 #ifndef IPFILTER_CKSUM
1450         if (fr_checkl4sum(fin) == -1)
1451                 return -1;
1452 #endif
1453 
1454         tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
1455 #ifdef  USE_INET6
1456         if (fin->fin_v == 6)
1457                 hlen = sizeof(ip6_t);
1458         else
1459 #endif
1460                 hlen = sizeof(ip_t);
1461         hlen += sizeof(*tcp2);
1462         if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
1463                 return -1;
1464 
1465         m->b_rptr += 64;
1466         MTYPE(m) = M_DATA;
1467         m->b_wptr = m->b_rptr + hlen;
1468         ip = (ip_t *)m->b_rptr;
1469         bzero((char *)ip, hlen);
1470         tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1471         tcp2->th_dport = tcp->th_sport;
1472         tcp2->th_sport = tcp->th_dport;
1473         if (tcp->th_flags & TH_ACK) {
1474                 tcp2->th_seq = tcp->th_ack;
1475                 tcp2->th_flags = TH_RST;
1476         } else {
1477                 tcp2->th_ack = ntohl(tcp->th_seq);
1478                 tcp2->th_ack += tlen;
1479                 tcp2->th_ack = htonl(tcp2->th_ack);
1480                 tcp2->th_flags = TH_RST|TH_ACK;
1481         }
1482         tcp2->th_off = sizeof(struct tcphdr) >> 2;
1483 
1484         ip->ip_v = fin->fin_v;
1485 #ifdef  USE_INET6
1486         if (fin->fin_v == 6) {
1487                 ip6 = (ip6_t *)m->b_rptr;
1488                 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1489                 ip6->ip6_src = fin->fin_dst6.in6;
1490                 ip6->ip6_dst = fin->fin_src6.in6;
1491                 ip6->ip6_plen = htons(sizeof(*tcp));
1492                 ip6->ip6_nxt = IPPROTO_TCP;
1493                 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1494         } else
1495 #endif
1496         {
1497                 ip->ip_src.s_addr = fin->fin_daddr;
1498                 ip->ip_dst.s_addr = fin->fin_saddr;
1499                 ip->ip_id = fr_nextipid(fin);
1500                 ip->ip_hl = sizeof(*ip) >> 2;
1501                 ip->ip_p = IPPROTO_TCP;
1502                 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1503                 ip->ip_tos = fin->fin_ip->ip_tos;
1504                 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1505         }
1506         return fr_send_ip(fin, m, &m);
1507 }
1508 
1509 /*
1510  * Function:    fr_send_ip
1511  * Returns:      0: success
1512  *              -1: failed
1513  * Parameters:
1514  *      fin: packet information
1515  *      m: the message block where ip head starts
1516  *
1517  * Send a new packet through the IP stack. 
1518  *
1519  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1520  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1521  * function).
1522  *
1523  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1524  * in by this function.
1525  *
1526  * All other portions of the packet must be in on-the-wire format.
1527  */
1528 /*ARGSUSED*/
1529 static int fr_send_ip(fin, m, mpp)
1530 fr_info_t *fin;
1531 mblk_t *m, **mpp;
1532 {
1533         qpktinfo_t qpi, *qpip;
1534         fr_info_t fnew;
1535         ip_t *ip;
1536         int i, hlen;
1537         ipf_stack_t *ifs = fin->fin_ifs;
1538 
1539         ip = (ip_t *)m->b_rptr;
1540         bzero((char *)&fnew, sizeof(fnew));
1541 
1542 #ifdef  USE_INET6
1543         if (fin->fin_v == 6) {
1544                 ip6_t *ip6;
1545 
1546                 ip6 = (ip6_t *)ip;
1547                 ip6->ip6_vfc = 0x60;
1548                 ip6->ip6_hlim = 127;
1549                 fnew.fin_v = 6;
1550                 hlen = sizeof(*ip6);
1551                 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1552         } else
1553 #endif
1554         {
1555                 fnew.fin_v = 4;
1556 #if SOLARIS2 >= 10
1557                 ip->ip_ttl = 255;
1558                 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1559                         ip->ip_off = htons(IP_DF);
1560 #else
1561                 if (ip_ttl_ptr != NULL)
1562                         ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1563                 else
1564                         ip->ip_ttl = 63;
1565                 if (ip_mtudisc != NULL)
1566                         ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1567                 else
1568                         ip->ip_off = htons(IP_DF);
1569 #endif
1570                 /*
1571                  * The dance with byte order and ip_len/ip_off is because in
1572                  * fr_fastroute, it expects them to be in host byte order but
1573                  * ipf_cksum expects them to be in network byte order.
1574                  */
1575                 ip->ip_len = htons(ip->ip_len);
1576                 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1577                 ip->ip_len = ntohs(ip->ip_len);
1578                 ip->ip_off = ntohs(ip->ip_off);
1579                 hlen = sizeof(*ip);
1580                 fnew.fin_plen = ip->ip_len;
1581         }
1582 
1583         qpip = fin->fin_qpi;
1584         qpi.qpi_off = 0;
1585         qpi.qpi_ill = qpip->qpi_ill;
1586         qpi.qpi_m = m;
1587         qpi.qpi_data = ip;
1588         fnew.fin_qpi = &qpi;
1589         fnew.fin_ifp = fin->fin_ifp;
1590         fnew.fin_flx = FI_NOCKSUM;
1591         fnew.fin_m = m;
1592         fnew.fin_qfm = m;
1593         fnew.fin_ip = ip;
1594         fnew.fin_mp = mpp;
1595         fnew.fin_hlen = hlen;
1596         fnew.fin_dp = (char *)ip + hlen;
1597         fnew.fin_ifs = fin->fin_ifs;
1598         (void) fr_makefrip(hlen, ip, &fnew);
1599 
1600         i = fr_fastroute(m, mpp, &fnew, NULL);
1601         return i;
1602 }
1603 
1604 
1605 int fr_send_icmp_err(type, fin, dst)
1606 int type;
1607 fr_info_t *fin;
1608 int dst;
1609 {
1610         struct in_addr dst4;
1611         struct icmp *icmp;
1612         qpktinfo_t *qpi;
1613         int hlen, code;
1614         phy_if_t phy;
1615         u_short sz;
1616 #ifdef  USE_INET6
1617         mblk_t *mb;
1618 #endif
1619         mblk_t *m;
1620 #ifdef  USE_INET6
1621         ip6_t *ip6;
1622 #endif
1623         ip_t *ip;
1624         ipf_stack_t *ifs = fin->fin_ifs;
1625 
1626         if ((type < 0) || (type > ICMP_MAXTYPE))
1627                 return -1;
1628 
1629         code = fin->fin_icode;
1630 #ifdef USE_INET6
1631         if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1632                 return -1;
1633 #endif
1634 
1635 #ifndef IPFILTER_CKSUM
1636         if (fr_checkl4sum(fin) == -1)
1637                 return -1;
1638 #endif
1639 
1640         qpi = fin->fin_qpi;
1641 
1642 #ifdef  USE_INET6
1643         mb = fin->fin_qfm;
1644 
1645         if (fin->fin_v == 6) {
1646                 sz = sizeof(ip6_t);
1647                 sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1648                 hlen = sizeof(ip6_t);
1649                 type = icmptoicmp6types[type];
1650                 if (type == ICMP6_DST_UNREACH)
1651                         code = icmptoicmp6unreach[code];
1652         } else
1653 #endif
1654         {
1655                 if ((fin->fin_p == IPPROTO_ICMP) &&
1656                     !(fin->fin_flx & FI_SHORT))
1657                         switch (ntohs(fin->fin_data[0]) >> 8)
1658                         {
1659                         case ICMP_ECHO :
1660                         case ICMP_TSTAMP :
1661                         case ICMP_IREQ :
1662                         case ICMP_MASKREQ :
1663                                 break;
1664                         default :
1665                                 return 0;
1666                         }
1667 
1668                 sz = sizeof(ip_t) * 2;
1669                 sz += 8;                /* 64 bits of data */
1670                 hlen = sizeof(ip_t);
1671         }
1672 
1673         sz += offsetof(struct icmp, icmp_ip);
1674         if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1675                 return -1;
1676         MTYPE(m) = M_DATA;
1677         m->b_rptr += 64;
1678         m->b_wptr = m->b_rptr + sz;
1679         bzero((char *)m->b_rptr, (size_t)sz);
1680         ip = (ip_t *)m->b_rptr;
1681         ip->ip_v = fin->fin_v;
1682         icmp = (struct icmp *)(m->b_rptr + hlen);
1683         icmp->icmp_type = type & 0xff;
1684         icmp->icmp_code = code & 0xff;
1685         phy = (phy_if_t)qpi->qpi_ill; 
1686         if (type == ICMP_UNREACH && (phy != 0) && 
1687             fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1688                 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1689 
1690 #ifdef  USE_INET6
1691         if (fin->fin_v == 6) {
1692                 struct in6_addr dst6;
1693                 int csz;
1694 
1695                 if (dst == 0) {
1696                         ipf_stack_t *ifs = fin->fin_ifs;
1697 
1698                         if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1699                                        (void *)&dst6, NULL, ifs) == -1) {
1700                                 FREE_MB_T(m);
1701                                 return -1;
1702                         }
1703                 } else
1704                         dst6 = fin->fin_dst6.in6;
1705 
1706                 csz = sz;
1707                 sz -= sizeof(ip6_t);
1708                 ip6 = (ip6_t *)m->b_rptr;
1709                 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1710                 ip6->ip6_plen = htons((u_short)sz);
1711                 ip6->ip6_nxt = IPPROTO_ICMPV6;
1712                 ip6->ip6_src = dst6;
1713                 ip6->ip6_dst = fin->fin_src6.in6;
1714                 sz -= offsetof(struct icmp, icmp_ip);
1715                 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1716                 icmp->icmp_cksum = csz - sizeof(ip6_t);
1717         } else
1718 #endif
1719         {
1720                 ip->ip_hl = sizeof(*ip) >> 2;
1721                 ip->ip_p = IPPROTO_ICMP;
1722                 ip->ip_id = fin->fin_ip->ip_id;
1723                 ip->ip_tos = fin->fin_ip->ip_tos;
1724                 ip->ip_len = (u_short)sz;
1725                 if (dst == 0) {
1726                         ipf_stack_t *ifs = fin->fin_ifs;
1727 
1728                         if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1729                                        (void *)&dst4, NULL, ifs) == -1) {
1730                                 FREE_MB_T(m);
1731                                 return -1;
1732                         }
1733                 } else {
1734                         dst4 = fin->fin_dst;
1735                 }
1736                 ip->ip_src = dst4;
1737                 ip->ip_dst = fin->fin_src;
1738                 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1739                       sizeof(*fin->fin_ip));
1740                 bcopy((char *)fin->fin_ip + fin->fin_hlen,
1741                       (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1742                 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1743                 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1744                 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1745                                              sz - sizeof(ip_t));
1746         }
1747 
1748         /*
1749          * Need to exit out of these so we don't recursively call rw_enter
1750          * from fr_qout.
1751          */
1752         return fr_send_ip(fin, m, &m);
1753 }
1754 
1755 #include <sys/time.h>
1756 #include <sys/varargs.h>
1757 
1758 #ifndef _KERNEL
1759 #include <stdio.h>
1760 #endif
1761 
1762 /*
1763  * Return the first IP Address associated with an interface
1764  * For IPv6, we walk through the list of logical interfaces and return
1765  * the address of the first one that isn't a link-local interface.
1766  * We can't assume that it is :1 because another link-local address
1767  * may have been assigned there.
1768  */
1769 /*ARGSUSED*/
1770 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1771 int v, atype;
1772 void *ifptr;
1773 struct in_addr  *inp, *inpmask;
1774 ipf_stack_t *ifs;
1775 {
1776         struct sockaddr_in6 v6addr[2];
1777         struct sockaddr_in v4addr[2];
1778         net_ifaddr_t type[2];
1779         net_handle_t net_data;
1780         phy_if_t phyif;
1781         void *array;
1782 
1783         switch (v)
1784         {
1785         case 4:
1786                 net_data = ifs->ifs_ipf_ipv4;
1787                 array = v4addr;
1788                 break;
1789         case 6:
1790                 net_data = ifs->ifs_ipf_ipv6;
1791                 array = v6addr;
1792                 break;
1793         default:
1794                 net_data = NULL;
1795                 break;
1796         }
1797 
1798         if (net_data == NULL)
1799                 return -1;
1800 
1801         phyif = (phy_if_t)ifptr;
1802 
1803         switch (atype)
1804         {
1805         case FRI_PEERADDR :
1806                 type[0] = NA_PEER;
1807                 break;
1808 
1809         case FRI_BROADCAST :
1810                 type[0] = NA_BROADCAST;
1811                 break;
1812 
1813         default :
1814                 type[0] = NA_ADDRESS;
1815                 break;
1816         }
1817 
1818         type[1] = NA_NETMASK;
1819 
1820         if (v == 6) {
1821                 lif_if_t idx = 0;
1822 
1823                 do {
1824                         idx = net_lifgetnext(net_data, phyif, idx);
1825                         if (net_getlifaddr(net_data, phyif, idx, 2, type,
1826                                            array) < 0)
1827                                 return -1;
1828                         if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1829                             !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1830                                 break;
1831                 } while (idx != 0);
1832 
1833                 if (idx == 0)
1834                         return -1;
1835 
1836                 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1837                                         inp, inpmask);
1838         }
1839 
1840         if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1841                 return -1;
1842 
1843         return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1844 }
1845 
1846 
1847 u_32_t fr_newisn(fin)
1848 fr_info_t *fin;
1849 {
1850         static int iss_seq_off = 0;
1851         u_char hash[16];
1852         u_32_t newiss;
1853         MD5_CTX ctx;
1854         ipf_stack_t *ifs = fin->fin_ifs;
1855 
1856         /*
1857          * Compute the base value of the ISS.  It is a hash
1858          * of (saddr, sport, daddr, dport, secret).
1859          */
1860         MD5Init(&ctx);
1861 
1862         MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1863                   sizeof(fin->fin_fi.fi_src));
1864         MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1865                   sizeof(fin->fin_fi.fi_dst));
1866         MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1867 
1868         MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1869 
1870         MD5Final(hash, &ctx);
1871 
1872         bcopy(hash, &newiss, sizeof(newiss));
1873 
1874         /*
1875          * Now increment our "timer", and add it in to
1876          * the computed value.
1877          *
1878          * XXX Use `addin'?
1879          * XXX TCP_ISSINCR too large to use?
1880          */
1881         iss_seq_off += 0x00010000;
1882         newiss += iss_seq_off;
1883         return newiss;
1884 }
1885 
1886 
1887 /* ------------------------------------------------------------------------ */
1888 /* Function:    fr_nextipid                                                 */
1889 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1890 /* Parameters:  fin(I) - pointer to packet information                      */
1891 /*                                                                          */
1892 /* Returns the next IPv4 ID to use for this packet.                         */
1893 /* ------------------------------------------------------------------------ */
1894 u_short fr_nextipid(fin)
1895 fr_info_t *fin;
1896 {
1897         static u_short ipid = 0;
1898         u_short id;
1899         ipf_stack_t *ifs = fin->fin_ifs;
1900 
1901         MUTEX_ENTER(&ifs->ifs_ipf_rw);
1902         if (fin->fin_pktnum != 0) {
1903                 id = fin->fin_pktnum & 0xffff;
1904         } else {
1905                 id = ipid++;
1906         }
1907         MUTEX_EXIT(&ifs->ifs_ipf_rw);
1908 
1909         return id;
1910 }
1911 
1912 
1913 #ifndef IPFILTER_CKSUM
1914 /* ARGSUSED */
1915 #endif
1916 INLINE void fr_checkv4sum(fin)
1917 fr_info_t *fin;
1918 {
1919 #ifdef IPFILTER_CKSUM
1920         if (fr_checkl4sum(fin) == -1)
1921                 fin->fin_flx |= FI_BAD;
1922 #endif
1923 }
1924 
1925 
#ifdef USE_INET6
/*
 * Flag the packet as bad (FI_BAD) when fr_checkl4sum() rejects it.
 * This does nothing unless IPFILTER_CKSUM is defined.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
        int sumok;

        sumok = (fr_checkl4sum(fin) != -1);
        if (!sumok)
                fin->fin_flx |= FI_BAD;
# endif
}
#endif /* USE_INET6 */
1939 
1940 
1941 #if (SOLARIS2 < 7)
1942 void fr_slowtimer()
1943 #else
1944 /*ARGSUSED*/
1945 void fr_slowtimer __P((void *arg))
1946 #endif
1947 {
1948         ipf_stack_t *ifs = arg;
1949 
1950         READ_ENTER(&ifs->ifs_ipf_global);
1951         if (ifs->ifs_fr_running != 1) {
1952                 ifs->ifs_fr_timer_id = NULL;
1953                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1954                 return;
1955         }
1956         ipf_expiretokens(ifs);
1957         fr_fragexpire(ifs);
1958         fr_timeoutstate(ifs);
1959         fr_natexpire(ifs);
1960         fr_authexpire(ifs);
1961         ifs->ifs_fr_ticks++;
1962         if (ifs->ifs_fr_running == 1)
1963                 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1964                     drv_usectohz(500000));
1965         else
1966                 ifs->ifs_fr_timer_id = NULL;
1967         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1968 }
1969 
1970 
1971 /* ------------------------------------------------------------------------ */
1972 /* Function:    fr_pullup                                                   */
1973 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
1974 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
1975 /*              fin(I) - pointer to packet information                      */
1976 /*              len(I) - number of bytes to pullup                          */
1977 /*                                                                          */
1978 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1979 /* single buffer for ease of access.  Operating system native functions are */
1980 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
1981 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
1982 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
1983 /* and ONLY if the pullup succeeds.                                         */
1984 /*                                                                          */
1985 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
1986 /* of buffers that starts at *fin->fin_mp.                                  */
1987 /* ------------------------------------------------------------------------ */
1988 void *fr_pullup(min, fin, len)
1989 mb_t *min;
1990 fr_info_t *fin;
1991 int len;
1992 {
1993         qpktinfo_t *qpi = fin->fin_qpi;
1994         int out = fin->fin_out, dpoff, ipoff;
1995         mb_t *m = min, *m1, *m2;
1996         char *ip;
1997         uint32_t start, stuff, end, value, flags;
1998         ipf_stack_t *ifs = fin->fin_ifs;
1999 
2000         if (m == NULL)
2001                 return NULL;
2002 
2003         ip = (char *)fin->fin_ip;
2004         if ((fin->fin_flx & FI_COALESCE) != 0)
2005                 return ip;
2006 
2007         ipoff = fin->fin_ipoff;
2008         if (fin->fin_dp != NULL)
2009                 dpoff = (char *)fin->fin_dp - (char *)ip;
2010         else
2011                 dpoff = 0;
2012 
2013         if (M_LEN(m) < len + ipoff) {
2014 
2015                 /*
2016                  * pfil_precheck ensures the IP header is on a 32bit
2017                  * aligned address so simply fail if that isn't currently
2018                  * the case (should never happen).
2019                  */
2020                 int inc = 0;
2021 
2022                 if (ipoff > 0) {
2023                         if ((ipoff & 3) != 0) {
2024                                 inc = 4 - (ipoff & 3);
2025                                 if (m->b_rptr - inc >= m->b_datap->db_base)
2026                                         m->b_rptr -= inc;
2027                                 else
2028                                         inc = 0;
2029                         }
2030                 }
2031 
2032                 /*
2033                  * XXX This is here as a work around for a bug with DEBUG
2034                  * XXX Solaris kernels.  The problem is b_prev is used by IP
2035                  * XXX code as a way to stash the phyint_index for a packet,
2036                  * XXX this doesn't get reset by IP but freeb does an ASSERT()
2037                  * XXX for both of these to be NULL.  See 6442390.
2038                  */
2039                 m1 = m;
2040                 m2 = m->b_prev;
2041 
2042                 do {
2043                         m1->b_next = NULL;
2044                         m1->b_prev = NULL;
2045                         m1 = m1->b_cont;
2046                 } while (m1);
2047 
2048                 /*
2049                  * Need to preserve checksum information by copying them
2050                  * to newmp which heads the pulluped message.
2051                  */
2052                 mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);
2053 
2054                 if (pullupmsg(m, len + ipoff + inc) == 0) {
2055                         ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
2056                         FREE_MB_T(*fin->fin_mp);
2057                         *fin->fin_mp = NULL;
2058                         fin->fin_m = NULL;
2059                         fin->fin_ip = NULL;
2060                         fin->fin_dp = NULL;
2061                         qpi->qpi_data = NULL;
2062                         return NULL;
2063                 }
2064 
2065                 mac_hcksum_set(m, start, stuff, end, value, flags);
2066 
2067                 m->b_prev = m2;
2068                 m->b_rptr += inc;
2069                 fin->fin_m = m;
2070                 ip = MTOD(m, char *) + ipoff;
2071                 qpi->qpi_data = ip;
2072         }
2073 
2074         ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
2075         fin->fin_ip = (ip_t *)ip;
2076         if (fin->fin_dp != NULL)
2077                 fin->fin_dp = (char *)fin->fin_ip + dpoff;
2078 
2079         if (len == fin->fin_plen)
2080                 fin->fin_flx |= FI_COALESCE;
2081         return ip;
2082 }
2083 
2084 
2085 /*
2086  * Function:    fr_verifysrc
2087  * Returns:     int (really boolean)
2088  * Parameters:  fin - packet information
2089  *
2090  * Check whether the packet has a valid source address for the interface on
2091  * which the packet arrived, implementing the "fr_chksrc" feature.
2092  * Returns true iff the packet's source address is valid.
2093  */
2094 int fr_verifysrc(fin)
2095 fr_info_t *fin;
2096 {
2097         net_handle_t net_data_p;
2098         phy_if_t phy_ifdata_routeto;
2099         struct sockaddr sin;
2100         ipf_stack_t *ifs = fin->fin_ifs;
2101 
2102         if (fin->fin_v == 4) { 
2103                 net_data_p = ifs->ifs_ipf_ipv4;
2104         } else if (fin->fin_v == 6) { 
2105                 net_data_p = ifs->ifs_ipf_ipv6;
2106         } else { 
2107                 return (0); 
2108         }
2109 
2110         /* Get the index corresponding to the if name */
2111         sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2112         bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
2113         phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
2114 
2115         return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 
2116 }
2117 
2118 /*
2119  * Return true only if forwarding is enabled on the interface.
2120  */
2121 static int
2122 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
2123 {
2124         lif_if_t lif;
2125 
2126         for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
2127             lif = net_lifgetnext(ndp, phyif, lif)) {
2128                 int res;
2129                 uint64_t flags;
2130 
2131                 res = net_getlifflags(ndp, phyif, lif, &flags);
2132                 if (res != 0)
2133                         return (0);
2134                 if (flags & IFF_ROUTER)
2135                         return (1);
2136         }
2137 
2138         return (0);
2139 }
2140 
2141 /*
2142  * Function:    fr_fastroute
2143  * Returns:      0: success;
2144  *              -1: failed
2145  * Parameters:
2146  *      mb: the message block where ip head starts
2147  *      mpp: the pointer to the pointer of the orignal
2148  *              packet message
2149  *      fin: packet information
2150  *      fdp: destination interface information
2151  *      if it is NULL, no interface information provided.
2152  *
2153  * This function is for fastroute/to/dup-to rules. It calls
2154  * pfil_make_lay2_packet to search route, make lay-2 header
2155  * ,and identify output queue for the IP packet.
2156  * The destination address depends on the following conditions:
2157  * 1: for fastroute rule, fdp is passed in as NULL, so the
2158  *      destination address is the IP Packet's destination address
2159  * 2: for to/dup-to rule, if an ip address is specified after
2160  *      the interface name, this address is the as destination
2161  *      address. Otherwise IP Packet's destination address is used
2162  */
2163 int fr_fastroute(mb, mpp, fin, fdp)
2164 mblk_t *mb, **mpp;
2165 fr_info_t *fin;
2166 frdest_t *fdp;
2167 {
2168         net_handle_t net_data_p;
2169         net_inject_t *inj;
2170         mblk_t *mp = NULL;
2171         frentry_t *fr = fin->fin_fr;
2172         qpktinfo_t *qpi;
2173         ip_t *ip;
2174 
2175         struct sockaddr_in *sin;
2176         struct sockaddr_in6 *sin6;
2177         struct sockaddr *sinp;
2178         ipf_stack_t *ifs = fin->fin_ifs;
2179 #ifndef sparc
2180         u_short __iplen, __ipoff;
2181 #endif
2182 
2183         if (fin->fin_v == 4) {
2184                 net_data_p = ifs->ifs_ipf_ipv4;
2185         } else if (fin->fin_v == 6) {
2186                 net_data_p = ifs->ifs_ipf_ipv6;
2187         } else {
2188                 return (-1);
2189         }
2190 
2191         /*
2192          * If we're forwarding (vs. injecting), check the src here, fin_ifp is
2193          * the src interface.
2194          */
2195         if (fdp != NULL &&
2196            !fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p))
2197                 return (-1);
2198 
2199         inj = net_inject_alloc(NETINFO_VERSION);
2200         if (inj == NULL)
2201                 return -1;
2202 
2203         ip = fin->fin_ip;
2204         qpi = fin->fin_qpi;
2205 
2206         /*
2207          * If this is a duplicate mblk then we want ip to point at that
2208          * data, not the original, if and only if it is already pointing at
2209          * the current mblk data.
2210          *
2211          * Otherwise, if it's not a duplicate, and we're not already pointing
2212          * at the current mblk data, then we want to ensure that the data
2213          * points at ip.
2214          */
2215 
2216         if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
2217                 ip = (ip_t *)mb->b_rptr;
2218         } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
2219                 qpi->qpi_m->b_rptr = (uchar_t *)ip;
2220                 qpi->qpi_off = 0;
2221         }
2222 
2223         /*
2224          * If there is another M_PROTO, we don't want it
2225          */
2226         if (*mpp != mb) {
2227                 mp = unlinkb(*mpp);
2228                 freeb(*mpp);
2229                 *mpp = mp;
2230         }
2231 
2232         sinp = (struct sockaddr *)&inj->ni_addr;
2233         sin = (struct sockaddr_in *)sinp;
2234         sin6 = (struct sockaddr_in6 *)sinp;
2235         bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
2236         inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2237         inj->ni_packet = mb;
2238 
2239         /*
2240          * In case we're here due to "to <if>" being used with
2241          * "keep state", check that we're going in the correct
2242          * direction.
2243          */
2244         if (fdp != NULL) {
2245                 if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
2246                         (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
2247                         goto bad_fastroute;
2248                 inj->ni_physical = (phy_if_t)fdp->fd_ifp;
2249                 if (fin->fin_v == 4) {
2250                         sin->sin_addr = fdp->fd_ip;
2251                 } else {
2252                         sin6->sin6_addr = fdp->fd_ip6.in6;
2253                 }
2254         } else {
2255                 if (fin->fin_v == 4) {
2256                         sin->sin_addr = ip->ip_dst;
2257                 } else {
2258                         sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
2259                 }
2260                 inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
2261         }
2262 
2263         /* If we're forwarding (vs. injecting), check the destinatation here. */
2264         if (fdp != NULL && !fr_forwarding_enabled(inj->ni_physical, net_data_p))
2265                 goto bad_fastroute;
2266 
2267         /*
2268          * Clear the hardware checksum flags from packets that we are doing
2269          * input processing on as leaving them set will cause the outgoing
2270          * NIC (if it supports hardware checksum) to calculate them anew,
2271          * using the old (correct) checksums as the pseudo value to start
2272          * from.
2273          */
2274         if (fin->fin_out == 0) {
2275                 DB_CKSUMFLAGS(mb) = 0;
2276         }
2277 
2278         *mpp = mb;
2279 
2280         if (fin->fin_out == 0) {
2281                 void *saveifp;
2282                 u_32_t pass;
2283 
2284                 saveifp = fin->fin_ifp;
2285                 fin->fin_ifp = (void *)inj->ni_physical;
2286                 fin->fin_flx &= ~FI_STATE;
2287                 fin->fin_out = 1;
2288                 (void) fr_acctpkt(fin, &pass);
2289                 fin->fin_fr = NULL;
2290                 if (!fr || !(fr->fr_flags & FR_RETMASK))
2291                         (void) fr_checkstate(fin, &pass);
2292                 if (fr_checknatout(fin, NULL) == -1)
2293                         goto bad_fastroute;
2294                 fin->fin_out = 0;
2295                 fin->fin_ifp = saveifp;
2296         }
2297 #ifndef sparc
2298         if (fin->fin_v == 4) {
2299                 __iplen = (u_short)ip->ip_len,
2300                 __ipoff = (u_short)ip->ip_off;
2301 
2302                 ip->ip_len = htons(__iplen);
2303                 ip->ip_off = htons(__ipoff);
2304         }
2305 #endif
2306 
2307         if (net_data_p) {
2308                 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
2309                         net_inject_free(inj);
2310                         return (-1);
2311                 }
2312         }
2313 
2314         ifs->ifs_fr_frouteok[0]++;
2315         net_inject_free(inj);
2316         return 0;
2317 bad_fastroute:
2318         net_inject_free(inj);
2319         freemsg(mb);
2320         ifs->ifs_fr_frouteok[1]++;
2321         return -1;
2322 }
2323 
2324 
2325 /* ------------------------------------------------------------------------ */
2326 /* Function:    ipf_hook4_out                                               */
2327 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2328 /* Parameters:  event(I)     - pointer to event                             */
2329 /*              info(I)      - pointer to hook information for firewalling  */
2330 /*                                                                          */
2331 /* Calling ipf_hook.                                                        */
2332 /* ------------------------------------------------------------------------ */
2333 /*ARGSUSED*/
2334 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
2335 {
2336         return ipf_hook(info, 1, 0, arg);
2337 }
2338 /*ARGSUSED*/
2339 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
2340 {
2341         return ipf_hook6(info, 1, 0, arg);
2342 }
2343 
2344 /* ------------------------------------------------------------------------ */
2345 /* Function:    ipf_hook4_in                                                */
2346 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2347 /* Parameters:  event(I)     - pointer to event                             */
2348 /*              info(I)      - pointer to hook information for firewalling  */
2349 /*                                                                          */
2350 /* Calling ipf_hook.                                                        */
2351 /* ------------------------------------------------------------------------ */
2352 /*ARGSUSED*/
2353 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
2354 {
2355         return ipf_hook(info, 0, 0, arg);
2356 }
2357 /*ARGSUSED*/
2358 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
2359 {
2360         return ipf_hook6(info, 0, 0, arg);
2361 }
2362 
2363 
2364 /* ------------------------------------------------------------------------ */
2365 /* Function:    ipf_hook4_loop_out                                          */
2366 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2367 /* Parameters:  event(I)     - pointer to event                             */
2368 /*              info(I)      - pointer to hook information for firewalling  */
2369 /*                                                                          */
2370 /* Calling ipf_hook.                                                        */
2371 /* ------------------------------------------------------------------------ */
2372 /*ARGSUSED*/
2373 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2374 {
2375         return ipf_hook(info, 1, FI_NOCKSUM, arg);
2376 }
2377 /*ARGSUSED*/
2378 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2379 {
2380         return ipf_hook6(info, 1, FI_NOCKSUM, arg);
2381 }
2382 
2383 /* ------------------------------------------------------------------------ */
2384 /* Function:    ipf_hookvndl3_in                                            */
2385 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2386 /* Parameters:  event(I)     - pointer to event                             */
2387 /*              info(I)      - pointer to hook information for firewalling  */
2388 /*                                                                          */
2389 /* The vnd hooks are private hooks to ON. They represents a layer 2         */
2390 /* datapath generally used to implement virtual machines. The driver sends  */
2391 /* along L3 packets of either type IP or IPv6. The ethertype to distinguish */
2392 /* them is in the upper 16 bits while the remaining bits are the            */
2393 /* traditional packet hook flags.                                           */
2394 /*                                                                          */
2395 /* They end up calling the appropriate traditional ip hooks.                */
2396 /* ------------------------------------------------------------------------ */
2397 /*ARGSUSED*/
2398 int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg)
2399 {
2400         return ipf_hook4_in(token, info, arg);
2401 }
2402 
2403 int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg)
2404 {
2405         return ipf_hook6_in(token, info, arg);
2406 }
2407 
2408 /*ARGSUSED*/
2409 int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg)
2410 {
2411         return ipf_hook4_out(token, info, arg);
2412 }
2413 
2414 int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg)
2415 {
2416         return ipf_hook6_out(token, info, arg);
2417 }
2418 
2419 /* Static constants used by ipf_hook_ether */
2420 static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
2421         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
2422 };
2423 static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
2424 static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 };
2425 
2426 /* ------------------------------------------------------------------------ */
2427 /* Function:    ipf_hook_ether                                              */
2428 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2429 /* Parameters:  token(I)     - pointer to event                             */
2430 /*              info(I)      - pointer to hook information for firewalling  */
2431 /*                                                                          */
2432 /* The ipf_hook_ether hook is currently private to illumos.  It represents  */
2433 /* a layer 2 datapath generally used by virtual machines.  Currently the    */
2434 /* hook is only used by the viona driver to pass along L2 frames for        */
2435 /* inspection.  It requires that the L2 ethernet header is contained within */
2436 /* a single dblk_t (however layers above the L2 header have no restrctions  */
2437 /* in ipf).  ipf does not currently support filtering on L2 fields (e.g.    */
2438 /* filtering on a MAC address or ethertype), however virtual machines do    */
2439 /* not have native IP stack instances where ipf traditionally hooks in.     */
2440 /* Instead this entry point is used to determine if the packet is unicast,  */
2441 /* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the   */
2442 /* traditional ip hooks for filtering.  Non IPv4 or non IPv6 packets are    */
2443 /* not subject to examination.                                              */
2444 /* ------------------------------------------------------------------------ */
2445 int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
2446     boolean_t out)
2447 {
2448         struct ether_header *ethp;
2449         hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
2450         mblk_t *mp;
2451         size_t offset, len;
2452         uint16_t etype;
2453         boolean_t v6;
2454 
2455         /*
2456          * viona will only pass us mblks with the L2 header contained in a
2457          * single data block.
2458          */
2459         mp = *hpe->hpe_mp;
2460         len = MBLKL(mp);
2461 
2462         VERIFY3S(len, >=, sizeof (struct ether_header));
2463 
2464         ethp = (struct ether_header *)mp->b_rptr;
2465         if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
2466                 struct ether_vlan_header *evh =
2467                     (struct ether_vlan_header *)ethp;
2468 
2469                 VERIFY3S(len, >=, sizeof (struct ether_vlan_header));
2470 
2471                 etype = ntohs(evh->ether_type);
2472                 offset = sizeof (*evh);
2473         } else {
2474                 offset = sizeof (*ethp);
2475         }
2476 
2477         /*
2478          * ipf only support filtering IPv4 and IPv6.  Ignore other types.
2479          */
2480         if (etype == ETHERTYPE_IP)
2481                 v6 = B_FALSE;
2482         else if (etype == ETHERTYPE_IPV6)
2483                 v6 = B_TRUE;
2484         else
2485                 return (0);
2486 
2487         if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
2488                 hpe->hpe_flags |= HPE_BROADCAST;
2489         else if (bcmp(ipf_eth_ipv4_mcast, ethp,
2490             sizeof (ipf_eth_ipv4_mcast)) == 0)
2491                 hpe->hpe_flags |= HPE_MULTICAST;
2492         else if (bcmp(ipf_eth_ipv6_mcast, ethp,
2493             sizeof (ipf_eth_ipv6_mcast)) == 0)
2494                 hpe->hpe_flags |= HPE_MULTICAST;
2495 
2496         /* Find the start of the IPv4 or IPv6 header */
2497         for (; offset >= len; len = MBLKL(mp)) {
2498                 offset -= len;
2499                 mp = mp->b_cont;
2500                 if (mp == NULL) {
2501                         freemsg(*hpe->hpe_mp);
2502                         *hpe->hpe_mp = NULL;
2503                         return (-1);
2504                 }
2505         }
2506         hpe->hpe_mb = mp;
2507         hpe->hpe_hdr = mp->b_rptr + offset;
2508 
2509         return (v6 ? ipf_hook6(info, out, 0, arg) :
2510             ipf_hook(info, out, 0, arg));
2511 }
2512 
2513 /* ------------------------------------------------------------------------ */
2514 /* Function:    ipf_hookviona_{in,out}                                      */
2515 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  token(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* The viona hooks are private hooks to illumos. They represent a layer 2   */
/* datapath generally used to implement virtual machines. The driver        */
/* sends along L2 packets.                                                  */
2522 /*                                                                          */
2523 /* They end up calling the appropriate traditional ip hooks.                */
2524 /* ------------------------------------------------------------------------ */
2525 int
2526 ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
2527 {
2528         return (ipf_hook_ether(token, info, arg, B_FALSE));
2529 }
2530 
2531 int
2532 ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
2533 {
2534         return (ipf_hook_ether(token, info, arg, B_TRUE));
2535 }
2536 
2537 /* ------------------------------------------------------------------------ */
2538 /* Function:    ipf_hook4_loop_in                                           */
2539 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2540 /* Parameters:  event(I)     - pointer to event                             */
2541 /*              info(I)      - pointer to hook information for firewalling  */
2542 /*                                                                          */
2543 /* Calling ipf_hook.                                                        */
2544 /* ------------------------------------------------------------------------ */
2545 /*ARGSUSED*/
2546 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2547 {
2548         return ipf_hook(info, 0, FI_NOCKSUM, arg);
2549 }
2550 /*ARGSUSED*/
2551 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2552 {
2553         return ipf_hook6(info, 0, FI_NOCKSUM, arg);
2554 }
2555 
2556 /* ------------------------------------------------------------------------ */
2557 /* Function:    ipf_hook                                                    */
2558 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2559 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
2560 /*              out(I)       - whether packet is going in or out            */
2561 /*              loopback(I)  - whether packet is a loopback packet or not   */
2562 /*                                                                          */
2563 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
2564 /* parameters out of the info structure and forms them up to be useful for  */
2565 /* calling ipfilter.                                                        */
2566 /* ------------------------------------------------------------------------ */
2567 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
2568 {
2569         hook_pkt_event_t *fw;
2570         ipf_stack_t *ifs;
2571         qpktinfo_t qpi;
2572         int rval, hlen;
2573         u_short swap;
2574         phy_if_t phy; 
2575         ip_t *ip;
2576 
2577         ifs = arg;
2578         fw = (hook_pkt_event_t *)info;
2579 
2580         ASSERT(fw != NULL);
2581         phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2582 
2583         ip = fw->hpe_hdr;
2584         swap = ntohs(ip->ip_len);
2585         ip->ip_len = swap;
2586         swap = ntohs(ip->ip_off);
2587         ip->ip_off = swap;
2588         hlen = IPH_HDR_LENGTH(ip);
2589 
2590         qpi.qpi_m = fw->hpe_mb;
2591         qpi.qpi_data = fw->hpe_hdr;
2592         qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2593         qpi.qpi_ill = (void *)phy;
2594         qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2595         if (qpi.qpi_flags)
2596                 qpi.qpi_flags |= FI_MBCAST;
2597         qpi.qpi_flags |= loopback;
2598 
2599         rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2600             &qpi, fw->hpe_mp, ifs);
2601 
2602         /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2603         if (rval == 0 && *(fw->hpe_mp) == NULL)
2604                 rval = 1;
2605 
2606         /* Notify IP the packet mblk_t and IP header pointers. */
2607         fw->hpe_mb = qpi.qpi_m;
2608         fw->hpe_hdr = qpi.qpi_data;
2609         if (rval == 0) {
2610                 ip = qpi.qpi_data;
2611                 swap = ntohs(ip->ip_len);
2612                 ip->ip_len = swap;
2613                 swap = ntohs(ip->ip_off);
2614                 ip->ip_off = swap;
2615         }
2616         return rval;
2617 
2618 }
2619 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2620 {
2621         hook_pkt_event_t *fw;
2622         int rval, hlen;
2623         qpktinfo_t qpi;
2624         phy_if_t phy; 
2625 
2626         fw = (hook_pkt_event_t *)info;
2627 
2628         ASSERT(fw != NULL);
2629         phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2630 
2631         hlen = sizeof (ip6_t);
2632 
2633         qpi.qpi_m = fw->hpe_mb;
2634         qpi.qpi_data = fw->hpe_hdr;
2635         qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2636         qpi.qpi_ill = (void *)phy;
2637         qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2638         if (qpi.qpi_flags)
2639                 qpi.qpi_flags |= FI_MBCAST;
2640         qpi.qpi_flags |= loopback;
2641 
2642         rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2643             &qpi, fw->hpe_mp, arg);
2644 
2645         /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2646         if (rval == 0 && *(fw->hpe_mp) == NULL)
2647                 rval = 1;
2648 
2649         /* Notify IP the packet mblk_t and IP header pointers. */
2650         fw->hpe_mb = qpi.qpi_m;
2651         fw->hpe_hdr = qpi.qpi_data;
2652         return rval;
2653 }
2654 
2655 
2656 /* ------------------------------------------------------------------------ */
2657 /* Function:    ipf_nic_event_v4                                            */
2658 /* Returns:     int - 0 == no problems encountered                          */
2659 /* Parameters:  event(I)     - pointer to event                             */
2660 /*              info(I)      - pointer to information about a NIC event     */
2661 /*                                                                          */
2662 /* Function to receive asynchronous NIC events from IP                      */
2663 /* ------------------------------------------------------------------------ */
2664 /*ARGSUSED*/
2665 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2666 {
2667         struct sockaddr_in *sin;
2668         hook_nic_event_t *hn;
2669         ipf_stack_t *ifs = arg;
2670         void *new_ifp = NULL;
2671 
2672         if (ifs->ifs_fr_running <= 0)
2673                 return (0);
2674 
2675         hn = (hook_nic_event_t *)info;
2676 
2677         switch (hn->hne_event)
2678         {
2679         case NE_PLUMB :
2680                 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2681                        ifs);
2682                 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2683                               hn->hne_data, ifs);
2684                 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2685                              hn->hne_data, ifs);
2686                 break;
2687 
2688         case NE_UNPLUMB :
2689                 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2690                 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2691                               ifs);
2692                 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2693                 break;
2694 
2695         case NE_ADDRESS_CHANGE :
2696                 /*
2697                  * We only respond to events for logical interface 0 because
2698                  * IPFilter only uses the first address given to a network
2699                  * interface.  We check for hne_lif==1 because the netinfo
2700                  * code maps adds 1 to the lif number so that it can return
2701                  * 0 to indicate "no more lifs" when walking them.
2702                  */
2703                 if (hn->hne_lif == 1) {
2704                         frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2705                             ifs);
2706                         sin = hn->hne_data;
2707                         fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2708                             ifs);
2709                 }
2710                 break;
2711 
2712 #if SOLARIS2 >= 10
2713         case NE_IFINDEX_CHANGE :
2714                 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2715 
2716                 if (hn->hne_data != NULL) {
2717                         /*
2718                          * The netinfo passes interface index as int (hne_data should be
2719                          * handled as a pointer to int), which is always 32bit. We need to
2720                          * convert it to void pointer here, since interfaces are
2721                          * represented as pointers to void in IPF. The pointers are 64 bits
2722                          * long on 64bit platforms. Doing something like
2723                          *      (void *)((int) x)
2724                          * will throw warning:
2725                          *   "cast to pointer from integer of different size"
2726                          * during 64bit compilation.
2727                          *
2728                          * The line below uses (size_t) to typecast int to
2729                          * size_t, which might be 64bit/32bit (depending
2730                          * on architecture). Once we have proper 64bit/32bit
2731                          * type (size_t), we can safely convert it to void pointer.
2732                          */
2733                         new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2734                         fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2735                         fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2736                         fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2737                 }
2738                 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2739                 break;
2740 #endif
2741 
2742         default :
2743                 break;
2744         }
2745 
2746         return 0;
2747 }
2748 
2749 
2750 /* ------------------------------------------------------------------------ */
2751 /* Function:    ipf_nic_event_v6                                            */
2752 /* Returns:     int - 0 == no problems encountered                          */
2753 /* Parameters:  event(I)     - pointer to event                             */
2754 /*              info(I)      - pointer to information about a NIC event     */
2755 /*                                                                          */
2756 /* Function to receive asynchronous NIC events from IP                      */
2757 /* ------------------------------------------------------------------------ */
2758 /*ARGSUSED*/
2759 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2760 {
2761         struct sockaddr_in6 *sin6;
2762         hook_nic_event_t *hn;
2763         ipf_stack_t *ifs = arg;
2764         void *new_ifp = NULL;
2765 
2766         if (ifs->ifs_fr_running <= 0)
2767                 return (0);
2768 
2769         hn = (hook_nic_event_t *)info;
2770 
2771         switch (hn->hne_event)
2772         {
2773         case NE_PLUMB :
2774                 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2775                        hn->hne_data, ifs);
2776                 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2777                               hn->hne_data, ifs);
2778                 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2779                              hn->hne_data, ifs);
2780                 break;
2781 
2782         case NE_UNPLUMB :
2783                 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2784                 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2785                               ifs);
2786                 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2787                 break;
2788 
2789         case NE_ADDRESS_CHANGE :
2790                 if (hn->hne_lif == 1) {
2791                         sin6 = hn->hne_data;
2792                         fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2793                                        ifs);
2794                 }
2795                 break;
2796 
2797 #if SOLARIS2 >= 10
2798         case NE_IFINDEX_CHANGE :
2799                 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2800                 if (hn->hne_data != NULL) {
2801                         /*
2802                          * The netinfo passes interface index as int (hne_data should be
2803                          * handled as a pointer to int), which is always 32bit. We need to
2804                          * convert it to void pointer here, since interfaces are
2805                          * represented as pointers to void in IPF. The pointers are 64 bits
2806                          * long on 64bit platforms. Doing something like
2807                          *      (void *)((int) x)
2808                          * will throw warning:
2809                          *   "cast to pointer from integer of different size"
2810                          * during 64bit compilation.
2811                          *
2812                          * The line below uses (size_t) to typecast int to
2813                          * size_t, which might be 64bit/32bit (depending
2814                          * on architecture). Once we have proper 64bit/32bit
2815                          * type (size_t), we can safely convert it to void pointer.
2816                          */
2817                         new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2818                         fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2819                         fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2820                         fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2821                 }
2822                 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2823                 break;
2824 #endif
2825 
2826         default :
2827                 break;
2828         }
2829 
2830         return 0;
2831 }
2832 
2833 /*
2834  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2835  * are needed in Solaris kernel only. We don't need them in
2836  * ipftest to pretend the ICMP/RST packet was sent as a response.
2837  */
2838 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2839 /* ------------------------------------------------------------------------ */
2840 /* Function:    fr_make_rst                                                 */
2841 /* Returns:     int - 0 on success, -1 on failure                           */
2842 /* Parameters:  fin(I) - pointer to packet information                      */
2843 /*                                                                          */
2844 /* We must alter the original mblks passed to IPF from IP stack via         */
2845 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations.  */
2846 /* IPF can basicaly do only these things with mblk representing the packet: */
2847 /*      leave it as it is (pass the packet)                                 */
2848 /*                                                                          */
2849 /*      discard it (block the packet)                                       */
2850 /*                                                                          */
2851 /*      alter it (i.e. NAT)                                                 */
2852 /*                                                                          */
2853 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2854 /* instead to IP stack via FW_HOOKS.                                        */
2855 /*                                                                          */
2856 /* The return-rst action for packets coming via NIC is handled as follows:  */
2857 /*      mblk with packet is discarded                                       */
2858 /*                                                                          */
2859 /*      new mblk with RST response is constructed and injected to network   */
2860 /*                                                                          */
2861 /* IPF can't inject packets to loopback interface, this is just another     */
2862 /* limitation we have to deal with here. The only option to send RST        */
2863 /* response to offending TCP packet coming via loopback is to alter it.     */
2864 /*                                                                          */
2865 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on      */
2866 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to     */
2867 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.                            */
2868 /* ------------------------------------------------------------------------ */
2869 int fr_make_rst(fin)
2870 fr_info_t *fin;
2871 {
2872         uint16_t tmp_port;
2873         int rv = -1;
2874         uint32_t old_ack;
2875         tcphdr_t *tcp = NULL;
2876         struct in_addr tmp_src;
2877 #ifdef USE_INET6
2878         struct in6_addr tmp_src6;
2879 #endif
2880 
2881         ASSERT(fin->fin_p == IPPROTO_TCP);
2882 
2883         /*
2884          * We do not need to adjust chksum, since it is not being checked by
2885          * Solaris IP stack for loopback clients.
2886          */
2887         if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2888             ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2889 
2890                 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2891                         /* Swap IPv4 addresses. */
2892                         tmp_src = fin->fin_ip->ip_src;
2893                         fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2894                         fin->fin_ip->ip_dst = tmp_src;
2895 
2896                         rv = 0;
2897                 }
2898                 else
2899                         tcp = NULL;
2900         }
2901 #ifdef USE_INET6
2902         else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2903             ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2904                 /*
2905                  * We are relying on fact the next header is TCP, which is true
2906                  * for regular TCP packets coming in over loopback.
2907                  */
2908                 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2909                         /* Swap IPv6 addresses. */
2910                         tmp_src6 = fin->fin_ip6->ip6_src;
2911                         fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2912                         fin->fin_ip6->ip6_dst = tmp_src6;
2913 
2914                         rv = 0;
2915                 }
2916                 else
2917                         tcp = NULL;
2918         }
2919 #endif
2920 
2921         if (tcp != NULL) {
2922                 /*
2923                  * Adjust TCP header:
2924                  *      swap ports,
2925                  *      set flags,
2926                  *      set correct ACK number
2927                  */
2928                 tmp_port = tcp->th_sport;
2929                 tcp->th_sport = tcp->th_dport;
2930                 tcp->th_dport = tmp_port;
2931                 old_ack = tcp->th_ack;
2932                 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2933                 tcp->th_seq = old_ack;
2934                 tcp->th_flags = TH_RST | TH_ACK;
2935         }
2936 
2937         return (rv);
2938 }
2939 
2940 /* ------------------------------------------------------------------------ */
2941 /* Function:    fr_make_icmp_v4                                             */
2942 /* Returns:     int - 0 on success, -1 on failure                           */
2943 /* Parameters:  fin(I) - pointer to packet information                      */
2944 /*                                                                          */
2945 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2946 /* what is going to happen here and why. Once you read the comment there,   */
2947 /* continue here with next paragraph.                                       */
2948 /*                                                                          */
2949 /* To turn IPv4 packet into ICMPv4 response packet, these things must       */
2950 /* happen here:                                                             */
2951 /*      (1) Original mblk is copied (duplicated).                           */
2952 /*                                                                          */
2953 /*      (2) ICMP header is created.                                         */
2954 /*                                                                          */
2955 /*      (3) Link ICMP header with copy of original mblk, we have ICMPv4     */
2956 /*          data ready then.                                                */
2957 /*                                                                          */
2958 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2959 /*                                                                          */
2960 /*      (5) The mblk containing original packet is trimmed to contain IP    */
2961 /*          header only and ICMP chksum is computed.                        */
2962 /*                                                                          */
2963 /*      (6) The ICMP header we have from (3) is linked to original mblk,    */
2964 /*          which now contains new IP header. If original packet was spread */
2965 /*          over several mblks, only the first mblk is kept.                */
2966 /* ------------------------------------------------------------------------ */
2967 static int fr_make_icmp_v4(fin)
2968 fr_info_t *fin;
2969 {
2970         struct in_addr tmp_src;
2971         tcphdr_t *tcp;
2972         struct icmp *icmp;
2973         mblk_t *mblk_icmp;
2974         mblk_t *mblk_ip;
2975         size_t icmp_pld_len;    /* octets to append to ICMP header */
2976         size_t orig_iphdr_len;  /* length of IP header only */
2977         uint32_t sum;
2978         uint16_t *buf;
2979         int len;
2980 
2981 
2982         if (fin->fin_v != 4)
2983                 return (-1);
2984 
2985         /*
2986          * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2987          * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2988          */
2989         tcp = (tcphdr_t *) fin->fin_dp;
2990 
2991         if ((fin->fin_p == IPPROTO_TCP) && 
2992             ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2993                 return (-1);
2994 
2995         /*
2996          * Step (1)
2997          *
2998          * Make copy of original mblk.
2999          *
3000          * We want to copy as much data as necessary, not less, not more.  The
3001          * ICMPv4 payload length for unreachable messages is:
3002          *      original IP header + 8 bytes of L4 (if there are any).
3003          *
3004          * We determine if there are at least 8 bytes of L4 data following IP
3005          * header first.
3006          */
3007         icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
3008                 ICMPERR_ICMPHLEN : fin->fin_dlen;
3009         /*
3010          * Since we don't want to copy more data than necessary, we must trim
3011          * the original mblk here.  The right way (STREAMish) would be to use
3012          * adjmsg() to trim it.  However we would have to calculate the length
3013          * argument for adjmsg() from pointers we already have here.
3014          *
3015          * Since we have pointers and offsets, it's faster and easier for
3016          * us to just adjust pointers by hand instead of using adjmsg().
3017          */
3018         fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
3019         fin->fin_m->b_wptr += icmp_pld_len;
3020         icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
3021 
3022         /*
3023          * Also we don't want to copy any L2 stuff, which might precede IP
3024          * header, so we have have to set b_rptr to point to the start of IP
3025          * header.
3026          */
3027         fin->fin_m->b_rptr += fin->fin_ipoff;
3028         if ((mblk_ip = copyb(fin->fin_m)) == NULL)
3029                 return (-1);
3030         fin->fin_m->b_rptr -= fin->fin_ipoff;
3031 
3032         /*
3033          * Step (2)
3034          *
3035          * Create an ICMP header, which will be appened to original mblk later.
3036          * ICMP header is just another mblk.
3037          */
3038         mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
3039         if (mblk_icmp == NULL) {
3040                 FREE_MB_T(mblk_ip);
3041                 return (-1);
3042         }
3043 
3044         MTYPE(mblk_icmp) = M_DATA;
3045         icmp = (struct icmp *) mblk_icmp->b_wptr;
3046         icmp->icmp_type = ICMP_UNREACH;
3047         icmp->icmp_code = fin->fin_icode & 0xFF;
3048         icmp->icmp_void = 0;
3049         icmp->icmp_cksum = 0;
3050         mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
3051 
3052         /*
3053          * Step (3)
3054          *
3055          * Complete ICMP packet - link ICMP header with L4 data from original
3056          * IP packet.
3057          */
3058         linkb(mblk_icmp, mblk_ip);
3059 
3060         /*
3061          * Step (4)
3062          *
3063          * Swap IP addresses and change IP header fields accordingly in
3064          * original IP packet.
3065          *
3066          * There is a rule option return-icmp as a dest for physical
3067          * interfaces. This option becomes useless for loopback, since IPF box
3068          * uses same address as a loopback destination. We ignore the option
3069          * here, the ICMP packet will always look like as it would have been
3070          * sent from the original destination host.
3071          */
3072         tmp_src = fin->fin_ip->ip_src;
3073         fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
3074         fin->fin_ip->ip_dst = tmp_src;
3075         fin->fin_ip->ip_p = IPPROTO_ICMP;
3076         fin->fin_ip->ip_sum = 0;
3077 
3078         /*
3079          * Step (5)
3080          *
3081          * We trim the orignal mblk to hold IP header only.
3082          */
3083         fin->fin_m->b_wptr = fin->fin_dp;
3084         orig_iphdr_len = fin->fin_m->b_wptr -
3085                             (fin->fin_m->b_rptr + fin->fin_ipoff);
3086         fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
3087                             orig_iphdr_len);
3088 
3089         /*
3090          * ICMP chksum calculation. The data we are calculating chksum for are
3091          * spread over two mblks, therefore we have to use two for loops.
3092          *
3093          * First for loop computes chksum part for ICMP header.
3094          */
3095         buf = (uint16_t *) icmp;
3096         len = ICMPERR_ICMPHLEN;
3097         for (sum = 0; len > 1; len -= 2)
3098                 sum += *buf++;
3099 
3100         /*
3101          * Here we add chksum part for ICMP payload.
3102          */
3103         len = icmp_pld_len;
3104         buf = (uint16_t *) mblk_ip->b_rptr;
3105         for (; len > 1; len -= 2)
3106                 sum += *buf++;
3107 
3108         /*
3109          * Chksum is done.
3110          */
3111         sum = (sum >> 16) + (sum & 0xffff);
3112         sum += (sum >> 16);
3113         icmp->icmp_cksum = ~sum; 
3114 
3115         /*
3116          * Step (6)
3117          *
3118          * Release all packet mblks, except the first one.
3119          */
3120         if (fin->fin_m->b_cont != NULL) {
3121                 FREE_MB_T(fin->fin_m->b_cont);
3122         }
3123 
3124         /*
3125          * Append ICMP payload to first mblk, which already contains new IP
3126          * header.
3127          */
3128         linkb(fin->fin_m, mblk_icmp);
3129 
3130         return (0);
3131 }
3132 
3133 #ifdef USE_INET6
3134 /* ------------------------------------------------------------------------ */
3135 /* Function:    fr_make_icmp_v6                                             */
3136 /* Returns:     int - 0 on success, -1 on failure                           */
3137 /* Parameters:  fin(I) - pointer to packet information                      */
3138 /*                                                                          */
3139 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
3140 /* what and why is going to happen here. Once you read the comment there,   */
3141 /* continue here with next paragraph.                                       */
3142 /*                                                                          */
3143 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
3144 /* The algorithm is fairly simple:                                          */
3145 /*      1) We need to get copy of complete mblk.                            */
3146 /*                                                                          */
3147 /*      2) New ICMPv6 header is created.                                    */
3148 /*                                                                          */
3149 /*      3) The copy of original mblk with packet is linked to ICMPv6        */
3150 /*         header.                                                          */
3151 /*                                                                          */
3152 /*      4) The checksum must be adjusted.                                   */
3153 /*                                                                          */
3154 /*      5) IP addresses in original mblk are swapped and IP header data     */
3155 /*         are adjusted (protocol number).                                  */
3156 /*                                                                          */
3157 /*      6) Original mblk is trimmed to hold IPv6 header only, then it is    */
3158 /*         linked with the ICMPv6 data we got from (3).                     */
3159 /* ------------------------------------------------------------------------ */
3160 static int fr_make_icmp_v6(fin)
3161 fr_info_t *fin;
3162 {
3163         struct icmp6_hdr *icmp6;
3164         tcphdr_t *tcp;
3165         struct in6_addr tmp_src6;
3166         size_t icmp_pld_len;
3167         mblk_t *mblk_ip, *mblk_icmp;
3168 
3169         if (fin->fin_v != 6)
3170                 return (-1);
3171 
3172         /*
3173          * If we are dealing with TCP, then packet must SYN/FIN to be routed by
3174          * IP stack. If it is not SYN/FIN, then we must drop it silently.
3175          */
3176         tcp = (tcphdr_t *) fin->fin_dp;
3177 
3178         if ((fin->fin_p == IPPROTO_TCP) && 
3179             ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
3180                 return (-1);
3181 
3182         /*
3183          * Step (1)
3184          *
3185          * We need to copy complete packet in case of IPv6, no trimming is
3186          * needed (except the L2 headers).
3187          */
3188         icmp_pld_len = M_LEN(fin->fin_m);
3189         fin->fin_m->b_rptr += fin->fin_ipoff;
3190         if ((mblk_ip = copyb(fin->fin_m)) == NULL)
3191                 return (-1);
3192         fin->fin_m->b_rptr -= fin->fin_ipoff;
3193 
3194         /*
3195          * Step (2)
3196          *
3197          * Allocate and create ICMP header.
3198          */
3199         mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
3200                         BPRI_HI);
3201 
3202         if (mblk_icmp == NULL)
3203                 return (-1);
3204         
3205         MTYPE(mblk_icmp) = M_DATA;
3206         icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
3207         icmp6->icmp6_type = ICMP6_DST_UNREACH;
3208         icmp6->icmp6_code = fin->fin_icode & 0xFF;
3209         icmp6->icmp6_data32[0] = 0;
3210         mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
3211         
3212         /*
3213          * Step (3)
3214          *
3215          * Link the copy of IP packet to ICMP header.
3216          */
3217         linkb(mblk_icmp, mblk_ip);
3218 
3219         /* 
3220          * Step (4)
3221          *
3222          * Calculate chksum - this is much more easier task than in case of
3223          * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
3224          * We are making compensation just for change of packet length.
3225          */
3226         icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
3227 
3228         /*
3229          * Step (5)
3230          *
3231          * Swap IP addresses.
3232          */
3233         tmp_src6 = fin->fin_ip6->ip6_src;
3234         fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
3235         fin->fin_ip6->ip6_dst = tmp_src6;
3236 
3237         /*
3238          * and adjust IP header data.
3239          */
3240         fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
3241         fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
3242 
3243         /*
3244          * Step (6)
3245          *
3246          * We must release all linked mblks from original packet and keep only
3247          * the first mblk with IP header to link ICMP data.
3248          */
3249         fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
3250 
3251         if (fin->fin_m->b_cont != NULL) {
3252                 FREE_MB_T(fin->fin_m->b_cont);
3253         }
3254 
3255         /*
3256          * Append ICMP payload to IP header.
3257          */
3258         linkb(fin->fin_m, mblk_icmp);
3259 
3260         return (0);
3261 }
3262 #endif  /* USE_INET6 */
3263 
3264 /* ------------------------------------------------------------------------ */
3265 /* Function:    fr_make_icmp                                                */
3266 /* Returns:     int - 0 on success, -1 on failure                           */
3267 /* Parameters:  fin(I) - pointer to packet information                      */
3268 /*                                                                          */
3269 /* We must alter the original mblks passed to IPF from IP stack via         */
3270 /* FW_HOOKS. The reasons why we must alter packet are discussed within      */
3271 /* comment at fr_make_rst() function.                                       */
3272 /*                                                                          */
3273 /* The fr_make_icmp() function acts as a wrapper, which passes the code     */
3274 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on         */
3275 /* protocol version. However there are some details, which are common to    */
3276 /* both IP versions. The details are going to be explained here.            */
3277 /*                                                                          */
3278 /* The packet looks as follows:                                             */
3279 /*    xxx | IP hdr | IP payload    ...  |                                   */
3280 /*    ^   ^        ^                    ^                                   */
3281 /*    |   |        |                    |                                   */
3282 /*    |   |        |            fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
3283 /*    |   |        |                                                        */
3284 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
3285 /*    |   |                                                                 */
3286 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
3287 /*    |      of loopback)                                                   */
3288 /*    |                                                                     */
3289 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC            */
3290 /*                                                                          */
3291 /* All relevant IP headers are pulled up into the first mblk. It happened   */
3292 /* well in advance before the matching rule was found (the rule, which took */
3293 /* us here, to fr_make_icmp() function).                                    */
3294 /*                                                                          */
3295 /* Both functions will turn packet passed in fin->fin_m mblk into a new          */
3296 /* packet. New packet will be represented as chain of mblks.                */
3297 /* orig mblk |- b_cont ---.                                                 */
3298 /*    ^                    `-> ICMP hdr |- b_cont--.                     */
3299 /*    |                           ^                 `-> duped orig mblk          */
3300 /*    |                           |                             ^           */
3301 /*    `- The original mblk        |                             |           */
3302 /*       will be trimmed to       |                             |           */
3303 /*       to contain IP header     |                             |           */
3304 /*       only                     |                             |           */
3305 /*                                |                             |           */
3306 /*                                `- This is newly              |           */
3307 /*                                   allocated mblk to          |           */
3308 /*                                   hold ICMPv6 data.          |           */
3309 /*                                                              |           */
3310 /*                                                              |           */
3311 /*                                                              |           */
3312 /*          This is the copy of original mblk, it will contain -'           */
3313 /*          orignal IP  packet in case of ICMPv6. In case of                */
3314 /*          ICMPv4 it will contain up to 8 bytes of IP payload              */
3315 /*          (TCP/UDP/L4) data from original packet.                         */
3316 /* ------------------------------------------------------------------------ */
3317 int fr_make_icmp(fin)
3318 fr_info_t *fin;
3319 {
3320         int rv;
3321         
3322         if (fin->fin_v == 4)
3323                 rv = fr_make_icmp_v4(fin);
3324 #ifdef USE_INET6
3325         else if (fin->fin_v == 6)
3326                 rv = fr_make_icmp_v6(fin);
3327 #endif
3328         else
3329                 rv = -1;
3330 
3331         return (rv);
3332 }
3333 
/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum                                                  */
/* Returns:     uint32_t - unfolded sum of the 16 bit words in buf          */
/* Parameters:  buf - pointer to buffer to sum up, must be 16 bit aligned   */
/*              len - length of buffer buf in bytes                         */
/*                                                                          */
/* Adds up buf as a sequence of 16 bit words read in memory order. An odd   */
/* trailing byte is counted as if it were followed by a zero byte. Carries  */
/* above 16 bits are not folded here, that is left to the caller.           */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
        const uint16_t  *word = (const uint16_t *)buf;
        uint32_t        acc;
        unsigned int    left;

        /*
         * Consume the buffer one complete 16 bit word at a time.
         */
        for (acc = 0, left = len; left >= 2; left -= 2)
                acc += *word++;

        /*
         * One byte left over: it forms the first byte of the last word, the
         * second byte of that word is taken as zero.
         */
        if (left != 0)
                acc += htons((*(const unsigned char *)word) << 8);

        return (acc);
}
3360 
3361 /* ------------------------------------------------------------------------ */
3362 /* Function:    fr_calc_chksum                                              */
3363 /* Returns:     void                                                        */
3364 /* Parameters:  fin - pointer to fr_info_t instance with packet data        */
3365 /*              pkt - pointer to duplicated packet                          */
3366 /*                                                                          */
3367 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP        */
3368 /* versions.                                                                */
3369 /* ------------------------------------------------------------------------ */
3370 void fr_calc_chksum(fin, pkt)
3371 fr_info_t *fin;
3372 mb_t *pkt;
3373 {
3374         struct pseudo_hdr {
3375                 union {
3376                         struct in_addr  in4;
3377 #ifdef USE_INET6
3378                         struct in6_addr in6;
3379 #endif
3380                 } src_addr;
3381                 union {
3382                         struct in_addr  in4;
3383 #ifdef USE_INET6
3384                         struct in6_addr in6;
3385 #endif
3386                 } dst_addr;
3387                 char            zero;
3388                 char            proto;
3389                 uint16_t        len;
3390         }       phdr;
3391         uint32_t        sum, ip_sum;
3392         void    *buf;
3393         uint16_t        *l4_csum_p;
3394         tcphdr_t        *tcp;
3395         udphdr_t        *udp;
3396         icmphdr_t       *icmp;
3397 #ifdef USE_INET6
3398         struct icmp6_hdr        *icmp6;
3399 #endif
3400         ip_t            *ip;
3401         unsigned int    len;
3402         int             pld_len;
3403 
3404         /*
3405          * We need to pullup the packet to the single continuous buffer to avoid
3406          * potential misaligment of b_rptr member in mblk chain.
3407          */
3408         if (pullupmsg(pkt, -1) == 0) {
3409                 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
3410                     " will not be computed by IPF");
3411                 return;
3412         }
3413 
3414         /*
3415          * It is guaranteed IP header starts right at b_rptr, because we are
3416          * working with a copy of the original packet.
3417          *
3418          * Compute pseudo header chksum for TCP and UDP.
3419          */
3420         if ((fin->fin_p == IPPROTO_UDP) ||
3421             (fin->fin_p == IPPROTO_TCP)) {
3422                 bzero(&phdr, sizeof (phdr));
3423 #ifdef USE_INET6
3424                 if (fin->fin_v == 6) {
3425                         phdr.src_addr.in6 = fin->fin_srcip6;
3426                         phdr.dst_addr.in6 = fin->fin_dstip6;
3427                 } else {
3428                         phdr.src_addr.in4 = fin->fin_src;
3429                         phdr.dst_addr.in4 = fin->fin_dst;
3430                 }
3431 #else
3432                 phdr.src_addr.in4 = fin->fin_src;
3433                 phdr.dst_addr.in4 = fin->fin_dst;
3434 #endif
3435                 phdr.zero = (char) 0;
3436                 phdr.proto = fin->fin_p;
3437                 phdr.len = htons((uint16_t)fin->fin_dlen);
3438                 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
3439         } else {
3440                 sum = 0;
3441         }
3442 
3443         /*
3444          * Set pointer to the L4 chksum field in the packet, set buf pointer to
3445          * the L4 header start.
3446          */
3447         switch (fin->fin_p) {
3448                 case IPPROTO_UDP:
3449                         udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3450                         l4_csum_p = &udp->uh_sum;
3451                         buf = udp;
3452                         break;
3453                 case IPPROTO_TCP:
3454                         tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3455                         l4_csum_p = &tcp->th_sum;
3456                         buf = tcp;
3457                         break;
3458                 case IPPROTO_ICMP:
3459                         icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3460                         l4_csum_p = &icmp->icmp_cksum;
3461                         buf = icmp;
3462                         break;
3463 #ifdef USE_INET6
3464                 case IPPROTO_ICMPV6:
3465                         icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
3466                         l4_csum_p = &icmp6->icmp6_cksum;
3467                         buf = icmp6;
3468                         break;
3469 #endif
3470                 default:
3471                         l4_csum_p = NULL;
3472         }
3473 
3474         /*
3475          * Compute L4 chksum if needed.
3476          */
3477         if (l4_csum_p != NULL) {
3478                 *l4_csum_p = (uint16_t)0;
3479                 pld_len = fin->fin_dlen;
3480                 len = pkt->b_wptr - (unsigned char *)buf;
3481                 ASSERT(len == pld_len);
3482                 /*
3483                  * Add payload sum to pseudoheader sum.
3484                  */
3485                 sum += fr_buf_sum(buf, len);
3486                 while (sum >> 16)
3487                         sum = (sum & 0xFFFF) + (sum >> 16);
3488 
3489                 *l4_csum_p = ~((uint16_t)sum);
3490                 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
3491         }
3492 
3493         /*
3494          * The IP header chksum is needed just for IPv4.
3495          */
3496         if (fin->fin_v == 4) {
3497                 /*
3498                  * Compute IPv4 header chksum.
3499                  */
3500                 ip = (ip_t *)pkt->b_rptr;
3501                 ip->ip_sum = (uint16_t)0;
3502                 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
3503                 while (ip_sum >> 16)
3504                         ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
3505 
3506                 ip->ip_sum = ~((uint16_t)ip_sum);
3507                 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
3508         }
3509 
3510         return;
3511 }
3512 
3513 #endif  /* _KERNEL && SOLARIS2 >= 10 */