1 /*
   2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
   3  *
   4  * See the IPFILTER.LICENCE file for details on licencing.
   5  *
   6  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   7  *
   8  * Copyright 2018 Joyent, Inc.
   9  */
  10 
  11 #if !defined(lint)
  12 static const char sccsid[] = "@(#)ip_fil_solaris.c      1.7 07/22/06 (C) 1993-2000 Darren Reed";
  13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
  14 #endif
  15 
  16 #include <sys/types.h>
  17 #include <sys/errno.h>
  18 #include <sys/param.h>
  19 #include <sys/cpuvar.h>
  20 #include <sys/open.h>
  21 #include <sys/ioctl.h>
  22 #include <sys/filio.h>
  23 #include <sys/systm.h>
  24 #include <sys/strsubr.h>
  25 #include <sys/strsun.h>
  26 #include <sys/cred.h>
  27 #include <sys/ddi.h>
  28 #include <sys/sunddi.h>
  29 #include <sys/ksynch.h>
  30 #include <sys/kmem.h>
  31 #include <sys/mac_provider.h>
  32 #include <sys/mkdev.h>
  33 #include <sys/protosw.h>
  34 #include <sys/socket.h>
  35 #include <sys/dditypes.h>
  36 #include <sys/cmn_err.h>
  37 #include <sys/zone.h>
  38 #include <net/if.h>
  39 #include <net/af.h>
  40 #include <net/route.h>
  41 #include <netinet/in.h>
  42 #include <netinet/in_systm.h>
  43 #include <netinet/ip.h>
  44 #include <netinet/ip_var.h>
  45 #include <netinet/tcp.h>
  46 #include <netinet/udp.h>
  47 #include <netinet/tcpip.h>
  48 #include <netinet/ip_icmp.h>
  49 #include "netinet/ip_compat.h"
  50 #ifdef  USE_INET6
  51 # include <netinet/icmp6.h>
  52 #endif
  53 #include "netinet/ip_fil.h"
  54 #include "netinet/ip_nat.h"
  55 #include "netinet/ip_frag.h"
  56 #include "netinet/ip_state.h"
  57 #include "netinet/ip_auth.h"
  58 #include "netinet/ip_proxy.h"
  59 #include "netinet/ipf_stack.h"
  60 #ifdef  IPFILTER_LOOKUP
  61 # include "netinet/ip_lookup.h"
  62 #endif
  63 #include <inet/ip_ire.h>
  64 
  65 #include <sys/md5.h>
  66 #include <sys/neti.h>
  67 
  68 static  int     frzerostats __P((caddr_t, ipf_stack_t *));
  69 static  int     fr_setipfloopback __P((int, ipf_stack_t *));
  70 static  int     fr_enableipf __P((ipf_stack_t *, int));
  71 static  int     fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
  72 static  int     ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
  73 static  int     ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
  74 static  int     ipf_hook __P((hook_data_t, int, int, void *));
  75 static  int     ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
  76 static  int     ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
  77 static  int     ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
  78     void *));
  79 static  int     ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
  80 static  int     ipf_hook4 __P((hook_data_t, int, int, void *));
  81 static  int     ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
  82 static  int     ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
  83 static  int     ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
  84     void *));
  85 static  int     ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
  86     void *));
  87 static  int     ipf_hook6 __P((hook_data_t, int, int, void *));
  88 static  int     ipf_hookvndl3v4_in __P((hook_event_token_t, hook_data_t,
  89     void *));
  90 static  int     ipf_hookvndl3v6_in __P((hook_event_token_t, hook_data_t,
  91     void *));
  92 static  int     ipf_hookvndl3v4_out __P((hook_event_token_t, hook_data_t,
  93     void *));
  94 static  int     ipf_hookvndl3v6_out __P((hook_event_token_t, hook_data_t,
  95     void *));
  96 
  97 static  int     ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *));
  98 static  int     ipf_hookviona_out __P((hook_event_token_t, hook_data_t,
  99     void *));
 100 
 101 extern  int     ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
 102 extern  int     ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
 103 
 104 static int      ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *,
 105     const char *, const char *, const char *));
 106 static int      ipf_hook_instance_notify __P((hook_notify_cmd_t, void *,
 107     const char *, const char *, const char *));
 108 
 109 #if SOLARIS2 < 10
 110 #if SOLARIS2 >= 7
 111 u_int           *ip_ttl_ptr = NULL;
 112 u_int           *ip_mtudisc = NULL;
 113 # if SOLARIS2 >= 8
 114 int             *ip_forwarding = NULL;
 115 u_int           *ip6_forwarding = NULL;
 116 # else
 117 u_int           *ip_forwarding = NULL;
 118 # endif
 119 #else
 120 u_long          *ip_ttl_ptr = NULL;
 121 u_long          *ip_mtudisc = NULL;
 122 u_long          *ip_forwarding = NULL;
 123 #endif
 124 #endif
 125 
 126 vmem_t  *ipf_minor;     /* minor number arena */
 127 void    *ipf_state;     /* DDI state */
 128 
 129 /*
 130  * GZ-controlled and per-zone stacks:
 131  *
 132  * For each non-global zone, we create two ipf stacks: the per-zone stack and
 133  * the GZ-controlled stack.  The per-zone stack can be controlled and observed
 134  * from inside the zone or from the global zone.  The GZ-controlled stack can
 135  * only be controlled and observed from the global zone (though the rules
 136  * still only affect that non-global zone).
 137  *
 * The hooks of the two stacks are arranged so that the GZ-controlled stack
 * is always "outermost" with respect to the zone.  The traffic flow then
 * looks like this:
 141  *
 142  * Inbound:
 143  *
 144  *     nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
 145  *
 146  * Outbound:
 147  *
 148  *     nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
 149  */
 150 
 151 /* IPv4 hook names */
 152 char *hook4_nicevents =         "ipfilter_hook4_nicevents";
 153 char *hook4_nicevents_gz =      "ipfilter_hook4_nicevents_gz";
 154 char *hook4_in =                "ipfilter_hook4_in";
 155 char *hook4_in_gz =             "ipfilter_hook4_in_gz";
 156 char *hook4_out =               "ipfilter_hook4_out";
 157 char *hook4_out_gz =            "ipfilter_hook4_out_gz";
 158 char *hook4_loop_in =           "ipfilter_hook4_loop_in";
 159 char *hook4_loop_in_gz =        "ipfilter_hook4_loop_in_gz";
 160 char *hook4_loop_out =          "ipfilter_hook4_loop_out";
 161 char *hook4_loop_out_gz =       "ipfilter_hook4_loop_out_gz";
 162 
 163 /* IPv6 hook names */
 164 char *hook6_nicevents =         "ipfilter_hook6_nicevents";
 165 char *hook6_nicevents_gz =      "ipfilter_hook6_nicevents_gz";
 166 char *hook6_in =                "ipfilter_hook6_in";
 167 char *hook6_in_gz =             "ipfilter_hook6_in_gz";
 168 char *hook6_out =               "ipfilter_hook6_out";
 169 char *hook6_out_gz =            "ipfilter_hook6_out_gz";
 170 char *hook6_loop_in =           "ipfilter_hook6_loop_in";
 171 char *hook6_loop_in_gz =        "ipfilter_hook6_loop_in_gz";
 172 char *hook6_loop_out =          "ipfilter_hook6_loop_out";
 173 char *hook6_loop_out_gz =       "ipfilter_hook6_loop_out_gz";
 174 
 175 /* vnd IPv4/v6 hook names */
 176 char *hook4_vnd_in =            "ipfilter_hookvndl3v4_in";
 177 char *hook4_vnd_in_gz =         "ipfilter_hookvndl3v4_in_gz";
 178 char *hook6_vnd_in =            "ipfilter_hookvndl3v6_in";
 179 char *hook6_vnd_in_gz =         "ipfilter_hookvndl3v6_in_gz";
 180 char *hook4_vnd_out =           "ipfilter_hookvndl3v4_out";
 181 char *hook4_vnd_out_gz =        "ipfilter_hookvndl3v4_out_gz";
 182 char *hook6_vnd_out =           "ipfilter_hookvndl3v6_out";
 183 char *hook6_vnd_out_gz =        "ipfilter_hookvndl3v6_out_gz";
 184 
 185 /* viona hook names */
 186 char *hook_viona_in =           "ipfilter_hookviona_in";
 187 char *hook_viona_in_gz =        "ipfilter_hookviona_in_gz";
 188 char *hook_viona_out =          "ipfilter_hookviona_out";
 189 char *hook_viona_out_gz =       "ipfilter_hookviona_out_gz";
 190 
 191 /* ------------------------------------------------------------------------ */
 192 /* Function:    ipldetach                                                   */
 193 /* Returns:     int - 0 == success, else error.                             */
 194 /* Parameters:  Nil                                                         */
 195 /*                                                                          */
 196 /* This function is responsible for undoing anything that might have been   */
 197 /* done in a call to iplattach().  It must be able to clean up from a call  */
 198 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
 199 /* configures a table to be so large that we cannot allocate enough memory  */
 200 /* for it.                                                                  */
 201 /* ------------------------------------------------------------------------ */
 202 int ipldetach(ifs)
 203 ipf_stack_t *ifs;
 204 {
 205 
 206         ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
 207 
 208 #if SOLARIS2 < 10
 209 
 210         if (ifs->ifs_fr_control_forwarding & 2) {
 211                 if (ip_forwarding != NULL)
 212                         *ip_forwarding = 0;
 213 #if SOLARIS2 >= 8
 214                 if (ip6_forwarding != NULL)
 215                         *ip6_forwarding = 0;
 216 #endif
 217         }
 218 #endif
 219 
 220         /*
 221          * This lock needs to be dropped around the net_hook_unregister calls
 222          * because we can deadlock here with:
 223          * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
 224          * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
 225          */
 226         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 227 
 228 #define UNDO_HOOK(_f, _b, _e, _h)                                       \
 229         do {                                                            \
 230                 if (ifs->_f != NULL) {                                       \
 231                         if (ifs->_b) {                                       \
 232                                 int tmp = net_hook_unregister(ifs->_f,       \
 233                                            _e, ifs->_h);             \
 234                                 ifs->_b = (tmp != 0 && tmp != ENXIO);        \
 235                                 if (!ifs->_b && ifs->_h != NULL) {        \
 236                                         hook_free(ifs->_h);          \
 237                                         ifs->_h = NULL;                      \
 238                                 }                                       \
 239                         } else if (ifs->_h != NULL) {                        \
 240                                 hook_free(ifs->_h);                  \
 241                                 ifs->_h = NULL;                              \
 242                         }                                               \
 243                 }                                                       \
 244                 _NOTE(CONSTCOND)                                        \
 245         } while (0)
 246 
 247         /*
 248          * Remove IPv6 Hooks
 249          */
 250         if (ifs->ifs_ipf_ipv6 != NULL) {
 251                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
 252                           NH_PHYSICAL_IN, ifs_ipfhook6_in);
 253                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
 254                           NH_PHYSICAL_OUT, ifs_ipfhook6_out);
 255                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
 256                           NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
 257                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
 258                           NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
 259                 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
 260                           NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
 261 
 262                 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
 263                         goto detach_failed;
 264                 ifs->ifs_ipf_ipv6 = NULL;
 265         }
 266 
 267         /*
 268          * Remove IPv4 Hooks
 269          */
 270         if (ifs->ifs_ipf_ipv4 != NULL) {
 271                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
 272                           NH_PHYSICAL_IN, ifs_ipfhook4_in);
 273                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
 274                           NH_PHYSICAL_OUT, ifs_ipfhook4_out);
 275                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
 276                           NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
 277                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
 278                           NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
 279                 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
 280                           NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
 281 
 282                 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
 283                         goto detach_failed;
 284                 ifs->ifs_ipf_ipv4 = NULL;
 285         }
 286 
 287         /*
 288          * Remove VND hooks
 289          */
 290         if (ifs->ifs_ipf_vndl3v4 != NULL) {
 291                 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_in,
 292                     NH_PHYSICAL_IN, ifs_ipfhookvndl3v4_in);
 293                 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_out,
 294                     NH_PHYSICAL_OUT, ifs_ipfhookvndl3v4_out);
 295 
 296                 if (net_protocol_release(ifs->ifs_ipf_vndl3v4) != 0)
 297                         goto detach_failed;
 298                 ifs->ifs_ipf_vndl3v4 = NULL;
 299         }
 300 
 301         if (ifs->ifs_ipf_vndl3v6 != NULL) {
 302                 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_in,
 303                     NH_PHYSICAL_IN, ifs_ipfhookvndl3v6_in);
 304                 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_out,
 305                     NH_PHYSICAL_OUT, ifs_ipfhookvndl3v6_out);
 306 
 307                 if (net_protocol_release(ifs->ifs_ipf_vndl3v6) != 0)
 308                         goto detach_failed;
 309                 ifs->ifs_ipf_vndl3v6 = NULL;
 310         }
 311 
 312         /*
 313          * Remove notification of viona hooks
 314          */
 315         net_instance_notify_unregister(ifs->ifs_netid,
 316             ipf_hook_instance_notify);
 317 
 318 #undef UNDO_HOOK
 319 
 320         /*
 321          * Normally, viona will unregister itself before ipldetach() is called,
 322          * so these will be no-ops, but out of caution, we try to make sure
 323          * we've removed any of our references.
 324          */
 325         (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
 326             NH_PHYSICAL_IN);
 327         (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
 328             NH_PHYSICAL_OUT);
 329 
 330         {
 331                 char netidstr[12]; /* Large enough for INT_MAX + NUL */
 332                 (void) snprintf(netidstr, sizeof (netidstr), "%d",
 333                     ifs->ifs_netid);
 334 
 335                 /*
 336                  * The notify callbacks expect the netid value passed as a
 337                  * string in the third argument.  To prevent confusion if
 338                  * traced, we pass the same value the nethook framework would
 339                  * pass, even though the callback does not currently use the
 340                  * value.
 341                  */
 342                 (void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr,
 343                     NULL, Hn_VIONA);
 344         }
 345 
 346 #ifdef  IPFDEBUG
 347         cmn_err(CE_CONT, "ipldetach()\n");
 348 #endif
 349 
 350         WRITE_ENTER(&ifs->ifs_ipf_global);
 351         fr_deinitialise(ifs);
 352 
 353         (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
 354         (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
 355 
 356         if (ifs->ifs_ipf_locks_done == 1) {
 357                 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
 358                 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
 359                 RW_DESTROY(&ifs->ifs_ipf_tokens);
 360                 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
 361                 ifs->ifs_ipf_locks_done = 0;
 362         }
 363 
 364         if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
 365             ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
 366             ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
 367             ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
 368             ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
 369                 return -1;
 370 
 371         return 0;
 372 
 373 detach_failed:
 374         WRITE_ENTER(&ifs->ifs_ipf_global);
 375         return -1;
 376 }
 377 
 378 int iplattach(ifs)
 379 ipf_stack_t *ifs;
 380 {
 381 #if SOLARIS2 < 10
 382         int i;
 383 #endif
 384         netid_t id = ifs->ifs_netid;
 385 
 386 #ifdef  IPFDEBUG
 387         cmn_err(CE_CONT, "iplattach()\n");
 388 #endif
 389 
 390         ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
 391         ifs->ifs_fr_flags = IPF_LOGGING;
 392 #ifdef _KERNEL
 393         ifs->ifs_fr_update_ipid = 0;
 394 #else
 395         ifs->ifs_fr_update_ipid = 1;
 396 #endif
 397         ifs->ifs_fr_minttl = 4;
 398         ifs->ifs_fr_icmpminfragmtu = 68;
 399 #if defined(IPFILTER_DEFAULT_BLOCK)
 400         ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
 401 #else
 402         ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
 403 #endif
 404 
 405         bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
 406         MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
 407         MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
 408         RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
 409         RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
 410         ifs->ifs_ipf_locks_done = 1;
 411 
 412         if (fr_initialise(ifs) < 0)
 413                 return -1;
 414 
 415         /*
 416          * For incoming packets, we want the GZ-controlled hooks to run before
 417          * the per-zone hooks, regardless of what order they're are installed.
 418          * See the "GZ-controlled and per-zone stacks" comment block at the top
 419          * of this file.
 420          */
 421 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a)                           \
 422         HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);     \
 423         (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER;      \
 424         (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
 425 
 426         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
 427                   hook4_nicevents, hook4_nicevents_gz, ifs);
 428         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
 429                   hook4_in, hook4_in_gz, ifs);
 430         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
 431                   hook4_loop_in, hook4_loop_in_gz, ifs);
 432 
 433         /*
 434          * For outgoing packets, we want the GZ-controlled hooks to run after
 435          * the per-zone hooks, regardless of what order they're are installed.
 436          * See the "GZ-controlled and per-zone stacks" comment block at the top
 437          * of this file.
 438          */
 439 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a)                            \
 440         HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);     \
 441         (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE;      \
 442         (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
 443 
 444         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
 445                   hook4_out, hook4_out_gz, ifs);
 446         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
 447                   hook4_loop_out, hook4_loop_out_gz, ifs);
 448 
 449         /*
 450          * If we hold this lock over all of the net_hook_register calls, we
 451          * can cause a deadlock to occur with the following lock ordering:
 452          * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
 453          * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
 454          */
 455         RWLOCK_EXIT(&ifs->ifs_ipf_global);
 456 
 457         /*
 458          * Add IPv4 hooks
 459          */
 460         ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
 461         if (ifs->ifs_ipf_ipv4 == NULL)
 462                 goto hookup_failed;
 463 
 464         ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
 465             NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
 466         if (!ifs->ifs_hook4_nic_events)
 467                 goto hookup_failed;
 468 
 469         ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
 470             NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
 471         if (!ifs->ifs_hook4_physical_in)
 472                 goto hookup_failed;
 473 
 474         ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
 475             NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
 476         if (!ifs->ifs_hook4_physical_out)
 477                 goto hookup_failed;
 478 
 479         if (ifs->ifs_ipf_loopback) {
 480                 ifs->ifs_hook4_loopback_in = (net_hook_register(
 481                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
 482                     ifs->ifs_ipfhook4_loop_in) == 0);
 483                 if (!ifs->ifs_hook4_loopback_in)
 484                         goto hookup_failed;
 485 
 486                 ifs->ifs_hook4_loopback_out = (net_hook_register(
 487                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
 488                     ifs->ifs_ipfhook4_loop_out) == 0);
 489                 if (!ifs->ifs_hook4_loopback_out)
 490                         goto hookup_failed;
 491         }
 492 
 493         /*
 494          * Add IPv6 hooks
 495          */
 496         ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
 497         if (ifs->ifs_ipf_ipv6 == NULL)
 498                 goto hookup_failed;
 499 
 500         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
 501                   hook6_nicevents, hook6_nicevents_gz, ifs);
 502         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
 503                   hook6_in, hook6_in_gz, ifs);
 504         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
 505                   hook6_loop_in, hook6_loop_in_gz, ifs);
 506         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
 507                   hook6_out, hook6_out_gz, ifs);
 508         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
 509                   hook6_loop_out, hook6_loop_out_gz, ifs);
 510 
 511         ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
 512             NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
 513         if (!ifs->ifs_hook6_nic_events)
 514                 goto hookup_failed;
 515 
 516         ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
 517             NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
 518         if (!ifs->ifs_hook6_physical_in)
 519                 goto hookup_failed;
 520 
 521         ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
 522             NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
 523         if (!ifs->ifs_hook6_physical_out)
 524                 goto hookup_failed;
 525 
 526         if (ifs->ifs_ipf_loopback) {
 527                 ifs->ifs_hook6_loopback_in = (net_hook_register(
 528                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
 529                     ifs->ifs_ipfhook6_loop_in) == 0);
 530                 if (!ifs->ifs_hook6_loopback_in)
 531                         goto hookup_failed;
 532 
 533                 ifs->ifs_hook6_loopback_out = (net_hook_register(
 534                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
 535                     ifs->ifs_ipfhook6_loop_out) == 0);
 536                 if (!ifs->ifs_hook6_loopback_out)
 537                         goto hookup_failed;
 538         }
 539 
 540         /*
 541          * Add VND INET hooks
 542          */
 543         ifs->ifs_ipf_vndl3v4 = net_protocol_lookup(id, NHF_VND_INET);
 544         if (ifs->ifs_ipf_vndl3v4 == NULL)
 545                 goto hookup_failed;
 546 
 547         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v4_in, ipf_hookvndl3v4_in,
 548             hook4_vnd_in, hook4_vnd_in_gz, ifs);
 549         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v4_out, ipf_hookvndl3v4_out,
 550             hook4_vnd_out, hook4_vnd_out_gz, ifs);
 551         ifs->ifs_hookvndl3v4_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v4,
 552             NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v4_in) == 0);
 553         if (!ifs->ifs_hookvndl3v4_physical_in)
 554                 goto hookup_failed;
 555 
 556         ifs->ifs_hookvndl3v4_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v4,
 557             NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v4_out) == 0);
 558         if (!ifs->ifs_hookvndl3v4_physical_out)
 559                 goto hookup_failed;
 560 
 561 
 562         /*
 563          * VND INET6 hooks
 564          */
 565         ifs->ifs_ipf_vndl3v6 = net_protocol_lookup(id, NHF_VND_INET6);
 566         if (ifs->ifs_ipf_vndl3v6 == NULL)
 567                 goto hookup_failed;
 568 
 569         HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v6_in, ipf_hookvndl3v6_in,
 570             hook6_vnd_in, hook6_vnd_in_gz, ifs);
 571         HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v6_out, ipf_hookvndl3v6_out,
 572             hook6_vnd_out, hook6_vnd_out_gz, ifs);
 573         ifs->ifs_hookvndl3v6_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v6,
 574             NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v6_in) == 0);
 575         if (!ifs->ifs_hookvndl3v6_physical_in)
 576                 goto hookup_failed;
 577 
 578         ifs->ifs_hookvndl3v6_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v6,
 579             NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v6_out) == 0);
 580         if (!ifs->ifs_hookvndl3v6_physical_out)
 581                 goto hookup_failed;
 582 
 583         /*
 584          * VIONA INET hooks.  While the nethook framework allows us to register
 585          * hooks for events that haven't been registered yet, we instead
 586          * register and unregister our hooks in response to notifications
 587          * about the viona hooks from the nethook framework.  This prevents
 588          * problems when the viona module gets unloaded while the ipf module
 589          * does not.  If we do not unregister our hooks after the viona module
 590          * is unloaded, the viona module cannot later re-register them if it
 591          * gets reloaded.  As the ip, vnd, and ipf modules are rarely unloaded
 592          * even on DEBUG kernels, they do not experience this issue.
 593          */
 594         if (net_instance_notify_register(id, ipf_hook_instance_notify,
 595             ifs) != 0)
 596                 goto hookup_failed;
 597 
 598         /*
 599          * Reacquire ipf_global, now it is safe.
 600          */
 601         WRITE_ENTER(&ifs->ifs_ipf_global);
 602 
 603 /* Do not use private interface ip_params_arr[] in Solaris 10 */
 604 #if SOLARIS2 < 10
 605 
 606 #if SOLARIS2 >= 8
 607         ip_forwarding = &ip_g_forward;
 608 #endif
 609         /*
 610          * XXX - There is no terminator for this array, so it is not possible
 611          * to tell if what we are looking for is missing and go off the end
 612          * of the array.
 613          */
 614 
 615 #if SOLARIS2 <= 8
 616         for (i = 0; ; i++) {
 617                 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
 618                         ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
 619                 } else if (!strcmp(ip_param_arr[i].ip_param_name,
 620                             "ip_path_mtu_discovery")) {
 621                         ip_mtudisc = &ip_param_arr[i].ip_param_value;
 622                 }
 623 #if SOLARIS2 < 8
 624                 else if (!strcmp(ip_param_arr[i].ip_param_name,
 625                             "ip_forwarding")) {
 626                         ip_forwarding = &ip_param_arr[i].ip_param_value;
 627                 }
 628 #else
 629                 else if (!strcmp(ip_param_arr[i].ip_param_name,
 630                             "ip6_forwarding")) {
 631                         ip6_forwarding = &ip_param_arr[i].ip_param_value;
 632                 }
 633 #endif
 634 
 635                 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
 636 #if SOLARIS2 >= 8
 637                     ip6_forwarding != NULL &&
 638 #endif
 639                     ip_forwarding != NULL)
 640                         break;
 641         }
 642 #endif
 643 
 644         if (ifs->ifs_fr_control_forwarding & 1) {
 645                 if (ip_forwarding != NULL)
 646                         *ip_forwarding = 1;
 647 #if SOLARIS2 >= 8
 648                 if (ip6_forwarding != NULL)
 649                         *ip6_forwarding = 1;
 650 #endif
 651         }
 652 
 653 #endif
 654 
 655         return 0;
 656 hookup_failed:
 657         WRITE_ENTER(&ifs->ifs_ipf_global);
 658         return -1;
 659 }
 660 
 661 /* ------------------------------------------------------------------------ */
 662 /*
 663  * Called whenever a nethook protocol is registered or unregistered.  Currently
 664  * only used to add or remove the hooks for viona.
 665  *
 666  * While the function signature requires returning int, nothing
 667  * in usr/src/uts/common/io/hook.c that invokes the callbacks
 668  * captures the return value (nor is there currently any documentation
 669  * on what return values should be).  For now at least, we'll return 0
 670  * on success (or 'not applicable') or an error value.  Even if the
 * nethook framework doesn't use the return value, it can be observed via
 672  * dtrace if needed.
 673  */
 674 static int
 675 ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
 676     const char *name, const char *dummy __unused, const char *he_name)
 677 {
 678         ipf_stack_t *ifs = arg;
 679         hook_t **hookpp;
 680         char *hook_name, *hint_name;
 681         hook_func_t hookfn;
 682         boolean_t *hookedp;
 683         hook_hint_t hint;
 684         boolean_t out;
 685         int ret = 0;
 686 
 687         const boolean_t gz = ifs->ifs_gz_controlled;
 688 
 689         /* We currently only care about viona hooks notifications */
 690         if (strcmp(name, Hn_VIONA) != 0)
 691                 return (0);
 692 
 693         if (strcmp(he_name, NH_PHYSICAL_IN) == 0) {
 694                 out = B_FALSE;
 695         } else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) {
 696                 out = B_TRUE;
 697         } else {
 698                 /*
 699                  * If we've added more hook events to viona, we must add
 700                  * the corresponding handling here (even if it's just to
 701                  * ignore it) to prevent the firewall from not working as
 702                  * intended.
 703                  */
 704                 cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__,
 705                     he_name);
 706 
 707                 return (0);
 708         }
 709 
 710         if (out) {
 711                 hookpp = &ifs->ifs_ipfhookviona_out;
 712                 hookfn = ipf_hookviona_out;
 713                 hookedp = &ifs->ifs_hookviona_physical_out;
 714                 name = gz ? hook_viona_out_gz : hook_viona_out;
 715                 hint = gz ? HH_AFTER : HH_BEFORE;
 716                 hint_name = gz ? hook_viona_out : hook_viona_out_gz;
 717         } else {
 718                 hookpp = &ifs->ifs_ipfhookviona_in;
 719                 hookfn = ipf_hookviona_in;
 720                 hookedp = &ifs->ifs_hookviona_physical_in;
 721                 name = gz ? hook_viona_in_gz : hook_viona_in;
 722                 hint = gz ? HH_BEFORE : HH_AFTER;
 723                 hint_name = gz ? hook_viona_in : hook_viona_in_gz;
 724         }
 725 
 726         switch (command) {
 727         default:
 728         case HN_NONE:
 729                 break;
 730         case HN_REGISTER:
 731                 HOOK_INIT(*hookpp, hookfn, (char *)name, ifs);
 732                 (*hookpp)->h_hint = hint;
 733                 (*hookpp)->h_hintvalue = (uintptr_t)hint_name;
 734                 ret = net_hook_register(ifs->ifs_ipf_viona,
 735                     (char *)he_name, *hookpp);
 736                 if (ret != 0) {
 737                         cmn_err(CE_NOTE, "%s: could not register hook "
 738                             "(hook family=%s hook=%s) err=%d", __func__,
 739                             name, he_name, ret);
 740                         *hookedp = B_FALSE;
 741                         return (ret);
 742                 }
 743                 *hookedp = B_TRUE;
 744                 break;
 745         case HN_UNREGISTER:
 746                 if (ifs->ifs_ipf_viona == NULL)
 747                         break;
 748 
 749                 ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona,
 750                     (char *)he_name, *hookpp) : 0;
 751                 if ((ret == 0 || ret == ENXIO)) {
 752                         if (*hookpp != NULL) {
 753                                 hook_free(*hookpp);
 754                                 *hookpp = NULL;
 755                         }
 756                         *hookedp = B_FALSE;
 757                 }
 758                 break;
 759         }
 760 
 761         return (ret);
 762 }
 763 
 764 /*
 765  * Called whenever a new nethook instance is created.  Currently only used
 766  * with the Hn_VIONA nethooks.  Similar to ipf_hook_protocol_notify, the out
 767  * function signature must return an int, though the result is never used.
 768  * We elect to return 0 on success (or not applicable) or a non-zero value
 769  * on error.
 770  */
 771 static int
 772 ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg,
 773     const char *netid, const char *dummy __unused, const char *instance)
 774 {
 775         ipf_stack_t *ifs = arg;
 776         int ret = 0;
 777 
 778         /* We currently only care about viona hooks */
 779         if (strcmp(instance, Hn_VIONA) != 0)
 780                 return (0);
 781 
 782         switch (command) {
 783         case HN_NONE:
 784         default:
 785                 return (0);
 786         case HN_REGISTER:
 787                 ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid,
 788                     NHF_VIONA);
 789 
 790                 if (ifs->ifs_ipf_viona == NULL)
 791                         return (EPROTONOSUPPORT);
 792 
 793                 ret = net_protocol_notify_register(ifs->ifs_ipf_viona,
 794                     ipf_hook_protocol_notify, ifs);
 795                 VERIFY(ret == 0 || ret == ESHUTDOWN);
 796                 break;
 797         case HN_UNREGISTER:
 798                 if (ifs->ifs_ipf_viona == NULL)
 799                         break;
 800                 VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona,
 801                     ipf_hook_protocol_notify));
 802                 VERIFY0(net_protocol_release(ifs->ifs_ipf_viona));
 803                 ifs->ifs_ipf_viona = NULL;
 804                 break;
 805         }
 806 
 807         return (ret);
 808 }
 809 
 810 static  int     fr_setipfloopback(set, ifs)
 811 int set;
 812 ipf_stack_t *ifs;
 813 {
 814         if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
 815                 return EFAULT;
 816 
 817         if (set && !ifs->ifs_ipf_loopback) {
 818                 ifs->ifs_ipf_loopback = 1;
 819 
 820                 ifs->ifs_hook4_loopback_in = (net_hook_register(
 821                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
 822                     ifs->ifs_ipfhook4_loop_in) == 0);
 823                 if (!ifs->ifs_hook4_loopback_in)
 824                         return EINVAL;
 825 
 826                 ifs->ifs_hook4_loopback_out = (net_hook_register(
 827                     ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
 828                     ifs->ifs_ipfhook4_loop_out) == 0);
 829                 if (!ifs->ifs_hook4_loopback_out)
 830                         return EINVAL;
 831 
 832                 ifs->ifs_hook6_loopback_in = (net_hook_register(
 833                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
 834                     ifs->ifs_ipfhook6_loop_in) == 0);
 835                 if (!ifs->ifs_hook6_loopback_in)
 836                         return EINVAL;
 837 
 838                 ifs->ifs_hook6_loopback_out = (net_hook_register(
 839                     ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
 840                     ifs->ifs_ipfhook6_loop_out) == 0);
 841                 if (!ifs->ifs_hook6_loopback_out)
 842                         return EINVAL;
 843 
 844         } else if (!set && ifs->ifs_ipf_loopback) {
 845                 ifs->ifs_ipf_loopback = 0;
 846 
 847                 ifs->ifs_hook4_loopback_in =
 848                     (net_hook_unregister(ifs->ifs_ipf_ipv4,
 849                     NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
 850                 if (ifs->ifs_hook4_loopback_in)
 851                         return EBUSY;
 852 
 853                 ifs->ifs_hook4_loopback_out =
 854                     (net_hook_unregister(ifs->ifs_ipf_ipv4,
 855                     NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
 856                 if (ifs->ifs_hook4_loopback_out)
 857                         return EBUSY;
 858 
 859                 ifs->ifs_hook6_loopback_in =
 860                     (net_hook_unregister(ifs->ifs_ipf_ipv6,
 861                     NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
 862                 if (ifs->ifs_hook6_loopback_in)
 863                         return EBUSY;
 864 
 865                 ifs->ifs_hook6_loopback_out =
 866                     (net_hook_unregister(ifs->ifs_ipf_ipv6,
 867                     NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
 868                 if (ifs->ifs_hook6_loopback_out)
 869                         return EBUSY;
 870         }
 871         return 0;
 872 }
 873 
 874 
/*
 * Filter ioctl interface.
 *
 * Parameters:
 *      dev:  device the ioctl was issued on; its minor selects the per-open
 *            soft state, which in turn holds the logical unit (ipfs_minor)
 *      cmd:  ioctl command (SIOC*, FIONREAD)
 *      data: user-space argument, copied in/out per command
 *      mode: open mode flags; commands that modify state require FWRITE
 *      cp:   caller credentials (supplies the zone id and uid)
 *      rp:   unused
 *
 * Returns 0 on success or an errno value.
 *
 * Locking: ipf_find_stack() returns with ifs_ipf_global held as reader.
 * Every return path after that point releases it.  SIOCFRENB and SIOCFRSYN
 * drop the read lock and re-take the lock as writer; the single
 * RWLOCK_EXIT at the bottom releases whichever mode is held.
 */
/*ARGSUSED*/
int iplioctl(dev, cmd, data, mode, cp, rp)
dev_t dev;
int cmd;
#if SOLARIS2 >= 7
intptr_t data;
#else
int *data;
#endif
int mode;
cred_t *cp;
int *rp;
{
        int error = 0, tmp;
        friostat_t fio;
        minor_t unit;
        u_int enable;
        ipf_stack_t *ifs;
        zoneid_t zid;
        ipf_devstate_t *isp;

#ifdef  IPFDEBUG
        cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
                dev, cmd, data, mode, cp, rp);
#endif
        unit = getminor(dev);

        /*
         * The device minor indexes the soft state; from here on 'unit' is
         * the logical unit recorded in that state.
         */
        isp = ddi_get_soft_state(ipf_state, unit);
        if (isp == NULL)
                return ENXIO;
        unit = isp->ipfs_minor;

        /* SIOCIPFZONESET is only honoured for callers in the global zone. */
        zid = crgetzoneid(cp);
        if (cmd == SIOCIPFZONESET) {
                if (zid == GLOBAL_ZONEID)
                        return fr_setzoneid(isp, (caddr_t) data);
                return EACCES;
        }

        /*
         * ipf_find_stack returns with a read lock on ifs_ipf_global
         */
        ifs = ipf_find_stack(zid, isp);
        if (ifs == NULL)
                return ENXIO;

        /*
         * While the filter is not running, only a small set of commands on
         * the IPL_LOGIPF unit is accepted; everything else gets EIO.
         */
        if (ifs->ifs_fr_running <= 0) {
                if (unit != IPL_LOGIPF) {
                        RWLOCK_EXIT(&ifs->ifs_ipf_global);
                        return EIO;
                }
                if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
                    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
                    cmd != SIOCGETFS && cmd != SIOCGETFF) {
                        RWLOCK_EXIT(&ifs->ifs_ipf_global);
                        return EIO;
                }
        }

        /* An enable/disable operation is in progress. */
        if (ifs->ifs_fr_enable_active != 0) {
                RWLOCK_EXIT(&ifs->ifs_ipf_global);
                return EBUSY;
        }

        /*
         * Any result other than -1 from fr_ioctlswitch is returned to the
         * caller as-is; -1 falls through to the command switch below.
         */
        error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
                               curproc, ifs);
        if (error != -1) {
                RWLOCK_EXIT(&ifs->ifs_ipf_global);
                return error;
        }
        error = 0;

        switch (cmd)
        {
        case SIOCFRENB :
                if (!(mode & FWRITE))
                        error = EPERM;
                else {
                        error = COPYIN((caddr_t)data, (caddr_t)&enable,
                                       sizeof(enable));
                        if (error != 0) {
                                error = EFAULT;
                                break;
                        }

                        RWLOCK_EXIT(&ifs->ifs_ipf_global);
                        WRITE_ENTER(&ifs->ifs_ipf_global);

                        /*
                         * We must recheck fr_enable_active here, since we've
                         * dropped ifs_ipf_global from R in order to get it
                         * exclusively.
                         */
                        if (ifs->ifs_fr_enable_active == 0) {
                                ifs->ifs_fr_enable_active = 1;
                                error = fr_enableipf(ifs, enable);
                                ifs->ifs_fr_enable_active = 0;
                        }
                }
                break;
        case SIOCIPFSET :
                if (!(mode & FWRITE)) {
                        error = EPERM;
                        break;
                }
                /* FALLTHRU */
        case SIOCIPFGETNEXT :
        case SIOCIPFGET :
                error = fr_ipftune(cmd, (void *)data, ifs);
                break;
        case SIOCSETFF :
                if (!(mode & FWRITE))
                        error = EPERM;
                else {
                        error = COPYIN((caddr_t)data,
                                       (caddr_t)&ifs->ifs_fr_flags,
                                       sizeof(ifs->ifs_fr_flags));
                        if (error != 0)
                                error = EFAULT;
                }
                break;
        case SIOCIPFLP :
                /* Turn loopback filtering on or off. */
                error = COPYIN((caddr_t)data, (caddr_t)&tmp,
                               sizeof(tmp));
                if (error != 0)
                        error = EFAULT;
                else
                        error = fr_setipfloopback(tmp, ifs);
                break;
        case SIOCGETFF :
                error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
                                sizeof(ifs->ifs_fr_flags));
                if (error != 0)
                        error = EFAULT;
                break;
        case SIOCFUNCL :
                error = fr_resolvefunc((void *)data);
                break;
        case SIOCINAFR :
        case SIOCRMAFR :
        case SIOCADAFR :
        case SIOCZRLST :
                /* Rule requests against the set indexed by ifs_fr_active. */
                if (!(mode & FWRITE))
                        error = EPERM;
                else
                        error = frrequest(unit, cmd, (caddr_t)data,
                                          ifs->ifs_fr_active, 1, ifs);
                break;
        case SIOCINIFR :
        case SIOCRMIFR :
        case SIOCADIFR :
                /* Rule requests against the other (1 - active) set. */
                if (!(mode & FWRITE))
                        error = EPERM;
                else
                        error = frrequest(unit, cmd, (caddr_t)data,
                                          1 - ifs->ifs_fr_active, 1, ifs);
                break;
        case SIOCSWAPA :
                if (!(mode & FWRITE))
                        error = EPERM;
                else {
                        /*
                         * Clear the rule cache, report the current set
                         * index to the caller and, only if that copyout
                         * succeeded, flip ifs_fr_active.
                         */
                        WRITE_ENTER(&ifs->ifs_ipf_mutex);
                        bzero((char *)ifs->ifs_frcache,
                            sizeof (ifs->ifs_frcache));
                        error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
                                        (caddr_t)data,
                                        sizeof(ifs->ifs_fr_active));
                        if (error != 0)
                                error = EFAULT;
                        else
                                ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
                        RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
                }
                break;
        case SIOCGETFS :
                fr_getstat(&fio, ifs);
                error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
                break;
        case SIOCFRZST :
                if (!(mode & FWRITE))
                        error = EPERM;
                else
                        error = fr_zerostats((caddr_t)data, ifs);
                break;
        case    SIOCIPFFL :
                if (!(mode & FWRITE))
                        error = EPERM;
                else {
                        /*
                         * The value copied in is passed to frflush() and
                         * frflush()'s result is copied back out.
                         */
                        error = COPYIN((caddr_t)data, (caddr_t)&tmp,
                                       sizeof(tmp));
                        if (!error) {
                                tmp = frflush(unit, 4, tmp, ifs);
                                error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
                                                sizeof(tmp));
                                if (error != 0)
                                        error = EFAULT;
                        } else
                                error = EFAULT;
                }
                break;
#ifdef USE_INET6
        case    SIOCIPFL6 :
                /* Same as SIOCIPFFL, but flushing with protocol 6. */
                if (!(mode & FWRITE))
                        error = EPERM;
                else {
                        error = COPYIN((caddr_t)data, (caddr_t)&tmp,
                                       sizeof(tmp));
                        if (!error) {
                                tmp = frflush(unit, 6, tmp, ifs);
                                error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
                                                sizeof(tmp));
                                if (error != 0)
                                        error = EFAULT;
                        } else
                                error = EFAULT;
                }
                break;
#endif
        case SIOCSTLCK :
                /* One value is applied to the state/NAT/frag/auth locks. */
                error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
                if (error == 0) {
                        ifs->ifs_fr_state_lock = tmp;
                        ifs->ifs_fr_nat_lock = tmp;
                        ifs->ifs_fr_frag_lock = tmp;
                        ifs->ifs_fr_auth_lock = tmp;
                } else
                        error = EFAULT;
        break;
#ifdef  IPFILTER_LOG
        case    SIOCIPFFB :
                if (!(mode & FWRITE))
                        error = EPERM;
                else {
                        tmp = ipflog_clear(unit, ifs);
                        error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
                                       sizeof(tmp));
                        if (error)
                                error = EFAULT;
                }
                break;
#endif /* IPFILTER_LOG */
        case SIOCFRSYN :
                if (!(mode & FWRITE))
                        error = EPERM;
                else {
                        /* Resync is done with ifs_ipf_global held as writer. */
                        RWLOCK_EXIT(&ifs->ifs_ipf_global);
                        WRITE_ENTER(&ifs->ifs_ipf_global);

                        frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
                        fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
                        fr_nataddrsync(0, NULL, NULL, ifs);
                        fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
                        error = 0;
                }
                break;
        case SIOCGFRST :
                error = fr_outobj((void *)data, fr_fragstats(ifs),
                                  IPFOBJ_FRAGSTAT);
                break;
        case FIONREAD :
#ifdef  IPFILTER_LOG
                /* Reports ifs_iplused[] for the IPL_LOGIPF unit. */
                tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];

                error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
                if (error != 0)
                        error = EFAULT;
#endif
                break;
        case SIOCIPFITER :
                error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
                                       curproc, ifs);
                break;

        case SIOCGENITER :
                error = ipf_genericiter((caddr_t)data, crgetuid(cp),
                                        curproc, ifs);
                break;

        case SIOCIPFDELTOK :
                error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
                if (error != 0) {
                        error = EFAULT;
                } else {
                        error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
                }
                break;

        default :
#ifdef  IPFDEBUG
                cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
                        cmd, (void *)data);
#endif
                error = EINVAL;
                break;
        }
        /* Releases ifs_ipf_global in whichever mode it is currently held. */
        RWLOCK_EXIT(&ifs->ifs_ipf_global);
        return error;
}
1176 
1177 
1178 static int fr_enableipf(ifs, enable)
1179 ipf_stack_t *ifs;
1180 int enable;
1181 {
1182         int error;
1183 
1184         if (!enable) {
1185                 error = ipldetach(ifs);
1186                 if (error == 0)
1187                         ifs->ifs_fr_running = -1;
1188                 return error;
1189         }
1190 
1191         if (ifs->ifs_fr_running > 0)
1192                 return 0;
1193 
1194         error = iplattach(ifs);
1195         if (error == 0) {
1196                 if (ifs->ifs_fr_timer_id == NULL) {
1197                         int hz = drv_usectohz(500000);
1198 
1199                         ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
1200                                                        (void *)ifs,
1201                                                        hz);
1202                 }
1203                 ifs->ifs_fr_running = 1;
1204         } else {
1205                 (void) ipldetach(ifs);
1206         }
1207         return error;
1208 }
1209 
1210 
1211 phy_if_t get_unit(name, v, ifs)
1212 char *name;
1213 int v;
1214 ipf_stack_t *ifs;
1215 {
1216         net_handle_t nif;
1217  
1218         if (v == 4)
1219                 nif = ifs->ifs_ipf_ipv4;
1220         else if (v == 6)
1221                 nif = ifs->ifs_ipf_ipv6;
1222         else
1223                 return 0;
1224 
1225         return (net_phylookup(nif, name));
1226 }
1227 
1228 /*
1229  * routines below for saving IP headers to buffer
1230  */
1231 /*ARGSUSED*/
1232 int iplopen(devp, flags, otype, cred)
1233 dev_t *devp;
1234 int flags, otype;
1235 cred_t *cred;
1236 {
1237         ipf_devstate_t *isp;
1238         minor_t min = getminor(*devp);
1239         minor_t minor;
1240 
1241 #ifdef  IPFDEBUG
1242         cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
1243 #endif
1244         if (!(otype & OTYP_CHR))
1245                 return ENXIO;
1246 
1247         if (IPL_LOGMAX < min)
1248                 return ENXIO;
1249 
1250         minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
1251             VM_BESTFIT | VM_SLEEP);
1252 
1253         if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
1254                 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
1255                 return ENXIO;
1256         }
1257 
1258         *devp = makedevice(getmajor(*devp), minor);
1259         isp = ddi_get_soft_state(ipf_state, minor);
1260         VERIFY(isp != NULL);
1261 
1262         isp->ipfs_minor = min;
1263         isp->ipfs_zoneid = IPFS_ZONE_UNSET;
1264 
1265         return 0;
1266 }
1267 
1268 
1269 /*ARGSUSED*/
1270 int iplclose(dev, flags, otype, cred)
1271 dev_t dev;
1272 int flags, otype;
1273 cred_t *cred;
1274 {
1275         minor_t min = getminor(dev);
1276 
1277 #ifdef  IPFDEBUG
1278         cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
1279 #endif
1280 
1281         if (IPL_LOGMAX < min)
1282                 return ENXIO;
1283 
1284         ddi_soft_state_free(ipf_state, min);
1285         vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
1286 
1287         return 0;
1288 }
1289 
1290 #ifdef  IPFILTER_LOG
1291 /*
1292  * iplread/ipllog
1293  * both of these must operate with at least splnet() lest they be
1294  * called during packet processing and cause an inconsistancy to appear in
1295  * the filter lists.
1296  */
1297 /*ARGSUSED*/
1298 int iplread(dev, uio, cp)
1299 dev_t dev;
1300 register struct uio *uio;
1301 cred_t *cp;
1302 {
1303         ipf_stack_t *ifs;
1304         int ret;
1305         minor_t unit;
1306         ipf_devstate_t *isp;
1307 
1308         unit = getminor(dev);
1309         isp = ddi_get_soft_state(ipf_state, unit);
1310         if (isp == NULL)
1311                 return ENXIO;
1312         unit = isp->ipfs_minor;
1313 
1314         /*
1315          * ipf_find_stack returns with a read lock on ifs_ipf_global
1316          */
1317         ifs = ipf_find_stack(crgetzoneid(cp), isp);
1318         if (ifs == NULL)
1319                 return ENXIO;
1320 
1321 # ifdef IPFDEBUG
1322         cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
1323 # endif
1324 
1325         if (ifs->ifs_fr_running < 1) {
1326                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1327                 return EIO;
1328         }
1329 
1330 # ifdef IPFILTER_SYNC
1331         if (unit == IPL_LOGSYNC) {
1332                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1333                 return ipfsync_read(uio);
1334         }
1335 # endif
1336 
1337         ret = ipflog_read(unit, uio, ifs);
1338         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1339         return ret;
1340 }
1341 #endif /* IPFILTER_LOG */
1342 
1343 
1344 /*
1345  * iplread/ipllog
1346  * both of these must operate with at least splnet() lest they be
1347  * called during packet processing and cause an inconsistancy to appear in
1348  * the filter lists.
1349  */
1350 int iplwrite(dev, uio, cp)
1351 dev_t dev;
1352 register struct uio *uio;
1353 cred_t *cp;
1354 {
1355         ipf_stack_t *ifs;
1356         minor_t unit;
1357         ipf_devstate_t *isp;
1358 
1359         unit = getminor(dev);
1360         isp = ddi_get_soft_state(ipf_state, unit);
1361         if (isp == NULL)
1362                 return ENXIO;
1363         unit = isp->ipfs_minor;
1364 
1365         /*
1366          * ipf_find_stack returns with a read lock on ifs_ipf_global
1367          */
1368         ifs = ipf_find_stack(crgetzoneid(cp), isp);
1369         if (ifs == NULL)
1370                 return ENXIO;
1371 
1372 #ifdef  IPFDEBUG
1373         cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
1374 #endif
1375 
1376         if (ifs->ifs_fr_running < 1) {
1377                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1378                 return EIO;
1379         }
1380 
1381 #ifdef  IPFILTER_SYNC
1382         if (getminor(dev) == IPL_LOGSYNC) {
1383                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1384                 return ipfsync_write(uio);
1385         }
1386 #endif /* IPFILTER_SYNC */
1387         dev = dev;      /* LINT */
1388         uio = uio;      /* LINT */
1389         cp = cp;        /* LINT */
1390         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1391         return ENXIO;
1392 }
1393 
1394 
1395 /*
1396  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
1397  * requires a large amount of setting up and isn't any more efficient.
1398  */
1399 int fr_send_reset(fin)
1400 fr_info_t *fin;
1401 {
1402         tcphdr_t *tcp, *tcp2;
1403         int tlen, hlen;
1404         mblk_t *m;
1405 #ifdef  USE_INET6
1406         ip6_t *ip6;
1407 #endif
1408         ip_t *ip;
1409 
1410         tcp = fin->fin_dp;
1411         if (tcp->th_flags & TH_RST)
1412                 return -1;
1413 
1414 #ifndef IPFILTER_CKSUM
1415         if (fr_checkl4sum(fin) == -1)
1416                 return -1;
1417 #endif
1418 
1419         tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
1420 #ifdef  USE_INET6
1421         if (fin->fin_v == 6)
1422                 hlen = sizeof(ip6_t);
1423         else
1424 #endif
1425                 hlen = sizeof(ip_t);
1426         hlen += sizeof(*tcp2);
1427         if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
1428                 return -1;
1429 
1430         m->b_rptr += 64;
1431         MTYPE(m) = M_DATA;
1432         m->b_wptr = m->b_rptr + hlen;
1433         ip = (ip_t *)m->b_rptr;
1434         bzero((char *)ip, hlen);
1435         tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1436         tcp2->th_dport = tcp->th_sport;
1437         tcp2->th_sport = tcp->th_dport;
1438         if (tcp->th_flags & TH_ACK) {
1439                 tcp2->th_seq = tcp->th_ack;
1440                 tcp2->th_flags = TH_RST;
1441         } else {
1442                 tcp2->th_ack = ntohl(tcp->th_seq);
1443                 tcp2->th_ack += tlen;
1444                 tcp2->th_ack = htonl(tcp2->th_ack);
1445                 tcp2->th_flags = TH_RST|TH_ACK;
1446         }
1447         tcp2->th_off = sizeof(struct tcphdr) >> 2;
1448 
1449         ip->ip_v = fin->fin_v;
1450 #ifdef  USE_INET6
1451         if (fin->fin_v == 6) {
1452                 ip6 = (ip6_t *)m->b_rptr;
1453                 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1454                 ip6->ip6_src = fin->fin_dst6.in6;
1455                 ip6->ip6_dst = fin->fin_src6.in6;
1456                 ip6->ip6_plen = htons(sizeof(*tcp));
1457                 ip6->ip6_nxt = IPPROTO_TCP;
1458                 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1459         } else
1460 #endif
1461         {
1462                 ip->ip_src.s_addr = fin->fin_daddr;
1463                 ip->ip_dst.s_addr = fin->fin_saddr;
1464                 ip->ip_id = fr_nextipid(fin);
1465                 ip->ip_hl = sizeof(*ip) >> 2;
1466                 ip->ip_p = IPPROTO_TCP;
1467                 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1468                 ip->ip_tos = fin->fin_ip->ip_tos;
1469                 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1470         }
1471         return fr_send_ip(fin, m, &m);
1472 }
1473 
/*
 * Function:    fr_send_ip
 * Returns:     the value returned by fr_fastroute()
 *               0: success
 *              -1: failed
 * Parameters:
 *      fin: packet information for the packet being responded to
 *      m:   the message block where ip head starts
 *      mpp: address of the caller's message block pointer; stored in the
 *           new packet's fin_mp and passed on to fr_fastroute()
 *
 * Send a new packet through the IP stack.
 *
 * For IPv4 packets, ip_len must be in host byte order, and
 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
 * function).  ip_v is not written here, so the caller must have set it.
 *
 * For IPv6 packets, ip6_vfc and ip6_hlim are filled in by this function.
 *
 * All other portions of the packet must be in on-the-wire format.
 */
/*ARGSUSED*/
static int fr_send_ip(fin, m, mpp)
fr_info_t *fin;
mblk_t *m, **mpp;
{
        qpktinfo_t qpi, *qpip;
        fr_info_t fnew;
        ip_t *ip;
        int i, hlen;
        ipf_stack_t *ifs = fin->fin_ifs;

        ip = (ip_t *)m->b_rptr;
        /* fnew describes the packet being sent; start from all zeroes. */
        bzero((char *)&fnew, sizeof(fnew));

#ifdef  USE_INET6
        if (fin->fin_v == 6) {
                ip6_t *ip6;

                ip6 = (ip6_t *)ip;
                ip6->ip6_vfc = 0x60;
                ip6->ip6_hlim = 127;
                fnew.fin_v = 6;
                hlen = sizeof(*ip6);
                /* ip6_plen excludes the fixed header; fin_plen includes it. */
                fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
        } else
#endif
        {
                fnew.fin_v = 4;
#if SOLARIS2 >= 10
                ip->ip_ttl = 255;
                if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
                        ip->ip_off = htons(IP_DF);
#else
                if (ip_ttl_ptr != NULL)
                        ip->ip_ttl = (u_char)(*ip_ttl_ptr);
                else
                        ip->ip_ttl = 63;
                if (ip_mtudisc != NULL)
                        ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
                else
                        ip->ip_off = htons(IP_DF);
#endif
                /*
                 * The dance with byte order and ip_len/ip_off is because in
                 * fr_fastroute, it expects them to be in host byte order but
                 * ipf_cksum expects them to be in network byte order.
                 */
                ip->ip_len = htons(ip->ip_len);
                ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
                ip->ip_len = ntohs(ip->ip_len);
                ip->ip_off = ntohs(ip->ip_off);
                hlen = sizeof(*ip);
                fnew.fin_plen = ip->ip_len;
        }

        /*
         * Build packet info for the new message, reusing the interface
         * (qpi_ill / fin_ifp) and stack of the packet being answered.
         */
        qpip = fin->fin_qpi;
        qpi.qpi_off = 0;
        qpi.qpi_ill = qpip->qpi_ill;
        qpi.qpi_m = m;
        qpi.qpi_data = ip;
        fnew.fin_qpi = &qpi;
        fnew.fin_ifp = fin->fin_ifp;
        /* NOTE(review): FI_NOCKSUM presumably skips checksum checks - confirm */
        fnew.fin_flx = FI_NOCKSUM;
        fnew.fin_m = m;
        fnew.fin_qfm = m;
        fnew.fin_ip = ip;
        fnew.fin_mp = mpp;
        fnew.fin_hlen = hlen;
        fnew.fin_dp = (char *)ip + hlen;
        fnew.fin_ifs = fin->fin_ifs;
        (void) fr_makefrip(hlen, ip, &fnew);

        i = fr_fastroute(m, mpp, &fnew, NULL);
        return i;
}
1568 
1569 
1570 int fr_send_icmp_err(type, fin, dst)
1571 int type;
1572 fr_info_t *fin;
1573 int dst;
1574 {
1575         struct in_addr dst4;
1576         struct icmp *icmp;
1577         qpktinfo_t *qpi;
1578         int hlen, code;
1579         phy_if_t phy;
1580         u_short sz;
1581 #ifdef  USE_INET6
1582         mblk_t *mb;
1583 #endif
1584         mblk_t *m;
1585 #ifdef  USE_INET6
1586         ip6_t *ip6;
1587 #endif
1588         ip_t *ip;
1589         ipf_stack_t *ifs = fin->fin_ifs;
1590 
1591         if ((type < 0) || (type > ICMP_MAXTYPE))
1592                 return -1;
1593 
1594         code = fin->fin_icode;
1595 #ifdef USE_INET6
1596         if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1597                 return -1;
1598 #endif
1599 
1600 #ifndef IPFILTER_CKSUM
1601         if (fr_checkl4sum(fin) == -1)
1602                 return -1;
1603 #endif
1604 
1605         qpi = fin->fin_qpi;
1606 
1607 #ifdef  USE_INET6
1608         mb = fin->fin_qfm;
1609 
1610         if (fin->fin_v == 6) {
1611                 sz = sizeof(ip6_t);
1612                 sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1613                 hlen = sizeof(ip6_t);
1614                 type = icmptoicmp6types[type];
1615                 if (type == ICMP6_DST_UNREACH)
1616                         code = icmptoicmp6unreach[code];
1617         } else
1618 #endif
1619         {
1620                 if ((fin->fin_p == IPPROTO_ICMP) &&
1621                     !(fin->fin_flx & FI_SHORT))
1622                         switch (ntohs(fin->fin_data[0]) >> 8)
1623                         {
1624                         case ICMP_ECHO :
1625                         case ICMP_TSTAMP :
1626                         case ICMP_IREQ :
1627                         case ICMP_MASKREQ :
1628                                 break;
1629                         default :
1630                                 return 0;
1631                         }
1632 
1633                 sz = sizeof(ip_t) * 2;
1634                 sz += 8;                /* 64 bits of data */
1635                 hlen = sizeof(ip_t);
1636         }
1637 
1638         sz += offsetof(struct icmp, icmp_ip);
1639         if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1640                 return -1;
1641         MTYPE(m) = M_DATA;
1642         m->b_rptr += 64;
1643         m->b_wptr = m->b_rptr + sz;
1644         bzero((char *)m->b_rptr, (size_t)sz);
1645         ip = (ip_t *)m->b_rptr;
1646         ip->ip_v = fin->fin_v;
1647         icmp = (struct icmp *)(m->b_rptr + hlen);
1648         icmp->icmp_type = type & 0xff;
1649         icmp->icmp_code = code & 0xff;
1650         phy = (phy_if_t)qpi->qpi_ill; 
1651         if (type == ICMP_UNREACH && (phy != 0) && 
1652             fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1653                 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1654 
1655 #ifdef  USE_INET6
1656         if (fin->fin_v == 6) {
1657                 struct in6_addr dst6;
1658                 int csz;
1659 
1660                 if (dst == 0) {
1661                         ipf_stack_t *ifs = fin->fin_ifs;
1662 
1663                         if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1664                                        (void *)&dst6, NULL, ifs) == -1) {
1665                                 FREE_MB_T(m);
1666                                 return -1;
1667                         }
1668                 } else
1669                         dst6 = fin->fin_dst6.in6;
1670 
1671                 csz = sz;
1672                 sz -= sizeof(ip6_t);
1673                 ip6 = (ip6_t *)m->b_rptr;
1674                 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1675                 ip6->ip6_plen = htons((u_short)sz);
1676                 ip6->ip6_nxt = IPPROTO_ICMPV6;
1677                 ip6->ip6_src = dst6;
1678                 ip6->ip6_dst = fin->fin_src6.in6;
1679                 sz -= offsetof(struct icmp, icmp_ip);
1680                 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1681                 icmp->icmp_cksum = csz - sizeof(ip6_t);
1682         } else
1683 #endif
1684         {
1685                 ip->ip_hl = sizeof(*ip) >> 2;
1686                 ip->ip_p = IPPROTO_ICMP;
1687                 ip->ip_id = fin->fin_ip->ip_id;
1688                 ip->ip_tos = fin->fin_ip->ip_tos;
1689                 ip->ip_len = (u_short)sz;
1690                 if (dst == 0) {
1691                         ipf_stack_t *ifs = fin->fin_ifs;
1692 
1693                         if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1694                                        (void *)&dst4, NULL, ifs) == -1) {
1695                                 FREE_MB_T(m);
1696                                 return -1;
1697                         }
1698                 } else {
1699                         dst4 = fin->fin_dst;
1700                 }
1701                 ip->ip_src = dst4;
1702                 ip->ip_dst = fin->fin_src;
1703                 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1704                       sizeof(*fin->fin_ip));
1705                 bcopy((char *)fin->fin_ip + fin->fin_hlen,
1706                       (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1707                 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1708                 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1709                 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1710                                              sz - sizeof(ip_t));
1711         }
1712 
1713         /*
1714          * Need to exit out of these so we don't recursively call rw_enter
1715          * from fr_qout.
1716          */
1717         return fr_send_ip(fin, m, &m);
1718 }
1719 
1720 #include <sys/time.h>
1721 #include <sys/varargs.h>
1722 
1723 #ifndef _KERNEL
1724 #include <stdio.h>
1725 #endif
1726 
1727 /*
1728  * Return the first IP Address associated with an interface
1729  * For IPv6, we walk through the list of logical interfaces and return
1730  * the address of the first one that isn't a link-local interface.
1731  * We can't assume that it is :1 because another link-local address
1732  * may have been assigned there.
1733  */
1734 /*ARGSUSED*/
1735 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1736 int v, atype;
1737 void *ifptr;
1738 struct in_addr  *inp, *inpmask;
1739 ipf_stack_t *ifs;
1740 {
1741         struct sockaddr_in6 v6addr[2];
1742         struct sockaddr_in v4addr[2];
1743         net_ifaddr_t type[2];
1744         net_handle_t net_data;
1745         phy_if_t phyif;
1746         void *array;
1747 
1748         switch (v)
1749         {
1750         case 4:
1751                 net_data = ifs->ifs_ipf_ipv4;
1752                 array = v4addr;
1753                 break;
1754         case 6:
1755                 net_data = ifs->ifs_ipf_ipv6;
1756                 array = v6addr;
1757                 break;
1758         default:
1759                 net_data = NULL;
1760                 break;
1761         }
1762 
1763         if (net_data == NULL)
1764                 return -1;
1765 
1766         phyif = (phy_if_t)ifptr;
1767 
1768         switch (atype)
1769         {
1770         case FRI_PEERADDR :
1771                 type[0] = NA_PEER;
1772                 break;
1773 
1774         case FRI_BROADCAST :
1775                 type[0] = NA_BROADCAST;
1776                 break;
1777 
1778         default :
1779                 type[0] = NA_ADDRESS;
1780                 break;
1781         }
1782 
1783         type[1] = NA_NETMASK;
1784 
1785         if (v == 6) {
1786                 lif_if_t idx = 0;
1787 
1788                 do {
1789                         idx = net_lifgetnext(net_data, phyif, idx);
1790                         if (net_getlifaddr(net_data, phyif, idx, 2, type,
1791                                            array) < 0)
1792                                 return -1;
1793                         if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1794                             !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1795                                 break;
1796                 } while (idx != 0);
1797 
1798                 if (idx == 0)
1799                         return -1;
1800 
1801                 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1802                                         inp, inpmask);
1803         }
1804 
1805         if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1806                 return -1;
1807 
1808         return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1809 }
1810 
1811 
1812 u_32_t fr_newisn(fin)
1813 fr_info_t *fin;
1814 {
1815         static int iss_seq_off = 0;
1816         u_char hash[16];
1817         u_32_t newiss;
1818         MD5_CTX ctx;
1819         ipf_stack_t *ifs = fin->fin_ifs;
1820 
1821         /*
1822          * Compute the base value of the ISS.  It is a hash
1823          * of (saddr, sport, daddr, dport, secret).
1824          */
1825         MD5Init(&ctx);
1826 
1827         MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1828                   sizeof(fin->fin_fi.fi_src));
1829         MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1830                   sizeof(fin->fin_fi.fi_dst));
1831         MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1832 
1833         MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1834 
1835         MD5Final(hash, &ctx);
1836 
1837         bcopy(hash, &newiss, sizeof(newiss));
1838 
1839         /*
1840          * Now increment our "timer", and add it in to
1841          * the computed value.
1842          *
1843          * XXX Use `addin'?
1844          * XXX TCP_ISSINCR too large to use?
1845          */
1846         iss_seq_off += 0x00010000;
1847         newiss += iss_seq_off;
1848         return newiss;
1849 }
1850 
1851 
1852 /* ------------------------------------------------------------------------ */
1853 /* Function:    fr_nextipid                                                 */
1854 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1855 /* Parameters:  fin(I) - pointer to packet information                      */
1856 /*                                                                          */
1857 /* Returns the next IPv4 ID to use for this packet.                         */
1858 /* ------------------------------------------------------------------------ */
1859 u_short fr_nextipid(fin)
1860 fr_info_t *fin;
1861 {
1862         static u_short ipid = 0;
1863         u_short id;
1864         ipf_stack_t *ifs = fin->fin_ifs;
1865 
1866         MUTEX_ENTER(&ifs->ifs_ipf_rw);
1867         if (fin->fin_pktnum != 0) {
1868                 id = fin->fin_pktnum & 0xffff;
1869         } else {
1870                 id = ipid++;
1871         }
1872         MUTEX_EXIT(&ifs->ifs_ipf_rw);
1873 
1874         return id;
1875 }
1876 
1877 
1878 #ifndef IPFILTER_CKSUM
1879 /* ARGSUSED */
1880 #endif
1881 INLINE void fr_checkv4sum(fin)
1882 fr_info_t *fin;
1883 {
1884 #ifdef IPFILTER_CKSUM
1885         if (fr_checkl4sum(fin) == -1)
1886                 fin->fin_flx |= FI_BAD;
1887 #endif
1888 }
1889 
1890 
#ifdef USE_INET6
/*
 * IPv6 counterpart of the layer 4 checksum check: set FI_BAD on the packet
 * when fr_checkl4sum() reports a failure.  Compiles to an empty function
 * unless IPFILTER_CKSUM is defined.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
        int rc = fr_checkl4sum(fin);

        if (rc == -1) {
                fin->fin_flx |= FI_BAD;
        }
# endif
}
#endif /* USE_INET6 */
1904 
1905 
1906 #if (SOLARIS2 < 7)
1907 void fr_slowtimer()
1908 #else
1909 /*ARGSUSED*/
1910 void fr_slowtimer __P((void *arg))
1911 #endif
1912 {
1913         ipf_stack_t *ifs = arg;
1914 
1915         READ_ENTER(&ifs->ifs_ipf_global);
1916         if (ifs->ifs_fr_running != 1) {
1917                 ifs->ifs_fr_timer_id = NULL;
1918                 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1919                 return;
1920         }
1921         ipf_expiretokens(ifs);
1922         fr_fragexpire(ifs);
1923         fr_timeoutstate(ifs);
1924         fr_natexpire(ifs);
1925         fr_authexpire(ifs);
1926         ifs->ifs_fr_ticks++;
1927         if (ifs->ifs_fr_running == 1)
1928                 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1929                     drv_usectohz(500000));
1930         else
1931                 ifs->ifs_fr_timer_id = NULL;
1932         RWLOCK_EXIT(&ifs->ifs_ipf_global);
1933 }
1934 
1935 
1936 /* ------------------------------------------------------------------------ */
1937 /* Function:    fr_pullup                                                   */
1938 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
1939 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
1940 /*              fin(I) - pointer to packet information                      */
1941 /*              len(I) - number of bytes to pullup                          */
1942 /*                                                                          */
1943 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1944 /* single buffer for ease of access.  Operating system native functions are */
1945 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
1946 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
1947 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
1948 /* and ONLY if the pullup succeeds.                                         */
1949 /*                                                                          */
1950 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
1951 /* of buffers that starts at *fin->fin_mp.                                  */
1952 /* ------------------------------------------------------------------------ */
1953 void *fr_pullup(min, fin, len)
1954 mb_t *min;
1955 fr_info_t *fin;
1956 int len;
1957 {
1958         qpktinfo_t *qpi = fin->fin_qpi;
1959         int out = fin->fin_out, dpoff, ipoff;
1960         mb_t *m = min, *m1, *m2;
1961         char *ip;
1962         uint32_t start, stuff, end, value, flags;
1963         ipf_stack_t *ifs = fin->fin_ifs;
1964 
1965         if (m == NULL)
1966                 return NULL;
1967 
1968         ip = (char *)fin->fin_ip;
1969         if ((fin->fin_flx & FI_COALESCE) != 0)
1970                 return ip;
1971 
1972         ipoff = fin->fin_ipoff;
1973         if (fin->fin_dp != NULL)
1974                 dpoff = (char *)fin->fin_dp - (char *)ip;
1975         else
1976                 dpoff = 0;
1977 
1978         if (M_LEN(m) < len + ipoff) {
1979 
1980                 /*
1981                  * pfil_precheck ensures the IP header is on a 32bit
1982                  * aligned address so simply fail if that isn't currently
1983                  * the case (should never happen).
1984                  */
1985                 int inc = 0;
1986 
1987                 if (ipoff > 0) {
1988                         if ((ipoff & 3) != 0) {
1989                                 inc = 4 - (ipoff & 3);
1990                                 if (m->b_rptr - inc >= m->b_datap->db_base)
1991                                         m->b_rptr -= inc;
1992                                 else
1993                                         inc = 0;
1994                         }
1995                 }
1996 
1997                 /*
1998                  * XXX This is here as a work around for a bug with DEBUG
1999                  * XXX Solaris kernels.  The problem is b_prev is used by IP
2000                  * XXX code as a way to stash the phyint_index for a packet,
2001                  * XXX this doesn't get reset by IP but freeb does an ASSERT()
2002                  * XXX for both of these to be NULL.  See 6442390.
2003                  */
2004                 m1 = m;
2005                 m2 = m->b_prev;
2006 
2007                 do {
2008                         m1->b_next = NULL;
2009                         m1->b_prev = NULL;
2010                         m1 = m1->b_cont;
2011                 } while (m1);
2012 
2013                 /*
2014                  * Need to preserve checksum information by copying them
2015                  * to newmp which heads the pulluped message.
2016                  */
2017                 mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);
2018 
2019                 if (pullupmsg(m, len + ipoff + inc) == 0) {
2020                         ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
2021                         FREE_MB_T(*fin->fin_mp);
2022                         *fin->fin_mp = NULL;
2023                         fin->fin_m = NULL;
2024                         fin->fin_ip = NULL;
2025                         fin->fin_dp = NULL;
2026                         qpi->qpi_data = NULL;
2027                         return NULL;
2028                 }
2029 
2030                 mac_hcksum_set(m, start, stuff, end, value, flags);
2031 
2032                 m->b_prev = m2;
2033                 m->b_rptr += inc;
2034                 fin->fin_m = m;
2035                 ip = MTOD(m, char *) + ipoff;
2036                 qpi->qpi_data = ip;
2037         }
2038 
2039         ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
2040         fin->fin_ip = (ip_t *)ip;
2041         if (fin->fin_dp != NULL)
2042                 fin->fin_dp = (char *)fin->fin_ip + dpoff;
2043 
2044         if (len == fin->fin_plen)
2045                 fin->fin_flx |= FI_COALESCE;
2046         return ip;
2047 }
2048 
2049 
2050 /*
2051  * Function:    fr_verifysrc
2052  * Returns:     int (really boolean)
2053  * Parameters:  fin - packet information
2054  *
2055  * Check whether the packet has a valid source address for the interface on
2056  * which the packet arrived, implementing the "fr_chksrc" feature.
2057  * Returns true iff the packet's source address is valid.
2058  */
2059 int fr_verifysrc(fin)
2060 fr_info_t *fin;
2061 {
2062         net_handle_t net_data_p;
2063         phy_if_t phy_ifdata_routeto;
2064         struct sockaddr sin;
2065         ipf_stack_t *ifs = fin->fin_ifs;
2066 
2067         if (fin->fin_v == 4) { 
2068                 net_data_p = ifs->ifs_ipf_ipv4;
2069         } else if (fin->fin_v == 6) { 
2070                 net_data_p = ifs->ifs_ipf_ipv6;
2071         } else { 
2072                 return (0); 
2073         }
2074 
2075         /* Get the index corresponding to the if name */
2076         sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2077         bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
2078         phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
2079 
2080         return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 
2081 }
2082 
2083 /*
2084  * Return true only if forwarding is enabled on the interface.
2085  */
2086 static int
2087 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
2088 {
2089         lif_if_t lif;
2090 
2091         for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
2092             lif = net_lifgetnext(ndp, phyif, lif)) {
2093                 int res;
2094                 uint64_t flags;
2095 
2096                 res = net_getlifflags(ndp, phyif, lif, &flags);
2097                 if (res != 0)
2098                         return (0);
2099                 if (flags & IFF_ROUTER)
2100                         return (1);
2101         }
2102 
2103         return (0);
2104 }
2105 
2106 /*
2107  * Function:    fr_fastroute
2108  * Returns:      0: success;
2109  *              -1: failed
2110  * Parameters:
2111  *      mb: the message block where ip head starts
2112  *      mpp: the pointer to the pointer of the orignal
2113  *              packet message
2114  *      fin: packet information
2115  *      fdp: destination interface information
2116  *      if it is NULL, no interface information provided.
2117  *
2118  * This function is for fastroute/to/dup-to rules. It calls
2119  * pfil_make_lay2_packet to search route, make lay-2 header
2120  * ,and identify output queue for the IP packet.
2121  * The destination address depends on the following conditions:
2122  * 1: for fastroute rule, fdp is passed in as NULL, so the
2123  *      destination address is the IP Packet's destination address
2124  * 2: for to/dup-to rule, if an ip address is specified after
2125  *      the interface name, this address is the as destination
2126  *      address. Otherwise IP Packet's destination address is used
2127  */
2128 int fr_fastroute(mb, mpp, fin, fdp)
2129 mblk_t *mb, **mpp;
2130 fr_info_t *fin;
2131 frdest_t *fdp;
2132 {
2133         net_handle_t net_data_p;
2134         net_inject_t *inj;
2135         mblk_t *mp = NULL;
2136         frentry_t *fr = fin->fin_fr;
2137         qpktinfo_t *qpi;
2138         ip_t *ip;
2139 
2140         struct sockaddr_in *sin;
2141         struct sockaddr_in6 *sin6;
2142         struct sockaddr *sinp;
2143         ipf_stack_t *ifs = fin->fin_ifs;
2144 #ifndef sparc
2145         u_short __iplen, __ipoff;
2146 #endif
2147 
2148         if (fin->fin_v == 4) {
2149                 net_data_p = ifs->ifs_ipf_ipv4;
2150         } else if (fin->fin_v == 6) {
2151                 net_data_p = ifs->ifs_ipf_ipv6;
2152         } else {
2153                 return (-1);
2154         }
2155 
2156         /*
2157          * If we're forwarding (vs. injecting), check the src here, fin_ifp is
2158          * the src interface.
2159          */
2160         if (fdp != NULL &&
2161            !fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p))
2162                 return (-1);
2163 
2164         inj = net_inject_alloc(NETINFO_VERSION);
2165         if (inj == NULL)
2166                 return -1;
2167 
2168         ip = fin->fin_ip;
2169         qpi = fin->fin_qpi;
2170 
2171         /*
2172          * If this is a duplicate mblk then we want ip to point at that
2173          * data, not the original, if and only if it is already pointing at
2174          * the current mblk data.
2175          *
2176          * Otherwise, if it's not a duplicate, and we're not already pointing
2177          * at the current mblk data, then we want to ensure that the data
2178          * points at ip.
2179          */
2180 
2181         if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
2182                 ip = (ip_t *)mb->b_rptr;
2183         } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
2184                 qpi->qpi_m->b_rptr = (uchar_t *)ip;
2185                 qpi->qpi_off = 0;
2186         }
2187 
2188         /*
2189          * If there is another M_PROTO, we don't want it
2190          */
2191         if (*mpp != mb) {
2192                 mp = unlinkb(*mpp);
2193                 freeb(*mpp);
2194                 *mpp = mp;
2195         }
2196 
2197         sinp = (struct sockaddr *)&inj->ni_addr;
2198         sin = (struct sockaddr_in *)sinp;
2199         sin6 = (struct sockaddr_in6 *)sinp;
2200         bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
2201         inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2202         inj->ni_packet = mb;
2203 
2204         /*
2205          * In case we're here due to "to <if>" being used with
2206          * "keep state", check that we're going in the correct
2207          * direction.
2208          */
2209         if (fdp != NULL) {
2210                 if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
2211                         (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
2212                         goto bad_fastroute;
2213                 inj->ni_physical = (phy_if_t)fdp->fd_ifp;
2214                 if (fin->fin_v == 4) {
2215                         sin->sin_addr = fdp->fd_ip;
2216                 } else {
2217                         sin6->sin6_addr = fdp->fd_ip6.in6;
2218                 }
2219         } else {
2220                 if (fin->fin_v == 4) {
2221                         sin->sin_addr = ip->ip_dst;
2222                 } else {
2223                         sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
2224                 }
2225                 inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
2226         }
2227 
2228         /* If we're forwarding (vs. injecting), check the destinatation here. */
2229         if (fdp != NULL && !fr_forwarding_enabled(inj->ni_physical, net_data_p))
2230                 goto bad_fastroute;
2231 
2232         /*
2233          * Clear the hardware checksum flags from packets that we are doing
2234          * input processing on as leaving them set will cause the outgoing
2235          * NIC (if it supports hardware checksum) to calculate them anew,
2236          * using the old (correct) checksums as the pseudo value to start
2237          * from.
2238          */
2239         if (fin->fin_out == 0) {
2240                 DB_CKSUMFLAGS(mb) = 0;
2241         }
2242 
2243         *mpp = mb;
2244 
2245         if (fin->fin_out == 0) {
2246                 void *saveifp;
2247                 u_32_t pass;
2248 
2249                 saveifp = fin->fin_ifp;
2250                 fin->fin_ifp = (void *)inj->ni_physical;
2251                 fin->fin_flx &= ~FI_STATE;
2252                 fin->fin_out = 1;
2253                 (void) fr_acctpkt(fin, &pass);
2254                 fin->fin_fr = NULL;
2255                 if (!fr || !(fr->fr_flags & FR_RETMASK))
2256                         (void) fr_checkstate(fin, &pass);
2257                 if (fr_checknatout(fin, NULL) == -1)
2258                         goto bad_fastroute;
2259                 fin->fin_out = 0;
2260                 fin->fin_ifp = saveifp;
2261         }
2262 #ifndef sparc
2263         if (fin->fin_v == 4) {
2264                 __iplen = (u_short)ip->ip_len,
2265                 __ipoff = (u_short)ip->ip_off;
2266 
2267                 ip->ip_len = htons(__iplen);
2268                 ip->ip_off = htons(__ipoff);
2269         }
2270 #endif
2271 
2272         if (net_data_p) {
2273                 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
2274                         net_inject_free(inj);
2275                         return (-1);
2276                 }
2277         }
2278 
2279         ifs->ifs_fr_frouteok[0]++;
2280         net_inject_free(inj);
2281         return 0;
2282 bad_fastroute:
2283         net_inject_free(inj);
2284         freemsg(mb);
2285         ifs->ifs_fr_frouteok[1]++;
2286         return -1;
2287 }
2288 
2289 
2290 /* ------------------------------------------------------------------------ */
2291 /* Function:    ipf_hook4_out                                               */
2292 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2293 /* Parameters:  event(I)     - pointer to event                             */
2294 /*              info(I)      - pointer to hook information for firewalling  */
2295 /*                                                                          */
2296 /* Calling ipf_hook.                                                        */
2297 /* ------------------------------------------------------------------------ */
2298 /*ARGSUSED*/
2299 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
2300 {
2301         return ipf_hook(info, 1, 0, arg);
2302 }
2303 /*ARGSUSED*/
2304 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
2305 {
2306         return ipf_hook6(info, 1, 0, arg);
2307 }
2308 
2309 /* ------------------------------------------------------------------------ */
2310 /* Function:    ipf_hook4_in                                                */
2311 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2312 /* Parameters:  event(I)     - pointer to event                             */
2313 /*              info(I)      - pointer to hook information for firewalling  */
2314 /*                                                                          */
2315 /* Calling ipf_hook.                                                        */
2316 /* ------------------------------------------------------------------------ */
2317 /*ARGSUSED*/
2318 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
2319 {
2320         return ipf_hook(info, 0, 0, arg);
2321 }
2322 /*ARGSUSED*/
2323 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
2324 {
2325         return ipf_hook6(info, 0, 0, arg);
2326 }
2327 
2328 
2329 /* ------------------------------------------------------------------------ */
2330 /* Function:    ipf_hook4_loop_out                                          */
2331 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2332 /* Parameters:  event(I)     - pointer to event                             */
2333 /*              info(I)      - pointer to hook information for firewalling  */
2334 /*                                                                          */
2335 /* Calling ipf_hook.                                                        */
2336 /* ------------------------------------------------------------------------ */
2337 /*ARGSUSED*/
2338 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2339 {
2340         return ipf_hook(info, 1, FI_NOCKSUM, arg);
2341 }
2342 /*ARGSUSED*/
2343 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2344 {
2345         return ipf_hook6(info, 1, FI_NOCKSUM, arg);
2346 }
2347 
2348 /* ------------------------------------------------------------------------ */
2349 /* Function:    ipf_hookvndl3_in                                            */
2350 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2351 /* Parameters:  event(I)     - pointer to event                             */
2352 /*              info(I)      - pointer to hook information for firewalling  */
2353 /*                                                                          */
2354 /* The vnd hooks are private hooks to ON. They represents a layer 2         */
2355 /* datapath generally used to implement virtual machines. The driver sends  */
2356 /* along L3 packets of either type IP or IPv6. The ethertype to distinguish */
2357 /* them is in the upper 16 bits while the remaining bits are the            */
2358 /* traditional packet hook flags.                                           */
2359 /*                                                                          */
2360 /* They end up calling the appropriate traditional ip hooks.                */
2361 /* ------------------------------------------------------------------------ */
2362 /*ARGSUSED*/
2363 int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg)
2364 {
2365         return ipf_hook4_in(token, info, arg);
2366 }
2367 
2368 int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg)
2369 {
2370         return ipf_hook6_in(token, info, arg);
2371 }
2372 
2373 /*ARGSUSED*/
2374 int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg)
2375 {
2376         return ipf_hook4_out(token, info, arg);
2377 }
2378 
2379 int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg)
2380 {
2381         return ipf_hook6_out(token, info, arg);
2382 }
2383 
2384 /* Static constants used by ipf_hook_ether */
2385 static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
2386         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
2387 };
2388 static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
2389 static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 };
2390 
2391 /* ------------------------------------------------------------------------ */
2392 /* Function:    ipf_hook_ether                                              */
2393 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2394 /* Parameters:  token(I)     - pointer to event                             */
2395 /*              info(I)      - pointer to hook information for firewalling  */
2396 /*                                                                          */
2397 /* The ipf_hook_ether hook is currently private to illumos.  It represents  */
2398 /* a layer 2 datapath generally used by virtual machines.  Currently the    */
2399 /* hook is only used by the viona driver to pass along L2 frames for        */
2400 /* inspection.  It requires that the L2 ethernet header is contained within */
2401 /* a single dblk_t (however layers above the L2 header have no restrictions */
2402 /* in ipf).  ipf does not currently support filtering on L2 fields (e.g.    */
2403 /* filtering on a MAC address or ethertype), however virtual machines do    */
2404 /* not have native IP stack instances where ipf traditionally hooks in.     */
2405 /* Instead this entry point is used to determine if the packet is unicast,  */
2406 /* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the   */
2407 /* traditional ip hooks for filtering.  Non IPv4 or non IPv6 packets are    */
2408 /* not subject to examination.                                              */
2409 /* ------------------------------------------------------------------------ */
2410 int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
2411     boolean_t out)
2412 {
2413         struct ether_header *ethp;
2414         hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
2415         mblk_t *mp;
2416         size_t offset, len;
2417         uint16_t etype;
2418         boolean_t v6;
2419 
2420         /*
2421          * viona will only pass us mblks with the L2 header contained in a
2422          * single data block.
2423          */
2424         mp = *hpe->hpe_mp;
2425         len = MBLKL(mp);
2426 
2427         VERIFY3S(len, >=, sizeof (struct ether_header));
2428 
2429         ethp = (struct ether_header *)mp->b_rptr;
2430         if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
2431                 struct ether_vlan_header *evh =
2432                     (struct ether_vlan_header *)ethp;
2433 
2434                 VERIFY3S(len, >=, sizeof (struct ether_vlan_header));
2435 
2436                 etype = ntohs(evh->ether_type);
2437                 offset = sizeof (*evh);
2438         } else {
2439                 offset = sizeof (*ethp);
2440         }
2441 
2442         /*
2443          * ipf only support filtering IPv4 and IPv6.  Ignore other types.
2444          */
2445         if (etype == ETHERTYPE_IP)
2446                 v6 = B_FALSE;
2447         else if (etype == ETHERTYPE_IPV6)
2448                 v6 = B_TRUE;
2449         else
2450                 return (0);
2451 
2452         if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
2453                 hpe->hpe_flags |= HPE_BROADCAST;
2454         else if (bcmp(ipf_eth_ipv4_mcast, ethp,
2455             sizeof (ipf_eth_ipv4_mcast)) == 0)
2456                 hpe->hpe_flags |= HPE_MULTICAST;
2457         else if (bcmp(ipf_eth_ipv6_mcast, ethp,
2458             sizeof (ipf_eth_ipv6_mcast)) == 0)
2459                 hpe->hpe_flags |= HPE_MULTICAST;
2460 
2461         /* Find the start of the IPv4 or IPv6 header */
2462         for (; offset >= len; len = MBLKL(mp)) {
2463                 offset -= len;
2464                 mp = mp->b_cont;
2465                 if (mp == NULL) {
2466                         freemsg(*hpe->hpe_mp);
2467                         *hpe->hpe_mp = NULL;
2468                         return (-1);
2469                 }
2470         }
2471         hpe->hpe_mb = mp;
2472         hpe->hpe_hdr = mp->b_rptr + offset;
2473 
2474         return (v6 ? ipf_hook6(info, out, 0, arg) :
2475             ipf_hook(info, out, 0, arg));
2476 }
2477 
2478 /* ------------------------------------------------------------------------ */
2479 /* Function:    ipf_hookviona_{in,out}                                      */
2480 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2481 /* Parameters:  event(I)     - pointer to event                             */
2482 /*              info(I)      - pointer to hook information for firewalling  */
2483 /*                                                                          */
2484 /* The viona hooks are private hooks to illumos. They represents a layer 2  */
2485 /* datapath generally used to implement virtual machines.                   */
2486 /* along L2 packets.                                                        */
2487 /*                                                                          */
2488 /* They end up calling the appropriate traditional ip hooks.                */
2489 /* ------------------------------------------------------------------------ */
2490 int
2491 ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
2492 {
2493         return (ipf_hook_ether(token, info, arg, B_FALSE));
2494 }
2495 
2496 int
2497 ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
2498 {
2499         return (ipf_hook_ether(token, info, arg, B_TRUE));
2500 }
2501 
2502 /* ------------------------------------------------------------------------ */
2503 /* Function:    ipf_hook4_loop_in                                           */
2504 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2505 /* Parameters:  event(I)     - pointer to event                             */
2506 /*              info(I)      - pointer to hook information for firewalling  */
2507 /*                                                                          */
2508 /* Calling ipf_hook.                                                        */
2509 /* ------------------------------------------------------------------------ */
2510 /*ARGSUSED*/
2511 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2512 {
2513         return ipf_hook(info, 0, FI_NOCKSUM, arg);
2514 }
2515 /*ARGSUSED*/
2516 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2517 {
2518         return ipf_hook6(info, 0, FI_NOCKSUM, arg);
2519 }
2520 
2521 /* ------------------------------------------------------------------------ */
2522 /* Function:    ipf_hook                                                    */
2523 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2524 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
2525 /*              out(I)       - whether packet is going in or out            */
2526 /*              loopback(I)  - whether packet is a loopback packet or not   */
2527 /*                                                                          */
2528 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
2529 /* parameters out of the info structure and forms them up to be useful for  */
2530 /* calling ipfilter.                                                        */
2531 /* ------------------------------------------------------------------------ */
2532 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
2533 {
2534         hook_pkt_event_t *fw;
2535         ipf_stack_t *ifs;
2536         qpktinfo_t qpi;
2537         int rval, hlen;
2538         u_short swap;
2539         phy_if_t phy; 
2540         ip_t *ip;
2541 
2542         ifs = arg;
2543         fw = (hook_pkt_event_t *)info;
2544 
2545         ASSERT(fw != NULL);
2546         phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2547 
2548         ip = fw->hpe_hdr;
2549         swap = ntohs(ip->ip_len);
2550         ip->ip_len = swap;
2551         swap = ntohs(ip->ip_off);
2552         ip->ip_off = swap;
2553         hlen = IPH_HDR_LENGTH(ip);
2554 
2555         qpi.qpi_m = fw->hpe_mb;
2556         qpi.qpi_data = fw->hpe_hdr;
2557         qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2558         qpi.qpi_ill = (void *)phy;
2559         qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2560         if (qpi.qpi_flags)
2561                 qpi.qpi_flags |= FI_MBCAST;
2562         qpi.qpi_flags |= loopback;
2563 
2564         rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2565             &qpi, fw->hpe_mp, ifs);
2566 
2567         /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2568         if (rval == 0 && *(fw->hpe_mp) == NULL)
2569                 rval = 1;
2570 
2571         /* Notify IP the packet mblk_t and IP header pointers. */
2572         fw->hpe_mb = qpi.qpi_m;
2573         fw->hpe_hdr = qpi.qpi_data;
2574         if (rval == 0) {
2575                 ip = qpi.qpi_data;
2576                 swap = ntohs(ip->ip_len);
2577                 ip->ip_len = swap;
2578                 swap = ntohs(ip->ip_off);
2579                 ip->ip_off = swap;
2580         }
2581         return rval;
2582 
2583 }
2584 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2585 {
2586         hook_pkt_event_t *fw;
2587         int rval, hlen;
2588         qpktinfo_t qpi;
2589         phy_if_t phy; 
2590 
2591         fw = (hook_pkt_event_t *)info;
2592 
2593         ASSERT(fw != NULL);
2594         phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2595 
2596         hlen = sizeof (ip6_t);
2597 
2598         qpi.qpi_m = fw->hpe_mb;
2599         qpi.qpi_data = fw->hpe_hdr;
2600         qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2601         qpi.qpi_ill = (void *)phy;
2602         qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2603         if (qpi.qpi_flags)
2604                 qpi.qpi_flags |= FI_MBCAST;
2605         qpi.qpi_flags |= loopback;
2606 
2607         rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2608             &qpi, fw->hpe_mp, arg);
2609 
2610         /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2611         if (rval == 0 && *(fw->hpe_mp) == NULL)
2612                 rval = 1;
2613 
2614         /* Notify IP the packet mblk_t and IP header pointers. */
2615         fw->hpe_mb = qpi.qpi_m;
2616         fw->hpe_hdr = qpi.qpi_data;
2617         return rval;
2618 }
2619 
2620 
2621 /* ------------------------------------------------------------------------ */
2622 /* Function:    ipf_nic_event_v4                                            */
2623 /* Returns:     int - 0 == no problems encountered                          */
2624 /* Parameters:  event(I)     - pointer to event                             */
2625 /*              info(I)      - pointer to information about a NIC event     */
2626 /*                                                                          */
2627 /* Function to receive asynchronous NIC events from IP                      */
2628 /* ------------------------------------------------------------------------ */
2629 /*ARGSUSED*/
2630 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2631 {
2632         struct sockaddr_in *sin;
2633         hook_nic_event_t *hn;
2634         ipf_stack_t *ifs = arg;
2635         void *new_ifp = NULL;
2636 
2637         if (ifs->ifs_fr_running <= 0)
2638                 return (0);
2639 
2640         hn = (hook_nic_event_t *)info;
2641 
2642         switch (hn->hne_event)
2643         {
2644         case NE_PLUMB :
2645                 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2646                        ifs);
2647                 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2648                               hn->hne_data, ifs);
2649                 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2650                              hn->hne_data, ifs);
2651                 break;
2652 
2653         case NE_UNPLUMB :
2654                 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2655                 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2656                               ifs);
2657                 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2658                 break;
2659 
2660         case NE_ADDRESS_CHANGE :
2661                 /*
2662                  * We only respond to events for logical interface 0 because
2663                  * IPFilter only uses the first address given to a network
2664                  * interface.  We check for hne_lif==1 because the netinfo
2665                  * code maps adds 1 to the lif number so that it can return
2666                  * 0 to indicate "no more lifs" when walking them.
2667                  */
2668                 if (hn->hne_lif == 1) {
2669                         frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2670                             ifs);
2671                         sin = hn->hne_data;
2672                         fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2673                             ifs);
2674                 }
2675                 break;
2676 
2677 #if SOLARIS2 >= 10
2678         case NE_IFINDEX_CHANGE :
2679                 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2680 
2681                 if (hn->hne_data != NULL) {
2682                         /*
2683                          * The netinfo passes interface index as int (hne_data should be
2684                          * handled as a pointer to int), which is always 32bit. We need to
2685                          * convert it to void pointer here, since interfaces are
2686                          * represented as pointers to void in IPF. The pointers are 64 bits
2687                          * long on 64bit platforms. Doing something like
2688                          *      (void *)((int) x)
2689                          * will throw warning:
2690                          *   "cast to pointer from integer of different size"
2691                          * during 64bit compilation.
2692                          *
2693                          * The line below uses (size_t) to typecast int to
2694                          * size_t, which might be 64bit/32bit (depending
2695                          * on architecture). Once we have proper 64bit/32bit
2696                          * type (size_t), we can safely convert it to void pointer.
2697                          */
2698                         new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2699                         fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2700                         fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2701                         fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2702                 }
2703                 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2704                 break;
2705 #endif
2706 
2707         default :
2708                 break;
2709         }
2710 
2711         return 0;
2712 }
2713 
2714 
2715 /* ------------------------------------------------------------------------ */
2716 /* Function:    ipf_nic_event_v6                                            */
2717 /* Returns:     int - 0 == no problems encountered                          */
2718 /* Parameters:  event(I)     - pointer to event                             */
2719 /*              info(I)      - pointer to information about a NIC event     */
2720 /*                                                                          */
2721 /* Function to receive asynchronous NIC events from IP                      */
2722 /* ------------------------------------------------------------------------ */
2723 /*ARGSUSED*/
2724 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2725 {
2726         struct sockaddr_in6 *sin6;
2727         hook_nic_event_t *hn;
2728         ipf_stack_t *ifs = arg;
2729         void *new_ifp = NULL;
2730 
2731         if (ifs->ifs_fr_running <= 0)
2732                 return (0);
2733 
2734         hn = (hook_nic_event_t *)info;
2735 
2736         switch (hn->hne_event)
2737         {
2738         case NE_PLUMB :
2739                 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2740                        hn->hne_data, ifs);
2741                 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2742                               hn->hne_data, ifs);
2743                 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2744                              hn->hne_data, ifs);
2745                 break;
2746 
2747         case NE_UNPLUMB :
2748                 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2749                 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2750                               ifs);
2751                 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2752                 break;
2753 
2754         case NE_ADDRESS_CHANGE :
2755                 if (hn->hne_lif == 1) {
2756                         sin6 = hn->hne_data;
2757                         fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2758                                        ifs);
2759                 }
2760                 break;
2761 
2762 #if SOLARIS2 >= 10
2763         case NE_IFINDEX_CHANGE :
2764                 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2765                 if (hn->hne_data != NULL) {
2766                         /*
2767                          * The netinfo passes interface index as int (hne_data should be
2768                          * handled as a pointer to int), which is always 32bit. We need to
2769                          * convert it to void pointer here, since interfaces are
2770                          * represented as pointers to void in IPF. The pointers are 64 bits
2771                          * long on 64bit platforms. Doing something like
2772                          *      (void *)((int) x)
2773                          * will throw warning:
2774                          *   "cast to pointer from integer of different size"
2775                          * during 64bit compilation.
2776                          *
2777                          * The line below uses (size_t) to typecast int to
2778                          * size_t, which might be 64bit/32bit (depending
2779                          * on architecture). Once we have proper 64bit/32bit
2780                          * type (size_t), we can safely convert it to void pointer.
2781                          */
2782                         new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2783                         fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2784                         fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2785                         fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2786                 }
2787                 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2788                 break;
2789 #endif
2790 
2791         default :
2792                 break;
2793         }
2794 
2795         return 0;
2796 }
2797 
2798 /*
2799  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2800  * are needed in Solaris kernel only. We don't need them in
2801  * ipftest to pretend the ICMP/RST packet was sent as a response.
2802  */
2803 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2804 /* ------------------------------------------------------------------------ */
2805 /* Function:    fr_make_rst                                                 */
2806 /* Returns:     int - 0 on success, -1 on failure                           */
2807 /* Parameters:  fin(I) - pointer to packet information                      */
2808 /*                                                                          */
2809 /* We must alter the original mblks passed to IPF from IP stack via         */
2810 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations.  */
2811 /* IPF can basicaly do only these things with mblk representing the packet: */
2812 /*      leave it as it is (pass the packet)                                 */
2813 /*                                                                          */
2814 /*      discard it (block the packet)                                       */
2815 /*                                                                          */
2816 /*      alter it (i.e. NAT)                                                 */
2817 /*                                                                          */
2818 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2819 /* instead to IP stack via FW_HOOKS.                                        */
2820 /*                                                                          */
2821 /* The return-rst action for packets coming via NIC is handled as follows:  */
2822 /*      mblk with packet is discarded                                       */
2823 /*                                                                          */
2824 /*      new mblk with RST response is constructed and injected to network   */
2825 /*                                                                          */
2826 /* IPF can't inject packets to loopback interface, this is just another     */
2827 /* limitation we have to deal with here. The only option to send RST        */
2828 /* response to offending TCP packet coming via loopback is to alter it.     */
2829 /*                                                                          */
2830 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on      */
2831 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to     */
2832 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.                            */
2833 /* ------------------------------------------------------------------------ */
2834 int fr_make_rst(fin)
2835 fr_info_t *fin;
2836 {
2837         uint16_t tmp_port;
2838         int rv = -1;
2839         uint32_t old_ack;
2840         tcphdr_t *tcp = NULL;
2841         struct in_addr tmp_src;
2842 #ifdef USE_INET6
2843         struct in6_addr tmp_src6;
2844 #endif
2845 
2846         ASSERT(fin->fin_p == IPPROTO_TCP);
2847 
2848         /*
2849          * We do not need to adjust chksum, since it is not being checked by
2850          * Solaris IP stack for loopback clients.
2851          */
2852         if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2853             ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2854 
2855                 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2856                         /* Swap IPv4 addresses. */
2857                         tmp_src = fin->fin_ip->ip_src;
2858                         fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2859                         fin->fin_ip->ip_dst = tmp_src;
2860 
2861                         rv = 0;
2862                 }
2863                 else
2864                         tcp = NULL;
2865         }
2866 #ifdef USE_INET6
2867         else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2868             ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2869                 /*
2870                  * We are relying on fact the next header is TCP, which is true
2871                  * for regular TCP packets coming in over loopback.
2872                  */
2873                 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2874                         /* Swap IPv6 addresses. */
2875                         tmp_src6 = fin->fin_ip6->ip6_src;
2876                         fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2877                         fin->fin_ip6->ip6_dst = tmp_src6;
2878 
2879                         rv = 0;
2880                 }
2881                 else
2882                         tcp = NULL;
2883         }
2884 #endif
2885 
2886         if (tcp != NULL) {
2887                 /*
2888                  * Adjust TCP header:
2889                  *      swap ports,
2890                  *      set flags,
2891                  *      set correct ACK number
2892                  */
2893                 tmp_port = tcp->th_sport;
2894                 tcp->th_sport = tcp->th_dport;
2895                 tcp->th_dport = tmp_port;
2896                 old_ack = tcp->th_ack;
2897                 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2898                 tcp->th_seq = old_ack;
2899                 tcp->th_flags = TH_RST | TH_ACK;
2900         }
2901 
2902         return (rv);
2903 }
2904 
2905 /* ------------------------------------------------------------------------ */
2906 /* Function:    fr_make_icmp_v4                                             */
2907 /* Returns:     int - 0 on success, -1 on failure                           */
2908 /* Parameters:  fin(I) - pointer to packet information                      */
2909 /*                                                                          */
2910 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2911 /* what is going to happen here and why. Once you read the comment there,   */
2912 /* continue here with next paragraph.                                       */
2913 /*                                                                          */
2914 /* To turn IPv4 packet into ICMPv4 response packet, these things must       */
2915 /* happen here:                                                             */
2916 /*      (1) Original mblk is copied (duplicated).                           */
2917 /*                                                                          */
2918 /*      (2) ICMP header is created.                                         */
2919 /*                                                                          */
2920 /*      (3) Link ICMP header with copy of original mblk, we have ICMPv4     */
2921 /*          data ready then.                                                */
2922 /*                                                                          */
2923 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2924 /*                                                                          */
2925 /*      (5) The mblk containing original packet is trimmed to contain IP    */
2926 /*          header only and ICMP chksum is computed.                        */
2927 /*                                                                          */
2928 /*      (6) The ICMP header we have from (3) is linked to original mblk,    */
2929 /*          which now contains new IP header. If original packet was spread */
2930 /*          over several mblks, only the first mblk is kept.                */
2931 /* ------------------------------------------------------------------------ */
2932 static int fr_make_icmp_v4(fin)
2933 fr_info_t *fin;
2934 {
2935         struct in_addr tmp_src;
2936         tcphdr_t *tcp;
2937         struct icmp *icmp;
2938         mblk_t *mblk_icmp;
2939         mblk_t *mblk_ip;
2940         size_t icmp_pld_len;    /* octets to append to ICMP header */
2941         size_t orig_iphdr_len;  /* length of IP header only */
2942         uint32_t sum;
2943         uint16_t *buf;
2944         int len;
2945 
2946 
2947         if (fin->fin_v != 4)
2948                 return (-1);
2949 
2950         /*
2951          * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2952          * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2953          */
2954         tcp = (tcphdr_t *) fin->fin_dp;
2955 
2956         if ((fin->fin_p == IPPROTO_TCP) && 
2957             ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2958                 return (-1);
2959 
2960         /*
2961          * Step (1)
2962          *
2963          * Make copy of original mblk.
2964          *
2965          * We want to copy as much data as necessary, not less, not more.  The
2966          * ICMPv4 payload length for unreachable messages is:
2967          *      original IP header + 8 bytes of L4 (if there are any).
2968          *
2969          * We determine if there are at least 8 bytes of L4 data following IP
2970          * header first.
2971          */
2972         icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2973                 ICMPERR_ICMPHLEN : fin->fin_dlen;
2974         /*
2975          * Since we don't want to copy more data than necessary, we must trim
2976          * the original mblk here.  The right way (STREAMish) would be to use
2977          * adjmsg() to trim it.  However we would have to calculate the length
2978          * argument for adjmsg() from pointers we already have here.
2979          *
2980          * Since we have pointers and offsets, it's faster and easier for
2981          * us to just adjust pointers by hand instead of using adjmsg().
2982          */
2983         fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2984         fin->fin_m->b_wptr += icmp_pld_len;
2985         icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2986 
2987         /*
2988          * Also we don't want to copy any L2 stuff, which might precede IP
2989          * header, so we have have to set b_rptr to point to the start of IP
2990          * header.
2991          */
2992         fin->fin_m->b_rptr += fin->fin_ipoff;
2993         if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2994                 return (-1);
2995         fin->fin_m->b_rptr -= fin->fin_ipoff;
2996 
2997         /*
2998          * Step (2)
2999          *
3000          * Create an ICMP header, which will be appened to original mblk later.
3001          * ICMP header is just another mblk.
3002          */
3003         mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
3004         if (mblk_icmp == NULL) {
3005                 FREE_MB_T(mblk_ip);
3006                 return (-1);
3007         }
3008 
3009         MTYPE(mblk_icmp) = M_DATA;
3010         icmp = (struct icmp *) mblk_icmp->b_wptr;
3011         icmp->icmp_type = ICMP_UNREACH;
3012         icmp->icmp_code = fin->fin_icode & 0xFF;
3013         icmp->icmp_void = 0;
3014         icmp->icmp_cksum = 0;
3015         mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
3016 
3017         /*
3018          * Step (3)
3019          *
3020          * Complete ICMP packet - link ICMP header with L4 data from original
3021          * IP packet.
3022          */
3023         linkb(mblk_icmp, mblk_ip);
3024 
3025         /*
3026          * Step (4)
3027          *
3028          * Swap IP addresses and change IP header fields accordingly in
3029          * original IP packet.
3030          *
3031          * There is a rule option return-icmp as a dest for physical
3032          * interfaces. This option becomes useless for loopback, since IPF box
3033          * uses same address as a loopback destination. We ignore the option
3034          * here, the ICMP packet will always look like as it would have been
3035          * sent from the original destination host.
3036          */
3037         tmp_src = fin->fin_ip->ip_src;
3038         fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
3039         fin->fin_ip->ip_dst = tmp_src;
3040         fin->fin_ip->ip_p = IPPROTO_ICMP;
3041         fin->fin_ip->ip_sum = 0;
3042 
3043         /*
3044          * Step (5)
3045          *
3046          * We trim the orignal mblk to hold IP header only.
3047          */
3048         fin->fin_m->b_wptr = fin->fin_dp;
3049         orig_iphdr_len = fin->fin_m->b_wptr -
3050                             (fin->fin_m->b_rptr + fin->fin_ipoff);
3051         fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
3052                             orig_iphdr_len);
3053 
3054         /*
3055          * ICMP chksum calculation. The data we are calculating chksum for are
3056          * spread over two mblks, therefore we have to use two for loops.
3057          *
3058          * First for loop computes chksum part for ICMP header.
3059          */
3060         buf = (uint16_t *) icmp;
3061         len = ICMPERR_ICMPHLEN;
3062         for (sum = 0; len > 1; len -= 2)
3063                 sum += *buf++;
3064 
3065         /*
3066          * Here we add chksum part for ICMP payload.
3067          */
3068         len = icmp_pld_len;
3069         buf = (uint16_t *) mblk_ip->b_rptr;
3070         for (; len > 1; len -= 2)
3071                 sum += *buf++;
3072 
3073         /*
3074          * Chksum is done.
3075          */
3076         sum = (sum >> 16) + (sum & 0xffff);
3077         sum += (sum >> 16);
3078         icmp->icmp_cksum = ~sum; 
3079 
3080         /*
3081          * Step (6)
3082          *
3083          * Release all packet mblks, except the first one.
3084          */
3085         if (fin->fin_m->b_cont != NULL) {
3086                 FREE_MB_T(fin->fin_m->b_cont);
3087         }
3088 
3089         /*
3090          * Append ICMP payload to first mblk, which already contains new IP
3091          * header.
3092          */
3093         linkb(fin->fin_m, mblk_icmp);
3094 
3095         return (0);
3096 }
3097 
3098 #ifdef USE_INET6
3099 /* ------------------------------------------------------------------------ */
3100 /* Function:    fr_make_icmp_v6                                             */
3101 /* Returns:     int - 0 on success, -1 on failure                           */
3102 /* Parameters:  fin(I) - pointer to packet information                      */
3103 /*                                                                          */
3104 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
3105 /* what and why is going to happen here. Once you read the comment there,   */
3106 /* continue here with next paragraph.                                       */
3107 /*                                                                          */
3108 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
3109 /* The algorithm is fairly simple:                                          */
3110 /*      1) We need to get copy of complete mblk.                            */
3111 /*                                                                          */
3112 /*      2) New ICMPv6 header is created.                                    */
3113 /*                                                                          */
3114 /*      3) The copy of original mblk with packet is linked to ICMPv6        */
3115 /*         header.                                                          */
3116 /*                                                                          */
3117 /*      4) The checksum must be adjusted.                                   */
3118 /*                                                                          */
3119 /*      5) IP addresses in original mblk are swapped and IP header data     */
3120 /*         are adjusted (protocol number).                                  */
3121 /*                                                                          */
3122 /*      6) Original mblk is trimmed to hold IPv6 header only, then it is    */
3123 /*         linked with the ICMPv6 data we got from (3).                     */
3124 /* ------------------------------------------------------------------------ */
3125 static int fr_make_icmp_v6(fin)
3126 fr_info_t *fin;
3127 {
3128         struct icmp6_hdr *icmp6;
3129         tcphdr_t *tcp;
3130         struct in6_addr tmp_src6;
3131         size_t icmp_pld_len;
3132         mblk_t *mblk_ip, *mblk_icmp;
3133 
3134         if (fin->fin_v != 6)
3135                 return (-1);
3136 
3137         /*
3138          * If we are dealing with TCP, then packet must SYN/FIN to be routed by
3139          * IP stack. If it is not SYN/FIN, then we must drop it silently.
3140          */
3141         tcp = (tcphdr_t *) fin->fin_dp;
3142 
3143         if ((fin->fin_p == IPPROTO_TCP) && 
3144             ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
3145                 return (-1);
3146 
3147         /*
3148          * Step (1)
3149          *
3150          * We need to copy complete packet in case of IPv6, no trimming is
3151          * needed (except the L2 headers).
3152          */
3153         icmp_pld_len = M_LEN(fin->fin_m);
3154         fin->fin_m->b_rptr += fin->fin_ipoff;
3155         if ((mblk_ip = copyb(fin->fin_m)) == NULL)
3156                 return (-1);
3157         fin->fin_m->b_rptr -= fin->fin_ipoff;
3158 
3159         /*
3160          * Step (2)
3161          *
3162          * Allocate and create ICMP header.
3163          */
3164         mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
3165                         BPRI_HI);
3166 
3167         if (mblk_icmp == NULL)
3168                 return (-1);
3169         
3170         MTYPE(mblk_icmp) = M_DATA;
3171         icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
3172         icmp6->icmp6_type = ICMP6_DST_UNREACH;
3173         icmp6->icmp6_code = fin->fin_icode & 0xFF;
3174         icmp6->icmp6_data32[0] = 0;
3175         mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
3176         
3177         /*
3178          * Step (3)
3179          *
3180          * Link the copy of IP packet to ICMP header.
3181          */
3182         linkb(mblk_icmp, mblk_ip);
3183 
3184         /* 
3185          * Step (4)
3186          *
3187          * Calculate chksum - this is much more easier task than in case of
3188          * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
3189          * We are making compensation just for change of packet length.
3190          */
3191         icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
3192 
3193         /*
3194          * Step (5)
3195          *
3196          * Swap IP addresses.
3197          */
3198         tmp_src6 = fin->fin_ip6->ip6_src;
3199         fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
3200         fin->fin_ip6->ip6_dst = tmp_src6;
3201 
3202         /*
3203          * and adjust IP header data.
3204          */
3205         fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
3206         fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
3207 
3208         /*
3209          * Step (6)
3210          *
3211          * We must release all linked mblks from original packet and keep only
3212          * the first mblk with IP header to link ICMP data.
3213          */
3214         fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
3215 
3216         if (fin->fin_m->b_cont != NULL) {
3217                 FREE_MB_T(fin->fin_m->b_cont);
3218         }
3219 
3220         /*
3221          * Append ICMP payload to IP header.
3222          */
3223         linkb(fin->fin_m, mblk_icmp);
3224 
3225         return (0);
3226 }
3227 #endif  /* USE_INET6 */
3228 
3229 /* ------------------------------------------------------------------------ */
3230 /* Function:    fr_make_icmp                                                */
3231 /* Returns:     int - 0 on success, -1 on failure                           */
3232 /* Parameters:  fin(I) - pointer to packet information                      */
3233 /*                                                                          */
3234 /* We must alter the original mblks passed to IPF from IP stack via         */
3235 /* FW_HOOKS. The reasons why we must alter packet are discussed within      */
3236 /* comment at fr_make_rst() function.                                       */
3237 /*                                                                          */
3238 /* The fr_make_icmp() function acts as a wrapper, which passes the code     */
3239 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on         */
3240 /* protocol version. However there are some details, which are common to    */
3241 /* both IP versions. The details are going to be explained here.            */
3242 /*                                                                          */
3243 /* The packet looks as follows:                                             */
3244 /*    xxx | IP hdr | IP payload    ...  |                                   */
3245 /*    ^   ^        ^                    ^                                   */
3246 /*    |   |        |                    |                                   */
3247 /*    |   |        |            fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
3248 /*    |   |        |                                                        */
3249 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
3250 /*    |   |                                                                 */
3251 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
3252 /*    |      of loopback)                                                   */
3253 /*    |                                                                     */
3254 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC            */
3255 /*                                                                          */
3256 /* All relevant IP headers are pulled up into the first mblk. It happened   */
3257 /* well in advance before the matching rule was found (the rule, which took */
3258 /* us here, to fr_make_icmp() function).                                    */
3259 /*                                                                          */
3260 /* Both functions will turn packet passed in fin->fin_m mblk into a new          */
3261 /* packet. New packet will be represented as chain of mblks.                */
3262 /* orig mblk |- b_cont ---.                                                 */
3263 /*    ^                    `-> ICMP hdr |- b_cont--.                     */
3264 /*    |                           ^                 `-> duped orig mblk          */
3265 /*    |                           |                             ^           */
3266 /*    `- The original mblk        |                             |           */
3267 /*       will be trimmed to       |                             |           */
3268 /*       to contain IP header     |                             |           */
3269 /*       only                     |                             |           */
3270 /*                                |                             |           */
3271 /*                                `- This is newly              |           */
3272 /*                                   allocated mblk to          |           */
3273 /*                                   hold ICMPv6 data.          |           */
3274 /*                                                              |           */
3275 /*                                                              |           */
3276 /*                                                              |           */
3277 /*          This is the copy of original mblk, it will contain -'           */
3278 /*          orignal IP  packet in case of ICMPv6. In case of                */
3279 /*          ICMPv4 it will contain up to 8 bytes of IP payload              */
3280 /*          (TCP/UDP/L4) data from original packet.                         */
3281 /* ------------------------------------------------------------------------ */
3282 int fr_make_icmp(fin)
3283 fr_info_t *fin;
3284 {
3285         int rv;
3286         
3287         if (fin->fin_v == 4)
3288                 rv = fr_make_icmp_v4(fin);
3289 #ifdef USE_INET6
3290         else if (fin->fin_v == 6)
3291                 rv = fr_make_icmp_v6(fin);
3292 #endif
3293         else
3294                 rv = -1;
3295 
3296         return (rv);
3297 }
3298 
/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum                                                  */
/* Returns:     unsigned int - sum of buffer buf                            */
/* Parameters:  buf - pointer to buf we want to sum up                      */
/*              len - length of buffer buf                                  */
/*                                                                          */
/* Sums buffer buf as a sequence of 16-bit words in memory order; an odd    */
/* trailing byte is summed as if it were followed by a zero byte. The       */
/* result is not folded, callers fold the carries and complement it to get  */
/* the chksum. The buffer is read byte by byte, so buf does not have to be  */
/* aligned.                                                                 */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
        const unsigned char     *p = buf;
        uint32_t                sum = 0;

        /*
         * (p[0] << 8) | p[1] is the word in network order; htons() turns it
         * into the value a native 16-bit load from p would give, without
         * requiring p to be aligned.
         */
        for (; len > 1; len -= 2, p += 2)
                sum += htons((uint16_t)((p[0] << 8) | p[1]));

        /*
         * Odd length: the last byte is the first byte of a zero padded word.
         */
        if (len == 1)
                sum += htons((uint16_t)(p[0] << 8));

        return (sum);
}
3325 
3326 /* ------------------------------------------------------------------------ */
3327 /* Function:    fr_calc_chksum                                              */
3328 /* Returns:     void                                                        */
3329 /* Parameters:  fin - pointer to fr_info_t instance with packet data        */
3330 /*              pkt - pointer to duplicated packet                          */
3331 /*                                                                          */
3332 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP        */
3333 /* versions.                                                                */
3334 /* ------------------------------------------------------------------------ */
3335 void fr_calc_chksum(fin, pkt)
3336 fr_info_t *fin;
3337 mb_t *pkt;
3338 {
3339         struct pseudo_hdr {
3340                 union {
3341                         struct in_addr  in4;
3342 #ifdef USE_INET6
3343                         struct in6_addr in6;
3344 #endif
3345                 } src_addr;
3346                 union {
3347                         struct in_addr  in4;
3348 #ifdef USE_INET6
3349                         struct in6_addr in6;
3350 #endif
3351                 } dst_addr;
3352                 char            zero;
3353                 char            proto;
3354                 uint16_t        len;
3355         }       phdr;
3356         uint32_t        sum, ip_sum;
3357         void    *buf;
3358         uint16_t        *l4_csum_p;
3359         tcphdr_t        *tcp;
3360         udphdr_t        *udp;
3361         icmphdr_t       *icmp;
3362 #ifdef USE_INET6
3363         struct icmp6_hdr        *icmp6;
3364 #endif
3365         ip_t            *ip;
3366         unsigned int    len;
3367         int             pld_len;
3368 
3369         /*
3370          * We need to pullup the packet to the single continuous buffer to avoid
3371          * potential misaligment of b_rptr member in mblk chain.
3372          */
3373         if (pullupmsg(pkt, -1) == 0) {
3374                 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
3375                     " will not be computed by IPF");
3376                 return;
3377         }
3378 
3379         /*
3380          * It is guaranteed IP header starts right at b_rptr, because we are
3381          * working with a copy of the original packet.
3382          *
3383          * Compute pseudo header chksum for TCP and UDP.
3384          */
3385         if ((fin->fin_p == IPPROTO_UDP) ||
3386             (fin->fin_p == IPPROTO_TCP)) {
3387                 bzero(&phdr, sizeof (phdr));
3388 #ifdef USE_INET6
3389                 if (fin->fin_v == 6) {
3390                         phdr.src_addr.in6 = fin->fin_srcip6;
3391                         phdr.dst_addr.in6 = fin->fin_dstip6;
3392                 } else {
3393                         phdr.src_addr.in4 = fin->fin_src;
3394                         phdr.dst_addr.in4 = fin->fin_dst;
3395                 }
3396 #else
3397                 phdr.src_addr.in4 = fin->fin_src;
3398                 phdr.dst_addr.in4 = fin->fin_dst;
3399 #endif
3400                 phdr.zero = (char) 0;
3401                 phdr.proto = fin->fin_p;
3402                 phdr.len = htons((uint16_t)fin->fin_dlen);
3403                 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
3404         } else {
3405                 sum = 0;
3406         }
3407 
3408         /*
3409          * Set pointer to the L4 chksum field in the packet, set buf pointer to
3410          * the L4 header start.
3411          */
3412         switch (fin->fin_p) {
3413                 case IPPROTO_UDP:
3414                         udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3415                         l4_csum_p = &udp->uh_sum;
3416                         buf = udp;
3417                         break;
3418                 case IPPROTO_TCP:
3419                         tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3420                         l4_csum_p = &tcp->th_sum;
3421                         buf = tcp;
3422                         break;
3423                 case IPPROTO_ICMP:
3424                         icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3425                         l4_csum_p = &icmp->icmp_cksum;
3426                         buf = icmp;
3427                         break;
3428 #ifdef USE_INET6
3429                 case IPPROTO_ICMPV6:
3430                         icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
3431                         l4_csum_p = &icmp6->icmp6_cksum;
3432                         buf = icmp6;
3433                         break;
3434 #endif
3435                 default:
3436                         l4_csum_p = NULL;
3437         }
3438 
3439         /*
3440          * Compute L4 chksum if needed.
3441          */
3442         if (l4_csum_p != NULL) {
3443                 *l4_csum_p = (uint16_t)0;
3444                 pld_len = fin->fin_dlen;
3445                 len = pkt->b_wptr - (unsigned char *)buf;
3446                 ASSERT(len == pld_len);
3447                 /*
3448                  * Add payload sum to pseudoheader sum.
3449                  */
3450                 sum += fr_buf_sum(buf, len);
3451                 while (sum >> 16)
3452                         sum = (sum & 0xFFFF) + (sum >> 16);
3453 
3454                 *l4_csum_p = ~((uint16_t)sum);
3455                 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
3456         }
3457 
3458         /*
3459          * The IP header chksum is needed just for IPv4.
3460          */
3461         if (fin->fin_v == 4) {
3462                 /*
3463                  * Compute IPv4 header chksum.
3464                  */
3465                 ip = (ip_t *)pkt->b_rptr;
3466                 ip->ip_sum = (uint16_t)0;
3467                 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
3468                 while (ip_sum >> 16)
3469                         ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
3470 
3471                 ip->ip_sum = ~((uint16_t)ip_sum);
3472                 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
3473         }
3474 
3475         return;
3476 }
3477 
3478 #endif  /* _KERNEL && SOLARIS2 >= 10 */