1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright (c) 2017 Joyent, Inc.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/stream.h>
  30 #include <sys/stropts.h>
  31 #include <sys/errno.h>
  32 #include <sys/strlog.h>
  33 #include <sys/tihdr.h>
  34 #include <sys/socket.h>
  35 #include <sys/ddi.h>
  36 #include <sys/sunddi.h>
  37 #include <sys/kmem.h>
  38 #include <sys/zone.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/vtrace.h>
  42 #include <sys/debug.h>
  43 #include <sys/atomic.h>
  44 #include <sys/strsun.h>
  45 #include <sys/random.h>
  46 #include <netinet/in.h>
  47 #include <net/if.h>
  48 #include <netinet/ip6.h>
  49 #include <net/pfkeyv2.h>
  50 #include <net/pfpolicy.h>
  51 
  52 #include <inet/common.h>
  53 #include <inet/mi.h>
  54 #include <inet/nd.h>
  55 #include <inet/ip.h>
  56 #include <inet/ip_impl.h>
  57 #include <inet/ip6.h>
  58 #include <inet/ip_if.h>
  59 #include <inet/ip_ndp.h>
  60 #include <inet/sadb.h>
  61 #include <inet/ipsec_info.h>
  62 #include <inet/ipsec_impl.h>
  63 #include <inet/ipsecesp.h>
  64 #include <inet/ipdrop.h>
  65 #include <inet/tcp.h>
  66 #include <sys/kstat.h>
  67 #include <sys/policy.h>
  68 #include <sys/strsun.h>
  69 #include <sys/strsubr.h>
  70 #include <inet/udp_impl.h>
  71 #include <sys/taskq.h>
  72 #include <sys/note.h>
  73 
  74 #include <sys/tsol/tnet.h>
  75 
  76 /*
  77  * Table of ND variables supported by ipsecesp. These are loaded into
  78  * ipsecesp_g_nd by ipsecesp_param_register() in ipsecesp_stack_init().
  79  * All of these are alterable, within the min/max values given, at run time.
  80  */
  81 static  ipsecespparam_t lcl_param_arr[] = {
  82         /* min  max                     value   name */
  83         { 0,    3,                      0,      "ipsecesp_debug"},
  84         { 125,  32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
  85         { 1,    10,                     1,      "ipsecesp_reap_delay"},
  86         { 1,    SADB_MAX_REPLAY,        64,     "ipsecesp_replay_size"},
  87         { 1,    300,                    15,     "ipsecesp_acquire_timeout"},
  88         { 1,    1800,                   90,     "ipsecesp_larval_timeout"},
  89         /* Default lifetime values for ACQUIRE messages. */
  90         { 0,    0xffffffffU,    0,      "ipsecesp_default_soft_bytes"},
  91         { 0,    0xffffffffU,    0,      "ipsecesp_default_hard_bytes"},
  92         { 0,    0xffffffffU,    24000,  "ipsecesp_default_soft_addtime"},
  93         { 0,    0xffffffffU,    28800,  "ipsecesp_default_hard_addtime"},
  94         { 0,    0xffffffffU,    0,      "ipsecesp_default_soft_usetime"},
  95         { 0,    0xffffffffU,    0,      "ipsecesp_default_hard_usetime"},
  96         { 0,    1,              0,      "ipsecesp_log_unknown_spi"},
  97         { 0,    2,              1,      "ipsecesp_padding_check"},
  98         { 0,    600,            20,     "ipsecesp_nat_keepalive_interval"},
  99 };
 100 /* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */
 101 
 102 #define esp0dbg(a)      printf a
 103 /* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
 104 #define esp1dbg(espstack, a)    if (espstack->ipsecesp_debug != 0) printf a
 105 #define esp2dbg(espstack, a)    if (espstack->ipsecesp_debug > 1) printf a
 106 #define esp3dbg(espstack, a)    if (espstack->ipsecesp_debug > 2) printf a
 107 
 108 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
 109 static int ipsecesp_close(queue_t *);
 110 static void ipsecesp_wput(queue_t *, mblk_t *);
 111 static void     *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
 112 static void     ipsecesp_stack_fini(netstackid_t stackid, void *arg);
 113 
 114 static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
 115 static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *);
 116 static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *);
 117 
 118 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
 119     ipsecesp_stack_t *, cred_t *);
 120 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
 121     kstat_named_t **, ipsecesp_stack_t *);
 122 static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *,
 123     ipsa_t *, uint_t);
 124 static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *,
 125     ipsa_t *, uchar_t *, uint_t);
 126 
 127 /* Settable in /etc/system */
 128 uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;
 129 
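     /*
      * STREAMS module_info: module id, module name, minimum and maximum
      * packet sizes, and high- and low-water marks for flow control.
      */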
 130 static struct module_info info = {
 131         5137, "ipsecesp", 0, INFPSZ, 65536, 1024
 132 };
 133 
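     /*
      * The read side simply passes messages along (putnext); the write side
      * is handled by ipsecesp_wput().
      */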
 134 static struct qinit rinit = {
 135         (pfi_t)putnext, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
 136         NULL
 137 };
 138 
 139 static struct qinit winit = {
 140         (pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
 141         NULL
 142 };
 143 
 144 struct streamtab ipsecespinfo = {
 145         &rinit, &winit, NULL, NULL
 146 };
 147 
 148 static taskq_t *esp_taskq;
 149 
 150 /*
 151  * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
 152  *
 153  * Question:    Do I need this, given that all instances' esps->esps_wq point
 154  *              to IP?
 155  *
 156  * Answer:      Yes, because I need to know which queue is BOUND to
 157  *              IPPROTO_ESP
 158  */
 159 
 160 static int      esp_kstat_update(kstat_t *, int);
 161 
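     /*
      * Create and install the per-netstack "esp_stat" kstat, initializing
      * each of the named 64-bit counters updated via ESP_BUMP_STAT().
      */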
 162 static boolean_t
 163 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
 164 {
 165         espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
 166             "net", KSTAT_TYPE_NAMED,
 167             sizeof (esp_kstats_t) / sizeof (kstat_named_t),
 168             KSTAT_FLAG_PERSISTENT, stackid);
 169 
 170         if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
 171                 return (B_FALSE);
 172 
 173         espstack->esp_kstats = espstack->esp_ksp->ks_data;
 174 
 175         espstack->esp_ksp->ks_update = esp_kstat_update;
 176         espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid;
 177 
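     /* Shorthand for initializing each named counter as a 64-bit kstat. */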
 178 #define K64 KSTAT_DATA_UINT64
 179 #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64)
 180 
 181         KI(num_aalgs);
 182         KI(num_ealgs);
 183         KI(good_auth);
 184         KI(bad_auth);
 185         KI(bad_padding);
 186         KI(replay_failures);
 187         KI(replay_early_failures);
 188         KI(keysock_in);
 189         KI(out_requests);
 190         KI(acquire_requests);
 191         KI(bytes_expired);
 192         KI(out_discards);
 193         KI(crypto_sync);
 194         KI(crypto_async);
 195         KI(crypto_failures);
 196         KI(bad_decrypt);
 197         KI(sa_port_renumbers);
 198 
 199 #undef KI
 200 #undef K64
 201 
 202         kstat_install(espstack->esp_ksp);
 203 
 204         return (B_TRUE);
 205 }
 206 
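     /*
      * kstat update callback: on read, refresh the authentication and
      * encryption algorithm counts from the IPsec stack (under
      * ipsec_alg_lock); writes are rejected.
      */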
 207 static int
 208 esp_kstat_update(kstat_t *kp, int rw)
 209 {
 210         esp_kstats_t *ekp;
 211         netstackid_t    stackid = (zoneid_t)(uintptr_t)kp->ks_private;
 212         netstack_t      *ns;
 213         ipsec_stack_t   *ipss;
 214 
 215         if ((kp == NULL) || (kp->ks_data == NULL))
 216                 return (EIO);
 217 
 218         if (rw == KSTAT_WRITE)
 219                 return (EACCES);
 220 
 221         ns = netstack_find_by_stackid(stackid);
 222         if (ns == NULL)
 223                 return (-1);
 224         ipss = ns->netstack_ipsec;
 225         if (ipss == NULL) {
 226                 netstack_rele(ns);
 227                 return (-1);
 228         }
 229         ekp = (esp_kstats_t *)kp->ks_data;
 230 
 231         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
 232         ekp->esp_stat_num_aalgs.value.ui64 =
 233             ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
 234         ekp->esp_stat_num_ealgs.value.ui64 =
 235             ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
 236         rw_exit(&ipss->ipsec_alg_lock);
 237 
 238         netstack_rele(ns);
 239         return (0);
 240 }
 241 
 242 #ifdef DEBUG
 243 /*
 244  * Debug routine, useful to see pre-encryption data.
 245  */
 246 static char *
 247 dump_msg(mblk_t *mp)
 248 {
 249         char tmp_str[3], tmp_line[256];
 250 
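             /* Hex-dump each mblk: 32 bytes per line, space every 4 bytes. */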
 251         while (mp != NULL) {
 252                 unsigned char *ptr;
 253 
 254                 printf("mblk address 0x%p, length %ld, db_ref %d "
 255                     "type %d, base 0x%p, lim 0x%p\n",
 256                     (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
 257                     mp->b_datap->db_ref, mp->b_datap->db_type,
 258                     (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
 259                 ptr = mp->b_rptr;
 260 
 261                 tmp_line[0] = '\0';
 262                 while (ptr < mp->b_wptr) {
 263                         uint_t diff;
 264 
 265                         diff = (ptr - mp->b_rptr);
 266                         if (!(diff & 0x1f)) {
 267                                 if (strlen(tmp_line) > 0) {
 268                                         printf("bytes: %s\n", tmp_line);
 269                                         tmp_line[0] = '\0';
 270                                 }
 271                         }
 272                         if (!(diff & 0x3))
 273                                 (void) strcat(tmp_line, " ");
 274                         (void) sprintf(tmp_str, "%02x", *ptr);
 275                         (void) strcat(tmp_line, tmp_str);
 276                         ptr++;
 277                 }
 278                 if (strlen(tmp_line) > 0)
 279                         printf("bytes: %s\n", tmp_line);
 280 
 281                 mp = mp->b_cont;
 282         }
 283 
 284         return ("\n");
 285 }
 286 
 287 #else /* DEBUG */
 288 static char *
 289 dump_msg(mblk_t *mp)
 290 {
 291         printf("Find value of mp %p.\n", mp);
 292         return ("\n");
 293 }
 294 #endif /* DEBUG */
 295 
 296 /*
 297  * Don't have to lock age_interval, as only one thread will access it at
 298  * a time, because I control the one function that does so, via timeout().
 299  */
 300 static void
 301 esp_ager(void *arg)
 302 {
 303         ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
 304         netstack_t      *ns = espstack->ipsecesp_netstack;
 305         hrtime_t begin = gethrtime();
 306 
 307         sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q,
 308             espstack->ipsecesp_reap_delay, ns);
 309         sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q,
 310             espstack->ipsecesp_reap_delay, ns);
 311 
 312         espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q,
 313             esp_ager, espstack,
 314             &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max,
 315             info.mi_idnum);
 316 }
 317 
 318 /*
 319  * Get an ESP NDD parameter.
 320  */
 321 /* ARGSUSED */
 322 static int
 323 ipsecesp_param_get(
 324     queue_t     *q,
 325     mblk_t      *mp,
 326     caddr_t     cp,
 327     cred_t *cr)
 328 {
 329         ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
 330         uint_t value;
 331         ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
 332 
 333         mutex_enter(&espstack->ipsecesp_param_lock);
 334         value = ipsecesppa->ipsecesp_param_value;
 335         mutex_exit(&espstack->ipsecesp_param_lock);
 336 
 337         (void) mi_mpprintf(mp, "%u", value);
 338         return (0);
 339 }
 340 
 341 /*
 342  * This routine sets an NDD variable in an ipsecespparam_t structure.
 343  */
 344 /* ARGSUSED */
 345 static int
 346 ipsecesp_param_set(
 347     queue_t     *q,
 348     mblk_t      *mp,
 349     char        *value,
 350     caddr_t     cp,
 351     cred_t *cr)
 352 {
 353         ulong_t new_value;
 354         ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
 355         ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
 356 
 357         /*
 358          * Fail the request if the new value does not lie within the
 359          * required bounds.
 360          */
 361         if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
 362             new_value < ipsecesppa->ipsecesp_param_min ||
 363             new_value > ipsecesppa->ipsecesp_param_max) {
 364                 return (EINVAL);
 365         }
 366 
 367         /* Set the new value */
 368         mutex_enter(&espstack->ipsecesp_param_lock);
 369         ipsecesppa->ipsecesp_param_value = new_value;
 370         mutex_exit(&espstack->ipsecesp_param_lock);
 371         return (0);
 372 }
 373 
 374 /*
 375  * Using lifetime NDD variables, fill in an extended combination's
 376  * lifetime information.
 377  */
 378 void
 379 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
 380 {
 381         ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
 382 
 383         ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes;
 384         ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes;
 385         ecomb->sadb_x_ecomb_soft_addtime =
 386             espstack->ipsecesp_default_soft_addtime;
 387         ecomb->sadb_x_ecomb_hard_addtime =
 388             espstack->ipsecesp_default_hard_addtime;
 389         ecomb->sadb_x_ecomb_soft_usetime =
 390             espstack->ipsecesp_default_soft_usetime;
 391         ecomb->sadb_x_ecomb_hard_usetime =
 392             espstack->ipsecesp_default_hard_usetime;
 393 }
 394 
 395 /*
 396  * Initialize things for ESP at module load time.
 397  */
 398 boolean_t
 399 ipsecesp_ddi_init(void)
 400 {
 401         esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
 402             IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
 403 
 404         /*
 405          * We want to be informed each time a stack is created or
 406          * destroyed in the kernel, so we can maintain the
 407          * set of ipsecesp_stack_t's.
 408          */
 409         netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL,
 410             ipsecesp_stack_fini);
 411 
 412         return (B_TRUE);
 413 }
 414 
 415 /*
 416  * Walk through the param array specified registering each element with the
 417  * named dispatch handler.
 418  */
 419 static boolean_t
 420 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt)
 421 {
 422         for (; cnt-- > 0; espp++) {
 423                 if (espp->ipsecesp_param_name != NULL &&
 424                     espp->ipsecesp_param_name[0]) {
 425                         if (!nd_load(ndp,
 426                             espp->ipsecesp_param_name,
 427                             ipsecesp_param_get, ipsecesp_param_set,
 428                             (caddr_t)espp)) {
 429                                 nd_free(ndp);
 430                                 return (B_FALSE);
 431                         }
 432                 }
 433         }
 434         return (B_TRUE);
 435 }
 436 
 437 /*
 438  * Initialize things for ESP for each stack instance
 439  */
 440 static void *
 441 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns)
 442 {
 443         ipsecesp_stack_t        *espstack;
 444         ipsecespparam_t         *espp;
 445 
 446         espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack),
 447             KM_SLEEP);
 448         espstack->ipsecesp_netstack = ns;
 449 
 450         espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
 451         espstack->ipsecesp_params = espp;
 452         bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr));
 453 
 454         (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp,
 455             A_CNT(lcl_param_arr));
 456 
 457         (void) esp_kstat_init(espstack, stackid);
 458 
 459         espstack->esp_sadb.s_acquire_timeout =
 460             &espstack->ipsecesp_acquire_timeout;
 461         sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size,
 462             espstack->ipsecesp_netstack);
 463 
 464         mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);
 465 
 466         ip_drop_register(&espstack->esp_dropper, "IPsec ESP");
 467         return (espstack);
 468 }
 469 
 470 /*
 471  * Destroy things for ESP at module unload time.
 472  */
 473 void
 474 ipsecesp_ddi_destroy(void)
 475 {
 476         netstack_unregister(NS_IPSECESP);
 477         taskq_destroy(esp_taskq);
 478 }
 479 
 480 /*
 481  * Destroy things for ESP for one stack instance
 482  */
 483 static void
 484 ipsecesp_stack_fini(netstackid_t stackid, void *arg)
 485 {
 486         ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
 487 
 488         if (espstack->esp_pfkey_q != NULL) {
 489                 (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event);
 490         }
 491         espstack->esp_sadb.s_acquire_timeout = NULL;
 492         sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack);
 493         ip_drop_unregister(&espstack->esp_dropper);
 494         mutex_destroy(&espstack->ipsecesp_param_lock);
 495         nd_free(&espstack->ipsecesp_g_nd);
 496 
 497         kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr));
 498         espstack->ipsecesp_params = NULL;
 499         kstat_delete_netstack(espstack->esp_ksp, stackid);
 500         espstack->esp_ksp = NULL;
 501         espstack->esp_kstats = NULL;
 502         kmem_free(espstack, sizeof (*espstack));
 503 }
 504 
 505 /*
 506  * ESP module open routine, which is here for keysock plumbing.
 507  * Keysock is pushed over {AH,ESP}, which is an artifact from the Bad Old
 508  * Days of export control, and fears that ESP would not be allowed
 509  * to be shipped at all by default.  Eventually, keysock should
 510  * either access AH and ESP via modstubs or krtld dependencies, or
 511  * perhaps be folded in with AH and ESP into a single IPsec/netsec
 512  * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
 513  */
 514 /* ARGSUSED */
 515 static int
 516 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 517 {
 518         netstack_t              *ns;
 519         ipsecesp_stack_t        *espstack;
 520 
 521         if (secpolicy_ip_config(credp, B_FALSE) != 0)
 522                 return (EPERM);
 523 
 524         if (q->q_ptr != NULL)
 525                 return (0);  /* Re-open of an already open instance. */
 526 
 527         if (sflag != MODOPEN)
 528                 return (EINVAL);
 529 
 530         ns = netstack_find_by_cred(credp);
 531         ASSERT(ns != NULL);
 532         espstack = ns->netstack_ipsecesp;
 533         ASSERT(espstack != NULL);
 534 
 535         q->q_ptr = espstack;
 536         WR(q)->q_ptr = q->q_ptr;
 537 
 538         qprocson(q);
 539         return (0);
 540 }
 541 
 542 /*
 543  * ESP module close routine.
 544  */
 545 static int
 546 ipsecesp_close(queue_t *q)
 547 {
 548         ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
 549 
 550         /*
 551          * Clean up q_ptr, if needed.
 552          */
 553         qprocsoff(q);
 554 
 555         /* Keysock queue check is safe, because of OCEXCL perimeter. */
 556 
 557         if (q == espstack->esp_pfkey_q) {
 558                 esp1dbg(espstack,
 559                     ("ipsecesp_close:  Ummm... keysock is closing ESP.\n"));
 560                 espstack->esp_pfkey_q = NULL;
 561                 /* Detach qtimeouts. */
 562                 (void) quntimeout(q, espstack->esp_event);
 563         }
 564 
 565         netstack_rele(espstack->ipsecesp_netstack);
 566         return (0);
 567 }
 568 
 569 /*
 570  * Add a number of bytes to what the SA has protected so far.  Return
 571  * B_TRUE if the SA can still protect that many bytes.
 572  *
 573  * Caller must REFRELE the passed-in assoc.  This function must REFRELE
 574  * any obtained peer SA.
 575  */
 576 static boolean_t
 577 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
 578 {
 579         ipsa_t *inassoc, *outassoc;
 580         isaf_t *bucket;
 581         boolean_t inrc, outrc, isv6;
 582         sadb_t *sp;
 583         int outhash;
 584         netstack_t              *ns = assoc->ipsa_netstack;
 585         ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
 586 
 587         /* No peer?  No problem! */
 588         if (!assoc->ipsa_haspeer) {
 589                 return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes,
 590                     B_TRUE));
 591         }
 592 
 593         /*
 594          * Otherwise, we want to grab both the original assoc and its peer.
 595          * There might be a race for this, but if it's a real race, two
 596          * expire messages may occur.  We limit this by only sending the
 597  * expire message on one of the peers; we'll pick the inbound
 598          * arbitrarily.
 599          *
 600          * If we need tight synchronization on the peer SA, then we need to
 601          * reconsider.
 602          */
 603 
 604         /* Use the address family to select IPv6/IPv4 */
 605         isv6 = (assoc->ipsa_addrfam == AF_INET6);
 606         sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
 607 
 608         if (inbound) {
 609                 inassoc = assoc;
 610                 if (isv6) {
 611                         outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
 612                             &inassoc->ipsa_dstaddr));
 613                 } else {
 614                         outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
 615                             &inassoc->ipsa_dstaddr));
 616                 }
 617                 bucket = &sp->sdb_of[outhash];
 618                 mutex_enter(&bucket->isaf_lock);
 619                 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
 620                     inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
 621                     inassoc->ipsa_addrfam);
 622                 mutex_exit(&bucket->isaf_lock);
 623                 if (outassoc == NULL) {
 624                         /* Q: Do we wish to set haspeer == B_FALSE? */
 625                         esp0dbg(("esp_age_bytes: "
 626                             "can't find peer for inbound.\n"));
 627                         return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc,
 628                             bytes, B_TRUE));
 629                 }
 630         } else {
 631                 outassoc = assoc;
 632                 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
 633                 mutex_enter(&bucket->isaf_lock);
 634                 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
 635                     outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
 636                     outassoc->ipsa_addrfam);
 637                 mutex_exit(&bucket->isaf_lock);
 638                 if (inassoc == NULL) {
 639                         /* Q: Do we wish to set haspeer == B_FALSE? */
 640                         esp0dbg(("esp_age_bytes: "
 641                             "can't find peer for outbound.\n"));
 642                         return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc,
 643                             bytes, B_TRUE));
 644                 }
 645         }
 646 
 647         inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE);
 648         outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE);
 649 
 650         /*
 651          * REFRELE any peer SA.
 652          *
 653          * Because of the multi-line macro nature of IPSA_REFRELE, keep
 654          * them in { }.
 655          */
 656         if (inbound) {
 657                 IPSA_REFRELE(outassoc);
 658         } else {
 659                 IPSA_REFRELE(inassoc);
 660         }
 661 
 662         return (inrc && outrc);
 663 }
 664 
 665 /*
 666  * Do incoming NAT-T manipulations for a packet, adjusting the transport
 667  * checksum in place.  Returns the (possibly modified) mblk chain.
 668  */
 669 static mblk_t *
 670 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
 671 {
 672         ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
 673         tcpha_t *tcpha;
 674         udpha_t *udpha;
 675         /* Initialize to our inbound cksum adjustment... */
 676         uint32_t sum = assoc->ipsa_inbound_cksum;
 677 
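             /*
              * The transport checksum was computed by the peer against the
              * pre-NAT addresses, so patch it incrementally: add the one's
              * complement of the current checksum to the cached adjustment,
              * fold the carries back into 16 bits (DOWN_SUM), and store the
              * complement of the result.
              */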
 678         switch (ipha->ipha_protocol) {
 679         case IPPROTO_TCP:
 680                 tcpha = (tcpha_t *)(data_mp->b_rptr +
 681                     IPH_HDR_LENGTH(ipha));
 682 
 683 #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) +   ((x) >> 16)
 684                 sum += ~ntohs(tcpha->tha_sum) & 0xFFFF;
 685                 DOWN_SUM(sum);
 686                 DOWN_SUM(sum);
 687                 tcpha->tha_sum = ~htons(sum);
 688                 break;
 689         case IPPROTO_UDP:
 690                 udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));
 691 
 692                 if (udpha->uha_checksum != 0) {
 693                         /* Adjust if the inbound one was not zero. */
 694                         sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
 695                         DOWN_SUM(sum);
 696                         DOWN_SUM(sum);
 697                         udpha->uha_checksum = ~htons(sum);
 698                         if (udpha->uha_checksum == 0)
 699                                 udpha->uha_checksum = 0xFFFF;
 700                 }
 701 #undef DOWN_SUM
 702                 break;
 703         case IPPROTO_IP:
 704                 /*
 705                  * This case is only an issue for self-encapsulated
 706                  * packets.  So for now, fall through.
 707                  */
 708                 break;
 709         }
 710         return (data_mp);
 711 }
 712 
 713 
 714 /*
 715  * Strip ESP header, check padding, and fix IP header.
 716  * Returns B_TRUE on success, B_FALSE if an error occurred.
 717  */
 718 static boolean_t
 719 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
 720     kstat_named_t **counter, ipsecesp_stack_t *espstack)
 721 {
 722         ipha_t *ipha;
 723         ip6_t *ip6h;
 724         uint_t divpoint;
 725         mblk_t *scratch;
 726         uint8_t nexthdr, padlen;
 727         uint8_t lastpad;
 728         ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
 729         uint8_t *lastbyte;
 730 
 731         /*
 732          * Strip ESP data and fix IP header.
 733          *
 734          * XXX In case the beginning of esp_inbound() changes to not do a
 735          * pullup, this part of the code can remain unchanged.
 736          */
 737         if (isv4) {
 738                 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
 739                 ipha = (ipha_t *)data_mp->b_rptr;
 740                 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
 741                     IPH_HDR_LENGTH(ipha));
 742                 divpoint = IPH_HDR_LENGTH(ipha);
 743         } else {
 744                 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
 745                 ip6h = (ip6_t *)data_mp->b_rptr;
 746                 divpoint = ip_hdr_length_v6(data_mp, ip6h);
 747         }
 748 
 749         scratch = data_mp;
 750         while (scratch->b_cont != NULL)
 751                 scratch = scratch->b_cont;
 752 
 753         ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);
 754 
 755         /*
 756          * "Next header" and padding length are the last two bytes in the
 757          * ESP-protected datagram, thus the explicit - 1 and - 2.
 758          * lastpad is the last byte of the padding, which can be used for
 759          * a quick check to see if the padding is correct.
 760          */
 761         lastbyte = scratch->b_wptr - 1;
 762         nexthdr = *lastbyte--;
 763         padlen = *lastbyte--;
 764 
 765         if (isv4) {
 766                 /* Fix part of the IP header. */
 767                 ipha->ipha_protocol = nexthdr;
 768                 /*
 769                  * Reality check the padlen.  The explicit - 2 is for the
 770                  * padding length and the next-header bytes.
 771                  */
 772                 if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
 773                     sizeof (esph_t) - ivlen) {
 774                         ESP_BUMP_STAT(espstack, bad_decrypt);
 775                         ipsec_rl_strlog(espstack->ipsecesp_netstack,
 776                             info.mi_idnum, 0, 0,
 777                             SL_ERROR | SL_WARN,
 778                             "Corrupt ESP packet (padlen too big).\n");
 779                         esp1dbg(espstack, ("padlen (%d) is greater than:\n",
 780                             padlen));
 781                         esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp "
 782                             "hdr - ivlen(%d) = %d.\n",
 783                             ntohs(ipha->ipha_length), ivlen,
 784                             (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
 785                             2 - sizeof (esph_t) - ivlen)));
 786                         *counter = DROPPER(ipss, ipds_esp_bad_padlen);
 787                         return (B_FALSE);
 788                 }
 789 
 790                 /*
 791                  * Fix the rest of the header.  The explicit - 2 is for the
 792                  * padding length and the next-header bytes.
 793                  */
 794                 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
 795                     2 - sizeof (esph_t) - ivlen);
 796                 ipha->ipha_hdr_checksum = 0;
 797                 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
 798         } else {
 799                 if (ip6h->ip6_nxt == IPPROTO_ESP) {
 800                         ip6h->ip6_nxt = nexthdr;
 801                 } else {
 802                         ip_pkt_t ipp;
 803 
 804                         bzero(&ipp, sizeof (ipp));
 805                         (void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp,
 806                             NULL);
 807                         if (ipp.ipp_dstopts != NULL) {
 808                                 ipp.ipp_dstopts->ip6d_nxt = nexthdr;
 809                         } else if (ipp.ipp_rthdr != NULL) {
 810                                 ipp.ipp_rthdr->ip6r_nxt = nexthdr;
 811                         } else if (ipp.ipp_hopopts != NULL) {
 812                                 ipp.ipp_hopopts->ip6h_nxt = nexthdr;
 813                         } else {
 814                                 /* Panic a DEBUG kernel. */
 815                                 ASSERT(ipp.ipp_hopopts != NULL);
 816                                 /* Otherwise, pretend it's IP + ESP. */
 817                                 cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
 818                                 ip6h->ip6_nxt = nexthdr;
 819                         }
 820                 }
 821 
 822                 if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
 823                     ivlen) {
 824                         ESP_BUMP_STAT(espstack, bad_decrypt);
 825                         ipsec_rl_strlog(espstack->ipsecesp_netstack,
 826                             info.mi_idnum, 0, 0,
 827                             SL_ERROR | SL_WARN,
 828                             "Corrupt ESP packet (v6 padlen too big).\n");
 829                         esp1dbg(espstack, ("padlen (%d) is greater than:\n",
 830                             padlen));
 831                         esp1dbg(espstack,
 832                             ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = "
 833                             "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
 834                             + sizeof (ip6_t)), ivlen,
 835                             (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
 836                             sizeof (esph_t) - ivlen)));
 837                         *counter = DROPPER(ipss, ipds_esp_bad_padlen);
 838                         return (B_FALSE);
 839                 }
 840 
 841 
 842                 /*
 843                  * Fix the rest of the header.  The explicit - 2 is for the
 844                  * padding length and the next-header bytes.  IPv6 is nice,
 845                  * because there's no hdr checksum!
 846                  */
 847                 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
 848                     2 - sizeof (esph_t) - ivlen);
 849         }
 850 
 851         if (espstack->ipsecesp_padding_check > 0 && padlen > 0) {
 852                 /*
 853                  * Weak padding check: compare the last pad byte to the pad
 854                  * length; they should be equal.
 855                  */
 856                 lastpad = *lastbyte--;
 857 
 858                 if (padlen != lastpad) {
 859                         ipsec_rl_strlog(espstack->ipsecesp_netstack,
 860                             info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
 861                             "Corrupt ESP packet (lastpad != padlen).\n");
 862                         esp1dbg(espstack,
 863                             ("lastpad (%d) not equal to padlen (%d):\n",
 864                             lastpad, padlen));
 865                         ESP_BUMP_STAT(espstack, bad_padding);
 866                         *counter = DROPPER(ipss, ipds_esp_bad_padding);
 867                         return (B_FALSE);
 868                 }
 869 
 870                 /*
 871                  * Strong padding check: Check all pad bytes to see that
 872                  * they're ascending.  Go backwards using a descending counter
 873                  * to verify.  padlen == 1 is checked by the previous block, so
 874                  * only bother if we've more than 1 byte of padding.
 875                  * Consequently, start the check one byte before the location
 876                  * of "lastpad".
 877                  */
 878                 if (espstack->ipsecesp_padding_check > 1) {
 879                         /*
 880                          * This assert may have to become an if and a pullup
 881                          * if we start accepting multi-dblk mblks. For now,
 882                          * though, any packet here will have been pulled up in
 883                          * esp_inbound.
 884                          */
 885                         ASSERT(MBLKL(scratch) >= lastpad + 3);
 886 
 887                         /*
 888                          * Use "--lastpad" because we already checked the very
 889                          * last pad byte previously.
 890                          */
 891                         while (--lastpad != 0) {
 892                                 if (lastpad != *lastbyte) {
 893                                         ipsec_rl_strlog(
 894                                             espstack->ipsecesp_netstack,
 895                                             info.mi_idnum, 0, 0,
 896                                             SL_ERROR | SL_WARN, "Corrupt ESP "
 897                                             "packet (bad padding).\n");
 898                                         esp1dbg(espstack,
 899                                             ("padding not in correct"
 900                                             " format:\n"));
 901                                         ESP_BUMP_STAT(espstack, bad_padding);
 902                                         *counter = DROPPER(ipss,
 903                                             ipds_esp_bad_padding);
 904                                         return (B_FALSE);
 905                                 }
 906                                 lastbyte--;
 907                         }
 908                 }
 909         }
 910 
 911         /* Trim off the padding. */
 912         ASSERT(data_mp->b_cont == NULL);
 913         data_mp->b_wptr -= (padlen + 2);
 914 
 915         /*
 916          * Remove the ESP header.
 917          *
 918          * The above assertions about data_mp's size will make this work.
 919          *
 920          * XXX  Question:  If I send up and get back a contiguous mblk,
 921          * would it be quicker to bcopy over, or keep doing the dupb stuff?
 922          * I go with copying for now.
 923          */
 924 
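             /*
              * Slide the IP (and any extension) headers forward over the ESP
              * header and IV: a word at a time when everything is 32-bit
              * aligned, otherwise byte by byte, then point b_rptr at the new
              * start of the packet.
              */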
 925         if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
 926             IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
 927                 uint8_t *start = data_mp->b_rptr;
 928                 uint32_t *src, *dst;
 929 
 930                 src = (uint32_t *)(start + divpoint);
 931                 dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);
 932 
 933                 ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
 934                     IS_P2ALIGNED(src, sizeof (uint32_t)));
 935 
 936                 do {
 937                         src--;
 938                         dst--;
 939                         *dst = *src;
 940                 } while (src != (uint32_t *)start);
 941 
 942                 data_mp->b_rptr = (uchar_t *)dst;
 943         } else {
 944                 uint8_t *start = data_mp->b_rptr;
 945                 uint8_t *src, *dst;
 946 
 947                 src = start + divpoint;
 948                 dst = src + sizeof (esph_t) + ivlen;
 949 
 950                 do {
 951                         src--;
 952                         dst--;
 953                         *dst = *src;
 954                 } while (src != start);
 955 
 956                 data_mp->b_rptr = dst;
 957         }
 958 
 959         esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n"));
 960         esp2dbg(espstack, (dump_msg(data_mp)));
 961 
 962         return (B_TRUE);
 963 }
 964 
 965 /*
 966  * Updating use times can be tricky business if the ipsa_haspeer flag is
 967  * set.  This function is called once in an SA's lifetime.
 968  *
 969  * Caller has to REFRELE "assoc" which is passed in.  This function has
 970  * to REFRELE any peer SA that is obtained.
 971  */
 972 static void
 973 esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
 974 {
 975         ipsa_t *inassoc, *outassoc;
 976         isaf_t *bucket;
 977         sadb_t *sp;
 978         int outhash;
 979         boolean_t isv6;
 980         netstack_t              *ns = assoc->ipsa_netstack;
 981         ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
 982 
 983         /* No peer?  No problem! */
 984         if (!assoc->ipsa_haspeer) {
 985                 sadb_set_usetime(assoc);
 986                 return;
 987         }
 988 
 989         /*
 990          * Otherwise, we want to grab both the original assoc and its peer.
 991          * There might be a race for this, but if it's a real race, the times
 992          * will be out-of-synch by at most a second, and since our time
 993          * granularity is a second, this won't be a problem.
 994          *
 995          * If we need tight synchronization on the peer SA, then we need to
 996          * reconsider.
 997          */
 998 
 999         /* Use the address family to select IPv6/IPv4 */
1000         isv6 = (assoc->ipsa_addrfam == AF_INET6);
1001         sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
1002 
1003         if (inbound) {
1004                 inassoc = assoc;
1005                 if (isv6) {
1006                         outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
1007                             &inassoc->ipsa_dstaddr));
1008                 } else {
1009                         outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
1010                             &inassoc->ipsa_dstaddr));
1011                 }
1012                 bucket = &sp->sdb_of[outhash];
1013                 mutex_enter(&bucket->isaf_lock);
1014                 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1015                     inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1016                     inassoc->ipsa_addrfam);
1017                 mutex_exit(&bucket->isaf_lock);
1018                 if (outassoc == NULL) {
1019                         /* Q: Do we wish to set haspeer == B_FALSE? */
1020                         esp0dbg(("esp_set_usetime: "
1021                             "can't find peer for inbound.\n"));
1022                         sadb_set_usetime(inassoc);
1023                         return;
1024                 }
1025         } else {
1026                 outassoc = assoc;
1027                 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1028                 mutex_enter(&bucket->isaf_lock);
1029                 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1030                     outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1031                     outassoc->ipsa_addrfam);
1032                 mutex_exit(&bucket->isaf_lock);
1033                 if (inassoc == NULL) {
1034                         /* Q: Do we wish to set haspeer == B_FALSE? */
1035                         esp0dbg(("esp_set_usetime: "
1036                             "can't find peer for outbound.\n"));
1037                         sadb_set_usetime(outassoc);
1038                         return;
1039                 }
1040         }
1041 
1042         /* Update usetime on both. */
1043         sadb_set_usetime(inassoc);
1044         sadb_set_usetime(outassoc);
1045 
1046         /*
1047          * REFRELE any peer SA.
1048          *
1049          * Because of the multi-line macro nature of IPSA_REFRELE, keep
1050          * them in { }.
1051          */
1052         if (inbound) {
1053                 IPSA_REFRELE(outassoc);
1054         } else {
1055                 IPSA_REFRELE(inassoc);
1056         }
1057 }
1058 
1059 /*
1060  * Handle ESP inbound data for IPv4 and IPv6.
1061  * Returns the mblk chain for further processing, or NULL if the packet
1062  * has been consumed (e.g. dropped on an early replay failure).
1063  */
1064 mblk_t *
1065 esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
1066 {
1067         esph_t *esph = (esph_t *)arg;
1068         ipsa_t *ipsa = ira->ira_ipsec_esp_sa;
1069         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
1070         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1071         ipsec_stack_t   *ipss = ns->netstack_ipsec;
1072 
1073         /*
1074          * We may wish to check replay in-range-only here as an optimization.
1075          * Include the reality check of ipsa->ipsa_replay >
1076          * ipsa->ipsa_replay_wsize for times when it's the first N packets,
1077          * where N == ipsa->ipsa_replay_wsize.
1078          *
1079          * Another check that may come here later is the "collision" check.
1080          * If legitimate packets flow quickly enough, this won't be a problem,
1081          * but collisions may cause authentication algorithm crunching to
1082          * take place when it doesn't need to.
1083          */
1084         if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
1085                 ESP_BUMP_STAT(espstack, replay_early_failures);
1086                 IP_ESP_BUMP_STAT(ipss, in_discards);
1087                 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
1088                     DROPPER(ipss, ipds_esp_early_replay),
1089                     &espstack->esp_dropper);
1090                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1091                 return (NULL);
1092         }
1093 
1094         /*
1095          * Adjust the IP header's payload length to reflect the removal
1096          * of the ICV.
1097          */
1098         if (!(ira->ira_flags & IRAF_IS_IPV4)) {
1099                 ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
1100                 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
1101                     ipsa->ipsa_mac_len);
1102         } else {
1103                 ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1104                 ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
1105                     ipsa->ipsa_mac_len);
1106         }
1107 
1108         /* submit the request to the crypto framework */
1109         return (esp_submit_req_inbound(data_mp, ira, ipsa,
1110             (uint8_t *)esph - data_mp->b_rptr));
1111 }
1112 
1113 /* XXX refactor me */
1114 /*
1115  * Handle the SADB_GETSPI message.  Create a larval SA.
1116  */
1117 static void
1118 esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
1119 {
1120         ipsa_t *newbie, *target;
1121         isaf_t *outbound, *inbound;
1122         int rc, diagnostic;
1123         sadb_sa_t *assoc;
1124         keysock_out_t *kso;
1125         uint32_t newspi;
1126 
1127         /*
1128          * Randomly generate a proposed SPI value
1129          */
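             /*
              * If a clustering hook is registered, let it choose the SPI
              * (e.g. so the value can be kept unique across the cluster);
              * otherwise fall back to pseudo-random bytes.
              */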
1130         if (cl_inet_getspi != NULL) {
1131                 cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid,
1132                     IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
1133         } else {
1134                 (void) random_get_pseudo_bytes((uint8_t *)&newspi,
1135                     sizeof (uint32_t));
1136         }
1137         newbie = sadb_getspi(ksi, newspi, &diagnostic,
1138             espstack->ipsecesp_netstack, IPPROTO_ESP);
1139 
1140         if (newbie == NULL) {
1141                 sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic,
1142                     ksi->ks_in_serial);
1143                 return;
1144         } else if (newbie == (ipsa_t *)-1) {
1145                 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
1146                     ksi->ks_in_serial);
1147                 return;
1148         }
1149 
1150         /*
1151          * XXX - We may randomly collide.  We really should recover from this.
1152          *       Unfortunately, that could require spending way-too-much-time
1153          *       in here.  For now, let the user retry.
1154          */
1155 
1156         if (newbie->ipsa_addrfam == AF_INET6) {
1157                 outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6,
1158                     *(uint32_t *)(newbie->ipsa_dstaddr));
1159                 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6,
1160                     newbie->ipsa_spi);
1161         } else {
1162                 ASSERT(newbie->ipsa_addrfam == AF_INET);
1163                 outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4,
1164                     *(uint32_t *)(newbie->ipsa_dstaddr));
1165                 inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4,
1166                     newbie->ipsa_spi);
1167         }
1168 
1169         mutex_enter(&outbound->isaf_lock);
1170         mutex_enter(&inbound->isaf_lock);
1171 
1172         /*
1173          * Check for collisions (i.e. did sadb_getspi() return with something
1174          * that already exists?).
1175          *
1176          * Try outbound first.  Even though SADB_GETSPI is traditionally
1177          * for inbound SAs, you never know what a user might do.
1178          */
1179         target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
1180             newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
1181         if (target == NULL) {
1182                 target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
1183                     newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
1184                     newbie->ipsa_addrfam);
1185         }
1186 
1187         /*
1188          * I don't have collisions elsewhere!
1189          * (Nor will I because I'm still holding inbound/outbound locks.)
1190          */
1191 
1192         if (target != NULL) {
1193                 rc = EEXIST;
1194                 IPSA_REFRELE(target);
1195         } else {
1196                 /*
1197                  * sadb_insertassoc() also checks for collisions, so
1198                  * if there's a colliding entry, rc will be set
1199                  * to EEXIST.
1200                  */
1201                 rc = sadb_insertassoc(newbie, inbound);
1202                 newbie->ipsa_hardexpiretime = gethrestime_sec();
1203                 newbie->ipsa_hardexpiretime +=
1204                     espstack->ipsecesp_larval_timeout;
1205         }
1206 
1207         /*
1208          * Can exit outbound mutex.  Hold inbound until we're done
1209          * with newbie.
1210          */
1211         mutex_exit(&outbound->isaf_lock);
1212 
1213         if (rc != 0) {
1214                 mutex_exit(&inbound->isaf_lock);
1215                 IPSA_REFRELE(newbie);
1216                 sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc,
1217                     SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1218                 return;
1219         }
1220 
1221 
1222         /* Can write here because I'm still holding the bucket lock. */
1223         newbie->ipsa_type = SADB_SATYPE_ESP;
1224 
1225         /*
1226          * Construct successful return message. We have one thing going
1227          * for us in PF_KEY v2.  That's the fact that
1228          *      sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
1229          */
1230         assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
1231         assoc->sadb_sa_exttype = SADB_EXT_SA;
1232         assoc->sadb_sa_spi = newbie->ipsa_spi;
1233         *((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
1234         mutex_exit(&inbound->isaf_lock);
1235 
1236         /* Convert KEYSOCK_IN to KEYSOCK_OUT. */
1237         kso = (keysock_out_t *)ksi;
1238         kso->ks_out_len = sizeof (*kso);
1239         kso->ks_out_serial = ksi->ks_in_serial;
1240         kso->ks_out_type = KEYSOCK_OUT;
1241 
1242         /*
1243          * Can safely putnext() to esp_pfkey_q, because this is a turnaround
1244          * from the esp_pfkey_q.
1245          */
1246         putnext(espstack->esp_pfkey_q, mp);
1247 }
1248 
1249 /*
1250  * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
1251  * allocated mblk with the ESP header in between the two.
1252  */
1253 static boolean_t
1254 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint,
1255     ipsecesp_stack_t *espstack)
1256 {
1257         mblk_t *split_mp = mp;
1258         uint_t wheretodiv = divpoint;
1259 
1260         while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
1261                 wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
1262                 split_mp = split_mp->b_cont;
1263                 ASSERT(split_mp != NULL);
1264         }
1265 
1266         if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
1267                 mblk_t *scratch;
1268 
1269                 /* "scratch" is the 2nd half, split_mp is the first. */
1270                 scratch = dupb(split_mp);
1271                 if (scratch == NULL) {
1272                         esp1dbg(espstack,
1273                             ("esp_insert_esp: can't allocate scratch.\n"));
1274                         return (B_FALSE);
1275                 }
1276                 /* NOTE:  dupb() doesn't set b_cont appropriately. */
1277                 scratch->b_cont = split_mp->b_cont;
1278                 scratch->b_rptr += wheretodiv;
1279                 split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
1280                 split_mp->b_cont = scratch;
1281         }
1282         /*
1283          * At this point, split_mp is exactly "wheretodiv" bytes long, and
1284          * holds the end of the pre-ESP part of the datagram.
1285          */
1286         esp_mp->b_cont = split_mp->b_cont;
1287         split_mp->b_cont = esp_mp;
1288 
1289         return (B_TRUE);
1290 }
1291 
1292 /*
1293  * Section 7 of RFC 3947 says:
1294  *
1295  * 7.  Recovering from the Expiring NAT Mappings
1296  *
1297  *    There are cases where NAT box decides to remove mappings that are still
1298  *    alive (for example, when the keepalive interval is too long, or when the
1299  *    NAT box is rebooted).  To recover from this, ends that are NOT behind
1300  *    NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
1301  *    the other end to determine which IP and port addresses should be used.
1302  *    The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
1303  *    DoS attack possibility because the IP address or port of the other host
1304  *    will not change (it is not behind NAT).
1305  *
1306  *    Keepalives cannot be used for these purposes, as they are not
1307  *    authenticated, but any IKE authenticated IKE packet or ESP packet can be
1308  *    used to detect whether the IP address or the port has changed.
1309  *
1310  * The following function will check an SA and its explicitly-set pair to see
1311  * if the NAT-T remote port matches the received packet (which must have
1312  * passed ESP authentication, see esp_in_done() for the caller context).  If
1313  * there is a mismatch, the SAs are updated.  It is not important if we race
1314  * with a transmitting thread, as if there is a transmitting thread, it will
1315  * merely emit a packet that will most-likely be dropped.
1316  *
1317  * "ports" are ordered src,dst, and assoc is an inbound SA, where src should
1318  * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
1319  */
1320 #ifdef _LITTLE_ENDIAN
1321 #define FIRST_16(x) ((x) & 0xFFFF)
1322 #define NEXT_16(x) (((x) >> 16) & 0xFFFF)
1323 #else
1324 #define FIRST_16(x) (((x) >> 16) & 0xFFFF)
1325 #define NEXT_16(x) ((x) & 0xFFFF)
1326 #endif
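     /*
      * FIRST_16/NEXT_16 extract the first and second 16-bit fields (in wire
      * order) from the packed 32-bit "ports" word, independent of host byte
      * order; both values remain in network byte order.
      */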
1327 static void
1328 esp_port_freshness(uint32_t ports, ipsa_t *assoc)
1329 {
1330         uint16_t remote = FIRST_16(ports);
1331         uint16_t local = NEXT_16(ports);
1332         ipsa_t *outbound_peer;
1333         isaf_t *bucket;
1334         ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
1335 
1336         /* We found a conn_t, therefore local != 0. */
1337         ASSERT(local != 0);
1338         /* Assume an IPv4 SA. */
1339         ASSERT(assoc->ipsa_addrfam == AF_INET);
1340 
1341         /*
1342          * On-the-wire rport == 0 means something's very wrong.
1343          * An unpaired SA is also useless to us.
1344          * If we are behind the NAT, don't bother.
1345          * A zero local NAT port defaults to 4500, so check that too.
1346          * And, of course, if the ports already match, we don't need to
1347          * bother.
1348          */
1349         if (remote == 0 || assoc->ipsa_otherspi == 0 ||
1350             (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) ||
1351             (assoc->ipsa_remote_nat_port == 0 &&
1352             remote == htons(IPPORT_IKE_NATT)) ||
1353             remote == assoc->ipsa_remote_nat_port)
1354                 return;
1355 
1356         /* Try and snag the peer.   NOTE:  Assume IPv4 for now. */
1357         bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4),
1358             assoc->ipsa_srcaddr[0]);
1359         mutex_enter(&bucket->isaf_lock);
1360         outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi,
1361             assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET);
1362         mutex_exit(&bucket->isaf_lock);
1363 
1364         /* We probably lost a race to a deleting or expiring thread. */
1365         if (outbound_peer == NULL)
1366                 return;
1367 
1368         /*
1369          * Hold the mutexes for both SAs so we don't race another inbound
1370          * thread.  A lock-entry order shouldn't matter, since all other
1371          * per-ipsa locks are individually held-then-released.
1372          *
1373          * Luckily, this has nothing to do with the remote-NAT address,
1374          * so we don't have to re-scribble the cached-checksum differential.
1375          */
1376         mutex_enter(&outbound_peer->ipsa_lock);
1377         mutex_enter(&assoc->ipsa_lock);
1378         outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port =
1379             remote;
1380         mutex_exit(&assoc->ipsa_lock);
1381         mutex_exit(&outbound_peer->ipsa_lock);
1382         IPSA_REFRELE(outbound_peer);
1383         ESP_BUMP_STAT(espstack, sa_port_renumbers);
1384 }
1385 /*
1386  * Finish processing of an inbound ESP packet after processing by the
1387  * crypto framework.
1388  * - Remove the ESP header.
1389  * - Send packet back to IP.
1390  * If authentication was performed on the packet, this function is called
1391  * only if the authentication succeeded.
1392  * On success returns the stripped data_mp; on failure (or when the packet
1393  * is otherwise consumed) data_mp is freed or queued and NULL is returned.
1394  */
1395 static mblk_t *
1396 esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
1397 {
1398         ipsa_t *assoc;
1399         uint_t espstart;
1400         uint32_t ivlen = 0;
1401         uint_t processed_len;
1402         esph_t *esph;
1403         kstat_named_t *counter;
1404         boolean_t is_natt;
1405         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
1406         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1407         ipsec_stack_t   *ipss = ns->netstack_ipsec;
1408 
1409         assoc = ira->ira_ipsec_esp_sa;
1410         ASSERT(assoc != NULL);
1411 
1412         is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
1413 
1414         /* get the pointer to the ESP header */
1415         if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
1416                 /* authentication-only ESP */
1417                 espstart = ic->ic_crypto_data.cd_offset;
1418                 processed_len = ic->ic_crypto_data.cd_length;
1419         } else {
1420                 /* encryption present */
1421                 ivlen = assoc->ipsa_iv_len;
1422                 if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
1423                         /* encryption-only ESP */
1424                         espstart = ic->ic_crypto_data.cd_offset -
1425                             sizeof (esph_t) - assoc->ipsa_iv_len;
1426                         processed_len = ic->ic_crypto_data.cd_length +
1427                             ivlen;
1428                 } else {
1429                         /* encryption with authentication */
1430                         espstart = ic->ic_crypto_dual_data.dd_offset1;
1431                         processed_len = ic->ic_crypto_dual_data.dd_len2 +
1432                             ivlen;
1433                 }
1434         }
1435 
1436         esph = (esph_t *)(data_mp->b_rptr + espstart);
1437 
1438         if (assoc->ipsa_auth_alg != IPSA_AALG_NONE ||
1439             (assoc->ipsa_flags & IPSA_F_COMBINED)) {
1440                 /*
1441                  * Authentication passed if we reach this point.
1442                  * Packets with authentication will have the ICV
1443                  * after the crypto data. Adjust b_wptr before
1444                  * making padlen checks.
1445                  */
1446                 ESP_BUMP_STAT(espstack, good_auth);
1447                 data_mp->b_wptr -= assoc->ipsa_mac_len;
1448 
1449                 /*
1450                  * Check the replay window here.  For right now, assume
1451                  * keysock will set the replay window size to zero for SAs
1452                  * that have an unspecified sender; this may change.  (An
1453                  * illustrative sliding-window sketch follows this function.)
1454                  */
1455 
1456                 if (!sadb_replay_check(assoc, esph->esph_replay)) {
1457                         /*
1458                          * Log the event.  Do not print the replay failure
1459                          * number, or else syslog cannot collate the error
1460                          * messages.  Printing the replay number that failed
1461                          * would also open a denial-of-service attack
1462                          * vector.
1463                          */
1464                         ipsec_assocfailure(info.mi_idnum, 0, 0,
1465                             SL_ERROR | SL_WARN,
1466                             "Replay failed for ESP spi 0x%x, dst %s.\n",
1467                             assoc->ipsa_spi, assoc->ipsa_dstaddr,
1468                             assoc->ipsa_addrfam, espstack->ipsecesp_netstack);
1469                         ESP_BUMP_STAT(espstack, replay_failures);
1470                         counter = DROPPER(ipss, ipds_esp_replay);
1471                         goto drop_and_bail;
1472                 }
1473 
1474                 if (is_natt) {
1475                         ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS);
1476                         ASSERT(ira->ira_esp_udp_ports != 0);
1477                         esp_port_freshness(ira->ira_esp_udp_ports, assoc);
1478                 }
1479         }
1480 
1481         esp_set_usetime(assoc, B_TRUE);
1482 
1483         if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
1484                 /* The ipsa has hit hard expiration, LOG and AUDIT. */
1485                 ipsec_assocfailure(info.mi_idnum, 0, 0,
1486                     SL_ERROR | SL_WARN,
1487                     "ESP association 0x%x, dst %s had bytes expire.\n",
1488                     assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1489                     espstack->ipsecesp_netstack);
1490                 ESP_BUMP_STAT(espstack, bytes_expired);
1491                 counter = DROPPER(ipss, ipds_esp_bytes_expire);
1492                 goto drop_and_bail;
1493         }
1494 
1495         /*
1496          * Remove ESP header and padding from packet.  I hope the compiler
1497          * spews "branch, predict taken" code for this.
1498          */
1499 
1500         if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4),
1501             ivlen, &counter, espstack)) {
1502 
1503                 if (is_system_labeled() && assoc->ipsa_tsl != NULL) {
1504                         if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) {
1505                                 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
1506                                     DROPPER(ipss, ipds_ah_nomem),
1507                                     &espstack->esp_dropper);
1508                                 BUMP_MIB(ira->ira_ill->ill_ip_mib,
1509                                     ipIfStatsInDiscards);
1510                                 return (NULL);
1511                         }
1512                 }
1513                 if (is_natt)
1514                         return (esp_fix_natt_checksums(data_mp, assoc));
1515 
1516                 if (assoc->ipsa_state == IPSA_STATE_IDLE) {
1517                         /*
1518                          * Cluster buffering case.  Tell caller that we're
1519                          * handling the packet.
1520                          */
1521                         sadb_buf_pkt(assoc, data_mp, ira);
1522                         return (NULL);
1523                 }
1524 
1525                 return (data_mp);
1526         }
1527 
1528         esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n"));
1529 drop_and_bail:
1530         IP_ESP_BUMP_STAT(ipss, in_discards);
1531         ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter,
1532             &espstack->esp_dropper);
1533         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1534         return (NULL);
1535 }
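
/*
 * Illustrative, user-space-only sketch (kept out of the build with #if 0)
 * of a sliding-window anti-replay check in the spirit of RFC 4303.  The
 * authoritative check for these SAs is sadb_replay_check(); the structure
 * and function names below, and the 64-packet window, are examples only.
 */
#if 0
#include <stdint.h>
#include <stdbool.h>

#define	REPLAY_WINDOW	64

struct replay_state {
	uint32_t	top;	/* highest sequence number accepted so far */
	uint64_t	bitmap;	/* bit 0 == top, bit n == top - n */
};

static bool
replay_check_and_update(struct replay_state *rs, uint32_t seq)
{
	if (seq == 0)
		return (false);			/* sequence 0 is never valid */

	if (seq > rs->top) {
		/* New high-water mark: slide the window forward. */
		uint32_t shift = seq - rs->top;

		rs->bitmap = (shift >= REPLAY_WINDOW) ? 0 :
		    rs->bitmap << shift;
		rs->bitmap |= 1ULL;		/* mark the new top as seen */
		rs->top = seq;
		return (true);
	}

	if (rs->top - seq >= REPLAY_WINDOW)
		return (false);			/* too old: left of the window */

	if (rs->bitmap & (1ULL << (rs->top - seq)))
		return (false);			/* duplicate: already seen */

	rs->bitmap |= 1ULL << (rs->top - seq);	/* in window, unseen: accept */
	return (true);
}
#endif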
1536 
1537 /*
1538  * Called upon failing the inbound ICV check. The message passed as
1539  * argument is freed.
1540  */
1541 static void
1542 esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira)
1543 {
1544         ipsa_t          *assoc = ira->ira_ipsec_esp_sa;
1545         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
1546         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1547         ipsec_stack_t   *ipss = ns->netstack_ipsec;
1548 
1549         /*
1550          * Log the event.  Don't print to the console, to avoid a
1551          * potential denial-of-service attack.
1552          */
1553         ESP_BUMP_STAT(espstack, bad_auth);
1554 
1555         ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
1556             "ESP Authentication failed for spi 0x%x, dst %s.\n",
1557             assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1558             espstack->ipsecesp_netstack);
1559 
1560         IP_ESP_BUMP_STAT(ipss, in_discards);
1561         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1562             DROPPER(ipss, ipds_esp_bad_auth),
1563             &espstack->esp_dropper);
1564 }
1565 
1566 
1567 /*
1568  * Invoked for outbound packets after ESP processing. If the packet
1569  * also requires AH, performs the AH SA selection and AH processing.
1570  *
1571  * Returns data_mp (possibly with AH added) unless data_mp was consumed
1572  * due to an error, or queued due to async. crypto or an ACQUIRE trigger.
1573  */
1574 static mblk_t *
1575 esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa)
1576 {
1577         ipsec_action_t *ap;
1578 
1579         ap = ixa->ixa_ipsec_action;
1580         if (ap == NULL) {
1581                 ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
1582                 ap = pp->ipsp_act;
1583         }
1584 
1585         if (!ap->ipa_want_ah)
1586                 return (data_mp);
1587 
1588         /*
1589          * Normally the AH SA would have already been put in place
1590          * but it could have been flushed so we need to look for it.
1591          */
1592         if (ixa->ixa_ipsec_ah_sa == NULL) {
1593                 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) {
1594                         sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE);
1595                         return (NULL);
1596                 }
1597         }
1598         ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
1599 
1600         data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa);
1601         return (data_mp);
1602 }
1603 
1604 
1605 /*
1606  * Kernel crypto framework callback invoked after completion of async
1607  * crypto requests for outbound packets.
1608  */
1609 static void
1610 esp_kcf_callback_outbound(void *arg, int status)
1611 {
1612         mblk_t          *mp = (mblk_t *)arg;
1613         mblk_t          *async_mp;
1614         netstack_t      *ns;
1615         ipsec_stack_t   *ipss;
1616         ipsecesp_stack_t *espstack;
1617         mblk_t          *data_mp;
1618         ip_xmit_attr_t  ixas;
1619         ipsec_crypto_t  *ic;
1620         ill_t           *ill;
1621 
1622         /*
1623          * First remove the ipsec_crypto_t mblk.
1624          * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
1625          */
1626         async_mp = ipsec_remove_crypto_data(mp, &ic);
1627         ASSERT(async_mp != NULL);
1628 
1629         /*
1630          * Extract the ip_xmit_attr_t from the first mblk.
1631          * This verifies that the netstack and ill are still around; they
1632          * could have vanished while KCF was doing its work.
1633          * On successful return we have a nce_t and the ill/ipst can't
1634          * disappear until we do the nce_refrele in ixa_cleanup.
1635          */
1636         data_mp = async_mp->b_cont;
1637         async_mp->b_cont = NULL;
1638         if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
1639                 /* Disappeared on us - no ill/ipst for MIB */
1640                 /* We have nowhere to do stats since ixa_ipst could be NULL */
1641                 if (ixas.ixa_nce != NULL) {
1642                         ill = ixas.ixa_nce->nce_ill;
1643                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1644                         ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
1645                 }
1646                 freemsg(data_mp);
1647                 goto done;
1648         }
1649         ns = ixas.ixa_ipst->ips_netstack;
1650         espstack = ns->netstack_ipsecesp;
1651         ipss = ns->netstack_ipsec;
1652         ill = ixas.ixa_nce->nce_ill;
1653 
1654         if (status == CRYPTO_SUCCESS) {
1655                 /*
1656                  * If an ICV was computed, it was stored by the
1657                  * crypto framework at the end of the packet.
1658                  */
1659                 ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1660 
1661                 esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE);
1662                 /* NAT-T packet. */
1663                 if (IPH_HDR_VERSION(ipha) == IP_VERSION &&
1664                     ipha->ipha_protocol == IPPROTO_UDP)
1665                         esp_prepare_udp(ns, data_mp, ipha);
1666 
1667                 /* do AH processing if needed */
1668                 data_mp = esp_do_outbound_ah(data_mp, &ixas);
1669                 if (data_mp == NULL)
1670                         goto done;
1671 
1672                 (void) ip_output_post_ipsec(data_mp, &ixas);
1673         } else {
1674                 /* Outbound shouldn't see invalid MAC */
1675                 ASSERT(status != CRYPTO_INVALID_MAC);
1676 
1677                 esp1dbg(espstack,
1678                     ("esp_kcf_callback_outbound: crypto failed with 0x%x\n",
1679                     status));
1680                 ESP_BUMP_STAT(espstack, crypto_failures);
1681                 ESP_BUMP_STAT(espstack, out_discards);
1682                 ip_drop_packet(data_mp, B_FALSE, ill,
1683                     DROPPER(ipss, ipds_esp_crypto_failed),
1684                     &espstack->esp_dropper);
1685                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1686         }
1687 done:
1688         ixa_cleanup(&ixas);
1689         (void) ipsec_free_crypto_data(mp);
1690 }
1691 
1692 /*
1693  * Kernel crypto framework callback invoked after completion of async
1694  * crypto requests for inbound packets.
1695  */
1696 static void
1697 esp_kcf_callback_inbound(void *arg, int status)
1698 {
1699         mblk_t          *mp = (mblk_t *)arg;
1700         mblk_t          *async_mp;
1701         netstack_t      *ns;
1702         ipsecesp_stack_t *espstack;
1703         ipsec_stack_t   *ipss;
1704         mblk_t          *data_mp;
1705         ip_recv_attr_t  iras;
1706         ipsec_crypto_t  *ic;
1707 
1708         /*
1709          * First remove the ipsec_crypto_t mblk.
1710          * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
1711          */
1712         async_mp = ipsec_remove_crypto_data(mp, &ic);
1713         ASSERT(async_mp != NULL);
1714 
1715         /*
1716          * Extract the ip_recv_attr_t from the first mblk.
1717          * This verifies that the netstack and ill are still around; they
1718          * could have vanished while KCF was doing its work.
1719          */
1720         data_mp = async_mp->b_cont;
1721         async_mp->b_cont = NULL;
1722         if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
1723                 /* The ill or ip_stack_t disappeared on us */
1724                 ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
1725                 freemsg(data_mp);
1726                 goto done;
1727         }
1728 
1729         ns = iras.ira_ill->ill_ipst->ips_netstack;
1730         espstack = ns->netstack_ipsecesp;
1731         ipss = ns->netstack_ipsec;
1732 
1733         if (status == CRYPTO_SUCCESS) {
1734                 data_mp = esp_in_done(data_mp, &iras, ic);
1735                 if (data_mp == NULL)
1736                         goto done;
1737 
1738                 /* finish IPsec processing */
1739                 ip_input_post_ipsec(data_mp, &iras);
1740         } else if (status == CRYPTO_INVALID_MAC) {
1741                 esp_log_bad_auth(data_mp, &iras);
1742         } else {
1743                 esp1dbg(espstack,
1744                     ("esp_kcf_callback: crypto failed with 0x%x\n",
1745                     status));
1746                 ESP_BUMP_STAT(espstack, crypto_failures);
1747                 IP_ESP_BUMP_STAT(ipss, in_discards);
1748                 ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
1749                     DROPPER(ipss, ipds_esp_crypto_failed),
1750                     &espstack->esp_dropper);
1751                 BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1752         }
1753 done:
1754         ira_cleanup(&iras, B_TRUE);
1755         (void) ipsec_free_crypto_data(mp);
1756 }
1757 
1758 /*
1759  * Invoked on crypto framework failure during inbound and outbound processing.
1760  */
1761 static void
1762 esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
1763     ill_t *ill, ipsecesp_stack_t *espstack)
1764 {
1765         ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
1766 
1767         esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
1768             is_inbound ? "inbound" : "outbound", kef_rc));
1769         ip_drop_packet(data_mp, is_inbound, ill,
1770             DROPPER(ipss, ipds_esp_crypto_failed),
1771             &espstack->esp_dropper);
1772         ESP_BUMP_STAT(espstack, crypto_failures);
1773         if (is_inbound)
1774                 IP_ESP_BUMP_STAT(ipss, in_discards);
1775         else
1776                 ESP_BUMP_STAT(espstack, out_discards);
1777 }
1778 
1779 /*
1780  * A statement-equivalent macro, _cr MUST point to a modifiable
1781  * crypto_call_req_t.
1782  */
1783 #define ESP_INIT_CALLREQ(_cr, _mp, _callback)                           \
1784         (_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE;      \
1785         (_cr)->cr_callback_arg = (_mp);                              \
1786         (_cr)->cr_callback_func = (_callback)
1787 
1788 #define ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {                      \
1789         (mac)->cd_format = CRYPTO_DATA_RAW;                          \
1790         (mac)->cd_offset = 0;                                                \
1791         (mac)->cd_length = icvlen;                                   \
1792         (mac)->cd_raw.iov_base = (char *)icvbuf;                     \
1793         (mac)->cd_raw.iov_len = icvlen;                                      \
1794 }
1795 
1796 #define ESP_INIT_CRYPTO_DATA(data, mp, off, len) {                      \
1797         if (MBLKL(mp) >= (len) + (off)) {                            \
1798                 (data)->cd_format = CRYPTO_DATA_RAW;                 \
1799                 (data)->cd_raw.iov_base = (char *)(mp)->b_rptr;           \
1800                 (data)->cd_raw.iov_len = MBLKL(mp);                  \
1801                 (data)->cd_offset = off;                             \
1802         } else {                                                        \
1803                 (data)->cd_format = CRYPTO_DATA_MBLK;                        \
1804                 (data)->cd_mp = mp;                                  \
1805                 (data)->cd_offset = off;                             \
1806         }                                                               \
1807         (data)->cd_length = len;                                     \
1808 }
1809 
1810 #define ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {   \
1811         (data)->dd_format = CRYPTO_DATA_MBLK;                                \
1812         (data)->dd_mp = mp;                                          \
1813         (data)->dd_len1 = len1;                                              \
1814         (data)->dd_offset1 = off1;                                   \
1815         (data)->dd_len2 = len2;                                              \
1816         (data)->dd_offset2 = off2;                                   \
1817 }
1818 
1819 /*
1820  * Returns data_mp if successfully completed the request. Returns
1821  * NULL if it failed (and increments InDiscards) or if it is pending.
1822  */
1823 static mblk_t *
1824 esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira,
1825     ipsa_t *assoc, uint_t esph_offset)
1826 {
1827         uint_t auth_offset, msg_len, auth_len;
1828         crypto_call_req_t call_req, *callrp;
1829         mblk_t *mp;
1830         esph_t *esph_ptr;
1831         int kef_rc;
1832         uint_t icv_len = assoc->ipsa_mac_len;
1833         crypto_ctx_template_t auth_ctx_tmpl;
1834         boolean_t do_auth, do_encr, force;
1835         uint_t encr_offset, encr_len;
1836         uint_t iv_len = assoc->ipsa_iv_len;
1837         crypto_ctx_template_t encr_ctx_tmpl;
1838         ipsec_crypto_t  *ic, icstack;
1839         uchar_t *iv_ptr;
1840         netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
1841         ipsec_stack_t *ipss = ns->netstack_ipsec;
1842         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1843 
1844         do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
1845         do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
1846         force = (assoc->ipsa_flags & IPSA_F_ASYNC);
1847 
1848 #ifdef IPSEC_LATENCY_TEST
1849         kef_rc = CRYPTO_SUCCESS;
1850 #else
1851         kef_rc = CRYPTO_FAILED;
1852 #endif
1853 
1854         /*
1855          * An inbound packet is of the form:
1856          * [IP,options,ESP,IV,data,pad,ICV]
1857          */
1858         esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
1859         iv_ptr = (uchar_t *)(esph_ptr + 1);
1860         /* Packet length starting at IP header ending after ESP ICV. */
1861         msg_len = MBLKL(esp_mp);
1862 
1863         encr_offset = esph_offset + sizeof (esph_t) + iv_len;
1864         encr_len = msg_len - encr_offset;
1865 
1866         /*
1867          * Counter mode algs need a nonce. This is set up in sadb_common_add().
1868          * If for some reason we are using an SA which does not have a nonce,
1869          * we must fail here.
1870          */
1871         if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
1872             (assoc->ipsa_nonce == NULL)) {
1873                 ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill,
1874                     DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
1875                 return (NULL);
1876         }
1877 
1878         if (force) {
1879                 /* We are doing asynch; allocate mblks to hold state */
1880                 if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
1881                     (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
1882                         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1883                         ip_drop_input("ipIfStatsInDiscards", esp_mp,
1884                             ira->ira_ill);
1885                         return (NULL);
1886                 }
1887                 linkb(mp, esp_mp);
1888                 callrp = &call_req;
1889                 ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound);
1890         } else {
1891                 /*
1892                  * If we know we are going to do sync then ipsec_crypto_t
1893                  * should be on the stack.
1894                  */
1895                 ic = &icstack;
1896                 bzero(ic, sizeof (*ic));
1897                 callrp = NULL;
1898         }
1899 
1900         if (do_auth) {
1901                 /* authentication context template */
1902                 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
1903                     auth_ctx_tmpl);
1904 
1905                 /* ICV to be verified */
1906                 ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
1907                     icv_len, esp_mp->b_wptr - icv_len);
1908 
1909                 /* authentication starts at the ESP header */
1910                 auth_offset = esph_offset;
1911                 auth_len = msg_len - auth_offset - icv_len;
1912                 if (!do_encr) {
1913                         /* authentication only */
1914                         /* initialize input data argument */
1915                         ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
1916                             esp_mp, auth_offset, auth_len);
1917 
1918                         /* call the crypto framework */
1919                         kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
1920                             &ic->ic_crypto_data,
1921                             &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
1922                             &ic->ic_crypto_mac, callrp);
1923                 }
1924         }
1925 
1926         if (do_encr) {
1927                 /* encryption template */
1928                 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
1929                     encr_ctx_tmpl);
1930 
1931                 /* Call the nonce update function. Also passes in IV */
1932                 (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len,
1933                     iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);
1934 
1935                 if (!do_auth) {
1936                         /* decryption only */
1937                         /* initialize input data argument */
1938                         ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
1939                             esp_mp, encr_offset, encr_len);
1940 
1941                         /* call the crypto framework */
1942                         kef_rc = crypto_decrypt((crypto_mechanism_t *)
1943                             &ic->ic_cmm, &ic->ic_crypto_data,
1944                             &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
1945                             NULL, callrp);
1946                 }
1947         }
1948 
1949         if (do_auth && do_encr) {
1950                 /* dual operation */
1951                 /* initialize input data argument */
1952                 ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
1953                     esp_mp, auth_offset, auth_len,
1954                     encr_offset, encr_len - icv_len);
1955 
1956                 /* specify IV */
1957                 ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
1958 
1959                 /* call the framework */
1960                 kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
1961                     &assoc->ipsa_emech, &ic->ic_crypto_dual_data,
1962                     &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
1963                     auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac,
1964                     NULL, callrp);
1965         }
1966 
1967         switch (kef_rc) {
1968         case CRYPTO_SUCCESS:
1969                 ESP_BUMP_STAT(espstack, crypto_sync);
1970                 esp_mp = esp_in_done(esp_mp, ira, ic);
1971                 if (force) {
1972                         /* Free mp after we are done with ic */
1973                         mp = ipsec_free_crypto_data(mp);
1974                         (void) ip_recv_attr_free_mblk(mp);
1975                 }
1976                 return (esp_mp);
1977         case CRYPTO_QUEUED:
1978                 /* esp_kcf_callback_inbound() will be invoked on completion */
1979                 ESP_BUMP_STAT(espstack, crypto_async);
1980                 return (NULL);
1981         case CRYPTO_INVALID_MAC:
1982                 if (force) {
1983                         mp = ipsec_free_crypto_data(mp);
1984                         esp_mp = ip_recv_attr_free_mblk(mp);
1985                 }
1986                 ESP_BUMP_STAT(espstack, crypto_sync);
1987                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1988                 esp_log_bad_auth(esp_mp, ira);
1989                 /* esp_mp was passed to ip_drop_packet */
1990                 return (NULL);
1991         }
1992 
1993         if (force) {
1994                 mp = ipsec_free_crypto_data(mp);
1995                 esp_mp = ip_recv_attr_free_mblk(mp);
1996         }
1997         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1998         esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack);
1999         /* esp_mp was passed to ip_drop_packet */
2000         return (NULL);
2001 }
2002 
2003 /*
2004  * Compute the IP and UDP checksums -- common code for both keepalives and
2005  * actual ESP-in-UDP packets.  Be flexible with multiple mblks because ESP
2006  * uses mblk-insertion to insert the UDP header.
2007  * TODO - If there is an easy way to prep a packet for HW checksums, make
2008  * it happen here.
2009  * Note that this is used both before calling ip_output_simple() and in
2010  * the ESP datapath; the former could use IXAF_SET_ULP_CKSUM but not the
2011  * latter.  (An illustrative checksum sketch follows this function.)
2012  */
2013 static void
2014 esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
2015 {
2016         int offset;
2017         uint32_t cksum;
2018         uint16_t *arr;
2019         mblk_t *udpmp = mp;
2020         uint_t hlen = IPH_HDR_LENGTH(ipha);
2021 
2022         ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2023 
2024         ipha->ipha_hdr_checksum = 0;
2025         ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
2026 
2027         if (ns->netstack_udp->us_do_checksum) {
2028                 ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
2029                 /* arr points to the IP header. */
2030                 arr = (uint16_t *)ipha;
2031                 IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
2032                 IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes,
2033                     ntohs(htons(ipha->ipha_length) - hlen));
2034                 /* arr[6-9] are the IP addresses. */
2035                 cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
2036                     ntohs(htons(ipha->ipha_length) - hlen);
2037                 cksum = IP_CSUM(mp, hlen, cksum);
2038                 offset = hlen + UDP_CHECKSUM_OFFSET;
2039                 while (offset >= MBLKL(udpmp)) {
2040                         offset -= MBLKL(udpmp);
2041                         udpmp = udpmp->b_cont;
2042                 }
2043                 /* arr points to the UDP header's checksum field. */
2044                 arr = (uint16_t *)(udpmp->b_rptr + offset);
2045                 *arr = cksum;
2046         }
2047 }
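
/*
 * Illustrative, user-space-only sketch (kept out of the build with #if 0)
 * of the arithmetic esp_prepare_udp() performs with IP_UDP_CSUM_COMP and
 * IP_CSUM: a 16-bit one's-complement sum over the IPv4 pseudo-header
 * (addresses, protocol, UDP length) plus the UDP header and payload.  The
 * function name and buffer layout are assumptions for the example; the
 * checksum field in the UDP header is assumed to be zero on input.
 */
#if 0
#include <stdint.h>
#include <stddef.h>

static uint16_t
udp_cksum(const uint8_t saddr[4], const uint8_t daddr[4],
    const uint8_t *udp, size_t udplen)
{
	uint32_t sum = 0;
	size_t i;

	/* Pseudo-header: source address, destination address, protocol. */
	for (i = 0; i < 4; i += 2) {
		sum += (saddr[i] << 8) | saddr[i + 1];
		sum += (daddr[i] << 8) | daddr[i + 1];
	}
	sum += 17;				/* IPPROTO_UDP */
	sum += (uint32_t)udplen;		/* UDP length (header + data) */

	/* UDP header and payload, 16 bits at a time. */
	for (i = 0; i + 1 < udplen; i += 2)
		sum += (udp[i] << 8) | udp[i + 1];
	if (udplen & 1)
		sum += udp[udplen - 1] << 8;	/* pad the odd trailing byte */

	/* Fold the carries, complement, and map 0 to 0xFFFF (RFC 768). */
	while (sum >> 16)
		sum = (sum & 0xFFFF) + (sum >> 16);
	sum = (~sum) & 0xFFFF;
	return (sum == 0 ? 0xFFFF : (uint16_t)sum);
}
#endif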
2048 
2049 /*
2050  * taskq handler so we can send the NAT-T keepalive on a separate thread.
2051  */
2052 static void
2053 actually_send_keepalive(void *arg)
2054 {
2055         mblk_t *mp = (mblk_t *)arg;
2056         ip_xmit_attr_t ixas;
2057         netstack_t      *ns;
2058         netstackid_t    stackid;
2059 
2060         stackid = (netstackid_t)(uintptr_t)mp->b_prev;
2061         mp->b_prev = NULL;
2062         ns = netstack_find_by_stackid(stackid);
2063         if (ns == NULL) {
2064                 /* Disappeared */
2065                 ip_drop_output("ipIfStatsOutDiscards", mp, NULL);
2066                 freemsg(mp);
2067                 return;
2068         }
2069 
2070         bzero(&ixas, sizeof (ixas));
2071         ixas.ixa_zoneid = ALL_ZONES;
2072         ixas.ixa_cred = kcred;
2073         ixas.ixa_cpid = NOPID;
2074         ixas.ixa_tsl = NULL;
2075         ixas.ixa_ipst = ns->netstack_ip;
2076         /* No ULP checksum; done by esp_prepare_udp */
2077         ixas.ixa_flags = (IXAF_IS_IPV4 | IXAF_NO_IPSEC | IXAF_VERIFY_SOURCE);
2078 
2079         (void) ip_output_simple(mp, &ixas);
2080         ixa_cleanup(&ixas);
2081         netstack_rele(ns);
2082 }
2083 
2084 /*
2085  * Send a one-byte UDP NAT-T keepalive.
2086  */
2087 void
2088 ipsecesp_send_keepalive(ipsa_t *assoc)
2089 {
2090         mblk_t          *mp;
2091         ipha_t          *ipha;
2092         udpha_t         *udpha;
2093         netstack_t      *ns = assoc->ipsa_netstack;
2094 
2095         ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock));
2096 
2097         mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI);
2098         if (mp == NULL)
2099                 return;
2100         ipha = (ipha_t *)mp->b_rptr;
2101         ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
2102         ipha->ipha_type_of_service = 0;
2103         ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1);
2104         /* Use the low-16 of the SPI so we have some clue where it came from. */
2105         ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1);
2106         ipha->ipha_fragment_offset_and_flags = 0;  /* Too small to fragment! */
2107         ipha->ipha_ttl = 0xFF;
2108         ipha->ipha_protocol = IPPROTO_UDP;
2109         ipha->ipha_hdr_checksum = 0;
2110         ipha->ipha_src = assoc->ipsa_srcaddr[0];
2111         ipha->ipha_dst = assoc->ipsa_dstaddr[0];
2112         udpha = (udpha_t *)(ipha + 1);
2113         udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
2114             assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
2115         udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
2116             assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
2117         udpha->uha_length = htons(sizeof (udpha_t) + 1);
2118         udpha->uha_checksum = 0;
2119         mp->b_wptr = (uint8_t *)(udpha + 1);
2120         *(mp->b_wptr++) = 0xFF;
2121 
2122         esp_prepare_udp(ns, mp, ipha);
2123 
2124         /*
2125          * We're holding an isaf_t bucket lock, so pawn off the actual
2126          * packet transmission to another thread.  Just in case syncq
2127          * processing causes a same-bucket packet to be processed.
2128          */
2129         mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid;
2130 
2131         if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp,
2132             TQ_NOSLEEP) == 0) {
2133                 /* Assume no memory if taskq_dispatch() fails. */
2134                 mp->b_prev = NULL;
2135                 ip_drop_packet(mp, B_FALSE, NULL,
2136                     DROPPER(ns->netstack_ipsec, ipds_esp_nomem),
2137                     &ns->netstack_ipsecesp->esp_dropper);
2138         }
2139 }
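
/*
 * Illustrative, user-space-only sketch (kept out of the build with #if 0)
 * of the same RFC 3948 NAT-keepalive payload built above: a UDP datagram
 * whose body is the single octet 0xff, sent between the IKE NAT-traversal
 * ports (4500 by default).  The function name and the peer address passed
 * in are placeholders, not anything taken from this file.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <unistd.h>

static int
send_natt_keepalive(const char *peer_ip)
{
	struct sockaddr_in sin;
	unsigned char ka = 0xff;	/* one-byte keepalive payload */
	int s;
	ssize_t rc;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
		return (-1);

	(void) memset(&sin, 0, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(4500);	/* IPPORT_IKE_NATT */
	if (inet_pton(AF_INET, peer_ip, &sin.sin_addr) != 1) {
		(void) close(s);
		return (-1);
	}

	rc = sendto(s, &ka, sizeof (ka), 0,
	    (struct sockaddr *)&sin, sizeof (sin));
	(void) close(s);
	return (rc == (ssize_t)sizeof (ka) ? 0 : -1);
}
#endif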
2140 
2141 /*
2142  * Returns mp if successfully completed the request. Returns
2143  * NULL if it failed (and increments InDiscards) or if it is pending.
2144  */
2145 static mblk_t *
2146 esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc,
2147     uchar_t *icv_buf, uint_t payload_len)
2148 {
2149         uint_t auth_len;
2150         crypto_call_req_t call_req, *callrp;
2151         mblk_t *esp_mp;
2152         esph_t *esph_ptr;
2153         mblk_t *mp;
2154         int kef_rc = CRYPTO_FAILED;
2155         uint_t icv_len = assoc->ipsa_mac_len;
2156         crypto_ctx_template_t auth_ctx_tmpl;
2157         boolean_t do_auth, do_encr, force;
2158         uint_t iv_len = assoc->ipsa_iv_len;
2159         crypto_ctx_template_t encr_ctx_tmpl;
2160         boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
2161         size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
2162         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
2163         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2164         ipsec_crypto_t  *ic, icstack;
2165         uchar_t         *iv_ptr;
2166         crypto_data_t   *cd_ptr = NULL;
2167         ill_t           *ill = ixa->ixa_nce->nce_ill;
2168         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2169 
2170         esp3dbg(espstack, ("esp_submit_req_outbound:%s",
2171             is_natt ? "natt" : "not natt"));
2172 
2173         do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
2174         do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
2175         force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2176 
2177 #ifdef IPSEC_LATENCY_TEST
2178         kef_rc = CRYPTO_SUCCESS;
2179 #else
2180         kef_rc = CRYPTO_FAILED;
2181 #endif
2182 
2183         /*
2184          * Outbound IPsec packets are of the form:
2185          * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
2186          * unless it's NATT, then it's
2187          * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
2188          * Get a pointer to the mblk containing the ESP header.
2189          */
2190         ASSERT(data_mp->b_cont != NULL);
2191         esp_mp = data_mp->b_cont;
2192         esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
2193         iv_ptr = (uchar_t *)(esph_ptr + 1);
2194 
2195         /*
2196          * Combined mode algs need a nonce. This is set up in sadb_common_add().
2197          * If for some reason we are using an SA which does not have a nonce,
2198          * we must fail here.
2199          */
2200         if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
2201             (assoc->ipsa_nonce == NULL)) {
2202                 ip_drop_packet(data_mp, B_FALSE, NULL,
2203                     DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2204                 return (NULL);
2205         }
2206 
2207         if (force) {
2208                 /* We are doing asynch; allocate mblks to hold state */
2209                 if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
2210                     (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2211                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2212                         ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
2213                         freemsg(data_mp);
2214                         return (NULL);
2215                 }
2216 
2217                 linkb(mp, data_mp);
2218                 callrp = &call_req;
2219                 ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound);
2220         } else {
2221                 /*
2222                  * If we know we are going to do sync then ipsec_crypto_t
2223                  * should be on the stack.
2224                  */
2225                 ic = &icstack;
2226                 bzero(ic, sizeof (*ic));
2227                 callrp = NULL;
2228         }
2229 
2230 
2231         if (do_auth) {
2232                 /* authentication context template */
2233                 IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
2234                     auth_ctx_tmpl);
2235 
2236                 /* where to store the computed mac */
2237                 ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
2238                     icv_len, icv_buf);
2239 
2240                 /* authentication starts at the ESP header */
2241                 auth_len = payload_len + iv_len + sizeof (esph_t);
2242                 if (!do_encr) {
2243                         /* authentication only */
2244                         /* initialize input data argument */
2245                         ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
2246                             esp_mp, esph_offset, auth_len);
2247 
2248                         /* call the crypto framework */
2249                         kef_rc = crypto_mac(&assoc->ipsa_amech,
2250                             &ic->ic_crypto_data,
2251                             &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
2252                             &ic->ic_crypto_mac, callrp);
2253                 }
2254         }
2255 
2256         if (do_encr) {
2257                 /* encryption context template */
2258                 IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
2259                     encr_ctx_tmpl);
2260                 /* Call the nonce update function. */
2261                 (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len,
2262                     iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);
2263 
2264                 if (!do_auth) {
2265                         /* encryption only, skip mblk that contains ESP hdr */
2266                         /* initialize input data argument */
2267                         ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
2268                             esp_mp->b_cont, 0, payload_len);
2269 
2270                         /*
2271                          * For combined mode ciphers, the ciphertext is the same
2272                          * size as the clear text, and the ICV should follow the
2273                          * ciphertext.  To convince the KCF to allow in-line
2274                          * encryption with an ICV, point ic_crypto_mac at the
2275                          * same buffer as the data.  The calling function needs
2276                          * to ensure the buffer is large enough to include
2277                          * the ICV.
2278                          *
2279                          * The IV is already written to the packet buffer, the
2280                          * nonce setup function copied it to the params struct
2281                          * for the cipher to use.
2282                          */
2283                         if (assoc->ipsa_flags & IPSA_F_COMBINED) {
2284                                 bcopy(&ic->ic_crypto_data,
2285                                     &ic->ic_crypto_mac,
2286                                     sizeof (crypto_data_t));
2287                                 ic->ic_crypto_mac.cd_length =
2288                                     payload_len + icv_len;
2289                                 cd_ptr = &ic->ic_crypto_mac;
2290                         }
2291 
2292                         /* call the crypto framework */
2293                         kef_rc = crypto_encrypt((crypto_mechanism_t *)
2294                             &ic->ic_cmm, &ic->ic_crypto_data,
2295                             &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
2296                             cd_ptr, callrp);
2297 
2298                 }
2299         }
2300 
2301         if (do_auth && do_encr) {
2302                 /*
2303                  * Encryption and authentication:
2304                  * Pass the pointer to the mblk chain starting at the ESP
2305                  * header to the framework. Skip the ESP header mblk
2306                  * for encryption, which is reflected by an encryption
2307                  * offset equal to the length of that mblk. Start
2308                  * the authentication at the ESP header, i.e. use an
2309                  * authentication offset of zero.
2310                  */
2311                 ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
2312                     esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);
2313 
2314                 /* specify IV */
2315                 ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
2316 
2317                 /* call the framework */
2318                 kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
2319                     &assoc->ipsa_amech, NULL,
2320                     &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
2321                     encr_ctx_tmpl, auth_ctx_tmpl,
2322                     &ic->ic_crypto_dual_data,
2323                     &ic->ic_crypto_mac, callrp);
2324         }
2325 
2326         switch (kef_rc) {
2327         case CRYPTO_SUCCESS:
2328                 ESP_BUMP_STAT(espstack, crypto_sync);
2329                 esp_set_usetime(assoc, B_FALSE);
2330                 if (force) {
2331                         mp = ipsec_free_crypto_data(mp);
2332                         data_mp = ip_xmit_attr_free_mblk(mp);
2333                 }
2334                 if (is_natt)
2335                         esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr);
2336                 return (data_mp);
2337         case CRYPTO_QUEUED:
2338                 /* esp_kcf_callback_outbound() will be invoked on completion */
2339                 ESP_BUMP_STAT(espstack, crypto_async);
2340                 return (NULL);
2341         }
2342 
2343         if (force) {
2344                 mp = ipsec_free_crypto_data(mp);
2345                 data_mp = ip_xmit_attr_free_mblk(mp);
2346         }
2347         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2348         esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack);
2349         /* data_mp was passed to ip_drop_packet */
2350         return (NULL);
2351 }
2352 
2353 /*
2354  * Handle outbound IPsec processing for IPv4 and IPv6
2355  *
2356  * Returns data_mp if successfully completed the request. Returns
2357  * NULL if it failed (and increments InDiscards) or if it is pending.
2358  */
2359 static mblk_t *
2360 esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
2361 {
2362         mblk_t *espmp, *tailmp;
2363         ipha_t *ipha;
2364         ip6_t *ip6h;
2365         esph_t *esph_ptr, *iv_ptr;
2366         uint_t af;
2367         uint8_t *nhp;
2368         uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
2369         uintptr_t esplen = sizeof (esph_t);
2370         uint8_t protocol;
2371         ipsa_t *assoc;
2372         uint_t iv_len, block_size, mac_len = 0;
2373         uchar_t *icv_buf;
2374         udpha_t *udpha;
2375         boolean_t is_natt = B_FALSE;
2376         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
2377         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2378         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2379         ill_t           *ill = ixa->ixa_nce->nce_ill;
2380         boolean_t       need_refrele = B_FALSE;
2381 
2382         ESP_BUMP_STAT(espstack, out_requests);
2383 
2384         /*
2385          * <sigh> We have to copy the message here, because TCP (for example)
2386          * keeps a dupb() of the message lying around for retransmission.
2387          * Since ESP changes the whole of the datagram, we have to create our
2388          * own copy lest we clobber TCP's data.  Since we have to copy anyway,
2389          * we might as well make use of msgpullup() and get the mblk into one
2390          * contiguous piece!
2391          */
2392         tailmp = msgpullup(data_mp, -1);
2393         if (tailmp == NULL) {
2394                 esp0dbg(("esp_outbound: msgpullup() failed, "
2395                     "dropping packet.\n"));
2396                 ip_drop_packet(data_mp, B_FALSE, ill,
2397                     DROPPER(ipss, ipds_esp_nomem),
2398                     &espstack->esp_dropper);
2399                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2400                 return (NULL);
2401         }
2402         freemsg(data_mp);
2403         data_mp = tailmp;
2404 
2405         assoc = ixa->ixa_ipsec_esp_sa;
2406         ASSERT(assoc != NULL);
2407 
2408         /*
2409          * Get the outer IP header in shape to escape this system.
2410          */
2411         if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
2412                 /*
2413                  * Need to update packet with any CIPSO option and update
2414                  * ixa_tsl to capture the new label.
2415                  * We allocate a separate ixa for that purpose.
2416                  */
2417                 ixa = ip_xmit_attr_duplicate(ixa);
2418                 if (ixa == NULL) {
2419                         ip_drop_packet(data_mp, B_FALSE, ill,
2420                             DROPPER(ipss, ipds_esp_nomem),
2421                             &espstack->esp_dropper);
2422                         return (NULL);
2423                 }
2424                 need_refrele = B_TRUE;
2425 
2426                 label_hold(assoc->ipsa_otsl);
2427                 ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);
2428 
2429                 data_mp = sadb_whack_label(data_mp, assoc, ixa,
2430                     DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2431                 if (data_mp == NULL) {
2432                         /* Packet dropped by sadb_whack_label */
2433                         ixa_refrele(ixa);
2434                         return (NULL);
2435                 }
2436         }
2437 
2438         /*
2439          * Reality check....
2440          */
2441         ipha = (ipha_t *)data_mp->b_rptr;  /* So we can call esp_acquire(). */
2442 
2443         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2444                 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
2445 
2446                 af = AF_INET;
2447                 divpoint = IPH_HDR_LENGTH(ipha);
2448                 datalen = ntohs(ipha->ipha_length) - divpoint;
2449                 nhp = (uint8_t *)&ipha->ipha_protocol;
2450         } else {
2451                 ip_pkt_t ipp;
2452 
2453                 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
2454 
2455                 af = AF_INET6;
2456                 ip6h = (ip6_t *)ipha;
2457                 bzero(&ipp, sizeof (ipp));
2458                 divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL);
2459                 if (ipp.ipp_dstopts != NULL &&
2460                     ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
2461                         /*
2462                          * Destination options are tricky.  If we get in here,
2463                          * then we have a terminal header following the
2464                          * destination options.  We need to adjust backwards
2465                          * so we insert ESP BEFORE the destination options
2466                          * bag.  (So that the dstopts get encrypted!)
2467                          *
2468                          * Since this is for outbound packets only, we know
2469                          * that non-terminal destination options only precede
2470                          * routing headers.
2471                          */
2472                         divpoint -= ipp.ipp_dstoptslen;
2473                 }
2474                 datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;
2475 
2476                 if (ipp.ipp_rthdr != NULL) {
2477                         nhp = &ipp.ipp_rthdr->ip6r_nxt;
2478                 } else if (ipp.ipp_hopopts != NULL) {
2479                         nhp = &ipp.ipp_hopopts->ip6h_nxt;
2480                 } else {
2481                         ASSERT(divpoint == sizeof (ip6_t));
2482                         /* It's probably IP + ESP. */
2483                         nhp = &ip6h->ip6_nxt;
2484                 }
2485         }
2486 
2487         mac_len = assoc->ipsa_mac_len;
2488 
2489         if (assoc->ipsa_flags & IPSA_F_NATT) {
2490                 /* wedge in UDP header */
2491                 is_natt = B_TRUE;
2492                 esplen += UDPH_SIZE;
2493         }
2494 
2495         /*
2496          * Set up ESP header and encryption padding for ENCR PI request.
2497          */
2498 
2499         /* Determine the padding length.  Pad to 4-bytes for no-encryption. */
2500         if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
2501                 iv_len = assoc->ipsa_iv_len;
2502                 block_size = assoc->ipsa_datalen;
2503 
2504                 /*
2505                  * Pad the data to the length of the cipher block size.
2506                  * Include the two additional bytes (hence the - 2) for the
2507                  * padding length and the next header.  Take this into account
2508                  * when calculating the actual length of the padding.
2509                  */
2510                 ASSERT(ISP2(block_size));
2511                 padlen = ((unsigned)(block_size - datalen - 2)) &
2512                     (block_size - 1);
2513         } else {
2514                 iv_len = 0;
2515                 padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) &
2516                     (sizeof (uint32_t) - 1);
2517         }
2518 
2519         /* Allocate ESP header and IV. */
2520         esplen += iv_len;
2521 
2522         /*
2523          * Update association byte-count lifetimes.  Don't forget to take
2524          * into account the padding length and next-header (hence the + 2).
2525          *
2526          * Use the amount of data fed into the "encryption algorithm".  This
2527          * is the IV, the data length, the padding length, and the final two
2528          * bytes (padlen, and next-header).
2529          *
2530          */
2531 
2532         if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) {
2533                 ip_drop_packet(data_mp, B_FALSE, ill,
2534                     DROPPER(ipss, ipds_esp_bytes_expire),
2535                     &espstack->esp_dropper);
2536                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2537                 if (need_refrele)
2538                         ixa_refrele(ixa);
2539                 return (NULL);
2540         }
2541 
2542         espmp = allocb(esplen, BPRI_HI);
2543         if (espmp == NULL) {
2544                 ESP_BUMP_STAT(espstack, out_discards);
2545                 esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n"));
2546                 ip_drop_packet(data_mp, B_FALSE, ill,
2547                     DROPPER(ipss, ipds_esp_nomem),
2548                     &espstack->esp_dropper);
2549                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2550                 if (need_refrele)
2551                         ixa_refrele(ixa);
2552                 return (NULL);
2553         }
2554         espmp->b_wptr += esplen;
2555         esph_ptr = (esph_t *)espmp->b_rptr;
2556 
2557         if (is_natt) {
2558                 esp3dbg(espstack, ("esp_outbound: NATT"));
2559 
2560                 udpha = (udpha_t *)espmp->b_rptr;
2561                 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
2562                     assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
2563                 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
2564                     assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
2565                 /*
2566                  * Set the checksum to 0, so that the esp_prepare_udp() call
2567                  * can do the right thing.
2568                  */
2569                 udpha->uha_checksum = 0;
2570                 esph_ptr = (esph_t *)(udpha + 1);
2571         }
2572 
2573         esph_ptr->esph_spi = assoc->ipsa_spi;
2574 
2575         esph_ptr->esph_replay = htonl(atomic_inc_32_nv(&assoc->ipsa_replay));
2576         if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2577                 /*
2578                  * XXX We have replay counter wrapping.
2579                  * We probably want to nuke this SA (and its peer).
2580                  */
2581                 ipsec_assocfailure(info.mi_idnum, 0, 0,
2582                     SL_ERROR | SL_CONSOLE | SL_WARN,
2583                     "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
2584                     esph_ptr->esph_spi, assoc->ipsa_dstaddr, af,
2585                     espstack->ipsecesp_netstack);
2586 
2587                 ESP_BUMP_STAT(espstack, out_discards);
2588                 sadb_replay_delete(assoc);
2589                 ip_drop_packet(data_mp, B_FALSE, ill,
2590                     DROPPER(ipss, ipds_esp_replay),
2591                     &espstack->esp_dropper);
2592                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2593                 if (need_refrele)
2594                         ixa_refrele(ixa);
2595                 return (NULL);
2596         }
2597 
2598         iv_ptr = (esph_ptr + 1);
2599         /*
2600          * iv_ptr points to the location in espmp where the IV will be
2601          * written.  That mblk will become part of the mblk chain that
2602          * makes up the packet.
2603          *
2604          * For counter mode algorithms, the IV is a 64-bit quantity; it
2605          * must NEVER repeat in the lifetime of the SA, otherwise an
2606          * attacker who had recorded enough packets might be able to
2607          * determine some clear text.
2608          *
2609          * To ensure this does not happen, the IV is stored in the SA and
2610          * incremented for each packet, the IV is then copied into the
2611          * "packet" for transmission to the receiving system. The IV will
2612          * also be copied into the nonce, when the packet is encrypted.
2613          *
2614          * CBC mode algorithms use a random IV for each packet. We do not
2615          * require the highest quality random bits, but for best security
2616          * with CBC mode ciphers, the value must be unlikely to repeat and
2617          * must not be known in advance to an adversary capable of influencing
2618          * the clear text.
2619          */
2620         if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc,
2621             espstack)) {
2622                 ip_drop_packet(data_mp, B_FALSE, ill,
2623                     DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper);
2624                 if (need_refrele)
2625                         ixa_refrele(ixa);
2626                 return (NULL);
2627         }
2628 
2629         /* Fix the IP header. */
2630         alloclen = padlen + 2 + mac_len;
2631         adj = alloclen + (espmp->b_wptr - espmp->b_rptr);
2632 
2633         protocol = *nhp;
2634 
2635         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2636                 ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
2637                 if (is_natt) {
2638                         *nhp = IPPROTO_UDP;
2639                         udpha->uha_length = htons(ntohs(ipha->ipha_length) -
2640                             IPH_HDR_LENGTH(ipha));
2641                 } else {
2642                         *nhp = IPPROTO_ESP;
2643                 }
2644                 ipha->ipha_hdr_checksum = 0;
2645                 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2646         } else {
2647                 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
2648                 *nhp = IPPROTO_ESP;
2649         }
2650 
2651         /* I've got the two ESP mblks, now insert them. */
2652 
2653         esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n"));
2654         esp2dbg(espstack, (dump_msg(data_mp)));
2655 
2656         if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) {
2657                 ESP_BUMP_STAT(espstack, out_discards);
2658                 /* NOTE:  esp_insert_esp() only fails if there's no memory. */
2659                 ip_drop_packet(data_mp, B_FALSE, ill,
2660                     DROPPER(ipss, ipds_esp_nomem),
2661                     &espstack->esp_dropper);
2662                 freeb(espmp);
2663                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2664                 if (need_refrele)
2665                         ixa_refrele(ixa);
2666                 return (NULL);
2667         }
2668 
2669         /* Append padding (and leave room for ICV). */
2670         for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
2671                 ;
2672         if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
2673                 tailmp->b_cont = allocb(alloclen, BPRI_HI);
2674                 if (tailmp->b_cont == NULL) {
2675                         ESP_BUMP_STAT(espstack, out_discards);
2676                         esp0dbg(("esp_outbound:  Can't allocate tailmp.\n"));
2677                         ip_drop_packet(data_mp, B_FALSE, ill,
2678                             DROPPER(ipss, ipds_esp_nomem),
2679                             &espstack->esp_dropper);
2680                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2681                         if (need_refrele)
2682                                 ixa_refrele(ixa);
2683                         return (NULL);
2684                 }
2685                 tailmp = tailmp->b_cont;
2686         }
2687 
        /*
         * If there's padding, the N bytes of padding must be of the form
         * 0x1, 0x2, 0x3... 0xN (e.g. padlen == 3 gives 0x1 0x2 0x3).  The
         * two bytes written after the padding are the ESP trailer's
         * pad-length and next-header fields.
         */
2692         for (i = 0; i < padlen; ) {
2693                 i++;
2694                 *tailmp->b_wptr++ = i;
2695         }
2696         *tailmp->b_wptr++ = i;
2697         *tailmp->b_wptr++ = protocol;
2698 
        esp2dbg(espstack, ("data_mp before encryption:\n"));
2700         esp2dbg(espstack, (dump_msg(data_mp)));
2701 
2702         /*
2703          * Okay.  I've set up the pre-encryption ESP.  Let's do it!
2704          */
2705 
2706         if (mac_len > 0) {
2707                 ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
2708                 icv_buf = tailmp->b_wptr;
2709                 tailmp->b_wptr += mac_len;
2710         } else {
2711                 icv_buf = NULL;
2712         }
2713 
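        /*
         * icv_buf (if non-NULL) points at the mac_len bytes reserved at
         * the tail of the packet; the crypto framework writes the ICV
         * there as part of the request submitted below.
         */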
2714         data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf,
2715             datalen + padlen + 2);
2716         if (need_refrele)
2717                 ixa_refrele(ixa);
2718         return (data_mp);
2719 }
2720 
2721 /*
2722  * IP calls this to validate the ICMP errors that
2723  * we got from the network.
2724  */
2725 mblk_t *
2726 ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
2727 {
2728         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
2729         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2730         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2731 
2732         /*
2733          * Unless we get an entire packet back, this function is useless.
2734          * Why?
2735          *
         * 1.)  Partial packets are useless, because the "next header"
         *      is at the end of the decrypted ESP packet.  Without the
         *      whole packet, we can't tell what it was protecting.
         *
         * 2.)  If we ever use a stateful cipher, such as a stream or a
         *      one-time pad, we can't do anything.
2742          *
2743          * Since the chances of us getting an entire packet back are very
2744          * very small, we discard here.
2745          */
2746         IP_ESP_BUMP_STAT(ipss, in_discards);
2747         ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
2748             DROPPER(ipss, ipds_esp_icmp),
2749             &espstack->esp_dropper);
2750         return (NULL);
2751 }
2752 
2753 /*
2754  * Construct an SADB_REGISTER message with the current algorithms.
2755  * This function gets called when 'ipsecalgs -s' is run or when
2756  * in.iked (or other KMD) starts.
2757  */
2758 static boolean_t
2759 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
2760     ipsecesp_stack_t *espstack, cred_t *cr)
2761 {
2762         mblk_t *pfkey_msg_mp, *keysock_out_mp;
2763         sadb_msg_t *samsg;
2764         sadb_supported_t *sasupp_auth = NULL;
2765         sadb_supported_t *sasupp_encr = NULL;
2766         sadb_alg_t *saalg;
2767         uint_t allocsize = sizeof (*samsg);
2768         uint_t i, numalgs_snap;
2769         int current_aalgs;
2770         ipsec_alginfo_t **authalgs;
2771         uint_t num_aalgs;
2772         int current_ealgs;
2773         ipsec_alginfo_t **encralgs;
2774         uint_t num_ealgs;
2775         ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
2776         sadb_sens_t *sens;
2777         size_t sens_len = 0;
2778         sadb_ext_t *nextext;
2779         ts_label_t *sens_tsl = NULL;
2780 
2781         /* Allocate the KEYSOCK_OUT. */
2782         keysock_out_mp = sadb_keysock_out(serial);
2783         if (keysock_out_mp == NULL) {
2784                 esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
2785                 return (B_FALSE);
2786         }
2787 
2788         if (is_system_labeled() && (cr != NULL)) {
2789                 sens_tsl = crgetlabel(cr);
2790                 if (sens_tsl != NULL) {
2791                         sens_len = sadb_sens_len_from_label(sens_tsl);
2792                         allocsize += sens_len;
2793                 }
2794         }
2795 
2796         /*
2797          * Allocate the PF_KEY message that follows KEYSOCK_OUT.
2798          */
2799 
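        /*
         * A sketch of the PF_KEY message built below (each extension is
         * present only when the corresponding count/label is non-zero):
         *
         *      sadb_msg_t
         *      sadb_supported_t (SADB_EXT_SUPPORTED_AUTH)
         *              sadb_alg_t x num_aalgs
         *      sadb_supported_t (SADB_EXT_SUPPORTED_ENCRYPT)
         *              sadb_alg_t x num_ealgs
         *      sadb_sens_t (SADB_EXT_SENSITIVITY, labeled systems only)
         */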
2800         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
2801         /*
2802          * Fill SADB_REGISTER message's algorithm descriptors.  Hold
2803          * down the lock while filling it.
2804          *
2805          * Return only valid algorithms, so the number of algorithms
2806          * to send up may be less than the number of algorithm entries
2807          * in the table.
2808          */
2809         authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
2810         for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
2811                 if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
2812                         num_aalgs++;
2813 
2814         if (num_aalgs != 0) {
2815                 allocsize += (num_aalgs * sizeof (*saalg));
2816                 allocsize += sizeof (*sasupp_auth);
2817         }
2818         encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
2819         for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
2820                 if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
2821                         num_ealgs++;
2822 
2823         if (num_ealgs != 0) {
2824                 allocsize += (num_ealgs * sizeof (*saalg));
2825                 allocsize += sizeof (*sasupp_encr);
2826         }
2827         keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
2828         if (keysock_out_mp->b_cont == NULL) {
2829                 rw_exit(&ipss->ipsec_alg_lock);
2830                 freemsg(keysock_out_mp);
2831                 return (B_FALSE);
2832         }
2833         pfkey_msg_mp = keysock_out_mp->b_cont;
2834         pfkey_msg_mp->b_wptr += allocsize;
2835 
2836         nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));
2837 
2838         if (num_aalgs != 0) {
2839                 sasupp_auth = (sadb_supported_t *)nextext;
2840                 saalg = (sadb_alg_t *)(sasupp_auth + 1);
2841 
2842                 ASSERT(((ulong_t)saalg & 0x7) == 0);
2843 
2844                 numalgs_snap = 0;
2845                 for (i = 0;
2846                     ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
2847                     i++) {
2848                         if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
2849                                 continue;
2850 
2851                         saalg->sadb_alg_id = authalgs[i]->alg_id;
2852                         saalg->sadb_alg_ivlen = 0;
                        saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
                        saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
2855                         saalg->sadb_x_alg_increment =
2856                             authalgs[i]->alg_increment;
2857                         saalg->sadb_x_alg_saltbits = SADB_8TO1(
2858                             authalgs[i]->alg_saltlen);
2859                         numalgs_snap++;
2860                         saalg++;
2861                 }
2862                 ASSERT(numalgs_snap == num_aalgs);
2863 #ifdef DEBUG
2864                 /*
2865                  * Reality check to make sure I snagged all of the
2866                  * algorithms.
2867                  */
2868                 for (; i < IPSEC_MAX_ALGS; i++) {
2869                         if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
2870                                 cmn_err(CE_PANIC, "esp_register_out()! "
2871                                     "Missed aalg #%d.\n", i);
2872                         }
2873                 }
2874 #endif /* DEBUG */
2875                 nextext = (sadb_ext_t *)saalg;
2876         }
2877 
2878         if (num_ealgs != 0) {
2879                 sasupp_encr = (sadb_supported_t *)nextext;
2880                 saalg = (sadb_alg_t *)(sasupp_encr + 1);
2881 
2882                 numalgs_snap = 0;
2883                 for (i = 0;
2884                     ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
2885                         if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
2886                                 continue;
2887                         saalg->sadb_alg_id = encralgs[i]->alg_id;
2888                         saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen;
                        saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits;
                        saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits;
2891                         /*
                         * We could advertise the ICV length, except there
                         * is no field in sadb_alg_t to carry it.
2894                          * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
2895                          */
2896                         saalg->sadb_x_alg_increment =
2897                             encralgs[i]->alg_increment;
2898                         saalg->sadb_x_alg_saltbits =
2899                             SADB_8TO1(encralgs[i]->alg_saltlen);
2900 
2901                         numalgs_snap++;
2902                         saalg++;
2903                 }
2904                 ASSERT(numalgs_snap == num_ealgs);
2905 #ifdef DEBUG
2906                 /*
2907                  * Reality check to make sure I snagged all of the
2908                  * algorithms.
2909                  */
2910                 for (; i < IPSEC_MAX_ALGS; i++) {
2911                         if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
2912                                 cmn_err(CE_PANIC, "esp_register_out()! "
2913                                     "Missed ealg #%d.\n", i);
2914                         }
2915                 }
2916 #endif /* DEBUG */
2917                 nextext = (sadb_ext_t *)saalg;
2918         }
2919 
2920         current_aalgs = num_aalgs;
2921         current_ealgs = num_ealgs;
2922 
2923         rw_exit(&ipss->ipsec_alg_lock);
2924 
2925         if (sens_tsl != NULL) {
2926                 sens = (sadb_sens_t *)nextext;
2927                 sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
2928                     sens_tsl, sens_len);
2929 
2930                 nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
2931         }
2932 
2933         /* Now fill the rest of the SADB_REGISTER message. */
2934 
2935         samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
2936         samsg->sadb_msg_version = PF_KEY_V2;
2937         samsg->sadb_msg_type = SADB_REGISTER;
2938         samsg->sadb_msg_errno = 0;
2939         samsg->sadb_msg_satype = SADB_SATYPE_ESP;
2940         samsg->sadb_msg_len = SADB_8TO64(allocsize);
2941         samsg->sadb_msg_reserved = 0;
2942         /*
2943          * Assume caller has sufficient sequence/pid number info.  If it's one
2944          * from me over a new alg., I could give two hoots about sequence.
2945          */
2946         samsg->sadb_msg_seq = sequence;
2947         samsg->sadb_msg_pid = pid;
2948 
2949         if (sasupp_auth != NULL) {
2950                 sasupp_auth->sadb_supported_len = SADB_8TO64(
2951                     sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
2952                 sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
2953                 sasupp_auth->sadb_supported_reserved = 0;
2954         }
2955 
2956         if (sasupp_encr != NULL) {
2957                 sasupp_encr->sadb_supported_len = SADB_8TO64(
2958                     sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
2959                 sasupp_encr->sadb_supported_exttype =
2960                     SADB_EXT_SUPPORTED_ENCRYPT;
2961                 sasupp_encr->sadb_supported_reserved = 0;
2962         }
2963 
2964         if (espstack->esp_pfkey_q != NULL)
2965                 putnext(espstack->esp_pfkey_q, keysock_out_mp);
2966         else {
2967                 freemsg(keysock_out_mp);
2968                 return (B_FALSE);
2969         }
2970 
2971         return (B_TRUE);
2972 }
2973 
2974 /*
2975  * Invoked when the algorithm table changes. Causes SADB_REGISTER
 * messages containing the current list of algorithms to be
2977  * sent up to the ESP listeners.
2978  */
2979 void
2980 ipsecesp_algs_changed(netstack_t *ns)
2981 {
2982         ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
2983 
2984         /*
2985          * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
2986          * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
2987          */
2988         (void) esp_register_out(0, 0, 0, espstack, NULL);
2989 }
2990 
2991 /*
2992  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
 * and send it into ESP and IP again.
2994  */
2995 static void
2996 inbound_task(void *arg)
2997 {
2998         mblk_t          *mp = (mblk_t *)arg;
2999         mblk_t          *async_mp;
3000         ip_recv_attr_t  iras;
3001 
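        /*
         * The taskq argument is the mblk built when the packet was queued
         * (presumably by ip_recv_attr_to_mblk()): the first mblk carries
         * the serialized ip_recv_attr_t and b_cont is the actual packet.
         */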
3002         async_mp = mp;
3003         mp = async_mp->b_cont;
3004         async_mp->b_cont = NULL;
3005         if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
3006                 /* The ill or ip_stack_t disappeared on us */
3007                 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
3008                 freemsg(mp);
3009                 goto done;
3010         }
3011 
3012         esp_inbound_restart(mp, &iras);
3013 done:
3014         ira_cleanup(&iras, B_TRUE);
3015 }
3016 
3017 /*
3018  * Restart ESP after the SA has been added.
3019  */
3020 static void
3021 esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
3022 {
3023         esph_t          *esph;
3024         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3025         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3026 
3027         esp2dbg(espstack, ("in ESP inbound_task"));
3028         ASSERT(espstack != NULL);
3029 
3030         mp = ipsec_inbound_esp_sa(mp, ira, &esph);
3031         if (mp == NULL)
3032                 return;
3033 
3034         ASSERT(esph != NULL);
3035         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3036         ASSERT(ira->ira_ipsec_esp_sa != NULL);
3037 
3038         mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira);
3039         if (mp == NULL) {
3040                 /*
3041                  * Either it failed or is pending. In the former case
3042                  * ipIfStatsInDiscards was increased.
3043                  */
3044                 return;
3045         }
3046 
3047         ip_input_post_ipsec(mp, ira);
3048 }
3049 
3050 /*
 * Now that the weak-key check has passed, actually ADD the security
 * association, and send back a reply ADD message.
3053  */
3054 static int
3055 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
3056     int *diagnostic, ipsecesp_stack_t *espstack)
3057 {
3058         isaf_t *primary = NULL, *secondary;
3059         boolean_t clone = B_FALSE, is_inbound = B_FALSE;
3060         ipsa_t *larval = NULL;
3061         ipsacq_t *acqrec;
3062         iacqf_t *acq_bucket;
3063         mblk_t *acq_msgs = NULL;
3064         int rc;
3065         mblk_t *lpkt;
3066         int error;
3067         ipsa_query_t sq;
3068         ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
3069 
3070         /*
3071          * Locate the appropriate table(s).
3072          */
3073         sq.spp = &espstack->esp_sadb;    /* XXX */
3074         error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
3075             IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
3076             &sq, diagnostic);
3077         if (error)
3078                 return (error);
3079 
3080         /*
3081          * Use the direction flags provided by the KMD to determine
3082          * if the inbound or outbound table should be the primary
3083          * for this SA. If these flags were absent then make this
3084          * decision based on the addresses.
3085          */
3086         if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) {
3087                 primary = sq.inbound;
3088                 secondary = sq.outbound;
3089                 is_inbound = B_TRUE;
3090                 if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
3091                         clone = B_TRUE;
3092         } else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
3093                 primary = sq.outbound;
3094                 secondary = sq.inbound;
3095         }
3096 
3097         if (primary == NULL) {
3098                 /*
3099                  * The KMD did not set a direction flag, determine which
3100                  * table to insert the SA into based on addresses.
3101                  */
3102                 switch (ksi->ks_in_dsttype) {
3103                 case KS_IN_ADDR_MBCAST:
3104                         clone = B_TRUE; /* All mcast SAs can be bidirectional */
3105                         sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
3106                         /* FALLTHRU */
3107                 /*
3108                  * If the source address is either one of mine, or unspecified
3109                  * (which is best summed up by saying "not 'not mine'"),
3110                  * then the association is potentially bi-directional,
3111                  * in that it can be used for inbound traffic and outbound
3112                  * traffic.  The best example of such an SA is a multicast
3113                  * SA (which allows me to receive the outbound traffic).
3114                  */
3115                 case KS_IN_ADDR_ME:
3116                         sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
3117                         primary = sq.inbound;
3118                         secondary = sq.outbound;
3119                         if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
3120                                 clone = B_TRUE;
3121                         is_inbound = B_TRUE;
3122                         break;
3123                 /*
                 * If the source address is literally not mine (either
3125                  * unspecified or not mine), then this SA may have an
3126                  * address that WILL be mine after some configuration.
3127                  * We pay the price for this by making it a bi-directional
3128                  * SA.
3129                  */
3130                 case KS_IN_ADDR_NOTME:
3131                         sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
3132                         primary = sq.outbound;
3133                         secondary = sq.inbound;
3134                         if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
3135                                 sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
3136                                 clone = B_TRUE;
3137                         }
3138                         break;
3139                 default:
3140                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
3141                         return (EINVAL);
3142                 }
3143         }
3144 
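        /*
         * To summarize the decision above:
         *
         *      IPSA_F_INBOUND set              primary = inbound fanout
         *      IPSA_F_OUTBOUND set             primary = outbound fanout
         *      no flags, dst is me/mcast       primary = inbound fanout
         *      no flags, dst not me            primary = outbound fanout
         *
         * and the SA is cloned into the secondary fanout whenever it can
         * legitimately be used in both directions.
         */
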
3145         /*
         * Find an ACQUIRE list entry if possible.  If we've added an SA that
3147          * suits the needs of an ACQUIRE list entry, we can eliminate the
3148          * ACQUIRE list entry and transmit the enqueued packets.  Use the
3149          * high-bit of the sequence number to queue it.  Key off destination
3150          * addr, and change acqrec's state.
3151          */
3152 
3153         if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
3154                 acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
3155                 mutex_enter(&acq_bucket->iacqf_lock);
3156                 for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
3157                     acqrec = acqrec->ipsacq_next) {
3158                         mutex_enter(&acqrec->ipsacq_lock);
3159                         /*
3160                          * Q:  I only check sequence.  Should I check dst?
3161                          * A: Yes, check dest because those are the packets
3162                          *    that are queued up.
3163                          */
3164                         if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
3165                             IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
3166                             acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
3167                                 break;
3168                         mutex_exit(&acqrec->ipsacq_lock);
3169                 }
3170                 if (acqrec != NULL) {
3171                         /*
3172                          * AHA!  I found an ACQUIRE record for this SA.
3173                          * Grab the msg list, and free the acquire record.
3174                          * I already am holding the lock for this record,
3175                          * so all I have to do is free it.
3176                          */
3177                         acq_msgs = acqrec->ipsacq_mp;
3178                         acqrec->ipsacq_mp = NULL;
3179                         mutex_exit(&acqrec->ipsacq_lock);
3180                         sadb_destroy_acquire(acqrec,
3181                             espstack->ipsecesp_netstack);
3182                 }
3183                 mutex_exit(&acq_bucket->iacqf_lock);
3184         }
3185 
3186         /*
3187          * Find PF_KEY message, and see if I'm an update.  If so, find entry
3188          * in larval list (if there).
3189          */
3190         if (samsg->sadb_msg_type == SADB_UPDATE) {
3191                 mutex_enter(&sq.inbound->isaf_lock);
3192                 larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
3193                     ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
3194                 mutex_exit(&sq.inbound->isaf_lock);
3195 
3196                 if ((larval == NULL) ||
3197                     (larval->ipsa_state != IPSA_STATE_LARVAL)) {
3198                         *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
3199                         if (larval != NULL) {
3200                                 IPSA_REFRELE(larval);
3201                         }
3202                         esp0dbg(("Larval update, but larval disappeared.\n"));
3203                         return (ESRCH);
3204                 } /* Else sadb_common_add unlinks it for me! */
3205         }
3206 
3207         if (larval != NULL) {
3208                 /*
3209                  * Hold again, because sadb_common_add() consumes a reference,
3210                  * and we don't want to clear_lpkt() without a reference.
3211                  */
3212                 IPSA_REFHOLD(larval);
3213         }
3214 
3215         rc = sadb_common_add(espstack->esp_pfkey_q,
3216             mp, samsg, ksi, primary, secondary, larval, clone, is_inbound,
3217             diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb);
3218 
3219         if (larval != NULL) {
3220                 if (rc == 0) {
3221                         lpkt = sadb_clear_lpkt(larval);
3222                         if (lpkt != NULL) {
3223                                 rc = !taskq_dispatch(esp_taskq, inbound_task,
3224                                     lpkt, TQ_NOSLEEP);
3225                         }
3226                 }
3227                 IPSA_REFRELE(larval);
3228         }
3229 
3230         /*
3231          * How much more stack will I create with all of these
3232          * esp_outbound() calls?
3233          */
3234 
3235         /* Handle the packets queued waiting for the SA */
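        /*
         * Each queued entry is an ip_xmit_attr_t serialized into an mblk
         * (presumably by ip_xmit_attr_to_mblk() when the packet was queued
         * on the ACQUIRE record), with the actual packet on b_cont and the
         * entries chained through b_next.
         */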
3236         while (acq_msgs != NULL) {
3237                 mblk_t          *asyncmp;
3238                 mblk_t          *data_mp;
3239                 ip_xmit_attr_t  ixas;
3240                 ill_t           *ill;
3241 
3242                 asyncmp = acq_msgs;
3243                 acq_msgs = acq_msgs->b_next;
3244                 asyncmp->b_next = NULL;
3245 
3246                 /*
3247                  * Extract the ip_xmit_attr_t from the first mblk.
                 * Verifies that the netstack and ill are still around; they
                 * could have vanished while iked was doing its work.
                 * On successful return we have a nce_t and the ill/ipst can't
3251                  * disappear until we do the nce_refrele in ixa_cleanup.
3252                  */
3253                 data_mp = asyncmp->b_cont;
3254                 asyncmp->b_cont = NULL;
3255                 if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
3256                         ESP_BUMP_STAT(espstack, out_discards);
3257                         ip_drop_packet(data_mp, B_FALSE, NULL,
3258                             DROPPER(ipss, ipds_sadb_acquire_timeout),
3259                             &espstack->esp_dropper);
3260                 } else if (rc != 0) {
3261                         ill = ixas.ixa_nce->nce_ill;
3262                         ESP_BUMP_STAT(espstack, out_discards);
3263                         ip_drop_packet(data_mp, B_FALSE, ill,
3264                             DROPPER(ipss, ipds_sadb_acquire_timeout),
3265                             &espstack->esp_dropper);
3266                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3267                 } else {
3268                         esp_outbound_finish(data_mp, &ixas);
3269                 }
3270                 ixa_cleanup(&ixas);
3271         }
3272 
3273         return (rc);
3274 }
3275 
3276 /*
3277  * Process one of the queued messages (from ipsacq_mp) once the SA
3278  * has been added.
3279  */
3280 static void
3281 esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
3282 {
3283         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
3284         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3285         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3286         ill_t           *ill = ixa->ixa_nce->nce_ill;
3287 
3288         if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) {
3289                 ESP_BUMP_STAT(espstack, out_discards);
3290                 ip_drop_packet(data_mp, B_FALSE, ill,
3291                     DROPPER(ipss, ipds_sadb_acquire_timeout),
3292                     &espstack->esp_dropper);
3293                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3294                 return;
3295         }
3296 
3297         data_mp = esp_outbound(data_mp, ixa);
3298         if (data_mp == NULL)
3299                 return;
3300 
3301         /* do AH processing if needed */
3302         data_mp = esp_do_outbound_ah(data_mp, ixa);
3303         if (data_mp == NULL)
3304                 return;
3305 
3306         (void) ip_output_post_ipsec(data_mp, ixa);
3307 }
3308 
3309 /*
3310  * Add new ESP security association.  This may become a generic AH/ESP
3311  * routine eventually.
3312  */
3313 static int
3314 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
3315 {
3316         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3317         sadb_address_t *srcext =
3318             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3319         sadb_address_t *dstext =
3320             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3321         sadb_address_t *isrcext =
3322             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
3323         sadb_address_t *idstext =
3324             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
3325         sadb_address_t *nttext_loc =
3326             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
3327         sadb_address_t *nttext_rem =
3328             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
3329         sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3330         sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3331         struct sockaddr_in *src, *dst;
3332         struct sockaddr_in *natt_loc, *natt_rem;
3333         struct sockaddr_in6 *natt_loc6, *natt_rem6;
3334         sadb_lifetime_t *soft =
3335             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3336         sadb_lifetime_t *hard =
3337             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3338         sadb_lifetime_t *idle =
3339             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
3340         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3341         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3345         /* I need certain extensions present for an ADD message. */
3346         if (srcext == NULL) {
3347                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3348                 return (EINVAL);
3349         }
3350         if (dstext == NULL) {
3351                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3352                 return (EINVAL);
3353         }
3354         if (isrcext == NULL && idstext != NULL) {
3355                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3356                 return (EINVAL);
3357         }
3358         if (isrcext != NULL && idstext == NULL) {
3359                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
3360                 return (EINVAL);
3361         }
3362         if (assoc == NULL) {
3363                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3364                 return (EINVAL);
3365         }
3366         if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
3367                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
3368                 return (EINVAL);
3369         }
3370 
3371         src = (struct sockaddr_in *)(srcext + 1);
3372         dst = (struct sockaddr_in *)(dstext + 1);
3373         natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
3374         natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
3375         natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
3376         natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);
3377 
3378         /* Sundry ADD-specific reality checks. */
3379         /* XXX STATS :  Logging/stats here? */
3380 
3381         if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
3382             (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3383                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
3384                 return (EINVAL);
3385         }
3386         if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
3387                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3388                 return (EINVAL);
3389         }
3390 
3391 #ifndef IPSEC_LATENCY_TEST
3392         if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
3393             assoc->sadb_sa_auth == SADB_AALG_NONE) {
3394                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3395                 return (EINVAL);
3396         }
3397 #endif
3398 
3399         if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) {
3400                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3401                 return (EINVAL);
3402         }
3403 
3404         if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
3405                 return (EINVAL);
3406         }
3407         ASSERT(src->sin_family == dst->sin_family);
3408 
3409         if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
3410                 if (nttext_loc == NULL) {
3411                         *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3412                         return (EINVAL);
3413                 }
3414 
3415                 if (natt_loc->sin_family == AF_INET6 &&
3416                     !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
3417                         *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
3418                         return (EINVAL);
3419                 }
3420         }
3421 
3422         if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
3423                 if (nttext_rem == NULL) {
3424                         *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3425                         return (EINVAL);
3426                 }
3427                 if (natt_rem->sin_family == AF_INET6 &&
3428                     !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
3429                         *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
3430                         return (EINVAL);
3431                 }
3432         }
3435         /* Stuff I don't support, for now.  XXX Diagnostic? */
3436         if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
3437                 return (EOPNOTSUPP);
3438 
3439         if ((*diagnostic = sadb_labelchk(ksi)) != 0)
3440                 return (EINVAL);
3441 
3442         /*
3443          * XXX Policy :  I'm not checking identities at this time,
3444          * but if I did, I'd do them here, before I sent
3445          * the weak key check up to the algorithm.
3446          */
3447 
3448         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3449 
3450         /*
3451          * First locate the authentication algorithm.
3452          */
3453 #ifdef IPSEC_LATENCY_TEST
3454         if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) {
3455 #else
3456         if (akey != NULL) {
3457 #endif
3458                 ipsec_alginfo_t *aalg;
3459 
3460                 aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3461                     [assoc->sadb_sa_auth];
3462                 if (aalg == NULL || !ALG_VALID(aalg)) {
3463                         rw_exit(&ipss->ipsec_alg_lock);
3464                         esp1dbg(espstack, ("Couldn't find auth alg #%d.\n",
3465                             assoc->sadb_sa_auth));
3466                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3467                         return (EINVAL);
3468                 }
3469 
3470                 /*
3471                  * Sanity check key sizes.
3472                  * Note: It's not possible to use SADB_AALG_NONE because
3473                  * this auth_alg is not defined with ALG_FLAG_VALID. If this
3474                  * ever changes, the same check for SADB_AALG_NONE and
                 * an auth_key != NULL should be made here (see below).
3476                  */
3477                 if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
3478                         rw_exit(&ipss->ipsec_alg_lock);
3479                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
3480                         return (EINVAL);
3481                 }
3482                 ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3483 
3484                 /* check key and fix parity if needed */
3485                 if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
3486                     diagnostic) != 0) {
3487                         rw_exit(&ipss->ipsec_alg_lock);
3488                         return (EINVAL);
3489                 }
3490         }
3491 
3492         /*
3493          * Then locate the encryption algorithm.
3494          */
3495         if (ekey != NULL) {
3496                 uint_t keybits;
3497                 ipsec_alginfo_t *ealg;
3498 
3499                 ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3500                     [assoc->sadb_sa_encrypt];
3501                 if (ealg == NULL || !ALG_VALID(ealg)) {
3502                         rw_exit(&ipss->ipsec_alg_lock);
3503                         esp1dbg(espstack, ("Couldn't find encr alg #%d.\n",
3504                             assoc->sadb_sa_encrypt));
3505                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3506                         return (EINVAL);
3507                 }
3508 
3509                 /*
3510                  * Sanity check key sizes. If the encryption algorithm is
3511                  * SADB_EALG_NULL but the encryption key is NOT
3512                  * NULL then complain.
3513                  *
                 * The keying material includes salt bits if required by the
                 * algorithm and optionally the initial IV; check the length
                 * of what's left.
3517                  */
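                /*
                 * For example (a sketch only, not specific to any one
                 * algorithm): a combined-mode key blob from the KMD may
                 * carry the cipher key plus a 32-bit salt, in which case
                 * sadb_key_bits counts both and the salt bits must be
                 * subtracted before comparing against the algorithm's
                 * advertised key sizes.
                 */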
3518                 keybits = ekey->sadb_key_bits;
3519                 keybits -= ekey->sadb_key_reserved;
3520                 keybits -= SADB_8TO1(ealg->alg_saltlen);
3521                 if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
3522                     (!ipsec_valid_key_size(keybits, ealg))) {
3523                         rw_exit(&ipss->ipsec_alg_lock);
3524                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
3525                         return (EINVAL);
3526                 }
3527                 ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3528 
3529                 /* check key */
3530                 if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
3531                     diagnostic) != 0) {
3532                         rw_exit(&ipss->ipsec_alg_lock);
3533                         return (EINVAL);
3534                 }
3535         }
3536         rw_exit(&ipss->ipsec_alg_lock);
3537 
3538         return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
3539             diagnostic, espstack));
3540 }
3541 
3542 /*
3543  * Update a security association.  Updates come in two varieties.  The first
3544  * is an update of lifetimes on a non-larval SA.  The second is an update of
3545  * a larval SA, which ends up looking a lot more like an add.
3546  */
3547 static int
3548 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3549     ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3550 {
3551         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3552         mblk_t    *buf_pkt;
3553         int rcode;
3554 
3555         sadb_address_t *dstext =
3556             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3557 
3558         if (dstext == NULL) {
3559                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3560                 return (EINVAL);
3561         }
3562 
3563         rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb,
3564             diagnostic, espstack->esp_pfkey_q, esp_add_sa,
3565             espstack->ipsecesp_netstack, sadb_msg_type);
3566 
3567         if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
3568             (rcode != 0)) {
3569                 return (rcode);
3570         }
3571 
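        /*
         * HANDLE_BUF_PKT deals with any packet that sadb_update_sa()
         * handed back as buffered against this SA: it is redispatched
         * via esp_taskq for inbound processing, or dropped if that
         * fails.
         */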
3572         HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec,
3573             espstack->esp_dropper, buf_pkt);
3574 
3575         return (rcode);
3576 }
3577 
3578 /* XXX refactor me */
3579 /*
3580  * Delete a security association.  This is REALLY likely to be code common to
3581  * both AH and ESP.  Find the association, then unlink it.
3582  */
3583 static int
3584 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3585     ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3586 {
3587         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3588         sadb_address_t *dstext =
3589             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3590         sadb_address_t *srcext =
3591             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3592         struct sockaddr_in *sin;
3593 
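        /*
         * With no SADB_EXT_SA extension this is an address-keyed purge
         * rather than a single-SA delete: every SA matching the given
         * destination (or, failing that, source) address is removed.
         */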
3594         if (assoc == NULL) {
3595                 if (dstext != NULL) {
3596                         sin = (struct sockaddr_in *)(dstext + 1);
3597                 } else if (srcext != NULL) {
3598                         sin = (struct sockaddr_in *)(srcext + 1);
3599                 } else {
3600                         *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3601                         return (EINVAL);
3602                 }
3603                 return (sadb_purge_sa(mp, ksi,
3604                     (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 :
3605                     &espstack->esp_sadb.s_v4, diagnostic,
3606                     espstack->esp_pfkey_q));
3607         }
3608 
3609         return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic,
3610             espstack->esp_pfkey_q, sadb_msg_type));
3611 }
3612 
3613 /* XXX refactor me */
3614 /*
3615  * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
3616  * messages.
3617  */
3618 static void
3619 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
3620 {
3621         int error;
3622         sadb_msg_t *samsg;
3623 
3624         /*
3625          * Dump each fanout, bailing if error is non-zero.
3626          */
3627 
3628         error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3629             &espstack->esp_sadb.s_v4);
3630         if (error != 0)
3631                 goto bail;
3632 
3633         error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3634             &espstack->esp_sadb.s_v6);
3635 bail:
3636         ASSERT(mp->b_cont != NULL);
3637         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3638         samsg->sadb_msg_errno = (uint8_t)error;
3639         sadb_pfkey_echo(espstack->esp_pfkey_q, mp,
3640             (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
3641 }
3642 
3643 /*
3644  * First-cut reality check for an inbound PF_KEY message.
3645  */
3646 static boolean_t
3647 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
3648     ipsecesp_stack_t *espstack)
3649 {
3650         int diagnostic;
3651 
3652         if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
3653                 diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
3654                 goto badmsg;
3655         }
3656         if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
3657             ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
3658                 diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
3659                 goto badmsg;
3660         }
3661         return (B_FALSE);       /* False ==> no failures */
3662 
3663 badmsg:
3664         sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
3665             ksi->ks_in_serial);
3666         return (B_TRUE);        /* True ==> failures */
3667 }
3668 
3669 /*
3670  * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
3671  * error cases.  What I receive is a fully-formed, syntactically legal
3672  * PF_KEY message.  I then need to check semantics...
3673  *
3674  * This code may become common to AH and ESP.  Stay tuned.
3675  *
3676  * I also make the assumption that db_ref's are cool.  If this assumption
3677  * is wrong, this means that someone other than keysock or me has been
3678  * mucking with PF_KEY messages.
3679  */
3680 static void
3681 esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack)
3682 {
3683         mblk_t *msg = mp->b_cont;
3684         sadb_msg_t *samsg;
3685         keysock_in_t *ksi;
3686         int error;
3687         int diagnostic = SADB_X_DIAGNOSTIC_NONE;
3688 
3689         ASSERT(msg != NULL);
3690 
3691         samsg = (sadb_msg_t *)msg->b_rptr;
3692         ksi = (keysock_in_t *)mp->b_rptr;
3693 
3694         /*
3695          * If applicable, convert unspecified AF_INET6 to unspecified
3696          * AF_INET.  And do other address reality checks.
3697          */
3698         if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp,
3699             espstack->ipsecesp_netstack) ||
3700             esp_pfkey_reality_failures(mp, ksi, espstack)) {
3701                 return;
3702         }
3703 
3704         switch (samsg->sadb_msg_type) {
3705         case SADB_ADD:
3706                 error = esp_add_sa(mp, ksi, &diagnostic,
3707                     espstack->ipsecesp_netstack);
3708                 if (error != 0) {
3709                         sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3710                             diagnostic, ksi->ks_in_serial);
3711                 }
3712                 /* else esp_add_sa() took care of things. */
3713                 break;
3714         case SADB_DELETE:
3715         case SADB_X_DELPAIR:
3716         case SADB_X_DELPAIR_STATE:
3717                 error = esp_del_sa(mp, ksi, &diagnostic, espstack,
3718                     samsg->sadb_msg_type);
3719                 if (error != 0) {
3720                         sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3721                             diagnostic, ksi->ks_in_serial);
3722                 }
3723                 /* Else esp_del_sa() took care of things. */
3724                 break;
3725         case SADB_GET:
3726                 error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb,
3727                     &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type);
3728                 if (error != 0) {
3729                         sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3730                             diagnostic, ksi->ks_in_serial);
3731                 }
                /* Else sadb_delget_sa() took care of things. */
3733                 break;
3734         case SADB_FLUSH:
3735                 sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack);
3736                 sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL);
3737                 break;
3738         case SADB_REGISTER:
3739                 /*
3740                  * Hmmm, let's do it!  Check for extensions (there should
3741                  * be none), extract the fields, call esp_register_out(),
3742                  * then either free or report an error.
3743                  *
3744                  * Keysock takes care of the PF_KEY bookkeeping for this.
3745                  */
3746                 if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
3747                     ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) {
3748                         freemsg(mp);
3749                 } else {
3750                         /*
                         * The only way this path is hit is a memory failure;
                         * esp_register_out() will not return B_FALSE for lack
                         * of esp_pfkey_q when called from wput().
3754                          */
3755                         sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM,
3756                             diagnostic, ksi->ks_in_serial);
3757                 }
3758                 break;
3759         case SADB_UPDATE:
3760         case SADB_X_UPDATEPAIR:
3761                 /*
                 * Find a larval SA; if there is none, find a full one and be
                 * strict (only lifetime updates are allowed on a mature SA).
3764                  */
3765                 error = esp_update_sa(mp, ksi, &diagnostic, espstack,
3766                     samsg->sadb_msg_type);
3767                 if (error != 0) {
3768                         sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3769                             diagnostic, ksi->ks_in_serial);
3770                 }
3771                 /* else esp_update_sa() took care of things. */
3772                 break;
3773         case SADB_GETSPI:
3774                 /*
3775                  * Reserve a new larval entry.
3776                  */
3777                 esp_getspi(mp, ksi, espstack);
3778                 break;
3779         case SADB_ACQUIRE:
3780                 /*
                 * Find the larval and/or ACQUIRE record and kill it (them);
                 * an inbound ACQUIRE most likely indicates an error, and
                 * should only have the base header.
3784                  */
3785                 sadb_in_acquire(samsg, &espstack->esp_sadb,
3786                     espstack->esp_pfkey_q, espstack->ipsecesp_netstack);
3787                 freemsg(mp);
3788                 break;
3789         case SADB_DUMP:
3790                 /*
3791                  * Dump all entries.
3792                  */
3793                 esp_dump(mp, ksi, espstack);
3794                 /* esp_dump will take care of the return message, etc. */
3795                 break;
3796         case SADB_EXPIRE:
3797                 /* Should never reach me. */
3798                 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP,
3799                     diagnostic, ksi->ks_in_serial);
3800                 break;
3801         default:
3802                 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL,
3803                     SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
3804                 break;
3805         }
3806 }
3807 
3808 /*
3809  * Handle case where PF_KEY says it can't find a keysock for one of my
3810  * ACQUIRE messages.
3811  */
3812 static void
3813 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack)
3814 {
3815         sadb_msg_t *samsg;
3816         keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
3817 
3818         if (mp->b_cont == NULL) {
3819                 freemsg(mp);
3820                 return;
3821         }
3822         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3823 
3824         /*
         * If keysock can't find any registered listeners, delete the acquire
         * record immediately, and handle errors.
3826          * immediately, and handle errors.
3827          */
3828         if (samsg->sadb_msg_type == SADB_ACQUIRE) {
3829                 samsg->sadb_msg_errno = kse->ks_err_errno;
3830                 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
3831                 /*
3832                  * Use the write-side of the esp_pfkey_q
3833                  */
3834                 sadb_in_acquire(samsg, &espstack->esp_sadb,
3835                     WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack);
3836         }
3837 
3838         freemsg(mp);
3839 }
3840 
3841 /*
3842  * ESP module write put routine.
3843  */
3844 static void
3845 ipsecesp_wput(queue_t *q, mblk_t *mp)
3846 {
3847         ipsec_info_t *ii;
3848         struct iocblk *iocp;
3849         ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
3850 
3851         esp3dbg(espstack, ("In esp_wput().\n"));
3852 
3853         /* NOTE: Each case must take care of freeing or passing mp. */
3854         switch (mp->b_datap->db_type) {
3855         case M_CTL:
3856                 if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
3857                         /* Not big enough message. */
3858                         freemsg(mp);
3859                         break;
3860                 }
3861                 ii = (ipsec_info_t *)mp->b_rptr;
3862 
3863                 switch (ii->ipsec_info_type) {
3864                 case KEYSOCK_OUT_ERR:
3865                         esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n"));
3866                         esp_keysock_no_socket(mp, espstack);
3867                         break;
3868                 case KEYSOCK_IN:
3869                         ESP_BUMP_STAT(espstack, keysock_in);
3870                         esp3dbg(espstack, ("Got KEYSOCK_IN message.\n"));
3871 
3872                         /* Parse the message. */
3873                         esp_parse_pfkey(mp, espstack);
3874                         break;
3875                 case KEYSOCK_HELLO:
3876                         sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp,
3877                             esp_ager, (void *)espstack, &espstack->esp_event,
3878                             SADB_SATYPE_ESP);
3879                         break;
3880                 default:
3881                         esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n",
3882                             ii->ipsec_info_type));
3883                         freemsg(mp);
3884                         break;
3885                 }
3886                 break;
3887         case M_IOCTL:
3888                 iocp = (struct iocblk *)mp->b_rptr;
3889                 switch (iocp->ioc_cmd) {
3890                 case ND_SET:
3891                 case ND_GET:
3892                         if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) {
3893                                 qreply(q, mp);
3894                                 return;
3895                         } else {
3896                                 iocp->ioc_error = ENOENT;
3897                         }
3898                         /* FALLTHRU */
3899                 default:
3900                         /* We really don't support any other ioctls, do we? */
3901 
3902                         /* Return EINVAL */
3903                         if (iocp->ioc_error != ENOENT)
3904                                 iocp->ioc_error = EINVAL;
3905                         iocp->ioc_count = 0;
3906                         mp->b_datap->db_type = M_IOCACK;
3907                         qreply(q, mp);
3908                         return;
3909                 }
3910         default:
3911                 esp3dbg(espstack,
3912                     ("Got default message, type %d, passing to IP.\n",
3913                     mp->b_datap->db_type));
3914                 putnext(q, mp);
3915         }
3916 }
3917 
3918 /*
3919  * Wrapper to allow IP to trigger an ESP association failure message
3920  * during inbound SA selection.
3921  */
3922 void
3923 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
3924     uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
3925 {
3926         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3927         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3928         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3929 
3930         if (espstack->ipsecesp_log_unknown_spi) {
3931                 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
3932                     addr, af, espstack->ipsecesp_netstack);
3933         }
3934 
3935         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
3936             DROPPER(ipss, ipds_esp_no_sa),
3937             &espstack->esp_dropper);
3938 }
3939 
3940 /*
3941  * Initialize the ESP input and output processing functions.
3942  */
3943 void
3944 ipsecesp_init_funcs(ipsa_t *sa)
3945 {
3946         if (sa->ipsa_output_func == NULL)
3947                 sa->ipsa_output_func = esp_outbound;
3948         if (sa->ipsa_input_func == NULL)
3949                 sa->ipsa_input_func = esp_inbound;
3950 }