1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright 2017 Joyent, Inc.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/stream.h>
  30 #include <sys/stropts.h>
  31 #include <sys/errno.h>
  32 #include <sys/strlog.h>
  33 #include <sys/tihdr.h>
  34 #include <sys/socket.h>
  35 #include <sys/ddi.h>
  36 #include <sys/sunddi.h>
  37 #include <sys/mkdev.h>
  38 #include <sys/kmem.h>
  39 #include <sys/zone.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/cmn_err.h>
  42 #include <sys/vtrace.h>
  43 #include <sys/debug.h>
  44 #include <sys/atomic.h>
  45 #include <sys/strsun.h>
  46 #include <sys/random.h>
  47 #include <netinet/in.h>
  48 #include <net/if.h>
  49 #include <netinet/ip6.h>
  50 #include <netinet/icmp6.h>
  51 #include <net/pfkeyv2.h>
  52 #include <net/pfpolicy.h>
  53 
  54 #include <inet/common.h>
  55 #include <inet/mi.h>
  56 #include <inet/ip.h>
  57 #include <inet/ip6.h>
  58 #include <inet/nd.h>
  59 #include <inet/ip_if.h>
  60 #include <inet/ip_ndp.h>
  61 #include <inet/ipsec_info.h>
  62 #include <inet/ipsec_impl.h>
  63 #include <inet/sadb.h>
  64 #include <inet/ipsecah.h>
  65 #include <inet/ipsec_impl.h>
  66 #include <inet/ipdrop.h>
  67 #include <sys/taskq.h>
  68 #include <sys/policy.h>
  69 #include <sys/strsun.h>
  70 
  71 #include <sys/crypto/common.h>
  72 #include <sys/crypto/api.h>
  73 #include <sys/kstat.h>
  74 #include <sys/strsubr.h>
  75 
  76 #include <sys/tsol/tnet.h>
  77 
  78 /*
  79  * Table of ND variables supported by ipsecah. These are loaded into
  80  * ipsecah_g_nd in ipsecah_init_nd.
  81  * All of these are alterable, within the min/max values given, at run time.
  82  */
  83 static  ipsecahparam_t  lcl_param_arr[] = {
  84         /* min  max                     value   name */
  85         { 0,    3,                      0,      "ipsecah_debug"},
  86         { 125,  32000, SADB_AGE_INTERVAL_DEFAULT,       "ipsecah_age_interval"},
  87         { 1,    10,                     1,      "ipsecah_reap_delay"},
  88         { 1,    SADB_MAX_REPLAY,        64,     "ipsecah_replay_size"},
  89         { 1,    300,                    15,     "ipsecah_acquire_timeout"},
  90         { 1,    1800,                   90,     "ipsecah_larval_timeout"},
  91         /* Default lifetime values for ACQUIRE messages. */
  92         { 0,    0xffffffffU,            0,      "ipsecah_default_soft_bytes"},
  93         { 0,    0xffffffffU,            0,      "ipsecah_default_hard_bytes"},
  94         { 0,    0xffffffffU,            24000,  "ipsecah_default_soft_addtime"},
  95         { 0,    0xffffffffU,            28800,  "ipsecah_default_hard_addtime"},
  96         { 0,    0xffffffffU,            0,      "ipsecah_default_soft_usetime"},
  97         { 0,    0xffffffffU,            0,      "ipsecah_default_hard_usetime"},
  98         { 0,    1,                      0,      "ipsecah_log_unknown_spi"},
  99 };
 100 
 101 #define ah0dbg(a)       printf a
 102 /* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
 103 #define ah1dbg(ahstack, a)      if (ahstack->ipsecah_debug != 0) printf a
 104 #define ah2dbg(ahstack, a)      if (ahstack->ipsecah_debug > 1) printf a
 105 #define ah3dbg(ahstack, a)      if (ahstack->ipsecah_debug > 2) printf a
 106 
 107 /*
 108  * XXX This is broken. Padding should be determined dynamically
 109  * depending on the ICV size and IP version number so that the
 110  * total AH header size is a multiple of 32 bits or 64 bits
 111  * for V4 and V6 respectively. For 96bit ICVs we have no problems.
 112  * Anything different from that, we need to fix our code.
 113  */
 114 #define IPV4_PADDING_ALIGN      0x04    /* Multiple of 32 bits */
 115 #define IPV6_PADDING_ALIGN      0x04    /* Multiple of 32 bits */
 116 
 117 /*
 118  * Helper macro. Avoids a call to msgdsize if there is only one
 119  * mblk in the chain.
 120  */
 121 #define AH_MSGSIZE(mp) ((mp)->b_cont != NULL ? msgdsize(mp) : MBLKL(mp))
 122 
 123 
 124 static mblk_t *ah_auth_out_done(mblk_t *, ip_xmit_attr_t *, ipsec_crypto_t *);
 125 static mblk_t *ah_auth_in_done(mblk_t *, ip_recv_attr_t *, ipsec_crypto_t *);
 126 static mblk_t *ah_process_ip_options_v4(mblk_t *, ipsa_t *, int *, uint_t,
 127     boolean_t, ipsecah_stack_t *);
 128 static mblk_t *ah_process_ip_options_v6(mblk_t *, ipsa_t *, int *, uint_t,
 129     boolean_t, ipsecah_stack_t *);
 130 static void ah_getspi(mblk_t *, keysock_in_t *, ipsecah_stack_t *);
 131 static void ah_inbound_restart(mblk_t *, ip_recv_attr_t *);
 132 
 133 static mblk_t *ah_outbound(mblk_t *, ip_xmit_attr_t *);
 134 static void ah_outbound_finish(mblk_t *, ip_xmit_attr_t *);
 135 
 136 static int ipsecah_open(queue_t *, dev_t *, int, int, cred_t *);
 137 static int ipsecah_close(queue_t *);
 138 static void ipsecah_wput(queue_t *, mblk_t *);
 139 static boolean_t ah_register_out(uint32_t, uint32_t, uint_t, ipsecah_stack_t *,
 140     cred_t *);
 141 static void     *ipsecah_stack_init(netstackid_t stackid, netstack_t *ns);
 142 static void     ipsecah_stack_fini(netstackid_t stackid, void *arg);
 143 
/* Settable in /etc/system */
 145 uint32_t ah_hash_size = IPSEC_DEFAULT_HASH_SIZE;
 146 
 147 static taskq_t *ah_taskq;
 148 
 149 static struct module_info info = {
 150         5136, "ipsecah", 0, INFPSZ, 65536, 1024
 151 };
 152 
 153 static struct qinit rinit = {
 154         (pfi_t)putnext, NULL, ipsecah_open, ipsecah_close, NULL, &info,
 155         NULL
 156 };
 157 
 158 static struct qinit winit = {
 159         (pfi_t)ipsecah_wput, NULL, ipsecah_open, ipsecah_close, NULL, &info,
 160         NULL
 161 };
 162 
 163 struct streamtab ipsecahinfo = {
 164         &rinit, &winit, NULL, NULL
 165 };
 166 
 167 static int ah_kstat_update(kstat_t *, int);
 168 
 169 uint64_t ipsacq_maxpackets = IPSACQ_MAXPACKETS;
 170 
 171 static boolean_t
 172 ah_kstat_init(ipsecah_stack_t *ahstack, netstackid_t stackid)
 173 {
 174         ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
 175 
 176         ahstack->ah_ksp = kstat_create_netstack("ipsecah", 0, "ah_stat", "net",
 177             KSTAT_TYPE_NAMED, sizeof (ah_kstats_t) / sizeof (kstat_named_t), 0,
 178             stackid);
 179 
 180         if (ahstack->ah_ksp == NULL || ahstack->ah_ksp->ks_data == NULL)
 181                 return (B_FALSE);
 182 
 183         ahstack->ah_kstats = ahstack->ah_ksp->ks_data;
 184 
 185         ahstack->ah_ksp->ks_update = ah_kstat_update;
 186         ahstack->ah_ksp->ks_private = (void *)(uintptr_t)stackid;
 187 
 188 #define K64 KSTAT_DATA_UINT64
 189 #define KI(x) kstat_named_init(&(ahstack->ah_kstats->ah_stat_##x), #x, K64)
 190 
 191         KI(num_aalgs);
 192         KI(good_auth);
 193         KI(bad_auth);
 194         KI(replay_failures);
 195         KI(replay_early_failures);
 196         KI(keysock_in);
 197         KI(out_requests);
 198         KI(acquire_requests);
 199         KI(bytes_expired);
 200         KI(out_discards);
 201         KI(crypto_sync);
 202         KI(crypto_async);
 203         KI(crypto_failures);
 204 
 205 #undef KI
 206 #undef K64
 207 
 208         kstat_install(ahstack->ah_ksp);
 209         IP_ACQUIRE_STAT(ipss, maxpackets, ipsacq_maxpackets);
 210         return (B_TRUE);
 211 }
 212 
 213 static int
 214 ah_kstat_update(kstat_t *kp, int rw)
 215 {
 216         ah_kstats_t     *ekp;
 217         netstackid_t    stackid = (netstackid_t)(uintptr_t)kp->ks_private;
 218         netstack_t      *ns;
 219         ipsec_stack_t   *ipss;
 220 
 221         if ((kp == NULL) || (kp->ks_data == NULL))
 222                 return (EIO);
 223 
 224         if (rw == KSTAT_WRITE)
 225                 return (EACCES);
 226 
 227         ns = netstack_find_by_stackid(stackid);
 228         if (ns == NULL)
 229                 return (-1);
 230         ipss = ns->netstack_ipsec;
 231         if (ipss == NULL) {
 232                 netstack_rele(ns);
 233                 return (-1);
 234         }
 235         ekp = (ah_kstats_t *)kp->ks_data;
 236 
 237         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
 238         ekp->ah_stat_num_aalgs.value.ui64 = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
 239         rw_exit(&ipss->ipsec_alg_lock);
 240 
 241         netstack_rele(ns);
 242         return (0);
 243 }
 244 
 245 /*
 246  * Don't have to lock ipsec_age_interval, as only one thread will access it at
 247  * a time, because I control the one function that does a qtimeout() on
 248  * ah_pfkey_q.
 249  */
 250 static void
 251 ah_ager(void *arg)
 252 {
 253         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;
 254         netstack_t      *ns = ahstack->ipsecah_netstack;
 255         hrtime_t begin = gethrtime();
 256 
 257         sadb_ager(&ahstack->ah_sadb.s_v4, ahstack->ah_pfkey_q,
 258             ahstack->ipsecah_reap_delay, ns);
 259         sadb_ager(&ahstack->ah_sadb.s_v6, ahstack->ah_pfkey_q,
 260             ahstack->ipsecah_reap_delay, ns);
 261 
 262         ahstack->ah_event = sadb_retimeout(begin, ahstack->ah_pfkey_q,
 263             ah_ager, ahstack,
 264             &ahstack->ipsecah_age_interval, ahstack->ipsecah_age_int_max,
 265             info.mi_idnum);
 266 }
 267 
 268 /*
 269  * Get an AH NDD parameter.
 270  */
 271 /* ARGSUSED */
 272 static int
 273 ipsecah_param_get(
 274     queue_t     *q,
 275     mblk_t      *mp,
 276     caddr_t     cp,
 277     cred_t *cr)
 278 {
 279         ipsecahparam_t  *ipsecahpa = (ipsecahparam_t *)cp;
 280         uint_t value;
 281         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr;
 282 
 283         mutex_enter(&ahstack->ipsecah_param_lock);
 284         value = ipsecahpa->ipsecah_param_value;
 285         mutex_exit(&ahstack->ipsecah_param_lock);
 286 
 287         (void) mi_mpprintf(mp, "%u", value);
 288         return (0);
 289 }
 290 
 291 /*
 292  * This routine sets an NDD variable in a ipsecahparam_t structure.
 293  */
 294 /* ARGSUSED */
 295 static int
 296 ipsecah_param_set(
 297     queue_t     *q,
 298     mblk_t      *mp,
 299     char        *value,
 300     caddr_t     cp,
 301     cred_t *cr)
 302 {
 303         ulong_t new_value;
 304         ipsecahparam_t  *ipsecahpa = (ipsecahparam_t *)cp;
 305         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr;
 306 
 307         /*
 308          * Fail the request if the new value does not lie within the
 309          * required bounds.
 310          */
 311         if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
 312             new_value < ipsecahpa->ipsecah_param_min ||
 313             new_value > ipsecahpa->ipsecah_param_max) {
 314                 return (EINVAL);
 315         }
 316 
 317         /* Set the new value */
 318         mutex_enter(&ahstack->ipsecah_param_lock);
 319         ipsecahpa->ipsecah_param_value = new_value;
 320         mutex_exit(&ahstack->ipsecah_param_lock);
 321         return (0);
 322 }
 323 
 324 /*
 325  * Using lifetime NDD variables, fill in an extended combination's
 326  * lifetime information.
 327  */
 328 void
 329 ipsecah_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
 330 {
 331         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
 332 
 333         ecomb->sadb_x_ecomb_soft_bytes = ahstack->ipsecah_default_soft_bytes;
 334         ecomb->sadb_x_ecomb_hard_bytes = ahstack->ipsecah_default_hard_bytes;
 335         ecomb->sadb_x_ecomb_soft_addtime =
 336             ahstack->ipsecah_default_soft_addtime;
 337         ecomb->sadb_x_ecomb_hard_addtime =
 338             ahstack->ipsecah_default_hard_addtime;
 339         ecomb->sadb_x_ecomb_soft_usetime =
 340             ahstack->ipsecah_default_soft_usetime;
 341         ecomb->sadb_x_ecomb_hard_usetime =
 342             ahstack->ipsecah_default_hard_usetime;
 343 }
 344 
 345 /*
 346  * Initialize things for AH at module load time.
 347  */
 348 boolean_t
 349 ipsecah_ddi_init(void)
 350 {
 351         ah_taskq = taskq_create("ah_taskq", 1, minclsyspri,
 352             IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
 353 
 354         /*
 355          * We want to be informed each time a stack is created or
 356          * destroyed in the kernel, so we can maintain the
 357          * set of ipsecah_stack_t's.
 358          */
 359         netstack_register(NS_IPSECAH, ipsecah_stack_init, NULL,
 360             ipsecah_stack_fini);
 361 
 362         return (B_TRUE);
 363 }
 364 
 365 /*
 366  * Walk through the param array specified registering each element with the
 367  * named dispatch handler.
 368  */
 369 static boolean_t
 370 ipsecah_param_register(IDP *ndp, ipsecahparam_t *ahp, int cnt)
 371 {
 372         for (; cnt-- > 0; ahp++) {
 373                 if (ahp->ipsecah_param_name != NULL &&
 374                     ahp->ipsecah_param_name[0]) {
 375                         if (!nd_load(ndp,
 376                             ahp->ipsecah_param_name,
 377                             ipsecah_param_get, ipsecah_param_set,
 378                             (caddr_t)ahp)) {
 379                                 nd_free(ndp);
 380                                 return (B_FALSE);
 381                         }
 382                 }
 383         }
 384         return (B_TRUE);
 385 }
 386 
 387 /*
 388  * Initialize things for AH for each stack instance
 389  */
 390 static void *
 391 ipsecah_stack_init(netstackid_t stackid, netstack_t *ns)
 392 {
 393         ipsecah_stack_t *ahstack;
 394         ipsecahparam_t  *ahp;
 395 
 396         ahstack = (ipsecah_stack_t *)kmem_zalloc(sizeof (*ahstack), KM_SLEEP);
 397         ahstack->ipsecah_netstack = ns;
 398 
 399         ahp = (ipsecahparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
 400         ahstack->ipsecah_params = ahp;
 401         bcopy(lcl_param_arr, ahp, sizeof (lcl_param_arr));
 402 
 403         (void) ipsecah_param_register(&ahstack->ipsecah_g_nd, ahp,
 404             A_CNT(lcl_param_arr));
 405 
 406         (void) ah_kstat_init(ahstack, stackid);
 407 
 408         ahstack->ah_sadb.s_acquire_timeout = &ahstack->ipsecah_acquire_timeout;
 409         sadbp_init("AH", &ahstack->ah_sadb, SADB_SATYPE_AH, ah_hash_size,
 410             ahstack->ipsecah_netstack);
 411 
 412         mutex_init(&ahstack->ipsecah_param_lock, NULL, MUTEX_DEFAULT, 0);
 413 
 414         ip_drop_register(&ahstack->ah_dropper, "IPsec AH");
 415         return (ahstack);
 416 }
 417 
 418 /*
 419  * Destroy things for AH at module unload time.
 420  */
 421 void
 422 ipsecah_ddi_destroy(void)
 423 {
 424         netstack_unregister(NS_IPSECAH);
 425         taskq_destroy(ah_taskq);
 426 }
 427 
 428 /*
 429  * Destroy things for AH for one stack... Never called?
 430  */
 431 static void
 432 ipsecah_stack_fini(netstackid_t stackid, void *arg)
 433 {
 434         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;
 435 
 436         if (ahstack->ah_pfkey_q != NULL) {
 437                 (void) quntimeout(ahstack->ah_pfkey_q, ahstack->ah_event);
 438         }
 439         ahstack->ah_sadb.s_acquire_timeout = NULL;
 440         sadbp_destroy(&ahstack->ah_sadb, ahstack->ipsecah_netstack);
 441         ip_drop_unregister(&ahstack->ah_dropper);
 442         mutex_destroy(&ahstack->ipsecah_param_lock);
 443         nd_free(&ahstack->ipsecah_g_nd);
 444 
 445         kmem_free(ahstack->ipsecah_params, sizeof (lcl_param_arr));
 446         ahstack->ipsecah_params = NULL;
 447         kstat_delete_netstack(ahstack->ah_ksp, stackid);
 448         ahstack->ah_ksp = NULL;
 449         ahstack->ah_kstats = NULL;
 450 
 451         kmem_free(ahstack, sizeof (*ahstack));
 452 }
 453 
 454 /*
 455  * AH module open routine, which is here for keysock plumbing.
 456  * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
 457  * Days of export control, and fears that ESP would not be allowed
 458  * to be shipped at all by default.  Eventually, keysock should
 459  * either access AH and ESP via modstubs or krtld dependencies, or
 460  * perhaps be folded in with AH and ESP into a single IPsec/netsec
 461  * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
 462  */
 463 /* ARGSUSED */
 464 static int
 465 ipsecah_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 466 {
 467         netstack_t      *ns;
 468         ipsecah_stack_t *ahstack;
 469 
 470         if (secpolicy_ip_config(credp, B_FALSE) != 0)
 471                 return (EPERM);
 472 
 473         if (q->q_ptr != NULL)
 474                 return (0);  /* Re-open of an already open instance. */
 475 
 476         if (sflag != MODOPEN)
 477                 return (EINVAL);
 478 
 479         ns = netstack_find_by_cred(credp);
 480         ASSERT(ns != NULL);
 481         ahstack = ns->netstack_ipsecah;
 482         ASSERT(ahstack != NULL);
 483 
 484         q->q_ptr = ahstack;
 485         WR(q)->q_ptr = q->q_ptr;
 486 
 487         qprocson(q);
 488         return (0);
 489 }
 490 
 491 /*
 492  * AH module close routine.
 493  */
 494 static int
 495 ipsecah_close(queue_t *q)
 496 {
 497         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr;
 498 
 499         /*
 500          * Clean up q_ptr, if needed.
 501          */
 502         qprocsoff(q);
 503 
 504         /* Keysock queue check is safe, because of OCEXCL perimeter. */
 505 
 506         if (q == ahstack->ah_pfkey_q) {
 507                 ah1dbg(ahstack,
 508                     ("ipsecah_close:  Ummm... keysock is closing AH.\n"));
 509                 ahstack->ah_pfkey_q = NULL;
 510                 /* Detach qtimeouts. */
 511                 (void) quntimeout(q, ahstack->ah_event);
 512         }
 513 
 514         netstack_rele(ahstack->ipsecah_netstack);
 515         return (0);
 516 }
 517 
 518 /*
 519  * Construct an SADB_REGISTER message with the current algorithms.
 520  */
 521 static boolean_t
 522 ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
 523     ipsecah_stack_t *ahstack, cred_t *cr)
 524 {
 525         mblk_t *mp;
 526         boolean_t rc = B_TRUE;
 527         sadb_msg_t *samsg;
 528         sadb_supported_t *sasupp;
 529         sadb_alg_t *saalg;
 530         uint_t allocsize = sizeof (*samsg);
 531         uint_t i, numalgs_snap;
 532         ipsec_alginfo_t **authalgs;
 533         uint_t num_aalgs;
 534         ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
 535         sadb_sens_t *sens;
 536         size_t sens_len = 0;
 537         sadb_ext_t *nextext;
 538         ts_label_t *sens_tsl = NULL;
 539 
 540         /* Allocate the KEYSOCK_OUT. */
 541         mp = sadb_keysock_out(serial);
 542         if (mp == NULL) {
 543                 ah0dbg(("ah_register_out: couldn't allocate mblk.\n"));
 544                 return (B_FALSE);
 545         }
 546 
 547         if (is_system_labeled() && (cr != NULL)) {
 548                 sens_tsl = crgetlabel(cr);
 549                 if (sens_tsl != NULL) {
 550                         sens_len = sadb_sens_len_from_label(sens_tsl);
 551                         allocsize += sens_len;
 552                 }
 553         }
 554 
 555         /*
 556          * Allocate the PF_KEY message that follows KEYSOCK_OUT.
 557          * The alg reader lock needs to be held while allocating
 558          * the variable part (i.e. the algorithms) of the message.
 559          */
 560 
 561         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
 562 
 563         /*
 564          * Return only valid algorithms, so the number of algorithms
 565          * to send up may be less than the number of algorithm entries
 566          * in the table.
 567          */
 568         authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
 569         for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
 570                 if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
 571                         num_aalgs++;
 572 
 573         /*
 574          * Fill SADB_REGISTER message's algorithm descriptors.  Hold
 575          * down the lock while filling it.
 576          */
 577         if (num_aalgs != 0) {
 578                 allocsize += (num_aalgs * sizeof (*saalg));
 579                 allocsize += sizeof (*sasupp);
 580         }
 581         mp->b_cont = allocb(allocsize, BPRI_HI);
 582         if (mp->b_cont == NULL) {
 583                 rw_exit(&ipss->ipsec_alg_lock);
 584                 freemsg(mp);
 585                 return (B_FALSE);
 586         }
 587 
 588         mp->b_cont->b_wptr += allocsize;
 589         nextext = (sadb_ext_t *)(mp->b_cont->b_rptr + sizeof (*samsg));
 590 
 591         if (num_aalgs != 0) {
 592 
 593                 saalg = (sadb_alg_t *)(((uint8_t *)nextext) + sizeof (*sasupp));
 594                 ASSERT(((ulong_t)saalg & 0x7) == 0);
 595 
 596                 numalgs_snap = 0;
 597                 for (i = 0;
 598                     ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
 599                     i++) {
 600                         if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
 601                                 continue;
 602 
 603                         saalg->sadb_alg_id = authalgs[i]->alg_id;
 604                         saalg->sadb_alg_ivlen = 0;
 605                         saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
 606                         saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
 607                         saalg->sadb_x_alg_increment =
 608                             authalgs[i]->alg_increment;
 609                         /* For now, salt is meaningless in AH. */
 610                         ASSERT(authalgs[i]->alg_saltlen == 0);
 611                         saalg->sadb_x_alg_saltbits =
 612                             SADB_8TO1(authalgs[i]->alg_saltlen);
 613                         numalgs_snap++;
 614                         saalg++;
 615                 }
 616                 ASSERT(numalgs_snap == num_aalgs);
 617 #ifdef DEBUG
 618                 /*
 619                  * Reality check to make sure I snagged all of the
 620                  * algorithms.
 621                  */
 622                 for (; i < IPSEC_MAX_ALGS; i++)
 623                         if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
 624                                 cmn_err(CE_PANIC,
 625                                     "ah_register_out()!  Missed #%d.\n", i);
 626 #endif /* DEBUG */
 627                 nextext = (sadb_ext_t *)saalg;
 628         }
 629 
 630         rw_exit(&ipss->ipsec_alg_lock);
 631 
 632         if (sens_tsl != NULL) {
 633                 sens = (sadb_sens_t *)nextext;
 634                 sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
 635                     sens_tsl, sens_len);
 636 
 637                 nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
 638         }
 639 
 640         /* Now fill the restof the SADB_REGISTER message. */
 641 
 642         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
 643         samsg->sadb_msg_version = PF_KEY_V2;
 644         samsg->sadb_msg_type = SADB_REGISTER;
 645         samsg->sadb_msg_errno = 0;
 646         samsg->sadb_msg_satype = SADB_SATYPE_AH;
 647         samsg->sadb_msg_len = SADB_8TO64(allocsize);
 648         samsg->sadb_msg_reserved = 0;
 649         /*
 650          * Assume caller has sufficient sequence/pid number info.  If it's one
 651          * from me over a new alg., I could give two hoots about sequence.
 652          */
 653         samsg->sadb_msg_seq = sequence;
 654         samsg->sadb_msg_pid = pid;
 655 
 656         if (num_aalgs != 0) {
 657                 sasupp = (sadb_supported_t *)(samsg + 1);
 658                 sasupp->sadb_supported_len = SADB_8TO64(
 659                     sizeof (*sasupp) + sizeof (*saalg) * num_aalgs);
 660                 sasupp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
 661                 sasupp->sadb_supported_reserved = 0;
 662         }
 663 
 664         if (ahstack->ah_pfkey_q != NULL)
 665                 putnext(ahstack->ah_pfkey_q, mp);
 666         else {
 667                 rc = B_FALSE;
 668                 freemsg(mp);
 669         }
 670 
 671         return (rc);
 672 }
 673 
 674 /*
 675  * Invoked when the algorithm table changes. Causes SADB_REGISTER
 676  * messages continaining the current list of algorithms to be
 677  * sent up to the AH listeners.
 678  */
 679 void
 680 ipsecah_algs_changed(netstack_t *ns)
 681 {
 682         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
 683 
 684         /*
 685          * Time to send a PF_KEY SADB_REGISTER message to AH listeners
 686          * everywhere.  (The function itself checks for NULL ah_pfkey_q.)
 687          */
 688         (void) ah_register_out(0, 0, 0, ahstack, NULL);
 689 }
 690 
 691 /*
 692  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
 693  * and send it into AH and IP again.
 694  */
 695 static void
 696 inbound_task(void *arg)
 697 {
 698         mblk_t          *mp = (mblk_t *)arg;
 699         mblk_t          *async_mp;
 700         ip_recv_attr_t  iras;
 701 
 702         async_mp = mp;
 703         mp = async_mp->b_cont;
 704         async_mp->b_cont = NULL;
 705         if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
 706                 /* The ill or ip_stack_t disappeared on us */
 707                 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
 708                 freemsg(mp);
 709                 goto done;
 710         }
 711 
 712         ah_inbound_restart(mp, &iras);
 713 done:
 714         ira_cleanup(&iras, B_TRUE);
 715 }
 716 
 717 /*
 718  * Restart ESP after the SA has been added.
 719  */
 720 static void
 721 ah_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
 722 {
 723         ah_t            *ah;
 724         netstack_t      *ns;
 725         ipsecah_stack_t *ahstack;
 726 
 727         ns = ira->ira_ill->ill_ipst->ips_netstack;
 728         ahstack = ns->netstack_ipsecah;
 729 
 730         ASSERT(ahstack != NULL);
 731         mp = ipsec_inbound_ah_sa(mp, ira, &ah);
 732         if (mp == NULL)
 733                 return;
 734 
 735         ASSERT(ah != NULL);
 736         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
 737         ASSERT(ira->ira_ipsec_ah_sa != NULL);
 738 
 739         mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
 740         if (mp == NULL) {
 741                 /*
 742                  * Either it failed or is pending. In the former case
 743                  * ipIfStatsInDiscards was increased.
 744                  */
 745                 return;
 746         }
 747         ip_input_post_ipsec(mp, ira);
 748 }
 749 
 750 /*
 751  * Now that weak-key passed, actually ADD the security association, and
 752  * send back a reply ADD message.
 753  */
 754 static int
 755 ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
 756     int *diagnostic, ipsecah_stack_t *ahstack)
 757 {
 758         isaf_t *primary = NULL, *secondary;
 759         boolean_t clone = B_FALSE, is_inbound = B_FALSE;
 760         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
 761         ipsa_t *larval;
 762         ipsacq_t *acqrec;
 763         iacqf_t *acq_bucket;
 764         mblk_t *acq_msgs = NULL;
 765         mblk_t *lpkt;
 766         int rc;
 767         ipsa_query_t sq;
 768         int error;
 769         netstack_t      *ns = ahstack->ipsecah_netstack;
 770         ipsec_stack_t   *ipss = ns->netstack_ipsec;
 771 
 772         /*
 773          * Locate the appropriate table(s).
 774          */
 775 
 776         sq.spp = &ahstack->ah_sadb;
 777         error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
 778             IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
 779             &sq, diagnostic);
 780         if (error)
 781                 return (error);
 782 
 783         /*
 784          * Use the direction flags provided by the KMD to determine
 785          * if the inbound or outbound table should be the primary
 786          * for this SA. If these flags were absent then make this
 787          * decision based on the addresses.
 788          */
 789         if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
 790                 primary = sq.inbound;
 791                 secondary = sq.outbound;
 792                 is_inbound = B_TRUE;
 793                 if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
 794                         clone = B_TRUE;
 795         } else {
 796                 if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
 797                         primary = sq.outbound;
 798                         secondary = sq.inbound;
 799                 }
 800         }
 801         if (primary == NULL) {
 802                 /*
 803                  * The KMD did not set a direction flag, determine which
 804                  * table to insert the SA into based on addresses.
 805                  */
 806                 switch (ksi->ks_in_dsttype) {
 807                 case KS_IN_ADDR_MBCAST:
 808                         clone = B_TRUE; /* All mcast SAs can be bidirectional */
 809                         assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
 810                         /* FALLTHRU */
 811                 /*
 812                  * If the source address is either one of mine, or unspecified
 813                  * (which is best summed up by saying "not 'not mine'"),
 814                  * then the association is potentially bi-directional,
 815                  * in that it can be used for inbound traffic and outbound
 816                  * traffic.  The best example of such and SA is a multicast
 817                  * SA (which allows me to receive the outbound traffic).
 818                  */
 819                 case KS_IN_ADDR_ME:
 820                         assoc->sadb_sa_flags |= IPSA_F_INBOUND;
 821                         primary = sq.inbound;
 822                         secondary = sq.outbound;
 823                         if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
 824                                 clone = B_TRUE;
 825                         is_inbound = B_TRUE;
 826                         break;
 827 
 828                 /*
 829                  * If the source address literally not mine (either
 830                  * unspecified or not mine), then this SA may have an
 831                  * address that WILL be mine after some configuration.
 832                  * We pay the price for this by making it a bi-directional
 833                  * SA.
 834                  */
 835                 case KS_IN_ADDR_NOTME:
 836                         assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
 837                         primary = sq.outbound;
 838                         secondary = sq.inbound;
 839                         if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
 840                                 assoc->sadb_sa_flags |= IPSA_F_INBOUND;
 841                                 clone = B_TRUE;
 842                         }
 843                         break;
 844                 default:
 845                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
 846                         return (EINVAL);
 847                 }
 848         }
 849 
 850         /*
 851          * Find a ACQUIRE list entry if possible.  If we've added an SA that
 852          * suits the needs of an ACQUIRE list entry, we can eliminate the
 853          * ACQUIRE list entry and transmit the enqueued packets.  Use the
 854          * high-bit of the sequence number to queue it.  Key off destination
 855          * addr, and change acqrec's state.
 856          */
 857 
 858         if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
 859                 acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
 860                 mutex_enter(&acq_bucket->iacqf_lock);
 861                 for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
 862                     acqrec = acqrec->ipsacq_next) {
 863                         mutex_enter(&acqrec->ipsacq_lock);
 864                         /*
 865                          * Q:  I only check sequence.  Should I check dst?
 866                          * A: Yes, check dest because those are the packets
 867                          *    that are queued up.
 868                          */
 869                         if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
 870                             IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
 871                             acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
 872                                 break;
 873                         mutex_exit(&acqrec->ipsacq_lock);
 874                 }
 875                 if (acqrec != NULL) {
 876                         /*
 877                          * AHA!  I found an ACQUIRE record for this SA.
 878                          * Grab the msg list, and free the acquire record.
 879                          * I already am holding the lock for this record,
 880                          * so all I have to do is free it.
 881                          */
 882                         acq_msgs = acqrec->ipsacq_mp;
 883                         acqrec->ipsacq_mp = NULL;
 884                         mutex_exit(&acqrec->ipsacq_lock);
 885                         sadb_destroy_acquire(acqrec, ns);
 886                 }
 887                 mutex_exit(&acq_bucket->iacqf_lock);
 888         }
 889 
 890         /*
 891          * Find PF_KEY message, and see if I'm an update.  If so, find entry
 892          * in larval list (if there).
 893          */
 894 
 895         larval = NULL;
 896 
 897         if (samsg->sadb_msg_type == SADB_UPDATE) {
 898                 mutex_enter(&sq.inbound->isaf_lock);
 899                 larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
 900                     ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
 901                 mutex_exit(&sq.inbound->isaf_lock);
 902 
 903                 if ((larval == NULL) ||
 904                     (larval->ipsa_state != IPSA_STATE_LARVAL)) {
 905                         *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
 906                         if (larval != NULL) {
 907                                 IPSA_REFRELE(larval);
 908                         }
 909                         ah0dbg(("Larval update, but larval disappeared.\n"));
 910                         return (ESRCH);
 911                 } /* Else sadb_common_add unlinks it for me! */
 912         }
 913 
 914         if (larval != NULL) {
 915                 /*
 916                  * Hold again, because sadb_common_add() consumes a reference,
 917                  * and we don't want to clear_lpkt() without a reference.
 918                  */
 919                 IPSA_REFHOLD(larval);
 920         }
 921 
 922         rc = sadb_common_add(ahstack->ah_pfkey_q, mp,
 923             samsg, ksi, primary, secondary, larval, clone, is_inbound,
 924             diagnostic, ns, &ahstack->ah_sadb);
 925 
 926         if (larval != NULL) {
 927                 if (rc == 0) {
 928                         lpkt = sadb_clear_lpkt(larval);
 929                         if (lpkt != NULL) {
 930                                 rc = !taskq_dispatch(ah_taskq, inbound_task,
 931                                     lpkt, TQ_NOSLEEP);
 932                         }
 933                 }
 934                 IPSA_REFRELE(larval);
 935         }
 936 
 937         /*
 938          * How much more stack will I create with all of these
 939          * ah_outbound_*() calls?
 940          */
 941 
 942         /* Handle the packets queued waiting for the SA */
 943         while (acq_msgs != NULL) {
 944                 mblk_t          *asyncmp;
 945                 mblk_t          *data_mp;
 946                 ip_xmit_attr_t  ixas;
 947                 ill_t           *ill;
 948 
 949                 asyncmp = acq_msgs;
 950                 acq_msgs = acq_msgs->b_next;
 951                 asyncmp->b_next = NULL;
 952 
 953                 /*
 954                  * Extract the ip_xmit_attr_t from the first mblk.
 955                  * Verifies that the netstack and ill is still around; could
 956                  * have vanished while iked was doing its work.
 957                  * On successful return we have a nce_t and the ill/ipst can't
 958                  * disappear until we do the nce_refrele in ixa_cleanup.
 959                  */
 960                 data_mp = asyncmp->b_cont;
 961                 asyncmp->b_cont = NULL;
 962                 if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
 963                         AH_BUMP_STAT(ahstack, out_discards);
 964                         ip_drop_packet(data_mp, B_FALSE, NULL,
 965                             DROPPER(ipss, ipds_sadb_acquire_timeout),
 966                             &ahstack->ah_dropper);
 967                 } else if (rc != 0) {
 968                         ill = ixas.ixa_nce->nce_ill;
 969                         AH_BUMP_STAT(ahstack, out_discards);
 970                         ip_drop_packet(data_mp, B_FALSE, ill,
 971                             DROPPER(ipss, ipds_sadb_acquire_timeout),
 972                             &ahstack->ah_dropper);
 973                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
 974                 } else {
 975                         ah_outbound_finish(data_mp, &ixas);
 976                 }
 977                 ixa_cleanup(&ixas);
 978         }
 979 
 980         return (rc);
 981 }
 982 
 983 
 984 /*
 985  * Process one of the queued messages (from ipsacq_mp) once the SA
 986  * has been added.
 987  */
 988 static void
 989 ah_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
 990 {
 991         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
 992         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
 993         ipsec_stack_t   *ipss = ns->netstack_ipsec;
 994         ill_t           *ill = ixa->ixa_nce->nce_ill;
 995 
 996         if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) {
 997                 AH_BUMP_STAT(ahstack, out_discards);
 998                 ip_drop_packet(data_mp, B_FALSE, ill,
 999                     DROPPER(ipss, ipds_sadb_acquire_timeout),
1000                     &ahstack->ah_dropper);
1001                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1002                 return;
1003         }
1004 
1005         data_mp = ah_outbound(data_mp, ixa);
1006         if (data_mp == NULL)
1007                 return;
1008 
1009         (void) ip_output_post_ipsec(data_mp, ixa);
1010 }
1011 
1012 /*
1013  * Add new AH security association.  This may become a generic AH/ESP
1014  * routine eventually.
1015  */
1016 static int
1017 ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
1018 {
1019         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1020         sadb_address_t *srcext =
1021             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
1022         sadb_address_t *dstext =
1023             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1024         sadb_address_t *isrcext =
1025             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
1026         sadb_address_t *idstext =
1027             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
1028         sadb_key_t *key = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
1029         struct sockaddr_in *src, *dst;
1030         /* We don't need sockaddr_in6 for now. */
1031         sadb_lifetime_t *soft =
1032             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
1033         sadb_lifetime_t *hard =
1034             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
1035         sadb_lifetime_t *idle =
1036             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
1037         ipsec_alginfo_t *aalg;
1038         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
1039         ipsec_stack_t   *ipss = ns->netstack_ipsec;
1040 
1041         /* I need certain extensions present for an ADD message. */
1042         if (srcext == NULL) {
1043                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
1044                 return (EINVAL);
1045         }
1046         if (dstext == NULL) {
1047                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
1048                 return (EINVAL);
1049         }
1050         if (isrcext == NULL && idstext != NULL) {
1051                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
1052                 return (EINVAL);
1053         }
1054         if (isrcext != NULL && idstext == NULL) {
1055                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
1056                 return (EINVAL);
1057         }
1058         if (assoc == NULL) {
1059                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
1060                 return (EINVAL);
1061         }
1062         if (key == NULL) {
1063                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_AKEY;
1064                 return (EINVAL);
1065         }
1066 
1067         src = (struct sockaddr_in *)(srcext + 1);
1068         dst = (struct sockaddr_in *)(dstext + 1);
1069 
1070         /* Sundry ADD-specific reality checks. */
1071         /* XXX STATS : Logging/stats here? */
1072 
1073         if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
1074             (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
1075                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
1076                 return (EINVAL);
1077         }
1078         if (assoc->sadb_sa_encrypt != SADB_EALG_NONE) {
1079                 *diagnostic = SADB_X_DIAGNOSTIC_ENCR_NOTSUPP;
1080                 return (EINVAL);
1081         }
1082         if (assoc->sadb_sa_flags & ~ahstack->ah_sadb.s_addflags) {
1083                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
1084                 return (EINVAL);
1085         }
1086         if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0)
1087                 return (EINVAL);
1088 
1089         ASSERT(src->sin_family == dst->sin_family);
1090 
1091         /* Stuff I don't support, for now.  XXX Diagnostic? */
1092         if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
1093                 return (EOPNOTSUPP);
1094 
1095         if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL) {
1096                 if (!is_system_labeled())
1097                         return (EOPNOTSUPP);
1098         }
1099 
1100         if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL) {
1101                 if (!is_system_labeled())
1102                         return (EOPNOTSUPP);
1103         }
1104         /*
1105          * XXX Policy : I'm not checking identities at this time, but
1106          * if I did, I'd do them here, before I sent the weak key
1107          * check up to the algorithm.
1108          */
1109 
1110         /* verify that there is a mapping for the specified algorithm */
1111         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
1112         aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][assoc->sadb_sa_auth];
1113         if (aalg == NULL || !ALG_VALID(aalg)) {
1114                 rw_exit(&ipss->ipsec_alg_lock);
1115                 ah1dbg(ahstack, ("Couldn't find auth alg #%d.\n",
1116                     assoc->sadb_sa_auth));
1117                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
1118                 return (EINVAL);
1119         }
1120         ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
1121 
1122         /* sanity check key sizes */
1123         if (!ipsec_valid_key_size(key->sadb_key_bits, aalg)) {
1124                 rw_exit(&ipss->ipsec_alg_lock);
1125                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
1126                 return (EINVAL);
1127         }
1128 
1129         /* check key and fix parity if needed */
1130         if (ipsec_check_key(aalg->alg_mech_type, key, B_TRUE,
1131             diagnostic) != 0) {
1132                 rw_exit(&ipss->ipsec_alg_lock);
1133                 return (EINVAL);
1134         }
1135 
1136         rw_exit(&ipss->ipsec_alg_lock);
1137 
1138         return (ah_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
1139             diagnostic, ahstack));
1140 }
1141 
1142 /* Refactor me */
1143 /*
1144  * Update a security association.  Updates come in two varieties.  The first
1145  * is an update of lifetimes on a non-larval SA.  The second is an update of
1146  * a larval SA, which ends up looking a lot more like an add.
1147  */
1148 static int
1149 ah_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
1150     ipsecah_stack_t *ahstack, uint8_t sadb_msg_type)
1151 {
1152         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1153         sadb_address_t *dstext =
1154             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1155         mblk_t  *buf_pkt;
1156         int rcode;
1157 
1158         if (dstext == NULL) {
1159                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
1160                 return (EINVAL);
1161         }
1162 
1163         rcode = sadb_update_sa(mp, ksi, &buf_pkt, &ahstack->ah_sadb,
1164             diagnostic, ahstack->ah_pfkey_q, ah_add_sa,
1165             ahstack->ipsecah_netstack, sadb_msg_type);
1166 
1167         if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
1168             (rcode != 0)) {
1169                 return (rcode);
1170         }
1171 
1172         HANDLE_BUF_PKT(ah_taskq, ahstack->ipsecah_netstack->netstack_ipsec,
1173             ahstack->ah_dropper, buf_pkt);
1174 
1175         return (rcode);
1176 }
1177 
1178 /* Refactor me */
1179 /*
1180  * Delete a security association.  This is REALLY likely to be code common to
1181  * both AH and ESP.  Find the association, then unlink it.
1182  */
1183 static int
1184 ah_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
1185     ipsecah_stack_t *ahstack, uint8_t sadb_msg_type)
1186 {
1187         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1188         sadb_address_t *dstext =
1189             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1190         sadb_address_t *srcext =
1191             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
1192         struct sockaddr_in *sin;
1193 
1194         if (assoc == NULL) {
1195                 if (dstext != NULL)
1196                         sin = (struct sockaddr_in *)(dstext + 1);
1197                 else if (srcext != NULL)
1198                         sin = (struct sockaddr_in *)(srcext + 1);
1199                 else {
1200                         *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
1201                         return (EINVAL);
1202                 }
1203                 return (sadb_purge_sa(mp, ksi,
1204                     (sin->sin_family == AF_INET6) ? &ahstack->ah_sadb.s_v6 :
1205                     &ahstack->ah_sadb.s_v4, diagnostic, ahstack->ah_pfkey_q));
1206         }
1207 
1208         return (sadb_delget_sa(mp, ksi, &ahstack->ah_sadb, diagnostic,
1209             ahstack->ah_pfkey_q, sadb_msg_type));
1210 }
1211 
1212 /* Refactor me */
1213 /*
1214  * Convert the entire contents of all of AH's SA tables into PF_KEY SADB_DUMP
1215  * messages.
1216  */
1217 static void
1218 ah_dump(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack)
1219 {
1220         int error;
1221         sadb_msg_t *samsg;
1222 
1223         /*
1224          * Dump each fanout, bailing if error is non-zero.
1225          */
1226 
1227         error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi, &ahstack->ah_sadb.s_v4);
1228         if (error != 0)
1229                 goto bail;
1230 
1231         error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi, &ahstack->ah_sadb.s_v6);
1232 bail:
1233         ASSERT(mp->b_cont != NULL);
1234         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
1235         samsg->sadb_msg_errno = (uint8_t)error;
1236         sadb_pfkey_echo(ahstack->ah_pfkey_q, mp,
1237             (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
1238 }
1239 
1240 /*
1241  * First-cut reality check for an inbound PF_KEY message.
1242  */
1243 static boolean_t
1244 ah_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
1245     ipsecah_stack_t *ahstack)
1246 {
1247         int diagnostic;
1248 
1249         if (mp->b_cont == NULL) {
1250                 freemsg(mp);
1251                 return (B_TRUE);
1252         }
1253 
1254         if (ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1255                 diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
1256                 goto badmsg;
1257         }
1258         if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
1259                 diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
1260                 goto badmsg;
1261         }
1262         if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
1263             ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
1264                 diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
1265                 goto badmsg;
1266         }
1267         return (B_FALSE);       /* False ==> no failures */
1268 
1269 badmsg:
1270         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL,
1271             diagnostic, ksi->ks_in_serial);
1272         return (B_TRUE);        /* True ==> failures */
1273 }
1274 
1275 /*
1276  * AH parsing of PF_KEY messages.  Keysock did most of the really silly
1277  * error cases.  What I receive is a fully-formed, syntactically legal
1278  * PF_KEY message.  I then need to check semantics...
1279  *
1280  * This code may become common to AH and ESP.  Stay tuned.
1281  *
1282  * I also make the assumption that db_ref's are cool.  If this assumption
1283  * is wrong, this means that someone other than keysock or me has been
1284  * mucking with PF_KEY messages.
1285  */
1286 static void
1287 ah_parse_pfkey(mblk_t *mp, ipsecah_stack_t *ahstack)
1288 {
1289         mblk_t *msg = mp->b_cont;
1290         sadb_msg_t *samsg;
1291         keysock_in_t *ksi;
1292         int error;
1293         int diagnostic = SADB_X_DIAGNOSTIC_NONE;
1294 
1295         ASSERT(msg != NULL);
1296 
1297         samsg = (sadb_msg_t *)msg->b_rptr;
1298         ksi = (keysock_in_t *)mp->b_rptr;
1299 
1300         /*
1301          * If applicable, convert unspecified AF_INET6 to unspecified
1302          * AF_INET.
1303          */
1304         if (!sadb_addrfix(ksi, ahstack->ah_pfkey_q, mp,
1305             ahstack->ipsecah_netstack) ||
1306             ah_pfkey_reality_failures(mp, ksi, ahstack)) {
1307                 return;
1308         }
1309 
1310         switch (samsg->sadb_msg_type) {
1311         case SADB_ADD:
1312                 error = ah_add_sa(mp, ksi, &diagnostic,
1313                     ahstack->ipsecah_netstack);
1314                 if (error != 0) {
1315                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1316                             diagnostic, ksi->ks_in_serial);
1317                 }
1318                 /* else ah_add_sa() took care of things. */
1319                 break;
1320         case SADB_DELETE:
1321         case SADB_X_DELPAIR:
1322         case SADB_X_DELPAIR_STATE:
1323                 error = ah_del_sa(mp, ksi, &diagnostic, ahstack,
1324                     samsg->sadb_msg_type);
1325                 if (error != 0) {
1326                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1327                             diagnostic, ksi->ks_in_serial);
1328                 }
1329                 /* Else ah_del_sa() took care of things. */
1330                 break;
1331         case SADB_GET:
1332                 error = sadb_delget_sa(mp, ksi, &ahstack->ah_sadb, &diagnostic,
1333                     ahstack->ah_pfkey_q, samsg->sadb_msg_type);
1334                 if (error != 0) {
1335                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1336                             diagnostic, ksi->ks_in_serial);
1337                 }
1338                 /* Else sadb_get_sa() took care of things. */
1339                 break;
1340         case SADB_FLUSH:
1341                 sadbp_flush(&ahstack->ah_sadb, ahstack->ipsecah_netstack);
1342                 sadb_pfkey_echo(ahstack->ah_pfkey_q, mp, samsg, ksi, NULL);
1343                 break;
1344         case SADB_REGISTER:
1345                 /*
1346                  * Hmmm, let's do it!  Check for extensions (there should
1347                  * be none), extract the fields, call ah_register_out(),
1348                  * then either free or report an error.
1349                  *
1350                  * Keysock takes care of the PF_KEY bookkeeping for this.
1351                  */
1352                 if (ah_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
1353                     ksi->ks_in_serial, ahstack, msg_getcred(mp, NULL))) {
1354                         freemsg(mp);
1355                 } else {
1356                         /*
1357                          * Only way this path hits is if there is a memory
1358                          * failure.  It will not return B_FALSE because of
1359                          * lack of ah_pfkey_q if I am in wput().
1360                          */
1361                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM,
1362                             diagnostic, ksi->ks_in_serial);
1363                 }
1364                 break;
1365         case SADB_UPDATE:
1366         case SADB_X_UPDATEPAIR:
1367                 /*
1368                  * Find a larval, if not there, find a full one and get
1369                  * strict.
1370                  */
1371                 error = ah_update_sa(mp, ksi, &diagnostic, ahstack,
1372                     samsg->sadb_msg_type);
1373                 if (error != 0) {
1374                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1375                             diagnostic, ksi->ks_in_serial);
1376                 }
1377                 /* else ah_update_sa() took care of things. */
1378                 break;
1379         case SADB_GETSPI:
1380                 /*
1381                  * Reserve a new larval entry.
1382                  */
1383                 ah_getspi(mp, ksi, ahstack);
1384                 break;
1385         case SADB_ACQUIRE:
1386                 /*
1387                  * Find larval and/or ACQUIRE record and kill it (them), I'm
1388                  * most likely an error.  Inbound ACQUIRE messages should only
1389                  * have the base header.
1390                  */
1391                 sadb_in_acquire(samsg, &ahstack->ah_sadb, ahstack->ah_pfkey_q,
1392                     ahstack->ipsecah_netstack);
1393                 freemsg(mp);
1394                 break;
1395         case SADB_DUMP:
1396                 /*
1397                  * Dump all entries.
1398                  */
1399                 ah_dump(mp, ksi, ahstack);
1400                 /* ah_dump will take care of the return message, etc. */
1401                 break;
1402         case SADB_EXPIRE:
1403                 /* Should never reach me. */
1404                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EOPNOTSUPP,
1405                     diagnostic, ksi->ks_in_serial);
1406                 break;
1407         default:
1408                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL,
1409                     SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
1410                 break;
1411         }
1412 }
1413 
1414 /*
1415  * Handle case where PF_KEY says it can't find a keysock for one of my
1416  * ACQUIRE messages.
1417  */
1418 static void
1419 ah_keysock_no_socket(mblk_t *mp, ipsecah_stack_t *ahstack)
1420 {
1421         sadb_msg_t *samsg;
1422         keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
1423 
1424         if (mp->b_cont == NULL) {
1425                 freemsg(mp);
1426                 return;
1427         }
1428         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
1429 
1430         /*
1431          * If keysock can't find any registered, delete the acquire record
1432          * immediately, and handle errors.
1433          */
1434         if (samsg->sadb_msg_type == SADB_ACQUIRE) {
1435                 samsg->sadb_msg_errno = kse->ks_err_errno;
1436                 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1437                 /*
1438                  * Use the write-side of the ah_pfkey_q
1439                  */
1440                 sadb_in_acquire(samsg, &ahstack->ah_sadb,
1441                     WR(ahstack->ah_pfkey_q), ahstack->ipsecah_netstack);
1442         }
1443 
1444         freemsg(mp);
1445 }
1446 
1447 /*
1448  * AH module write put routine.
1449  */
1450 static void
1451 ipsecah_wput(queue_t *q, mblk_t *mp)
1452 {
1453         ipsec_info_t *ii;
1454         struct iocblk *iocp;
1455         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr;
1456 
1457         ah3dbg(ahstack, ("In ah_wput().\n"));
1458 
1459         /* NOTE:  Each case must take care of freeing or passing mp. */
1460         switch (mp->b_datap->db_type) {
1461         case M_CTL:
1462                 if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
1463                         /* Not big enough message. */
1464                         freemsg(mp);
1465                         break;
1466                 }
1467                 ii = (ipsec_info_t *)mp->b_rptr;
1468 
1469                 switch (ii->ipsec_info_type) {
1470                 case KEYSOCK_OUT_ERR:
1471                         ah1dbg(ahstack, ("Got KEYSOCK_OUT_ERR message.\n"));
1472                         ah_keysock_no_socket(mp, ahstack);
1473                         break;
1474                 case KEYSOCK_IN:
1475                         AH_BUMP_STAT(ahstack, keysock_in);
1476                         ah3dbg(ahstack, ("Got KEYSOCK_IN message.\n"));
1477 
1478                         /* Parse the message. */
1479                         ah_parse_pfkey(mp, ahstack);
1480                         break;
1481                 case KEYSOCK_HELLO:
1482                         sadb_keysock_hello(&ahstack->ah_pfkey_q, q, mp,
1483                             ah_ager, (void *)ahstack, &ahstack->ah_event,
1484                             SADB_SATYPE_AH);
1485                         break;
1486                 default:
1487                         ah1dbg(ahstack, ("Got M_CTL from above of 0x%x.\n",
1488                             ii->ipsec_info_type));
1489                         freemsg(mp);
1490                         break;
1491                 }
1492                 break;
1493         case M_IOCTL:
1494                 iocp = (struct iocblk *)mp->b_rptr;
1495                 switch (iocp->ioc_cmd) {
1496                 case ND_SET:
1497                 case ND_GET:
1498                         if (nd_getset(q, ahstack->ipsecah_g_nd, mp)) {
1499                                 qreply(q, mp);
1500                                 return;
1501                         } else {
1502                                 iocp->ioc_error = ENOENT;
1503                         }
1504                         /* FALLTHRU */
1505                 default:
1506                         /* We really don't support any other ioctls, do we? */
1507 
1508                         /* Return EINVAL */
1509                         if (iocp->ioc_error != ENOENT)
1510                                 iocp->ioc_error = EINVAL;
1511                         iocp->ioc_count = 0;
1512                         mp->b_datap->db_type = M_IOCACK;
1513                         qreply(q, mp);
1514                         return;
1515                 }
1516         default:
1517                 ah3dbg(ahstack,
1518                     ("Got default message, type %d, passing to IP.\n",
1519                     mp->b_datap->db_type));
1520                 putnext(q, mp);
1521         }
1522 }
1523 
1524 /* Refactor me */
1525 /*
1526  * Updating use times can be tricky business if the ipsa_haspeer flag is
1527  * set.  This function is called once in an SA's lifetime.
1528  *
1529  * Caller has to REFRELE "assoc" which is passed in.  This function has
1530  * to REFRELE any peer SA that is obtained.
1531  */
1532 static void
1533 ah_set_usetime(ipsa_t *assoc, boolean_t inbound)
1534 {
1535         ipsa_t *inassoc, *outassoc;
1536         isaf_t *bucket;
1537         sadb_t *sp;
1538         int outhash;
1539         boolean_t isv6;
1540         netstack_t      *ns = assoc->ipsa_netstack;
1541         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
1542 
1543         /* No peer?  No problem! */
1544         if (!assoc->ipsa_haspeer) {
1545                 sadb_set_usetime(assoc);
1546                 return;
1547         }
1548 
1549         /*
1550          * Otherwise, we want to grab both the original assoc and its peer.
1551          * There might be a race for this, but if it's a real race, the times
1552          * will be out-of-synch by at most a second, and since our time
1553          * granularity is a second, this won't be a problem.
1554          *
1555          * If we need tight synchronization on the peer SA, then we need to
1556          * reconsider.
1557          */
1558 
1559         /* Use address family to select IPv6/IPv4 */
1560         isv6 = (assoc->ipsa_addrfam == AF_INET6);
1561         if (isv6) {
1562                 sp = &ahstack->ah_sadb.s_v6;
1563         } else {
1564                 sp = &ahstack->ah_sadb.s_v4;
1565                 ASSERT(assoc->ipsa_addrfam == AF_INET);
1566         }
1567         if (inbound) {
1568                 inassoc = assoc;
1569                 if (isv6)
1570                         outhash = OUTBOUND_HASH_V6(sp,
1571                             *((in6_addr_t *)&inassoc->ipsa_dstaddr));
1572                 else
1573                         outhash = OUTBOUND_HASH_V4(sp,
1574                             *((ipaddr_t *)&inassoc->ipsa_dstaddr));
1575                 bucket = &sp->sdb_of[outhash];
1576 
1577                 mutex_enter(&bucket->isaf_lock);
1578                 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1579                     inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1580                     inassoc->ipsa_addrfam);
1581                 mutex_exit(&bucket->isaf_lock);
1582                 if (outassoc == NULL) {
1583                         /* Q: Do we wish to set haspeer == B_FALSE? */
1584                         ah0dbg(("ah_set_usetime: "
1585                             "can't find peer for inbound.\n"));
1586                         sadb_set_usetime(inassoc);
1587                         return;
1588                 }
1589         } else {
1590                 outassoc = assoc;
1591                 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1592                 mutex_enter(&bucket->isaf_lock);
1593                 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1594                     outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1595                     outassoc->ipsa_addrfam);
1596                 mutex_exit(&bucket->isaf_lock);
1597                 if (inassoc == NULL) {
1598                         /* Q: Do we wish to set haspeer == B_FALSE? */
1599                         ah0dbg(("ah_set_usetime: "
1600                             "can't find peer for outbound.\n"));
1601                         sadb_set_usetime(outassoc);
1602                         return;
1603                 }
1604         }
1605 
1606         /* Update usetime on both. */
1607         sadb_set_usetime(inassoc);
1608         sadb_set_usetime(outassoc);
1609 
1610         /*
1611          * REFRELE any peer SA.
1612          *
1613          * Because of the multi-line macro nature of IPSA_REFRELE, keep
1614          * them in { }.
1615          */
1616         if (inbound) {
1617                 IPSA_REFRELE(outassoc);
1618         } else {
1619                 IPSA_REFRELE(inassoc);
1620         }
1621 }
1622 
1623 /* Refactor me */
1624 /*
1625  * Add a number of bytes to what the SA has protected so far.  Return
1626  * B_TRUE if the SA can still protect that many bytes.
1627  *
1628  * Caller must REFRELE the passed-in assoc.  This function must REFRELE
1629  * any obtained peer SA.
1630  */
1631 static boolean_t
1632 ah_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
1633 {
1634         ipsa_t *inassoc, *outassoc;
1635         isaf_t *bucket;
1636         boolean_t inrc, outrc, isv6;
1637         sadb_t *sp;
1638         int outhash;
1639         netstack_t      *ns = assoc->ipsa_netstack;
1640         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
1641 
1642         /* No peer?  No problem! */
1643         if (!assoc->ipsa_haspeer) {
1644                 return (sadb_age_bytes(ahstack->ah_pfkey_q, assoc, bytes,
1645                     B_TRUE));
1646         }
1647 
1648         /*
1649          * Otherwise, we want to grab both the original assoc and its peer.
1650          * There might be a race for this, but if it's a real race, two
1651          * expire messages may occur.  We limit this by only sending the
1652          * expire message on one of the peers, we'll pick the inbound
1653          * arbitrarily.
1654          *
1655          * If we need tight synchronization on the peer SA, then we need to
1656          * reconsider.
1657          */
1658 
1659         /* Pick v4/v6 bucket based on addrfam. */
1660         isv6 = (assoc->ipsa_addrfam == AF_INET6);
1661         if (isv6) {
1662                 sp = &ahstack->ah_sadb.s_v6;
1663         } else {
1664                 sp = &ahstack->ah_sadb.s_v4;
1665                 ASSERT(assoc->ipsa_addrfam == AF_INET);
1666         }
1667         if (inbound) {
1668                 inassoc = assoc;
1669                 if (isv6)
1670                         outhash = OUTBOUND_HASH_V6(sp,
1671                             *((in6_addr_t *)&inassoc->ipsa_dstaddr));
1672                 else
1673                         outhash = OUTBOUND_HASH_V4(sp,
1674                             *((ipaddr_t *)&inassoc->ipsa_dstaddr));
1675                 bucket = &sp->sdb_of[outhash];
1676                 mutex_enter(&bucket->isaf_lock);
1677                 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1678                     inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1679                     inassoc->ipsa_addrfam);
1680                 mutex_exit(&bucket->isaf_lock);
1681                 if (outassoc == NULL) {
1682                         /* Q: Do we wish to set haspeer == B_FALSE? */
1683                         ah0dbg(("ah_age_bytes: "
1684                             "can't find peer for inbound.\n"));
1685                         return (sadb_age_bytes(ahstack->ah_pfkey_q, inassoc,
1686                             bytes, B_TRUE));
1687                 }
1688         } else {
1689                 outassoc = assoc;
1690                 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1691                 mutex_enter(&bucket->isaf_lock);
1692                 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1693                     outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1694                     outassoc->ipsa_addrfam);
1695                 mutex_exit(&bucket->isaf_lock);
1696                 if (inassoc == NULL) {
1697                         /* Q: Do we wish to set haspeer == B_FALSE? */
1698                         ah0dbg(("ah_age_bytes: "
1699                             "can't find peer for outbound.\n"));
1700                         return (sadb_age_bytes(ahstack->ah_pfkey_q, outassoc,
1701                             bytes, B_TRUE));
1702                 }
1703         }
1704 
1705         inrc = sadb_age_bytes(ahstack->ah_pfkey_q, inassoc, bytes, B_TRUE);
1706         outrc = sadb_age_bytes(ahstack->ah_pfkey_q, outassoc, bytes, B_FALSE);
1707 
1708         /*
1709          * REFRELE any peer SA.
1710          *
1711          * Because of the multi-line macro nature of IPSA_REFRELE, keep
1712          * them in { }.
1713          */
1714         if (inbound) {
1715                 IPSA_REFRELE(outassoc);
1716         } else {
1717                 IPSA_REFRELE(inassoc);
1718         }
1719 
1720         return (inrc && outrc);
1721 }
1722 
1723 /* Refactor me */
1724 /*
1725  * Handle the SADB_GETSPI message.  Create a larval SA.
1726  */
1727 static void
1728 ah_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack)
1729 {
1730         ipsa_t *newbie, *target;
1731         isaf_t *outbound, *inbound;
1732         int rc, diagnostic;
1733         sadb_sa_t *assoc;
1734         keysock_out_t *kso;
1735         uint32_t newspi;
1736 
1737         /*
1738          * Randomly generate a proposed SPI value.
1739          */
1740         if (cl_inet_getspi != NULL) {
1741                 cl_inet_getspi(ahstack->ipsecah_netstack->netstack_stackid,
1742                     IPPROTO_AH, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
1743         } else {
1744                 (void) random_get_pseudo_bytes((uint8_t *)&newspi,
1745                     sizeof (uint32_t));
1746         }
1747         newbie = sadb_getspi(ksi, newspi, &diagnostic,
1748             ahstack->ipsecah_netstack, IPPROTO_AH);
1749 
1750         if (newbie == NULL) {
1751                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM, diagnostic,
1752                     ksi->ks_in_serial);
1753                 return;
1754         } else if (newbie == (ipsa_t *)-1) {
1755                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL, diagnostic,
1756                     ksi->ks_in_serial);
1757                 return;
1758         }
1759 
1760         /*
1761          * XXX - We may randomly collide.  We really should recover from this.
1762          *       Unfortunately, that could require spending way-too-much-time
1763          *       in here.  For now, let the user retry.
1764          */
1765 
1766         if (newbie->ipsa_addrfam == AF_INET6) {
1767                 outbound = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6,
1768                     *(uint32_t *)(newbie->ipsa_dstaddr));
1769                 inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v6,
1770                     newbie->ipsa_spi);
1771         } else {
1772                 outbound = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4,
1773                     *(uint32_t *)(newbie->ipsa_dstaddr));
1774                 inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v4,
1775                     newbie->ipsa_spi);
1776         }
1777 
1778         mutex_enter(&outbound->isaf_lock);
1779         mutex_enter(&inbound->isaf_lock);
1780 
1781         /*
1782          * Check for collisions (i.e. did sadb_getspi() return with something
1783          * that already exists?).
1784          *
1785          * Try outbound first.  Even though SADB_GETSPI is traditionally
1786          * for inbound SAs, you never know what a user might do.
1787          */
1788         target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
1789             newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
1790         if (target == NULL) {
1791                 target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
1792                     newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
1793                     newbie->ipsa_addrfam);
1794         }
1795 
1796         /*
1797          * I don't have collisions elsewhere!
1798          * (Nor will I because I'm still holding inbound/outbound locks.)
1799          */
1800 
1801         if (target != NULL) {
1802                 rc = EEXIST;
1803                 IPSA_REFRELE(target);
1804         } else {
1805                 /*
1806                  * sadb_insertassoc() also checks for collisions, so
1807                  * if there's a colliding larval entry, rc will be set
1808                  * to EEXIST.
1809                  */
1810                 rc = sadb_insertassoc(newbie, inbound);
1811                 newbie->ipsa_hardexpiretime = gethrestime_sec();
1812                 newbie->ipsa_hardexpiretime += ahstack->ipsecah_larval_timeout;
1813         }
1814 
1815         /*
1816          * Can exit outbound mutex.  Hold inbound until we're done with
1817          * newbie.
1818          */
1819         mutex_exit(&outbound->isaf_lock);
1820 
1821         if (rc != 0) {
1822                 mutex_exit(&inbound->isaf_lock);
1823                 IPSA_REFRELE(newbie);
1824                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, rc,
1825                     SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1826                 return;
1827         }
1828 
1829         /* Can write here because I'm still holding the bucket lock. */
1830         newbie->ipsa_type = SADB_SATYPE_AH;
1831 
1832         /*
1833          * Construct successful return message.  We have one thing going
1834          * for us in PF_KEY v2.  That's the fact that
1835          *      sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
1836          */
1837         assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
1838         assoc->sadb_sa_exttype = SADB_EXT_SA;
1839         assoc->sadb_sa_spi = newbie->ipsa_spi;
1840         *((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
1841         mutex_exit(&inbound->isaf_lock);
1842 
1843         /* Convert KEYSOCK_IN to KEYSOCK_OUT. */
1844         kso = (keysock_out_t *)ksi;
1845         kso->ks_out_len = sizeof (*kso);
1846         kso->ks_out_serial = ksi->ks_in_serial;
1847         kso->ks_out_type = KEYSOCK_OUT;
1848 
1849         /*
1850          * Can safely putnext() to ah_pfkey_q, because this is a turnaround
1851          * from the ah_pfkey_q.
1852          */
1853         putnext(ahstack->ah_pfkey_q, mp);
1854 }
1855 
1856 /*
1857  * IPv6 sends up the ICMP errors for validation and the removal of the AH
1858  * header.
1859  * If succesful, the mp has been modified to not include the AH header so
1860  * that the caller can fanout to the ULP's icmp error handler.
1861  */
1862 static mblk_t *
1863 ah_icmp_error_v6(mblk_t *mp, ip_recv_attr_t *ira, ipsecah_stack_t *ahstack)
1864 {
1865         ip6_t *ip6h, *oip6h;
1866         uint16_t hdr_length, ah_length;
1867         uint8_t *nexthdrp;
1868         ah_t *ah;
1869         icmp6_t *icmp6;
1870         isaf_t *isaf;
1871         ipsa_t *assoc;
1872         uint8_t *post_ah_ptr;
1873         ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
1874 
1875         /*
1876          * Eat the cost of a pullupmsg() for now.  It makes the rest of this
1877          * code far less convoluted.
1878          */
1879         if (!pullupmsg(mp, -1) ||
1880             !ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, &hdr_length,
1881             &nexthdrp) ||
1882             mp->b_rptr + hdr_length + sizeof (icmp6_t) + sizeof (ip6_t) +
1883             sizeof (ah_t) > mp->b_wptr) {
1884                 IP_AH_BUMP_STAT(ipss, in_discards);
1885                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1886                     DROPPER(ipss, ipds_ah_nomem),
1887                     &ahstack->ah_dropper);
1888                 return (NULL);
1889         }
1890 
1891         oip6h = (ip6_t *)mp->b_rptr;
1892         icmp6 = (icmp6_t *)((uint8_t *)oip6h + hdr_length);
1893         ip6h = (ip6_t *)(icmp6 + 1);
1894         if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
1895                 IP_AH_BUMP_STAT(ipss, in_discards);
1896                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1897                     DROPPER(ipss, ipds_ah_bad_v6_hdrs),
1898                     &ahstack->ah_dropper);
1899                 return (NULL);
1900         }
1901         ah = (ah_t *)((uint8_t *)ip6h + hdr_length);
1902 
1903         isaf = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6, ip6h->ip6_dst);
1904         mutex_enter(&isaf->isaf_lock);
1905         assoc = ipsec_getassocbyspi(isaf, ah->ah_spi,
1906             (uint32_t *)&ip6h->ip6_src, (uint32_t *)&ip6h->ip6_dst, AF_INET6);
1907         mutex_exit(&isaf->isaf_lock);
1908 
1909         if (assoc == NULL) {
1910                 IP_AH_BUMP_STAT(ipss, lookup_failure);
1911                 IP_AH_BUMP_STAT(ipss, in_discards);
1912                 if (ahstack->ipsecah_log_unknown_spi) {
1913                         ipsec_assocfailure(info.mi_idnum, 0, 0,
1914                             SL_CONSOLE | SL_WARN | SL_ERROR,
1915                             "Bad ICMP message - No association for the "
1916                             "attached AH header whose spi is 0x%x, "
1917                             "sender is 0x%x\n",
1918                             ah->ah_spi, &oip6h->ip6_src, AF_INET6,
1919                             ahstack->ipsecah_netstack);
1920                 }
1921                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1922                     DROPPER(ipss, ipds_ah_no_sa),
1923                     &ahstack->ah_dropper);
1924                 return (NULL);
1925         }
1926 
1927         IPSA_REFRELE(assoc);
1928 
1929         /*
1930          * There seems to be a valid association. If there is enough of AH
1931          * header remove it, otherwise bail.  One could check whether it has
1932          * complete AH header plus 8 bytes but it does not make sense if an
1933          * icmp error is returned for ICMP messages e.g ICMP time exceeded,
1934          * that are being sent up. Let the caller figure out.
1935          *
1936          * NOTE: ah_length is the number of 32 bit words minus 2.
1937          */
1938         ah_length = (ah->ah_length << 2) + 8;
1939         post_ah_ptr = (uint8_t *)ah + ah_length;
1940 
1941         if (post_ah_ptr > mp->b_wptr) {
1942                 IP_AH_BUMP_STAT(ipss, in_discards);
1943                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1944                     DROPPER(ipss, ipds_ah_bad_length),
1945                     &ahstack->ah_dropper);
1946                 return (NULL);
1947         }
1948 
1949         ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - ah_length);
1950         *nexthdrp = ah->ah_nexthdr;
1951         ovbcopy(post_ah_ptr, ah,
1952             (size_t)((uintptr_t)mp->b_wptr - (uintptr_t)post_ah_ptr));
1953         mp->b_wptr -= ah_length;
1954 
1955         return (mp);
1956 }
1957 
1958 /*
1959  * IP sends up the ICMP errors for validation and the removal of
1960  * the AH header.
1961  * If succesful, the mp has been modified to not include the AH header so
1962  * that the caller can fanout to the ULP's icmp error handler.
1963  */
1964 static mblk_t *
1965 ah_icmp_error_v4(mblk_t *mp, ip_recv_attr_t *ira, ipsecah_stack_t *ahstack)
1966 {
1967         mblk_t *mp1;
1968         icmph_t *icmph;
1969         int iph_hdr_length;
1970         int hdr_length;
1971         isaf_t *hptr;
1972         ipsa_t *assoc;
1973         int ah_length;
1974         ipha_t *ipha;
1975         ipha_t *oipha;
1976         ah_t *ah;
1977         uint32_t length;
1978         int alloc_size;
1979         uint8_t nexthdr;
1980         ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
1981 
1982         oipha = ipha = (ipha_t *)mp->b_rptr;
1983         iph_hdr_length = IPH_HDR_LENGTH(ipha);
1984         icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1985 
1986         ipha = (ipha_t *)&icmph[1];
1987         hdr_length = IPH_HDR_LENGTH(ipha);
1988 
1989         /*
1990          * See if we have enough to locate the SPI
1991          */
1992         if ((uchar_t *)ipha + hdr_length + 8 > mp->b_wptr) {
1993                 if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length + 8 -
1994                     mp->b_rptr)) {
1995                         ipsec_rl_strlog(ahstack->ipsecah_netstack,
1996                             info.mi_idnum, 0, 0,
1997                             SL_WARN | SL_ERROR,
1998                             "ICMP error: Small AH header\n");
1999                         IP_AH_BUMP_STAT(ipss, in_discards);
2000                         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2001                             DROPPER(ipss, ipds_ah_bad_length),
2002                             &ahstack->ah_dropper);
2003                         return (NULL);
2004                 }
2005                 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2006                 ipha = (ipha_t *)&icmph[1];
2007         }
2008 
2009         ah = (ah_t *)((uint8_t *)ipha + hdr_length);
2010         nexthdr = ah->ah_nexthdr;
2011 
2012         hptr = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4, ipha->ipha_dst);
2013         mutex_enter(&hptr->isaf_lock);
2014         assoc = ipsec_getassocbyspi(hptr, ah->ah_spi,
2015             (uint32_t *)&ipha->ipha_src, (uint32_t *)&ipha->ipha_dst, AF_INET);
2016         mutex_exit(&hptr->isaf_lock);
2017 
2018         if (assoc == NULL) {
2019                 IP_AH_BUMP_STAT(ipss, lookup_failure);
2020                 IP_AH_BUMP_STAT(ipss, in_discards);
2021                 if (ahstack->ipsecah_log_unknown_spi) {
2022                         ipsec_assocfailure(info.mi_idnum, 0, 0,
2023                             SL_CONSOLE | SL_WARN | SL_ERROR,
2024                             "Bad ICMP message - No association for the "
2025                             "attached AH header whose spi is 0x%x, "
2026                             "sender is 0x%x\n",
2027                             ah->ah_spi, &oipha->ipha_src, AF_INET,
2028                             ahstack->ipsecah_netstack);
2029                 }
2030                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2031                     DROPPER(ipss, ipds_ah_no_sa),
2032                     &ahstack->ah_dropper);
2033                 return (NULL);
2034         }
2035 
2036         IPSA_REFRELE(assoc);
2037         /*
2038          * There seems to be a valid association. If there
2039          * is enough of AH header remove it, otherwise remove
2040          * as much as possible and send it back. One could check
2041          * whether it has complete AH header plus 8 bytes but it
2042          * does not make sense if an icmp error is returned for
2043          * ICMP messages e.g ICMP time exceeded, that are being
2044          * sent up. Let the caller figure out.
2045          *
2046          * NOTE: ah_length is the number of 32 bit words minus 2.
2047          */
2048         ah_length = (ah->ah_length << 2) + 8;
2049 
2050         if ((uchar_t *)ipha + hdr_length + ah_length > mp->b_wptr) {
2051                 if (mp->b_cont == NULL) {
2052                         /*
2053                          * There is nothing to pullup. Just remove as
2054                          * much as possible. This is a common case for
2055                          * IPV4.
2056                          */
2057                         ah_length = (mp->b_wptr - ((uchar_t *)ipha +
2058                             hdr_length));
2059                         goto done;
2060                 }
2061                 /* Pullup the full ah header */
2062                 if (!pullupmsg(mp, (uchar_t *)ah + ah_length - mp->b_rptr)) {
2063                         /*
2064                          * pullupmsg could have failed if there was not
2065                          * enough to pullup or memory allocation failed.
2066                          * We tried hard, give up now.
2067                          */
2068                         IP_AH_BUMP_STAT(ipss, in_discards);
2069                         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2070                             DROPPER(ipss, ipds_ah_nomem),
2071                             &ahstack->ah_dropper);
2072                         return (NULL);
2073                 }
2074                 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2075                 ipha = (ipha_t *)&icmph[1];
2076         }
2077 done:
2078         /*
2079          * Remove the AH header and change the protocol.
2080          * Don't update the spi fields in the ip_recv_attr_t
2081          * as we are called just to validate the
2082          * message attached to the ICMP message.
2083          *
2084          * If we never pulled up since all of the message
2085          * is in one single mblk, we can't remove the AH header
2086          * by just setting the b_wptr to the beginning of the
2087          * AH header. We need to allocate a mblk that can hold
2088          * up until the inner IP header and copy them.
2089          */
2090         alloc_size = iph_hdr_length + sizeof (icmph_t) + hdr_length;
2091 
2092         if ((mp1 = allocb(alloc_size, BPRI_LO)) == NULL) {
2093                 IP_AH_BUMP_STAT(ipss, in_discards);
2094                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2095                     DROPPER(ipss, ipds_ah_nomem),
2096                     &ahstack->ah_dropper);
2097                 return (NULL);
2098         }
2099         bcopy(mp->b_rptr, mp1->b_rptr, alloc_size);
2100         mp1->b_wptr += alloc_size;
2101 
2102         /*
2103          * Skip whatever we have copied and as much of AH header
2104          * possible. If we still have something left in the original
2105          * message, tag on.
2106          */
2107         mp->b_rptr = (uchar_t *)ipha + hdr_length + ah_length;
2108 
2109         if (mp->b_rptr != mp->b_wptr) {
2110                 mp1->b_cont = mp;
2111         } else {
2112                 if (mp->b_cont != NULL)
2113                         mp1->b_cont = mp->b_cont;
2114                 freeb(mp);
2115         }
2116 
2117         ipha = (ipha_t *)(mp1->b_rptr + iph_hdr_length + sizeof (icmph_t));
2118         ipha->ipha_protocol = nexthdr;
2119         length = ntohs(ipha->ipha_length);
2120         length -= ah_length;
2121         ipha->ipha_length = htons((uint16_t)length);
2122         ipha->ipha_hdr_checksum = 0;
2123         ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2124 
2125         return (mp1);
2126 }
2127 
2128 /*
2129  * IP calls this to validate the ICMP errors that
2130  * we got from the network.
2131  */
2132 mblk_t *
2133 ipsecah_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
2134 {
2135         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
2136         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
2137 
2138         if (ira->ira_flags & IRAF_IS_IPV4)
2139                 return (ah_icmp_error_v4(data_mp, ira, ahstack));
2140         else
2141                 return (ah_icmp_error_v6(data_mp, ira, ahstack));
2142 }
2143 
2144 static int
2145 ah_fix_tlv_options_v6(uint8_t *oi_opt, uint8_t *pi_opt, uint_t ehdrlen,
2146     uint8_t hdr_type, boolean_t copy_always)
2147 {
2148         uint8_t opt_type;
2149         uint_t optlen;
2150 
2151         ASSERT(hdr_type == IPPROTO_DSTOPTS || hdr_type == IPPROTO_HOPOPTS);
2152 
2153         /*
2154          * Copy the next header and hdr ext. len of the HOP-by-HOP
2155          * and Destination option.
2156          */
2157         *pi_opt++ = *oi_opt++;
2158         *pi_opt++ = *oi_opt++;
2159         ehdrlen -= 2;
2160 
2161         /*
2162          * Now handle all the TLV encoded options.
2163          */
2164         while (ehdrlen != 0) {
2165                 opt_type = *oi_opt;
2166 
2167                 if (opt_type == IP6OPT_PAD1) {
2168                         optlen = 1;
2169                 } else {
2170                         if (ehdrlen < 2)
2171                                 goto bad_opt;
2172                         optlen = 2 + oi_opt[1];
2173                         if (optlen > ehdrlen)
2174                                 goto bad_opt;
2175                 }
2176                 if (copy_always || !(opt_type & IP6OPT_MUTABLE)) {
2177                         bcopy(oi_opt, pi_opt, optlen);
2178                 } else {
2179                         if (optlen == 1) {
2180                                 *pi_opt = 0;
2181                         } else {
2182                                 /*
2183                                  * Copy the type and data length fields.
2184                                  * Zero the option data by skipping
2185                                  * option type and option data len
2186                                  * fields.
2187                                  */
2188                                 *pi_opt = *oi_opt;
2189                                 *(pi_opt + 1) = *(oi_opt + 1);
2190                                 bzero(pi_opt + 2, optlen - 2);
2191                         }
2192                 }
2193                 ehdrlen -= optlen;
2194                 oi_opt += optlen;
2195                 pi_opt += optlen;
2196         }
2197         return (0);
2198 bad_opt:
2199         return (-1);
2200 }
2201 
2202 /*
2203  * Construct a pseudo header for AH, processing all the options.
2204  *
2205  * oip6h is the IPv6 header of the incoming or outgoing packet.
2206  * ip6h is the pointer to the pseudo headers IPV6 header. All
2207  * the space needed for the options have been allocated including
2208  * the AH header.
2209  *
2210  * If copy_always is set, all the options that appear before AH are copied
2211  * blindly without checking for IP6OPT_MUTABLE. This is used by
2212  * ah_auth_out_done().  Please refer to that function for details.
2213  *
2214  * NOTE :
2215  *
2216  * *  AH header is never copied in this function even if copy_always
2217  *    is set. It just returns the ah_offset - offset of the AH header
2218  *    and the caller needs to do the copying. This is done so that we
 *    don't have to pass extra arguments e.g. SA etc. and also,
2220  *    it is not needed when ah_auth_out_done is calling this function.
2221  */
2222 static uint_t
2223 ah_fix_phdr_v6(ip6_t *ip6h, ip6_t *oip6h, boolean_t outbound,
2224     boolean_t copy_always)
2225 {
2226         uint8_t *oi_opt;
2227         uint8_t *pi_opt;
2228         uint8_t nexthdr;
2229         uint8_t *prev_nexthdr;
2230         ip6_hbh_t *hbhhdr;
2231         ip6_dest_t *dsthdr = NULL;
2232         ip6_rthdr0_t *rthdr;
2233         int ehdrlen;
2234         ah_t *ah;
2235         int ret;
2236 
2237         /*
2238          * In the outbound case for source route, ULP has already moved
2239          * the first hop, which is now in ip6_dst. We need to re-arrange
2240          * the header to make it look like how it would appear in the
2241          * receiver i.e
2242          *
2243          * Because of ip_massage_options_v6 the header looks like
2244          * this :
2245          *
2246          * ip6_src = S, ip6_dst = I1. followed by I2,I3,D.
2247          *
2248          * When it reaches the receiver, it would look like
2249          *
2250          * ip6_src = S, ip6_dst = D. followed by I1,I2,I3.
2251          *
2252          * NOTE : We assume that there are no problems with the options
2253          * as IP should have already checked this.
2254          */
2255 
2256         oi_opt = (uchar_t *)&oip6h[1];
2257         pi_opt = (uchar_t *)&ip6h[1];
2258 
2259         /*
2260          * We set the prev_nexthdr properly in the pseudo header.
2261          * After we finish authentication and come back from the
2262          * algorithm module, pseudo header will become the real
2263          * IP header.
2264          */
2265         prev_nexthdr = (uint8_t *)&ip6h->ip6_nxt;
2266         nexthdr = oip6h->ip6_nxt;
2267         /* Assume IP has already stripped it */
2268         ASSERT(nexthdr != IPPROTO_FRAGMENT);
2269         ah = NULL;
2270         dsthdr = NULL;
2271         for (;;) {
2272                 switch (nexthdr) {
2273                 case IPPROTO_HOPOPTS:
2274                         hbhhdr = (ip6_hbh_t *)oi_opt;
2275                         nexthdr = hbhhdr->ip6h_nxt;
2276                         ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
2277                         ret = ah_fix_tlv_options_v6(oi_opt, pi_opt, ehdrlen,
2278                             IPPROTO_HOPOPTS, copy_always);
2279                         /*
2280                          * Return a zero offset indicating error if there
2281                          * was error.
2282                          */
2283                         if (ret == -1)
2284                                 return (0);
2285                         hbhhdr = (ip6_hbh_t *)pi_opt;
2286                         prev_nexthdr = (uint8_t *)&hbhhdr->ip6h_nxt;
2287                         break;
2288                 case IPPROTO_ROUTING:
2289                         rthdr = (ip6_rthdr0_t *)oi_opt;
2290                         nexthdr = rthdr->ip6r0_nxt;
2291                         ehdrlen = 8 * (rthdr->ip6r0_len + 1);
2292                         if (!copy_always && outbound) {
2293                                 int i, left;
2294                                 ip6_rthdr0_t *prthdr;
2295                                 in6_addr_t *ap, *pap;
2296 
2297                                 left = rthdr->ip6r0_segleft;
2298                                 prthdr = (ip6_rthdr0_t *)pi_opt;
2299                                 pap = (in6_addr_t *)(prthdr + 1);
2300                                 ap = (in6_addr_t *)(rthdr + 1);
2301                                 /*
2302                                  * First eight bytes except seg_left
2303                                  * does not change en route.
2304                                  */
2305                                 bcopy(oi_opt, pi_opt, 8);
2306                                 prthdr->ip6r0_segleft = 0;
2307                                 /*
2308                                  * First address has been moved to
2309                                  * the destination address of the
2310                                  * ip header by ip_massage_options_v6.
2311                                  * And the real destination address is
2312                                  * in the last address part of the
2313                                  * option.
2314                                  */
2315                                 *pap = oip6h->ip6_dst;
2316                                 for (i = 1; i < left - 1; i++)
2317                                         pap[i] = ap[i - 1];
2318                                 ip6h->ip6_dst = *(ap + left - 1);
2319                         } else {
2320                                 bcopy(oi_opt, pi_opt, ehdrlen);
2321                         }
2322                         rthdr = (ip6_rthdr0_t *)pi_opt;
2323                         prev_nexthdr = (uint8_t *)&rthdr->ip6r0_nxt;
2324                         break;
2325                 case IPPROTO_DSTOPTS:
2326                         /*
2327                          * Destination options are tricky.  If there is
2328                          * a terminal (e.g. non-IPv6-extension) header
2329                          * following the destination options, don't
2330                          * reset prev_nexthdr or advance the AH insertion
2331                          * point and just treat this as a terminal header.
2332                          *
2333                          * If this is an inbound packet, just deal with
2334                          * it as is.
2335                          */
2336                         dsthdr = (ip6_dest_t *)oi_opt;
2337                         /*
2338                          * XXX I hope common-subexpression elimination
2339                          * saves us the double-evaluate.
2340                          */
2341                         if (outbound && dsthdr->ip6d_nxt != IPPROTO_ROUTING &&
2342                             dsthdr->ip6d_nxt != IPPROTO_HOPOPTS)
2343                                 goto terminal_hdr;
2344                         nexthdr = dsthdr->ip6d_nxt;
2345                         ehdrlen = 8 * (dsthdr->ip6d_len + 1);
2346                         ret = ah_fix_tlv_options_v6(oi_opt, pi_opt, ehdrlen,
2347                             IPPROTO_DSTOPTS, copy_always);
2348                         /*
2349                          * Return a zero offset indicating error if there
2350                          * was error.
2351                          */
2352                         if (ret == -1)
2353                                 return (0);
2354                         break;
2355                 case IPPROTO_AH:
2356                         /*
2357                          * Be conservative in what you send.  We shouldn't
2358                          * see two same-scoped AH's in one packet.
2359                          * (Inner-IP-scoped AH will be hit by terminal
2360                          * header of IP or IPv6.)
2361                          */
2362                         ASSERT(!outbound);
2363                         return ((uint_t)(pi_opt - (uint8_t *)ip6h));
2364                 default:
2365                         ASSERT(outbound);
2366 terminal_hdr:
2367                         *prev_nexthdr = IPPROTO_AH;
2368                         ah = (ah_t *)pi_opt;
2369                         ah->ah_nexthdr = nexthdr;
2370                         return ((uint_t)(pi_opt - (uint8_t *)ip6h));
2371                 }
2372                 pi_opt += ehdrlen;
2373                 oi_opt += ehdrlen;
2374         }
2375         /* NOTREACHED */
2376 }
2377 
2378 static boolean_t
2379 ah_finish_up(ah_t *phdr_ah, ah_t *inbound_ah, ipsa_t *assoc,
2380     int ah_data_sz, int ah_align_sz, ipsecah_stack_t *ahstack)
2381 {
2382         int i;
2383 
2384         /*
2385          * Padding :
2386          *
2387          * 1) Authentication data may have to be padded
2388          * before ICV calculation if ICV is not a multiple
2389          * of 64 bits. This padding is arbitrary and transmitted
2390          * with the packet at the end of the authentication data.
2391          * Payload length should include the padding bytes.
2392          *
2393          * 2) Explicit padding of the whole datagram may be
2394          * required by the algorithm which need not be
2395          * transmitted. It is assumed that this will be taken
2396          * care by the algorithm module.
2397          */
2398         bzero(phdr_ah + 1, ah_data_sz); /* Zero out ICV for pseudo-hdr. */
2399 
2400         if (inbound_ah == NULL) {
2401                 /* Outbound AH datagram. */
2402 
2403                 phdr_ah->ah_length = (ah_align_sz >> 2) + 1;
2404                 phdr_ah->ah_reserved = 0;
2405                 phdr_ah->ah_spi = assoc->ipsa_spi;
2406 
2407                 phdr_ah->ah_replay =
2408                     htonl(atomic_inc_32_nv(&assoc->ipsa_replay));
2409                 if (phdr_ah->ah_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2410                         /*
2411                          * XXX We have replay counter wrapping.  We probably
2412                          * want to nuke this SA (and its peer).
2413                          */
2414                         ipsec_assocfailure(info.mi_idnum, 0, 0,
2415                             SL_ERROR | SL_CONSOLE | SL_WARN,
2416                             "Outbound AH SA (0x%x), dst %s has wrapped "
2417                             "sequence.\n", phdr_ah->ah_spi,
2418                             assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
2419                             ahstack->ipsecah_netstack);
2420 
2421                         sadb_replay_delete(assoc);
2422                         /* Caller will free phdr_mp and return NULL. */
2423                         return (B_FALSE);
2424                 }
2425 
2426                 if (ah_data_sz != ah_align_sz) {
2427                         uchar_t *pad = ((uchar_t *)phdr_ah + sizeof (ah_t) +
2428                             ah_data_sz);
2429 
2430                         for (i = 0; i < (ah_align_sz - ah_data_sz); i++) {
2431                                 pad[i] = (uchar_t)i;    /* Fill the padding */
2432                         }
2433                 }
2434         } else {
2435                 /* Inbound AH datagram. */
2436                 phdr_ah->ah_nexthdr = inbound_ah->ah_nexthdr;
2437                 phdr_ah->ah_length = inbound_ah->ah_length;
2438                 phdr_ah->ah_reserved = 0;
2439                 ASSERT(inbound_ah->ah_spi == assoc->ipsa_spi);
2440                 phdr_ah->ah_spi = inbound_ah->ah_spi;
2441                 phdr_ah->ah_replay = inbound_ah->ah_replay;
2442 
2443                 if (ah_data_sz != ah_align_sz) {
2444                         uchar_t *opad = ((uchar_t *)inbound_ah +
2445                             sizeof (ah_t) + ah_data_sz);
2446                         uchar_t *pad = ((uchar_t *)phdr_ah + sizeof (ah_t) +
2447                             ah_data_sz);
2448 
2449                         for (i = 0; i < (ah_align_sz - ah_data_sz); i++) {
2450                                 pad[i] = opad[i];       /* Copy the padding */
2451                         }
2452                 }
2453         }
2454 
2455         return (B_TRUE);
2456 }
2457 
2458 /*
2459  * Called upon failing the inbound ICV check. The message passed as
2460  * argument is freed.
2461  */
2462 static void
2463 ah_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
2464 {
2465         boolean_t       isv4 = (ira->ira_flags & IRAF_IS_IPV4);
2466         ipsa_t          *assoc = ira->ira_ipsec_ah_sa;
2467         int             af;
2468         void            *addr;
2469         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
2470         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
2471         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2472 
2473         ASSERT(mp->b_datap->db_type == M_DATA);
2474 
2475         mp->b_rptr -= ic->ic_skip_len;
2476 
2477         if (isv4) {
2478                 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2479                 addr = &ipha->ipha_dst;
2480                 af = AF_INET;
2481         } else {
2482                 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2483                 addr = &ip6h->ip6_dst;
2484                 af = AF_INET6;
2485         }
2486 
2487         /*
2488          * Log the event. Don't print to the console, block
2489          * potential denial-of-service attack.
2490          */
2491         AH_BUMP_STAT(ahstack, bad_auth);
2492 
2493         ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
2494             "AH Authentication failed spi %x, dst_addr %s",
2495             assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
2496 
2497         IP_AH_BUMP_STAT(ipss, in_discards);
2498         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2499             DROPPER(ipss, ipds_ah_bad_auth),
2500             &ahstack->ah_dropper);
2501 }
2502 
2503 /*
2504  * Kernel crypto framework callback invoked after completion of async
2505  * crypto requests for outbound packets.
2506  */
2507 static void
2508 ah_kcf_callback_outbound(void *arg, int status)
2509 {
2510         mblk_t          *mp = (mblk_t *)arg;
2511         mblk_t          *async_mp;
2512         netstack_t      *ns;
2513         ipsec_stack_t   *ipss;
2514         ipsecah_stack_t *ahstack;
2515         mblk_t          *data_mp;
2516         ip_xmit_attr_t  ixas;
2517         ipsec_crypto_t  *ic;
2518         ill_t           *ill;
2519 
2520         /*
2521          * First remove the ipsec_crypto_t mblk
2522          * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
2523          */
2524         async_mp = ipsec_remove_crypto_data(mp, &ic);
2525         ASSERT(async_mp != NULL);
2526 
2527         /*
2528          * Extract the ip_xmit_attr_t from the first mblk.
2529          * Verifies that the netstack and ill is still around; could
2530          * have vanished while kEf was doing its work.
2531          * On succesful return we have a nce_t and the ill/ipst can't
2532          * disappear until we do the nce_refrele in ixa_cleanup.
2533          */
2534         data_mp = async_mp->b_cont;
2535         async_mp->b_cont = NULL;
2536         if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
2537                 /* Disappeared on us - no ill/ipst for MIB */
2538                 if (ixas.ixa_nce != NULL) {
2539                         ill = ixas.ixa_nce->nce_ill;
2540                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2541                         ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
2542                 }
2543                 freemsg(data_mp);
2544                 goto done;
2545         }
2546         ns = ixas.ixa_ipst->ips_netstack;
2547         ahstack = ns->netstack_ipsecah;
2548         ipss = ns->netstack_ipsec;
2549         ill = ixas.ixa_nce->nce_ill;
2550 
2551         if (status == CRYPTO_SUCCESS) {
2552                 data_mp = ah_auth_out_done(data_mp, &ixas, ic);
2553                 if (data_mp == NULL)
2554                         goto done;
2555 
2556                 (void) ip_output_post_ipsec(data_mp, &ixas);
2557         } else {
2558                 /* Outbound shouldn't see invalid MAC */
2559                 ASSERT(status != CRYPTO_INVALID_MAC);
2560 
2561                 ah1dbg(ahstack,
2562                     ("ah_kcf_callback_outbound: crypto failed with 0x%x\n",
2563                     status));
2564                 AH_BUMP_STAT(ahstack, crypto_failures);
2565                 AH_BUMP_STAT(ahstack, out_discards);
2566 
2567                 ip_drop_packet(data_mp, B_FALSE, ill,
2568                     DROPPER(ipss, ipds_ah_crypto_failed),
2569                     &ahstack->ah_dropper);
2570                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2571         }
2572 done:
2573         ixa_cleanup(&ixas);
2574         (void) ipsec_free_crypto_data(mp);
2575 }
2576 
2577 /*
2578  * Kernel crypto framework callback invoked after completion of async
2579  * crypto requests for inbound packets.
2580  */
2581 static void
2582 ah_kcf_callback_inbound(void *arg, int status)
2583 {
2584         mblk_t          *mp = (mblk_t *)arg;
2585         mblk_t          *async_mp;
2586         netstack_t      *ns;
2587         ipsec_stack_t   *ipss;
2588         ipsecah_stack_t *ahstack;
2589         mblk_t          *data_mp;
2590         ip_recv_attr_t  iras;
2591         ipsec_crypto_t  *ic;
2592 
2593         /*
2594          * First remove the ipsec_crypto_t mblk
2595          * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
2596          */
2597         async_mp = ipsec_remove_crypto_data(mp, &ic);
2598         ASSERT(async_mp != NULL);
2599 
2600         /*
2601          * Extract the ip_xmit_attr_t from the first mblk.
2602          * Verifies that the netstack and ill is still around; could
2603          * have vanished while kEf was doing its work.
2604          */
2605         data_mp = async_mp->b_cont;
2606         async_mp->b_cont = NULL;
2607         if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
2608                 /* The ill or ip_stack_t disappeared on us */
2609                 ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
2610                 freemsg(data_mp);
2611                 goto done;
2612         }
2613         ns = iras.ira_ill->ill_ipst->ips_netstack;
2614         ahstack = ns->netstack_ipsecah;
2615         ipss = ns->netstack_ipsec;
2616 
2617         if (status == CRYPTO_SUCCESS) {
2618                 data_mp = ah_auth_in_done(data_mp, &iras, ic);
2619                 if (data_mp == NULL)
2620                         goto done;
2621 
2622                 /* finish IPsec processing */
2623                 ip_input_post_ipsec(data_mp, &iras);
2624 
2625         } else if (status == CRYPTO_INVALID_MAC) {
2626                 ah_log_bad_auth(data_mp, &iras, ic);
2627         } else {
2628                 ah1dbg(ahstack,
2629                     ("ah_kcf_callback_inbound: crypto failed with 0x%x\n",
2630                     status));
2631                 AH_BUMP_STAT(ahstack, crypto_failures);
2632                 IP_AH_BUMP_STAT(ipss, in_discards);
2633                 ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
2634                     DROPPER(ipss, ipds_ah_crypto_failed),
2635                     &ahstack->ah_dropper);
2636                 BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2637         }
2638 done:
2639         ira_cleanup(&iras, B_TRUE);
2640         (void) ipsec_free_crypto_data(mp);
2641 }
2642 
2643 /*
2644  * Invoked on kernel crypto failure during inbound and outbound processing.
2645  */
2646 static void
2647 ah_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
2648     ill_t *ill, ipsecah_stack_t *ahstack)
2649 {
2650         ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
2651 
2652         ah1dbg(ahstack, ("crypto failed for %s AH with 0x%x\n",
2653             is_inbound ? "inbound" : "outbound", kef_rc));
2654         ip_drop_packet(data_mp, is_inbound, ill,
2655             DROPPER(ipss, ipds_ah_crypto_failed),
2656             &ahstack->ah_dropper);
2657         AH_BUMP_STAT(ahstack, crypto_failures);
2658         if (is_inbound)
2659                 IP_AH_BUMP_STAT(ipss, in_discards);
2660         else
2661                 AH_BUMP_STAT(ahstack, out_discards);
2662 }
2663 
/*
 * Helper macros for the ah_submit_req_{inbound,outbound}() functions.
 *
 * Each macro expands to exactly one statement (do { ... } while (0)), so
 * it is safe under an unbraced if/else, and every argument is
 * parenthesized in the expansion.  The pointer argument (_cr, data, mac)
 * is evaluated more than once; pass an expression without side effects.
 */

/*
 * Set up an asynchronous call request: _cr MUST point to a modifiable
 * crypto_call_req_t.  _mp is stored as the callback argument and
 * _callback as the completion function.
 */
#define AH_INIT_CALLREQ(_cr, _mp, _callback) do {                       \
        (_cr)->cr_flag = CRYPTO_SKIP_REQID | CRYPTO_ALWAYS_QUEUE;       \
        (_cr)->cr_callback_arg = (_mp);                                 \
        (_cr)->cr_callback_func = (_callback);                          \
} while (0)

/*
 * Describe the input data: msglen bytes of the mblk chain mblk, starting
 * at offset 0.
 */
#define AH_INIT_CRYPTO_DATA(data, msglen, mblk) do {                    \
        (data)->cd_format = CRYPTO_DATA_MBLK;                           \
        (data)->cd_mp = (mblk);                                         \
        (data)->cd_offset = 0;                                          \
        (data)->cd_length = (msglen);                                   \
} while (0)

/*
 * Describe the MAC buffer: a raw buffer of icvlen bytes at icvbuf.
 * icvlen is expanded twice.
 */
#define AH_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) do {                    \
        (mac)->cd_format = CRYPTO_DATA_RAW;                             \
        (mac)->cd_offset = 0;                                           \
        (mac)->cd_length = (icvlen);                                    \
        (mac)->cd_raw.iov_base = (icvbuf);                              \
        (mac)->cd_raw.iov_len = (icvlen);                               \
} while (0)
2691 
2692 /*
2693  * Submit an inbound packet for processing by the crypto framework.
2694  */
2695 static mblk_t *
2696 ah_submit_req_inbound(mblk_t *phdr_mp, ip_recv_attr_t *ira,
2697     size_t skip_len, uint32_t ah_offset, ipsa_t *assoc)
2698 {
2699         int kef_rc;
2700         mblk_t *mp;
2701         crypto_call_req_t call_req, *callrp;
2702         uint_t icv_len = assoc->ipsa_mac_len;
2703         crypto_ctx_template_t ctx_tmpl;
2704         ipsecah_stack_t *ahstack;
2705         ipsec_crypto_t  *ic, icstack;
2706         boolean_t force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2707 
2708         ahstack = ira->ira_ill->ill_ipst->ips_netstack->netstack_ipsecah;
2709 
2710         ASSERT(phdr_mp != NULL);
2711         ASSERT(phdr_mp->b_datap->db_type == M_DATA);
2712 
2713         if (force) {
2714                 /* We are doing asynch; allocate mblks to hold state */
2715                 if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
2716                     (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2717                         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2718                         ip_drop_input("ipIfStatsInDiscards", phdr_mp,
2719                             ira->ira_ill);
2720                         freemsg(phdr_mp);
2721                         return (NULL);
2722                 }
2723 
2724                 linkb(mp, phdr_mp);
2725                 callrp = &call_req;
2726                 AH_INIT_CALLREQ(callrp, mp, ah_kcf_callback_inbound);
2727         } else {
2728                 /*
2729                  * If we know we are going to do sync then ipsec_crypto_t
2730                  * should be on the stack.
2731                  */
2732                 ic = &icstack;
2733                 bzero(ic, sizeof (*ic));
2734                 callrp = NULL;
2735         }
2736 
2737         /* init arguments for the crypto framework */
2738         AH_INIT_CRYPTO_DATA(&ic->ic_crypto_data, AH_MSGSIZE(phdr_mp),
2739             phdr_mp);
2740 
2741         AH_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, icv_len,
2742             (char *)phdr_mp->b_cont->b_rptr - skip_len + ah_offset +
2743             sizeof (ah_t));
2744 
2745         ic->ic_skip_len = skip_len;
2746 
2747         IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, ctx_tmpl);
2748 
2749         /* call KEF to do the MAC operation */
2750         kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
2751             &ic->ic_crypto_data, &assoc->ipsa_kcfauthkey, ctx_tmpl,
2752             &ic->ic_crypto_mac, callrp);
2753 
2754         switch (kef_rc) {
2755         case CRYPTO_SUCCESS:
2756                 AH_BUMP_STAT(ahstack, crypto_sync);
2757                 phdr_mp = ah_auth_in_done(phdr_mp, ira, ic);
2758                 if (force) {
2759                         /* Free mp after we are done with ic */
2760                         mp = ipsec_free_crypto_data(mp);
2761                         (void) ip_recv_attr_free_mblk(mp);
2762                 }
2763                 return (phdr_mp);
2764         case CRYPTO_QUEUED:
2765                 /* ah_kcf_callback_inbound() will be invoked on completion */
2766                 AH_BUMP_STAT(ahstack, crypto_async);
2767                 return (NULL);
2768         case CRYPTO_INVALID_MAC:
2769                 /* Free mp after we are done with ic */
2770                 AH_BUMP_STAT(ahstack, crypto_sync);
2771                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2772                 ah_log_bad_auth(phdr_mp, ira, ic);
2773                 /* phdr_mp was passed to ip_drop_packet */
2774                 if (force) {
2775                         mp = ipsec_free_crypto_data(mp);
2776                         (void) ip_recv_attr_free_mblk(mp);
2777                 }
2778                 return (NULL);
2779         }
2780 
2781         if (force) {
2782                 mp = ipsec_free_crypto_data(mp);
2783                 phdr_mp = ip_recv_attr_free_mblk(mp);
2784         }
2785         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2786         ah_crypto_failed(phdr_mp, B_TRUE, kef_rc, ira->ira_ill, ahstack);
2787         /* phdr_mp was passed to ip_drop_packet */
2788         return (NULL);
2789 }
2790 
2791 /*
2792  * Submit an outbound packet for processing by the crypto framework.
2793  */
2794 static mblk_t *
2795 ah_submit_req_outbound(mblk_t *phdr_mp, ip_xmit_attr_t *ixa,
2796     size_t skip_len, ipsa_t *assoc)
2797 {
2798         int kef_rc;
2799         mblk_t *mp;
2800         crypto_call_req_t call_req, *callrp;
2801         uint_t icv_len = assoc->ipsa_mac_len;
2802         ipsecah_stack_t *ahstack;
2803         ipsec_crypto_t  *ic, icstack;
2804         ill_t           *ill = ixa->ixa_nce->nce_ill;
2805         boolean_t force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2806 
2807         ahstack = ill->ill_ipst->ips_netstack->netstack_ipsecah;
2808 
2809         ASSERT(phdr_mp != NULL);
2810         ASSERT(phdr_mp->b_datap->db_type == M_DATA);
2811 
2812         if (force) {
2813                 /* We are doing asynch; allocate mblks to hold state */
2814                 if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
2815                     (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2816                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2817                         ip_drop_output("ipIfStatsOutDiscards", phdr_mp, ill);
2818                         freemsg(phdr_mp);
2819                         return (NULL);
2820                 }
2821                 linkb(mp, phdr_mp);
2822                 callrp = &call_req;
2823                 AH_INIT_CALLREQ(callrp, mp, ah_kcf_callback_outbound);
2824         } else {
2825                 /*
2826                  * If we know we are going to do sync then ipsec_crypto_t
2827                  * should be on the stack.
2828                  */
2829                 ic = &icstack;
2830                 bzero(ic, sizeof (*ic));
2831                 callrp = NULL;
2832         }
2833 
2834         /* init arguments for the crypto framework */
2835         AH_INIT_CRYPTO_DATA(&ic->ic_crypto_data, AH_MSGSIZE(phdr_mp),
2836             phdr_mp);
2837 
2838         AH_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, icv_len,
2839             (char *)phdr_mp->b_wptr);
2840 
2841         ic->ic_skip_len = skip_len;
2842 
2843         ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
2844 
2845         /* call KEF to do the MAC operation */
2846         kef_rc = crypto_mac(&assoc->ipsa_amech, &ic->ic_crypto_data,
2847             &assoc->ipsa_kcfauthkey, assoc->ipsa_authtmpl,
2848             &ic->ic_crypto_mac, callrp);
2849 
2850         switch (kef_rc) {
2851         case CRYPTO_SUCCESS:
2852                 AH_BUMP_STAT(ahstack, crypto_sync);
2853                 phdr_mp = ah_auth_out_done(phdr_mp, ixa, ic);
2854                 if (force) {
2855                         /* Free mp after we are done with ic */
2856                         mp = ipsec_free_crypto_data(mp);
2857                         (void) ip_xmit_attr_free_mblk(mp);
2858                 }
2859                 return (phdr_mp);
2860         case CRYPTO_QUEUED:
2861                 /* ah_kcf_callback_outbound() will be invoked on completion */
2862                 AH_BUMP_STAT(ahstack, crypto_async);
2863                 return (NULL);
2864         }
2865 
2866         if (force) {
2867                 mp = ipsec_free_crypto_data(mp);
2868                 phdr_mp = ip_xmit_attr_free_mblk(mp);
2869         }
2870         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2871         ah_crypto_failed(phdr_mp, B_FALSE, kef_rc, NULL, ahstack);
2872         /* phdr_mp was passed to ip_drop_packet */
2873         return (NULL);
2874 }
2875 
2876 /*
2877  * This function constructs a pseudo header by looking at the IP header
2878  * and options if any. This is called for both outbound and inbound,
2879  * before computing the ICV.
2880  */
2881 static mblk_t *
2882 ah_process_ip_options_v6(mblk_t *mp, ipsa_t *assoc, int *length_to_skip,
2883     uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack)
2884 {
2885         ip6_t   *ip6h;
2886         ip6_t   *oip6h;
2887         mblk_t  *phdr_mp;
2888         int option_length;
2889         uint_t  ah_align_sz;
2890         uint_t ah_offset;
2891         int hdr_size;
2892 
2893         /*
2894          * Allocate space for the authentication data also. It is
2895          * useful both during the ICV calculation where we need to
2896          * feed in zeroes and while sending the datagram back to IP
2897          * where we will be using the same space.
2898          *
2899          * We need to allocate space for padding bytes if it is not
2900          * a multiple of IPV6_PADDING_ALIGN.
2901          *
2902          * In addition, we allocate space for the ICV computed by
2903          * the kernel crypto framework, saving us a separate kmem
2904          * allocation down the road.
2905          */
2906 
2907         ah_align_sz = P2ALIGN(ah_data_sz + IPV6_PADDING_ALIGN - 1,
2908             IPV6_PADDING_ALIGN);
2909 
2910         ASSERT(ah_align_sz >= ah_data_sz);
2911 
2912         hdr_size = ipsec_ah_get_hdr_size_v6(mp, B_FALSE);
2913         option_length = hdr_size - IPV6_HDR_LEN;
2914 
2915         /* This was not included in ipsec_ah_get_hdr_size_v6() */
2916         hdr_size += (sizeof (ah_t) + ah_align_sz);
2917 
2918         if (!outbound && (MBLKL(mp) < hdr_size)) {
2919                 /*
2920                  * We have post-AH header options in a separate mblk,
2921                  * a pullup is required.
2922                  */
2923                 if (!pullupmsg(mp, hdr_size))
2924                         return (NULL);
2925         }
2926 
2927         if ((phdr_mp = allocb_tmpl(hdr_size + ah_data_sz, mp)) == NULL) {
2928                 return (NULL);
2929         }
2930 
2931         oip6h = (ip6_t *)mp->b_rptr;
2932 
2933         /*
2934          * Form the basic IP header first. Zero out the header
2935          * so that the mutable fields are zeroed out.
2936          */
2937         ip6h = (ip6_t *)phdr_mp->b_rptr;
2938         bzero(ip6h, sizeof (ip6_t));
2939         ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2940 
2941         if (outbound) {
2942                 /*
2943                  * Include the size of AH and authentication data.
2944                  * This is how our recipient would compute the
2945                  * authentication data. Look at what we do in the
2946                  * inbound case below.
2947                  */
2948                 ip6h->ip6_plen = htons(ntohs(oip6h->ip6_plen) +
2949                     sizeof (ah_t) + ah_align_sz);
2950         } else {
2951                 ip6h->ip6_plen = oip6h->ip6_plen;
2952         }
2953 
2954         ip6h->ip6_src = oip6h->ip6_src;
2955         ip6h->ip6_dst = oip6h->ip6_dst;
2956 
2957         *length_to_skip = IPV6_HDR_LEN;
2958         if (option_length == 0) {
2959                 /* Form the AH header */
2960                 ip6h->ip6_nxt = IPPROTO_AH;
2961                 ((ah_t *)(ip6h + 1))->ah_nexthdr = oip6h->ip6_nxt;
2962                 ah_offset = *length_to_skip;
2963         } else {
2964                 ip6h->ip6_nxt = oip6h->ip6_nxt;
2965                 /* option_length does not include the AH header's size */
2966                 *length_to_skip += option_length;
2967 
2968                 ah_offset = ah_fix_phdr_v6(ip6h, oip6h, outbound, B_FALSE);
2969                 if (ah_offset == 0) {
2970                         return (NULL);
2971                 }
2972         }
2973 
2974         if (!ah_finish_up(((ah_t *)((uint8_t *)ip6h + ah_offset)),
2975             (outbound ? NULL : ((ah_t *)((uint8_t *)oip6h + ah_offset))),
2976             assoc, ah_data_sz, ah_align_sz, ahstack)) {
2977                 freeb(phdr_mp);
2978                 /*
2979                  * Returning NULL will tell the caller to
2980                  * IPSA_REFELE(), free the memory, etc.
2981                  */
2982                 return (NULL);
2983         }
2984 
2985         phdr_mp->b_wptr = ((uint8_t *)ip6h + ah_offset + sizeof (ah_t) +
2986             ah_align_sz);
2987         if (!outbound)
2988                 *length_to_skip += sizeof (ah_t) + ah_align_sz;
2989         return (phdr_mp);
2990 }
2991 
2992 /*
2993  * This function constructs a pseudo header by looking at the IP header
2994  * and options if any. This is called for both outbound and inbound,
2995  * before computing the ICV.
2996  */
2997 static mblk_t *
2998 ah_process_ip_options_v4(mblk_t *mp, ipsa_t *assoc, int *length_to_skip,
2999     uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack)
3000 {
3001         ipoptp_t opts;
3002         uint32_t option_length;
3003         ipha_t  *ipha;
3004         ipha_t  *oipha;
3005         mblk_t  *phdr_mp;
3006         int      size;
3007         uchar_t *optptr;
3008         uint8_t optval;
3009         uint8_t optlen;
3010         ipaddr_t dst;
3011         uint32_t v_hlen_tos_len;
3012         int ip_hdr_length;
3013         uint_t  ah_align_sz;
3014         uint32_t off;
3015 
3016 #ifdef  _BIG_ENDIAN
3017 #define V_HLEN  (v_hlen_tos_len >> 24)
3018 #else
3019 #define V_HLEN  (v_hlen_tos_len & 0xFF)
3020 #endif
3021 
3022         oipha = (ipha_t *)mp->b_rptr;
3023         v_hlen_tos_len = ((uint32_t *)oipha)[0];
3024 
3025         /*
3026          * Allocate space for the authentication data also. It is
3027          * useful both during the ICV calculation where we need to
3028          * feed in zeroes and while sending the datagram back to IP
3029          * where we will be using the same space.
3030          *
3031          * We need to allocate space for padding bytes if it is not
3032          * a multiple of IPV4_PADDING_ALIGN.
3033          *
3034          * In addition, we allocate space for the ICV computed by
3035          * the kernel crypto framework, saving us a separate kmem
3036          * allocation down the road.
3037          */
3038 
3039         ah_align_sz = P2ALIGN(ah_data_sz + IPV4_PADDING_ALIGN - 1,
3040             IPV4_PADDING_ALIGN);
3041 
3042         ASSERT(ah_align_sz >= ah_data_sz);
3043 
3044         size = IP_SIMPLE_HDR_LENGTH + sizeof (ah_t) + ah_align_sz +
3045             ah_data_sz;
3046 
3047         if (V_HLEN != IP_SIMPLE_HDR_VERSION) {
3048                 option_length = oipha->ipha_version_and_hdr_length -
3049                     (uint8_t)((IP_VERSION << 4) +
3050                     IP_SIMPLE_HDR_LENGTH_IN_WORDS);
3051                 option_length <<= 2;
3052                 size += option_length;
3053         }
3054 
3055         if ((phdr_mp = allocb_tmpl(size, mp)) == NULL) {
3056                 return (NULL);
3057         }
3058 
3059         /*
3060          * Form the basic IP header first.
3061          */
3062         ipha = (ipha_t *)phdr_mp->b_rptr;
3063         ipha->ipha_version_and_hdr_length = oipha->ipha_version_and_hdr_length;
3064         ipha->ipha_type_of_service = 0;
3065 
3066         if (outbound) {
3067                 /*
3068                  * Include the size of AH and authentication data.
3069                  * This is how our recipient would compute the
3070                  * authentication data. Look at what we do in the
3071                  * inbound case below.
3072                  */
3073                 ipha->ipha_length = ntohs(htons(oipha->ipha_length) +
3074                     sizeof (ah_t) + ah_align_sz);
3075         } else {
3076                 ipha->ipha_length = oipha->ipha_length;
3077         }
3078 
3079         ipha->ipha_ident = oipha->ipha_ident;
3080         ipha->ipha_fragment_offset_and_flags = 0;
3081         ipha->ipha_ttl = 0;
3082         ipha->ipha_protocol = IPPROTO_AH;
3083         ipha->ipha_hdr_checksum = 0;
3084         ipha->ipha_src = oipha->ipha_src;
3085         ipha->ipha_dst = dst = oipha->ipha_dst;
3086 
3087         /*
3088          * If there is no option to process return now.
3089          */
3090         ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
3091 
3092         if (V_HLEN == IP_SIMPLE_HDR_VERSION) {
3093                 /* Form the AH header */
3094                 goto ah_hdr;
3095         }
3096 
3097         ip_hdr_length += option_length;
3098 
3099         /*
3100          * We have options. In the outbound case for source route,
3101          * ULP has already moved the first hop, which is now in
3102          * ipha_dst. We need the final destination for the calculation
3103          * of authentication data. And also make sure that mutable
3104          * and experimental fields are zeroed out in the IP options.
3105          */
3106 
3107         bcopy(&oipha[1], &ipha[1], option_length);
3108 
3109         for (optval = ipoptp_first(&opts, ipha);
3110             optval != IPOPT_EOL;
3111             optval = ipoptp_next(&opts)) {
3112                 optptr = opts.ipoptp_cur;
3113                 optlen = opts.ipoptp_len;
3114                 switch (optval) {
3115                 case IPOPT_EXTSEC:
3116                 case IPOPT_COMSEC:
3117                 case IPOPT_RA:
3118                 case IPOPT_SDMDD:
3119                 case IPOPT_SECURITY:
3120                         /*
3121                          * These options are Immutable, leave them as-is.
3122                          * Note that IPOPT_NOP is also Immutable, but it
3123                          * was skipped by ipoptp_next() and thus remains
3124                          * intact in the header.
3125                          */
3126                         break;
3127                 case IPOPT_SSRR:
3128                 case IPOPT_LSRR:
3129                         if ((opts.ipoptp_flags & IPOPTP_ERROR) != 0)
3130                                 goto bad_ipv4opt;
3131                         /*
3132                          * These two are mutable and will be zeroed, but
3133                          * first get the final destination.
3134                          */
3135                         off = optptr[IPOPT_OFFSET];
3136                         /*
3137                          * If one of the conditions is true, it means
3138                          * end of options and dst already has the right
3139                          * value. So, just fall through.
3140                          */
3141                         if (!(optlen < IP_ADDR_LEN || off > optlen - 3)) {
3142                                 off = optlen - IP_ADDR_LEN;
3143                                 bcopy(&optptr[off], &dst, IP_ADDR_LEN);
3144                         }
3145                         /* FALLTHRU */
3146                 case IPOPT_RR:
3147                 case IPOPT_TS:
3148                 case IPOPT_SATID:
3149                 default:
3150                         /*
3151                          * optlen should include from the beginning of an
3152                          * option.
3153                          * NOTE : Stream Identifier Option (SID): RFC 791
3154                          * shows the bit pattern of optlen as 2 and documents
3155                          * the length as 4. We assume it to be 2 here.
3156                          */
3157                         bzero(optptr, optlen);
3158                         break;
3159                 }
3160         }
3161 
3162         if ((opts.ipoptp_flags & IPOPTP_ERROR) != 0) {
3163 bad_ipv4opt:
3164                 ah1dbg(ahstack, ("AH : bad IPv4 option"));
3165                 freeb(phdr_mp);
3166                 return (NULL);
3167         }
3168 
3169         /*
3170          * Don't change ipha_dst for an inbound datagram as it points
3171          * to the right value. Only for the outbound with LSRR/SSRR,
3172          * because of ip_massage_options called by the ULP, ipha_dst
3173          * points to the first hop and we need to use the final
3174          * destination for computing the ICV.
3175          */
3176 
3177         if (outbound)
3178                 ipha->ipha_dst = dst;
3179 ah_hdr:
3180         ((ah_t *)((uint8_t *)ipha + ip_hdr_length))->ah_nexthdr =
3181             oipha->ipha_protocol;
3182         if (!ah_finish_up(((ah_t *)((uint8_t *)ipha + ip_hdr_length)),
3183             (outbound ? NULL : ((ah_t *)((uint8_t *)oipha + ip_hdr_length))),
3184             assoc, ah_data_sz, ah_align_sz, ahstack)) {
3185                 freeb(phdr_mp);
3186                 /*
3187                  * Returning NULL will tell the caller to IPSA_REFELE(), free
3188                  * the memory, etc.
3189                  */
3190                 return (NULL);
3191         }
3192 
3193         phdr_mp->b_wptr = ((uchar_t *)ipha + ip_hdr_length +
3194             sizeof (ah_t) + ah_align_sz);
3195 
3196         ASSERT(phdr_mp->b_wptr <= phdr_mp->b_datap->db_lim);
3197         if (outbound)
3198                 *length_to_skip = ip_hdr_length;
3199         else
3200                 *length_to_skip = ip_hdr_length + sizeof (ah_t) + ah_align_sz;
3201         return (phdr_mp);
3202 }
3203 
3204 /*
3205  * Authenticate an outbound datagram. This function is called
3206  * whenever IP sends an outbound datagram that needs authentication.
3207  * Returns a modified packet if done. Returns NULL if error or queued.
3208  * If error return then ipIfStatsOutDiscards has been increased.
3209  */
3210 static mblk_t *
3211 ah_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
3212 {
3213         mblk_t *phdr_mp;
3214         ipsa_t *assoc;
3215         int length_to_skip;
3216         uint_t ah_align_sz;
3217         uint_t age_bytes;
3218         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
3219         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3220         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3221         ill_t           *ill = ixa->ixa_nce->nce_ill;
3222         boolean_t       need_refrele = B_FALSE;
3223 
3224         /*
3225          * Construct the chain of mblks
3226          *
3227          * PSEUDO_HDR->DATA
3228          *
3229          * one by one.
3230          */
3231 
3232         AH_BUMP_STAT(ahstack, out_requests);
3233 
3234         ASSERT(data_mp->b_datap->db_type == M_DATA);
3235 
3236         assoc = ixa->ixa_ipsec_ah_sa;
3237         ASSERT(assoc != NULL);
3238 
3239 
3240         /*
3241          * Get the outer IP header in shape to escape this system..
3242          */
3243         if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
3244                 /*
3245                  * Need to update packet with any CIPSO option and update
3246                  * ixa_tsl to capture the new label.
3247                  * We allocate a separate ixa for that purpose.
3248                  */
3249                 ixa = ip_xmit_attr_duplicate(ixa);
3250                 if (ixa == NULL) {
3251                         ip_drop_packet(data_mp, B_FALSE, ill,
3252                             DROPPER(ipss, ipds_ah_nomem),
3253                             &ahstack->ah_dropper);
3254                         return (NULL);
3255                 }
3256                 need_refrele = B_TRUE;
3257 
3258                 label_hold(assoc->ipsa_otsl);
3259                 ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);
3260 
3261                 data_mp = sadb_whack_label(data_mp, assoc, ixa,
3262                     DROPPER(ipss, ipds_ah_nomem), &ahstack->ah_dropper);
3263                 if (data_mp == NULL) {
3264                         /* Packet dropped by sadb_whack_label */
3265                         ixa_refrele(ixa);
3266                         return (NULL);
3267                 }
3268         }
3269 
3270         /*
3271          * Age SA according to number of bytes that will be sent after
3272          * adding the AH header, ICV, and padding to the packet.
3273          */
3274 
3275         if (ixa->ixa_flags & IXAF_IS_IPV4) {
3276                 ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
3277                 ah_align_sz = P2ALIGN(assoc->ipsa_mac_len +
3278                     IPV4_PADDING_ALIGN - 1, IPV4_PADDING_ALIGN);
3279                 age_bytes = ntohs(ipha->ipha_length) + sizeof (ah_t) +
3280                     ah_align_sz;
3281         } else {
3282                 ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
3283                 ah_align_sz = P2ALIGN(assoc->ipsa_mac_len +
3284                     IPV6_PADDING_ALIGN - 1, IPV6_PADDING_ALIGN);
3285                 age_bytes = sizeof (ip6_t) + ntohs(ip6h->ip6_plen) +
3286                     sizeof (ah_t) + ah_align_sz;
3287         }
3288 
3289         if (!ah_age_bytes(assoc, age_bytes, B_FALSE)) {
3290                 /* rig things as if ipsec_getassocbyconn() failed */
3291                 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
3292                     "AH association 0x%x, dst %s had bytes expire.\n",
3293                     ntohl(assoc->ipsa_spi), assoc->ipsa_dstaddr, AF_INET,
3294                     ahstack->ipsecah_netstack);
3295                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3296                 ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
3297                 freemsg(data_mp);
3298                 if (need_refrele)
3299                         ixa_refrele(ixa);
3300                 return (NULL);
3301         }
3302 
3303         /*
3304          * XXX We need to have fixed up the outer label before we get here.
3305          * (AH is computing the checksum over the outer label).
3306          */
3307 
3308         /*
3309          * Insert pseudo header:
3310          * [IP, ULP] => [IP, AH, ICV] -> ULP
3311          */
3312 
3313         if (ixa->ixa_flags & IXAF_IS_IPV4) {
3314                 phdr_mp = ah_process_ip_options_v4(data_mp, assoc,
3315                     &length_to_skip, assoc->ipsa_mac_len, B_TRUE, ahstack);
3316         } else {
3317                 phdr_mp = ah_process_ip_options_v6(data_mp, assoc,
3318                     &length_to_skip, assoc->ipsa_mac_len, B_TRUE, ahstack);
3319         }
3320 
3321         if (phdr_mp == NULL) {
3322                 AH_BUMP_STAT(ahstack, out_discards);
3323                 ip_drop_packet(data_mp, B_FALSE, ixa->ixa_nce->nce_ill,
3324                     DROPPER(ipss, ipds_ah_bad_v4_opts),
3325                     &ahstack->ah_dropper);
3326                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3327                 if (need_refrele)
3328                         ixa_refrele(ixa);
3329                 return (NULL);
3330         }
3331 
3332         phdr_mp->b_cont = data_mp;
3333         data_mp->b_rptr += length_to_skip;
3334         data_mp = phdr_mp;
3335 
3336         /*
3337          * At this point data_mp points to
3338          * an mblk containing the pseudo header (IP header,
3339          * AH header, and ICV with mutable fields zero'ed out).
3340          * mp points to the mblk containing the ULP data. The original
3341          * IP header is kept before the ULP data in data_mp.
3342          */
3343 
3344         /* submit MAC request to KCF */
3345         data_mp = ah_submit_req_outbound(data_mp, ixa, length_to_skip, assoc);
3346         if (need_refrele)
3347                 ixa_refrele(ixa);
3348         return (data_mp);
3349 }
3350 
3351 static mblk_t *
3352 ah_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
3353 {
3354         ah_t            *ah = (ah_t *)arg;
3355         ipsa_t          *assoc = ira->ira_ipsec_ah_sa;
3356         int             length_to_skip;
3357         int             ah_length;
3358         mblk_t          *phdr_mp;
3359         uint32_t        ah_offset;
3360         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3361         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3362         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3363 
3364         ASSERT(assoc != NULL);
3365 
3366         /*
3367          * We may wish to check replay in-range-only here as an optimization.
3368          * Include the reality check of ipsa->ipsa_replay >
3369          * ipsa->ipsa_replay_wsize for times when it's the first N packets,
3370          * where N == ipsa->ipsa_replay_wsize.
3371          *
3372          * Another check that may come here later is the "collision" check.
3373          * If legitimate packets flow quickly enough, this won't be a problem,
3374          * but collisions may cause authentication algorithm crunching to
3375          * take place when it doesn't need to.
3376          */
3377         if (!sadb_replay_peek(assoc, ah->ah_replay)) {
3378                 AH_BUMP_STAT(ahstack, replay_early_failures);
3379                 IP_AH_BUMP_STAT(ipss, in_discards);
3380                 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
3381                     DROPPER(ipss, ipds_ah_early_replay),
3382                     &ahstack->ah_dropper);
3383                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3384                 return (NULL);
3385         }
3386 
3387         /*
3388          * The offset of the AH header can be computed from its pointer
3389          * within the data mblk, which was pulled up until the AH header
3390          * by ipsec_inbound_ah_sa() during SA selection.
3391          */
3392         ah_offset = (uchar_t *)ah - data_mp->b_rptr;
3393 
3394         /*
3395          * We need to pullup until the ICV before we call
3396          * ah_process_ip_options_v6.
3397          */
3398         ah_length = (ah->ah_length << 2) + 8;
3399 
3400         /*
3401          * NOTE : If we want to use any field of IP/AH header, you need
3402          * to re-assign following the pullup.
3403          */
3404         if (((uchar_t *)ah + ah_length) > data_mp->b_wptr) {
3405                 if (!pullupmsg(data_mp, (uchar_t *)ah + ah_length -
3406                     data_mp->b_rptr)) {
3407                         (void) ipsec_rl_strlog(ns, info.mi_idnum, 0, 0,
3408                             SL_WARN | SL_ERROR,
3409                             "ah_inbound: Small AH header\n");
3410                         IP_AH_BUMP_STAT(ipss, in_discards);
3411                         ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
3412                             DROPPER(ipss, ipds_ah_nomem),
3413                             &ahstack->ah_dropper);
3414                         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3415                         return (NULL);
3416                 }
3417         }
3418 
3419         /*
3420          * Insert pseudo header:
3421          * [IP, ULP] => [IP, AH, ICV] -> ULP
3422          */
3423         if (ira->ira_flags & IRAF_IS_IPV4) {
3424                 phdr_mp = ah_process_ip_options_v4(data_mp, assoc,
3425                     &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack);
3426         } else {
3427                 phdr_mp = ah_process_ip_options_v6(data_mp, assoc,
3428                     &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack);
3429         }
3430 
3431         if (phdr_mp == NULL) {
3432                 IP_AH_BUMP_STAT(ipss, in_discards);
3433                 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
3434                     ((ira->ira_flags & IRAF_IS_IPV4) ?
3435                     DROPPER(ipss, ipds_ah_bad_v4_opts) :
3436                     DROPPER(ipss, ipds_ah_bad_v6_hdrs)),
3437                     &ahstack->ah_dropper);
3438                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3439                 return (NULL);
3440         }
3441 
3442         phdr_mp->b_cont = data_mp;
3443         data_mp->b_rptr += length_to_skip;
3444         data_mp = phdr_mp;
3445 
3446         /* submit request to KCF */
3447         return (ah_submit_req_inbound(data_mp, ira, length_to_skip, ah_offset,
3448             assoc));
3449 }
3450 
3451 /*
3452  * Invoked after processing of an inbound packet by the
3453  * kernel crypto framework. Called by ah_submit_req() for a sync request,
3454  * or by the kcf callback for an async request.
3455  * Returns NULL if the mblk chain is consumed.
3456  */
3457 static mblk_t *
3458 ah_auth_in_done(mblk_t *phdr_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
3459 {
3460         ipha_t *ipha;
3461         uint_t ah_offset = 0;
3462         mblk_t *mp;
3463         int align_len, newpos;
3464         ah_t *ah;
3465         uint32_t length;
3466         uint32_t *dest32;
3467         uint8_t *dest;
3468         boolean_t isv4;
3469         ip6_t *ip6h;
3470         uint_t icv_len;
3471         ipsa_t *assoc;
3472         kstat_named_t *counter;
3473         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3474         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3475         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3476 
3477         isv4 = (ira->ira_flags & IRAF_IS_IPV4);
3478         assoc = ira->ira_ipsec_ah_sa;
3479         icv_len = (uint_t)ic->ic_crypto_mac.cd_raw.iov_len;
3480 
3481         if (phdr_mp == NULL) {
3482                 ip_drop_packet(phdr_mp, B_TRUE, ira->ira_ill,
3483                     DROPPER(ipss, ipds_ah_nomem),
3484                     &ahstack->ah_dropper);
3485                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3486                 return (NULL);
3487         }
3488 
3489         mp = phdr_mp->b_cont;
3490         if (mp == NULL) {
3491                 ip_drop_packet(phdr_mp, B_TRUE, ira->ira_ill,
3492                     DROPPER(ipss, ipds_ah_nomem),
3493                     &ahstack->ah_dropper);
3494                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3495                 return (NULL);
3496         }
3497         mp->b_rptr -= ic->ic_skip_len;
3498 
3499         ah_set_usetime(assoc, B_TRUE);
3500 
3501         if (isv4) {
3502                 ipha = (ipha_t *)mp->b_rptr;
3503                 ah_offset = ipha->ipha_version_and_hdr_length -
3504                     (uint8_t)((IP_VERSION << 4));
3505                 ah_offset <<= 2;
3506                 align_len = P2ALIGN(icv_len + IPV4_PADDING_ALIGN - 1,
3507                     IPV4_PADDING_ALIGN);
3508         } else {
3509                 ip6h = (ip6_t *)mp->b_rptr;
3510                 ah_offset = ipsec_ah_get_hdr_size_v6(mp, B_TRUE);
3511                 ASSERT((mp->b_wptr - mp->b_rptr) >= ah_offset);
3512                 align_len = P2ALIGN(icv_len + IPV6_PADDING_ALIGN - 1,
3513                     IPV6_PADDING_ALIGN);
3514         }
3515 
3516         ah = (ah_t *)(mp->b_rptr + ah_offset);
3517         newpos = sizeof (ah_t) + align_len;
3518 
3519         /*
3520          * We get here only when authentication passed.
3521          */
3522 
3523         ah3dbg(ahstack, ("AH succeeded, checking replay\n"));
3524         AH_BUMP_STAT(ahstack, good_auth);
3525 
3526         if (!sadb_replay_check(assoc, ah->ah_replay)) {
3527                 int af;
3528                 void *addr;
3529 
3530                 if (isv4) {
3531                         addr = &ipha->ipha_dst;
3532                         af = AF_INET;
3533                 } else {
3534                         addr = &ip6h->ip6_dst;
3535                         af = AF_INET6;
3536                 }
3537 
3538                 /*
3539                  * Log the event. As of now we print out an event.
3540                  * Do not print the replay failure number, or else
3541                  * syslog cannot collate the error messages.  Printing
3542                  * the replay number that failed (or printing to the
3543                  * console) opens a denial-of-service attack.
3544                  */
3545                 AH_BUMP_STAT(ahstack, replay_failures);
3546                 ipsec_assocfailure(info.mi_idnum, 0, 0,
3547                     SL_ERROR | SL_WARN,
3548                     "Replay failed for AH spi %x, dst_addr %s",
3549                     assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
3550                 counter = DROPPER(ipss, ipds_ah_replay);
3551                 goto ah_in_discard;
3552         }
3553 
3554         /*
3555          * We need to remove the AH header from the original
3556          * datagram. Best way to do this is to move the pre-AH headers
3557          * forward in the (relatively simple) IPv4 case.  In IPv6, it's
3558          * a bit more complicated because of IPv6's next-header chaining,
3559          * but it's doable.
3560          */
3561         if (isv4) {
3562                 /*
3563                  * Assign the right protocol, adjust the length as we
3564                  * are removing the AH header and adjust the checksum to
3565                  * account for the protocol and length.
3566                  */
3567                 length = ntohs(ipha->ipha_length);
3568                 if (!ah_age_bytes(assoc, length, B_TRUE)) {
3569                         /* The ipsa has hit hard expiration, LOG and AUDIT. */
3570                         ipsec_assocfailure(info.mi_idnum, 0, 0,
3571                             SL_ERROR | SL_WARN,
3572                             "AH Association 0x%x, dst %s had bytes expire.\n",
3573                             assoc->ipsa_spi, assoc->ipsa_dstaddr,
3574                             AF_INET, ahstack->ipsecah_netstack);
3575                         AH_BUMP_STAT(ahstack, bytes_expired);
3576                         counter = DROPPER(ipss, ipds_ah_bytes_expire);
3577                         goto ah_in_discard;
3578                 }
3579                 ipha->ipha_protocol = ah->ah_nexthdr;
3580                 length -= newpos;
3581 
3582                 ipha->ipha_length = htons((uint16_t)length);
3583                 ipha->ipha_hdr_checksum = 0;
3584                 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
3585         } else {
3586                 uchar_t *whereptr;
3587                 int hdrlen;
3588                 uint8_t *nexthdr;
3589                 ip6_hbh_t *hbhhdr;
3590                 ip6_dest_t *dsthdr;
3591                 ip6_rthdr0_t *rthdr;
3592 
3593                 /*
3594                  * Make phdr_mp hold until the AH header and make
3595                  * mp hold everything past AH header.
3596                  */
3597                 length = ntohs(ip6h->ip6_plen);
3598                 if (!ah_age_bytes(assoc, length + sizeof (ip6_t), B_TRUE)) {
3599                         /* The ipsa has hit hard expiration, LOG and AUDIT. */
3600                         ipsec_assocfailure(info.mi_idnum, 0, 0,
3601                             SL_ERROR | SL_WARN,
3602                             "AH Association 0x%x, dst %s had bytes "
3603                             "expire.\n", assoc->ipsa_spi, &ip6h->ip6_dst,
3604                             AF_INET6, ahstack->ipsecah_netstack);
3605                         AH_BUMP_STAT(ahstack, bytes_expired);
3606                         counter = DROPPER(ipss, ipds_ah_bytes_expire);
3607                         goto ah_in_discard;
3608                 }
3609 
3610                 /*
3611                  * Update the next header field of the header preceding
3612                  * AH with the next header field of AH. Start with the
3613                  * IPv6 header and proceed with the extension headers
3614                  * until we find what we're looking for.
3615                  */
3616                 nexthdr = &ip6h->ip6_nxt;
3617                 whereptr =  (uchar_t *)ip6h;
3618                 hdrlen = sizeof (ip6_t);
3619 
3620                 while (*nexthdr != IPPROTO_AH) {
3621                         whereptr += hdrlen;
3622                         /* Assume IP has already stripped it */
3623                         ASSERT(*nexthdr != IPPROTO_FRAGMENT);
3624                         switch (*nexthdr) {
3625                         case IPPROTO_HOPOPTS:
3626                                 hbhhdr = (ip6_hbh_t *)whereptr;
3627                                 nexthdr = &hbhhdr->ip6h_nxt;
3628                                 hdrlen = 8 * (hbhhdr->ip6h_len + 1);
3629                                 break;
3630                         case IPPROTO_DSTOPTS:
3631                                 dsthdr = (ip6_dest_t *)whereptr;
3632                                 nexthdr = &dsthdr->ip6d_nxt;
3633                                 hdrlen = 8 * (dsthdr->ip6d_len + 1);
3634                                 break;
3635                         case IPPROTO_ROUTING:
3636                                 rthdr = (ip6_rthdr0_t *)whereptr;
3637                                 nexthdr = &rthdr->ip6r0_nxt;
3638                                 hdrlen = 8 * (rthdr->ip6r0_len + 1);
3639                                 break;
3640                         }
3641                 }
3642                 *nexthdr = ah->ah_nexthdr;
3643                 length -= newpos;
3644                 ip6h->ip6_plen = htons((uint16_t)length);
3645         }
3646 
3647         /* Now that we've fixed the IP header, move it forward. */
3648         mp->b_rptr += newpos;
3649         if (IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
3650                 dest32 = (uint32_t *)(mp->b_rptr + ah_offset);
3651                 while (--dest32 >= (uint32_t *)mp->b_rptr)
3652                         *dest32 = *(dest32 - (newpos >> 2));
3653         } else {
3654                 dest = mp->b_rptr + ah_offset;
3655                 while (--dest >= mp->b_rptr)
3656                         *dest = *(dest - newpos);
3657         }
3658         freeb(phdr_mp);
3659 
3660         /*
3661          * If SA is labelled, use its label, else inherit the label
3662          */
3663         if (is_system_labeled() && (assoc->ipsa_tsl != NULL)) {
3664                 if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) {
3665                         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
3666                             DROPPER(ipss, ipds_ah_nomem), &ahstack->ah_dropper);
3667                         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3668                         return (NULL);
3669                 }
3670         }
3671 
3672         if (assoc->ipsa_state == IPSA_STATE_IDLE) {
3673                 /*
3674                  * Cluster buffering case.  Tell caller that we're
3675                  * handling the packet.
3676                  */
3677                 sadb_buf_pkt(assoc, mp, ira);
3678                 return (NULL);
3679         }
3680 
3681         return (mp);
3682 
3683 ah_in_discard:
3684         IP_AH_BUMP_STAT(ipss, in_discards);
3685         ip_drop_packet(phdr_mp, B_TRUE, ira->ira_ill, counter,
3686             &ahstack->ah_dropper);
3687         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3688         return (NULL);
3689 }
3690 
3691 /*
3692  * Invoked after processing of an outbound packet by the
3693  * kernel crypto framework, either by ah_submit_req() for a request
3694  * executed syncrhonously, or by the KEF callback for a request
3695  * executed asynchronously.
3696  */
3697 static mblk_t *
3698 ah_auth_out_done(mblk_t *phdr_mp, ip_xmit_attr_t *ixa, ipsec_crypto_t *ic)
3699 {
3700         mblk_t *mp;
3701         int align_len;
3702         uint32_t hdrs_length;
3703         uchar_t *ptr;
3704         uint32_t length;
3705         boolean_t isv4;
3706         size_t icv_len;
3707         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
3708         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3709         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3710         ill_t           *ill = ixa->ixa_nce->nce_ill;
3711 
3712         isv4 = (ixa->ixa_flags & IXAF_IS_IPV4);
3713         icv_len = ic->ic_crypto_mac.cd_raw.iov_len;
3714 
3715         mp = phdr_mp->b_cont;
3716         if (mp == NULL) {
3717                 ip_drop_packet(phdr_mp, B_FALSE, ill,
3718                     DROPPER(ipss, ipds_ah_nomem),
3719                     &ahstack->ah_dropper);
3720                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3721                 return (NULL);
3722         }
3723         mp->b_rptr -= ic->ic_skip_len;
3724 
3725         ASSERT(ixa->ixa_flags & IXAF_IPSEC_SECURE);
3726         ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
3727         ah_set_usetime(ixa->ixa_ipsec_ah_sa, B_FALSE);
3728 
3729         if (isv4) {
3730                 ipha_t *ipha;
3731                 ipha_t *nipha;
3732 
3733                 ipha = (ipha_t *)mp->b_rptr;
3734                 hdrs_length = ipha->ipha_version_and_hdr_length -
3735                     (uint8_t)((IP_VERSION << 4));
3736                 hdrs_length <<= 2;
3737                 align_len = P2ALIGN(icv_len + IPV4_PADDING_ALIGN - 1,
3738                     IPV4_PADDING_ALIGN);
3739                 /*
3740                  * phdr_mp must have the right amount of space for the
3741                  * combined IP and AH header. Copy the IP header and
3742                  * the ack_data onto AH. Note that the AH header was
3743                  * already formed before the ICV calculation and hence
3744                  * you don't have to copy it here.
3745                  */
3746                 bcopy(mp->b_rptr, phdr_mp->b_rptr, hdrs_length);
3747 
3748                 ptr = phdr_mp->b_rptr + hdrs_length + sizeof (ah_t);
3749                 bcopy(phdr_mp->b_wptr, ptr, icv_len);
3750 
3751                 /*
3752                  * Compute the new header checksum as we are assigning
3753                  * IPPROTO_AH and adjusting the length here.
3754                  */
3755                 nipha = (ipha_t *)phdr_mp->b_rptr;
3756 
3757                 nipha->ipha_protocol = IPPROTO_AH;
3758                 length = ntohs(nipha->ipha_length);
3759                 length += (sizeof (ah_t) + align_len);
3760                 nipha->ipha_length = htons((uint16_t)length);
3761                 nipha->ipha_hdr_checksum = 0;
3762                 nipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(nipha);
3763         } else {
3764                 ip6_t *ip6h;
3765                 ip6_t *nip6h;
3766                 uint_t ah_offset;
3767 
3768                 ip6h = (ip6_t *)mp->b_rptr;
3769                 nip6h = (ip6_t *)phdr_mp->b_rptr;
3770                 align_len = P2ALIGN(icv_len + IPV6_PADDING_ALIGN - 1,
3771                     IPV6_PADDING_ALIGN);
3772                 /*
3773                  * phdr_mp must have the right amount of space for the
3774                  * combined IP and AH header. Copy the IP header with
3775                  * options into the pseudo header. When we constructed
3776                  * a pseudo header, we did not copy some of the mutable
3777                  * fields. We do it now by calling ah_fix_phdr_v6()
3778                  * with the last argument B_TRUE. It returns the
3779                  * ah_offset into the pseudo header.
3780                  */
3781 
3782                 bcopy(ip6h, nip6h, IPV6_HDR_LEN);
3783                 ah_offset = ah_fix_phdr_v6(nip6h, ip6h, B_TRUE, B_TRUE);
3784                 ASSERT(ah_offset != 0);
3785                 /*
3786                  * phdr_mp can hold exactly the whole IP header with options
3787                  * plus the AH header also. Thus subtracting the AH header's
3788                  * size should give exactly how much of the original header
3789                  * should be skipped.
3790                  */
3791                 hdrs_length = (phdr_mp->b_wptr - phdr_mp->b_rptr) -
3792                     sizeof (ah_t) - icv_len;
3793                 bcopy(phdr_mp->b_wptr, ((uint8_t *)nip6h + ah_offset +
3794                     sizeof (ah_t)), icv_len);
3795                 length = ntohs(nip6h->ip6_plen);
3796                 length += (sizeof (ah_t) + align_len);
3797                 nip6h->ip6_plen = htons((uint16_t)length);
3798         }
3799 
3800         /* Skip the original IP header */
3801         mp->b_rptr += hdrs_length;
3802         if (mp->b_rptr == mp->b_wptr) {
3803                 phdr_mp->b_cont = mp->b_cont;
3804                 freeb(mp);
3805         }
3806 
3807         return (phdr_mp);
3808 }
3809 
3810 /* Refactor me */
3811 /*
3812  * Wrapper to allow IP to trigger an AH association failure message
3813  * during SA inbound selection.
3814  */
3815 void
3816 ipsecah_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
3817     uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
3818 {
3819         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3820         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3821         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3822 
3823         if (ahstack->ipsecah_log_unknown_spi) {
3824                 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
3825                     addr, af, ahstack->ipsecah_netstack);
3826         }
3827 
3828         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
3829             DROPPER(ipss, ipds_ah_no_sa),
3830             &ahstack->ah_dropper);
3831 }
3832 
3833 /*
3834  * Initialize the AH input and output processing functions.
3835  */
3836 void
3837 ipsecah_init_funcs(ipsa_t *sa)
3838 {
3839         if (sa->ipsa_output_func == NULL)
3840                 sa->ipsa_output_func = ah_outbound;
3841         if (sa->ipsa_input_func == NULL)
3842                 sa->ipsa_input_func = ah_inbound;
3843 }