1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/stream.h>
  29 #include <sys/stropts.h>
  30 #include <sys/errno.h>
  31 #include <sys/strlog.h>
  32 #include <sys/tihdr.h>
  33 #include <sys/socket.h>
  34 #include <sys/ddi.h>
  35 #include <sys/sunddi.h>
  36 #include <sys/mkdev.h>
  37 #include <sys/kmem.h>
  38 #include <sys/zone.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/vtrace.h>
  42 #include <sys/debug.h>
  43 #include <sys/atomic.h>
  44 #include <sys/strsun.h>
  45 #include <sys/random.h>
  46 #include <netinet/in.h>
  47 #include <net/if.h>
  48 #include <netinet/ip6.h>
  49 #include <netinet/icmp6.h>
  50 #include <net/pfkeyv2.h>
  51 #include <net/pfpolicy.h>
  52 
  53 #include <inet/common.h>
  54 #include <inet/mi.h>
  55 #include <inet/ip.h>
  56 #include <inet/ip6.h>
  57 #include <inet/nd.h>
  58 #include <inet/ip_if.h>
  59 #include <inet/ip_ndp.h>
  60 #include <inet/ipsec_info.h>
  61 #include <inet/ipsec_impl.h>
  62 #include <inet/sadb.h>
  63 #include <inet/ipsecah.h>
  64 #include <inet/ipsec_impl.h>
  65 #include <inet/ipdrop.h>
  66 #include <sys/taskq.h>
  67 #include <sys/policy.h>
  68 #include <sys/strsun.h>
  69 
  70 #include <sys/crypto/common.h>
  71 #include <sys/crypto/api.h>
  72 #include <sys/kstat.h>
  73 #include <sys/strsubr.h>
  74 
  75 #include <sys/tsol/tnet.h>
  76 
/*
 * Table of ND variables supported by ipsecah.  This is the template: each
 * stack instance gets its own copy, which ipsecah_stack_init() registers
 * with that stack's ipsecah_g_nd through ipsecah_param_register().
 * All of these are alterable, within the min/max values given, at run time
 * (ipsecah_param_set() rejects values outside [min, max]).
 */
static  ipsecahparam_t  lcl_param_arr[] = {
        /* min  max                     value   name */
        { 0,    3,                      0,      "ipsecah_debug"},
        { 125,  32000, SADB_AGE_INTERVAL_DEFAULT,       "ipsecah_age_interval"},
        { 1,    10,                     1,      "ipsecah_reap_delay"},
        { 1,    SADB_MAX_REPLAY,        64,     "ipsecah_replay_size"},
        { 1,    300,                    15,     "ipsecah_acquire_timeout"},
        { 1,    1800,                   90,     "ipsecah_larval_timeout"},
        /* Default lifetime values for ACQUIRE messages. */
        { 0,    0xffffffffU,            0,      "ipsecah_default_soft_bytes"},
        { 0,    0xffffffffU,            0,      "ipsecah_default_hard_bytes"},
        { 0,    0xffffffffU,            24000,  "ipsecah_default_soft_addtime"},
        { 0,    0xffffffffU,            28800,  "ipsecah_default_hard_addtime"},
        { 0,    0xffffffffU,            0,      "ipsecah_default_soft_usetime"},
        { 0,    0xffffffffU,            0,      "ipsecah_default_hard_usetime"},
        { 0,    1,                      0,      "ipsecah_log_unknown_spi"},
};
  99 
 100 #define ah0dbg(a)       printf a
 101 /* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
 102 #define ah1dbg(ahstack, a)      if (ahstack->ipsecah_debug != 0) printf a
 103 #define ah2dbg(ahstack, a)      if (ahstack->ipsecah_debug > 1) printf a
 104 #define ah3dbg(ahstack, a)      if (ahstack->ipsecah_debug > 2) printf a
 105 
/*
 * XXX This is broken. Padding should be determined dynamically
 * depending on the ICV size and IP version number so that the
 * total AH header size is a multiple of 32 bits or 64 bits
 * for V4 and V6 respectively. For 96bit ICVs we have no problems.
 * Anything different from that, we need to fix our code.
 *
 * Note that both constants below are currently 4 bytes (32 bits); the
 * 64-bit IPv6 alignment described above is not what is encoded here.
 */
#define IPV4_PADDING_ALIGN      0x04    /* Multiple of 32 bits */
#define IPV6_PADDING_ALIGN      0x04    /* Multiple of 32 bits */
 115 
 116 /*
 117  * Helper macro. Avoids a call to msgdsize if there is only one
 118  * mblk in the chain.
 119  */
 120 #define AH_MSGSIZE(mp) ((mp)->b_cont != NULL ? msgdsize(mp) : MBLKL(mp))
 121 
 122 
 123 static mblk_t *ah_auth_out_done(mblk_t *, ip_xmit_attr_t *, ipsec_crypto_t *);
 124 static mblk_t *ah_auth_in_done(mblk_t *, ip_recv_attr_t *, ipsec_crypto_t *);
 125 static mblk_t *ah_process_ip_options_v4(mblk_t *, ipsa_t *, int *, uint_t,
 126     boolean_t, ipsecah_stack_t *);
 127 static mblk_t *ah_process_ip_options_v6(mblk_t *, ipsa_t *, int *, uint_t,
 128     boolean_t, ipsecah_stack_t *);
 129 static void ah_getspi(mblk_t *, keysock_in_t *, ipsecah_stack_t *);
 130 static void ah_inbound_restart(mblk_t *, ip_recv_attr_t *);
 131 
 132 static mblk_t *ah_outbound(mblk_t *, ip_xmit_attr_t *);
 133 static void ah_outbound_finish(mblk_t *, ip_xmit_attr_t *);
 134 
 135 static int ipsecah_open(queue_t *, dev_t *, int, int, cred_t *);
 136 static int ipsecah_close(queue_t *);
 137 static void ipsecah_wput(queue_t *, mblk_t *);
 138 static boolean_t ah_register_out(uint32_t, uint32_t, uint_t, ipsecah_stack_t *,
 139     cred_t *);
 140 static void     *ipsecah_stack_init(netstackid_t stackid, netstack_t *ns);
 141 static void     ipsecah_stack_fini(netstackid_t stackid, void *arg);
 142 
/* Settable in /etc/system */
uint32_t ah_hash_size = IPSEC_DEFAULT_HASH_SIZE;

/*
 * Task queue shared by all stacks; created in ipsecah_ddi_init() and
 * destroyed in ipsecah_ddi_destroy().
 */
static taskq_t *ah_taskq;

/*
 * STREAMS module description.  The module id (info.mi_idnum) is also handed
 * to sadb_retimeout() by ah_ager().
 */
static struct module_info info = {
        5136, "ipsecah", 0, INFPSZ, 65536, 1024
};

/*
 * Read-side queue initialization: putnext is installed as the first entry,
 * and open/close are shared with the write side.
 */
static struct qinit rinit = {
        (pfi_t)putnext, NULL, ipsecah_open, ipsecah_close, NULL, &info,
        NULL
};

/*
 * Write-side queue initialization: ipsecah_wput() is installed as the
 * first entry.
 */
static struct qinit winit = {
        (pfi_t)ipsecah_wput, NULL, ipsecah_open, ipsecah_close, NULL, &info,
        NULL
};

/* Stream table tying the read and write qinit structures together. */
struct streamtab ipsecahinfo = {
        &rinit, &winit, NULL, NULL
};

/* ks_update callback installed on the "ah_stat" kstat by ah_kstat_init(). */
static int ah_kstat_update(kstat_t *, int);

/* Value published via IP_ACQUIRE_STAT(..., maxpackets, ...) in ah_kstat_init(). */
uint64_t ipsacq_maxpackets = IPSACQ_MAXPACKETS;
 169 
 170 static boolean_t
 171 ah_kstat_init(ipsecah_stack_t *ahstack, netstackid_t stackid)
 172 {
 173         ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
 174 
 175         ahstack->ah_ksp = kstat_create_netstack("ipsecah", 0, "ah_stat", "net",
 176             KSTAT_TYPE_NAMED, sizeof (ah_kstats_t) / sizeof (kstat_named_t),
 177             KSTAT_FLAG_PERSISTENT, stackid);
 178 
 179         if (ahstack->ah_ksp == NULL || ahstack->ah_ksp->ks_data == NULL)
 180                 return (B_FALSE);
 181 
 182         ahstack->ah_kstats = ahstack->ah_ksp->ks_data;
 183 
 184         ahstack->ah_ksp->ks_update = ah_kstat_update;
 185         ahstack->ah_ksp->ks_private = (void *)(uintptr_t)stackid;
 186 
 187 #define K64 KSTAT_DATA_UINT64
 188 #define KI(x) kstat_named_init(&(ahstack->ah_kstats->ah_stat_##x), #x, K64)
 189 
 190         KI(num_aalgs);
 191         KI(good_auth);
 192         KI(bad_auth);
 193         KI(replay_failures);
 194         KI(replay_early_failures);
 195         KI(keysock_in);
 196         KI(out_requests);
 197         KI(acquire_requests);
 198         KI(bytes_expired);
 199         KI(out_discards);
 200         KI(crypto_sync);
 201         KI(crypto_async);
 202         KI(crypto_failures);
 203 
 204 #undef KI
 205 #undef K64
 206 
 207         kstat_install(ahstack->ah_ksp);
 208         IP_ACQUIRE_STAT(ipss, maxpackets, ipsacq_maxpackets);
 209         return (B_TRUE);
 210 }
 211 
 212 static int
 213 ah_kstat_update(kstat_t *kp, int rw)
 214 {
 215         ah_kstats_t     *ekp;
 216         netstackid_t    stackid = (netstackid_t)(uintptr_t)kp->ks_private;
 217         netstack_t      *ns;
 218         ipsec_stack_t   *ipss;
 219 
 220         if ((kp == NULL) || (kp->ks_data == NULL))
 221                 return (EIO);
 222 
 223         if (rw == KSTAT_WRITE)
 224                 return (EACCES);
 225 
 226         ns = netstack_find_by_stackid(stackid);
 227         if (ns == NULL)
 228                 return (-1);
 229         ipss = ns->netstack_ipsec;
 230         if (ipss == NULL) {
 231                 netstack_rele(ns);
 232                 return (-1);
 233         }
 234         ekp = (ah_kstats_t *)kp->ks_data;
 235 
 236         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
 237         ekp->ah_stat_num_aalgs.value.ui64 = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
 238         rw_exit(&ipss->ipsec_alg_lock);
 239 
 240         netstack_rele(ns);
 241         return (0);
 242 }
 243 
 244 /*
 245  * Don't have to lock ipsec_age_interval, as only one thread will access it at
 246  * a time, because I control the one function that does a qtimeout() on
 247  * ah_pfkey_q.
 248  */
 249 static void
 250 ah_ager(void *arg)
 251 {
 252         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;
 253         netstack_t      *ns = ahstack->ipsecah_netstack;
 254         hrtime_t begin = gethrtime();
 255 
 256         sadb_ager(&ahstack->ah_sadb.s_v4, ahstack->ah_pfkey_q,
 257             ahstack->ipsecah_reap_delay, ns);
 258         sadb_ager(&ahstack->ah_sadb.s_v6, ahstack->ah_pfkey_q,
 259             ahstack->ipsecah_reap_delay, ns);
 260 
 261         ahstack->ah_event = sadb_retimeout(begin, ahstack->ah_pfkey_q,
 262             ah_ager, ahstack,
 263             &ahstack->ipsecah_age_interval, ahstack->ipsecah_age_int_max,
 264             info.mi_idnum);
 265 }
 266 
 267 /*
 268  * Get an AH NDD parameter.
 269  */
 270 /* ARGSUSED */
 271 static int
 272 ipsecah_param_get(
 273     queue_t     *q,
 274     mblk_t      *mp,
 275     caddr_t     cp,
 276     cred_t *cr)
 277 {
 278         ipsecahparam_t  *ipsecahpa = (ipsecahparam_t *)cp;
 279         uint_t value;
 280         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr;
 281 
 282         mutex_enter(&ahstack->ipsecah_param_lock);
 283         value = ipsecahpa->ipsecah_param_value;
 284         mutex_exit(&ahstack->ipsecah_param_lock);
 285 
 286         (void) mi_mpprintf(mp, "%u", value);
 287         return (0);
 288 }
 289 
 290 /*
 291  * This routine sets an NDD variable in a ipsecahparam_t structure.
 292  */
 293 /* ARGSUSED */
 294 static int
 295 ipsecah_param_set(
 296     queue_t     *q,
 297     mblk_t      *mp,
 298     char        *value,
 299     caddr_t     cp,
 300     cred_t *cr)
 301 {
 302         ulong_t new_value;
 303         ipsecahparam_t  *ipsecahpa = (ipsecahparam_t *)cp;
 304         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr;
 305 
 306         /*
 307          * Fail the request if the new value does not lie within the
 308          * required bounds.
 309          */
 310         if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
 311             new_value < ipsecahpa->ipsecah_param_min ||
 312             new_value > ipsecahpa->ipsecah_param_max) {
 313                 return (EINVAL);
 314         }
 315 
 316         /* Set the new value */
 317         mutex_enter(&ahstack->ipsecah_param_lock);
 318         ipsecahpa->ipsecah_param_value = new_value;
 319         mutex_exit(&ahstack->ipsecah_param_lock);
 320         return (0);
 321 }
 322 
 323 /*
 324  * Using lifetime NDD variables, fill in an extended combination's
 325  * lifetime information.
 326  */
 327 void
 328 ipsecah_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
 329 {
 330         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
 331 
 332         ecomb->sadb_x_ecomb_soft_bytes = ahstack->ipsecah_default_soft_bytes;
 333         ecomb->sadb_x_ecomb_hard_bytes = ahstack->ipsecah_default_hard_bytes;
 334         ecomb->sadb_x_ecomb_soft_addtime =
 335             ahstack->ipsecah_default_soft_addtime;
 336         ecomb->sadb_x_ecomb_hard_addtime =
 337             ahstack->ipsecah_default_hard_addtime;
 338         ecomb->sadb_x_ecomb_soft_usetime =
 339             ahstack->ipsecah_default_soft_usetime;
 340         ecomb->sadb_x_ecomb_hard_usetime =
 341             ahstack->ipsecah_default_hard_usetime;
 342 }
 343 
 344 /*
 345  * Initialize things for AH at module load time.
 346  */
 347 boolean_t
 348 ipsecah_ddi_init(void)
 349 {
 350         ah_taskq = taskq_create("ah_taskq", 1, minclsyspri,
 351             IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
 352 
 353         /*
 354          * We want to be informed each time a stack is created or
 355          * destroyed in the kernel, so we can maintain the
 356          * set of ipsecah_stack_t's.
 357          */
 358         netstack_register(NS_IPSECAH, ipsecah_stack_init, NULL,
 359             ipsecah_stack_fini);
 360 
 361         return (B_TRUE);
 362 }
 363 
 364 /*
 365  * Walk through the param array specified registering each element with the
 366  * named dispatch handler.
 367  */
 368 static boolean_t
 369 ipsecah_param_register(IDP *ndp, ipsecahparam_t *ahp, int cnt)
 370 {
 371         for (; cnt-- > 0; ahp++) {
 372                 if (ahp->ipsecah_param_name != NULL &&
 373                     ahp->ipsecah_param_name[0]) {
 374                         if (!nd_load(ndp,
 375                             ahp->ipsecah_param_name,
 376                             ipsecah_param_get, ipsecah_param_set,
 377                             (caddr_t)ahp)) {
 378                                 nd_free(ndp);
 379                                 return (B_FALSE);
 380                         }
 381                 }
 382         }
 383         return (B_TRUE);
 384 }
 385 
 386 /*
 387  * Initialize things for AH for each stack instance
 388  */
 389 static void *
 390 ipsecah_stack_init(netstackid_t stackid, netstack_t *ns)
 391 {
 392         ipsecah_stack_t *ahstack;
 393         ipsecahparam_t  *ahp;
 394 
 395         ahstack = (ipsecah_stack_t *)kmem_zalloc(sizeof (*ahstack), KM_SLEEP);
 396         ahstack->ipsecah_netstack = ns;
 397 
 398         ahp = (ipsecahparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
 399         ahstack->ipsecah_params = ahp;
 400         bcopy(lcl_param_arr, ahp, sizeof (lcl_param_arr));
 401 
 402         (void) ipsecah_param_register(&ahstack->ipsecah_g_nd, ahp,
 403             A_CNT(lcl_param_arr));
 404 
 405         (void) ah_kstat_init(ahstack, stackid);
 406 
 407         ahstack->ah_sadb.s_acquire_timeout = &ahstack->ipsecah_acquire_timeout;
 408         sadbp_init("AH", &ahstack->ah_sadb, SADB_SATYPE_AH, ah_hash_size,
 409             ahstack->ipsecah_netstack);
 410 
 411         mutex_init(&ahstack->ipsecah_param_lock, NULL, MUTEX_DEFAULT, 0);
 412 
 413         ip_drop_register(&ahstack->ah_dropper, "IPsec AH");
 414         return (ahstack);
 415 }
 416 
 417 /*
 418  * Destroy things for AH at module unload time.
 419  */
 420 void
 421 ipsecah_ddi_destroy(void)
 422 {
 423         netstack_unregister(NS_IPSECAH);
 424         taskq_destroy(ah_taskq);
 425 }
 426 
 427 /*
 428  * Destroy things for AH for one stack... Never called?
 429  */
 430 static void
 431 ipsecah_stack_fini(netstackid_t stackid, void *arg)
 432 {
 433         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;
 434 
 435         if (ahstack->ah_pfkey_q != NULL) {
 436                 (void) quntimeout(ahstack->ah_pfkey_q, ahstack->ah_event);
 437         }
 438         ahstack->ah_sadb.s_acquire_timeout = NULL;
 439         sadbp_destroy(&ahstack->ah_sadb, ahstack->ipsecah_netstack);
 440         ip_drop_unregister(&ahstack->ah_dropper);
 441         mutex_destroy(&ahstack->ipsecah_param_lock);
 442         nd_free(&ahstack->ipsecah_g_nd);
 443 
 444         kmem_free(ahstack->ipsecah_params, sizeof (lcl_param_arr));
 445         ahstack->ipsecah_params = NULL;
 446         kstat_delete_netstack(ahstack->ah_ksp, stackid);
 447         ahstack->ah_ksp = NULL;
 448         ahstack->ah_kstats = NULL;
 449 
 450         kmem_free(ahstack, sizeof (*ahstack));
 451 }
 452 
 453 /*
 454  * AH module open routine, which is here for keysock plumbing.
 455  * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
 456  * Days of export control, and fears that ESP would not be allowed
 457  * to be shipped at all by default.  Eventually, keysock should
 458  * either access AH and ESP via modstubs or krtld dependencies, or
 459  * perhaps be folded in with AH and ESP into a single IPsec/netsec
 460  * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
 461  */
 462 /* ARGSUSED */
 463 static int
 464 ipsecah_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 465 {
 466         netstack_t      *ns;
 467         ipsecah_stack_t *ahstack;
 468 
 469         if (secpolicy_ip_config(credp, B_FALSE) != 0)
 470                 return (EPERM);
 471 
 472         if (q->q_ptr != NULL)
 473                 return (0);  /* Re-open of an already open instance. */
 474 
 475         if (sflag != MODOPEN)
 476                 return (EINVAL);
 477 
 478         ns = netstack_find_by_cred(credp);
 479         ASSERT(ns != NULL);
 480         ahstack = ns->netstack_ipsecah;
 481         ASSERT(ahstack != NULL);
 482 
 483         q->q_ptr = ahstack;
 484         WR(q)->q_ptr = q->q_ptr;
 485 
 486         qprocson(q);
 487         return (0);
 488 }
 489 
 490 /*
 491  * AH module close routine.
 492  */
 493 static int
 494 ipsecah_close(queue_t *q)
 495 {
 496         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr;
 497 
 498         /*
 499          * Clean up q_ptr, if needed.
 500          */
 501         qprocsoff(q);
 502 
 503         /* Keysock queue check is safe, because of OCEXCL perimeter. */
 504 
 505         if (q == ahstack->ah_pfkey_q) {
 506                 ah1dbg(ahstack,
 507                     ("ipsecah_close:  Ummm... keysock is closing AH.\n"));
 508                 ahstack->ah_pfkey_q = NULL;
 509                 /* Detach qtimeouts. */
 510                 (void) quntimeout(q, ahstack->ah_event);
 511         }
 512 
 513         netstack_rele(ahstack->ipsecah_netstack);
 514         return (0);
 515 }
 516 
/*
 * Construct an SADB_REGISTER message with the current algorithms and send
 * it up the keysock queue (ah_pfkey_q).
 *
 * The PF_KEY payload is laid out as: the sadb_msg header, then (only if at
 * least one valid algorithm exists) a supported-auth extension followed by
 * one sadb_alg_t per valid algorithm, then (only if a label was obtained
 * from cr) a sensitivity extension.
 *
 * sequence, pid - copied into sadb_msg_seq and sadb_msg_pid.
 * serial - passed to sadb_keysock_out() when allocating the KEYSOCK_OUT.
 * ahstack - AH stack instance supplying the algorithm table and queue.
 * cr - optional credential; consulted only on a labeled system.
 *
 * Returns B_TRUE if the message was handed to putnext(); B_FALSE if an
 * allocation failed or there is no keysock queue, in which case anything
 * allocated has been freed.
 */
static boolean_t
ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
    ipsecah_stack_t *ahstack, cred_t *cr)
{
        mblk_t *mp;
        boolean_t rc = B_TRUE;
        sadb_msg_t *samsg;
        sadb_supported_t *sasupp;
        sadb_alg_t *saalg;
        uint_t allocsize = sizeof (*samsg);
        uint_t i, numalgs_snap;
        ipsec_alginfo_t **authalgs;
        uint_t num_aalgs;
        ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
        sadb_sens_t *sens;
        size_t sens_len = 0;
        sadb_ext_t *nextext;
        ts_label_t *sens_tsl = NULL;

        /* Allocate the KEYSOCK_OUT. */
        mp = sadb_keysock_out(serial);
        if (mp == NULL) {
                ah0dbg(("ah_register_out: couldn't allocate mblk.\n"));
                return (B_FALSE);
        }

        /* On a labeled system, reserve room for the caller's label. */
        if (is_system_labeled() && (cr != NULL)) {
                sens_tsl = crgetlabel(cr);
                if (sens_tsl != NULL) {
                        sens_len = sadb_sens_len_from_label(sens_tsl);
                        allocsize += sens_len;
                }
        }

        /*
         * Allocate the PF_KEY message that follows KEYSOCK_OUT.
         * The alg reader lock needs to be held while allocating
         * the variable part (i.e. the algorithms) of the message.
         */

        rw_enter(&ipss->ipsec_alg_lock, RW_READER);

        /*
         * Return only valid algorithms, so the number of algorithms
         * to send up may be less than the number of algorithm entries
         * in the table.
         */
        authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
        for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
                if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
                        num_aalgs++;

        /*
         * Fill SADB_REGISTER message's algorithm descriptors.  Hold
         * down the lock while filling it.
         */
        if (num_aalgs != 0) {
                allocsize += (num_aalgs * sizeof (*saalg));
                allocsize += sizeof (*sasupp);
        }
        mp->b_cont = allocb(allocsize, BPRI_HI);
        if (mp->b_cont == NULL) {
                rw_exit(&ipss->ipsec_alg_lock);
                freemsg(mp);
                return (B_FALSE);
        }

        /* The whole buffer is used; extensions start right after the header. */
        mp->b_cont->b_wptr += allocsize;
        nextext = (sadb_ext_t *)(mp->b_cont->b_rptr + sizeof (*samsg));

        if (num_aalgs != 0) {

                /* Algorithm descriptors follow the supported-ext header. */
                saalg = (sadb_alg_t *)(((uint8_t *)nextext) + sizeof (*sasupp));
                ASSERT(((ulong_t)saalg & 0x7) == 0);

                numalgs_snap = 0;
                for (i = 0;
                    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
                    i++) {
                        if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
                                continue;

                        saalg->sadb_alg_id = authalgs[i]->alg_id;
                        saalg->sadb_alg_ivlen = 0;
                        saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
                        saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
                        saalg->sadb_x_alg_increment =
                            authalgs[i]->alg_increment;
                        /* For now, salt is meaningless in AH. */
                        ASSERT(authalgs[i]->alg_saltlen == 0);
                        saalg->sadb_x_alg_saltbits =
                            SADB_8TO1(authalgs[i]->alg_saltlen);
                        numalgs_snap++;
                        saalg++;
                }
                ASSERT(numalgs_snap == num_aalgs);
#ifdef DEBUG
                /*
                 * Reality check to make sure I snagged all of the
                 * algorithms.  (Continues from where the loop above
                 * stopped.)
                 */
                for (; i < IPSEC_MAX_ALGS; i++)
                        if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
                                cmn_err(CE_PANIC,
                                    "ah_register_out()!  Missed #%d.\n", i);
#endif /* DEBUG */
                /* The next extension starts after the last descriptor. */
                nextext = (sadb_ext_t *)saalg;
        }

        rw_exit(&ipss->ipsec_alg_lock);

        /* Append the sensitivity extension in the space reserved earlier. */
        if (sens_tsl != NULL) {
                sens = (sadb_sens_t *)nextext;
                sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
                    sens_tsl, sens_len);

                nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
        }

        /* Now fill the rest of the SADB_REGISTER message. */

        samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
        samsg->sadb_msg_version = PF_KEY_V2;
        samsg->sadb_msg_type = SADB_REGISTER;
        samsg->sadb_msg_errno = 0;
        samsg->sadb_msg_satype = SADB_SATYPE_AH;
        samsg->sadb_msg_len = SADB_8TO64(allocsize);
        samsg->sadb_msg_reserved = 0;
        /*
         * Assume caller has sufficient sequence/pid number info.  If it's one
         * from me over a new alg., I could give two hoots about sequence.
         */
        samsg->sadb_msg_seq = sequence;
        samsg->sadb_msg_pid = pid;

        /* The supported-ext header sits immediately after the base header. */
        if (num_aalgs != 0) {
                sasupp = (sadb_supported_t *)(samsg + 1);
                sasupp->sadb_supported_len = SADB_8TO64(
                    sizeof (*sasupp) + sizeof (*saalg) * num_aalgs);
                sasupp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
                sasupp->sadb_supported_reserved = 0;
        }

        /* Without a keysock queue there is nobody to deliver to. */
        if (ahstack->ah_pfkey_q != NULL)
                putnext(ahstack->ah_pfkey_q, mp);
        else {
                rc = B_FALSE;
                freemsg(mp);
        }

        return (rc);
}
 672 
 673 /*
 674  * Invoked when the algorithm table changes. Causes SADB_REGISTER
 675  * messages continaining the current list of algorithms to be
 676  * sent up to the AH listeners.
 677  */
 678 void
 679 ipsecah_algs_changed(netstack_t *ns)
 680 {
 681         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
 682 
 683         /*
 684          * Time to send a PF_KEY SADB_REGISTER message to AH listeners
 685          * everywhere.  (The function itself checks for NULL ah_pfkey_q.)
 686          */
 687         (void) ah_register_out(0, 0, 0, ahstack, NULL);
 688 }
 689 
 690 /*
 691  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
 692  * and send it into AH and IP again.
 693  */
 694 static void
 695 inbound_task(void *arg)
 696 {
 697         mblk_t          *mp = (mblk_t *)arg;
 698         mblk_t          *async_mp;
 699         ip_recv_attr_t  iras;
 700 
 701         async_mp = mp;
 702         mp = async_mp->b_cont;
 703         async_mp->b_cont = NULL;
 704         if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
 705                 /* The ill or ip_stack_t disappeared on us */
 706                 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
 707                 freemsg(mp);
 708                 goto done;
 709         }
 710 
 711         ah_inbound_restart(mp, &iras);
 712 done:
 713         ira_cleanup(&iras, B_TRUE);
 714 }
 715 
 716 /*
 717  * Restart ESP after the SA has been added.
 718  */
 719 static void
 720 ah_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
 721 {
 722         ah_t            *ah;
 723         netstack_t      *ns;
 724         ipsecah_stack_t *ahstack;
 725 
 726         ns = ira->ira_ill->ill_ipst->ips_netstack;
 727         ahstack = ns->netstack_ipsecah;
 728 
 729         ASSERT(ahstack != NULL);
 730         mp = ipsec_inbound_ah_sa(mp, ira, &ah);
 731         if (mp == NULL)
 732                 return;
 733 
 734         ASSERT(ah != NULL);
 735         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
 736         ASSERT(ira->ira_ipsec_ah_sa != NULL);
 737 
 738         mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
 739         if (mp == NULL) {
 740                 /*
 741                  * Either it failed or is pending. In the former case
 742                  * ipIfStatsInDiscards was increased.
 743                  */
 744                 return;
 745         }
 746         ip_input_post_ipsec(mp, ira);
 747 }
 748 
 749 /*
 750  * Now that weak-key passed, actually ADD the security association, and
 751  * send back a reply ADD message.
 752  */
 753 static int
 754 ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
 755     int *diagnostic, ipsecah_stack_t *ahstack)
 756 {
 757         isaf_t *primary = NULL, *secondary;
 758         boolean_t clone = B_FALSE, is_inbound = B_FALSE;
 759         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
 760         ipsa_t *larval;
 761         ipsacq_t *acqrec;
 762         iacqf_t *acq_bucket;
 763         mblk_t *acq_msgs = NULL;
 764         mblk_t *lpkt;
 765         int rc;
 766         ipsa_query_t sq;
 767         int error;
 768         netstack_t      *ns = ahstack->ipsecah_netstack;
 769         ipsec_stack_t   *ipss = ns->netstack_ipsec;
 770 
 771         /*
 772          * Locate the appropriate table(s).
 773          */
 774 
 775         sq.spp = &ahstack->ah_sadb;
 776         error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
 777             IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
 778             &sq, diagnostic);
 779         if (error)
 780                 return (error);
 781 
 782         /*
 783          * Use the direction flags provided by the KMD to determine
 784          * if the inbound or outbound table should be the primary
 785          * for this SA. If these flags were absent then make this
 786          * decision based on the addresses.
 787          */
 788         if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
 789                 primary = sq.inbound;
 790                 secondary = sq.outbound;
 791                 is_inbound = B_TRUE;
 792                 if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
 793                         clone = B_TRUE;
 794         } else {
 795                 if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
 796                         primary = sq.outbound;
 797                         secondary = sq.inbound;
 798                 }
 799         }
 800         if (primary == NULL) {
 801                 /*
 802                  * The KMD did not set a direction flag, determine which
 803                  * table to insert the SA into based on addresses.
 804                  */
 805                 switch (ksi->ks_in_dsttype) {
 806                 case KS_IN_ADDR_MBCAST:
 807                         clone = B_TRUE; /* All mcast SAs can be bidirectional */
 808                         assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
 809                         /* FALLTHRU */
 810                 /*
 811                  * If the source address is either one of mine, or unspecified
 812                  * (which is best summed up by saying "not 'not mine'"),
 813                  * then the association is potentially bi-directional,
 814                  * in that it can be used for inbound traffic and outbound
 815                  * traffic.  The best example of such and SA is a multicast
 816                  * SA (which allows me to receive the outbound traffic).
 817                  */
 818                 case KS_IN_ADDR_ME:
 819                         assoc->sadb_sa_flags |= IPSA_F_INBOUND;
 820                         primary = sq.inbound;
 821                         secondary = sq.outbound;
 822                         if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
 823                                 clone = B_TRUE;
 824                         is_inbound = B_TRUE;
 825                         break;
 826 
 827                 /*
 828                  * If the source address literally not mine (either
 829                  * unspecified or not mine), then this SA may have an
 830                  * address that WILL be mine after some configuration.
 831                  * We pay the price for this by making it a bi-directional
 832                  * SA.
 833                  */
 834                 case KS_IN_ADDR_NOTME:
 835                         assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
 836                         primary = sq.outbound;
 837                         secondary = sq.inbound;
 838                         if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
 839                                 assoc->sadb_sa_flags |= IPSA_F_INBOUND;
 840                                 clone = B_TRUE;
 841                         }
 842                         break;
 843                 default:
 844                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
 845                         return (EINVAL);
 846                 }
 847         }
 848 
 849         /*
         * Find an ACQUIRE list entry if possible.  If we've added an SA that
 851          * suits the needs of an ACQUIRE list entry, we can eliminate the
 852          * ACQUIRE list entry and transmit the enqueued packets.  Use the
 853          * high-bit of the sequence number to queue it.  Key off destination
 854          * addr, and change acqrec's state.
 855          */
 856 
 857         if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
 858                 acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
 859                 mutex_enter(&acq_bucket->iacqf_lock);
 860                 for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
 861                     acqrec = acqrec->ipsacq_next) {
 862                         mutex_enter(&acqrec->ipsacq_lock);
 863                         /*
 864                          * Q:  I only check sequence.  Should I check dst?
 865                          * A: Yes, check dest because those are the packets
 866                          *    that are queued up.
 867                          */
 868                         if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
 869                             IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
 870                             acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
 871                                 break;
 872                         mutex_exit(&acqrec->ipsacq_lock);
 873                 }
 874                 if (acqrec != NULL) {
 875                         /*
 876                          * AHA!  I found an ACQUIRE record for this SA.
 877                          * Grab the msg list, and free the acquire record.
 878                          * I already am holding the lock for this record,
 879                          * so all I have to do is free it.
 880                          */
 881                         acq_msgs = acqrec->ipsacq_mp;
 882                         acqrec->ipsacq_mp = NULL;
 883                         mutex_exit(&acqrec->ipsacq_lock);
 884                         sadb_destroy_acquire(acqrec, ns);
 885                 }
 886                 mutex_exit(&acq_bucket->iacqf_lock);
 887         }
 888 
 889         /*
 890          * Find PF_KEY message, and see if I'm an update.  If so, find entry
 891          * in larval list (if there).
 892          */
 893 
 894         larval = NULL;
 895 
 896         if (samsg->sadb_msg_type == SADB_UPDATE) {
 897                 mutex_enter(&sq.inbound->isaf_lock);
 898                 larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
 899                     ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
 900                 mutex_exit(&sq.inbound->isaf_lock);
 901 
 902                 if ((larval == NULL) ||
 903                     (larval->ipsa_state != IPSA_STATE_LARVAL)) {
 904                         *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
 905                         if (larval != NULL) {
 906                                 IPSA_REFRELE(larval);
 907                         }
 908                         ah0dbg(("Larval update, but larval disappeared.\n"));
 909                         return (ESRCH);
 910                 } /* Else sadb_common_add unlinks it for me! */
 911         }
 912 
 913         if (larval != NULL) {
 914                 /*
 915                  * Hold again, because sadb_common_add() consumes a reference,
 916                  * and we don't want to clear_lpkt() without a reference.
 917                  */
 918                 IPSA_REFHOLD(larval);
 919         }
 920 
 921         rc = sadb_common_add(ahstack->ah_pfkey_q, mp,
 922             samsg, ksi, primary, secondary, larval, clone, is_inbound,
 923             diagnostic, ns, &ahstack->ah_sadb);
 924 
 925         if (larval != NULL) {
 926                 if (rc == 0) {
 927                         lpkt = sadb_clear_lpkt(larval);
 928                         if (lpkt != NULL) {
 929                                 rc = !taskq_dispatch(ah_taskq, inbound_task,
 930                                     lpkt, TQ_NOSLEEP);
 931                         }
 932                 }
 933                 IPSA_REFRELE(larval);
 934         }
 935 
 936         /*
 937          * How much more stack will I create with all of these
 938          * ah_outbound_*() calls?
 939          */
 940 
 941         /* Handle the packets queued waiting for the SA */
 942         while (acq_msgs != NULL) {
 943                 mblk_t          *asyncmp;
 944                 mblk_t          *data_mp;
 945                 ip_xmit_attr_t  ixas;
 946                 ill_t           *ill;
 947 
 948                 asyncmp = acq_msgs;
 949                 acq_msgs = acq_msgs->b_next;
 950                 asyncmp->b_next = NULL;
 951 
 952                 /*
 953                  * Extract the ip_xmit_attr_t from the first mblk.
 954                  * Verifies that the netstack and ill is still around; could
 955                  * have vanished while iked was doing its work.
                 * On successful return we have a nce_t and the ill/ipst can't
 957                  * disappear until we do the nce_refrele in ixa_cleanup.
 958                  */
 959                 data_mp = asyncmp->b_cont;
 960                 asyncmp->b_cont = NULL;
 961                 if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
 962                         AH_BUMP_STAT(ahstack, out_discards);
 963                         ip_drop_packet(data_mp, B_FALSE, NULL,
 964                             DROPPER(ipss, ipds_sadb_acquire_timeout),
 965                             &ahstack->ah_dropper);
 966                 } else if (rc != 0) {
 967                         ill = ixas.ixa_nce->nce_ill;
 968                         AH_BUMP_STAT(ahstack, out_discards);
 969                         ip_drop_packet(data_mp, B_FALSE, ill,
 970                             DROPPER(ipss, ipds_sadb_acquire_timeout),
 971                             &ahstack->ah_dropper);
 972                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
 973                 } else {
 974                         ah_outbound_finish(data_mp, &ixas);
 975                 }
 976                 ixa_cleanup(&ixas);
 977         }
 978 
 979         return (rc);
 980 }
 981 
 982 
 983 /*
 984  * Process one of the queued messages (from ipsacq_mp) once the SA
 985  * has been added.
 986  */
 987 static void
 988 ah_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
 989 {
 990         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
 991         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
 992         ipsec_stack_t   *ipss = ns->netstack_ipsec;
 993         ill_t           *ill = ixa->ixa_nce->nce_ill;
 994 
 995         if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) {
 996                 AH_BUMP_STAT(ahstack, out_discards);
 997                 ip_drop_packet(data_mp, B_FALSE, ill,
 998                     DROPPER(ipss, ipds_sadb_acquire_timeout),
 999                     &ahstack->ah_dropper);
1000                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1001                 return;
1002         }
1003 
1004         data_mp = ah_outbound(data_mp, ixa);
1005         if (data_mp == NULL)
1006                 return;
1007 
1008         (void) ip_output_post_ipsec(data_mp, ixa);
1009 }
1010 
1011 /*
1012  * Add new AH security association.  This may become a generic AH/ESP
1013  * routine eventually.
1014  */
1015 static int
1016 ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
1017 {
1018         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1019         sadb_address_t *srcext =
1020             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
1021         sadb_address_t *dstext =
1022             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1023         sadb_address_t *isrcext =
1024             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
1025         sadb_address_t *idstext =
1026             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
1027         sadb_key_t *key = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
1028         struct sockaddr_in *src, *dst;
1029         /* We don't need sockaddr_in6 for now. */
1030         sadb_lifetime_t *soft =
1031             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
1032         sadb_lifetime_t *hard =
1033             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
1034         sadb_lifetime_t *idle =
1035             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
1036         ipsec_alginfo_t *aalg;
1037         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
1038         ipsec_stack_t   *ipss = ns->netstack_ipsec;
1039 
1040         /* I need certain extensions present for an ADD message. */
1041         if (srcext == NULL) {
1042                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
1043                 return (EINVAL);
1044         }
1045         if (dstext == NULL) {
1046                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
1047                 return (EINVAL);
1048         }
1049         if (isrcext == NULL && idstext != NULL) {
1050                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
1051                 return (EINVAL);
1052         }
1053         if (isrcext != NULL && idstext == NULL) {
1054                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
1055                 return (EINVAL);
1056         }
1057         if (assoc == NULL) {
1058                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
1059                 return (EINVAL);
1060         }
1061         if (key == NULL) {
1062                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_AKEY;
1063                 return (EINVAL);
1064         }
1065 
1066         src = (struct sockaddr_in *)(srcext + 1);
1067         dst = (struct sockaddr_in *)(dstext + 1);
1068 
1069         /* Sundry ADD-specific reality checks. */
1070         /* XXX STATS : Logging/stats here? */
1071 
1072         if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
1073             (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
1074                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
1075                 return (EINVAL);
1076         }
1077         if (assoc->sadb_sa_encrypt != SADB_EALG_NONE) {
1078                 *diagnostic = SADB_X_DIAGNOSTIC_ENCR_NOTSUPP;
1079                 return (EINVAL);
1080         }
1081         if (assoc->sadb_sa_flags & ~ahstack->ah_sadb.s_addflags) {
1082                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
1083                 return (EINVAL);
1084         }
1085         if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0)
1086                 return (EINVAL);
1087 
1088         ASSERT(src->sin_family == dst->sin_family);
1089 
1090         /* Stuff I don't support, for now.  XXX Diagnostic? */
1091         if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
1092                 return (EOPNOTSUPP);
1093 
1094         if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL) {
1095                 if (!is_system_labeled())
1096                         return (EOPNOTSUPP);
1097         }
1098 
1099         if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL) {
1100                 if (!is_system_labeled())
1101                         return (EOPNOTSUPP);
1102         }
1103         /*
1104          * XXX Policy : I'm not checking identities at this time, but
1105          * if I did, I'd do them here, before I sent the weak key
1106          * check up to the algorithm.
1107          */
1108 
1109         /* verify that there is a mapping for the specified algorithm */
1110         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
1111         aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][assoc->sadb_sa_auth];
1112         if (aalg == NULL || !ALG_VALID(aalg)) {
1113                 rw_exit(&ipss->ipsec_alg_lock);
1114                 ah1dbg(ahstack, ("Couldn't find auth alg #%d.\n",
1115                     assoc->sadb_sa_auth));
1116                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
1117                 return (EINVAL);
1118         }
1119         ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
1120 
1121         /* sanity check key sizes */
1122         if (!ipsec_valid_key_size(key->sadb_key_bits, aalg)) {
1123                 rw_exit(&ipss->ipsec_alg_lock);
1124                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
1125                 return (EINVAL);
1126         }
1127 
1128         /* check key and fix parity if needed */
1129         if (ipsec_check_key(aalg->alg_mech_type, key, B_TRUE,
1130             diagnostic) != 0) {
1131                 rw_exit(&ipss->ipsec_alg_lock);
1132                 return (EINVAL);
1133         }
1134 
1135         rw_exit(&ipss->ipsec_alg_lock);
1136 
1137         return (ah_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
1138             diagnostic, ahstack));
1139 }
1140 
1141 /* Refactor me */
1142 /*
1143  * Update a security association.  Updates come in two varieties.  The first
1144  * is an update of lifetimes on a non-larval SA.  The second is an update of
1145  * a larval SA, which ends up looking a lot more like an add.
1146  */
1147 static int
1148 ah_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
1149     ipsecah_stack_t *ahstack, uint8_t sadb_msg_type)
1150 {
1151         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1152         sadb_address_t *dstext =
1153             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1154         mblk_t  *buf_pkt;
1155         int rcode;
1156 
1157         if (dstext == NULL) {
1158                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
1159                 return (EINVAL);
1160         }
1161 
1162         rcode = sadb_update_sa(mp, ksi, &buf_pkt, &ahstack->ah_sadb,
1163             diagnostic, ahstack->ah_pfkey_q, ah_add_sa,
1164             ahstack->ipsecah_netstack, sadb_msg_type);
1165 
1166         if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
1167             (rcode != 0)) {
1168                 return (rcode);
1169         }
1170 
1171         HANDLE_BUF_PKT(ah_taskq, ahstack->ipsecah_netstack->netstack_ipsec,
1172             ahstack->ah_dropper, buf_pkt);
1173 
1174         return (rcode);
1175 }
1176 
1177 /* Refactor me */
1178 /*
1179  * Delete a security association.  This is REALLY likely to be code common to
1180  * both AH and ESP.  Find the association, then unlink it.
1181  */
1182 static int
1183 ah_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
1184     ipsecah_stack_t *ahstack, uint8_t sadb_msg_type)
1185 {
1186         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1187         sadb_address_t *dstext =
1188             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1189         sadb_address_t *srcext =
1190             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
1191         struct sockaddr_in *sin;
1192 
1193         if (assoc == NULL) {
1194                 if (dstext != NULL)
1195                         sin = (struct sockaddr_in *)(dstext + 1);
1196                 else if (srcext != NULL)
1197                         sin = (struct sockaddr_in *)(srcext + 1);
1198                 else {
1199                         *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
1200                         return (EINVAL);
1201                 }
1202                 return (sadb_purge_sa(mp, ksi,
1203                     (sin->sin_family == AF_INET6) ? &ahstack->ah_sadb.s_v6 :
1204                     &ahstack->ah_sadb.s_v4, diagnostic, ahstack->ah_pfkey_q));
1205         }
1206 
1207         return (sadb_delget_sa(mp, ksi, &ahstack->ah_sadb, diagnostic,
1208             ahstack->ah_pfkey_q, sadb_msg_type));
1209 }
1210 
1211 /* Refactor me */
1212 /*
1213  * Convert the entire contents of all of AH's SA tables into PF_KEY SADB_DUMP
1214  * messages.
1215  */
1216 static void
1217 ah_dump(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack)
1218 {
1219         int error;
1220         sadb_msg_t *samsg;
1221 
1222         /*
1223          * Dump each fanout, bailing if error is non-zero.
1224          */
1225 
1226         error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi, &ahstack->ah_sadb.s_v4);
1227         if (error != 0)
1228                 goto bail;
1229 
1230         error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi, &ahstack->ah_sadb.s_v6);
1231 bail:
1232         ASSERT(mp->b_cont != NULL);
1233         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
1234         samsg->sadb_msg_errno = (uint8_t)error;
1235         sadb_pfkey_echo(ahstack->ah_pfkey_q, mp,
1236             (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
1237 }
1238 
1239 /*
1240  * First-cut reality check for an inbound PF_KEY message.
1241  */
1242 static boolean_t
1243 ah_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
1244     ipsecah_stack_t *ahstack)
1245 {
1246         int diagnostic;
1247 
1248         if (mp->b_cont == NULL) {
1249                 freemsg(mp);
1250                 return (B_TRUE);
1251         }
1252 
1253         if (ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1254                 diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
1255                 goto badmsg;
1256         }
1257         if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
1258                 diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
1259                 goto badmsg;
1260         }
1261         if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
1262             ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
1263                 diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
1264                 goto badmsg;
1265         }
1266         return (B_FALSE);       /* False ==> no failures */
1267 
1268 badmsg:
1269         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL,
1270             diagnostic, ksi->ks_in_serial);
1271         return (B_TRUE);        /* True ==> failures */
1272 }
1273 
1274 /*
1275  * AH parsing of PF_KEY messages.  Keysock did most of the really silly
1276  * error cases.  What I receive is a fully-formed, syntactically legal
1277  * PF_KEY message.  I then need to check semantics...
1278  *
1279  * This code may become common to AH and ESP.  Stay tuned.
1280  *
1281  * I also make the assumption that db_ref's are cool.  If this assumption
1282  * is wrong, this means that someone other than keysock or me has been
1283  * mucking with PF_KEY messages.
1284  */
1285 static void
1286 ah_parse_pfkey(mblk_t *mp, ipsecah_stack_t *ahstack)
1287 {
1288         mblk_t *msg = mp->b_cont;
1289         sadb_msg_t *samsg;
1290         keysock_in_t *ksi;
1291         int error;
1292         int diagnostic = SADB_X_DIAGNOSTIC_NONE;
1293 
1294         ASSERT(msg != NULL);
1295 
1296         samsg = (sadb_msg_t *)msg->b_rptr;
1297         ksi = (keysock_in_t *)mp->b_rptr;
1298 
1299         /*
1300          * If applicable, convert unspecified AF_INET6 to unspecified
1301          * AF_INET.
1302          */
1303         if (!sadb_addrfix(ksi, ahstack->ah_pfkey_q, mp,
1304             ahstack->ipsecah_netstack) ||
1305             ah_pfkey_reality_failures(mp, ksi, ahstack)) {
1306                 return;
1307         }
1308 
1309         switch (samsg->sadb_msg_type) {
1310         case SADB_ADD:
1311                 error = ah_add_sa(mp, ksi, &diagnostic,
1312                     ahstack->ipsecah_netstack);
1313                 if (error != 0) {
1314                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1315                             diagnostic, ksi->ks_in_serial);
1316                 }
1317                 /* else ah_add_sa() took care of things. */
1318                 break;
1319         case SADB_DELETE:
1320         case SADB_X_DELPAIR:
1321         case SADB_X_DELPAIR_STATE:
1322                 error = ah_del_sa(mp, ksi, &diagnostic, ahstack,
1323                     samsg->sadb_msg_type);
1324                 if (error != 0) {
1325                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1326                             diagnostic, ksi->ks_in_serial);
1327                 }
1328                 /* Else ah_del_sa() took care of things. */
1329                 break;
1330         case SADB_GET:
1331                 error = sadb_delget_sa(mp, ksi, &ahstack->ah_sadb, &diagnostic,
1332                     ahstack->ah_pfkey_q, samsg->sadb_msg_type);
1333                 if (error != 0) {
1334                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1335                             diagnostic, ksi->ks_in_serial);
1336                 }
1337                 /* Else sadb_get_sa() took care of things. */
1338                 break;
1339         case SADB_FLUSH:
1340                 sadbp_flush(&ahstack->ah_sadb, ahstack->ipsecah_netstack);
1341                 sadb_pfkey_echo(ahstack->ah_pfkey_q, mp, samsg, ksi, NULL);
1342                 break;
1343         case SADB_REGISTER:
1344                 /*
1345                  * Hmmm, let's do it!  Check for extensions (there should
1346                  * be none), extract the fields, call ah_register_out(),
1347                  * then either free or report an error.
1348                  *
1349                  * Keysock takes care of the PF_KEY bookkeeping for this.
1350                  */
1351                 if (ah_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
1352                     ksi->ks_in_serial, ahstack, msg_getcred(mp, NULL))) {
1353                         freemsg(mp);
1354                 } else {
1355                         /*
1356                          * Only way this path hits is if there is a memory
1357                          * failure.  It will not return B_FALSE because of
1358                          * lack of ah_pfkey_q if I am in wput().
1359                          */
1360                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM,
1361                             diagnostic, ksi->ks_in_serial);
1362                 }
1363                 break;
1364         case SADB_UPDATE:
1365         case SADB_X_UPDATEPAIR:
1366                 /*
1367                  * Find a larval, if not there, find a full one and get
1368                  * strict.
1369                  */
1370                 error = ah_update_sa(mp, ksi, &diagnostic, ahstack,
1371                     samsg->sadb_msg_type);
1372                 if (error != 0) {
1373                         sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1374                             diagnostic, ksi->ks_in_serial);
1375                 }
1376                 /* else ah_update_sa() took care of things. */
1377                 break;
1378         case SADB_GETSPI:
1379                 /*
1380                  * Reserve a new larval entry.
1381                  */
1382                 ah_getspi(mp, ksi, ahstack);
1383                 break;
1384         case SADB_ACQUIRE:
1385                 /*
1386                  * Find larval and/or ACQUIRE record and kill it (them), I'm
1387                  * most likely an error.  Inbound ACQUIRE messages should only
1388                  * have the base header.
1389                  */
1390                 sadb_in_acquire(samsg, &ahstack->ah_sadb, ahstack->ah_pfkey_q,
1391                     ahstack->ipsecah_netstack);
1392                 freemsg(mp);
1393                 break;
1394         case SADB_DUMP:
1395                 /*
1396                  * Dump all entries.
1397                  */
1398                 ah_dump(mp, ksi, ahstack);
1399                 /* ah_dump will take care of the return message, etc. */
1400                 break;
1401         case SADB_EXPIRE:
1402                 /* Should never reach me. */
1403                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EOPNOTSUPP,
1404                     diagnostic, ksi->ks_in_serial);
1405                 break;
1406         default:
1407                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL,
1408                     SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
1409                 break;
1410         }
1411 }
1412 
1413 /*
1414  * Handle case where PF_KEY says it can't find a keysock for one of my
1415  * ACQUIRE messages.
1416  */
1417 static void
1418 ah_keysock_no_socket(mblk_t *mp, ipsecah_stack_t *ahstack)
1419 {
1420         sadb_msg_t *samsg;
1421         keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
1422 
1423         if (mp->b_cont == NULL) {
1424                 freemsg(mp);
1425                 return;
1426         }
1427         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
1428 
1429         /*
1430          * If keysock can't find any registered, delete the acquire record
1431          * immediately, and handle errors.
1432          */
1433         if (samsg->sadb_msg_type == SADB_ACQUIRE) {
1434                 samsg->sadb_msg_errno = kse->ks_err_errno;
1435                 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1436                 /*
1437                  * Use the write-side of the ah_pfkey_q
1438                  */
1439                 sadb_in_acquire(samsg, &ahstack->ah_sadb,
1440                     WR(ahstack->ah_pfkey_q), ahstack->ipsecah_netstack);
1441         }
1442 
1443         freemsg(mp);
1444 }
1445 
1446 /*
1447  * AH module write put routine.
1448  */
1449 static void
1450 ipsecah_wput(queue_t *q, mblk_t *mp)
1451 {
1452         ipsec_info_t *ii;
1453         struct iocblk *iocp;
1454         ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr;
1455 
1456         ah3dbg(ahstack, ("In ah_wput().\n"));
1457 
1458         /* NOTE:  Each case must take care of freeing or passing mp. */
1459         switch (mp->b_datap->db_type) {
1460         case M_CTL:
1461                 if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
1462                         /* Not big enough message. */
1463                         freemsg(mp);
1464                         break;
1465                 }
1466                 ii = (ipsec_info_t *)mp->b_rptr;
1467 
1468                 switch (ii->ipsec_info_type) {
1469                 case KEYSOCK_OUT_ERR:
1470                         ah1dbg(ahstack, ("Got KEYSOCK_OUT_ERR message.\n"));
1471                         ah_keysock_no_socket(mp, ahstack);
1472                         break;
1473                 case KEYSOCK_IN:
1474                         AH_BUMP_STAT(ahstack, keysock_in);
1475                         ah3dbg(ahstack, ("Got KEYSOCK_IN message.\n"));
1476 
1477                         /* Parse the message. */
1478                         ah_parse_pfkey(mp, ahstack);
1479                         break;
1480                 case KEYSOCK_HELLO:
1481                         sadb_keysock_hello(&ahstack->ah_pfkey_q, q, mp,
1482                             ah_ager, (void *)ahstack, &ahstack->ah_event,
1483                             SADB_SATYPE_AH);
1484                         break;
1485                 default:
1486                         ah1dbg(ahstack, ("Got M_CTL from above of 0x%x.\n",
1487                             ii->ipsec_info_type));
1488                         freemsg(mp);
1489                         break;
1490                 }
1491                 break;
1492         case M_IOCTL:
1493                 iocp = (struct iocblk *)mp->b_rptr;
1494                 switch (iocp->ioc_cmd) {
1495                 case ND_SET:
1496                 case ND_GET:
1497                         if (nd_getset(q, ahstack->ipsecah_g_nd, mp)) {
1498                                 qreply(q, mp);
1499                                 return;
1500                         } else {
1501                                 iocp->ioc_error = ENOENT;
1502                         }
1503                         /* FALLTHRU */
1504                 default:
1505                         /* We really don't support any other ioctls, do we? */
1506 
1507                         /* Return EINVAL */
1508                         if (iocp->ioc_error != ENOENT)
1509                                 iocp->ioc_error = EINVAL;
1510                         iocp->ioc_count = 0;
1511                         mp->b_datap->db_type = M_IOCACK;
1512                         qreply(q, mp);
1513                         return;
1514                 }
1515         default:
1516                 ah3dbg(ahstack,
1517                     ("Got default message, type %d, passing to IP.\n",
1518                     mp->b_datap->db_type));
1519                 putnext(q, mp);
1520         }
1521 }
1522 
1523 /* Refactor me */
1524 /*
1525  * Updating use times can be tricky business if the ipsa_haspeer flag is
1526  * set.  This function is called once in an SA's lifetime.
1527  *
1528  * Caller has to REFRELE "assoc" which is passed in.  This function has
1529  * to REFRELE any peer SA that is obtained.
1530  */
1531 static void
1532 ah_set_usetime(ipsa_t *assoc, boolean_t inbound)
1533 {
1534         ipsa_t *inassoc, *outassoc;
1535         isaf_t *bucket;
1536         sadb_t *sp;
1537         int outhash;
1538         boolean_t isv6;
1539         netstack_t      *ns = assoc->ipsa_netstack;
1540         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
1541 
1542         /* No peer?  No problem! */
1543         if (!assoc->ipsa_haspeer) {
1544                 sadb_set_usetime(assoc);
1545                 return;
1546         }
1547 
1548         /*
1549          * Otherwise, we want to grab both the original assoc and its peer.
1550          * There might be a race for this, but if it's a real race, the times
1551          * will be out-of-synch by at most a second, and since our time
1552          * granularity is a second, this won't be a problem.
1553          *
1554          * If we need tight synchronization on the peer SA, then we need to
1555          * reconsider.
1556          */
1557 
1558         /* Use address family to select IPv6/IPv4 */
1559         isv6 = (assoc->ipsa_addrfam == AF_INET6);
1560         if (isv6) {
1561                 sp = &ahstack->ah_sadb.s_v6;
1562         } else {
1563                 sp = &ahstack->ah_sadb.s_v4;
1564                 ASSERT(assoc->ipsa_addrfam == AF_INET);
1565         }
1566         if (inbound) {
1567                 inassoc = assoc;
1568                 if (isv6)
1569                         outhash = OUTBOUND_HASH_V6(sp,
1570                             *((in6_addr_t *)&inassoc->ipsa_dstaddr));
1571                 else
1572                         outhash = OUTBOUND_HASH_V4(sp,
1573                             *((ipaddr_t *)&inassoc->ipsa_dstaddr));
1574                 bucket = &sp->sdb_of[outhash];
1575 
1576                 mutex_enter(&bucket->isaf_lock);
1577                 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1578                     inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1579                     inassoc->ipsa_addrfam);
1580                 mutex_exit(&bucket->isaf_lock);
1581                 if (outassoc == NULL) {
1582                         /* Q: Do we wish to set haspeer == B_FALSE? */
1583                         ah0dbg(("ah_set_usetime: "
1584                             "can't find peer for inbound.\n"));
1585                         sadb_set_usetime(inassoc);
1586                         return;
1587                 }
1588         } else {
1589                 outassoc = assoc;
1590                 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1591                 mutex_enter(&bucket->isaf_lock);
1592                 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1593                     outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1594                     outassoc->ipsa_addrfam);
1595                 mutex_exit(&bucket->isaf_lock);
1596                 if (inassoc == NULL) {
1597                         /* Q: Do we wish to set haspeer == B_FALSE? */
1598                         ah0dbg(("ah_set_usetime: "
1599                             "can't find peer for outbound.\n"));
1600                         sadb_set_usetime(outassoc);
1601                         return;
1602                 }
1603         }
1604 
1605         /* Update usetime on both. */
1606         sadb_set_usetime(inassoc);
1607         sadb_set_usetime(outassoc);
1608 
1609         /*
1610          * REFRELE any peer SA.
1611          *
1612          * Because of the multi-line macro nature of IPSA_REFRELE, keep
1613          * them in { }.
1614          */
1615         if (inbound) {
1616                 IPSA_REFRELE(outassoc);
1617         } else {
1618                 IPSA_REFRELE(inassoc);
1619         }
1620 }
1621 
1622 /* Refactor me */
1623 /*
1624  * Add a number of bytes to what the SA has protected so far.  Return
1625  * B_TRUE if the SA can still protect that many bytes.
1626  *
1627  * Caller must REFRELE the passed-in assoc.  This function must REFRELE
1628  * any obtained peer SA.
1629  */
1630 static boolean_t
1631 ah_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
1632 {
1633         ipsa_t *inassoc, *outassoc;
1634         isaf_t *bucket;
1635         boolean_t inrc, outrc, isv6;
1636         sadb_t *sp;
1637         int outhash;
1638         netstack_t      *ns = assoc->ipsa_netstack;
1639         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
1640 
1641         /* No peer?  No problem! */
1642         if (!assoc->ipsa_haspeer) {
1643                 return (sadb_age_bytes(ahstack->ah_pfkey_q, assoc, bytes,
1644                     B_TRUE));
1645         }
1646 
1647         /*
1648          * Otherwise, we want to grab both the original assoc and its peer.
1649          * There might be a race for this, but if it's a real race, two
1650          * expire messages may occur.  We limit this by only sending the
1651          * expire message on one of the peers, we'll pick the inbound
1652          * arbitrarily.
1653          *
1654          * If we need tight synchronization on the peer SA, then we need to
1655          * reconsider.
1656          */
1657 
1658         /* Pick v4/v6 bucket based on addrfam. */
1659         isv6 = (assoc->ipsa_addrfam == AF_INET6);
1660         if (isv6) {
1661                 sp = &ahstack->ah_sadb.s_v6;
1662         } else {
1663                 sp = &ahstack->ah_sadb.s_v4;
1664                 ASSERT(assoc->ipsa_addrfam == AF_INET);
1665         }
1666         if (inbound) {
1667                 inassoc = assoc;
1668                 if (isv6)
1669                         outhash = OUTBOUND_HASH_V6(sp,
1670                             *((in6_addr_t *)&inassoc->ipsa_dstaddr));
1671                 else
1672                         outhash = OUTBOUND_HASH_V4(sp,
1673                             *((ipaddr_t *)&inassoc->ipsa_dstaddr));
1674                 bucket = &sp->sdb_of[outhash];
1675                 mutex_enter(&bucket->isaf_lock);
1676                 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1677                     inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1678                     inassoc->ipsa_addrfam);
1679                 mutex_exit(&bucket->isaf_lock);
1680                 if (outassoc == NULL) {
1681                         /* Q: Do we wish to set haspeer == B_FALSE? */
1682                         ah0dbg(("ah_age_bytes: "
1683                             "can't find peer for inbound.\n"));
1684                         return (sadb_age_bytes(ahstack->ah_pfkey_q, inassoc,
1685                             bytes, B_TRUE));
1686                 }
1687         } else {
1688                 outassoc = assoc;
1689                 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1690                 mutex_enter(&bucket->isaf_lock);
1691                 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1692                     outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1693                     outassoc->ipsa_addrfam);
1694                 mutex_exit(&bucket->isaf_lock);
1695                 if (inassoc == NULL) {
1696                         /* Q: Do we wish to set haspeer == B_FALSE? */
1697                         ah0dbg(("ah_age_bytes: "
1698                             "can't find peer for outbound.\n"));
1699                         return (sadb_age_bytes(ahstack->ah_pfkey_q, outassoc,
1700                             bytes, B_TRUE));
1701                 }
1702         }
1703 
1704         inrc = sadb_age_bytes(ahstack->ah_pfkey_q, inassoc, bytes, B_TRUE);
1705         outrc = sadb_age_bytes(ahstack->ah_pfkey_q, outassoc, bytes, B_FALSE);
1706 
1707         /*
1708          * REFRELE any peer SA.
1709          *
1710          * Because of the multi-line macro nature of IPSA_REFRELE, keep
1711          * them in { }.
1712          */
1713         if (inbound) {
1714                 IPSA_REFRELE(outassoc);
1715         } else {
1716                 IPSA_REFRELE(inassoc);
1717         }
1718 
1719         return (inrc && outrc);
1720 }
1721 
1722 /* Refactor me */
1723 /*
1724  * Handle the SADB_GETSPI message.  Create a larval SA.
1725  */
1726 static void
1727 ah_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack)
1728 {
1729         ipsa_t *newbie, *target;
1730         isaf_t *outbound, *inbound;
1731         int rc, diagnostic;
1732         sadb_sa_t *assoc;
1733         keysock_out_t *kso;
1734         uint32_t newspi;
1735 
1736         /*
1737          * Randomly generate a proposed SPI value.
1738          */
1739         if (cl_inet_getspi != NULL) {
1740                 cl_inet_getspi(ahstack->ipsecah_netstack->netstack_stackid,
1741                     IPPROTO_AH, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
1742         } else {
1743                 (void) random_get_pseudo_bytes((uint8_t *)&newspi,
1744                     sizeof (uint32_t));
1745         }
1746         newbie = sadb_getspi(ksi, newspi, &diagnostic,
1747             ahstack->ipsecah_netstack, IPPROTO_AH);
1748 
1749         if (newbie == NULL) {
1750                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM, diagnostic,
1751                     ksi->ks_in_serial);
1752                 return;
1753         } else if (newbie == (ipsa_t *)-1) {
1754                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL, diagnostic,
1755                     ksi->ks_in_serial);
1756                 return;
1757         }
1758 
1759         /*
1760          * XXX - We may randomly collide.  We really should recover from this.
1761          *       Unfortunately, that could require spending way-too-much-time
1762          *       in here.  For now, let the user retry.
1763          */
1764 
1765         if (newbie->ipsa_addrfam == AF_INET6) {
1766                 outbound = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6,
1767                     *(uint32_t *)(newbie->ipsa_dstaddr));
1768                 inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v6,
1769                     newbie->ipsa_spi);
1770         } else {
1771                 outbound = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4,
1772                     *(uint32_t *)(newbie->ipsa_dstaddr));
1773                 inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v4,
1774                     newbie->ipsa_spi);
1775         }
1776 
1777         mutex_enter(&outbound->isaf_lock);
1778         mutex_enter(&inbound->isaf_lock);
1779 
1780         /*
1781          * Check for collisions (i.e. did sadb_getspi() return with something
1782          * that already exists?).
1783          *
1784          * Try outbound first.  Even though SADB_GETSPI is traditionally
1785          * for inbound SAs, you never know what a user might do.
1786          */
1787         target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
1788             newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
1789         if (target == NULL) {
1790                 target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
1791                     newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
1792                     newbie->ipsa_addrfam);
1793         }
1794 
1795         /*
1796          * I don't have collisions elsewhere!
1797          * (Nor will I because I'm still holding inbound/outbound locks.)
1798          */
1799 
1800         if (target != NULL) {
1801                 rc = EEXIST;
1802                 IPSA_REFRELE(target);
1803         } else {
1804                 /*
1805                  * sadb_insertassoc() also checks for collisions, so
1806                  * if there's a colliding larval entry, rc will be set
1807                  * to EEXIST.
1808                  */
1809                 rc = sadb_insertassoc(newbie, inbound);
1810                 newbie->ipsa_hardexpiretime = gethrestime_sec();
1811                 newbie->ipsa_hardexpiretime += ahstack->ipsecah_larval_timeout;
1812         }
1813 
1814         /*
1815          * Can exit outbound mutex.  Hold inbound until we're done with
1816          * newbie.
1817          */
1818         mutex_exit(&outbound->isaf_lock);
1819 
1820         if (rc != 0) {
1821                 mutex_exit(&inbound->isaf_lock);
1822                 IPSA_REFRELE(newbie);
1823                 sadb_pfkey_error(ahstack->ah_pfkey_q, mp, rc,
1824                     SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1825                 return;
1826         }
1827 
1828         /* Can write here because I'm still holding the bucket lock. */
1829         newbie->ipsa_type = SADB_SATYPE_AH;
1830 
1831         /*
1832          * Construct successful return message.  We have one thing going
1833          * for us in PF_KEY v2.  That's the fact that
1834          *      sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
1835          */
1836         assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
1837         assoc->sadb_sa_exttype = SADB_EXT_SA;
1838         assoc->sadb_sa_spi = newbie->ipsa_spi;
1839         *((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
1840         mutex_exit(&inbound->isaf_lock);
1841 
1842         /* Convert KEYSOCK_IN to KEYSOCK_OUT. */
1843         kso = (keysock_out_t *)ksi;
1844         kso->ks_out_len = sizeof (*kso);
1845         kso->ks_out_serial = ksi->ks_in_serial;
1846         kso->ks_out_type = KEYSOCK_OUT;
1847 
1848         /*
1849          * Can safely putnext() to ah_pfkey_q, because this is a turnaround
1850          * from the ah_pfkey_q.
1851          */
1852         putnext(ahstack->ah_pfkey_q, mp);
1853 }
1854 
1855 /*
1856  * IPv6 sends up the ICMP errors for validation and the removal of the AH
1857  * header.
1858  * If succesful, the mp has been modified to not include the AH header so
1859  * that the caller can fanout to the ULP's icmp error handler.
1860  */
1861 static mblk_t *
1862 ah_icmp_error_v6(mblk_t *mp, ip_recv_attr_t *ira, ipsecah_stack_t *ahstack)
1863 {
1864         ip6_t *ip6h, *oip6h;
1865         uint16_t hdr_length, ah_length;
1866         uint8_t *nexthdrp;
1867         ah_t *ah;
1868         icmp6_t *icmp6;
1869         isaf_t *isaf;
1870         ipsa_t *assoc;
1871         uint8_t *post_ah_ptr;
1872         ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
1873 
1874         /*
1875          * Eat the cost of a pullupmsg() for now.  It makes the rest of this
1876          * code far less convoluted.
1877          */
1878         if (!pullupmsg(mp, -1) ||
1879             !ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, &hdr_length,
1880             &nexthdrp) ||
1881             mp->b_rptr + hdr_length + sizeof (icmp6_t) + sizeof (ip6_t) +
1882             sizeof (ah_t) > mp->b_wptr) {
1883                 IP_AH_BUMP_STAT(ipss, in_discards);
1884                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1885                     DROPPER(ipss, ipds_ah_nomem),
1886                     &ahstack->ah_dropper);
1887                 return (NULL);
1888         }
1889 
1890         oip6h = (ip6_t *)mp->b_rptr;
1891         icmp6 = (icmp6_t *)((uint8_t *)oip6h + hdr_length);
1892         ip6h = (ip6_t *)(icmp6 + 1);
1893         if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
1894                 IP_AH_BUMP_STAT(ipss, in_discards);
1895                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1896                     DROPPER(ipss, ipds_ah_bad_v6_hdrs),
1897                     &ahstack->ah_dropper);
1898                 return (NULL);
1899         }
1900         ah = (ah_t *)((uint8_t *)ip6h + hdr_length);
1901 
1902         isaf = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6, ip6h->ip6_dst);
1903         mutex_enter(&isaf->isaf_lock);
1904         assoc = ipsec_getassocbyspi(isaf, ah->ah_spi,
1905             (uint32_t *)&ip6h->ip6_src, (uint32_t *)&ip6h->ip6_dst, AF_INET6);
1906         mutex_exit(&isaf->isaf_lock);
1907 
1908         if (assoc == NULL) {
1909                 IP_AH_BUMP_STAT(ipss, lookup_failure);
1910                 IP_AH_BUMP_STAT(ipss, in_discards);
1911                 if (ahstack->ipsecah_log_unknown_spi) {
1912                         ipsec_assocfailure(info.mi_idnum, 0, 0,
1913                             SL_CONSOLE | SL_WARN | SL_ERROR,
1914                             "Bad ICMP message - No association for the "
1915                             "attached AH header whose spi is 0x%x, "
1916                             "sender is 0x%x\n",
1917                             ah->ah_spi, &oip6h->ip6_src, AF_INET6,
1918                             ahstack->ipsecah_netstack);
1919                 }
1920                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1921                     DROPPER(ipss, ipds_ah_no_sa),
1922                     &ahstack->ah_dropper);
1923                 return (NULL);
1924         }
1925 
1926         IPSA_REFRELE(assoc);
1927 
1928         /*
1929          * There seems to be a valid association. If there is enough of AH
1930          * header remove it, otherwise bail.  One could check whether it has
1931          * complete AH header plus 8 bytes but it does not make sense if an
1932          * icmp error is returned for ICMP messages e.g ICMP time exceeded,
1933          * that are being sent up. Let the caller figure out.
1934          *
1935          * NOTE: ah_length is the number of 32 bit words minus 2.
1936          */
1937         ah_length = (ah->ah_length << 2) + 8;
1938         post_ah_ptr = (uint8_t *)ah + ah_length;
1939 
1940         if (post_ah_ptr > mp->b_wptr) {
1941                 IP_AH_BUMP_STAT(ipss, in_discards);
1942                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1943                     DROPPER(ipss, ipds_ah_bad_length),
1944                     &ahstack->ah_dropper);
1945                 return (NULL);
1946         }
1947 
1948         ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - ah_length);
1949         *nexthdrp = ah->ah_nexthdr;
1950         ovbcopy(post_ah_ptr, ah,
1951             (size_t)((uintptr_t)mp->b_wptr - (uintptr_t)post_ah_ptr));
1952         mp->b_wptr -= ah_length;
1953 
1954         return (mp);
1955 }
1956 
1957 /*
1958  * IP sends up the ICMP errors for validation and the removal of
1959  * the AH header.
1960  * If succesful, the mp has been modified to not include the AH header so
1961  * that the caller can fanout to the ULP's icmp error handler.
1962  */
1963 static mblk_t *
1964 ah_icmp_error_v4(mblk_t *mp, ip_recv_attr_t *ira, ipsecah_stack_t *ahstack)
1965 {
1966         mblk_t *mp1;
1967         icmph_t *icmph;
1968         int iph_hdr_length;
1969         int hdr_length;
1970         isaf_t *hptr;
1971         ipsa_t *assoc;
1972         int ah_length;
1973         ipha_t *ipha;
1974         ipha_t *oipha;
1975         ah_t *ah;
1976         uint32_t length;
1977         int alloc_size;
1978         uint8_t nexthdr;
1979         ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
1980 
1981         oipha = ipha = (ipha_t *)mp->b_rptr;
1982         iph_hdr_length = IPH_HDR_LENGTH(ipha);
1983         icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1984 
1985         ipha = (ipha_t *)&icmph[1];
1986         hdr_length = IPH_HDR_LENGTH(ipha);
1987 
1988         /*
1989          * See if we have enough to locate the SPI
1990          */
1991         if ((uchar_t *)ipha + hdr_length + 8 > mp->b_wptr) {
1992                 if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length + 8 -
1993                     mp->b_rptr)) {
1994                         ipsec_rl_strlog(ahstack->ipsecah_netstack,
1995                             info.mi_idnum, 0, 0,
1996                             SL_WARN | SL_ERROR,
1997                             "ICMP error: Small AH header\n");
1998                         IP_AH_BUMP_STAT(ipss, in_discards);
1999                         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2000                             DROPPER(ipss, ipds_ah_bad_length),
2001                             &ahstack->ah_dropper);
2002                         return (NULL);
2003                 }
2004                 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2005                 ipha = (ipha_t *)&icmph[1];
2006         }
2007 
2008         ah = (ah_t *)((uint8_t *)ipha + hdr_length);
2009         nexthdr = ah->ah_nexthdr;
2010 
2011         hptr = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4, ipha->ipha_dst);
2012         mutex_enter(&hptr->isaf_lock);
2013         assoc = ipsec_getassocbyspi(hptr, ah->ah_spi,
2014             (uint32_t *)&ipha->ipha_src, (uint32_t *)&ipha->ipha_dst, AF_INET);
2015         mutex_exit(&hptr->isaf_lock);
2016 
2017         if (assoc == NULL) {
2018                 IP_AH_BUMP_STAT(ipss, lookup_failure);
2019                 IP_AH_BUMP_STAT(ipss, in_discards);
2020                 if (ahstack->ipsecah_log_unknown_spi) {
2021                         ipsec_assocfailure(info.mi_idnum, 0, 0,
2022                             SL_CONSOLE | SL_WARN | SL_ERROR,
2023                             "Bad ICMP message - No association for the "
2024                             "attached AH header whose spi is 0x%x, "
2025                             "sender is 0x%x\n",
2026                             ah->ah_spi, &oipha->ipha_src, AF_INET,
2027                             ahstack->ipsecah_netstack);
2028                 }
2029                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2030                     DROPPER(ipss, ipds_ah_no_sa),
2031                     &ahstack->ah_dropper);
2032                 return (NULL);
2033         }
2034 
2035         IPSA_REFRELE(assoc);
2036         /*
2037          * There seems to be a valid association. If there
2038          * is enough of AH header remove it, otherwise remove
2039          * as much as possible and send it back. One could check
2040          * whether it has complete AH header plus 8 bytes but it
2041          * does not make sense if an icmp error is returned for
2042          * ICMP messages e.g ICMP time exceeded, that are being
2043          * sent up. Let the caller figure out.
2044          *
2045          * NOTE: ah_length is the number of 32 bit words minus 2.
2046          */
2047         ah_length = (ah->ah_length << 2) + 8;
2048 
2049         if ((uchar_t *)ipha + hdr_length + ah_length > mp->b_wptr) {
2050                 if (mp->b_cont == NULL) {
2051                         /*
2052                          * There is nothing to pullup. Just remove as
2053                          * much as possible. This is a common case for
2054                          * IPV4.
2055                          */
2056                         ah_length = (mp->b_wptr - ((uchar_t *)ipha +
2057                             hdr_length));
2058                         goto done;
2059                 }
2060                 /* Pullup the full ah header */
2061                 if (!pullupmsg(mp, (uchar_t *)ah + ah_length - mp->b_rptr)) {
2062                         /*
2063                          * pullupmsg could have failed if there was not
2064                          * enough to pullup or memory allocation failed.
2065                          * We tried hard, give up now.
2066                          */
2067                         IP_AH_BUMP_STAT(ipss, in_discards);
2068                         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2069                             DROPPER(ipss, ipds_ah_nomem),
2070                             &ahstack->ah_dropper);
2071                         return (NULL);
2072                 }
2073                 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2074                 ipha = (ipha_t *)&icmph[1];
2075         }
2076 done:
2077         /*
2078          * Remove the AH header and change the protocol.
2079          * Don't update the spi fields in the ip_recv_attr_t
2080          * as we are called just to validate the
2081          * message attached to the ICMP message.
2082          *
2083          * If we never pulled up since all of the message
2084          * is in one single mblk, we can't remove the AH header
2085          * by just setting the b_wptr to the beginning of the
2086          * AH header. We need to allocate a mblk that can hold
2087          * up until the inner IP header and copy them.
2088          */
2089         alloc_size = iph_hdr_length + sizeof (icmph_t) + hdr_length;
2090 
2091         if ((mp1 = allocb(alloc_size, BPRI_LO)) == NULL) {
2092                 IP_AH_BUMP_STAT(ipss, in_discards);
2093                 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2094                     DROPPER(ipss, ipds_ah_nomem),
2095                     &ahstack->ah_dropper);
2096                 return (NULL);
2097         }
2098         bcopy(mp->b_rptr, mp1->b_rptr, alloc_size);
2099         mp1->b_wptr += alloc_size;
2100 
2101         /*
2102          * Skip whatever we have copied and as much of AH header
2103          * possible. If we still have something left in the original
2104          * message, tag on.
2105          */
2106         mp->b_rptr = (uchar_t *)ipha + hdr_length + ah_length;
2107 
2108         if (mp->b_rptr != mp->b_wptr) {
2109                 mp1->b_cont = mp;
2110         } else {
2111                 if (mp->b_cont != NULL)
2112                         mp1->b_cont = mp->b_cont;
2113                 freeb(mp);
2114         }
2115 
2116         ipha = (ipha_t *)(mp1->b_rptr + iph_hdr_length + sizeof (icmph_t));
2117         ipha->ipha_protocol = nexthdr;
2118         length = ntohs(ipha->ipha_length);
2119         length -= ah_length;
2120         ipha->ipha_length = htons((uint16_t)length);
2121         ipha->ipha_hdr_checksum = 0;
2122         ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2123 
2124         return (mp1);
2125 }
2126 
2127 /*
2128  * IP calls this to validate the ICMP errors that
2129  * we got from the network.
2130  */
2131 mblk_t *
2132 ipsecah_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
2133 {
2134         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
2135         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
2136 
2137         if (ira->ira_flags & IRAF_IS_IPV4)
2138                 return (ah_icmp_error_v4(data_mp, ira, ahstack));
2139         else
2140                 return (ah_icmp_error_v6(data_mp, ira, ahstack));
2141 }
2142 
2143 static int
2144 ah_fix_tlv_options_v6(uint8_t *oi_opt, uint8_t *pi_opt, uint_t ehdrlen,
2145     uint8_t hdr_type, boolean_t copy_always)
2146 {
2147         uint8_t opt_type;
2148         uint_t optlen;
2149 
2150         ASSERT(hdr_type == IPPROTO_DSTOPTS || hdr_type == IPPROTO_HOPOPTS);
2151 
2152         /*
2153          * Copy the next header and hdr ext. len of the HOP-by-HOP
2154          * and Destination option.
2155          */
2156         *pi_opt++ = *oi_opt++;
2157         *pi_opt++ = *oi_opt++;
2158         ehdrlen -= 2;
2159 
2160         /*
2161          * Now handle all the TLV encoded options.
2162          */
2163         while (ehdrlen != 0) {
2164                 opt_type = *oi_opt;
2165 
2166                 if (opt_type == IP6OPT_PAD1) {
2167                         optlen = 1;
2168                 } else {
2169                         if (ehdrlen < 2)
2170                                 goto bad_opt;
2171                         optlen = 2 + oi_opt[1];
2172                         if (optlen > ehdrlen)
2173                                 goto bad_opt;
2174                 }
2175                 if (copy_always || !(opt_type & IP6OPT_MUTABLE)) {
2176                         bcopy(oi_opt, pi_opt, optlen);
2177                 } else {
2178                         if (optlen == 1) {
2179                                 *pi_opt = 0;
2180                         } else {
2181                                 /*
2182                                  * Copy the type and data length fields.
2183                                  * Zero the option data by skipping
2184                                  * option type and option data len
2185                                  * fields.
2186                                  */
2187                                 *pi_opt = *oi_opt;
2188                                 *(pi_opt + 1) = *(oi_opt + 1);
2189                                 bzero(pi_opt + 2, optlen - 2);
2190                         }
2191                 }
2192                 ehdrlen -= optlen;
2193                 oi_opt += optlen;
2194                 pi_opt += optlen;
2195         }
2196         return (0);
2197 bad_opt:
2198         return (-1);
2199 }
2200 
2201 /*
2202  * Construct a pseudo header for AH, processing all the options.
2203  *
2204  * oip6h is the IPv6 header of the incoming or outgoing packet.
2205  * ip6h is the pointer to the pseudo headers IPV6 header. All
2206  * the space needed for the options have been allocated including
2207  * the AH header.
2208  *
2209  * If copy_always is set, all the options that appear before AH are copied
2210  * blindly without checking for IP6OPT_MUTABLE. This is used by
2211  * ah_auth_out_done().  Please refer to that function for details.
2212  *
2213  * NOTE :
2214  *
2215  * *  AH header is never copied in this function even if copy_always
2216  *    is set. It just returns the ah_offset - offset of the AH header
2217  *    and the caller needs to do the copying. This is done so that we
2218  *    don't have pass extra arguments e.g. SA etc. and also,
2219  *    it is not needed when ah_auth_out_done is calling this function.
2220  */
2221 static uint_t
2222 ah_fix_phdr_v6(ip6_t *ip6h, ip6_t *oip6h, boolean_t outbound,
2223     boolean_t copy_always)
2224 {
2225         uint8_t *oi_opt;
2226         uint8_t *pi_opt;
2227         uint8_t nexthdr;
2228         uint8_t *prev_nexthdr;
2229         ip6_hbh_t *hbhhdr;
2230         ip6_dest_t *dsthdr = NULL;
2231         ip6_rthdr0_t *rthdr;
2232         int ehdrlen;
2233         ah_t *ah;
2234         int ret;
2235 
2236         /*
2237          * In the outbound case for source route, ULP has already moved
2238          * the first hop, which is now in ip6_dst. We need to re-arrange
2239          * the header to make it look like how it would appear in the
2240          * receiver i.e
2241          *
2242          * Because of ip_massage_options_v6 the header looks like
2243          * this :
2244          *
2245          * ip6_src = S, ip6_dst = I1. followed by I2,I3,D.
2246          *
2247          * When it reaches the receiver, it would look like
2248          *
2249          * ip6_src = S, ip6_dst = D. followed by I1,I2,I3.
2250          *
2251          * NOTE : We assume that there are no problems with the options
2252          * as IP should have already checked this.
2253          */
2254 
2255         oi_opt = (uchar_t *)&oip6h[1];
2256         pi_opt = (uchar_t *)&ip6h[1];
2257 
2258         /*
2259          * We set the prev_nexthdr properly in the pseudo header.
2260          * After we finish authentication and come back from the
2261          * algorithm module, pseudo header will become the real
2262          * IP header.
2263          */
2264         prev_nexthdr = (uint8_t *)&ip6h->ip6_nxt;
2265         nexthdr = oip6h->ip6_nxt;
2266         /* Assume IP has already stripped it */
2267         ASSERT(nexthdr != IPPROTO_FRAGMENT);
2268         ah = NULL;
2269         dsthdr = NULL;
2270         for (;;) {
2271                 switch (nexthdr) {
2272                 case IPPROTO_HOPOPTS:
2273                         hbhhdr = (ip6_hbh_t *)oi_opt;
2274                         nexthdr = hbhhdr->ip6h_nxt;
2275                         ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
2276                         ret = ah_fix_tlv_options_v6(oi_opt, pi_opt, ehdrlen,
2277                             IPPROTO_HOPOPTS, copy_always);
2278                         /*
2279                          * Return a zero offset indicating error if there
2280                          * was error.
2281                          */
2282                         if (ret == -1)
2283                                 return (0);
2284                         hbhhdr = (ip6_hbh_t *)pi_opt;
2285                         prev_nexthdr = (uint8_t *)&hbhhdr->ip6h_nxt;
2286                         break;
2287                 case IPPROTO_ROUTING:
2288                         rthdr = (ip6_rthdr0_t *)oi_opt;
2289                         nexthdr = rthdr->ip6r0_nxt;
2290                         ehdrlen = 8 * (rthdr->ip6r0_len + 1);
2291                         if (!copy_always && outbound) {
2292                                 int i, left;
2293                                 ip6_rthdr0_t *prthdr;
2294                                 in6_addr_t *ap, *pap;
2295 
2296                                 left = rthdr->ip6r0_segleft;
2297                                 prthdr = (ip6_rthdr0_t *)pi_opt;
2298                                 pap = (in6_addr_t *)(prthdr + 1);
2299                                 ap = (in6_addr_t *)(rthdr + 1);
2300                                 /*
2301                                  * First eight bytes except seg_left
2302                                  * does not change en route.
2303                                  */
2304                                 bcopy(oi_opt, pi_opt, 8);
2305                                 prthdr->ip6r0_segleft = 0;
2306                                 /*
2307                                  * First address has been moved to
2308                                  * the destination address of the
2309                                  * ip header by ip_massage_options_v6.
2310                                  * And the real destination address is
2311                                  * in the last address part of the
2312                                  * option.
2313                                  */
2314                                 *pap = oip6h->ip6_dst;
2315                                 for (i = 1; i < left - 1; i++)
2316                                         pap[i] = ap[i - 1];
2317                                 ip6h->ip6_dst = *(ap + left - 1);
2318                         } else {
2319                                 bcopy(oi_opt, pi_opt, ehdrlen);
2320                         }
2321                         rthdr = (ip6_rthdr0_t *)pi_opt;
2322                         prev_nexthdr = (uint8_t *)&rthdr->ip6r0_nxt;
2323                         break;
2324                 case IPPROTO_DSTOPTS:
2325                         /*
2326                          * Destination options are tricky.  If there is
2327                          * a terminal (e.g. non-IPv6-extension) header
2328                          * following the destination options, don't
2329                          * reset prev_nexthdr or advance the AH insertion
2330                          * point and just treat this as a terminal header.
2331                          *
2332                          * If this is an inbound packet, just deal with
2333                          * it as is.
2334                          */
2335                         dsthdr = (ip6_dest_t *)oi_opt;
2336                         /*
2337                          * XXX I hope common-subexpression elimination
2338                          * saves us the double-evaluate.
2339                          */
2340                         if (outbound && dsthdr->ip6d_nxt != IPPROTO_ROUTING &&
2341                             dsthdr->ip6d_nxt != IPPROTO_HOPOPTS)
2342                                 goto terminal_hdr;
2343                         nexthdr = dsthdr->ip6d_nxt;
2344                         ehdrlen = 8 * (dsthdr->ip6d_len + 1);
2345                         ret = ah_fix_tlv_options_v6(oi_opt, pi_opt, ehdrlen,
2346                             IPPROTO_DSTOPTS, copy_always);
2347                         /*
2348                          * Return a zero offset indicating error if there
2349                          * was error.
2350                          */
2351                         if (ret == -1)
2352                                 return (0);
2353                         break;
2354                 case IPPROTO_AH:
2355                         /*
2356                          * Be conservative in what you send.  We shouldn't
2357                          * see two same-scoped AH's in one packet.
2358                          * (Inner-IP-scoped AH will be hit by terminal
2359                          * header of IP or IPv6.)
2360                          */
2361                         ASSERT(!outbound);
2362                         return ((uint_t)(pi_opt - (uint8_t *)ip6h));
2363                 default:
2364                         ASSERT(outbound);
2365 terminal_hdr:
2366                         *prev_nexthdr = IPPROTO_AH;
2367                         ah = (ah_t *)pi_opt;
2368                         ah->ah_nexthdr = nexthdr;
2369                         return ((uint_t)(pi_opt - (uint8_t *)ip6h));
2370                 }
2371                 pi_opt += ehdrlen;
2372                 oi_opt += ehdrlen;
2373         }
2374         /* NOTREACHED */
2375 }
2376 
2377 static boolean_t
2378 ah_finish_up(ah_t *phdr_ah, ah_t *inbound_ah, ipsa_t *assoc,
2379     int ah_data_sz, int ah_align_sz, ipsecah_stack_t *ahstack)
2380 {
2381         int i;
2382 
2383         /*
2384          * Padding :
2385          *
2386          * 1) Authentication data may have to be padded
2387          * before ICV calculation if ICV is not a multiple
2388          * of 64 bits. This padding is arbitrary and transmitted
2389          * with the packet at the end of the authentication data.
2390          * Payload length should include the padding bytes.
2391          *
2392          * 2) Explicit padding of the whole datagram may be
2393          * required by the algorithm which need not be
2394          * transmitted. It is assumed that this will be taken
2395          * care by the algorithm module.
2396          */
2397         bzero(phdr_ah + 1, ah_data_sz); /* Zero out ICV for pseudo-hdr. */
2398 
2399         if (inbound_ah == NULL) {
2400                 /* Outbound AH datagram. */
2401 
2402                 phdr_ah->ah_length = (ah_align_sz >> 2) + 1;
2403                 phdr_ah->ah_reserved = 0;
2404                 phdr_ah->ah_spi = assoc->ipsa_spi;
2405 
2406                 phdr_ah->ah_replay =
2407                     htonl(atomic_inc_32_nv(&assoc->ipsa_replay));
2408                 if (phdr_ah->ah_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2409                         /*
2410                          * XXX We have replay counter wrapping.  We probably
2411                          * want to nuke this SA (and its peer).
2412                          */
2413                         ipsec_assocfailure(info.mi_idnum, 0, 0,
2414                             SL_ERROR | SL_CONSOLE | SL_WARN,
2415                             "Outbound AH SA (0x%x), dst %s has wrapped "
2416                             "sequence.\n", phdr_ah->ah_spi,
2417                             assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
2418                             ahstack->ipsecah_netstack);
2419 
2420                         sadb_replay_delete(assoc);
2421                         /* Caller will free phdr_mp and return NULL. */
2422                         return (B_FALSE);
2423                 }
2424 
2425                 if (ah_data_sz != ah_align_sz) {
2426                         uchar_t *pad = ((uchar_t *)phdr_ah + sizeof (ah_t) +
2427                             ah_data_sz);
2428 
2429                         for (i = 0; i < (ah_align_sz - ah_data_sz); i++) {
2430                                 pad[i] = (uchar_t)i;    /* Fill the padding */
2431                         }
2432                 }
2433         } else {
2434                 /* Inbound AH datagram. */
2435                 phdr_ah->ah_nexthdr = inbound_ah->ah_nexthdr;
2436                 phdr_ah->ah_length = inbound_ah->ah_length;
2437                 phdr_ah->ah_reserved = 0;
2438                 ASSERT(inbound_ah->ah_spi == assoc->ipsa_spi);
2439                 phdr_ah->ah_spi = inbound_ah->ah_spi;
2440                 phdr_ah->ah_replay = inbound_ah->ah_replay;
2441 
2442                 if (ah_data_sz != ah_align_sz) {
2443                         uchar_t *opad = ((uchar_t *)inbound_ah +
2444                             sizeof (ah_t) + ah_data_sz);
2445                         uchar_t *pad = ((uchar_t *)phdr_ah + sizeof (ah_t) +
2446                             ah_data_sz);
2447 
2448                         for (i = 0; i < (ah_align_sz - ah_data_sz); i++) {
2449                                 pad[i] = opad[i];       /* Copy the padding */
2450                         }
2451                 }
2452         }
2453 
2454         return (B_TRUE);
2455 }
2456 
2457 /*
2458  * Called upon failing the inbound ICV check. The message passed as
2459  * argument is freed.
2460  */
2461 static void
2462 ah_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
2463 {
2464         boolean_t       isv4 = (ira->ira_flags & IRAF_IS_IPV4);
2465         ipsa_t          *assoc = ira->ira_ipsec_ah_sa;
2466         int             af;
2467         void            *addr;
2468         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
2469         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
2470         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2471 
2472         ASSERT(mp->b_datap->db_type == M_DATA);
2473 
2474         mp->b_rptr -= ic->ic_skip_len;
2475 
2476         if (isv4) {
2477                 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2478                 addr = &ipha->ipha_dst;
2479                 af = AF_INET;
2480         } else {
2481                 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2482                 addr = &ip6h->ip6_dst;
2483                 af = AF_INET6;
2484         }
2485 
2486         /*
2487          * Log the event. Don't print to the console, block
2488          * potential denial-of-service attack.
2489          */
2490         AH_BUMP_STAT(ahstack, bad_auth);
2491 
2492         ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
2493             "AH Authentication failed spi %x, dst_addr %s",
2494             assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
2495 
2496         IP_AH_BUMP_STAT(ipss, in_discards);
2497         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2498             DROPPER(ipss, ipds_ah_bad_auth),
2499             &ahstack->ah_dropper);
2500 }
2501 
2502 /*
2503  * Kernel crypto framework callback invoked after completion of async
2504  * crypto requests for outbound packets.
2505  */
2506 static void
2507 ah_kcf_callback_outbound(void *arg, int status)
2508 {
2509         mblk_t          *mp = (mblk_t *)arg;
2510         mblk_t          *async_mp;
2511         netstack_t      *ns;
2512         ipsec_stack_t   *ipss;
2513         ipsecah_stack_t *ahstack;
2514         mblk_t          *data_mp;
2515         ip_xmit_attr_t  ixas;
2516         ipsec_crypto_t  *ic;
2517         ill_t           *ill;
2518 
2519         /*
2520          * First remove the ipsec_crypto_t mblk
2521          * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
2522          */
2523         async_mp = ipsec_remove_crypto_data(mp, &ic);
2524         ASSERT(async_mp != NULL);
2525 
2526         /*
2527          * Extract the ip_xmit_attr_t from the first mblk.
2528          * Verifies that the netstack and ill is still around; could
2529          * have vanished while kEf was doing its work.
2530          * On succesful return we have a nce_t and the ill/ipst can't
2531          * disappear until we do the nce_refrele in ixa_cleanup.
2532          */
2533         data_mp = async_mp->b_cont;
2534         async_mp->b_cont = NULL;
2535         if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
2536                 /* Disappeared on us - no ill/ipst for MIB */
2537                 if (ixas.ixa_nce != NULL) {
2538                         ill = ixas.ixa_nce->nce_ill;
2539                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2540                         ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
2541                 }
2542                 freemsg(data_mp);
2543                 goto done;
2544         }
2545         ns = ixas.ixa_ipst->ips_netstack;
2546         ahstack = ns->netstack_ipsecah;
2547         ipss = ns->netstack_ipsec;
2548         ill = ixas.ixa_nce->nce_ill;
2549 
2550         if (status == CRYPTO_SUCCESS) {
2551                 data_mp = ah_auth_out_done(data_mp, &ixas, ic);
2552                 if (data_mp == NULL)
2553                         goto done;
2554 
2555                 (void) ip_output_post_ipsec(data_mp, &ixas);
2556         } else {
2557                 /* Outbound shouldn't see invalid MAC */
2558                 ASSERT(status != CRYPTO_INVALID_MAC);
2559 
2560                 ah1dbg(ahstack,
2561                     ("ah_kcf_callback_outbound: crypto failed with 0x%x\n",
2562                     status));
2563                 AH_BUMP_STAT(ahstack, crypto_failures);
2564                 AH_BUMP_STAT(ahstack, out_discards);
2565 
2566                 ip_drop_packet(data_mp, B_FALSE, ill,
2567                     DROPPER(ipss, ipds_ah_crypto_failed),
2568                     &ahstack->ah_dropper);
2569                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2570         }
2571 done:
2572         ixa_cleanup(&ixas);
2573         (void) ipsec_free_crypto_data(mp);
2574 }
2575 
2576 /*
2577  * Kernel crypto framework callback invoked after completion of async
2578  * crypto requests for inbound packets.
2579  */
2580 static void
2581 ah_kcf_callback_inbound(void *arg, int status)
2582 {
2583         mblk_t          *mp = (mblk_t *)arg;
2584         mblk_t          *async_mp;
2585         netstack_t      *ns;
2586         ipsec_stack_t   *ipss;
2587         ipsecah_stack_t *ahstack;
2588         mblk_t          *data_mp;
2589         ip_recv_attr_t  iras;
2590         ipsec_crypto_t  *ic;
2591 
2592         /*
2593          * First remove the ipsec_crypto_t mblk
2594          * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
2595          */
2596         async_mp = ipsec_remove_crypto_data(mp, &ic);
2597         ASSERT(async_mp != NULL);
2598 
2599         /*
2600          * Extract the ip_xmit_attr_t from the first mblk.
2601          * Verifies that the netstack and ill is still around; could
2602          * have vanished while kEf was doing its work.
2603          */
2604         data_mp = async_mp->b_cont;
2605         async_mp->b_cont = NULL;
2606         if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
2607                 /* The ill or ip_stack_t disappeared on us */
2608                 ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
2609                 freemsg(data_mp);
2610                 goto done;
2611         }
2612         ns = iras.ira_ill->ill_ipst->ips_netstack;
2613         ahstack = ns->netstack_ipsecah;
2614         ipss = ns->netstack_ipsec;
2615 
2616         if (status == CRYPTO_SUCCESS) {
2617                 data_mp = ah_auth_in_done(data_mp, &iras, ic);
2618                 if (data_mp == NULL)
2619                         goto done;
2620 
2621                 /* finish IPsec processing */
2622                 ip_input_post_ipsec(data_mp, &iras);
2623 
2624         } else if (status == CRYPTO_INVALID_MAC) {
2625                 ah_log_bad_auth(data_mp, &iras, ic);
2626         } else {
2627                 ah1dbg(ahstack,
2628                     ("ah_kcf_callback_inbound: crypto failed with 0x%x\n",
2629                     status));
2630                 AH_BUMP_STAT(ahstack, crypto_failures);
2631                 IP_AH_BUMP_STAT(ipss, in_discards);
2632                 ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
2633                     DROPPER(ipss, ipds_ah_crypto_failed),
2634                     &ahstack->ah_dropper);
2635                 BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2636         }
2637 done:
2638         ira_cleanup(&iras, B_TRUE);
2639         (void) ipsec_free_crypto_data(mp);
2640 }
2641 
2642 /*
2643  * Invoked on kernel crypto failure during inbound and outbound processing.
2644  */
2645 static void
2646 ah_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
2647     ill_t *ill, ipsecah_stack_t *ahstack)
2648 {
2649         ipsec_stack_t   *ipss = ahstack->ipsecah_netstack->netstack_ipsec;
2650 
2651         ah1dbg(ahstack, ("crypto failed for %s AH with 0x%x\n",
2652             is_inbound ? "inbound" : "outbound", kef_rc));
2653         ip_drop_packet(data_mp, is_inbound, ill,
2654             DROPPER(ipss, ipds_ah_crypto_failed),
2655             &ahstack->ah_dropper);
2656         AH_BUMP_STAT(ahstack, crypto_failures);
2657         if (is_inbound)
2658                 IP_AH_BUMP_STAT(ipss, in_discards);
2659         else
2660                 AH_BUMP_STAT(ahstack, out_discards);
2661 }
2662 
2663 /*
2664  * Helper macros for the ah_submit_req_{inbound,outbound}() functions.
2665  */
2666 
/*
 * A statement-equivalent macro: fill in the crypto_call_req_t that _cr
 * points to so that the request is always queued and _callback is invoked
 * with _mp as its argument on completion.  _cr MUST point to a modifiable
 * crypto_call_req_t and is evaluated more than once.
 *
 * The do { } while (0) wrapper makes the expansion a single statement, so
 * the macro behaves correctly as the body of an unbraced if/else.
 */
#define AH_INIT_CALLREQ(_cr, _mp, _callback) do {                       \
        (_cr)->cr_flag = CRYPTO_SKIP_REQID | CRYPTO_ALWAYS_QUEUE;       \
        (_cr)->cr_callback_arg = (_mp);                                 \
        (_cr)->cr_callback_func = (_callback);                          \
} while (0)
2675 
/*
 * Describe the message to be MAC'ed: point the crypto data descriptor
 * `data' at the mblk chain `mblk', starting at offset 0 and covering
 * `msglen' bytes.  `data' is evaluated more than once.
 *
 * Wrapped in do { } while (0) so that "AH_INIT_CRYPTO_DATA(...);" is
 * exactly one statement, even under an unbraced if/else.
 */
#define AH_INIT_CRYPTO_DATA(data, msglen, mblk) do {                    \
        (data)->cd_format = CRYPTO_DATA_MBLK;                           \
        (data)->cd_mp = (mblk);                                         \
        (data)->cd_offset = 0;                                          \
        (data)->cd_length = (msglen);                                   \
} while (0)
2682 
/*
 * Describe the ICV buffer: set the crypto data descriptor `mac' up as a
 * raw buffer of `icvlen' bytes located at `icvbuf'.  `mac' and `icvlen'
 * are evaluated more than once, so pass side-effect-free expressions.
 *
 * Wrapped in do { } while (0) so that "AH_INIT_CRYPTO_MAC(...);" is
 * exactly one statement, even under an unbraced if/else.
 */
#define AH_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) do {                    \
        (mac)->cd_format = CRYPTO_DATA_RAW;                             \
        (mac)->cd_offset = 0;                                           \
        (mac)->cd_length = (icvlen);                                    \
        (mac)->cd_raw.iov_base = (icvbuf);                              \
        (mac)->cd_raw.iov_len = (icvlen);                               \
} while (0)
2690 
2691 /*
2692  * Submit an inbound packet for processing by the crypto framework.
2693  */
2694 static mblk_t *
2695 ah_submit_req_inbound(mblk_t *phdr_mp, ip_recv_attr_t *ira,
2696     size_t skip_len, uint32_t ah_offset, ipsa_t *assoc)
2697 {
2698         int kef_rc;
2699         mblk_t *mp;
2700         crypto_call_req_t call_req, *callrp;
2701         uint_t icv_len = assoc->ipsa_mac_len;
2702         crypto_ctx_template_t ctx_tmpl;
2703         ipsecah_stack_t *ahstack;
2704         ipsec_crypto_t  *ic, icstack;
2705         boolean_t force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2706 
2707         ahstack = ira->ira_ill->ill_ipst->ips_netstack->netstack_ipsecah;
2708 
2709         ASSERT(phdr_mp != NULL);
2710         ASSERT(phdr_mp->b_datap->db_type == M_DATA);
2711 
2712         if (force) {
2713                 /* We are doing asynch; allocate mblks to hold state */
2714                 if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
2715                     (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2716                         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2717                         ip_drop_input("ipIfStatsInDiscards", phdr_mp,
2718                             ira->ira_ill);
2719                         freemsg(phdr_mp);
2720                         return (NULL);
2721                 }
2722 
2723                 linkb(mp, phdr_mp);
2724                 callrp = &call_req;
2725                 AH_INIT_CALLREQ(callrp, mp, ah_kcf_callback_inbound);
2726         } else {
2727                 /*
2728                  * If we know we are going to do sync then ipsec_crypto_t
2729                  * should be on the stack.
2730                  */
2731                 ic = &icstack;
2732                 bzero(ic, sizeof (*ic));
2733                 callrp = NULL;
2734         }
2735 
2736         /* init arguments for the crypto framework */
2737         AH_INIT_CRYPTO_DATA(&ic->ic_crypto_data, AH_MSGSIZE(phdr_mp),
2738             phdr_mp);
2739 
2740         AH_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, icv_len,
2741             (char *)phdr_mp->b_cont->b_rptr - skip_len + ah_offset +
2742             sizeof (ah_t));
2743 
2744         ic->ic_skip_len = skip_len;
2745 
2746         IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, ctx_tmpl);
2747 
2748         /* call KEF to do the MAC operation */
2749         kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
2750             &ic->ic_crypto_data, &assoc->ipsa_kcfauthkey, ctx_tmpl,
2751             &ic->ic_crypto_mac, callrp);
2752 
2753         switch (kef_rc) {
2754         case CRYPTO_SUCCESS:
2755                 AH_BUMP_STAT(ahstack, crypto_sync);
2756                 phdr_mp = ah_auth_in_done(phdr_mp, ira, ic);
2757                 if (force) {
2758                         /* Free mp after we are done with ic */
2759                         mp = ipsec_free_crypto_data(mp);
2760                         (void) ip_recv_attr_free_mblk(mp);
2761                 }
2762                 return (phdr_mp);
2763         case CRYPTO_QUEUED:
2764                 /* ah_kcf_callback_inbound() will be invoked on completion */
2765                 AH_BUMP_STAT(ahstack, crypto_async);
2766                 return (NULL);
2767         case CRYPTO_INVALID_MAC:
2768                 /* Free mp after we are done with ic */
2769                 AH_BUMP_STAT(ahstack, crypto_sync);
2770                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2771                 ah_log_bad_auth(phdr_mp, ira, ic);
2772                 /* phdr_mp was passed to ip_drop_packet */
2773                 if (force) {
2774                         mp = ipsec_free_crypto_data(mp);
2775                         (void) ip_recv_attr_free_mblk(mp);
2776                 }
2777                 return (NULL);
2778         }
2779 
2780         if (force) {
2781                 mp = ipsec_free_crypto_data(mp);
2782                 phdr_mp = ip_recv_attr_free_mblk(mp);
2783         }
2784         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2785         ah_crypto_failed(phdr_mp, B_TRUE, kef_rc, ira->ira_ill, ahstack);
2786         /* phdr_mp was passed to ip_drop_packet */
2787         return (NULL);
2788 }
2789 
2790 /*
2791  * Submit an outbound packet for processing by the crypto framework.
2792  */
2793 static mblk_t *
2794 ah_submit_req_outbound(mblk_t *phdr_mp, ip_xmit_attr_t *ixa,
2795     size_t skip_len, ipsa_t *assoc)
2796 {
2797         int kef_rc;
2798         mblk_t *mp;
2799         crypto_call_req_t call_req, *callrp;
2800         uint_t icv_len = assoc->ipsa_mac_len;
2801         ipsecah_stack_t *ahstack;
2802         ipsec_crypto_t  *ic, icstack;
2803         ill_t           *ill = ixa->ixa_nce->nce_ill;
2804         boolean_t force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2805 
2806         ahstack = ill->ill_ipst->ips_netstack->netstack_ipsecah;
2807 
2808         ASSERT(phdr_mp != NULL);
2809         ASSERT(phdr_mp->b_datap->db_type == M_DATA);
2810 
2811         if (force) {
2812                 /* We are doing asynch; allocate mblks to hold state */
2813                 if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
2814                     (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2815                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2816                         ip_drop_output("ipIfStatsOutDiscards", phdr_mp, ill);
2817                         freemsg(phdr_mp);
2818                         return (NULL);
2819                 }
2820                 linkb(mp, phdr_mp);
2821                 callrp = &call_req;
2822                 AH_INIT_CALLREQ(callrp, mp, ah_kcf_callback_outbound);
2823         } else {
2824                 /*
2825                  * If we know we are going to do sync then ipsec_crypto_t
2826                  * should be on the stack.
2827                  */
2828                 ic = &icstack;
2829                 bzero(ic, sizeof (*ic));
2830                 callrp = NULL;
2831         }
2832 
2833         /* init arguments for the crypto framework */
2834         AH_INIT_CRYPTO_DATA(&ic->ic_crypto_data, AH_MSGSIZE(phdr_mp),
2835             phdr_mp);
2836 
2837         AH_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, icv_len,
2838             (char *)phdr_mp->b_wptr);
2839 
2840         ic->ic_skip_len = skip_len;
2841 
2842         ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
2843 
2844         /* call KEF to do the MAC operation */
2845         kef_rc = crypto_mac(&assoc->ipsa_amech, &ic->ic_crypto_data,
2846             &assoc->ipsa_kcfauthkey, assoc->ipsa_authtmpl,
2847             &ic->ic_crypto_mac, callrp);
2848 
2849         switch (kef_rc) {
2850         case CRYPTO_SUCCESS:
2851                 AH_BUMP_STAT(ahstack, crypto_sync);
2852                 phdr_mp = ah_auth_out_done(phdr_mp, ixa, ic);
2853                 if (force) {
2854                         /* Free mp after we are done with ic */
2855                         mp = ipsec_free_crypto_data(mp);
2856                         (void) ip_xmit_attr_free_mblk(mp);
2857                 }
2858                 return (phdr_mp);
2859         case CRYPTO_QUEUED:
2860                 /* ah_kcf_callback_outbound() will be invoked on completion */
2861                 AH_BUMP_STAT(ahstack, crypto_async);
2862                 return (NULL);
2863         }
2864 
2865         if (force) {
2866                 mp = ipsec_free_crypto_data(mp);
2867                 phdr_mp = ip_xmit_attr_free_mblk(mp);
2868         }
2869         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2870         ah_crypto_failed(phdr_mp, B_FALSE, kef_rc, NULL, ahstack);
2871         /* phdr_mp was passed to ip_drop_packet */
2872         return (NULL);
2873 }
2874 
2875 /*
2876  * This function constructs a pseudo header by looking at the IP header
2877  * and options if any. This is called for both outbound and inbound,
2878  * before computing the ICV.
2879  */
2880 static mblk_t *
2881 ah_process_ip_options_v6(mblk_t *mp, ipsa_t *assoc, int *length_to_skip,
2882     uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack)
2883 {
2884         ip6_t   *ip6h;
2885         ip6_t   *oip6h;
2886         mblk_t  *phdr_mp;
2887         int option_length;
2888         uint_t  ah_align_sz;
2889         uint_t ah_offset;
2890         int hdr_size;
2891 
2892         /*
2893          * Allocate space for the authentication data also. It is
2894          * useful both during the ICV calculation where we need to
2895          * feed in zeroes and while sending the datagram back to IP
2896          * where we will be using the same space.
2897          *
2898          * We need to allocate space for padding bytes if it is not
2899          * a multiple of IPV6_PADDING_ALIGN.
2900          *
2901          * In addition, we allocate space for the ICV computed by
2902          * the kernel crypto framework, saving us a separate kmem
2903          * allocation down the road.
2904          */
2905 
2906         ah_align_sz = P2ALIGN(ah_data_sz + IPV6_PADDING_ALIGN - 1,
2907             IPV6_PADDING_ALIGN);
2908 
2909         ASSERT(ah_align_sz >= ah_data_sz);
2910 
2911         hdr_size = ipsec_ah_get_hdr_size_v6(mp, B_FALSE);
2912         option_length = hdr_size - IPV6_HDR_LEN;
2913 
2914         /* This was not included in ipsec_ah_get_hdr_size_v6() */
2915         hdr_size += (sizeof (ah_t) + ah_align_sz);
2916 
2917         if (!outbound && (MBLKL(mp) < hdr_size)) {
2918                 /*
2919                  * We have post-AH header options in a separate mblk,
2920                  * a pullup is required.
2921                  */
2922                 if (!pullupmsg(mp, hdr_size))
2923                         return (NULL);
2924         }
2925 
2926         if ((phdr_mp = allocb_tmpl(hdr_size + ah_data_sz, mp)) == NULL) {
2927                 return (NULL);
2928         }
2929 
2930         oip6h = (ip6_t *)mp->b_rptr;
2931 
2932         /*
2933          * Form the basic IP header first. Zero out the header
2934          * so that the mutable fields are zeroed out.
2935          */
2936         ip6h = (ip6_t *)phdr_mp->b_rptr;
2937         bzero(ip6h, sizeof (ip6_t));
2938         ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2939 
2940         if (outbound) {
2941                 /*
2942                  * Include the size of AH and authentication data.
2943                  * This is how our recipient would compute the
2944                  * authentication data. Look at what we do in the
2945                  * inbound case below.
2946                  */
2947                 ip6h->ip6_plen = htons(ntohs(oip6h->ip6_plen) +
2948                     sizeof (ah_t) + ah_align_sz);
2949         } else {
2950                 ip6h->ip6_plen = oip6h->ip6_plen;
2951         }
2952 
2953         ip6h->ip6_src = oip6h->ip6_src;
2954         ip6h->ip6_dst = oip6h->ip6_dst;
2955 
2956         *length_to_skip = IPV6_HDR_LEN;
2957         if (option_length == 0) {
2958                 /* Form the AH header */
2959                 ip6h->ip6_nxt = IPPROTO_AH;
2960                 ((ah_t *)(ip6h + 1))->ah_nexthdr = oip6h->ip6_nxt;
2961                 ah_offset = *length_to_skip;
2962         } else {
2963                 ip6h->ip6_nxt = oip6h->ip6_nxt;
2964                 /* option_length does not include the AH header's size */
2965                 *length_to_skip += option_length;
2966 
2967                 ah_offset = ah_fix_phdr_v6(ip6h, oip6h, outbound, B_FALSE);
2968                 if (ah_offset == 0) {
2969                         return (NULL);
2970                 }
2971         }
2972 
2973         if (!ah_finish_up(((ah_t *)((uint8_t *)ip6h + ah_offset)),
2974             (outbound ? NULL : ((ah_t *)((uint8_t *)oip6h + ah_offset))),
2975             assoc, ah_data_sz, ah_align_sz, ahstack)) {
2976                 freeb(phdr_mp);
2977                 /*
2978                  * Returning NULL will tell the caller to
2979                  * IPSA_REFELE(), free the memory, etc.
2980                  */
2981                 return (NULL);
2982         }
2983 
2984         phdr_mp->b_wptr = ((uint8_t *)ip6h + ah_offset + sizeof (ah_t) +
2985             ah_align_sz);
2986         if (!outbound)
2987                 *length_to_skip += sizeof (ah_t) + ah_align_sz;
2988         return (phdr_mp);
2989 }
2990 
2991 /*
2992  * This function constructs a pseudo header by looking at the IP header
2993  * and options if any. This is called for both outbound and inbound,
2994  * before computing the ICV.
2995  */
2996 static mblk_t *
2997 ah_process_ip_options_v4(mblk_t *mp, ipsa_t *assoc, int *length_to_skip,
2998     uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack)
2999 {
3000         ipoptp_t opts;
3001         uint32_t option_length;
3002         ipha_t  *ipha;
3003         ipha_t  *oipha;
3004         mblk_t  *phdr_mp;
3005         int      size;
3006         uchar_t *optptr;
3007         uint8_t optval;
3008         uint8_t optlen;
3009         ipaddr_t dst;
3010         uint32_t v_hlen_tos_len;
3011         int ip_hdr_length;
3012         uint_t  ah_align_sz;
3013         uint32_t off;
3014 
3015 #ifdef  _BIG_ENDIAN
3016 #define V_HLEN  (v_hlen_tos_len >> 24)
3017 #else
3018 #define V_HLEN  (v_hlen_tos_len & 0xFF)
3019 #endif
3020 
3021         oipha = (ipha_t *)mp->b_rptr;
3022         v_hlen_tos_len = ((uint32_t *)oipha)[0];
3023 
3024         /*
3025          * Allocate space for the authentication data also. It is
3026          * useful both during the ICV calculation where we need to
3027          * feed in zeroes and while sending the datagram back to IP
3028          * where we will be using the same space.
3029          *
3030          * We need to allocate space for padding bytes if it is not
3031          * a multiple of IPV4_PADDING_ALIGN.
3032          *
3033          * In addition, we allocate space for the ICV computed by
3034          * the kernel crypto framework, saving us a separate kmem
3035          * allocation down the road.
3036          */
3037 
3038         ah_align_sz = P2ALIGN(ah_data_sz + IPV4_PADDING_ALIGN - 1,
3039             IPV4_PADDING_ALIGN);
3040 
3041         ASSERT(ah_align_sz >= ah_data_sz);
3042 
3043         size = IP_SIMPLE_HDR_LENGTH + sizeof (ah_t) + ah_align_sz +
3044             ah_data_sz;
3045 
3046         if (V_HLEN != IP_SIMPLE_HDR_VERSION) {
3047                 option_length = oipha->ipha_version_and_hdr_length -
3048                     (uint8_t)((IP_VERSION << 4) +
3049                     IP_SIMPLE_HDR_LENGTH_IN_WORDS);
3050                 option_length <<= 2;
3051                 size += option_length;
3052         }
3053 
3054         if ((phdr_mp = allocb_tmpl(size, mp)) == NULL) {
3055                 return (NULL);
3056         }
3057 
3058         /*
3059          * Form the basic IP header first.
3060          */
3061         ipha = (ipha_t *)phdr_mp->b_rptr;
3062         ipha->ipha_version_and_hdr_length = oipha->ipha_version_and_hdr_length;
3063         ipha->ipha_type_of_service = 0;
3064 
3065         if (outbound) {
3066                 /*
3067                  * Include the size of AH and authentication data.
3068                  * This is how our recipient would compute the
3069                  * authentication data. Look at what we do in the
3070                  * inbound case below.
3071                  */
3072                 ipha->ipha_length = ntohs(htons(oipha->ipha_length) +
3073                     sizeof (ah_t) + ah_align_sz);
3074         } else {
3075                 ipha->ipha_length = oipha->ipha_length;
3076         }
3077 
3078         ipha->ipha_ident = oipha->ipha_ident;
3079         ipha->ipha_fragment_offset_and_flags = 0;
3080         ipha->ipha_ttl = 0;
3081         ipha->ipha_protocol = IPPROTO_AH;
3082         ipha->ipha_hdr_checksum = 0;
3083         ipha->ipha_src = oipha->ipha_src;
3084         ipha->ipha_dst = dst = oipha->ipha_dst;
3085 
3086         /*
3087          * If there is no option to process return now.
3088          */
3089         ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
3090 
3091         if (V_HLEN == IP_SIMPLE_HDR_VERSION) {
3092                 /* Form the AH header */
3093                 goto ah_hdr;
3094         }
3095 
3096         ip_hdr_length += option_length;
3097 
3098         /*
3099          * We have options. In the outbound case for source route,
3100          * ULP has already moved the first hop, which is now in
3101          * ipha_dst. We need the final destination for the calculation
3102          * of authentication data. And also make sure that mutable
3103          * and experimental fields are zeroed out in the IP options.
3104          */
3105 
3106         bcopy(&oipha[1], &ipha[1], option_length);
3107 
3108         for (optval = ipoptp_first(&opts, ipha);
3109             optval != IPOPT_EOL;
3110             optval = ipoptp_next(&opts)) {
3111                 optptr = opts.ipoptp_cur;
3112                 optlen = opts.ipoptp_len;
3113                 switch (optval) {
3114                 case IPOPT_EXTSEC:
3115                 case IPOPT_COMSEC:
3116                 case IPOPT_RA:
3117                 case IPOPT_SDMDD:
3118                 case IPOPT_SECURITY:
3119                         /*
3120                          * These options are Immutable, leave them as-is.
3121                          * Note that IPOPT_NOP is also Immutable, but it
3122                          * was skipped by ipoptp_next() and thus remains
3123                          * intact in the header.
3124                          */
3125                         break;
3126                 case IPOPT_SSRR:
3127                 case IPOPT_LSRR:
3128                         if ((opts.ipoptp_flags & IPOPTP_ERROR) != 0)
3129                                 goto bad_ipv4opt;
3130                         /*
3131                          * These two are mutable and will be zeroed, but
3132                          * first get the final destination.
3133                          */
3134                         off = optptr[IPOPT_OFFSET];
3135                         /*
3136                          * If one of the conditions is true, it means
3137                          * end of options and dst already has the right
3138                          * value. So, just fall through.
3139                          */
3140                         if (!(optlen < IP_ADDR_LEN || off > optlen - 3)) {
3141                                 off = optlen - IP_ADDR_LEN;
3142                                 bcopy(&optptr[off], &dst, IP_ADDR_LEN);
3143                         }
3144                         /* FALLTHRU */
3145                 case IPOPT_RR:
3146                 case IPOPT_TS:
3147                 case IPOPT_SATID:
3148                 default:
3149                         /*
3150                          * optlen should include from the beginning of an
3151                          * option.
3152                          * NOTE : Stream Identifier Option (SID): RFC 791
3153                          * shows the bit pattern of optlen as 2 and documents
3154                          * the length as 4. We assume it to be 2 here.
3155                          */
3156                         bzero(optptr, optlen);
3157                         break;
3158                 }
3159         }
3160 
3161         if ((opts.ipoptp_flags & IPOPTP_ERROR) != 0) {
3162 bad_ipv4opt:
3163                 ah1dbg(ahstack, ("AH : bad IPv4 option"));
3164                 freeb(phdr_mp);
3165                 return (NULL);
3166         }
3167 
3168         /*
3169          * Don't change ipha_dst for an inbound datagram as it points
3170          * to the right value. Only for the outbound with LSRR/SSRR,
3171          * because of ip_massage_options called by the ULP, ipha_dst
3172          * points to the first hop and we need to use the final
3173          * destination for computing the ICV.
3174          */
3175 
3176         if (outbound)
3177                 ipha->ipha_dst = dst;
3178 ah_hdr:
3179         ((ah_t *)((uint8_t *)ipha + ip_hdr_length))->ah_nexthdr =
3180             oipha->ipha_protocol;
3181         if (!ah_finish_up(((ah_t *)((uint8_t *)ipha + ip_hdr_length)),
3182             (outbound ? NULL : ((ah_t *)((uint8_t *)oipha + ip_hdr_length))),
3183             assoc, ah_data_sz, ah_align_sz, ahstack)) {
3184                 freeb(phdr_mp);
3185                 /*
3186                  * Returning NULL will tell the caller to IPSA_REFELE(), free
3187                  * the memory, etc.
3188                  */
3189                 return (NULL);
3190         }
3191 
3192         phdr_mp->b_wptr = ((uchar_t *)ipha + ip_hdr_length +
3193             sizeof (ah_t) + ah_align_sz);
3194 
3195         ASSERT(phdr_mp->b_wptr <= phdr_mp->b_datap->db_lim);
3196         if (outbound)
3197                 *length_to_skip = ip_hdr_length;
3198         else
3199                 *length_to_skip = ip_hdr_length + sizeof (ah_t) + ah_align_sz;
3200         return (phdr_mp);
3201 }
3202 
3203 /*
3204  * Authenticate an outbound datagram. This function is called
3205  * whenever IP sends an outbound datagram that needs authentication.
3206  * Returns a modified packet if done. Returns NULL if error or queued.
3207  * If error return then ipIfStatsOutDiscards has been increased.
3208  */
3209 static mblk_t *
3210 ah_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
3211 {
3212         mblk_t *phdr_mp;
3213         ipsa_t *assoc;
3214         int length_to_skip;
3215         uint_t ah_align_sz;
3216         uint_t age_bytes;
3217         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
3218         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3219         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3220         ill_t           *ill = ixa->ixa_nce->nce_ill;
3221         boolean_t       need_refrele = B_FALSE;
3222 
3223         /*
3224          * Construct the chain of mblks
3225          *
3226          * PSEUDO_HDR->DATA
3227          *
3228          * one by one.
3229          */
3230 
3231         AH_BUMP_STAT(ahstack, out_requests);
3232 
3233         ASSERT(data_mp->b_datap->db_type == M_DATA);
3234 
3235         assoc = ixa->ixa_ipsec_ah_sa;
3236         ASSERT(assoc != NULL);
3237 
3238 
3239         /*
3240          * Get the outer IP header in shape to escape this system..
3241          */
3242         if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
3243                 /*
3244                  * Need to update packet with any CIPSO option and update
3245                  * ixa_tsl to capture the new label.
3246                  * We allocate a separate ixa for that purpose.
3247                  */
3248                 ixa = ip_xmit_attr_duplicate(ixa);
3249                 if (ixa == NULL) {
3250                         ip_drop_packet(data_mp, B_FALSE, ill,
3251                             DROPPER(ipss, ipds_ah_nomem),
3252                             &ahstack->ah_dropper);
3253                         return (NULL);
3254                 }
3255                 need_refrele = B_TRUE;
3256 
3257                 label_hold(assoc->ipsa_otsl);
3258                 ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);
3259 
3260                 data_mp = sadb_whack_label(data_mp, assoc, ixa,
3261                     DROPPER(ipss, ipds_ah_nomem), &ahstack->ah_dropper);
3262                 if (data_mp == NULL) {
3263                         /* Packet dropped by sadb_whack_label */
3264                         ixa_refrele(ixa);
3265                         return (NULL);
3266                 }
3267         }
3268 
3269         /*
3270          * Age SA according to number of bytes that will be sent after
3271          * adding the AH header, ICV, and padding to the packet.
3272          */
3273 
3274         if (ixa->ixa_flags & IXAF_IS_IPV4) {
3275                 ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
3276                 ah_align_sz = P2ALIGN(assoc->ipsa_mac_len +
3277                     IPV4_PADDING_ALIGN - 1, IPV4_PADDING_ALIGN);
3278                 age_bytes = ntohs(ipha->ipha_length) + sizeof (ah_t) +
3279                     ah_align_sz;
3280         } else {
3281                 ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
3282                 ah_align_sz = P2ALIGN(assoc->ipsa_mac_len +
3283                     IPV6_PADDING_ALIGN - 1, IPV6_PADDING_ALIGN);
3284                 age_bytes = sizeof (ip6_t) + ntohs(ip6h->ip6_plen) +
3285                     sizeof (ah_t) + ah_align_sz;
3286         }
3287 
3288         if (!ah_age_bytes(assoc, age_bytes, B_FALSE)) {
3289                 /* rig things as if ipsec_getassocbyconn() failed */
3290                 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
3291                     "AH association 0x%x, dst %s had bytes expire.\n",
3292                     ntohl(assoc->ipsa_spi), assoc->ipsa_dstaddr, AF_INET,
3293                     ahstack->ipsecah_netstack);
3294                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3295                 ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
3296                 freemsg(data_mp);
3297                 if (need_refrele)
3298                         ixa_refrele(ixa);
3299                 return (NULL);
3300         }
3301 
3302         /*
3303          * XXX We need to have fixed up the outer label before we get here.
3304          * (AH is computing the checksum over the outer label).
3305          */
3306 
3307         /*
3308          * Insert pseudo header:
3309          * [IP, ULP] => [IP, AH, ICV] -> ULP
3310          */
3311 
3312         if (ixa->ixa_flags & IXAF_IS_IPV4) {
3313                 phdr_mp = ah_process_ip_options_v4(data_mp, assoc,
3314                     &length_to_skip, assoc->ipsa_mac_len, B_TRUE, ahstack);
3315         } else {
3316                 phdr_mp = ah_process_ip_options_v6(data_mp, assoc,
3317                     &length_to_skip, assoc->ipsa_mac_len, B_TRUE, ahstack);
3318         }
3319 
3320         if (phdr_mp == NULL) {
3321                 AH_BUMP_STAT(ahstack, out_discards);
3322                 ip_drop_packet(data_mp, B_FALSE, ixa->ixa_nce->nce_ill,
3323                     DROPPER(ipss, ipds_ah_bad_v4_opts),
3324                     &ahstack->ah_dropper);
3325                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3326                 if (need_refrele)
3327                         ixa_refrele(ixa);
3328                 return (NULL);
3329         }
3330 
3331         phdr_mp->b_cont = data_mp;
3332         data_mp->b_rptr += length_to_skip;
3333         data_mp = phdr_mp;
3334 
3335         /*
3336          * At this point data_mp points to
3337          * an mblk containing the pseudo header (IP header,
3338          * AH header, and ICV with mutable fields zero'ed out).
3339          * mp points to the mblk containing the ULP data. The original
3340          * IP header is kept before the ULP data in data_mp.
3341          */
3342 
3343         /* submit MAC request to KCF */
3344         data_mp = ah_submit_req_outbound(data_mp, ixa, length_to_skip, assoc);
3345         if (need_refrele)
3346                 ixa_refrele(ixa);
3347         return (data_mp);
3348 }
3349 
3350 static mblk_t *
3351 ah_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
3352 {
3353         ah_t            *ah = (ah_t *)arg;
3354         ipsa_t          *assoc = ira->ira_ipsec_ah_sa;
3355         int             length_to_skip;
3356         int             ah_length;
3357         mblk_t          *phdr_mp;
3358         uint32_t        ah_offset;
3359         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3360         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3361         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3362 
3363         ASSERT(assoc != NULL);
3364 
3365         /*
3366          * We may wish to check replay in-range-only here as an optimization.
3367          * Include the reality check of ipsa->ipsa_replay >
3368          * ipsa->ipsa_replay_wsize for times when it's the first N packets,
3369          * where N == ipsa->ipsa_replay_wsize.
3370          *
3371          * Another check that may come here later is the "collision" check.
3372          * If legitimate packets flow quickly enough, this won't be a problem,
3373          * but collisions may cause authentication algorithm crunching to
3374          * take place when it doesn't need to.
3375          */
3376         if (!sadb_replay_peek(assoc, ah->ah_replay)) {
3377                 AH_BUMP_STAT(ahstack, replay_early_failures);
3378                 IP_AH_BUMP_STAT(ipss, in_discards);
3379                 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
3380                     DROPPER(ipss, ipds_ah_early_replay),
3381                     &ahstack->ah_dropper);
3382                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3383                 return (NULL);
3384         }
3385 
3386         /*
3387          * The offset of the AH header can be computed from its pointer
3388          * within the data mblk, which was pulled up until the AH header
3389          * by ipsec_inbound_ah_sa() during SA selection.
3390          */
3391         ah_offset = (uchar_t *)ah - data_mp->b_rptr;
3392 
3393         /*
3394          * We need to pullup until the ICV before we call
3395          * ah_process_ip_options_v6.
3396          */
3397         ah_length = (ah->ah_length << 2) + 8;
3398 
3399         /*
3400          * NOTE : If we want to use any field of IP/AH header, you need
3401          * to re-assign following the pullup.
3402          */
3403         if (((uchar_t *)ah + ah_length) > data_mp->b_wptr) {
3404                 if (!pullupmsg(data_mp, (uchar_t *)ah + ah_length -
3405                     data_mp->b_rptr)) {
3406                         (void) ipsec_rl_strlog(ns, info.mi_idnum, 0, 0,
3407                             SL_WARN | SL_ERROR,
3408                             "ah_inbound: Small AH header\n");
3409                         IP_AH_BUMP_STAT(ipss, in_discards);
3410                         ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
3411                             DROPPER(ipss, ipds_ah_nomem),
3412                             &ahstack->ah_dropper);
3413                         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3414                         return (NULL);
3415                 }
3416         }
3417 
3418         /*
3419          * Insert pseudo header:
3420          * [IP, ULP] => [IP, AH, ICV] -> ULP
3421          */
3422         if (ira->ira_flags & IRAF_IS_IPV4) {
3423                 phdr_mp = ah_process_ip_options_v4(data_mp, assoc,
3424                     &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack);
3425         } else {
3426                 phdr_mp = ah_process_ip_options_v6(data_mp, assoc,
3427                     &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack);
3428         }
3429 
3430         if (phdr_mp == NULL) {
3431                 IP_AH_BUMP_STAT(ipss, in_discards);
3432                 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
3433                     ((ira->ira_flags & IRAF_IS_IPV4) ?
3434                     DROPPER(ipss, ipds_ah_bad_v4_opts) :
3435                     DROPPER(ipss, ipds_ah_bad_v6_hdrs)),
3436                     &ahstack->ah_dropper);
3437                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3438                 return (NULL);
3439         }
3440 
3441         phdr_mp->b_cont = data_mp;
3442         data_mp->b_rptr += length_to_skip;
3443         data_mp = phdr_mp;
3444 
3445         /* submit request to KCF */
3446         return (ah_submit_req_inbound(data_mp, ira, length_to_skip, ah_offset,
3447             assoc));
3448 }
3449 
3450 /*
3451  * Invoked after processing of an inbound packet by the
3452  * kernel crypto framework. Called by ah_submit_req() for a sync request,
3453  * or by the kcf callback for an async request.
3454  * Returns NULL if the mblk chain is consumed.
3455  */
3456 static mblk_t *
3457 ah_auth_in_done(mblk_t *phdr_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
3458 {
3459         ipha_t *ipha;
3460         uint_t ah_offset = 0;
3461         mblk_t *mp;
3462         int align_len, newpos;
3463         ah_t *ah;
3464         uint32_t length;
3465         uint32_t *dest32;
3466         uint8_t *dest;
3467         boolean_t isv4;
3468         ip6_t *ip6h;
3469         uint_t icv_len;
3470         ipsa_t *assoc;
3471         kstat_named_t *counter;
3472         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3473         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3474         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3475 
3476         isv4 = (ira->ira_flags & IRAF_IS_IPV4);
3477         assoc = ira->ira_ipsec_ah_sa;
3478         icv_len = (uint_t)ic->ic_crypto_mac.cd_raw.iov_len;
3479 
3480         if (phdr_mp == NULL) {
3481                 ip_drop_packet(phdr_mp, B_TRUE, ira->ira_ill,
3482                     DROPPER(ipss, ipds_ah_nomem),
3483                     &ahstack->ah_dropper);
3484                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3485                 return (NULL);
3486         }
3487 
3488         mp = phdr_mp->b_cont;
3489         if (mp == NULL) {
3490                 ip_drop_packet(phdr_mp, B_TRUE, ira->ira_ill,
3491                     DROPPER(ipss, ipds_ah_nomem),
3492                     &ahstack->ah_dropper);
3493                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3494                 return (NULL);
3495         }
3496         mp->b_rptr -= ic->ic_skip_len;
3497 
3498         ah_set_usetime(assoc, B_TRUE);
3499 
3500         if (isv4) {
3501                 ipha = (ipha_t *)mp->b_rptr;
3502                 ah_offset = ipha->ipha_version_and_hdr_length -
3503                     (uint8_t)((IP_VERSION << 4));
3504                 ah_offset <<= 2;
3505                 align_len = P2ALIGN(icv_len + IPV4_PADDING_ALIGN - 1,
3506                     IPV4_PADDING_ALIGN);
3507         } else {
3508                 ip6h = (ip6_t *)mp->b_rptr;
3509                 ah_offset = ipsec_ah_get_hdr_size_v6(mp, B_TRUE);
3510                 ASSERT((mp->b_wptr - mp->b_rptr) >= ah_offset);
3511                 align_len = P2ALIGN(icv_len + IPV6_PADDING_ALIGN - 1,
3512                     IPV6_PADDING_ALIGN);
3513         }
3514 
3515         ah = (ah_t *)(mp->b_rptr + ah_offset);
3516         newpos = sizeof (ah_t) + align_len;
3517 
3518         /*
3519          * We get here only when authentication passed.
3520          */
3521 
3522         ah3dbg(ahstack, ("AH succeeded, checking replay\n"));
3523         AH_BUMP_STAT(ahstack, good_auth);
3524 
3525         if (!sadb_replay_check(assoc, ah->ah_replay)) {
3526                 int af;
3527                 void *addr;
3528 
3529                 if (isv4) {
3530                         addr = &ipha->ipha_dst;
3531                         af = AF_INET;
3532                 } else {
3533                         addr = &ip6h->ip6_dst;
3534                         af = AF_INET6;
3535                 }
3536 
3537                 /*
3538                  * Log the event. As of now we print out an event.
3539                  * Do not print the replay failure number, or else
3540                  * syslog cannot collate the error messages.  Printing
3541                  * the replay number that failed (or printing to the
3542                  * console) opens a denial-of-service attack.
3543                  */
3544                 AH_BUMP_STAT(ahstack, replay_failures);
3545                 ipsec_assocfailure(info.mi_idnum, 0, 0,
3546                     SL_ERROR | SL_WARN,
3547                     "Replay failed for AH spi %x, dst_addr %s",
3548                     assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
3549                 counter = DROPPER(ipss, ipds_ah_replay);
3550                 goto ah_in_discard;
3551         }
3552 
3553         /*
3554          * We need to remove the AH header from the original
3555          * datagram. Best way to do this is to move the pre-AH headers
3556          * forward in the (relatively simple) IPv4 case.  In IPv6, it's
3557          * a bit more complicated because of IPv6's next-header chaining,
3558          * but it's doable.
3559          */
3560         if (isv4) {
3561                 /*
3562                  * Assign the right protocol, adjust the length as we
3563                  * are removing the AH header and adjust the checksum to
3564                  * account for the protocol and length.
3565                  */
3566                 length = ntohs(ipha->ipha_length);
3567                 if (!ah_age_bytes(assoc, length, B_TRUE)) {
3568                         /* The ipsa has hit hard expiration, LOG and AUDIT. */
3569                         ipsec_assocfailure(info.mi_idnum, 0, 0,
3570                             SL_ERROR | SL_WARN,
3571                             "AH Association 0x%x, dst %s had bytes expire.\n",
3572                             assoc->ipsa_spi, assoc->ipsa_dstaddr,
3573                             AF_INET, ahstack->ipsecah_netstack);
3574                         AH_BUMP_STAT(ahstack, bytes_expired);
3575                         counter = DROPPER(ipss, ipds_ah_bytes_expire);
3576                         goto ah_in_discard;
3577                 }
3578                 ipha->ipha_protocol = ah->ah_nexthdr;
3579                 length -= newpos;
3580 
3581                 ipha->ipha_length = htons((uint16_t)length);
3582                 ipha->ipha_hdr_checksum = 0;
3583                 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
3584         } else {
3585                 uchar_t *whereptr;
3586                 int hdrlen;
3587                 uint8_t *nexthdr;
3588                 ip6_hbh_t *hbhhdr;
3589                 ip6_dest_t *dsthdr;
3590                 ip6_rthdr0_t *rthdr;
3591 
3592                 /*
3593                  * Make phdr_mp hold until the AH header and make
3594                  * mp hold everything past AH header.
3595                  */
3596                 length = ntohs(ip6h->ip6_plen);
3597                 if (!ah_age_bytes(assoc, length + sizeof (ip6_t), B_TRUE)) {
3598                         /* The ipsa has hit hard expiration, LOG and AUDIT. */
3599                         ipsec_assocfailure(info.mi_idnum, 0, 0,
3600                             SL_ERROR | SL_WARN,
3601                             "AH Association 0x%x, dst %s had bytes "
3602                             "expire.\n", assoc->ipsa_spi, &ip6h->ip6_dst,
3603                             AF_INET6, ahstack->ipsecah_netstack);
3604                         AH_BUMP_STAT(ahstack, bytes_expired);
3605                         counter = DROPPER(ipss, ipds_ah_bytes_expire);
3606                         goto ah_in_discard;
3607                 }
3608 
3609                 /*
3610                  * Update the next header field of the header preceding
3611                  * AH with the next header field of AH. Start with the
3612                  * IPv6 header and proceed with the extension headers
3613                  * until we find what we're looking for.
3614                  */
3615                 nexthdr = &ip6h->ip6_nxt;
3616                 whereptr =  (uchar_t *)ip6h;
3617                 hdrlen = sizeof (ip6_t);
3618 
3619                 while (*nexthdr != IPPROTO_AH) {
3620                         whereptr += hdrlen;
3621                         /* Assume IP has already stripped it */
3622                         ASSERT(*nexthdr != IPPROTO_FRAGMENT);
3623                         switch (*nexthdr) {
3624                         case IPPROTO_HOPOPTS:
3625                                 hbhhdr = (ip6_hbh_t *)whereptr;
3626                                 nexthdr = &hbhhdr->ip6h_nxt;
3627                                 hdrlen = 8 * (hbhhdr->ip6h_len + 1);
3628                                 break;
3629                         case IPPROTO_DSTOPTS:
3630                                 dsthdr = (ip6_dest_t *)whereptr;
3631                                 nexthdr = &dsthdr->ip6d_nxt;
3632                                 hdrlen = 8 * (dsthdr->ip6d_len + 1);
3633                                 break;
3634                         case IPPROTO_ROUTING:
3635                                 rthdr = (ip6_rthdr0_t *)whereptr;
3636                                 nexthdr = &rthdr->ip6r0_nxt;
3637                                 hdrlen = 8 * (rthdr->ip6r0_len + 1);
3638                                 break;
3639                         }
3640                 }
3641                 *nexthdr = ah->ah_nexthdr;
3642                 length -= newpos;
3643                 ip6h->ip6_plen = htons((uint16_t)length);
3644         }
3645 
3646         /* Now that we've fixed the IP header, move it forward. */
3647         mp->b_rptr += newpos;
3648         if (IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
3649                 dest32 = (uint32_t *)(mp->b_rptr + ah_offset);
3650                 while (--dest32 >= (uint32_t *)mp->b_rptr)
3651                         *dest32 = *(dest32 - (newpos >> 2));
3652         } else {
3653                 dest = mp->b_rptr + ah_offset;
3654                 while (--dest >= mp->b_rptr)
3655                         *dest = *(dest - newpos);
3656         }
3657         freeb(phdr_mp);
3658 
3659         /*
3660          * If SA is labelled, use its label, else inherit the label
3661          */
3662         if (is_system_labeled() && (assoc->ipsa_tsl != NULL)) {
3663                 if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) {
3664                         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
3665                             DROPPER(ipss, ipds_ah_nomem), &ahstack->ah_dropper);
3666                         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3667                         return (NULL);
3668                 }
3669         }
3670 
3671         if (assoc->ipsa_state == IPSA_STATE_IDLE) {
3672                 /*
3673                  * Cluster buffering case.  Tell caller that we're
3674                  * handling the packet.
3675                  */
3676                 sadb_buf_pkt(assoc, mp, ira);
3677                 return (NULL);
3678         }
3679 
3680         return (mp);
3681 
3682 ah_in_discard:
3683         IP_AH_BUMP_STAT(ipss, in_discards);
3684         ip_drop_packet(phdr_mp, B_TRUE, ira->ira_ill, counter,
3685             &ahstack->ah_dropper);
3686         BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3687         return (NULL);
3688 }
3689 
3690 /*
3691  * Invoked after processing of an outbound packet by the
3692  * kernel crypto framework, either by ah_submit_req() for a request
3693  * executed syncrhonously, or by the KEF callback for a request
3694  * executed asynchronously.
3695  */
3696 static mblk_t *
3697 ah_auth_out_done(mblk_t *phdr_mp, ip_xmit_attr_t *ixa, ipsec_crypto_t *ic)
3698 {
3699         mblk_t *mp;
3700         int align_len;
3701         uint32_t hdrs_length;
3702         uchar_t *ptr;
3703         uint32_t length;
3704         boolean_t isv4;
3705         size_t icv_len;
3706         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
3707         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3708         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3709         ill_t           *ill = ixa->ixa_nce->nce_ill;
3710 
3711         isv4 = (ixa->ixa_flags & IXAF_IS_IPV4);
3712         icv_len = ic->ic_crypto_mac.cd_raw.iov_len;
3713 
3714         mp = phdr_mp->b_cont;
3715         if (mp == NULL) {
3716                 ip_drop_packet(phdr_mp, B_FALSE, ill,
3717                     DROPPER(ipss, ipds_ah_nomem),
3718                     &ahstack->ah_dropper);
3719                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3720                 return (NULL);
3721         }
3722         mp->b_rptr -= ic->ic_skip_len;
3723 
3724         ASSERT(ixa->ixa_flags & IXAF_IPSEC_SECURE);
3725         ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
3726         ah_set_usetime(ixa->ixa_ipsec_ah_sa, B_FALSE);
3727 
3728         if (isv4) {
3729                 ipha_t *ipha;
3730                 ipha_t *nipha;
3731 
3732                 ipha = (ipha_t *)mp->b_rptr;
3733                 hdrs_length = ipha->ipha_version_and_hdr_length -
3734                     (uint8_t)((IP_VERSION << 4));
3735                 hdrs_length <<= 2;
3736                 align_len = P2ALIGN(icv_len + IPV4_PADDING_ALIGN - 1,
3737                     IPV4_PADDING_ALIGN);
3738                 /*
3739                  * phdr_mp must have the right amount of space for the
3740                  * combined IP and AH header. Copy the IP header and
3741                  * the ack_data onto AH. Note that the AH header was
3742                  * already formed before the ICV calculation and hence
3743                  * you don't have to copy it here.
3744                  */
3745                 bcopy(mp->b_rptr, phdr_mp->b_rptr, hdrs_length);
3746 
3747                 ptr = phdr_mp->b_rptr + hdrs_length + sizeof (ah_t);
3748                 bcopy(phdr_mp->b_wptr, ptr, icv_len);
3749 
3750                 /*
3751                  * Compute the new header checksum as we are assigning
3752                  * IPPROTO_AH and adjusting the length here.
3753                  */
3754                 nipha = (ipha_t *)phdr_mp->b_rptr;
3755 
3756                 nipha->ipha_protocol = IPPROTO_AH;
3757                 length = ntohs(nipha->ipha_length);
3758                 length += (sizeof (ah_t) + align_len);
3759                 nipha->ipha_length = htons((uint16_t)length);
3760                 nipha->ipha_hdr_checksum = 0;
3761                 nipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(nipha);
3762         } else {
3763                 ip6_t *ip6h;
3764                 ip6_t *nip6h;
3765                 uint_t ah_offset;
3766 
3767                 ip6h = (ip6_t *)mp->b_rptr;
3768                 nip6h = (ip6_t *)phdr_mp->b_rptr;
3769                 align_len = P2ALIGN(icv_len + IPV6_PADDING_ALIGN - 1,
3770                     IPV6_PADDING_ALIGN);
3771                 /*
3772                  * phdr_mp must have the right amount of space for the
3773                  * combined IP and AH header. Copy the IP header with
3774                  * options into the pseudo header. When we constructed
3775                  * a pseudo header, we did not copy some of the mutable
3776                  * fields. We do it now by calling ah_fix_phdr_v6()
3777                  * with the last argument B_TRUE. It returns the
3778                  * ah_offset into the pseudo header.
3779                  */
3780 
3781                 bcopy(ip6h, nip6h, IPV6_HDR_LEN);
3782                 ah_offset = ah_fix_phdr_v6(nip6h, ip6h, B_TRUE, B_TRUE);
3783                 ASSERT(ah_offset != 0);
3784                 /*
3785                  * phdr_mp can hold exactly the whole IP header with options
3786                  * plus the AH header also. Thus subtracting the AH header's
3787                  * size should give exactly how much of the original header
3788                  * should be skipped.
3789                  */
3790                 hdrs_length = (phdr_mp->b_wptr - phdr_mp->b_rptr) -
3791                     sizeof (ah_t) - icv_len;
3792                 bcopy(phdr_mp->b_wptr, ((uint8_t *)nip6h + ah_offset +
3793                     sizeof (ah_t)), icv_len);
3794                 length = ntohs(nip6h->ip6_plen);
3795                 length += (sizeof (ah_t) + align_len);
3796                 nip6h->ip6_plen = htons((uint16_t)length);
3797         }
3798 
3799         /* Skip the original IP header */
3800         mp->b_rptr += hdrs_length;
3801         if (mp->b_rptr == mp->b_wptr) {
3802                 phdr_mp->b_cont = mp->b_cont;
3803                 freeb(mp);
3804         }
3805 
3806         return (phdr_mp);
3807 }
3808 
3809 /* Refactor me */
3810 /*
3811  * Wrapper to allow IP to trigger an AH association failure message
3812  * during SA inbound selection.
3813  */
3814 void
3815 ipsecah_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
3816     uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
3817 {
3818         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3819         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
3820         ipsec_stack_t   *ipss = ns->netstack_ipsec;
3821 
3822         if (ahstack->ipsecah_log_unknown_spi) {
3823                 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
3824                     addr, af, ahstack->ipsecah_netstack);
3825         }
3826 
3827         ip_drop_packet(mp, B_TRUE, ira->ira_ill,
3828             DROPPER(ipss, ipds_ah_no_sa),
3829             &ahstack->ah_dropper);
3830 }
3831 
3832 /*
3833  * Initialize the AH input and output processing functions.
3834  */
3835 void
3836 ipsecah_init_funcs(ipsa_t *sa)
3837 {
3838         if (sa->ipsa_output_func == NULL)
3839                 sa->ipsa_output_func = ah_outbound;
3840         if (sa->ipsa_input_func == NULL)
3841                 sa->ipsa_input_func = ah_inbound;
3842 }