1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright (c) 2017 Joyent, Inc.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/stream.h>
  30 #include <sys/stropts.h>
  31 #include <sys/strsubr.h>
  32 #include <sys/errno.h>
  33 #include <sys/ddi.h>
  34 #include <sys/debug.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/stream.h>
  37 #include <sys/strlog.h>
  38 #include <sys/kmem.h>
  39 #include <sys/sunddi.h>
  40 #include <sys/tihdr.h>
  41 #include <sys/atomic.h>
  42 #include <sys/socket.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/crypto/common.h>
  45 #include <sys/crypto/api.h>
  46 #include <sys/zone.h>
  47 #include <netinet/in.h>
  48 #include <net/if.h>
  49 #include <net/pfkeyv2.h>
  50 #include <net/pfpolicy.h>
  51 #include <inet/common.h>
  52 #include <netinet/ip6.h>
  53 #include <inet/ip.h>
  54 #include <inet/ip_ire.h>
  55 #include <inet/ip6.h>
  56 #include <inet/ipsec_info.h>
  57 #include <inet/tcp.h>
  58 #include <inet/sadb.h>
  59 #include <inet/ipsec_impl.h>
  60 #include <inet/ipsecah.h>
  61 #include <inet/ipsecesp.h>
  62 #include <sys/random.h>
  63 #include <sys/dlpi.h>
  64 #include <sys/strsun.h>
  65 #include <sys/strsubr.h>
  66 #include <inet/ip_if.h>
  67 #include <inet/ipdrop.h>
  68 #include <inet/ipclassifier.h>
  69 #include <inet/sctp_ip.h>
  70 #include <sys/tsol/tnet.h>
  71 
  72 /*
  73  * This source file contains Security Association Database (SADB) common
  74  * routines.  They are linked in with the AH module.  Since AH has no chance
  75  * of falling under export control, it was safe to link it in there.
  76  */
  77 
  78 static uint8_t *sadb_action_to_ecomb(uint8_t *, uint8_t *, ipsec_action_t *,
  79     netstack_t *);
  80 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *);
  81 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
  82                             netstack_t *);
  83 static void sadb_destroy(sadb_t *, netstack_t *);
  84 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
  85 static ts_label_t *sadb_label_from_sens(sadb_sens_t *, uint64_t *);
  86 
  87 static time_t sadb_add_time(time_t, uint64_t);
  88 static void lifetime_fuzz(ipsa_t *);
  89 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
  90 static int get_ipsa_pair(ipsa_query_t *, ipsap_t *, int *);
  91 static void init_ipsa_pair(ipsap_t *);
  92 static void destroy_ipsa_pair(ipsap_t *);
  93 static int update_pairing(ipsap_t *, ipsa_query_t *, keysock_in_t *, int *);
  94 static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
  95 
/*
 * ipsacq_maxpackets is only declared here (as an extern).  It is a global
 * variable so that it can be tuned from /etc/system.
 */
 100 extern uint64_t ipsacq_maxpackets;
 101 
 102 #define SET_EXPIRE(sa, delta, exp) {                            \
 103         if (((sa)->ipsa_ ## delta) != 0) {                           \
 104                 (sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime,    \
 105                         (sa)->ipsa_ ## delta);                               \
 106         }                                                               \
 107 }
 108 
 109 #define UPDATE_EXPIRE(sa, delta, exp) {                                 \
 110         if (((sa)->ipsa_ ## delta) != 0) {                           \
 111                 time_t tmp = sadb_add_time((sa)->ipsa_usetime,               \
 112                         (sa)->ipsa_ ## delta);                               \
 113                 if (((sa)->ipsa_ ## exp) == 0)                               \
 114                         (sa)->ipsa_ ## exp = tmp;                    \
 115                 else                                                    \
 116                         (sa)->ipsa_ ## exp =                                 \
 117                             MIN((sa)->ipsa_ ## exp, tmp);            \
 118         }                                                               \
 119 }
 120 
 121 
 122 /* wrap the macro so we can pass it as a function pointer */
 123 void
 124 sadb_sa_refrele(void *target)
 125 {
 126         IPSA_REFRELE(((ipsa_t *)target));
 127 }
 128 
 129 /*
 130  * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 131  * a signed type.
 132  */
 133 #define TIME_MAX LONG_MAX
 134 
 135 /*
 136  * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 137  * time_t is defined to be a signed type with the same range as
 138  * "long".  On ILP32 systems, we thus run the risk of wrapping around
 139  * at end of time, as well as "overwrapping" the clock back around
 140  * into a seemingly valid but incorrect future date earlier than the
 141  * desired expiration.
 142  *
 143  * In order to avoid odd behavior (either negative lifetimes or loss
 144  * of high order bits) when someone asks for bizarrely long SA
 145  * lifetimes, we do a saturating add for expire times.
 146  *
 147  * We presume that ILP32 systems will be past end of support life when
 148  * the 32-bit time_t overflows (a dangerous assumption, mind you..).
 149  *
 150  * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
 151  * will hopefully have figured out clever ways to avoid the use of
 152  * fixed-sized integers in computation.
 153  */
 154 static time_t
 155 sadb_add_time(time_t base, uint64_t delta)
 156 {
 157         time_t sum;
 158 
 159         /*
 160          * Clip delta to the maximum possible time_t value to
 161          * prevent "overwrapping" back into a shorter-than-desired
 162          * future time.
 163          */
 164         if (delta > TIME_MAX)
 165                 delta = TIME_MAX;
 166         /*
 167          * This sum may still overflow.
 168          */
 169         sum = base + delta;
 170 
 171         /*
 172          * .. so if the result is less than the base, we overflowed.
 173          */
 174         if (sum < base)
 175                 sum = TIME_MAX;
 176 
 177         return (sum);
 178 }
 179 
 180 /*
 181  * Callers of this function have already created a working security
 182  * association, and have found the appropriate table & hash chain.  All this
 183  * function does is check duplicates, and insert the SA.  The caller needs to
 184  * hold the hash bucket lock and increment the refcnt before insertion.
 185  *
 186  * Return 0 if success, EEXIST if collision.
 187  */
 188 #define SA_UNIQUE_MATCH(sa1, sa2) \
 189         (((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
 190         ((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
 191 
 192 int
 193 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
 194 {
 195         ipsa_t **ptpn = NULL;
 196         ipsa_t *walker;
 197         boolean_t unspecsrc;
 198 
 199         ASSERT(MUTEX_HELD(&bucket->isaf_lock));
 200 
 201         unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
 202 
 203         walker = bucket->isaf_ipsa;
 204         ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
 205 
 206         /*
 207          * Find insertion point (pointed to with **ptpn).  Insert at the head
 208          * of the list unless there's an unspecified source address, then
 209          * insert it after the last SA with a specified source address.
 210          *
 211          * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
 212          * checking for collisions.
 213          */
 214 
 215         while (walker != NULL) {
 216                 if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
 217                     ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
 218                         if (walker->ipsa_spi == ipsa->ipsa_spi)
 219                                 return (EEXIST);
 220 
 221                         mutex_enter(&walker->ipsa_lock);
 222                         if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
 223                             (walker->ipsa_flags & IPSA_F_USED) &&
 224                             SA_UNIQUE_MATCH(walker, ipsa)) {
 225                                 walker->ipsa_flags |= IPSA_F_CINVALID;
 226                         }
 227                         mutex_exit(&walker->ipsa_lock);
 228                 }
 229 
 230                 if (ptpn == NULL && unspecsrc) {
 231                         if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
 232                             walker->ipsa_addrfam))
 233                                 ptpn = walker->ipsa_ptpn;
 234                         else if (walker->ipsa_next == NULL)
 235                                 ptpn = &walker->ipsa_next;
 236                 }
 237 
 238                 walker = walker->ipsa_next;
 239         }
 240 
 241         if (ptpn == NULL)
 242                 ptpn = &bucket->isaf_ipsa;
 243         ipsa->ipsa_next = *ptpn;
 244         ipsa->ipsa_ptpn = ptpn;
 245         if (ipsa->ipsa_next != NULL)
 246                 ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
 247         *ptpn = ipsa;
 248         ipsa->ipsa_linklock = &bucket->isaf_lock;
 249 
 250         return (0);
 251 }
 252 #undef SA_UNIQUE_MATCH
 253 
 254 /*
 255  * Free a security association.  Its reference count is 0, which means
 256  * I must free it.  The SA must be unlocked and must not be linked into
 257  * any fanout list.
 258  */
 259 static void
 260 sadb_freeassoc(ipsa_t *ipsa)
 261 {
 262         ipsec_stack_t   *ipss = ipsa->ipsa_netstack->netstack_ipsec;
 263         mblk_t          *asyncmp, *mp;
 264 
 265         ASSERT(ipss != NULL);
 266         ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
 267         ASSERT(ipsa->ipsa_refcnt == 0);
 268         ASSERT(ipsa->ipsa_next == NULL);
 269         ASSERT(ipsa->ipsa_ptpn == NULL);
 270 
 271 
 272         asyncmp = sadb_clear_lpkt(ipsa);
 273         if (asyncmp != NULL) {
 274                 mp = ip_recv_attr_free_mblk(asyncmp);
 275                 ip_drop_packet(mp, B_TRUE, NULL,
 276                     DROPPER(ipss, ipds_sadb_inlarval_timeout),
 277                     &ipss->ipsec_sadb_dropper);
 278         }
 279         mutex_enter(&ipsa->ipsa_lock);
 280 
 281         if (ipsa->ipsa_tsl != NULL) {
 282                 label_rele(ipsa->ipsa_tsl);
 283                 ipsa->ipsa_tsl = NULL;
 284         }
 285 
 286         if (ipsa->ipsa_otsl != NULL) {
 287                 label_rele(ipsa->ipsa_otsl);
 288                 ipsa->ipsa_otsl = NULL;
 289         }
 290 
 291         ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
 292         ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
 293         mutex_exit(&ipsa->ipsa_lock);
 294 
 295         /* bzero() these fields for paranoia's sake. */
 296         if (ipsa->ipsa_authkey != NULL) {
 297                 bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
 298                 kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
 299         }
 300         if (ipsa->ipsa_encrkey != NULL) {
 301                 bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
 302                 kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
 303         }
 304         if (ipsa->ipsa_nonce_buf != NULL) {
 305                 bzero(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
 306                 kmem_free(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
 307         }
 308         if (ipsa->ipsa_src_cid != NULL) {
 309                 IPSID_REFRELE(ipsa->ipsa_src_cid);
 310         }
 311         if (ipsa->ipsa_dst_cid != NULL) {
 312                 IPSID_REFRELE(ipsa->ipsa_dst_cid);
 313         }
 314         if (ipsa->ipsa_emech.cm_param != NULL)
 315                 kmem_free(ipsa->ipsa_emech.cm_param,
 316                     ipsa->ipsa_emech.cm_param_len);
 317 
 318         mutex_destroy(&ipsa->ipsa_lock);
 319         kmem_free(ipsa, sizeof (*ipsa));
 320 }
 321 
 322 /*
 323  * Unlink a security association from a hash bucket.  Assume the hash bucket
 324  * lock is held, but the association's lock is not.
 325  *
 326  * Note that we do not bump the bucket's generation number here because
 327  * we might not be making a visible change to the set of visible SA's.
 328  * All callers MUST bump the bucket's generation number before they unlock
 329  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
 330  * was present in the bucket at the time it was locked.
 331  */
 332 void
 333 sadb_unlinkassoc(ipsa_t *ipsa)
 334 {
 335         ASSERT(ipsa->ipsa_linklock != NULL);
 336         ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
 337 
 338         /* These fields are protected by the link lock. */
 339         *(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
 340         if (ipsa->ipsa_next != NULL) {
 341                 ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
 342                 ipsa->ipsa_next = NULL;
 343         }
 344 
 345         ipsa->ipsa_ptpn = NULL;
 346 
 347         /* This may destroy the SA. */
 348         IPSA_REFRELE(ipsa);
 349 }
 350 
 351 void
 352 sadb_delete_cluster(ipsa_t *assoc)
 353 {
 354         uint8_t protocol;
 355 
 356         if (cl_inet_deletespi &&
 357             ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
 358             (assoc->ipsa_state == IPSA_STATE_MATURE))) {
 359                 protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
 360                     IPPROTO_AH : IPPROTO_ESP;
 361                 cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
 362                     protocol, assoc->ipsa_spi, NULL);
 363         }
 364 }
 365 
 366 /*
 367  * Create a larval security association with the specified SPI.  All other
 368  * fields are zeroed.
 369  */
 370 static ipsa_t *
 371 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
 372     netstack_t *ns)
 373 {
 374         ipsa_t *newbie;
 375 
 376         /*
 377          * Allocate...
 378          */
 379 
 380         newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
 381         if (newbie == NULL) {
 382                 /* Can't make new larval SA. */
 383                 return (NULL);
 384         }
 385 
 386         /* Assigned requested SPI, assume caller does SPI allocation magic. */
 387         newbie->ipsa_spi = spi;
 388         newbie->ipsa_netstack = ns;  /* No netstack_hold */
 389 
 390         /*
 391          * Copy addresses...
 392          */
 393 
 394         IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
 395         IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
 396 
 397         newbie->ipsa_addrfam = addrfam;
 398 
 399         /*
 400          * Set common initialization values, including refcnt.
 401          */
 402         mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
 403         newbie->ipsa_state = IPSA_STATE_LARVAL;
 404         newbie->ipsa_refcnt = 1;
 405         newbie->ipsa_freefunc = sadb_freeassoc;
 406 
 407         /*
 408          * There aren't a lot of other common initialization values, as
 409          * they are copied in from the PF_KEY message.
 410          */
 411 
 412         return (newbie);
 413 }
 414 
 415 /*
 416  * Call me to initialize a security association fanout.
 417  */
 418 static int
 419 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
 420 {
 421         isaf_t *table;
 422         int i;
 423 
 424         table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
 425         *tablep = table;
 426 
 427         if (table == NULL)
 428                 return (ENOMEM);
 429 
 430         for (i = 0; i < size; i++) {
 431                 mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
 432                 table[i].isaf_ipsa = NULL;
 433                 table[i].isaf_gen = 0;
 434         }
 435 
 436         return (0);
 437 }
 438 
 439 /*
 440  * Call me to initialize an acquire fanout
 441  */
 442 static int
 443 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
 444 {
 445         iacqf_t *table;
 446         int i;
 447 
 448         table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
 449         *tablep = table;
 450 
 451         if (table == NULL)
 452                 return (ENOMEM);
 453 
 454         for (i = 0; i < size; i++) {
 455                 mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
 456                 table[i].iacqf_ipsacq = NULL;
 457         }
 458 
 459         return (0);
 460 }
 461 
 462 /*
 463  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
 464  * caller must clean up partial allocations.
 465  */
 466 static int
 467 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
 468 {
 469         ASSERT(sp->sdb_of == NULL);
 470         ASSERT(sp->sdb_if == NULL);
 471         ASSERT(sp->sdb_acq == NULL);
 472 
 473         sp->sdb_hashsize = size;
 474         if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
 475                 return (ENOMEM);
 476         if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
 477                 return (ENOMEM);
 478         if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
 479                 return (ENOMEM);
 480 
 481         return (0);
 482 }
 483 
 484 /*
 485  * Call me to initialize an SADB instance; fall back to default size on failure.
 486  */
 487 static void
 488 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
 489     netstack_t *ns)
 490 {
 491         ASSERT(sp->sdb_of == NULL);
 492         ASSERT(sp->sdb_if == NULL);
 493         ASSERT(sp->sdb_acq == NULL);
 494 
 495         if (size < IPSEC_DEFAULT_HASH_SIZE)
 496                 size = IPSEC_DEFAULT_HASH_SIZE;
 497 
 498         if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
 499 
 500                 cmn_err(CE_WARN,
 501                     "Unable to allocate %u entry IPv%u %s SADB hash table",
 502                     size, ver, name);
 503 
 504                 sadb_destroy(sp, ns);
 505                 size = IPSEC_DEFAULT_HASH_SIZE;
 506                 cmn_err(CE_WARN, "Falling back to %d entries", size);
 507                 (void) sadb_init_trial(sp, size, KM_SLEEP);
 508         }
 509 }
 510 
 511 
 512 /*
 513  * Initialize an SADB-pair.
 514  */
 515 void
 516 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
 517 {
 518         sadb_init(name, &sp->s_v4, size, 4, ns);
 519         sadb_init(name, &sp->s_v6, size, 6, ns);
 520 
 521         sp->s_satype = type;
 522 
 523         ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
 524         if (type == SADB_SATYPE_AH) {
 525                 ipsec_stack_t   *ipss = ns->netstack_ipsec;
 526 
 527                 ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
 528                 sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
 529                 sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
 530         } else {
 531                 sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
 532                 sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
 533         }
 534 }
 535 
 536 /*
 537  * Deliver a single SADB_DUMP message representing a single SA.  This is
 538  * called many times by sadb_dump().
 539  *
 540  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
 541  * the caller should take that as a hint that dupb() on the "original answer"
 542  * failed, and that perhaps the caller should try again with a copyb()ed
 543  * "original answer".
 544  */
 545 static int
 546 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
 547     sadb_msg_t *samsg)
 548 {
 549         mblk_t *answer;
 550 
 551         answer = dupb(original_answer);
 552         if (answer == NULL)
 553                 return (ENOBUFS);
 554         answer->b_cont = sadb_sa2msg(ipsa, samsg);
 555         if (answer->b_cont == NULL) {
 556                 freeb(answer);
 557                 return (ENOMEM);
 558         }
 559 
 560         /* Just do a putnext, and let keysock deal with flow control. */
 561         putnext(pfkey_q, answer);
 562         return (0);
 563 }
 564 
 565 /*
 566  * Common function to allocate and prepare a keysock_out_t M_CTL message.
 567  */
 568 mblk_t *
 569 sadb_keysock_out(minor_t serial)
 570 {
 571         mblk_t *mp;
 572         keysock_out_t *kso;
 573 
 574         mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
 575         if (mp != NULL) {
 576                 mp->b_datap->db_type = M_CTL;
 577                 mp->b_wptr += sizeof (ipsec_info_t);
 578                 kso = (keysock_out_t *)mp->b_rptr;
 579                 kso->ks_out_type = KEYSOCK_OUT;
 580                 kso->ks_out_len = sizeof (*kso);
 581                 kso->ks_out_serial = serial;
 582         }
 583 
 584         return (mp);
 585 }
 586 
 587 /*
 588  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
 589  * to keysock.
 590  */
 591 static int
 592 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
 593     int num_entries, boolean_t do_peers, time_t active_time)
 594 {
 595         int i, error = 0;
 596         mblk_t *original_answer;
 597         ipsa_t *walker;
 598         sadb_msg_t *samsg;
 599         time_t  current;
 600 
 601         /*
 602          * For each IPSA hash bucket do:
 603          *      - Hold the mutex
 604          *      - Walk each entry, doing an sadb_dump_deliver() on it.
 605          */
 606         ASSERT(mp->b_cont != NULL);
 607         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
 608 
 609         original_answer = sadb_keysock_out(serial);
 610         if (original_answer == NULL)
 611                 return (ENOMEM);
 612 
 613         current = gethrestime_sec();
 614         for (i = 0; i < num_entries; i++) {
 615                 mutex_enter(&fanout[i].isaf_lock);
 616                 for (walker = fanout[i].isaf_ipsa; walker != NULL;
 617                     walker = walker->ipsa_next) {
 618                         if (!do_peers && walker->ipsa_haspeer)
 619                                 continue;
 620                         if ((active_time != 0) &&
 621                             ((current - walker->ipsa_lastuse) > active_time))
 622                                 continue;
 623                         error = sadb_dump_deliver(pfkey_q, original_answer,
 624                             walker, samsg);
 625                         if (error == ENOBUFS) {
 626                                 mblk_t *new_original_answer;
 627 
 628                                 /* Ran out of dupb's.  Try a copyb. */
 629                                 new_original_answer = copyb(original_answer);
 630                                 if (new_original_answer == NULL) {
 631                                         error = ENOMEM;
 632                                 } else {
 633                                         freeb(original_answer);
 634                                         original_answer = new_original_answer;
 635                                         error = sadb_dump_deliver(pfkey_q,
 636                                             original_answer, walker, samsg);
 637                                 }
 638                         }
 639                         if (error != 0)
 640                                 break;  /* out of for loop. */
 641                 }
 642                 mutex_exit(&fanout[i].isaf_lock);
 643                 if (error != 0)
 644                         break;  /* out of for loop. */
 645         }
 646 
 647         freeb(original_answer);
 648         return (error);
 649 }
 650 
 651 /*
 652  * Dump an entire SADB; outbound first, then inbound.
 653  */
 654 
 655 int
 656 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
 657 {
 658         int error;
 659         time_t  active_time = 0;
 660         sadb_x_edump_t  *edump =
 661             (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
 662 
 663         if (edump != NULL) {
 664                 active_time = edump->sadb_x_edump_timeout;
 665         }
 666 
 667         /* Dump outbound */
 668         error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
 669             sp->sdb_hashsize, B_TRUE, active_time);
 670         if (error)
 671                 return (error);
 672 
 673         /* Dump inbound */
 674         return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
 675             sp->sdb_hashsize, B_FALSE, active_time);
 676 }
 677 
 678 /*
 679  * Generic sadb table walker.
 680  *
 681  * Call "walkfn" for each SA in each bucket in "table"; pass the
 682  * bucket, the entry and "cookie" to the callback function.
 683  * Take care to ensure that walkfn can delete the SA without screwing
 684  * up our traverse.
 685  *
 686  * The bucket is locked for the duration of the callback, both so that the
 687  * callback can just call sadb_unlinkassoc() when it wants to delete something,
 688  * and so that no new entries are added while we're walking the list.
 689  */
 690 static void
 691 sadb_walker(isaf_t *table, uint_t numentries,
 692     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
 693     void *cookie)
 694 {
 695         int i;
 696         for (i = 0; i < numentries; i++) {
 697                 ipsa_t *entry, *next;
 698 
 699                 mutex_enter(&table[i].isaf_lock);
 700 
 701                 for (entry = table[i].isaf_ipsa; entry != NULL;
 702                     entry = next) {
 703                         next = entry->ipsa_next;
 704                         (*walkfn)(&table[i], entry, cookie);
 705                 }
 706                 mutex_exit(&table[i].isaf_lock);
 707         }
 708 }
 709 
 710 /*
 711  * Call me to free up a security association fanout.  Use the forever
 712  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
 713  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
 714  * when a module is unloaded).
 715  */
 716 static void
 717 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
 718     boolean_t inbound)
 719 {
 720         int i;
 721         isaf_t *table = *tablep;
 722         uint8_t protocol;
 723         ipsa_t *sa;
 724         netstackid_t sid;
 725 
 726         if (table == NULL)
 727                 return;
 728 
 729         for (i = 0; i < numentries; i++) {
 730                 mutex_enter(&table[i].isaf_lock);
 731                 while ((sa = table[i].isaf_ipsa) != NULL) {
 732                         if (inbound && cl_inet_deletespi &&
 733                             (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
 734                             (sa->ipsa_state != IPSA_STATE_IDLE)) {
 735                                 protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
 736                                     IPPROTO_AH : IPPROTO_ESP;
 737                                 sid = sa->ipsa_netstack->netstack_stackid;
 738                                 cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
 739                                     NULL);
 740                         }
 741                         sadb_unlinkassoc(sa);
 742                 }
 743                 table[i].isaf_gen++;
 744                 mutex_exit(&table[i].isaf_lock);
 745                 if (forever)
 746                         mutex_destroy(&(table[i].isaf_lock));
 747         }
 748 
 749         if (forever) {
 750                 *tablep = NULL;
 751                 kmem_free(table, numentries * sizeof (*table));
 752         }
 753 }
 754 
 755 /*
 756  * Entry points to sadb_destroyer().
 757  */
 758 static void
 759 sadb_flush(sadb_t *sp, netstack_t *ns)
 760 {
 761         /*
 762          * Flush out each bucket, one at a time.  Were it not for keysock's
 763          * enforcement, there would be a subtlety where I could add on the
 764          * heels of a flush.  With keysock's enforcement, however, this
 765          * makes ESP's job easy.
 766          */
 767         sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
 768         sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
 769 
 770         /* For each acquire, destroy it; leave the bucket mutex alone. */
 771         sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
 772 }
 773 
 774 static void
 775 sadb_destroy(sadb_t *sp, netstack_t *ns)
 776 {
 777         sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
 778         sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
 779 
 780         /* For each acquire, destroy it, including the bucket mutex. */
 781         sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
 782 
 783         ASSERT(sp->sdb_of == NULL);
 784         ASSERT(sp->sdb_if == NULL);
 785         ASSERT(sp->sdb_acq == NULL);
 786 }
 787 
 788 void
 789 sadbp_flush(sadbp_t *spp, netstack_t *ns)
 790 {
 791         sadb_flush(&spp->s_v4, ns);
 792         sadb_flush(&spp->s_v6, ns);
 793 }
 794 
 795 void
 796 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
 797 {
 798         sadb_destroy(&spp->s_v4, ns);
 799         sadb_destroy(&spp->s_v6, ns);
 800 
 801         if (spp->s_satype == SADB_SATYPE_AH) {
 802                 ipsec_stack_t   *ipss = ns->netstack_ipsec;
 803 
 804                 ip_drop_unregister(&ipss->ipsec_sadb_dropper);
 805         }
 806 }
 807 
 808 
 809 /*
 810  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
 811  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
 812  * EINVAL.
 813  */
 814 int
 815 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
 816     sadb_lifetime_t *idle)
 817 {
 818         if (hard == NULL || soft == NULL)
 819                 return (0);
 820 
 821         if (hard->sadb_lifetime_allocations != 0 &&
 822             soft->sadb_lifetime_allocations != 0 &&
 823             hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
 824                 return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
 825 
 826         if (hard->sadb_lifetime_bytes != 0 &&
 827             soft->sadb_lifetime_bytes != 0 &&
 828             hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
 829                 return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
 830 
 831         if (hard->sadb_lifetime_addtime != 0 &&
 832             soft->sadb_lifetime_addtime != 0 &&
 833             hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
 834                 return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
 835 
 836         if (hard->sadb_lifetime_usetime != 0 &&
 837             soft->sadb_lifetime_usetime != 0 &&
 838             hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
 839                 return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
 840 
 841         if (idle != NULL) {
 842                 if (hard->sadb_lifetime_addtime != 0 &&
 843                     idle->sadb_lifetime_addtime != 0 &&
 844                     hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
 845                         return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
 846 
 847                 if (soft->sadb_lifetime_addtime != 0 &&
 848                     idle->sadb_lifetime_addtime != 0 &&
 849                     soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
 850                         return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
 851 
 852                 if (hard->sadb_lifetime_usetime != 0 &&
 853                     idle->sadb_lifetime_usetime != 0 &&
 854                     hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
 855                         return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
 856 
 857                 if (soft->sadb_lifetime_usetime != 0 &&
 858                     idle->sadb_lifetime_usetime != 0 &&
 859                     soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
 860                         return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
 861         }
 862 
 863         return (0);
 864 }
 865 
 866 /*
 867  * Sanity check sensitivity labels.
 868  *
 869  * For now, just reject labels on unlabeled systems.
 870  */
 871 int
 872 sadb_labelchk(keysock_in_t *ksi)
 873 {
 874         if (!is_system_labeled()) {
 875                 if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
 876                         return (SADB_X_DIAGNOSTIC_BAD_LABEL);
 877 
 878                 if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL)
 879                         return (SADB_X_DIAGNOSTIC_BAD_LABEL);
 880         }
 881 
 882         return (0);
 883 }
 884 
 885 /*
 886  * Clone a security association for the purposes of inserting a single SA
 887  * into inbound and outbound tables respectively. This function should only
 888  * be called from sadb_common_add().
 889  */
 890 static ipsa_t *
 891 sadb_cloneassoc(ipsa_t *ipsa)
 892 {
 893         ipsa_t *newbie;
 894         boolean_t error = B_FALSE;
 895 
 896         ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
 897 
 898         newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
 899         if (newbie == NULL)
 900                 return (NULL);
 901 
 902         /* Copy over what we can. */
 903         *newbie = *ipsa;
 904 
 905         /* bzero and initialize locks, in case *_init() allocates... */
 906         mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
 907 
 908         if (newbie->ipsa_tsl != NULL)
 909                 label_hold(newbie->ipsa_tsl);
 910 
 911         if (newbie->ipsa_otsl != NULL)
 912                 label_hold(newbie->ipsa_otsl);
 913 
 914         /*
 915          * While somewhat dain-bramaged, the most graceful way to
 916          * recover from errors is to keep plowing through the
 917          * allocations, and getting what I can.  It's easier to call
 918          * sadb_freeassoc() on the stillborn clone when all the
 919          * pointers aren't pointing to the parent's data.
 920          */
 921 
 922         if (ipsa->ipsa_authkey != NULL) {
 923                 newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
 924                     KM_NOSLEEP);
 925                 if (newbie->ipsa_authkey == NULL) {
 926                         error = B_TRUE;
 927                 } else {
 928                         bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
 929                             newbie->ipsa_authkeylen);
 930 
 931                         newbie->ipsa_kcfauthkey.ck_data =
 932                             newbie->ipsa_authkey;
 933                 }
 934 
 935                 if (newbie->ipsa_amech.cm_param != NULL) {
 936                         newbie->ipsa_amech.cm_param =
 937                             (char *)&newbie->ipsa_mac_len;
 938                 }
 939         }
 940 
 941         if (ipsa->ipsa_encrkey != NULL) {
 942                 newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
 943                     KM_NOSLEEP);
 944                 if (newbie->ipsa_encrkey == NULL) {
 945                         error = B_TRUE;
 946                 } else {
 947                         bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
 948                             newbie->ipsa_encrkeylen);
 949 
 950                         newbie->ipsa_kcfencrkey.ck_data =
 951                             newbie->ipsa_encrkey;
 952                 }
 953         }
 954 
 955         newbie->ipsa_authtmpl = NULL;
 956         newbie->ipsa_encrtmpl = NULL;
 957         newbie->ipsa_haspeer = B_TRUE;
 958 
 959         if (ipsa->ipsa_src_cid != NULL) {
 960                 newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
 961                 IPSID_REFHOLD(ipsa->ipsa_src_cid);
 962         }
 963 
 964         if (ipsa->ipsa_dst_cid != NULL) {
 965                 newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
 966                 IPSID_REFHOLD(ipsa->ipsa_dst_cid);
 967         }
 968 
 969         if (error) {
 970                 sadb_freeassoc(newbie);
 971                 return (NULL);
 972         }
 973 
 974         return (newbie);
 975 }
 976 
 977 /*
 978  * Initialize a SADB address extension at the address specified by addrext.
 979  * Return a pointer to the end of the new address extension.
 980  */
 981 static uint8_t *
 982 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
 983     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
 984 {
 985         struct sockaddr_in *sin;
 986         struct sockaddr_in6 *sin6;
 987         uint8_t *cur = start;
 988         int addrext_len;
 989         int sin_len;
 990         sadb_address_t *addrext = (sadb_address_t *)cur;
 991 
 992         if (cur == NULL)
 993                 return (NULL);
 994 
 995         cur += sizeof (*addrext);
 996         if (cur > end)
 997                 return (NULL);
 998 
 999         addrext->sadb_address_proto = proto;
1000         addrext->sadb_address_prefixlen = prefix;
1001         addrext->sadb_address_reserved = 0;
1002         addrext->sadb_address_exttype = exttype;
1003 
1004         switch (af) {
1005         case AF_INET:
1006                 sin = (struct sockaddr_in *)cur;
1007                 sin_len = sizeof (*sin);
1008                 cur += sin_len;
1009                 if (cur > end)
1010                         return (NULL);
1011 
1012                 sin->sin_family = af;
1013                 bzero(sin->sin_zero, sizeof (sin->sin_zero));
1014                 sin->sin_port = port;
1015                 IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1016                 break;
1017         case AF_INET6:
1018                 sin6 = (struct sockaddr_in6 *)cur;
1019                 sin_len = sizeof (*sin6);
1020                 cur += sin_len;
1021                 if (cur > end)
1022                         return (NULL);
1023 
1024                 bzero(sin6, sizeof (*sin6));
1025                 sin6->sin6_family = af;
1026                 sin6->sin6_port = port;
1027                 IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1028                 break;
1029         }
1030 
1031         addrext_len = roundup(cur - start, sizeof (uint64_t));
1032         addrext->sadb_address_len = SADB_8TO64(addrext_len);
1033 
1034         cur = start + addrext_len;
1035         if (cur > end)
1036                 cur = NULL;
1037 
1038         return (cur);
1039 }
1040 
1041 /*
1042  * Construct a key management cookie extension.
1043  */
1044 
1045 static uint8_t *
1046 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1047 {
1048         sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1049 
1050         if (cur == NULL)
1051                 return (NULL);
1052 
1053         cur += sizeof (*kmcext);
1054 
1055         if (cur > end)
1056                 return (NULL);
1057 
1058         kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1059         kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1060         kmcext->sadb_x_kmc_proto = kmp;
1061         kmcext->sadb_x_kmc_cookie = kmc;
1062         kmcext->sadb_x_kmc_reserved = 0;
1063 
1064         return (cur);
1065 }
1066 
/*
 * Given an original message header and an SA, construct a full PF_KEY
 * message with all of the relevant extensions.
 * This is mostly used for SADB_GET, and SADB_DUMP.
 *
 * The function works in two passes:
 *
 *   1. Sizing: walk the SA and add up the space every extension will need
 *      (alloclen), remembering in boolean flags which optional extensions
 *      are present.  This pass runs before ipsa_lock is taken.
 *   2. Filling: allocate a zeroed mblk of alloclen bytes, copy *samsg into
 *      it as the base header (overriding sadb_msg_len and sadb_msg_satype),
 *      then, with ipsa_lock held, append the extensions in this order:
 *      SA, current/hard/soft/idle lifetimes, src and dst addresses, NAT-T
 *      local/remote addresses, inner src/dst addresses, KM cookie, auth key,
 *      encryption key, src/dst identities, sensitivity, outer sensitivity,
 *      pair, replay counter.
 *
 * Returns the new mblk, or NULL if the SA's address family is neither
 * AF_INET nor AF_INET6, if allocb() fails, or if one of the address / KM
 * cookie extensions would not fit in the buffer.
 *
 * NOTE(review): several fields (e.g. ipsa_replay, ipsa_flags, key lengths)
 * are read once during the unlocked sizing pass and again during the locked
 * filling pass.  The walker-based writers below do not re-check against
 * "end", so this appears to rely on those fields not changing in between --
 * confirm against callers.
 */
static mblk_t *
sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
{
        /* Byte sizes of the whole message and of individual extensions. */
        int alloclen, addrsize, paddrsize, authsize, encrsize;
        int srcidsize, dstidsize, senslen, osenslen;
        sa_family_t fam, pfam;  /* Address family for SADB_EXT_ADDRESS */
                                /* src/dst and proxy sockaddrs. */
        /*
         * The following are pointers into the PF_KEY message this PF_KEY
         * message creates.
         */
        sadb_msg_t *newsamsg;
        sadb_sa_t *assoc;
        sadb_lifetime_t *lt;
        sadb_key_t *key;
        sadb_ident_t *ident;
        sadb_sens_t *sens;
        sadb_ext_t *walker;     /* For when we need a generic ext. pointer. */
        sadb_x_replay_ctr_t *repl_ctr;
        sadb_x_pair_t *pair_ext;

        mblk_t *mp;
        uint8_t *cur, *end;     /* Write cursor and end of the new message. */
        /* These indicate the presence of the above extension fields. */
        boolean_t soft = B_FALSE, hard = B_FALSE;
        boolean_t isrc = B_FALSE, idst = B_FALSE;
        boolean_t auth = B_FALSE, encr = B_FALSE;
        boolean_t sensinteg = B_FALSE, osensinteg = B_FALSE;
        boolean_t srcid = B_FALSE, dstid = B_FALSE;
        boolean_t idle;
        boolean_t paired;
        uint32_t otherspi;      /* Only set (and only used) when paired. */

        /* First off, figure out the allocation length for this message. */
        /*
         * Constant stuff.  This includes base, SA, address (src, dst),
         * and lifetime (current).
         */
        alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
            sizeof (sadb_lifetime_t);

        /* Size of one outer address extension, rounded to 64 bits. */
        fam = ipsa->ipsa_addrfam;
        switch (fam) {
        case AF_INET:
                addrsize = roundup(sizeof (struct sockaddr_in) +
                    sizeof (sadb_address_t), sizeof (uint64_t));
                break;
        case AF_INET6:
                addrsize = roundup(sizeof (struct sockaddr_in6) +
                    sizeof (sadb_address_t), sizeof (uint64_t));
                break;
        default:
                /* Unsupported address family; nothing allocated yet. */
                return (NULL);
        }
        /*
         * Allocate TWO address extensions, for source and destination.
         * (Thus, the * 2.)
         */
        alloclen += addrsize * 2;
        /* Each NAT-T address present costs one more address extension. */
        if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
                alloclen += addrsize;
        if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
                alloclen += addrsize;

        if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
                paired = B_TRUE;
                alloclen += sizeof (sadb_x_pair_t);
                otherspi = ipsa->ipsa_otherspi;
        } else {
                paired = B_FALSE;
        }

        /* How 'bout other lifetimes? */
        if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
            ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
                alloclen += sizeof (sadb_lifetime_t);
                soft = B_TRUE;
        }

        if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
            ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
                alloclen += sizeof (sadb_lifetime_t);
                hard = B_TRUE;
        }

        if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
                alloclen += sizeof (sadb_lifetime_t);
                idle = B_TRUE;
        } else {
                idle = B_FALSE;
        }

        /* Inner addresses. */
        if (ipsa->ipsa_innerfam != 0) {
                pfam = ipsa->ipsa_innerfam;
                switch (pfam) {
                case AF_INET6:
                        paddrsize = roundup(sizeof (struct sockaddr_in6) +
                            sizeof (sadb_address_t), sizeof (uint64_t));
                        break;
                case AF_INET:
                        paddrsize = roundup(sizeof (struct sockaddr_in) +
                            sizeof (sadb_address_t), sizeof (uint64_t));
                        break;
                default:
                        /* A non-zero but unknown inner family is fatal. */
                        cmn_err(CE_PANIC,
                            "IPsec SADB: Proxy length failure.\n");
                        break;
                }
                /* Inner source and destination always come as a pair. */
                isrc = B_TRUE;
                idst = B_TRUE;
                alloclen += 2 * paddrsize;
        }

        /* For the following fields, assume that length != 0 ==> stuff */
        if (ipsa->ipsa_authkeylen != 0) {
                authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
                    sizeof (uint64_t));
                alloclen += authsize;
                auth = B_TRUE;
        }

        /*
         * The encryption key extension is sized for the key followed by
         * ipsa_nonce_len extra bytes; see the salt copy further down.
         */
        if (ipsa->ipsa_encrkeylen != 0) {
                encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen +
                    ipsa->ipsa_nonce_len, sizeof (uint64_t));
                alloclen += encrsize;
                encr = B_TRUE;
        } else {
                encr = B_FALSE;
        }

        if (ipsa->ipsa_tsl != NULL) {
                senslen = sadb_sens_len_from_label(ipsa->ipsa_tsl);
                alloclen += senslen;
                sensinteg = B_TRUE;
        }

        if (ipsa->ipsa_otsl != NULL) {
                osenslen = sadb_sens_len_from_label(ipsa->ipsa_otsl);
                alloclen += osenslen;
                osensinteg = B_TRUE;
        }

        /*
         * Must use strlen() here for lengths.  Identities use NULL
         * pointers to indicate their nonexistence.
         * The "+ 1" leaves room for the terminating NUL of the identity.
         */
        if (ipsa->ipsa_src_cid != NULL) {
                srcidsize = roundup(sizeof (sadb_ident_t) +
                    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
                    sizeof (uint64_t));
                alloclen += srcidsize;
                srcid = B_TRUE;
        }

        if (ipsa->ipsa_dst_cid != NULL) {
                dstidsize = roundup(sizeof (sadb_ident_t) +
                    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
                    sizeof (uint64_t));
                alloclen += dstidsize;
                dstid = B_TRUE;
        }

        /* A KM cookie extension is emitted if either value is non-zero. */
        if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
                alloclen += sizeof (sadb_x_kmc_t);

        /* Same condition is re-evaluated when the extension is written. */
        if (ipsa->ipsa_replay != 0) {
                alloclen += sizeof (sadb_x_replay_ctr_t);
        }

        /* Make sure the allocation length is a multiple of 8 bytes. */
        ASSERT((alloclen & 0x7) == 0);

        /* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
        mp = allocb(alloclen, BPRI_HI);
        if (mp == NULL)
                return (NULL);
        /* Zero everything so padding and unset fields are well-defined. */
        bzero(mp->b_rptr, alloclen);

        /* Claim the whole buffer; "end" bounds the bounded ext. writers. */
        mp->b_wptr += alloclen;
        end = mp->b_wptr;
        /* Base header: copy the caller's, then fix up the total length. */
        newsamsg = (sadb_msg_t *)mp->b_rptr;
        *newsamsg = *samsg;
        newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);

        mutex_enter(&ipsa->ipsa_lock);   /* Since I'm grabbing SA fields... */

        newsamsg->sadb_msg_satype = ipsa->ipsa_type;

        /* SA extension immediately follows the base header. */
        assoc = (sadb_sa_t *)(newsamsg + 1);
        assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
        assoc->sadb_sa_exttype = SADB_EXT_SA;
        assoc->sadb_sa_spi = ipsa->ipsa_spi;
        assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
        assoc->sadb_sa_state = ipsa->ipsa_state;
        assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
        assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
        assoc->sadb_sa_flags = ipsa->ipsa_flags;

        /* Current lifetime is always present. */
        lt = (sadb_lifetime_t *)(assoc + 1);
        lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
        lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
        /* We do not support the concept. */
        lt->sadb_lifetime_allocations = 0;
        lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
        lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
        lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;

        /* Optional lifetimes follow back-to-back; lt++ steps to the next. */
        if (hard) {
                lt++;
                lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
                lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
                lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
                lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
                lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
                lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
        }

        if (soft) {
                lt++;
                lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
                lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
                lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
                lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
                lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
                lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
        }

        /*
         * The idle lifetime only carries add/use times; its allocations
         * and bytes fields keep the zero from the bzero() above.
         */
        if (idle) {
                lt++;
                lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
                lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
                lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
                lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
        }

        /* From here on, extensions are appended through a byte cursor. */
        cur = (uint8_t *)(lt + 1);

        /* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
        cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
            ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
            SA_PROTO(ipsa), 0);
        if (cur == NULL) {
                /* Did not fit: drop the message, exit through the unlock. */
                freemsg(mp);
                mp = NULL;
                goto bail;
        }

        cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
            ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
            SA_PROTO(ipsa), 0);
        if (cur == NULL) {
                freemsg(mp);
                mp = NULL;
                goto bail;
        }

        /* NAT-T addresses are emitted with IPPROTO_UDP and the NAT port. */
        if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
                    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
                    IPPROTO_UDP, 0);
                if (cur == NULL) {
                        freemsg(mp);
                        mp = NULL;
                        goto bail;
                }
        }

        if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
                    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
                    IPPROTO_UDP, 0);
                if (cur == NULL) {
                        freemsg(mp);
                        mp = NULL;
                        goto bail;
                }
        }

        /* If we are a tunnel-mode SA, fill in the inner-selectors. */
        if (isrc) {
                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
                    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
                    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
                if (cur == NULL) {
                        freemsg(mp);
                        mp = NULL;
                        goto bail;
                }
        }

        if (idst) {
                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
                    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
                    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
                if (cur == NULL) {
                        freemsg(mp);
                        mp = NULL;
                        goto bail;
                }
        }

        if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
                cur = sadb_make_kmc_ext(cur, end,
                    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
                if (cur == NULL) {
                        freemsg(mp);
                        mp = NULL;
                        goto bail;
                }
        }

        /*
         * The remaining extensions are written through "walker".  Each
         * writer stores its length (in 64-bit words) in the extension header
         * and then advances walker by that many uint64_t's.  None of these
         * writers checks against "end"; they depend on the sizes computed
         * in the first pass.
         */
        walker = (sadb_ext_t *)cur;
        if (auth) {
                key = (sadb_key_t *)walker;
                key->sadb_key_len = SADB_8TO64(authsize);
                key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
                key->sadb_key_bits = ipsa->ipsa_authkeybits;
                key->sadb_key_reserved = 0;
                /* Key bytes follow the key header directly. */
                bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
                walker = (sadb_ext_t *)((uint64_t *)walker +
                    walker->sadb_ext_len);
        }

        if (encr) {
                uint8_t *buf_ptr;
                key = (sadb_key_t *)walker;
                key->sadb_key_len = SADB_8TO64(encrsize);
                key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
                key->sadb_key_bits = ipsa->ipsa_encrkeybits;
                /* The reserved field is used to report the salt bits. */
                key->sadb_key_reserved = ipsa->ipsa_saltbits;
                buf_ptr = (uint8_t *)(key + 1);
                bcopy(ipsa->ipsa_encrkey, buf_ptr, ipsa->ipsa_encrkeylen);
                /*
                 * The salt, if any, is appended right after the key.
                 * NOTE(review): the space reserved above was computed from
                 * ipsa_nonce_len while ipsa_saltlen bytes are copied here;
                 * presumably saltlen <= nonce_len -- confirm.
                 */
                if (ipsa->ipsa_salt != NULL) {
                        buf_ptr += ipsa->ipsa_encrkeylen;
                        bcopy(ipsa->ipsa_salt, buf_ptr, ipsa->ipsa_saltlen);
                }
                walker = (sadb_ext_t *)((uint64_t *)walker +
                    walker->sadb_ext_len);
        }

        if (srcid) {
                ident = (sadb_ident_t *)walker;
                ident->sadb_ident_len = SADB_8TO64(srcidsize);
                ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
                ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
                ident->sadb_ident_id = 0;
                ident->sadb_ident_reserved = 0;
                /* Room for the string + NUL was reserved in srcidsize. */
                (void) strcpy((char *)(ident + 1),
                    ipsa->ipsa_src_cid->ipsid_cid);
                walker = (sadb_ext_t *)((uint64_t *)walker +
                    walker->sadb_ext_len);
        }

        if (dstid) {
                ident = (sadb_ident_t *)walker;
                ident->sadb_ident_len = SADB_8TO64(dstidsize);
                ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
                ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
                ident->sadb_ident_id = 0;
                ident->sadb_ident_reserved = 0;
                /* Room for the string + NUL was reserved in dstidsize. */
                (void) strcpy((char *)(ident + 1),
                    ipsa->ipsa_dst_cid->ipsid_cid);
                walker = (sadb_ext_t *)((uint64_t *)walker +
                    walker->sadb_ext_len);
        }

        /*
         * The sensitivity extensions are produced by sadb_sens_from_label();
         * the walker advance below reads the length back from the extension
         * header, so that helper is expected to have filled it in.
         */
        if (sensinteg) {
                sens = (sadb_sens_t *)walker;
                sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
                    ipsa->ipsa_tsl, senslen);

                walker = (sadb_ext_t *)((uint64_t *)walker +
                    walker->sadb_ext_len);
        }

        if (osensinteg) {
                sens = (sadb_sens_t *)walker;

                sadb_sens_from_label(sens, SADB_X_EXT_OUTER_SENS,
                    ipsa->ipsa_otsl, osenslen);
                /* Flag the outer label as implicit for MAC-exempt SAs. */
                if (ipsa->ipsa_mac_exempt)
                        sens->sadb_x_sens_flags = SADB_X_SENS_IMPLICIT;

                walker = (sadb_ext_t *)((uint64_t *)walker +
                    walker->sadb_ext_len);
        }

        if (paired) {
                pair_ext = (sadb_x_pair_t *)walker;

                pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
                pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
                /* otherspi was captured during the sizing pass. */
                pair_ext->sadb_x_pair_spi = otherspi;

                walker = (sadb_ext_t *)((uint64_t *)walker +
                    walker->sadb_ext_len);
        }

        /* Only the 32-bit replay value is reported; the 64-bit one is 0. */
        if (ipsa->ipsa_replay != 0) {
                repl_ctr = (sadb_x_replay_ctr_t *)walker;
                repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
                repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
                repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
                repl_ctr->sadb_x_rc_replay64 = 0;
                walker = (sadb_ext_t *)(repl_ctr + 1);
        }

bail:
        /* Pardon any delays... */
        mutex_exit(&ipsa->ipsa_lock);

        /* mp is NULL here if one of the bounded writers ran out of room. */
        return (mp);
}
1486 
/*
 * Strip out key headers or unmarked headers (SADB_EXT_KEY_AUTH,
 * SADB_EXT_KEY_ENCRYPT, SADB_X_EXT_EDUMP, SADB_EXT_RESERVED) and adjust the
 * base message accordingly.
 *
 * Assume message is pulled up in one piece of contiguous memory.
 *
 * Say if we start off with:
 *
 * +------+----+-------------+-----------+---------------+---------------+
 * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
 * +------+----+-------------+-----------+---------------+---------------+
 *
 * we will end up with
 *
 * +------+----+-------------+-----------+---------------+
 * | base | SA | source addr | dest addr | soft lifetime |
 * +------+----+-------------+-----------+---------------+
 *
 * The compaction is done in place.  "target" marks the start of the current
 * hole (the region to be overwritten), and the sadb_ext_len stored at target
 * holds the size of that hole in 64-bit words.  Whenever another extension
 * to be erased is found, the kept extensions between the hole and it are
 * slid down over the hole, and the hole is re-established just after them,
 * now also covering the newly found extension.
 *
 * On return samsg->sadb_msg_len holds the new length in 64-bit words.  The
 * bytes beyond the new end of the message are left as they were.
 */
static void
sadb_strip(sadb_msg_t *samsg)
{
        sadb_ext_t *ext;
        uint8_t *target = NULL;         /* Start of the hole; NULL if none. */
        uint8_t *msgend;
        /* Running length of what is kept, in 64-bit words. */
        int sofar = SADB_8TO64(sizeof (*samsg));
        int copylen;

        /* Extensions start right after the base header. */
        ext = (sadb_ext_t *)(samsg + 1);
        msgend = (uint8_t *)samsg;
        msgend += SADB_64TO8(samsg->sadb_msg_len);
        while ((uint8_t *)ext < msgend) {
                if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
                    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
                    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
                    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
                        /*
                         * Aha!  I found a header to be erased.
                         */

                        if (target != NULL) {
                                /*
                                 * If I had a previous header to be erased,
                                 * copy over it.  I can get away with just
                                 * copying backwards because the target will
                                 * always be 8 bytes behind the source.
                                 */
                                /* Bytes to keep: end of hole up to ext. */
                                copylen = ((uint8_t *)ext) - (target +
                                    SADB_64TO8(
                                    ((sadb_ext_t *)target)->sadb_ext_len));
                                ovbcopy(((uint8_t *)ext - copylen), target,
                                    copylen);
                                /*
                                 * The hole now begins after the moved data
                                 * and runs through the end of ext; record
                                 * that size at its new start.
                                 */
                                target += copylen;
                                ((sadb_ext_t *)target)->sadb_ext_len =
                                    SADB_8TO64(((uint8_t *)ext) - target +
                                    SADB_64TO8(ext->sadb_ext_len));
                        } else {
                                /* First hole: ext itself, length as is. */
                                target = (uint8_t *)ext;
                        }
                } else {
                        /* A kept extension; count it in the new length. */
                        sofar += ext->sadb_ext_len;
                }

                /* sadb_ext_len is in 64-bit words, hence the uint64_t step. */
                ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
        }

        ASSERT((uint8_t *)ext == msgend);

        /* Slide any kept extensions after the last hole down over it. */
        if (target != NULL) {
                copylen = ((uint8_t *)ext) - (target +
                    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
                if (copylen != 0)
                        ovbcopy(((uint8_t *)ext - copylen), target, copylen);
        }

        /* Adjust samsg. */
        samsg->sadb_msg_len = (uint16_t)sofar;

        /* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
}
1566 
1567 /*
1568  * AH needs to send an error to PF_KEY.  Assume mp points to an M_CTL
1569  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1570  * the sending keysock instance is included.
1571  */
1572 void
1573 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1574     uint_t serial)
1575 {
1576         mblk_t *msg = mp->b_cont;
1577         sadb_msg_t *samsg;
1578         keysock_out_t *kso;
1579 
1580         /*
1581          * Enough functions call this to merit a NULL queue check.
1582          */
1583         if (pfkey_q == NULL) {
1584                 freemsg(mp);
1585                 return;
1586         }
1587 
1588         ASSERT(msg != NULL);
1589         ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1590         ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1591         samsg = (sadb_msg_t *)msg->b_rptr;
1592         kso = (keysock_out_t *)mp->b_rptr;
1593 
1594         kso->ks_out_type = KEYSOCK_OUT;
1595         kso->ks_out_len = sizeof (*kso);
1596         kso->ks_out_serial = serial;
1597 
1598         /*
1599          * Only send the base message up in the event of an error.
1600          * Don't worry about bzero()-ing, because it was probably bogus
1601          * anyway.
1602          */
1603         msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1604         samsg = (sadb_msg_t *)msg->b_rptr;
1605         samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1606         samsg->sadb_msg_errno = (uint8_t)error;
1607         if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1608                 samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1609 
1610         putnext(pfkey_q, mp);
1611 }
1612 
1613 /*
1614  * Send a successful return packet back to keysock via the queue in pfkey_q.
1615  *
1616  * Often, an SA is associated with the reply message, it's passed in if needed,
1617  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1618  * and the caller will release said refcnt.
1619  */
1620 void
1621 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1622     keysock_in_t *ksi, ipsa_t *ipsa)
1623 {
1624         keysock_out_t *kso;
1625         mblk_t *mp1;
1626         sadb_msg_t *newsamsg;
1627         uint8_t *oldend;
1628 
1629         ASSERT((mp->b_cont != NULL) &&
1630             ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1631             ((void *)mp->b_rptr == (void *)ksi));
1632 
1633         switch (samsg->sadb_msg_type) {
1634         case SADB_ADD:
1635         case SADB_UPDATE:
1636         case SADB_X_UPDATEPAIR:
1637         case SADB_X_DELPAIR_STATE:
1638         case SADB_FLUSH:
1639         case SADB_DUMP:
1640                 /*
1641                  * I have all of the message already.  I just need to strip
1642                  * out the keying material and echo the message back.
1643                  *
1644                  * NOTE: for SADB_DUMP, the function sadb_dump() did the
1645                  * work.  When DUMP reaches here, it should only be a base
1646                  * message.
1647                  */
1648         justecho:
1649                 if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1650                     ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1651                     ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1652                         sadb_strip(samsg);
1653                         /* Assume PF_KEY message is contiguous. */
1654                         ASSERT(mp->b_cont->b_cont == NULL);
1655                         oldend = mp->b_cont->b_wptr;
1656                         mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1657                             SADB_64TO8(samsg->sadb_msg_len);
1658                         bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1659                 }
1660                 break;
1661         case SADB_GET:
1662                 /*
1663                  * Do a lot of work here, because of the ipsa I just found.
1664                  * First construct the new PF_KEY message, then abandon
1665                  * the old one.
1666                  */
1667                 mp1 = sadb_sa2msg(ipsa, samsg);
1668                 if (mp1 == NULL) {
1669                         sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1670                             SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1671                         return;
1672                 }
1673                 freemsg(mp->b_cont);
1674                 mp->b_cont = mp1;
1675                 break;
1676         case SADB_DELETE:
1677         case SADB_X_DELPAIR:
1678                 if (ipsa == NULL)
1679                         goto justecho;
1680                 /*
1681                  * Because listening KMds may require more info, treat
1682                  * DELETE like a special case of GET.
1683                  */
1684                 mp1 = sadb_sa2msg(ipsa, samsg);
1685                 if (mp1 == NULL) {
1686                         sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1687                             SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1688                         return;
1689                 }
1690                 newsamsg = (sadb_msg_t *)mp1->b_rptr;
1691                 sadb_strip(newsamsg);
1692                 oldend = mp1->b_wptr;
1693                 mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1694                 bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1695                 freemsg(mp->b_cont);
1696                 mp->b_cont = mp1;
1697                 break;
1698         default:
1699                 if (mp != NULL)
1700                         freemsg(mp);
1701                 return;
1702         }
1703 
1704         /* ksi is now null and void. */
1705         kso = (keysock_out_t *)ksi;
1706         kso->ks_out_type = KEYSOCK_OUT;
1707         kso->ks_out_len = sizeof (*kso);
1708         kso->ks_out_serial = ksi->ks_in_serial;
1709         /* We're ready to send... */
1710         putnext(pfkey_q, mp);
1711 }
1712 
1713 /*
1714  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1715  */
1716 void
1717 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1718     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1719 {
1720         keysock_hello_ack_t *kha;
1721         queue_t *oldq;
1722 
1723         ASSERT(OTHERQ(q) != NULL);
1724 
1725         /*
1726          * First, check atomically that I'm the first and only keysock
1727          * instance.
1728          *
1729          * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1730          * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1731          * messages.
1732          */
1733 
1734         oldq = atomic_cas_ptr((void **)pfkey_qp, NULL, OTHERQ(q));
1735         if (oldq != NULL) {
1736                 ASSERT(oldq != q);
1737                 cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1738                     (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1739                 freemsg(mp);
1740                 return;
1741         }
1742 
1743         kha = (keysock_hello_ack_t *)mp->b_rptr;
1744         kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1745         kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1746         kha->ks_hello_satype = (uint8_t)satype;
1747 
1748         /*
1749          * If we made it past the atomic_cas_ptr, then we have "exclusive"
1750          * access to the timeout handle.  Fire it off after the default ager
1751          * interval.
1752          */
1753         *top = qtimeout(*pfkey_qp, ager, agerarg,
1754             drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
1755 
1756         putnext(*pfkey_qp, mp);
1757 }
1758 
1759 /*
1760  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1761  *
1762  * Check addresses themselves for wildcard or multicast.
1763  * Check ire table for local/non-local/broadcast.
1764  */
1765 int
1766 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1767     netstack_t *ns)
1768 {
1769         sadb_address_t *addr = (sadb_address_t *)ext;
1770         struct sockaddr_in *sin;
1771         struct sockaddr_in6 *sin6;
1772         int diagnostic, type;
1773         boolean_t normalized = B_FALSE;
1774 
1775         ASSERT(ext != NULL);
1776         ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
1777             (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
1778             (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
1779             (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
1780             (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
1781             (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
1782 
1783         /* Assign both sockaddrs, the compiler will do the right thing. */
1784         sin = (struct sockaddr_in *)(addr + 1);
1785         sin6 = (struct sockaddr_in6 *)(addr + 1);
1786 
1787         if (sin6->sin6_family == AF_INET6) {
1788                 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1789                         /*
1790                          * Convert to an AF_INET sockaddr.  This means the
1791                          * return messages will have the extra space, but have
1792                          * AF_INET sockaddrs instead of AF_INET6.
1793                          *
1794                          * Yes, RFC 2367 isn't clear on what to do here w.r.t.
1795                          * mapped addresses, but since AF_INET6 ::ffff:<v4> is
1796                          * equal to AF_INET <v4>, it shouldnt be a huge
1797                          * problem.
1798                          */
1799                         sin->sin_family = AF_INET;
1800                         IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
1801                             &sin->sin_addr);
1802                         bzero(&sin->sin_zero, sizeof (sin->sin_zero));
1803                         normalized = B_TRUE;
1804                 }
1805         } else if (sin->sin_family != AF_INET) {
1806                 switch (ext->sadb_ext_type) {
1807                 case SADB_EXT_ADDRESS_SRC:
1808                         diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
1809                         break;
1810                 case SADB_EXT_ADDRESS_DST:
1811                         diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
1812                         break;
1813                 case SADB_X_EXT_ADDRESS_INNER_SRC:
1814                         diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
1815                         break;
1816                 case SADB_X_EXT_ADDRESS_INNER_DST:
1817                         diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
1818                         break;
1819                 case SADB_X_EXT_ADDRESS_NATT_LOC:
1820                         diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
1821                         break;
1822                 case SADB_X_EXT_ADDRESS_NATT_REM:
1823                         diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
1824                         break;
1825                         /* There is no default, see above ASSERT. */
1826                 }
1827 bail:
1828                 if (pfkey_q != NULL) {
1829                         sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
1830                             serial);
1831                 } else {
1832                         /*
1833                          * Scribble in sadb_msg that we got passed in.
1834                          * Overload "mp" to be an sadb_msg pointer.
1835                          */
1836                         sadb_msg_t *samsg = (sadb_msg_t *)mp;
1837 
1838                         samsg->sadb_msg_errno = EINVAL;
1839                         samsg->sadb_x_msg_diagnostic = diagnostic;
1840                 }
1841                 return (KS_IN_ADDR_UNKNOWN);
1842         }
1843 
1844         if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
1845             ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
1846                 /*
1847                  * We need only check for prefix issues.
1848                  */
1849 
1850                 /* Set diagnostic now, in case we need it later. */
1851                 diagnostic =
1852                     (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
1853                     SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
1854                     SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
1855 
1856                 if (normalized)
1857                         addr->sadb_address_prefixlen -= 96;
1858 
1859                 /*
1860                  * Verify and mask out inner-addresses based on prefix length.
1861                  */
1862                 if (sin->sin_family == AF_INET) {
1863                         if (addr->sadb_address_prefixlen > 32)
1864                                 goto bail;
1865                         sin->sin_addr.s_addr &=
1866                             ip_plen_to_mask(addr->sadb_address_prefixlen);
1867                 } else {
1868                         in6_addr_t mask;
1869 
1870                         ASSERT(sin->sin_family == AF_INET6);
1871                         /*
1872                          * ip_plen_to_mask_v6() returns NULL if the value in
1873                          * question is out of range.
1874                          */
1875                         if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
1876                             &mask) == NULL)
1877                                 goto bail;
1878                         sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
1879                         sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
1880                         sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
1881                         sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
1882                 }
1883 
1884                 /* We don't care in these cases. */
1885                 return (KS_IN_ADDR_DONTCARE);
1886         }
1887 
1888         if (sin->sin_family == AF_INET6) {
1889                 /* Check the easy ones now. */
1890                 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
1891                         return (KS_IN_ADDR_MBCAST);
1892                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
1893                         return (KS_IN_ADDR_UNSPEC);
1894                 /*
1895                  * At this point, we're a unicast IPv6 address.
1896                  *
1897                  * XXX Zones alert -> me/notme decision needs to be tempered
1898                  * by what zone we're in when we go to zone-aware IPsec.
1899                  */
1900                 if (ip_type_v6(&sin6->sin6_addr, ns->netstack_ip) ==
1901                     IRE_LOCAL) {
1902                         /* Hey hey, it's local. */
1903                         return (KS_IN_ADDR_ME);
1904                 }
1905         } else {
1906                 ASSERT(sin->sin_family == AF_INET);
1907                 if (sin->sin_addr.s_addr == INADDR_ANY)
1908                         return (KS_IN_ADDR_UNSPEC);
1909                 if (CLASSD(sin->sin_addr.s_addr))
1910                         return (KS_IN_ADDR_MBCAST);
1911                 /*
1912                  * At this point we're a unicast or broadcast IPv4 address.
1913                  *
1914                  * Check if the address is IRE_BROADCAST or IRE_LOCAL.
1915                  *
1916                  * XXX Zones alert -> me/notme decision needs to be tempered
1917                  * by what zone we're in when we go to zone-aware IPsec.
1918                  */
1919                 type = ip_type_v4(sin->sin_addr.s_addr, ns->netstack_ip);
1920                 switch (type) {
1921                 case IRE_LOCAL:
1922                         return (KS_IN_ADDR_ME);
1923                 case IRE_BROADCAST:
1924                         return (KS_IN_ADDR_MBCAST);
1925                 }
1926         }
1927 
1928         return (KS_IN_ADDR_NOTME);
1929 }
1930 
1931 /*
1932  * Address normalizations and reality checks for inbound PF_KEY messages.
1933  *
1934  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
1935  * the source to AF_INET.  Do the same for the inner sources.
1936  */
1937 boolean_t
1938 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
1939 {
1940         struct sockaddr_in *src, *isrc;
1941         struct sockaddr_in6 *dst, *idst;
1942         sadb_address_t *srcext, *dstext;
1943         uint16_t sport;
1944         sadb_ext_t **extv = ksi->ks_in_extv;
1945         int rc;
1946 
1947         if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
1948                 rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
1949                     ksi->ks_in_serial, ns);
1950                 if (rc == KS_IN_ADDR_UNKNOWN)
1951                         return (B_FALSE);
1952                 if (rc == KS_IN_ADDR_MBCAST) {
1953                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1954                             SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
1955                         return (B_FALSE);
1956                 }
1957                 ksi->ks_in_srctype = rc;
1958         }
1959 
1960         if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
1961                 rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
1962                     ksi->ks_in_serial, ns);
1963                 if (rc == KS_IN_ADDR_UNKNOWN)
1964                         return (B_FALSE);
1965                 if (rc == KS_IN_ADDR_UNSPEC) {
1966                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1967                             SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
1968                         return (B_FALSE);
1969                 }
1970                 ksi->ks_in_dsttype = rc;
1971         }
1972 
1973         /*
1974          * NAT-Traversal addrs are simple enough to not require all of
1975          * the checks in sadb_addrcheck().  Just normalize or reject if not
1976          * AF_INET.
1977          */
1978         if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
1979                 rc = sadb_addrcheck(pfkey_q, mp,
1980                     extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
1981 
1982                 /*
1983                  * Local NAT-T addresses never use an IRE_LOCAL, so it should
1984                  * always be NOTME, or UNSPEC (to handle both tunnel mode
1985                  * AND local-port flexibility).
1986                  */
1987                 if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
1988                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1989                             SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
1990                             ksi->ks_in_serial);
1991                         return (B_FALSE);
1992                 }
1993                 src = (struct sockaddr_in *)
1994                     (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
1995                 if (src->sin_family != AF_INET) {
1996                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1997                             SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
1998                             ksi->ks_in_serial);
1999                         return (B_FALSE);
2000                 }
2001         }
2002 
2003         if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2004                 rc = sadb_addrcheck(pfkey_q, mp,
2005                     extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2006 
2007                 /*
2008                  * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2009                  * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2010                  */
2011                 if (rc != KS_IN_ADDR_NOTME &&
2012                     !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2013                     rc == KS_IN_ADDR_UNSPEC)) {
2014                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
2015                             SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2016                             ksi->ks_in_serial);
2017                         return (B_FALSE);
2018                 }
2019                 src = (struct sockaddr_in *)
2020                     (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2021                 if (src->sin_family != AF_INET) {
2022                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
2023                             SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2024                             ksi->ks_in_serial);
2025                         return (B_FALSE);
2026                 }
2027         }
2028 
2029         if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2030                 if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2031                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
2032                             SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2033                             ksi->ks_in_serial);
2034                         return (B_FALSE);
2035                 }
2036 
2037                 if (sadb_addrcheck(pfkey_q, mp,
2038                     extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2039                     == KS_IN_ADDR_UNKNOWN ||
2040                     sadb_addrcheck(pfkey_q, mp,
2041                     extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2042                     == KS_IN_ADDR_UNKNOWN)
2043                         return (B_FALSE);
2044 
2045                 isrc = (struct sockaddr_in *)
2046                     (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2047                     1);
2048                 idst = (struct sockaddr_in6 *)
2049                     (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2050                     1);
2051                 if (isrc->sin_family != idst->sin6_family) {
2052                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
2053                             SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2054                             ksi->ks_in_serial);
2055                         return (B_FALSE);
2056                 }
2057         } else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2058                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
2059                             SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2060                             ksi->ks_in_serial);
2061                         return (B_FALSE);
2062         } else {
2063                 isrc = NULL;    /* For inner/outer port check below. */
2064         }
2065 
2066         dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2067         srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2068 
2069         if (dstext == NULL || srcext == NULL)
2070                 return (B_TRUE);
2071 
2072         dst = (struct sockaddr_in6 *)(dstext + 1);
2073         src = (struct sockaddr_in *)(srcext + 1);
2074 
2075         if (isrc != NULL &&
2076             (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2077             (src->sin_port != 0 || dst->sin6_port != 0)) {
2078                 /* Can't set inner and outer ports in one SA. */
2079                 sadb_pfkey_error(pfkey_q, mp, EINVAL,
2080                     SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2081                     ksi->ks_in_serial);
2082                 return (B_FALSE);
2083         }
2084 
2085         if (dst->sin6_family == src->sin_family)
2086                 return (B_TRUE);
2087 
2088         if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2089                 if (srcext->sadb_address_proto == 0) {
2090                         srcext->sadb_address_proto = dstext->sadb_address_proto;
2091                 } else if (dstext->sadb_address_proto == 0) {
2092                         dstext->sadb_address_proto = srcext->sadb_address_proto;
2093                 } else {
2094                         /* Inequal protocols, neither were 0.  Report error. */
2095                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
2096                             SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2097                             ksi->ks_in_serial);
2098                         return (B_FALSE);
2099                 }
2100         }
2101 
2102         /*
2103          * With the exception of an unspec IPv6 source and an IPv4
2104          * destination, address families MUST me matched.
2105          */
2106         if (src->sin_family == AF_INET ||
2107             ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2108                 sadb_pfkey_error(pfkey_q, mp, EINVAL,
2109                     SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2110                 return (B_FALSE);
2111         }
2112 
2113         /*
2114          * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2115          * in the same place for sockaddr_in and sockaddr_in6.
2116          */
2117         sport = src->sin_port;
2118         bzero(src, sizeof (*src));
2119         src->sin_family = AF_INET;
2120         src->sin_port = sport;
2121 
2122         return (B_TRUE);
2123 }
2124 
2125 /*
2126  * Set the results in "addrtype", given an IRE as requested by
2127  * sadb_addrcheck().
2128  */
2129 int
2130 sadb_addrset(ire_t *ire)
2131 {
2132         if ((ire->ire_type & IRE_BROADCAST) ||
2133             (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2134             (ire->ire_ipversion == IPV6_VERSION &&
2135             IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2136                 return (KS_IN_ADDR_MBCAST);
2137         if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2138                 return (KS_IN_ADDR_ME);
2139         return (KS_IN_ADDR_NOTME);
2140 }
2141 
2142 /*
2143  * Match primitives..
2144  * !!! TODO: short term: inner selectors
2145  *              ipv6 scope id (ifindex)
2146  * longer term:  zone id.  sensitivity label. uid.
2147  */
2148 boolean_t
2149 sadb_match_spi(ipsa_query_t *sq, ipsa_t *sa)
2150 {
2151         return (sq->spi == sa->ipsa_spi);
2152 }
2153 
2154 boolean_t
2155 sadb_match_dst_v6(ipsa_query_t *sq, ipsa_t *sa)
2156 {
2157         return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_dstaddr, sq->dstaddr, AF_INET6));
2158 }
2159 
2160 boolean_t
2161 sadb_match_src_v6(ipsa_query_t *sq, ipsa_t *sa)
2162 {
2163         return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_srcaddr, sq->srcaddr, AF_INET6));
2164 }
2165 
2166 boolean_t
2167 sadb_match_dst_v4(ipsa_query_t *sq, ipsa_t *sa)
2168 {
2169         return (sq->dstaddr[0] == sa->ipsa_dstaddr[0]);
2170 }
2171 
2172 boolean_t
2173 sadb_match_src_v4(ipsa_query_t *sq, ipsa_t *sa)
2174 {
2175         return (sq->srcaddr[0] == sa->ipsa_srcaddr[0]);
2176 }
2177 
2178 boolean_t
2179 sadb_match_dstid(ipsa_query_t *sq, ipsa_t *sa)
2180 {
2181         return ((sa->ipsa_dst_cid != NULL) &&
2182             (sq->didtype == sa->ipsa_dst_cid->ipsid_type) &&
2183             (strcmp(sq->didstr, sa->ipsa_dst_cid->ipsid_cid) == 0));
2184 
2185 }
2186 boolean_t
2187 sadb_match_srcid(ipsa_query_t *sq, ipsa_t *sa)
2188 {
2189         return ((sa->ipsa_src_cid != NULL) &&
2190             (sq->sidtype == sa->ipsa_src_cid->ipsid_type) &&
2191             (strcmp(sq->sidstr, sa->ipsa_src_cid->ipsid_cid) == 0));
2192 }
2193 
2194 boolean_t
2195 sadb_match_kmc(ipsa_query_t *sq, ipsa_t *sa)
2196 {
2197 #define M(a, b) (((a) == 0) || ((b) == 0) || ((a) == (b)))
2198 
2199         return (M(sq->kmc, sa->ipsa_kmc) && M(sq->kmp, sa->ipsa_kmp));
2200 
2201 #undef M
2202 }
2203 
2204 /*
2205  * Common function which extracts several PF_KEY extensions for ease of
2206  * SADB matching.
2207  *
2208  * XXX TODO: weed out ipsa_query_t fields not used during matching
2209  * or afterwards?
2210  */
2211 int
2212 sadb_form_query(keysock_in_t *ksi, uint32_t req, uint32_t match,
2213     ipsa_query_t *sq, int *diagnostic)
2214 {
2215         int i;
2216         ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2217 
2218         for (i = 0; i < IPSA_NMATCH; i++)
2219                 sq->matchers[i] = NULL;
2220 
2221         ASSERT((req & ~match) == 0);
2222 
2223         sq->req = req;
2224         sq->dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2225         sq->srcext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2226         sq->assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2227 
2228         if ((req & IPSA_Q_DST) && (sq->dstext == NULL)) {
2229                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2230                 return (EINVAL);
2231         }
2232         if ((req & IPSA_Q_SRC) && (sq->srcext == NULL)) {
2233                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2234                 return (EINVAL);
2235         }
2236         if ((req & IPSA_Q_SA) && (sq->assoc == NULL)) {
2237                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2238                 return (EINVAL);
2239         }
2240 
2241         if (match & IPSA_Q_SA) {
2242                 *mfpp++ = sadb_match_spi;
2243                 sq->spi = sq->assoc->sadb_sa_spi;
2244         }
2245 
2246         if (sq->dstext != NULL)
2247                 sq->dst = (struct sockaddr_in *)(sq->dstext + 1);
2248         else {
2249                 sq->dst = NULL;
2250                 sq->dst6 = NULL;
2251                 sq->dstaddr = NULL;
2252         }
2253 
2254         if (sq->srcext != NULL)
2255                 sq->src = (struct sockaddr_in *)(sq->srcext + 1);
2256         else {
2257                 sq->src = NULL;
2258                 sq->src6 = NULL;
2259                 sq->srcaddr = NULL;
2260         }
2261 
2262         if (sq->dst != NULL)
2263                 sq->af = sq->dst->sin_family;
2264         else if (sq->src != NULL)
2265                 sq->af = sq->src->sin_family;
2266         else
2267                 sq->af = AF_INET;
2268 
2269         if (sq->af == AF_INET6) {
2270                 if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2271                         *mfpp++ = sadb_match_dst_v6;
2272                         sq->dst6 = (struct sockaddr_in6 *)sq->dst;
2273                         sq->dstaddr = (uint32_t *)&(sq->dst6->sin6_addr);
2274                 } else {
2275                         match &= ~IPSA_Q_DST;
2276                         sq->dstaddr = ALL_ZEROES_PTR;
2277                 }
2278 
2279                 if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2280                         sq->src6 = (struct sockaddr_in6 *)(sq->srcext + 1);
2281                         sq->srcaddr = (uint32_t *)&sq->src6->sin6_addr;
2282                         if (sq->src6->sin6_family != AF_INET6) {
2283                                 *diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2284                                 return (EINVAL);
2285                         }
2286                         *mfpp++ = sadb_match_src_v6;
2287                 } else {
2288                         match &= ~IPSA_Q_SRC;
2289                         sq->srcaddr = ALL_ZEROES_PTR;
2290                 }
2291         } else {
2292                 sq->src6 = sq->dst6 = NULL;
2293                 if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2294                         *mfpp++ = sadb_match_dst_v4;
2295                         sq->dstaddr = (uint32_t *)&sq->dst->sin_addr;
2296                 } else {
2297                         match &= ~IPSA_Q_DST;
2298                         sq->dstaddr = ALL_ZEROES_PTR;
2299                 }
2300                 if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2301                         sq->srcaddr = (uint32_t *)&sq->src->sin_addr;
2302                         if (sq->src->sin_family != AF_INET) {
2303                                 *diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2304                                 return (EINVAL);
2305                         }
2306                         *mfpp++ = sadb_match_src_v4;
2307                 } else {
2308                         match &= ~IPSA_Q_SRC;
2309                         sq->srcaddr = ALL_ZEROES_PTR;
2310                 }
2311         }
2312 
2313         sq->dstid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2314         if ((match & IPSA_Q_DSTID) && (sq->dstid != NULL)) {
2315                 sq->didstr = (char *)(sq->dstid + 1);
2316                 sq->didtype = sq->dstid->sadb_ident_type;
2317                 *mfpp++ = sadb_match_dstid;
2318         }
2319 
2320         sq->srcid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2321 
2322         if ((match & IPSA_Q_SRCID) && (sq->srcid != NULL)) {
2323                 sq->sidstr = (char *)(sq->srcid + 1);
2324                 sq->sidtype = sq->srcid->sadb_ident_type;
2325                 *mfpp++ = sadb_match_srcid;
2326         }
2327 
2328         sq->kmcext = (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2329         sq->kmc = 0;
2330         sq->kmp = 0;
2331 
2332         if ((match & IPSA_Q_KMC) && (sq->kmcext)) {
2333                 sq->kmc = sq->kmcext->sadb_x_kmc_cookie;
2334                 sq->kmp = sq->kmcext->sadb_x_kmc_proto;
2335                 *mfpp++ = sadb_match_kmc;
2336         }
2337 
2338         if (match & (IPSA_Q_INBOUND|IPSA_Q_OUTBOUND)) {
2339                 if (sq->af == AF_INET6)
2340                         sq->sp = &sq->spp->s_v6;
2341                 else
2342                         sq->sp = &sq->spp->s_v4;
2343         } else {
2344                 sq->sp = NULL;
2345         }
2346 
2347         if (match & IPSA_Q_INBOUND) {
2348                 sq->inhash = INBOUND_HASH(sq->sp, sq->assoc->sadb_sa_spi);
2349                 sq->inbound = &sq->sp->sdb_if[sq->inhash];
2350         } else {
2351                 sq->inhash = 0;
2352                 sq->inbound = NULL;
2353         }
2354 
2355         if (match & IPSA_Q_OUTBOUND) {
2356                 if (sq->af == AF_INET6) {
2357                         sq->outhash = OUTBOUND_HASH_V6(sq->sp, *(sq->dstaddr));
2358                 } else {
2359                         sq->outhash = OUTBOUND_HASH_V4(sq->sp, *(sq->dstaddr));
2360                 }
2361                 sq->outbound = &sq->sp->sdb_of[sq->outhash];
2362         } else {
2363                 sq->outhash = 0;
2364                 sq->outbound = NULL;
2365         }
2366         sq->match = match;
2367         return (0);
2368 }
2369 
2370 /*
2371  * Match an initialized query structure with a security association;
2372  * return B_TRUE on a match, B_FALSE on a miss.
2373  * Applies match functions set up by sadb_form_query() until one returns false.
2374  */
2375 boolean_t
2376 sadb_match_query(ipsa_query_t *sq, ipsa_t *sa)
2377 {
2378         ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2379         ipsa_match_fn_t mfp;
2380 
2381         for (mfp = *mfpp++; mfp != NULL; mfp = *mfpp++) {
2382                 if (!mfp(sq, sa))
2383                         return (B_FALSE);
2384         }
2385         return (B_TRUE);
2386 }
2387 
2388 /*
2389  * Walker callback function to delete sa's based on src/dst address.
2390  * Assumes that we're called with *head locked, no other locks held;
2391  * Conveniently, and not coincidentally, this is both what sadb_walker
2392  * gives us and also what sadb_unlinkassoc expects.
2393  */
2394 struct sadb_purge_state
2395 {
2396         ipsa_query_t sq;
2397         boolean_t inbnd;
2398         uint8_t sadb_sa_state;
2399 };
2400 
2401 static void
2402 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2403 {
2404         struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2405 
2406         ASSERT(MUTEX_HELD(&head->isaf_lock));
2407 
2408         mutex_enter(&entry->ipsa_lock);
2409 
2410         if (entry->ipsa_state == IPSA_STATE_LARVAL ||
2411             !sadb_match_query(&ps->sq, entry)) {
2412                 mutex_exit(&entry->ipsa_lock);
2413                 return;
2414         }
2415 
2416         if (ps->inbnd) {
2417                 sadb_delete_cluster(entry);
2418         }
2419         entry->ipsa_state = IPSA_STATE_DEAD;
2420         (void) sadb_torch_assoc(head, entry);
2421 }
2422 
2423 /*
2424  * Common code to purge an SA with a matching src or dst address.
2425  * Don't kill larval SA's in such a purge.
2426  */
2427 int
2428 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp,
2429     int *diagnostic, queue_t *pfkey_q)
2430 {
2431         struct sadb_purge_state ps;
2432         int error = sadb_form_query(ksi, 0,
2433             IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2434             &ps.sq, diagnostic);
2435 
2436         if (error != 0)
2437                 return (error);
2438 
2439         /*
2440          * This is simple, crude, and effective.
2441          * Unimplemented optimizations (TBD):
2442          * - we can limit how many places we search based on where we
2443          * think the SA is filed.
2444          * - if we get a dst address, we can hash based on dst addr to find
2445          * the correct bucket in the outbound table.
2446          */
2447         ps.inbnd = B_TRUE;
2448         sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2449         ps.inbnd = B_FALSE;
2450         sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2451 
2452         ASSERT(mp->b_cont != NULL);
2453         sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2454             NULL);
2455         return (0);
2456 }
2457 
2458 static void
2459 sadb_delpair_state_one(isaf_t *head, ipsa_t *entry, void *cookie)
2460 {
2461         struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2462         isaf_t  *inbound_bucket;
2463         ipsa_t *peer_assoc;
2464         ipsa_query_t *sq = &ps->sq;
2465 
2466         ASSERT(MUTEX_HELD(&head->isaf_lock));
2467 
2468         mutex_enter(&entry->ipsa_lock);
2469 
2470         if ((entry->ipsa_state != ps->sadb_sa_state) ||
2471             ((sq->srcaddr != NULL) &&
2472             !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, sq->srcaddr, sq->af))) {
2473                 mutex_exit(&entry->ipsa_lock);
2474                 return;
2475         }
2476 
2477         /*
2478          * The isaf_t *, which is passed in , is always an outbound bucket,
2479          * and we are preserving the outbound-then-inbound hash-bucket lock
2480          * ordering. The sadb_walker() which triggers this function is called
2481          * only on the outbound fanout, and the corresponding inbound bucket
2482          * lock is safe to acquire here.
2483          */
2484 
2485         if (entry->ipsa_haspeer) {
2486                 inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_spi);
2487                 mutex_enter(&inbound_bucket->isaf_lock);
2488                 peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2489                     entry->ipsa_spi, entry->ipsa_srcaddr,
2490                     entry->ipsa_dstaddr, entry->ipsa_addrfam);
2491         } else {
2492                 inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_otherspi);
2493                 mutex_enter(&inbound_bucket->isaf_lock);
2494                 peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2495                     entry->ipsa_otherspi, entry->ipsa_dstaddr,
2496                     entry->ipsa_srcaddr, entry->ipsa_addrfam);
2497         }
2498 
2499         entry->ipsa_state = IPSA_STATE_DEAD;
2500         (void) sadb_torch_assoc(head, entry);
2501         if (peer_assoc != NULL) {
2502                 mutex_enter(&peer_assoc->ipsa_lock);
2503                 peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2504                 (void) sadb_torch_assoc(inbound_bucket, peer_assoc);
2505         }
2506         mutex_exit(&inbound_bucket->isaf_lock);
2507 }
2508 
2509 static int
2510 sadb_delpair_state(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2511     int *diagnostic, queue_t *pfkey_q)
2512 {
2513         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2514         struct sadb_purge_state ps;
2515         int error;
2516 
2517         ps.sq.spp = spp;                /* XXX param */
2518 
2519         error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SRC,
2520             IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2521             &ps.sq, diagnostic);
2522         if (error != 0)
2523                 return (error);
2524 
2525         ps.inbnd = B_FALSE;
2526         ps.sadb_sa_state = assoc->sadb_sa_state;
2527         sadb_walker(ps.sq.sp->sdb_of, ps.sq.sp->sdb_hashsize,
2528             sadb_delpair_state_one, &ps);
2529 
2530         ASSERT(mp->b_cont != NULL);
2531         sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2532             ksi, NULL);
2533         return (0);
2534 }
2535 
2536 /*
2537  * Common code to delete/get an SA.
2538  */
2539 int
2540 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2541     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2542 {
2543         ipsa_query_t sq;
2544         ipsa_t *echo_target = NULL;
2545         ipsap_t ipsapp;
2546         uint_t  error = 0;
2547 
2548         if (sadb_msg_type == SADB_X_DELPAIR_STATE)
2549                 return (sadb_delpair_state(mp, ksi, spp, diagnostic, pfkey_q));
2550 
2551         sq.spp = spp;           /* XXX param */
2552         error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SA,
2553             IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
2554             &sq, diagnostic);
2555         if (error != 0)
2556                 return (error);
2557 
2558         error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
2559         if (error != 0) {
2560                 return (error);
2561         }
2562 
2563         echo_target = ipsapp.ipsap_sa_ptr;
2564         if (echo_target == NULL)
2565                 echo_target = ipsapp.ipsap_psa_ptr;
2566 
2567         if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2568                 /*
2569                  * Bucket locks will be required if SA is actually unlinked.
2570                  * get_ipsa_pair() returns valid hash bucket pointers even
2571                  * if it can't find a pair SA pointer. To prevent a potential
2572                  * deadlock, always lock the outbound bucket before the inbound.
2573                  */
2574                 if (ipsapp.in_inbound_table) {
2575                         mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2576                         mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2577                 } else {
2578                         mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2579                         mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2580                 }
2581 
2582                 if (ipsapp.ipsap_sa_ptr != NULL) {
2583                         mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
2584                         if (ipsapp.ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2585                                 sadb_delete_cluster(ipsapp.ipsap_sa_ptr);
2586                         }
2587                         ipsapp.ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2588                         (void) sadb_torch_assoc(ipsapp.ipsap_bucket,
2589                             ipsapp.ipsap_sa_ptr);
2590                         /*
2591                          * sadb_torch_assoc() releases the ipsa_lock
2592                          * and calls sadb_unlinkassoc() which does a
2593                          * IPSA_REFRELE.
2594                          */
2595                 }
2596                 if (ipsapp.ipsap_psa_ptr != NULL) {
2597                         mutex_enter(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2598                         if (sadb_msg_type == SADB_X_DELPAIR ||
2599                             ipsapp.ipsap_psa_ptr->ipsa_haspeer) {
2600                                 if (ipsapp.ipsap_psa_ptr->ipsa_flags &
2601                                     IPSA_F_INBOUND) {
2602                                         sadb_delete_cluster
2603                                             (ipsapp.ipsap_psa_ptr);
2604                                 }
2605                                 ipsapp.ipsap_psa_ptr->ipsa_state =
2606                                     IPSA_STATE_DEAD;
2607                                 (void) sadb_torch_assoc(ipsapp.ipsap_pbucket,
2608                                     ipsapp.ipsap_psa_ptr);
2609                         } else {
2610                                 /*
2611                                  * Only half of the "pair" has been deleted.
2612                                  * Update the remaining SA and remove references
2613                                  * to its pair SA, which is now gone.
2614                                  */
2615                                 ipsapp.ipsap_psa_ptr->ipsa_otherspi = 0;
2616                                 ipsapp.ipsap_psa_ptr->ipsa_flags &=
2617                                     ~IPSA_F_PAIRED;
2618                                 mutex_exit(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2619                         }
2620                 } else if (sadb_msg_type == SADB_X_DELPAIR) {
2621                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2622                         error = ESRCH;
2623                 }
2624                 mutex_exit(&ipsapp.ipsap_bucket->isaf_lock);
2625                 mutex_exit(&ipsapp.ipsap_pbucket->isaf_lock);
2626         }
2627 
2628         ASSERT(mp->b_cont != NULL);
2629 
2630         if (error == 0)
2631                 sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2632                     mp->b_cont->b_rptr, ksi, echo_target);
2633 
2634         destroy_ipsa_pair(&ipsapp);
2635 
2636         return (error);
2637 }
2638 
2639 /*
2640  * This function takes a sadb_sa_t and finds the ipsa_t structure
2641  * and the isaf_t (hash bucket) that its stored under. If the security
2642  * association has a peer, the ipsa_t structure and bucket for that security
2643  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2644  * are returned as a ipsap_t.
2645  *
2646  * The hash buckets are returned for convenience, if the calling function
2647  * needs to use the hash bucket locks, say to remove the SA's, it should
2648  * take care to observe the convention of locking outbound bucket then
2649  * inbound bucket. The flag in_inbound_table provides direction.
2650  *
2651  * Note that a "pair" is defined as one (but not both) of the following:
2652  *
2653  * A security association which has a soft reference to another security
2654  * association via its SPI.
2655  *
2656  * A security association that is not obviously "inbound" or "outbound" so
2657  * it appears in both hash tables, the "peer" being the same security
2658  * association in the other hash table.
2659  *
2660  * This function will return NULL if the ipsa_t can't be found in the
2661  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2662  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2663  * provided at least one ipsa_t is found.
2664  */
2665 static int
2666 get_ipsa_pair(ipsa_query_t *sq, ipsap_t *ipsapp, int *diagnostic)
2667 {
2668         uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2669         uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2670         uint32_t pair_spi;
2671 
2672         init_ipsa_pair(ipsapp);
2673 
2674         ipsapp->in_inbound_table = B_FALSE;
2675 
2676         /* Lock down both buckets. */
2677         mutex_enter(&sq->outbound->isaf_lock);
2678         mutex_enter(&sq->inbound->isaf_lock);
2679 
2680         if (sq->assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2681                 ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2682                     sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2683                 if (ipsapp->ipsap_sa_ptr != NULL) {
2684                         ipsapp->ipsap_bucket = sq->inbound;
2685                         ipsapp->ipsap_pbucket = sq->outbound;
2686                         ipsapp->in_inbound_table = B_TRUE;
2687                 } else {
2688                         ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->outbound,
2689                             sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2690                             sq->af);
2691                         ipsapp->ipsap_bucket = sq->outbound;
2692                         ipsapp->ipsap_pbucket = sq->inbound;
2693                 }
2694         } else {
2695                 /* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2696                 ipsapp->ipsap_sa_ptr =
2697                     ipsec_getassocbyspi(sq->outbound,
2698                     sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2699                 if (ipsapp->ipsap_sa_ptr != NULL) {
2700                         ipsapp->ipsap_bucket = sq->outbound;
2701                         ipsapp->ipsap_pbucket = sq->inbound;
2702                 } else {
2703                         ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2704                             sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2705                             sq->af);
2706                         ipsapp->ipsap_bucket = sq->inbound;
2707                         ipsapp->ipsap_pbucket = sq->outbound;
2708                         if (ipsapp->ipsap_sa_ptr != NULL)
2709                                 ipsapp->in_inbound_table = B_TRUE;
2710                 }
2711         }
2712 
2713         if (ipsapp->ipsap_sa_ptr == NULL) {
2714                 mutex_exit(&sq->outbound->isaf_lock);
2715                 mutex_exit(&sq->inbound->isaf_lock);
2716                 *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2717                 return (ESRCH);
2718         }
2719 
2720         if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2721             ipsapp->in_inbound_table) {
2722                 mutex_exit(&sq->outbound->isaf_lock);
2723                 mutex_exit(&sq->inbound->isaf_lock);
2724                 return (0);
2725         }
2726 
2727         mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2728         if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2729                 /*
2730                  * haspeer implies no sa_pairing, look for same spi
2731                  * in other hashtable.
2732                  */
2733                 ipsapp->ipsap_psa_ptr =
2734                     ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2735                     sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2736                 mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2737                 mutex_exit(&sq->outbound->isaf_lock);
2738                 mutex_exit(&sq->inbound->isaf_lock);
2739                 return (0);
2740         }
2741         pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2742         IPSA_COPY_ADDR(&pair_srcaddr,
2743             ipsapp->ipsap_sa_ptr->ipsa_srcaddr, sq->af);
2744         IPSA_COPY_ADDR(&pair_dstaddr,
2745             ipsapp->ipsap_sa_ptr->ipsa_dstaddr, sq->af);
2746         mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2747         mutex_exit(&sq->inbound->isaf_lock);
2748         mutex_exit(&sq->outbound->isaf_lock);
2749 
2750         if (pair_spi == 0) {
2751                 ASSERT(ipsapp->ipsap_bucket != NULL);
2752                 ASSERT(ipsapp->ipsap_pbucket != NULL);
2753                 return (0);
2754         }
2755 
2756         /* found sa in outbound sadb, peer should be inbound */
2757 
2758         if (ipsapp->in_inbound_table) {
2759                 /* Found SA in inbound table, pair will be in outbound. */
2760                 if (sq->af == AF_INET6) {
2761                         ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sq->sp,
2762                             *(uint32_t *)pair_srcaddr);
2763                 } else {
2764                         ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sq->sp,
2765                             *(uint32_t *)pair_srcaddr);
2766                 }
2767         } else {
2768                 ipsapp->ipsap_pbucket = INBOUND_BUCKET(sq->sp, pair_spi);
2769         }
2770         mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2771         ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2772             pair_spi, pair_dstaddr, pair_srcaddr, sq->af);
2773         mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2774         ASSERT(ipsapp->ipsap_bucket != NULL);
2775         ASSERT(ipsapp->ipsap_pbucket != NULL);
2776         return (0);
2777 }
2778 
2779 /*
2780  * Perform NAT-traversal cached checksum offset calculations here.
2781  */
2782 static void
2783 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2784     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2785     uint32_t *dst_addr_ptr)
2786 {
2787         struct sockaddr_in *natt_loc, *natt_rem;
2788         uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2789         uint32_t running_sum = 0;
2790 
2791 #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) +   ((x) >> 16)
2792 
2793         if (natt_rem_ext != NULL) {
2794                 uint32_t l_src;
2795                 uint32_t l_rem;
2796 
2797                 natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2798 
2799                 /* Ensured by sadb_addrfix(). */
2800                 ASSERT(natt_rem->sin_family == AF_INET);
2801 
2802                 natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2803                 newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2804                 l_src = *src_addr_ptr;
2805                 l_rem = *natt_rem_ptr;
2806 
2807                 /* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2808                 newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2809 
2810                 l_src = ntohl(l_src);
2811                 DOWN_SUM(l_src);
2812                 DOWN_SUM(l_src);
2813                 l_rem = ntohl(l_rem);
2814                 DOWN_SUM(l_rem);
2815                 DOWN_SUM(l_rem);
2816 
2817                 /*
2818                  * We're 1's complement for checksums, so check for wraparound
2819                  * here.
2820                  */
2821                 if (l_rem > l_src)
2822                         l_src--;
2823 
2824                 running_sum += l_src - l_rem;
2825 
2826                 DOWN_SUM(running_sum);
2827                 DOWN_SUM(running_sum);
2828         }
2829 
2830         if (natt_loc_ext != NULL) {
2831                 natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2832 
2833                 /* Ensured by sadb_addrfix(). */
2834                 ASSERT(natt_loc->sin_family == AF_INET);
2835 
2836                 natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2837                 newbie->ipsa_local_nat_port = natt_loc->sin_port;
2838 
2839                 /* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2840                 newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2841 
2842                 /*
2843                  * NAT-T port agility means we may have natt_loc_ext, but
2844                  * only for a local-port change.
2845                  */
2846                 if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2847                         uint32_t l_dst = ntohl(*dst_addr_ptr);
2848                         uint32_t l_loc = ntohl(*natt_loc_ptr);
2849 
2850                         DOWN_SUM(l_loc);
2851                         DOWN_SUM(l_loc);
2852                         DOWN_SUM(l_dst);
2853                         DOWN_SUM(l_dst);
2854 
2855                         /*
2856                          * We're 1's complement for checksums, so check for
2857                          * wraparound here.
2858                          */
2859                         if (l_loc > l_dst)
2860                                 l_dst--;
2861 
2862                         running_sum += l_dst - l_loc;
2863                         DOWN_SUM(running_sum);
2864                         DOWN_SUM(running_sum);
2865                 }
2866         }
2867 
2868         newbie->ipsa_inbound_cksum = running_sum;
2869 #undef DOWN_SUM
2870 }
2871 
2872 /*
2873  * This function is called from consumers that need to insert a fully-grown
2874  * security association into its tables.  This function takes into account that
2875  * SAs can be "inbound", "outbound", or "both".  The "primary" and "secondary"
2876  * hash bucket parameters are set in order of what the SA will be most of the
2877  * time.  (For example, an SA with an unspecified source, and a multicast
2878  * destination will primarily be an outbound SA.  OTOH, if that destination
2879  * is unicast for this node, then the SA will primarily be inbound.)
2880  *
2881  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2882  * to check both buckets for purposes of collision.
2883  *
2884  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2885  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2886  * with additional diagnostic information because there is at least one EINVAL
2887  * case here.
2888  */
2889 int
2890 sadb_common_add(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2891     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2892     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2893     netstack_t *ns, sadbp_t *spp)
2894 {
2895         ipsa_t *newbie_clone = NULL, *scratch;
2896         ipsap_t ipsapp;
2897         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2898         sadb_address_t *srcext =
2899             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2900         sadb_address_t *dstext =
2901             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2902         sadb_address_t *isrcext =
2903             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2904         sadb_address_t *idstext =
2905             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2906         sadb_x_kmc_t *kmcext =
2907             (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2908         sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2909         sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2910         sadb_sens_t *sens =
2911             (sadb_sens_t *)ksi->ks_in_extv[SADB_EXT_SENSITIVITY];
2912         sadb_sens_t *osens =
2913             (sadb_sens_t *)ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS];
2914         sadb_x_pair_t *pair_ext =
2915             (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2916         sadb_x_replay_ctr_t *replayext =
2917             (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
2918         uint8_t protocol =
2919             (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
2920         int salt_offset;
2921         uint8_t *buf_ptr;
2922         struct sockaddr_in *src, *dst, *isrc, *idst;
2923         struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2924         sadb_lifetime_t *soft =
2925             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2926         sadb_lifetime_t *hard =
2927             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2928         sadb_lifetime_t *idle =
2929             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
2930         sa_family_t af;
2931         int error = 0;
2932         boolean_t isupdate = (newbie != NULL);
2933         uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
2934         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2935         ip_stack_t      *ipst = ns->netstack_ip;
2936         ipsec_alginfo_t *alg;
2937         int             rcode;
2938         boolean_t       async = B_FALSE;
2939 
2940         init_ipsa_pair(&ipsapp);
2941 
2942         if (srcext == NULL) {
2943                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2944                 return (EINVAL);
2945         }
2946         if (dstext == NULL) {
2947                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2948                 return (EINVAL);
2949         }
2950         if (assoc == NULL) {
2951                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2952                 return (EINVAL);
2953         }
2954 
2955         src = (struct sockaddr_in *)(srcext + 1);
2956         src6 = (struct sockaddr_in6 *)(srcext + 1);
2957         dst = (struct sockaddr_in *)(dstext + 1);
2958         dst6 = (struct sockaddr_in6 *)(dstext + 1);
2959         if (isrcext != NULL) {
2960                 isrc = (struct sockaddr_in *)(isrcext + 1);
2961                 isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
2962                 ASSERT(idstext != NULL);
2963                 idst = (struct sockaddr_in *)(idstext + 1);
2964                 idst6 = (struct sockaddr_in6 *)(idstext + 1);
2965         } else {
2966                 isrc = NULL;
2967                 isrc6 = NULL;
2968         }
2969 
2970         af = src->sin_family;
2971 
2972         if (af == AF_INET) {
2973                 src_addr_ptr = (uint32_t *)&src->sin_addr;
2974                 dst_addr_ptr = (uint32_t *)&dst->sin_addr;
2975         } else {
2976                 ASSERT(af == AF_INET6);
2977                 src_addr_ptr = (uint32_t *)&src6->sin6_addr;
2978                 dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
2979         }
2980 
2981         if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
2982             cl_inet_checkspi &&
2983             (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
2984                 rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
2985                     assoc->sadb_sa_spi, NULL);
2986                 if (rcode == -1) {
2987                         return (EEXIST);
2988                 }
2989         }
2990 
2991         /*
2992          * Check to see if the new SA will be cloned AND paired. The
2993          * reason a SA will be cloned is the source or destination addresses
2994          * are not specific enough to determine if the SA goes in the outbound
2995          * or the inbound hash table, so its cloned and put in both. If
2996          * the SA is paired, it's soft linked to another SA for the other
2997          * direction. Keeping track and looking up SA's that are direction
2998          * unspecific and linked is too hard.
2999          */
3000         if (clone && (pair_ext != NULL)) {
3001                 *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3002                 return (EINVAL);
3003         }
3004 
3005         if (!isupdate) {
3006                 newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3007                     src_addr_ptr, dst_addr_ptr, af, ns);
3008                 if (newbie == NULL)
3009                         return (ENOMEM);
3010         }
3011 
3012         mutex_enter(&newbie->ipsa_lock);
3013 
3014         if (isrc != NULL) {
3015                 if (isrc->sin_family == AF_INET) {
3016                         if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3017                                 if (srcext->sadb_address_proto != 0) {
3018                                         /*
3019                                          * Mismatched outer-packet protocol
3020                                          * and inner-packet address family.
3021                                          */
3022                                         mutex_exit(&newbie->ipsa_lock);
3023                                         error = EPROTOTYPE;
3024                                         *diagnostic =
3025                                             SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3026                                         goto error;
3027                                 } else {
3028                                         /* Fill in with explicit protocol. */
3029                                         srcext->sadb_address_proto =
3030                                             IPPROTO_ENCAP;
3031                                         dstext->sadb_address_proto =
3032                                             IPPROTO_ENCAP;
3033                                 }
3034                         }
3035                         isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3036                         idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3037                 } else {
3038                         ASSERT(isrc->sin_family == AF_INET6);
3039                         if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3040                                 if (srcext->sadb_address_proto != 0) {
3041                                         /*
3042                                          * Mismatched outer-packet protocol
3043                                          * and inner-packet address family.
3044                                          */
3045                                         mutex_exit(&newbie->ipsa_lock);
3046                                         error = EPROTOTYPE;
3047                                         *diagnostic =
3048                                             SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3049                                         goto error;
3050                                 } else {
3051                                         /* Fill in with explicit protocol. */
3052                                         srcext->sadb_address_proto =
3053                                             IPPROTO_IPV6;
3054                                         dstext->sadb_address_proto =
3055                                             IPPROTO_IPV6;
3056                                 }
3057                         }
3058                         isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3059                         idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3060                 }
3061                 newbie->ipsa_innerfam = isrc->sin_family;
3062 
3063                 IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3064                     newbie->ipsa_innerfam);
3065                 IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3066                     newbie->ipsa_innerfam);
3067                 newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3068                 newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3069 
3070                 /* Unique value uses inner-ports for Tunnel Mode... */
3071                 newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3072                     idst->sin_port, dstext->sadb_address_proto,
3073                     idstext->sadb_address_proto);
3074                 newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3075                     idst->sin_port, dstext->sadb_address_proto,
3076                     idstext->sadb_address_proto);
3077         } else {
3078                 /* ... and outer-ports for Transport Mode. */
3079                 newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3080                     dst->sin_port, dstext->sadb_address_proto, 0);
3081                 newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3082                     dst->sin_port, dstext->sadb_address_proto, 0);
3083         }
3084         if (newbie->ipsa_unique_mask != (uint64_t)0)
3085                 newbie->ipsa_flags |= IPSA_F_UNIQUE;
3086 
3087         sadb_nat_calculations(newbie,
3088             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3089             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3090             src_addr_ptr, dst_addr_ptr);
3091 
3092         newbie->ipsa_type = samsg->sadb_msg_satype;
3093 
3094         ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3095             (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3096         newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3097         newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3098 
3099         newbie->ipsa_flags |= assoc->sadb_sa_flags;
3100         if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3101             ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
3102                 mutex_exit(&newbie->ipsa_lock);
3103                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3104                 error = EINVAL;
3105                 goto error;
3106         }
3107         if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3108             ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
3109                 mutex_exit(&newbie->ipsa_lock);
3110                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3111                 error = EINVAL;
3112                 goto error;
3113         }
3114         if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3115             ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
3116                 mutex_exit(&newbie->ipsa_lock);
3117                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3118                 error = EINVAL;
3119                 goto error;
3120         }
3121         /*
3122          * If unspecified source address, force replay_wsize to 0.
3123          * This is because an SA that has multiple sources of secure
3124          * traffic cannot enforce a replay counter w/o synchronizing the
3125          * senders.
3126          */
3127         if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3128                 newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3129         else
3130                 newbie->ipsa_replay_wsize = 0;
3131 
3132         newbie->ipsa_addtime = gethrestime_sec();
3133 
3134         if (kmcext != NULL) {
3135                 newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3136                 newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3137         }
3138 
3139         /*
3140          * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3141          * The spec says that one can update current lifetimes, but
3142          * that seems impractical, especially in the larval-to-mature
3143          * update that this function performs.
3144          */
3145         if (soft != NULL) {
3146                 newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3147                 newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3148                 newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3149                 newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3150                 SET_EXPIRE(newbie, softaddlt, softexpiretime);
3151         }
3152         if (hard != NULL) {
3153                 newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3154                 newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3155                 newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3156                 newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3157                 SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3158         }
3159         if (idle != NULL) {
3160                 newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3161                 newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3162                 newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3163                     newbie->ipsa_idleaddlt;
3164                 newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3165         }
3166 
3167         newbie->ipsa_authtmpl = NULL;
3168         newbie->ipsa_encrtmpl = NULL;
3169 
3170 #ifdef IPSEC_LATENCY_TEST
3171         if (akey != NULL && newbie->ipsa_auth_alg != SADB_AALG_NONE) {
3172 #else
3173         if (akey != NULL) {
3174 #endif
3175                 async = (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
3176                     IPSEC_ALGS_EXEC_ASYNC);
3177 
3178                 newbie->ipsa_authkeybits = akey->sadb_key_bits;
3179                 newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3180                 /* In case we have to round up to the next byte... */
3181                 if ((akey->sadb_key_bits & 0x7) != 0)
3182                         newbie->ipsa_authkeylen++;
3183                 newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3184                     KM_NOSLEEP);
3185                 if (newbie->ipsa_authkey == NULL) {
3186                         error = ENOMEM;
3187                         mutex_exit(&newbie->ipsa_lock);
3188                         goto error;
3189                 }
3190                 bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3191                 bzero(akey + 1, newbie->ipsa_authkeylen);
3192 
3193                 /*
3194                  * Pre-initialize the kernel crypto framework key
3195                  * structure.
3196                  */
3197                 newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3198                 newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3199                 newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3200 
3201                 rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3202                 alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3203                     [newbie->ipsa_auth_alg];
3204                 if (alg != NULL && ALG_VALID(alg)) {
3205                         newbie->ipsa_amech.cm_type = alg->alg_mech_type;
3206                         newbie->ipsa_amech.cm_param =
3207                             (char *)&newbie->ipsa_mac_len;
3208                         newbie->ipsa_amech.cm_param_len = sizeof (size_t);
3209                         newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
3210                 } else {
3211                         newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3212                 }
3213                 error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3214                 rw_exit(&ipss->ipsec_alg_lock);
3215                 if (error != 0) {
3216                         mutex_exit(&newbie->ipsa_lock);
3217                         /*
3218                          * An error here indicates that alg is the wrong type
3219                          * (IE: not authentication) or its not in the alg tables
3220                          * created by ipsecalgs(1m), or Kcf does not like the
3221                          * parameters passed in with this algorithm, which is
3222                          * probably a coding error!
3223                          */
3224                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3225 
3226                         goto error;
3227                 }
3228         }
3229 
3230         if (ekey != NULL) {
3231                 rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3232                 async = async || (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
3233                     IPSEC_ALGS_EXEC_ASYNC);
3234                 alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3235                     [newbie->ipsa_encr_alg];
3236 
3237                 if (alg != NULL && ALG_VALID(alg)) {
3238                         newbie->ipsa_emech.cm_type = alg->alg_mech_type;
3239                         newbie->ipsa_datalen = alg->alg_datalen;
3240                         if (alg->alg_flags & ALG_FLAG_COUNTERMODE)
3241                                 newbie->ipsa_flags |= IPSA_F_COUNTERMODE;
3242 
3243                         if (alg->alg_flags & ALG_FLAG_COMBINED) {
3244                                 newbie->ipsa_flags |= IPSA_F_COMBINED;
3245                                 newbie->ipsa_mac_len =  alg->alg_icvlen;
3246                         }
3247 
3248                         if (alg->alg_flags & ALG_FLAG_CCM)
3249                                 newbie->ipsa_noncefunc = ccm_params_init;
3250                         else if (alg->alg_flags & ALG_FLAG_GCM)
3251                                 newbie->ipsa_noncefunc = gcm_params_init;
3252                         else newbie->ipsa_noncefunc = cbc_params_init;
3253 
3254                         newbie->ipsa_saltlen = alg->alg_saltlen;
3255                         newbie->ipsa_saltbits = SADB_8TO1(newbie->ipsa_saltlen);
3256                         newbie->ipsa_iv_len = alg->alg_ivlen;
3257                         newbie->ipsa_nonce_len = newbie->ipsa_saltlen +
3258                             newbie->ipsa_iv_len;
3259                         newbie->ipsa_emech.cm_param = NULL;
3260                         newbie->ipsa_emech.cm_param_len = 0;
3261                 } else {
3262                         newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3263                 }
3264                 rw_exit(&ipss->ipsec_alg_lock);
3265 
3266                 /*
3267                  * The byte stream following the sadb_key_t is made up of:
3268                  * key bytes, [salt bytes], [IV initial value]
3269                  * All of these have variable length. The IV is typically
3270                  * randomly generated by this function and not passed in.
3271                  * By supporting the injection of a known IV, the whole
3272                  * IPsec subsystem and the underlying crypto subsystem
3273                  * can be tested with known test vectors.
3274                  *
3275                  * The keying material has been checked by ext_check()
3276                  * and ipsec_valid_key_size(), after removing salt/IV
3277                  * bits, whats left is the encryption key. If this is too
3278                  * short, ipsec_create_ctx_tmpl() will fail and the SA
3279                  * won't get created.
3280                  *
3281                  * set ipsa_encrkeylen to length of key only.
3282                  */
3283                 newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3284                 newbie->ipsa_encrkeybits -= ekey->sadb_key_reserved;
3285                 newbie->ipsa_encrkeybits -= newbie->ipsa_saltbits;
3286                 newbie->ipsa_encrkeylen = SADB_1TO8(newbie->ipsa_encrkeybits);
3287 
3288                 /* In case we have to round up to the next byte... */
3289                 if ((ekey->sadb_key_bits & 0x7) != 0)
3290                         newbie->ipsa_encrkeylen++;
3291 
3292                 newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3293                     KM_NOSLEEP);
3294                 if (newbie->ipsa_encrkey == NULL) {
3295                         error = ENOMEM;
3296                         mutex_exit(&newbie->ipsa_lock);
3297                         goto error;
3298                 }
3299 
3300                 buf_ptr = (uint8_t *)(ekey + 1);
3301                 bcopy(buf_ptr, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3302 
3303                 if (newbie->ipsa_flags & IPSA_F_COMBINED) {
3304                         /*
3305                          * Combined mode algs need a nonce. Copy the salt and
3306                          * IV into a buffer. The ipsa_nonce is a pointer into
3307                          * this buffer, some bytes at the start of the buffer
3308                          * may be unused, depends on the salt length. The IV
3309                          * is 64 bit aligned so it can be incremented as a
3310                          * uint64_t. Zero out key in samsg_t before freeing.
3311                          */
3312 
3313                         newbie->ipsa_nonce_buf = kmem_alloc(
3314                             sizeof (ipsec_nonce_t), KM_NOSLEEP);
3315                         if (newbie->ipsa_nonce_buf == NULL) {
3316                                 error = ENOMEM;
3317                                 mutex_exit(&newbie->ipsa_lock);
3318                                 goto error;
3319                         }
3320                         /*
3321                          * Initialize nonce and salt pointers to point
3322                          * to the nonce buffer. This is just in case we get
3323                          * bad data, the pointers will be valid, the data
3324                          * won't be.
3325                          *
3326                          * See sadb.h for layout of nonce.
3327                          */
3328                         newbie->ipsa_iv = &newbie->ipsa_nonce_buf->iv;
3329                         newbie->ipsa_salt = (uint8_t *)newbie->ipsa_nonce_buf;
3330                         newbie->ipsa_nonce = newbie->ipsa_salt;
3331                         if (newbie->ipsa_saltlen != 0) {
3332                                 salt_offset = MAXSALTSIZE -
3333                                     newbie->ipsa_saltlen;
3334                                 newbie->ipsa_salt = (uint8_t *)
3335                                     &newbie->ipsa_nonce_buf->salt[salt_offset];
3336                                 newbie->ipsa_nonce = newbie->ipsa_salt;
3337                                 buf_ptr += newbie->ipsa_encrkeylen;
3338                                 bcopy(buf_ptr, newbie->ipsa_salt,
3339                                     newbie->ipsa_saltlen);
3340                         }
3341                         /*
3342                          * The IV for CCM/GCM mode increments, it should not
3343                          * repeat. Get a random value for the IV, make a
3344                          * copy, the SA will expire when/if the IV ever
3345                          * wraps back to the initial value. If an Initial IV
3346                          * is passed in via PF_KEY, save this in the SA.
3347                          * Initialising IV for inbound is pointless as its
3348                          * taken from the inbound packet.
3349                          */
3350                         if (!is_inbound) {
3351                                 if (ekey->sadb_key_reserved != 0) {
3352                                         buf_ptr += newbie->ipsa_saltlen;
3353                                         bcopy(buf_ptr, (uint8_t *)newbie->
3354                                             ipsa_iv, SADB_1TO8(ekey->
3355                                             sadb_key_reserved));
3356                                 } else {
3357                                         (void) random_get_pseudo_bytes(
3358                                             (uint8_t *)newbie->ipsa_iv,
3359                                             newbie->ipsa_iv_len);
3360                                 }
3361                                 newbie->ipsa_iv_softexpire =
3362                                     (*newbie->ipsa_iv) << 9;
3363                                 newbie->ipsa_iv_hardexpire = *newbie->ipsa_iv;
3364                         }
3365                 }
3366                 bzero((ekey + 1), SADB_1TO8(ekey->sadb_key_bits));
3367 
3368                 /*
3369                  * Pre-initialize the kernel crypto framework key
3370                  * structure.
3371                  */
3372                 newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3373                 newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3374                 newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3375 
3376                 rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3377                 error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3378                 rw_exit(&ipss->ipsec_alg_lock);
3379                 if (error != 0) {
3380                         mutex_exit(&newbie->ipsa_lock);
3381                         /* See above for error explanation. */
3382                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3383                         goto error;
3384                 }
3385         }
3386 
3387         if (async)
3388                 newbie->ipsa_flags |= IPSA_F_ASYNC;
3389 
3390         /*
3391          * Ptrs to processing functions.
3392          */
3393         if (newbie->ipsa_type == SADB_SATYPE_ESP)
3394                 ipsecesp_init_funcs(newbie);
3395         else
3396                 ipsecah_init_funcs(newbie);
3397         ASSERT(newbie->ipsa_output_func != NULL &&
3398             newbie->ipsa_input_func != NULL);
3399 
3400         /*
3401          * Certificate ID stuff.
3402          */
3403         if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3404                 sadb_ident_t *id =
3405                     (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3406 
3407                 /*
3408                  * Can assume strlen() will return okay because ext_check() in
3409                  * keysock.c prepares the string for us.
3410                  */
3411                 newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3412                     (char *)(id+1), ns);
3413                 if (newbie->ipsa_src_cid == NULL) {
3414                         error = ENOMEM;
3415                         mutex_exit(&newbie->ipsa_lock);
3416                         goto error;
3417                 }
3418         }
3419 
3420         if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3421                 sadb_ident_t *id =
3422                     (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3423 
3424                 /*
3425                  * Can assume strlen() will return okay because ext_check() in
3426                  * keysock.c prepares the string for us.
3427                  */
3428                 newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3429                     (char *)(id+1), ns);
3430                 if (newbie->ipsa_dst_cid == NULL) {
3431                         error = ENOMEM;
3432                         mutex_exit(&newbie->ipsa_lock);
3433                         goto error;
3434                 }
3435         }
3436 
3437         /*
3438          * sensitivity label handling code:
3439          * Convert sens + bitmap into cred_t, and associate it
3440          * with the new SA.
3441          */
3442         if (sens != NULL) {
3443                 uint64_t *bitmap = (uint64_t *)(sens + 1);
3444 
3445                 newbie->ipsa_tsl = sadb_label_from_sens(sens, bitmap);
3446         }
3447 
3448         /*
3449          * Likewise for outer sensitivity.
3450          */
3451         if (osens != NULL) {
3452                 uint64_t *bitmap = (uint64_t *)(osens + 1);
3453                 ts_label_t *tsl, *effective_tsl;
3454                 uint32_t *peer_addr_ptr;
3455                 zoneid_t zoneid = GLOBAL_ZONEID;
3456                 zone_t *zone;
3457 
3458                 peer_addr_ptr = is_inbound ? src_addr_ptr : dst_addr_ptr;
3459 
3460                 tsl = sadb_label_from_sens(osens, bitmap);
3461                 newbie->ipsa_mac_exempt = CONN_MAC_DEFAULT;
3462 
3463                 if (osens->sadb_x_sens_flags & SADB_X_SENS_IMPLICIT) {
3464                         newbie->ipsa_mac_exempt = CONN_MAC_IMPLICIT;
3465                 }
3466 
3467                 error = tsol_check_dest(tsl, peer_addr_ptr,
3468                     (af == AF_INET6)?IPV6_VERSION:IPV4_VERSION,
3469                     newbie->ipsa_mac_exempt, B_TRUE, &effective_tsl);
3470                 if (error != 0) {
3471                         label_rele(tsl);
3472                         mutex_exit(&newbie->ipsa_lock);
3473                         goto error;
3474                 }
3475 
3476                 if (effective_tsl != NULL) {
3477                         label_rele(tsl);
3478                         tsl = effective_tsl;
3479                 }
3480 
3481                 newbie->ipsa_otsl = tsl;
3482 
3483                 zone = zone_find_by_label(tsl);
3484                 if (zone != NULL) {
3485                         zoneid = zone->zone_id;
3486                         zone_rele(zone);
3487                 }
3488                 /*
3489                  * For exclusive stacks we set the zoneid to zero to operate
3490                  * as if in the global zone for tsol_compute_label_v4/v6
3491                  */
3492                 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
3493                         zoneid = GLOBAL_ZONEID;
3494 
3495                 if (af == AF_INET6) {
3496                         error = tsol_compute_label_v6(tsl, zoneid,
3497                             (in6_addr_t *)peer_addr_ptr,
3498                             newbie->ipsa_opt_storage, ipst);
3499                 } else {
3500                         error = tsol_compute_label_v4(tsl, zoneid,
3501                             *peer_addr_ptr, newbie->ipsa_opt_storage, ipst);
3502                 }
3503                 if (error != 0) {
3504                         mutex_exit(&newbie->ipsa_lock);
3505                         goto error;
3506                 }
3507         }
3508 
3509 
3510         if (replayext != NULL) {
3511                 if ((replayext->sadb_x_rc_replay32 == 0) &&
3512                     (replayext->sadb_x_rc_replay64 != 0)) {
3513                         error = EOPNOTSUPP;
3514                         *diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3515                         mutex_exit(&newbie->ipsa_lock);
3516                         goto error;
3517                 }
3518                 newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3519         }
3520 
3521         /* now that the SA has been updated, set its new state */
3522         newbie->ipsa_state = assoc->sadb_sa_state;
3523 
3524         if (clone) {
3525                 newbie->ipsa_haspeer = B_TRUE;
3526         } else {
3527                 if (!is_inbound) {
3528                         lifetime_fuzz(newbie);
3529                 }
3530         }
3531         /*
3532          * The less locks I hold when doing an insertion and possible cloning,
3533          * the better!
3534          */
3535         mutex_exit(&newbie->ipsa_lock);
3536 
3537         if (clone) {
3538                 newbie_clone = sadb_cloneassoc(newbie);
3539 
3540                 if (newbie_clone == NULL) {
3541                         error = ENOMEM;
3542                         goto error;
3543                 }
3544         }
3545 
3546         /*
3547          * Enter the bucket locks.  The order of entry is outbound,
3548          * inbound.  We map "primary" and "secondary" into outbound and inbound
3549          * based on the destination address type.  If the destination address
3550          * type is for a node that isn't mine (or potentially mine), the
3551          * "primary" bucket is the outbound one.
3552          */
3553         if (!is_inbound) {
3554                 /* primary == outbound */
3555                 mutex_enter(&primary->isaf_lock);
3556                 mutex_enter(&secondary->isaf_lock);
3557         } else {
3558                 /* primary == inbound */
3559                 mutex_enter(&secondary->isaf_lock);
3560                 mutex_enter(&primary->isaf_lock);
3561         }
3562 
3563         /*
3564          * sadb_insertassoc() doesn't increment the reference
3565          * count.  We therefore have to increment the
3566          * reference count one more time to reflect the
3567          * pointers of the table that reference this SA.
3568          */
3569         IPSA_REFHOLD(newbie);
3570 
3571         if (isupdate) {
3572                 /*
3573                  * Unlink from larval holding cell in the "inbound" fanout.
3574                  */
3575                 ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3576                     newbie->ipsa_linklock == &secondary->isaf_lock);
3577                 sadb_unlinkassoc(newbie);
3578         }
3579 
3580         mutex_enter(&newbie->ipsa_lock);
3581         error = sadb_insertassoc(newbie, primary);
3582         mutex_exit(&newbie->ipsa_lock);
3583 
3584         if (error != 0) {
3585                 /*
3586                  * Since sadb_insertassoc() failed, we must decrement the
3587                  * refcount again so the cleanup code will actually free
3588                  * the offending SA.
3589                  */
3590                 IPSA_REFRELE(newbie);
3591                 goto error_unlock;
3592         }
3593 
3594         if (newbie_clone != NULL) {
3595                 mutex_enter(&newbie_clone->ipsa_lock);
3596                 error = sadb_insertassoc(newbie_clone, secondary);
3597                 mutex_exit(&newbie_clone->ipsa_lock);
3598                 if (error != 0) {
3599                         /* Collision in secondary table. */
3600                         sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3601                         goto error_unlock;
3602                 }
3603                 IPSA_REFHOLD(newbie_clone);
3604         } else {
3605                 ASSERT(primary != secondary);
3606                 scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3607                     ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3608                 if (scratch != NULL) {
3609                         /* Collision in secondary table. */
3610                         sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3611                         /* Set the error, since ipsec_getassocbyspi() can't. */
3612                         error = EEXIST;
3613                         goto error_unlock;
3614                 }
3615         }
3616 
3617         /* OKAY!  So let's do some reality check assertions. */
3618 
3619         ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3620         ASSERT(newbie_clone == NULL ||
3621             (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3622 
3623 error_unlock:
3624 
3625         /*
3626          * We can exit the locks in any order.  Only entrance needs to
3627          * follow any protocol.
3628          */
3629         mutex_exit(&secondary->isaf_lock);
3630         mutex_exit(&primary->isaf_lock);
3631 
3632         if (pair_ext != NULL && error == 0) {
3633                 /* update pair_spi if it exists. */
3634                 ipsa_query_t sq;
3635 
3636                 sq.spp = spp;           /* XXX param */
3637                 error = sadb_form_query(ksi, IPSA_Q_DST, IPSA_Q_SRC|IPSA_Q_DST|
3638                     IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, &sq, diagnostic);
3639                 if (error)
3640                         return (error);
3641 
3642                 error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
3643 
3644                 if (error != 0)
3645                         goto error;
3646 
3647                 if (ipsapp.ipsap_psa_ptr != NULL) {
3648                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3649                         error = EINVAL;
3650                 } else {
3651                         /* update_pairing() sets diagnostic */
3652                         error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
3653                 }
3654         }
3655         /* Common error point for this routine. */
3656 error:
3657         if (newbie != NULL) {
3658                 if (error != 0) {
3659                         /* This SA is broken, let the reaper clean up. */
3660                         mutex_enter(&newbie->ipsa_lock);
3661                         newbie->ipsa_state = IPSA_STATE_DEAD;
3662                         newbie->ipsa_hardexpiretime = 1;
3663                         mutex_exit(&newbie->ipsa_lock);
3664                 }
3665                 IPSA_REFRELE(newbie);
3666         }
3667         if (newbie_clone != NULL) {
3668                 IPSA_REFRELE(newbie_clone);
3669         }
3670 
3671         if (error == 0) {
3672                 /*
3673                  * Construct favorable PF_KEY return message and send to
3674                  * keysock. Update the flags in the original keysock message
3675                  * to reflect the actual flags in the new SA.
3676                  *  (Q:  Do I need to pass "newbie"?  If I do,
3677                  * make sure to REFHOLD, call, then REFRELE.)
3678                  */
3679                 assoc->sadb_sa_flags = newbie->ipsa_flags;
3680                 sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3681         }
3682 
3683         destroy_ipsa_pair(&ipsapp);
3684         return (error);
3685 }
3686 
3687 /*
3688  * Set the time of first use for a security association.  Update any
3689  * expiration times as a result.
3690  */
3691 void
3692 sadb_set_usetime(ipsa_t *assoc)
3693 {
3694         time_t snapshot = gethrestime_sec();
3695 
3696         mutex_enter(&assoc->ipsa_lock);
3697         assoc->ipsa_lastuse = snapshot;
3698         assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3699 
3700         /*
3701          * Caller does check usetime before calling me usually, and
3702          * double-checking is better than a mutex_enter/exit hit.
3703          */
3704         if (assoc->ipsa_usetime == 0) {
3705                 /*
3706                  * This is redundant for outbound SA's, as
3707                  * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3708                  * Inbound SAs, however, have no such protection.
3709                  */
3710                 assoc->ipsa_flags |= IPSA_F_USED;
3711                 assoc->ipsa_usetime = snapshot;
3712 
3713                 /*
3714                  * After setting the use time, see if we have a use lifetime
3715                  * that would cause the actual SA expiration time to shorten.
3716                  */
3717                 UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3718                 UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3719         }
3720         mutex_exit(&assoc->ipsa_lock);
3721 }
3722 
3723 /*
3724  * Send up a PF_KEY expire message for this association.
3725  */
3726 static void
3727 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3728 {
3729         mblk_t *mp, *mp1;
3730         int alloclen, af;
3731         sadb_msg_t *samsg;
3732         sadb_lifetime_t *current, *expire;
3733         sadb_sa_t *saext;
3734         uint8_t *end;
3735         boolean_t tunnel_mode;
3736 
3737         ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3738 
3739         /* Don't bother sending if there's no queue. */
3740         if (pfkey_q == NULL)
3741                 return;
3742 
3743         mp = sadb_keysock_out(0);
3744         if (mp == NULL) {
3745                 /* cmn_err(CE_WARN, */
3746                 /*      "sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3747                 return;
3748         }
3749 
3750         alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3751             2 * sizeof (sadb_address_t) + sizeof (*saext);
3752 
3753         af = assoc->ipsa_addrfam;
3754         switch (af) {
3755         case AF_INET:
3756                 alloclen += 2 * sizeof (struct sockaddr_in);
3757                 break;
3758         case AF_INET6:
3759                 alloclen += 2 * sizeof (struct sockaddr_in6);
3760                 break;
3761         default:
3762                 /* Won't happen unless there's a kernel bug. */
3763                 freeb(mp);
3764                 cmn_err(CE_WARN,
3765                     "sadb_expire_assoc: Unknown address length.\n");
3766                 return;
3767         }
3768 
3769         tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3770         if (tunnel_mode) {
3771                 alloclen += 2 * sizeof (sadb_address_t);
3772                 switch (assoc->ipsa_innerfam) {
3773                 case AF_INET:
3774                         alloclen += 2 * sizeof (struct sockaddr_in);
3775                         break;
3776                 case AF_INET6:
3777                         alloclen += 2 * sizeof (struct sockaddr_in6);
3778                         break;
3779                 default:
3780                         /* Won't happen unless there's a kernel bug. */
3781                         freeb(mp);
3782                         cmn_err(CE_WARN, "sadb_expire_assoc: "
3783                             "Unknown inner address length.\n");
3784                         return;
3785                 }
3786         }
3787 
3788         mp->b_cont = allocb(alloclen, BPRI_HI);
3789         if (mp->b_cont == NULL) {
3790                 freeb(mp);
3791                 /* cmn_err(CE_WARN, */
3792                 /*      "sadb_expire_assoc: Can't allocate message.\n"); */
3793                 return;
3794         }
3795 
3796         mp1 = mp;
3797         mp = mp->b_cont;
3798         end = mp->b_wptr + alloclen;
3799 
3800         samsg = (sadb_msg_t *)mp->b_wptr;
3801         mp->b_wptr += sizeof (*samsg);
3802         samsg->sadb_msg_version = PF_KEY_V2;
3803         samsg->sadb_msg_type = SADB_EXPIRE;
3804         samsg->sadb_msg_errno = 0;
3805         samsg->sadb_msg_satype = assoc->ipsa_type;
3806         samsg->sadb_msg_len = SADB_8TO64(alloclen);
3807         samsg->sadb_msg_reserved = 0;
3808         samsg->sadb_msg_seq = 0;
3809         samsg->sadb_msg_pid = 0;
3810 
3811         saext = (sadb_sa_t *)mp->b_wptr;
3812         mp->b_wptr += sizeof (*saext);
3813         saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3814         saext->sadb_sa_exttype = SADB_EXT_SA;
3815         saext->sadb_sa_spi = assoc->ipsa_spi;
3816         saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3817         saext->sadb_sa_state = assoc->ipsa_state;
3818         saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3819         saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3820         saext->sadb_sa_flags = assoc->ipsa_flags;
3821 
3822         current = (sadb_lifetime_t *)mp->b_wptr;
3823         mp->b_wptr += sizeof (sadb_lifetime_t);
3824         current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3825         current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3826         /* We do not support the concept. */
3827         current->sadb_lifetime_allocations = 0;
3828         current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3829         current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3830         current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3831 
3832         expire = (sadb_lifetime_t *)mp->b_wptr;
3833         mp->b_wptr += sizeof (*expire);
3834         expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3835 
3836         if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3837                 expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3838                 expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3839                 expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3840                 expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3841                 expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3842         } else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3843                 expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3844                 expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3845                 expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3846                 expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3847                 expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3848         } else {
3849                 ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3850                 expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3851                 expire->sadb_lifetime_allocations = 0;
3852                 expire->sadb_lifetime_bytes = 0;
3853                 expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3854                 expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3855         }
3856 
3857         mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3858             af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3859             SA_PROTO(assoc), 0);
3860         ASSERT(mp->b_wptr != NULL);
3861 
3862         mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3863             af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3864             SA_PROTO(assoc), 0);
3865         ASSERT(mp->b_wptr != NULL);
3866 
3867         if (tunnel_mode) {
3868                 mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3869                     SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3870                     assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3871                     assoc->ipsa_innersrcpfx);
3872                 ASSERT(mp->b_wptr != NULL);
3873                 mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3874                     SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3875                     assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3876                     assoc->ipsa_innerdstpfx);
3877                 ASSERT(mp->b_wptr != NULL);
3878         }
3879 
3880         /* Can just putnext, we're ready to go! */
3881         putnext(pfkey_q, mp1);
3882 }
3883 
3884 /*
3885  * "Age" the SA with the number of bytes that was used to protect traffic.
3886  * Send an SADB_EXPIRE message if appropriate.  Return B_TRUE if there was
3887  * enough "charge" left in the SA to protect the data.  Return B_FALSE
3888  * otherwise.  (If B_FALSE is returned, the association either was, or became
3889  * DEAD.)
3890  */
3891 boolean_t
3892 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3893     boolean_t sendmsg)
3894 {
3895         boolean_t rc = B_TRUE;
3896         uint64_t newtotal;
3897 
3898         mutex_enter(&assoc->ipsa_lock);
3899         newtotal = assoc->ipsa_bytes + bytes;
3900         if (assoc->ipsa_hardbyteslt != 0 &&
3901             newtotal >= assoc->ipsa_hardbyteslt) {
3902                 if (assoc->ipsa_state != IPSA_STATE_DEAD) {
3903                         sadb_delete_cluster(assoc);
3904                         /*
3905                          * Send EXPIRE message to PF_KEY.  May wish to pawn
3906                          * this off on another non-interrupt thread.  Also
3907                          * unlink this SA immediately.
3908                          */
3909                         assoc->ipsa_state = IPSA_STATE_DEAD;
3910                         if (sendmsg)
3911                                 sadb_expire_assoc(pfkey_q, assoc);
3912                         /*
3913                          * Set non-zero expiration time so sadb_age_assoc()
3914                          * will work when reaping.
3915                          */
3916                         assoc->ipsa_hardexpiretime = (time_t)1;
3917                 } /* Else someone beat me to it! */
3918                 rc = B_FALSE;
3919         } else if (assoc->ipsa_softbyteslt != 0 &&
3920             (newtotal >= assoc->ipsa_softbyteslt)) {
3921                 if (assoc->ipsa_state < IPSA_STATE_DYING) {
3922                         /*
3923                          * Send EXPIRE message to PF_KEY.  May wish to pawn
3924                          * this off on another non-interrupt thread.
3925                          */
3926                         assoc->ipsa_state = IPSA_STATE_DYING;
3927                         assoc->ipsa_bytes = newtotal;
3928                         if (sendmsg)
3929                                 sadb_expire_assoc(pfkey_q, assoc);
3930                 } /* Else someone beat me to it! */
3931         }
3932         if (rc == B_TRUE)
3933                 assoc->ipsa_bytes = newtotal;
3934         mutex_exit(&assoc->ipsa_lock);
3935         return (rc);
3936 }
3937 
3938 /*
3939  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3940  *     sadb_age_assoc().
3941  */
3942 static ipsa_t *
3943 sadb_torch_assoc(isaf_t *head, ipsa_t *sa)
3944 {
3945         ASSERT(MUTEX_HELD(&head->isaf_lock));
3946         ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3947         ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3948 
3949         /*
3950          * Force cached SAs to be revalidated..
3951          */
3952         head->isaf_gen++;
3953 
3954         mutex_exit(&sa->ipsa_lock);
3955         sadb_unlinkassoc(sa);
3956 
3957         return (NULL);
3958 }
3959 
3960 /*
3961  * Do various SA-is-idle activities depending on delta (the number of idle
3962  * seconds on the SA) and/or other properties of the SA.
3963  *
3964  * Return B_TRUE if I've sent a packet, because I have to drop the
3965  * association's mutex before sending a packet out the wire.
3966  */
3967 /* ARGSUSED */
3968 static boolean_t
3969 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3970 {
3971         ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3972         int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3973 
3974         ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3975 
3976         if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3977             delta >= nat_t_interval &&
3978             gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
3979                 ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
3980                 assoc->ipsa_last_nat_t_ka = gethrestime_sec();
3981                 mutex_exit(&assoc->ipsa_lock);
3982                 ipsecesp_send_keepalive(assoc);
3983                 return (B_TRUE);
3984         }
3985         return (B_FALSE);
3986 }
3987 
3988 /*
3989  * Return "assoc" if haspeer is true and I send an expire.  This allows
3990  * the consumers' aging functions to tidy up an expired SA's peer.
3991  */
3992 static ipsa_t *
3993 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3994     time_t current, int reap_delay, boolean_t inbound)
3995 {
3996         ipsa_t *retval = NULL;
3997         boolean_t dropped_mutex = B_FALSE;
3998 
3999         ASSERT(MUTEX_HELD(&head->isaf_lock));
4000 
4001         mutex_enter(&assoc->ipsa_lock);
4002 
4003         if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4004             ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4005             (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4006             (assoc->ipsa_hardexpiretime != 0))) &&
4007             (assoc->ipsa_hardexpiretime <= current)) {
4008                 assoc->ipsa_state = IPSA_STATE_DEAD;
4009                 return (sadb_torch_assoc(head, assoc));
4010         }
4011 
4012         /*
4013          * Check lifetimes.  Fortunately, SA setup is done
4014          * such that there are only two times to look at,
4015          * softexpiretime, and hardexpiretime.
4016          *
4017          * Check hard first.
4018          */
4019 
4020         if (assoc->ipsa_hardexpiretime != 0 &&
4021             assoc->ipsa_hardexpiretime <= current) {
4022                 if (assoc->ipsa_state == IPSA_STATE_DEAD)
4023                         return (sadb_torch_assoc(head, assoc));
4024 
4025                 if (inbound) {
4026                         sadb_delete_cluster(assoc);
4027                 }
4028 
4029                 /*
4030                  * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4031                  */
4032                 assoc->ipsa_state = IPSA_STATE_DEAD;
4033                 if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4034                         /*
4035                          * If the SA is paired or peered with another, put
4036                          * a copy on a list which can be processed later, the
4037                          * pair/peer SA needs to be updated so the both die
4038                          * at the same time.
4039                          *
4040                          * If I return assoc, I have to bump up its reference
4041                          * count to keep with the ipsa_t reference count
4042                          * semantics.
4043                          */
4044                         IPSA_REFHOLD(assoc);
4045                         retval = assoc;
4046                 }
4047                 sadb_expire_assoc(pfkey_q, assoc);
4048                 assoc->ipsa_hardexpiretime = current + reap_delay;
4049         } else if (assoc->ipsa_softexpiretime != 0 &&
4050             assoc->ipsa_softexpiretime <= current &&
4051             assoc->ipsa_state < IPSA_STATE_DYING) {
4052                 /*
4053                  * Send EXPIRE message to PF_KEY.  May wish to pawn
4054                  * this off on another non-interrupt thread.
4055                  */
4056                 assoc->ipsa_state = IPSA_STATE_DYING;
4057                 if (assoc->ipsa_haspeer) {
4058                         /*
4059                          * If the SA has a peer, update the peer's state
4060                          * on SOFT_EXPIRE, this is mostly to prevent two
4061                          * expire messages from effectively the same SA.
4062                          *
4063                          * Don't care about paired SA's, then can (and should)
4064                          * be able to soft expire at different times.
4065                          *
4066                          * If I return assoc, I have to bump up its
4067                          * reference count to keep with the ipsa_t reference
4068                          * count semantics.
4069                          */
4070                         IPSA_REFHOLD(assoc);
4071                         retval = assoc;
4072                 }
4073                 sadb_expire_assoc(pfkey_q, assoc);
4074         } else if (assoc->ipsa_idletime != 0 &&
4075             assoc->ipsa_idleexpiretime <= current) {
4076                 if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4077                         assoc->ipsa_state = IPSA_STATE_IDLE;
4078                 }
4079 
4080                 /*
4081                  * Need to handle Mature case
4082                  */
4083                 if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4084                         sadb_expire_assoc(pfkey_q, assoc);
4085                 }
4086         } else {
4087                 /* Check idle time activities. */
4088                 dropped_mutex = sadb_idle_activities(assoc,
4089                     current - assoc->ipsa_lastuse, inbound);
4090         }
4091 
4092         if (!dropped_mutex)
4093                 mutex_exit(&assoc->ipsa_lock);
4094         return (retval);
4095 }
4096 
4097 /*
4098  * Called by a consumer protocol to do ther dirty work of reaping dead
4099  * Security Associations.
4100  *
4101  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4102  * SA's that are already marked DEAD, so expired SA's are only reaped
4103  * the second time sadb_ager() runs.
4104  */
4105 void
4106 sadb_ager(sadb_t *sp, queue_t *pfkey_q, int reap_delay, netstack_t *ns)
4107 {
4108         int i;
4109         isaf_t *bucket;
4110         ipsa_t *assoc, *spare;
4111         iacqf_t *acqlist;
4112         ipsacq_t *acqrec, *spareacq;
4113         templist_t *haspeerlist, *newbie;
4114         /* Snapshot current time now. */
4115         time_t current = gethrestime_sec();
4116         haspeerlist = NULL;
4117 
4118         /*
4119          * Do my dirty work.  This includes aging real entries, aging
4120          * larvals, and aging outstanding ACQUIREs.
4121          *
4122          * I hope I don't tie up resources for too long.
4123          */
4124 
4125         /* Age acquires. */
4126 
4127         for (i = 0; i < sp->sdb_hashsize; i++) {
4128                 acqlist = &sp->sdb_acq[i];
4129                 mutex_enter(&acqlist->iacqf_lock);
4130                 for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4131                     acqrec = spareacq) {
4132                         spareacq = acqrec->ipsacq_next;
4133                         if (current > acqrec->ipsacq_expire)
4134                                 sadb_destroy_acquire(acqrec, ns);
4135                 }
4136                 mutex_exit(&acqlist->iacqf_lock);
4137         }
4138 
4139         /* Age inbound associations. */
4140         for (i = 0; i < sp->sdb_hashsize; i++) {
4141                 bucket = &(sp->sdb_if[i]);
4142                 mutex_enter(&bucket->isaf_lock);
4143                 for (assoc = bucket->isaf_ipsa; assoc != NULL;
4144                     assoc = spare) {
4145                         spare = assoc->ipsa_next;
4146                         if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4147                             reap_delay, B_TRUE) != NULL) {
4148                                 /*
4149                                  * Put SA's which have a peer or SA's which
4150                                  * are paired on a list for processing after
4151                                  * all the hash tables have been walked.
4152                                  *
4153                                  * sadb_age_assoc() increments the refcnt,
4154                                  * effectively doing an IPSA_REFHOLD().
4155                                  */
4156                                 newbie = kmem_alloc(sizeof (*newbie),
4157                                     KM_NOSLEEP);
4158                                 if (newbie == NULL) {
4159                                         /*
4160                                          * Don't forget to REFRELE().
4161                                          */
4162                                         IPSA_REFRELE(assoc);
4163                                         continue;       /* for loop... */
4164                                 }
4165                                 newbie->next = haspeerlist;
4166                                 newbie->ipsa = assoc;
4167                                 haspeerlist = newbie;
4168                         }
4169                 }
4170                 mutex_exit(&bucket->isaf_lock);
4171         }
4172 
4173         age_pair_peer_list(haspeerlist, sp, B_FALSE);
4174         haspeerlist = NULL;
4175 
4176         /* Age outbound associations. */
4177         for (i = 0; i < sp->sdb_hashsize; i++) {
4178                 bucket = &(sp->sdb_of[i]);
4179                 mutex_enter(&bucket->isaf_lock);
4180                 for (assoc = bucket->isaf_ipsa; assoc != NULL;
4181                     assoc = spare) {
4182                         spare = assoc->ipsa_next;
4183                         if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4184                             reap_delay, B_FALSE) != NULL) {
4185                                 /*
4186                                  * sadb_age_assoc() increments the refcnt,
4187                                  * effectively doing an IPSA_REFHOLD().
4188                                  */
4189                                 newbie = kmem_alloc(sizeof (*newbie),
4190                                     KM_NOSLEEP);
4191                                 if (newbie == NULL) {
4192                                         /*
4193                                          * Don't forget to REFRELE().
4194                                          */
4195                                         IPSA_REFRELE(assoc);
4196                                         continue;       /* for loop... */
4197                                 }
4198                                 newbie->next = haspeerlist;
4199                                 newbie->ipsa = assoc;
4200                                 haspeerlist = newbie;
4201                         }
4202                 }
4203                 mutex_exit(&bucket->isaf_lock);
4204         }
4205 
4206         age_pair_peer_list(haspeerlist, sp, B_TRUE);
4207 
4208         /*
4209          * Run a GC pass to clean out dead identities.
4210          */
4211         ipsid_gc(ns);
4212 }
4213 
4214 /*
4215  * Figure out when to reschedule the ager.
4216  */
4217 timeout_id_t
4218 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4219     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4220 {
4221         hrtime_t end = gethrtime();
4222         uint_t interval = *intp;        /* "interval" is in ms. */
4223 
4224         /*
4225          * See how long this took.  If it took too long, increase the
4226          * aging interval.
4227          */
4228         if ((end - begin) > MSEC2NSEC(interval)) {
4229                 if (interval >= intmax) {
4230                         /* XXX Rate limit this?  Or recommend flush? */
4231                         (void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4232                             "Too many SA's to age out in %d msec.\n",
4233                             intmax);
4234                 } else {
4235                         /* Double by shifting by one bit. */
4236                         interval <<= 1;
4237                         interval = min(interval, intmax);
4238                 }
4239         } else if ((end - begin) <= (MSEC2NSEC(interval) / 2) &&
4240             interval > SADB_AGE_INTERVAL_DEFAULT) {
4241                 /*
4242                  * If I took less than half of the interval, then I should
4243                  * ratchet the interval back down.  Never automatically
4244                  * shift below the default aging interval.
4245                  *
4246                  * NOTE:This even overrides manual setting of the age
4247                  *      interval using NDD to lower the setting past the
4248                  *      default.  In other words, if you set the interval
4249                  *      lower than the default, and your SADB gets too big,
4250                  *      the interval will only self-lower back to the default.
4251                  */
4252                 /* Halve by shifting one bit. */
4253                 interval >>= 1;
4254                 interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4255         }
4256         *intp = interval;
4257         return (qtimeout(pfkey_q, ager, agerarg,
4258             drv_usectohz(interval * (MICROSEC / MILLISEC))));
4259 }
4260 
4261 
4262 /*
4263  * Update the lifetime values of an SA.  This is the path an SADB_UPDATE
4264  * message takes when updating a MATURE or DYING SA.
4265  */
4266 static void
4267 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4268     sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4269 {
4270         mutex_enter(&assoc->ipsa_lock);
4271 
4272         /*
4273          * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4274          * passed in during an update message.  We currently don't handle
4275          * these.
4276          */
4277 
4278         if (hard != NULL) {
4279                 if (hard->sadb_lifetime_bytes != 0)
4280                         assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4281                 if (hard->sadb_lifetime_usetime != 0)
4282                         assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4283                 if (hard->sadb_lifetime_addtime != 0)
4284                         assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4285                 if (assoc->ipsa_hardaddlt != 0) {
4286                         assoc->ipsa_hardexpiretime =
4287                             assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4288                 }
4289                 if (assoc->ipsa_harduselt != 0 &&
4290                     assoc->ipsa_flags & IPSA_F_USED) {
4291                         UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4292                 }
4293                 if (hard->sadb_lifetime_allocations != 0)
4294                         assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4295         }
4296 
4297         if (soft != NULL) {
4298                 if (soft->sadb_lifetime_bytes != 0) {
4299                         if (soft->sadb_lifetime_bytes >
4300                             assoc->ipsa_hardbyteslt) {
4301                                 assoc->ipsa_softbyteslt =
4302                                     assoc->ipsa_hardbyteslt;
4303                         } else {
4304                                 assoc->ipsa_softbyteslt =
4305                                     soft->sadb_lifetime_bytes;
4306                         }
4307                 }
4308                 if (soft->sadb_lifetime_usetime != 0) {
4309                         if (soft->sadb_lifetime_usetime >
4310                             assoc->ipsa_harduselt) {
4311                                 assoc->ipsa_softuselt =
4312                                     assoc->ipsa_harduselt;
4313                         } else {
4314                                 assoc->ipsa_softuselt =
4315                                     soft->sadb_lifetime_usetime;
4316                         }
4317                 }
4318                 if (soft->sadb_lifetime_addtime != 0) {
4319                         if (soft->sadb_lifetime_addtime >
4320                             assoc->ipsa_hardexpiretime) {
4321                                 assoc->ipsa_softexpiretime =
4322                                     assoc->ipsa_hardexpiretime;
4323                         } else {
4324                                 assoc->ipsa_softaddlt =
4325                                     soft->sadb_lifetime_addtime;
4326                         }
4327                 }
4328                 if (assoc->ipsa_softaddlt != 0) {
4329                         assoc->ipsa_softexpiretime =
4330                             assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4331                 }
4332                 if (assoc->ipsa_softuselt != 0 &&
4333                     assoc->ipsa_flags & IPSA_F_USED) {
4334                         UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4335                 }
4336                 if (outbound && assoc->ipsa_softexpiretime != 0) {
4337                         if (assoc->ipsa_state == IPSA_STATE_MATURE)
4338                                 lifetime_fuzz(assoc);
4339                 }
4340 
4341                 if (soft->sadb_lifetime_allocations != 0)
4342                         assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4343         }
4344 
4345         if (idle != NULL) {
4346                 time_t current = gethrestime_sec();
4347                 if ((assoc->ipsa_idleexpiretime <= current) &&
4348                     (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4349                         assoc->ipsa_idleexpiretime =
4350                             current + assoc->ipsa_idleaddlt;
4351                 }
4352                 if (idle->sadb_lifetime_addtime != 0)
4353                         assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4354                 if (idle->sadb_lifetime_usetime != 0)
4355                         assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4356                 if (assoc->ipsa_idleaddlt != 0) {
4357                         assoc->ipsa_idleexpiretime =
4358                             current + idle->sadb_lifetime_addtime;
4359                         assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4360                 }
4361                 if (assoc->ipsa_idleuselt != 0) {
4362                         if (assoc->ipsa_idletime != 0) {
4363                                 assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4364                                     assoc->ipsa_idleuselt);
4365                         assoc->ipsa_idleexpiretime =
4366                             current + assoc->ipsa_idletime;
4367                         } else {
4368                                 assoc->ipsa_idleexpiretime =
4369                                     current + assoc->ipsa_idleuselt;
4370                                 assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4371                         }
4372                 }
4373         }
4374         mutex_exit(&assoc->ipsa_lock);
4375 }
4376 
4377 static int
4378 sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4379 {
4380         int rcode = 0;
4381         time_t current = gethrestime_sec();
4382 
4383         mutex_enter(&assoc->ipsa_lock);
4384 
4385         switch (new_state) {
4386         case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4387                 if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4388                         assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4389                         assoc->ipsa_idleexpiretime =
4390                             current + assoc->ipsa_idletime;
4391                 }
4392                 break;
4393         case SADB_X_SASTATE_IDLE:
4394                 if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4395                         assoc->ipsa_state = IPSA_STATE_IDLE;
4396                         assoc->ipsa_idleexpiretime =
4397                             current + assoc->ipsa_idletime;
4398                 } else {
4399                         rcode = EINVAL;
4400                 }
4401                 break;
4402 
4403         case SADB_X_SASTATE_ACTIVE:
4404                 if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4405                         rcode = EINVAL;
4406                         break;
4407                 }
4408                 assoc->ipsa_state = IPSA_STATE_MATURE;
4409                 assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4410 
4411                 if (ipkt_lst == NULL) {
4412                         break;
4413                 }
4414 
4415                 if (assoc->ipsa_bpkt_head != NULL) {
4416                         *ipkt_lst = assoc->ipsa_bpkt_head;
4417                         assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4418                         assoc->ipsa_mblkcnt = 0;
4419                 } else {
4420                         *ipkt_lst = NULL;
4421                 }
4422                 break;
4423         default:
4424                 rcode = EINVAL;
4425                 break;
4426         }
4427 
4428         mutex_exit(&assoc->ipsa_lock);
4429         return (rcode);
4430 }
4431 
4432 /*
4433  * Check a proposed KMC update for sanity.
4434  */
4435 static int
4436 sadb_check_kmc(ipsa_query_t *sq, ipsa_t *sa, int *diagnostic)
4437 {
4438         uint32_t kmp = sq->kmp;
4439         uint32_t kmc = sq->kmc;
4440 
4441         if (sa == NULL)
4442                 return (0);
4443 
4444         if (sa->ipsa_state == IPSA_STATE_DEAD)
4445                 return (ESRCH); /* DEAD == Not there, in this case. */
4446 
4447         if ((kmp != 0) && ((sa->ipsa_kmp != 0) || (sa->ipsa_kmp != kmp))) {
4448                 *diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4449                 return (EINVAL);
4450         }
4451 
4452         if ((kmc != 0) && ((sa->ipsa_kmc != 0) || (sa->ipsa_kmc != kmc))) {
4453                 *diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4454                 return (EINVAL);
4455         }
4456 
4457         return (0);
4458 }
4459 
4460 /*
4461  * Actually update the KMC info.
4462  */
4463 static void
4464 sadb_update_kmc(ipsa_query_t *sq, ipsa_t *sa)
4465 {
4466         uint32_t kmp = sq->kmp;
4467         uint32_t kmc = sq->kmc;
4468 
4469         if (kmp != 0)
4470                 sa->ipsa_kmp = kmp;
4471         if (kmc != 0)
4472                 sa->ipsa_kmc = kmc;
4473 }
4474 
4475 /*
4476  * Common code to update an SA.
4477  */
4478 
4479 int
4480 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4481     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4482     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4483     netstack_t *ns, uint8_t sadb_msg_type)
4484 {
4485         sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4486         sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4487         sadb_x_replay_ctr_t *replext =
4488             (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4489         sadb_lifetime_t *soft =
4490             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4491         sadb_lifetime_t *hard =
4492             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4493         sadb_lifetime_t *idle =
4494             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4495         sadb_x_pair_t *pair_ext =
4496             (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4497         ipsa_t *echo_target = NULL;
4498         ipsap_t ipsapp;
4499         ipsa_query_t sq;
4500         time_t current = gethrestime_sec();
4501 
4502         sq.spp = spp;           /* XXX param */
4503         int error = sadb_form_query(ksi, IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA,
4504             IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
4505             &sq, diagnostic);
4506 
4507         if (error != 0)
4508                 return (error);
4509 
4510         error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
4511         if (error != 0)
4512                 return (error);
4513 
4514         if (ipsapp.ipsap_psa_ptr == NULL && ipsapp.ipsap_sa_ptr != NULL) {
4515                 if (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4516                         /*
4517                          * REFRELE the target and let the add_sa_func()
4518                          * deal with updating a larval SA.
4519                          */
4520                         destroy_ipsa_pair(&ipsapp);
4521                         return (add_sa_func(mp, ksi, diagnostic, ns));
4522                 }
4523         }
4524 
4525         /*
4526          * At this point we have an UPDATE to a MATURE SA. There should
4527          * not be any keying material present.
4528          */
4529         if (akey != NULL) {
4530                 *diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4531                 error = EINVAL;
4532                 goto bail;
4533         }
4534         if (ekey != NULL) {
4535                 *diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4536                 error = EINVAL;
4537                 goto bail;
4538         }
4539 
4540         if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4541                 if (ipsapp.ipsap_sa_ptr != NULL &&
4542                     ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4543                         if ((error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4544                             sq.assoc->sadb_sa_state, NULL)) != 0) {
4545                                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4546                                 goto bail;
4547                         }
4548                 }
4549                 if (ipsapp.ipsap_psa_ptr != NULL &&
4550                     ipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4551                         if ((error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4552                             sq.assoc->sadb_sa_state, NULL)) != 0) {
4553                                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4554                                 goto bail;
4555                         }
4556                 }
4557         }
4558         if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4559                 if (ipsapp.ipsap_sa_ptr != NULL) {
4560                         error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4561                             sq.assoc->sadb_sa_state,
4562                             (ipsapp.ipsap_sa_ptr->ipsa_flags &
4563                             IPSA_F_INBOUND) ? ipkt_lst : NULL);
4564                         if (error) {
4565                                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4566                                 goto bail;
4567                         }
4568                 }
4569                 if (ipsapp.ipsap_psa_ptr != NULL) {
4570                         error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4571                             sq.assoc->sadb_sa_state,
4572                             (ipsapp.ipsap_psa_ptr->ipsa_flags &
4573                             IPSA_F_INBOUND) ? ipkt_lst : NULL);
4574                         if (error) {
4575                                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4576                                 goto bail;
4577                         }
4578                 }
4579                 sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4580                     ksi, echo_target);
4581                 goto bail;
4582         }
4583 
4584         /*
4585          * Reality checks for updates of active associations.
4586          * Sundry first-pass UPDATE-specific reality checks.
4587          * Have to do the checks here, because it's after the add_sa code.
4588          * XXX STATS : logging/stats here?
4589          */
4590 
4591         if (!((sq.assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4592             (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4593                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4594                 error = EINVAL;
4595                 goto bail;
4596         }
4597         if (sq.assoc->sadb_sa_flags & ~spp->s_updateflags) {
4598                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4599                 error = EINVAL;
4600                 goto bail;
4601         }
4602         if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4603                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4604                 error = EOPNOTSUPP;
4605                 goto bail;
4606         }
4607 
4608         if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4609                 error = EINVAL;
4610                 goto bail;
4611         }
4612 
4613         if ((*diagnostic = sadb_labelchk(ksi)) != 0)
4614                 return (EINVAL);
4615 
4616         error = sadb_check_kmc(&sq, ipsapp.ipsap_sa_ptr, diagnostic);
4617         if (error != 0)
4618                 goto bail;
4619 
4620         error = sadb_check_kmc(&sq, ipsapp.ipsap_psa_ptr, diagnostic);
4621         if (error != 0)
4622                 goto bail;
4623 
4624 
4625         if (ipsapp.ipsap_sa_ptr != NULL) {
4626                 /*
4627                  * Do not allow replay value change for MATURE or LARVAL SA.
4628                  */
4629 
4630                 if ((replext != NULL) &&
4631                     ((ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4632                     (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4633                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4634                         error = EINVAL;
4635                         goto bail;
4636                 }
4637         }
4638 
4639 
4640         if (ipsapp.ipsap_sa_ptr != NULL) {
4641                 sadb_update_lifetimes(ipsapp.ipsap_sa_ptr, hard, soft,
4642                     idle, B_TRUE);
4643                 sadb_update_kmc(&sq, ipsapp.ipsap_sa_ptr);
4644                 if ((replext != NULL) &&
4645                     (ipsapp.ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4646                         /*
4647                          * If an inbound SA, update the replay counter
4648                          * and check off all the other sequence number
4649                          */
4650                         if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4651                                 if (!sadb_replay_check(ipsapp.ipsap_sa_ptr,
4652                                     replext->sadb_x_rc_replay32)) {
4653                                         *diagnostic =
4654                                             SADB_X_DIAGNOSTIC_INVALID_REPLAY;
4655                                         error = EINVAL;
4656                                         goto bail;
4657                                 }
4658                                 mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4659                                 ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4660                                     current +
4661                                     ipsapp.ipsap_sa_ptr->ipsa_idletime;
4662                                 mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4663                         } else {
4664                                 mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4665                                 ipsapp.ipsap_sa_ptr->ipsa_replay =
4666                                     replext->sadb_x_rc_replay32;
4667                                 ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4668                                     current +
4669                                     ipsapp.ipsap_sa_ptr->ipsa_idletime;
4670                                 mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4671                         }
4672                 }
4673         }
4674 
4675         if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4676                 if (ipsapp.ipsap_psa_ptr != NULL) {
4677                         sadb_update_lifetimes(ipsapp.ipsap_psa_ptr, hard, soft,
4678                             idle, B_FALSE);
4679                         sadb_update_kmc(&sq, ipsapp.ipsap_psa_ptr);
4680                 } else {
4681                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4682                         error = ESRCH;
4683                         goto bail;
4684                 }
4685         }
4686 
4687         if (pair_ext != NULL)
4688                 error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
4689 
4690         if (error == 0)
4691                 sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4692                     ksi, echo_target);
4693 bail:
4694 
4695         destroy_ipsa_pair(&ipsapp);
4696 
4697         return (error);
4698 }
4699 
4700 
4701 static int
4702 update_pairing(ipsap_t *ipsapp, ipsa_query_t *sq, keysock_in_t *ksi,
4703     int *diagnostic)
4704 {
4705         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4706         sadb_x_pair_t *pair_ext =
4707             (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4708         int error = 0;
4709         ipsap_t oipsapp;
4710         boolean_t undo_pair = B_FALSE;
4711         uint32_t ipsa_flags;
4712 
4713         if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4714             assoc->sadb_sa_spi) {
4715                 *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4716                 return (EINVAL);
4717         }
4718 
4719         /*
4720          * Assume for now that the spi value provided in the SADB_UPDATE
4721          * message was valid, update the SA with its pair spi value.
4722          * If the spi turns out to be bogus or the SA no longer exists
4723          * then this will be detected when the reverse update is made
4724          * below.
4725          */
4726         mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4727         ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4728         ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4729         mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4730 
4731         /*
4732          * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4733          * should now return pointers to the SA *AND* its pair, if this is not
4734          * the case, the "otherspi" either did not exist or was deleted. Also
4735          * check that "otherspi" is not already paired. If everything looks
4736          * good, complete the update. IPSA_REFRELE the first pair_pointer
4737          * after this update to ensure its not deleted until we are done.
4738          */
4739         error = get_ipsa_pair(sq, &oipsapp, diagnostic);
4740         if (error != 0) {
4741                 /*
4742                  * This should never happen, calling function still has
4743                  * IPSA_REFHELD on the SA we just updated.
4744                  */
4745                 return (error); /* XXX EINVAL instead of ESRCH? */
4746         }
4747 
4748         if (oipsapp.ipsap_psa_ptr == NULL) {
4749                 *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4750                 error = EINVAL;
4751                 undo_pair = B_TRUE;
4752         } else {
4753                 ipsa_flags = oipsapp.ipsap_psa_ptr->ipsa_flags;
4754                 if ((oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4755                     (oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4756                         /* Its dead Jim! */
4757                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4758                         undo_pair = B_TRUE;
4759                 } else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4760                     (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4761                         /* This SA is in both hashtables. */
4762                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4763                         undo_pair = B_TRUE;
4764                 } else if (ipsa_flags & IPSA_F_PAIRED) {
4765                         /* This SA is already paired with another. */
4766                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4767                         undo_pair = B_TRUE;
4768                 }
4769         }
4770 
4771         if (undo_pair) {
4772                 /* The pair SA does not exist. */
4773                 mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4774                 ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4775                 ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4776                 mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4777         } else {
4778                 mutex_enter(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4779                 oipsapp.ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4780                 oipsapp.ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4781                 mutex_exit(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4782         }
4783 
4784         destroy_ipsa_pair(&oipsapp);
4785         return (error);
4786 }
4787 
4788 /*
4789  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4790  * a list of outstanding SADB_ACQUIRE messages.  If ipsec_getassocbyconn() fails
4791  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4792  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4793  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4794  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4795  * other direction's SA.
4796  */
4797 
4798 /*
4799  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4800  * grab it, lock it, and return it.  Otherwise return NULL.
4801  *
4802  * XXX MLS number of arguments getting unwieldy here
4803  */
4804 static ipsacq_t *
4805 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4806     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4807     uint64_t unique_id, ts_label_t *tsl)
4808 {
4809         ipsacq_t *walker;
4810         sa_family_t fam;
4811         uint32_t blank_address[4] = {0, 0, 0, 0};
4812 
4813         if (isrc == NULL) {
4814                 ASSERT(idst == NULL);
4815                 isrc = idst = blank_address;
4816         }
4817 
4818         /*
4819          * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4820          *
4821          * XXX May need search for duplicates based on other things too!
4822          */
4823         for (walker = bucket->iacqf_ipsacq; walker != NULL;
4824             walker = walker->ipsacq_next) {
4825                 mutex_enter(&walker->ipsacq_lock);
4826                 fam = walker->ipsacq_addrfam;
4827                 if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4828                     IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4829                     ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4830                     (in6_addr_t *)walker->ipsacq_innersrc) &&
4831                     ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4832                     (in6_addr_t *)walker->ipsacq_innerdst) &&
4833                     (ap == walker->ipsacq_act) &&
4834                     (pp == walker->ipsacq_policy) &&
4835                     /* XXX do deep compares of ap/pp? */
4836                     (unique_id == walker->ipsacq_unique_id) &&
4837                     (ipsec_label_match(tsl, walker->ipsacq_tsl)))
4838                         break;                  /* everything matched */
4839                 mutex_exit(&walker->ipsacq_lock);
4840         }
4841 
4842         return (walker);
4843 }
4844 
4845 /*
4846  * Generate an SADB_ACQUIRE base message mblk, including KEYSOCK_OUT metadata.
4847  * In other words, this will return, upon success, a two-mblk chain.
4848  */
4849 static inline mblk_t *
4850 sadb_acquire_msg_base(minor_t serial, uint8_t satype, uint32_t seq, pid_t pid)
4851 {
4852         mblk_t *mp;
4853         sadb_msg_t *samsg;
4854 
4855         mp = sadb_keysock_out(serial);
4856         if (mp == NULL)
4857                 return (NULL);
4858         mp->b_cont = allocb(sizeof (sadb_msg_t), BPRI_HI);
4859         if (mp->b_cont == NULL) {
4860                 freeb(mp);
4861                 return (NULL);
4862         }
4863 
4864         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
4865         mp->b_cont->b_wptr += sizeof (*samsg);
4866         samsg->sadb_msg_version = PF_KEY_V2;
4867         samsg->sadb_msg_type = SADB_ACQUIRE;
4868         samsg->sadb_msg_errno = 0;
4869         samsg->sadb_msg_reserved = 0;
4870         samsg->sadb_msg_satype = satype;
4871         samsg->sadb_msg_seq = seq;
4872         samsg->sadb_msg_pid = pid;
4873 
4874         return (mp);
4875 }
4876 
4877 /*
4878  * Generate address and TX/MLS sensitivity label PF_KEY extensions that are
4879  * common to both regular and extended ACQUIREs.
4880  */
4881 static mblk_t *
4882 sadb_acquire_msg_common(ipsec_selector_t *sel, ipsec_policy_t *pp,
4883     ipsec_action_t *ap, boolean_t tunnel_mode, ts_label_t *tsl,
4884     sadb_sens_t *sens)
4885 {
4886         size_t len;
4887         mblk_t *mp;
4888         uint8_t *start, *cur, *end;
4889         uint32_t *saddrptr, *daddrptr;
4890         sa_family_t af;
4891         ipsec_action_t *oldap;
4892         ipsec_selkey_t *ipsl;
4893         uint8_t proto, pfxlen;
4894         uint16_t lport, rport;
4895         int senslen = 0;
4896 
4897         /*
4898          * Get action pointer set if it isn't already.
4899          */
4900         oldap = ap;
4901         if (pp != NULL) {
4902                 ap = pp->ipsp_act;
4903                 if (ap == NULL)
4904                         ap = oldap;
4905         }
4906 
4907         /*
4908          * Biggest-case scenario:
4909          * 4x (sadb_address_t + struct sockaddr_in6)
4910          *      (src, dst, isrc, idst)
4911          *      (COMING SOON, 6x, because of triggering-packet contents.)
4912          * sadb_x_kmc_t
4913          * sadb_sens_t
4914          * And wiggle room for label bitvectors.  Luckily there are
4915          * programmatic ways to find it.
4916          */
4917         len = 4 * (sizeof (sadb_address_t) + sizeof (struct sockaddr_in6));
4918 
4919         /* Figure out full and proper length of sensitivity labels. */
4920         if (sens != NULL) {
4921                 ASSERT(tsl == NULL);
4922                 senslen = SADB_64TO8(sens->sadb_sens_len);
4923         } else if (tsl != NULL) {
4924                 senslen = sadb_sens_len_from_label(tsl);
4925         }
4926 #ifdef DEBUG
4927         else {
4928                 ASSERT(senslen == 0);
4929         }
4930 #endif /* DEBUG */
4931         len += senslen;
4932 
4933         mp = allocb(len, BPRI_HI);
4934         if (mp == NULL)
4935                 return (NULL);
4936 
4937         start = mp->b_rptr;
4938         end = start + len;
4939         cur = start;
4940 
4941         /*
4942          * Address extensions first, from most-recently-defined to least.
4943          * (This should immediately trigger surprise or verify robustness on
4944          * older apps, like in.iked.)
4945          */
4946         if (tunnel_mode) {
4947                 /*
4948                  * Form inner address extensions based NOT on the inner
4949                  * selectors (i.e. the packet data), but on the policy's
4950                  * selector key (i.e. the policy's selector information).
4951                  *
4952                  * NOTE:  The position of IPv4 and IPv6 addresses is the
4953                  * same in ipsec_selkey_t (unless the compiler does very
4954                  * strange things with unions, consult your local C language
4955                  * lawyer for details).
4956                  */
4957                 ASSERT(pp != NULL);
4958 
4959                 ipsl = &(pp->ipsp_sel->ipsl_key);
4960                 if (ipsl->ipsl_valid & IPSL_IPV4) {
4961                         af = AF_INET;
4962                         ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
4963                         ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
4964                 } else {
4965                         af = AF_INET6;
4966                         ASSERT(sel->ips_protocol == IPPROTO_IPV6);
4967                         ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
4968                 }
4969 
4970                 if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
4971                         saddrptr = (uint32_t *)(&ipsl->ipsl_local);
4972                         pfxlen = ipsl->ipsl_local_pfxlen;
4973                 } else {
4974                         saddrptr = (uint32_t *)(&ipv6_all_zeros);
4975                         pfxlen = 0;
4976                 }
4977                 /* XXX What about ICMP type/code? */
4978                 lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
4979                     ipsl->ipsl_lport : 0;
4980                 proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
4981                     ipsl->ipsl_proto : 0;
4982 
4983                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
4984                     af, saddrptr, lport, proto, pfxlen);
4985                 if (cur == NULL) {
4986                         freeb(mp);
4987                         return (NULL);
4988                 }
4989 
4990                 if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
4991                         daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
4992                         pfxlen = ipsl->ipsl_remote_pfxlen;
4993                 } else {
4994                         daddrptr = (uint32_t *)(&ipv6_all_zeros);
4995                         pfxlen = 0;
4996                 }
4997                 /* XXX What about ICMP type/code? */
4998                 rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
4999                     ipsl->ipsl_rport : 0;
5000 
5001                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5002                     af, daddrptr, rport, proto, pfxlen);
5003                 if (cur == NULL) {
5004                         freeb(mp);
5005                         return (NULL);
5006                 }
5007                 /*
5008                  * TODO  - if we go to 3884's dream of transport mode IP-in-IP
5009                  * _with_ inner-packet address selectors, we'll need to further
5010                  * distinguish tunnel mode here.  For now, having inner
5011                  * addresses and/or ports is sufficient.
5012                  *
5013                  * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5014                  * outer addresses.
5015                  */
5016                 proto = sel->ips_protocol;   /* Either _ENCAP or _IPV6 */
5017                 lport = rport = 0;
5018         } else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5019                 /*
5020                  * For cases when the policy calls out specific ports (or not).
5021                  */
5022                 proto = 0;
5023                 lport = 0;
5024                 rport = 0;
5025                 if (pp != NULL) {
5026                         ipsl = &(pp->ipsp_sel->ipsl_key);
5027                         if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5028                                 proto = ipsl->ipsl_proto;
5029                         if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5030                                 rport = ipsl->ipsl_rport;
5031                         if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5032                                 lport = ipsl->ipsl_lport;
5033                 }
5034         } else {
5035                 /*
5036                  * For require-unique-SA policies.
5037                  */
5038                 proto = sel->ips_protocol;
5039                 lport = sel->ips_local_port;
5040                 rport = sel->ips_remote_port;
5041         }
5042 
5043         /*
5044          * Regular addresses.  These are outer-packet ones for tunnel mode.
5045          * Or for transport mode, the regulard address & port information.
5046          */
5047         af = sel->ips_isv4 ? AF_INET : AF_INET6;
5048 
5049         /*
5050          * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5051          * ipsec_selector_t.
5052          */
5053         cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5054             (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5055         if (cur == NULL) {
5056                 freeb(mp);
5057                 return (NULL);
5058         }
5059 
5060         cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5061             (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5062         if (cur == NULL) {
5063                 freeb(mp);
5064                 return (NULL);
5065         }
5066 
5067         /*
5068          * If present, generate a sensitivity label.
5069          */
5070         if (cur + senslen > end) {
5071                 freeb(mp);
5072                 return (NULL);
5073         }
5074         if (sens != NULL) {
5075                 /* Explicit sadb_sens_t, usually from inverse-ACQUIRE. */
5076                 bcopy(sens, cur, senslen);
5077         } else if (tsl != NULL) {
5078                 /* Generate sadb_sens_t from ACQUIRE source. */
5079                 sadb_sens_from_label((sadb_sens_t *)cur, SADB_EXT_SENSITIVITY,
5080                     tsl, senslen);
5081         }
5082 #ifdef DEBUG
5083         else {
5084                 ASSERT(senslen == 0);
5085         }
5086 #endif /* DEBUG */
5087         cur += senslen;
5088         mp->b_wptr = cur;
5089 
5090         return (mp);
5091 }
5092 
5093 /*
5094  * Generate a regular ACQUIRE's proposal extension and KMC information..
5095  */
5096 static mblk_t *
5097 sadb_acquire_prop(ipsec_action_t *ap, netstack_t *ns, boolean_t do_esp)
5098 {
5099         ipsec_stack_t *ipss = ns->netstack_ipsec;
5100         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5101         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
5102         mblk_t *mp = NULL;
5103         sadb_prop_t *prop;
5104         sadb_comb_t *comb;
5105         ipsec_action_t *walker;
5106         int ncombs, allocsize, ealgid, aalgid, aminbits, amaxbits, eminbits,
5107             emaxbits, replay;
5108         uint64_t softbytes, hardbytes, softaddtime, hardaddtime, softusetime,
5109             hardusetime;
5110         uint32_t kmc = 0, kmp = 0;
5111 
5112         /*
5113          * Since it's an rwlock read, AND writing to the IPsec algorithms is
5114          * rare, just acquire it once up top, and drop it upon return.
5115          */
5116         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
5117         if (do_esp) {
5118                 uint64_t num_aalgs, num_ealgs;
5119 
5120                 if (espstack->esp_kstats == NULL)
5121                         goto bail;
5122 
5123                 num_aalgs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
5124                 num_ealgs = ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
5125                 if (num_ealgs == 0)
5126                         goto bail;      /* IPsec not loaded yet, apparently. */
5127                 num_aalgs++;    /* No-auth or self-auth-crypto ESP. */
5128 
5129                 /* Use netstack's maximum loaded algorithms... */
5130                 ncombs = num_ealgs * num_aalgs;
5131                 replay =  espstack->ipsecesp_replay_size;
5132         } else {
5133                 if (ahstack->ah_kstats == NULL)
5134                         goto bail;
5135 
5136                 ncombs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
5137 
5138                 if (ncombs == 0)
5139                         goto bail;      /* IPsec not loaded yet, apparently. */
5140                 replay =  ahstack->ipsecah_replay_size;
5141         }
5142 
5143         allocsize = sizeof (*prop) + ncombs * sizeof (*comb) +
5144             sizeof (sadb_x_kmc_t);
5145         mp = allocb(allocsize, BPRI_HI);
5146         if (mp == NULL)
5147                 goto bail;
5148         prop = (sadb_prop_t *)mp->b_rptr;
5149         mp->b_wptr += sizeof (*prop);
5150         comb = (sadb_comb_t *)mp->b_wptr;
5151         /* Decrement allocsize, if it goes to or below 0, stop. */
5152         allocsize -= sizeof (*prop);
5153         prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
5154         prop->sadb_prop_len = SADB_8TO64(sizeof (*prop));
5155         *(uint32_t *)(&prop->sadb_prop_replay) = 0;      /* Quick zero-out! */
5156         prop->sadb_prop_replay = replay;
5157 
5158         /*
5159          * Based upon algorithm properties, and what-not, prioritize a
5160          * proposal, based on the ordering of the ESP algorithms in the
5161          * alternatives in the policy rule or socket that was placed
5162          * in the acquire record.
5163          *
5164          * For each action in policy list
5165          *   Add combination.
5166          *   I should not hit it, but if I've hit limit, return.
5167          */
5168 
5169         for (walker = ap; walker != NULL; walker = walker->ipa_next) {
5170                 ipsec_alginfo_t *ealg, *aalg;
5171                 ipsec_prot_t *prot;
5172 
5173                 if (walker->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
5174                         continue;
5175 
5176                 prot = &walker->ipa_act.ipa_apply;
5177                 if (walker->ipa_act.ipa_apply.ipp_km_proto != 0)
5178                         kmp = walker->ipa_act.ipa_apply.ipp_km_proto;
5179                 if (walker->ipa_act.ipa_apply.ipp_km_cookie != 0)
5180                         kmc = walker->ipa_act.ipa_apply.ipp_km_cookie;
5181                 if (walker->ipa_act.ipa_apply.ipp_replay_depth) {
5182                         prop->sadb_prop_replay =
5183                             walker->ipa_act.ipa_apply.ipp_replay_depth;
5184                 }
5185 
5186                 if (do_esp) {
5187                         if (!prot->ipp_use_esp)
5188                                 continue;
5189 
5190                         if (prot->ipp_esp_auth_alg != 0) {
5191                                 aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
5192                                     [prot->ipp_esp_auth_alg];
5193                                 if (aalg == NULL || !ALG_VALID(aalg))
5194                                         continue;
5195                         } else
5196                                 aalg = NULL;
5197 
5198                         ASSERT(prot->ipp_encr_alg > 0);
5199                         ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
5200                             [prot->ipp_encr_alg];
5201                         if (ealg == NULL || !ALG_VALID(ealg))
5202                                 continue;
5203 
5204                         /*
5205                          * These may want to come from policy rule..
5206                          */
5207                         softbytes = espstack->ipsecesp_default_soft_bytes;
5208                         hardbytes = espstack->ipsecesp_default_hard_bytes;
5209                         softaddtime = espstack->ipsecesp_default_soft_addtime;
5210                         hardaddtime = espstack->ipsecesp_default_hard_addtime;
5211                         softusetime = espstack->ipsecesp_default_soft_usetime;
5212                         hardusetime = espstack->ipsecesp_default_hard_usetime;
5213                 } else {
5214                         if (!prot->ipp_use_ah)
5215                                 continue;
5216                         ealg = NULL;
5217                         aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
5218                             [prot->ipp_auth_alg];
5219                         if (aalg == NULL || !ALG_VALID(aalg))
5220                                 continue;
5221 
5222                         /*
5223                          * These may want to come from policy rule..
5224                          */
5225                         softbytes = ahstack->ipsecah_default_soft_bytes;
5226                         hardbytes = ahstack->ipsecah_default_hard_bytes;
5227                         softaddtime = ahstack->ipsecah_default_soft_addtime;
5228                         hardaddtime = ahstack->ipsecah_default_hard_addtime;
5229                         softusetime = ahstack->ipsecah_default_soft_usetime;
5230                         hardusetime = ahstack->ipsecah_default_hard_usetime;
5231                 }
5232 
5233                 if (ealg == NULL) {
5234                         ealgid = eminbits = emaxbits = 0;
5235                 } else {
5236                         ealgid = ealg->alg_id;
5237                         eminbits =
5238                             MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
5239                         emaxbits =
5240                             MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
5241                 }
5242 
5243                 if (aalg == NULL) {
5244                         aalgid = aminbits = amaxbits = 0;
5245                 } else {
5246                         aalgid = aalg->alg_id;
5247                         aminbits = MAX(prot->ipp_espa_minbits,
5248                             aalg->alg_ef_minbits);
5249                         amaxbits = MIN(prot->ipp_espa_maxbits,
5250                             aalg->alg_ef_maxbits);
5251                 }
5252 
5253                 comb->sadb_comb_flags = 0;
5254                 comb->sadb_comb_reserved = 0;
5255                 comb->sadb_comb_encrypt = ealgid;
5256                 comb->sadb_comb_encrypt_minbits = eminbits;
5257                 comb->sadb_comb_encrypt_maxbits = emaxbits;
5258                 comb->sadb_comb_auth = aalgid;
5259                 comb->sadb_comb_auth_minbits = aminbits;
5260                 comb->sadb_comb_auth_maxbits = amaxbits;
5261                 comb->sadb_comb_soft_allocations = 0;
5262                 comb->sadb_comb_hard_allocations = 0;
5263                 comb->sadb_comb_soft_bytes = softbytes;
5264                 comb->sadb_comb_hard_bytes = hardbytes;
5265                 comb->sadb_comb_soft_addtime = softaddtime;
5266                 comb->sadb_comb_hard_addtime = hardaddtime;
5267                 comb->sadb_comb_soft_usetime = softusetime;
5268                 comb->sadb_comb_hard_usetime = hardusetime;
5269 
5270                 prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
5271                 mp->b_wptr += sizeof (*comb);
5272                 allocsize -= sizeof (*comb);
5273                 /* Should never dip BELOW sizeof (KM cookie extension). */
5274                 ASSERT3S(allocsize, >=, sizeof (sadb_x_kmc_t));
5275                 if (allocsize <= sizeof (sadb_x_kmc_t))
5276                         break;  /* out of space.. */
5277                 comb++;
5278         }
5279 
5280         /* Don't include KMC extension if there's no room. */
5281         if (((kmp != 0) || (kmc != 0)) && allocsize >= sizeof (sadb_x_kmc_t)) {
5282                 if (sadb_make_kmc_ext(mp->b_wptr,
5283                     mp->b_wptr + sizeof (sadb_x_kmc_t), kmp, kmc) == NULL) {
5284                         freeb(mp);
5285                         mp = NULL;
5286                         goto bail;
5287                 }
5288                 mp->b_wptr += sizeof (sadb_x_kmc_t);
5289                 prop->sadb_prop_len += SADB_8TO64(sizeof (sadb_x_kmc_t));
5290         }
5291 
5292 bail:
5293         rw_exit(&ipss->ipsec_alg_lock);
5294         return (mp);
5295 }
5296 
5297 /*
5298  * Generate an extended ACQUIRE's extended-proposal extension.
5299  */
5300 /* ARGSUSED */
5301 static mblk_t *
5302 sadb_acquire_extended_prop(ipsec_action_t *ap, netstack_t *ns)
5303 {
5304         sadb_prop_t *eprop;
5305         uint8_t *cur, *end;
5306         mblk_t *mp;
5307         int allocsize, numecombs = 0, numalgdescs = 0;
5308         uint32_t kmc = 0, kmp = 0, replay = 0;
5309         ipsec_action_t *walker;
5310 
5311         allocsize = sizeof (*eprop);
5312 
5313         /*
5314          * Going to walk through the action list twice.  Once for allocation
5315          * measurement, and once for actual construction.
5316          */
5317         for (walker = ap; walker != NULL; walker = walker->ipa_next) {
5318                 ipsec_prot_t *ipp;
5319 
5320                 /*
5321                  * Skip non-IPsec policies
5322                  */
5323                 if (walker->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5324                         continue;
5325 
5326                 ipp = &walker->ipa_act.ipa_apply;
5327 
5328                 if (walker->ipa_act.ipa_apply.ipp_km_proto)
5329                         kmp = ipp->ipp_km_proto;
5330                 if (walker->ipa_act.ipa_apply.ipp_km_cookie)
5331                         kmc = ipp->ipp_km_cookie;
5332                 if (walker->ipa_act.ipa_apply.ipp_replay_depth)
5333                         replay = ipp->ipp_replay_depth;
5334 
5335                 if (ipp->ipp_use_ah)
5336                         numalgdescs++;
5337                 if (ipp->ipp_use_esp) {
5338                         numalgdescs++;
5339                         if (ipp->ipp_use_espa)
5340                                 numalgdescs++;
5341                 }
5342 
5343                 numecombs++;
5344         }
5345         ASSERT(numecombs > 0);
5346 
5347         allocsize += numecombs * sizeof (sadb_x_ecomb_t) +
5348             numalgdescs * sizeof (sadb_x_algdesc_t) + sizeof (sadb_x_kmc_t);
5349         mp = allocb(allocsize, BPRI_HI);
5350         if (mp == NULL)
5351                 return (NULL);
5352         eprop = (sadb_prop_t *)mp->b_rptr;
5353         end = mp->b_rptr + allocsize;
5354         cur = mp->b_rptr + sizeof (*eprop);
5355 
5356         eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5357         eprop->sadb_x_prop_ereserved = 0;
5358         eprop->sadb_x_prop_numecombs = 0;
5359         *(uint32_t *)(&eprop->sadb_prop_replay) = 0;     /* Quick zero-out! */
5360         /* Pick ESP's replay default if need be. */
5361         eprop->sadb_prop_replay = (replay == 0) ?
5362             ns->netstack_ipsecesp->ipsecesp_replay_size : replay;
5363 
5364         /* This time, walk through and actually allocate. */
5365         for (walker = ap; walker != NULL; walker = walker->ipa_next) {
5366                 /*
5367                  * Skip non-IPsec policies
5368                  */
5369                 if (walker->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5370                         continue;
5371                 cur = sadb_action_to_ecomb(cur, end, walker, ns);
5372                 if (cur == NULL) {
5373                         /* NOTE: inverse-ACQUIRE should note this as ENOMEM. */
5374                         freeb(mp);
5375                         return (NULL);
5376                 }
5377                 eprop->sadb_x_prop_numecombs++;
5378         }
5379 
5380         ASSERT(end - cur >= sizeof (sadb_x_kmc_t));
5381         if ((kmp != 0) || (kmc != 0)) {
5382                 cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5383                 if (cur == NULL) {
5384                         freeb(mp);
5385                         return (NULL);
5386                 }
5387         }
5388         mp->b_wptr = cur;
5389         eprop->sadb_prop_len = SADB_8TO64(cur - mp->b_rptr);
5390 
5391         return (mp);
5392 }
5393 
5394 /*
5395  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
5396  * of all of the same length.  Give up (and drop) if memory
5397  * cannot be allocated for a new one; otherwise, invoke callback to
5398  * send the acquire up..
5399  *
5400  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
5401  * list.  The ah_add_sa_finish() routines can look at the packet's attached
5402  * attributes and handle this case specially.
      *
      * datamp is the packet that triggered the ACQUIRE.  Its IP header is read
      * for the outer addresses, and the packet is either queued on the acquire
      * record (chained behind an mblk made from ixa) or dropped.
      *
      * ixa supplies the policy and/or action (at least one must be non-NULL),
      * the tunnel-mode and IPv4 flags, the selectors, the label and the
      * netstack.
      *
      * At least one of need_ah and need_esp must be set.  When need_esp is set
      * the ESP SADB, PF_KEY queue, statistics and SA type are used; otherwise
      * AH's are.
5403  */
5404 void
5405 sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
5406     boolean_t need_esp)
5407 {
5408         mblk_t  *asyncmp, *regular, *extended, *common, *prop, *eprop;
5409         sadbp_t *spp;
5410         sadb_t *sp;
5411         ipsacq_t *newbie;
5412         iacqf_t *bucket;
5413         ipha_t *ipha = (ipha_t *)datamp->b_rptr;
5414         ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
5415         uint32_t *src, *dst, *isrc, *idst;
5416         ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
5417         ipsec_action_t *ap = ixa->ixa_ipsec_action;
5418         sa_family_t af;
5419         int hashoffset;
5420         uint32_t seq;
5421         uint64_t unique_id = 0;
5422         boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
5423         ts_label_t      *tsl;
5424         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
5425         ipsec_stack_t   *ipss = ns->netstack_ipsec;
5426         ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5427         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
5428         ipsec_selector_t sel;
5429         queue_t *q;
5430 
5431         ASSERT((pp != NULL) || (ap != NULL));
5432 
5433         ASSERT(need_ah || need_esp);
5434 
5435         /* Assign sadb pointers */
5436         if (need_esp) {
5437                 /*
5438                  * ESP happens first if we need both AH and ESP.
5439                  */
5440                 spp = &espstack->esp_sadb;
5441         } else {
5442                 spp = &ahstack->ah_sadb;
5443         }
5444         sp = (ixa->ixa_flags & IXAF_IS_IPV4) ? &spp->s_v4 : &spp->s_v6;
5445 
5446         if (is_system_labeled())
5447                 tsl = ixa->ixa_tsl;
5448         else
5449                 tsl = NULL;
5450 
             /* No explicit action on the ixa: use the policy's action list. */
5451         if (ap == NULL)
5452                 ap = pp->ipsp_act;
5453         ASSERT(ap != NULL);
5454 
5455         if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
5456                 unique_id = SA_FORM_UNIQUE_ID(ixa);
5457 
5458         /*
5459          * Set up an ACQUIRE record.
5460          *
5461          * Immediately, make sure the ACQUIRE sequence number doesn't slip
5462          * below the lowest point allowed in the kernel.  (In other words,
5463          * make sure the high bit on the sequence number is set.)
5464          */
5465 
5466         seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
5467 
5468         if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
5469                 src = (uint32_t *)&ipha->ipha_src;
5470                 dst = (uint32_t *)&ipha->ipha_dst;
5471                 af = AF_INET;
5472                 hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
5473                 ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
5474         } else {
5475                 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
5476                 src = (uint32_t *)&ip6h->ip6_src;
5477                 dst = (uint32_t *)&ip6h->ip6_dst;
5478                 af = AF_INET6;
5479                 hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
5480                 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
5481         }
5482 
5483         if (tunnel_mode) {
5484                 if (pp == NULL) {
5485                         /*
5486                          * Tunnel mode with no policy pointer means this is a
5487                          * reflected ICMP (like a ECHO REQUEST) that came in
5488                          * with self-encapsulated protection.  Until we better
5489                          * support this, drop the packet.
5490                          */
5491                         ip_drop_packet(datamp, B_FALSE, NULL,
5492                             DROPPER(ipss, ipds_spd_got_selfencap),
5493                             &ipss->ipsec_spd_dropper);
5494                         return;
5495                 }
5496                 /* Snag inner addresses. */
5497                 isrc = ixa->ixa_ipsec_insrc;
5498                 idst = ixa->ixa_ipsec_indst;
5499         } else {
5500                 isrc = idst = NULL;
5501         }
5502 
5503         /*
5504          * Check buckets to see if there is an existing entry.  If so,
5505          * grab it.  sadb_checkacquire locks newbie if found.
5506          */
5507         bucket = &(sp->sdb_acq[hashoffset]);
5508         mutex_enter(&bucket->iacqf_lock);
5509         newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
5510             unique_id, tsl);
5511 
5512         if (newbie == NULL) {
5513                 /*
5514                  * Otherwise, allocate a new one.
5515                  */
5516                 newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
5517                 if (newbie == NULL) {
5518                         mutex_exit(&bucket->iacqf_lock);
5519                         ip_drop_packet(datamp, B_FALSE, NULL,
5520                             DROPPER(ipss, ipds_sadb_acquire_nomem),
5521                             &ipss->ipsec_sadb_dropper);
5522                         return;
5523                 }
                     /* The record holds its own references on policy and action. */
5524                 newbie->ipsacq_policy = pp;
5525                 if (pp != NULL) {
5526                         IPPOL_REFHOLD(pp);
5527                 }
5528                 IPACT_REFHOLD(ap);
5529                 newbie->ipsacq_act = ap;
                     /* Insert at the head of the bucket's list. */
5530                 newbie->ipsacq_linklock = &bucket->iacqf_lock;
5531                 newbie->ipsacq_next = bucket->iacqf_ipsacq;
5532                 newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
5533                 if (newbie->ipsacq_next != NULL)
5534                         newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
5535 
5536                 bucket->iacqf_ipsacq = newbie;
5537                 mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
5538                 mutex_enter(&newbie->ipsacq_lock);
5539         }
5540 
5541         /*
5542          * XXX MLS does it actually help us to drop the bucket lock here?
5543          * we have inserted a half-built, locked acquire record into the
5544          * bucket.  any competing thread will now be able to lock the bucket
5545          * to scan it, but will immediately pile up on the new acquire
5546          * record's lock; I don't think we gain anything here other than to
5547          * disperse blame for lock contention.
5548          *
5549          * we might be able to dispense with acquire record locks entirely..
5550          * just use the bucket locks..
5551          */
5552 
5553         mutex_exit(&bucket->iacqf_lock);
5554 
5555         /*
5556          * This assert looks silly for now, but we may need to enter newbie's
5557          * mutex during a search.
5558          */
5559         ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5560 
5561         /*
5562          * Make the ip_xmit_attr_t into something we can queue.
5563          * If there is no memory for that, datamp is dropped and freed below.
5564          */
5565         asyncmp = ip_xmit_attr_to_mblk(ixa);
5566         if (asyncmp != NULL)
5567                 linkb(asyncmp, datamp);
5568 
5569         /* Queue up packet.  Use b_next. */
5570 
5571         if (asyncmp == NULL) {
5572                 /* Statistics for allocation failure */
5573                 if (ixa->ixa_flags & IXAF_IS_IPV4) {
5574                         BUMP_MIB(&ixa->ixa_ipst->ips_ip_mib,
5575                             ipIfStatsOutDiscards);
5576                 } else {
5577                         BUMP_MIB(&ixa->ixa_ipst->ips_ip6_mib,
5578                             ipIfStatsOutDiscards);
5579                 }
5580                 ip_drop_output("No memory for asyncmp", datamp, NULL);
5581                 freemsg(datamp);
5582                 /*
5583                  * The acquire record will be freed quickly if it's new
5584                  * (ipsacq_expire == 0), and will proceed as if no packet
5585                  * showed up if not.
5586                  */
5587                 mutex_exit(&newbie->ipsacq_lock);
5588                 return;
5589         } else if (newbie->ipsacq_numpackets == 0) {
5590                 /* First one. */
5591                 newbie->ipsacq_mp = asyncmp;
5592                 newbie->ipsacq_numpackets = 1;
5593                 newbie->ipsacq_expire = gethrestime_sec();
5594                 /*
5595                  * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5596                  * value.
5597                  */
5598                 newbie->ipsacq_expire += *spp->s_acquire_timeout;
5599                 newbie->ipsacq_seq = seq;
5600                 newbie->ipsacq_addrfam = af;
5601 
5602                 newbie->ipsacq_srcport = ixa->ixa_ipsec_src_port;
5603                 newbie->ipsacq_dstport = ixa->ixa_ipsec_dst_port;
5604                 newbie->ipsacq_icmp_type = ixa->ixa_ipsec_icmp_type;
5605                 newbie->ipsacq_icmp_code = ixa->ixa_ipsec_icmp_code;
5606                 if (tunnel_mode) {
5607                         newbie->ipsacq_inneraddrfam = ixa->ixa_ipsec_inaf;
5608                         newbie->ipsacq_proto = ixa->ixa_ipsec_inaf == AF_INET6 ?
5609                             IPPROTO_IPV6 : IPPROTO_ENCAP;
5610                         newbie->ipsacq_innersrcpfx = ixa->ixa_ipsec_insrcpfx;
5611                         newbie->ipsacq_innerdstpfx = ixa->ixa_ipsec_indstpfx;
5612                         IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5613                             ixa->ixa_ipsec_insrc, ixa->ixa_ipsec_inaf);
5614                         IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5615                             ixa->ixa_ipsec_indst, ixa->ixa_ipsec_inaf);
5616                 } else {
5617                         newbie->ipsacq_proto = ixa->ixa_ipsec_proto;
5618                 }
5619                 newbie->ipsacq_unique_id = unique_id;
5620 
5621                 if (tsl != NULL) {
5622                         label_hold(tsl);
5623                         newbie->ipsacq_tsl = tsl;
5624                 }
5625         } else {
5626                 /* Scan to the end of the list & insert. */
5627                 mblk_t *lastone = newbie->ipsacq_mp;
5628 
5629                 while (lastone->b_next != NULL)
5630                         lastone = lastone->b_next;
5631                 lastone->b_next = asyncmp;
                     /*
                      * If the queue was already at ipsacq_maxpackets, keep the
                      * count there and drop the packet at the head of the list.
                      */
5632                 if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5633                         newbie->ipsacq_numpackets = ipsacq_maxpackets;
5634                         lastone = newbie->ipsacq_mp;
5635                         newbie->ipsacq_mp = lastone->b_next;
5636                         lastone->b_next = NULL;
5637 
5638                         /* Freeing the async message */
5639                         lastone = ip_xmit_attr_free_mblk(lastone);
5640                         ip_drop_packet(lastone, B_FALSE, NULL,
5641                             DROPPER(ipss, ipds_sadb_acquire_toofull),
5642                             &ipss->ipsec_sadb_dropper);
5643                 } else {
5644                         IP_ACQUIRE_STAT(ipss, qhiwater,
5645                             newbie->ipsacq_numpackets);
5646                 }
5647         }
5648 
5649         /*
5650          * Reset addresses.  Set them to the most recently added mblk chain,
5651          * so that the address pointers in the acquire record will point
5652          * at an mblk still attached to the acquire list.
5653          */
5654 
5655         newbie->ipsacq_srcaddr = src;
5656         newbie->ipsacq_dstaddr = dst;
5657 
5658         /*
5659          * If the acquire record has more than one queued packet, we've
5660          * already sent an ACQUIRE, and don't need to repeat ourself.
5661          */
5662         if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5663                 /* I have an acquire outstanding already! */
5664                 mutex_exit(&newbie->ipsacq_lock);
5665                 return;
5666         }
5667 
5668         if (need_esp) {
5669                 ESP_BUMP_STAT(espstack, acquire_requests);
5670                 q = espstack->esp_pfkey_q;
5671         } else {
5672                 /*
5673                  * Two cases get us here:
5674                  * 1.) AH-only policy.
5675                  *
5676                  * 2.) A continuation of an AH+ESP policy, and this is the
5677                  * post-ESP, AH-needs-to-send-a-regular-ACQUIRE case.
5678                  * (i.e. called from esp_do_outbound_ah().)
5679                  */
5680                 AH_BUMP_STAT(ahstack, acquire_requests);
5681                 q = ahstack->ah_pfkey_q;
5682         }
5683 
5684         /*
5685          * Get selectors and other policy-expression bits needed for an
5686          * ACQUIRE.
5687          */
5688         bzero(&sel, sizeof (sel));
5689         sel.ips_isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
5690         if (tunnel_mode) {
5691                 sel.ips_protocol = (ixa->ixa_ipsec_inaf == AF_INET) ?
5692                     IPPROTO_ENCAP : IPPROTO_IPV6;
5693         } else {
5694                 sel.ips_protocol = ixa->ixa_ipsec_proto;
5695                 sel.ips_local_port = ixa->ixa_ipsec_src_port;
5696                 sel.ips_remote_port = ixa->ixa_ipsec_dst_port;
5697         }
5698         sel.ips_icmp_type = ixa->ixa_ipsec_icmp_type;
5699         sel.ips_icmp_code = ixa->ixa_ipsec_icmp_code;
5700         sel.ips_is_icmp_inv_acq = 0;
5701         if (af == AF_INET) {
5702                 sel.ips_local_addr_v4 = ipha->ipha_src;
5703                 sel.ips_remote_addr_v4 = ipha->ipha_dst;
5704         } else {
5705                 sel.ips_local_addr_v6 = ip6h->ip6_src;
5706                 sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5707         }
5708 
5709 
5710         /*
5711          * 1. Generate addresses, kmc, and sensitivity.  These are "common"
5712          * and should be an mblk pointed to by common. TBD -- eventually it
5713          * will include triggering packet contents as more address extensions.
5714          *
5715          * 2. Generate ACQUIRE & KEYSOCK_OUT and single-protocol proposal.
5716          * These are "regular" and "prop".  String regular->b_cont->b_cont =
5717          * common, common->b_cont = prop.
5718          *
5719          * 3. If extended register got turned on, generate EXT_ACQUIRE &
5720          * KEYSOCK_OUT and multi-protocol eprop. These are "extended" and
5721          * "eprop".  String extended->b_cont->b_cont = dupb(common) and
5722          * extended->b_cont->b_cont->b_cont = eprop.
5723          *
5724          * 4. Deliver:  putnext() extended if there, then putnext() regular.
5725          */
5726 
5727         regular = extended = prop = eprop = NULL;
5728 
5729         common = sadb_acquire_msg_common(&sel, pp, ap, tunnel_mode, tsl, NULL);
5730         if (common == NULL)
5731                 goto bail;
5732 
5733         regular = sadb_acquire_msg_base(0, (need_esp ?
5734             SADB_SATYPE_ESP : SADB_SATYPE_AH), newbie->ipsacq_seq, 0);
5735         if (regular == NULL)
5736                 goto bail;
5737 
5738         /*
5739          * At least one of need_* must be true (asserted above).  need_esp is
5740          * passed straight through: an AH & ESP policy gets the ESP proposal
5741          * here, otherwise the proposal is for whichever one was requested.
5742          */
5743         prop = sadb_acquire_prop(ap, ns, need_esp);
5744         if (prop == NULL)
5745                 goto bail;
5746 
5747         /* Link the parts together. */
5748         regular->b_cont->b_cont = common;
5749         common->b_cont = prop;
5750         /*
5751          * Prop is now linked, so don't freemsg() it if the extended
5752          * construction goes off the rails.
5753          */
5754         prop = NULL;
5755 
5756         ((sadb_msg_t *)(regular->b_cont->b_rptr))->sadb_msg_len =
5757             SADB_8TO64(msgsize(regular->b_cont));
5758 
5759         /*
5760          * If we need an extended ACQUIRE, build it here.
5761          */
5762         if (keysock_extended_reg(ns)) {
5763                 /* NOTE: "common" still points to what we need. */
5764                 extended = sadb_acquire_msg_base(0, 0, newbie->ipsacq_seq, 0);
                     /*
                      * From here on common is also reachable through regular,
                      * so it is cleared before any bail to keep the cleanup
                      * code from freeing it twice.
                      */
5765                 if (extended == NULL) {
5766                         common = NULL;
5767                         goto bail;
5768                 }
5769 
5770                 extended->b_cont->b_cont = dupb(common);
5771                 common = NULL;
5772                 if (extended->b_cont->b_cont == NULL)
5773                         goto bail;
5774 
5775                 eprop = sadb_acquire_extended_prop(ap, ns);
5776                 if (eprop == NULL)
5777                         goto bail;
5778                 extended->b_cont->b_cont->b_cont = eprop;
5779 
5780                 ((sadb_msg_t *)(extended->b_cont->b_rptr))->sadb_msg_len =
5781                     SADB_8TO64(msgsize(extended->b_cont));
5782         }
5783 
5784         /* So we don't hold a lock across putnext()... */
5785         mutex_exit(&newbie->ipsacq_lock);
5786 
5787         if (extended != NULL)
5788                 putnext(q, extended);
5789         ASSERT(regular != NULL);
5790         putnext(q, regular);
5791         return;
5792 
5793 bail:
5794         /* Make this acquire record go away quickly... */
5795         newbie->ipsacq_expire = 0;
5796         /* Exploit freemsg(NULL) being legal for fun & profit. */
5797         freemsg(common);
5798         freemsg(prop);
5799         freemsg(extended);
5800         freemsg(regular);
5801         mutex_exit(&newbie->ipsacq_lock);
5802 }
5803 
5804 /*
5805  * Unlink and free an acquire record.
5806  */
5807 void
5808 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5809 {
5810         mblk_t          *mp;
5811         ipsec_stack_t   *ipss = ns->netstack_ipsec;
5812 
5813         ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5814 
5815         if (acqrec->ipsacq_policy != NULL) {
5816                 IPPOL_REFRELE(acqrec->ipsacq_policy);
5817         }
5818         if (acqrec->ipsacq_act != NULL) {
5819                 IPACT_REFRELE(acqrec->ipsacq_act);
5820         }
5821 
5822         /* Unlink */
5823         *(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5824         if (acqrec->ipsacq_next != NULL)
5825                 acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5826 
5827         if (acqrec->ipsacq_tsl != NULL) {
5828                 label_rele(acqrec->ipsacq_tsl);
5829                 acqrec->ipsacq_tsl = NULL;
5830         }
5831 
5832         /*
5833          * Free hanging mp's.
5834          *
5835          * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5836          */
5837 
5838         mutex_enter(&acqrec->ipsacq_lock);
5839         while (acqrec->ipsacq_mp != NULL) {
5840                 mp = acqrec->ipsacq_mp;
5841                 acqrec->ipsacq_mp = mp->b_next;
5842                 mp->b_next = NULL;
5843                 /* Freeing the async message */
5844                 mp = ip_xmit_attr_free_mblk(mp);
5845                 ip_drop_packet(mp, B_FALSE, NULL,
5846                     DROPPER(ipss, ipds_sadb_acquire_timeout),
5847                     &ipss->ipsec_sadb_dropper);
5848         }
5849         mutex_exit(&acqrec->ipsacq_lock);
5850 
5851         /* Free */
5852         mutex_destroy(&acqrec->ipsacq_lock);
5853         kmem_free(acqrec, sizeof (*acqrec));
5854 }
5855 
5856 /*
5857  * Destroy an acquire list fanout.
5858  */
5859 static void
5860 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5861     netstack_t *ns)
5862 {
5863         int i;
5864         iacqf_t *list = *listp;
5865 
5866         if (list == NULL)
5867                 return;
5868 
5869         for (i = 0; i < numentries; i++) {
5870                 mutex_enter(&(list[i].iacqf_lock));
5871                 while (list[i].iacqf_ipsacq != NULL)
5872                         sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5873                 mutex_exit(&(list[i].iacqf_lock));
5874                 if (forever)
5875                         mutex_destroy(&(list[i].iacqf_lock));
5876         }
5877 
5878         if (forever) {
5879                 *listp = NULL;
5880                 kmem_free(list, numentries * sizeof (*list));
5881         }
5882 }
5883 
5884 /*
5885  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5886  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5887  */
5888 static uint8_t *
5889 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5890     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5891     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5892 {
5893         uint8_t *cur = start;
5894         ipsec_alginfo_t *algp;
5895         sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5896 
5897         cur += sizeof (*algdesc);
5898         if (cur >= limit)
5899                 return (NULL);
5900 
5901         ecomb->sadb_x_ecomb_numalgs++;
5902 
5903         /*
5904          * Normalize vs. crypto framework's limits.  This way, you can specify
5905          * a stronger policy, and when the framework loads a stronger version,
5906          * you can just keep plowing w/o rewhacking your SPD.
5907          */
5908         rw_enter(&ipss->ipsec_alg_lock, RW_READER);
5909         algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5910             IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5911         if (algp == NULL) {
5912                 rw_exit(&ipss->ipsec_alg_lock);
5913                 return (NULL);  /* Algorithm doesn't exist.  Fail gracefully. */
5914         }
5915         if (minbits < algp->alg_ef_minbits)
5916                 minbits = algp->alg_ef_minbits;
5917         if (maxbits > algp->alg_ef_maxbits)
5918                 maxbits = algp->alg_ef_maxbits;
5919         rw_exit(&ipss->ipsec_alg_lock);
5920 
5921         algdesc->sadb_x_algdesc_reserved = SADB_8TO1(algp->alg_saltlen);
5922         algdesc->sadb_x_algdesc_satype = satype;
5923         algdesc->sadb_x_algdesc_algtype = algtype;
5924         algdesc->sadb_x_algdesc_alg = alg;
5925         algdesc->sadb_x_algdesc_minbits = minbits;
5926         algdesc->sadb_x_algdesc_maxbits = maxbits;
5927 
5928         return (cur);
5929 }
5930 
5931 /*
5932  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5933  * which must fit before *limit
5934  *
5935  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5936  */
5937 static uint8_t *
5938 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5939     netstack_t *ns)
5940 {
5941         uint8_t *cur = start;
5942         sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5943         ipsec_prot_t *ipp;
5944         ipsec_stack_t *ipss = ns->netstack_ipsec;
5945 
5946         cur += sizeof (*ecomb);
5947         if (cur >= limit)
5948                 return (NULL);
5949 
5950         ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5951 
5952         ipp = &act->ipa_act.ipa_apply;
5953 
5954         ecomb->sadb_x_ecomb_numalgs = 0;
5955         ecomb->sadb_x_ecomb_reserved = 0;
5956         ecomb->sadb_x_ecomb_reserved2 = 0;
5957         /*
5958          * No limits on allocations, since we really don't support that
5959          * concept currently.
5960          */
5961         ecomb->sadb_x_ecomb_soft_allocations = 0;
5962         ecomb->sadb_x_ecomb_hard_allocations = 0;
5963 
5964         /*
5965          * XXX TBD: Policy or global parameters will eventually be
5966          * able to fill in some of these.
5967          */
5968         ecomb->sadb_x_ecomb_flags = 0;
5969         ecomb->sadb_x_ecomb_soft_bytes = 0;
5970         ecomb->sadb_x_ecomb_hard_bytes = 0;
5971         ecomb->sadb_x_ecomb_soft_addtime = 0;
5972         ecomb->sadb_x_ecomb_hard_addtime = 0;
5973         ecomb->sadb_x_ecomb_soft_usetime = 0;
5974         ecomb->sadb_x_ecomb_hard_usetime = 0;
5975 
5976         if (ipp->ipp_use_ah) {
5977                 cur = sadb_new_algdesc(cur, limit, ecomb,
5978                     SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5979                     ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5980                 if (cur == NULL)
5981                         return (NULL);
5982                 ipsecah_fill_defs(ecomb, ns);
5983         }
5984 
5985         if (ipp->ipp_use_esp) {
5986                 if (ipp->ipp_use_espa) {
5987                         cur = sadb_new_algdesc(cur, limit, ecomb,
5988                             SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5989                             ipp->ipp_esp_auth_alg,
5990                             ipp->ipp_espa_minbits,
5991                             ipp->ipp_espa_maxbits, ipss);
5992                         if (cur == NULL)
5993                                 return (NULL);
5994                 }
5995 
5996                 cur = sadb_new_algdesc(cur, limit, ecomb,
5997                     SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5998                     ipp->ipp_encr_alg,
5999                     ipp->ipp_espe_minbits,
6000                     ipp->ipp_espe_maxbits, ipss);
6001                 if (cur == NULL)
6002                         return (NULL);
6003                 /* Fill in lifetimes if and only if AH didn't already... */
6004                 if (!ipp->ipp_use_ah)
6005                         ipsecesp_fill_defs(ecomb, ns);
6006         }
6007 
6008         return (cur);
6009 }
6010 
6011 #include <sys/tsol/label_macro.h> /* XXX should not need this */
6012 
/*
 * From a ts_label_t, construct a sensitivity label extension.
 *
 * We send up a fixed-size sensitivity label bitmap, and are perhaps
 * overly chummy with the underlying data structures here.
 */
6019 
6020 /* ARGSUSED */
6021 int
6022 sadb_sens_len_from_label(ts_label_t *tsl)
6023 {
6024         int baselen = sizeof (sadb_sens_t) + _C_LEN * 4;
6025         return (roundup(baselen, sizeof (uint64_t)));
6026 }
6027 
6028 void
6029 sadb_sens_from_label(sadb_sens_t *sens, int exttype, ts_label_t *tsl,
6030     int senslen)
6031 {
6032         uint8_t *bitmap;
6033         bslabel_t *sl;
6034 
6035         /* LINTED */
6036         ASSERT((_C_LEN & 1) == 0);
6037         ASSERT((senslen & 7) == 0);
6038 
6039         sl = label2bslabel(tsl);
6040 
6041         sens->sadb_sens_exttype = exttype;
6042         sens->sadb_sens_len = SADB_8TO64(senslen);
6043 
6044         sens->sadb_sens_dpd = tsl->tsl_doi;
6045         sens->sadb_sens_sens_level = LCLASS(sl);
6046         sens->sadb_sens_integ_level = 0; /* TBD */
6047         sens->sadb_sens_sens_len = _C_LEN >> 1;
6048         sens->sadb_sens_integ_len = 0; /* TBD */
6049         sens->sadb_x_sens_flags = 0;
6050 
6051         bitmap = (uint8_t *)(sens + 1);
6052         bcopy(&(((_bslabel_impl_t *)sl)->compartments), bitmap, _C_LEN * 4);
6053 }
6054 
6055 /*
6056  * Okay, how do we report errors/invalid labels from this?
6057  * With a special designated "not a label" cred_t ?
6058  */
6059 /* ARGSUSED */
6060 ts_label_t *
6061 sadb_label_from_sens(sadb_sens_t *sens, uint64_t *bitmap)
6062 {
6063         int bitmap_len = SADB_64TO8(sens->sadb_sens_sens_len);
6064         bslabel_t sl;
6065         ts_label_t *tsl;
6066 
6067         if (sens->sadb_sens_integ_level != 0)
6068                 return (NULL);
6069         if (sens->sadb_sens_integ_len != 0)
6070                 return (NULL);
6071         if (bitmap_len > _C_LEN * 4)
6072                 return (NULL);
6073 
6074         bsllow(&sl);
6075         LCLASS_SET((_bslabel_impl_t *)&sl, sens->sadb_sens_sens_level);
6076         bcopy(bitmap, &((_bslabel_impl_t *)&sl)->compartments,
6077             bitmap_len);
6078 
6079         tsl = labelalloc(&sl, sens->sadb_sens_dpd, KM_NOSLEEP);
6080         if (tsl == NULL)
6081                 return (NULL);
6082 
6083         if (sens->sadb_x_sens_flags & SADB_X_SENS_UNLABELED)
6084                 tsl->tsl_flags |= TSLF_UNLABELED;
6085         return (tsl);
6086 }
6087 
6088 /* End XXX label-library-leakage */
6089 
6090 /*
6091  * Given an SADB_GETSPI message, find an appropriately ranged SA and
6092  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
6093  * If there was a memory allocation error, return NULL.  (Assume NULL !=
6094  * (ipsa_t *)-1).
6095  *
6096  * master_spi is passed in host order.
6097  */
6098 ipsa_t *
6099 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
6100     netstack_t *ns, uint_t sa_type)
6101 {
6102         sadb_address_t *src =
6103             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
6104             *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
6105         sadb_spirange_t *range =
6106             (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
6107         struct sockaddr_in *ssa, *dsa;
6108         struct sockaddr_in6 *ssa6, *dsa6;
6109         uint32_t *srcaddr, *dstaddr;
6110         sa_family_t af;
6111         uint32_t add, min, max;
6112         uint8_t protocol =
6113             (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
6114 
6115         if (src == NULL) {
6116                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
6117                 return ((ipsa_t *)-1);
6118         }
6119         if (dst == NULL) {
6120                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
6121                 return ((ipsa_t *)-1);
6122         }
6123         if (range == NULL) {
6124                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
6125                 return ((ipsa_t *)-1);
6126         }
6127 
6128         min = ntohl(range->sadb_spirange_min);
6129         max = ntohl(range->sadb_spirange_max);
6130         dsa = (struct sockaddr_in *)(dst + 1);
6131         dsa6 = (struct sockaddr_in6 *)dsa;
6132 
6133         ssa = (struct sockaddr_in *)(src + 1);
6134         ssa6 = (struct sockaddr_in6 *)ssa;
6135         ASSERT(dsa->sin_family == ssa->sin_family);
6136 
6137         srcaddr = ALL_ZEROES_PTR;
6138         af = dsa->sin_family;
6139         switch (af) {
6140         case AF_INET:
6141                 if (src != NULL)
6142                         srcaddr = (uint32_t *)(&ssa->sin_addr);
6143                 dstaddr = (uint32_t *)(&dsa->sin_addr);
6144                 break;
6145         case AF_INET6:
6146                 if (src != NULL)
6147                         srcaddr = (uint32_t *)(&ssa6->sin6_addr);
6148                 dstaddr = (uint32_t *)(&dsa6->sin6_addr);
6149                 break;
6150         default:
6151                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
6152                 return ((ipsa_t *)-1);
6153         }
6154 
6155         if (master_spi < min || master_spi > max) {
6156                 /* Return a random value in the range. */
6157                 if (cl_inet_getspi) {
6158                         cl_inet_getspi(ns->netstack_stackid, protocol,
6159                             (uint8_t *)&add, sizeof (add), NULL);
6160                 } else {
6161                         (void) random_get_pseudo_bytes((uint8_t *)&add,
6162                             sizeof (add));
6163                 }
6164                 master_spi = min + (add % (max - min + 1));
6165         }
6166 
6167         /*
6168          * Since master_spi is passed in host order, we need to htonl() it
6169          * for the purposes of creating a new SA.
6170          */
6171         return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
6172             ns));
6173 }
6174 
6175 /*
6176  *
6177  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
6178  * base header, just ignore it.  Otherwise, lock down the whole ACQUIRE list
6179  * and scan for the sequence number in question.  I may wish to accept an
6180  * address pair with it, for easier searching.
6181  *
6182  * Caller frees the message, so we don't have to here.
6183  *
6184  * NOTE:        The pfkey_q parameter may be used in the future for ACQUIRE
6185  *              failures.
6186  */
6187 /* ARGSUSED */
6188 void
6189 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *pfkey_q,
6190     netstack_t *ns)
6191 {
6192         int i;
6193         ipsacq_t *acqrec;
6194         iacqf_t *bucket;
6195 
6196         /*
6197          * I only accept the base header for this!
6198          * Though to be honest, requiring the dst address would help
6199          * immensely.
6200          *
6201          * XXX  There are already cases where I can get the dst address.
6202          */
6203         if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
6204                 return;
6205 
6206         /*
6207          * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
6208          * (and in the future send a message to IP with the appropriate error
6209          * number).
6210          *
6211          * Q: Do I want to reject if pid != 0?
6212          */
6213 
6214         for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
6215                 bucket = &sp->s_v4.sdb_acq[i];
6216                 mutex_enter(&bucket->iacqf_lock);
6217                 for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6218                     acqrec = acqrec->ipsacq_next) {
6219                         if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6220                                 break;  /* for acqrec... loop. */
6221                 }
6222                 if (acqrec != NULL)
6223                         break;  /* for i = 0... loop. */
6224 
6225                 mutex_exit(&bucket->iacqf_lock);
6226         }
6227 
6228         if (acqrec == NULL) {
6229                 for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
6230                         bucket = &sp->s_v6.sdb_acq[i];
6231                         mutex_enter(&bucket->iacqf_lock);
6232                         for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6233                             acqrec = acqrec->ipsacq_next) {
6234                                 if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6235                                         break;  /* for acqrec... loop. */
6236                         }
6237                         if (acqrec != NULL)
6238                                 break;  /* for i = 0... loop. */
6239 
6240                         mutex_exit(&bucket->iacqf_lock);
6241                 }
6242         }
6243 
6244 
6245         if (acqrec == NULL)
6246                 return;
6247 
6248         /*
6249          * What do I do with the errno and IP?  I may need mp's services a
6250          * little more.  See sadb_destroy_acquire() for future directions
6251          * beyond free the mblk chain on the acquire record.
6252          */
6253 
6254         ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
6255         sadb_destroy_acquire(acqrec, ns);
6256         /* Have to exit mutex here, because of breaking out of for loop. */
6257         mutex_exit(&bucket->iacqf_lock);
6258 }
6259 
6260 /*
6261  * The following functions work with the replay windows of an SA.  They assume
6262  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
6263  * represents the highest sequence number packet received, and back
6264  * (ipsa->ipsa_replay_wsize) packets.
6265  */
6266 
6267 /*
6268  * Is the replay bit set?
6269  */
6270 static boolean_t
6271 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
6272 {
6273         uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6274 
6275         return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
6276 }
6277 
6278 /*
6279  * Shift the bits of the replay window over.
6280  */
6281 static void
6282 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
6283 {
6284         int i;
6285         int jump = ((shift - 1) >> 6) + 1;
6286 
6287         if (shift == 0)
6288                 return;
6289 
6290         for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6291                 if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6292                         ipsa->ipsa_replay_arr[i + jump] |=
6293                             ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6294                 }
6295                 ipsa->ipsa_replay_arr[i] <<= shift;
6296         }
6297 }
6298 
6299 /*
6300  * Set a bit in the bit vector.
6301  */
6302 static void
6303 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6304 {
6305         uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6306 
6307         ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6308 }
6309 
6310 #define SADB_MAX_REPLAY_VALUE 0xffffffff
6311 
6312 /*
6313  * Assume caller has NOT done ntohl() already on seq.  Check to see
6314  * if replay sequence number "seq" has been seen already.
6315  */
6316 boolean_t
6317 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6318 {
6319         boolean_t rc;
6320         uint32_t diff;
6321 
6322         if (ipsa->ipsa_replay_wsize == 0)
6323                 return (B_TRUE);
6324 
6325         /*
6326          * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6327          */
6328 
6329         /* Convert sequence number into host order before holding the mutex. */
6330         seq = ntohl(seq);
6331 
6332         mutex_enter(&ipsa->ipsa_lock);
6333 
6334         /* Initialize inbound SA's ipsa_replay field to last one received. */
6335         if (ipsa->ipsa_replay == 0)
6336                 ipsa->ipsa_replay = 1;
6337 
6338         if (seq > ipsa->ipsa_replay) {
6339                 /*
6340                  * I have received a new "highest value received".  Shift
6341                  * the replay window over.
6342                  */
6343                 diff = seq - ipsa->ipsa_replay;
6344                 if (diff < ipsa->ipsa_replay_wsize) {
6345                         /* In replay window, shift bits over. */
6346                         ipsa_shift_replay(ipsa, diff);
6347                 } else {
6348                         /* WAY FAR AHEAD, clear bits and start again. */
6349                         bzero(ipsa->ipsa_replay_arr,
6350                             sizeof (ipsa->ipsa_replay_arr));
6351                 }
6352                 ipsa_set_replay(ipsa, 0);
6353                 ipsa->ipsa_replay = seq;
6354                 rc = B_TRUE;
6355                 goto done;
6356         }
6357         diff = ipsa->ipsa_replay - seq;
6358         if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6359                 rc = B_FALSE;
6360                 goto done;
6361         }
6362         /* Set this packet as seen. */
6363         ipsa_set_replay(ipsa, diff);
6364 
6365         rc = B_TRUE;
6366 done:
6367         mutex_exit(&ipsa->ipsa_lock);
6368         return (rc);
6369 }
6370 
6371 /*
6372  * "Peek" and see if we should even bother going through the effort of
6373  * running an authentication check on the sequence number passed in.
6374  * this takes into account packets that are below the replay window,
6375  * and collisions with already replayed packets.  Return B_TRUE if it
6376  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6377  * Assume same byte-ordering as sadb_replay_check.
6378  */
6379 boolean_t
6380 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6381 {
6382         boolean_t rc = B_FALSE;
6383         uint32_t diff;
6384 
6385         if (ipsa->ipsa_replay_wsize == 0)
6386                 return (B_TRUE);
6387 
6388         /*
6389          * 0 is 0, regardless of byte order... :)
6390          *
6391          * If I get 0 on the wire (and there is a replay window) then the
6392          * sender most likely wrapped.  This ipsa may need to be marked or
6393          * something.
6394          */
6395         if (seq == 0)
6396                 return (B_FALSE);
6397 
6398         seq = ntohl(seq);
6399         mutex_enter(&ipsa->ipsa_lock);
6400         if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6401             ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6402                 goto done;
6403 
6404         /*
6405          * If I've hit 0xffffffff, then quite honestly, I don't need to
6406          * bother with formalities.  I'm not accepting any more packets
6407          * on this SA.
6408          */
6409         if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6410                 /*
6411                  * Since we're already holding the lock, update the
6412                  * expire time ala. sadb_replay_delete() and return.
6413                  */
6414                 ipsa->ipsa_hardexpiretime = (time_t)1;
6415                 goto done;
6416         }
6417 
6418         if (seq <= ipsa->ipsa_replay) {
6419                 /*
6420                  * This seq is in the replay window.  I'm not below it,
6421                  * because I already checked for that above!
6422                  */
6423                 diff = ipsa->ipsa_replay - seq;
6424                 if (ipsa_is_replay_set(ipsa, diff))
6425                         goto done;
6426         }
6427         /* Else return B_TRUE, I'm going to advance the window. */
6428 
6429         rc = B_TRUE;
6430 done:
6431         mutex_exit(&ipsa->ipsa_lock);
6432         return (rc);
6433 }
6434 
6435 /*
6436  * Delete a single SA.
6437  *
6438  * For now, use the quick-and-dirty trick of making the association's
6439  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
6440  */
6441 void
6442 sadb_replay_delete(ipsa_t *assoc)
6443 {
6444         mutex_enter(&assoc->ipsa_lock);
6445         assoc->ipsa_hardexpiretime = (time_t)1;
6446         mutex_exit(&assoc->ipsa_lock);
6447 }
6448 
6449 /*
6450  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
6451  * this is designed to take only a format string with "* %x * %s *", so
6452  * that "spi" is printed first, then "addr" is converted using inet_pton().
6453  *
6454  * This is abstracted out to save the stack space for only when inet_pton()
6455  * is called.  Make sure "spi" is in network order; it usually is when this
6456  * would get called.
6457  */
6458 void
6459 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
6460     uint32_t spi, void *addr, int af, netstack_t *ns)
6461 {
6462         char buf[INET6_ADDRSTRLEN];
6463 
6464         ASSERT(af == AF_INET6 || af == AF_INET);
6465 
6466         ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6467             inet_ntop(af, addr, buf, sizeof (buf)));
6468 }
6469 
6470 /*
6471  * Fills in a reference to the policy, if any, from the conn, in *ppp
6472  */
6473 static void
6474 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6475 {
6476         ipsec_policy_t  *pp;
6477         ipsec_latch_t   *ipl = connp->conn_latch;
6478 
6479         if ((ipl != NULL) && (connp->conn_ixa->ixa_ipsec_policy != NULL)) {
6480                 pp = connp->conn_ixa->ixa_ipsec_policy;
6481                 IPPOL_REFHOLD(pp);
6482         } else {
6483                 pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, sel,
6484                     connp->conn_netstack);
6485         }
6486         *ppp = pp;
6487 }
6488 
6489 /*
6490  * The following functions scan through active conn_t structures
6491  * and return a reference to the best-matching policy it can find.
6492  * Caller must release the reference.
6493  */
6494 static void
6495 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6496 {
6497         connf_t *connfp;
6498         conn_t *connp = NULL;
6499         ipsec_selector_t portonly;
6500 
6501         bzero((void *)&portonly, sizeof (portonly));
6502 
6503         if (sel->ips_local_port == 0)
6504                 return;
6505 
6506         connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6507             ipst)];
6508         mutex_enter(&connfp->connf_lock);
6509 
6510         if (sel->ips_isv4) {
6511                 connp = connfp->connf_head;
6512                 while (connp != NULL) {
6513                         if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6514                             sel->ips_local_addr_v4, sel->ips_remote_port,
6515                             sel->ips_remote_addr_v4))
6516                                 break;
6517                         connp = connp->conn_next;
6518                 }
6519 
6520                 if (connp == NULL) {
6521                         /* Try port-only match in IPv6. */
6522                         portonly.ips_local_port = sel->ips_local_port;
6523                         sel = &portonly;
6524                 }
6525         }
6526 
6527         if (connp == NULL) {
6528                 connp = connfp->connf_head;
6529                 while (connp != NULL) {
6530                         if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6531                             sel->ips_local_addr_v6, sel->ips_remote_port,
6532                             sel->ips_remote_addr_v6))
6533                                 break;
6534                         connp = connp->conn_next;
6535                 }
6536 
6537                 if (connp == NULL) {
6538                         mutex_exit(&connfp->connf_lock);
6539                         return;
6540                 }
6541         }
6542 
6543         CONN_INC_REF(connp);
6544         mutex_exit(&connfp->connf_lock);
6545 
6546         ipsec_conn_pol(sel, connp, ppp);
6547         CONN_DEC_REF(connp);
6548 }
6549 
6550 static conn_t *
6551 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6552 {
6553         connf_t *connfp;
6554         conn_t *connp = NULL;
6555         const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6556 
6557         if (sel->ips_local_port == 0)
6558                 return (NULL);
6559 
6560         connfp = &ipst->ips_ipcl_bind_fanout[
6561             IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6562         mutex_enter(&connfp->connf_lock);
6563 
6564         if (sel->ips_isv4) {
6565                 connp = connfp->connf_head;
6566                 while (connp != NULL) {
6567                         if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6568                             sel->ips_local_addr_v4, pptr[1]))
6569                                 break;
6570                         connp = connp->conn_next;
6571                 }
6572 
6573                 if (connp == NULL) {
6574                         /* Match to all-zeroes. */
6575                         v6addrmatch = &ipv6_all_zeros;
6576                 }
6577         }
6578 
6579         if (connp == NULL) {
6580                 connp = connfp->connf_head;
6581                 while (connp != NULL) {
6582                         if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6583                             *v6addrmatch, pptr[1]))
6584                                 break;
6585                         connp = connp->conn_next;
6586                 }
6587 
6588                 if (connp == NULL) {
6589                         mutex_exit(&connfp->connf_lock);
6590                         return (NULL);
6591                 }
6592         }
6593 
6594         CONN_INC_REF(connp);
6595         mutex_exit(&connfp->connf_lock);
6596         return (connp);
6597 }
6598 
6599 static void
6600 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6601 {
6602         connf_t         *connfp;
6603         conn_t          *connp;
6604         uint32_t        ports;
6605         uint16_t        *pptr = (uint16_t *)&ports;
6606 
6607         /*
6608          * Find TCP state in the following order:
6609          * 1.) Connected conns.
6610          * 2.) Listeners.
6611          *
6612          * Even though #2 will be the common case for inbound traffic, only
6613          * following this order insures correctness.
6614          */
6615 
6616         if (sel->ips_local_port == 0)
6617                 return;
6618 
6619         /*
6620          * 0 should be fport, 1 should be lport.  SRC is the local one here.
6621          * See ipsec_construct_inverse_acquire() for details.
6622          */
6623         pptr[0] = sel->ips_remote_port;
6624         pptr[1] = sel->ips_local_port;
6625 
6626         connfp = &ipst->ips_ipcl_conn_fanout[
6627             IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6628         mutex_enter(&connfp->connf_lock);
6629         connp = connfp->connf_head;
6630 
6631         if (sel->ips_isv4) {
6632                 while (connp != NULL) {
6633                         if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6634                             sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6635                             ports))
6636                                 break;
6637                         connp = connp->conn_next;
6638                 }
6639         } else {
6640                 while (connp != NULL) {
6641                         if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6642                             sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6643                             ports))
6644                                 break;
6645                         connp = connp->conn_next;
6646                 }
6647         }
6648 
6649         if (connp != NULL) {
6650                 CONN_INC_REF(connp);
6651                 mutex_exit(&connfp->connf_lock);
6652         } else {
6653                 mutex_exit(&connfp->connf_lock);
6654 
6655                 /* Try the listen hash. */
6656                 if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6657                         return;
6658         }
6659 
6660         ipsec_conn_pol(sel, connp, ppp);
6661         CONN_DEC_REF(connp);
6662 }
6663 
6664 static void
6665 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6666     ip_stack_t *ipst)
6667 {
6668         conn_t          *connp;
6669         uint32_t        ports;
6670         uint16_t        *pptr = (uint16_t *)&ports;
6671 
6672         /*
6673          * Find SCP state in the following order:
6674          * 1.) Connected conns.
6675          * 2.) Listeners.
6676          *
6677          * Even though #2 will be the common case for inbound traffic, only
6678          * following this order insures correctness.
6679          */
6680 
6681         if (sel->ips_local_port == 0)
6682                 return;
6683 
6684         /*
6685          * 0 should be fport, 1 should be lport.  SRC is the local one here.
6686          * See ipsec_construct_inverse_acquire() for details.
6687          */
6688         pptr[0] = sel->ips_remote_port;
6689         pptr[1] = sel->ips_local_port;
6690 
6691         /*
6692          * For labeled systems, there's no need to check the
6693          * label here.  It's known to be good as we checked
6694          * before allowing the connection to become bound.
6695          */
6696         if (sel->ips_isv4) {
6697                 in6_addr_t      src, dst;
6698 
6699                 IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6700                 IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6701                 connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6702                     0, ipst->ips_netstack->netstack_sctp);
6703         } else {
6704                 connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6705                     &sel->ips_local_addr_v6, ports, ALL_ZONES,
6706                     0, ipst->ips_netstack->netstack_sctp);
6707         }
6708         if (connp == NULL)
6709                 return;
6710         ipsec_conn_pol(sel, connp, ppp);
6711         CONN_DEC_REF(connp);
6712 }
6713 
6714 /*
6715  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6716  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6717  * to PF_KEY.
6718  *
6719  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6720  * ignore prefix lengths in the address extension.  Since we match on first-
6721  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6722  * set addresses to mask out the lower bits, we should get a suitable search
6723  * key for the SPD anyway.  This is the function to change if the assumption
6724  * about suitable search keys is wrong.
6725  */
6726 static int
6727 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6728     sadb_address_t *dstext, int *diagnostic)
6729 {
6730         struct sockaddr_in *src, *dst;
6731         struct sockaddr_in6 *src6, *dst6;
6732 
6733         *diagnostic = 0;
6734 
6735         bzero(sel, sizeof (*sel));
6736         sel->ips_protocol = srcext->sadb_address_proto;
6737         dst = (struct sockaddr_in *)(dstext + 1);
6738         if (dst->sin_family == AF_INET6) {
6739                 dst6 = (struct sockaddr_in6 *)dst;
6740                 src6 = (struct sockaddr_in6 *)(srcext + 1);
6741                 if (src6->sin6_family != AF_INET6) {
6742                         *diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6743                         return (EINVAL);
6744                 }
6745                 sel->ips_remote_addr_v6 = dst6->sin6_addr;
6746                 sel->ips_local_addr_v6 = src6->sin6_addr;
6747                 if (sel->ips_protocol == IPPROTO_ICMPV6) {
6748                         sel->ips_is_icmp_inv_acq = 1;
6749                 } else {
6750                         sel->ips_remote_port = dst6->sin6_port;
6751                         sel->ips_local_port = src6->sin6_port;
6752                 }
6753                 sel->ips_isv4 = B_FALSE;
6754         } else {
6755                 src = (struct sockaddr_in *)(srcext + 1);
6756                 if (src->sin_family != AF_INET) {
6757                         *diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6758                         return (EINVAL);
6759                 }
6760                 sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6761                 sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6762                 if (sel->ips_protocol == IPPROTO_ICMP) {
6763                         sel->ips_is_icmp_inv_acq = 1;
6764                 } else {
6765                         sel->ips_remote_port = dst->sin_port;
6766                         sel->ips_local_port = src->sin_port;
6767                 }
6768                 sel->ips_isv4 = B_TRUE;
6769         }
6770         return (0);
6771 }
6772 
6773 /*
6774  * We have encapsulation.
6775  * - Lookup tun_t by address and look for an associated
6776  *   tunnel policy
6777  * - If there are inner selectors
6778  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6779  *   - Look up tunnel policy based on selectors
6780  * - Else
6781  *   - Sanity check the negotation
6782  *   - If appropriate, fall through to global policy
6783  */
6784 static int
6785 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6786     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6787     int *diagnostic)
6788 {
6789         int err;
6790         ipsec_policy_head_t *polhead;
6791 
6792         *diagnostic = 0;
6793 
6794         /* Check for inner selectors and act appropriately */
6795 
6796         if (innsrcext != NULL) {
6797                 /* Inner selectors present */
6798                 ASSERT(inndstext != NULL);
6799                 if ((itp == NULL) ||
6800                     (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6801                     (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6802                         /*
6803                          * If inner packet selectors, we must have negotiate
6804                          * tunnel and active policy.  If the tunnel has
6805                          * transport-mode policy set on it, or has no policy,
6806                          * fail.
6807                          */
6808                         return (ENOENT);
6809                 } else {
6810                         /*
6811                          * Reset "sel" to indicate inner selectors.  Pass
6812                          * inner PF_KEY address extensions for this to happen.
6813                          */
6814                         if ((err = ipsec_get_inverse_acquire_sel(sel,
6815                             innsrcext, inndstext, diagnostic)) != 0)
6816                                 return (err);
6817                         /*
6818                          * Now look for a tunnel policy based on those inner
6819                          * selectors.  (Common code is below.)
6820                          */
6821                 }
6822         } else {
6823                 /* No inner selectors present */
6824                 if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6825                         /*
6826                          * Transport mode negotiation with no tunnel policy
6827                          * configured - return to indicate a global policy
6828                          * check is needed.
6829                          */
6830                         return (0);
6831                 } else if (itp->itp_flags & ITPF_P_TUNNEL) {
6832                         /* Tunnel mode set with no inner selectors. */
6833                         return (ENOENT);
6834                 }
6835                 /*
6836                  * Else, this is a tunnel policy configured with ifconfig(1m)
6837                  * or "negotiate transport" with ipsecconf(1m).  We have an
6838                  * itp with policy set based on any match, so don't bother
6839                  * changing fields in "sel".
6840                  */
6841         }
6842 
6843         ASSERT(itp != NULL);
6844         polhead = itp->itp_policy;
6845         ASSERT(polhead != NULL);
6846         rw_enter(&polhead->iph_lock, RW_READER);
6847         *ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel);
6848         rw_exit(&polhead->iph_lock);
6849 
6850         /*
6851          * Don't default to global if we didn't find a matching policy entry.
6852          * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6853          */
6854         if (*ppp == NULL)
6855                 return (ENOENT);
6856 
6857         return (0);
6858 }
6859 
6860 /*
6861  * For sctp conn_faddr is the primary address, hence this is of limited
6862  * use for sctp.
6863  */
6864 static void
6865 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6866     ip_stack_t *ipst)
6867 {
6868         boolean_t       isv4 = sel->ips_isv4;
6869         connf_t         *connfp;
6870         conn_t          *connp;
6871 
6872         if (isv4) {
6873                 connfp = &ipst->ips_ipcl_proto_fanout_v4[sel->ips_protocol];
6874         } else {
6875                 connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6876         }
6877 
6878         mutex_enter(&connfp->connf_lock);
6879         for (connp = connfp->connf_head; connp != NULL;
6880             connp = connp->conn_next) {
6881                 if (isv4) {
6882                         if ((connp->conn_laddr_v4 == INADDR_ANY ||
6883                             connp->conn_laddr_v4 == sel->ips_local_addr_v4) &&
6884                             (connp->conn_faddr_v4 == INADDR_ANY ||
6885                             connp->conn_faddr_v4 == sel->ips_remote_addr_v4))
6886                                 break;
6887                 } else {
6888                         if ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
6889                             IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
6890                             &sel->ips_local_addr_v6)) &&
6891                             (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
6892                             IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
6893                             &sel->ips_remote_addr_v6)))
6894                                 break;
6895                 }
6896         }
6897         if (connp == NULL) {
6898                 mutex_exit(&connfp->connf_lock);
6899                 return;
6900         }
6901 
6902         CONN_INC_REF(connp);
6903         mutex_exit(&connfp->connf_lock);
6904 
6905         ipsec_conn_pol(sel, connp, ppp);
6906         CONN_DEC_REF(connp);
6907 }
6908 
6909 /*
6910  * Construct an inverse ACQUIRE reply based on:
6911  *
6912  * 1.) Current global policy.
6913  * 2.) An conn_t match depending on what all was passed in the extv[].
6914  * 3.) A tunnel's policy head.
6915  * ...
6916  * N.) Other stuff TBD (e.g. identities)
6917  *
6918  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6919  * in this function so the caller can extract them where appropriately.
6920  *
6921  * The SRC address is the local one - just like an outbound ACQUIRE message.
6922  *
6923  * XXX MLS: key management supplies a label which we just reflect back up
6924  * again.  clearly we need to involve the label in the rest of the checks.
6925  */
6926 mblk_t *
6927 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6928     netstack_t *ns)
6929 {
6930         int err;
6931         int diagnostic;
6932         sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6933             *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6934             *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6935             *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6936         sadb_sens_t *sens = (sadb_sens_t *)extv[SADB_EXT_SENSITIVITY];
6937         struct sockaddr_in6 *src, *dst;
6938         struct sockaddr_in6 *isrc, *idst;
6939         ipsec_tun_pol_t *itp = NULL;
6940         ipsec_policy_t *pp = NULL;
6941         ipsec_selector_t sel, isel;
6942         mblk_t *retmp = NULL;
6943         ip_stack_t      *ipst = ns->netstack_ip;
6944 
6945 
6946         /* Normalize addresses */
6947         if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6948             == KS_IN_ADDR_UNKNOWN) {
6949                 err = EINVAL;
6950                 diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6951                 goto bail;
6952         }
6953         src = (struct sockaddr_in6 *)(srcext + 1);
6954         if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6955             == KS_IN_ADDR_UNKNOWN) {
6956                 err = EINVAL;
6957                 diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6958                 goto bail;
6959         }
6960         dst = (struct sockaddr_in6 *)(dstext + 1);
6961         if (src->sin6_family != dst->sin6_family) {
6962                 err = EINVAL;
6963                 diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6964                 goto bail;
6965         }
6966 
6967         /* Check for tunnel mode and act appropriately */
6968         if (innsrcext != NULL) {
6969                 if (inndstext == NULL) {
6970                         err = EINVAL;
6971                         diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6972                         goto bail;
6973                 }
6974                 if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6975                     (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6976                         err = EINVAL;
6977                         diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6978                         goto bail;
6979                 }
6980                 isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6981                 if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6982                     (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6983                         err = EINVAL;
6984                         diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6985                         goto bail;
6986                 }
6987                 idst = (struct sockaddr_in6 *)(inndstext + 1);
6988                 if (isrc->sin6_family != idst->sin6_family) {
6989                         err = EINVAL;
6990                         diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6991                         goto bail;
6992                 }
6993                 if (isrc->sin6_family != AF_INET &&
6994                     isrc->sin6_family != AF_INET6) {
6995                         err = EINVAL;
6996                         diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6997                         goto bail;
6998                 }
6999         } else if (inndstext != NULL) {
7000                 err = EINVAL;
7001                 diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
7002                 goto bail;
7003         }
7004 
7005         /* Get selectors first, based on outer addresses */
7006         err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
7007         if (err != 0)
7008                 goto bail;
7009 
7010         /* Check for tunnel mode mismatches. */
7011         if (innsrcext != NULL &&
7012             ((isrc->sin6_family == AF_INET &&
7013             sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
7014             (isrc->sin6_family == AF_INET6 &&
7015             sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
7016                 err = EPROTOTYPE;
7017                 goto bail;
7018         }
7019 
7020         /*
7021          * Okay, we have the addresses and other selector information.
7022          * Let's first find a conn...
7023          */
7024         pp = NULL;
7025         switch (sel.ips_protocol) {
7026         case IPPROTO_TCP:
7027                 ipsec_tcp_pol(&sel, &pp, ipst);
7028                 break;
7029         case IPPROTO_UDP:
7030                 ipsec_udp_pol(&sel, &pp, ipst);
7031                 break;
7032         case IPPROTO_SCTP:
7033                 ipsec_sctp_pol(&sel, &pp, ipst);
7034                 break;
7035         case IPPROTO_ENCAP:
7036         case IPPROTO_IPV6:
7037                 /*
7038                  * Assume sel.ips_remote_addr_* has the right address at
7039                  * that exact position.
7040                  */
7041                 itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
7042                     (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
7043                     ipst);
7044 
7045                 if (innsrcext == NULL) {
7046                         /*
7047                          * Transport-mode tunnel, make sure we fake out isel
7048                          * to contain something based on the outer protocol.
7049                          */
7050                         bzero(&isel, sizeof (isel));
7051                         isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
7052                 } /* Else isel is initialized by ipsec_tun_pol(). */
7053                 err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
7054                     &diagnostic);
7055                 /*
7056                  * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
7057                  * may be.
7058                  */
7059                 if (err != 0)
7060                         goto bail;
7061                 break;
7062         default:
7063                 ipsec_oth_pol(&sel, &pp, ipst);
7064                 break;
7065         }
7066 
7067         /*
7068          * If we didn't find a matching conn_t or other policy head, take a
7069          * look in the global policy.
7070          */
7071         if (pp == NULL) {
7072                 pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, &sel, ns);
7073                 if (pp == NULL) {
7074                         /* There's no global policy. */
7075                         err = ENOENT;
7076                         diagnostic = 0;
7077                         goto bail;
7078                 }
7079         }
7080 
7081         ASSERT(pp != NULL);
7082         retmp = sadb_acquire_msg_base(0, 0, samsg->sadb_msg_seq,
7083             samsg->sadb_msg_pid);
7084         if (retmp != NULL) {
7085                 /* Remove KEYSOCK_OUT, because caller constructs it instead. */
7086                 mblk_t *kso = retmp;
7087 
7088                 retmp = retmp->b_cont;
7089                 freeb(kso);
7090                 /* Append addresses... */
7091                 retmp->b_cont = sadb_acquire_msg_common(&sel, pp, NULL,
7092                     (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)), NULL,
7093                     sens);
7094                 if (retmp->b_cont == NULL) {
7095                         freemsg(retmp);
7096                         retmp = NULL;
7097                 }
7098                 /* And the policy result. */
7099                 retmp->b_cont->b_cont =
7100                     sadb_acquire_extended_prop(pp->ipsp_act, ns);
7101                 if (retmp->b_cont->b_cont == NULL) {
7102                         freemsg(retmp);
7103                         retmp = NULL;
7104                 }
7105                 ((sadb_msg_t *)retmp->b_rptr)->sadb_msg_len =
7106                     SADB_8TO64(msgsize(retmp));
7107         }
7108 
7109         if (pp != NULL) {
7110                 IPPOL_REFRELE(pp);
7111         }
7112         ASSERT(err == 0 && diagnostic == 0);
7113         if (retmp == NULL)
7114                 err = ENOMEM;
7115 bail:
7116         if (itp != NULL) {
7117                 ITP_REFRELE(itp, ns);
7118         }
7119         samsg->sadb_msg_errno = (uint8_t)err;
7120         samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
7121         return (retmp);
7122 }
7123 
7124 /*
7125  * ipsa_lpkt is a one-element queue, only manipulated by the next two
7126  * functions.  They have to hold the ipsa_lock because of potential races
7127  * between key management using SADB_UPDATE, and inbound packets that may
7128  * queue up on the larval SA (hence the 'l' in "lpkt").
7129  */
7130 
7131 /*
7132  * sadb_set_lpkt:
7133  *
7134  * Returns the passed-in packet if the SA is no longer larval.
7135  *
7136  * Returns NULL if the SA is larval, and needs to be swapped into the SA for
7137  * processing after an SADB_UPDATE.
7138  */
7139 mblk_t *
7140 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, ip_recv_attr_t *ira)
7141 {
7142         mblk_t          *opkt;
7143 
7144         mutex_enter(&ipsa->ipsa_lock);
7145         opkt = ipsa->ipsa_lpkt;
7146         if (ipsa->ipsa_state == IPSA_STATE_LARVAL) {
7147                 /*
7148                  * Consume npkt and place it in the LARVAL SA's inbound
7149                  * packet slot.
7150                  */
7151                 mblk_t  *attrmp;
7152 
7153                 attrmp = ip_recv_attr_to_mblk(ira);
7154                 if (attrmp == NULL) {
7155                         ill_t *ill = ira->ira_ill;
7156 
7157                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
7158                         ip_drop_input("ipIfStatsInDiscards", npkt, ill);
7159                         freemsg(npkt);
7160                         opkt = NULL;
7161                 } else {
7162                         ASSERT(attrmp->b_cont == NULL);
7163                         attrmp->b_cont = npkt;
7164                         ipsa->ipsa_lpkt = attrmp;
7165                 }
7166                 npkt = NULL;
7167         } else {
7168                 /*
7169                  * If not larval, we lost the race.  NOTE: ipsa_lpkt may still
7170                  * have been non-NULL in the non-larval case, because of
7171                  * inbound packets arriving prior to sadb_common_add()
7172                  * transferring the SA completely out of larval state, but
7173                  * after lpkt was grabbed by the AH/ESP-specific add routines.
7174                  * We should clear the old ipsa_lpkt in this case to make sure
7175                  * that it doesn't linger on the now-MATURE IPsec SA, or get
7176                  * picked up as an out-of-order packet.
7177                  */
7178                 ipsa->ipsa_lpkt = NULL;
7179         }
7180         mutex_exit(&ipsa->ipsa_lock);
7181 
7182         if (opkt != NULL) {
7183                 ipsec_stack_t   *ipss;
7184 
7185                 ipss = ira->ira_ill->ill_ipst->ips_netstack->netstack_ipsec;
7186                 opkt = ip_recv_attr_free_mblk(opkt);
7187                 ip_drop_packet(opkt, B_TRUE, ira->ira_ill,
7188                     DROPPER(ipss, ipds_sadb_inlarval_replace),
7189                     &ipss->ipsec_sadb_dropper);
7190         }
7191         return (npkt);
7192 }
7193 
7194 /*
7195  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
7196  * previous value.
7197  */
7198 mblk_t *
7199 sadb_clear_lpkt(ipsa_t *ipsa)
7200 {
7201         mblk_t *opkt;
7202 
7203         mutex_enter(&ipsa->ipsa_lock);
7204         opkt = ipsa->ipsa_lpkt;
7205         ipsa->ipsa_lpkt = NULL;
7206         mutex_exit(&ipsa->ipsa_lock);
7207         return (opkt);
7208 }
7209 
7210 /*
7211  * Buffer a packet that's in IDLE state as set by Solaris Clustering.
7212  */
7213 void
7214 sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, ip_recv_attr_t *ira)
7215 {
7216         netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
7217         ipsec_stack_t   *ipss = ns->netstack_ipsec;
7218         in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
7219         in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
7220         mblk_t          *mp;
7221 
7222         ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
7223 
7224         if (cl_inet_idlesa == NULL) {
7225                 ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
7226                     DROPPER(ipss, ipds_sadb_inidle_overflow),
7227                     &ipss->ipsec_sadb_dropper);
7228                 return;
7229         }
7230 
7231         cl_inet_idlesa(ns->netstack_stackid,
7232             (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP,
7233             ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL);
7234 
7235         mp = ip_recv_attr_to_mblk(ira);
7236         if (mp == NULL) {
7237                 ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
7238                     DROPPER(ipss, ipds_sadb_inidle_overflow),
7239                     &ipss->ipsec_sadb_dropper);
7240                 return;
7241         }
7242         linkb(mp, bpkt);
7243 
7244         mutex_enter(&ipsa->ipsa_lock);
7245         ipsa->ipsa_mblkcnt++;
7246         if (ipsa->ipsa_bpkt_head == NULL) {
7247                 ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
7248         } else {
7249                 ipsa->ipsa_bpkt_tail->b_next = bpkt;
7250                 ipsa->ipsa_bpkt_tail = bpkt;
7251                 if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
7252                         mblk_t *tmp;
7253 
7254                         tmp = ipsa->ipsa_bpkt_head;
7255                         ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
7256                         tmp = ip_recv_attr_free_mblk(tmp);
7257                         ip_drop_packet(tmp, B_TRUE, NULL,
7258                             DROPPER(ipss, ipds_sadb_inidle_overflow),
7259                             &ipss->ipsec_sadb_dropper);
7260                         ipsa->ipsa_mblkcnt --;
7261                 }
7262         }
7263         mutex_exit(&ipsa->ipsa_lock);
7264 }
7265 
7266 /*
7267  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
7268  * and put into STREAMS again.
7269  */
7270 void
7271 sadb_clear_buf_pkt(void *ipkt)
7272 {
7273         mblk_t  *tmp, *buf_pkt;
7274         ip_recv_attr_t  iras;
7275 
7276         buf_pkt = (mblk_t *)ipkt;
7277 
7278         while (buf_pkt != NULL) {
7279                 mblk_t *data_mp;
7280 
7281                 tmp = buf_pkt->b_next;
7282                 buf_pkt->b_next = NULL;
7283 
7284                 data_mp = buf_pkt->b_cont;
7285                 buf_pkt->b_cont = NULL;
7286                 if (!ip_recv_attr_from_mblk(buf_pkt, &iras)) {
7287                         /* The ill or ip_stack_t disappeared on us. */
7288                         ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
7289                         freemsg(data_mp);
7290                 } else {
7291                         ip_input_post_ipsec(data_mp, &iras);
7292                 }
7293                 ira_cleanup(&iras, B_TRUE);
7294                 buf_pkt = tmp;
7295         }
7296 }
7297 /*
7298  * Walker callback used by sadb_alg_update() to free/create crypto
7299  * context template when a crypto software provider is removed or
7300  * added.
7301  */
7302 
7303 struct sadb_update_alg_state {
7304         ipsec_algtype_t alg_type;
7305         uint8_t alg_id;
7306         boolean_t is_added;
7307         boolean_t async_auth;
7308         boolean_t async_encr;
7309 };
7310 
7311 static void
7312 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
7313 {
7314         struct sadb_update_alg_state *update_state =
7315             (struct sadb_update_alg_state *)cookie;
7316         crypto_ctx_template_t *ctx_tmpl = NULL;
7317 
7318         ASSERT(MUTEX_HELD(&head->isaf_lock));
7319 
7320         if (entry->ipsa_state == IPSA_STATE_LARVAL)
7321                 return;
7322 
7323         mutex_enter(&entry->ipsa_lock);
7324 
7325         if ((entry->ipsa_encr_alg != SADB_EALG_NONE && entry->ipsa_encr_alg !=
7326             SADB_EALG_NULL && update_state->async_encr) ||
7327             (entry->ipsa_auth_alg != SADB_AALG_NONE &&
7328             update_state->async_auth)) {
7329                 entry->ipsa_flags |= IPSA_F_ASYNC;
7330         } else {
7331                 entry->ipsa_flags &= ~IPSA_F_ASYNC;
7332         }
7333 
7334         switch (update_state->alg_type) {
7335         case IPSEC_ALG_AUTH:
7336                 if (entry->ipsa_auth_alg == update_state->alg_id)
7337                         ctx_tmpl = &entry->ipsa_authtmpl;
7338                 break;
7339         case IPSEC_ALG_ENCR:
7340                 if (entry->ipsa_encr_alg == update_state->alg_id)
7341                         ctx_tmpl = &entry->ipsa_encrtmpl;
7342                 break;
7343         default:
7344                 ctx_tmpl = NULL;
7345         }
7346 
7347         if (ctx_tmpl == NULL) {
7348                 mutex_exit(&entry->ipsa_lock);
7349                 return;
7350         }
7351 
7352         /*
7353          * The context template of the SA may be affected by the change
7354          * of crypto provider.
7355          */
7356         if (update_state->is_added) {
7357                 /* create the context template if not already done */
7358                 if (*ctx_tmpl == NULL) {
7359                         (void) ipsec_create_ctx_tmpl(entry,
7360                             update_state->alg_type);
7361                 }
7362         } else {
7363                 /*
7364                  * The crypto provider was removed. If the context template
7365                  * exists but it is no longer valid, free it.
7366                  */
7367                 if (*ctx_tmpl != NULL)
7368                         ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7369         }
7370 
7371         mutex_exit(&entry->ipsa_lock);
7372 }
7373 
7374 /*
7375  * Invoked by IP when an software crypto provider has been updated, or if
7376  * the crypto synchrony changes.  The type and id of the corresponding
7377  * algorithm is passed as argument.  The type is set to ALL in the case of
7378  * a synchrony change.
7379  *
7380  * is_added is B_TRUE if the provider was added, B_FALSE if it was
7381  * removed. The function updates the SADB and free/creates the
7382  * context templates associated with SAs if needed.
7383  */
7384 
7385 #define SADB_ALG_UPDATE_WALK(sadb, table) \
7386     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7387         &update_state)
7388 
7389 void
7390 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7391     netstack_t *ns)
7392 {
7393         struct sadb_update_alg_state update_state;
7394         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
7395         ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
7396         ipsec_stack_t *ipss = ns->netstack_ipsec;
7397 
7398         update_state.alg_type = alg_type;
7399         update_state.alg_id = alg_id;
7400         update_state.is_added = is_added;
7401         update_state.async_auth = ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
7402             IPSEC_ALGS_EXEC_ASYNC;
7403         update_state.async_encr = ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
7404             IPSEC_ALGS_EXEC_ASYNC;
7405 
7406         if (alg_type == IPSEC_ALG_AUTH || alg_type == IPSEC_ALG_ALL) {
7407                 /* walk the AH tables only for auth. algorithm changes */
7408                 SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7409                 SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7410                 SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7411                 SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7412         }
7413 
7414         /* walk the ESP tables */
7415         SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7416         SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7417         SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7418         SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7419 }
7420 
7421 /*
7422  * Creates a context template for the specified SA. This function
7423  * is called when an SA is created and when a context template needs
7424  * to be created due to a change of software provider.
7425  */
7426 int
7427 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7428 {
7429         ipsec_alginfo_t *alg;
7430         crypto_mechanism_t mech;
7431         crypto_key_t *key;
7432         crypto_ctx_template_t *sa_tmpl;
7433         int rv;
7434         ipsec_stack_t   *ipss = sa->ipsa_netstack->netstack_ipsec;
7435 
7436         ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
7437         ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7438 
7439         /* get pointers to the algorithm info, context template, and key */
7440         switch (alg_type) {
7441         case IPSEC_ALG_AUTH:
7442                 key = &sa->ipsa_kcfauthkey;
7443                 sa_tmpl = &sa->ipsa_authtmpl;
7444                 alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7445                 break;
7446         case IPSEC_ALG_ENCR:
7447                 key = &sa->ipsa_kcfencrkey;
7448                 sa_tmpl = &sa->ipsa_encrtmpl;
7449                 alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7450                 break;
7451         default:
7452                 alg = NULL;
7453         }
7454 
7455         if (alg == NULL || !ALG_VALID(alg))
7456                 return (EINVAL);
7457 
7458         /* initialize the mech info structure for the framework */
7459         ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7460         mech.cm_type = alg->alg_mech_type;
7461         mech.cm_param = NULL;
7462         mech.cm_param_len = 0;
7463 
7464         /* create a new context template */
7465         rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7466 
7467         /*
7468          * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7469          * providers are available for that mechanism. In that case
7470          * we don't fail, and will generate the context template from
7471          * the framework callback when a software provider for that
7472          * mechanism registers.
7473          *
7474          * The context template is assigned the special value
7475          * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7476          * lack of memory. No attempt will be made to use
7477          * the context template if it is set to this value.
7478          */
7479         if (rv == CRYPTO_HOST_MEMORY) {
7480                 *sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7481         } else if (rv != CRYPTO_SUCCESS) {
7482                 *sa_tmpl = NULL;
7483                 if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7484                         return (EINVAL);
7485         }
7486 
7487         return (0);
7488 }
7489 
7490 /*
7491  * Destroy the context template of the specified algorithm type
7492  * of the specified SA. Must be called while holding the SA lock.
7493  */
7494 void
7495 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7496 {
7497         ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7498 
7499         if (alg_type == IPSEC_ALG_AUTH) {
7500                 if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7501                         sa->ipsa_authtmpl = NULL;
7502                 else if (sa->ipsa_authtmpl != NULL) {
7503                         crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7504                         sa->ipsa_authtmpl = NULL;
7505                 }
7506         } else {
7507                 ASSERT(alg_type == IPSEC_ALG_ENCR);
7508                 if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7509                         sa->ipsa_encrtmpl = NULL;
7510                 else if (sa->ipsa_encrtmpl != NULL) {
7511                         crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7512                         sa->ipsa_encrtmpl = NULL;
7513                 }
7514         }
7515 }
7516 
7517 /*
7518  * Use the kernel crypto framework to check the validity of a key received
7519  * via keysock. Returns 0 if the key is OK, -1 otherwise.
7520  */
7521 int
7522 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7523     boolean_t is_auth, int *diag)
7524 {
7525         crypto_mechanism_t mech;
7526         crypto_key_t crypto_key;
7527         int crypto_rc;
7528 
7529         mech.cm_type = mech_type;
7530         mech.cm_param = NULL;
7531         mech.cm_param_len = 0;
7532 
7533         crypto_key.ck_format = CRYPTO_KEY_RAW;
7534         crypto_key.ck_data = sadb_key + 1;
7535         crypto_key.ck_length = sadb_key->sadb_key_bits;
7536 
7537         crypto_rc = crypto_key_check(&mech, &crypto_key);
7538 
7539         switch (crypto_rc) {
7540         case CRYPTO_SUCCESS:
7541                 return (0);
7542         case CRYPTO_MECHANISM_INVALID:
7543         case CRYPTO_MECH_NOT_SUPPORTED:
7544                 *diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7545                     SADB_X_DIAGNOSTIC_BAD_EALG;
7546                 break;
7547         case CRYPTO_KEY_SIZE_RANGE:
7548                 *diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7549                     SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7550                 break;
7551         case CRYPTO_WEAK_KEY:
7552                 *diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7553                     SADB_X_DIAGNOSTIC_WEAK_EKEY;
7554                 break;
7555         }
7556 
7557         return (-1);
7558 }
7559 
7560 /*
7561  * Whack options in the outer IP header when ipsec changes the outer label
7562  *
7563  * This is inelegant and really could use refactoring.
7564  */
7565 mblk_t *
7566 sadb_whack_label_v4(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
7567     ipdropper_t *dropper)
7568 {
7569         int delta;
7570         int plen;
7571         dblk_t *db;
7572         int hlen;
7573         uint8_t *opt_storage = assoc->ipsa_opt_storage;
7574         ipha_t *ipha = (ipha_t *)mp->b_rptr;
7575 
7576         plen = ntohs(ipha->ipha_length);
7577 
7578         delta = tsol_remove_secopt(ipha, MBLKL(mp));
7579         mp->b_wptr += delta;
7580         plen += delta;
7581 
7582         /* XXX XXX code copied from tsol_check_label */
7583 
7584         /* Make sure we have room for the worst-case addition */
7585         hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
7586         hlen = (hlen + 3) & ~3;
7587         if (hlen > IP_MAX_HDR_LENGTH)
7588                 hlen = IP_MAX_HDR_LENGTH;
7589         hlen -= IPH_HDR_LENGTH(ipha);
7590 
7591         db = mp->b_datap;
7592         if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
7593                 int copylen;
7594                 mblk_t *new_mp;
7595 
7596                 /* allocate enough to be meaningful, but not *too* much */
7597                 copylen = MBLKL(mp);
7598                 if (copylen > 256)
7599                         copylen = 256;
7600                 new_mp = allocb_tmpl(hlen + copylen +
7601                     (mp->b_rptr - mp->b_datap->db_base), mp);
7602 
7603                 if (new_mp == NULL) {
7604                         ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
7605                         return (NULL);
7606                 }
7607 
7608                 /* keep the bias */
7609                 new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
7610                 new_mp->b_wptr = new_mp->b_rptr + copylen;
7611                 bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
7612                 new_mp->b_cont = mp;
7613                 if ((mp->b_rptr += copylen) >= mp->b_wptr) {
7614                         new_mp->b_cont = mp->b_cont;
7615                         freeb(mp);
7616                 }
7617                 mp = new_mp;
7618                 ipha = (ipha_t *)mp->b_rptr;
7619         }
7620 
7621         delta = tsol_prepend_option(assoc->ipsa_opt_storage, ipha, MBLKL(mp));
7622 
7623         ASSERT(delta != -1);
7624 
7625         plen += delta;
7626         mp->b_wptr += delta;
7627 
7628         /*
7629          * Paranoia
7630          */
7631         db = mp->b_datap;
7632 
7633         ASSERT3P(mp->b_wptr, <=, db->db_lim);
7634         ASSERT3P(mp->b_rptr, <=, db->db_lim);
7635 
7636         ASSERT3P(mp->b_wptr, >=, db->db_base);
7637         ASSERT3P(mp->b_rptr, >=, db->db_base);
7638         /* End paranoia */
7639 
7640         ipha->ipha_length = htons(plen);
7641 
7642         return (mp);
7643 }
7644 
7645 mblk_t *
7646 sadb_whack_label_v6(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
7647     ipdropper_t *dropper)
7648 {
7649         int delta;
7650         int plen;
7651         dblk_t *db;
7652         int hlen;
7653         uint8_t *opt_storage = assoc->ipsa_opt_storage;
7654         uint_t sec_opt_len; /* label option length not including type, len */
7655         ip6_t *ip6h = (ip6_t *)mp->b_rptr;
7656 
7657         plen = ntohs(ip6h->ip6_plen);
7658 
7659         delta = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
7660         mp->b_wptr += delta;
7661         plen += delta;
7662 
7663         /* XXX XXX code copied from tsol_check_label_v6 */
7664         /*
7665          * Make sure we have room for the worst-case addition. Add 2 bytes for
7666          * the hop-by-hop ext header's next header and length fields. Add
7667          * another 2 bytes for the label option type, len and then round
7668          * up to the next 8-byte multiple.
7669          */
7670         sec_opt_len = opt_storage[1];
7671 
7672         db = mp->b_datap;
7673         hlen = (4 + sec_opt_len + 7) & ~7;
7674 
7675         if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
7676                 int copylen;
7677                 mblk_t *new_mp;
7678                 uint16_t hdr_len;
7679 
7680                 hdr_len = ip_hdr_length_v6(mp, ip6h);
7681                 /*
7682                  * Allocate enough to be meaningful, but not *too* much.
7683                  * Also all the IPv6 extension headers must be in the same mblk
7684                  */
7685                 copylen = MBLKL(mp);
7686                 if (copylen > 256)
7687                         copylen = 256;
7688                 if (copylen < hdr_len)
7689                         copylen = hdr_len;
7690                 new_mp = allocb_tmpl(hlen + copylen +
7691                     (mp->b_rptr - mp->b_datap->db_base), mp);
7692                 if (new_mp == NULL) {
7693                         ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
7694                         return (NULL);
7695                 }
7696 
7697                 /* keep the bias */
7698                 new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
7699                 new_mp->b_wptr = new_mp->b_rptr + copylen;
7700                 bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
7701                 new_mp->b_cont = mp;
7702                 if ((mp->b_rptr += copylen) >= mp->b_wptr) {
7703                         new_mp->b_cont = mp->b_cont;
7704                         freeb(mp);
7705                 }
7706                 mp = new_mp;
7707                 ip6h = (ip6_t *)mp->b_rptr;
7708         }
7709 
7710         delta = tsol_prepend_option_v6(assoc->ipsa_opt_storage,
7711             ip6h, MBLKL(mp));
7712 
7713         ASSERT(delta != -1);
7714 
7715         plen += delta;
7716         mp->b_wptr += delta;
7717 
7718         /*
7719          * Paranoia
7720          */
7721         db = mp->b_datap;
7722 
7723         ASSERT3P(mp->b_wptr, <=, db->db_lim);
7724         ASSERT3P(mp->b_rptr, <=, db->db_lim);
7725 
7726         ASSERT3P(mp->b_wptr, >=, db->db_base);
7727         ASSERT3P(mp->b_rptr, >=, db->db_base);
7728         /* End paranoia */
7729 
7730         ip6h->ip6_plen = htons(plen);
7731 
7732         return (mp);
7733 }
7734 
7735 /* Whack the labels and update ip_xmit_attr_t as needed */
7736 mblk_t *
7737 sadb_whack_label(mblk_t *mp, ipsa_t *assoc, ip_xmit_attr_t *ixa,
7738     kstat_named_t *counter, ipdropper_t *dropper)
7739 {
7740         int adjust;
7741         int iplen;
7742 
7743         if (ixa->ixa_flags & IXAF_IS_IPV4) {
7744                 ipha_t          *ipha = (ipha_t *)mp->b_rptr;
7745 
7746                 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7747                 iplen = ntohs(ipha->ipha_length);
7748                 mp = sadb_whack_label_v4(mp, assoc, counter, dropper);
7749                 if (mp == NULL)
7750                         return (NULL);
7751 
7752                 ipha = (ipha_t *)mp->b_rptr;
7753                 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7754                 adjust = (int)ntohs(ipha->ipha_length) - iplen;
7755         } else {
7756                 ip6_t           *ip6h = (ip6_t *)mp->b_rptr;
7757 
7758                 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7759                 iplen = ntohs(ip6h->ip6_plen);
7760                 mp = sadb_whack_label_v6(mp, assoc, counter, dropper);
7761                 if (mp == NULL)
7762                         return (NULL);
7763 
7764                 ip6h = (ip6_t *)mp->b_rptr;
7765                 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7766                 adjust = (int)ntohs(ip6h->ip6_plen) - iplen;
7767         }
7768         ixa->ixa_pktlen += adjust;
7769         ixa->ixa_ip_hdr_length += adjust;
7770         return (mp);
7771 }
7772 
7773 /*
7774  * If this is an outgoing SA then add some fuzz to the
7775  * SOFT EXPIRE time. The reason for this is to stop
7776  * peers trying to renegotiate SOFT expiring SA's at
7777  * the same time. The amount of fuzz needs to be at
7778  * least 8 seconds which is the typical interval
7779  * sadb_ager(), although this is only a guide as it
7780  * selftunes.
7781  */
7782 static void
7783 lifetime_fuzz(ipsa_t *assoc)
7784 {
7785         uint8_t rnd;
7786 
7787         if (assoc->ipsa_softaddlt == 0)
7788                 return;
7789 
7790         (void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7791         rnd = (rnd & 0xF) + 8;
7792         assoc->ipsa_softexpiretime -= rnd;
7793         assoc->ipsa_softaddlt -= rnd;
7794 }
7795 
7796 static void
7797 destroy_ipsa_pair(ipsap_t *ipsapp)
7798 {
7799         /*
7800          * Because of the multi-line macro nature of IPSA_REFRELE, keep
7801          * them in { }.
7802          */
7803         if (ipsapp->ipsap_sa_ptr != NULL) {
7804                 IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7805         }
7806         if (ipsapp->ipsap_psa_ptr != NULL) {
7807                 IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7808         }
7809         init_ipsa_pair(ipsapp);
7810 }
7811 
7812 static void
7813 init_ipsa_pair(ipsap_t *ipsapp)
7814 {
7815         ipsapp->ipsap_bucket = NULL;
7816         ipsapp->ipsap_sa_ptr = NULL;
7817         ipsapp->ipsap_pbucket = NULL;
7818         ipsapp->ipsap_psa_ptr = NULL;
7819 }
7820 
7821 /*
7822  * The sadb_ager() function walks through the hash tables of SA's and ages
7823  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7824  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7825  * SA appears in both the inbound and outbound tables because its not possible
7826  * to determine its direction) are placed on a list when they expire. This is
7827  * to ensure that pair/peer SA's are reaped at the same time, even if they
7828  * expire at different times.
7829  *
7830  * This function is called twice by sadb_ager(), one after processing the
7831  * inbound table, then again after processing the outbound table.
7832  */
7833 void
7834 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7835 {
7836         templist_t *listptr;
7837         int outhash;
7838         isaf_t *bucket;
7839         boolean_t haspeer;
7840         ipsa_t *peer_assoc, *dying;
7841         /*
7842          * Haspeer cases will contain both IPv4 and IPv6.  This code
7843          * is address independent.
7844          */
7845         while (haspeerlist != NULL) {
7846                 /* "dying" contains the SA that has a peer. */
7847                 dying = haspeerlist->ipsa;
7848                 haspeer = (dying->ipsa_haspeer);
7849                 listptr = haspeerlist;
7850                 haspeerlist = listptr->next;
7851                 kmem_free(listptr, sizeof (*listptr));
7852                 /*
7853                  * Pick peer bucket based on addrfam.
7854                  */
7855                 if (outbound) {
7856                         if (haspeer)
7857                                 bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7858                         else
7859                                 bucket = INBOUND_BUCKET(sp,
7860                                     dying->ipsa_otherspi);
7861                 } else { /* inbound */
7862                         if (haspeer) {
7863                                 if (dying->ipsa_addrfam == AF_INET6) {
7864                                         outhash = OUTBOUND_HASH_V6(sp,
7865                                             *((in6_addr_t *)&dying->
7866                                             ipsa_dstaddr));
7867                                 } else {
7868                                         outhash = OUTBOUND_HASH_V4(sp,
7869                                             *((ipaddr_t *)&dying->
7870                                             ipsa_dstaddr));
7871                                 }
7872                         } else if (dying->ipsa_addrfam == AF_INET6) {
7873                                 outhash = OUTBOUND_HASH_V6(sp,
7874                                     *((in6_addr_t *)&dying->
7875                                     ipsa_srcaddr));
7876                         } else {
7877                                 outhash = OUTBOUND_HASH_V4(sp,
7878                                     *((ipaddr_t *)&dying->
7879                                     ipsa_srcaddr));
7880                         }
7881                         bucket = &(sp->sdb_of[outhash]);
7882                 }
7883 
7884                 mutex_enter(&bucket->isaf_lock);
7885                 /*
7886                  * "haspeer" SA's have the same src/dst address ordering,
7887                  * "paired" SA's have the src/dst addresses reversed.
7888                  */
7889                 if (haspeer) {
7890                         peer_assoc = ipsec_getassocbyspi(bucket,
7891                             dying->ipsa_spi, dying->ipsa_srcaddr,
7892                             dying->ipsa_dstaddr, dying->ipsa_addrfam);
7893                 } else {
7894                         peer_assoc = ipsec_getassocbyspi(bucket,
7895                             dying->ipsa_otherspi, dying->ipsa_dstaddr,
7896                             dying->ipsa_srcaddr, dying->ipsa_addrfam);
7897                 }
7898 
7899                 mutex_exit(&bucket->isaf_lock);
7900                 if (peer_assoc != NULL) {
7901                         mutex_enter(&peer_assoc->ipsa_lock);
7902                         mutex_enter(&dying->ipsa_lock);
7903                         if (!haspeer) {
7904                                 /*
7905                                  * Only SA's which have a "peer" or are
7906                                  * "paired" end up on this list, so this
7907                                  * must be a "paired" SA, update the flags
7908                                  * to break the pair.
7909                                  */
7910                                 peer_assoc->ipsa_otherspi = 0;
7911                                 peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7912                                 dying->ipsa_otherspi = 0;
7913                                 dying->ipsa_flags &= ~IPSA_F_PAIRED;
7914                         }
7915                         if (haspeer || outbound) {
7916                                 /*
7917                                  * Update the state of the "inbound" SA when
7918                                  * the "outbound" SA has expired. Don't update
7919                                  * the "outbound" SA when the "inbound" SA
7920                                  * SA expires because setting the hard_addtime
7921                                  * below will cause this to happen.
7922                                  */
7923                                 peer_assoc->ipsa_state = dying->ipsa_state;
7924                         }
7925                         if (dying->ipsa_state == IPSA_STATE_DEAD)
7926                                 peer_assoc->ipsa_hardexpiretime = 1;
7927 
7928                         mutex_exit(&dying->ipsa_lock);
7929                         mutex_exit(&peer_assoc->ipsa_lock);
7930                         IPSA_REFRELE(peer_assoc);
7931                 }
7932                 IPSA_REFRELE(dying);
7933         }
7934 }
7935 
7936 /*
7937  * Ensure that the IV used for CCM mode never repeats. The IV should
7938  * only be updated by this function. Also check to see if the IV
7939  * is about to wrap and generate a SOFT Expire. This function is only
7940  * called for outgoing packets, the IV for incomming packets is taken
7941  * from the wire. If the outgoing SA needs to be expired, update
7942  * the matching incomming SA.
7943  */
7944 boolean_t
7945 update_iv(uint8_t *iv_ptr, queue_t *pfkey_q, ipsa_t *assoc,
7946     ipsecesp_stack_t *espstack)
7947 {
7948         boolean_t rc = B_TRUE;
7949         isaf_t *inbound_bucket;
7950         sadb_t *sp;
7951         ipsa_t *pair_sa = NULL;
7952         int sa_new_state = 0;
7953 
7954         /* For non counter modes, the IV is random data. */
7955         if (!(assoc->ipsa_flags & IPSA_F_COUNTERMODE)) {
7956                 (void) random_get_pseudo_bytes(iv_ptr, assoc->ipsa_iv_len);
7957                 return (rc);
7958         }
7959 
7960         mutex_enter(&assoc->ipsa_lock);
7961 
7962         (*assoc->ipsa_iv)++;
7963 
7964         if (*assoc->ipsa_iv == assoc->ipsa_iv_hardexpire) {
7965                 sa_new_state = IPSA_STATE_DEAD;
7966                 rc = B_FALSE;
7967         } else if (*assoc->ipsa_iv == assoc->ipsa_iv_softexpire) {
7968                 if (assoc->ipsa_state != IPSA_STATE_DYING) {
7969                         /*
7970                          * This SA may have already been expired when its
7971                          * PAIR_SA expired.
7972                          */
7973                         sa_new_state = IPSA_STATE_DYING;
7974                 }
7975         }
7976         if (sa_new_state) {
7977                 /*
7978                  * If there is a state change, we need to update this SA
7979                  * and its "pair", we can find the bucket for the "pair" SA
7980                  * while holding the ipsa_t mutex, but we won't actually
7981                  * update anything untill the ipsa_t mutex has been released
7982                  * for _this_ SA.
7983                  */
7984                 assoc->ipsa_state = sa_new_state;
7985                 if (assoc->ipsa_addrfam == AF_INET6) {
7986                         sp = &espstack->esp_sadb.s_v6;
7987                 } else {
7988                         sp = &espstack->esp_sadb.s_v4;
7989                 }
7990                 inbound_bucket = INBOUND_BUCKET(sp, assoc->ipsa_otherspi);
7991                 sadb_expire_assoc(pfkey_q, assoc);
7992         }
7993         if (rc == B_TRUE)
7994                 bcopy(assoc->ipsa_iv, iv_ptr, assoc->ipsa_iv_len);
7995 
7996         mutex_exit(&assoc->ipsa_lock);
7997 
7998         if (sa_new_state) {
7999                 /* Find the inbound SA, need to lock hash bucket. */
8000                 mutex_enter(&inbound_bucket->isaf_lock);
8001                 pair_sa = ipsec_getassocbyspi(inbound_bucket,
8002                     assoc->ipsa_otherspi, assoc->ipsa_dstaddr,
8003                     assoc->ipsa_srcaddr, assoc->ipsa_addrfam);
8004                 mutex_exit(&inbound_bucket->isaf_lock);
8005                 if (pair_sa != NULL) {
8006                         mutex_enter(&pair_sa->ipsa_lock);
8007                         pair_sa->ipsa_state = sa_new_state;
8008                         mutex_exit(&pair_sa->ipsa_lock);
8009                         IPSA_REFRELE(pair_sa);
8010                 }
8011         }
8012 
8013         return (rc);
8014 }
8015 
8016 void
8017 ccm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
8018     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
8019 {
8020         uchar_t *nonce;
8021         crypto_mechanism_t *combined_mech;
8022         CK_AES_CCM_PARAMS *params;
8023 
8024         combined_mech = (crypto_mechanism_t *)cm_mech;
8025         params = (CK_AES_CCM_PARAMS *)(combined_mech + 1);
8026         nonce = (uchar_t *)(params + 1);
8027         params->ulMACSize = assoc->ipsa_mac_len;
8028         params->ulNonceSize = assoc->ipsa_nonce_len;
8029         params->ulAuthDataSize = sizeof (esph_t);
8030         params->ulDataSize = data_len;
8031         params->nonce = nonce;
8032         params->authData = esph;
8033 
8034         cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
8035         cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
8036         cm_mech->combined_mech.cm_param = (caddr_t)params;
8037         /* See gcm_params_init() for comments. */
8038         bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
8039         nonce += assoc->ipsa_saltlen;
8040         bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
8041         crypto_data->cd_miscdata = NULL;
8042 }
8043 
8044 /* ARGSUSED */
8045 void
8046 cbc_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
8047     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
8048 {
8049         cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
8050         cm_mech->combined_mech.cm_param_len = 0;
8051         cm_mech->combined_mech.cm_param = NULL;
8052         crypto_data->cd_miscdata = (char *)iv_ptr;
8053 }
8054 
8055 /* ARGSUSED */
8056 void
8057 gcm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
8058     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
8059 {
8060         uchar_t *nonce;
8061         crypto_mechanism_t *combined_mech;
8062         CK_AES_GCM_PARAMS *params;
8063 
8064         combined_mech = (crypto_mechanism_t *)cm_mech;
8065         params = (CK_AES_GCM_PARAMS *)(combined_mech + 1);
8066         nonce = (uchar_t *)(params + 1);
8067 
8068         params->pIv = nonce;
8069         params->ulIvLen = assoc->ipsa_nonce_len;
8070         params->ulIvBits = SADB_8TO1(assoc->ipsa_nonce_len);
8071         params->pAAD = esph;
8072         params->ulAADLen = sizeof (esph_t);
8073         params->ulTagBits = SADB_8TO1(assoc->ipsa_mac_len);
8074 
8075         cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
8076         cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
8077         cm_mech->combined_mech.cm_param = (caddr_t)params;
8078         /*
8079          * Create the nonce, which is made up of the salt and the IV.
8080          * Copy the salt from the SA and the IV from the packet.
8081          * For inbound packets we copy the IV from the packet because it
8082          * was set by the sending system, for outbound packets we copy the IV
8083          * from the packet because the IV in the SA may be changed by another
8084          * thread, the IV in the packet was created while holding a mutex.
8085          */
8086         bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
8087         nonce += assoc->ipsa_saltlen;
8088         bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
8089         crypto_data->cd_miscdata = NULL;
8090 }