1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  * Copyright 2015 Garrett D'Amore <garrett@damore.org>
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/stream.h>
  30 #include <sys/stropts.h>
  31 #include <sys/strsubr.h>
  32 #include <sys/errno.h>
  33 #include <sys/ddi.h>
  34 #include <sys/debug.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/stream.h>
  37 #include <sys/strlog.h>
  38 #include <sys/kmem.h>
  39 #include <sys/sunddi.h>
  40 #include <sys/tihdr.h>
  41 #include <sys/atomic.h>
  42 #include <sys/socket.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/crypto/common.h>
  45 #include <sys/crypto/api.h>
  46 #include <sys/zone.h>
  47 #include <netinet/in.h>
  48 #include <net/if.h>
  49 #include <net/pfkeyv2.h>
  50 #include <net/pfpolicy.h>
  51 #include <inet/common.h>
  52 #include <netinet/ip6.h>
  53 #include <inet/ip.h>
  54 #include <inet/ip_ire.h>
  55 #include <inet/ip6.h>
  56 #include <inet/ipsec_info.h>
  57 #include <inet/tcp.h>
  58 #include <inet/sadb.h>
  59 #include <inet/ipsec_impl.h>
  60 #include <inet/ipsecah.h>
  61 #include <inet/ipsecesp.h>
  62 #include <sys/random.h>
  63 #include <sys/dlpi.h>
  64 #include <sys/strsun.h>
  65 #include <sys/strsubr.h>
  66 #include <inet/ip_if.h>
  67 #include <inet/ipdrop.h>
  68 #include <inet/ipclassifier.h>
  69 #include <inet/sctp_ip.h>
  70 #include <sys/tsol/tnet.h>
  71 
  72 /*
  73  * This source file contains Security Association Database (SADB) common
  74  * routines.  They are linked in with the AH module.  Since AH has no chance
  75  * of falling under export control, it was safe to link it in there.
  76  */
  77 
  78 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
  79     ipsec_action_t *, boolean_t, uint32_t, uint32_t, sadb_sens_t *,
  80     netstack_t *);
  81 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *);
  82 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
  83                             netstack_t *);
  84 static void sadb_destroy(sadb_t *, netstack_t *);
  85 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
  86 static ts_label_t *sadb_label_from_sens(sadb_sens_t *, uint64_t *);
  87 static sadb_sens_t *sadb_make_sens_ext(ts_label_t *tsl, int *len);
  88 
  89 static time_t sadb_add_time(time_t, uint64_t);
  90 static void lifetime_fuzz(ipsa_t *);
  91 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
  92 static int get_ipsa_pair(ipsa_query_t *, ipsap_t *, int *);
  93 static void init_ipsa_pair(ipsap_t *);
  94 static void destroy_ipsa_pair(ipsap_t *);
  95 static int update_pairing(ipsap_t *, ipsa_query_t *, keysock_in_t *, int *);
  96 static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
  97 
  98 /*
  99  * ipsacq_maxpackets is defined here to make it tunable
 100  * from /etc/system.
 101  */
 102 extern uint64_t ipsacq_maxpackets;
 103 
/*
 * SET_EXPIRE(sa, delta, exp):
 *
 * "delta" and "exp" are field-name suffixes that get token-pasted onto
 * "ipsa_".  If (sa)->ipsa_<delta> is non-zero, (sa)->ipsa_<exp> is set to
 * (sa)->ipsa_addtime plus that delta, computed with sadb_add_time().  If
 * the delta is zero, ipsa_<exp> is left untouched.
 *
 * The expansion is a bare brace block and "sa" is evaluated more than once,
 * so pass a side-effect-free expression.
 */
#define SET_EXPIRE(sa, delta, exp) {				\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		(sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime,	\
			(sa)->ipsa_ ## delta);				\
	}								\
}
 110 
/*
 * UPDATE_EXPIRE(sa, delta, exp):
 *
 * "delta" and "exp" are field-name suffixes that get token-pasted onto
 * "ipsa_".  If (sa)->ipsa_<delta> is non-zero, a candidate expiration is
 * computed as (sa)->ipsa_usetime plus that delta using sadb_add_time().
 * When (sa)->ipsa_<exp> is currently zero it is set to the candidate;
 * otherwise it becomes the smaller (MIN) of its current value and the
 * candidate.
 *
 * The expansion is a bare brace block that declares a local named "tmp",
 * and "sa" is evaluated more than once.
 */
#define UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
			(sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0)				\
			(sa)->ipsa_ ## exp = tmp;			\
		else							\
			(sa)->ipsa_ ## exp =				\
			    MIN((sa)->ipsa_ ## exp, tmp);		\
	}								\
}
 122 
 123 
 124 /* wrap the macro so we can pass it as a function pointer */
 125 void
 126 sadb_sa_refrele(void *target)
 127 {
 128         IPSA_REFRELE(((ipsa_t *)target));
 129 }
 130 
 131 /*
 132  * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 133  * a signed type.
 134  */
 135 #define TIME_MAX LONG_MAX
 136 
 137 /*
 138  * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 139  * time_t is defined to be a signed type with the same range as
 140  * "long".  On ILP32 systems, we thus run the risk of wrapping around
 141  * at end of time, as well as "overwrapping" the clock back around
 142  * into a seemingly valid but incorrect future date earlier than the
 143  * desired expiration.
 144  *
 145  * In order to avoid odd behavior (either negative lifetimes or loss
 146  * of high order bits) when someone asks for bizarrely long SA
 147  * lifetimes, we do a saturating add for expire times.
 148  *
 149  * We presume that ILP32 systems will be past end of support life when
 150  * the 32-bit time_t overflows (a dangerous assumption, mind you..).
 151  *
 152  * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
 153  * will hopefully have figured out clever ways to avoid the use of
 154  * fixed-sized integers in computation.
 155  */
 156 static time_t
 157 sadb_add_time(time_t base, uint64_t delta)
 158 {
 159         time_t sum;
 160 
 161         /*
 162          * Clip delta to the maximum possible time_t value to
 163          * prevent "overwrapping" back into a shorter-than-desired
 164          * future time.
 165          */
 166         if (delta > TIME_MAX)
 167                 delta = TIME_MAX;
 168         /*
 169          * This sum may still overflow.
 170          */
 171         sum = base + delta;
 172 
 173         /*
 174          * .. so if the result is less than the base, we overflowed.
 175          */
 176         if (sum < base)
 177                 sum = TIME_MAX;
 178 
 179         return (sum);
 180 }
 181 
/*
 * Callers of this function have already created a working security
 * association, and have found the appropriate table & hash chain.  All this
 * function does is check duplicates, and insert the SA.  The caller needs to
 * hold the hash bucket lock and increment the refcnt before insertion.
 *
 * Return 0 if success, EEXIST if collision.
 *
 * A "collision" is an SA already on the chain with the same destination
 * address and the same SPI as the new one.
 */
/*
 * True when the two SAs' unique ids are equal after each has been masked
 * with its own unique mask.  Only defined for the duration of
 * sadb_insertassoc().
 */
#define SA_UNIQUE_MATCH(sa1, sa2) \
	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))

int
sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
{
	/* Address of the link the new SA will be stored through. */
	ipsa_t **ptpn = NULL;
	ipsa_t *walker;
	boolean_t unspecsrc;

	ASSERT(MUTEX_HELD(&bucket->isaf_lock));

	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);

	walker = bucket->isaf_ipsa;
	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);

	/*
	 * Find insertion point (pointed to with **ptpn).  Insert at the head
	 * of the list unless there's an unspecified source address, then
	 * insert it after the last SA with a specified source address.
	 *
	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
	 * checking for collisions.
	 */

	while (walker != NULL) {
		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
			/* Same destination and same SPI: refuse the insert. */
			if (walker->ipsa_spi == ipsa->ipsa_spi)
				return (EEXIST);

			/*
			 * Same destination, different SPI.  If the new SA is
			 * MATURE and this existing SA has IPSA_F_USED set and
			 * a matching masked unique id, flag the existing SA
			 * with IPSA_F_CINVALID.  The walker's flags are
			 * modified under the walker's own lock.
			 *
			 * NOTE(review): an EEXIST return on a later chain
			 * entry does not undo IPSA_F_CINVALID already set on
			 * earlier entries -- confirm that is intended.
			 */
			mutex_enter(&walker->ipsa_lock);
			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
			    (walker->ipsa_flags & IPSA_F_USED) &&
			    SA_UNIQUE_MATCH(walker, ipsa)) {
				walker->ipsa_flags |= IPSA_F_CINVALID;
			}
			mutex_exit(&walker->ipsa_lock);
		}

		/*
		 * Only pick an insertion point once, and only for a new SA
		 * with an unspecified source: either in front of the first
		 * SA that also has an unspecified source (reuse its
		 * back-pointer), or at the tail of the chain.
		 */
		if (ptpn == NULL && unspecsrc) {
			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
			    walker->ipsa_addrfam))
				ptpn = walker->ipsa_ptpn;
			else if (walker->ipsa_next == NULL)
				ptpn = &walker->ipsa_next;
		}

		walker = walker->ipsa_next;
	}

	/* No insertion point chosen above: insert at the bucket head. */
	if (ptpn == NULL)
		ptpn = &bucket->isaf_ipsa;

	/*
	 * Splice the new SA in through *ptpn, fixing up the successor's
	 * back-pointer, and remember which bucket lock protects the links.
	 */
	ipsa->ipsa_next = *ptpn;
	ipsa->ipsa_ptpn = ptpn;
	if (ipsa->ipsa_next != NULL)
		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
	*ptpn = ipsa;
	ipsa->ipsa_linklock = &bucket->isaf_lock;

	return (0);
}
#undef SA_UNIQUE_MATCH
 255 
 256 /*
 257  * Free a security association.  Its reference count is 0, which means
 258  * I must free it.  The SA must be unlocked and must not be linked into
 259  * any fanout list.
 260  */
 261 static void
 262 sadb_freeassoc(ipsa_t *ipsa)
 263 {
 264         ipsec_stack_t   *ipss = ipsa->ipsa_netstack->netstack_ipsec;
 265         mblk_t          *asyncmp, *mp;
 266 
 267         ASSERT(ipss != NULL);
 268         ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
 269         ASSERT(ipsa->ipsa_refcnt == 0);
 270         ASSERT(ipsa->ipsa_next == NULL);
 271         ASSERT(ipsa->ipsa_ptpn == NULL);
 272 
 273 
 274         asyncmp = sadb_clear_lpkt(ipsa);
 275         if (asyncmp != NULL) {
 276                 mp = ip_recv_attr_free_mblk(asyncmp);
 277                 ip_drop_packet(mp, B_TRUE, NULL,
 278                     DROPPER(ipss, ipds_sadb_inlarval_timeout),
 279                     &ipss->ipsec_sadb_dropper);
 280         }
 281         mutex_enter(&ipsa->ipsa_lock);
 282 
 283         if (ipsa->ipsa_tsl != NULL) {
 284                 label_rele(ipsa->ipsa_tsl);
 285                 ipsa->ipsa_tsl = NULL;
 286         }
 287 
 288         if (ipsa->ipsa_otsl != NULL) {
 289                 label_rele(ipsa->ipsa_otsl);
 290                 ipsa->ipsa_otsl = NULL;
 291         }
 292 
 293         ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
 294         ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
 295         mutex_exit(&ipsa->ipsa_lock);
 296 
 297         /* bzero() these fields for paranoia's sake. */
 298         if (ipsa->ipsa_authkey != NULL) {
 299                 bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
 300                 kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
 301         }
 302         if (ipsa->ipsa_encrkey != NULL) {
 303                 bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
 304                 kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
 305         }
 306         if (ipsa->ipsa_nonce_buf != NULL) {
 307                 bzero(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
 308                 kmem_free(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
 309         }
 310         if (ipsa->ipsa_src_cid != NULL) {
 311                 IPSID_REFRELE(ipsa->ipsa_src_cid);
 312         }
 313         if (ipsa->ipsa_dst_cid != NULL) {
 314                 IPSID_REFRELE(ipsa->ipsa_dst_cid);
 315         }
 316         if (ipsa->ipsa_emech.cm_param != NULL)
 317                 kmem_free(ipsa->ipsa_emech.cm_param,
 318                     ipsa->ipsa_emech.cm_param_len);
 319 
 320         mutex_destroy(&ipsa->ipsa_lock);
 321         kmem_free(ipsa, sizeof (*ipsa));
 322 }
 323 
 324 /*
 325  * Unlink a security association from a hash bucket.  Assume the hash bucket
 326  * lock is held, but the association's lock is not.
 327  *
 328  * Note that we do not bump the bucket's generation number here because
 329  * we might not be making a visible change to the set of visible SA's.
 330  * All callers MUST bump the bucket's generation number before they unlock
 331  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
 332  * was present in the bucket at the time it was locked.
 333  */
 334 void
 335 sadb_unlinkassoc(ipsa_t *ipsa)
 336 {
 337         ASSERT(ipsa->ipsa_linklock != NULL);
 338         ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
 339 
 340         /* These fields are protected by the link lock. */
 341         *(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
 342         if (ipsa->ipsa_next != NULL) {
 343                 ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
 344                 ipsa->ipsa_next = NULL;
 345         }
 346 
 347         ipsa->ipsa_ptpn = NULL;
 348 
 349         /* This may destroy the SA. */
 350         IPSA_REFRELE(ipsa);
 351 }
 352 
 353 /*
 354  * Create a larval security association with the specified SPI.  All other
 355  * fields are zeroed.
 356  */
 357 static ipsa_t *
 358 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
 359     netstack_t *ns)
 360 {
 361         ipsa_t *newbie;
 362 
 363         /*
 364          * Allocate...
 365          */
 366 
 367         newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
 368         if (newbie == NULL) {
 369                 /* Can't make new larval SA. */
 370                 return (NULL);
 371         }
 372 
 373         /* Assigned requested SPI, assume caller does SPI allocation magic. */
 374         newbie->ipsa_spi = spi;
 375         newbie->ipsa_netstack = ns;  /* No netstack_hold */
 376 
 377         /*
 378          * Copy addresses...
 379          */
 380 
 381         IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
 382         IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
 383 
 384         newbie->ipsa_addrfam = addrfam;
 385 
 386         /*
 387          * Set common initialization values, including refcnt.
 388          */
 389         mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
 390         newbie->ipsa_state = IPSA_STATE_LARVAL;
 391         newbie->ipsa_refcnt = 1;
 392         newbie->ipsa_freefunc = sadb_freeassoc;
 393 
 394         /*
 395          * There aren't a lot of other common initialization values, as
 396          * they are copied in from the PF_KEY message.
 397          */
 398 
 399         return (newbie);
 400 }
 401 
 402 /*
 403  * Call me to initialize a security association fanout.
 404  */
 405 static int
 406 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
 407 {
 408         isaf_t *table;
 409         int i;
 410 
 411         table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
 412         *tablep = table;
 413 
 414         if (table == NULL)
 415                 return (ENOMEM);
 416 
 417         for (i = 0; i < size; i++) {
 418                 mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
 419                 table[i].isaf_ipsa = NULL;
 420                 table[i].isaf_gen = 0;
 421         }
 422 
 423         return (0);
 424 }
 425 
 426 /*
 427  * Call me to initialize an acquire fanout
 428  */
 429 static int
 430 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
 431 {
 432         iacqf_t *table;
 433         int i;
 434 
 435         table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
 436         *tablep = table;
 437 
 438         if (table == NULL)
 439                 return (ENOMEM);
 440 
 441         for (i = 0; i < size; i++) {
 442                 mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
 443                 table[i].iacqf_ipsacq = NULL;
 444         }
 445 
 446         return (0);
 447 }
 448 
 449 /*
 450  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
 451  * caller must clean up partial allocations.
 452  */
 453 static int
 454 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
 455 {
 456         ASSERT(sp->sdb_of == NULL);
 457         ASSERT(sp->sdb_if == NULL);
 458         ASSERT(sp->sdb_acq == NULL);
 459 
 460         sp->sdb_hashsize = size;
 461         if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
 462                 return (ENOMEM);
 463         if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
 464                 return (ENOMEM);
 465         if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
 466                 return (ENOMEM);
 467 
 468         return (0);
 469 }
 470 
 471 /*
 472  * Call me to initialize an SADB instance; fall back to default size on failure.
 473  */
 474 static void
 475 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
 476     netstack_t *ns)
 477 {
 478         ASSERT(sp->sdb_of == NULL);
 479         ASSERT(sp->sdb_if == NULL);
 480         ASSERT(sp->sdb_acq == NULL);
 481 
 482         if (size < IPSEC_DEFAULT_HASH_SIZE)
 483                 size = IPSEC_DEFAULT_HASH_SIZE;
 484 
 485         if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
 486 
 487                 cmn_err(CE_WARN,
 488                     "Unable to allocate %u entry IPv%u %s SADB hash table",
 489                     size, ver, name);
 490 
 491                 sadb_destroy(sp, ns);
 492                 size = IPSEC_DEFAULT_HASH_SIZE;
 493                 cmn_err(CE_WARN, "Falling back to %d entries", size);
 494                 (void) sadb_init_trial(sp, size, KM_SLEEP);
 495         }
 496 }
 497 
 498 
 499 /*
 500  * Initialize an SADB-pair.
 501  */
 502 void
 503 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
 504 {
 505         sadb_init(name, &sp->s_v4, size, 4, ns);
 506         sadb_init(name, &sp->s_v6, size, 6, ns);
 507 
 508         sp->s_satype = type;
 509 
 510         ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
 511         if (type == SADB_SATYPE_AH) {
 512                 ipsec_stack_t   *ipss = ns->netstack_ipsec;
 513 
 514                 ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
 515                 sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
 516                 sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
 517         } else {
 518                 sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
 519                 sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
 520         }
 521 }
 522 
 523 /*
 524  * Deliver a single SADB_DUMP message representing a single SA.  This is
 525  * called many times by sadb_dump().
 526  *
 527  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
 528  * the caller should take that as a hint that dupb() on the "original answer"
 529  * failed, and that perhaps the caller should try again with a copyb()ed
 530  * "original answer".
 531  */
 532 static int
 533 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
 534     sadb_msg_t *samsg)
 535 {
 536         mblk_t *answer;
 537 
 538         answer = dupb(original_answer);
 539         if (answer == NULL)
 540                 return (ENOBUFS);
 541         answer->b_cont = sadb_sa2msg(ipsa, samsg);
 542         if (answer->b_cont == NULL) {
 543                 freeb(answer);
 544                 return (ENOMEM);
 545         }
 546 
 547         /* Just do a putnext, and let keysock deal with flow control. */
 548         putnext(pfkey_q, answer);
 549         return (0);
 550 }
 551 
 552 /*
 553  * Common function to allocate and prepare a keysock_out_t M_CTL message.
 554  */
 555 mblk_t *
 556 sadb_keysock_out(minor_t serial)
 557 {
 558         mblk_t *mp;
 559         keysock_out_t *kso;
 560 
 561         mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
 562         if (mp != NULL) {
 563                 mp->b_datap->db_type = M_CTL;
 564                 mp->b_wptr += sizeof (ipsec_info_t);
 565                 kso = (keysock_out_t *)mp->b_rptr;
 566                 kso->ks_out_type = KEYSOCK_OUT;
 567                 kso->ks_out_len = sizeof (*kso);
 568                 kso->ks_out_serial = serial;
 569         }
 570 
 571         return (mp);
 572 }
 573 
 574 /*
 575  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
 576  * to keysock.
 577  */
 578 static int
 579 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
 580     int num_entries, boolean_t do_peers)
 581 {
 582         int i, error = 0;
 583         mblk_t *original_answer;
 584         ipsa_t *walker;
 585         sadb_msg_t *samsg;
 586 
 587         /*
 588          * For each IPSA hash bucket do:
 589          *      - Hold the mutex
 590          *      - Walk each entry, doing an sadb_dump_deliver() on it.
 591          */
 592         ASSERT(mp->b_cont != NULL);
 593         samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
 594 
 595         original_answer = sadb_keysock_out(serial);
 596         if (original_answer == NULL)
 597                 return (ENOMEM);
 598 
 599         for (i = 0; i < num_entries; i++) {
 600                 mutex_enter(&fanout[i].isaf_lock);
 601                 for (walker = fanout[i].isaf_ipsa; walker != NULL;
 602                     walker = walker->ipsa_next) {
 603                         if (!do_peers && walker->ipsa_haspeer)
 604                                 continue;
 605                         error = sadb_dump_deliver(pfkey_q, original_answer,
 606                             walker, samsg);
 607                         if (error == ENOBUFS) {
 608                                 mblk_t *new_original_answer;
 609 
 610                                 /* Ran out of dupb's.  Try a copyb. */
 611                                 new_original_answer = copyb(original_answer);
 612                                 if (new_original_answer == NULL) {
 613                                         error = ENOMEM;
 614                                 } else {
 615                                         freeb(original_answer);
 616                                         original_answer = new_original_answer;
 617                                         error = sadb_dump_deliver(pfkey_q,
 618                                             original_answer, walker, samsg);
 619                                 }
 620                         }
 621                         if (error != 0)
 622                                 break;  /* out of for loop. */
 623                 }
 624                 mutex_exit(&fanout[i].isaf_lock);
 625                 if (error != 0)
 626                         break;  /* out of for loop. */
 627         }
 628 
 629         freeb(original_answer);
 630         return (error);
 631 }
 632 
 633 /*
 634  * Dump an entire SADB; outbound first, then inbound.
 635  */
 636 
 637 int
 638 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
 639 {
 640         int error;
 641 
 642         /* Dump outbound */
 643         error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
 644             sp->sdb_hashsize, B_TRUE);
 645         if (error)
 646                 return (error);
 647 
 648         /* Dump inbound */
 649         return (sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
 650             sp->sdb_hashsize, B_FALSE));
 651 }
 652 
 653 /*
 654  * Generic sadb table walker.
 655  *
 656  * Call "walkfn" for each SA in each bucket in "table"; pass the
 657  * bucket, the entry and "cookie" to the callback function.
 658  * Take care to ensure that walkfn can delete the SA without screwing
 659  * up our traverse.
 660  *
 661  * The bucket is locked for the duration of the callback, both so that the
 662  * callback can just call sadb_unlinkassoc() when it wants to delete something,
 663  * and so that no new entries are added while we're walking the list.
 664  */
 665 static void
 666 sadb_walker(isaf_t *table, uint_t numentries,
 667     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
 668     void *cookie)
 669 {
 670         int i;
 671         for (i = 0; i < numentries; i++) {
 672                 ipsa_t *entry, *next;
 673 
 674                 mutex_enter(&table[i].isaf_lock);
 675 
 676                 for (entry = table[i].isaf_ipsa; entry != NULL;
 677                     entry = next) {
 678                         next = entry->ipsa_next;
 679                         (*walkfn)(&table[i], entry, cookie);
 680                 }
 681                 mutex_exit(&table[i].isaf_lock);
 682         }
 683 }
 684 
 685 /*
 686  * Call me to free up a security association fanout.  Use the forever
 687  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
 688  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
 689  * when a netstack is destroyed or a module is unloaded).
 690  */
 691 static void
 692 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever)
 693 {
 694         int i;
 695         isaf_t *table = *tablep;
 696         ipsa_t *sa;
 697 
 698         if (table == NULL)
 699                 return;
 700 
 701         for (i = 0; i < numentries; i++) {
 702                 mutex_enter(&table[i].isaf_lock);
 703                 while ((sa = table[i].isaf_ipsa) != NULL) {
 704                         sadb_unlinkassoc(sa);
 705                 }
 706                 table[i].isaf_gen++;
 707                 mutex_exit(&table[i].isaf_lock);
 708                 if (forever)
 709                         mutex_destroy(&(table[i].isaf_lock));
 710         }
 711 
 712         if (forever) {
 713                 *tablep = NULL;
 714                 kmem_free(table, numentries * sizeof (*table));
 715         }
 716 }
 717 
 718 /*
 719  * Entry points to sadb_destroyer().
 720  */
 721 static void
 722 sadb_flush(sadb_t *sp, netstack_t *ns)
 723 {
 724         /*
 725          * Flush out each bucket, one at a time.  Were it not for keysock's
 726          * enforcement, there would be a subtlety where I could add on the
 727          * heels of a flush.  With keysock's enforcement, however, this
 728          * makes ESP's job easy.
 729          */
 730         sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE);
 731         sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE);
 732 
 733         /* For each acquire, destroy it; leave the bucket mutex alone. */
 734         sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
 735 }
 736 
 737 static void
 738 sadb_destroy(sadb_t *sp, netstack_t *ns)
 739 {
 740         sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE);
 741         sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE);
 742 
 743         /* For each acquire, destroy it, including the bucket mutex. */
 744         sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
 745 
 746         ASSERT(sp->sdb_of == NULL);
 747         ASSERT(sp->sdb_if == NULL);
 748         ASSERT(sp->sdb_acq == NULL);
 749 }
 750 
 751 void
 752 sadbp_flush(sadbp_t *spp, netstack_t *ns)
 753 {
 754         sadb_flush(&spp->s_v4, ns);
 755         sadb_flush(&spp->s_v6, ns);
 756 }
 757 
 758 void
 759 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
 760 {
 761         sadb_destroy(&spp->s_v4, ns);
 762         sadb_destroy(&spp->s_v6, ns);
 763 
 764         if (spp->s_satype == SADB_SATYPE_AH) {
 765                 ipsec_stack_t   *ipss = ns->netstack_ipsec;
 766 
 767                 ip_drop_unregister(&ipss->ipsec_sadb_dropper);
 768         }
 769 }
 770 
 771 
 772 /*
 773  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
 774  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
 775  * EINVAL.
 776  */
 777 int
 778 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
 779     sadb_lifetime_t *idle)
 780 {
 781         if (hard == NULL || soft == NULL)
 782                 return (0);
 783 
 784         if (hard->sadb_lifetime_allocations != 0 &&
 785             soft->sadb_lifetime_allocations != 0 &&
 786             hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
 787                 return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
 788 
 789         if (hard->sadb_lifetime_bytes != 0 &&
 790             soft->sadb_lifetime_bytes != 0 &&
 791             hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
 792                 return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
 793 
 794         if (hard->sadb_lifetime_addtime != 0 &&
 795             soft->sadb_lifetime_addtime != 0 &&
 796             hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
 797                 return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
 798 
 799         if (hard->sadb_lifetime_usetime != 0 &&
 800             soft->sadb_lifetime_usetime != 0 &&
 801             hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
 802                 return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
 803 
 804         if (idle != NULL) {
 805                 if (hard->sadb_lifetime_addtime != 0 &&
 806                     idle->sadb_lifetime_addtime != 0 &&
 807                     hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
 808                         return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
 809 
 810                 if (soft->sadb_lifetime_addtime != 0 &&
 811                     idle->sadb_lifetime_addtime != 0 &&
 812                     soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
 813                         return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
 814 
 815                 if (hard->sadb_lifetime_usetime != 0 &&
 816                     idle->sadb_lifetime_usetime != 0 &&
 817                     hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
 818                         return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
 819 
 820                 if (soft->sadb_lifetime_usetime != 0 &&
 821                     idle->sadb_lifetime_usetime != 0 &&
 822                     soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
 823                         return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
 824         }
 825 
 826         return (0);
 827 }
 828 
 829 /*
 830  * Sanity check sensitivity labels.
 831  *
 832  * For now, just reject labels on unlabeled systems.
 833  */
 834 int
 835 sadb_labelchk(keysock_in_t *ksi)
 836 {
 837         if (!is_system_labeled()) {
 838                 if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
 839                         return (SADB_X_DIAGNOSTIC_BAD_LABEL);
 840 
 841                 if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL)
 842                         return (SADB_X_DIAGNOSTIC_BAD_LABEL);
 843         }
 844 
 845         return (0);
 846 }
 847 
 848 /*
 849  * Clone a security association for the purposes of inserting a single SA
 850  * into inbound and outbound tables respectively. This function should only
 851  * be called from sadb_common_add().
 852  */
 853 static ipsa_t *
 854 sadb_cloneassoc(ipsa_t *ipsa)
 855 {
 856         ipsa_t *newbie;
 857         boolean_t error = B_FALSE;
 858 
 859         ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
 860 
 861         newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
 862         if (newbie == NULL)
 863                 return (NULL);
 864 
 865         /* Copy over what we can. */
 866         *newbie = *ipsa;
 867 
 868         /* bzero and initialize locks, in case *_init() allocates... */
 869         mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
 870 
 871         if (newbie->ipsa_tsl != NULL)
 872                 label_hold(newbie->ipsa_tsl);
 873 
 874         if (newbie->ipsa_otsl != NULL)
 875                 label_hold(newbie->ipsa_otsl);
 876 
 877         /*
 878          * While somewhat dain-bramaged, the most graceful way to
 879          * recover from errors is to keep plowing through the
 880          * allocations, and getting what I can.  It's easier to call
 881          * sadb_freeassoc() on the stillborn clone when all the
 882          * pointers aren't pointing to the parent's data.
 883          */
 884 
 885         if (ipsa->ipsa_authkey != NULL) {
 886                 newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
 887                     KM_NOSLEEP);
 888                 if (newbie->ipsa_authkey == NULL) {
 889                         error = B_TRUE;
 890                 } else {
 891                         bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
 892                             newbie->ipsa_authkeylen);
 893 
 894                         newbie->ipsa_kcfauthkey.ck_data =
 895                             newbie->ipsa_authkey;
 896                 }
 897 
 898                 if (newbie->ipsa_amech.cm_param != NULL) {
 899                         newbie->ipsa_amech.cm_param =
 900                             (char *)&newbie->ipsa_mac_len;
 901                 }
 902         }
 903 
 904         if (ipsa->ipsa_encrkey != NULL) {
 905                 newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
 906                     KM_NOSLEEP);
 907                 if (newbie->ipsa_encrkey == NULL) {
 908                         error = B_TRUE;
 909                 } else {
 910                         bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
 911                             newbie->ipsa_encrkeylen);
 912 
 913                         newbie->ipsa_kcfencrkey.ck_data =
 914                             newbie->ipsa_encrkey;
 915                 }
 916         }
 917 
 918         newbie->ipsa_authtmpl = NULL;
 919         newbie->ipsa_encrtmpl = NULL;
 920         newbie->ipsa_haspeer = B_TRUE;
 921 
 922         if (ipsa->ipsa_src_cid != NULL) {
 923                 newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
 924                 IPSID_REFHOLD(ipsa->ipsa_src_cid);
 925         }
 926 
 927         if (ipsa->ipsa_dst_cid != NULL) {
 928                 newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
 929                 IPSID_REFHOLD(ipsa->ipsa_dst_cid);
 930         }
 931 
 932         if (error) {
 933                 sadb_freeassoc(newbie);
 934                 return (NULL);
 935         }
 936 
 937         return (newbie);
 938 }
 939 
 940 /*
 941  * Initialize a SADB address extension at the address specified by addrext.
 942  * Return a pointer to the end of the new address extension.
 943  */
 944 static uint8_t *
 945 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
 946     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
 947 {
 948         struct sockaddr_in *sin;
 949         struct sockaddr_in6 *sin6;
 950         uint8_t *cur = start;
 951         int addrext_len;
 952         int sin_len;
 953         sadb_address_t *addrext = (sadb_address_t *)cur;
 954 
 955         if (cur == NULL)
 956                 return (NULL);
 957 
 958         cur += sizeof (*addrext);
 959         if (cur > end)
 960                 return (NULL);
 961 
 962         addrext->sadb_address_proto = proto;
 963         addrext->sadb_address_prefixlen = prefix;
 964         addrext->sadb_address_reserved = 0;
 965         addrext->sadb_address_exttype = exttype;
 966 
 967         switch (af) {
 968         case AF_INET:
 969                 sin = (struct sockaddr_in *)cur;
 970                 sin_len = sizeof (*sin);
 971                 cur += sin_len;
 972                 if (cur > end)
 973                         return (NULL);
 974 
 975                 sin->sin_family = af;
 976                 bzero(sin->sin_zero, sizeof (sin->sin_zero));
 977                 sin->sin_port = port;
 978                 IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
 979                 break;
 980         case AF_INET6:
 981                 sin6 = (struct sockaddr_in6 *)cur;
 982                 sin_len = sizeof (*sin6);
 983                 cur += sin_len;
 984                 if (cur > end)
 985                         return (NULL);
 986 
 987                 bzero(sin6, sizeof (*sin6));
 988                 sin6->sin6_family = af;
 989                 sin6->sin6_port = port;
 990                 IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
 991                 break;
 992         }
 993 
 994         addrext_len = roundup(cur - start, sizeof (uint64_t));
 995         addrext->sadb_address_len = SADB_8TO64(addrext_len);
 996 
 997         cur = start + addrext_len;
 998         if (cur > end)
 999                 cur = NULL;
1000 
1001         return (cur);
1002 }
1003 
1004 /*
1005  * Construct a key management cookie extension.
1006  */
1007 
1008 static uint8_t *
1009 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1010 {
1011         sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1012 
1013         if (cur == NULL)
1014                 return (NULL);
1015 
1016         cur += sizeof (*kmcext);
1017 
1018         if (cur > end)
1019                 return (NULL);
1020 
1021         kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1022         kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1023         kmcext->sadb_x_kmc_proto = kmp;
1024         kmcext->sadb_x_kmc_cookie = kmc;
1025         kmcext->sadb_x_kmc_reserved = 0;
1026 
1027         return (cur);
1028 }
1029 
1030 /*
1031  * Given an original message header with sufficient space following it, and an
1032  * SA, construct a full PF_KEY message with all of the relevant extensions.
1033  * This is mostly used for SADB_GET, and SADB_DUMP.
1034  */
1035 static mblk_t *
1036 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1037 {
1038         int alloclen, addrsize, paddrsize, authsize, encrsize;
1039         int srcidsize, dstidsize, senslen, osenslen;
1040         sa_family_t fam, pfam;  /* Address family for SADB_EXT_ADDRESS */
1041                                 /* src/dst and proxy sockaddrs. */
1042         /*
1043          * The following are pointers into the PF_KEY message this PF_KEY
1044          * message creates.
1045          */
1046         sadb_msg_t *newsamsg;
1047         sadb_sa_t *assoc;
1048         sadb_lifetime_t *lt;
1049         sadb_key_t *key;
1050         sadb_ident_t *ident;
1051         sadb_sens_t *sens;
1052         sadb_ext_t *walker;     /* For when we need a generic ext. pointer. */
1053         sadb_x_replay_ctr_t *repl_ctr;
1054         sadb_x_pair_t *pair_ext;
1055 
1056         mblk_t *mp;
1057         uint8_t *cur, *end;
1058         /* These indicate the presence of the above extension fields. */
1059         boolean_t soft = B_FALSE, hard = B_FALSE;
1060         boolean_t isrc = B_FALSE, idst = B_FALSE;
1061         boolean_t auth = B_FALSE, encr = B_FALSE;
1062         boolean_t sensinteg = B_FALSE, osensinteg = B_FALSE;
1063         boolean_t srcid = B_FALSE, dstid = B_FALSE;
1064         boolean_t idle;
1065         boolean_t paired;
1066         uint32_t otherspi;
1067 
1068         /* First off, figure out the allocation length for this message. */
1069         /*
1070          * Constant stuff.  This includes base, SA, address (src, dst),
1071          * and lifetime (current).
1072          */
1073         alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1074             sizeof (sadb_lifetime_t);
1075 
1076         fam = ipsa->ipsa_addrfam;
1077         switch (fam) {
1078         case AF_INET:
1079                 addrsize = roundup(sizeof (struct sockaddr_in) +
1080                     sizeof (sadb_address_t), sizeof (uint64_t));
1081                 break;
1082         case AF_INET6:
1083                 addrsize = roundup(sizeof (struct sockaddr_in6) +
1084                     sizeof (sadb_address_t), sizeof (uint64_t));
1085                 break;
1086         default:
1087                 return (NULL);
1088         }
1089         /*
1090          * Allocate TWO address extensions, for source and destination.
1091          * (Thus, the * 2.)
1092          */
1093         alloclen += addrsize * 2;
1094         if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1095                 alloclen += addrsize;
1096         if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1097                 alloclen += addrsize;
1098 
1099         if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1100                 paired = B_TRUE;
1101                 alloclen += sizeof (sadb_x_pair_t);
1102                 otherspi = ipsa->ipsa_otherspi;
1103         } else {
1104                 paired = B_FALSE;
1105         }
1106 
1107         /* How 'bout other lifetimes? */
1108         if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1109             ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1110                 alloclen += sizeof (sadb_lifetime_t);
1111                 soft = B_TRUE;
1112         }
1113 
1114         if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1115             ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1116                 alloclen += sizeof (sadb_lifetime_t);
1117                 hard = B_TRUE;
1118         }
1119 
1120         if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1121                 alloclen += sizeof (sadb_lifetime_t);
1122                 idle = B_TRUE;
1123         } else {
1124                 idle = B_FALSE;
1125         }
1126 
1127         /* Inner addresses. */
1128         if (ipsa->ipsa_innerfam != 0) {
1129                 pfam = ipsa->ipsa_innerfam;
1130                 switch (pfam) {
1131                 case AF_INET6:
1132                         paddrsize = roundup(sizeof (struct sockaddr_in6) +
1133                             sizeof (sadb_address_t), sizeof (uint64_t));
1134                         break;
1135                 case AF_INET:
1136                         paddrsize = roundup(sizeof (struct sockaddr_in) +
1137                             sizeof (sadb_address_t), sizeof (uint64_t));
1138                         break;
1139                 default:
1140                         cmn_err(CE_PANIC,
1141                             "IPsec SADB: Proxy length failure.\n");
1142                         break;
1143                 }
1144                 isrc = B_TRUE;
1145                 idst = B_TRUE;
1146                 alloclen += 2 * paddrsize;
1147         }
1148 
1149         /* For the following fields, assume that length != 0 ==> stuff */
1150         if (ipsa->ipsa_authkeylen != 0) {
1151                 authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1152                     sizeof (uint64_t));
1153                 alloclen += authsize;
1154                 auth = B_TRUE;
1155         }
1156 
1157         if (ipsa->ipsa_encrkeylen != 0) {
1158                 encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen +
1159                     ipsa->ipsa_nonce_len, sizeof (uint64_t));
1160                 alloclen += encrsize;
1161                 encr = B_TRUE;
1162         } else {
1163                 encr = B_FALSE;
1164         }
1165 
1166         if (ipsa->ipsa_tsl != NULL) {
1167                 senslen = sadb_sens_len_from_label(ipsa->ipsa_tsl);
1168                 alloclen += senslen;
1169                 sensinteg = B_TRUE;
1170         }
1171 
1172         if (ipsa->ipsa_otsl != NULL) {
1173                 osenslen = sadb_sens_len_from_label(ipsa->ipsa_otsl);
1174                 alloclen += osenslen;
1175                 osensinteg = B_TRUE;
1176         }
1177 
1178         /*
1179          * Must use strlen() here for lengths.  Identities use NULL
1180          * pointers to indicate their nonexistence.
1181          */
1182         if (ipsa->ipsa_src_cid != NULL) {
1183                 srcidsize = roundup(sizeof (sadb_ident_t) +
1184                     strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1185                     sizeof (uint64_t));
1186                 alloclen += srcidsize;
1187                 srcid = B_TRUE;
1188         }
1189 
1190         if (ipsa->ipsa_dst_cid != NULL) {
1191                 dstidsize = roundup(sizeof (sadb_ident_t) +
1192                     strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1193                     sizeof (uint64_t));
1194                 alloclen += dstidsize;
1195                 dstid = B_TRUE;
1196         }
1197 
1198         if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1199                 alloclen += sizeof (sadb_x_kmc_t);
1200 
1201         if (ipsa->ipsa_replay != 0) {
1202                 alloclen += sizeof (sadb_x_replay_ctr_t);
1203         }
1204 
1205         /* Make sure the allocation length is a multiple of 8 bytes. */
1206         ASSERT((alloclen & 0x7) == 0);
1207 
1208         /* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1209         mp = allocb(alloclen, BPRI_HI);
1210         if (mp == NULL)
1211                 return (NULL);
1212         bzero(mp->b_rptr, alloclen);
1213 
1214         mp->b_wptr += alloclen;
1215         end = mp->b_wptr;
1216         newsamsg = (sadb_msg_t *)mp->b_rptr;
1217         *newsamsg = *samsg;
1218         newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1219 
1220         mutex_enter(&ipsa->ipsa_lock);   /* Since I'm grabbing SA fields... */
1221 
1222         newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1223 
1224         assoc = (sadb_sa_t *)(newsamsg + 1);
1225         assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1226         assoc->sadb_sa_exttype = SADB_EXT_SA;
1227         assoc->sadb_sa_spi = ipsa->ipsa_spi;
1228         assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1229         assoc->sadb_sa_state = ipsa->ipsa_state;
1230         assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1231         assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1232         assoc->sadb_sa_flags = ipsa->ipsa_flags;
1233 
1234         lt = (sadb_lifetime_t *)(assoc + 1);
1235         lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1236         lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1237         /* We do not support the concept. */
1238         lt->sadb_lifetime_allocations = 0;
1239         lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1240         lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1241         lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1242 
1243         if (hard) {
1244                 lt++;
1245                 lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1246                 lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1247                 lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1248                 lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1249                 lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1250                 lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1251         }
1252 
1253         if (soft) {
1254                 lt++;
1255                 lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1256                 lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1257                 lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1258                 lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1259                 lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1260                 lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1261         }
1262 
1263         if (idle) {
1264                 lt++;
1265                 lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1266                 lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1267                 lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1268                 lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1269         }
1270 
1271         cur = (uint8_t *)(lt + 1);
1272 
1273         /* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1274         cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1275             ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1276             SA_PROTO(ipsa), 0);
1277         if (cur == NULL) {
1278                 freemsg(mp);
1279                 mp = NULL;
1280                 goto bail;
1281         }
1282 
1283         cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1284             ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1285             SA_PROTO(ipsa), 0);
1286         if (cur == NULL) {
1287                 freemsg(mp);
1288                 mp = NULL;
1289                 goto bail;
1290         }
1291 
1292         if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1293                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1294                     fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1295                     IPPROTO_UDP, 0);
1296                 if (cur == NULL) {
1297                         freemsg(mp);
1298                         mp = NULL;
1299                         goto bail;
1300                 }
1301         }
1302 
1303         if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1304                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1305                     fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1306                     IPPROTO_UDP, 0);
1307                 if (cur == NULL) {
1308                         freemsg(mp);
1309                         mp = NULL;
1310                         goto bail;
1311                 }
1312         }
1313 
1314         /* If we are a tunnel-mode SA, fill in the inner-selectors. */
1315         if (isrc) {
1316                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1317                     pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1318                     SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1319                 if (cur == NULL) {
1320                         freemsg(mp);
1321                         mp = NULL;
1322                         goto bail;
1323                 }
1324         }
1325 
1326         if (idst) {
1327                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1328                     pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1329                     SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1330                 if (cur == NULL) {
1331                         freemsg(mp);
1332                         mp = NULL;
1333                         goto bail;
1334                 }
1335         }
1336 
1337         if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1338                 cur = sadb_make_kmc_ext(cur, end,
1339                     ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1340                 if (cur == NULL) {
1341                         freemsg(mp);
1342                         mp = NULL;
1343                         goto bail;
1344                 }
1345         }
1346 
1347         walker = (sadb_ext_t *)cur;
1348         if (auth) {
1349                 key = (sadb_key_t *)walker;
1350                 key->sadb_key_len = SADB_8TO64(authsize);
1351                 key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1352                 key->sadb_key_bits = ipsa->ipsa_authkeybits;
1353                 key->sadb_key_reserved = 0;
1354                 bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1355                 walker = (sadb_ext_t *)((uint64_t *)walker +
1356                     walker->sadb_ext_len);
1357         }
1358 
1359         if (encr) {
1360                 uint8_t *buf_ptr;
1361                 key = (sadb_key_t *)walker;
1362                 key->sadb_key_len = SADB_8TO64(encrsize);
1363                 key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1364                 key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1365                 key->sadb_key_reserved = ipsa->ipsa_saltbits;
1366                 buf_ptr = (uint8_t *)(key + 1);
1367                 bcopy(ipsa->ipsa_encrkey, buf_ptr, ipsa->ipsa_encrkeylen);
1368                 if (ipsa->ipsa_salt != NULL) {
1369                         buf_ptr += ipsa->ipsa_encrkeylen;
1370                         bcopy(ipsa->ipsa_salt, buf_ptr, ipsa->ipsa_saltlen);
1371                 }
1372                 walker = (sadb_ext_t *)((uint64_t *)walker +
1373                     walker->sadb_ext_len);
1374         }
1375 
1376         if (srcid) {
1377                 ident = (sadb_ident_t *)walker;
1378                 ident->sadb_ident_len = SADB_8TO64(srcidsize);
1379                 ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1380                 ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1381                 ident->sadb_ident_id = 0;
1382                 ident->sadb_ident_reserved = 0;
1383                 (void) strcpy((char *)(ident + 1),
1384                     ipsa->ipsa_src_cid->ipsid_cid);
1385                 walker = (sadb_ext_t *)((uint64_t *)walker +
1386                     walker->sadb_ext_len);
1387         }
1388 
1389         if (dstid) {
1390                 ident = (sadb_ident_t *)walker;
1391                 ident->sadb_ident_len = SADB_8TO64(dstidsize);
1392                 ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1393                 ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1394                 ident->sadb_ident_id = 0;
1395                 ident->sadb_ident_reserved = 0;
1396                 (void) strcpy((char *)(ident + 1),
1397                     ipsa->ipsa_dst_cid->ipsid_cid);
1398                 walker = (sadb_ext_t *)((uint64_t *)walker +
1399                     walker->sadb_ext_len);
1400         }
1401 
1402         if (sensinteg) {
1403                 sens = (sadb_sens_t *)walker;
1404                 sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
1405                     ipsa->ipsa_tsl, senslen);
1406 
1407                 walker = (sadb_ext_t *)((uint64_t *)walker +
1408                     walker->sadb_ext_len);
1409         }
1410 
1411         if (osensinteg) {
1412                 sens = (sadb_sens_t *)walker;
1413 
1414                 sadb_sens_from_label(sens, SADB_X_EXT_OUTER_SENS,
1415                     ipsa->ipsa_otsl, osenslen);
1416                 if (ipsa->ipsa_mac_exempt)
1417                         sens->sadb_x_sens_flags = SADB_X_SENS_IMPLICIT;
1418 
1419                 walker = (sadb_ext_t *)((uint64_t *)walker +
1420                     walker->sadb_ext_len);
1421         }
1422 
1423         if (paired) {
1424                 pair_ext = (sadb_x_pair_t *)walker;
1425 
1426                 pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1427                 pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1428                 pair_ext->sadb_x_pair_spi = otherspi;
1429 
1430                 walker = (sadb_ext_t *)((uint64_t *)walker +
1431                     walker->sadb_ext_len);
1432         }
1433 
1434         if (ipsa->ipsa_replay != 0) {
1435                 repl_ctr = (sadb_x_replay_ctr_t *)walker;
1436                 repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1437                 repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1438                 repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1439                 repl_ctr->sadb_x_rc_replay64 = 0;
1440                 walker = (sadb_ext_t *)(repl_ctr + 1);
1441         }
1442 
1443 bail:
1444         /* Pardon any delays... */
1445         mutex_exit(&ipsa->ipsa_lock);
1446 
1447         return (mp);
1448 }
1449 
1450 /*
1451  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1452  * and adjust base message accordingly.
1453  *
1454  * Assume message is pulled up in one piece of contiguous memory.
1455  *
1456  * Say if we start off with:
1457  *
1458  * +------+----+-------------+-----------+---------------+---------------+
1459  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1460  * +------+----+-------------+-----------+---------------+---------------+
1461  *
1462  * we will end up with
1463  *
1464  * +------+----+-------------+-----------+---------------+
1465  * | base | SA | source addr | dest addr | soft lifetime |
1466  * +------+----+-------------+-----------+---------------+
1467  */
1468 static void
1469 sadb_strip(sadb_msg_t *samsg)
1470 {
1471         sadb_ext_t *ext;
1472         uint8_t *target = NULL;
1473         uint8_t *msgend;
1474         int sofar = SADB_8TO64(sizeof (*samsg));
1475         int copylen;
1476 
1477         ext = (sadb_ext_t *)(samsg + 1);
1478         msgend = (uint8_t *)samsg;
1479         msgend += SADB_64TO8(samsg->sadb_msg_len);
1480         while ((uint8_t *)ext < msgend) {
1481                 if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1482                     ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1483                     ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1484                         /*
1485                          * Aha!  I found a header to be erased.
1486                          */
1487 
1488                         if (target != NULL) {
1489                                 /*
1490                                  * If I had a previous header to be erased,
1491                                  * copy over it.  I can get away with just
1492                                  * copying backwards because the target will
1493                                  * always be 8 bytes behind the source.
1494                                  */
1495                                 copylen = ((uint8_t *)ext) - (target +
1496                                     SADB_64TO8(
1497                                     ((sadb_ext_t *)target)->sadb_ext_len));
1498                                 ovbcopy(((uint8_t *)ext - copylen), target,
1499                                     copylen);
1500                                 target += copylen;
1501                                 ((sadb_ext_t *)target)->sadb_ext_len =
1502                                     SADB_8TO64(((uint8_t *)ext) - target +
1503                                     SADB_64TO8(ext->sadb_ext_len));
1504                         } else {
1505                                 target = (uint8_t *)ext;
1506                         }
1507                 } else {
1508                         sofar += ext->sadb_ext_len;
1509                 }
1510 
1511                 ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1512         }
1513 
1514         ASSERT((uint8_t *)ext == msgend);
1515 
1516         if (target != NULL) {
1517                 copylen = ((uint8_t *)ext) - (target +
1518                     SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1519                 if (copylen != 0)
1520                         ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1521         }
1522 
1523         /* Adjust samsg. */
1524         samsg->sadb_msg_len = (uint16_t)sofar;
1525 
1526         /* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1527 }
1528 
1529 /*
1530  * AH needs to send an error to PF_KEY.  Assume mp points to an M_CTL
1531  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1532  * the sending keysock instance is included.
1533  */
1534 void
1535 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1536     uint_t serial)
1537 {
1538         mblk_t *msg = mp->b_cont;
1539         sadb_msg_t *samsg;
1540         keysock_out_t *kso;
1541 
1542         /*
1543          * Enough functions call this to merit a NULL queue check.
1544          */
1545         if (pfkey_q == NULL) {
1546                 freemsg(mp);
1547                 return;
1548         }
1549 
1550         ASSERT(msg != NULL);
1551         ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1552         ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1553         samsg = (sadb_msg_t *)msg->b_rptr;
1554         kso = (keysock_out_t *)mp->b_rptr;
1555 
1556         kso->ks_out_type = KEYSOCK_OUT;
1557         kso->ks_out_len = sizeof (*kso);
1558         kso->ks_out_serial = serial;
1559 
1560         /*
1561          * Only send the base message up in the event of an error.
1562          * Don't worry about bzero()-ing, because it was probably bogus
1563          * anyway.
1564          */
1565         msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1566         samsg = (sadb_msg_t *)msg->b_rptr;
1567         samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1568         samsg->sadb_msg_errno = (uint8_t)error;
1569         if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1570                 samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1571 
1572         putnext(pfkey_q, mp);
1573 }
1574 
1575 /*
1576  * Send a successful return packet back to keysock via the queue in pfkey_q.
1577  *
1578  * Often, an SA is associated with the reply message, it's passed in if needed,
1579  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1580  * and the caller will release said refcnt.
1581  */
1582 void
1583 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1584     keysock_in_t *ksi, ipsa_t *ipsa)
1585 {
1586         keysock_out_t *kso;
1587         mblk_t *mp1;
1588         sadb_msg_t *newsamsg;
1589         uint8_t *oldend;
1590 
1591         ASSERT((mp->b_cont != NULL) &&
1592             ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1593             ((void *)mp->b_rptr == (void *)ksi));
1594 
1595         switch (samsg->sadb_msg_type) {
1596         case SADB_ADD:
1597         case SADB_UPDATE:
1598         case SADB_X_UPDATEPAIR:
1599         case SADB_FLUSH:
1600         case SADB_DUMP:
1601                 /*
1602                  * I have all of the message already.  I just need to strip
1603                  * out the keying material and echo the message back.
1604                  *
1605                  * NOTE: for SADB_DUMP, the function sadb_dump() did the
1606                  * work.  When DUMP reaches here, it should only be a base
1607                  * message.
1608                  */
1609         justecho:
1610                 ASSERT(samsg->sadb_msg_type != SADB_DUMP ||
1611                     samsg->sadb_msg_len == SADB_8TO64(sizeof (sadb_msg_t)));
1612                 if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1613                     ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1614                         sadb_strip(samsg);
1615                         /* Assume PF_KEY message is contiguous. */
1616                         ASSERT(mp->b_cont->b_cont == NULL);
1617                         oldend = mp->b_cont->b_wptr;
1618                         mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1619                             SADB_64TO8(samsg->sadb_msg_len);
1620                         bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1621                 }
1622                 break;
1623         case SADB_GET:
1624                 /*
1625                  * Do a lot of work here, because of the ipsa I just found.
1626                  * First construct the new PF_KEY message, then abandon
1627                  * the old one.
1628                  */
1629                 mp1 = sadb_sa2msg(ipsa, samsg);
1630                 if (mp1 == NULL) {
1631                         sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1632                             SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1633                         return;
1634                 }
1635                 freemsg(mp->b_cont);
1636                 mp->b_cont = mp1;
1637                 break;
1638         case SADB_DELETE:
1639         case SADB_X_DELPAIR:
1640                 if (ipsa == NULL)
1641                         goto justecho;
1642                 /*
1643                  * Because listening KMds may require more info, treat
1644                  * DELETE like a special case of GET.
1645                  */
1646                 mp1 = sadb_sa2msg(ipsa, samsg);
1647                 if (mp1 == NULL) {
1648                         sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1649                             SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1650                         return;
1651                 }
1652                 newsamsg = (sadb_msg_t *)mp1->b_rptr;
1653                 sadb_strip(newsamsg);
1654                 oldend = mp1->b_wptr;
1655                 mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1656                 bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1657                 freemsg(mp->b_cont);
1658                 mp->b_cont = mp1;
1659                 break;
1660         default:
1661                 if (mp != NULL)
1662                         freemsg(mp);
1663                 return;
1664         }
1665 
1666         /* ksi is now null and void. */
1667         kso = (keysock_out_t *)ksi;
1668         kso->ks_out_type = KEYSOCK_OUT;
1669         kso->ks_out_len = sizeof (*kso);
1670         kso->ks_out_serial = ksi->ks_in_serial;
1671         /* We're ready to send... */
1672         putnext(pfkey_q, mp);
1673 }
1674 
1675 /*
1676  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1677  */
1678 void
1679 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1680     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1681 {
1682         keysock_hello_ack_t *kha;
1683         queue_t *oldq;
1684 
1685         ASSERT(OTHERQ(q) != NULL);
1686 
1687         /*
1688          * First, check atomically that I'm the first and only keysock
1689          * instance.
1690          *
1691          * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1692          * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1693          * messages.
1694          */
1695 
1696         oldq = atomic_cas_ptr((void **)pfkey_qp, NULL, OTHERQ(q));
1697         if (oldq != NULL) {
1698                 ASSERT(oldq != q);
1699                 cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1700                     (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1701                 freemsg(mp);
1702                 return;
1703         }
1704 
1705         kha = (keysock_hello_ack_t *)mp->b_rptr;
1706         kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1707         kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1708         kha->ks_hello_satype = (uint8_t)satype;
1709 
1710         /*
1711          * If we made it past the atomic_cas_ptr, then we have "exclusive"
1712          * access to the timeout handle.  Fire it off after the default ager
1713          * interval.
1714          */
1715         *top = qtimeout(*pfkey_qp, ager, agerarg,
1716             drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
1717 
1718         putnext(*pfkey_qp, mp);
1719 }
1720 
1721 /*
1722  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1723  *
1724  * Check addresses themselves for wildcard or multicast.
1725  * Check ire table for local/non-local/broadcast.
1726  */
1727 int
1728 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1729     netstack_t *ns)
1730 {
1731         sadb_address_t *addr = (sadb_address_t *)ext;
1732         struct sockaddr_in *sin;
1733         struct sockaddr_in6 *sin6;
1734         int diagnostic, type;
1735         boolean_t normalized = B_FALSE;
1736 
1737         ASSERT(ext != NULL);
1738         ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
1739             (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
1740             (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
1741             (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
1742             (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
1743             (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
1744 
1745         /* Assign both sockaddrs, the compiler will do the right thing. */
1746         sin = (struct sockaddr_in *)(addr + 1);
1747         sin6 = (struct sockaddr_in6 *)(addr + 1);
1748 
1749         if (sin6->sin6_family == AF_INET6) {
1750                 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1751                         /*
1752                          * Convert to an AF_INET sockaddr.  This means the
1753                          * return messages will have the extra space, but have
1754                          * AF_INET sockaddrs instead of AF_INET6.
1755                          *
1756                          * Yes, RFC 2367 isn't clear on what to do here w.r.t.
1757                          * mapped addresses, but since AF_INET6 ::ffff:<v4> is
1758                          * equal to AF_INET <v4>, it shouldnt be a huge
1759                          * problem.
1760                          */
1761                         sin->sin_family = AF_INET;
1762                         IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
1763                             &sin->sin_addr);
1764                         bzero(&sin->sin_zero, sizeof (sin->sin_zero));
1765                         normalized = B_TRUE;
1766                 }
1767         } else if (sin->sin_family != AF_INET) {
1768                 switch (ext->sadb_ext_type) {
1769                 case SADB_EXT_ADDRESS_SRC:
1770                         diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
1771                         break;
1772                 case SADB_EXT_ADDRESS_DST:
1773                         diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
1774                         break;
1775                 case SADB_X_EXT_ADDRESS_INNER_SRC:
1776                         diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
1777                         break;
1778                 case SADB_X_EXT_ADDRESS_INNER_DST:
1779                         diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
1780                         break;
1781                 case SADB_X_EXT_ADDRESS_NATT_LOC:
1782                         diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
1783                         break;
1784                 case SADB_X_EXT_ADDRESS_NATT_REM:
1785                         diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
1786                         break;
1787                         /* There is no default, see above ASSERT. */
1788                 }
1789 bail:
1790                 if (pfkey_q != NULL) {
1791                         sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
1792                             serial);
1793                 } else {
1794                         /*
1795                          * Scribble in sadb_msg that we got passed in.
1796                          * Overload "mp" to be an sadb_msg pointer.
1797                          */
1798                         sadb_msg_t *samsg = (sadb_msg_t *)mp;
1799 
1800                         samsg->sadb_msg_errno = EINVAL;
1801                         samsg->sadb_x_msg_diagnostic = diagnostic;
1802                 }
1803                 return (KS_IN_ADDR_UNKNOWN);
1804         }
1805 
1806         if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
1807             ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
1808                 /*
1809                  * We need only check for prefix issues.
1810                  */
1811 
1812                 /* Set diagnostic now, in case we need it later. */
1813                 diagnostic =
1814                     (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
1815                     SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
1816                     SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
1817 
1818                 if (normalized)
1819                         addr->sadb_address_prefixlen -= 96;
1820 
1821                 /*
1822                  * Verify and mask out inner-addresses based on prefix length.
1823                  */
1824                 if (sin->sin_family == AF_INET) {
1825                         if (addr->sadb_address_prefixlen > 32)
1826                                 goto bail;
1827                         sin->sin_addr.s_addr &=
1828                             ip_plen_to_mask(addr->sadb_address_prefixlen);
1829                 } else {
1830                         in6_addr_t mask;
1831 
1832                         ASSERT(sin->sin_family == AF_INET6);
1833                         /*
1834                          * ip_plen_to_mask_v6() returns NULL if the value in
1835                          * question is out of range.
1836                          */
1837                         if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
1838                             &mask) == NULL)
1839                                 goto bail;
1840                         sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
1841                         sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
1842                         sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
1843                         sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
1844                 }
1845 
1846                 /* We don't care in these cases. */
1847                 return (KS_IN_ADDR_DONTCARE);
1848         }
1849 
1850         if (sin->sin_family == AF_INET6) {
1851                 /* Check the easy ones now. */
1852                 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
1853                         return (KS_IN_ADDR_MBCAST);
1854                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
1855                         return (KS_IN_ADDR_UNSPEC);
1856                 /*
1857                  * At this point, we're a unicast IPv6 address.
1858                  *
1859                  * XXX Zones alert -> me/notme decision needs to be tempered
1860                  * by what zone we're in when we go to zone-aware IPsec.
1861                  */
1862                 if (ip_type_v6(&sin6->sin6_addr, ns->netstack_ip) ==
1863                     IRE_LOCAL) {
1864                         /* Hey hey, it's local. */
1865                         return (KS_IN_ADDR_ME);
1866                 }
1867         } else {
1868                 ASSERT(sin->sin_family == AF_INET);
1869                 if (sin->sin_addr.s_addr == INADDR_ANY)
1870                         return (KS_IN_ADDR_UNSPEC);
1871                 if (CLASSD(sin->sin_addr.s_addr))
1872                         return (KS_IN_ADDR_MBCAST);
1873                 /*
1874                  * At this point we're a unicast or broadcast IPv4 address.
1875                  *
1876                  * Check if the address is IRE_BROADCAST or IRE_LOCAL.
1877                  *
1878                  * XXX Zones alert -> me/notme decision needs to be tempered
1879                  * by what zone we're in when we go to zone-aware IPsec.
1880                  */
1881                 type = ip_type_v4(sin->sin_addr.s_addr, ns->netstack_ip);
1882                 switch (type) {
1883                 case IRE_LOCAL:
1884                         return (KS_IN_ADDR_ME);
1885                 case IRE_BROADCAST:
1886                         return (KS_IN_ADDR_MBCAST);
1887                 }
1888         }
1889 
1890         return (KS_IN_ADDR_NOTME);
1891 }
1892 
1893 /*
1894  * Address normalizations and reality checks for inbound PF_KEY messages.
1895  *
1896  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
1897  * the source to AF_INET.  Do the same for the inner sources.
1898  */
1899 boolean_t
1900 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
1901 {
1902         struct sockaddr_in *src, *isrc;
1903         struct sockaddr_in6 *dst, *idst;
1904         sadb_address_t *srcext, *dstext;
1905         uint16_t sport;
1906         sadb_ext_t **extv = ksi->ks_in_extv;
1907         int rc;
1908 
1909         if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
1910                 rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
1911                     ksi->ks_in_serial, ns);
1912                 if (rc == KS_IN_ADDR_UNKNOWN)
1913                         return (B_FALSE);
1914                 if (rc == KS_IN_ADDR_MBCAST) {
1915                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1916                             SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
1917                         return (B_FALSE);
1918                 }
1919                 ksi->ks_in_srctype = rc;
1920         }
1921 
1922         if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
1923                 rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
1924                     ksi->ks_in_serial, ns);
1925                 if (rc == KS_IN_ADDR_UNKNOWN)
1926                         return (B_FALSE);
1927                 if (rc == KS_IN_ADDR_UNSPEC) {
1928                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1929                             SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
1930                         return (B_FALSE);
1931                 }
1932                 ksi->ks_in_dsttype = rc;
1933         }
1934 
1935         /*
1936          * NAT-Traversal addrs are simple enough to not require all of
1937          * the checks in sadb_addrcheck().  Just normalize or reject if not
1938          * AF_INET.
1939          */
1940         if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
1941                 rc = sadb_addrcheck(pfkey_q, mp,
1942                     extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
1943 
1944                 /*
1945                  * Local NAT-T addresses never use an IRE_LOCAL, so it should
1946                  * always be NOTME, or UNSPEC (to handle both tunnel mode
1947                  * AND local-port flexibility).
1948                  */
1949                 if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
1950                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1951                             SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
1952                             ksi->ks_in_serial);
1953                         return (B_FALSE);
1954                 }
1955                 src = (struct sockaddr_in *)
1956                     (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
1957                 if (src->sin_family != AF_INET) {
1958                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1959                             SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
1960                             ksi->ks_in_serial);
1961                         return (B_FALSE);
1962                 }
1963         }
1964 
1965         if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
1966                 rc = sadb_addrcheck(pfkey_q, mp,
1967                     extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
1968 
1969                 /*
1970                  * Remote NAT-T addresses never use an IRE_LOCAL, so it should
1971                  * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
1972                  */
1973                 if (rc != KS_IN_ADDR_NOTME &&
1974                     !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
1975                     rc == KS_IN_ADDR_UNSPEC)) {
1976                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1977                             SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
1978                             ksi->ks_in_serial);
1979                         return (B_FALSE);
1980                 }
1981                 src = (struct sockaddr_in *)
1982                     (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
1983                 if (src->sin_family != AF_INET) {
1984                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1985                             SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
1986                             ksi->ks_in_serial);
1987                         return (B_FALSE);
1988                 }
1989         }
1990 
1991         if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
1992                 if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
1993                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
1994                             SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
1995                             ksi->ks_in_serial);
1996                         return (B_FALSE);
1997                 }
1998 
1999                 if (sadb_addrcheck(pfkey_q, mp,
2000                     extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2001                     == KS_IN_ADDR_UNKNOWN ||
2002                     sadb_addrcheck(pfkey_q, mp,
2003                     extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2004                     == KS_IN_ADDR_UNKNOWN)
2005                         return (B_FALSE);
2006 
2007                 isrc = (struct sockaddr_in *)
2008                     (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2009                     1);
2010                 idst = (struct sockaddr_in6 *)
2011                     (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2012                     1);
2013                 if (isrc->sin_family != idst->sin6_family) {
2014                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
2015                             SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2016                             ksi->ks_in_serial);
2017                         return (B_FALSE);
2018                 }
2019         } else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2020                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
2021                             SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2022                             ksi->ks_in_serial);
2023                         return (B_FALSE);
2024         } else {
2025                 isrc = NULL;    /* For inner/outer port check below. */
2026         }
2027 
2028         dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2029         srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2030 
2031         if (dstext == NULL || srcext == NULL)
2032                 return (B_TRUE);
2033 
2034         dst = (struct sockaddr_in6 *)(dstext + 1);
2035         src = (struct sockaddr_in *)(srcext + 1);
2036 
2037         if (isrc != NULL &&
2038             (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2039             (src->sin_port != 0 || dst->sin6_port != 0)) {
2040                 /* Can't set inner and outer ports in one SA. */
2041                 sadb_pfkey_error(pfkey_q, mp, EINVAL,
2042                     SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2043                     ksi->ks_in_serial);
2044                 return (B_FALSE);
2045         }
2046 
2047         if (dst->sin6_family == src->sin_family)
2048                 return (B_TRUE);
2049 
2050         if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2051                 if (srcext->sadb_address_proto == 0) {
2052                         srcext->sadb_address_proto = dstext->sadb_address_proto;
2053                 } else if (dstext->sadb_address_proto == 0) {
2054                         dstext->sadb_address_proto = srcext->sadb_address_proto;
2055                 } else {
2056                         /* Inequal protocols, neither were 0.  Report error. */
2057                         sadb_pfkey_error(pfkey_q, mp, EINVAL,
2058                             SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2059                             ksi->ks_in_serial);
2060                         return (B_FALSE);
2061                 }
2062         }
2063 
2064         /*
2065          * With the exception of an unspec IPv6 source and an IPv4
2066          * destination, address families MUST me matched.
2067          */
2068         if (src->sin_family == AF_INET ||
2069             ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2070                 sadb_pfkey_error(pfkey_q, mp, EINVAL,
2071                     SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2072                 return (B_FALSE);
2073         }
2074 
2075         /*
2076          * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2077          * in the same place for sockaddr_in and sockaddr_in6.
2078          */
2079         sport = src->sin_port;
2080         bzero(src, sizeof (*src));
2081         src->sin_family = AF_INET;
2082         src->sin_port = sport;
2083 
2084         return (B_TRUE);
2085 }
2086 
2087 /*
2088  * Set the results in "addrtype", given an IRE as requested by
2089  * sadb_addrcheck().
2090  */
2091 int
2092 sadb_addrset(ire_t *ire)
2093 {
2094         if ((ire->ire_type & IRE_BROADCAST) ||
2095             (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2096             (ire->ire_ipversion == IPV6_VERSION &&
2097             IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2098                 return (KS_IN_ADDR_MBCAST);
2099         if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2100                 return (KS_IN_ADDR_ME);
2101         return (KS_IN_ADDR_NOTME);
2102 }
2103 
2104 /*
2105  * Match primitives..
2106  * !!! TODO: short term: inner selectors
2107  *              ipv6 scope id (ifindex)
2108  * longer term:  zone id.  sensitivity label. uid.
2109  */
2110 boolean_t
2111 sadb_match_spi(ipsa_query_t *sq, ipsa_t *sa)
2112 {
2113         return (sq->spi == sa->ipsa_spi);
2114 }
2115 
2116 boolean_t
2117 sadb_match_dst_v6(ipsa_query_t *sq, ipsa_t *sa)
2118 {
2119         return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_dstaddr, sq->dstaddr, AF_INET6));
2120 }
2121 
2122 boolean_t
2123 sadb_match_src_v6(ipsa_query_t *sq, ipsa_t *sa)
2124 {
2125         return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_srcaddr, sq->srcaddr, AF_INET6));
2126 }
2127 
2128 boolean_t
2129 sadb_match_dst_v4(ipsa_query_t *sq, ipsa_t *sa)
2130 {
2131         return (sq->dstaddr[0] == sa->ipsa_dstaddr[0]);
2132 }
2133 
2134 boolean_t
2135 sadb_match_src_v4(ipsa_query_t *sq, ipsa_t *sa)
2136 {
2137         return (sq->srcaddr[0] == sa->ipsa_srcaddr[0]);
2138 }
2139 
2140 boolean_t
2141 sadb_match_dstid(ipsa_query_t *sq, ipsa_t *sa)
2142 {
2143         return ((sa->ipsa_dst_cid != NULL) &&
2144             (sq->didtype == sa->ipsa_dst_cid->ipsid_type) &&
2145             (strcmp(sq->didstr, sa->ipsa_dst_cid->ipsid_cid) == 0));
2146 
2147 }
2148 boolean_t
2149 sadb_match_srcid(ipsa_query_t *sq, ipsa_t *sa)
2150 {
2151         return ((sa->ipsa_src_cid != NULL) &&
2152             (sq->sidtype == sa->ipsa_src_cid->ipsid_type) &&
2153             (strcmp(sq->sidstr, sa->ipsa_src_cid->ipsid_cid) == 0));
2154 }
2155 
2156 boolean_t
2157 sadb_match_kmc(ipsa_query_t *sq, ipsa_t *sa)
2158 {
2159 #define M(a, b) (((a) == 0) || ((b) == 0) || ((a) == (b)))
2160 
2161         return (M(sq->kmc, sa->ipsa_kmc) && M(sq->kmp, sa->ipsa_kmp));
2162 
2163 #undef M
2164 }
2165 
2166 /*
2167  * Common function which extracts several PF_KEY extensions for ease of
2168  * SADB matching.
2169  *
2170  * XXX TODO: weed out ipsa_query_t fields not used during matching
2171  * or afterwards?
2172  */
2173 int
2174 sadb_form_query(keysock_in_t *ksi, uint32_t req, uint32_t match,
2175     ipsa_query_t *sq, int *diagnostic)
2176 {
2177         int i;
2178         ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2179 
2180         for (i = 0; i < IPSA_NMATCH; i++)
2181                 sq->matchers[i] = NULL;
2182 
2183         ASSERT((req & ~match) == 0);
2184 
2185         sq->req = req;
2186         sq->dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2187         sq->srcext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2188         sq->assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2189 
2190         if ((req & IPSA_Q_DST) && (sq->dstext == NULL)) {
2191                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2192                 return (EINVAL);
2193         }
2194         if ((req & IPSA_Q_SRC) && (sq->srcext == NULL)) {
2195                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2196                 return (EINVAL);
2197         }
2198         if ((req & IPSA_Q_SA) && (sq->assoc == NULL)) {
2199                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2200                 return (EINVAL);
2201         }
2202 
2203         if (match & IPSA_Q_SA) {
2204                 *mfpp++ = sadb_match_spi;
2205                 sq->spi = sq->assoc->sadb_sa_spi;
2206         }
2207 
2208         if (sq->dstext != NULL)
2209                 sq->dst = (struct sockaddr_in *)(sq->dstext + 1);
2210         else {
2211                 sq->dst = NULL;
2212                 sq->dst6 = NULL;
2213                 sq->dstaddr = NULL;
2214         }
2215 
2216         if (sq->srcext != NULL)
2217                 sq->src = (struct sockaddr_in *)(sq->srcext + 1);
2218         else {
2219                 sq->src = NULL;
2220                 sq->src6 = NULL;
2221                 sq->srcaddr = NULL;
2222         }
2223 
2224         if (sq->dst != NULL)
2225                 sq->af = sq->dst->sin_family;
2226         else if (sq->src != NULL)
2227                 sq->af = sq->src->sin_family;
2228         else
2229                 sq->af = AF_INET;
2230 
2231         if (sq->af == AF_INET6) {
2232                 if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2233                         *mfpp++ = sadb_match_dst_v6;
2234                         sq->dst6 = (struct sockaddr_in6 *)sq->dst;
2235                         sq->dstaddr = (uint32_t *)&(sq->dst6->sin6_addr);
2236                 } else {
2237                         match &= ~IPSA_Q_DST;
2238                         sq->dstaddr = ALL_ZEROES_PTR;
2239                 }
2240 
2241                 if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2242                         sq->src6 = (struct sockaddr_in6 *)(sq->srcext + 1);
2243                         sq->srcaddr = (uint32_t *)&sq->src6->sin6_addr;
2244                         if (sq->src6->sin6_family != AF_INET6) {
2245                                 *diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2246                                 return (EINVAL);
2247                         }
2248                         *mfpp++ = sadb_match_src_v6;
2249                 } else {
2250                         match &= ~IPSA_Q_SRC;
2251                         sq->srcaddr = ALL_ZEROES_PTR;
2252                 }
2253         } else {
2254                 sq->src6 = sq->dst6 = NULL;
2255                 if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2256                         *mfpp++ = sadb_match_dst_v4;
2257                         sq->dstaddr = (uint32_t *)&sq->dst->sin_addr;
2258                 } else {
2259                         match &= ~IPSA_Q_DST;
2260                         sq->dstaddr = ALL_ZEROES_PTR;
2261                 }
2262                 if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2263                         sq->srcaddr = (uint32_t *)&sq->src->sin_addr;
2264                         if (sq->src->sin_family != AF_INET) {
2265                                 *diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2266                                 return (EINVAL);
2267                         }
2268                         *mfpp++ = sadb_match_src_v4;
2269                 } else {
2270                         match &= ~IPSA_Q_SRC;
2271                         sq->srcaddr = ALL_ZEROES_PTR;
2272                 }
2273         }
2274 
2275         sq->dstid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2276         if ((match & IPSA_Q_DSTID) && (sq->dstid != NULL)) {
2277                 sq->didstr = (char *)(sq->dstid + 1);
2278                 sq->didtype = sq->dstid->sadb_ident_type;
2279                 *mfpp++ = sadb_match_dstid;
2280         }
2281 
2282         sq->srcid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2283 
2284         if ((match & IPSA_Q_SRCID) && (sq->srcid != NULL)) {
2285                 sq->sidstr = (char *)(sq->srcid + 1);
2286                 sq->sidtype = sq->srcid->sadb_ident_type;
2287                 *mfpp++ = sadb_match_srcid;
2288         }
2289 
2290         sq->kmcext = (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2291         sq->kmc = 0;
2292         sq->kmp = 0;
2293 
2294         if ((match & IPSA_Q_KMC) && (sq->kmcext)) {
2295                 sq->kmc = sq->kmcext->sadb_x_kmc_cookie;
2296                 sq->kmp = sq->kmcext->sadb_x_kmc_proto;
2297                 *mfpp++ = sadb_match_kmc;
2298         }
2299 
2300         if (match & (IPSA_Q_INBOUND|IPSA_Q_OUTBOUND)) {
2301                 if (sq->af == AF_INET6)
2302                         sq->sp = &sq->spp->s_v6;
2303                 else
2304                         sq->sp = &sq->spp->s_v4;
2305         } else {
2306                 sq->sp = NULL;
2307         }
2308 
2309         if (match & IPSA_Q_INBOUND) {
2310                 sq->inhash = INBOUND_HASH(sq->sp, sq->assoc->sadb_sa_spi);
2311                 sq->inbound = &sq->sp->sdb_if[sq->inhash];
2312         } else {
2313                 sq->inhash = 0;
2314                 sq->inbound = NULL;
2315         }
2316 
2317         if (match & IPSA_Q_OUTBOUND) {
2318                 if (sq->af == AF_INET6) {
2319                         sq->outhash = OUTBOUND_HASH_V6(sq->sp, *(sq->dstaddr));
2320                 } else {
2321                         sq->outhash = OUTBOUND_HASH_V4(sq->sp, *(sq->dstaddr));
2322                 }
2323                 sq->outbound = &sq->sp->sdb_of[sq->outhash];
2324         } else {
2325                 sq->outhash = 0;
2326                 sq->outbound = NULL;
2327         }
2328         sq->match = match;
2329         return (0);
2330 }
2331 
2332 /*
2333  * Match an initialized query structure with a security association;
2334  * return B_TRUE on a match, B_FALSE on a miss.
2335  * Applies match functions set up by sadb_form_query() until one returns false.
2336  */
2337 boolean_t
2338 sadb_match_query(ipsa_query_t *sq, ipsa_t *sa)
2339 {
2340         ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2341         ipsa_match_fn_t mfp;
2342 
2343         for (mfp = *mfpp++; mfp != NULL; mfp = *mfpp++) {
2344                 if (!mfp(sq, sa))
2345                         return (B_FALSE);
2346         }
2347         return (B_TRUE);
2348 }
2349 
2350 /*
2351  * Walker callback function to delete sa's based on src/dst address.
2352  * Assumes that we're called with *head locked, no other locks held;
2353  * Conveniently, and not coincidentally, this is both what sadb_walker
2354  * gives us and also what sadb_unlinkassoc expects.
2355  */
2356 static void
2357 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2358 {
2359         ipsa_query_t *query = (ipsa_query_t *)cookie;
2360 
2361         ASSERT(MUTEX_HELD(&head->isaf_lock));
2362 
2363         mutex_enter(&entry->ipsa_lock);
2364 
2365         if (entry->ipsa_state == IPSA_STATE_LARVAL ||
2366             !sadb_match_query(query, entry)) {
2367                 mutex_exit(&entry->ipsa_lock);
2368                 return;
2369         }
2370 
2371         entry->ipsa_state = IPSA_STATE_DEAD;
2372         (void) sadb_torch_assoc(head, entry);
2373 }
2374 
2375 /*
2376  * Common code to purge an SA with a matching src or dst address.
2377  * Don't kill larval SA's in such a purge.
2378  */
2379 int
2380 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp,
2381         int *diagnostic, queue_t *pfkey_q)
2382 {
2383         ipsa_query_t query;
2384         int error = sadb_form_query(ksi, 0,
2385             IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2386             &query, diagnostic);
2387 
2388         if (error != 0)
2389                 return (error);
2390 
2391         /*
2392          * This is simple, crude, and effective.
2393          * Unimplemented optimizations (TBD):
2394          * - we can limit how many places we search based on where we
2395          * think the SA is filed.
2396          * - if we get a dst address, we can hash based on dst addr to find
2397          * the correct bucket in the outbound table.
2398          */
2399         sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &query);
2400         sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &query);
2401 
2402         ASSERT(mp->b_cont != NULL);
2403         sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2404             NULL);
2405         return (0);
2406 }
2407 
2408 /*
2409  * Common code to delete/get an SA.
2410  */
2411 int
2412 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2413     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2414 {
2415         ipsa_query_t sq;
2416         ipsa_t *echo_target = NULL;
2417         ipsap_t ipsapp;
2418         uint_t  error = 0;
2419 
2420         sq.spp = spp;           /* XXX param */
2421         error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SA,
2422             IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
2423             &sq, diagnostic);
2424         if (error != 0)
2425                 return (error);
2426 
2427         error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
2428         if (error != 0) {
2429                 return (error);
2430         }
2431 
2432         echo_target = ipsapp.ipsap_sa_ptr;
2433         if (echo_target == NULL)
2434                 echo_target = ipsapp.ipsap_psa_ptr;
2435 
2436         if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2437                 /*
2438                  * Bucket locks will be required if SA is actually unlinked.
2439                  * get_ipsa_pair() returns valid hash bucket pointers even
2440                  * if it can't find a pair SA pointer. To prevent a potential
2441                  * deadlock, always lock the outbound bucket before the inbound.
2442                  */
2443                 if (ipsapp.in_inbound_table) {
2444                         mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2445                         mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2446                 } else {
2447                         mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2448                         mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2449                 }
2450 
2451                 if (ipsapp.ipsap_sa_ptr != NULL) {
2452                         mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
2453                         ipsapp.ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2454                         (void) sadb_torch_assoc(ipsapp.ipsap_bucket,
2455                             ipsapp.ipsap_sa_ptr);
2456                         /*
2457                          * sadb_torch_assoc() releases the ipsa_lock
2458                          * and calls sadb_unlinkassoc() which does a
2459                          * IPSA_REFRELE.
2460                          */
2461                 }
2462                 if (ipsapp.ipsap_psa_ptr != NULL) {
2463                         mutex_enter(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2464                         if (sadb_msg_type == SADB_X_DELPAIR ||
2465                             ipsapp.ipsap_psa_ptr->ipsa_haspeer) {
2466                                 ipsapp.ipsap_psa_ptr->ipsa_state =
2467                                     IPSA_STATE_DEAD;
2468                                 (void) sadb_torch_assoc(ipsapp.ipsap_pbucket,
2469                                     ipsapp.ipsap_psa_ptr);
2470                         } else {
2471                                 /*
2472                                  * Only half of the "pair" has been deleted.
2473                                  * Update the remaining SA and remove references
2474                                  * to its pair SA, which is now gone.
2475                                  */
2476                                 ipsapp.ipsap_psa_ptr->ipsa_otherspi = 0;
2477                                 ipsapp.ipsap_psa_ptr->ipsa_flags &=
2478                                     ~IPSA_F_PAIRED;
2479                                 mutex_exit(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2480                         }
2481                 } else if (sadb_msg_type == SADB_X_DELPAIR) {
2482                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2483                         error = ESRCH;
2484                 }
2485                 mutex_exit(&ipsapp.ipsap_bucket->isaf_lock);
2486                 mutex_exit(&ipsapp.ipsap_pbucket->isaf_lock);
2487         }
2488 
2489         ASSERT(mp->b_cont != NULL);
2490 
2491         if (error == 0)
2492                 sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2493                     mp->b_cont->b_rptr, ksi, echo_target);
2494 
2495         destroy_ipsa_pair(&ipsapp);
2496 
2497         return (error);
2498 }
2499 
2500 /*
2501  * This function takes a sadb_sa_t and finds the ipsa_t structure
2502  * and the isaf_t (hash bucket) that its stored under. If the security
2503  * association has a peer, the ipsa_t structure and bucket for that security
2504  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2505  * are returned as a ipsap_t.
2506  *
2507  * The hash buckets are returned for convenience, if the calling function
2508  * needs to use the hash bucket locks, say to remove the SA's, it should
2509  * take care to observe the convention of locking outbound bucket then
2510  * inbound bucket. The flag in_inbound_table provides direction.
2511  *
2512  * Note that a "pair" is defined as one (but not both) of the following:
2513  *
2514  * A security association which has a soft reference to another security
2515  * association via its SPI.
2516  *
2517  * A security association that is not obviously "inbound" or "outbound" so
2518  * it appears in both hash tables, the "peer" being the same security
2519  * association in the other hash table.
2520  *
2521  * This function will return NULL if the ipsa_t can't be found in the
2522  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2523  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2524  * provided at least one ipsa_t is found.
2525  */
2526 static int
2527 get_ipsa_pair(ipsa_query_t *sq, ipsap_t *ipsapp, int *diagnostic)
2528 {
2529         uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2530         uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2531         uint32_t pair_spi;
2532 
2533         init_ipsa_pair(ipsapp);
2534 
2535         ipsapp->in_inbound_table = B_FALSE;
2536 
2537         /* Lock down both buckets. */
2538         mutex_enter(&sq->outbound->isaf_lock);
2539         mutex_enter(&sq->inbound->isaf_lock);
2540 
2541         if (sq->assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2542                 ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2543                     sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2544                 if (ipsapp->ipsap_sa_ptr != NULL) {
2545                         ipsapp->ipsap_bucket = sq->inbound;
2546                         ipsapp->ipsap_pbucket = sq->outbound;
2547                         ipsapp->in_inbound_table = B_TRUE;
2548                 } else {
2549                         ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->outbound,
2550                             sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2551                             sq->af);
2552                         ipsapp->ipsap_bucket = sq->outbound;
2553                         ipsapp->ipsap_pbucket = sq->inbound;
2554                 }
2555         } else {
2556                 /* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2557                 ipsapp->ipsap_sa_ptr =
2558                     ipsec_getassocbyspi(sq->outbound,
2559                     sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2560                 if (ipsapp->ipsap_sa_ptr != NULL) {
2561                         ipsapp->ipsap_bucket = sq->outbound;
2562                         ipsapp->ipsap_pbucket = sq->inbound;
2563                 } else {
2564                         ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2565                             sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2566                             sq->af);
2567                         ipsapp->ipsap_bucket = sq->inbound;
2568                         ipsapp->ipsap_pbucket = sq->outbound;
2569                         if (ipsapp->ipsap_sa_ptr != NULL)
2570                                 ipsapp->in_inbound_table = B_TRUE;
2571                 }
2572         }
2573 
2574         if (ipsapp->ipsap_sa_ptr == NULL) {
2575                 mutex_exit(&sq->outbound->isaf_lock);
2576                 mutex_exit(&sq->inbound->isaf_lock);
2577                 *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2578                 return (ESRCH);
2579         }
2580 
2581         if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2582             ipsapp->in_inbound_table) {
2583                 mutex_exit(&sq->outbound->isaf_lock);
2584                 mutex_exit(&sq->inbound->isaf_lock);
2585                 return (0);
2586         }
2587 
2588         mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2589         if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2590                 /*
2591                  * haspeer implies no sa_pairing, look for same spi
2592                  * in other hashtable.
2593                  */
2594                 ipsapp->ipsap_psa_ptr =
2595                     ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2596                     sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2597                 mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2598                 mutex_exit(&sq->outbound->isaf_lock);
2599                 mutex_exit(&sq->inbound->isaf_lock);
2600                 return (0);
2601         }
2602         pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2603         IPSA_COPY_ADDR(&pair_srcaddr,
2604             ipsapp->ipsap_sa_ptr->ipsa_srcaddr, sq->af);
2605         IPSA_COPY_ADDR(&pair_dstaddr,
2606             ipsapp->ipsap_sa_ptr->ipsa_dstaddr, sq->af);
2607         mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2608         mutex_exit(&sq->inbound->isaf_lock);
2609         mutex_exit(&sq->outbound->isaf_lock);
2610 
2611         if (pair_spi == 0) {
2612                 ASSERT(ipsapp->ipsap_bucket != NULL);
2613                 ASSERT(ipsapp->ipsap_pbucket != NULL);
2614                 return (0);
2615         }
2616 
2617         /* found sa in outbound sadb, peer should be inbound */
2618 
2619         if (ipsapp->in_inbound_table) {
2620                 /* Found SA in inbound table, pair will be in outbound. */
2621                 if (sq->af == AF_INET6) {
2622                         ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sq->sp,
2623                             *(uint32_t *)pair_srcaddr);
2624                 } else {
2625                         ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sq->sp,
2626                             *(uint32_t *)pair_srcaddr);
2627                 }
2628         } else {
2629                 ipsapp->ipsap_pbucket = INBOUND_BUCKET(sq->sp, pair_spi);
2630         }
2631         mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2632         ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2633             pair_spi, pair_dstaddr, pair_srcaddr, sq->af);
2634         mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2635         ASSERT(ipsapp->ipsap_bucket != NULL);
2636         ASSERT(ipsapp->ipsap_pbucket != NULL);
2637         return (0);
2638 }
2639 
2640 /*
2641  * Perform NAT-traversal cached checksum offset calculations here.
2642  */
2643 static void
2644 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2645     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2646     uint32_t *dst_addr_ptr)
2647 {
2648         struct sockaddr_in *natt_loc, *natt_rem;
2649         uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2650         uint32_t running_sum = 0;
2651 
2652 #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) +   ((x) >> 16)
2653 
2654         if (natt_rem_ext != NULL) {
2655                 uint32_t l_src;
2656                 uint32_t l_rem;
2657 
2658                 natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2659 
2660                 /* Ensured by sadb_addrfix(). */
2661                 ASSERT(natt_rem->sin_family == AF_INET);
2662 
2663                 natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2664                 newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2665                 l_src = *src_addr_ptr;
2666                 l_rem = *natt_rem_ptr;
2667 
2668                 /* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2669                 newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2670 
2671                 l_src = ntohl(l_src);
2672                 DOWN_SUM(l_src);
2673                 DOWN_SUM(l_src);
2674                 l_rem = ntohl(l_rem);
2675                 DOWN_SUM(l_rem);
2676                 DOWN_SUM(l_rem);
2677 
2678                 /*
2679                  * We're 1's complement for checksums, so check for wraparound
2680                  * here.
2681                  */
2682                 if (l_rem > l_src)
2683                         l_src--;
2684 
2685                 running_sum += l_src - l_rem;
2686 
2687                 DOWN_SUM(running_sum);
2688                 DOWN_SUM(running_sum);
2689         }
2690 
2691         if (natt_loc_ext != NULL) {
2692                 natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2693 
2694                 /* Ensured by sadb_addrfix(). */
2695                 ASSERT(natt_loc->sin_family == AF_INET);
2696 
2697                 natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2698                 newbie->ipsa_local_nat_port = natt_loc->sin_port;
2699 
2700                 /* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2701                 newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2702 
2703                 /*
2704                  * NAT-T port agility means we may have natt_loc_ext, but
2705                  * only for a local-port change.
2706                  */
2707                 if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2708                         uint32_t l_dst = ntohl(*dst_addr_ptr);
2709                         uint32_t l_loc = ntohl(*natt_loc_ptr);
2710 
2711                         DOWN_SUM(l_loc);
2712                         DOWN_SUM(l_loc);
2713                         DOWN_SUM(l_dst);
2714                         DOWN_SUM(l_dst);
2715 
2716                         /*
2717                          * We're 1's complement for checksums, so check for
2718                          * wraparound here.
2719                          */
2720                         if (l_loc > l_dst)
2721                                 l_dst--;
2722 
2723                         running_sum += l_dst - l_loc;
2724                         DOWN_SUM(running_sum);
2725                         DOWN_SUM(running_sum);
2726                 }
2727         }
2728 
2729         newbie->ipsa_inbound_cksum = running_sum;
2730 #undef DOWN_SUM
2731 }
2732 
2733 /*
2734  * This function is called from consumers that need to insert a fully-grown
2735  * security association into its tables.  This function takes into account that
2736  * SAs can be "inbound", "outbound", or "both".  The "primary" and "secondary"
2737  * hash bucket parameters are set in order of what the SA will be most of the
2738  * time.  (For example, an SA with an unspecified source, and a multicast
2739  * destination will primarily be an outbound SA.  OTOH, if that destination
2740  * is unicast for this node, then the SA will primarily be inbound.)
2741  *
2742  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2743  * to check both buckets for purposes of collision.
2744  *
2745  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2746  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2747  * with additional diagnostic information because there is at least one EINVAL
2748  * case here.
2749  */
2750 int
2751 sadb_common_add(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2752     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2753     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2754     netstack_t *ns, sadbp_t *spp)
2755 {
2756         ipsa_t *newbie_clone = NULL, *scratch;
2757         ipsap_t ipsapp;
2758         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2759         sadb_address_t *srcext =
2760             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2761         sadb_address_t *dstext =
2762             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2763         sadb_address_t *isrcext =
2764             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2765         sadb_address_t *idstext =
2766             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2767         sadb_x_kmc_t *kmcext =
2768             (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2769         sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2770         sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2771         sadb_sens_t *sens =
2772             (sadb_sens_t *)ksi->ks_in_extv[SADB_EXT_SENSITIVITY];
2773         sadb_sens_t *osens =
2774             (sadb_sens_t *)ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS];
2775         sadb_x_pair_t *pair_ext =
2776             (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2777         sadb_x_replay_ctr_t *replayext =
2778             (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
2779         int salt_offset;
2780         uint8_t *buf_ptr;
2781         struct sockaddr_in *src, *dst, *isrc, *idst;
2782         struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2783         sadb_lifetime_t *soft =
2784             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2785         sadb_lifetime_t *hard =
2786             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2787         sadb_lifetime_t *idle =
2788             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
2789         sa_family_t af;
2790         int error = 0;
2791         boolean_t isupdate = (newbie != NULL);
2792         uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
2793         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2794         ip_stack_t      *ipst = ns->netstack_ip;
2795         ipsec_alginfo_t *alg;
2796         boolean_t       async = B_FALSE;
2797 
2798         init_ipsa_pair(&ipsapp);
2799 
2800         if (srcext == NULL) {
2801                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2802                 return (EINVAL);
2803         }
2804         if (dstext == NULL) {
2805                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2806                 return (EINVAL);
2807         }
2808         if (assoc == NULL) {
2809                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2810                 return (EINVAL);
2811         }
2812 
2813         src = (struct sockaddr_in *)(srcext + 1);
2814         src6 = (struct sockaddr_in6 *)(srcext + 1);
2815         dst = (struct sockaddr_in *)(dstext + 1);
2816         dst6 = (struct sockaddr_in6 *)(dstext + 1);
2817         if (isrcext != NULL) {
2818                 isrc = (struct sockaddr_in *)(isrcext + 1);
2819                 isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
2820                 ASSERT(idstext != NULL);
2821                 idst = (struct sockaddr_in *)(idstext + 1);
2822                 idst6 = (struct sockaddr_in6 *)(idstext + 1);
2823         } else {
2824                 isrc = NULL;
2825                 isrc6 = NULL;
2826         }
2827 
2828         af = src->sin_family;
2829 
2830         if (af == AF_INET) {
2831                 src_addr_ptr = (uint32_t *)&src->sin_addr;
2832                 dst_addr_ptr = (uint32_t *)&dst->sin_addr;
2833         } else {
2834                 ASSERT(af == AF_INET6);
2835                 src_addr_ptr = (uint32_t *)&src6->sin6_addr;
2836                 dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
2837         }
2838 
2839         /*
2840          * Check to see if the new SA will be cloned AND paired. The
2841          * reason a SA will be cloned is the source or destination addresses
2842          * are not specific enough to determine if the SA goes in the outbound
2843          * or the inbound hash table, so its cloned and put in both. If
2844          * the SA is paired, it's soft linked to another SA for the other
2845          * direction. Keeping track and looking up SA's that are direction
2846          * unspecific and linked is too hard.
2847          */
2848         if (clone && (pair_ext != NULL)) {
2849                 *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
2850                 return (EINVAL);
2851         }
2852 
2853         if (!isupdate) {
2854                 newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
2855                     src_addr_ptr, dst_addr_ptr, af, ns);
2856                 if (newbie == NULL)
2857                         return (ENOMEM);
2858         }
2859 
2860         mutex_enter(&newbie->ipsa_lock);
2861 
2862         if (isrc != NULL) {
2863                 if (isrc->sin_family == AF_INET) {
2864                         if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
2865                                 if (srcext->sadb_address_proto != 0) {
2866                                         /*
2867                                          * Mismatched outer-packet protocol
2868                                          * and inner-packet address family.
2869                                          */
2870                                         mutex_exit(&newbie->ipsa_lock);
2871                                         error = EPROTOTYPE;
2872                                         *diagnostic =
2873                                             SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
2874                                         goto error;
2875                                 } else {
2876                                         /* Fill in with explicit protocol. */
2877                                         srcext->sadb_address_proto =
2878                                             IPPROTO_ENCAP;
2879                                         dstext->sadb_address_proto =
2880                                             IPPROTO_ENCAP;
2881                                 }
2882                         }
2883                         isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
2884                         idst_addr_ptr = (uint32_t *)&idst->sin_addr;
2885                 } else {
2886                         ASSERT(isrc->sin_family == AF_INET6);
2887                         if (srcext->sadb_address_proto != IPPROTO_IPV6) {
2888                                 if (srcext->sadb_address_proto != 0) {
2889                                         /*
2890                                          * Mismatched outer-packet protocol
2891                                          * and inner-packet address family.
2892                                          */
2893                                         mutex_exit(&newbie->ipsa_lock);
2894                                         error = EPROTOTYPE;
2895                                         *diagnostic =
2896                                             SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
2897                                         goto error;
2898                                 } else {
2899                                         /* Fill in with explicit protocol. */
2900                                         srcext->sadb_address_proto =
2901                                             IPPROTO_IPV6;
2902                                         dstext->sadb_address_proto =
2903                                             IPPROTO_IPV6;
2904                                 }
2905                         }
2906                         isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
2907                         idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
2908                 }
2909                 newbie->ipsa_innerfam = isrc->sin_family;
2910 
2911                 IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
2912                     newbie->ipsa_innerfam);
2913                 IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
2914                     newbie->ipsa_innerfam);
2915                 newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
2916                 newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
2917 
2918                 /* Unique value uses inner-ports for Tunnel Mode... */
2919                 newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
2920                     idst->sin_port, dstext->sadb_address_proto,
2921                     idstext->sadb_address_proto);
2922                 newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
2923                     idst->sin_port, dstext->sadb_address_proto,
2924                     idstext->sadb_address_proto);
2925         } else {
2926                 /* ... and outer-ports for Transport Mode. */
2927                 newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
2928                     dst->sin_port, dstext->sadb_address_proto, 0);
2929                 newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
2930                     dst->sin_port, dstext->sadb_address_proto, 0);
2931         }
2932         if (newbie->ipsa_unique_mask != (uint64_t)0)
2933                 newbie->ipsa_flags |= IPSA_F_UNIQUE;
2934 
2935         sadb_nat_calculations(newbie,
2936             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
2937             (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
2938             src_addr_ptr, dst_addr_ptr);
2939 
2940         newbie->ipsa_type = samsg->sadb_msg_satype;
2941 
2942         ASSERT(assoc->sadb_sa_state == SADB_SASTATE_MATURE);
2943         newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
2944         newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
2945 
2946         newbie->ipsa_flags |= assoc->sadb_sa_flags;
2947         if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
2948             ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
2949                 mutex_exit(&newbie->ipsa_lock);
2950                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
2951                 error = EINVAL;
2952                 goto error;
2953         }
2954         if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
2955             ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
2956                 mutex_exit(&newbie->ipsa_lock);
2957                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
2958                 error = EINVAL;
2959                 goto error;
2960         }
2961         if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
2962             ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
2963                 mutex_exit(&newbie->ipsa_lock);
2964                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
2965                 error = EINVAL;
2966                 goto error;
2967         }
2968         /*
2969          * If unspecified source address, force replay_wsize to 0.
2970          * This is because an SA that has multiple sources of secure
2971          * traffic cannot enforce a replay counter w/o synchronizing the
2972          * senders.
2973          */
2974         if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
2975                 newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
2976         else
2977                 newbie->ipsa_replay_wsize = 0;
2978 
2979         newbie->ipsa_addtime = gethrestime_sec();
2980 
2981         if (kmcext != NULL) {
2982                 newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
2983                 newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
2984         }
2985 
2986         /*
2987          * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
2988          * The spec says that one can update current lifetimes, but
2989          * that seems impractical, especially in the larval-to-mature
2990          * update that this function performs.
2991          */
2992         if (soft != NULL) {
2993                 newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
2994                 newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
2995                 newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
2996                 newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
2997                 SET_EXPIRE(newbie, softaddlt, softexpiretime);
2998         }
2999         if (hard != NULL) {
3000                 newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3001                 newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3002                 newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3003                 newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3004                 SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3005         }
3006         if (idle != NULL) {
3007                 newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3008                 newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3009                 newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3010                     newbie->ipsa_idleaddlt;
3011                 newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3012         }
3013 
3014         newbie->ipsa_authtmpl = NULL;
3015         newbie->ipsa_encrtmpl = NULL;
3016 
3017 #ifdef IPSEC_LATENCY_TEST
3018         if (akey != NULL && newbie->ipsa_auth_alg != SADB_AALG_NONE) {
3019 #else
3020         if (akey != NULL) {
3021 #endif
3022                 async = (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
3023                     IPSEC_ALGS_EXEC_ASYNC);
3024 
3025                 newbie->ipsa_authkeybits = akey->sadb_key_bits;
3026                 newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3027                 /* In case we have to round up to the next byte... */
3028                 if ((akey->sadb_key_bits & 0x7) != 0)
3029                         newbie->ipsa_authkeylen++;
3030                 newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3031                     KM_NOSLEEP);
3032                 if (newbie->ipsa_authkey == NULL) {
3033                         error = ENOMEM;
3034                         mutex_exit(&newbie->ipsa_lock);
3035                         goto error;
3036                 }
3037                 bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3038                 bzero(akey + 1, newbie->ipsa_authkeylen);
3039 
3040                 /*
3041                  * Pre-initialize the kernel crypto framework key
3042                  * structure.
3043                  */
3044                 newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3045                 newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3046                 newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3047 
3048                 mutex_enter(&ipss->ipsec_alg_lock);
3049                 alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3050                     [newbie->ipsa_auth_alg];
3051                 if (alg != NULL && ALG_VALID(alg)) {
3052                         newbie->ipsa_amech.cm_type = alg->alg_mech_type;
3053                         newbie->ipsa_amech.cm_param =
3054                             (char *)&newbie->ipsa_mac_len;
3055                         newbie->ipsa_amech.cm_param_len = sizeof (size_t);
3056                         newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
3057                 } else {
3058                         newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3059                 }
3060                 error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3061                 mutex_exit(&ipss->ipsec_alg_lock);
3062                 if (error != 0) {
3063                         mutex_exit(&newbie->ipsa_lock);
3064                         /*
3065                          * An error here indicates that alg is the wrong type
3066                          * (IE: not authentication) or its not in the alg tables
3067                          * created by ipsecalgs(1m), or Kcf does not like the
3068                          * parameters passed in with this algorithm, which is
3069                          * probably a coding error!
3070                          */
3071                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3072 
3073                         goto error;
3074                 }
3075         }
3076 
3077         if (ekey != NULL) {
3078                 mutex_enter(&ipss->ipsec_alg_lock);
3079                 async = async || (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
3080                     IPSEC_ALGS_EXEC_ASYNC);
3081                 alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3082                     [newbie->ipsa_encr_alg];
3083 
3084                 if (alg != NULL && ALG_VALID(alg)) {
3085                         newbie->ipsa_emech.cm_type = alg->alg_mech_type;
3086                         newbie->ipsa_datalen = alg->alg_datalen;
3087                         if (alg->alg_flags & ALG_FLAG_COUNTERMODE)
3088                                 newbie->ipsa_flags |= IPSA_F_COUNTERMODE;
3089 
3090                         if (alg->alg_flags & ALG_FLAG_COMBINED) {
3091                                 newbie->ipsa_flags |= IPSA_F_COMBINED;
3092                                 newbie->ipsa_mac_len =  alg->alg_icvlen;
3093                         }
3094 
3095                         if (alg->alg_flags & ALG_FLAG_CCM)
3096                                 newbie->ipsa_noncefunc = ccm_params_init;
3097                         else if (alg->alg_flags & ALG_FLAG_GCM)
3098                                 newbie->ipsa_noncefunc = gcm_params_init;
3099                         else newbie->ipsa_noncefunc = cbc_params_init;
3100 
3101                         newbie->ipsa_saltlen = alg->alg_saltlen;
3102                         newbie->ipsa_saltbits = SADB_8TO1(newbie->ipsa_saltlen);
3103                         newbie->ipsa_iv_len = alg->alg_ivlen;
3104                         newbie->ipsa_nonce_len = newbie->ipsa_saltlen +
3105                             newbie->ipsa_iv_len;
3106                         newbie->ipsa_emech.cm_param = NULL;
3107                         newbie->ipsa_emech.cm_param_len = 0;
3108                 } else {
3109                         newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3110                 }
3111                 mutex_exit(&ipss->ipsec_alg_lock);
3112 
3113                 /*
3114                  * The byte stream following the sadb_key_t is made up of:
3115                  * key bytes, [salt bytes], [IV initial value]
3116                  * All of these have variable length. The IV is typically
3117                  * randomly generated by this function and not passed in.
3118                  * By supporting the injection of a known IV, the whole
3119                  * IPsec subsystem and the underlying crypto subsystem
3120                  * can be tested with known test vectors.
3121                  *
3122                  * The keying material has been checked by ext_check()
3123                  * and ipsec_valid_key_size(), after removing salt/IV
3124                  * bits, whats left is the encryption key. If this is too
3125                  * short, ipsec_create_ctx_tmpl() will fail and the SA
3126                  * won't get created.
3127                  *
3128                  * set ipsa_encrkeylen to length of key only.
3129                  */
3130                 newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3131                 newbie->ipsa_encrkeybits -= ekey->sadb_key_reserved;
3132                 newbie->ipsa_encrkeybits -= newbie->ipsa_saltbits;
3133                 newbie->ipsa_encrkeylen = SADB_1TO8(newbie->ipsa_encrkeybits);
3134 
3135                 /* In case we have to round up to the next byte... */
3136                 if ((ekey->sadb_key_bits & 0x7) != 0)
3137                         newbie->ipsa_encrkeylen++;
3138 
3139                 newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3140                     KM_NOSLEEP);
3141                 if (newbie->ipsa_encrkey == NULL) {
3142                         error = ENOMEM;
3143                         mutex_exit(&newbie->ipsa_lock);
3144                         goto error;
3145                 }
3146 
3147                 buf_ptr = (uint8_t *)(ekey + 1);
3148                 bcopy(buf_ptr, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3149 
3150                 if (newbie->ipsa_flags & IPSA_F_COMBINED) {
3151                         /*
3152                          * Combined mode algs need a nonce. Copy the salt and
3153                          * IV into a buffer. The ipsa_nonce is a pointer into
3154                          * this buffer, some bytes at the start of the buffer
3155                          * may be unused, depends on the salt length. The IV
3156                          * is 64 bit aligned so it can be incremented as a
3157                          * uint64_t. Zero out key in samsg_t before freeing.
3158                          */
3159 
3160                         newbie->ipsa_nonce_buf = kmem_alloc(
3161                             sizeof (ipsec_nonce_t), KM_NOSLEEP);
3162                         if (newbie->ipsa_nonce_buf == NULL) {
3163                                 error = ENOMEM;
3164                                 mutex_exit(&newbie->ipsa_lock);
3165                                 goto error;
3166                         }
3167                         /*
3168                          * Initialize nonce and salt pointers to point
3169                          * to the nonce buffer. This is just in case we get
3170                          * bad data, the pointers will be valid, the data
3171                          * won't be.
3172                          *
3173                          * See sadb.h for layout of nonce.
3174                          */
3175                         newbie->ipsa_iv = &newbie->ipsa_nonce_buf->iv;
3176                         newbie->ipsa_salt = (uint8_t *)newbie->ipsa_nonce_buf;
3177                         newbie->ipsa_nonce = newbie->ipsa_salt;
3178                         if (newbie->ipsa_saltlen != 0) {
3179                                 salt_offset = MAXSALTSIZE -
3180                                     newbie->ipsa_saltlen;
3181                                 newbie->ipsa_salt = (uint8_t *)
3182                                     &newbie->ipsa_nonce_buf->salt[salt_offset];
3183                                 newbie->ipsa_nonce = newbie->ipsa_salt;
3184                                 buf_ptr += newbie->ipsa_encrkeylen;
3185                                 bcopy(buf_ptr, newbie->ipsa_salt,
3186                                     newbie->ipsa_saltlen);
3187                         }
3188                         /*
3189                          * The IV for CCM/GCM mode increments, it should not
3190                          * repeat. Get a random value for the IV, make a
3191                          * copy, the SA will expire when/if the IV ever
3192                          * wraps back to the initial value. If an Initial IV
3193                          * is passed in via PF_KEY, save this in the SA.
3194                          * Initialising IV for inbound is pointless as its
3195                          * taken from the inbound packet.
3196                          */
3197                         if (!is_inbound) {
3198                                 if (ekey->sadb_key_reserved != 0) {
3199                                         buf_ptr += newbie->ipsa_saltlen;
3200                                         bcopy(buf_ptr, (uint8_t *)newbie->
3201                                             ipsa_iv, SADB_1TO8(ekey->
3202                                             sadb_key_reserved));
3203                                 } else {
3204                                         (void) random_get_pseudo_bytes(
3205                                             (uint8_t *)newbie->ipsa_iv,
3206                                             newbie->ipsa_iv_len);
3207                                 }
3208                                 newbie->ipsa_iv_softexpire =
3209                                     (*newbie->ipsa_iv) << 9;
3210                                 newbie->ipsa_iv_hardexpire = *newbie->ipsa_iv;
3211                         }
3212                 }
3213                 bzero((ekey + 1), SADB_1TO8(ekey->sadb_key_bits));
3214 
3215                 /*
3216                  * Pre-initialize the kernel crypto framework key
3217                  * structure.
3218                  */
3219                 newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3220                 newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3221                 newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3222 
3223                 mutex_enter(&ipss->ipsec_alg_lock);
3224                 error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3225                 mutex_exit(&ipss->ipsec_alg_lock);
3226                 if (error != 0) {
3227                         mutex_exit(&newbie->ipsa_lock);
3228                         /* See above for error explanation. */
3229                         *diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3230                         goto error;
3231                 }
3232         }
3233 
3234         if (async)
3235                 newbie->ipsa_flags |= IPSA_F_ASYNC;
3236 
3237         /*
3238          * Ptrs to processing functions.
3239          */
3240         if (newbie->ipsa_type == SADB_SATYPE_ESP)
3241                 ipsecesp_init_funcs(newbie);
3242         else
3243                 ipsecah_init_funcs(newbie);
3244         ASSERT(newbie->ipsa_output_func != NULL &&
3245             newbie->ipsa_input_func != NULL);
3246 
3247         /*
3248          * Certificate ID stuff.
3249          */
3250         if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3251                 sadb_ident_t *id =
3252                     (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3253 
3254                 /*
3255                  * Can assume strlen() will return okay because ext_check() in
3256                  * keysock.c prepares the string for us.
3257                  */
3258                 newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3259                     (char *)(id+1), ns);
3260                 if (newbie->ipsa_src_cid == NULL) {
3261                         error = ENOMEM;
3262                         mutex_exit(&newbie->ipsa_lock);
3263                         goto error;
3264                 }
3265         }
3266 
3267         if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3268                 sadb_ident_t *id =
3269                     (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3270 
3271                 /*
3272                  * Can assume strlen() will return okay because ext_check() in
3273                  * keysock.c prepares the string for us.
3274                  */
3275                 newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3276                     (char *)(id+1), ns);
3277                 if (newbie->ipsa_dst_cid == NULL) {
3278                         error = ENOMEM;
3279                         mutex_exit(&newbie->ipsa_lock);
3280                         goto error;
3281                 }
3282         }
3283 
3284         /*
3285          * sensitivity label handling code:
3286          * Convert sens + bitmap into cred_t, and associate it
3287          * with the new SA.
3288          */
3289         if (sens != NULL) {
3290                 uint64_t *bitmap = (uint64_t *)(sens + 1);
3291 
3292                 newbie->ipsa_tsl = sadb_label_from_sens(sens, bitmap);
3293         }
3294 
3295         /*
3296          * Likewise for outer sensitivity.
3297          */
3298         if (osens != NULL) {
3299                 uint64_t *bitmap = (uint64_t *)(osens + 1);
3300                 ts_label_t *tsl, *effective_tsl;
3301                 uint32_t *peer_addr_ptr;
3302                 zoneid_t zoneid = GLOBAL_ZONEID;
3303                 zone_t *zone;
3304 
3305                 peer_addr_ptr = is_inbound ? src_addr_ptr : dst_addr_ptr;
3306 
3307                 tsl = sadb_label_from_sens(osens, bitmap);
3308                 newbie->ipsa_mac_exempt = CONN_MAC_DEFAULT;
3309 
3310                 if (osens->sadb_x_sens_flags & SADB_X_SENS_IMPLICIT) {
3311                         newbie->ipsa_mac_exempt = CONN_MAC_IMPLICIT;
3312                 }
3313 
3314                 error = tsol_check_dest(tsl, peer_addr_ptr,
3315                     (af == AF_INET6)?IPV6_VERSION:IPV4_VERSION,
3316                     newbie->ipsa_mac_exempt, B_TRUE, &effective_tsl);
3317                 if (error != 0) {
3318                         label_rele(tsl);
3319                         mutex_exit(&newbie->ipsa_lock);
3320                         goto error;
3321                 }
3322 
3323                 if (effective_tsl != NULL) {
3324                         label_rele(tsl);
3325                         tsl = effective_tsl;
3326                 }
3327 
3328                 newbie->ipsa_otsl = tsl;
3329 
3330                 zone = zone_find_by_label(tsl);
3331                 if (zone != NULL) {
3332                         zoneid = zone->zone_id;
3333                         zone_rele(zone);
3334                 }
3335                 /*
3336                  * For exclusive stacks we set the zoneid to zero to operate
3337                  * as if in the global zone for tsol_compute_label_v4/v6
3338                  */
3339                 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
3340                         zoneid = GLOBAL_ZONEID;
3341 
3342                 if (af == AF_INET6) {
3343                         error = tsol_compute_label_v6(tsl, zoneid,
3344                             (in6_addr_t *)peer_addr_ptr,
3345                             newbie->ipsa_opt_storage, ipst);
3346                 } else {
3347                         error = tsol_compute_label_v4(tsl, zoneid,
3348                             *peer_addr_ptr, newbie->ipsa_opt_storage, ipst);
3349                 }
3350                 if (error != 0) {
3351                         mutex_exit(&newbie->ipsa_lock);
3352                         goto error;
3353                 }
3354         }
3355 
3356 
3357         if (replayext != NULL) {
3358                 if ((replayext->sadb_x_rc_replay32 == 0) &&
3359                     (replayext->sadb_x_rc_replay64 != 0)) {
3360                         error = EOPNOTSUPP;
3361                         *diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3362                         mutex_exit(&newbie->ipsa_lock);
3363                         goto error;
3364                 }
3365                 newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3366         }
3367 
3368         /* now that the SA has been updated, set its new state */
3369         newbie->ipsa_state = assoc->sadb_sa_state;
3370 
3371         if (clone) {
3372                 newbie->ipsa_haspeer = B_TRUE;
3373         } else {
3374                 if (!is_inbound) {
3375                         lifetime_fuzz(newbie);
3376                 }
3377         }
3378         /*
3379          * The less locks I hold when doing an insertion and possible cloning,
3380          * the better!
3381          */
3382         mutex_exit(&newbie->ipsa_lock);
3383 
3384         if (clone) {
3385                 newbie_clone = sadb_cloneassoc(newbie);
3386 
3387                 if (newbie_clone == NULL) {
3388                         error = ENOMEM;
3389                         goto error;
3390                 }
3391         }
3392 
3393         /*
3394          * Enter the bucket locks.  The order of entry is outbound,
3395          * inbound.  We map "primary" and "secondary" into outbound and inbound
3396          * based on the destination address type.  If the destination address
3397          * type is for a node that isn't mine (or potentially mine), the
3398          * "primary" bucket is the outbound one.
3399          */
3400         if (!is_inbound) {
3401                 /* primary == outbound */
3402                 mutex_enter(&primary->isaf_lock);
3403                 mutex_enter(&secondary->isaf_lock);
3404         } else {
3405                 /* primary == inbound */
3406                 mutex_enter(&secondary->isaf_lock);
3407                 mutex_enter(&primary->isaf_lock);
3408         }
3409 
3410         /*
3411          * sadb_insertassoc() doesn't increment the reference
3412          * count.  We therefore have to increment the
3413          * reference count one more time to reflect the
3414          * pointers of the table that reference this SA.
3415          */
3416         IPSA_REFHOLD(newbie);
3417 
3418         if (isupdate) {
3419                 /*
3420                  * Unlink from larval holding cell in the "inbound" fanout.
3421                  */
3422                 ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3423                     newbie->ipsa_linklock == &secondary->isaf_lock);
3424                 sadb_unlinkassoc(newbie);
3425         }
3426 
3427         mutex_enter(&newbie->ipsa_lock);
3428         error = sadb_insertassoc(newbie, primary);
3429         mutex_exit(&newbie->ipsa_lock);
3430 
3431         if (error != 0) {
3432                 /*
3433                  * Since sadb_insertassoc() failed, we must decrement the
3434                  * refcount again so the cleanup code will actually free
3435                  * the offending SA.
3436                  */
3437                 IPSA_REFRELE(newbie);
3438                 goto error_unlock;
3439         }
3440 
3441         if (newbie_clone != NULL) {
3442                 mutex_enter(&newbie_clone->ipsa_lock);
3443                 error = sadb_insertassoc(newbie_clone, secondary);
3444                 mutex_exit(&newbie_clone->ipsa_lock);
3445                 if (error != 0) {
3446                         /* Collision in secondary table. */
3447                         sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3448                         goto error_unlock;
3449                 }
3450                 IPSA_REFHOLD(newbie_clone);
3451         } else {
3452                 ASSERT(primary != secondary);
3453                 scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3454                     ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3455                 if (scratch != NULL) {
3456                         /* Collision in secondary table. */
3457                         sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3458                         /* Set the error, since ipsec_getassocbyspi() can't. */
3459                         error = EEXIST;
3460                         goto error_unlock;
3461                 }
3462         }
3463 
3464         /* OKAY!  So let's do some reality check assertions. */
3465 
3466         ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3467         ASSERT(newbie_clone == NULL ||
3468             (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3469 
3470 error_unlock:
3471 
3472         /*
3473          * We can exit the locks in any order.  Only entrance needs to
3474          * follow any protocol.
3475          */
3476         mutex_exit(&secondary->isaf_lock);
3477         mutex_exit(&primary->isaf_lock);
3478 
3479         if (pair_ext != NULL && error == 0) {
3480                 /* update pair_spi if it exists. */
3481                 ipsa_query_t sq;
3482 
3483                 sq.spp = spp;           /* XXX param */
3484                 error = sadb_form_query(ksi, IPSA_Q_DST, IPSA_Q_SRC|IPSA_Q_DST|
3485                     IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, &sq, diagnostic);
3486                 if (error)
3487                         return (error);
3488 
3489                 error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
3490 
3491                 if (error != 0)
3492                         goto error;
3493 
3494                 if (ipsapp.ipsap_psa_ptr != NULL) {
3495                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3496                         error = EINVAL;
3497                 } else {
3498                         /* update_pairing() sets diagnostic */
3499                         error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
3500                 }
3501         }
3502         /* Common error point for this routine. */
3503 error:
3504         if (newbie != NULL) {
3505                 if (error != 0) {
3506                         /* This SA is broken, let the reaper clean up. */
3507                         mutex_enter(&newbie->ipsa_lock);
3508                         newbie->ipsa_state = IPSA_STATE_DEAD;
3509                         newbie->ipsa_hardexpiretime = 1;
3510                         mutex_exit(&newbie->ipsa_lock);
3511                 }
3512                 IPSA_REFRELE(newbie);
3513         }
3514         if (newbie_clone != NULL) {
3515                 IPSA_REFRELE(newbie_clone);
3516         }
3517 
3518         if (error == 0) {
3519                 /*
3520                  * Construct favorable PF_KEY return message and send to
3521                  * keysock. Update the flags in the original keysock message
3522                  * to reflect the actual flags in the new SA.
3523                  *  (Q:  Do I need to pass "newbie"?  If I do,
3524                  * make sure to REFHOLD, call, then REFRELE.)
3525                  */
3526                 assoc->sadb_sa_flags = newbie->ipsa_flags;
3527                 sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3528         }
3529 
3530         destroy_ipsa_pair(&ipsapp);
3531         return (error);
3532 }
3533 
3534 /*
3535  * Set the time of first use for a security association.  Update any
3536  * expiration times as a result.
3537  */
3538 void
3539 sadb_set_usetime(ipsa_t *assoc)
3540 {
3541         time_t snapshot = gethrestime_sec();
3542 
3543         mutex_enter(&assoc->ipsa_lock);
3544         assoc->ipsa_lastuse = snapshot;
3545         assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3546 
3547         /*
3548          * Caller does check usetime before calling me usually, and
3549          * double-checking is better than a mutex_enter/exit hit.
3550          */
3551         if (assoc->ipsa_usetime == 0) {
3552                 /*
3553                  * This is redundant for outbound SA's, as
3554                  * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3555                  * Inbound SAs, however, have no such protection.
3556                  */
3557                 assoc->ipsa_flags |= IPSA_F_USED;
3558                 assoc->ipsa_usetime = snapshot;
3559 
3560                 /*
3561                  * After setting the use time, see if we have a use lifetime
3562                  * that would cause the actual SA expiration time to shorten.
3563                  */
3564                 UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3565                 UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3566         }
3567         mutex_exit(&assoc->ipsa_lock);
3568 }
3569 
3570 /*
3571  * Send up a PF_KEY expire message for this association.
3572  */
3573 static void
3574 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3575 {
3576         mblk_t *mp, *mp1;
3577         int alloclen, af;
3578         sadb_msg_t *samsg;
3579         sadb_lifetime_t *current, *expire;
3580         sadb_sa_t *saext;
3581         uint8_t *end;
3582         boolean_t tunnel_mode;
3583 
3584         ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3585 
3586         /* Don't bother sending if there's no queue. */
3587         if (pfkey_q == NULL)
3588                 return;
3589 
3590         mp = sadb_keysock_out(0);
3591         if (mp == NULL) {
3592                 /* cmn_err(CE_WARN, */
3593                 /*      "sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3594                 return;
3595         }
3596 
3597         alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3598             2 * sizeof (sadb_address_t) + sizeof (*saext);
3599 
3600         af = assoc->ipsa_addrfam;
3601         switch (af) {
3602         case AF_INET:
3603                 alloclen += 2 * sizeof (struct sockaddr_in);
3604                 break;
3605         case AF_INET6:
3606                 alloclen += 2 * sizeof (struct sockaddr_in6);
3607                 break;
3608         default:
3609                 /* Won't happen unless there's a kernel bug. */
3610                 freeb(mp);
3611                 cmn_err(CE_WARN,
3612                     "sadb_expire_assoc: Unknown address length.\n");
3613                 return;
3614         }
3615 
3616         tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3617         if (tunnel_mode) {
3618                 alloclen += 2 * sizeof (sadb_address_t);
3619                 switch (assoc->ipsa_innerfam) {
3620                 case AF_INET:
3621                         alloclen += 2 * sizeof (struct sockaddr_in);
3622                         break;
3623                 case AF_INET6:
3624                         alloclen += 2 * sizeof (struct sockaddr_in6);
3625                         break;
3626                 default:
3627                         /* Won't happen unless there's a kernel bug. */
3628                         freeb(mp);
3629                         cmn_err(CE_WARN, "sadb_expire_assoc: "
3630                             "Unknown inner address length.\n");
3631                         return;
3632                 }
3633         }
3634 
3635         mp->b_cont = allocb(alloclen, BPRI_HI);
3636         if (mp->b_cont == NULL) {
3637                 freeb(mp);
3638                 /* cmn_err(CE_WARN, */
3639                 /*      "sadb_expire_assoc: Can't allocate message.\n"); */
3640                 return;
3641         }
3642 
3643         mp1 = mp;
3644         mp = mp->b_cont;
3645         end = mp->b_wptr + alloclen;
3646 
3647         samsg = (sadb_msg_t *)mp->b_wptr;
3648         mp->b_wptr += sizeof (*samsg);
3649         samsg->sadb_msg_version = PF_KEY_V2;
3650         samsg->sadb_msg_type = SADB_EXPIRE;
3651         samsg->sadb_msg_errno = 0;
3652         samsg->sadb_msg_satype = assoc->ipsa_type;
3653         samsg->sadb_msg_len = SADB_8TO64(alloclen);
3654         samsg->sadb_msg_reserved = 0;
3655         samsg->sadb_msg_seq = 0;
3656         samsg->sadb_msg_pid = 0;
3657 
3658         saext = (sadb_sa_t *)mp->b_wptr;
3659         mp->b_wptr += sizeof (*saext);
3660         saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3661         saext->sadb_sa_exttype = SADB_EXT_SA;
3662         saext->sadb_sa_spi = assoc->ipsa_spi;
3663         saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3664         saext->sadb_sa_state = assoc->ipsa_state;
3665         saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3666         saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3667         saext->sadb_sa_flags = assoc->ipsa_flags;
3668 
3669         current = (sadb_lifetime_t *)mp->b_wptr;
3670         mp->b_wptr += sizeof (sadb_lifetime_t);
3671         current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3672         current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3673         /* We do not support the concept. */
3674         current->sadb_lifetime_allocations = 0;
3675         current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3676         current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3677         current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3678 
3679         expire = (sadb_lifetime_t *)mp->b_wptr;
3680         mp->b_wptr += sizeof (*expire);
3681         expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3682 
3683         if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3684                 expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3685                 expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3686                 expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3687                 expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3688                 expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3689         } else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3690                 expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3691                 expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3692                 expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3693                 expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3694                 expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3695         } else {
3696                 ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3697                 expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3698                 expire->sadb_lifetime_allocations = 0;
3699                 expire->sadb_lifetime_bytes = 0;
3700                 expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3701                 expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3702         }
3703 
3704         mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3705             af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3706             SA_PROTO(assoc), 0);
3707         ASSERT(mp->b_wptr != NULL);
3708 
3709         mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3710             af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3711             SA_PROTO(assoc), 0);
3712         ASSERT(mp->b_wptr != NULL);
3713 
3714         if (tunnel_mode) {
3715                 mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3716                     SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3717                     assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3718                     assoc->ipsa_innersrcpfx);
3719                 ASSERT(mp->b_wptr != NULL);
3720                 mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3721                     SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3722                     assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3723                     assoc->ipsa_innerdstpfx);
3724                 ASSERT(mp->b_wptr != NULL);
3725         }
3726 
3727         /* Can just putnext, we're ready to go! */
3728         putnext(pfkey_q, mp1);
3729 }
3730 
3731 /*
3732  * "Age" the SA with the number of bytes that was used to protect traffic.
3733  * Send an SADB_EXPIRE message if appropriate.  Return B_TRUE if there was
3734  * enough "charge" left in the SA to protect the data.  Return B_FALSE
3735  * otherwise.  (If B_FALSE is returned, the association either was, or became
3736  * DEAD.)
3737  */
3738 boolean_t
3739 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3740     boolean_t sendmsg)
3741 {
3742         boolean_t rc = B_TRUE;
3743         uint64_t newtotal;
3744 
3745         mutex_enter(&assoc->ipsa_lock);
3746         newtotal = assoc->ipsa_bytes + bytes;
3747         if (assoc->ipsa_hardbyteslt != 0 &&
3748             newtotal >= assoc->ipsa_hardbyteslt) {
3749                 if (assoc->ipsa_state != IPSA_STATE_DEAD) {
3750                         /*
3751                          * Send EXPIRE message to PF_KEY.  May wish to pawn
3752                          * this off on another non-interrupt thread.  Also
3753                          * unlink this SA immediately.
3754                          */
3755                         assoc->ipsa_state = IPSA_STATE_DEAD;
3756                         if (sendmsg)
3757                                 sadb_expire_assoc(pfkey_q, assoc);
3758                         /*
3759                          * Set non-zero expiration time so sadb_age_assoc()
3760                          * will work when reaping.
3761                          */
3762                         assoc->ipsa_hardexpiretime = (time_t)1;
3763                 } /* Else someone beat me to it! */
3764                 rc = B_FALSE;
3765         } else if (assoc->ipsa_softbyteslt != 0 &&
3766             (newtotal >= assoc->ipsa_softbyteslt)) {
3767                 if (assoc->ipsa_state < IPSA_STATE_DYING) {
3768                         /*
3769                          * Send EXPIRE message to PF_KEY.  May wish to pawn
3770                          * this off on another non-interrupt thread.
3771                          */
3772                         assoc->ipsa_state = IPSA_STATE_DYING;
3773                         assoc->ipsa_bytes = newtotal;
3774                         if (sendmsg)
3775                                 sadb_expire_assoc(pfkey_q, assoc);
3776                 } /* Else someone beat me to it! */
3777         }
3778         if (rc == B_TRUE)
3779                 assoc->ipsa_bytes = newtotal;
3780         mutex_exit(&assoc->ipsa_lock);
3781         return (rc);
3782 }
3783 
3784 /*
3785  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3786  *     sadb_age_assoc().
3787  */
3788 static ipsa_t *
3789 sadb_torch_assoc(isaf_t *head, ipsa_t *sa)
3790 {
3791         ASSERT(MUTEX_HELD(&head->isaf_lock));
3792         ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3793         ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3794 
3795         /*
3796          * Force cached SAs to be revalidated..
3797          */
3798         head->isaf_gen++;
3799 
3800         mutex_exit(&sa->ipsa_lock);
3801         sadb_unlinkassoc(sa);
3802 
3803         return (NULL);
3804 }
3805 
3806 /*
3807  * Do various SA-is-idle activities depending on delta (the number of idle
3808  * seconds on the SA) and/or other properties of the SA.
3809  *
3810  * Return B_TRUE if I've sent a packet, because I have to drop the
3811  * association's mutex before sending a packet out the wire.
3812  */
3813 /* ARGSUSED */
3814 static boolean_t
3815 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3816 {
3817         ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3818         int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3819 
3820         ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3821 
3822         if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3823             delta >= nat_t_interval &&
3824             gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
3825                 ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
3826                 assoc->ipsa_last_nat_t_ka = gethrestime_sec();
3827                 mutex_exit(&assoc->ipsa_lock);
3828                 ipsecesp_send_keepalive(assoc);
3829                 return (B_TRUE);
3830         }
3831         return (B_FALSE);
3832 }
3833 
3834 /*
3835  * Return "assoc" if haspeer is true and I send an expire.  This allows
3836  * the consumers' aging functions to tidy up an expired SA's peer.
3837  */
3838 static ipsa_t *
3839 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3840     time_t current, int reap_delay, boolean_t inbound)
3841 {
3842         ipsa_t *retval = NULL;
3843         boolean_t dropped_mutex = B_FALSE;
3844 
3845         ASSERT(MUTEX_HELD(&head->isaf_lock));
3846 
3847         mutex_enter(&assoc->ipsa_lock);
3848 
3849         /*
3850          * Be aggressive in reaping expired LARVAL SAs.
3851          */
3852         if (assoc->ipsa_state == IPSA_STATE_LARVAL &&
3853             assoc->ipsa_hardexpiretime <= current) {
3854                 assoc->ipsa_state = IPSA_STATE_DEAD;
3855                 return (sadb_torch_assoc(head, assoc));
3856         }
3857 
3858         /*
3859          * Check lifetimes.  Fortunately, SA setup is done
3860          * such that there are only two times to look at,
3861          * softexpiretime, and hardexpiretime.
3862          *
3863          * Check hard first.
3864          */
3865 
3866         if (assoc->ipsa_hardexpiretime != 0 &&
3867             assoc->ipsa_hardexpiretime <= current) {
3868                 if (assoc->ipsa_state == IPSA_STATE_DEAD)
3869                         return (sadb_torch_assoc(head, assoc));
3870 
3871                 /*
3872                  * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
3873                  */
3874                 assoc->ipsa_state = IPSA_STATE_DEAD;
3875                 if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
3876                         /*
3877                          * If the SA is paired or peered with another, put
3878                          * a copy on a list which can be processed later, the
3879                          * pair/peer SA needs to be updated so the both die
3880                          * at the same time.
3881                          *
3882                          * If I return assoc, I have to bump up its reference
3883                          * count to keep with the ipsa_t reference count
3884                          * semantics.
3885                          */
3886                         IPSA_REFHOLD(assoc);
3887                         retval = assoc;
3888                 }
3889                 sadb_expire_assoc(pfkey_q, assoc);
3890                 assoc->ipsa_hardexpiretime = current + reap_delay;
3891         } else if (assoc->ipsa_softexpiretime != 0 &&
3892             assoc->ipsa_softexpiretime <= current &&
3893             assoc->ipsa_state < IPSA_STATE_DYING) {
3894                 /*
3895                  * Send EXPIRE message to PF_KEY.  May wish to pawn
3896                  * this off on another non-interrupt thread.
3897                  */
3898                 assoc->ipsa_state = IPSA_STATE_DYING;
3899                 if (assoc->ipsa_haspeer) {
3900                         /*
3901                          * If the SA has a peer, update the peer's state
3902                          * on SOFT_EXPIRE, this is mostly to prevent two
3903                          * expire messages from effectively the same SA.
3904                          *
3905                          * Don't care about paired SA's, then can (and should)
3906                          * be able to soft expire at different times.
3907                          *
3908                          * If I return assoc, I have to bump up its
3909                          * reference count to keep with the ipsa_t reference
3910                          * count semantics.
3911                          */
3912                         IPSA_REFHOLD(assoc);
3913                         retval = assoc;
3914                 }
3915                 sadb_expire_assoc(pfkey_q, assoc);
3916         } else if (assoc->ipsa_idletime != 0 &&
3917             assoc->ipsa_idleexpiretime <= current) {
3918                 /* Only issue an IDLE expiration if we're in mature state. */
3919                 if (assoc->ipsa_state == IPSA_STATE_MATURE) {
3920                         sadb_expire_assoc(pfkey_q, assoc);
3921                 }
3922         } else {
3923                 /* Check idle time activities. */
3924                 dropped_mutex = sadb_idle_activities(assoc,
3925                     current - assoc->ipsa_lastuse, inbound);
3926         }
3927 
3928         if (!dropped_mutex)
3929                 mutex_exit(&assoc->ipsa_lock);
3930         return (retval);
3931 }
3932 
3933 /*
3934  * Called by a consumer protocol to do ther dirty work of reaping dead
3935  * Security Associations.
3936  *
3937  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
3938  * SA's that are already marked DEAD, so expired SA's are only reaped
3939  * the second time sadb_ager() runs.
3940  */
3941 void
3942 sadb_ager(sadb_t *sp, queue_t *pfkey_q, int reap_delay, netstack_t *ns)
3943 {
3944         int i;
3945         isaf_t *bucket;
3946         ipsa_t *assoc, *spare;
3947         iacqf_t *acqlist;
3948         ipsacq_t *acqrec, *spareacq;
3949         templist_t *haspeerlist, *newbie;
3950         /* Snapshot current time now. */
3951         time_t current = gethrestime_sec();
3952         haspeerlist = NULL;
3953 
3954         /*
3955          * Do my dirty work.  This includes aging real entries, aging
3956          * larvals, and aging outstanding ACQUIREs.
3957          *
3958          * I hope I don't tie up resources for too long.
3959          */
3960 
3961         /* Age acquires. */
3962 
3963         for (i = 0; i < sp->sdb_hashsize; i++) {
3964                 acqlist = &sp->sdb_acq[i];
3965                 mutex_enter(&acqlist->iacqf_lock);
3966                 for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
3967                     acqrec = spareacq) {
3968                         spareacq = acqrec->ipsacq_next;
3969                         if (current > acqrec->ipsacq_expire)
3970                                 sadb_destroy_acquire(acqrec, ns);
3971                 }
3972                 mutex_exit(&acqlist->iacqf_lock);
3973         }
3974 
3975         /* Age inbound associations. */
3976         for (i = 0; i < sp->sdb_hashsize; i++) {
3977                 bucket = &(sp->sdb_if[i]);
3978                 mutex_enter(&bucket->isaf_lock);
3979                 for (assoc = bucket->isaf_ipsa; assoc != NULL;
3980                     assoc = spare) {
3981                         spare = assoc->ipsa_next;
3982                         if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
3983                             reap_delay, B_TRUE) != NULL) {
3984                                 /*
3985                                  * Put SA's which have a peer or SA's which
3986                                  * are paired on a list for processing after
3987                                  * all the hash tables have been walked.
3988                                  *
3989                                  * sadb_age_assoc() increments the refcnt,
3990                                  * effectively doing an IPSA_REFHOLD().
3991                                  */
3992                                 newbie = kmem_alloc(sizeof (*newbie),
3993                                     KM_NOSLEEP);
3994                                 if (newbie == NULL) {
3995                                         /*
3996                                          * Don't forget to REFRELE().
3997                                          */
3998                                         IPSA_REFRELE(assoc);
3999                                         continue;       /* for loop... */
4000                                 }
4001                                 newbie->next = haspeerlist;
4002                                 newbie->ipsa = assoc;
4003                                 haspeerlist = newbie;
4004                         }
4005                 }
4006                 mutex_exit(&bucket->isaf_lock);
4007         }
4008 
4009         age_pair_peer_list(haspeerlist, sp, B_FALSE);
4010         haspeerlist = NULL;
4011 
4012         /* Age outbound associations. */
4013         for (i = 0; i < sp->sdb_hashsize; i++) {
4014                 bucket = &(sp->sdb_of[i]);
4015                 mutex_enter(&bucket->isaf_lock);
4016                 for (assoc = bucket->isaf_ipsa; assoc != NULL;
4017                     assoc = spare) {
4018                         spare = assoc->ipsa_next;
4019                         if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4020                             reap_delay, B_FALSE) != NULL) {
4021                                 /*
4022                                  * sadb_age_assoc() increments the refcnt,
4023                                  * effectively doing an IPSA_REFHOLD().
4024                                  */
4025                                 newbie = kmem_alloc(sizeof (*newbie),
4026                                     KM_NOSLEEP);
4027                                 if (newbie == NULL) {
4028                                         /*
4029                                          * Don't forget to REFRELE().
4030                                          */
4031                                         IPSA_REFRELE(assoc);
4032                                         continue;       /* for loop... */
4033                                 }
4034                                 newbie->next = haspeerlist;
4035                                 newbie->ipsa = assoc;
4036                                 haspeerlist = newbie;
4037                         }
4038                 }
4039                 mutex_exit(&bucket->isaf_lock);
4040         }
4041 
4042         age_pair_peer_list(haspeerlist, sp, B_TRUE);
4043 
4044         /*
4045          * Run a GC pass to clean out dead identities.
4046          */
4047         ipsid_gc(ns);
4048 }
4049 
4050 /*
4051  * Figure out when to reschedule the ager.
4052  */
4053 timeout_id_t
4054 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4055     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4056 {
4057         hrtime_t end = gethrtime();
4058         uint_t interval = *intp;        /* "interval" is in ms. */
4059 
4060         /*
4061          * See how long this took.  If it took too long, increase the
4062          * aging interval.
4063          */
4064         if ((end - begin) > MSEC2NSEC(interval)) {
4065                 if (interval >= intmax) {
4066                         /* XXX Rate limit this?  Or recommend flush? */
4067                         (void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4068                             "Too many SA's to age out in %d msec.\n",
4069                             intmax);
4070                 } else {
4071                         /* Double by shifting by one bit. */
4072                         interval <<= 1;
4073                         interval = min(interval, intmax);
4074                 }
4075         } else if ((end - begin) <= (MSEC2NSEC(interval) / 2) &&
4076             interval > SADB_AGE_INTERVAL_DEFAULT) {
4077                 /*
4078                  * If I took less than half of the interval, then I should
4079                  * ratchet the interval back down.  Never automatically
4080                  * shift below the default aging interval.
4081                  *
4082                  * NOTE:This even overrides manual setting of the age
4083                  *      interval using NDD to lower the setting past the
4084                  *      default.  In other words, if you set the interval
4085                  *      lower than the default, and your SADB gets too big,
4086                  *      the interval will only self-lower back to the default.
4087                  */
4088                 /* Halve by shifting one bit. */
4089                 interval >>= 1;
4090                 interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4091         }
4092         *intp = interval;
4093         return (qtimeout(pfkey_q, ager, agerarg,
4094             drv_usectohz(interval * (MICROSEC / MILLISEC))));
4095 }
4096 
4097 
4098 /*
4099  * Update the lifetime values of an SA.  This is the path an SADB_UPDATE
4100  * message takes when updating a MATURE or DYING SA.
4101  */
4102 static void
4103 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4104     sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4105 {
4106         mutex_enter(&assoc->ipsa_lock);
4107 
4108         /*
4109          * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4110          * passed in during an update message.  We currently don't handle
4111          * these.
4112          */
4113 
4114         if (hard != NULL) {
4115                 if (hard->sadb_lifetime_bytes != 0)
4116                         assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4117                 if (hard->sadb_lifetime_usetime != 0)
4118                         assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4119                 if (hard->sadb_lifetime_addtime != 0)
4120                         assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4121                 if (assoc->ipsa_hardaddlt != 0) {
4122                         assoc->ipsa_hardexpiretime =
4123                             assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4124                 }
4125                 if (assoc->ipsa_harduselt != 0 &&
4126                     assoc->ipsa_flags & IPSA_F_USED) {
4127                         UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4128                 }
4129                 if (hard->sadb_lifetime_allocations != 0)
4130                         assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4131         }
4132 
4133         if (soft != NULL) {
4134                 if (soft->sadb_lifetime_bytes != 0) {
4135                         if (soft->sadb_lifetime_bytes >
4136                             assoc->ipsa_hardbyteslt) {
4137                                 assoc->ipsa_softbyteslt =
4138                                     assoc->ipsa_hardbyteslt;
4139                         } else {
4140                                 assoc->ipsa_softbyteslt =
4141                                     soft->sadb_lifetime_bytes;
4142                         }
4143                 }
4144                 if (soft->sadb_lifetime_usetime != 0) {
4145                         if (soft->sadb_lifetime_usetime >
4146                             assoc->ipsa_harduselt) {
4147                                 assoc->ipsa_softuselt =
4148                                     assoc->ipsa_harduselt;
4149                         } else {
4150                                 assoc->ipsa_softuselt =
4151                                     soft->sadb_lifetime_usetime;
4152                         }
4153                 }
4154                 if (soft->sadb_lifetime_addtime != 0) {
4155                         if (soft->sadb_lifetime_addtime >
4156                             assoc->ipsa_hardexpiretime) {
4157                                 assoc->ipsa_softexpiretime =
4158                                     assoc->ipsa_hardexpiretime;
4159                         } else {
4160                                 assoc->ipsa_softaddlt =
4161                                     soft->sadb_lifetime_addtime;
4162                         }
4163                 }
4164                 if (assoc->ipsa_softaddlt != 0) {
4165                         assoc->ipsa_softexpiretime =
4166                             assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4167                 }
4168                 if (assoc->ipsa_softuselt != 0 &&
4169                     assoc->ipsa_flags & IPSA_F_USED) {
4170                         UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4171                 }
4172                 if (outbound && assoc->ipsa_softexpiretime != 0) {
4173                         if (assoc->ipsa_state == IPSA_STATE_MATURE)
4174                                 lifetime_fuzz(assoc);
4175                 }
4176 
4177                 if (soft->sadb_lifetime_allocations != 0)
4178                         assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4179         }
4180 
4181         if (idle != NULL) {
4182                 time_t current = gethrestime_sec();
4183                 if ((assoc->ipsa_idleexpiretime <= current) &&
4184                     (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4185                         assoc->ipsa_idleexpiretime =
4186                             current + assoc->ipsa_idleaddlt;
4187                 }
4188                 if (idle->sadb_lifetime_addtime != 0)
4189                         assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4190                 if (idle->sadb_lifetime_usetime != 0)
4191                         assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4192                 if (assoc->ipsa_idleaddlt != 0) {
4193                         assoc->ipsa_idleexpiretime =
4194                             current + idle->sadb_lifetime_addtime;
4195                         assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4196                 }
4197                 if (assoc->ipsa_idleuselt != 0) {
4198                         if (assoc->ipsa_idletime != 0) {
4199                                 assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4200                                     assoc->ipsa_idleuselt);
4201                         assoc->ipsa_idleexpiretime =
4202                             current + assoc->ipsa_idletime;
4203                         } else {
4204                                 assoc->ipsa_idleexpiretime =
4205                                     current + assoc->ipsa_idleuselt;
4206                                 assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4207                         }
4208                 }
4209         }
4210         mutex_exit(&assoc->ipsa_lock);
4211 }
4212 
4213 /*
4214  * Check a proposed KMC update for sanity.
4215  */
4216 static int
4217 sadb_check_kmc(ipsa_query_t *sq, ipsa_t *sa, int *diagnostic)
4218 {
4219         uint32_t kmp = sq->kmp;
4220         uint32_t kmc = sq->kmc;
4221 
4222         if (sa == NULL)
4223                 return (0);
4224 
4225         if (sa->ipsa_state == IPSA_STATE_DEAD)
4226                 return (ESRCH); /* DEAD == Not there, in this case. */
4227 
4228         if ((kmp != 0) && ((sa->ipsa_kmp != 0) || (sa->ipsa_kmp != kmp))) {
4229                 *diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4230                 return (EINVAL);
4231         }
4232 
4233         if ((kmc != 0) && ((sa->ipsa_kmc != 0) || (sa->ipsa_kmc != kmc))) {
4234                 *diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4235                 return (EINVAL);
4236         }
4237 
4238         return (0);
4239 }
4240 
4241 /*
4242  * Actually update the KMC info.
4243  */
4244 static void
4245 sadb_update_kmc(ipsa_query_t *sq, ipsa_t *sa)
4246 {
4247         uint32_t kmp = sq->kmp;
4248         uint32_t kmc = sq->kmc;
4249 
4250         if (kmp != 0)
4251                 sa->ipsa_kmp = kmp;
4252         if (kmc != 0)
4253                 sa->ipsa_kmc = kmc;
4254 }
4255 
4256 /*
4257  * Common code to update an SA.
4258  */
4259 
4260 int
4261 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi,
4262     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4263     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4264     netstack_t *ns, uint8_t sadb_msg_type)
4265 {
4266         sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4267         sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4268         sadb_lifetime_t *soft =
4269             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4270         sadb_lifetime_t *hard =
4271             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4272         sadb_lifetime_t *idle =
4273             (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4274         sadb_x_pair_t *pair_ext =
4275             (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4276         ipsa_t *echo_target = NULL;
4277         ipsap_t ipsapp;
4278         ipsa_query_t sq;
4279 
4280         sq.spp = spp;           /* XXX param */
4281         int error = sadb_form_query(ksi, IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA,
4282             IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
4283             &sq, diagnostic);
4284 
4285         if (error != 0)
4286                 return (error);
4287 
4288         error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
4289         if (error != 0)
4290                 return (error);
4291 
4292         if (ipsapp.ipsap_psa_ptr == NULL && ipsapp.ipsap_sa_ptr != NULL) {
4293                 if (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4294                         /*
4295                          * REFRELE the target and let the add_sa_func()
4296                          * deal with updating a larval SA.
4297                          */
4298                         destroy_ipsa_pair(&ipsapp);
4299                         return (add_sa_func(mp, ksi, diagnostic, ns));
4300                 }
4301         }
4302 
4303         /*
4304          * At this point we have an UPDATE to a MATURE SA. There should
4305          * not be any keying material present.
4306          */
4307         if (akey != NULL) {
4308                 *diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4309                 error = EINVAL;
4310                 goto bail;
4311         }
4312         if (ekey != NULL) {
4313                 *diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4314                 error = EINVAL;
4315                 goto bail;
4316         }
4317 
4318         /*
4319          * Reality checks for updates of active associations.
4320          * Sundry first-pass UPDATE-specific reality checks.
4321          * Have to do the checks here, because it's after the add_sa code.
4322          * XXX STATS : logging/stats here?
4323          */
4324 
4325         if (sq.assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
4326                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4327                 error = EINVAL;
4328                 goto bail;
4329         }
4330         if (sq.assoc->sadb_sa_flags & ~spp->s_updateflags) {
4331                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4332                 error = EINVAL;
4333                 goto bail;
4334         }
4335         if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4336                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4337                 error = EOPNOTSUPP;
4338                 goto bail;
4339         }
4340 
4341         if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4342                 error = EINVAL;
4343                 goto bail;
4344         }
4345 
4346         if ((*diagnostic = sadb_labelchk(ksi)) != 0)
4347                 return (EINVAL);
4348 
4349         error = sadb_check_kmc(&sq, ipsapp.ipsap_sa_ptr, diagnostic);
4350         if (error != 0)
4351                 goto bail;
4352 
4353         error = sadb_check_kmc(&sq, ipsapp.ipsap_psa_ptr, diagnostic);
4354         if (error != 0)
4355                 goto bail;
4356 
4357         if (ipsapp.ipsap_sa_ptr != NULL) {
4358                 sadb_update_lifetimes(ipsapp.ipsap_sa_ptr, hard, soft,
4359                     idle, B_TRUE);
4360                 sadb_update_kmc(&sq, ipsapp.ipsap_sa_ptr);
4361         }
4362 
4363         if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4364                 if (ipsapp.ipsap_psa_ptr != NULL) {
4365                         sadb_update_lifetimes(ipsapp.ipsap_psa_ptr, hard, soft,
4366                             idle, B_FALSE);
4367                         sadb_update_kmc(&sq, ipsapp.ipsap_psa_ptr);
4368                 } else {
4369                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4370                         error = ESRCH;
4371                         goto bail;
4372                 }
4373         }
4374 
4375         if (pair_ext != NULL)
4376                 error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
4377 
4378         if (error == 0)
4379                 sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4380                     ksi, echo_target);
4381 bail:
4382 
4383         destroy_ipsa_pair(&ipsapp);
4384 
4385         return (error);
4386 }
4387 
4388 
4389 static int
4390 update_pairing(ipsap_t *ipsapp, ipsa_query_t *sq, keysock_in_t *ksi,
4391     int *diagnostic)
4392 {
4393         sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4394         sadb_x_pair_t *pair_ext =
4395             (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4396         int error = 0;
4397         ipsap_t oipsapp;
4398         boolean_t undo_pair = B_FALSE;
4399         uint32_t ipsa_flags;
4400 
4401         if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4402             assoc->sadb_sa_spi) {
4403                 *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4404                 return (EINVAL);
4405         }
4406 
4407         /*
4408          * Assume for now that the spi value provided in the SADB_UPDATE
4409          * message was valid, update the SA with its pair spi value.
4410          * If the spi turns out to be bogus or the SA no longer exists
4411          * then this will be detected when the reverse update is made
4412          * below.
4413          */
4414         mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4415         ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4416         ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4417         mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4418 
4419         /*
4420          * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4421          * should now return pointers to the SA *AND* its pair, if this is not
4422          * the case, the "otherspi" either did not exist or was deleted. Also
4423          * check that "otherspi" is not already paired. If everything looks
4424          * good, complete the update. IPSA_REFRELE the first pair_pointer
4425          * after this update to ensure its not deleted until we are done.
4426          */
4427         error = get_ipsa_pair(sq, &oipsapp, diagnostic);
4428         if (error != 0) {
4429                 /*
4430                  * This should never happen, calling function still has
4431                  * IPSA_REFHELD on the SA we just updated.
4432                  */
4433                 return (error); /* XXX EINVAL instead of ESRCH? */
4434         }
4435 
4436         if (oipsapp.ipsap_psa_ptr == NULL) {
4437                 *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4438                 error = EINVAL;
4439                 undo_pair = B_TRUE;
4440         } else {
4441                 ipsa_flags = oipsapp.ipsap_psa_ptr->ipsa_flags;
4442                 if ((oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4443                     (oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4444                         /* Its dead Jim! */
4445                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4446                         undo_pair = B_TRUE;
4447                 } else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4448                     (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4449                         /* This SA is in both hashtables. */
4450                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4451                         undo_pair = B_TRUE;
4452                 } else if (ipsa_flags & IPSA_F_PAIRED) {
4453                         /* This SA is already paired with another. */
4454                         *diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4455                         undo_pair = B_TRUE;
4456                 }
4457         }
4458 
4459         if (undo_pair) {
4460                 /* The pair SA does not exist. */
4461                 mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4462                 ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4463                 ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4464                 mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4465         } else {
4466                 mutex_enter(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4467                 oipsapp.ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4468                 oipsapp.ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4469                 mutex_exit(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4470         }
4471 
4472         destroy_ipsa_pair(&oipsapp);
4473         return (error);
4474 }
4475 
4476 /*
4477  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4478  * a list of outstanding SADB_ACQUIRE messages.  If ipsec_getassocbyconn() fails
4479  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4480  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4481  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4482  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4483  * other direction's SA.
4484  */
4485 
4486 /*
4487  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4488  * grab it, lock it, and return it.  Otherwise return NULL.
4489  *
4490  * XXX MLS number of arguments getting unwieldy here
4491  */
4492 static ipsacq_t *
4493 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4494     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4495     uint64_t unique_id, ts_label_t *tsl)
4496 {
4497         ipsacq_t *walker;
4498         sa_family_t fam;
4499         uint32_t blank_address[4] = {0, 0, 0, 0};
4500 
4501         if (isrc == NULL) {
4502                 ASSERT(idst == NULL);
4503                 isrc = idst = blank_address;
4504         }
4505 
4506         /*
4507          * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4508          *
4509          * XXX May need search for duplicates based on other things too!
4510          */
4511         for (walker = bucket->iacqf_ipsacq; walker != NULL;
4512             walker = walker->ipsacq_next) {
4513                 mutex_enter(&walker->ipsacq_lock);
4514                 fam = walker->ipsacq_addrfam;
4515                 if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4516                     IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4517                     ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4518                     (in6_addr_t *)walker->ipsacq_innersrc) &&
4519                     ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4520                     (in6_addr_t *)walker->ipsacq_innerdst) &&
4521                     (ap == walker->ipsacq_act) &&
4522                     (pp == walker->ipsacq_policy) &&
4523                     /* XXX do deep compares of ap/pp? */
4524                     (unique_id == walker->ipsacq_unique_id) &&
4525                     (ipsec_label_match(tsl, walker->ipsacq_tsl)))
4526                         break;                  /* everything matched */
4527                 mutex_exit(&walker->ipsacq_lock);
4528         }
4529 
4530         return (walker);
4531 }
4532 
4533 /*
4534  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4535  * of all of the same length.  Give up (and drop) if memory
4536  * cannot be allocated for a new one; otherwise, invoke callback to
4537  * send the acquire up..
4538  *
4539  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4540  * list.  The ah_add_sa_finish() routines can look at the packet's attached
4541  * attributes and handle this case specially.
4542  */
4543 void
4544 sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
4545     boolean_t need_esp)
4546 {
4547         mblk_t  *asyncmp;
4548         sadbp_t *spp;
4549         sadb_t *sp;
4550         ipsacq_t *newbie;
4551         iacqf_t *bucket;
4552         mblk_t *extended;
4553         ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4554         ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4555         uint32_t *src, *dst, *isrc, *idst;
4556         ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
4557         ipsec_action_t *ap = ixa->ixa_ipsec_action;
4558         sa_family_t af;
4559         int hashoffset;
4560         uint32_t seq;
4561         uint64_t unique_id = 0;
4562         ipsec_selector_t sel;
4563         boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
4564         ts_label_t      *tsl = NULL;
4565         netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
4566         ipsec_stack_t   *ipss = ns->netstack_ipsec;
4567         sadb_sens_t     *sens = NULL;
4568         int             sens_len;
4569 
4570         ASSERT((pp != NULL) || (ap != NULL));
4571 
4572         ASSERT(need_ah != NULL || need_esp != NULL);
4573 
4574         /* Assign sadb pointers */
4575         if (need_esp) { /* ESP for AH+ESP */
4576                 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4577 
4578                 spp = &espstack->esp_sadb;
4579         } else {
4580                 ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
4581 
4582                 spp = &ahstack->ah_sadb;
4583         }
4584         sp = (ixa->ixa_flags & IXAF_IS_IPV4) ? &spp->s_v4 : &spp->s_v6;
4585 
4586         if (is_system_labeled())
4587                 tsl = ixa->ixa_tsl;
4588 
4589         if (ap == NULL)
4590                 ap = pp->ipsp_act;
4591 
4592         ASSERT(ap != NULL);
4593 
4594         if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4595                 unique_id = SA_FORM_UNIQUE_ID(ixa);
4596 
4597         /*
4598          * Set up an ACQUIRE record.
4599          *
4600          * Immediately, make sure the ACQUIRE sequence number doesn't slip
4601          * below the lowest point allowed in the kernel.  (In other words,
4602          * make sure the high bit on the sequence number is set.)
4603          */
4604 
4605         seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
4606 
4607         if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4608                 src = (uint32_t *)&ipha->ipha_src;
4609                 dst = (uint32_t *)&ipha->ipha_dst;
4610                 af = AF_INET;
4611                 hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4612                 ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
4613         } else {
4614                 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4615                 src = (uint32_t *)&ip6h->ip6_src;
4616                 dst = (uint32_t *)&ip6h->ip6_dst;
4617                 af = AF_INET6;
4618                 hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4619                 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
4620         }
4621 
4622         if (tunnel_mode) {
4623                 if (pp == NULL) {
4624                         /*
4625                          * Tunnel mode with no policy pointer means this is a
4626                          * reflected ICMP (like a ECHO REQUEST) that came in
4627                          * with self-encapsulated protection.  Until we better
4628                          * support this, drop the packet.
4629                          */
4630                         ip_drop_packet(datamp, B_FALSE, NULL,
4631                             DROPPER(ipss, ipds_spd_got_selfencap),
4632                             &ipss->ipsec_spd_dropper);
4633                         return;
4634                 }
4635                 /* Snag inner addresses. */
4636                 isrc = ixa->ixa_ipsec_insrc;
4637                 idst = ixa->ixa_ipsec_indst;
4638         } else {
4639                 isrc = idst = NULL;
4640         }
4641 
4642         /*
4643          * Check buckets to see if there is an existing entry.  If so,
4644          * grab it.  sadb_checkacquire locks newbie if found.
4645          */
4646         bucket = &(sp->sdb_acq[hashoffset]);
4647         mutex_enter(&bucket->iacqf_lock);
4648         newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4649             unique_id, tsl);
4650 
4651         if (newbie == NULL) {
4652                 /*
4653                  * Otherwise, allocate a new one.
4654                  */
4655                 newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4656                 if (newbie == NULL) {
4657                         mutex_exit(&bucket->iacqf_lock);
4658                         ip_drop_packet(datamp, B_FALSE, NULL,
4659                             DROPPER(ipss, ipds_sadb_acquire_nomem),
4660                             &ipss->ipsec_sadb_dropper);
4661                         return;
4662                 }
4663                 newbie->ipsacq_policy = pp;
4664                 if (pp != NULL) {
4665                         IPPOL_REFHOLD(pp);
4666                 }
4667                 IPACT_REFHOLD(ap);
4668                 newbie->ipsacq_act = ap;
4669                 newbie->ipsacq_linklock = &bucket->iacqf_lock;
4670                 newbie->ipsacq_next = bucket->iacqf_ipsacq;
4671                 newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
4672                 if (newbie->ipsacq_next != NULL)
4673                         newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
4674 
4675                 bucket->iacqf_ipsacq = newbie;
4676                 mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
4677                 mutex_enter(&newbie->ipsacq_lock);
4678         }
4679 
4680         /*
4681          * XXX MLS does it actually help us to drop the bucket lock here?
4682          * we have inserted a half-built, locked acquire record into the
4683          * bucket.  any competing thread will now be able to lock the bucket
4684          * to scan it, but will immediately pile up on the new acquire
4685          * record's lock; I don't think we gain anything here other than to
4686          * disperse blame for lock contention.
4687          *
4688          * we might be able to dispense with acquire record locks entirely..
4689          * just use the bucket locks..
4690          */
4691 
4692         mutex_exit(&bucket->iacqf_lock);
4693 
4694         /*
4695          * This assert looks silly for now, but we may need to enter newbie's
4696          * mutex during a search.
4697          */
4698         ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
4699 
4700         /*
4701          * Make the ip_xmit_attr_t into something we can queue.
4702          * If no memory it frees datamp.
4703          */
4704         asyncmp = ip_xmit_attr_to_mblk(ixa);
4705         if (asyncmp != NULL)
4706                 linkb(asyncmp, datamp);
4707 
4708         /* Queue up packet.  Use b_next. */
4709 
4710         if (asyncmp == NULL) {
4711                 /* Statistics for allocation failure */
4712                 if (ixa->ixa_flags & IXAF_IS_IPV4) {
4713                         BUMP_MIB(&ixa->ixa_ipst->ips_ip_mib,
4714                             ipIfStatsOutDiscards);
4715                 } else {
4716                         BUMP_MIB(&ixa->ixa_ipst->ips_ip6_mib,
4717                             ipIfStatsOutDiscards);
4718                 }
4719                 ip_drop_output("No memory for asyncmp", datamp, NULL);
4720                 freemsg(datamp);
4721         } else if (newbie->ipsacq_numpackets == 0) {
4722                 /* First one. */
4723                 newbie->ipsacq_mp = asyncmp;
4724                 newbie->ipsacq_numpackets = 1;
4725                 newbie->ipsacq_expire = gethrestime_sec();
4726                 /*
4727                  * Extended ACQUIRE with both AH+ESP will use ESP's timeout
4728                  * value.
4729                  */
4730                 newbie->ipsacq_expire += *spp->s_acquire_timeout;
4731                 newbie->ipsacq_seq = seq;
4732                 newbie->ipsacq_addrfam = af;
4733 
4734                 newbie->ipsacq_srcport = ixa->ixa_ipsec_src_port;
4735                 newbie->ipsacq_dstport = ixa->ixa_ipsec_dst_port;
4736                 newbie->ipsacq_icmp_type = ixa->ixa_ipsec_icmp_type;
4737                 newbie->ipsacq_icmp_code = ixa->ixa_ipsec_icmp_code;
4738                 if (tunnel_mode) {
4739                         newbie->ipsacq_inneraddrfam = ixa->ixa_ipsec_inaf;
4740                         newbie->ipsacq_proto = ixa->ixa_ipsec_inaf == AF_INET6 ?
4741                             IPPROTO_IPV6 : IPPROTO_ENCAP;
4742                         newbie->ipsacq_innersrcpfx = ixa->ixa_ipsec_insrcpfx;
4743                         newbie->ipsacq_innerdstpfx = ixa->ixa_ipsec_indstpfx;
4744                         IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
4745                             ixa->ixa_ipsec_insrc, ixa->ixa_ipsec_inaf);
4746                         IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
4747                             ixa->ixa_ipsec_indst, ixa->ixa_ipsec_inaf);
4748                 } else {
4749                         newbie->ipsacq_proto = ixa->ixa_ipsec_proto;
4750                 }
4751                 newbie->ipsacq_unique_id = unique_id;
4752 
4753                 if (ixa->ixa_tsl != NULL) {
4754                         label_hold(ixa->ixa_tsl);
4755                         newbie->ipsacq_tsl = ixa->ixa_tsl;
4756                 }
4757         } else {
4758                 /* Scan to the end of the list & insert. */
4759                 mblk_t *lastone = newbie->ipsacq_mp;
4760 
4761                 while (lastone->b_next != NULL)
4762                         lastone = lastone->b_next;
4763                 lastone->b_next = asyncmp;
4764                 if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
4765                         newbie->ipsacq_numpackets = ipsacq_maxpackets;
4766                         lastone = newbie->ipsacq_mp;
4767                         newbie->ipsacq_mp = lastone->b_next;
4768                         lastone->b_next = NULL;
4769 
4770                         /* Freeing the async message */
4771                         lastone = ip_xmit_attr_free_mblk(lastone);
4772                         ip_drop_packet(lastone, B_FALSE, NULL,
4773                             DROPPER(ipss, ipds_sadb_acquire_toofull),
4774                             &ipss->ipsec_sadb_dropper);
4775                 } else {
4776                         IP_ACQUIRE_STAT(ipss, qhiwater,
4777                             newbie->ipsacq_numpackets);
4778                 }
4779         }
4780 
4781         /*
4782          * Reset addresses.  Set them to the most recently added mblk chain,
4783          * so that the address pointers in the acquire record will point
4784          * at an mblk still attached to the acquire list.
4785          */
4786 
4787         newbie->ipsacq_srcaddr = src;
4788         newbie->ipsacq_dstaddr = dst;
4789 
4790         /*
4791          * If the acquire record has more than one queued packet, we've
4792          * already sent an ACQUIRE, and don't need to repeat ourself.
4793          */
4794         if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
4795                 /* I have an acquire outstanding already! */
4796                 mutex_exit(&newbie->ipsacq_lock);
4797                 return;
4798         }
4799 
4800         if (!keysock_extended_reg(ns))
4801                 goto punt_extended;
4802         /*
4803          * Construct an extended ACQUIRE.  There are logging
4804          * opportunities here in failure cases.
4805          */
4806         bzero(&sel, sizeof (sel));
4807         sel.ips_isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
4808         if (tunnel_mode) {
4809                 sel.ips_protocol = (ixa->ixa_ipsec_inaf == AF_INET) ?
4810                     IPPROTO_ENCAP : IPPROTO_IPV6;
4811         } else {
4812                 sel.ips_protocol = ixa->ixa_ipsec_proto;
4813                 sel.ips_local_port = ixa->ixa_ipsec_src_port;
4814                 sel.ips_remote_port = ixa->ixa_ipsec_dst_port;
4815         }
4816         sel.ips_icmp_type = ixa->ixa_ipsec_icmp_type;
4817         sel.ips_icmp_code = ixa->ixa_ipsec_icmp_code;
4818         sel.ips_is_icmp_inv_acq = 0;
4819         if (af == AF_INET) {
4820                 sel.ips_local_addr_v4 = ipha->ipha_src;
4821                 sel.ips_remote_addr_v4 = ipha->ipha_dst;
4822         } else {
4823                 sel.ips_local_addr_v6 = ip6h->ip6_src;
4824                 sel.ips_remote_addr_v6 = ip6h->ip6_dst;
4825         }
4826 
4827         extended = sadb_keysock_out(0);
4828         if (extended == NULL)
4829                 goto punt_extended;
4830 
4831         if (ixa->ixa_tsl != NULL) {
4832                 /*
4833                  * XXX MLS correct condition here?
4834                  * XXX MLS other credential attributes in acquire?
4835                  * XXX malloc failure?  don't fall back to original?
4836                  */
4837                 sens = sadb_make_sens_ext(ixa->ixa_tsl, &sens_len);
4838 
4839                 if (sens == NULL) {
4840                         freeb(extended);
4841                         goto punt_extended;
4842                 }
4843         }
4844 
4845         extended->b_cont = sadb_extended_acquire(&sel, pp, ap, tunnel_mode,
4846             seq, 0, sens, ns);
4847 
4848         if (sens != NULL)
4849                 kmem_free(sens, sens_len);
4850 
4851         if (extended->b_cont == NULL) {
4852                 freeb(extended);
4853                 goto punt_extended;
4854         }
4855 
4856         /*
4857          * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
4858          * this new record.  The send-acquire callback assumes that acqrec is
4859          * already locked.
4860          */
4861         (*spp->s_acqfn)(newbie, extended, ns);
4862         return;
4863 
4864 punt_extended:
4865         (*spp->s_acqfn)(newbie, NULL, ns);
4866 }
4867 
4868 /*
4869  * Unlink and free an acquire record.
4870  */
4871 void
4872 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
4873 {
4874         mblk_t          *mp;
4875         ipsec_stack_t   *ipss = ns->netstack_ipsec;
4876 
4877         ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
4878 
4879         if (acqrec->ipsacq_policy != NULL) {
4880                 IPPOL_REFRELE(acqrec->ipsacq_policy);
4881         }
4882         if (acqrec->ipsacq_act != NULL) {
4883                 IPACT_REFRELE(acqrec->ipsacq_act);
4884         }
4885 
4886         /* Unlink */
4887         *(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
4888         if (acqrec->ipsacq_next != NULL)
4889                 acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
4890 
4891         if (acqrec->ipsacq_tsl != NULL) {
4892                 label_rele(acqrec->ipsacq_tsl);
4893                 acqrec->ipsacq_tsl = NULL;
4894         }
4895 
4896         /*
4897          * Free hanging mp's.
4898          *
4899          * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
4900          */
4901 
4902         mutex_enter(&acqrec->ipsacq_lock);
4903         while (acqrec->ipsacq_mp != NULL) {
4904                 mp = acqrec->ipsacq_mp;
4905                 acqrec->ipsacq_mp = mp->b_next;
4906                 mp->b_next = NULL;
4907                 /* Freeing the async message */
4908                 mp = ip_xmit_attr_free_mblk(mp);
4909                 ip_drop_packet(mp, B_FALSE, NULL,
4910                     DROPPER(ipss, ipds_sadb_acquire_timeout),
4911                     &ipss->ipsec_sadb_dropper);
4912         }
4913         mutex_exit(&acqrec->ipsacq_lock);
4914 
4915         /* Free */
4916         mutex_destroy(&acqrec->ipsacq_lock);
4917         kmem_free(acqrec, sizeof (*acqrec));
4918 }
4919 
4920 /*
4921  * Destroy an acquire list fanout.
4922  */
4923 static void
4924 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
4925     netstack_t *ns)
4926 {
4927         int i;
4928         iacqf_t *list = *listp;
4929 
4930         if (list == NULL)
4931                 return;
4932 
4933         for (i = 0; i < numentries; i++) {
4934                 mutex_enter(&(list[i].iacqf_lock));
4935                 while (list[i].iacqf_ipsacq != NULL)
4936                         sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
4937                 mutex_exit(&(list[i].iacqf_lock));
4938                 if (forever)
4939                         mutex_destroy(&(list[i].iacqf_lock));
4940         }
4941 
4942         if (forever) {
4943                 *listp = NULL;
4944                 kmem_free(list, numentries * sizeof (*list));
4945         }
4946 }
4947 
4948 /*
4949  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
4950  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
4951  */
4952 static uint8_t *
4953 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
4954     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
4955     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
4956 {
4957         uint8_t *cur = start;
4958         ipsec_alginfo_t *algp;
4959         sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
4960 
4961         cur += sizeof (*algdesc);
4962         if (cur >= limit)
4963                 return (NULL);
4964 
4965         ecomb->sadb_x_ecomb_numalgs++;
4966 
4967         /*
4968          * Normalize vs. crypto framework's limits.  This way, you can specify
4969          * a stronger policy, and when the framework loads a stronger version,
4970          * you can just keep plowing w/o rewhacking your SPD.
4971          */
4972         mutex_enter(&ipss->ipsec_alg_lock);
4973         algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
4974             IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
4975         if (algp == NULL) {
4976                 mutex_exit(&ipss->ipsec_alg_lock);
4977                 return (NULL);  /* Algorithm doesn't exist.  Fail gracefully. */
4978         }
4979         if (minbits < algp->alg_ef_minbits)
4980                 minbits = algp->alg_ef_minbits;
4981         if (maxbits > algp->alg_ef_maxbits)
4982                 maxbits = algp->alg_ef_maxbits;
4983         mutex_exit(&ipss->ipsec_alg_lock);
4984 
4985         algdesc->sadb_x_algdesc_reserved = SADB_8TO1(algp->alg_saltlen);
4986         algdesc->sadb_x_algdesc_satype = satype;
4987         algdesc->sadb_x_algdesc_algtype = algtype;
4988         algdesc->sadb_x_algdesc_alg = alg;
4989         algdesc->sadb_x_algdesc_minbits = minbits;
4990         algdesc->sadb_x_algdesc_maxbits = maxbits;
4991 
4992         return (cur);
4993 }
4994 
4995 /*
4996  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
4997  * which must fit before *limit
4998  *
4999  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5000  */
5001 static uint8_t *
5002 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5003     netstack_t *ns)
5004 {
5005         uint8_t *cur = start;
5006         sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5007         ipsec_prot_t *ipp;
5008         ipsec_stack_t *ipss = ns->netstack_ipsec;
5009 
5010         cur += sizeof (*ecomb);
5011         if (cur >= limit)
5012                 return (NULL);
5013 
5014         ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5015 
5016         ipp = &act->ipa_act.ipa_apply;
5017 
5018         ecomb->sadb_x_ecomb_numalgs = 0;
5019         ecomb->sadb_x_ecomb_reserved = 0;
5020         ecomb->sadb_x_ecomb_reserved2 = 0;
5021         /*
5022          * No limits on allocations, since we really don't support that
5023          * concept currently.
5024          */
5025         ecomb->sadb_x_ecomb_soft_allocations = 0;
5026         ecomb->sadb_x_ecomb_hard_allocations = 0;
5027 
5028         /*
5029          * XXX TBD: Policy or global parameters will eventually be
5030          * able to fill in some of these.
5031          */
5032         ecomb->sadb_x_ecomb_flags = 0;
5033         ecomb->sadb_x_ecomb_soft_bytes = 0;
5034         ecomb->sadb_x_ecomb_hard_bytes = 0;
5035         ecomb->sadb_x_ecomb_soft_addtime = 0;
5036         ecomb->sadb_x_ecomb_hard_addtime = 0;
5037         ecomb->sadb_x_ecomb_soft_usetime = 0;
5038         ecomb->sadb_x_ecomb_hard_usetime = 0;
5039 
5040         if (ipp->ipp_use_ah) {
5041                 cur = sadb_new_algdesc(cur, limit, ecomb,
5042                     SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5043                     ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5044                 if (cur == NULL)
5045                         return (NULL);
5046                 ipsecah_fill_defs(ecomb, ns);
5047         }
5048 
5049         if (ipp->ipp_use_esp) {
5050                 if (ipp->ipp_use_espa) {
5051                         cur = sadb_new_algdesc(cur, limit, ecomb,
5052                             SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5053                             ipp->ipp_esp_auth_alg,
5054                             ipp->ipp_espa_minbits,
5055                             ipp->ipp_espa_maxbits, ipss);
5056                         if (cur == NULL)
5057                                 return (NULL);
5058                 }
5059 
5060                 cur = sadb_new_algdesc(cur, limit, ecomb,
5061                     SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5062                     ipp->ipp_encr_alg,
5063                     ipp->ipp_espe_minbits,
5064                     ipp->ipp_espe_maxbits, ipss);
5065                 if (cur == NULL)
5066                         return (NULL);
5067                 /* Fill in lifetimes if and only if AH didn't already... */
5068                 if (!ipp->ipp_use_ah)
5069                         ipsecesp_fill_defs(ecomb, ns);
5070         }
5071 
5072         return (cur);
5073 }
5074 
5075 #include <sys/tsol/label_macro.h> /* XXX should not need this */
5076 
/*
 * From a ts_label_t, construct a sensitivity label extension
 *
 * We send up a fixed-size sensitivity label bitmap, and are perhaps
 * overly chummy with the underlying data structures here.
 */
5083 
5084 /* ARGSUSED */
5085 int
5086 sadb_sens_len_from_label(ts_label_t *tsl)
5087 {
5088         int baselen = sizeof (sadb_sens_t) + _C_LEN * 4;
5089         return (roundup(baselen, sizeof (uint64_t)));
5090 }
5091 
5092 void
5093 sadb_sens_from_label(sadb_sens_t *sens, int exttype, ts_label_t *tsl,
5094     int senslen)
5095 {
5096         uint8_t *bitmap;
5097         bslabel_t *sl;
5098 
5099         /* LINTED */
5100         ASSERT((_C_LEN & 1) == 0);
5101         ASSERT((senslen & 7) == 0);
5102 
5103         sl = label2bslabel(tsl);
5104 
5105         sens->sadb_sens_exttype = exttype;
5106         sens->sadb_sens_len = SADB_8TO64(senslen);
5107 
5108         sens->sadb_sens_dpd = tsl->tsl_doi;
5109         sens->sadb_sens_sens_level = LCLASS(sl);
5110         sens->sadb_sens_integ_level = 0; /* TBD */
5111         sens->sadb_sens_sens_len = _C_LEN >> 1;
5112         sens->sadb_sens_integ_len = 0; /* TBD */
5113         sens->sadb_x_sens_flags = 0;
5114 
5115         bitmap = (uint8_t *)(sens + 1);
5116         bcopy(&(((_bslabel_impl_t *)sl)->compartments), bitmap, _C_LEN * 4);
5117 }
5118 
5119 static sadb_sens_t *
5120 sadb_make_sens_ext(ts_label_t *tsl, int *len)
5121 {
5122         /* XXX allocation failure? */
5123         int sens_len = sadb_sens_len_from_label(tsl);
5124 
5125         sadb_sens_t *sens = kmem_alloc(sens_len, KM_SLEEP);
5126 
5127         sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY, tsl, sens_len);
5128 
5129         *len = sens_len;
5130 
5131         return (sens);
5132 }
5133 
5134 /*
5135  * Okay, how do we report errors/invalid labels from this?
5136  * With a special designated "not a label" cred_t ?
5137  */
5138 /* ARGSUSED */
5139 ts_label_t *
5140 sadb_label_from_sens(sadb_sens_t *sens, uint64_t *bitmap)
5141 {
5142         int bitmap_len = SADB_64TO8(sens->sadb_sens_sens_len);
5143         bslabel_t sl;
5144         ts_label_t *tsl;
5145 
5146         if (sens->sadb_sens_integ_level != 0)
5147                 return (NULL);
5148         if (sens->sadb_sens_integ_len != 0)
5149                 return (NULL);
5150         if (bitmap_len > _C_LEN * 4)
5151                 return (NULL);
5152 
5153         bsllow(&sl);
5154         LCLASS_SET((_bslabel_impl_t *)&sl, sens->sadb_sens_sens_level);
5155         bcopy(bitmap, &((_bslabel_impl_t *)&sl)->compartments,
5156             bitmap_len);
5157 
5158         tsl = labelalloc(&sl, sens->sadb_sens_dpd, KM_NOSLEEP);
5159         if (tsl == NULL)
5160                 return (NULL);
5161 
5162         if (sens->sadb_x_sens_flags & SADB_X_SENS_UNLABELED)
5163                 tsl->tsl_flags |= TSLF_UNLABELED;
5164         return (tsl);
5165 }
5166 
5167 /* End XXX label-library-leakage */
5168 
5169 /*
5170  * Construct an extended ACQUIRE message based on a selector and the resulting
5171  * IPsec action.
5172  *
5173  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5174  * generation. As a consequence, expect this function to evolve
5175  * rapidly.
5176  */
5177 static mblk_t *
5178 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5179     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5180     sadb_sens_t *sens, netstack_t *ns)
5181 {
5182         mblk_t *mp;
5183         sadb_msg_t *samsg;
5184         uint8_t *start, *cur, *end;
5185         uint32_t *saddrptr, *daddrptr;
5186         sa_family_t af;
5187         sadb_prop_t *eprop;
5188         ipsec_action_t *ap, *an;
5189         ipsec_selkey_t *ipsl;
5190         uint8_t proto, pfxlen;
5191         uint16_t lport, rport;
5192         uint32_t kmp, kmc;
5193 
5194         /*
5195          * Find the action we want sooner rather than later..
5196          */
5197         an = NULL;
5198         if (pol == NULL) {
5199                 ap = act;
5200         } else {
5201                 ap = pol->ipsp_act;
5202 
5203                 if (ap != NULL)
5204                         an = ap->ipa_next;
5205         }
5206 
5207         /*
5208          * Just take a swag for the allocation for now.  We can always
5209          * alter it later.
5210          */
5211 #define SADB_EXTENDED_ACQUIRE_SIZE      4096
5212         mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5213         if (mp == NULL)
5214                 return (NULL);
5215 
5216         start = mp->b_rptr;
5217         end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5218 
5219         cur = start;
5220 
5221         samsg = (sadb_msg_t *)cur;
5222         cur += sizeof (*samsg);
5223 
5224         samsg->sadb_msg_version = PF_KEY_V2;
5225         samsg->sadb_msg_type = SADB_ACQUIRE;
5226         samsg->sadb_msg_errno = 0;
5227         samsg->sadb_msg_reserved = 0;
5228         samsg->sadb_msg_satype = 0;
5229         samsg->sadb_msg_seq = seq;
5230         samsg->sadb_msg_pid = pid;
5231 
5232         if (tunnel_mode) {
5233                 /*
5234                  * Form inner address extensions based NOT on the inner
5235                  * selectors (i.e. the packet data), but on the policy's
5236                  * selector key (i.e. the policy's selector information).
5237                  *
5238                  * NOTE:  The position of IPv4 and IPv6 addresses is the
5239                  * same in ipsec_selkey_t (unless the compiler does very
5240                  * strange things with unions, consult your local C language
5241                  * lawyer for details).
5242                  */
5243                 ASSERT(pol != NULL);
5244 
5245                 ipsl = &(pol->ipsp_sel->ipsl_key);
5246                 if (ipsl->ipsl_valid & IPSL_IPV4) {
5247                         af = AF_INET;
5248                         ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5249                         ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5250                 } else {
5251                         af = AF_INET6;
5252                         ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5253                         ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5254                 }
5255 
5256                 if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5257                         saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5258                         pfxlen = ipsl->ipsl_local_pfxlen;
5259                 } else {
5260                         saddrptr = (uint32_t *)(&ipv6_all_zeros);
5261                         pfxlen = 0;
5262                 }
5263                 /* XXX What about ICMP type/code? */
5264                 lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5265                     ipsl->ipsl_lport : 0;
5266                 proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5267                     ipsl->ipsl_proto : 0;
5268 
5269                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5270                     af, saddrptr, lport, proto, pfxlen);
5271                 if (cur == NULL) {
5272                         freeb(mp);
5273                         return (NULL);
5274                 }
5275 
5276                 if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5277                         daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5278                         pfxlen = ipsl->ipsl_remote_pfxlen;
5279                 } else {
5280                         daddrptr = (uint32_t *)(&ipv6_all_zeros);
5281                         pfxlen = 0;
5282                 }
5283                 /* XXX What about ICMP type/code? */
5284                 rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5285                     ipsl->ipsl_rport : 0;
5286 
5287                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5288                     af, daddrptr, rport, proto, pfxlen);
5289                 if (cur == NULL) {
5290                         freeb(mp);
5291                         return (NULL);
5292                 }
5293                 /*
5294                  * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5295                  * _with_ inner-packet address selectors, we'll need to further
5296                  * distinguish tunnel mode here.  For now, having inner
5297                  * addresses and/or ports is sufficient.
5298                  *
5299                  * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5300                  * outer addresses.
5301                  */
5302                 proto = sel->ips_protocol;   /* Either _ENCAP or _IPV6 */
5303                 lport = rport = 0;
5304         } else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5305                 proto = 0;
5306                 lport = 0;
5307                 rport = 0;
5308                 if (pol != NULL) {
5309                         ipsl = &(pol->ipsp_sel->ipsl_key);
5310                         if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5311                                 proto = ipsl->ipsl_proto;
5312                         if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5313                                 rport = ipsl->ipsl_rport;
5314                         if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5315                                 lport = ipsl->ipsl_lport;
5316                 }
5317         } else {
5318                 proto = sel->ips_protocol;
5319                 lport = sel->ips_local_port;
5320                 rport = sel->ips_remote_port;
5321         }
5322 
5323         af = sel->ips_isv4 ? AF_INET : AF_INET6;
5324 
5325         /*
5326          * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5327          * ipsec_selector_t.
5328          */
5329         cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5330             (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5331 
5332         if (cur == NULL) {
5333                 freeb(mp);
5334                 return (NULL);
5335         }
5336 
5337         cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5338             (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5339 
5340         if (cur == NULL) {
5341                 freeb(mp);
5342                 return (NULL);
5343         }
5344 
5345         if (sens != NULL) {
5346                 uint8_t *sensext = cur;
5347                 int senslen = SADB_64TO8(sens->sadb_sens_len);
5348 
5349                 cur += senslen;
5350                 if (cur > end) {
5351                         freeb(mp);
5352                         return (NULL);
5353                 }
5354                 bcopy(sens, sensext, senslen);
5355         }
5356 
5357         /*
5358          * This section will change a lot as policy evolves.
5359          * For now, it'll be relatively simple.
5360          */
5361         eprop = (sadb_prop_t *)cur;
5362         cur += sizeof (*eprop);
5363         if (cur > end) {
5364                 /* no space left */
5365                 freeb(mp);
5366                 return (NULL);
5367         }
5368 
5369         eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5370         eprop->sadb_x_prop_ereserved = 0;
5371         eprop->sadb_x_prop_numecombs = 0;
5372         eprop->sadb_prop_replay = 32;        /* default */
5373 
5374         kmc = kmp = 0;
5375 
5376         for (; ap != NULL; ap = an) {
5377                 an = (pol != NULL) ? ap->ipa_next : NULL;
5378 
5379                 /*
5380                  * Skip non-IPsec policies
5381                  */
5382                 if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5383                         continue;
5384 
5385                 if (ap->ipa_act.ipa_apply.ipp_km_proto)
5386                         kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5387                 if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5388                         kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5389                 if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5390                         eprop->sadb_prop_replay =
5391                             ap->ipa_act.ipa_apply.ipp_replay_depth;
5392                 }
5393 
5394                 cur = sadb_action_to_ecomb(cur, end, ap, ns);
5395                 if (cur == NULL) { /* no space */
5396                         freeb(mp);
5397                         return (NULL);
5398                 }
5399                 eprop->sadb_x_prop_numecombs++;
5400         }
5401 
5402         if (eprop->sadb_x_prop_numecombs == 0) {
5403                 /*
5404                  * This will happen if we fail to find a policy
5405                  * allowing for IPsec processing.
5406                  * Construct an error message.
5407                  */
5408                 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5409                 samsg->sadb_msg_errno = ENOENT;
5410                 samsg->sadb_x_msg_diagnostic = 0;
5411                 return (mp);
5412         }
5413 
5414         if ((kmp != 0) || (kmc != 0)) {
5415                 cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5416                 if (cur == NULL) {
5417                         freeb(mp);
5418                         return (NULL);
5419                 }
5420         }
5421 
5422         eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5423         samsg->sadb_msg_len = SADB_8TO64(cur - start);
5424         mp->b_wptr = cur;
5425 
5426         return (mp);
5427 }
5428 
5429 /*
5430  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5431  *
5432  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5433  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5434  * maximize code consolidation while preventing algorithm changes from messing
5435  * with the callers finishing touches on the ACQUIRE itself.
5436  */
5437 mblk_t *
5438 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5439 {
5440         uint_t allocsize;
5441         mblk_t *pfkeymp, *msgmp;
5442         sa_family_t af;
5443         uint8_t *cur, *end;
5444         sadb_msg_t *samsg;
5445         uint16_t sport_typecode;
5446         uint16_t dport_typecode;
5447         uint8_t check_proto;
5448         boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5449 
5450         ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5451 
5452         pfkeymp = sadb_keysock_out(0);
5453         if (pfkeymp == NULL)
5454                 return (NULL);
5455 
5456         /*
5457          * First, allocate a basic ACQUIRE message
5458          */
5459         allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5460             sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5461 
5462         /* Make sure there's enough to cover both AF_INET and AF_INET6. */
5463         allocsize += 2 * sizeof (struct sockaddr_in6);
5464 
5465         mutex_enter(&ipss->ipsec_alg_lock);
5466         /* NOTE:  The lock is now held through to this function's return. */
5467         allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5468             ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5469 
5470         if (tunnel_mode) {
5471                 /* Tunnel mode! */
5472                 allocsize += 2 * sizeof (sadb_address_t);
5473                 /* Enough to cover both AF_INET and AF_INET6. */
5474                 allocsize += 2 * sizeof (struct sockaddr_in6);
5475         }
5476 
5477         msgmp = allocb(allocsize, BPRI_HI);
5478         if (msgmp == NULL) {
5479                 freeb(pfkeymp);
5480                 mutex_exit(&ipss->ipsec_alg_lock);
5481                 return (NULL);
5482         }
5483 
5484         pfkeymp->b_cont = msgmp;
5485         cur = msgmp->b_rptr;
5486         end = cur + allocsize;
5487         samsg = (sadb_msg_t *)cur;
5488         cur += sizeof (sadb_msg_t);
5489 
5490         af = acqrec->ipsacq_addrfam;
5491         switch (af) {
5492         case AF_INET:
5493                 check_proto = IPPROTO_ICMP;
5494                 break;
5495         case AF_INET6:
5496                 check_proto = IPPROTO_ICMPV6;
5497                 break;
5498         default:
5499                 /* This should never happen unless we have kernel bugs. */
5500                 cmn_err(CE_WARN,
5501                     "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5502                 ASSERT(0);
5503                 mutex_exit(&ipss->ipsec_alg_lock);
5504                 return (NULL);
5505         }
5506 
5507         samsg->sadb_msg_version = PF_KEY_V2;
5508         samsg->sadb_msg_type = SADB_ACQUIRE;
5509         samsg->sadb_msg_satype = satype;
5510         samsg->sadb_msg_errno = 0;
5511         samsg->sadb_msg_pid = 0;
5512         samsg->sadb_msg_reserved = 0;
5513         samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5514 
5515         ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5516 
5517         if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5518                 sport_typecode = dport_typecode = 0;
5519         } else {
5520                 sport_typecode = acqrec->ipsacq_srcport;
5521                 dport_typecode = acqrec->ipsacq_dstport;
5522         }
5523 
5524         cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5525             acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5526 
5527         cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5528             acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5529 
5530         if (tunnel_mode) {
5531                 sport_typecode = acqrec->ipsacq_srcport;
5532                 dport_typecode = acqrec->ipsacq_dstport;
5533                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5534                     acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5535                     sport_typecode, acqrec->ipsacq_inner_proto,
5536                     acqrec->ipsacq_innersrcpfx);
5537                 cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5538                     acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5539                     dport_typecode, acqrec->ipsacq_inner_proto,
5540                     acqrec->ipsacq_innerdstpfx);
5541         }
5542 
5543         /* XXX Insert identity information here. */
5544 
5545         /* XXXMLS Insert sensitivity information here. */
5546 
5547         if (cur != NULL)
5548                 samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5549         else
5550                 mutex_exit(&ipss->ipsec_alg_lock);
5551 
5552         return (pfkeymp);
5553 }
5554 
5555 /*
5556  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5557  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5558  * If there was a memory allocation error, return NULL.  (Assume NULL !=
5559  * (ipsa_t *)-1).
5560  *
5561  * master_spi is passed in host order.
5562  */
5563 ipsa_t *
5564 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5565     netstack_t *ns)
5566 {
5567         sadb_address_t *src =
5568             (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5569             *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5570         sadb_spirange_t *range =
5571             (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5572         struct sockaddr_in *ssa, *dsa;
5573         struct sockaddr_in6 *ssa6, *dsa6;
5574         uint32_t *srcaddr, *dstaddr;
5575         sa_family_t af;
5576         uint32_t add, min, max;
5577 
5578         if (src == NULL) {
5579                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5580                 return ((ipsa_t *)-1);
5581         }
5582         if (dst == NULL) {
5583                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5584                 return ((ipsa_t *)-1);
5585         }
5586         if (range == NULL) {
5587                 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5588                 return ((ipsa_t *)-1);
5589         }
5590 
5591         min = ntohl(range->sadb_spirange_min);
5592         max = ntohl(range->sadb_spirange_max);
5593         dsa = (struct sockaddr_in *)(dst + 1);
5594         dsa6 = (struct sockaddr_in6 *)dsa;
5595 
5596         ssa = (struct sockaddr_in *)(src + 1);
5597         ssa6 = (struct sockaddr_in6 *)ssa;
5598         ASSERT(dsa->sin_family == ssa->sin_family);
5599 
5600         srcaddr = ALL_ZEROES_PTR;
5601         af = dsa->sin_family;
5602         switch (af) {
5603         case AF_INET:
5604                 if (src != NULL)
5605                         srcaddr = (uint32_t *)(&ssa->sin_addr);
5606                 dstaddr = (uint32_t *)(&dsa->sin_addr);
5607                 break;
5608         case AF_INET6:
5609                 if (src != NULL)
5610                         srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5611                 dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5612                 break;
5613         default:
5614                 *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5615                 return ((ipsa_t *)-1);
5616         }
5617 
5618         if (master_spi < min || master_spi > max) {
5619                 /* Return a random value in the range. */
5620                 (void) random_get_pseudo_bytes((uint8_t *)&add, sizeof (add));
5621                 master_spi = min + (add % (max - min + 1));
5622         }
5623 
5624         /*
5625          * Since master_spi is passed in host order, we need to htonl() it
5626          * for the purposes of creating a new SA.
5627          */
5628         return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5629             ns));
5630 }
5631 
5632 /*
5633  *
5634  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5635  * base header, just ignore it.  Otherwise, lock down the whole ACQUIRE list
5636  * and scan for the sequence number in question.  I may wish to accept an
5637  * address pair with it, for easier searching.
5638  *
5639  * Caller frees the message, so we don't have to here.
5640  *
5641  * NOTE:        The pfkey_q parameter may be used in the future for ACQUIRE
5642  *              failures.
5643  */
5644 /* ARGSUSED */
5645 void
5646 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *pfkey_q,
5647     netstack_t *ns)
5648 {
5649         int i;
5650         ipsacq_t *acqrec;
5651         iacqf_t *bucket;
5652 
5653         /*
5654          * I only accept the base header for this!
5655          * Though to be honest, requiring the dst address would help
5656          * immensely.
5657          *
5658          * XXX  There are already cases where I can get the dst address.
5659          */
5660         if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5661                 return;
5662 
5663         /*
5664          * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5665          * (and in the future send a message to IP with the appropriate error
5666          * number).
5667          *
5668          * Q: Do I want to reject if pid != 0?
5669          */
5670 
5671         for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5672                 bucket = &sp->s_v4.sdb_acq[i];
5673                 mutex_enter(&bucket->iacqf_lock);
5674                 for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5675                     acqrec = acqrec->ipsacq_next) {
5676                         if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5677                                 break;  /* for acqrec... loop. */
5678                 }
5679                 if (acqrec != NULL)
5680                         break;  /* for i = 0... loop. */
5681 
5682                 mutex_exit(&bucket->iacqf_lock);
5683         }
5684 
5685         if (acqrec == NULL) {
5686                 for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
5687                         bucket = &sp->s_v6.sdb_acq[i];
5688                         mutex_enter(&bucket->iacqf_lock);
5689                         for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5690                             acqrec = acqrec->ipsacq_next) {
5691                                 if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5692                                         break;  /* for acqrec... loop. */
5693                         }
5694                         if (acqrec != NULL)
5695                                 break;  /* for i = 0... loop. */
5696 
5697                         mutex_exit(&bucket->iacqf_lock);
5698                 }
5699         }
5700 
5701 
5702         if (acqrec == NULL)
5703                 return;
5704 
5705         /*
5706          * What do I do with the errno and IP?  I may need mp's services a
5707          * little more.  See sadb_destroy_acquire() for future directions
5708          * beyond free the mblk chain on the acquire record.
5709          */
5710 
5711         ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
5712         sadb_destroy_acquire(acqrec, ns);
5713         /* Have to exit mutex here, because of breaking out of for loop. */
5714         mutex_exit(&bucket->iacqf_lock);
5715 }
5716 
5717 /*
5718  * The following functions work with the replay windows of an SA.  They assume
5719  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
5720  * represents the highest sequence number packet received, and back
5721  * (ipsa->ipsa_replay_wsize) packets.
5722  */
5723 
5724 /*
5725  * Is the replay bit set?
5726  */
5727 static boolean_t
5728 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
5729 {
5730         uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5731 
5732         return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
5733 }
5734 
5735 /*
5736  * Shift the bits of the replay window over.
5737  */
5738 static void
5739 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
5740 {
5741         int i;
5742         int jump = ((shift - 1) >> 6) + 1;
5743 
5744         if (shift == 0)
5745                 return;
5746 
5747         for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
5748                 if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
5749                         ipsa->ipsa_replay_arr[i + jump] |=
5750                             ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
5751                 }
5752                 ipsa->ipsa_replay_arr[i] <<= shift;
5753         }
5754 }
5755 
5756 /*
5757  * Set a bit in the bit vector.
5758  */
5759 static void
5760 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
5761 {
5762         uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5763 
5764         ipsa->ipsa_replay_arr[offset >> 6] |= bit;
5765 }
5766 
5767 #define SADB_MAX_REPLAY_VALUE 0xffffffff
5768 
5769 /*
5770  * Assume caller has NOT done ntohl() already on seq.  Check to see
5771  * if replay sequence number "seq" has been seen already.
5772  */
5773 boolean_t
5774 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
5775 {
5776         boolean_t rc;
5777         uint32_t diff;
5778 
5779         if (ipsa->ipsa_replay_wsize == 0)
5780                 return (B_TRUE);
5781 
5782         /*
5783          * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
5784          */
5785 
5786         /* Convert sequence number into host order before holding the mutex. */
5787         seq = ntohl(seq);
5788 
5789         mutex_enter(&ipsa->ipsa_lock);
5790 
5791         /* Initialize inbound SA's ipsa_replay field to last one received. */
5792         if (ipsa->ipsa_replay == 0)
5793                 ipsa->ipsa_replay = 1;
5794 
5795         if (seq > ipsa->ipsa_replay) {
5796                 /*
5797                  * I have received a new "highest value received".  Shift
5798                  * the replay window over.
5799                  */
5800                 diff = seq - ipsa->ipsa_replay;
5801                 if (diff < ipsa->ipsa_replay_wsize) {
5802                         /* In replay window, shift bits over. */
5803                         ipsa_shift_replay(ipsa, diff);
5804                 } else {
5805                         /* WAY FAR AHEAD, clear bits and start again. */
5806                         bzero(ipsa->ipsa_replay_arr,
5807                             sizeof (ipsa->ipsa_replay_arr));
5808                 }
5809                 ipsa_set_replay(ipsa, 0);
5810                 ipsa->ipsa_replay = seq;
5811                 rc = B_TRUE;
5812                 goto done;
5813         }
5814         diff = ipsa->ipsa_replay - seq;
5815         if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
5816                 rc = B_FALSE;
5817                 goto done;
5818         }
5819         /* Set this packet as seen. */
5820         ipsa_set_replay(ipsa, diff);
5821 
5822         rc = B_TRUE;
5823 done:
5824         mutex_exit(&ipsa->ipsa_lock);
5825         return (rc);
5826 }
5827 
5828 /*
5829  * "Peek" and see if we should even bother going through the effort of
5830  * running an authentication check on the sequence number passed in.
5831  * this takes into account packets that are below the replay window,
5832  * and collisions with already replayed packets.  Return B_TRUE if it
5833  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
5834  * Assume same byte-ordering as sadb_replay_check.
5835  */
5836 boolean_t
5837 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
5838 {
5839         boolean_t rc = B_FALSE;
5840         uint32_t diff;
5841 
5842         if (ipsa->ipsa_replay_wsize == 0)
5843                 return (B_TRUE);
5844 
5845         /*
5846          * 0 is 0, regardless of byte order... :)
5847          *
5848          * If I get 0 on the wire (and there is a replay window) then the
5849          * sender most likely wrapped.  This ipsa may need to be marked or
5850          * something.
5851          */
5852         if (seq == 0)
5853                 return (B_FALSE);
5854 
5855         seq = ntohl(seq);
5856         mutex_enter(&ipsa->ipsa_lock);
5857         if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
5858             ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
5859                 goto done;
5860 
5861         /*
5862          * If I've hit 0xffffffff, then quite honestly, I don't need to
5863          * bother with formalities.  I'm not accepting any more packets
5864          * on this SA.
5865          */
5866         if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
5867                 /*
5868                  * Since we're already holding the lock, update the
5869                  * expire time ala. sadb_replay_delete() and return.
5870                  */
5871                 ipsa->ipsa_hardexpiretime = (time_t)1;
5872                 goto done;
5873         }
5874 
5875         if (seq <= ipsa->ipsa_replay) {
5876                 /*
5877                  * This seq is in the replay window.  I'm not below it,
5878                  * because I already checked for that above!
5879                  */
5880                 diff = ipsa->ipsa_replay - seq;
5881                 if (ipsa_is_replay_set(ipsa, diff))
5882                         goto done;
5883         }
5884         /* Else return B_TRUE, I'm going to advance the window. */
5885 
5886         rc = B_TRUE;
5887 done:
5888         mutex_exit(&ipsa->ipsa_lock);
5889         return (rc);
5890 }
5891 
5892 /*
5893  * Delete a single SA.
5894  *
5895  * For now, use the quick-and-dirty trick of making the association's
5896  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
5897  */
5898 void
5899 sadb_replay_delete(ipsa_t *assoc)
5900 {
5901         mutex_enter(&assoc->ipsa_lock);
5902         assoc->ipsa_hardexpiretime = (time_t)1;
5903         mutex_exit(&assoc->ipsa_lock);
5904 }
5905 
5906 /*
5907  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
5908  * this is designed to take only a format string with "* %x * %s *", so
5909  * that "spi" is printed first, then "addr" is converted using inet_pton().
5910  *
5911  * This is abstracted out to save the stack space for only when inet_pton()
5912  * is called.  Make sure "spi" is in network order; it usually is when this
5913  * would get called.
5914  */
5915 void
5916 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
5917     uint32_t spi, void *addr, int af, netstack_t *ns)
5918 {
5919         char buf[INET6_ADDRSTRLEN];
5920 
5921         ASSERT(af == AF_INET6 || af == AF_INET);
5922 
5923         ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
5924             inet_ntop(af, addr, buf, sizeof (buf)));
5925 }
5926 
5927 /*
5928  * Fills in a reference to the policy, if any, from the conn, in *ppp
5929  */
5930 static void
5931 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
5932 {
5933         ipsec_policy_t  *pp;
5934         ipsec_latch_t   *ipl = connp->conn_latch;
5935 
5936         if ((ipl != NULL) && (connp->conn_ixa->ixa_ipsec_policy != NULL)) {
5937                 pp = connp->conn_ixa->ixa_ipsec_policy;
5938                 IPPOL_REFHOLD(pp);
5939         } else {
5940                 pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, sel,
5941                     connp->conn_netstack);
5942         }
5943         *ppp = pp;
5944 }
5945 
5946 /*
5947  * The following functions scan through active conn_t structures
5948  * and return a reference to the best-matching policy it can find.
5949  * Caller must release the reference.
5950  */
5951 static void
5952 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
5953 {
5954         connf_t *connfp;
5955         conn_t *connp = NULL;
5956         ipsec_selector_t portonly;
5957 
5958         bzero((void *)&portonly, sizeof (portonly));
5959 
5960         if (sel->ips_local_port == 0)
5961                 return;
5962 
5963         connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
5964             ipst)];
5965         mutex_enter(&connfp->connf_lock);
5966 
5967         if (sel->ips_isv4) {
5968                 connp = connfp->connf_head;
5969                 while (connp != NULL) {
5970                         if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
5971                             sel->ips_local_addr_v4, sel->ips_remote_port,
5972                             sel->ips_remote_addr_v4))
5973                                 break;
5974                         connp = connp->conn_next;
5975                 }
5976 
5977                 if (connp == NULL) {
5978                         /* Try port-only match in IPv6. */
5979                         portonly.ips_local_port = sel->ips_local_port;
5980                         sel = &portonly;
5981                 }
5982         }
5983 
5984         if (connp == NULL) {
5985                 connp = connfp->connf_head;
5986                 while (connp != NULL) {
5987                         if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
5988                             sel->ips_local_addr_v6, sel->ips_remote_port,
5989                             sel->ips_remote_addr_v6))
5990                                 break;
5991                         connp = connp->conn_next;
5992                 }
5993 
5994                 if (connp == NULL) {
5995                         mutex_exit(&connfp->connf_lock);
5996                         return;
5997                 }
5998         }
5999 
6000         CONN_INC_REF(connp);
6001         mutex_exit(&connfp->connf_lock);
6002 
6003         ipsec_conn_pol(sel, connp, ppp);
6004         CONN_DEC_REF(connp);
6005 }
6006 
6007 static conn_t *
6008 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6009 {
6010         connf_t *connfp;
6011         conn_t *connp = NULL;
6012         const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6013 
6014         if (sel->ips_local_port == 0)
6015                 return (NULL);
6016 
6017         connfp = &ipst->ips_ipcl_bind_fanout[
6018             IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6019         mutex_enter(&connfp->connf_lock);
6020 
6021         if (sel->ips_isv4) {
6022                 connp = connfp->connf_head;
6023                 while (connp != NULL) {
6024                         if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6025                             sel->ips_local_addr_v4, pptr[1]))
6026                                 break;
6027                         connp = connp->conn_next;
6028                 }
6029 
6030                 if (connp == NULL) {
6031                         /* Match to all-zeroes. */
6032                         v6addrmatch = &ipv6_all_zeros;
6033                 }
6034         }
6035 
6036         if (connp == NULL) {
6037                 connp = connfp->connf_head;
6038                 while (connp != NULL) {
6039                         if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6040                             *v6addrmatch, pptr[1]))
6041                                 break;
6042                         connp = connp->conn_next;
6043                 }
6044 
6045                 if (connp == NULL) {
6046                         mutex_exit(&connfp->connf_lock);
6047                         return (NULL);
6048                 }
6049         }
6050 
6051         CONN_INC_REF(connp);
6052         mutex_exit(&connfp->connf_lock);
6053         return (connp);
6054 }
6055 
6056 static void
6057 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6058 {
6059         connf_t         *connfp;
6060         conn_t          *connp;
6061         uint32_t        ports;
6062         uint16_t        *pptr = (uint16_t *)&ports;
6063 
6064         /*
6065          * Find TCP state in the following order:
6066          * 1.) Connected conns.
6067          * 2.) Listeners.
6068          *
6069          * Even though #2 will be the common case for inbound traffic, only
6070          * following this order insures correctness.
6071          */
6072 
6073         if (sel->ips_local_port == 0)
6074                 return;
6075 
6076         /*
6077          * 0 should be fport, 1 should be lport.  SRC is the local one here.
6078          * See ipsec_construct_inverse_acquire() for details.
6079          */
6080         pptr[0] = sel->ips_remote_port;
6081         pptr[1] = sel->ips_local_port;
6082 
6083         connfp = &ipst->ips_ipcl_conn_fanout[
6084             IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6085         mutex_enter(&connfp->connf_lock);
6086         connp = connfp->connf_head;
6087 
6088         if (sel->ips_isv4) {
6089                 while (connp != NULL) {
6090                         if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6091                             sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6092                             ports))
6093                                 break;
6094                         connp = connp->conn_next;
6095                 }
6096         } else {
6097                 while (connp != NULL) {
6098                         if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6099                             sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6100                             ports))
6101                                 break;
6102                         connp = connp->conn_next;
6103                 }
6104         }
6105 
6106         if (connp != NULL) {
6107                 CONN_INC_REF(connp);
6108                 mutex_exit(&connfp->connf_lock);
6109         } else {
6110                 mutex_exit(&connfp->connf_lock);
6111 
6112                 /* Try the listen hash. */
6113                 if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6114                         return;
6115         }
6116 
6117         ipsec_conn_pol(sel, connp, ppp);
6118         CONN_DEC_REF(connp);
6119 }
6120 
6121 static void
6122 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6123     ip_stack_t *ipst)
6124 {
6125         conn_t          *connp;
6126         uint32_t        ports;
6127         uint16_t        *pptr = (uint16_t *)&ports;
6128 
6129         /*
6130          * Find SCP state in the following order:
6131          * 1.) Connected conns.
6132          * 2.) Listeners.
6133          *
6134          * Even though #2 will be the common case for inbound traffic, only
6135          * following this order insures correctness.
6136          */
6137 
6138         if (sel->ips_local_port == 0)
6139                 return;
6140 
6141         /*
6142          * 0 should be fport, 1 should be lport.  SRC is the local one here.
6143          * See ipsec_construct_inverse_acquire() for details.
6144          */
6145         pptr[0] = sel->ips_remote_port;
6146         pptr[1] = sel->ips_local_port;
6147 
6148         /*
6149          * For labeled systems, there's no need to check the
6150          * label here.  It's known to be good as we checked
6151          * before allowing the connection to become bound.
6152          */
6153         if (sel->ips_isv4) {
6154                 in6_addr_t      src, dst;
6155 
6156                 IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6157                 IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6158                 connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6159                     0, ipst->ips_netstack->netstack_sctp);
6160         } else {
6161                 connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6162                     &sel->ips_local_addr_v6, ports, ALL_ZONES,
6163                     0, ipst->ips_netstack->netstack_sctp);
6164         }
6165         if (connp == NULL)
6166                 return;
6167         ipsec_conn_pol(sel, connp, ppp);
6168         CONN_DEC_REF(connp);
6169 }
6170 
6171 /*
6172  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6173  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6174  * to PF_KEY.
6175  *
6176  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6177  * ignore prefix lengths in the address extension.  Since we match on first-
6178  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6179  * set addresses to mask out the lower bits, we should get a suitable search
6180  * key for the SPD anyway.  This is the function to change if the assumption
6181  * about suitable search keys is wrong.
6182  */
6183 static int
6184 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6185     sadb_address_t *dstext, int *diagnostic)
6186 {
6187         struct sockaddr_in *src, *dst;
6188         struct sockaddr_in6 *src6, *dst6;
6189 
6190         *diagnostic = 0;
6191 
6192         bzero(sel, sizeof (*sel));
6193         sel->ips_protocol = srcext->sadb_address_proto;
6194         dst = (struct sockaddr_in *)(dstext + 1);
6195         if (dst->sin_family == AF_INET6) {
6196                 dst6 = (struct sockaddr_in6 *)dst;
6197                 src6 = (struct sockaddr_in6 *)(srcext + 1);
6198                 if (src6->sin6_family != AF_INET6) {
6199                         *diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6200                         return (EINVAL);
6201                 }
6202                 sel->ips_remote_addr_v6 = dst6->sin6_addr;
6203                 sel->ips_local_addr_v6 = src6->sin6_addr;
6204                 if (sel->ips_protocol == IPPROTO_ICMPV6) {
6205                         sel->ips_is_icmp_inv_acq = 1;
6206                 } else {
6207                         sel->ips_remote_port = dst6->sin6_port;
6208                         sel->ips_local_port = src6->sin6_port;
6209                 }
6210                 sel->ips_isv4 = B_FALSE;
6211         } else {
6212                 src = (struct sockaddr_in *)(srcext + 1);
6213                 if (src->sin_family != AF_INET) {
6214                         *diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6215                         return (EINVAL);
6216                 }
6217                 sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6218                 sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6219                 if (sel->ips_protocol == IPPROTO_ICMP) {
6220                         sel->ips_is_icmp_inv_acq = 1;
6221                 } else {
6222                         sel->ips_remote_port = dst->sin_port;
6223                         sel->ips_local_port = src->sin_port;
6224                 }
6225                 sel->ips_isv4 = B_TRUE;
6226         }
6227         return (0);
6228 }
6229 
6230 /*
6231  * We have encapsulation.
6232  * - Lookup tun_t by address and look for an associated
6233  *   tunnel policy
6234  * - If there are inner selectors
6235  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6236  *   - Look up tunnel policy based on selectors
6237  * - Else
6238  *   - Sanity check the negotation
6239  *   - If appropriate, fall through to global policy
6240  */
6241 static int
6242 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6243     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6244     int *diagnostic)
6245 {
6246         int err;
6247         ipsec_policy_head_t *polhead;
6248 
6249         *diagnostic = 0;
6250 
6251         /* Check for inner selectors and act appropriately */
6252 
6253         if (innsrcext != NULL) {
6254                 /* Inner selectors present */
6255                 ASSERT(inndstext != NULL);
6256                 if ((itp == NULL) ||
6257                     (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6258                     (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6259                         /*
6260                          * If inner packet selectors, we must have negotiate
6261                          * tunnel and active policy.  If the tunnel has
6262                          * transport-mode policy set on it, or has no policy,
6263                          * fail.
6264                          */
6265                         return (ENOENT);
6266                 } else {
6267                         /*
6268                          * Reset "sel" to indicate inner selectors.  Pass
6269                          * inner PF_KEY address extensions for this to happen.
6270                          */
6271                         if ((err = ipsec_get_inverse_acquire_sel(sel,
6272                             innsrcext, inndstext, diagnostic)) != 0)
6273                                 return (err);
6274                         /*
6275                          * Now look for a tunnel policy based on those inner
6276                          * selectors.  (Common code is below.)
6277                          */
6278                 }
6279         } else {
6280                 /* No inner selectors present */
6281                 if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6282                         /*
6283                          * Transport mode negotiation with no tunnel policy
6284                          * configured - return to indicate a global policy
6285                          * check is needed.
6286                          */
6287                         return (0);
6288                 } else if (itp->itp_flags & ITPF_P_TUNNEL) {
6289                         /* Tunnel mode set with no inner selectors. */
6290                         return (ENOENT);
6291                 }
6292                 /*
6293                  * Else, this is a tunnel policy configured with ifconfig(1m)
6294                  * or "negotiate transport" with ipsecconf(1m).  We have an
6295                  * itp with policy set based on any match, so don't bother
6296                  * changing fields in "sel".
6297                  */
6298         }
6299 
6300         ASSERT(itp != NULL);
6301         polhead = itp->itp_policy;
6302         ASSERT(polhead != NULL);
6303         rw_enter(&polhead->iph_lock, RW_READER);
6304         *ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel);
6305         rw_exit(&polhead->iph_lock);
6306 
6307         /*
6308          * Don't default to global if we didn't find a matching policy entry.
6309          * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6310          */
6311         if (*ppp == NULL)
6312                 return (ENOENT);
6313 
6314         return (0);
6315 }
6316 
6317 /*
6318  * For sctp conn_faddr is the primary address, hence this is of limited
6319  * use for sctp.
6320  */
6321 static void
6322 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6323     ip_stack_t *ipst)
6324 {
6325         boolean_t       isv4 = sel->ips_isv4;
6326         connf_t         *connfp;
6327         conn_t          *connp;
6328 
6329         if (isv4) {
6330                 connfp = &ipst->ips_ipcl_proto_fanout_v4[sel->ips_protocol];
6331         } else {
6332                 connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6333         }
6334 
6335         mutex_enter(&connfp->connf_lock);
6336         for (connp = connfp->connf_head; connp != NULL;
6337             connp = connp->conn_next) {
6338                 if (isv4) {
6339                         if ((connp->conn_laddr_v4 == INADDR_ANY ||
6340                             connp->conn_laddr_v4 == sel->ips_local_addr_v4) &&
6341                             (connp->conn_faddr_v4 == INADDR_ANY ||
6342                             connp->conn_faddr_v4 == sel->ips_remote_addr_v4))
6343                                 break;
6344                 } else {
6345                         if ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
6346                             IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
6347                             &sel->ips_local_addr_v6)) &&
6348                             (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
6349                             IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
6350                             &sel->ips_remote_addr_v6)))
6351                                 break;
6352                 }
6353         }
6354         if (connp == NULL) {
6355                 mutex_exit(&connfp->connf_lock);
6356                 return;
6357         }
6358 
6359         CONN_INC_REF(connp);
6360         mutex_exit(&connfp->connf_lock);
6361 
6362         ipsec_conn_pol(sel, connp, ppp);
6363         CONN_DEC_REF(connp);
6364 }
6365 
6366 /*
6367  * Construct an inverse ACQUIRE reply based on:
6368  *
6369  * 1.) Current global policy.
6370  * 2.) An conn_t match depending on what all was passed in the extv[].
6371  * 3.) A tunnel's policy head.
6372  * ...
6373  * N.) Other stuff TBD (e.g. identities)
6374  *
6375  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6376  * in this function so the caller can extract them where appropriately.
6377  *
6378  * The SRC address is the local one - just like an outbound ACQUIRE message.
6379  *
6380  * XXX MLS: key management supplies a label which we just reflect back up
6381  * again.  clearly we need to involve the label in the rest of the checks.
6382  */
6383 mblk_t *
6384 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6385     netstack_t *ns)
6386 {
6387         int err;
6388         int diagnostic;
6389         sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6390             *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6391             *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6392             *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6393         sadb_sens_t *sens = (sadb_sens_t *)extv[SADB_EXT_SENSITIVITY];
6394         struct sockaddr_in6 *src, *dst;
6395         struct sockaddr_in6 *isrc, *idst;
6396         ipsec_tun_pol_t *itp = NULL;
6397         ipsec_policy_t *pp = NULL;
6398         ipsec_selector_t sel, isel;
6399         mblk_t *retmp = NULL;
6400         ip_stack_t      *ipst = ns->netstack_ip;
6401 
6402 
6403         /* Normalize addresses */
6404         if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6405             == KS_IN_ADDR_UNKNOWN) {
6406                 err = EINVAL;
6407                 diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6408                 goto bail;
6409         }
6410         src = (struct sockaddr_in6 *)(srcext + 1);
6411         if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6412             == KS_IN_ADDR_UNKNOWN) {
6413                 err = EINVAL;
6414                 diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6415                 goto bail;
6416         }
6417         dst = (struct sockaddr_in6 *)(dstext + 1);
6418         if (src->sin6_family != dst->sin6_family) {
6419                 err = EINVAL;
6420                 diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6421                 goto bail;
6422         }
6423 
6424         /* Check for tunnel mode and act appropriately */
6425         if (innsrcext != NULL) {
6426                 if (inndstext == NULL) {
6427                         err = EINVAL;
6428                         diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6429                         goto bail;
6430                 }
6431                 if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6432                     (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6433                         err = EINVAL;
6434                         diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6435                         goto bail;
6436                 }
6437                 isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6438                 if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6439                     (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6440                         err = EINVAL;
6441                         diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6442                         goto bail;
6443                 }
6444                 idst = (struct sockaddr_in6 *)(inndstext + 1);
6445                 if (isrc->sin6_family != idst->sin6_family) {
6446                         err = EINVAL;
6447                         diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6448                         goto bail;
6449                 }
6450                 if (isrc->sin6_family != AF_INET &&
6451                     isrc->sin6_family != AF_INET6) {
6452                         err = EINVAL;
6453                         diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6454                         goto bail;
6455                 }
6456         } else if (inndstext != NULL) {
6457                 err = EINVAL;
6458                 diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6459                 goto bail;
6460         }
6461 
6462         /* Get selectors first, based on outer addresses */
6463         err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6464         if (err != 0)
6465                 goto bail;
6466 
6467         /* Check for tunnel mode mismatches. */
6468         if (innsrcext != NULL &&
6469             ((isrc->sin6_family == AF_INET &&
6470             sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6471             (isrc->sin6_family == AF_INET6 &&
6472             sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6473                 err = EPROTOTYPE;
6474                 goto bail;
6475         }
6476 
6477         /*
6478          * Okay, we have the addresses and other selector information.
6479          * Let's first find a conn...
6480          */
6481         pp = NULL;
6482         switch (sel.ips_protocol) {
6483         case IPPROTO_TCP:
6484                 ipsec_tcp_pol(&sel, &pp, ipst);
6485                 break;
6486         case IPPROTO_UDP:
6487                 ipsec_udp_pol(&sel, &pp, ipst);
6488                 break;
6489         case IPPROTO_SCTP:
6490                 ipsec_sctp_pol(&sel, &pp, ipst);
6491                 break;
6492         case IPPROTO_ENCAP:
6493         case IPPROTO_IPV6:
6494                 /*
6495                  * Assume sel.ips_remote_addr_* has the right address at
6496                  * that exact position.
6497                  */
6498                 itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
6499                     (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
6500                     ipst);
6501 
6502                 if (innsrcext == NULL) {
6503                         /*
6504                          * Transport-mode tunnel, make sure we fake out isel
6505                          * to contain something based on the outer protocol.
6506                          */
6507                         bzero(&isel, sizeof (isel));
6508                         isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6509                 } /* Else isel is initialized by ipsec_tun_pol(). */
6510                 err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6511                     &diagnostic);
6512                 /*
6513                  * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6514                  * may be.
6515                  */
6516                 if (err != 0)
6517                         goto bail;
6518                 break;
6519         default:
6520                 ipsec_oth_pol(&sel, &pp, ipst);
6521                 break;
6522         }
6523 
6524         /*
6525          * If we didn't find a matching conn_t or other policy head, take a
6526          * look in the global policy.
6527          */
6528         if (pp == NULL) {
6529                 pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, &sel, ns);
6530                 if (pp == NULL) {
6531                         /* There's no global policy. */
6532                         err = ENOENT;
6533                         diagnostic = 0;
6534                         goto bail;
6535                 }
6536         }
6537 
6538         /*
6539          * Now that we have a policy entry/widget, construct an ACQUIRE
6540          * message based on that, fix fields where appropriate,
6541          * and return the message.
6542          */
6543         retmp = sadb_extended_acquire(&sel, pp, NULL,
6544             (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6545             samsg->sadb_msg_seq, samsg->sadb_msg_pid, sens, ns);
6546         if (pp != NULL) {
6547                 IPPOL_REFRELE(pp);
6548         }
6549         ASSERT(err == 0 && diagnostic == 0);
6550         if (retmp == NULL)
6551                 err = ENOMEM;
6552 bail:
6553         if (itp != NULL) {
6554                 ITP_REFRELE(itp, ns);
6555         }
6556         samsg->sadb_msg_errno = (uint8_t)err;
6557         samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6558         return (retmp);
6559 }
6560 
6561 /*
6562  * ipsa_lpkt is a one-element queue, only manipulated by the next two
6563  * functions.  They have to hold the ipsa_lock because of potential races
6564  * between key management using SADB_UPDATE, and inbound packets that may
6565  * queue up on the larval SA (hence the 'l' in "lpkt").
6566  */
6567 
6568 /*
6569  * sadb_set_lpkt:
6570  *
6571  * Returns the passed-in packet if the SA is no longer larval.
6572  *
6573  * Returns NULL if the SA is larval, and needs to be swapped into the SA for
6574  * processing after an SADB_UPDATE.
6575  */
6576 mblk_t *
6577 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, ip_recv_attr_t *ira)
6578 {
6579         mblk_t          *opkt;
6580 
6581         mutex_enter(&ipsa->ipsa_lock);
6582         opkt = ipsa->ipsa_lpkt;
6583         if (ipsa->ipsa_state == IPSA_STATE_LARVAL) {
6584                 /*
6585                  * Consume npkt and place it in the LARVAL SA's inbound
6586                  * packet slot.
6587                  */
6588                 mblk_t  *attrmp;
6589 
6590                 attrmp = ip_recv_attr_to_mblk(ira);
6591                 if (attrmp == NULL) {
6592                         ill_t *ill = ira->ira_ill;
6593 
6594                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
6595                         ip_drop_input("ipIfStatsInDiscards", npkt, ill);
6596                         freemsg(npkt);
6597                         opkt = NULL;
6598                 } else {
6599                         ASSERT(attrmp->b_cont == NULL);
6600                         attrmp->b_cont = npkt;
6601                         ipsa->ipsa_lpkt = attrmp;
6602                 }
6603                 npkt = NULL;
6604         } else {
6605                 /*
6606                  * If not larval, we lost the race.  NOTE: ipsa_lpkt may still
6607                  * have been non-NULL in the non-larval case, because of
6608                  * inbound packets arriving prior to sadb_common_add()
6609                  * transferring the SA completely out of larval state, but
6610                  * after lpkt was grabbed by the AH/ESP-specific add routines.
6611                  * We should clear the old ipsa_lpkt in this case to make sure
6612                  * that it doesn't linger on the now-MATURE IPsec SA, or get
6613                  * picked up as an out-of-order packet.
6614                  */
6615                 ipsa->ipsa_lpkt = NULL;
6616         }
6617         mutex_exit(&ipsa->ipsa_lock);
6618 
6619         if (opkt != NULL) {
6620                 ipsec_stack_t   *ipss;
6621 
6622                 ipss = ira->ira_ill->ill_ipst->ips_netstack->netstack_ipsec;
6623                 opkt = ip_recv_attr_free_mblk(opkt);
6624                 ip_drop_packet(opkt, B_TRUE, ira->ira_ill,
6625                     DROPPER(ipss, ipds_sadb_inlarval_replace),
6626                     &ipss->ipsec_sadb_dropper);
6627         }
6628         return (npkt);
6629 }
6630 
6631 /*
6632  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6633  * previous value.
6634  */
6635 mblk_t *
6636 sadb_clear_lpkt(ipsa_t *ipsa)
6637 {
6638         mblk_t *opkt;
6639 
6640         mutex_enter(&ipsa->ipsa_lock);
6641         opkt = ipsa->ipsa_lpkt;
6642         ipsa->ipsa_lpkt = NULL;
6643         mutex_exit(&ipsa->ipsa_lock);
6644         return (opkt);
6645 }
6646 
6647 /*
6648  * Walker callback used by sadb_alg_update() to free/create crypto
6649  * context template when a crypto software provider is removed or
6650  * added.
6651  */
6652 
6653 struct sadb_update_alg_state {
6654         ipsec_algtype_t alg_type;
6655         uint8_t alg_id;
6656         boolean_t is_added;
6657         boolean_t async_auth;
6658         boolean_t async_encr;
6659 };
6660 
6661 static void
6662 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
6663 {
6664         struct sadb_update_alg_state *update_state =
6665             (struct sadb_update_alg_state *)cookie;
6666         crypto_ctx_template_t *ctx_tmpl = NULL;
6667 
6668         ASSERT(MUTEX_HELD(&head->isaf_lock));
6669 
6670         if (entry->ipsa_state == IPSA_STATE_LARVAL)
6671                 return;
6672 
6673         mutex_enter(&entry->ipsa_lock);
6674 
6675         if ((entry->ipsa_encr_alg != SADB_EALG_NONE && entry->ipsa_encr_alg !=
6676             SADB_EALG_NULL && update_state->async_encr) ||
6677             (entry->ipsa_auth_alg != SADB_AALG_NONE &&
6678             update_state->async_auth)) {
6679                 entry->ipsa_flags |= IPSA_F_ASYNC;
6680         } else {
6681                 entry->ipsa_flags &= ~IPSA_F_ASYNC;
6682         }
6683 
6684         switch (update_state->alg_type) {
6685         case IPSEC_ALG_AUTH:
6686                 if (entry->ipsa_auth_alg == update_state->alg_id)
6687                         ctx_tmpl = &entry->ipsa_authtmpl;
6688                 break;
6689         case IPSEC_ALG_ENCR:
6690                 if (entry->ipsa_encr_alg == update_state->alg_id)
6691                         ctx_tmpl = &entry->ipsa_encrtmpl;
6692                 break;
6693         default:
6694                 ctx_tmpl = NULL;
6695         }
6696 
6697         if (ctx_tmpl == NULL) {
6698                 mutex_exit(&entry->ipsa_lock);
6699                 return;
6700         }
6701 
6702         /*
6703          * The context template of the SA may be affected by the change
6704          * of crypto provider.
6705          */
6706         if (update_state->is_added) {
6707                 /* create the context template if not already done */
6708                 if (*ctx_tmpl == NULL) {
6709                         (void) ipsec_create_ctx_tmpl(entry,
6710                             update_state->alg_type);
6711                 }
6712         } else {
6713                 /*
6714                  * The crypto provider was removed. If the context template
6715                  * exists but it is no longer valid, free it.
6716                  */
6717                 if (*ctx_tmpl != NULL)
6718                         ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
6719         }
6720 
6721         mutex_exit(&entry->ipsa_lock);
6722 }
6723 
6724 /*
6725  * Invoked by IP when an software crypto provider has been updated, or if
6726  * the crypto synchrony changes.  The type and id of the corresponding
6727  * algorithm is passed as argument.  The type is set to ALL in the case of
6728  * a synchrony change.
6729  *
6730  * is_added is B_TRUE if the provider was added, B_FALSE if it was
6731  * removed. The function updates the SADB and free/creates the
6732  * context templates associated with SAs if needed.
6733  */
6734 
6735 #define SADB_ALG_UPDATE_WALK(sadb, table) \
6736     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
6737         &update_state)
6738 
6739 void
6740 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
6741     netstack_t *ns)
6742 {
6743         struct sadb_update_alg_state update_state;
6744         ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
6745         ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
6746         ipsec_stack_t *ipss = ns->netstack_ipsec;
6747 
6748         update_state.alg_type = alg_type;
6749         update_state.alg_id = alg_id;
6750         update_state.is_added = is_added;
6751         update_state.async_auth = ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
6752             IPSEC_ALGS_EXEC_ASYNC;
6753         update_state.async_encr = ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
6754             IPSEC_ALGS_EXEC_ASYNC;
6755 
6756         if (alg_type == IPSEC_ALG_AUTH || alg_type == IPSEC_ALG_ALL) {
6757                 /* walk the AH tables only for auth. algorithm changes */
6758                 SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
6759                 SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
6760                 SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
6761                 SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
6762         }
6763 
6764         /* walk the ESP tables */
6765         SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
6766         SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
6767         SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
6768         SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
6769 }
6770 
6771 /*
6772  * Creates a context template for the specified SA. This function
6773  * is called when an SA is created and when a context template needs
6774  * to be created due to a change of software provider.
6775  */
6776 int
6777 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6778 {
6779         ipsec_alginfo_t *alg;
6780         crypto_mechanism_t mech;
6781         crypto_key_t *key;
6782         crypto_ctx_template_t *sa_tmpl;
6783         int rv;
6784         ipsec_stack_t   *ipss = sa->ipsa_netstack->netstack_ipsec;
6785 
6786         ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
6787         ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6788 
6789         /* get pointers to the algorithm info, context template, and key */
6790         switch (alg_type) {
6791         case IPSEC_ALG_AUTH:
6792                 key = &sa->ipsa_kcfauthkey;
6793                 sa_tmpl = &sa->ipsa_authtmpl;
6794                 alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
6795                 break;
6796         case IPSEC_ALG_ENCR:
6797                 key = &sa->ipsa_kcfencrkey;
6798                 sa_tmpl = &sa->ipsa_encrtmpl;
6799                 alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
6800                 break;
6801         default:
6802                 alg = NULL;
6803         }
6804 
6805         if (alg == NULL || !ALG_VALID(alg))
6806                 return (EINVAL);
6807 
6808         /* initialize the mech info structure for the framework */
6809         ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
6810         mech.cm_type = alg->alg_mech_type;
6811         mech.cm_param = NULL;
6812         mech.cm_param_len = 0;
6813 
6814         /* create a new context template */
6815         rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
6816 
6817         /*
6818          * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
6819          * providers are available for that mechanism. In that case
6820          * we don't fail, and will generate the context template from
6821          * the framework callback when a software provider for that
6822          * mechanism registers.
6823          *
6824          * The context template is assigned the special value
6825          * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
6826          * lack of memory. No attempt will be made to use
6827          * the context template if it is set to this value.
6828          */
6829         if (rv == CRYPTO_HOST_MEMORY) {
6830                 *sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
6831         } else if (rv != CRYPTO_SUCCESS) {
6832                 *sa_tmpl = NULL;
6833                 if (rv != CRYPTO_MECH_NOT_SUPPORTED)
6834                         return (EINVAL);
6835         }
6836 
6837         return (0);
6838 }
6839 
6840 /*
6841  * Destroy the context template of the specified algorithm type
6842  * of the specified SA. Must be called while holding the SA lock.
6843  */
6844 void
6845 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6846 {
6847         ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6848 
6849         if (alg_type == IPSEC_ALG_AUTH) {
6850                 if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
6851                         sa->ipsa_authtmpl = NULL;
6852                 else if (sa->ipsa_authtmpl != NULL) {
6853                         crypto_destroy_ctx_template(sa->ipsa_authtmpl);
6854                         sa->ipsa_authtmpl = NULL;
6855                 }
6856         } else {
6857                 ASSERT(alg_type == IPSEC_ALG_ENCR);
6858                 if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
6859                         sa->ipsa_encrtmpl = NULL;
6860                 else if (sa->ipsa_encrtmpl != NULL) {
6861                         crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
6862                         sa->ipsa_encrtmpl = NULL;
6863                 }
6864         }
6865 }
6866 
6867 /*
6868  * Use the kernel crypto framework to check the validity of a key received
6869  * via keysock. Returns 0 if the key is OK, -1 otherwise.
6870  */
6871 int
6872 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
6873     boolean_t is_auth, int *diag)
6874 {
6875         crypto_mechanism_t mech;
6876         crypto_key_t crypto_key;
6877         int crypto_rc;
6878 
6879         mech.cm_type = mech_type;
6880         mech.cm_param = NULL;
6881         mech.cm_param_len = 0;
6882 
6883         crypto_key.ck_format = CRYPTO_KEY_RAW;
6884         crypto_key.ck_data = sadb_key + 1;
6885         crypto_key.ck_length = sadb_key->sadb_key_bits;
6886 
6887         crypto_rc = crypto_key_check(&mech, &crypto_key);
6888 
6889         switch (crypto_rc) {
6890         case CRYPTO_SUCCESS:
6891                 return (0);
6892         case CRYPTO_MECHANISM_INVALID:
6893         case CRYPTO_MECH_NOT_SUPPORTED:
6894                 *diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
6895                     SADB_X_DIAGNOSTIC_BAD_EALG;
6896                 break;
6897         case CRYPTO_KEY_SIZE_RANGE:
6898                 *diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
6899                     SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
6900                 break;
6901         case CRYPTO_WEAK_KEY:
6902                 *diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
6903                     SADB_X_DIAGNOSTIC_WEAK_EKEY;
6904                 break;
6905         }
6906 
6907         return (-1);
6908 }
6909 
6910 /*
6911  * Whack options in the outer IP header when ipsec changes the outer label
6912  *
6913  * This is inelegant and really could use refactoring.
6914  */
6915 mblk_t *
6916 sadb_whack_label_v4(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
6917     ipdropper_t *dropper)
6918 {
6919         int delta;
6920         int plen;
6921         dblk_t *db;
6922         int hlen;
6923         uint8_t *opt_storage = assoc->ipsa_opt_storage;
6924         ipha_t *ipha = (ipha_t *)mp->b_rptr;
6925 
6926         plen = ntohs(ipha->ipha_length);
6927 
6928         delta = tsol_remove_secopt(ipha, MBLKL(mp));
6929         mp->b_wptr += delta;
6930         plen += delta;
6931 
6932         /* XXX XXX code copied from tsol_check_label */
6933 
6934         /* Make sure we have room for the worst-case addition */
6935         hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
6936         hlen = (hlen + 3) & ~3;
6937         if (hlen > IP_MAX_HDR_LENGTH)
6938                 hlen = IP_MAX_HDR_LENGTH;
6939         hlen -= IPH_HDR_LENGTH(ipha);
6940 
6941         db = mp->b_datap;
6942         if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
6943                 int copylen;
6944                 mblk_t *new_mp;
6945 
6946                 /* allocate enough to be meaningful, but not *too* much */
6947                 copylen = MBLKL(mp);
6948                 if (copylen > 256)
6949                         copylen = 256;
6950                 new_mp = allocb_tmpl(hlen + copylen +
6951                     (mp->b_rptr - mp->b_datap->db_base), mp);
6952 
6953                 if (new_mp == NULL) {
6954                         ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
6955                         return (NULL);
6956                 }
6957 
6958                 /* keep the bias */
6959                 new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
6960                 new_mp->b_wptr = new_mp->b_rptr + copylen;
6961                 bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
6962                 new_mp->b_cont = mp;
6963                 if ((mp->b_rptr += copylen) >= mp->b_wptr) {
6964                         new_mp->b_cont = mp->b_cont;
6965                         freeb(mp);
6966                 }
6967                 mp = new_mp;
6968                 ipha = (ipha_t *)mp->b_rptr;
6969         }
6970 
6971         delta = tsol_prepend_option(assoc->ipsa_opt_storage, ipha, MBLKL(mp));
6972 
6973         ASSERT(delta != -1);
6974 
6975         plen += delta;
6976         mp->b_wptr += delta;
6977 
6978         /*
6979          * Paranoia
6980          */
6981         db = mp->b_datap;
6982 
6983         ASSERT3P(mp->b_wptr, <=, db->db_lim);
6984         ASSERT3P(mp->b_rptr, <=, db->db_lim);
6985 
6986         ASSERT3P(mp->b_wptr, >=, db->db_base);
6987         ASSERT3P(mp->b_rptr, >=, db->db_base);
6988         /* End paranoia */
6989 
6990         ipha->ipha_length = htons(plen);
6991 
6992         return (mp);
6993 }
6994 
6995 mblk_t *
6996 sadb_whack_label_v6(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
6997     ipdropper_t *dropper)
6998 {
6999         int delta;
7000         int plen;
7001         dblk_t *db;
7002         int hlen;
7003         uint8_t *opt_storage = assoc->ipsa_opt_storage;
7004         uint_t sec_opt_len; /* label option length not including type, len */
7005         ip6_t *ip6h = (ip6_t *)mp->b_rptr;
7006 
7007         plen = ntohs(ip6h->ip6_plen);
7008 
7009         delta = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
7010         mp->b_wptr += delta;
7011         plen += delta;
7012 
7013         /* XXX XXX code copied from tsol_check_label_v6 */
7014         /*
7015          * Make sure we have room for the worst-case addition. Add 2 bytes for
7016          * the hop-by-hop ext header's next header and length fields. Add
7017          * another 2 bytes for the label option type, len and then round
7018          * up to the next 8-byte multiple.
7019          */
7020         sec_opt_len = opt_storage[1];
7021 
7022         db = mp->b_datap;
7023         hlen = (4 + sec_opt_len + 7) & ~7;
7024 
7025         if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
7026                 int copylen;
7027                 mblk_t *new_mp;
7028                 uint16_t hdr_len;
7029 
7030                 hdr_len = ip_hdr_length_v6(mp, ip6h);
7031                 /*
7032                  * Allocate enough to be meaningful, but not *too* much.
7033                  * Also all the IPv6 extension headers must be in the same mblk
7034                  */
7035                 copylen = MBLKL(mp);
7036                 if (copylen > 256)
7037                         copylen = 256;
7038                 if (copylen < hdr_len)
7039                         copylen = hdr_len;
7040                 new_mp = allocb_tmpl(hlen + copylen +
7041                     (mp->b_rptr - mp->b_datap->db_base), mp);
7042                 if (new_mp == NULL) {
7043                         ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
7044                         return (NULL);
7045                 }
7046 
7047                 /* keep the bias */
7048                 new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
7049                 new_mp->b_wptr = new_mp->b_rptr + copylen;
7050                 bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
7051                 new_mp->b_cont = mp;
7052                 if ((mp->b_rptr += copylen) >= mp->b_wptr) {
7053                         new_mp->b_cont = mp->b_cont;
7054                         freeb(mp);
7055                 }
7056                 mp = new_mp;
7057                 ip6h = (ip6_t *)mp->b_rptr;
7058         }
7059 
7060         delta = tsol_prepend_option_v6(assoc->ipsa_opt_storage,
7061             ip6h, MBLKL(mp));
7062 
7063         ASSERT(delta != -1);
7064 
7065         plen += delta;
7066         mp->b_wptr += delta;
7067 
7068         /*
7069          * Paranoia
7070          */
7071         db = mp->b_datap;
7072 
7073         ASSERT3P(mp->b_wptr, <=, db->db_lim);
7074         ASSERT3P(mp->b_rptr, <=, db->db_lim);
7075 
7076         ASSERT3P(mp->b_wptr, >=, db->db_base);
7077         ASSERT3P(mp->b_rptr, >=, db->db_base);
7078         /* End paranoia */
7079 
7080         ip6h->ip6_plen = htons(plen);
7081 
7082         return (mp);
7083 }
7084 
7085 /* Whack the labels and update ip_xmit_attr_t as needed */
7086 mblk_t *
7087 sadb_whack_label(mblk_t *mp, ipsa_t *assoc, ip_xmit_attr_t *ixa,
7088     kstat_named_t *counter, ipdropper_t *dropper)
7089 {
7090         int adjust;
7091         int iplen;
7092 
7093         if (ixa->ixa_flags & IXAF_IS_IPV4) {
7094                 ipha_t          *ipha = (ipha_t *)mp->b_rptr;
7095 
7096                 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7097                 iplen = ntohs(ipha->ipha_length);
7098                 mp = sadb_whack_label_v4(mp, assoc, counter, dropper);
7099                 if (mp == NULL)
7100                         return (NULL);
7101 
7102                 ipha = (ipha_t *)mp->b_rptr;
7103                 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7104                 adjust = (int)ntohs(ipha->ipha_length) - iplen;
7105         } else {
7106                 ip6_t           *ip6h = (ip6_t *)mp->b_rptr;
7107 
7108                 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7109                 iplen = ntohs(ip6h->ip6_plen);
7110                 mp = sadb_whack_label_v6(mp, assoc, counter, dropper);
7111                 if (mp == NULL)
7112                         return (NULL);
7113 
7114                 ip6h = (ip6_t *)mp->b_rptr;
7115                 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7116                 adjust = (int)ntohs(ip6h->ip6_plen) - iplen;
7117         }
7118         ixa->ixa_pktlen += adjust;
7119         ixa->ixa_ip_hdr_length += adjust;
7120         return (mp);
7121 }
7122 
7123 /*
7124  * If this is an outgoing SA then add some fuzz to the
7125  * SOFT EXPIRE time. The reason for this is to stop
7126  * peers trying to renegotiate SOFT expiring SA's at
7127  * the same time. The amount of fuzz needs to be at
7128  * least 8 seconds which is the typical interval
7129  * sadb_ager(), although this is only a guide as it
7130  * selftunes.
7131  */
7132 static void
7133 lifetime_fuzz(ipsa_t *assoc)
7134 {
7135         uint8_t rnd;
7136 
7137         if (assoc->ipsa_softaddlt == 0)
7138                 return;
7139 
7140         (void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7141         rnd = (rnd & 0xF) + 8;
7142         assoc->ipsa_softexpiretime -= rnd;
7143         assoc->ipsa_softaddlt -= rnd;
7144 }
7145 
7146 static void
7147 destroy_ipsa_pair(ipsap_t *ipsapp)
7148 {
7149         /*
7150          * Because of the multi-line macro nature of IPSA_REFRELE, keep
7151          * them in { }.
7152          */
7153         if (ipsapp->ipsap_sa_ptr != NULL) {
7154                 IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7155         }
7156         if (ipsapp->ipsap_psa_ptr != NULL) {
7157                 IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7158         }
7159         init_ipsa_pair(ipsapp);
7160 }
7161 
7162 static void
7163 init_ipsa_pair(ipsap_t *ipsapp)
7164 {
7165         ipsapp->ipsap_bucket = NULL;
7166         ipsapp->ipsap_sa_ptr = NULL;
7167         ipsapp->ipsap_pbucket = NULL;
7168         ipsapp->ipsap_psa_ptr = NULL;
7169 }
7170 
7171 /*
7172  * The sadb_ager() function walks through the hash tables of SA's and ages
7173  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7174  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7175  * SA appears in both the inbound and outbound tables because its not possible
7176  * to determine its direction) are placed on a list when they expire. This is
7177  * to ensure that pair/peer SA's are reaped at the same time, even if they
7178  * expire at different times.
7179  *
7180  * This function is called twice by sadb_ager(), one after processing the
7181  * inbound table, then again after processing the outbound table.
7182  */
7183 void
7184 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7185 {
7186         templist_t *listptr;
7187         int outhash;
7188         isaf_t *bucket;
7189         boolean_t haspeer;
7190         ipsa_t *peer_assoc, *dying;
7191         /*
7192          * Haspeer cases will contain both IPv4 and IPv6.  This code
7193          * is address independent.
7194          */
7195         while (haspeerlist != NULL) {
7196                 /* "dying" contains the SA that has a peer. */
7197                 dying = haspeerlist->ipsa;
7198                 haspeer = (dying->ipsa_haspeer);
7199                 listptr = haspeerlist;
7200                 haspeerlist = listptr->next;
7201                 kmem_free(listptr, sizeof (*listptr));
7202                 /*
7203                  * Pick peer bucket based on addrfam.
7204                  */
7205                 if (outbound) {
7206                         if (haspeer)
7207                                 bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7208                         else
7209                                 bucket = INBOUND_BUCKET(sp,
7210                                     dying->ipsa_otherspi);
7211                 } else { /* inbound */
7212                         if (haspeer) {
7213                                 if (dying->ipsa_addrfam == AF_INET6) {
7214                                         outhash = OUTBOUND_HASH_V6(sp,
7215                                             *((in6_addr_t *)&dying->
7216                                             ipsa_dstaddr));
7217                                 } else {
7218                                         outhash = OUTBOUND_HASH_V4(sp,
7219                                             *((ipaddr_t *)&dying->
7220                                             ipsa_dstaddr));
7221                                 }
7222                         } else if (dying->ipsa_addrfam == AF_INET6) {
7223                                 outhash = OUTBOUND_HASH_V6(sp,
7224                                     *((in6_addr_t *)&dying->
7225                                     ipsa_srcaddr));
7226                         } else {
7227                                 outhash = OUTBOUND_HASH_V4(sp,
7228                                     *((ipaddr_t *)&dying->
7229                                     ipsa_srcaddr));
7230                         }
7231                         bucket = &(sp->sdb_of[outhash]);
7232                 }
7233 
7234                 mutex_enter(&bucket->isaf_lock);
7235                 /*
7236                  * "haspeer" SA's have the same src/dst address ordering,
7237                  * "paired" SA's have the src/dst addresses reversed.
7238                  */
7239                 if (haspeer) {
7240                         peer_assoc = ipsec_getassocbyspi(bucket,
7241                             dying->ipsa_spi, dying->ipsa_srcaddr,
7242                             dying->ipsa_dstaddr, dying->ipsa_addrfam);
7243                 } else {
7244                         peer_assoc = ipsec_getassocbyspi(bucket,
7245                             dying->ipsa_otherspi, dying->ipsa_dstaddr,
7246                             dying->ipsa_srcaddr, dying->ipsa_addrfam);
7247                 }
7248 
7249                 mutex_exit(&bucket->isaf_lock);
7250                 if (peer_assoc != NULL) {
7251                         mutex_enter(&peer_assoc->ipsa_lock);
7252                         mutex_enter(&dying->ipsa_lock);
7253                         if (!haspeer) {
7254                                 /*
7255                                  * Only SA's which have a "peer" or are
7256                                  * "paired" end up on this list, so this
7257                                  * must be a "paired" SA, update the flags
7258                                  * to break the pair.
7259                                  */
7260                                 peer_assoc->ipsa_otherspi = 0;
7261                                 peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7262                                 dying->ipsa_otherspi = 0;
7263                                 dying->ipsa_flags &= ~IPSA_F_PAIRED;
7264                         }
7265                         if (haspeer || outbound) {
7266                                 /*
7267                                  * Update the state of the "inbound" SA when
7268                                  * the "outbound" SA has expired. Don't update
7269                                  * the "outbound" SA when the "inbound" SA
7270                                  * SA expires because setting the hard_addtime
7271                                  * below will cause this to happen.
7272                                  */
7273                                 peer_assoc->ipsa_state = dying->ipsa_state;
7274                         }
7275                         if (dying->ipsa_state == IPSA_STATE_DEAD)
7276                                 peer_assoc->ipsa_hardexpiretime = 1;
7277 
7278                         mutex_exit(&dying->ipsa_lock);
7279                         mutex_exit(&peer_assoc->ipsa_lock);
7280                         IPSA_REFRELE(peer_assoc);
7281                 }
7282                 IPSA_REFRELE(dying);
7283         }
7284 }
7285 
7286 /*
7287  * Ensure that the IV used for CCM mode never repeats. The IV should
7288  * only be updated by this function. Also check to see if the IV
7289  * is about to wrap and generate a SOFT Expire. This function is only
7290  * called for outgoing packets, the IV for incomming packets is taken
7291  * from the wire. If the outgoing SA needs to be expired, update
7292  * the matching incomming SA.
7293  */
7294 boolean_t
7295 update_iv(uint8_t *iv_ptr, queue_t *pfkey_q, ipsa_t *assoc,
7296     ipsecesp_stack_t *espstack)
7297 {
7298         boolean_t rc = B_TRUE;
7299         isaf_t *inbound_bucket;
7300         sadb_t *sp;
7301         ipsa_t *pair_sa = NULL;
7302         int sa_new_state = 0;
7303 
7304         /* For non counter modes, the IV is random data. */
7305         if (!(assoc->ipsa_flags & IPSA_F_COUNTERMODE)) {
7306                 (void) random_get_pseudo_bytes(iv_ptr, assoc->ipsa_iv_len);
7307                 return (rc);
7308         }
7309 
7310         mutex_enter(&assoc->ipsa_lock);
7311 
7312         (*assoc->ipsa_iv)++;
7313 
7314         if (*assoc->ipsa_iv == assoc->ipsa_iv_hardexpire) {
7315                 sa_new_state = IPSA_STATE_DEAD;
7316                 rc = B_FALSE;
7317         } else if (*assoc->ipsa_iv == assoc->ipsa_iv_softexpire) {
7318                 if (assoc->ipsa_state != IPSA_STATE_DYING) {
7319                         /*
7320                          * This SA may have already been expired when its
7321                          * PAIR_SA expired.
7322                          */
7323                         sa_new_state = IPSA_STATE_DYING;
7324                 }
7325         }
7326         if (sa_new_state) {
7327                 /*
7328                  * If there is a state change, we need to update this SA
7329                  * and its "pair", we can find the bucket for the "pair" SA
7330                  * while holding the ipsa_t mutex, but we won't actually
7331                  * update anything untill the ipsa_t mutex has been released
7332                  * for _this_ SA.
7333                  */
7334                 assoc->ipsa_state = sa_new_state;
7335                 if (assoc->ipsa_addrfam == AF_INET6) {
7336                         sp = &espstack->esp_sadb.s_v6;
7337                 } else {
7338                         sp = &espstack->esp_sadb.s_v4;
7339                 }
7340                 inbound_bucket = INBOUND_BUCKET(sp, assoc->ipsa_otherspi);
7341                 sadb_expire_assoc(pfkey_q, assoc);
7342         }
7343         if (rc == B_TRUE)
7344                 bcopy(assoc->ipsa_iv, iv_ptr, assoc->ipsa_iv_len);
7345 
7346         mutex_exit(&assoc->ipsa_lock);
7347 
7348         if (sa_new_state) {
7349                 /* Find the inbound SA, need to lock hash bucket. */
7350                 mutex_enter(&inbound_bucket->isaf_lock);
7351                 pair_sa = ipsec_getassocbyspi(inbound_bucket,
7352                     assoc->ipsa_otherspi, assoc->ipsa_dstaddr,
7353                     assoc->ipsa_srcaddr, assoc->ipsa_addrfam);
7354                 mutex_exit(&inbound_bucket->isaf_lock);
7355                 if (pair_sa != NULL) {
7356                         mutex_enter(&pair_sa->ipsa_lock);
7357                         pair_sa->ipsa_state = sa_new_state;
7358                         mutex_exit(&pair_sa->ipsa_lock);
7359                         IPSA_REFRELE(pair_sa);
7360                 }
7361         }
7362 
7363         return (rc);
7364 }
7365 
7366 void
7367 ccm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7368     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7369 {
7370         uchar_t *nonce;
7371         crypto_mechanism_t *combined_mech;
7372         CK_AES_CCM_PARAMS *params;
7373 
7374         combined_mech = (crypto_mechanism_t *)cm_mech;
7375         params = (CK_AES_CCM_PARAMS *)(combined_mech + 1);
7376         nonce = (uchar_t *)(params + 1);
7377         params->ulMACSize = assoc->ipsa_mac_len;
7378         params->ulNonceSize = assoc->ipsa_nonce_len;
7379         params->ulAuthDataSize = sizeof (esph_t);
7380         params->ulDataSize = data_len;
7381         params->nonce = nonce;
7382         params->authData = esph;
7383 
7384         cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7385         cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
7386         cm_mech->combined_mech.cm_param = (caddr_t)params;
7387         /* See gcm_params_init() for comments. */
7388         bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7389         nonce += assoc->ipsa_saltlen;
7390         bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7391         crypto_data->cd_miscdata = NULL;
7392 }
7393 
7394 /* ARGSUSED */
7395 void
7396 cbc_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7397     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7398 {
7399         cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7400         cm_mech->combined_mech.cm_param_len = 0;
7401         cm_mech->combined_mech.cm_param = NULL;
7402         crypto_data->cd_miscdata = (char *)iv_ptr;
7403 }
7404 
7405 /* ARGSUSED */
7406 void
7407 gcm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7408     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7409 {
7410         uchar_t *nonce;
7411         crypto_mechanism_t *combined_mech;
7412         CK_AES_GCM_PARAMS *params;
7413 
7414         combined_mech = (crypto_mechanism_t *)cm_mech;
7415         params = (CK_AES_GCM_PARAMS *)(combined_mech + 1);
7416         nonce = (uchar_t *)(params + 1);
7417 
7418         params->pIv = nonce;
7419         params->ulIvLen = assoc->ipsa_nonce_len;
7420         params->ulIvBits = SADB_8TO1(assoc->ipsa_nonce_len);
7421         params->pAAD = esph;
7422         params->ulAADLen = sizeof (esph_t);
7423         params->ulTagBits = SADB_8TO1(assoc->ipsa_mac_len);
7424 
7425         cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7426         cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
7427         cm_mech->combined_mech.cm_param = (caddr_t)params;
7428         /*
7429          * Create the nonce, which is made up of the salt and the IV.
7430          * Copy the salt from the SA and the IV from the packet.
7431          * For inbound packets we copy the IV from the packet because it
7432          * was set by the sending system, for outbound packets we copy the IV
7433          * from the packet because the IV in the SA may be changed by another
7434          * thread, the IV in the packet was created while holding a mutex.
7435          */
7436         bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7437         nonce += assoc->ipsa_saltlen;
7438         bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7439         crypto_data->cd_miscdata = NULL;
7440 }