1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/param.h>
  27 #include <sys/types.h>
  28 #include <sys/stream.h>
  29 #include <sys/strsubr.h>
  30 #include <sys/strsun.h>
  31 #include <sys/stropts.h>
  32 #include <sys/vnode.h>
  33 #include <sys/zone.h>
  34 #include <sys/strlog.h>
  35 #include <sys/sysmacros.h>
  36 #define _SUN_TPI_VERSION 2
  37 #include <sys/tihdr.h>
  38 #include <sys/timod.h>
  39 #include <sys/tiuser.h>
  40 #include <sys/ddi.h>
  41 #include <sys/sunddi.h>
  42 #include <sys/sunldi.h>
  43 #include <sys/file.h>
  44 #include <sys/modctl.h>
  45 #include <sys/debug.h>
  46 #include <sys/kmem.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/proc.h>
  49 #include <sys/suntpi.h>
  50 #include <sys/atomic.h>
  51 #include <sys/mkdev.h>
  52 #include <sys/policy.h>
  53 #include <sys/disp.h>
  54 
  55 #include <sys/socket.h>
  56 #include <netinet/in.h>
  57 #include <net/pfkeyv2.h>
  58 
  59 #include <inet/common.h>
  60 #include <netinet/ip6.h>
  61 #include <inet/ip.h>
  62 #include <inet/proto_set.h>
  63 #include <inet/nd.h>
  64 #include <inet/optcom.h>
  65 #include <inet/ipsec_info.h>
  66 #include <inet/ipsec_impl.h>
  67 #include <inet/keysock.h>
  68 
  69 #include <sys/isa_defs.h>
  70 
  71 /*
 * This is a transport provider for the PF_KEY key management socket.
  73  * (See RFC 2367 for details.)
  74  * Downstream messages are wrapped in a keysock consumer interface KEYSOCK_IN
  75  * messages (see ipsec_info.h), and passed to the appropriate consumer.
  76  * Upstream messages are generated for all open PF_KEY sockets, when
  77  * appropriate, as well as the sender (as long as SO_USELOOPBACK is enabled)
  78  * in reply to downstream messages.
  79  *
  80  * Upstream messages must be created asynchronously for the following
  81  * situations:
  82  *
  83  *      1.) A keysock consumer requires an SA, and there is currently none.
  84  *      2.) An SA expires, either hard or soft lifetime.
  85  *      3.) Other events a consumer deems fit.
  86  *
  87  * The MT model of this is PERMOD, with shared put procedures.  Two types of
  88  * messages, SADB_FLUSH and SADB_DUMP, need to lock down the perimeter to send
  89  * down the *multiple* messages they create.
  90  */
  91 
/*
 * Arena from which keysock_open() allocates the minor number of each PF_KEY
 * socket and to which keysock_close() returns it.  Created in
 * keysock_ddi_init() and destroyed in keysock_ddi_destroy().
 */
static vmem_t *keysock_vmem;            /* for minor numbers. */

/* NOTE(review): presumably bounds the per-stack consumer table -- confirm. */
#define KEYSOCK_MAX_CONSUMERS 256
  95 
/*
 * Default structure copied into T_INFO_ACK messages (from rts.c...).
 *
 * keysock_copy_info() copies this template and then overwrites CURRENT_state
 * and OPT_size with the socket's state and keysock_max_optsize, so the values
 * given for those two members here are only placeholders.
 */
static struct T_info_ack keysock_g_t_info_ack = {
        T_INFO_ACK,
        T_INFINITE,     /* TSDU_size. Maximum size messages. */
        T_INVALID,      /* ETSDU_size. No expedited data. */
        T_INVALID,      /* CDATA_size. No connect data. */
        T_INVALID,      /* DDATA_size. No disconnect data. */
        0,              /* ADDR_size. */
        0,              /* OPT_size. No user-settable options */
        64 * 1024,      /* TIDU_size. keysock allows maximum size messages. */
        T_COTS,         /* SERV_type. keysock supports connection oriented. */
        TS_UNBND,       /* CURRENT_state. This is set from keysock_state. */
        (XPG4_1)        /* Provider flags */
};
 110 
/*
 * Named Dispatch Parameter Management Structure.
 *
 * One instance describes a single NDD tunable: the inclusive range that
 * keysock_param_set() accepts, the current value reported by
 * keysock_param_get(), and the name it is registered under by
 * keysock_param_register().
 */
typedef struct keysockparam_s {
        uint_t  keysock_param_min;      /* smallest value accepted */
        uint_t  keysock_param_max;      /* largest value accepted */
        uint_t  keysock_param_value;    /* current value */
        char    *keysock_param_name;    /* NDD name; NULL or "" is skipped */
} keysockparam_t;
 118 
/*
 * Table of NDD variables supported by keysock.  keysock_stack_init() copies
 * this template into each stack's keystack_params and registers the copy
 * with that stack's keystack_g_nd.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The keystack_* accessor macros below select a row of keystack_params by
 * position, so the row order here and the macro indices must stay in step.
 */
static  keysockparam_t  lcl_param_arr[] = {
        /* min  max     value   name */
        { 4096, 65536,  8192,   "keysock_xmit_hiwat"},
        { 0,    65536,  1024,   "keysock_xmit_lowat"},
        { 4096, 65536,  8192,   "keysock_recv_hiwat"},
        { 65536, 1024*1024*1024, 256*1024,      "keysock_max_buf"},
        { 0,    3,      0,      "keysock_debug"},
};
/* Use as keystack->keystack_<name>; each expands to a member access. */
#define keystack_xmit_hiwat     keystack_params[0].keysock_param_value
#define keystack_xmit_lowat     keystack_params[1].keysock_param_value
#define keystack_recv_hiwat     keystack_params[2].keysock_param_value
#define keystack_max_buf        keystack_params[3].keysock_param_value
#define keystack_debug  keystack_params[4].keysock_param_value
 137 
 138 #define ks0dbg(a)       printf a
 139 /* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
 140 #define ks1dbg(keystack, a)     if (keystack->keystack_debug != 0) printf a
 141 #define ks2dbg(keystack, a)     if (keystack->keystack_debug > 1) printf a
 142 #define ks3dbg(keystack, a)     if (keystack->keystack_debug > 2) printf a
 143 
 144 static int keysock_close(queue_t *);
 145 static int keysock_open(queue_t *, dev_t *, int, int, cred_t *);
 146 static void keysock_wput(queue_t *, mblk_t *);
 147 static void keysock_rput(queue_t *, mblk_t *);
 148 static void keysock_rsrv(queue_t *);
 149 static void keysock_passup(mblk_t *, sadb_msg_t *, minor_t,
 150     keysock_consumer_t *, boolean_t, keysock_stack_t *);
 151 static void *keysock_stack_init(netstackid_t stackid, netstack_t *ns);
 152 static void keysock_stack_fini(netstackid_t stackid, void *arg);
 153 
 154 static struct module_info info = {
 155         5138, "keysock", 1, INFPSZ, 512, 128
 156 };
 157 
 158 static struct qinit rinit = {
 159         (pfi_t)keysock_rput, (pfi_t)keysock_rsrv, keysock_open, keysock_close,
 160         NULL, &info
 161 };
 162 
 163 static struct qinit winit = {
 164         (pfi_t)keysock_wput, NULL, NULL, NULL, NULL, &info
 165 };
 166 
 167 struct streamtab keysockinfo = {
 168         &rinit, &winit
 169 };
 170 
/*
 * Module linkage handle, defined outside this file.  keysock_plumb_ipsec()
 * passes it to ldi_ident_from_mod().
 */
extern struct modlinkage *keysock_modlp;

/*
 * Plumb IPsec.
 *
 * NOTE:  New "default" modules will need to be loaded here if needed before
 *        boot time.
 */

/*
 * Driver names, device paths and the module name handed to the attach, open
 * and I_PUSH calls in keysock_plumb_ipsec().
 * Keep these in global space to keep the lint from complaining.
 */
static char *IPSECESP = "ipsecesp";
static char *IPSECESPDEV = "/devices/pseudo/ipsecesp@0:ipsecesp";
static char *IPSECAH = "ipsecah";
static char *IPSECAHDEV = "/devices/pseudo/ipsecah@0:ipsecah";
static char *IP6DEV = "/devices/pseudo/ip6@0:ip6";
static char *KEYSOCK = "keysock";
/* NOTE(review): STRMOD is not referenced in the part of the file seen here. */
static char *STRMOD = "strmod";
 188 
 189 /*
 190  * Load the other ipsec modules and plumb them together.
 191  */
 192 int
 193 keysock_plumb_ipsec(netstack_t *ns)
 194 {
 195         ldi_handle_t    lh, ip6_lh = NULL;
 196         ldi_ident_t     li = NULL;
 197         int             err = 0;
 198         int             muxid, rval;
 199         boolean_t       esp_present = B_TRUE;
 200         cred_t          *cr;
 201         keysock_stack_t *keystack = ns->netstack_keysock;
 202 
 203 #ifdef NS_DEBUG
 204         (void) printf("keysock_plumb_ipsec(%d)\n",
 205             ns->netstack_stackid);
 206 #endif
 207 
 208         keystack->keystack_plumbed = 0;      /* we're trying again.. */
 209 
 210         cr = zone_get_kcred(netstackid_to_zoneid(
 211             keystack->keystack_netstack->netstack_stackid));
 212         ASSERT(cr != NULL);
 213         /*
 214          * Load up the drivers (AH/ESP).
 215          *
 216          * I do this separately from the actual plumbing in case this function
 217          * ever gets called from a diskless boot before the root filesystem is
 218          * up.  I don't have to worry about "keysock" because, well, if I'm
 219          * here, keysock must've loaded successfully.
 220          */
 221         if (i_ddi_attach_pseudo_node(IPSECAH) == NULL) {
 222                 ks0dbg(("IPsec:  AH failed to attach.\n"));
 223                 goto bail;
 224         }
 225         if (i_ddi_attach_pseudo_node(IPSECESP) == NULL) {
 226                 ks0dbg(("IPsec:  ESP failed to attach.\n"));
 227                 esp_present = B_FALSE;
 228         }
 229 
 230         /*
 231          * Set up the IP streams for AH and ESP, as well as tacking keysock
 232          * on top of them.  Assume keysock has set the autopushes up already.
 233          */
 234 
 235         /* Open IP. */
 236         err = ldi_ident_from_mod(keysock_modlp, &li);
 237         if (err) {
 238                 ks0dbg(("IPsec:  lid_ident_from_mod failed (err %d).\n",
 239                     err));
 240                 goto bail;
 241         }
 242 
 243         err = ldi_open_by_name(IP6DEV, FREAD|FWRITE, cr, &ip6_lh, li);
 244         if (err) {
 245                 ks0dbg(("IPsec:  Open of IP6 failed (err %d).\n", err));
 246                 goto bail;
 247         }
 248 
 249         /* PLINK KEYSOCK/AH */
 250         err = ldi_open_by_name(IPSECAHDEV, FREAD|FWRITE, cr, &lh, li);
 251         if (err) {
 252                 ks0dbg(("IPsec:  Open of AH failed (err %d).\n", err));
 253                 goto bail;
 254         }
 255         err = ldi_ioctl(lh,
 256             I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval);
 257         if (err) {
 258                 ks0dbg(("IPsec:  Push of KEYSOCK onto AH failed (err %d).\n",
 259                     err));
 260                 (void) ldi_close(lh, FREAD|FWRITE, cr);
 261                 goto bail;
 262         }
 263         err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
 264             FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid);
 265         if (err) {
 266                 ks0dbg(("IPsec:  PLINK of KEYSOCK/AH failed (err %d).\n", err));
 267                 (void) ldi_close(lh, FREAD|FWRITE, cr);
 268                 goto bail;
 269         }
 270         (void) ldi_close(lh, FREAD|FWRITE, cr);
 271 
 272         /* PLINK KEYSOCK/ESP */
 273         if (esp_present) {
 274                 err = ldi_open_by_name(IPSECESPDEV,
 275                     FREAD|FWRITE, cr, &lh, li);
 276                 if (err) {
 277                         ks0dbg(("IPsec:  Open of ESP failed (err %d).\n", err));
 278                         goto bail;
 279                 }
 280                 err = ldi_ioctl(lh,
 281                     I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval);
 282                 if (err) {
 283                         ks0dbg(("IPsec:  "
 284                             "Push of KEYSOCK onto ESP failed (err %d).\n",
 285                             err));
 286                         (void) ldi_close(lh, FREAD|FWRITE, cr);
 287                         goto bail;
 288                 }
 289                 err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
 290                     FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid);
 291                 if (err) {
 292                         ks0dbg(("IPsec:  "
 293                             "PLINK of KEYSOCK/ESP failed (err %d).\n", err));
 294                         (void) ldi_close(lh, FREAD|FWRITE, cr);
 295                         goto bail;
 296                 }
 297                 (void) ldi_close(lh, FREAD|FWRITE, cr);
 298         }
 299 
 300 bail:
 301         keystack->keystack_plumbed = (err == 0) ? 1 : -1;
 302         if (ip6_lh != NULL) {
 303                 (void) ldi_close(ip6_lh, FREAD|FWRITE, cr);
 304         }
 305         if (li != NULL)
 306                 ldi_ident_release(li);
 307 #ifdef NS_DEBUG
 308         (void) printf("keysock_plumb_ipsec -> %d\n",
 309             keystack->keystack_plumbed);
 310 #endif
 311         crfree(cr);
 312         return (err);
 313 }
 314 
 315 /* ARGSUSED */
 316 static int
 317 keysock_param_get(q, mp, cp, cr)
 318         queue_t *q;
 319         mblk_t  *mp;
 320         caddr_t cp;
 321         cred_t *cr;
 322 {
 323         keysockparam_t  *keysockpa = (keysockparam_t *)cp;
 324         uint_t value;
 325         keysock_t *ks = (keysock_t *)q->q_ptr;
 326         keysock_stack_t *keystack = ks->keysock_keystack;
 327 
 328         mutex_enter(&keystack->keystack_param_lock);
 329         value = keysockpa->keysock_param_value;
 330         mutex_exit(&keystack->keystack_param_lock);
 331 
 332         (void) mi_mpprintf(mp, "%u", value);
 333         return (0);
 334 }
 335 
 336 /* This routine sets an NDD variable in a keysockparam_t structure. */
 337 /* ARGSUSED */
 338 static int
 339 keysock_param_set(q, mp, value, cp, cr)
 340         queue_t *q;
 341         mblk_t  *mp;
 342         char    *value;
 343         caddr_t cp;
 344         cred_t *cr;
 345 {
 346         ulong_t new_value;
 347         keysockparam_t  *keysockpa = (keysockparam_t *)cp;
 348         keysock_t *ks = (keysock_t *)q->q_ptr;
 349         keysock_stack_t *keystack = ks->keysock_keystack;
 350 
 351         /* Convert the value from a string into a long integer. */
 352         if (ddi_strtoul(value, NULL, 10, &new_value) != 0)
 353                 return (EINVAL);
 354 
 355         mutex_enter(&keystack->keystack_param_lock);
 356         /*
 357          * Fail the request if the new value does not lie within the
 358          * required bounds.
 359          */
 360         if (new_value < keysockpa->keysock_param_min ||
 361             new_value > keysockpa->keysock_param_max) {
 362                 mutex_exit(&keystack->keystack_param_lock);
 363                 return (EINVAL);
 364         }
 365 
 366         /* Set the new value */
 367         keysockpa->keysock_param_value = new_value;
 368         mutex_exit(&keystack->keystack_param_lock);
 369 
 370         return (0);
 371 }
 372 
 373 /*
 374  * Initialize keysock at module load time
 375  */
 376 boolean_t
 377 keysock_ddi_init(void)
 378 {
 379         keysock_max_optsize = optcom_max_optsize(
 380             keysock_opt_obj.odb_opt_des_arr, keysock_opt_obj.odb_opt_arr_cnt);
 381 
 382         keysock_vmem = vmem_create("keysock", (void *)1, MAXMIN, 1,
 383             NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER);
 384 
 385         /*
 386          * We want to be informed each time a stack is created or
 387          * destroyed in the kernel, so we can maintain the
 388          * set of keysock_stack_t's.
 389          */
 390         netstack_register(NS_KEYSOCK, keysock_stack_init, NULL,
 391             keysock_stack_fini);
 392 
 393         return (B_TRUE);
 394 }
 395 
 396 /*
 397  * Walk through the param array specified registering each element with the
 398  * named dispatch handler.
 399  */
 400 static boolean_t
 401 keysock_param_register(IDP *ndp, keysockparam_t *ksp, int cnt)
 402 {
 403         for (; cnt-- > 0; ksp++) {
 404                 if (ksp->keysock_param_name != NULL &&
 405                     ksp->keysock_param_name[0]) {
 406                         if (!nd_load(ndp,
 407                             ksp->keysock_param_name,
 408                             keysock_param_get, keysock_param_set,
 409                             (caddr_t)ksp)) {
 410                                 nd_free(ndp);
 411                                 return (B_FALSE);
 412                         }
 413                 }
 414         }
 415         return (B_TRUE);
 416 }
 417 
 418 /*
 419  * Initialize keysock for one stack instance
 420  */
 421 /* ARGSUSED */
 422 static void *
 423 keysock_stack_init(netstackid_t stackid, netstack_t *ns)
 424 {
 425         keysock_stack_t *keystack;
 426         keysockparam_t *ksp;
 427 
 428         keystack = (keysock_stack_t *)kmem_zalloc(sizeof (*keystack), KM_SLEEP);
 429         keystack->keystack_netstack = ns;
 430 
 431         keystack->keystack_acquire_seq = 0xffffffff;
 432 
 433         ksp = (keysockparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
 434         keystack->keystack_params = ksp;
 435         bcopy(lcl_param_arr, ksp, sizeof (lcl_param_arr));
 436 
 437         (void) keysock_param_register(&keystack->keystack_g_nd, ksp,
 438             A_CNT(lcl_param_arr));
 439 
 440         mutex_init(&keystack->keystack_list_lock, NULL, MUTEX_DEFAULT, NULL);
 441         mutex_init(&keystack->keystack_consumers_lock,
 442             NULL, MUTEX_DEFAULT, NULL);
 443         mutex_init(&keystack->keystack_param_lock, NULL, MUTEX_DEFAULT, NULL);
 444         return (keystack);
 445 }
 446 
/*
 * Module-unload counterpart of keysock_ddi_init(): unregister keysock from
 * the netstack framework, then destroy the minor-number arena.  The per-stack
 * NDD tables and parameter copies are released in keysock_stack_fini().
 */
void
keysock_ddi_destroy(void)
{
        netstack_unregister(NS_KEYSOCK);
        vmem_destroy(keysock_vmem);
}
 456 
 457 /*
 458  * Remove one stack instance from keysock
 459  */
 460 /* ARGSUSED */
 461 static void
 462 keysock_stack_fini(netstackid_t stackid, void *arg)
 463 {
 464         keysock_stack_t *keystack = (keysock_stack_t *)arg;
 465 
 466         nd_free(&keystack->keystack_g_nd);
 467         kmem_free(keystack->keystack_params, sizeof (lcl_param_arr));
 468         keystack->keystack_params = NULL;
 469 
 470         mutex_destroy(&keystack->keystack_list_lock);
 471         mutex_destroy(&keystack->keystack_consumers_lock);
 472         mutex_destroy(&keystack->keystack_param_lock);
 473 
 474         kmem_free(keystack, sizeof (*keystack));
 475 }
 476 
 477 /*
 478  * Close routine for keysock.
 479  */
 480 static int
 481 keysock_close(queue_t *q)
 482 {
 483         keysock_t *ks;
 484         keysock_consumer_t *kc;
 485         void *ptr = q->q_ptr;
 486         int size;
 487         keysock_stack_t *keystack;
 488 
 489 
 490         qprocsoff(q);
 491 
 492         /* Safe assumption. */
 493         ASSERT(ptr != NULL);
 494 
 495         if (WR(q)->q_next) {
 496                 kc = (keysock_consumer_t *)ptr;
 497                 keystack = kc->kc_keystack;
 498 
 499                 ks1dbg(keystack, ("Module close, removing a consumer (%d).\n",
 500                     kc->kc_sa_type));
 501                 /*
 502                  * Because of PERMOD open/close exclusive perimeter, I
 503                  * can inspect KC_FLUSHING w/o locking down kc->kc_lock.
 504                  */
 505                 if (kc->kc_flags & KC_FLUSHING) {
 506                         /*
 507                          * If this decrement was the last one, send
 508                          * down the next pending one, if any.
 509                          *
 510                          * With a PERMOD perimeter, the mutexes ops aren't
 511                          * really necessary, but if we ever loosen up, we will
 512                          * have this bit covered already.
 513                          */
 514                         keystack->keystack_flushdump--;
 515                         if (keystack->keystack_flushdump == 0) {
 516                                 /*
 517                                  * The flush/dump terminated by having a
 518                                  * consumer go away.  I need to send up to the
 519                                  * appropriate keysock all of the relevant
 520                                  * information.  Unfortunately, I don't
 521                                  * have that handy.
 522                                  */
 523                                 ks0dbg(("Consumer went away while flushing or"
 524                                     " dumping.\n"));
 525                         }
 526                 }
 527                 size = sizeof (keysock_consumer_t);
 528                 mutex_enter(&keystack->keystack_consumers_lock);
 529                 keystack->keystack_consumers[kc->kc_sa_type] = NULL;
 530                 mutex_exit(&keystack->keystack_consumers_lock);
 531                 mutex_destroy(&kc->kc_lock);
 532                 netstack_rele(kc->kc_keystack->keystack_netstack);
 533         } else {
 534                 ks = (keysock_t *)ptr;
 535                 keystack = ks->keysock_keystack;
 536 
 537                 ks3dbg(keystack,
 538                     ("Driver close, PF_KEY socket is going away.\n"));
 539                 if ((ks->keysock_flags & KEYSOCK_EXTENDED) != 0)
 540                         atomic_dec_32(&keystack->keystack_num_extended);
 541                 size = sizeof (keysock_t);
 542                 mutex_enter(&keystack->keystack_list_lock);
 543                 *(ks->keysock_ptpn) = ks->keysock_next;
 544                 if (ks->keysock_next != NULL)
 545                         ks->keysock_next->keysock_ptpn = ks->keysock_ptpn;
 546                 mutex_exit(&keystack->keystack_list_lock);
 547                 mutex_destroy(&ks->keysock_lock);
 548                 vmem_free(keysock_vmem, (void *)(uintptr_t)ks->keysock_serial,
 549                     1);
 550                 netstack_rele(ks->keysock_keystack->keystack_netstack);
 551         }
 552 
 553         /* Now I'm free. */
 554         kmem_free(ptr, size);
 555         return (0);
 556 }
 557 /*
 558  * Open routine for keysock.
 559  */
 560 /* ARGSUSED */
 561 static int
 562 keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 563 {
 564         keysock_t *ks;
 565         keysock_consumer_t *kc;
 566         mblk_t *mp;
 567         ipsec_info_t *ii;
 568         netstack_t *ns;
 569         keysock_stack_t *keystack;
 570 
 571         if (secpolicy_ip_config(credp, B_FALSE) != 0) {
 572                 /* Privilege debugging will log the error */
 573                 return (EPERM);
 574         }
 575 
 576         if (q->q_ptr != NULL)
 577                 return (0);  /* Re-open of an already open instance. */
 578 
 579         ns = netstack_find_by_cred(credp);
 580         ASSERT(ns != NULL);
 581         keystack = ns->netstack_keysock;
 582         ASSERT(keystack != NULL);
 583 
 584         ks3dbg(keystack, ("Entering keysock open.\n"));
 585 
 586         if (keystack->keystack_plumbed < 1) {
 587                 netstack_t *ns = keystack->keystack_netstack;
 588 
 589                 keystack->keystack_plumbed = 0;
 590 #ifdef NS_DEBUG
 591                 printf("keysock_open(%d) - plumb\n",
 592                     keystack->keystack_netstack->netstack_stackid);
 593 #endif
 594                 /*
 595                  * Don't worry about ipsec_failure being true here.
 596                  * (See ip.c).  An open of keysock should try and force
 597                  * the issue.  Maybe it was a transient failure.
 598                  */
 599                 ipsec_loader_loadnow(ns->netstack_ipsec);
 600         }
 601 
 602         if (sflag & MODOPEN) {
 603                 /* Initialize keysock_consumer state here. */
 604                 kc = kmem_zalloc(sizeof (keysock_consumer_t), KM_NOSLEEP);
 605                 if (kc == NULL) {
 606                         netstack_rele(keystack->keystack_netstack);
 607                         return (ENOMEM);
 608                 }
 609                 mutex_init(&kc->kc_lock, NULL, MUTEX_DEFAULT, 0);
 610                 kc->kc_rq = q;
 611                 kc->kc_wq = WR(q);
 612 
 613                 q->q_ptr = kc;
 614                 WR(q)->q_ptr = kc;
 615 
 616                 kc->kc_keystack = keystack;
 617                 qprocson(q);
 618 
 619                 /*
 620                  * Send down initial message to whatever I was pushed on top
 621                  * of asking for its consumer type.  The reply will set it.
 622                  */
 623 
 624                 /* Allocate it. */
 625                 mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
 626                 if (mp == NULL) {
 627                         ks1dbg(keystack, (
 628                             "keysock_open:  Cannot allocate KEYSOCK_HELLO.\n"));
 629                         /* Do I need to set these to null? */
 630                         q->q_ptr = NULL;
 631                         WR(q)->q_ptr = NULL;
 632                         mutex_destroy(&kc->kc_lock);
 633                         kmem_free(kc, sizeof (*kc));
 634                         netstack_rele(keystack->keystack_netstack);
 635                         return (ENOMEM);
 636                 }
 637 
 638                 /* If I allocated okay, putnext to what I was pushed atop. */
 639                 mp->b_wptr += sizeof (ipsec_info_t);
 640                 mp->b_datap->db_type = M_CTL;
 641                 ii = (ipsec_info_t *)mp->b_rptr;
 642                 ii->ipsec_info_type = KEYSOCK_HELLO;
 643                 /* Length only of type/len. */
 644                 ii->ipsec_info_len = sizeof (ii->ipsec_allu);
 645                 ks2dbg(keystack, ("Ready to putnext KEYSOCK_HELLO.\n"));
 646                 putnext(kc->kc_wq, mp);
 647         } else {
 648                 minor_t ksminor;
 649 
 650                 /* Initialize keysock state. */
 651 
 652                 ks2dbg(keystack, ("Made it into PF_KEY socket open.\n"));
 653 
 654                 ksminor = (minor_t)(uintptr_t)
 655                     vmem_alloc(keysock_vmem, 1, VM_NOSLEEP);
 656                 if (ksminor == 0) {
 657                         netstack_rele(keystack->keystack_netstack);
 658                         return (ENOMEM);
 659                 }
 660                 ks = kmem_zalloc(sizeof (keysock_t), KM_NOSLEEP);
 661                 if (ks == NULL) {
 662                         vmem_free(keysock_vmem, (void *)(uintptr_t)ksminor, 1);
 663                         netstack_rele(keystack->keystack_netstack);
 664                         return (ENOMEM);
 665                 }
 666 
 667                 mutex_init(&ks->keysock_lock, NULL, MUTEX_DEFAULT, 0);
 668                 ks->keysock_rq = q;
 669                 ks->keysock_wq = WR(q);
 670                 ks->keysock_state = TS_UNBND;
 671                 ks->keysock_serial = ksminor;
 672 
 673                 q->q_ptr = ks;
 674                 WR(q)->q_ptr = ks;
 675                 ks->keysock_keystack = keystack;
 676 
 677                 /*
 678                  * The receive hiwat is only looked at on the stream head
 679                  * queue.  Store in q_hiwat in order to return on SO_RCVBUF
 680                  * getsockopts.
 681                  */
 682 
 683                 q->q_hiwat = keystack->keystack_recv_hiwat;
 684 
 685                 /*
 686                  * The transmit hiwat/lowat is only looked at on IP's queue.
 687                  * Store in q_hiwat/q_lowat in order to return on
 688                  * SO_SNDBUF/SO_SNDLOWAT getsockopts.
 689                  */
 690 
 691                 WR(q)->q_hiwat = keystack->keystack_xmit_hiwat;
 692                 WR(q)->q_lowat = keystack->keystack_xmit_lowat;
 693 
 694                 *devp = makedevice(getmajor(*devp), ksminor);
 695 
 696                 /*
 697                  * Thread keysock into the global keysock list.
 698                  */
 699                 mutex_enter(&keystack->keystack_list_lock);
 700                 ks->keysock_next = keystack->keystack_list;
 701                 ks->keysock_ptpn = &keystack->keystack_list;
 702                 if (keystack->keystack_list != NULL) {
 703                         keystack->keystack_list->keysock_ptpn =
 704                             &ks->keysock_next;
 705                 }
 706                 keystack->keystack_list = ks;
 707                 mutex_exit(&keystack->keystack_list_lock);
 708 
 709                 qprocson(q);
 710                 (void) proto_set_rx_hiwat(q, NULL,
 711                     keystack->keystack_recv_hiwat);
 712                 /*
 713                  * Wait outside the keysock module perimeter for IPsec
 714                  * plumbing to be completed.  If it fails, keysock_close()
 715                  * undoes everything we just did.
 716                  */
 717                 if (!ipsec_loader_wait(q,
 718                     keystack->keystack_netstack->netstack_ipsec)) {
 719                         (void) keysock_close(q);
 720                         return (EPFNOSUPPORT);
 721                 }
 722         }
 723 
 724         return (0);
 725 }
 726 
 727 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_wput(). */
 728 
 729 /*
 730  * Copy relevant state bits.
 731  */
 732 static void
 733 keysock_copy_info(struct T_info_ack *tap, keysock_t *ks)
 734 {
 735         *tap = keysock_g_t_info_ack;
 736         tap->CURRENT_state = ks->keysock_state;
 737         tap->OPT_size = keysock_max_optsize;
 738 }
 739 
 740 /*
 741  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
 742  * keysock_wput.  Much of the T_CAPABILITY_ACK information is copied from
 743  * keysock_g_t_info_ack.  The current state of the stream is copied from
 744  * keysock_state.
 745  */
 746 static void
 747 keysock_capability_req(queue_t *q, mblk_t *mp)
 748 {
 749         keysock_t *ks = (keysock_t *)q->q_ptr;
 750         t_uscalar_t cap_bits1;
 751         struct T_capability_ack *tcap;
 752 
 753         cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
 754 
 755         mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
 756             mp->b_datap->db_type, T_CAPABILITY_ACK);
 757         if (mp == NULL)
 758                 return;
 759 
 760         tcap = (struct T_capability_ack *)mp->b_rptr;
 761         tcap->CAP_bits1 = 0;
 762 
 763         if (cap_bits1 & TC1_INFO) {
 764                 keysock_copy_info(&tcap->INFO_ack, ks);
 765                 tcap->CAP_bits1 |= TC1_INFO;
 766         }
 767 
 768         qreply(q, mp);
 769 }
 770 
 771 /*
 772  * This routine responds to T_INFO_REQ messages. It is called by
 773  * keysock_wput_other.
 774  * Most of the T_INFO_ACK information is copied from keysock_g_t_info_ack.
 775  * The current state of the stream is copied from keysock_state.
 776  */
 777 static void
 778 keysock_info_req(q, mp)
 779         queue_t *q;
 780         mblk_t  *mp;
 781 {
 782         mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
 783             T_INFO_ACK);
 784         if (mp == NULL)
 785                 return;
 786         keysock_copy_info((struct T_info_ack *)mp->b_rptr,
 787             (keysock_t *)q->q_ptr);
 788         qreply(q, mp);
 789 }
 790 
 791 /*
 792  * keysock_err_ack. This routine creates a
 793  * T_ERROR_ACK message and passes it
 794  * upstream.
 795  */
 796 static void
 797 keysock_err_ack(q, mp, t_error, sys_error)
 798         queue_t *q;
 799         mblk_t  *mp;
 800         int     t_error;
 801         int     sys_error;
 802 {
 803         if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
 804                 qreply(q, mp);
 805 }
 806 
 807 /*
 808  * This routine retrieves the current status of socket options.
 809  * It returns the size of the option retrieved.
 810  */
 811 /* ARGSUSED */
 812 int
 813 keysock_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
 814 {
 815         int *i1 = (int *)ptr;
 816         keysock_t *ks = (keysock_t *)q->q_ptr;
 817 
 818         switch (level) {
 819         case SOL_SOCKET:
 820                 mutex_enter(&ks->keysock_lock);
 821                 switch (name) {
 822                 case SO_TYPE:
 823                         *i1 = SOCK_RAW;
 824                         break;
 825                 case SO_USELOOPBACK:
 826                         *i1 = (int)(!((ks->keysock_flags & KEYSOCK_NOLOOP) ==
 827                             KEYSOCK_NOLOOP));
 828                         break;
 829                 /*
 830                  * The following two items can be manipulated,
 831                  * but changing them should do nothing.
 832                  */
 833                 case SO_SNDBUF:
 834                         *i1 = (int)q->q_hiwat;
 835                         break;
 836                 case SO_RCVBUF:
 837                         *i1 = (int)(RD(q)->q_hiwat);
 838                         break;
 839                 }
 840                 mutex_exit(&ks->keysock_lock);
 841                 break;
 842         default:
 843                 return (0);
 844         }
 845         return (sizeof (int));
 846 }
 847 
 848 /*
 849  * This routine sets socket options.
 850  */
 851 /* ARGSUSED */
 852 int
 853 keysock_opt_set(queue_t *q, uint_t mgmt_flags, int level,
 854     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
 855     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
 856 {
 857         int *i1 = (int *)invalp, errno = 0;
 858         keysock_t *ks = (keysock_t *)q->q_ptr;
 859         keysock_stack_t *keystack = ks->keysock_keystack;
 860 
 861         switch (level) {
 862         case SOL_SOCKET:
 863                 mutex_enter(&ks->keysock_lock);
 864                 switch (name) {
 865                 case SO_USELOOPBACK:
 866                         if (!(*i1))
 867                                 ks->keysock_flags |= KEYSOCK_NOLOOP;
 868                         else ks->keysock_flags &= ~KEYSOCK_NOLOOP;
 869                         break;
 870                 case SO_SNDBUF:
 871                         if (*i1 > keystack->keystack_max_buf)
 872                                 errno = ENOBUFS;
 873                         else q->q_hiwat = *i1;
 874                         break;
 875                 case SO_RCVBUF:
 876                         if (*i1 > keystack->keystack_max_buf) {
 877                                 errno = ENOBUFS;
 878                         } else {
 879                                 RD(q)->q_hiwat = *i1;
 880                                 (void) proto_set_rx_hiwat(RD(q), NULL, *i1);
 881                         }
 882                         break;
 883                 default:
 884                         errno = EINVAL;
 885                 }
 886                 mutex_exit(&ks->keysock_lock);
 887                 break;
 888         default:
 889                 errno = EINVAL;
 890         }
 891         return (errno);
 892 }
 893 
/*
 * Handle STREAMS messages.
 *
 * Dispatches on the message's db_type:
 *
 *      M_PROTO/M_PCPROTO       treated as a TPI primitive and dispatched on
 *                              its type (see the inner switch).
 *      M_IOCTL                 only ND_SET/ND_GET are serviced (through
 *                              nd_getset()); everything else is NAKed.
 *      M_FLUSH                 flushes this queue for FLUSHW and reflects
 *                              the message with qreply() for FLUSHR.
 *
 * Any message that is not consumed by one of the arms above (including
 * other db_types and an M_FLUSH without FLUSHR) is freed at the bottom.
 */
static void
keysock_wput_other(queue_t *q, mblk_t *mp)
{
        struct iocblk *iocp;
        int error;
        keysock_t *ks = (keysock_t *)q->q_ptr;
        keysock_stack_t *keystack = ks->keysock_keystack;
        cred_t          *cr;

        switch (mp->b_datap->db_type) {
        case M_PROTO:
        case M_PCPROTO:
                /* Too short to hold a primitive type: drop silently. */
                if ((mp->b_wptr - mp->b_rptr) < sizeof (long)) {
                        ks3dbg(keystack, (
                            "keysock_wput_other: Not big enough M_PROTO\n"));
                        freemsg(mp);
                        return;
                }
                switch (((union T_primitives *)mp->b_rptr)->type) {
                case T_CAPABILITY_REQ:
                        keysock_capability_req(q, mp);
                        break;
                case T_INFO_REQ:
                        keysock_info_req(q, mp);
                        break;
                case T_SVR4_OPTMGMT_REQ:
                case T_OPTMGMT_REQ:
                        /*
                         * All Solaris components should pass a db_credp
                         * for this TPI message, hence we ASSERT.
                         * But in case there is some other M_PROTO that looks
                         * like a TPI message sent by some other kernel
                         * component, we check and return an error.
                         */
                        cr = msg_getcred(mp, NULL);
                        ASSERT(cr != NULL);
                        if (cr == NULL) {
                                keysock_err_ack(q, mp, TSYSERR, EINVAL);
                                return;
                        }
                        /* Hand off to the option handler for this flavor. */
                        if (((union T_primitives *)mp->b_rptr)->type ==
                            T_SVR4_OPTMGMT_REQ) {
                                svr4_optcom_req(q, mp, cr, &keysock_opt_obj);
                        } else {
                                tpi_optcom_req(q, mp, cr, &keysock_opt_obj);
                        }
                        break;
                case T_DATA_REQ:
                case T_EXDATA_REQ:
                case T_ORDREL_REQ:
                        /* Illegal for keysock. */
                        freemsg(mp);
                        /* Send an M_ERROR carrying EPROTO up the read side. */
                        (void) putnextctl1(RD(q), M_ERROR, EPROTO);
                        break;
                default:
                        /* Not supported by keysock. */
                        keysock_err_ack(q, mp, TNOTSUPPORT, 0);
                        break;
                }
                /* Every TPI arm above has already disposed of mp. */
                return;
        case M_IOCTL:
                iocp = (struct iocblk *)mp->b_rptr;
                error = EINVAL;

                switch (iocp->ioc_cmd) {
                case ND_SET:
                case ND_GET:
                        if (nd_getset(q, keystack->keystack_g_nd, mp)) {
                                qreply(q, mp);
                                return;
                        } else
                                error = ENOENT;
                        /*
                         * nd_getset() returned zero: NAK with ENOENT
                         * instead of the default EINVAL.
                         */
                        /* FALLTHRU */
                default:
                        miocnak(q, mp, 0, error);
                        return;
                }
        case M_FLUSH:
                if (*mp->b_rptr & FLUSHW) {
                        flushq(q, FLUSHALL);
                        /* Write side is done; clear the bit before reply. */
                        *mp->b_rptr &= ~FLUSHW;
                }
                if (*mp->b_rptr & FLUSHR) {
                        qreply(q, mp);
                        return;
                }
                /* Else FALLTHRU */
        }

        /* If fell through, just black-hole the message. */
        freemsg(mp);
}
 989 
 990 /*
 991  * Transmit a PF_KEY error message to the instance either pointed to
 992  * by ks, the instance with serial number serial, or more, depending.
 993  *
 994  * The faulty message (or a reasonable facsimile thereof) is in mp.
 995  * This function will free mp or recycle it for delivery, thereby causing
 996  * the stream head to free it.
 997  */
 998 static void
 999 keysock_error(keysock_t *ks, mblk_t *mp, int error, int diagnostic)
1000 {
1001         sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
1002         keysock_stack_t *keystack = ks->keysock_keystack;
1003 
1004         ASSERT(mp->b_datap->db_type == M_DATA);
1005 
1006         if (samsg->sadb_msg_type < SADB_GETSPI ||
1007             samsg->sadb_msg_type > SADB_MAX)
1008                 samsg->sadb_msg_type = SADB_RESERVED;
1009 
1010         /*
1011          * Strip out extension headers.
1012          */
1013         ASSERT(mp->b_rptr + sizeof (*samsg) <= mp->b_datap->db_lim);
1014         mp->b_wptr = mp->b_rptr + sizeof (*samsg);
1015         samsg->sadb_msg_len = SADB_8TO64(sizeof (sadb_msg_t));
1016         samsg->sadb_msg_errno = (uint8_t)error;
1017         samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1018 
1019         keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE, keystack);
1020 }
1021 
1022 /*
1023  * Pass down a message to a consumer.  Wrap it in KEYSOCK_IN, and copy
1024  * in the extv if passed in.
1025  */
1026 static void
1027 keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[],
1028     boolean_t flushmsg)
1029 {
1030         keysock_consumer_t *kc;
1031         mblk_t *wrapper;
1032         keysock_in_t *ksi;
1033         int i;
1034         keysock_stack_t *keystack = ks->keysock_keystack;
1035 
1036         wrapper = allocb(sizeof (ipsec_info_t), BPRI_HI);
1037         if (wrapper == NULL) {
1038                 ks3dbg(keystack, ("keysock_passdown: allocb failed.\n"));
1039                 if (extv[SADB_EXT_KEY_ENCRYPT] != NULL)
1040                         bzero(extv[SADB_EXT_KEY_ENCRYPT],
1041                             SADB_64TO8(
1042                             extv[SADB_EXT_KEY_ENCRYPT]->sadb_ext_len));
1043                 if (extv[SADB_EXT_KEY_AUTH] != NULL)
1044                         bzero(extv[SADB_EXT_KEY_AUTH],
1045                             SADB_64TO8(
1046                             extv[SADB_EXT_KEY_AUTH]->sadb_ext_len));
1047                 if (flushmsg) {
1048                         ks0dbg((
1049                             "keysock: Downwards flush/dump message failed!\n"));
1050                         /* If this is true, I hold the perimeter. */
1051                         keystack->keystack_flushdump--;
1052                 }
1053                 freemsg(mp);
1054                 return;
1055         }
1056 
1057         wrapper->b_datap->db_type = M_CTL;
1058         ksi = (keysock_in_t *)wrapper->b_rptr;
1059         ksi->ks_in_type = KEYSOCK_IN;
1060         ksi->ks_in_len = sizeof (keysock_in_t);
1061         if (extv[SADB_EXT_ADDRESS_SRC] != NULL)
1062                 ksi->ks_in_srctype = KS_IN_ADDR_UNKNOWN;
1063         else ksi->ks_in_srctype = KS_IN_ADDR_NOTTHERE;
1064         if (extv[SADB_EXT_ADDRESS_DST] != NULL)
1065                 ksi->ks_in_dsttype = KS_IN_ADDR_UNKNOWN;
1066         else ksi->ks_in_dsttype = KS_IN_ADDR_NOTTHERE;
1067         for (i = 0; i <= SADB_EXT_MAX; i++)
1068                 ksi->ks_in_extv[i] = extv[i];
1069         ksi->ks_in_serial = ks->keysock_serial;
1070         wrapper->b_wptr += sizeof (ipsec_info_t);
1071         wrapper->b_cont = mp;
1072 
1073         /*
1074          * Find the appropriate consumer where the message is passed down.
1075          */
1076         kc = keystack->keystack_consumers[satype];
1077         if (kc == NULL) {
1078                 freeb(wrapper);
1079                 keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1080                 if (flushmsg) {
1081                         ks0dbg((
1082                             "keysock: Downwards flush/dump message failed!\n"));
1083                         /* If this is true, I hold the perimeter. */
1084                         keystack->keystack_flushdump--;
1085                 }
1086                 return;
1087         }
1088 
1089         /*
1090          * NOTE: There used to be code in here to spin while a flush or
1091          *       dump finished.  Keysock now assumes that consumers have enough
1092          *       MT-savviness to deal with that.
1093          */
1094 
1095         /*
1096          * Current consumers (AH and ESP) are guaranteed to return a
1097          * FLUSH or DUMP message back, so when we reach here, we don't
1098          * have to worry about keysock_flushdumps.
1099          */
1100 
1101         putnext(kc->kc_wq, wrapper);
1102 }
1103 
1104 /*
1105  * High-level reality checking of extensions.
1106  */
1107 static boolean_t
1108 ext_check(sadb_ext_t *ext, keysock_stack_t *keystack)
1109 {
1110         int i;
1111         uint64_t *lp;
1112         sadb_ident_t *id;
1113         char *idstr;
1114 
1115         switch (ext->sadb_ext_type) {
1116         case SADB_EXT_ADDRESS_SRC:
1117         case SADB_EXT_ADDRESS_DST:
1118         case SADB_X_EXT_ADDRESS_INNER_SRC:
1119         case SADB_X_EXT_ADDRESS_INNER_DST:
1120                 /* Check for at least enough addtl length for a sockaddr. */
1121                 if (ext->sadb_ext_len <= SADB_8TO64(sizeof (sadb_address_t)))
1122                         return (B_FALSE);
1123                 break;
1124         case SADB_EXT_LIFETIME_HARD:
1125         case SADB_EXT_LIFETIME_SOFT:
1126         case SADB_EXT_LIFETIME_CURRENT:
1127                 if (ext->sadb_ext_len != SADB_8TO64(sizeof (sadb_lifetime_t)))
1128                         return (B_FALSE);
1129                 break;
1130         case SADB_EXT_SPIRANGE:
1131                 /* See if the SPI range is legit. */
1132                 if (htonl(((sadb_spirange_t *)ext)->sadb_spirange_min) >
1133                     htonl(((sadb_spirange_t *)ext)->sadb_spirange_max))
1134                         return (B_FALSE);
1135                 break;
1136         case SADB_EXT_KEY_AUTH:
1137         case SADB_EXT_KEY_ENCRYPT:
1138                 /* Key length check. */
1139                 if (((sadb_key_t *)ext)->sadb_key_bits == 0)
1140                         return (B_FALSE);
1141                 /*
1142                  * Check to see if the key length (in bits) is less than the
1143                  * extension length (in 8-bits words).
1144                  */
1145                 if ((roundup(SADB_1TO8(((sadb_key_t *)ext)->sadb_key_bits), 8) +
1146                     sizeof (sadb_key_t)) != SADB_64TO8(ext->sadb_ext_len)) {
1147                         ks1dbg(keystack, (
1148                             "ext_check:  Key bits/length inconsistent.\n"));
1149                         ks1dbg(keystack, ("%d bits, len is %d bytes.\n",
1150                             ((sadb_key_t *)ext)->sadb_key_bits,
1151                             SADB_64TO8(ext->sadb_ext_len)));
1152                         return (B_FALSE);
1153                 }
1154 
1155                 /* All-zeroes key check. */
1156                 lp = (uint64_t *)(((char *)ext) + sizeof (sadb_key_t));
1157                 for (i = 0;
1158                     i < (ext->sadb_ext_len - SADB_8TO64(sizeof (sadb_key_t)));
1159                     i++)
1160                         if (lp[i] != 0)
1161                                 break;  /* Out of for loop. */
1162                 /* If finished the loop naturally, it's an all zero key. */
1163                 if (lp[i] == 0)
1164                         return (B_FALSE);
1165                 break;
1166         case SADB_EXT_IDENTITY_SRC:
1167         case SADB_EXT_IDENTITY_DST:
1168                 /*
1169                  * Make sure the strings in these identities are
1170                  * null-terminated.  RFC 2367 underspecified how to handle
1171                  * such a case.  I "proactively" null-terminate the string
1172                  * at the last byte if it's not terminated sooner.
1173                  */
1174                 id = (sadb_ident_t *)ext;
1175                 i = SADB_64TO8(id->sadb_ident_len);
1176                 i -= sizeof (sadb_ident_t);
1177                 idstr = (char *)(id + 1);
1178                 while (*idstr != '\0' && i > 0) {
1179                         i--;
1180                         idstr++;
1181                 }
1182                 if (i == 0) {
1183                         /*
1184                          * I.e., if the bozo user didn't NULL-terminate the
1185                          * string...
1186                          */
1187                         idstr--;
1188                         *idstr = '\0';
1189                 }
1190                 break;
1191         }
1192         return (B_TRUE);        /* For now... */
1193 }
1194 
/* Return values for keysock_get_ext(). */
#define KGE_OK  0       /* All extensions parsed. */
#define KGE_DUP 1       /* Duplicate extension encountered. */
#define KGE_UNK 2       /* Unknown extension type encountered. */
#define KGE_LEN 3       /* Extension length error. */
#define KGE_CHK 4       /* ext_check() rejected an extension. */
1201 
1202 /*
1203  * Parse basic extension headers and return in the passed-in pointer vector.
1204  * Return values include:
1205  *
1206  *      KGE_OK  Everything's nice and parsed out.
1207  *              If there are no extensions, place NULL in extv[0].
1208  *      KGE_DUP There is a duplicate extension.
1209  *              First instance in appropriate bin.  First duplicate in
1210  *              extv[0].
1211  *      KGE_UNK Unknown extension type encountered.  extv[0] contains
1212  *              unknown header.
1213  *      KGE_LEN Extension length error.
1214  *      KGE_CHK High-level reality check failed on specific extension.
1215  *
1216  * My apologies for some of the pointer arithmetic in here.  I'm thinking
1217  * like an assembly programmer, yet trying to make the compiler happy.
1218  */
1219 static int
1220 keysock_get_ext(sadb_ext_t *extv[], sadb_msg_t *basehdr, uint_t msgsize,
1221     keysock_stack_t *keystack)
1222 {
1223         bzero(extv, sizeof (sadb_ext_t *) * (SADB_EXT_MAX + 1));
1224 
1225         /* Use extv[0] as the "current working pointer". */
1226 
1227         extv[0] = (sadb_ext_t *)(basehdr + 1);
1228 
1229         while (extv[0] < (sadb_ext_t *)(((uint8_t *)basehdr) + msgsize)) {
1230                 /* Check for unknown headers. */
1231                 if (extv[0]->sadb_ext_type == 0 ||
1232                     extv[0]->sadb_ext_type > SADB_EXT_MAX)
1233                         return (KGE_UNK);
1234 
1235                 /*
1236                  * Check length.  Use uint64_t because extlen is in units
1237                  * of 64-bit words.  If length goes beyond the msgsize,
1238                  * return an error.  (Zero length also qualifies here.)
1239                  */
1240                 if (extv[0]->sadb_ext_len == 0 ||
1241                     (void *)((uint64_t *)extv[0] + extv[0]->sadb_ext_len) >
1242                     (void *)((uint8_t *)basehdr + msgsize))
1243                         return (KGE_LEN);
1244 
1245                 /* Check for redundant headers. */
1246                 if (extv[extv[0]->sadb_ext_type] != NULL)
1247                         return (KGE_DUP);
1248 
1249                 /*
1250                  * Reality check the extension if possible at the keysock
1251                  * level.
1252                  */
1253                 if (!ext_check(extv[0], keystack))
1254                         return (KGE_CHK);
1255 
1256                 /* If I make it here, assign the appropriate bin. */
1257                 extv[extv[0]->sadb_ext_type] = extv[0];
1258 
1259                 /* Advance pointer (See above for uint64_t ptr reasoning.) */
1260                 extv[0] = (sadb_ext_t *)
1261                     ((uint64_t *)extv[0] + extv[0]->sadb_ext_len);
1262         }
1263 
1264         /* Everything's cool. */
1265 
1266         /*
1267          * If extv[0] == NULL, then there are no extension headers in this
1268          * message.  Ensure that this is the case.
1269          */
1270         if (extv[0] == (sadb_ext_t *)(basehdr + 1))
1271                 extv[0] = NULL;
1272 
1273         return (KGE_OK);
1274 }
1275 
1276 /*
1277  * qwriter() callback to handle flushes and dumps.  This routine will hold
1278  * the inner perimeter.
1279  */
1280 void
1281 keysock_do_flushdump(queue_t *q, mblk_t *mp)
1282 {
1283         int i, start, finish;
1284         mblk_t *mp1 = NULL;
1285         keysock_t *ks = (keysock_t *)q->q_ptr;
1286         sadb_ext_t *extv[SADB_EXT_MAX + 1];
1287         sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
1288         keysock_stack_t *keystack = ks->keysock_keystack;
1289 
1290         /*
1291          * I am guaranteed this will work.  I did the work in keysock_parse()
1292          * already.
1293          */
1294         (void) keysock_get_ext(extv, samsg, SADB_64TO8(samsg->sadb_msg_len),
1295             keystack);
1296 
1297         /*
1298          * I hold the perimeter, therefore I don't need to use atomic ops.
1299          */
1300         if (keystack->keystack_flushdump != 0) {
1301                 /* XXX Should I instead use EBUSY? */
1302                 /* XXX Or is there a way to queue these up? */
1303                 keysock_error(ks, mp, ENOMEM, SADB_X_DIAGNOSTIC_NONE);
1304                 return;
1305         }
1306 
1307         if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1308                 start = 0;
1309                 finish = KEYSOCK_MAX_CONSUMERS - 1;
1310         } else {
1311                 start = samsg->sadb_msg_satype;
1312                 finish = samsg->sadb_msg_satype;
1313         }
1314 
1315         /*
1316          * Fill up keysock_flushdump with the number of outstanding dumps
1317          * and/or flushes.
1318          */
1319 
1320         keystack->keystack_flushdump_errno = 0;
1321 
1322         /*
1323          * Okay, I hold the perimeter.  Eventually keysock_flushdump will
1324          * contain the number of consumers with outstanding flush operations.
1325          *
1326          * SO, here's the plan:
1327          *      * For each relevant consumer (Might be one, might be all)
1328          *              * Twiddle on the FLUSHING flag.
1329          *              * Pass down the FLUSH/DUMP message.
1330          *
1331          * When I see upbound FLUSH/DUMP messages, I will decrement the
1332          * keysock_flushdump.  When I decrement it to 0, I will pass the
1333          * FLUSH/DUMP message back up to the PF_KEY sockets.  Because I will
1334          * pass down the right SA type to the consumer (either its own, or
1335          * that of UNSPEC), the right one will be reflected from each consumer,
1336          * and accordingly back to the socket.
1337          */
1338 
1339         mutex_enter(&keystack->keystack_consumers_lock);
1340         for (i = start; i <= finish; i++) {
1341                 if (keystack->keystack_consumers[i] != NULL) {
1342                         mp1 = copymsg(mp);
1343                         if (mp1 == NULL) {
1344                                 ks0dbg(("SADB_FLUSH copymsg() failed.\n"));
1345                                 /*
1346                                  * Error?  And what about outstanding
1347                                  * flushes?  Oh, yeah, they get sucked up and
1348                                  * the counter is decremented.  Consumers
1349                                  * (see keysock_passdown()) are guaranteed
1350                                  * to deliver back a flush request, even if
1351                                  * it's an error.
1352                                  */
1353                                 keysock_error(ks, mp, ENOMEM,
1354                                     SADB_X_DIAGNOSTIC_NONE);
1355                                 return;
1356                         }
1357                         /*
1358                          * Because my entry conditions are met above, the
1359                          * following assertion should hold true.
1360                          */
1361                         mutex_enter(&keystack->keystack_consumers[i]->kc_lock);
1362                         ASSERT((keystack->keystack_consumers[i]->kc_flags &
1363                             KC_FLUSHING) == 0);
1364                         keystack->keystack_consumers[i]->kc_flags |=
1365                             KC_FLUSHING;
1366                         mutex_exit(&(keystack->keystack_consumers[i]->kc_lock));
1367                         /* Always increment the number of flushes... */
1368                         keystack->keystack_flushdump++;
1369                         /* Guaranteed to return a message. */
1370                         keysock_passdown(ks, mp1, i, extv, B_TRUE);
1371                 } else if (start == finish) {
1372                         /*
1373                          * In case where start == finish, and there's no
1374                          * consumer, should we force an error?  Yes.
1375                          */
1376                         mutex_exit(&keystack->keystack_consumers_lock);
1377                         keysock_error(ks, mp, EINVAL,
1378                             SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1379                         return;
1380                 }
1381         }
1382         mutex_exit(&keystack->keystack_consumers_lock);
1383 
1384         if (keystack->keystack_flushdump == 0) {
1385                 /*
1386                  * There were no consumers at all for this message.
1387                  * XXX For now return ESRCH.
1388                  */
1389                 keysock_error(ks, mp, ESRCH, SADB_X_DIAGNOSTIC_NO_SADBS);
1390         } else {
1391                 /* Otherwise, free the original message. */
1392                 freemsg(mp);
1393         }
1394 }
1395 
1396 /*
1397  * Get the right diagnostic for a duplicate.  Should probably use a static
1398  * table lookup.
1399  */
1400 int
1401 keysock_duplicate(int ext_type)
1402 {
1403         int rc = 0;
1404 
1405         switch (ext_type) {
1406         case SADB_EXT_ADDRESS_SRC:
1407                 rc = SADB_X_DIAGNOSTIC_DUPLICATE_SRC;
1408                 break;
1409         case SADB_EXT_ADDRESS_DST:
1410                 rc = SADB_X_DIAGNOSTIC_DUPLICATE_DST;
1411                 break;
1412         case SADB_X_EXT_ADDRESS_INNER_SRC:
1413                 rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_SRC;
1414                 break;
1415         case SADB_X_EXT_ADDRESS_INNER_DST:
1416                 rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_DST;
1417                 break;
1418         case SADB_EXT_SA:
1419                 rc = SADB_X_DIAGNOSTIC_DUPLICATE_SA;
1420                 break;
1421         case SADB_EXT_SPIRANGE:
1422                 rc = SADB_X_DIAGNOSTIC_DUPLICATE_RANGE;
1423                 break;
1424         case SADB_EXT_KEY_AUTH:
1425                 rc = SADB_X_DIAGNOSTIC_DUPLICATE_AKEY;
1426                 break;
1427         case SADB_EXT_KEY_ENCRYPT:
1428                 rc = SADB_X_DIAGNOSTIC_DUPLICATE_EKEY;
1429                 break;
1430         }
1431         return (rc);
1432 }
1433 
1434 /*
1435  * Get the right diagnostic for a reality check failure.  Should probably use
1436  * a static table lookup.
1437  */
1438 int
1439 keysock_malformed(int ext_type)
1440 {
1441         int rc = 0;
1442 
1443         switch (ext_type) {
1444         case SADB_EXT_ADDRESS_SRC:
1445                 rc = SADB_X_DIAGNOSTIC_MALFORMED_SRC;
1446                 break;
1447         case SADB_EXT_ADDRESS_DST:
1448                 rc = SADB_X_DIAGNOSTIC_MALFORMED_DST;
1449                 break;
1450         case SADB_X_EXT_ADDRESS_INNER_SRC:
1451                 rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
1452                 break;
1453         case SADB_X_EXT_ADDRESS_INNER_DST:
1454                 rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
1455                 break;
1456         case SADB_EXT_SA:
1457                 rc = SADB_X_DIAGNOSTIC_MALFORMED_SA;
1458                 break;
1459         case SADB_EXT_SPIRANGE:
1460                 rc = SADB_X_DIAGNOSTIC_MALFORMED_RANGE;
1461                 break;
1462         case SADB_EXT_KEY_AUTH:
1463                 rc = SADB_X_DIAGNOSTIC_MALFORMED_AKEY;
1464                 break;
1465         case SADB_EXT_KEY_ENCRYPT:
1466                 rc = SADB_X_DIAGNOSTIC_MALFORMED_EKEY;
1467                 break;
1468         }
1469         return (rc);
1470 }
1471 
1472 /*
1473  * Keysock massaging of an inverse ACQUIRE.  Consult policy,
1474  * and construct an appropriate response.
1475  */
1476 static void
1477 keysock_inverse_acquire(mblk_t *mp, sadb_msg_t *samsg, sadb_ext_t *extv[],
1478     keysock_t *ks)
1479 {
1480         mblk_t *reply_mp;
1481         keysock_stack_t *keystack = ks->keysock_keystack;
1482 
1483         /*
1484          * Reality check things...
1485          */
1486         if (extv[SADB_EXT_ADDRESS_SRC] == NULL) {
1487                 keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_SRC);
1488                 return;
1489         }
1490         if (extv[SADB_EXT_ADDRESS_DST] == NULL) {
1491                 keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_DST);
1492                 return;
1493         }
1494 
1495         if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
1496             extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
1497                 keysock_error(ks, mp, EINVAL,
1498                     SADB_X_DIAGNOSTIC_MISSING_INNER_DST);
1499                 return;
1500         }
1501 
1502         if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL &&
1503             extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
1504                 keysock_error(ks, mp, EINVAL,
1505                     SADB_X_DIAGNOSTIC_MISSING_INNER_SRC);
1506                 return;
1507         }
1508 
1509         reply_mp = ipsec_construct_inverse_acquire(samsg, extv,
1510             keystack->keystack_netstack);
1511 
1512         if (reply_mp != NULL) {
1513                 freemsg(mp);
1514                 keysock_passup(reply_mp, (sadb_msg_t *)reply_mp->b_rptr,
1515                     ks->keysock_serial, NULL, B_FALSE, keystack);
1516         } else {
1517                 keysock_error(ks, mp, samsg->sadb_msg_errno,
1518                     samsg->sadb_x_msg_diagnostic);
1519         }
1520 }
1521 
1522 /*
1523  * Spew an extended REGISTER down to the relevant consumers.
1524  */
1525 static void
1526 keysock_extended_register(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[])
1527 {
1528         sadb_x_ereg_t *ereg = (sadb_x_ereg_t *)extv[SADB_X_EXT_EREG];
1529         uint8_t *satypes, *fencepost;
1530         mblk_t *downmp;
1531         sadb_ext_t *downextv[SADB_EXT_MAX + 1];
1532         keysock_stack_t *keystack = ks->keysock_keystack;
1533 
1534         if (ks->keysock_registered[0] != 0 || ks->keysock_registered[1] != 0 ||
1535             ks->keysock_registered[2] != 0 || ks->keysock_registered[3] != 0) {
1536                 keysock_error(ks, mp, EBUSY, 0);
1537         }
1538 
1539         ks->keysock_flags |= KEYSOCK_EXTENDED;
1540         if (ereg == NULL) {
1541                 keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1542         } else {
1543                 ASSERT(mp->b_rptr + msgdsize(mp) == mp->b_wptr);
1544                 fencepost = (uint8_t *)mp->b_wptr;
1545                 satypes = ereg->sadb_x_ereg_satypes;
1546                 while (*satypes != SADB_SATYPE_UNSPEC && satypes != fencepost) {
1547                         downmp = copymsg(mp);
1548                         if (downmp == NULL) {
1549                                 keysock_error(ks, mp, ENOMEM, 0);
1550                                 return;
1551                         }
1552                         /*
1553                          * Since we've made it here, keysock_get_ext will work!
1554                          */
1555                         (void) keysock_get_ext(downextv,
1556                             (sadb_msg_t *)downmp->b_rptr, msgdsize(downmp),
1557                             keystack);
1558                         keysock_passdown(ks, downmp, *satypes, downextv,
1559                             B_FALSE);
1560                         ++satypes;
1561                 }
1562                 freemsg(mp);
1563         }
1564 
1565         /*
1566          * Set global to indicate we prefer an extended ACQUIRE.
1567          */
1568         atomic_inc_32(&keystack->keystack_num_extended);
1569 }
1570 
1571 /*
1572  * Handle PF_KEY messages.
1573  */
1574 static void
1575 keysock_parse(queue_t *q, mblk_t *mp)
1576 {
1577         sadb_msg_t *samsg;
1578         sadb_ext_t *extv[SADB_EXT_MAX + 1];
1579         keysock_t *ks = (keysock_t *)q->q_ptr;
1580         uint_t msgsize;
1581         uint8_t satype;
1582         keysock_stack_t *keystack = ks->keysock_keystack;
1583 
1584         /* Make sure I'm a PF_KEY socket.  (i.e. nothing's below me) */
1585         ASSERT(WR(q)->q_next == NULL);
1586 
1587         samsg = (sadb_msg_t *)mp->b_rptr;
1588         ks2dbg(keystack, ("Received possible PF_KEY message, type %d.\n",
1589             samsg->sadb_msg_type));
1590 
1591         msgsize = SADB_64TO8(samsg->sadb_msg_len);
1592 
1593         if (msgdsize(mp) != msgsize) {
1594                 /*
1595                  * Message len incorrect w.r.t. actual size.  Send an error
1596                  * (EMSGSIZE).  It may be necessary to massage things a
1597                  * bit.  For example, if the sadb_msg_type is hosed,
1598                  * I need to set it to SADB_RESERVED to get delivery to
1599                  * do the right thing.  Then again, maybe just letting
1600                  * the error delivery do the right thing.
1601                  */
1602                 ks2dbg(keystack,
1603                     ("mblk (%lu) and base (%d) message sizes don't jibe.\n",
1604                     msgdsize(mp), msgsize));
1605                 keysock_error(ks, mp, EMSGSIZE, SADB_X_DIAGNOSTIC_NONE);
1606                 return;
1607         }
1608 
1609         if (msgsize > (uint_t)(mp->b_wptr - mp->b_rptr)) {
1610                 /* Get all message into one mblk. */
1611                 if (pullupmsg(mp, -1) == 0) {
1612                         /*
1613                          * Something screwy happened.
1614                          */
1615                         ks3dbg(keystack,
1616                             ("keysock_parse: pullupmsg() failed.\n"));
1617                         return;
1618                 } else {
1619                         samsg = (sadb_msg_t *)mp->b_rptr;
1620                 }
1621         }
1622 
1623         switch (keysock_get_ext(extv, samsg, msgsize, keystack)) {
1624         case KGE_DUP:
1625                 /* Handle duplicate extension. */
1626                 ks1dbg(keystack, ("Got duplicate extension of type %d.\n",
1627                     extv[0]->sadb_ext_type));
1628                 keysock_error(ks, mp, EINVAL,
1629                     keysock_duplicate(extv[0]->sadb_ext_type));
1630                 return;
1631         case KGE_UNK:
1632                 /* Handle unknown extension. */
1633                 ks1dbg(keystack, ("Got unknown extension of type %d.\n",
1634                     extv[0]->sadb_ext_type));
1635                 keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_EXT);
1636                 return;
1637         case KGE_LEN:
1638                 /* Length error. */
1639                 ks1dbg(keystack,
1640                     ("Length %d on extension type %d overrun or 0.\n",
1641                     extv[0]->sadb_ext_len, extv[0]->sadb_ext_type));
1642                 keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_BAD_EXTLEN);
1643                 return;
1644         case KGE_CHK:
1645                 /* Reality check failed. */
1646                 ks1dbg(keystack,
1647                     ("Reality check failed on extension type %d.\n",
1648                     extv[0]->sadb_ext_type));
1649                 keysock_error(ks, mp, EINVAL,
1650                     keysock_malformed(extv[0]->sadb_ext_type));
1651                 return;
1652         default:
1653                 /* Default case is no errors. */
1654                 break;
1655         }
1656 
1657         switch (samsg->sadb_msg_type) {
1658         case SADB_REGISTER:
1659                 /*
1660                  * There's a semantic weirdness in that a message OTHER than
1661                  * the return REGISTER message may be passed up if I set the
1662                  * registered bit BEFORE I pass it down.
1663                  *
1664                  * SOOOO, I'll not twiddle any registered bits until I see
1665                  * the upbound REGISTER (with a serial number in it).
1666                  */
1667                 if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1668                         /* Handle extended register here. */
1669                         keysock_extended_register(ks, mp, extv);
1670                         return;
1671                 } else if (ks->keysock_flags & KEYSOCK_EXTENDED) {
1672                         keysock_error(ks, mp, EBUSY, 0);
1673                         return;
1674                 }
1675                 /* FALLTHRU */
1676         case SADB_GETSPI:
1677         case SADB_ADD:
1678         case SADB_UPDATE:
1679         case SADB_X_UPDATEPAIR:
1680         case SADB_DELETE:
1681         case SADB_X_DELPAIR:
1682         case SADB_GET:
1683                 /*
1684                  * Pass down to appropriate consumer.
1685                  */
1686                 if (samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC)
1687                         keysock_passdown(ks, mp, samsg->sadb_msg_satype, extv,
1688                             B_FALSE);
1689                 else keysock_error(ks, mp, EINVAL,
1690                     SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1691                 return;
1692         case SADB_ACQUIRE:
1693                 /*
1694                  * If I _receive_ an acquire, this means I should spread it
1695                  * out to registered sockets.  Unless there's an errno...
1696                  *
1697                  * Need ADDRESS, may have ID, SENS, and PROP, unless errno,
1698                  * in which case there should be NO extensions.
1699                  *
1700                  * Return to registered.
1701                  */
1702                 if (samsg->sadb_msg_errno != 0) {
1703                         satype = samsg->sadb_msg_satype;
1704                         if (satype == SADB_SATYPE_UNSPEC) {
1705                                 if (!(ks->keysock_flags & KEYSOCK_EXTENDED)) {
1706                                         keysock_error(ks, mp, EINVAL,
1707                                             SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1708                                         return;
1709                                 }
1710                                 /*
1711                                  * Reassign satype based on the first
1712                                  * flags that KEYSOCK_SETREG says.
1713                                  */
1714                                 while (satype <= SADB_SATYPE_MAX) {
1715                                         if (KEYSOCK_ISREG(ks, satype))
1716                                                 break;
1717                                         satype++;
1718                                 }
1719                                 if (satype > SADB_SATYPE_MAX) {
1720                                         keysock_error(ks, mp, EBUSY, 0);
1721                                         return;
1722                                 }
1723                         }
1724                         keysock_passdown(ks, mp, satype, extv, B_FALSE);
1725                 } else {
1726                         if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1727                                 keysock_error(ks, mp, EINVAL,
1728                                     SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1729                         } else {
1730                                 keysock_passup(mp, samsg, 0, NULL, B_FALSE,
1731                                     keystack);
1732                         }
1733                 }
1734                 return;
1735         case SADB_EXPIRE:
1736                 /*
1737                  * If someone sends this in, then send out to all senders.
1738                  * (Save maybe ESP or AH, I have to be careful here.)
1739                  *
1740                  * Need ADDRESS, may have ID and SENS.
1741                  *
1742                  * XXX for now this is unsupported.
1743                  */
1744                 break;
1745         case SADB_FLUSH:
1746         case SADB_DUMP:  /* not used by normal applications */
1747                 /*
1748                  * Nuke all SAs.
1749                  *
1750                  * No extensions at all.  Return to all listeners.
1751                  *
1752                  * Question:    Should I hold a lock here to prevent
1753                  *              additions/deletions while flushing?
1754                  * Answer:      No.  (See keysock_passdown() for details.)
1755                  */
1756                 if (extv[0] != NULL) {
1757                         /*
1758                          * FLUSH messages shouldn't have extensions.
1759                          * Return EINVAL.
1760                          */
1761                         ks2dbg(keystack, ("FLUSH message with extension.\n"));
1762                         keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_NO_EXT);
1763                         return;
1764                 }
1765 
1766                 /* Passing down of DUMP/FLUSH messages are special. */
1767                 qwriter(q, mp, keysock_do_flushdump, PERIM_INNER);
1768                 return;
1769         case SADB_X_PROMISC:
1770                 /*
1771                  * Promiscuous processing message.
1772                  */
1773                 if (samsg->sadb_msg_satype == 0)
1774                         ks->keysock_flags &= ~KEYSOCK_PROMISC;
1775                 else
1776                         ks->keysock_flags |= KEYSOCK_PROMISC;
1777                 keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE,
1778                     keystack);
1779                 return;
1780         case SADB_X_INVERSE_ACQUIRE:
1781                 keysock_inverse_acquire(mp, samsg, extv, ks);
1782                 return;
1783         default:
1784                 ks2dbg(keystack, ("Got unknown message type %d.\n",
1785                     samsg->sadb_msg_type));
1786                 keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_MSG);
1787                 return;
1788         }
1789 
1790         /* As a placeholder... */
1791         ks0dbg(("keysock_parse():  Hit EOPNOTSUPP\n"));
1792         keysock_error(ks, mp, EOPNOTSUPP, SADB_X_DIAGNOSTIC_NONE);
1793 }
1794 
1795 /*
1796  * wput routing for PF_KEY/keysock/whatever.  Unlike the routing socket,
1797  * I don't convert to ioctl()'s for IP.  I am the end-all driver as far
1798  * as PF_KEY sockets are concerned.  I do some conversion, but not as much
1799  * as IP/rts does.
1800  */
1801 static void
1802 keysock_wput(queue_t *q, mblk_t *mp)
1803 {
1804         uchar_t *rptr = mp->b_rptr;
1805         mblk_t *mp1;
1806         keysock_t *ks;
1807         keysock_stack_t *keystack;
1808 
1809         if (WR(q)->q_next) {
1810                 keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
1811                 keystack = kc->kc_keystack;
1812 
1813                 ks3dbg(keystack, ("In keysock_wput\n"));
1814 
1815                 /*
1816                  * We shouldn't get writes on a consumer instance.
1817                  * But for now, just passthru.
1818                  */
1819                 ks1dbg(keystack, ("Huh?  wput for an consumer instance (%d)?\n",
1820                     kc->kc_sa_type));
1821                 putnext(q, mp);
1822                 return;
1823         }
1824         ks = (keysock_t *)q->q_ptr;
1825         keystack = ks->keysock_keystack;
1826 
1827         ks3dbg(keystack, ("In keysock_wput\n"));
1828 
1829         switch (mp->b_datap->db_type) {
1830         case M_DATA:
1831                 /*
1832                  * Silently discard.
1833                  */
1834                 ks2dbg(keystack, ("raw M_DATA in keysock.\n"));
1835                 freemsg(mp);
1836                 return;
1837         case M_PROTO:
1838         case M_PCPROTO:
1839                 if ((mp->b_wptr - rptr) >= sizeof (struct T_data_req)) {
1840                         if (((union T_primitives *)rptr)->type == T_DATA_REQ) {
1841                                 if ((mp1 = mp->b_cont) == NULL) {
1842                                         /* No data after T_DATA_REQ. */
1843                                         ks2dbg(keystack,
1844                                             ("No data after DATA_REQ.\n"));
1845                                         freemsg(mp);
1846                                         return;
1847                                 }
1848                                 freeb(mp);
1849                                 mp = mp1;
1850                                 ks2dbg(keystack, ("T_DATA_REQ\n"));
1851                                 break;  /* Out of switch. */
1852                         }
1853                 }
1854                 /* FALLTHRU */
1855         default:
1856                 ks3dbg(keystack, ("In default wput case (%d %d).\n",
1857                     mp->b_datap->db_type, ((union T_primitives *)rptr)->type));
1858                 keysock_wput_other(q, mp);
1859                 return;
1860         }
1861 
1862         /* I now have a PF_KEY message in an M_DATA block, pointed to by mp. */
1863         keysock_parse(q, mp);
1864 }
1865 
1866 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_rput(). */
1867 
1868 /*
1869  * Called upon receipt of a KEYSOCK_HELLO_ACK to set up the appropriate
1870  * state vectors.
1871  */
1872 static void
1873 keysock_link_consumer(uint8_t satype, keysock_consumer_t *kc)
1874 {
1875         keysock_t *ks;
1876         keysock_stack_t *keystack = kc->kc_keystack;
1877 
1878         mutex_enter(&keystack->keystack_consumers_lock);
1879         mutex_enter(&kc->kc_lock);
1880         if (keystack->keystack_consumers[satype] != NULL) {
1881                 ks0dbg((
1882                     "Hmmmm, someone closed %d before the HELLO_ACK happened.\n",
1883                     satype));
1884                 /*
1885                  * Perhaps updating the new below-me consumer with what I have
1886                  * so far would work too?
1887                  */
1888                 mutex_exit(&kc->kc_lock);
1889                 mutex_exit(&keystack->keystack_consumers_lock);
1890         } else {
1891                 /* Add new below-me consumer. */
1892                 keystack->keystack_consumers[satype] = kc;
1893 
1894                 kc->kc_flags = 0;
1895                 kc->kc_sa_type = satype;
1896                 mutex_exit(&kc->kc_lock);
1897                 mutex_exit(&keystack->keystack_consumers_lock);
1898 
1899                 /* Scan the keysock list. */
1900                 mutex_enter(&keystack->keystack_list_lock);
1901                 for (ks = keystack->keystack_list; ks != NULL;
1902                     ks = ks->keysock_next) {
1903                         if (KEYSOCK_ISREG(ks, satype)) {
1904                                 /*
1905                                  * XXX Perhaps send an SADB_REGISTER down on
1906                                  * the socket's behalf.
1907                                  */
1908                                 ks1dbg(keystack,
1909                                     ("Socket %u registered already for "
1910                                     "new consumer.\n", ks->keysock_serial));
1911                         }
1912                 }
1913                 mutex_exit(&keystack->keystack_list_lock);
1914         }
1915 }
1916 
1917 /*
1918  * Generate a KEYSOCK_OUT_ERR message for my consumer.
1919  */
1920 static void
1921 keysock_out_err(keysock_consumer_t *kc, int ks_errno, mblk_t *mp)
1922 {
1923         keysock_out_err_t *kse;
1924         mblk_t *imp;
1925         keysock_stack_t *keystack = kc->kc_keystack;
1926 
1927         imp = allocb(sizeof (ipsec_info_t), BPRI_HI);
1928         if (imp == NULL) {
1929                 ks1dbg(keystack, ("keysock_out_err:  Can't alloc message.\n"));
1930                 return;
1931         }
1932 
1933         imp->b_datap->db_type = M_CTL;
1934         imp->b_wptr += sizeof (ipsec_info_t);
1935 
1936         kse = (keysock_out_err_t *)imp->b_rptr;
1937         imp->b_cont = mp;
1938         kse->ks_err_type = KEYSOCK_OUT_ERR;
1939         kse->ks_err_len = sizeof (*kse);
1940         /* Is serial necessary? */
1941         kse->ks_err_serial = 0;
1942         kse->ks_err_errno = ks_errno;
1943 
1944         /*
1945          * XXX What else do I need to do here w.r.t. information
1946          * to tell the consumer what caused this error?
1947          *
1948          * I believe the answer is the PF_KEY ACQUIRE (or other) message
1949          * attached in mp, which is appended at the end.  I believe the
1950          * db_ref won't matter here, because the PF_KEY message is only read
1951          * for KEYSOCK_OUT_ERR.
1952          */
1953 
1954         putnext(kc->kc_wq, imp);
1955 }
1956 
1957 /* XXX this is a hack errno. */
1958 #define EIPSECNOSA 255
1959 
1960 /*
1961  * Route message (pointed by mp, header in samsg) toward appropriate
1962  * sockets.  Assume the message's creator did its job correctly.
1963  *
1964  * This should be a function that is followed by a return in its caller.
1965  * The compiler _should_ be able to use tail-call optimizations to make the
1966  * large ## of parameters not a huge deal.
1967  */
1968 static void
1969 keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial,
1970     keysock_consumer_t *kc, boolean_t persistent, keysock_stack_t *keystack)
1971 {
1972         keysock_t *ks;
1973         uint8_t satype = samsg->sadb_msg_satype;
1974         boolean_t toall = B_FALSE, allreg = B_FALSE, allereg = B_FALSE,
1975             setalg = B_FALSE;
1976         mblk_t *mp1;
1977         int err = EIPSECNOSA;
1978 
1979         /* Convert mp, which is M_DATA, into an M_PROTO of type T_DATA_IND */
1980         mp1 = allocb(sizeof (struct T_data_req), BPRI_HI);
1981         if (mp1 == NULL) {
1982                 err = ENOMEM;
1983                 goto error;
1984         }
1985         mp1->b_wptr += sizeof (struct T_data_req);
1986         ((struct T_data_ind *)mp1->b_rptr)->PRIM_type = T_DATA_IND;
1987         ((struct T_data_ind *)mp1->b_rptr)->MORE_flag = 0;
1988         mp1->b_datap->db_type = M_PROTO;
1989         mp1->b_cont = mp;
1990         mp = mp1;
1991 
1992         switch (samsg->sadb_msg_type) {
1993         case SADB_FLUSH:
1994         case SADB_GETSPI:
1995         case SADB_UPDATE:
1996         case SADB_X_UPDATEPAIR:
1997         case SADB_ADD:
1998         case SADB_DELETE:
1999         case SADB_X_DELPAIR:
2000         case SADB_EXPIRE:
2001                 /*
2002                  * These are most likely replies.  Don't worry about
2003                  * KEYSOCK_OUT_ERR handling.  Deliver to all sockets.
2004                  */
2005                 ks3dbg(keystack,
2006                     ("Delivering normal message (%d) to all sockets.\n",
2007                     samsg->sadb_msg_type));
2008                 toall = B_TRUE;
2009                 break;
2010         case SADB_REGISTER:
2011                 /*
2012                  * REGISTERs come up for one of three reasons:
2013                  *
2014                  *      1.) In response to a normal SADB_REGISTER
2015                  *              (samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
2016                  *                  serial != 0)
2017                  *              Deliver to normal SADB_REGISTERed sockets.
2018                  *      2.) In response to an extended REGISTER
2019                  *              (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
2020                  *              Deliver to extended REGISTERed socket.
2021                  *      3.) Spontaneous algorithm changes
2022                  *              (samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
2023                  *                  serial == 0)
2024                  *              Deliver to REGISTERed sockets of all sorts.
2025                  */
2026                 if (kc == NULL) {
2027                         /* Here because of keysock_error() call. */
2028                         ASSERT(samsg->sadb_msg_errno != 0);
2029                         break;  /* Out of switch. */
2030                 }
2031                 ks3dbg(keystack, ("Delivering REGISTER.\n"));
2032                 if (satype == SADB_SATYPE_UNSPEC) {
2033                         /* REGISTER Reason #2 */
2034                         allereg = B_TRUE;
2035                         /*
2036                          * Rewhack SA type so PF_KEY socket holder knows what
2037                          * consumer generated this algorithm list.
2038                          */
2039                         satype = kc->kc_sa_type;
2040                         samsg->sadb_msg_satype = satype;
2041                         setalg = B_TRUE;
2042                 } else if (serial == 0) {
2043                         /* REGISTER Reason #3 */
2044                         allreg = B_TRUE;
2045                         allereg = B_TRUE;
2046                 } else {
2047                         /* REGISTER Reason #1 */
2048                         allreg = B_TRUE;
2049                         setalg = B_TRUE;
2050                 }
2051                 break;
2052         case SADB_ACQUIRE:
2053                 /*
2054                  * ACQUIREs are either extended (sadb_msg_satype == 0) or
2055                  * regular (sadb_msg_satype != 0).  And we're guaranteed
2056                  * that serial == 0 for an ACQUIRE.
2057                  */
2058                 ks3dbg(keystack, ("Delivering ACQUIRE.\n"));
2059                 allereg = (satype == SADB_SATYPE_UNSPEC);
2060                 allreg = !allereg;
2061                 /*
2062                  * Corner case - if we send a regular ACQUIRE and there's
2063                  * extended ones registered, don't send an error down to
2064                  * consumers if nobody's listening and prematurely destroy
2065                  * their ACQUIRE record.  This might be too hackish of a
2066                  * solution.
2067                  */
2068                 if (allreg && keystack->keystack_num_extended > 0)
2069                         err = 0;
2070                 break;
2071         case SADB_X_PROMISC:
2072         case SADB_X_INVERSE_ACQUIRE:
2073         case SADB_DUMP:
2074         case SADB_GET:
2075         default:
2076                 /*
2077                  * Deliver to the sender and promiscuous only.
2078                  */
2079                 ks3dbg(keystack, ("Delivering sender/promisc only (%d).\n",
2080                     samsg->sadb_msg_type));
2081                 break;
2082         }
2083 
2084         mutex_enter(&keystack->keystack_list_lock);
2085         for (ks = keystack->keystack_list; ks != NULL; ks = ks->keysock_next) {
2086                 /* Delivery loop. */
2087 
2088                 /*
2089                  * Check special keysock-setting cases (REGISTER replies)
2090                  * here.
2091                  */
2092                 if (setalg && serial == ks->keysock_serial) {
2093                         ASSERT(kc != NULL);
2094                         ASSERT(kc->kc_sa_type == satype);
2095                         KEYSOCK_SETREG(ks, satype);
2096                 }
2097 
2098                 /*
2099                  * NOLOOP takes precedence over PROMISC.  So if you've set
2100                  * !SO_USELOOPBACK, don't expect to see any data...
2101                  */
2102                 if (ks->keysock_flags & KEYSOCK_NOLOOP)
2103                         continue;
2104 
2105                 /*
2106                  * Messages to all, or promiscuous sockets just GET the
2107                  * message.  Perform rules-type checking iff it's not for all
2108                  * listeners or the socket is in promiscuous mode.
2109                  *
2110                  * NOTE:Because of the (kc != NULL && ISREG()), make sure
2111                  *      extended ACQUIREs arrive off a consumer that is
2112                  *      part of the extended REGISTER set of consumers.
2113                  */
2114                 if (serial != ks->keysock_serial &&
2115                     !toall &&
2116                     !(ks->keysock_flags & KEYSOCK_PROMISC) &&
2117                     !((ks->keysock_flags & KEYSOCK_EXTENDED) ?
2118                     allereg : allreg && kc != NULL &&
2119                     KEYSOCK_ISREG(ks, kc->kc_sa_type)))
2120                         continue;
2121 
2122                 mp1 = dupmsg(mp);
2123                 if (mp1 == NULL) {
2124                         ks2dbg(keystack, (
2125                             "keysock_passup():  dupmsg() failed.\n"));
2126                         mp1 = mp;
2127                         mp = NULL;
2128                         err = ENOMEM;
2129                 }
2130 
2131                 /*
2132                  * At this point, we can deliver or attempt to deliver
2133                  * this message.  We're free of obligation to report
2134                  * no listening PF_KEY sockets.  So set err to 0.
2135                  */
2136                 err = 0;
2137 
2138                 /*
2139                  * See if we canputnext(), as well as see if the message
2140                  * needs to be queued if we can't.
2141                  */
2142                 if (!canputnext(ks->keysock_rq)) {
2143                         if (persistent) {
2144                                 if (putq(ks->keysock_rq, mp1) == 0) {
2145                                         ks1dbg(keystack, (
2146                                             "keysock_passup: putq failed.\n"));
2147                                 } else {
2148                                         continue;
2149                                 }
2150                         }
2151                         freemsg(mp1);
2152                         continue;
2153                 }
2154 
2155                 ks3dbg(keystack,
2156                     ("Putting to serial %d.\n", ks->keysock_serial));
2157                 /*
2158                  * Unlike the specific keysock instance case, this
2159                  * will only hit for listeners, so we will only
2160                  * putnext() if we can.
2161                  */
2162                 putnext(ks->keysock_rq, mp1);
2163                 if (mp == NULL)
2164                         break;  /* out of for loop. */
2165         }
2166         mutex_exit(&keystack->keystack_list_lock);
2167 
2168 error:
2169         if ((err != 0) && (kc != NULL)) {
2170                 /*
2171                  * Generate KEYSOCK_OUT_ERR for consumer.
2172                  * Basically, I send this back if I have not been able to
2173                  * transmit (for whatever reason)
2174                  */
2175                 ks1dbg(keystack,
2176                     ("keysock_passup():  No registered of type %d.\n",
2177                     satype));
2178                 if (mp != NULL) {
2179                         if (mp->b_datap->db_type == M_PROTO) {
2180                                 mp1 = mp;
2181                                 mp = mp->b_cont;
2182                                 freeb(mp1);
2183                         }
2184                         /*
2185                          * Do a copymsg() because people who get
2186                          * KEYSOCK_OUT_ERR may alter the message contents.
2187                          */
2188                         mp1 = copymsg(mp);
2189                         if (mp1 == NULL) {
2190                                 ks2dbg(keystack,
2191                                     ("keysock_passup: copymsg() failed.\n"));
2192                                 mp1 = mp;
2193                                 mp = NULL;
2194                         }
2195                         keysock_out_err(kc, err, mp1);
2196                 }
2197         }
2198 
2199         /*
2200          * XXX Blank the message somehow.  This is difficult because we don't
2201          * know at this point if the message has db_ref > 1, etc.
2202          *
2203          * Optimally, keysock messages containing actual keying material would
2204          * be allocated with esballoc(), with a zeroing free function.
2205          */
2206         if (mp != NULL)
2207                 freemsg(mp);
2208 }
2209 
2210 /*
2211  * Keysock's read service procedure is there only for PF_KEY reply
2212  * messages that really need to reach the top.
2213  */
2214 static void
2215 keysock_rsrv(queue_t *q)
2216 {
2217         mblk_t *mp;
2218 
2219         while ((mp = getq(q)) != NULL) {
2220                 if (canputnext(q)) {
2221                         putnext(q, mp);
2222                 } else {
2223                         (void) putbq(q, mp);
2224                         return;
2225                 }
2226         }
2227 }
2228 
2229 /*
2230  * The read procedure should only be invoked by a keysock consumer, like
2231  * ESP, AH, etc.  I should only see KEYSOCK_OUT and KEYSOCK_HELLO_ACK
2232  * messages on my read queues.
2233  */
2234 static void
2235 keysock_rput(queue_t *q, mblk_t *mp)
2236 {
2237         keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
2238         ipsec_info_t *ii;
2239         keysock_hello_ack_t *ksa;
2240         minor_t serial;
2241         mblk_t *mp1;
2242         sadb_msg_t *samsg;
2243         keysock_stack_t *keystack = kc->kc_keystack;
2244 
2245         /* Make sure I'm a consumer instance.  (i.e. something's below me) */
2246         ASSERT(WR(q)->q_next != NULL);
2247 
2248         if (mp->b_datap->db_type != M_CTL) {
2249                 /*
2250                  * Keysock should only see keysock consumer interface
2251                  * messages (see ipsec_info.h) on its read procedure.
2252                  * To be robust, however, putnext() up so the STREAM head can
2253                  * deal with it appropriately.
2254                  */
2255                 ks1dbg(keystack,
2256                     ("Hmmm, a non M_CTL (%d, 0x%x) on keysock_rput.\n",
2257                     mp->b_datap->db_type, mp->b_datap->db_type));
2258                 putnext(q, mp);
2259                 return;
2260         }
2261 
2262         ii = (ipsec_info_t *)mp->b_rptr;
2263 
2264         switch (ii->ipsec_info_type) {
2265         case KEYSOCK_OUT:
2266                 /*
2267                  * A consumer needs to pass a response message or an ACQUIRE
2268                  * UP.  I assume that the consumer has done the right
2269                  * thing w.r.t. message creation, etc.
2270                  */
2271                 serial = ((keysock_out_t *)mp->b_rptr)->ks_out_serial;
2272                 mp1 = mp->b_cont;    /* Get M_DATA portion. */
2273                 freeb(mp);
2274                 samsg = (sadb_msg_t *)mp1->b_rptr;
2275                 if (samsg->sadb_msg_type == SADB_FLUSH ||
2276                     (samsg->sadb_msg_type == SADB_DUMP &&
2277                     samsg->sadb_msg_len == SADB_8TO64(sizeof (*samsg)))) {
2278                         /*
2279                          * If I'm an end-of-FLUSH or an end-of-DUMP marker...
2280                          */
2281                         ASSERT(keystack->keystack_flushdump != 0);
2282                                                 /* Am I flushing? */
2283 
2284                         mutex_enter(&kc->kc_lock);
2285                         kc->kc_flags &= ~KC_FLUSHING;
2286                         mutex_exit(&kc->kc_lock);
2287 
2288                         if (samsg->sadb_msg_errno != 0)
2289                                 keystack->keystack_flushdump_errno =
2290                                     samsg->sadb_msg_errno;
2291 
2292                         /*
2293                          * Lower the atomic "flushing" count.  If it's
2294                          * the last one, send up the end-of-{FLUSH,DUMP} to
2295                          * the appropriate PF_KEY socket.
2296                          */
2297                         if (atomic_dec_32_nv(&keystack->keystack_flushdump) !=
2298                             0) {
2299                                 ks1dbg(keystack,
2300                                     ("One flush/dump message back from %d,"
2301                                     " more to go.\n", samsg->sadb_msg_satype));
2302                                 freemsg(mp1);
2303                                 return;
2304                         }
2305 
2306                         samsg->sadb_msg_errno =
2307                             (uint8_t)keystack->keystack_flushdump_errno;
2308                         if (samsg->sadb_msg_type == SADB_DUMP) {
2309                                 samsg->sadb_msg_seq = 0;
2310                         }
2311                 }
2312                 keysock_passup(mp1, samsg, serial, kc,
2313                     (samsg->sadb_msg_type == SADB_DUMP), keystack);
2314                 return;
2315         case KEYSOCK_HELLO_ACK:
2316                 /* Aha, now we can link in the consumer! */
2317                 ksa = (keysock_hello_ack_t *)ii;
2318                 keysock_link_consumer(ksa->ks_hello_satype, kc);
2319                 freemsg(mp);
2320                 return;
2321         default:
2322                 ks1dbg(keystack, ("Hmmm, an IPsec info I'm not used to, 0x%x\n",
2323                     ii->ipsec_info_type));
2324                 putnext(q, mp);
2325         }
2326 }
2327 
2328 /*
2329  * So we can avoid external linking problems....
2330  */
2331 boolean_t
2332 keysock_extended_reg(netstack_t *ns)
2333 {
2334         keysock_stack_t *keystack = ns->netstack_keysock;
2335 
2336         return (keystack->keystack_num_extended != 0);
2337 }
2338 
2339 uint32_t
2340 keysock_next_seq(netstack_t *ns)
2341 {
2342         keysock_stack_t *keystack = ns->netstack_keysock;
2343 
2344         return (atomic_dec_32_nv(&keystack->keystack_acquire_seq));
2345 }