Print this page
    
1915 IPsec kstats shouldn't be persistent
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ip/ipsecesp.c
          +++ new/usr/src/uts/common/inet/ip/ipsecesp.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
  25   25   * Copyright (c) 2017 Joyent, Inc.
  26   26   */
  27   27  
  28   28  #include <sys/types.h>
  29   29  #include <sys/stream.h>
  30   30  #include <sys/stropts.h>
  31   31  #include <sys/errno.h>
  32   32  #include <sys/strlog.h>
  33   33  #include <sys/tihdr.h>
  34   34  #include <sys/socket.h>
  35   35  #include <sys/ddi.h>
  36   36  #include <sys/sunddi.h>
  37   37  #include <sys/kmem.h>
  38   38  #include <sys/zone.h>
  39   39  #include <sys/sysmacros.h>
  40   40  #include <sys/cmn_err.h>
  41   41  #include <sys/vtrace.h>
  42   42  #include <sys/debug.h>
  43   43  #include <sys/atomic.h>
  44   44  #include <sys/strsun.h>
  45   45  #include <sys/random.h>
  46   46  #include <netinet/in.h>
  47   47  #include <net/if.h>
  48   48  #include <netinet/ip6.h>
  49   49  #include <net/pfkeyv2.h>
  50   50  #include <net/pfpolicy.h>
  51   51  
  52   52  #include <inet/common.h>
  53   53  #include <inet/mi.h>
  54   54  #include <inet/nd.h>
  55   55  #include <inet/ip.h>
  56   56  #include <inet/ip_impl.h>
  57   57  #include <inet/ip6.h>
  58   58  #include <inet/ip_if.h>
  59   59  #include <inet/ip_ndp.h>
  60   60  #include <inet/sadb.h>
  61   61  #include <inet/ipsec_info.h>
  62   62  #include <inet/ipsec_impl.h>
  63   63  #include <inet/ipsecesp.h>
  64   64  #include <inet/ipdrop.h>
  65   65  #include <inet/tcp.h>
  66   66  #include <sys/kstat.h>
  67   67  #include <sys/policy.h>
  68   68  #include <sys/strsun.h>
  69   69  #include <sys/strsubr.h>
  70   70  #include <inet/udp_impl.h>
  71   71  #include <sys/taskq.h>
  72   72  #include <sys/note.h>
  73   73  
  74   74  #include <sys/tsol/tnet.h>
  75   75  
  76   76  /*
  77   77   * Table of ND variables supported by ipsecesp. These are loaded into
  78   78   * ipsecesp_g_nd in ipsecesp_init_nd.
  79   79   * All of these are alterable, within the min/max values given, at run time.
  80   80   */
  81   81  static  ipsecespparam_t lcl_param_arr[] = {
  82   82          /* min  max                     value   name */
  83   83          { 0,    3,                      0,      "ipsecesp_debug"},
  84   84          { 125,  32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
  85   85          { 1,    10,                     1,      "ipsecesp_reap_delay"},
  86   86          { 1,    SADB_MAX_REPLAY,        64,     "ipsecesp_replay_size"},
  87   87          { 1,    300,                    15,     "ipsecesp_acquire_timeout"},
  88   88          { 1,    1800,                   90,     "ipsecesp_larval_timeout"},
  89   89          /* Default lifetime values for ACQUIRE messages. */
  90   90          { 0,    0xffffffffU,    0,      "ipsecesp_default_soft_bytes"},
  91   91          { 0,    0xffffffffU,    0,      "ipsecesp_default_hard_bytes"},
  92   92          { 0,    0xffffffffU,    24000,  "ipsecesp_default_soft_addtime"},
  93   93          { 0,    0xffffffffU,    28800,  "ipsecesp_default_hard_addtime"},
  94   94          { 0,    0xffffffffU,    0,      "ipsecesp_default_soft_usetime"},
  95   95          { 0,    0xffffffffU,    0,      "ipsecesp_default_hard_usetime"},
  96   96          { 0,    1,              0,      "ipsecesp_log_unknown_spi"},
  97   97          { 0,    2,              1,      "ipsecesp_padding_check"},
  98   98          { 0,    600,            20,     "ipsecesp_nat_keepalive_interval"},
  99   99  };
 100  100  /* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */
 101  101  
 102  102  #define esp0dbg(a)      printf a
 103  103  /* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
 104  104  #define esp1dbg(espstack, a)    if (espstack->ipsecesp_debug != 0) printf a
 105  105  #define esp2dbg(espstack, a)    if (espstack->ipsecesp_debug > 1) printf a
 106  106  #define esp3dbg(espstack, a)    if (espstack->ipsecesp_debug > 2) printf a
 107  107  
 108  108  static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
 109  109  static int ipsecesp_close(queue_t *);
 110  110  static void ipsecesp_wput(queue_t *, mblk_t *);
 111  111  static void     *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
 112  112  static void     ipsecesp_stack_fini(netstackid_t stackid, void *arg);
 113  113  
 114  114  static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
 115  115  static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *);
 116  116  static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *);
 117  117  
 118  118  static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
 119  119      ipsecesp_stack_t *, cred_t *);
 120  120  static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
 121  121      kstat_named_t **, ipsecesp_stack_t *);
 122  122  static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *,
 123  123      ipsa_t *, uint_t);
 124  124  static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *,
 125  125      ipsa_t *, uchar_t *, uint_t);
 126  126  
 127  127  /* Setable in /etc/system */
 128  128  uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;
 129  129  
 130  130  static struct module_info info = {
 131  131          5137, "ipsecesp", 0, INFPSZ, 65536, 1024
 132  132  };
 133  133  
 134  134  static struct qinit rinit = {
 135  135          (pfi_t)putnext, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
 136  136          NULL
 137  137  };
 138  138  
 139  139  static struct qinit winit = {
 140  140          (pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
 141  141          NULL
 142  142  };
 143  143  
 144  144  struct streamtab ipsecespinfo = {
 145  145          &rinit, &winit, NULL, NULL
 146  146  };
 147  147  
 148  148  static taskq_t *esp_taskq;
 149  149  
 150  150  /*
 151  151   * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
 152  152   *
 153  153   * Question:    Do I need this, given that all instance's esps->esps_wq point
 154  154   *              to IP?
 155  155   *
 156  156   * Answer:      Yes, because I need to know which queue is BOUND to
  
    | 
      ↓ open down ↓ | 
    156 lines elided | 
    
      ↑ open up ↑ | 
  
 157  157   *              IPPROTO_ESP
 158  158   */
 159  159  
 160  160  static int      esp_kstat_update(kstat_t *, int);
 161  161  
 162  162  static boolean_t
 163  163  esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
 164  164  {
 165  165          espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
 166  166              "net", KSTAT_TYPE_NAMED,
 167      -            sizeof (esp_kstats_t) / sizeof (kstat_named_t),
 168      -            KSTAT_FLAG_PERSISTENT, stackid);
      167 +            sizeof (esp_kstats_t) / sizeof (kstat_named_t), 0, stackid);
 169  168  
 170  169          if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
 171  170                  return (B_FALSE);
 172  171  
 173  172          espstack->esp_kstats = espstack->esp_ksp->ks_data;
 174  173  
 175  174          espstack->esp_ksp->ks_update = esp_kstat_update;
 176  175          espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid;
 177  176  
 178  177  #define K64 KSTAT_DATA_UINT64
 179  178  #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64)
 180  179  
 181  180          KI(num_aalgs);
 182  181          KI(num_ealgs);
 183  182          KI(good_auth);
 184  183          KI(bad_auth);
 185  184          KI(bad_padding);
 186  185          KI(replay_failures);
 187  186          KI(replay_early_failures);
 188  187          KI(keysock_in);
 189  188          KI(out_requests);
 190  189          KI(acquire_requests);
 191  190          KI(bytes_expired);
 192  191          KI(out_discards);
 193  192          KI(crypto_sync);
 194  193          KI(crypto_async);
 195  194          KI(crypto_failures);
 196  195          KI(bad_decrypt);
 197  196          KI(sa_port_renumbers);
 198  197  
 199  198  #undef KI
 200  199  #undef K64
 201  200  
 202  201          kstat_install(espstack->esp_ksp);
 203  202  
 204  203          return (B_TRUE);
 205  204  }
 206  205  
 207  206  static int
 208  207  esp_kstat_update(kstat_t *kp, int rw)
 209  208  {
 210  209          esp_kstats_t *ekp;
 211  210          netstackid_t    stackid = (zoneid_t)(uintptr_t)kp->ks_private;
 212  211          netstack_t      *ns;
 213  212          ipsec_stack_t   *ipss;
 214  213  
 215  214          if ((kp == NULL) || (kp->ks_data == NULL))
 216  215                  return (EIO);
 217  216  
 218  217          if (rw == KSTAT_WRITE)
 219  218                  return (EACCES);
 220  219  
 221  220          ns = netstack_find_by_stackid(stackid);
 222  221          if (ns == NULL)
 223  222                  return (-1);
 224  223          ipss = ns->netstack_ipsec;
 225  224          if (ipss == NULL) {
 226  225                  netstack_rele(ns);
 227  226                  return (-1);
 228  227          }
 229  228          ekp = (esp_kstats_t *)kp->ks_data;
 230  229  
 231  230          rw_enter(&ipss->ipsec_alg_lock, RW_READER);
 232  231          ekp->esp_stat_num_aalgs.value.ui64 =
 233  232              ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
 234  233          ekp->esp_stat_num_ealgs.value.ui64 =
 235  234              ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
 236  235          rw_exit(&ipss->ipsec_alg_lock);
 237  236  
 238  237          netstack_rele(ns);
 239  238          return (0);
 240  239  }
 241  240  
 242  241  #ifdef DEBUG
 243  242  /*
 244  243   * Debug routine, useful to see pre-encryption data.
 245  244   */
 246  245  static char *
 247  246  dump_msg(mblk_t *mp)
 248  247  {
 249  248          char tmp_str[3], tmp_line[256];
 250  249  
 251  250          while (mp != NULL) {
 252  251                  unsigned char *ptr;
 253  252  
 254  253                  printf("mblk address 0x%p, length %ld, db_ref %d "
 255  254                      "type %d, base 0x%p, lim 0x%p\n",
 256  255                      (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
 257  256                      mp->b_datap->db_ref, mp->b_datap->db_type,
 258  257                      (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
 259  258                  ptr = mp->b_rptr;
 260  259  
 261  260                  tmp_line[0] = '\0';
 262  261                  while (ptr < mp->b_wptr) {
 263  262                          uint_t diff;
 264  263  
 265  264                          diff = (ptr - mp->b_rptr);
 266  265                          if (!(diff & 0x1f)) {
 267  266                                  if (strlen(tmp_line) > 0) {
 268  267                                          printf("bytes: %s\n", tmp_line);
 269  268                                          tmp_line[0] = '\0';
 270  269                                  }
 271  270                          }
 272  271                          if (!(diff & 0x3))
 273  272                                  (void) strcat(tmp_line, " ");
 274  273                          (void) sprintf(tmp_str, "%02x", *ptr);
 275  274                          (void) strcat(tmp_line, tmp_str);
 276  275                          ptr++;
 277  276                  }
 278  277                  if (strlen(tmp_line) > 0)
 279  278                          printf("bytes: %s\n", tmp_line);
 280  279  
 281  280                  mp = mp->b_cont;
 282  281          }
 283  282  
 284  283          return ("\n");
 285  284  }
 286  285  
 287  286  #else /* DEBUG */
 288  287  static char *
 289  288  dump_msg(mblk_t *mp)
 290  289  {
 291  290          printf("Find value of mp %p.\n", mp);
 292  291          return ("\n");
 293  292  }
 294  293  #endif /* DEBUG */
 295  294  
 296  295  /*
 297  296   * Don't have to lock age_interval, as only one thread will access it at
 298  297   * a time, because I control the one function that does with timeout().
 299  298   */
 300  299  static void
 301  300  esp_ager(void *arg)
 302  301  {
 303  302          ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
 304  303          netstack_t      *ns = espstack->ipsecesp_netstack;
 305  304          hrtime_t begin = gethrtime();
 306  305  
 307  306          sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q,
 308  307              espstack->ipsecesp_reap_delay, ns);
 309  308          sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q,
 310  309              espstack->ipsecesp_reap_delay, ns);
 311  310  
 312  311          espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q,
 313  312              esp_ager, espstack,
 314  313              &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max,
 315  314              info.mi_idnum);
 316  315  }
 317  316  
 318  317  /*
 319  318   * Get an ESP NDD parameter.
 320  319   */
 321  320  /* ARGSUSED */
 322  321  static int
 323  322  ipsecesp_param_get(
 324  323      queue_t     *q,
 325  324      mblk_t      *mp,
 326  325      caddr_t     cp,
 327  326      cred_t *cr)
 328  327  {
 329  328          ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
 330  329          uint_t value;
 331  330          ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
 332  331  
 333  332          mutex_enter(&espstack->ipsecesp_param_lock);
 334  333          value = ipsecesppa->ipsecesp_param_value;
 335  334          mutex_exit(&espstack->ipsecesp_param_lock);
 336  335  
 337  336          (void) mi_mpprintf(mp, "%u", value);
 338  337          return (0);
 339  338  }
 340  339  
 341  340  /*
 342  341   * This routine sets an NDD variable in a ipsecespparam_t structure.
 343  342   */
 344  343  /* ARGSUSED */
 345  344  static int
 346  345  ipsecesp_param_set(
 347  346      queue_t     *q,
 348  347      mblk_t      *mp,
 349  348      char        *value,
 350  349      caddr_t     cp,
 351  350      cred_t *cr)
 352  351  {
 353  352          ulong_t new_value;
 354  353          ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
 355  354          ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
 356  355  
 357  356          /*
 358  357           * Fail the request if the new value does not lie within the
 359  358           * required bounds.
 360  359           */
 361  360          if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
 362  361              new_value < ipsecesppa->ipsecesp_param_min ||
 363  362              new_value > ipsecesppa->ipsecesp_param_max) {
 364  363                  return (EINVAL);
 365  364          }
 366  365  
 367  366          /* Set the new value */
 368  367          mutex_enter(&espstack->ipsecesp_param_lock);
 369  368          ipsecesppa->ipsecesp_param_value = new_value;
 370  369          mutex_exit(&espstack->ipsecesp_param_lock);
 371  370          return (0);
 372  371  }
 373  372  
 374  373  /*
 375  374   * Using lifetime NDD variables, fill in an extended combination's
 376  375   * lifetime information.
 377  376   */
 378  377  void
 379  378  ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
 380  379  {
 381  380          ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
 382  381  
 383  382          ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes;
 384  383          ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes;
 385  384          ecomb->sadb_x_ecomb_soft_addtime =
 386  385              espstack->ipsecesp_default_soft_addtime;
 387  386          ecomb->sadb_x_ecomb_hard_addtime =
 388  387              espstack->ipsecesp_default_hard_addtime;
 389  388          ecomb->sadb_x_ecomb_soft_usetime =
 390  389              espstack->ipsecesp_default_soft_usetime;
 391  390          ecomb->sadb_x_ecomb_hard_usetime =
 392  391              espstack->ipsecesp_default_hard_usetime;
 393  392  }
 394  393  
 395  394  /*
 396  395   * Initialize things for ESP at module load time.
 397  396   */
 398  397  boolean_t
 399  398  ipsecesp_ddi_init(void)
 400  399  {
 401  400          esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
 402  401              IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
 403  402  
 404  403          /*
 405  404           * We want to be informed each time a stack is created or
 406  405           * destroyed in the kernel, so we can maintain the
 407  406           * set of ipsecesp_stack_t's.
 408  407           */
 409  408          netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL,
 410  409              ipsecesp_stack_fini);
 411  410  
 412  411          return (B_TRUE);
 413  412  }
 414  413  
 415  414  /*
 416  415   * Walk through the param array specified registering each element with the
 417  416   * named dispatch handler.
 418  417   */
 419  418  static boolean_t
 420  419  ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt)
 421  420  {
 422  421          for (; cnt-- > 0; espp++) {
 423  422                  if (espp->ipsecesp_param_name != NULL &&
 424  423                      espp->ipsecesp_param_name[0]) {
 425  424                          if (!nd_load(ndp,
 426  425                              espp->ipsecesp_param_name,
 427  426                              ipsecesp_param_get, ipsecesp_param_set,
 428  427                              (caddr_t)espp)) {
 429  428                                  nd_free(ndp);
 430  429                                  return (B_FALSE);
 431  430                          }
 432  431                  }
 433  432          }
 434  433          return (B_TRUE);
 435  434  }
 436  435  
 437  436  /*
 438  437   * Initialize things for ESP for each stack instance
 439  438   */
 440  439  static void *
 441  440  ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns)
 442  441  {
 443  442          ipsecesp_stack_t        *espstack;
 444  443          ipsecespparam_t         *espp;
 445  444  
 446  445          espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack),
 447  446              KM_SLEEP);
 448  447          espstack->ipsecesp_netstack = ns;
 449  448  
 450  449          espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
 451  450          espstack->ipsecesp_params = espp;
 452  451          bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr));
 453  452  
 454  453          (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp,
 455  454              A_CNT(lcl_param_arr));
 456  455  
 457  456          (void) esp_kstat_init(espstack, stackid);
 458  457  
 459  458          espstack->esp_sadb.s_acquire_timeout =
 460  459              &espstack->ipsecesp_acquire_timeout;
 461  460          sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size,
 462  461              espstack->ipsecesp_netstack);
 463  462  
 464  463          mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);
 465  464  
 466  465          ip_drop_register(&espstack->esp_dropper, "IPsec ESP");
 467  466          return (espstack);
 468  467  }
 469  468  
 470  469  /*
 471  470   * Destroy things for ESP at module unload time.
 472  471   */
 473  472  void
 474  473  ipsecesp_ddi_destroy(void)
 475  474  {
 476  475          netstack_unregister(NS_IPSECESP);
 477  476          taskq_destroy(esp_taskq);
 478  477  }
 479  478  
 480  479  /*
 481  480   * Destroy things for ESP for one stack instance
 482  481   */
 483  482  static void
 484  483  ipsecesp_stack_fini(netstackid_t stackid, void *arg)
 485  484  {
 486  485          ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
 487  486  
 488  487          if (espstack->esp_pfkey_q != NULL) {
 489  488                  (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event);
 490  489          }
 491  490          espstack->esp_sadb.s_acquire_timeout = NULL;
 492  491          sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack);
 493  492          ip_drop_unregister(&espstack->esp_dropper);
 494  493          mutex_destroy(&espstack->ipsecesp_param_lock);
 495  494          nd_free(&espstack->ipsecesp_g_nd);
 496  495  
 497  496          kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr));
 498  497          espstack->ipsecesp_params = NULL;
 499  498          kstat_delete_netstack(espstack->esp_ksp, stackid);
 500  499          espstack->esp_ksp = NULL;
 501  500          espstack->esp_kstats = NULL;
 502  501          kmem_free(espstack, sizeof (*espstack));
 503  502  }
 504  503  
 505  504  /*
 506  505   * ESP module open routine, which is here for keysock plumbing.
 507  506   * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
 508  507   * Days of export control, and fears that ESP would not be allowed
 509  508   * to be shipped at all by default.  Eventually, keysock should
 510  509   * either access AH and ESP via modstubs or krtld dependencies, or
 511  510   * perhaps be folded in with AH and ESP into a single IPsec/netsec
 512  511   * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
 513  512   */
 514  513  /* ARGSUSED */
 515  514  static int
 516  515  ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 517  516  {
 518  517          netstack_t              *ns;
 519  518          ipsecesp_stack_t        *espstack;
 520  519  
 521  520          if (secpolicy_ip_config(credp, B_FALSE) != 0)
 522  521                  return (EPERM);
 523  522  
 524  523          if (q->q_ptr != NULL)
 525  524                  return (0);  /* Re-open of an already open instance. */
 526  525  
 527  526          if (sflag != MODOPEN)
 528  527                  return (EINVAL);
 529  528  
 530  529          ns = netstack_find_by_cred(credp);
 531  530          ASSERT(ns != NULL);
 532  531          espstack = ns->netstack_ipsecesp;
 533  532          ASSERT(espstack != NULL);
 534  533  
 535  534          q->q_ptr = espstack;
 536  535          WR(q)->q_ptr = q->q_ptr;
 537  536  
 538  537          qprocson(q);
 539  538          return (0);
 540  539  }
 541  540  
 542  541  /*
 543  542   * ESP module close routine.
 544  543   */
 545  544  static int
 546  545  ipsecesp_close(queue_t *q)
 547  546  {
 548  547          ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
 549  548  
 550  549          /*
 551  550           * Clean up q_ptr, if needed.
 552  551           */
 553  552          qprocsoff(q);
 554  553  
 555  554          /* Keysock queue check is safe, because of OCEXCL perimeter. */
 556  555  
 557  556          if (q == espstack->esp_pfkey_q) {
 558  557                  esp1dbg(espstack,
 559  558                      ("ipsecesp_close:  Ummm... keysock is closing ESP.\n"));
 560  559                  espstack->esp_pfkey_q = NULL;
 561  560                  /* Detach qtimeouts. */
 562  561                  (void) quntimeout(q, espstack->esp_event);
 563  562          }
 564  563  
 565  564          netstack_rele(espstack->ipsecesp_netstack);
 566  565          return (0);
 567  566  }
 568  567  
 569  568  /*
 570  569   * Add a number of bytes to what the SA has protected so far.  Return
 571  570   * B_TRUE if the SA can still protect that many bytes.
 572  571   *
 573  572   * Caller must REFRELE the passed-in assoc.  This function must REFRELE
 574  573   * any obtained peer SA.
 575  574   */
 576  575  static boolean_t
 577  576  esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
 578  577  {
 579  578          ipsa_t *inassoc, *outassoc;
 580  579          isaf_t *bucket;
 581  580          boolean_t inrc, outrc, isv6;
 582  581          sadb_t *sp;
 583  582          int outhash;
 584  583          netstack_t              *ns = assoc->ipsa_netstack;
 585  584          ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
 586  585  
 587  586          /* No peer?  No problem! */
 588  587          if (!assoc->ipsa_haspeer) {
 589  588                  return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes,
 590  589                      B_TRUE));
 591  590          }
 592  591  
 593  592          /*
 594  593           * Otherwise, we want to grab both the original assoc and its peer.
 595  594           * There might be a race for this, but if it's a real race, two
 596  595           * expire messages may occur.  We limit this by only sending the
 597  596           * expire message on one of the peers, we'll pick the inbound
 598  597           * arbitrarily.
 599  598           *
 600  599           * If we need tight synchronization on the peer SA, then we need to
 601  600           * reconsider.
 602  601           */
 603  602  
 604  603          /* Use address length to select IPv6/IPv4 */
 605  604          isv6 = (assoc->ipsa_addrfam == AF_INET6);
 606  605          sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
 607  606  
 608  607          if (inbound) {
 609  608                  inassoc = assoc;
 610  609                  if (isv6) {
 611  610                          outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
 612  611                              &inassoc->ipsa_dstaddr));
 613  612                  } else {
 614  613                          outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
 615  614                              &inassoc->ipsa_dstaddr));
 616  615                  }
 617  616                  bucket = &sp->sdb_of[outhash];
 618  617                  mutex_enter(&bucket->isaf_lock);
 619  618                  outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
 620  619                      inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
 621  620                      inassoc->ipsa_addrfam);
 622  621                  mutex_exit(&bucket->isaf_lock);
 623  622                  if (outassoc == NULL) {
 624  623                          /* Q: Do we wish to set haspeer == B_FALSE? */
 625  624                          esp0dbg(("esp_age_bytes: "
 626  625                              "can't find peer for inbound.\n"));
 627  626                          return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc,
 628  627                              bytes, B_TRUE));
 629  628                  }
 630  629          } else {
 631  630                  outassoc = assoc;
 632  631                  bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
 633  632                  mutex_enter(&bucket->isaf_lock);
 634  633                  inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
 635  634                      outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
 636  635                      outassoc->ipsa_addrfam);
 637  636                  mutex_exit(&bucket->isaf_lock);
 638  637                  if (inassoc == NULL) {
 639  638                          /* Q: Do we wish to set haspeer == B_FALSE? */
 640  639                          esp0dbg(("esp_age_bytes: "
 641  640                              "can't find peer for outbound.\n"));
 642  641                          return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc,
 643  642                              bytes, B_TRUE));
 644  643                  }
 645  644          }
 646  645  
 647  646          inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE);
 648  647          outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE);
 649  648  
 650  649          /*
 651  650           * REFRELE any peer SA.
 652  651           *
 653  652           * Because of the multi-line macro nature of IPSA_REFRELE, keep
 654  653           * them in { }.
 655  654           */
 656  655          if (inbound) {
 657  656                  IPSA_REFRELE(outassoc);
 658  657          } else {
 659  658                  IPSA_REFRELE(inassoc);
 660  659          }
 661  660  
 662  661          return (inrc && outrc);
 663  662  }
 664  663  
 665  664  /*
 666  665   * Do incoming NAT-T manipulations for packet.
 667  666   * Returns NULL if the mblk chain is consumed.
 668  667   */
 669  668  static mblk_t *
 670  669  esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
 671  670  {
 672  671          ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
 673  672          tcpha_t *tcpha;
 674  673          udpha_t *udpha;
 675  674          /* Initialize to our inbound cksum adjustment... */
 676  675          uint32_t sum = assoc->ipsa_inbound_cksum;
 677  676  
 678  677          switch (ipha->ipha_protocol) {
 679  678          case IPPROTO_TCP:
 680  679                  tcpha = (tcpha_t *)(data_mp->b_rptr +
 681  680                      IPH_HDR_LENGTH(ipha));
 682  681  
 683  682  #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) +       ((x) >> 16)
 684  683                  sum += ~ntohs(tcpha->tha_sum) & 0xFFFF;
 685  684                  DOWN_SUM(sum);
 686  685                  DOWN_SUM(sum);
 687  686                  tcpha->tha_sum = ~htons(sum);
 688  687                  break;
 689  688          case IPPROTO_UDP:
 690  689                  udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));
 691  690  
 692  691                  if (udpha->uha_checksum != 0) {
 693  692                          /* Adujst if the inbound one was not zero. */
 694  693                          sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
 695  694                          DOWN_SUM(sum);
 696  695                          DOWN_SUM(sum);
 697  696                          udpha->uha_checksum = ~htons(sum);
 698  697                          if (udpha->uha_checksum == 0)
 699  698                                  udpha->uha_checksum = 0xFFFF;
 700  699                  }
 701  700  #undef DOWN_SUM
 702  701                  break;
 703  702          case IPPROTO_IP:
 704  703                  /*
 705  704                   * This case is only an issue for self-encapsulated
 706  705                   * packets.  So for now, fall through.
 707  706                   */
 708  707                  break;
 709  708          }
 710  709          return (data_mp);
 711  710  }
 712  711  
 713  712  
 714  713  /*
 715  714   * Strip ESP header, check padding, and fix IP header.
 716  715   * Returns B_TRUE on success, B_FALSE if an error occured.
 717  716   */
 718  717  static boolean_t
 719  718  esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
 720  719      kstat_named_t **counter, ipsecesp_stack_t *espstack)
 721  720  {
 722  721          ipha_t *ipha;
 723  722          ip6_t *ip6h;
 724  723          uint_t divpoint;
 725  724          mblk_t *scratch;
 726  725          uint8_t nexthdr, padlen;
 727  726          uint8_t lastpad;
 728  727          ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
 729  728          uint8_t *lastbyte;
 730  729  
 731  730          /*
 732  731           * Strip ESP data and fix IP header.
 733  732           *
 734  733           * XXX In case the beginning of esp_inbound() changes to not do a
 735  734           * pullup, this part of the code can remain unchanged.
 736  735           */
 737  736          if (isv4) {
 738  737                  ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
 739  738                  ipha = (ipha_t *)data_mp->b_rptr;
 740  739                  ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
 741  740                      IPH_HDR_LENGTH(ipha));
 742  741                  divpoint = IPH_HDR_LENGTH(ipha);
 743  742          } else {
 744  743                  ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
 745  744                  ip6h = (ip6_t *)data_mp->b_rptr;
 746  745                  divpoint = ip_hdr_length_v6(data_mp, ip6h);
 747  746          }
 748  747  
 749  748          scratch = data_mp;
 750  749          while (scratch->b_cont != NULL)
 751  750                  scratch = scratch->b_cont;
 752  751  
 753  752          ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);
 754  753  
 755  754          /*
 756  755           * "Next header" and padding length are the last two bytes in the
 757  756           * ESP-protected datagram, thus the explicit - 1 and - 2.
 758  757           * lastpad is the last byte of the padding, which can be used for
 759  758           * a quick check to see if the padding is correct.
 760  759           */
 761  760          lastbyte = scratch->b_wptr - 1;
 762  761          nexthdr = *lastbyte--;
 763  762          padlen = *lastbyte--;
 764  763  
 765  764          if (isv4) {
 766  765                  /* Fix part of the IP header. */
 767  766                  ipha->ipha_protocol = nexthdr;
 768  767                  /*
 769  768                   * Reality check the padlen.  The explicit - 2 is for the
 770  769                   * padding length and the next-header bytes.
 771  770                   */
 772  771                  if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
 773  772                      sizeof (esph_t) - ivlen) {
 774  773                          ESP_BUMP_STAT(espstack, bad_decrypt);
 775  774                          ipsec_rl_strlog(espstack->ipsecesp_netstack,
 776  775                              info.mi_idnum, 0, 0,
 777  776                              SL_ERROR | SL_WARN,
 778  777                              "Corrupt ESP packet (padlen too big).\n");
 779  778                          esp1dbg(espstack, ("padlen (%d) is greater than:\n",
 780  779                              padlen));
 781  780                          esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp "
 782  781                              "hdr - ivlen(%d) = %d.\n",
 783  782                              ntohs(ipha->ipha_length), ivlen,
 784  783                              (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
 785  784                              2 - sizeof (esph_t) - ivlen)));
 786  785                          *counter = DROPPER(ipss, ipds_esp_bad_padlen);
 787  786                          return (B_FALSE);
 788  787                  }
 789  788  
 790  789                  /*
 791  790                   * Fix the rest of the header.  The explicit - 2 is for the
 792  791                   * padding length and the next-header bytes.
 793  792                   */
 794  793                  ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
 795  794                      2 - sizeof (esph_t) - ivlen);
 796  795                  ipha->ipha_hdr_checksum = 0;
 797  796                  ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
 798  797          } else {
 799  798                  if (ip6h->ip6_nxt == IPPROTO_ESP) {
 800  799                          ip6h->ip6_nxt = nexthdr;
 801  800                  } else {
 802  801                          ip_pkt_t ipp;
 803  802  
 804  803                          bzero(&ipp, sizeof (ipp));
 805  804                          (void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp,
 806  805                              NULL);
 807  806                          if (ipp.ipp_dstopts != NULL) {
 808  807                                  ipp.ipp_dstopts->ip6d_nxt = nexthdr;
 809  808                          } else if (ipp.ipp_rthdr != NULL) {
 810  809                                  ipp.ipp_rthdr->ip6r_nxt = nexthdr;
 811  810                          } else if (ipp.ipp_hopopts != NULL) {
 812  811                                  ipp.ipp_hopopts->ip6h_nxt = nexthdr;
 813  812                          } else {
 814  813                                  /* Panic a DEBUG kernel. */
 815  814                                  ASSERT(ipp.ipp_hopopts != NULL);
 816  815                                  /* Otherwise, pretend it's IP + ESP. */
 817  816                                  cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
 818  817                                  ip6h->ip6_nxt = nexthdr;
 819  818                          }
 820  819                  }
 821  820  
 822  821                  if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
 823  822                      ivlen) {
 824  823                          ESP_BUMP_STAT(espstack, bad_decrypt);
 825  824                          ipsec_rl_strlog(espstack->ipsecesp_netstack,
 826  825                              info.mi_idnum, 0, 0,
 827  826                              SL_ERROR | SL_WARN,
 828  827                              "Corrupt ESP packet (v6 padlen too big).\n");
 829  828                          esp1dbg(espstack, ("padlen (%d) is greater than:\n",
 830  829                              padlen));
 831  830                          esp1dbg(espstack,
 832  831                              ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = "
 833  832                              "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
 834  833                              + sizeof (ip6_t)), ivlen,
 835  834                              (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
 836  835                              sizeof (esph_t) - ivlen)));
 837  836                          *counter = DROPPER(ipss, ipds_esp_bad_padlen);
 838  837                          return (B_FALSE);
 839  838                  }
 840  839  
 841  840  
 842  841                  /*
 843  842                   * Fix the rest of the header.  The explicit - 2 is for the
 844  843                   * padding length and the next-header bytes.  IPv6 is nice,
 845  844                   * because there's no hdr checksum!
 846  845                   */
 847  846                  ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
 848  847                      2 - sizeof (esph_t) - ivlen);
 849  848          }
 850  849  
 851  850          if (espstack->ipsecesp_padding_check > 0 && padlen > 0) {
 852  851                  /*
 853  852                   * Weak padding check: compare last-byte to length, they
 854  853                   * should be equal.
 855  854                   */
 856  855                  lastpad = *lastbyte--;
 857  856  
 858  857                  if (padlen != lastpad) {
 859  858                          ipsec_rl_strlog(espstack->ipsecesp_netstack,
 860  859                              info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
 861  860                              "Corrupt ESP packet (lastpad != padlen).\n");
 862  861                          esp1dbg(espstack,
 863  862                              ("lastpad (%d) not equal to padlen (%d):\n",
 864  863                              lastpad, padlen));
 865  864                          ESP_BUMP_STAT(espstack, bad_padding);
 866  865                          *counter = DROPPER(ipss, ipds_esp_bad_padding);
 867  866                          return (B_FALSE);
 868  867                  }
 869  868  
 870  869                  /*
 871  870                   * Strong padding check: Check all pad bytes to see that
 872  871                   * they're ascending.  Go backwards using a descending counter
 873  872                   * to verify.  padlen == 1 is checked by previous block, so
 874  873                   * only bother if we've more than 1 byte of padding.
 875  874                   * Consequently, start the check one byte before the location
 876  875                   * of "lastpad".
 877  876                   */
 878  877                  if (espstack->ipsecesp_padding_check > 1) {
 879  878                          /*
 880  879                           * This assert may have to become an if and a pullup
 881  880                           * if we start accepting multi-dblk mblks. For now,
 882  881                           * though, any packet here will have been pulled up in
 883  882                           * esp_inbound.
 884  883                           */
 885  884                          ASSERT(MBLKL(scratch) >= lastpad + 3);
 886  885  
 887  886                          /*
 888  887                           * Use "--lastpad" because we already checked the very
 889  888                           * last pad byte previously.
 890  889                           */
 891  890                          while (--lastpad != 0) {
 892  891                                  if (lastpad != *lastbyte) {
 893  892                                          ipsec_rl_strlog(
 894  893                                              espstack->ipsecesp_netstack,
 895  894                                              info.mi_idnum, 0, 0,
 896  895                                              SL_ERROR | SL_WARN, "Corrupt ESP "
 897  896                                              "packet (bad padding).\n");
 898  897                                          esp1dbg(espstack,
 899  898                                              ("padding not in correct"
 900  899                                              " format:\n"));
 901  900                                          ESP_BUMP_STAT(espstack, bad_padding);
 902  901                                          *counter = DROPPER(ipss,
 903  902                                              ipds_esp_bad_padding);
 904  903                                          return (B_FALSE);
 905  904                                  }
 906  905                                  lastbyte--;
 907  906                          }
 908  907                  }
 909  908          }
 910  909  
 911  910          /* Trim off the padding. */
 912  911          ASSERT(data_mp->b_cont == NULL);
 913  912          data_mp->b_wptr -= (padlen + 2);
 914  913  
 915  914          /*
 916  915           * Remove the ESP header.
 917  916           *
 918  917           * The above assertions about data_mp's size will make this work.
 919  918           *
 920  919           * XXX  Question:  If I send up and get back a contiguous mblk,
 921  920           * would it be quicker to bcopy over, or keep doing the dupb stuff?
 922  921           * I go with copying for now.
 923  922           */
 924  923  
 925  924          if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
 926  925              IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
 927  926                  uint8_t *start = data_mp->b_rptr;
 928  927                  uint32_t *src, *dst;
 929  928  
 930  929                  src = (uint32_t *)(start + divpoint);
 931  930                  dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);
 932  931  
 933  932                  ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
 934  933                      IS_P2ALIGNED(src, sizeof (uint32_t)));
 935  934  
 936  935                  do {
 937  936                          src--;
 938  937                          dst--;
 939  938                          *dst = *src;
 940  939                  } while (src != (uint32_t *)start);
 941  940  
 942  941                  data_mp->b_rptr = (uchar_t *)dst;
 943  942          } else {
 944  943                  uint8_t *start = data_mp->b_rptr;
 945  944                  uint8_t *src, *dst;
 946  945  
 947  946                  src = start + divpoint;
 948  947                  dst = src + sizeof (esph_t) + ivlen;
 949  948  
 950  949                  do {
 951  950                          src--;
 952  951                          dst--;
 953  952                          *dst = *src;
 954  953                  } while (src != start);
 955  954  
 956  955                  data_mp->b_rptr = dst;
 957  956          }
 958  957  
 959  958          esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n"));
 960  959          esp2dbg(espstack, (dump_msg(data_mp)));
 961  960  
 962  961          return (B_TRUE);
 963  962  }
 964  963  
 965  964  /*
 966  965   * Updating use times can be tricky business if the ipsa_haspeer flag is
 967  966   * set.  This function is called once in an SA's lifetime.
 968  967   *
 969  968   * Caller has to REFRELE "assoc" which is passed in.  This function has
 970  969   * to REFRELE any peer SA that is obtained.
 971  970   */
 972  971  static void
 973  972  esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
 974  973  {
 975  974          ipsa_t *inassoc, *outassoc;
 976  975          isaf_t *bucket;
 977  976          sadb_t *sp;
 978  977          int outhash;
 979  978          boolean_t isv6;
 980  979          netstack_t              *ns = assoc->ipsa_netstack;
 981  980          ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
 982  981  
 983  982          /* No peer?  No problem! */
 984  983          if (!assoc->ipsa_haspeer) {
 985  984                  sadb_set_usetime(assoc);
 986  985                  return;
 987  986          }
 988  987  
 989  988          /*
 990  989           * Otherwise, we want to grab both the original assoc and its peer.
 991  990           * There might be a race for this, but if it's a real race, the times
 992  991           * will be out-of-synch by at most a second, and since our time
 993  992           * granularity is a second, this won't be a problem.
 994  993           *
 995  994           * If we need tight synchronization on the peer SA, then we need to
 996  995           * reconsider.
 997  996           */
 998  997  
 999  998          /* Use address length to select IPv6/IPv4 */
1000  999          isv6 = (assoc->ipsa_addrfam == AF_INET6);
1001 1000          sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
1002 1001  
1003 1002          if (inbound) {
1004 1003                  inassoc = assoc;
1005 1004                  if (isv6) {
1006 1005                          outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
1007 1006                              &inassoc->ipsa_dstaddr));
1008 1007                  } else {
1009 1008                          outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
1010 1009                              &inassoc->ipsa_dstaddr));
1011 1010                  }
1012 1011                  bucket = &sp->sdb_of[outhash];
1013 1012                  mutex_enter(&bucket->isaf_lock);
1014 1013                  outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1015 1014                      inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1016 1015                      inassoc->ipsa_addrfam);
1017 1016                  mutex_exit(&bucket->isaf_lock);
1018 1017                  if (outassoc == NULL) {
1019 1018                          /* Q: Do we wish to set haspeer == B_FALSE? */
1020 1019                          esp0dbg(("esp_set_usetime: "
1021 1020                              "can't find peer for inbound.\n"));
1022 1021                          sadb_set_usetime(inassoc);
1023 1022                          return;
1024 1023                  }
1025 1024          } else {
1026 1025                  outassoc = assoc;
1027 1026                  bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1028 1027                  mutex_enter(&bucket->isaf_lock);
1029 1028                  inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1030 1029                      outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1031 1030                      outassoc->ipsa_addrfam);
1032 1031                  mutex_exit(&bucket->isaf_lock);
1033 1032                  if (inassoc == NULL) {
1034 1033                          /* Q: Do we wish to set haspeer == B_FALSE? */
1035 1034                          esp0dbg(("esp_set_usetime: "
1036 1035                              "can't find peer for outbound.\n"));
1037 1036                          sadb_set_usetime(outassoc);
1038 1037                          return;
1039 1038                  }
1040 1039          }
1041 1040  
1042 1041          /* Update usetime on both. */
1043 1042          sadb_set_usetime(inassoc);
1044 1043          sadb_set_usetime(outassoc);
1045 1044  
1046 1045          /*
1047 1046           * REFRELE any peer SA.
1048 1047           *
1049 1048           * Because of the multi-line macro nature of IPSA_REFRELE, keep
1050 1049           * them in { }.
1051 1050           */
1052 1051          if (inbound) {
1053 1052                  IPSA_REFRELE(outassoc);
1054 1053          } else {
1055 1054                  IPSA_REFRELE(inassoc);
1056 1055          }
1057 1056  }
1058 1057  
1059 1058  /*
1060 1059   * Handle ESP inbound data for IPv4 and IPv6.
1061 1060   * On success returns B_TRUE, on failure returns B_FALSE and frees the
1062 1061   * mblk chain data_mp.
1063 1062   */
1064 1063  mblk_t *
1065 1064  esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
1066 1065  {
1067 1066          esph_t *esph = (esph_t *)arg;
1068 1067          ipsa_t *ipsa = ira->ira_ipsec_esp_sa;
1069 1068          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
1070 1069          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1071 1070          ipsec_stack_t   *ipss = ns->netstack_ipsec;
1072 1071  
1073 1072          /*
1074 1073           * We may wish to check replay in-range-only here as an optimization.
1075 1074           * Include the reality check of ipsa->ipsa_replay >
1076 1075           * ipsa->ipsa_replay_wsize for times when it's the first N packets,
1077 1076           * where N == ipsa->ipsa_replay_wsize.
1078 1077           *
1079 1078           * Another check that may come here later is the "collision" check.
1080 1079           * If legitimate packets flow quickly enough, this won't be a problem,
1081 1080           * but collisions may cause authentication algorithm crunching to
1082 1081           * take place when it doesn't need to.
1083 1082           */
1084 1083          if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
1085 1084                  ESP_BUMP_STAT(espstack, replay_early_failures);
1086 1085                  IP_ESP_BUMP_STAT(ipss, in_discards);
1087 1086                  ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
1088 1087                      DROPPER(ipss, ipds_esp_early_replay),
1089 1088                      &espstack->esp_dropper);
1090 1089                  BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1091 1090                  return (NULL);
1092 1091          }
1093 1092  
1094 1093          /*
1095 1094           * Adjust the IP header's payload length to reflect the removal
1096 1095           * of the ICV.
1097 1096           */
1098 1097          if (!(ira->ira_flags & IRAF_IS_IPV4)) {
1099 1098                  ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
1100 1099                  ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
1101 1100                      ipsa->ipsa_mac_len);
1102 1101          } else {
1103 1102                  ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1104 1103                  ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
1105 1104                      ipsa->ipsa_mac_len);
1106 1105          }
1107 1106  
1108 1107          /* submit the request to the crypto framework */
1109 1108          return (esp_submit_req_inbound(data_mp, ira, ipsa,
1110 1109              (uint8_t *)esph - data_mp->b_rptr));
1111 1110  }
1112 1111  
1113 1112  /* XXX refactor me */
1114 1113  /*
1115 1114   * Handle the SADB_GETSPI message.  Create a larval SA.
1116 1115   */
1117 1116  static void
1118 1117  esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
1119 1118  {
1120 1119          ipsa_t *newbie, *target;
1121 1120          isaf_t *outbound, *inbound;
1122 1121          int rc, diagnostic;
1123 1122          sadb_sa_t *assoc;
1124 1123          keysock_out_t *kso;
1125 1124          uint32_t newspi;
1126 1125  
1127 1126          /*
1128 1127           * Randomly generate a proposed SPI value
1129 1128           */
1130 1129          if (cl_inet_getspi != NULL) {
1131 1130                  cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid,
1132 1131                      IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
1133 1132          } else {
1134 1133                  (void) random_get_pseudo_bytes((uint8_t *)&newspi,
1135 1134                      sizeof (uint32_t));
1136 1135          }
1137 1136          newbie = sadb_getspi(ksi, newspi, &diagnostic,
1138 1137              espstack->ipsecesp_netstack, IPPROTO_ESP);
1139 1138  
1140 1139          if (newbie == NULL) {
1141 1140                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic,
1142 1141                      ksi->ks_in_serial);
1143 1142                  return;
1144 1143          } else if (newbie == (ipsa_t *)-1) {
1145 1144                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
1146 1145                      ksi->ks_in_serial);
1147 1146                  return;
1148 1147          }
1149 1148  
1150 1149          /*
1151 1150           * XXX - We may randomly collide.  We really should recover from this.
1152 1151           *       Unfortunately, that could require spending way-too-much-time
1153 1152           *       in here.  For now, let the user retry.
1154 1153           */
1155 1154  
1156 1155          if (newbie->ipsa_addrfam == AF_INET6) {
1157 1156                  outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6,
1158 1157                      *(uint32_t *)(newbie->ipsa_dstaddr));
1159 1158                  inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6,
1160 1159                      newbie->ipsa_spi);
1161 1160          } else {
1162 1161                  ASSERT(newbie->ipsa_addrfam == AF_INET);
1163 1162                  outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4,
1164 1163                      *(uint32_t *)(newbie->ipsa_dstaddr));
1165 1164                  inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4,
1166 1165                      newbie->ipsa_spi);
1167 1166          }
1168 1167  
1169 1168          mutex_enter(&outbound->isaf_lock);
1170 1169          mutex_enter(&inbound->isaf_lock);
1171 1170  
1172 1171          /*
1173 1172           * Check for collisions (i.e. did sadb_getspi() return with something
1174 1173           * that already exists?).
1175 1174           *
1176 1175           * Try outbound first.  Even though SADB_GETSPI is traditionally
1177 1176           * for inbound SAs, you never know what a user might do.
1178 1177           */
1179 1178          target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
1180 1179              newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
1181 1180          if (target == NULL) {
1182 1181                  target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
1183 1182                      newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
1184 1183                      newbie->ipsa_addrfam);
1185 1184          }
1186 1185  
1187 1186          /*
1188 1187           * I don't have collisions elsewhere!
1189 1188           * (Nor will I because I'm still holding inbound/outbound locks.)
1190 1189           */
1191 1190  
1192 1191          if (target != NULL) {
1193 1192                  rc = EEXIST;
1194 1193                  IPSA_REFRELE(target);
1195 1194          } else {
1196 1195                  /*
1197 1196                   * sadb_insertassoc() also checks for collisions, so
1198 1197                   * if there's a colliding entry, rc will be set
1199 1198                   * to EEXIST.
1200 1199                   */
1201 1200                  rc = sadb_insertassoc(newbie, inbound);
1202 1201                  newbie->ipsa_hardexpiretime = gethrestime_sec();
1203 1202                  newbie->ipsa_hardexpiretime +=
1204 1203                      espstack->ipsecesp_larval_timeout;
1205 1204          }
1206 1205  
1207 1206          /*
1208 1207           * Can exit outbound mutex.  Hold inbound until we're done
1209 1208           * with newbie.
1210 1209           */
1211 1210          mutex_exit(&outbound->isaf_lock);
1212 1211  
1213 1212          if (rc != 0) {
1214 1213                  mutex_exit(&inbound->isaf_lock);
1215 1214                  IPSA_REFRELE(newbie);
1216 1215                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc,
1217 1216                      SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1218 1217                  return;
1219 1218          }
1220 1219  
1221 1220  
1222 1221          /* Can write here because I'm still holding the bucket lock. */
1223 1222          newbie->ipsa_type = SADB_SATYPE_ESP;
1224 1223  
1225 1224          /*
1226 1225           * Construct successful return message. We have one thing going
1227 1226           * for us in PF_KEY v2.  That's the fact that
1228 1227           *      sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
1229 1228           */
1230 1229          assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
1231 1230          assoc->sadb_sa_exttype = SADB_EXT_SA;
1232 1231          assoc->sadb_sa_spi = newbie->ipsa_spi;
1233 1232          *((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
1234 1233          mutex_exit(&inbound->isaf_lock);
1235 1234  
1236 1235          /* Convert KEYSOCK_IN to KEYSOCK_OUT. */
1237 1236          kso = (keysock_out_t *)ksi;
1238 1237          kso->ks_out_len = sizeof (*kso);
1239 1238          kso->ks_out_serial = ksi->ks_in_serial;
1240 1239          kso->ks_out_type = KEYSOCK_OUT;
1241 1240  
1242 1241          /*
1243 1242           * Can safely putnext() to esp_pfkey_q, because this is a turnaround
1244 1243           * from the esp_pfkey_q.
1245 1244           */
1246 1245          putnext(espstack->esp_pfkey_q, mp);
1247 1246  }
1248 1247  
1249 1248  /*
1250 1249   * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
1251 1250   * allocated mblk with the ESP header in between the two.
1252 1251   */
1253 1252  static boolean_t
1254 1253  esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint,
1255 1254      ipsecesp_stack_t *espstack)
1256 1255  {
1257 1256          mblk_t *split_mp = mp;
1258 1257          uint_t wheretodiv = divpoint;
1259 1258  
1260 1259          while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
1261 1260                  wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
1262 1261                  split_mp = split_mp->b_cont;
1263 1262                  ASSERT(split_mp != NULL);
1264 1263          }
1265 1264  
1266 1265          if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
1267 1266                  mblk_t *scratch;
1268 1267  
1269 1268                  /* "scratch" is the 2nd half, split_mp is the first. */
1270 1269                  scratch = dupb(split_mp);
1271 1270                  if (scratch == NULL) {
1272 1271                          esp1dbg(espstack,
1273 1272                              ("esp_insert_esp: can't allocate scratch.\n"));
1274 1273                          return (B_FALSE);
1275 1274                  }
1276 1275                  /* NOTE:  dupb() doesn't set b_cont appropriately. */
1277 1276                  scratch->b_cont = split_mp->b_cont;
1278 1277                  scratch->b_rptr += wheretodiv;
1279 1278                  split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
1280 1279                  split_mp->b_cont = scratch;
1281 1280          }
1282 1281          /*
1283 1282           * At this point, split_mp is exactly "wheretodiv" bytes long, and
1284 1283           * holds the end of the pre-ESP part of the datagram.
1285 1284           */
1286 1285          esp_mp->b_cont = split_mp->b_cont;
1287 1286          split_mp->b_cont = esp_mp;
1288 1287  
1289 1288          return (B_TRUE);
1290 1289  }
1291 1290  
1292 1291  /*
1293 1292   * Section 7 of RFC 3947 says:
1294 1293   *
1295 1294   * 7.  Recovering from the Expiring NAT Mappings
1296 1295   *
1297 1296   *    There are cases where NAT box decides to remove mappings that are still
1298 1297   *    alive (for example, when the keepalive interval is too long, or when the
1299 1298   *    NAT box is rebooted).  To recover from this, ends that are NOT behind
1300 1299   *    NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
1301 1300   *    the other end to determine which IP and port addresses should be used.
1302 1301   *    The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
1303 1302   *    DoS attack possibility because the IP address or port of the other host
1304 1303   *    will not change (it is not behind NAT).
1305 1304   *
1306 1305   *    Keepalives cannot be used for these purposes, as they are not
1307 1306   *    authenticated, but any IKE authenticated IKE packet or ESP packet can be
1308 1307   *    used to detect whether the IP address or the port has changed.
1309 1308   *
1310 1309   * The following function will check an SA and its explicitly-set pair to see
1311 1310   * if the NAT-T remote port matches the received packet (which must have
1312 1311   * passed ESP authentication, see esp_in_done() for the caller context).  If
1313 1312   * there is a mismatch, the SAs are updated.  It is not important if we race
1314 1313   * with a transmitting thread, as if there is a transmitting thread, it will
1315 1314   * merely emit a packet that will most-likely be dropped.
1316 1315   *
1317 1316   * "ports" are ordered src,dst, and assoc is an inbound SA, where src should
1318 1317   * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
1319 1318   */
1320 1319  #ifdef _LITTLE_ENDIAN
1321 1320  #define FIRST_16(x) ((x) & 0xFFFF)
1322 1321  #define NEXT_16(x) (((x) >> 16) & 0xFFFF)
1323 1322  #else
1324 1323  #define FIRST_16(x) (((x) >> 16) & 0xFFFF)
1325 1324  #define NEXT_16(x) ((x) & 0xFFFF)
1326 1325  #endif
1327 1326  static void
1328 1327  esp_port_freshness(uint32_t ports, ipsa_t *assoc)
1329 1328  {
                   /*
                    * Split the combined port word.  The halves are compared below
                    * against htons(IPPORT_IKE_NATT), i.e. they are kept in network
                    * byte order.  "local" is only consulted by the ASSERT.
                    */
1330 1329          uint16_t remote = FIRST_16(ports);
1331 1330          uint16_t local = NEXT_16(ports);
1332 1331          ipsa_t *outbound_peer;
1333 1332          isaf_t *bucket;
1334 1333          ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
1335 1334  
1336 1335          /* We found a conn_t, therefore local != 0. */
1337 1336          ASSERT(local != 0);
1338 1337          /* Assume an IPv4 SA. */
1339 1338          ASSERT(assoc->ipsa_addrfam == AF_INET);
1340 1339  
1341 1340          /*
1342 1341           * Nothing to update in any of the following cases:
1343 1342           * - On-the-wire remote port == 0: something's very wrong.
1344 1343           * - ipsa_otherspi == 0: an unpaired SA is useless to us.
1345 1344           * - IPSA_F_BEHIND_NAT is set: we are behind the NAT, don't bother.
1346 1345           * - The SA's remote NAT port is zero (which defaults to 4500) and
1347 1346           *   the packet arrived from IPPORT_IKE_NATT.
                    * - The ports already match.
1348 1347           */
1349 1348          if (remote == 0 || assoc->ipsa_otherspi == 0 ||
1350 1349              (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) ||
1351 1350              (assoc->ipsa_remote_nat_port == 0 &&
1352 1351              remote == htons(IPPORT_IKE_NATT)) ||
1353 1352              remote == assoc->ipsa_remote_nat_port)
1354 1353                  return;
1355 1354  
1356 1355          /* Try and snag the peer.   NOTE:  Assume IPv4 for now. */
1357 1356          bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4),
1358 1357              assoc->ipsa_srcaddr[0]);
1359 1358          mutex_enter(&bucket->isaf_lock);
                   /*
                    * The peer is looked up by the paired SPI with this SA's
                    * addresses passed in swapped order (dst first, then src).
                    */
1360 1359          outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi,
1361 1360              assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET);
1362 1361          mutex_exit(&bucket->isaf_lock);
1363 1362  
1364 1363          /* We probably lost a race to a deleting or expiring thread. */
1365 1364          if (outbound_peer == NULL)
1366 1365                  return;
1367 1366  
1368 1367          /*
1369 1368           * Hold the mutexes for both SAs so we don't race another inbound
1370 1369           * thread.  A lock-entry order shouldn't matter, since all other
1371 1370           * per-ipsa locks are individually held-then-released.
1372 1371           *
1373 1372           * Luckily, this has nothing to do with the remote-NAT address,
1374 1373           * so we don't have to re-scribble the cached-checksum differential.
1375 1374           */
1376 1375          mutex_enter(&outbound_peer->ipsa_lock);
1377 1376          mutex_enter(&assoc->ipsa_lock);
1378 1377          outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port =
1379 1378              remote;
1380 1379          mutex_exit(&assoc->ipsa_lock);
1381 1380          mutex_exit(&outbound_peer->ipsa_lock);
                   /*
                    * NOTE(review): presumably releases the hold taken by
                    * ipsec_getassocbyspi() - confirm against that function.
                    */
1382 1381          IPSA_REFRELE(outbound_peer);
1383 1382          ESP_BUMP_STAT(espstack, sa_port_renumbers);
1384 1383  }
1385 1384  /*
1386 1385   * Finish processing of an inbound ESP packet after processing by the
1387 1386   * crypto framework.
1388 1387   * - Check the replay window and refresh NAT-T ports (authenticated SAs).
1389 1388   * - Update SA usage time and byte lifetime.
1390 1389   * - Remove the ESP header.
1391 1390   * If authentication was performed on the packet, this function is called
1392 1391   * only if the authentication succeeded.
1393 1392   *
            * data_mp is the packet, ira its receive attributes (ira_ipsec_esp_sa
            * must be set) and ic the crypto state the request was submitted with.
            *
            * Returns the mblk the caller should continue processing.  Returns NULL
            * when the packet was dropped (replay failure, byte expiration, header
            * strip failure, label replacement failure) or was queued by
            * sadb_buf_pkt(); in those cases the caller must not touch data_mp.
1394 1393   */
1395 1394  static mblk_t *
1396 1395  esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
1397 1396  {
1398 1397          ipsa_t *assoc;
1399 1398          uint_t espstart;
1400 1399          uint32_t ivlen = 0;
1401 1400          uint_t processed_len;
1402 1401          esph_t *esph;
1403 1402          kstat_named_t *counter;
1404 1403          boolean_t is_natt;
1405 1404          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
1406 1405          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1407 1406          ipsec_stack_t   *ipss = ns->netstack_ipsec;
1408 1407  
1409 1408          assoc = ira->ira_ipsec_esp_sa;
1410 1409          ASSERT(assoc != NULL);
1411 1410  
1412 1411          is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
1413 1412  
1414 1413          /*
                    * Get the offset of the ESP header and the number of bytes that
                    * were processed; where these live in ic depends on which crypto
                    * operation was submitted.
                    */
1415 1414          if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
1416 1415                  /* authentication-only ESP */
1417 1416                  espstart = ic->ic_crypto_data.cd_offset;
1418 1417                  processed_len = ic->ic_crypto_data.cd_length;
1419 1418          } else {
1420 1419                  /* encryption present */
1421 1420                  ivlen = assoc->ipsa_iv_len;
1422 1421                  if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
1423 1422                          /*
                                    * encryption-only ESP: cd_offset points past the
                                    * ESP header and IV, so back up over both.
                                    */
1424 1423                          espstart = ic->ic_crypto_data.cd_offset -
1425 1424                              sizeof (esph_t) - assoc->ipsa_iv_len;
1426 1425                          processed_len = ic->ic_crypto_data.cd_length +
1427 1426                              ivlen;
1428 1427                  } else {
1429 1428                          /* encryption with authentication */
1430 1429                          espstart = ic->ic_crypto_dual_data.dd_offset1;
1431 1430                          processed_len = ic->ic_crypto_dual_data.dd_len2 +
1432 1431                              ivlen;
1433 1432                  }
1434 1433          }
1435 1434  
1436 1435          esph = (esph_t *)(data_mp->b_rptr + espstart);
1437 1436  
1438 1437          if (assoc->ipsa_auth_alg != IPSA_AALG_NONE ||
1439 1438              (assoc->ipsa_flags & IPSA_F_COMBINED)) {
1440 1439                  /*
1441 1440                   * Authentication passed if we reach this point.
1442 1441                   * Packets with authentication will have the ICV
1443 1442                   * after the crypto data. Adjust b_wptr before
1444 1443                   * making padlen checks.
1445 1444                   */
1446 1445                  ESP_BUMP_STAT(espstack, good_auth);
1447 1446                  data_mp->b_wptr -= assoc->ipsa_mac_len;
1448 1447  
1449 1448                  /*
1450 1449                   * Check replay window here!
1451 1450                   * For right now, assume keysock will set the replay window
1452 1451                   * size to zero for SAs that have an unspecified sender.
1453 1452                   * This may change...
1454 1453                   */
1455 1454  
1456 1455                  if (!sadb_replay_check(assoc, esph->esph_replay)) {
1457 1456                          /*
1458 1457                           * Log the event. As of now we print out an event.
1459 1458                           * Do not print the replay failure number, or else
1460 1459                           * syslog cannot collate the error messages.  Printing
1461 1460                           * the replay number that failed opens a denial-of-
1462 1461                           * service attack.
1463 1462                           */
1464 1463                          ipsec_assocfailure(info.mi_idnum, 0, 0,
1465 1464                              SL_ERROR | SL_WARN,
1466 1465                              "Replay failed for ESP spi 0x%x, dst %s.\n",
1467 1466                              assoc->ipsa_spi, assoc->ipsa_dstaddr,
1468 1467                              assoc->ipsa_addrfam, espstack->ipsecesp_netstack);
1469 1468                          ESP_BUMP_STAT(espstack, replay_failures);
1470 1469                          counter = DROPPER(ipss, ipds_esp_replay);
1471 1470                          goto drop_and_bail;
1472 1471                  }
1473 1472  
1474 1473                  if (is_natt) {
                                   /* Only authenticated packets may move the NAT port. */
1475 1474                          ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS);
1476 1475                          ASSERT(ira->ira_esp_udp_ports != 0);
1477 1476                          esp_port_freshness(ira->ira_esp_udp_ports, assoc);
1478 1477                  }
1479 1478          }
1480 1479  
1481 1480          esp_set_usetime(assoc, B_TRUE);
1482 1481  
1483 1482          if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
1484 1483                  /* The ipsa has hit hard expiration, LOG and AUDIT. */
1485 1484                  ipsec_assocfailure(info.mi_idnum, 0, 0,
1486 1485                      SL_ERROR | SL_WARN,
1487 1486                      "ESP association 0x%x, dst %s had bytes expire.\n",
1488 1487                      assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1489 1488                      espstack->ipsecesp_netstack);
1490 1489                  ESP_BUMP_STAT(espstack, bytes_expired);
1491 1490                  counter = DROPPER(ipss, ipds_esp_bytes_expire);
1492 1491                  goto drop_and_bail;
1493 1492          }
1494 1493  
1495 1494          /*
1496 1495           * Remove ESP header and padding from packet.  I hope the compiler
1497 1496           * spews "branch, predict taken" code for this.
                    * On failure esp_strip_header() is handed &counter so that the
                    * drop_and_bail path below has a drop reason to report.
1498 1497           */
1499 1498  
1500 1499          if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4),
1501 1500              ivlen, &counter, espstack)) {
1502 1501  
1503 1502                  if (is_system_labeled() && assoc->ipsa_tsl != NULL) {
1504 1503                          if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) {
                                           /*
                                            * Charge the drop to the ESP no-memory
                                            * counter; this is ESP code, so the AH
                                            * counter would misattribute it.
                                            */
1505 1504                                  ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
1506 1505                                      DROPPER(ipss, ipds_esp_nomem),
1507 1506                                      &espstack->esp_dropper);
1508 1507                                  BUMP_MIB(ira->ira_ill->ill_ip_mib,
1509 1508                                      ipIfStatsInDiscards);
1510 1509                                  return (NULL);
1511 1510                          }
1512 1511                  }
1513 1512                  if (is_natt)
1514 1513                          return (esp_fix_natt_checksums(data_mp, assoc));
1515 1514  
1516 1515                  if (assoc->ipsa_state == IPSA_STATE_IDLE) {
1517 1516                          /*
1518 1517                           * Cluster buffering case.  Tell caller that we're
1519 1518                           * handling the packet.
1520 1519                           */
1521 1520                          sadb_buf_pkt(assoc, data_mp, ira);
1522 1521                          return (NULL);
1523 1522                  }
1524 1523  
1525 1524                  return (data_mp);
1526 1525          }
1527 1526  
1528 1527          esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n"));
1529 1528  drop_and_bail:
                   /* Common drop path: "counter" names the reason for the drop. */
1530 1529          IP_ESP_BUMP_STAT(ipss, in_discards);
1531 1530          ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter,
1532 1531              &espstack->esp_dropper);
1533 1532          BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1534 1533          return (NULL);
1535 1534  }
1536 1535  
1537 1536  /*
1538 1537   * Called upon failing the inbound ICV check.  Bumps the bad_auth and
1539 1538   * in_discards statistics, logs the failure through ipsec_assocfailure()
            * and hands the message mp to ip_drop_packet(); the caller must not use
            * mp afterwards.  ira supplies the SA (ira_ipsec_esp_sa) and the ill.
            *
            * This function does not bump the ipIfStatsInDiscards MIB counter itself.
1540 1539   */
1541 1540  static void
1542 1541  esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira)
1543 1542  {
1544 1543          ipsa_t          *assoc = ira->ira_ipsec_esp_sa;
1545 1544          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
1546 1545          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1547 1546          ipsec_stack_t   *ipss = ns->netstack_ipsec;
1548 1547  
1549 1548          /*
1550 1549           * Log the event. Don't print to the console, block
1551 1550           * potential denial-of-service attack.
1552 1551           */
1553 1552          ESP_BUMP_STAT(espstack, bad_auth);
1554 1553  
1555 1554          ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
1556 1555              "ESP Authentication failed for spi 0x%x, dst %s.\n",
1557 1556              assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1558 1557              espstack->ipsecesp_netstack);
1559 1558  
1560 1559          IP_ESP_BUMP_STAT(ipss, in_discards);
1561 1560          ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1562 1561              DROPPER(ipss, ipds_esp_bad_auth),
1563 1562              &espstack->esp_dropper);
1564 1563  }
1565 1564  
1566 1565  
1567 1566  /*
1568 1567   * Invoked for outbound packets after ESP processing. If the packet
1569 1568   * also requires AH, performs the AH SA selection and AH processing.
1570 1569   *
1571 1570   * Returns data_mp (possibly with AH added) unless data_mp was consumed
1572 1571   * due to an error, or queued due to async. crypto or an ACQUIRE trigger.
            * In those cases NULL is returned.
1573 1572   */
1574 1573  static mblk_t *
1575 1574  esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa)
1576 1575  {
1577 1576          ipsec_action_t *ap;
1578 1577  
                   /*
                    * Prefer the action attached to the ixa; otherwise fall back to
                    * the action of the ixa's policy.
                    * NOTE(review): ixa_ipsec_policy is dereferenced without a NULL
                    * check - presumably one of the two is always set; confirm.
                    */
1579 1578          ap = ixa->ixa_ipsec_action;
1580 1579          if (ap == NULL) {
1581 1580                  ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
1582 1581                  ap = pp->ipsp_act;
1583 1582          }
1584 1583  
                   /* No AH wanted: the packet is passed back unchanged. */
1585 1584          if (!ap->ipa_want_ah)
1586 1585                  return (data_mp);
1587 1586  
1588 1587          /*
1589 1588           * Normally the AH SA would have already been put in place
1590 1589           * but it could have been flushed so we need to look for it.
                    * If none is found, the packet is given to sadb_acquire().
1591 1590           */
1592 1591          if (ixa->ixa_ipsec_ah_sa == NULL) {
1593 1592                  if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) {
1594 1593                          sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE);
1595 1594                          return (NULL);
1596 1595                  }
1597 1596          }
1598 1597          ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
1599 1598  
                   /* Run the AH SA's output function; its result is our result. */
1600 1599          data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa);
1601 1600          return (data_mp);
1602 1601  }
1603 1602  
1604 1603  
1605 1604  /*
1606 1605   * Kernel crypto framework callback invoked after completion of async
1607 1606   * crypto requests for outbound packets.
            *
            * arg is the mblk chain that was registered with the request (crypto
            * state, transmit attributes and the packet); status is the crypto
            * framework's completion status.
1608 1607   */
1609 1608  static void
1610 1609  esp_kcf_callback_outbound(void *arg, int status)
1611 1610  {
1612 1611          mblk_t          *mp = (mblk_t *)arg;
1613 1612          mblk_t          *async_mp;
1614 1613          netstack_t      *ns;
1615 1614          ipsec_stack_t   *ipss;
1616 1615          ipsecesp_stack_t *espstack;
1617 1616          mblk_t          *data_mp;
1618 1617          ip_xmit_attr_t  ixas;
1619 1618          ipsec_crypto_t  *ic;
1620 1619          ill_t           *ill;
1621 1620  
1622 1621          /*
1623 1622           * First remove the ipsec_crypto_t mblk
1624 1623           * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
1625 1624           */
1626 1625          async_mp = ipsec_remove_crypto_data(mp, &ic);
1627 1626          ASSERT(async_mp != NULL);
1628 1627  
1629 1628          /*
1630 1629           * Extract the ip_xmit_attr_t from the first mblk.
1631 1630           * Verifies that the netstack and ill is still around; could
1632 1631           * have vanished while KCF was doing its work.
1633 1632           * On successful return we have a nce_t and the ill/ipst can't
1634 1633           * disappear until we do the nce_refrele in ixa_cleanup.
1635 1634           */
1636 1635          data_mp = async_mp->b_cont;
1637 1636          async_mp->b_cont = NULL;
1638 1637          if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
1639 1638                  /* Disappeared on us - no ill/ipst for MIB */
1640 1639                  /* We have nowhere to do stats since ixa_ipst could be NULL */
                           /*
                            * NOTE(review): ixas is read here and passed to
                            * ixa_cleanup() below even though the call failed, so
                            * ip_xmit_attr_from_mblk() presumably initializes it on
                            * failure as well - confirm.
                            */
1641 1640                  if (ixas.ixa_nce != NULL) {
1642 1641                          ill = ixas.ixa_nce->nce_ill;
1643 1642                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1644 1643                          ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
1645 1644                  }
1646 1645                  freemsg(data_mp);
1647 1646                  goto done;
1648 1647          }
1649 1648          ns = ixas.ixa_ipst->ips_netstack;
1650 1649          espstack = ns->netstack_ipsecesp;
1651 1650          ipss = ns->netstack_ipsec;
1652 1651          ill = ixas.ixa_nce->nce_ill;
1653 1652  
1654 1653          if (status == CRYPTO_SUCCESS) {
1655 1654                  /*
1656 1655                   * If a ICV was computed, it was stored by the
1657 1656                   * crypto framework at the end of the packet.
1658 1657                   */
1659 1658                  ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1660 1659  
1661 1660                  esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE);
1662 1661                  /* NAT-T packet: IPv4 carrying UDP needs its checksums set. */
1663 1662                  if (IPH_HDR_VERSION(ipha) == IP_VERSION &&
1664 1663                      ipha->ipha_protocol == IPPROTO_UDP)
1665 1664                          esp_prepare_udp(ns, data_mp, ipha);
1666 1665  
1667 1666                  /* do AH processing if needed; NULL means consumed or queued */
1668 1667                  data_mp = esp_do_outbound_ah(data_mp, &ixas);
1669 1668                  if (data_mp == NULL)
1670 1669                          goto done;
1671 1670  
1672 1671                  (void) ip_output_post_ipsec(data_mp, &ixas);
1673 1672          } else {
1674 1673                  /* Outbound shouldn't see invalid MAC */
1675 1674                  ASSERT(status != CRYPTO_INVALID_MAC);
1676 1675  
1677 1676                  esp1dbg(espstack,
1678 1677                      ("esp_kcf_callback_outbound: crypto failed with 0x%x\n",
1679 1678                      status));
1680 1679                  ESP_BUMP_STAT(espstack, crypto_failures);
1681 1680                  ESP_BUMP_STAT(espstack, out_discards);
1682 1681                  ip_drop_packet(data_mp, B_FALSE, ill,
1683 1682                      DROPPER(ipss, ipds_esp_crypto_failed),
1684 1683                      &espstack->esp_dropper);
1685 1684                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1686 1685          }
1687 1686  done:
                   /* Every path releases the attributes and the crypto state mblk. */
1688 1687          ixa_cleanup(&ixas);
1689 1688          (void) ipsec_free_crypto_data(mp);
1690 1689  }
1691 1690  
1692 1691  /*
1693 1692   * Kernel crypto framework callback invoked after completion of async
1694 1693   * crypto requests for inbound packets.
            *
            * arg is the mblk chain that was registered with the request (crypto
            * state, receive attributes and the packet); status is the crypto
            * framework's completion status.
1695 1694   */
1696 1695  static void
1697 1696  esp_kcf_callback_inbound(void *arg, int status)
1698 1697  {
1699 1698          mblk_t          *mp = (mblk_t *)arg;
1700 1699          mblk_t          *async_mp;
1701 1700          netstack_t      *ns;
1702 1701          ipsecesp_stack_t *espstack;
1703 1702          ipsec_stack_t   *ipss;
1704 1703          mblk_t          *data_mp;
1705 1704          ip_recv_attr_t  iras;
1706 1705          ipsec_crypto_t  *ic;
1707 1706  
1708 1707          /*
1709 1708           * First remove the ipsec_crypto_t mblk
1710 1709           * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
1711 1710           */
1712 1711          async_mp = ipsec_remove_crypto_data(mp, &ic);
1713 1712          ASSERT(async_mp != NULL);
1714 1713  
1715 1714          /*
1716 1715           * Extract the ip_recv_attr_t from the first mblk.
1717 1716           * Verifies that the netstack and ill is still around; could
1718 1717           * have vanished while KCF was doing its work.
1719 1718           */
1720 1719          data_mp = async_mp->b_cont;
1721 1720          async_mp->b_cont = NULL;
1722 1721          if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
1723 1722                  /* The ill or ip_stack_t disappeared on us */
1724 1723                  ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
1725 1724                  freemsg(data_mp);
1726 1725                  goto done;
1727 1726          }
1728 1727  
1729 1728          ns = iras.ira_ill->ill_ipst->ips_netstack;
1730 1729          espstack = ns->netstack_ipsecesp;
1731 1730          ipss = ns->netstack_ipsec;
1732 1731  
1733 1732          if (status == CRYPTO_SUCCESS) {
                           /* NULL means esp_in_done() dropped or queued the packet. */
1734 1733                  data_mp = esp_in_done(data_mp, &iras, ic);
1735 1734                  if (data_mp == NULL)
1736 1735                          goto done;
1737 1736  
1738 1737                  /* finish IPsec processing */
1739 1738                  ip_input_post_ipsec(data_mp, &iras);
1740 1739          } else if (status == CRYPTO_INVALID_MAC) {
                           /*
                            * NOTE(review): unlike the synchronous path in
                            * esp_submit_req_inbound(), ipIfStatsInDiscards is not
                            * bumped here before esp_log_bad_auth() - confirm whether
                            * that is intended.
                            */
1741 1740                  esp_log_bad_auth(data_mp, &iras);
1742 1741          } else {
1743 1742                  esp1dbg(espstack,
1744 1743                      ("esp_kcf_callback: crypto failed with 0x%x\n",
1745 1744                      status));
1746 1745                  ESP_BUMP_STAT(espstack, crypto_failures);
1747 1746                  IP_ESP_BUMP_STAT(ipss, in_discards);
1748 1747                  ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
1749 1748                      DROPPER(ipss, ipds_esp_crypto_failed),
1750 1749                      &espstack->esp_dropper);
1751 1750                  BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1752 1751          }
1753 1752  done:
                   /* Every path releases the attributes and the crypto state mblk. */
1754 1753          ira_cleanup(&iras, B_TRUE);
1755 1754          (void) ipsec_free_crypto_data(mp);
1756 1755  }
1757 1756  
1758 1757  /*
1759 1758   * Invoked on crypto framework failure during inbound and outbound processing.
            *
            * Hands data_mp to ip_drop_packet() with the ipds_esp_crypto_failed
            * reason, bumps crypto_failures, and bumps the discard statistic that
            * matches the direction given by is_inbound.  kef_rc is only used for
            * the debug message.  The ipIfStats*Discards MIB counters are not
            * touched here.
1760 1759   */
1761 1760  static void
1762 1761  esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
1763 1762      ill_t *ill, ipsecesp_stack_t *espstack)
1764 1763  {
1765 1764          ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
1766 1765  
1767 1766          esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
1768 1767              is_inbound ? "inbound" : "outbound", kef_rc));
1769 1768          ip_drop_packet(data_mp, is_inbound, ill,
1770 1769              DROPPER(ipss, ipds_esp_crypto_failed),
1771 1770              &espstack->esp_dropper);
1772 1771          ESP_BUMP_STAT(espstack, crypto_failures);
                   /* Inbound discards are kept per-ipsec-stack, outbound per-ESP. */
1773 1772          if (is_inbound)
1774 1773                  IP_ESP_BUMP_STAT(ipss, in_discards);
1775 1774          else
1776 1775                  ESP_BUMP_STAT(espstack, out_discards);
1777 1776  }
1778 1777  
1779 1778  /*
1780 1779   * A statement-equivalent macro, _cr MUST point to a modifiable
1781 1780   * crypto_call_req_t.
            *
            * Fills in the request flags, callback argument (_mp) and callback
            * function.  The expansion is three separate statements that are not
            * wrapped in braces, so it must only be used as a full statement inside
            * a braced block, never as the unbraced body of an if/else/loop.
1782 1781   */
1783 1782  #define ESP_INIT_CALLREQ(_cr, _mp, _callback)                           \
1784 1783          (_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE; \
1785 1784          (_cr)->cr_callback_arg = (_mp);                         \
1786 1785          (_cr)->cr_callback_func = (_callback)
1787 1786  
           /*
            * Describe an ICV of icvlen bytes located at icvbuf as a raw (single
            * buffer) crypto data argument.  icvlen is evaluated twice.
            */
1788 1787  #define ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {                      \
1789 1788          (mac)->cd_format = CRYPTO_DATA_RAW;                             \
1790 1789          (mac)->cd_offset = 0;                                           \
1791 1790          (mac)->cd_length = icvlen;                                      \
1792 1791          (mac)->cd_raw.iov_base = (char *)icvbuf;                        \
1793 1792          (mac)->cd_raw.iov_len = icvlen;                                 \
1794 1793  }
1795 1794  
           /*
            * Describe len bytes starting off bytes into mp.  When the first mblk
            * alone holds at least off + len bytes the raw format is used, with the
            * buffer covering that whole mblk; otherwise the mblk format is used.
            * The arguments are evaluated more than once.
            */
1796 1795  #define ESP_INIT_CRYPTO_DATA(data, mp, off, len) {                      \
1797 1796          if (MBLKL(mp) >= (len) + (off)) {                               \
1798 1797                  (data)->cd_format = CRYPTO_DATA_RAW;                    \
1799 1798                  (data)->cd_raw.iov_base = (char *)(mp)->b_rptr;         \
1800 1799                  (data)->cd_raw.iov_len = MBLKL(mp);                     \
1801 1800                  (data)->cd_offset = off;                                \
1802 1801          } else {                                                        \
1803 1802                  (data)->cd_format = CRYPTO_DATA_MBLK;                   \
1804 1803                  (data)->cd_mp = mp;                                     \
1805 1804                  (data)->cd_offset = off;                                \
1806 1805          }                                                               \
1807 1806          (data)->cd_length = len;                                        \
1808 1807  }
1809 1808  
           /*
            * Describe two regions of mp, (off1, len1) and (off2, len2), for a dual
            * operation.  The mblk format is always used.
            */
1810 1809  #define ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {   \
1811 1810          (data)->dd_format = CRYPTO_DATA_MBLK;                           \
1812 1811          (data)->dd_mp = mp;                                             \
1813 1812          (data)->dd_len1 = len1;                                         \
1814 1813          (data)->dd_offset1 = off1;                                      \
1815 1814          (data)->dd_len2 = len2;                                         \
1816 1815          (data)->dd_offset2 = off2;                                      \
1817 1816  }
1818 1817  
1819 1818  /*
1820 1819   * Submit an inbound ESP packet to the crypto framework for ICV
1821 1820   * verification and/or decryption, as required by the SA.
            *
            * esp_mp is the packet, ira its receive attributes, assoc the inbound
            * SA and esph_offset the offset of the ESP header from esp_mp->b_rptr.
            *
            * Returns the packet if the request completed synchronously and
            * esp_in_done() accepted it.  Returns NULL if it failed (InDiscards is
            * incremented and the packet is consumed) or if it is pending, in which
            * case esp_kcf_callback_inbound() finishes the job.
1822 1821   */
1823 1822  static mblk_t *
1824 1823  esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira,
1825 1824      ipsa_t *assoc, uint_t esph_offset)
1826 1825  {
1827 1826          uint_t auth_offset, msg_len, auth_len;
1828 1827          crypto_call_req_t call_req, *callrp;
1829 1828          mblk_t *mp;
1830 1829          esph_t *esph_ptr;
1831 1830          int kef_rc;
1832 1831          uint_t icv_len = assoc->ipsa_mac_len;
1833 1832          crypto_ctx_template_t auth_ctx_tmpl;
1834 1833          boolean_t do_auth, do_encr, force;
1835 1834          uint_t encr_offset, encr_len;
1836 1835          uint_t iv_len = assoc->ipsa_iv_len;
1837 1836          crypto_ctx_template_t encr_ctx_tmpl;
1838 1837          ipsec_crypto_t  *ic, icstack;
1839 1838          uchar_t *iv_ptr;
1840 1839          netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
1841 1840          ipsec_stack_t *ipss = ns->netstack_ipsec;
1842 1841          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1843 1842  
1844 1843          do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
1845 1844          do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
                   /* IPSA_F_ASYNC asks for the request to be queued to the framework. */
1846 1845          force = (assoc->ipsa_flags & IPSA_F_ASYNC);
1847 1846  
                   /*
                    * Default result when no crypto call is made below (neither
                    * do_auth nor do_encr).
                    */
1848 1847  #ifdef IPSEC_LATENCY_TEST
1849 1848          kef_rc = CRYPTO_SUCCESS;
1850 1849  #else
1851 1850          kef_rc = CRYPTO_FAILED;
1852 1851  #endif
1853 1852  
1854 1853          /*
1855 1854           * An inbound packet is of the form:
1856 1855           * [IP,options,ESP,IV,data,ICV,pad]
1857 1856           */
1858 1857          esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
1859 1858          iv_ptr = (uchar_t *)(esph_ptr + 1);
1860 1859          /* Packet length starting at IP header ending after ESP ICV. */
1861 1860          msg_len = MBLKL(esp_mp);
1862 1861  
1863 1862          encr_offset = esph_offset + sizeof (esph_t) + iv_len;
1864 1863          encr_len = msg_len - encr_offset;
1865 1864  
1866 1865          /*
1867 1866           * Counter mode algs need a nonce. This is setup in sadb_common_add().
1868 1867           * If for some reason we are using a SA which does not have a nonce
1869 1868           * then we must fail here.
1870 1869           */
1871 1870          if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
1872 1871              (assoc->ipsa_nonce == NULL)) {
1873 1872                  ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill,
1874 1873                      DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
1875 1874                  return (NULL);
1876 1875          }
1877 1876  
1878 1877          if (force) {
1879 1878                  /* We are doing asynch; allocate mblks to hold state */
1880 1879                  if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
1881 1880                      (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
1882 1881                          BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1883 1882                          ip_drop_input("ipIfStatsInDiscards", esp_mp,
1884 1883                              ira->ira_ill);
                                   /*
                                    * ip_drop_input() only records the drop; the
                                    * packet is still ours.  We return NULL, which
                                    * tells the caller the packet is gone, so it
                                    * must be freed here or it leaks.
                                    */
                                   freemsg(esp_mp);
1885 1884                          return (NULL);
1886 1885                  }
                           /* State mblks go in front of the packet: mp -> ... -> esp_mp. */
1887 1886                  linkb(mp, esp_mp);
1888 1887                  callrp = &call_req;
1889 1888                  ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound);
1890 1889          } else {
1891 1890                  /*
1892 1891                   * If we know we are going to do sync then ipsec_crypto_t
1893 1892                   * should be on the stack.
1894 1893                   */
1895 1894                  ic = &icstack;
1896 1895                  bzero(ic, sizeof (*ic));
1897 1896                  callrp = NULL;
1898 1897          }
1899 1898  
1900 1899          if (do_auth) {
1901 1900                  /* authentication context template */
1902 1901                  IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
1903 1902                      auth_ctx_tmpl);
1904 1903  
1905 1904                  /* ICV to be verified: the last icv_len bytes of the mblk */
1906 1905                  ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
1907 1906                      icv_len, esp_mp->b_wptr - icv_len);
1908 1907  
1909 1908                  /* authentication starts at the ESP header */
1910 1909                  auth_offset = esph_offset;
1911 1910                  auth_len = msg_len - auth_offset - icv_len;
1912 1911                  if (!do_encr) {
1913 1912                          /* authentication only */
1914 1913                          /* initialize input data argument */
1915 1914                          ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
1916 1915                              esp_mp, auth_offset, auth_len);
1917 1916  
1918 1917                          /* call the crypto framework */
1919 1918                          kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
1920 1919                              &ic->ic_crypto_data,
1921 1920                              &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
1922 1921                              &ic->ic_crypto_mac, callrp);
1923 1922                  }
1924 1923          }
1925 1924  
1926 1925          if (do_encr) {
1927 1926                  /* encryption template */
1928 1927                  IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
1929 1928                      encr_ctx_tmpl);
1930 1929  
1931 1930                  /* Call the nonce update function. Also passes in IV */
1932 1931                  (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len,
1933 1932                      iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);
1934 1933  
1935 1934                  if (!do_auth) {
1936 1935                          /* decryption only */
1937 1936                          /* initialize input data argument */
1938 1937                          ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
1939 1938                              esp_mp, encr_offset, encr_len);
1940 1939  
1941 1940                          /* call the crypto framework */
1942 1941                          kef_rc = crypto_decrypt((crypto_mechanism_t *)
1943 1942                              &ic->ic_cmm, &ic->ic_crypto_data,
1944 1943                              &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
1945 1944                              NULL, callrp);
1946 1945                  }
1947 1946          }
1948 1947  
1949 1948          if (do_auth && do_encr) {
1950 1949                  /* dual operation */
1951 1950                  /*
                            * initialize input data argument; encr_len still
                            * includes the ICV here, so it is subtracted.
                            */
1952 1951                  ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
1953 1952                      esp_mp, auth_offset, auth_len,
1954 1953                      encr_offset, encr_len - icv_len);
1955 1954  
1956 1955                  /* specify IV */
1957 1956                  ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
1958 1957  
1959 1958                  /* call the framework */
1960 1959                  kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
1961 1960                      &assoc->ipsa_emech, &ic->ic_crypto_dual_data,
1962 1961                      &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
1963 1962                      auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac,
1964 1963                      NULL, callrp);
1965 1964          }
1966 1965  
1967 1966          switch (kef_rc) {
1968 1967          case CRYPTO_SUCCESS:
1969 1968                  ESP_BUMP_STAT(espstack, crypto_sync);
1970 1969                  esp_mp = esp_in_done(esp_mp, ira, ic);
1971 1970                  if (force) {
1972 1971                          /* Free mp after we are done with ic */
1973 1972                          mp = ipsec_free_crypto_data(mp);
1974 1973                          (void) ip_recv_attr_free_mblk(mp);
1975 1974                  }
1976 1975                  return (esp_mp);
1977 1976          case CRYPTO_QUEUED:
1978 1977                  /* esp_kcf_callback_inbound() will be invoked on completion */
1979 1978                  ESP_BUMP_STAT(espstack, crypto_async);
1980 1979                  return (NULL);
1981 1980          case CRYPTO_INVALID_MAC:
1982 1981                  if (force) {
                                   /* Strip the state mblks to get the packet back. */
1983 1982                          mp = ipsec_free_crypto_data(mp);
1984 1983                          esp_mp = ip_recv_attr_free_mblk(mp);
1985 1984                  }
1986 1985                  ESP_BUMP_STAT(espstack, crypto_sync);
1987 1986                  BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1988 1987                  esp_log_bad_auth(esp_mp, ira);
1989 1988                  /* esp_mp was passed to ip_drop_packet */
1990 1989                  return (NULL);
1991 1990          }
1992 1991  
                   /* Any other status is a crypto framework failure. */
1993 1992          if (force) {
1994 1993                  mp = ipsec_free_crypto_data(mp);
1995 1994                  esp_mp = ip_recv_attr_free_mblk(mp);
1996 1995          }
1997 1996          BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1998 1997          esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack);
1999 1998          /* esp_mp was passed to ip_drop_packet */
2000 1999          return (NULL);
2001 2000  }
2002 2001  
2003 2002  /*
2004 2003   * Compute the IP and UDP checksums -- common code for both keepalives and
2005 2004   * actual ESP-in-UDP packets.  Be flexible with multiple mblks because ESP
2006 2005   * uses mblk-insertion to insert the UDP header.
2007 2006   * TODO - If there is an easy way to prep a packet for HW checksums, make
2008 2007   * it happen here.
2009 2008   * Note that this is used both before calling ip_output_simple and
2010 2009   * in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the
2011 2010   * latter.
            *
            * ns   - netstack; consulted for the UDP us_do_checksum tunable and
            *        for the IP software-checksum statistics.
            * mp   - first mblk of the packet; must hold at least an ipha_t.
            * ipha - IPv4 header of the packet.  Its ipha_length must already be
            *        final, because it feeds the UDP pseudo-header sum.
            *
            * The IPv4 header checksum is always rewritten.  The UDP checksum is
            * only written when us_do_checksum is set; otherwise the field is left
            * exactly as the caller set it.
2012 2011   */
2013 2012  static void
2014 2013  esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
2015 2014  {
2016 2015          int offset;
2017 2016          uint32_t cksum;
2018 2017          uint16_t *arr;
2019 2018          mblk_t *udpmp = mp;
2020 2019          uint_t hlen = IPH_HDR_LENGTH(ipha);
2021 2020  
2022 2021          ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2023 2022  
                   /* Zero the field, then store the recomputed header checksum. */
2024 2023          ipha->ipha_hdr_checksum = 0;
2025 2024          ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
2026 2025  
2027 2026          if (ns->netstack_udp->us_do_checksum) {
2028 2027                  ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
2029 2028                  /* arr points to the IP header. */
2030 2029                  arr = (uint16_t *)ipha;
2031 2030                  IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
                           /*
                            * ipha_length is in network order, so the inner htons()
                            * is really acting as ntohs() (both are the same byte
                            * swap) and the outer ntohs() converts the UDP length
                            * (total length minus IP header) back to network order.
                            */
2032 2031                  IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes,
2033 2032                      ntohs(htons(ipha->ipha_length) - hlen));
2034 2033                  /* arr[6-9] are the IP addresses. */
2035 2034                  cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
2036 2035                      ntohs(htons(ipha->ipha_length) - hlen);
                           /* Add everything from the end of the IP header onward. */
2037 2036                  cksum = IP_CSUM(mp, hlen, cksum);
2038 2037                  offset = hlen + UDP_CHECKSUM_OFFSET;
                           /*
                            * Walk the b_cont chain to the mblk that holds the UDP
                            * checksum field.  NOTE(review): b_cont is not checked
                            * for NULL; this relies on the chain really containing
                            * a UDP header.
                            */
2039 2038                  while (offset >= MBLKL(udpmp)) {
2040 2039                          offset -= MBLKL(udpmp);
2041 2040                          udpmp = udpmp->b_cont;
2042 2041                  }
2043 2042                  /* arr points to the UDP header's checksum field. */
2044 2043                  arr = (uint16_t *)(udpmp->b_rptr + offset);
                           /* Stored through a uint16_t pointer: low 16 bits only. */
2045 2044                  *arr = cksum;
2046 2045          }
2047 2046  }
2048 2047  
2049 2048  /*
2050 2049   * taskq handler so we can send the NAT-T keepalive on a separate thread.
            *
            * arg is the fully built keepalive mblk.  Its b_prev field does not
            * point at an mblk: ipsecesp_send_keepalive() stores the netstack id
            * there so that this handler can look the stack up again.
2051 2050   */
2052 2051  static void
2053 2052  actually_send_keepalive(void *arg)
2054 2053  {
2055 2054          mblk_t *mp = (mblk_t *)arg;
2056 2055          ip_xmit_attr_t ixas;
2057 2056          netstack_t      *ns;
2058 2057          netstackid_t    stackid;
2059 2058  
                   /* Recover the stack id and scrub b_prev before the mblk travels on. */
2060 2059          stackid = (netstackid_t)(uintptr_t)mp->b_prev;
2061 2060          mp->b_prev = NULL;
2062 2061          ns = netstack_find_by_stackid(stackid);
2063 2062          if (ns == NULL) {
2064 2063                  /* Disappeared */
2065 2064                  ip_drop_output("ipIfStatsOutDiscards", mp, NULL);
2066 2065                  freemsg(mp);
2067 2066                  return;
2068 2067          }
2069 2068  
                   /*
                    * Build transmit attributes on the stack: zero everything, then
                    * fill in only the fields set below.
                    */
2070 2069          bzero(&ixas, sizeof (ixas));
2071 2070          ixas.ixa_zoneid = ALL_ZONES;
2072 2071          ixas.ixa_cred = kcred;
2073 2072          ixas.ixa_cpid = NOPID;
2074 2073          ixas.ixa_tsl = NULL;
2075 2074          ixas.ixa_ipst = ns->netstack_ip;
2076 2075          /* No ULP checksum; done by esp_prepare_udp */
2077 2076          ixas.ixa_flags = (IXAF_IS_IPV4 | IXAF_NO_IPSEC | IXAF_VERIFY_SOURCE);
2078 2077  
                   /* Best effort: the result of the send is deliberately ignored. */
2079 2078          (void) ip_output_simple(mp, &ixas);
2080 2079          ixa_cleanup(&ixas);
                   /* Pairs with the successful netstack_find_by_stackid() above. */
2081 2080          netstack_rele(ns);
2082 2081  }
2083 2082  
2084 2083  /*
2085 2084   * Send a one-byte UDP NAT-T keepalive.
            *
            * Builds an IPv4 + UDP packet whose single payload byte is 0xFF, using
            * the addresses and NAT ports recorded in assoc, and hands it to
            * esp_taskq for transmission by actually_send_keepalive().
            *
            * Must be called without assoc->ipsa_lock held.  If the mblk cannot be
            * allocated the function returns silently; if the taskq dispatch
            * fails the packet is handed to ip_drop_packet().
2086 2085   */
2087 2086  void
2088 2087  ipsecesp_send_keepalive(ipsa_t *assoc)
2089 2088  {
2090 2089          mblk_t          *mp;
2091 2090          ipha_t          *ipha;
2092 2091          udpha_t         *udpha;
2093 2092          netstack_t      *ns = assoc->ipsa_netstack;
2094 2093  
2095 2094          ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock));
2096 2095  
                   /* One mblk holds the whole packet: IP header, UDP header, 1 byte. */
2097 2096          mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI);
2098 2097          if (mp == NULL)
2099 2098                  return;
2100 2099          ipha = (ipha_t *)mp->b_rptr;
2101 2100          ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
2102 2101          ipha->ipha_type_of_service = 0;
2103 2102          ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1);
2104 2103          /* Use the low-16 of the SPI so we have some clue where it came from. */
2105 2104          ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1);
2106 2105          ipha->ipha_fragment_offset_and_flags = 0;  /* Too small to fragment! */
2107 2106          ipha->ipha_ttl = 0xFF;
2108 2107          ipha->ipha_protocol = IPPROTO_UDP;
2109 2108          ipha->ipha_hdr_checksum = 0;
2110 2109          ipha->ipha_src = assoc->ipsa_srcaddr[0];
2111 2110          ipha->ipha_dst = assoc->ipsa_dstaddr[0];
2112 2111          udpha = (udpha_t *)(ipha + 1);
                   /* A NAT port of zero in the SA means "use the IKE NAT-T port". */
2113 2112          udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
2114 2113              assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
2115 2114          udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
2116 2115              assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
2117 2116          udpha->uha_length = htons(sizeof (udpha_t) + 1);
2118 2117          udpha->uha_checksum = 0;
                   /* Advance b_wptr past both headers, then append the payload byte. */
2119 2118          mp->b_wptr = (uint8_t *)(udpha + 1);
2120 2119          *(mp->b_wptr++) = 0xFF;
2121 2120  
                   /* Fill in the IP header checksum and, if enabled, the UDP one. */
2122 2121          esp_prepare_udp(ns, mp, ipha);
2123 2122  
2124 2123          /*
2125 2124           * We're holding an isaf_t bucket lock, so pawn off the actual
2126 2125           * packet transmission to another thread.  Just in case syncq
2127 2126           * processing causes a same-bucket packet to be processed.
                    *
                    * The netstack id rides along in b_prev; the handler reads it
                    * back and clears the field.
2128 2127           */
2129 2128          mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid;
2130 2129  
2131 2130          if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp,
2132 2131              TQ_NOSLEEP) == 0) {
2133 2132                  /* Assume no memory if taskq_dispatch() fails. */
2134 2133                  mp->b_prev = NULL;
2135 2134                  ip_drop_packet(mp, B_FALSE, NULL,
2136 2135                      DROPPER(ns->netstack_ipsec, ipds_esp_nomem),
2137 2136                      &ns->netstack_ipsecesp->esp_dropper);
2138 2137          }
2139 2138  }
2140 2139  
2141 2140  /*
2142 2141   * Submit an outbound packet to the crypto framework for authentication
2143 2142   * and/or encryption, as selected by the algorithms of the SA.
            *
            * data_mp      - packet chain; data_mp->b_cont must be the mblk that
            *                holds the ESP header (preceded by a UDP header for
            *                NAT-T SAs).
            * ixa          - transmit attributes; saved in an mblk when the request
            *                is forced asynchronous (IPSA_F_ASYNC).
            * assoc        - the ESP security association.
            * icv_buf      - where the computed ICV is to be stored.
            * payload_len  - number of bytes to encrypt.
            *
            * Returns data_mp if the request completed synchronously.  Returns
            * NULL if it failed (and increments OutDiscards) or if it is pending,
            * in which case esp_kcf_callback_outbound() finishes the job.
2144 2143   */
2145 2144  static mblk_t *
2146 2145  esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc,
2147 2146      uchar_t *icv_buf, uint_t payload_len)
2148 2147  {
2149 2148          uint_t auth_len;
2150 2149          crypto_call_req_t call_req, *callrp;
2151 2150          mblk_t *esp_mp;
2152 2151          esph_t *esph_ptr;
2153 2152          mblk_t *mp;
2154 2153          int kef_rc = CRYPTO_FAILED;
2155 2154          uint_t icv_len = assoc->ipsa_mac_len;
2156 2155          crypto_ctx_template_t auth_ctx_tmpl;
2157 2156          boolean_t do_auth, do_encr, force;
2158 2157          uint_t iv_len = assoc->ipsa_iv_len;
2159 2158          crypto_ctx_template_t encr_ctx_tmpl;
2160 2159          boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
                   /* For NAT-T the ESP header sits behind a UDP header in esp_mp. */
2161 2160          size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
2162 2161          netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
2163 2162          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2164 2163          ipsec_crypto_t  *ic, icstack;
2165 2164          uchar_t         *iv_ptr;
2166 2165          crypto_data_t   *cd_ptr = NULL;
2167 2166          ill_t           *ill = ixa->ixa_nce->nce_ill;
2168 2167          ipsec_stack_t   *ipss = ns->netstack_ipsec;
2169 2168  
2170 2169          esp3dbg(espstack, ("esp_submit_req_outbound:%s",
2171 2170              is_natt ? "natt" : "not natt"));
2172 2171  
2173 2172          do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
2174 2173          do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
2175 2174          force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2176 2175  
                   /*
                    * Default result if no framework call below is made: failure,
                    * unless the latency-test build pretends the crypto succeeded.
                    */
2177 2176  #ifdef IPSEC_LATENCY_TEST
2178 2177          kef_rc = CRYPTO_SUCCESS;
2179 2178  #else
2180 2179          kef_rc = CRYPTO_FAILED;
2181 2180  #endif
2182 2181  
2183 2182          /*
2184 2183           * Outbound IPsec packets are of the form:
2185 2184           * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
2186 2185           * unless it's NATT, then it's
2187 2186           * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
2188 2187           * Get a pointer to the mblk containing the ESP header.
2189 2188           */
2190 2189          ASSERT(data_mp->b_cont != NULL);
2191 2190          esp_mp = data_mp->b_cont;
2192 2191          esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
2193 2192          iv_ptr = (uchar_t *)(esph_ptr + 1);
2194 2193  
2195 2194          /*
2196 2195           * Combined mode algs need a nonce. This is setup in sadb_common_add().
2197 2196           * If for some reason we are using a SA which does not have a nonce
2198 2197           * then we must fail here.
2199 2198           */
2200 2199          if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
2201 2200              (assoc->ipsa_nonce == NULL)) {
2202 2201                  ip_drop_packet(data_mp, B_FALSE, NULL,
2203 2202                      DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2204 2203                  return (NULL);
2205 2204          }
2206 2205  
2207 2206          if (force) {
2208 2207                  /* We are doing asynch; allocate mblks to hold state */
2209 2208                  if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
2210 2209                      (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2211 2210                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2212 2211                          ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
2213 2212                          freemsg(data_mp);
2214 2213                          return (NULL);
2215 2214                  }
2216 2215  
                           /* State mblks go in front; the packet hangs off them. */
2217 2216                  linkb(mp, data_mp);
2218 2217                  callrp = &call_req;
2219 2218                  ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound);
2220 2219          } else {
2221 2220                  /*
2222 2221                   * If we know we are going to do sync then ipsec_crypto_t
2223 2222                   * should be on the stack.
2224 2223                   */
2225 2224                  ic = &icstack;
2226 2225                  bzero(ic, sizeof (*ic));
2227 2226                  callrp = NULL;
2228 2227          }
2229 2228  
2230 2229  
2231 2230          if (do_auth) {
2232 2231                  /* authentication context template */
2233 2232                  IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
2234 2233                      auth_ctx_tmpl);
2235 2234  
2236 2235                  /* where to store the computed mac */
2237 2236                  ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
2238 2237                      icv_len, icv_buf);
2239 2238  
2240 2239                  /* authentication starts at the ESP header */
2241 2240                  auth_len = payload_len + iv_len + sizeof (esph_t);
2242 2241                  if (!do_encr) {
2243 2242                          /* authentication only */
2244 2243                          /* initialize input data argument */
2245 2244                          ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
2246 2245                              esp_mp, esph_offset, auth_len);
2247 2246  
2248 2247                          /* call the crypto framework */
2249 2248                          kef_rc = crypto_mac(&assoc->ipsa_amech,
2250 2249                              &ic->ic_crypto_data,
2251 2250                              &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
2252 2251                              &ic->ic_crypto_mac, callrp);
2253 2252                  }
2254 2253          }
2255 2254  
2256 2255          if (do_encr) {
2257 2256                  /* encryption context template */
2258 2257                  IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
2259 2258                      encr_ctx_tmpl);
2260 2259                  /* Call the nonce update function. */
2261 2260                  (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len,
2262 2261                      iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);
2263 2262  
2264 2263                  if (!do_auth) {
2265 2264                          /* encryption only, skip mblk that contains ESP hdr */
2266 2265                          /* initialize input data argument */
2267 2266                          ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
2268 2267                              esp_mp->b_cont, 0, payload_len);
2269 2268  
2270 2269                          /*
2271 2270                           * For combined mode ciphers, the ciphertext is the same
2272 2271                           * size as the clear text, the ICV should follow the
2273 2272                           * ciphertext. To convince the kcf to allow in-line
2274 2273                           * encryption, with an ICV, use ic->ic_crypto_mac
2275 2274                           * to point to the same buffer as the data. The calling
2276 2275                           * function needs to ensure the buffer is large enough
2277 2276                           * to include the ICV.
2278 2277                           *
2279 2278                           * The IV is already written to the packet buffer, the
2280 2279                           * nonce setup function copied it to the params struct
2281 2280                           * for the cipher to use.
2282 2281                           */
2283 2282                          if (assoc->ipsa_flags & IPSA_F_COMBINED) {
2284 2283                                  bcopy(&ic->ic_crypto_data,
2285 2284                                      &ic->ic_crypto_mac,
2286 2285                                      sizeof (crypto_data_t));
2287 2286                                  ic->ic_crypto_mac.cd_length =
2288 2287                                      payload_len + icv_len;
2289 2288                                  cd_ptr = &ic->ic_crypto_mac;
2290 2289                          }
2291 2290  
2292 2291                          /* call the crypto framework */
                                   /* cd_ptr stays NULL unless the SA is combined-mode. */
2293 2292                          kef_rc = crypto_encrypt((crypto_mechanism_t *)
2294 2293                              &ic->ic_cmm, &ic->ic_crypto_data,
2295 2294                              &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
2296 2295                              cd_ptr, callrp);
2297 2296  
2298 2297                  }
2299 2298          }
2300 2299  
2301 2300          if (do_auth && do_encr) {
2302 2301                  /*
2303 2302                   * Encryption and authentication:
2304 2303                   * Pass the pointer to the mblk chain starting at the ESP
2305 2304                   * header to the framework. Skip the ESP header mblk
2306 2305                   * for encryption, which is reflected by an encryption
2307 2306                   * offset equal to the length of that mblk. Start
2308 2307                   * the authentication at the ESP header, i.e. use an
2309 2308                   * authentication offset of esph_offset (zero unless the
                            * SA is NAT-T, where a UDP header precedes the ESP header).
2310 2309                   */
2311 2310                  ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
2312 2311                      esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);
2313 2312  
2314 2313                  /* specify IV */
2315 2314                  ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
2316 2315  
2317 2316                  /* call the framework */
2318 2317                  kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
2319 2318                      &assoc->ipsa_amech, NULL,
2320 2319                      &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
2321 2320                      encr_ctx_tmpl, auth_ctx_tmpl,
2322 2321                      &ic->ic_crypto_dual_data,
2323 2322                      &ic->ic_crypto_mac, callrp);
2324 2323          }
2325 2324  
                   /*
                    * Only success and queued are handled in the switch; every other
                    * return code falls through to the failure path below it.
                    */
2326 2325          switch (kef_rc) {
2327 2326          case CRYPTO_SUCCESS:
2328 2327                  ESP_BUMP_STAT(espstack, crypto_sync);
2329 2328                  esp_set_usetime(assoc, B_FALSE);
2330 2329                  if (force) {
                                   /* Completed inline after all: drop the saved state. */
2331 2330                          mp = ipsec_free_crypto_data(mp);
2332 2331                          data_mp = ip_xmit_attr_free_mblk(mp);
2333 2332                  }
2334 2333                  if (is_natt)
2335 2334                          esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr);
2336 2335                  return (data_mp);
2337 2336          case CRYPTO_QUEUED:
2338 2337                  /* esp_kcf_callback_outbound() will be invoked on completion */
2339 2338                  ESP_BUMP_STAT(espstack, crypto_async);
2340 2339                  return (NULL);
2341 2340          }
2342 2341  
2343 2342          if (force) {
2344 2343                  mp = ipsec_free_crypto_data(mp);
2345 2344                  data_mp = ip_xmit_attr_free_mblk(mp);
2346 2345          }
2347 2346          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2348 2347          esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack);
2349 2348          /* data_mp was passed to ip_drop_packet */
2350 2349          return (NULL);
2351 2350  }
2352 2351  
2353 2352  /*
2354 2353   * Handle outbound IPsec processing for IPv4 and IPv6
2355 2354   *
2356 2355   * Returns data_mp if successfully completed the request. Returns
2357 2356   * NULL if it failed (and increments OutDiscards) or if it is pending.
2358 2357   */
2359 2358  static mblk_t *
2360 2359  esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
2361 2360  {
2362 2361          mblk_t *espmp, *tailmp;
2363 2362          ipha_t *ipha;
2364 2363          ip6_t *ip6h;
2365 2364          esph_t *esph_ptr, *iv_ptr;
2366 2365          uint_t af;
2367 2366          uint8_t *nhp;
2368 2367          uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
2369 2368          uintptr_t esplen = sizeof (esph_t);
2370 2369          uint8_t protocol;
2371 2370          ipsa_t *assoc;
2372 2371          uint_t iv_len, block_size, mac_len = 0;
2373 2372          uchar_t *icv_buf;
2374 2373          udpha_t *udpha;
2375 2374          boolean_t is_natt = B_FALSE;
2376 2375          netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
2377 2376          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2378 2377          ipsec_stack_t   *ipss = ns->netstack_ipsec;
2379 2378          ill_t           *ill = ixa->ixa_nce->nce_ill;
2380 2379          boolean_t       need_refrele = B_FALSE;
2381 2380  
2382 2381          ESP_BUMP_STAT(espstack, out_requests);
2383 2382  
2384 2383          /*
2385 2384           * <sigh> We have to copy the message here, because TCP (for example)
2386 2385           * keeps a dupb() of the message lying around for retransmission.
2387 2386           * Since ESP changes the whole of the datagram, we have to create our
2388 2387           * own copy lest we clobber TCP's data.  Since we have to copy anyway,
2389 2388           * we might as well make use of msgpullup() and get the mblk into one
2390 2389           * contiguous piece!
2391 2390           */
2392 2391          tailmp = msgpullup(data_mp, -1);
2393 2392          if (tailmp == NULL) {
2394 2393                  esp0dbg(("esp_outbound: msgpullup() failed, "
2395 2394                      "dropping packet.\n"));
2396 2395                  ip_drop_packet(data_mp, B_FALSE, ill,
2397 2396                      DROPPER(ipss, ipds_esp_nomem),
2398 2397                      &espstack->esp_dropper);
2399 2398                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2400 2399                  return (NULL);
2401 2400          }
2402 2401          freemsg(data_mp);
2403 2402          data_mp = tailmp;
2404 2403  
2405 2404          assoc = ixa->ixa_ipsec_esp_sa;
2406 2405          ASSERT(assoc != NULL);
2407 2406  
2408 2407          /*
2409 2408           * Get the outer IP header in shape to escape this system..
2410 2409           */
2411 2410          if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
2412 2411                  /*
2413 2412                   * Need to update packet with any CIPSO option and update
2414 2413                   * ixa_tsl to capture the new label.
2415 2414                   * We allocate a separate ixa for that purpose.
2416 2415                   */
2417 2416                  ixa = ip_xmit_attr_duplicate(ixa);
2418 2417                  if (ixa == NULL) {
2419 2418                          ip_drop_packet(data_mp, B_FALSE, ill,
2420 2419                              DROPPER(ipss, ipds_esp_nomem),
2421 2420                              &espstack->esp_dropper);
2422 2421                          return (NULL);
2423 2422                  }
2424 2423                  need_refrele = B_TRUE;
2425 2424  
2426 2425                  label_hold(assoc->ipsa_otsl);
2427 2426                  ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);
2428 2427  
2429 2428                  data_mp = sadb_whack_label(data_mp, assoc, ixa,
2430 2429                      DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2431 2430                  if (data_mp == NULL) {
2432 2431                          /* Packet dropped by sadb_whack_label */
2433 2432                          ixa_refrele(ixa);
2434 2433                          return (NULL);
2435 2434                  }
2436 2435          }
2437 2436  
2438 2437          /*
2439 2438           * Reality check....
2440 2439           */
2441 2440          ipha = (ipha_t *)data_mp->b_rptr;  /* So we can call esp_acquire(). */
2442 2441  
2443 2442          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2444 2443                  ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
2445 2444  
2446 2445                  af = AF_INET;
2447 2446                  divpoint = IPH_HDR_LENGTH(ipha);
2448 2447                  datalen = ntohs(ipha->ipha_length) - divpoint;
2449 2448                  nhp = (uint8_t *)&ipha->ipha_protocol;
2450 2449          } else {
2451 2450                  ip_pkt_t ipp;
2452 2451  
2453 2452                  ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
2454 2453  
2455 2454                  af = AF_INET6;
2456 2455                  ip6h = (ip6_t *)ipha;
2457 2456                  bzero(&ipp, sizeof (ipp));
2458 2457                  divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL);
2459 2458                  if (ipp.ipp_dstopts != NULL &&
2460 2459                      ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
2461 2460                          /*
2462 2461                           * Destination options are tricky.  If we get in here,
2463 2462                           * then we have a terminal header following the
2464 2463                           * destination options.  We need to adjust backwards
2465 2464                           * so we insert ESP BEFORE the destination options
2466 2465                           * bag.  (So that the dstopts get encrypted!)
2467 2466                           *
2468 2467                           * Since this is for outbound packets only, we know
2469 2468                           * that non-terminal destination options only precede
2470 2469                           * routing headers.
2471 2470                           */
2472 2471                          divpoint -= ipp.ipp_dstoptslen;
2473 2472                  }
2474 2473                  datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;
2475 2474  
2476 2475                  if (ipp.ipp_rthdr != NULL) {
2477 2476                          nhp = &ipp.ipp_rthdr->ip6r_nxt;
2478 2477                  } else if (ipp.ipp_hopopts != NULL) {
2479 2478                          nhp = &ipp.ipp_hopopts->ip6h_nxt;
2480 2479                  } else {
2481 2480                          ASSERT(divpoint == sizeof (ip6_t));
2482 2481                          /* It's probably IP + ESP. */
2483 2482                          nhp = &ip6h->ip6_nxt;
2484 2483                  }
2485 2484          }
2486 2485  
2487 2486          mac_len = assoc->ipsa_mac_len;
2488 2487  
2489 2488          if (assoc->ipsa_flags & IPSA_F_NATT) {
2490 2489                  /* wedge in UDP header */
2491 2490                  is_natt = B_TRUE;
2492 2491                  esplen += UDPH_SIZE;
2493 2492          }
2494 2493  
2495 2494          /*
2496 2495           * Set up ESP header and encryption padding for ENCR PI request.
2497 2496           */
2498 2497  
2499 2498          /* Determine the padding length.  Pad to 4-bytes for no-encryption. */
2500 2499          if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
2501 2500                  iv_len = assoc->ipsa_iv_len;
2502 2501                  block_size = assoc->ipsa_datalen;
2503 2502  
2504 2503                  /*
2505 2504                   * Pad the data to the length of the cipher block size.
2506 2505                   * Include the two additional bytes (hence the - 2) for the
2507 2506                   * padding length and the next header.  Take this into account
2508 2507                   * when calculating the actual length of the padding.
2509 2508                   */
2510 2509                  ASSERT(ISP2(iv_len));
2511 2510                  padlen = ((unsigned)(block_size - datalen - 2)) &
2512 2511                      (block_size - 1);
2513 2512          } else {
2514 2513                  iv_len = 0;
2515 2514                  padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) &
2516 2515                      (sizeof (uint32_t) - 1);
2517 2516          }
2518 2517  
2519 2518          /* Allocate ESP header and IV. */
2520 2519          esplen += iv_len;
2521 2520  
2522 2521          /*
2523 2522           * Update association byte-count lifetimes.  Don't forget to take
2524 2523           * into account the padding length and next-header (hence the + 2).
2525 2524           *
2526 2525           * Use the amount of data fed into the "encryption algorithm".  This
2527 2526           * is the IV, the data length, the padding length, and the final two
2528 2527           * bytes (padlen, and next-header).
2529 2528           *
2530 2529           */
2531 2530  
2532 2531          if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) {
2533 2532                  ip_drop_packet(data_mp, B_FALSE, ill,
2534 2533                      DROPPER(ipss, ipds_esp_bytes_expire),
2535 2534                      &espstack->esp_dropper);
2536 2535                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2537 2536                  if (need_refrele)
2538 2537                          ixa_refrele(ixa);
2539 2538                  return (NULL);
2540 2539          }
2541 2540  
2542 2541          espmp = allocb(esplen, BPRI_HI);
2543 2542          if (espmp == NULL) {
2544 2543                  ESP_BUMP_STAT(espstack, out_discards);
2545 2544                  esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n"));
2546 2545                  ip_drop_packet(data_mp, B_FALSE, ill,
2547 2546                      DROPPER(ipss, ipds_esp_nomem),
2548 2547                      &espstack->esp_dropper);
2549 2548                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2550 2549                  if (need_refrele)
2551 2550                          ixa_refrele(ixa);
2552 2551                  return (NULL);
2553 2552          }
2554 2553          espmp->b_wptr += esplen;
2555 2554          esph_ptr = (esph_t *)espmp->b_rptr;
2556 2555  
2557 2556          if (is_natt) {
2558 2557                  esp3dbg(espstack, ("esp_outbound: NATT"));
2559 2558  
2560 2559                  udpha = (udpha_t *)espmp->b_rptr;
2561 2560                  udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
2562 2561                      assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
2563 2562                  udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
2564 2563                      assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
2565 2564                  /*
2566 2565                   * Set the checksum to 0, so that the esp_prepare_udp() call
2567 2566                   * can do the right thing.
2568 2567                   */
2569 2568                  udpha->uha_checksum = 0;
2570 2569                  esph_ptr = (esph_t *)(udpha + 1);
2571 2570          }
2572 2571  
2573 2572          esph_ptr->esph_spi = assoc->ipsa_spi;
2574 2573  
2575 2574          esph_ptr->esph_replay = htonl(atomic_inc_32_nv(&assoc->ipsa_replay));
2576 2575          if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2577 2576                  /*
2578 2577                   * XXX We have replay counter wrapping.
2579 2578                   * We probably want to nuke this SA (and its peer).
2580 2579                   */
2581 2580                  ipsec_assocfailure(info.mi_idnum, 0, 0,
2582 2581                      SL_ERROR | SL_CONSOLE | SL_WARN,
2583 2582                      "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
2584 2583                      esph_ptr->esph_spi, assoc->ipsa_dstaddr, af,
2585 2584                      espstack->ipsecesp_netstack);
2586 2585  
2587 2586                  ESP_BUMP_STAT(espstack, out_discards);
2588 2587                  sadb_replay_delete(assoc);
2589 2588                  ip_drop_packet(data_mp, B_FALSE, ill,
2590 2589                      DROPPER(ipss, ipds_esp_replay),
2591 2590                      &espstack->esp_dropper);
2592 2591                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2593 2592                  if (need_refrele)
2594 2593                          ixa_refrele(ixa);
2595 2594                  return (NULL);
2596 2595          }
2597 2596  
2598 2597          iv_ptr = (esph_ptr + 1);
2599 2598          /*
2600 2599           * iv_ptr points to the mblk which will contain the IV once we have
2601 2600           * written it there. This mblk will be part of a mblk chain that
2602 2601           * will make up the packet.
2603 2602           *
2604 2603           * For counter mode algorithms, the IV is a 64 bit quantity, it
2605 2604           * must NEVER repeat in the lifetime of the SA, otherwise an
2606 2605           * attacker who had recorded enough packets might be able to
2607 2606           * determine some clear text.
2608 2607           *
2609 2608           * To ensure this does not happen, the IV is stored in the SA and
2610 2609           * incremented for each packet, the IV is then copied into the
2611 2610           * "packet" for transmission to the receiving system. The IV will
2612 2611           * also be copied into the nonce, when the packet is encrypted.
2613 2612           *
2614 2613           * CBC mode algorithms use a random IV for each packet. We do not
2615 2614           * require the highest quality random bits, but for best security
2616 2615           * with CBC mode ciphers, the value must be unlikely to repeat and
2617 2616           * must not be known in advance to an adversary capable of influencing
2618 2617           * the clear text.
2619 2618           */
2620 2619          if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc,
2621 2620              espstack)) {
2622 2621                  ip_drop_packet(data_mp, B_FALSE, ill,
2623 2622                      DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper);
2624 2623                  if (need_refrele)
2625 2624                          ixa_refrele(ixa);
2626 2625                  return (NULL);
2627 2626          }
2628 2627  
2629 2628          /* Fix the IP header. */
2630 2629          alloclen = padlen + 2 + mac_len;
2631 2630          adj = alloclen + (espmp->b_wptr - espmp->b_rptr);
2632 2631  
2633 2632          protocol = *nhp;
2634 2633  
2635 2634          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2636 2635                  ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
2637 2636                  if (is_natt) {
2638 2637                          *nhp = IPPROTO_UDP;
2639 2638                          udpha->uha_length = htons(ntohs(ipha->ipha_length) -
2640 2639                              IPH_HDR_LENGTH(ipha));
2641 2640                  } else {
2642 2641                          *nhp = IPPROTO_ESP;
2643 2642                  }
2644 2643                  ipha->ipha_hdr_checksum = 0;
2645 2644                  ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2646 2645          } else {
2647 2646                  ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
2648 2647                  *nhp = IPPROTO_ESP;
2649 2648          }
2650 2649  
2651 2650          /* I've got the two ESP mblks, now insert them. */
2652 2651  
2653 2652          esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n"));
2654 2653          esp2dbg(espstack, (dump_msg(data_mp)));
2655 2654  
2656 2655          if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) {
2657 2656                  ESP_BUMP_STAT(espstack, out_discards);
2658 2657                  /* NOTE:  esp_insert_esp() only fails if there's no memory. */
2659 2658                  ip_drop_packet(data_mp, B_FALSE, ill,
2660 2659                      DROPPER(ipss, ipds_esp_nomem),
2661 2660                      &espstack->esp_dropper);
2662 2661                  freeb(espmp);
2663 2662                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2664 2663                  if (need_refrele)
2665 2664                          ixa_refrele(ixa);
2666 2665                  return (NULL);
2667 2666          }
2668 2667  
2669 2668          /* Append padding (and leave room for ICV). */
2670 2669          for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
2671 2670                  ;
2672 2671          if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
2673 2672                  tailmp->b_cont = allocb(alloclen, BPRI_HI);
2674 2673                  if (tailmp->b_cont == NULL) {
2675 2674                          ESP_BUMP_STAT(espstack, out_discards);
2676 2675                          esp0dbg(("esp_outbound:  Can't allocate tailmp.\n"));
2677 2676                          ip_drop_packet(data_mp, B_FALSE, ill,
2678 2677                              DROPPER(ipss, ipds_esp_nomem),
2679 2678                              &espstack->esp_dropper);
2680 2679                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2681 2680                          if (need_refrele)
2682 2681                                  ixa_refrele(ixa);
2683 2682                          return (NULL);
2684 2683                  }
2685 2684                  tailmp = tailmp->b_cont;
2686 2685          }
2687 2686  
2688 2687          /*
2689 2688           * If there's padding, N bytes of padding must be of the form 0x1,
2690 2689           * 0x2, 0x3... 0xN.
2691 2690           */
2692 2691          for (i = 0; i < padlen; ) {
2693 2692                  i++;
2694 2693                  *tailmp->b_wptr++ = i;
2695 2694          }
2696 2695          *tailmp->b_wptr++ = i;
2697 2696          *tailmp->b_wptr++ = protocol;
2698 2697  
2699 2698          esp2dbg(espstack, ("data_Mp before encryption:\n"));
2700 2699          esp2dbg(espstack, (dump_msg(data_mp)));
2701 2700  
2702 2701          /*
2703 2702           * Okay.  I've set up the pre-encryption ESP.  Let's do it!
2704 2703           */
2705 2704  
2706 2705          if (mac_len > 0) {
2707 2706                  ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
2708 2707                  icv_buf = tailmp->b_wptr;
2709 2708                  tailmp->b_wptr += mac_len;
2710 2709          } else {
2711 2710                  icv_buf = NULL;
2712 2711          }
2713 2712  
2714 2713          data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf,
2715 2714              datalen + padlen + 2);
2716 2715          if (need_refrele)
2717 2716                  ixa_refrele(ixa);
2718 2717          return (data_mp);
2719 2718  }
2720 2719  
2721 2720  /*
2722 2721   * IP calls this to validate the ICMP errors that
2723 2722   * we got from the network.
2724 2723   */
2725 2724  mblk_t *
2726 2725  ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
2727 2726  {
2728 2727          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
2729 2728          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2730 2729          ipsec_stack_t   *ipss = ns->netstack_ipsec;
2731 2730  
2732 2731          /*
2733 2732           * Unless we get an entire packet back, this function is useless.
2734 2733           * Why?
2735 2734           *
2736 2735           * 1.)  Partial packets are useless, because the "next header"
2737 2736           *      is at the end of the decrypted ESP packet.  Without the
2738 2737           *      whole packet, this is useless.
2739 2738           *
2740 2739           * 2.)  If we every use a stateful cipher, such as a stream or a
2741 2740           *      one-time pad, we can't do anything.
2742 2741           *
2743 2742           * Since the chances of us getting an entire packet back are very
2744 2743           * very small, we discard here.
2745 2744           */
2746 2745          IP_ESP_BUMP_STAT(ipss, in_discards);
2747 2746          ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
2748 2747              DROPPER(ipss, ipds_esp_icmp),
2749 2748              &espstack->esp_dropper);
2750 2749          return (NULL);
2751 2750  }
2752 2751  
2753 2752  /*
2754 2753   * Construct an SADB_REGISTER message with the current algorithms.
2755 2754   * This function gets called when 'ipsecalgs -s' is run or when
2756 2755   * in.iked (or other KMD) starts.
2757 2756   */
2758 2757  static boolean_t
2759 2758  esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
2760 2759      ipsecesp_stack_t *espstack, cred_t *cr)
2761 2760  {
2762 2761          mblk_t *pfkey_msg_mp, *keysock_out_mp;
2763 2762          sadb_msg_t *samsg;
2764 2763          sadb_supported_t *sasupp_auth = NULL;
2765 2764          sadb_supported_t *sasupp_encr = NULL;
2766 2765          sadb_alg_t *saalg;
2767 2766          uint_t allocsize = sizeof (*samsg);
2768 2767          uint_t i, numalgs_snap;
2769 2768          int current_aalgs;
2770 2769          ipsec_alginfo_t **authalgs;
2771 2770          uint_t num_aalgs;
2772 2771          int current_ealgs;
2773 2772          ipsec_alginfo_t **encralgs;
2774 2773          uint_t num_ealgs;
2775 2774          ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
2776 2775          sadb_sens_t *sens;
2777 2776          size_t sens_len = 0;
2778 2777          sadb_ext_t *nextext;
2779 2778          ts_label_t *sens_tsl = NULL;
2780 2779  
2781 2780          /* Allocate the KEYSOCK_OUT. */
2782 2781          keysock_out_mp = sadb_keysock_out(serial);
2783 2782          if (keysock_out_mp == NULL) {
2784 2783                  esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
2785 2784                  return (B_FALSE);
2786 2785          }
2787 2786  
2788 2787          if (is_system_labeled() && (cr != NULL)) {
2789 2788                  sens_tsl = crgetlabel(cr);
2790 2789                  if (sens_tsl != NULL) {
2791 2790                          sens_len = sadb_sens_len_from_label(sens_tsl);
2792 2791                          allocsize += sens_len;
2793 2792                  }
2794 2793          }
2795 2794  
2796 2795          /*
2797 2796           * Allocate the PF_KEY message that follows KEYSOCK_OUT.
2798 2797           */
2799 2798  
2800 2799          rw_enter(&ipss->ipsec_alg_lock, RW_READER);
2801 2800          /*
2802 2801           * Fill SADB_REGISTER message's algorithm descriptors.  Hold
2803 2802           * down the lock while filling it.
2804 2803           *
2805 2804           * Return only valid algorithms, so the number of algorithms
2806 2805           * to send up may be less than the number of algorithm entries
2807 2806           * in the table.
2808 2807           */
2809 2808          authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
2810 2809          for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
2811 2810                  if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
2812 2811                          num_aalgs++;
2813 2812  
2814 2813          if (num_aalgs != 0) {
2815 2814                  allocsize += (num_aalgs * sizeof (*saalg));
2816 2815                  allocsize += sizeof (*sasupp_auth);
2817 2816          }
2818 2817          encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
2819 2818          for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
2820 2819                  if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
2821 2820                          num_ealgs++;
2822 2821  
2823 2822          if (num_ealgs != 0) {
2824 2823                  allocsize += (num_ealgs * sizeof (*saalg));
2825 2824                  allocsize += sizeof (*sasupp_encr);
2826 2825          }
2827 2826          keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
2828 2827          if (keysock_out_mp->b_cont == NULL) {
2829 2828                  rw_exit(&ipss->ipsec_alg_lock);
2830 2829                  freemsg(keysock_out_mp);
2831 2830                  return (B_FALSE);
2832 2831          }
2833 2832          pfkey_msg_mp = keysock_out_mp->b_cont;
2834 2833          pfkey_msg_mp->b_wptr += allocsize;
2835 2834  
2836 2835          nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));
2837 2836  
2838 2837          if (num_aalgs != 0) {
2839 2838                  sasupp_auth = (sadb_supported_t *)nextext;
2840 2839                  saalg = (sadb_alg_t *)(sasupp_auth + 1);
2841 2840  
2842 2841                  ASSERT(((ulong_t)saalg & 0x7) == 0);
2843 2842  
2844 2843                  numalgs_snap = 0;
2845 2844                  for (i = 0;
2846 2845                      ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
2847 2846                      i++) {
2848 2847                          if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
2849 2848                                  continue;
2850 2849  
2851 2850                          saalg->sadb_alg_id = authalgs[i]->alg_id;
2852 2851                          saalg->sadb_alg_ivlen = 0;
2853 2852                          saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
2854 2853                          saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
2855 2854                          saalg->sadb_x_alg_increment =
2856 2855                              authalgs[i]->alg_increment;
2857 2856                          saalg->sadb_x_alg_saltbits = SADB_8TO1(
2858 2857                              authalgs[i]->alg_saltlen);
2859 2858                          numalgs_snap++;
2860 2859                          saalg++;
2861 2860                  }
2862 2861                  ASSERT(numalgs_snap == num_aalgs);
2863 2862  #ifdef DEBUG
2864 2863                  /*
2865 2864                   * Reality check to make sure I snagged all of the
2866 2865                   * algorithms.
2867 2866                   */
2868 2867                  for (; i < IPSEC_MAX_ALGS; i++) {
2869 2868                          if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
2870 2869                                  cmn_err(CE_PANIC, "esp_register_out()! "
2871 2870                                      "Missed aalg #%d.\n", i);
2872 2871                          }
2873 2872                  }
2874 2873  #endif /* DEBUG */
2875 2874                  nextext = (sadb_ext_t *)saalg;
2876 2875          }
2877 2876  
2878 2877          if (num_ealgs != 0) {
2879 2878                  sasupp_encr = (sadb_supported_t *)nextext;
2880 2879                  saalg = (sadb_alg_t *)(sasupp_encr + 1);
2881 2880  
2882 2881                  numalgs_snap = 0;
2883 2882                  for (i = 0;
2884 2883                      ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
2885 2884                          if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
2886 2885                                  continue;
2887 2886                          saalg->sadb_alg_id = encralgs[i]->alg_id;
2888 2887                          saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen;
2889 2888                          saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits;
2890 2889                          saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits;
2891 2890                          /*
2892 2891                           * We could advertise the ICV length, except there
2893 2892                           * is not a value in sadb_x_algb to do this.
2894 2893                           * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
2895 2894                           */
2896 2895                          saalg->sadb_x_alg_increment =
2897 2896                              encralgs[i]->alg_increment;
2898 2897                          saalg->sadb_x_alg_saltbits =
2899 2898                              SADB_8TO1(encralgs[i]->alg_saltlen);
2900 2899  
2901 2900                          numalgs_snap++;
2902 2901                          saalg++;
2903 2902                  }
2904 2903                  ASSERT(numalgs_snap == num_ealgs);
2905 2904  #ifdef DEBUG
2906 2905                  /*
2907 2906                   * Reality check to make sure I snagged all of the
2908 2907                   * algorithms.
2909 2908                   */
2910 2909                  for (; i < IPSEC_MAX_ALGS; i++) {
2911 2910                          if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
2912 2911                                  cmn_err(CE_PANIC, "esp_register_out()! "
2913 2912                                      "Missed ealg #%d.\n", i);
2914 2913                          }
2915 2914                  }
2916 2915  #endif /* DEBUG */
2917 2916                  nextext = (sadb_ext_t *)saalg;
2918 2917          }
2919 2918  
2920 2919          current_aalgs = num_aalgs;
2921 2920          current_ealgs = num_ealgs;
2922 2921  
2923 2922          rw_exit(&ipss->ipsec_alg_lock);
2924 2923  
2925 2924          if (sens_tsl != NULL) {
2926 2925                  sens = (sadb_sens_t *)nextext;
2927 2926                  sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
2928 2927                      sens_tsl, sens_len);
2929 2928  
2930 2929                  nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
2931 2930          }
2932 2931  
2933 2932          /* Now fill the rest of the SADB_REGISTER message. */
2934 2933  
2935 2934          samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
2936 2935          samsg->sadb_msg_version = PF_KEY_V2;
2937 2936          samsg->sadb_msg_type = SADB_REGISTER;
2938 2937          samsg->sadb_msg_errno = 0;
2939 2938          samsg->sadb_msg_satype = SADB_SATYPE_ESP;
2940 2939          samsg->sadb_msg_len = SADB_8TO64(allocsize);
2941 2940          samsg->sadb_msg_reserved = 0;
2942 2941          /*
2943 2942           * Assume caller has sufficient sequence/pid number info.  If it's one
2944 2943           * from me over a new alg., I could give two hoots about sequence.
2945 2944           */
2946 2945          samsg->sadb_msg_seq = sequence;
2947 2946          samsg->sadb_msg_pid = pid;
2948 2947  
2949 2948          if (sasupp_auth != NULL) {
2950 2949                  sasupp_auth->sadb_supported_len = SADB_8TO64(
2951 2950                      sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
2952 2951                  sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
2953 2952                  sasupp_auth->sadb_supported_reserved = 0;
2954 2953          }
2955 2954  
2956 2955          if (sasupp_encr != NULL) {
2957 2956                  sasupp_encr->sadb_supported_len = SADB_8TO64(
2958 2957                      sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
2959 2958                  sasupp_encr->sadb_supported_exttype =
2960 2959                      SADB_EXT_SUPPORTED_ENCRYPT;
2961 2960                  sasupp_encr->sadb_supported_reserved = 0;
2962 2961          }
2963 2962  
2964 2963          if (espstack->esp_pfkey_q != NULL)
2965 2964                  putnext(espstack->esp_pfkey_q, keysock_out_mp);
2966 2965          else {
2967 2966                  freemsg(keysock_out_mp);
2968 2967                  return (B_FALSE);
2969 2968          }
2970 2969  
2971 2970          return (B_TRUE);
2972 2971  }
2973 2972  
2974 2973  /*
2975 2974   * Invoked when the algorithm table changes. Causes SADB_REGISTER
2976 2975   * messages continaining the current list of algorithms to be
2977 2976   * sent up to the ESP listeners.
2978 2977   */
2979 2978  void
2980 2979  ipsecesp_algs_changed(netstack_t *ns)
2981 2980  {
2982 2981          ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
2983 2982  
2984 2983          /*
2985 2984           * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
2986 2985           * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
2987 2986           */
2988 2987          (void) esp_register_out(0, 0, 0, espstack, NULL);
2989 2988  }
2990 2989  
2991 2990  /*
2992 2991   * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
2993 2992   * and send() it into ESP and IP again.
2994 2993   */
2995 2994  static void
2996 2995  inbound_task(void *arg)
2997 2996  {
2998 2997          mblk_t          *mp = (mblk_t *)arg;
2999 2998          mblk_t          *async_mp;
3000 2999          ip_recv_attr_t  iras;
3001 3000  
3002 3001          async_mp = mp;
3003 3002          mp = async_mp->b_cont;
3004 3003          async_mp->b_cont = NULL;
3005 3004          if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
3006 3005                  /* The ill or ip_stack_t disappeared on us */
3007 3006                  ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
3008 3007                  freemsg(mp);
3009 3008                  goto done;
3010 3009          }
3011 3010  
3012 3011          esp_inbound_restart(mp, &iras);
3013 3012  done:
3014 3013          ira_cleanup(&iras, B_TRUE);
3015 3014  }
3016 3015  
3017 3016  /*
3018 3017   * Restart ESP after the SA has been added.
3019 3018   */
3020 3019  static void
3021 3020  esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
3022 3021  {
3023 3022          esph_t          *esph;
3024 3023          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3025 3024          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3026 3025  
3027 3026          esp2dbg(espstack, ("in ESP inbound_task"));
3028 3027          ASSERT(espstack != NULL);
3029 3028  
3030 3029          mp = ipsec_inbound_esp_sa(mp, ira, &esph);
3031 3030          if (mp == NULL)
3032 3031                  return;
3033 3032  
3034 3033          ASSERT(esph != NULL);
3035 3034          ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3036 3035          ASSERT(ira->ira_ipsec_esp_sa != NULL);
3037 3036  
3038 3037          mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira);
3039 3038          if (mp == NULL) {
3040 3039                  /*
3041 3040                   * Either it failed or is pending. In the former case
3042 3041                   * ipIfStatsInDiscards was increased.
3043 3042                   */
3044 3043                  return;
3045 3044          }
3046 3045  
3047 3046          ip_input_post_ipsec(mp, ira);
3048 3047  }
3049 3048  
3050 3049  /*
3051 3050   * Now that weak-key passed, actually ADD the security association, and
3052 3051   * send back a reply ADD message.
3053 3052   */
3054 3053  static int
3055 3054  esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
3056 3055      int *diagnostic, ipsecesp_stack_t *espstack)
3057 3056  {
3058 3057          isaf_t *primary = NULL, *secondary;
3059 3058          boolean_t clone = B_FALSE, is_inbound = B_FALSE;
3060 3059          ipsa_t *larval = NULL;
3061 3060          ipsacq_t *acqrec;
3062 3061          iacqf_t *acq_bucket;
3063 3062          mblk_t *acq_msgs = NULL;
3064 3063          int rc;
3065 3064          mblk_t *lpkt;
3066 3065          int error;
3067 3066          ipsa_query_t sq;
3068 3067          ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
3069 3068  
3070 3069          /*
3071 3070           * Locate the appropriate table(s).
3072 3071           */
3073 3072          sq.spp = &espstack->esp_sadb;   /* XXX */
3074 3073          error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
3075 3074              IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
3076 3075              &sq, diagnostic);
3077 3076          if (error)
3078 3077                  return (error);
3079 3078  
3080 3079          /*
3081 3080           * Use the direction flags provided by the KMD to determine
3082 3081           * if the inbound or outbound table should be the primary
3083 3082           * for this SA. If these flags were absent then make this
3084 3083           * decision based on the addresses.
3085 3084           */
3086 3085          if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) {
3087 3086                  primary = sq.inbound;
3088 3087                  secondary = sq.outbound;
3089 3088                  is_inbound = B_TRUE;
3090 3089                  if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
3091 3090                          clone = B_TRUE;
3092 3091          } else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
3093 3092                  primary = sq.outbound;
3094 3093                  secondary = sq.inbound;
3095 3094          }
3096 3095  
3097 3096          if (primary == NULL) {
3098 3097                  /*
3099 3098                   * The KMD did not set a direction flag, determine which
3100 3099                   * table to insert the SA into based on addresses.
3101 3100                   */
3102 3101                  switch (ksi->ks_in_dsttype) {
3103 3102                  case KS_IN_ADDR_MBCAST:
3104 3103                          clone = B_TRUE; /* All mcast SAs can be bidirectional */
3105 3104                          sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
3106 3105                          /* FALLTHRU */
3107 3106                  /*
3108 3107                   * If the source address is either one of mine, or unspecified
3109 3108                   * (which is best summed up by saying "not 'not mine'"),
3110 3109                   * then the association is potentially bi-directional,
3111 3110                   * in that it can be used for inbound traffic and outbound
3112 3111                   * traffic.  The best example of such an SA is a multicast
3113 3112                   * SA (which allows me to receive the outbound traffic).
3114 3113                   */
3115 3114                  case KS_IN_ADDR_ME:
3116 3115                          sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
3117 3116                          primary = sq.inbound;
3118 3117                          secondary = sq.outbound;
3119 3118                          if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
3120 3119                                  clone = B_TRUE;
3121 3120                          is_inbound = B_TRUE;
3122 3121                          break;
3123 3122                  /*
3124 3123                   * If the source address literally not mine (either
3125 3124                   * unspecified or not mine), then this SA may have an
3126 3125                   * address that WILL be mine after some configuration.
3127 3126                   * We pay the price for this by making it a bi-directional
3128 3127                   * SA.
3129 3128                   */
3130 3129                  case KS_IN_ADDR_NOTME:
3131 3130                          sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
3132 3131                          primary = sq.outbound;
3133 3132                          secondary = sq.inbound;
3134 3133                          if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
3135 3134                                  sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
3136 3135                                  clone = B_TRUE;
3137 3136                          }
3138 3137                          break;
3139 3138                  default:
3140 3139                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
3141 3140                          return (EINVAL);
3142 3141                  }
3143 3142          }
3144 3143  
3145 3144          /*
3146 3145           * Find a ACQUIRE list entry if possible.  If we've added an SA that
3147 3146           * suits the needs of an ACQUIRE list entry, we can eliminate the
3148 3147           * ACQUIRE list entry and transmit the enqueued packets.  Use the
3149 3148           * high-bit of the sequence number to queue it.  Key off destination
3150 3149           * addr, and change acqrec's state.
3151 3150           */
3152 3151  
3153 3152          if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
3154 3153                  acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
3155 3154                  mutex_enter(&acq_bucket->iacqf_lock);
3156 3155                  for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
3157 3156                      acqrec = acqrec->ipsacq_next) {
3158 3157                          mutex_enter(&acqrec->ipsacq_lock);
3159 3158                          /*
3160 3159                           * Q:  I only check sequence.  Should I check dst?
3161 3160                           * A: Yes, check dest because those are the packets
3162 3161                           *    that are queued up.
3163 3162                           */
3164 3163                          if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
3165 3164                              IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
3166 3165                              acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
3167 3166                                  break;
3168 3167                          mutex_exit(&acqrec->ipsacq_lock);
3169 3168                  }
3170 3169                  if (acqrec != NULL) {
3171 3170                          /*
3172 3171                           * AHA!  I found an ACQUIRE record for this SA.
3173 3172                           * Grab the msg list, and free the acquire record.
3174 3173                           * I already am holding the lock for this record,
3175 3174                           * so all I have to do is free it.
3176 3175                           */
3177 3176                          acq_msgs = acqrec->ipsacq_mp;
3178 3177                          acqrec->ipsacq_mp = NULL;
3179 3178                          mutex_exit(&acqrec->ipsacq_lock);
3180 3179                          sadb_destroy_acquire(acqrec,
3181 3180                              espstack->ipsecesp_netstack);
3182 3181                  }
3183 3182                  mutex_exit(&acq_bucket->iacqf_lock);
3184 3183          }
3185 3184  
3186 3185          /*
3187 3186           * Find PF_KEY message, and see if I'm an update.  If so, find entry
3188 3187           * in larval list (if there).
3189 3188           */
3190 3189          if (samsg->sadb_msg_type == SADB_UPDATE) {
3191 3190                  mutex_enter(&sq.inbound->isaf_lock);
3192 3191                  larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
3193 3192                      ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
3194 3193                  mutex_exit(&sq.inbound->isaf_lock);
3195 3194  
3196 3195                  if ((larval == NULL) ||
3197 3196                      (larval->ipsa_state != IPSA_STATE_LARVAL)) {
3198 3197                          *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
3199 3198                          if (larval != NULL) {
3200 3199                                  IPSA_REFRELE(larval);
3201 3200                          }
3202 3201                          esp0dbg(("Larval update, but larval disappeared.\n"));
3203 3202                          return (ESRCH);
3204 3203                  } /* Else sadb_common_add unlinks it for me! */
3205 3204          }
3206 3205  
3207 3206          if (larval != NULL) {
3208 3207                  /*
3209 3208                   * Hold again, because sadb_common_add() consumes a reference,
3210 3209                   * and we don't want to clear_lpkt() without a reference.
3211 3210                   */
3212 3211                  IPSA_REFHOLD(larval);
3213 3212          }
3214 3213  
3215 3214          rc = sadb_common_add(espstack->esp_pfkey_q,
3216 3215              mp, samsg, ksi, primary, secondary, larval, clone, is_inbound,
3217 3216              diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb);
3218 3217  
3219 3218          if (larval != NULL) {
3220 3219                  if (rc == 0) {
3221 3220                          lpkt = sadb_clear_lpkt(larval);
3222 3221                          if (lpkt != NULL) {
3223 3222                                  rc = !taskq_dispatch(esp_taskq, inbound_task,
3224 3223                                      lpkt, TQ_NOSLEEP);
3225 3224                          }
3226 3225                  }
3227 3226                  IPSA_REFRELE(larval);
3228 3227          }
3229 3228  
3230 3229          /*
3231 3230           * How much more stack will I create with all of these
3232 3231           * esp_outbound() calls?
3233 3232           */
3234 3233  
3235 3234          /* Handle the packets queued waiting for the SA */
3236 3235          while (acq_msgs != NULL) {
3237 3236                  mblk_t          *asyncmp;
3238 3237                  mblk_t          *data_mp;
3239 3238                  ip_xmit_attr_t  ixas;
3240 3239                  ill_t           *ill;
3241 3240  
3242 3241                  asyncmp = acq_msgs;
3243 3242                  acq_msgs = acq_msgs->b_next;
3244 3243                  asyncmp->b_next = NULL;
3245 3244  
3246 3245                  /*
3247 3246                   * Extract the ip_xmit_attr_t from the first mblk.
3248 3247                   * Verifies that the netstack and ill is still around; could
3249 3248                   * have vanished while iked was doing its work.
3250 3249                   * On succesful return we have a nce_t and the ill/ipst can't
3251 3250                   * disappear until we do the nce_refrele in ixa_cleanup.
3252 3251                   */
3253 3252                  data_mp = asyncmp->b_cont;
3254 3253                  asyncmp->b_cont = NULL;
3255 3254                  if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
3256 3255                          ESP_BUMP_STAT(espstack, out_discards);
3257 3256                          ip_drop_packet(data_mp, B_FALSE, NULL,
3258 3257                              DROPPER(ipss, ipds_sadb_acquire_timeout),
3259 3258                              &espstack->esp_dropper);
3260 3259                  } else if (rc != 0) {
3261 3260                          ill = ixas.ixa_nce->nce_ill;
3262 3261                          ESP_BUMP_STAT(espstack, out_discards);
3263 3262                          ip_drop_packet(data_mp, B_FALSE, ill,
3264 3263                              DROPPER(ipss, ipds_sadb_acquire_timeout),
3265 3264                              &espstack->esp_dropper);
3266 3265                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3267 3266                  } else {
3268 3267                          esp_outbound_finish(data_mp, &ixas);
3269 3268                  }
3270 3269                  ixa_cleanup(&ixas);
3271 3270          }
3272 3271  
3273 3272          return (rc);
3274 3273  }
3275 3274  
3276 3275  /*
3277 3276   * Process one of the queued messages (from ipsacq_mp) once the SA
3278 3277   * has been added.
3279 3278   */
3280 3279  static void
3281 3280  esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
3282 3281  {
3283 3282          netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
3284 3283          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3285 3284          ipsec_stack_t   *ipss = ns->netstack_ipsec;
3286 3285          ill_t           *ill = ixa->ixa_nce->nce_ill;
3287 3286  
3288 3287          if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) {
3289 3288                  ESP_BUMP_STAT(espstack, out_discards);
3290 3289                  ip_drop_packet(data_mp, B_FALSE, ill,
3291 3290                      DROPPER(ipss, ipds_sadb_acquire_timeout),
3292 3291                      &espstack->esp_dropper);
3293 3292                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3294 3293                  return;
3295 3294          }
3296 3295  
3297 3296          data_mp = esp_outbound(data_mp, ixa);
3298 3297          if (data_mp == NULL)
3299 3298                  return;
3300 3299  
3301 3300          /* do AH processing if needed */
3302 3301          data_mp = esp_do_outbound_ah(data_mp, ixa);
3303 3302          if (data_mp == NULL)
3304 3303                  return;
3305 3304  
3306 3305          (void) ip_output_post_ipsec(data_mp, ixa);
3307 3306  }
3308 3307  
3309 3308  /*
3310 3309   * Add new ESP security association.  This may become a generic AH/ESP
3311 3310   * routine eventually.
3312 3311   */
3313 3312  static int
3314 3313  esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
3315 3314  {
3316 3315          sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3317 3316          sadb_address_t *srcext =
3318 3317              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3319 3318          sadb_address_t *dstext =
3320 3319              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3321 3320          sadb_address_t *isrcext =
3322 3321              (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
3323 3322          sadb_address_t *idstext =
3324 3323              (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
3325 3324          sadb_address_t *nttext_loc =
3326 3325              (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
3327 3326          sadb_address_t *nttext_rem =
3328 3327              (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
3329 3328          sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3330 3329          sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3331 3330          struct sockaddr_in *src, *dst;
3332 3331          struct sockaddr_in *natt_loc, *natt_rem;
3333 3332          struct sockaddr_in6 *natt_loc6, *natt_rem6;
3334 3333          sadb_lifetime_t *soft =
3335 3334              (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3336 3335          sadb_lifetime_t *hard =
3337 3336              (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3338 3337          sadb_lifetime_t *idle =
3339 3338              (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
3340 3339          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3341 3340          ipsec_stack_t   *ipss = ns->netstack_ipsec;
3342 3341  
3343 3342  
3344 3343  
3345 3344          /* I need certain extensions present for an ADD message. */
3346 3345          if (srcext == NULL) {
3347 3346                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3348 3347                  return (EINVAL);
3349 3348          }
3350 3349          if (dstext == NULL) {
3351 3350                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3352 3351                  return (EINVAL);
3353 3352          }
3354 3353          if (isrcext == NULL && idstext != NULL) {
3355 3354                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3356 3355                  return (EINVAL);
3357 3356          }
3358 3357          if (isrcext != NULL && idstext == NULL) {
3359 3358                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
3360 3359                  return (EINVAL);
3361 3360          }
3362 3361          if (assoc == NULL) {
3363 3362                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3364 3363                  return (EINVAL);
3365 3364          }
3366 3365          if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
3367 3366                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
3368 3367                  return (EINVAL);
3369 3368          }
3370 3369  
3371 3370          src = (struct sockaddr_in *)(srcext + 1);
3372 3371          dst = (struct sockaddr_in *)(dstext + 1);
3373 3372          natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
3374 3373          natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
3375 3374          natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
3376 3375          natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);
3377 3376  
3378 3377          /* Sundry ADD-specific reality checks. */
3379 3378          /* XXX STATS :  Logging/stats here? */
3380 3379  
3381 3380          if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
3382 3381              (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3383 3382                  *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
3384 3383                  return (EINVAL);
3385 3384          }
3386 3385          if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
3387 3386                  *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3388 3387                  return (EINVAL);
3389 3388          }
3390 3389  
3391 3390  #ifndef IPSEC_LATENCY_TEST
3392 3391          if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
3393 3392              assoc->sadb_sa_auth == SADB_AALG_NONE) {
3394 3393                  *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3395 3394                  return (EINVAL);
3396 3395          }
3397 3396  #endif
3398 3397  
3399 3398          if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) {
3400 3399                  *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3401 3400                  return (EINVAL);
3402 3401          }
3403 3402  
3404 3403          if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
3405 3404                  return (EINVAL);
3406 3405          }
3407 3406          ASSERT(src->sin_family == dst->sin_family);
3408 3407  
3409 3408          if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
3410 3409                  if (nttext_loc == NULL) {
3411 3410                          *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3412 3411                          return (EINVAL);
3413 3412                  }
3414 3413  
3415 3414                  if (natt_loc->sin_family == AF_INET6 &&
3416 3415                      !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
3417 3416                          *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
3418 3417                          return (EINVAL);
3419 3418                  }
3420 3419          }
3421 3420  
3422 3421          if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
3423 3422                  if (nttext_rem == NULL) {
3424 3423                          *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3425 3424                          return (EINVAL);
3426 3425                  }
3427 3426                  if (natt_rem->sin_family == AF_INET6 &&
3428 3427                      !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
3429 3428                          *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
3430 3429                          return (EINVAL);
3431 3430                  }
3432 3431          }
3433 3432  
3434 3433  
3435 3434          /* Stuff I don't support, for now.  XXX Diagnostic? */
3436 3435          if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
3437 3436                  return (EOPNOTSUPP);
3438 3437  
3439 3438          if ((*diagnostic = sadb_labelchk(ksi)) != 0)
3440 3439                  return (EINVAL);
3441 3440  
3442 3441          /*
3443 3442           * XXX Policy :  I'm not checking identities at this time,
3444 3443           * but if I did, I'd do them here, before I sent
3445 3444           * the weak key check up to the algorithm.
3446 3445           */
3447 3446  
3448 3447          rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3449 3448  
3450 3449          /*
3451 3450           * First locate the authentication algorithm.
3452 3451           */
3453 3452  #ifdef IPSEC_LATENCY_TEST
3454 3453          if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) {
3455 3454  #else
3456 3455          if (akey != NULL) {
3457 3456  #endif
3458 3457                  ipsec_alginfo_t *aalg;
3459 3458  
3460 3459                  aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3461 3460                      [assoc->sadb_sa_auth];
3462 3461                  if (aalg == NULL || !ALG_VALID(aalg)) {
3463 3462                          rw_exit(&ipss->ipsec_alg_lock);
3464 3463                          esp1dbg(espstack, ("Couldn't find auth alg #%d.\n",
3465 3464                              assoc->sadb_sa_auth));
3466 3465                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3467 3466                          return (EINVAL);
3468 3467                  }
3469 3468  
3470 3469                  /*
3471 3470                   * Sanity check key sizes.
3472 3471                   * Note: It's not possible to use SADB_AALG_NONE because
3473 3472                   * this auth_alg is not defined with ALG_FLAG_VALID. If this
3474 3473                   * ever changes, the same check for SADB_AALG_NONE and
3475 3474                   * a auth_key != NULL should be made here ( see below).
3476 3475                   */
3477 3476                  if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
3478 3477                          rw_exit(&ipss->ipsec_alg_lock);
3479 3478                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
3480 3479                          return (EINVAL);
3481 3480                  }
3482 3481                  ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3483 3482  
3484 3483                  /* check key and fix parity if needed */
3485 3484                  if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
3486 3485                      diagnostic) != 0) {
3487 3486                          rw_exit(&ipss->ipsec_alg_lock);
3488 3487                          return (EINVAL);
3489 3488                  }
3490 3489          }
3491 3490  
3492 3491          /*
3493 3492           * Then locate the encryption algorithm.
3494 3493           */
3495 3494          if (ekey != NULL) {
3496 3495                  uint_t keybits;
3497 3496                  ipsec_alginfo_t *ealg;
3498 3497  
3499 3498                  ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3500 3499                      [assoc->sadb_sa_encrypt];
3501 3500                  if (ealg == NULL || !ALG_VALID(ealg)) {
3502 3501                          rw_exit(&ipss->ipsec_alg_lock);
3503 3502                          esp1dbg(espstack, ("Couldn't find encr alg #%d.\n",
3504 3503                              assoc->sadb_sa_encrypt));
3505 3504                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3506 3505                          return (EINVAL);
3507 3506                  }
3508 3507  
3509 3508                  /*
3510 3509                   * Sanity check key sizes. If the encryption algorithm is
3511 3510                   * SADB_EALG_NULL but the encryption key is NOT
3512 3511                   * NULL then complain.
3513 3512                   *
3514 3513                   * The keying material includes salt bits if required by
3515 3514                   * algorithm and optionally the Initial IV, check the
3516 3515                   * length of whats left.
3517 3516                   */
3518 3517                  keybits = ekey->sadb_key_bits;
3519 3518                  keybits -= ekey->sadb_key_reserved;
3520 3519                  keybits -= SADB_8TO1(ealg->alg_saltlen);
3521 3520                  if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
3522 3521                      (!ipsec_valid_key_size(keybits, ealg))) {
3523 3522                          rw_exit(&ipss->ipsec_alg_lock);
3524 3523                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
3525 3524                          return (EINVAL);
3526 3525                  }
3527 3526                  ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3528 3527  
3529 3528                  /* check key */
3530 3529                  if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
3531 3530                      diagnostic) != 0) {
3532 3531                          rw_exit(&ipss->ipsec_alg_lock);
3533 3532                          return (EINVAL);
3534 3533                  }
3535 3534          }
3536 3535          rw_exit(&ipss->ipsec_alg_lock);
3537 3536  
3538 3537          return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
3539 3538              diagnostic, espstack));
3540 3539  }
3541 3540  
3542 3541  /*
3543 3542   * Update a security association.  Updates come in two varieties.  The first
3544 3543   * is an update of lifetimes on a non-larval SA.  The second is an update of
3545 3544   * a larval SA, which ends up looking a lot more like an add.
3546 3545   */
3547 3546  static int
3548 3547  esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3549 3548      ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3550 3549  {
3551 3550          sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3552 3551          mblk_t    *buf_pkt;
3553 3552          int rcode;
3554 3553  
3555 3554          sadb_address_t *dstext =
3556 3555              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3557 3556  
3558 3557          if (dstext == NULL) {
3559 3558                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3560 3559                  return (EINVAL);
3561 3560          }
3562 3561  
3563 3562          rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb,
3564 3563              diagnostic, espstack->esp_pfkey_q, esp_add_sa,
3565 3564              espstack->ipsecesp_netstack, sadb_msg_type);
3566 3565  
3567 3566          if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
3568 3567              (rcode != 0)) {
3569 3568                  return (rcode);
3570 3569          }
3571 3570  
3572 3571          HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec,
3573 3572              espstack->esp_dropper, buf_pkt);
3574 3573  
3575 3574          return (rcode);
3576 3575  }
3577 3576  
3578 3577  /* XXX refactor me */
3579 3578  /*
3580 3579   * Delete a security association.  This is REALLY likely to be code common to
3581 3580   * both AH and ESP.  Find the association, then unlink it.
3582 3581   */
3583 3582  static int
3584 3583  esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3585 3584      ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3586 3585  {
3587 3586          sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3588 3587          sadb_address_t *dstext =
3589 3588              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3590 3589          sadb_address_t *srcext =
3591 3590              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3592 3591          struct sockaddr_in *sin;
3593 3592  
3594 3593          if (assoc == NULL) {
3595 3594                  if (dstext != NULL) {
3596 3595                          sin = (struct sockaddr_in *)(dstext + 1);
3597 3596                  } else if (srcext != NULL) {
3598 3597                          sin = (struct sockaddr_in *)(srcext + 1);
3599 3598                  } else {
3600 3599                          *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3601 3600                          return (EINVAL);
3602 3601                  }
3603 3602                  return (sadb_purge_sa(mp, ksi,
3604 3603                      (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 :
3605 3604                      &espstack->esp_sadb.s_v4, diagnostic,
3606 3605                      espstack->esp_pfkey_q));
3607 3606          }
3608 3607  
3609 3608          return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic,
3610 3609              espstack->esp_pfkey_q, sadb_msg_type));
3611 3610  }
3612 3611  
3613 3612  /* XXX refactor me */
3614 3613  /*
3615 3614   * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
3616 3615   * messages.
3617 3616   */
3618 3617  static void
3619 3618  esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
3620 3619  {
3621 3620          int error;
3622 3621          sadb_msg_t *samsg;
3623 3622  
3624 3623          /*
3625 3624           * Dump each fanout, bailing if error is non-zero.
3626 3625           */
3627 3626  
3628 3627          error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3629 3628              &espstack->esp_sadb.s_v4);
3630 3629          if (error != 0)
3631 3630                  goto bail;
3632 3631  
3633 3632          error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3634 3633              &espstack->esp_sadb.s_v6);
3635 3634  bail:
3636 3635          ASSERT(mp->b_cont != NULL);
3637 3636          samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3638 3637          samsg->sadb_msg_errno = (uint8_t)error;
3639 3638          sadb_pfkey_echo(espstack->esp_pfkey_q, mp,
3640 3639              (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
3641 3640  }
3642 3641  
3643 3642  /*
3644 3643   * First-cut reality check for an inbound PF_KEY message.
3645 3644   */
3646 3645  static boolean_t
3647 3646  esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
3648 3647      ipsecesp_stack_t *espstack)
3649 3648  {
3650 3649          int diagnostic;
3651 3650  
3652 3651          if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
3653 3652                  diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
3654 3653                  goto badmsg;
3655 3654          }
3656 3655          if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
3657 3656              ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
3658 3657                  diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
3659 3658                  goto badmsg;
3660 3659          }
3661 3660          return (B_FALSE);       /* False ==> no failures */
3662 3661  
3663 3662  badmsg:
3664 3663          sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
3665 3664              ksi->ks_in_serial);
3666 3665          return (B_TRUE);        /* True ==> failures */
3667 3666  }
3668 3667  
3669 3668  /*
3670 3669   * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
3671 3670   * error cases.  What I receive is a fully-formed, syntactically legal
3672 3671   * PF_KEY message.  I then need to check semantics...
3673 3672   *
3674 3673   * This code may become common to AH and ESP.  Stay tuned.
3675 3674   *
3676 3675   * I also make the assumption that db_ref's are cool.  If this assumption
3677 3676   * is wrong, this means that someone other than keysock or me has been
3678 3677   * mucking with PF_KEY messages.
3679 3678   */
3680 3679  static void
3681 3680  esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack)
3682 3681  {
3683 3682          mblk_t *msg = mp->b_cont;
3684 3683          sadb_msg_t *samsg;
3685 3684          keysock_in_t *ksi;
3686 3685          int error;
3687 3686          int diagnostic = SADB_X_DIAGNOSTIC_NONE;
3688 3687  
3689 3688          ASSERT(msg != NULL);
3690 3689  
3691 3690          samsg = (sadb_msg_t *)msg->b_rptr;
3692 3691          ksi = (keysock_in_t *)mp->b_rptr;
3693 3692  
3694 3693          /*
3695 3694           * If applicable, convert unspecified AF_INET6 to unspecified
3696 3695           * AF_INET.  And do other address reality checks.
3697 3696           */
3698 3697          if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp,
3699 3698              espstack->ipsecesp_netstack) ||
3700 3699              esp_pfkey_reality_failures(mp, ksi, espstack)) {
3701 3700                  return;
3702 3701          }
3703 3702  
3704 3703          switch (samsg->sadb_msg_type) {
3705 3704          case SADB_ADD:
3706 3705                  error = esp_add_sa(mp, ksi, &diagnostic,
3707 3706                      espstack->ipsecesp_netstack);
3708 3707                  if (error != 0) {
3709 3708                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3710 3709                              diagnostic, ksi->ks_in_serial);
3711 3710                  }
3712 3711                  /* else esp_add_sa() took care of things. */
3713 3712                  break;
3714 3713          case SADB_DELETE:
3715 3714          case SADB_X_DELPAIR:
3716 3715          case SADB_X_DELPAIR_STATE:
3717 3716                  error = esp_del_sa(mp, ksi, &diagnostic, espstack,
3718 3717                      samsg->sadb_msg_type);
3719 3718                  if (error != 0) {
3720 3719                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3721 3720                              diagnostic, ksi->ks_in_serial);
3722 3721                  }
3723 3722                  /* Else esp_del_sa() took care of things. */
3724 3723                  break;
3725 3724          case SADB_GET:
3726 3725                  error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb,
3727 3726                      &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type);
3728 3727                  if (error != 0) {
3729 3728                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3730 3729                              diagnostic, ksi->ks_in_serial);
3731 3730                  }
3732 3731                  /* Else sadb_get_sa() took care of things. */
3733 3732                  break;
3734 3733          case SADB_FLUSH:
3735 3734                  sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack);
3736 3735                  sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL);
3737 3736                  break;
3738 3737          case SADB_REGISTER:
3739 3738                  /*
3740 3739                   * Hmmm, let's do it!  Check for extensions (there should
3741 3740                   * be none), extract the fields, call esp_register_out(),
3742 3741                   * then either free or report an error.
3743 3742                   *
3744 3743                   * Keysock takes care of the PF_KEY bookkeeping for this.
3745 3744                   */
3746 3745                  if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
3747 3746                      ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) {
3748 3747                          freemsg(mp);
3749 3748                  } else {
3750 3749                          /*
3751 3750                           * Only way this path hits is if there is a memory
3752 3751                           * failure.  It will not return B_FALSE because of
3753 3752                           * lack of esp_pfkey_q if I am in wput().
3754 3753                           */
3755 3754                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM,
3756 3755                              diagnostic, ksi->ks_in_serial);
3757 3756                  }
3758 3757                  break;
3759 3758          case SADB_UPDATE:
3760 3759          case SADB_X_UPDATEPAIR:
3761 3760                  /*
3762 3761                   * Find a larval, if not there, find a full one and get
3763 3762                   * strict.
3764 3763                   */
3765 3764                  error = esp_update_sa(mp, ksi, &diagnostic, espstack,
3766 3765                      samsg->sadb_msg_type);
3767 3766                  if (error != 0) {
3768 3767                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3769 3768                              diagnostic, ksi->ks_in_serial);
3770 3769                  }
3771 3770                  /* else esp_update_sa() took care of things. */
3772 3771                  break;
3773 3772          case SADB_GETSPI:
3774 3773                  /*
3775 3774                   * Reserve a new larval entry.
3776 3775                   */
3777 3776                  esp_getspi(mp, ksi, espstack);
3778 3777                  break;
3779 3778          case SADB_ACQUIRE:
3780 3779                  /*
3781 3780                   * Find larval and/or ACQUIRE record and kill it (them), I'm
3782 3781                   * most likely an error.  Inbound ACQUIRE messages should only
3783 3782                   * have the base header.
3784 3783                   */
3785 3784                  sadb_in_acquire(samsg, &espstack->esp_sadb,
3786 3785                      espstack->esp_pfkey_q, espstack->ipsecesp_netstack);
3787 3786                  freemsg(mp);
3788 3787                  break;
3789 3788          case SADB_DUMP:
3790 3789                  /*
3791 3790                   * Dump all entries.
3792 3791                   */
3793 3792                  esp_dump(mp, ksi, espstack);
3794 3793                  /* esp_dump will take care of the return message, etc. */
3795 3794                  break;
3796 3795          case SADB_EXPIRE:
3797 3796                  /* Should never reach me. */
3798 3797                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP,
3799 3798                      diagnostic, ksi->ks_in_serial);
3800 3799                  break;
3801 3800          default:
3802 3801                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL,
3803 3802                      SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
3804 3803                  break;
3805 3804          }
3806 3805  }
3807 3806  
3808 3807  /*
3809 3808   * Handle case where PF_KEY says it can't find a keysock for one of my
3810 3809   * ACQUIRE messages.
3811 3810   */
3812 3811  static void
3813 3812  esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack)
3814 3813  {
3815 3814          sadb_msg_t *samsg;
3816 3815          keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
3817 3816  
3818 3817          if (mp->b_cont == NULL) {
3819 3818                  freemsg(mp);
3820 3819                  return;
3821 3820          }
3822 3821          samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3823 3822  
3824 3823          /*
3825 3824           * If keysock can't find any registered, delete the acquire record
3826 3825           * immediately, and handle errors.
3827 3826           */
3828 3827          if (samsg->sadb_msg_type == SADB_ACQUIRE) {
3829 3828                  samsg->sadb_msg_errno = kse->ks_err_errno;
3830 3829                  samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
3831 3830                  /*
3832 3831                   * Use the write-side of the esp_pfkey_q
3833 3832                   */
3834 3833                  sadb_in_acquire(samsg, &espstack->esp_sadb,
3835 3834                      WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack);
3836 3835          }
3837 3836  
3838 3837          freemsg(mp);
3839 3838  }
3840 3839  
3841 3840  /*
3842 3841   * ESP module write put routine.
3843 3842   */
3844 3843  static void
3845 3844  ipsecesp_wput(queue_t *q, mblk_t *mp)
3846 3845  {
3847 3846          ipsec_info_t *ii;
3848 3847          struct iocblk *iocp;
3849 3848          ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
3850 3849  
3851 3850          esp3dbg(espstack, ("In esp_wput().\n"));
3852 3851  
3853 3852          /* NOTE: Each case must take care of freeing or passing mp. */
3854 3853          switch (mp->b_datap->db_type) {
3855 3854          case M_CTL:
3856 3855                  if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
3857 3856                          /* Not big enough message. */
3858 3857                          freemsg(mp);
3859 3858                          break;
3860 3859                  }
3861 3860                  ii = (ipsec_info_t *)mp->b_rptr;
3862 3861  
3863 3862                  switch (ii->ipsec_info_type) {
3864 3863                  case KEYSOCK_OUT_ERR:
3865 3864                          esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n"));
3866 3865                          esp_keysock_no_socket(mp, espstack);
3867 3866                          break;
3868 3867                  case KEYSOCK_IN:
3869 3868                          ESP_BUMP_STAT(espstack, keysock_in);
3870 3869                          esp3dbg(espstack, ("Got KEYSOCK_IN message.\n"));
3871 3870  
3872 3871                          /* Parse the message. */
3873 3872                          esp_parse_pfkey(mp, espstack);
3874 3873                          break;
3875 3874                  case KEYSOCK_HELLO:
3876 3875                          sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp,
3877 3876                              esp_ager, (void *)espstack, &espstack->esp_event,
3878 3877                              SADB_SATYPE_ESP);
3879 3878                          break;
3880 3879                  default:
3881 3880                          esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n",
3882 3881                              ii->ipsec_info_type));
3883 3882                          freemsg(mp);
3884 3883                          break;
3885 3884                  }
3886 3885                  break;
3887 3886          case M_IOCTL:
3888 3887                  iocp = (struct iocblk *)mp->b_rptr;
3889 3888                  switch (iocp->ioc_cmd) {
3890 3889                  case ND_SET:
3891 3890                  case ND_GET:
3892 3891                          if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) {
3893 3892                                  qreply(q, mp);
3894 3893                                  return;
3895 3894                          } else {
3896 3895                                  iocp->ioc_error = ENOENT;
3897 3896                          }
3898 3897                          /* FALLTHRU */
3899 3898                  default:
3900 3899                          /* We really don't support any other ioctls, do we? */
3901 3900  
3902 3901                          /* Return EINVAL */
3903 3902                          if (iocp->ioc_error != ENOENT)
3904 3903                                  iocp->ioc_error = EINVAL;
3905 3904                          iocp->ioc_count = 0;
3906 3905                          mp->b_datap->db_type = M_IOCACK;
3907 3906                          qreply(q, mp);
3908 3907                          return;
3909 3908                  }
3910 3909          default:
3911 3910                  esp3dbg(espstack,
3912 3911                      ("Got default message, type %d, passing to IP.\n",
3913 3912                      mp->b_datap->db_type));
3914 3913                  putnext(q, mp);
3915 3914          }
3916 3915  }
3917 3916  
3918 3917  /*
3919 3918   * Wrapper to allow IP to trigger an ESP association failure message
3920 3919   * during inbound SA selection.
3921 3920   */
3922 3921  void
3923 3922  ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
3924 3923      uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
3925 3924  {
3926 3925          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3927 3926          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3928 3927          ipsec_stack_t   *ipss = ns->netstack_ipsec;
3929 3928  
3930 3929          if (espstack->ipsecesp_log_unknown_spi) {
3931 3930                  ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
3932 3931                      addr, af, espstack->ipsecesp_netstack);
3933 3932          }
3934 3933  
3935 3934          ip_drop_packet(mp, B_TRUE, ira->ira_ill,
3936 3935              DROPPER(ipss, ipds_esp_no_sa),
3937 3936              &espstack->esp_dropper);
3938 3937  }
3939 3938  
3940 3939  /*
3941 3940   * Initialize the ESP input and output processing functions.
3942 3941   */
3943 3942  void
3944 3943  ipsecesp_init_funcs(ipsa_t *sa)
3945 3944  {
3946 3945          if (sa->ipsa_output_func == NULL)
3947 3946                  sa->ipsa_output_func = esp_outbound;
3948 3947          if (sa->ipsa_input_func == NULL)
3949 3948                  sa->ipsa_input_func = esp_inbound;
3950 3949  }
  
    | 
      ↓ open down ↓ | 
    3772 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX