Print this page
    
Bayard's initial drop, needs finishing, or at least testing.
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ip/ipsecesp.c
          +++ new/usr/src/uts/common/inet/ip/ipsecesp.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
       24 + * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
  24   25   */
  25   26  
  26   27  #include <sys/types.h>
  27   28  #include <sys/stream.h>
  28   29  #include <sys/stropts.h>
  29   30  #include <sys/errno.h>
  30   31  #include <sys/strlog.h>
  31   32  #include <sys/tihdr.h>
  32   33  #include <sys/socket.h>
  33   34  #include <sys/ddi.h>
  34   35  #include <sys/sunddi.h>
  35   36  #include <sys/kmem.h>
  36   37  #include <sys/zone.h>
  37   38  #include <sys/sysmacros.h>
  38   39  #include <sys/cmn_err.h>
  39   40  #include <sys/vtrace.h>
  40   41  #include <sys/debug.h>
  41   42  #include <sys/atomic.h>
  42   43  #include <sys/strsun.h>
  43   44  #include <sys/random.h>
  44   45  #include <netinet/in.h>
  45   46  #include <net/if.h>
  46   47  #include <netinet/ip6.h>
  47   48  #include <net/pfkeyv2.h>
  48   49  #include <net/pfpolicy.h>
  49   50  
  50   51  #include <inet/common.h>
  51   52  #include <inet/mi.h>
  52   53  #include <inet/nd.h>
  53   54  #include <inet/ip.h>
  54   55  #include <inet/ip_impl.h>
  55   56  #include <inet/ip6.h>
  56   57  #include <inet/ip_if.h>
  57   58  #include <inet/ip_ndp.h>
  58   59  #include <inet/sadb.h>
  59   60  #include <inet/ipsec_info.h>
  60   61  #include <inet/ipsec_impl.h>
  61   62  #include <inet/ipsecesp.h>
  62   63  #include <inet/ipdrop.h>
  63   64  #include <inet/tcp.h>
  64   65  #include <sys/kstat.h>
  65   66  #include <sys/policy.h>
  66   67  #include <sys/strsun.h>
  67   68  #include <sys/strsubr.h>
  68   69  #include <inet/udp_impl.h>
  69   70  #include <sys/taskq.h>
  70   71  #include <sys/note.h>
  71   72  
  72   73  #include <sys/tsol/tnet.h>
  73   74  
  74   75  /*
  75   76   * Table of ND variables supported by ipsecesp. These are loaded into
  76   77   * ipsecesp_g_nd in ipsecesp_init_nd.
  77   78   * All of these are alterable, within the min/max values given, at run time.
  78   79   */
  79   80  static  ipsecespparam_t lcl_param_arr[] = {
  80   81          /* min  max                     value   name */
  81   82          { 0,    3,                      0,      "ipsecesp_debug"},
  82   83          { 125,  32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
  83   84          { 1,    10,                     1,      "ipsecesp_reap_delay"},
  84   85          { 1,    SADB_MAX_REPLAY,        64,     "ipsecesp_replay_size"},
  85   86          { 1,    300,                    15,     "ipsecesp_acquire_timeout"},
  86   87          { 1,    1800,                   90,     "ipsecesp_larval_timeout"},
  87   88          /* Default lifetime values for ACQUIRE messages. */
  
    | 
      ↓ open down ↓ | 
    54 lines elided | 
    
      ↑ open up ↑ | 
  
  88   89          { 0,    0xffffffffU,    0,      "ipsecesp_default_soft_bytes"},
  89   90          { 0,    0xffffffffU,    0,      "ipsecesp_default_hard_bytes"},
  90   91          { 0,    0xffffffffU,    24000,  "ipsecesp_default_soft_addtime"},
  91   92          { 0,    0xffffffffU,    28800,  "ipsecesp_default_hard_addtime"},
  92   93          { 0,    0xffffffffU,    0,      "ipsecesp_default_soft_usetime"},
  93   94          { 0,    0xffffffffU,    0,      "ipsecesp_default_hard_usetime"},
  94   95          { 0,    1,              0,      "ipsecesp_log_unknown_spi"},
  95   96          { 0,    2,              1,      "ipsecesp_padding_check"},
  96   97          { 0,    600,            20,     "ipsecesp_nat_keepalive_interval"},
  97   98  };
  98      -#define ipsecesp_debug  ipsecesp_params[0].ipsecesp_param_value
  99      -#define ipsecesp_age_interval ipsecesp_params[1].ipsecesp_param_value
 100      -#define ipsecesp_age_int_max    ipsecesp_params[1].ipsecesp_param_max
 101      -#define ipsecesp_reap_delay     ipsecesp_params[2].ipsecesp_param_value
 102      -#define ipsecesp_replay_size    ipsecesp_params[3].ipsecesp_param_value
 103      -#define ipsecesp_acquire_timeout        \
 104      -        ipsecesp_params[4].ipsecesp_param_value
 105      -#define ipsecesp_larval_timeout \
 106      -        ipsecesp_params[5].ipsecesp_param_value
 107      -#define ipsecesp_default_soft_bytes     \
 108      -        ipsecesp_params[6].ipsecesp_param_value
 109      -#define ipsecesp_default_hard_bytes     \
 110      -        ipsecesp_params[7].ipsecesp_param_value
 111      -#define ipsecesp_default_soft_addtime   \
 112      -        ipsecesp_params[8].ipsecesp_param_value
 113      -#define ipsecesp_default_hard_addtime   \
 114      -        ipsecesp_params[9].ipsecesp_param_value
 115      -#define ipsecesp_default_soft_usetime   \
 116      -        ipsecesp_params[10].ipsecesp_param_value
 117      -#define ipsecesp_default_hard_usetime   \
 118      -        ipsecesp_params[11].ipsecesp_param_value
 119      -#define ipsecesp_log_unknown_spi        \
 120      -        ipsecesp_params[12].ipsecesp_param_value
 121      -#define ipsecesp_padding_check  \
 122      -        ipsecesp_params[13].ipsecesp_param_value
 123   99  /* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */
 124  100  
 125  101  #define esp0dbg(a)      printf a
 126  102  /* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
 127  103  #define esp1dbg(espstack, a)    if (espstack->ipsecesp_debug != 0) printf a
 128  104  #define esp2dbg(espstack, a)    if (espstack->ipsecesp_debug > 1) printf a
 129  105  #define esp3dbg(espstack, a)    if (espstack->ipsecesp_debug > 2) printf a
 130  106  
 131  107  static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
 132  108  static int ipsecesp_close(queue_t *);
 133  109  static void ipsecesp_wput(queue_t *, mblk_t *);
 134  110  static void     *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
 135  111  static void     ipsecesp_stack_fini(netstackid_t stackid, void *arg);
 136      -static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);
 137  112  
 138  113  static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
 139  114  static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *);
 140  115  static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *);
 141  116  
 142  117  static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
 143  118      ipsecesp_stack_t *, cred_t *);
 144  119  static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
 145  120      kstat_named_t **, ipsecesp_stack_t *);
 146  121  static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *,
 147  122      ipsa_t *, uint_t);
 148  123  static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *,
 149  124      ipsa_t *, uchar_t *, uint_t);
 150  125  
  151  126  /* Settable in /etc/system */
 152  127  uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;
 153  128  
 154  129  static struct module_info info = {
 155  130          5137, "ipsecesp", 0, INFPSZ, 65536, 1024
 156  131  };
 157  132  
 158  133  static struct qinit rinit = {
 159  134          (pfi_t)putnext, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
 160  135          NULL
 161  136  };
 162  137  
 163  138  static struct qinit winit = {
 164  139          (pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
 165  140          NULL
 166  141  };
 167  142  
 168  143  struct streamtab ipsecespinfo = {
 169  144          &rinit, &winit, NULL, NULL
 170  145  };
 171  146  
 172  147  static taskq_t *esp_taskq;
 173  148  
  
    | 
      ↓ open down ↓ | 
    27 lines elided | 
    
      ↑ open up ↑ | 
  
 174  149  /*
 175  150   * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
 176  151   *
 177  152   * Question:    Do I need this, given that all instance's esps->esps_wq point
 178  153   *              to IP?
 179  154   *
 180  155   * Answer:      Yes, because I need to know which queue is BOUND to
 181  156   *              IPPROTO_ESP
 182  157   */
 183  158  
 184      -/*
 185      - * Stats.  This may eventually become a full-blown SNMP MIB once that spec
 186      - * stabilizes.
 187      - */
 188      -
 189      -typedef struct esp_kstats_s {
 190      -        kstat_named_t esp_stat_num_aalgs;
 191      -        kstat_named_t esp_stat_good_auth;
 192      -        kstat_named_t esp_stat_bad_auth;
 193      -        kstat_named_t esp_stat_bad_padding;
 194      -        kstat_named_t esp_stat_replay_failures;
 195      -        kstat_named_t esp_stat_replay_early_failures;
 196      -        kstat_named_t esp_stat_keysock_in;
 197      -        kstat_named_t esp_stat_out_requests;
 198      -        kstat_named_t esp_stat_acquire_requests;
 199      -        kstat_named_t esp_stat_bytes_expired;
 200      -        kstat_named_t esp_stat_out_discards;
 201      -        kstat_named_t esp_stat_crypto_sync;
 202      -        kstat_named_t esp_stat_crypto_async;
 203      -        kstat_named_t esp_stat_crypto_failures;
 204      -        kstat_named_t esp_stat_num_ealgs;
 205      -        kstat_named_t esp_stat_bad_decrypt;
 206      -        kstat_named_t esp_stat_sa_port_renumbers;
 207      -} esp_kstats_t;
 208      -
 209      -/*
 210      - * espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if
 211      - * kstat_create_netstack for espstack->esp_ksp succeeds, but when it
 212      - * fails, it will be NULL. Note this is done for all stack instances,
 213      - * so it *could* fail. hence a non-NULL checking is done for
 214      - * ESP_BUMP_STAT and ESP_DEBUMP_STAT
 215      - */
 216      -#define ESP_BUMP_STAT(espstack, x)                                      \
 217      -do {                                                                    \
 218      -        if (espstack->esp_kstats != NULL)                               \
 219      -                (espstack->esp_kstats->esp_stat_ ## x).value.ui64++;    \
 220      -_NOTE(CONSTCOND)                                                        \
 221      -} while (0)
 222      -
 223      -#define ESP_DEBUMP_STAT(espstack, x)                                    \
 224      -do {                                                                    \
 225      -        if (espstack->esp_kstats != NULL)                               \
 226      -                (espstack->esp_kstats->esp_stat_ ## x).value.ui64--;    \
 227      -_NOTE(CONSTCOND)                                                        \
 228      -} while (0)
 229      -
 230  159  static int      esp_kstat_update(kstat_t *, int);
 231  160  
 232  161  static boolean_t
 233  162  esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
 234  163  {
 235  164          espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
 236  165              "net", KSTAT_TYPE_NAMED,
 237  166              sizeof (esp_kstats_t) / sizeof (kstat_named_t),
 238  167              KSTAT_FLAG_PERSISTENT, stackid);
 239  168  
 240  169          if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
 241  170                  return (B_FALSE);
 242  171  
 243  172          espstack->esp_kstats = espstack->esp_ksp->ks_data;
 244  173  
 245  174          espstack->esp_ksp->ks_update = esp_kstat_update;
 246  175          espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid;
 247  176  
 248  177  #define K64 KSTAT_DATA_UINT64
 249  178  #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64)
 250  179  
 251  180          KI(num_aalgs);
 252  181          KI(num_ealgs);
 253  182          KI(good_auth);
 254  183          KI(bad_auth);
 255  184          KI(bad_padding);
 256  185          KI(replay_failures);
 257  186          KI(replay_early_failures);
 258  187          KI(keysock_in);
 259  188          KI(out_requests);
 260  189          KI(acquire_requests);
 261  190          KI(bytes_expired);
 262  191          KI(out_discards);
 263  192          KI(crypto_sync);
 264  193          KI(crypto_async);
 265  194          KI(crypto_failures);
 266  195          KI(bad_decrypt);
 267  196          KI(sa_port_renumbers);
 268  197  
 269  198  #undef KI
 270  199  #undef K64
 271  200  
 272  201          kstat_install(espstack->esp_ksp);
 273  202  
 274  203          return (B_TRUE);
 275  204  }
 276  205  
 277  206  static int
 278  207  esp_kstat_update(kstat_t *kp, int rw)
 279  208  {
 280  209          esp_kstats_t *ekp;
 281  210          netstackid_t    stackid = (zoneid_t)(uintptr_t)kp->ks_private;
 282  211          netstack_t      *ns;
 283  212          ipsec_stack_t   *ipss;
 284  213  
 285  214          if ((kp == NULL) || (kp->ks_data == NULL))
 286  215                  return (EIO);
 287  216  
 288  217          if (rw == KSTAT_WRITE)
 289  218                  return (EACCES);
 290  219  
  
    | 
      ↓ open down ↓ | 
    51 lines elided | 
    
      ↑ open up ↑ | 
  
 291  220          ns = netstack_find_by_stackid(stackid);
 292  221          if (ns == NULL)
 293  222                  return (-1);
 294  223          ipss = ns->netstack_ipsec;
 295  224          if (ipss == NULL) {
 296  225                  netstack_rele(ns);
 297  226                  return (-1);
 298  227          }
 299  228          ekp = (esp_kstats_t *)kp->ks_data;
 300  229  
 301      -        mutex_enter(&ipss->ipsec_alg_lock);
      230 +        rw_enter(&ipss->ipsec_alg_lock, RW_READER);
 302  231          ekp->esp_stat_num_aalgs.value.ui64 =
 303  232              ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
 304  233          ekp->esp_stat_num_ealgs.value.ui64 =
 305  234              ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
 306      -        mutex_exit(&ipss->ipsec_alg_lock);
      235 +        rw_exit(&ipss->ipsec_alg_lock);
 307  236  
 308  237          netstack_rele(ns);
 309  238          return (0);
 310  239  }
 311  240  
 312  241  #ifdef DEBUG
 313  242  /*
 314  243   * Debug routine, useful to see pre-encryption data.
 315  244   */
 316  245  static char *
 317  246  dump_msg(mblk_t *mp)
 318  247  {
 319  248          char tmp_str[3], tmp_line[256];
 320  249  
 321  250          while (mp != NULL) {
 322  251                  unsigned char *ptr;
 323  252  
 324  253                  printf("mblk address 0x%p, length %ld, db_ref %d "
 325  254                      "type %d, base 0x%p, lim 0x%p\n",
 326  255                      (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
 327  256                      mp->b_datap->db_ref, mp->b_datap->db_type,
 328  257                      (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
 329  258                  ptr = mp->b_rptr;
 330  259  
 331  260                  tmp_line[0] = '\0';
 332  261                  while (ptr < mp->b_wptr) {
 333  262                          uint_t diff;
 334  263  
 335  264                          diff = (ptr - mp->b_rptr);
 336  265                          if (!(diff & 0x1f)) {
 337  266                                  if (strlen(tmp_line) > 0) {
 338  267                                          printf("bytes: %s\n", tmp_line);
 339  268                                          tmp_line[0] = '\0';
 340  269                                  }
 341  270                          }
 342  271                          if (!(diff & 0x3))
 343  272                                  (void) strcat(tmp_line, " ");
 344  273                          (void) sprintf(tmp_str, "%02x", *ptr);
 345  274                          (void) strcat(tmp_line, tmp_str);
 346  275                          ptr++;
 347  276                  }
 348  277                  if (strlen(tmp_line) > 0)
 349  278                          printf("bytes: %s\n", tmp_line);
 350  279  
 351  280                  mp = mp->b_cont;
 352  281          }
 353  282  
 354  283          return ("\n");
 355  284  }
 356  285  
 357  286  #else /* DEBUG */
 358  287  static char *
 359  288  dump_msg(mblk_t *mp)
 360  289  {
 361  290          printf("Find value of mp %p.\n", mp);
 362  291          return ("\n");
 363  292  }
 364  293  #endif /* DEBUG */
 365  294  
 366  295  /*
 367  296   * Don't have to lock age_interval, as only one thread will access it at
 368  297   * a time, because I control the one function that does with timeout().
 369  298   */
 370  299  static void
 371  300  esp_ager(void *arg)
 372  301  {
 373  302          ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
 374  303          netstack_t      *ns = espstack->ipsecesp_netstack;
 375  304          hrtime_t begin = gethrtime();
 376  305  
 377  306          sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q,
 378  307              espstack->ipsecesp_reap_delay, ns);
 379  308          sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q,
 380  309              espstack->ipsecesp_reap_delay, ns);
 381  310  
 382  311          espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q,
 383  312              esp_ager, espstack,
 384  313              &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max,
 385  314              info.mi_idnum);
 386  315  }
 387  316  
 388  317  /*
 389  318   * Get an ESP NDD parameter.
 390  319   */
 391  320  /* ARGSUSED */
 392  321  static int
 393  322  ipsecesp_param_get(q, mp, cp, cr)
 394  323          queue_t *q;
 395  324          mblk_t  *mp;
 396  325          caddr_t cp;
 397  326          cred_t *cr;
 398  327  {
 399  328          ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
 400  329          uint_t value;
 401  330          ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
 402  331  
 403  332          mutex_enter(&espstack->ipsecesp_param_lock);
 404  333          value = ipsecesppa->ipsecesp_param_value;
 405  334          mutex_exit(&espstack->ipsecesp_param_lock);
 406  335  
 407  336          (void) mi_mpprintf(mp, "%u", value);
 408  337          return (0);
 409  338  }
 410  339  
 411  340  /*
 412  341   * This routine sets an NDD variable in a ipsecespparam_t structure.
 413  342   */
 414  343  /* ARGSUSED */
 415  344  static int
 416  345  ipsecesp_param_set(q, mp, value, cp, cr)
 417  346          queue_t *q;
 418  347          mblk_t  *mp;
 419  348          char    *value;
 420  349          caddr_t cp;
 421  350          cred_t *cr;
 422  351  {
 423  352          ulong_t new_value;
 424  353          ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
 425  354          ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
 426  355  
 427  356          /*
 428  357           * Fail the request if the new value does not lie within the
 429  358           * required bounds.
 430  359           */
 431  360          if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
 432  361              new_value < ipsecesppa->ipsecesp_param_min ||
 433  362              new_value > ipsecesppa->ipsecesp_param_max) {
 434  363                  return (EINVAL);
 435  364          }
 436  365  
 437  366          /* Set the new value */
 438  367          mutex_enter(&espstack->ipsecesp_param_lock);
 439  368          ipsecesppa->ipsecesp_param_value = new_value;
 440  369          mutex_exit(&espstack->ipsecesp_param_lock);
 441  370          return (0);
 442  371  }
 443  372  
 444  373  /*
 445  374   * Using lifetime NDD variables, fill in an extended combination's
 446  375   * lifetime information.
 447  376   */
 448  377  void
 449  378  ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
 450  379  {
 451  380          ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
 452  381  
 453  382          ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes;
 454  383          ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes;
 455  384          ecomb->sadb_x_ecomb_soft_addtime =
 456  385              espstack->ipsecesp_default_soft_addtime;
 457  386          ecomb->sadb_x_ecomb_hard_addtime =
 458  387              espstack->ipsecesp_default_hard_addtime;
 459  388          ecomb->sadb_x_ecomb_soft_usetime =
 460  389              espstack->ipsecesp_default_soft_usetime;
 461  390          ecomb->sadb_x_ecomb_hard_usetime =
 462  391              espstack->ipsecesp_default_hard_usetime;
 463  392  }
 464  393  
 465  394  /*
 466  395   * Initialize things for ESP at module load time.
 467  396   */
 468  397  boolean_t
 469  398  ipsecesp_ddi_init(void)
 470  399  {
 471  400          esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
 472  401              IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
 473  402  
 474  403          /*
 475  404           * We want to be informed each time a stack is created or
 476  405           * destroyed in the kernel, so we can maintain the
 477  406           * set of ipsecesp_stack_t's.
 478  407           */
 479  408          netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL,
 480  409              ipsecesp_stack_fini);
 481  410  
 482  411          return (B_TRUE);
 483  412  }
 484  413  
 485  414  /*
 486  415   * Walk through the param array specified registering each element with the
 487  416   * named dispatch handler.
 488  417   */
 489  418  static boolean_t
 490  419  ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt)
 491  420  {
 492  421          for (; cnt-- > 0; espp++) {
 493  422                  if (espp->ipsecesp_param_name != NULL &&
 494  423                      espp->ipsecesp_param_name[0]) {
 495  424                          if (!nd_load(ndp,
  
    | 
      ↓ open down ↓ | 
    179 lines elided | 
    
      ↑ open up ↑ | 
  
 496  425                              espp->ipsecesp_param_name,
 497  426                              ipsecesp_param_get, ipsecesp_param_set,
 498  427                              (caddr_t)espp)) {
 499  428                                  nd_free(ndp);
 500  429                                  return (B_FALSE);
 501  430                          }
 502  431                  }
 503  432          }
 504  433          return (B_TRUE);
 505  434  }
      435 +
 506  436  /*
 507  437   * Initialize things for ESP for each stack instance
 508  438   */
 509  439  static void *
 510  440  ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns)
 511  441  {
 512  442          ipsecesp_stack_t        *espstack;
 513  443          ipsecespparam_t         *espp;
 514  444  
 515  445          espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack),
 516  446              KM_SLEEP);
 517  447          espstack->ipsecesp_netstack = ns;
 518  448  
 519  449          espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
  
    | 
      ↓ open down ↓ | 
    4 lines elided | 
    
      ↑ open up ↑ | 
  
 520  450          espstack->ipsecesp_params = espp;
 521  451          bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr));
 522  452  
 523  453          (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp,
 524  454              A_CNT(lcl_param_arr));
 525  455  
 526  456          (void) esp_kstat_init(espstack, stackid);
 527  457  
 528  458          espstack->esp_sadb.s_acquire_timeout =
 529  459              &espstack->ipsecesp_acquire_timeout;
 530      -        espstack->esp_sadb.s_acqfn = esp_send_acquire;
 531  460          sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size,
 532  461              espstack->ipsecesp_netstack);
 533  462  
 534  463          mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);
 535  464  
 536  465          ip_drop_register(&espstack->esp_dropper, "IPsec ESP");
 537  466          return (espstack);
 538  467  }
 539  468  
 540  469  /*
 541  470   * Destroy things for ESP at module unload time.
 542  471   */
 543  472  void
 544  473  ipsecesp_ddi_destroy(void)
 545  474  {
 546  475          netstack_unregister(NS_IPSECESP);
 547  476          taskq_destroy(esp_taskq);
 548  477  }
 549  478  
 550  479  /*
  
    | 
      ↓ open down ↓ | 
    10 lines elided | 
    
      ↑ open up ↑ | 
  
 551  480   * Destroy things for ESP for one stack instance
 552  481   */
 553  482  static void
 554  483  ipsecesp_stack_fini(netstackid_t stackid, void *arg)
 555  484  {
 556  485          ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
 557  486  
 558  487          if (espstack->esp_pfkey_q != NULL) {
 559  488                  (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event);
 560  489          }
 561      -        espstack->esp_sadb.s_acqfn = NULL;
 562  490          espstack->esp_sadb.s_acquire_timeout = NULL;
 563  491          sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack);
 564  492          ip_drop_unregister(&espstack->esp_dropper);
 565  493          mutex_destroy(&espstack->ipsecesp_param_lock);
 566  494          nd_free(&espstack->ipsecesp_g_nd);
 567  495  
 568  496          kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr));
 569  497          espstack->ipsecesp_params = NULL;
 570  498          kstat_delete_netstack(espstack->esp_ksp, stackid);
 571  499          espstack->esp_ksp = NULL;
 572  500          espstack->esp_kstats = NULL;
 573  501          kmem_free(espstack, sizeof (*espstack));
 574  502  }
 575  503  
 576  504  /*
 577  505   * ESP module open routine, which is here for keysock plumbing.
 578  506   * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
 579  507   * Days of export control, and fears that ESP would not be allowed
 580  508   * to be shipped at all by default.  Eventually, keysock should
 581  509   * either access AH and ESP via modstubs or krtld dependencies, or
 582  510   * perhaps be folded in with AH and ESP into a single IPsec/netsec
 583  511   * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
 584  512   */
 585  513  /* ARGSUSED */
 586  514  static int
 587  515  ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 588  516  {
 589  517          netstack_t              *ns;
 590  518          ipsecesp_stack_t        *espstack;
 591  519  
 592  520          if (secpolicy_ip_config(credp, B_FALSE) != 0)
 593  521                  return (EPERM);
 594  522  
 595  523          if (q->q_ptr != NULL)
 596  524                  return (0);  /* Re-open of an already open instance. */
 597  525  
 598  526          if (sflag != MODOPEN)
 599  527                  return (EINVAL);
 600  528  
 601  529          ns = netstack_find_by_cred(credp);
 602  530          ASSERT(ns != NULL);
 603  531          espstack = ns->netstack_ipsecesp;
 604  532          ASSERT(espstack != NULL);
 605  533  
 606  534          q->q_ptr = espstack;
 607  535          WR(q)->q_ptr = q->q_ptr;
 608  536  
 609  537          qprocson(q);
 610  538          return (0);
 611  539  }
 612  540  
 613  541  /*
 614  542   * ESP module close routine.
 615  543   */
 616  544  static int
 617  545  ipsecesp_close(queue_t *q)
 618  546  {
 619  547          ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
 620  548  
 621  549          /*
 622  550           * Clean up q_ptr, if needed.
 623  551           */
 624  552          qprocsoff(q);
 625  553  
 626  554          /* Keysock queue check is safe, because of OCEXCL perimeter. */
 627  555  
 628  556          if (q == espstack->esp_pfkey_q) {
 629  557                  esp1dbg(espstack,
 630  558                      ("ipsecesp_close:  Ummm... keysock is closing ESP.\n"));
 631  559                  espstack->esp_pfkey_q = NULL;
 632  560                  /* Detach qtimeouts. */
 633  561                  (void) quntimeout(q, espstack->esp_event);
 634  562          }
 635  563  
 636  564          netstack_rele(espstack->ipsecesp_netstack);
 637  565          return (0);
 638  566  }
 639  567  
 640  568  /*
 641  569   * Add a number of bytes to what the SA has protected so far.  Return
 642  570   * B_TRUE if the SA can still protect that many bytes.
 643  571   *
 644  572   * Caller must REFRELE the passed-in assoc.  This function must REFRELE
 645  573   * any obtained peer SA.
 646  574   */
 647  575  static boolean_t
 648  576  esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
 649  577  {
 650  578          ipsa_t *inassoc, *outassoc;
 651  579          isaf_t *bucket;
 652  580          boolean_t inrc, outrc, isv6;
 653  581          sadb_t *sp;
 654  582          int outhash;
 655  583          netstack_t              *ns = assoc->ipsa_netstack;
 656  584          ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
 657  585  
 658  586          /* No peer?  No problem! */
 659  587          if (!assoc->ipsa_haspeer) {
 660  588                  return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes,
 661  589                      B_TRUE));
 662  590          }
 663  591  
 664  592          /*
 665  593           * Otherwise, we want to grab both the original assoc and its peer.
 666  594           * There might be a race for this, but if it's a real race, two
 667  595           * expire messages may occur.  We limit this by only sending the
 668  596           * expire message on one of the peers, we'll pick the inbound
 669  597           * arbitrarily.
 670  598           *
 671  599           * If we need tight synchronization on the peer SA, then we need to
 672  600           * reconsider.
 673  601           */
 674  602  
 675  603          /* Use address length to select IPv6/IPv4 */
 676  604          isv6 = (assoc->ipsa_addrfam == AF_INET6);
 677  605          sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
 678  606  
 679  607          if (inbound) {
 680  608                  inassoc = assoc;
 681  609                  if (isv6) {
 682  610                          outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
 683  611                              &inassoc->ipsa_dstaddr));
 684  612                  } else {
 685  613                          outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
 686  614                              &inassoc->ipsa_dstaddr));
 687  615                  }
 688  616                  bucket = &sp->sdb_of[outhash];
 689  617                  mutex_enter(&bucket->isaf_lock);
 690  618                  outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
 691  619                      inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
 692  620                      inassoc->ipsa_addrfam);
 693  621                  mutex_exit(&bucket->isaf_lock);
 694  622                  if (outassoc == NULL) {
 695  623                          /* Q: Do we wish to set haspeer == B_FALSE? */
 696  624                          esp0dbg(("esp_age_bytes: "
 697  625                              "can't find peer for inbound.\n"));
 698  626                          return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc,
 699  627                              bytes, B_TRUE));
 700  628                  }
 701  629          } else {
 702  630                  outassoc = assoc;
 703  631                  bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
 704  632                  mutex_enter(&bucket->isaf_lock);
 705  633                  inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
 706  634                      outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
 707  635                      outassoc->ipsa_addrfam);
 708  636                  mutex_exit(&bucket->isaf_lock);
 709  637                  if (inassoc == NULL) {
 710  638                          /* Q: Do we wish to set haspeer == B_FALSE? */
 711  639                          esp0dbg(("esp_age_bytes: "
 712  640                              "can't find peer for outbound.\n"));
 713  641                          return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc,
 714  642                              bytes, B_TRUE));
 715  643                  }
 716  644          }
 717  645  
 718  646          inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE);
 719  647          outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE);
 720  648  
 721  649          /*
 722  650           * REFRELE any peer SA.
 723  651           *
 724  652           * Because of the multi-line macro nature of IPSA_REFRELE, keep
 725  653           * them in { }.
 726  654           */
 727  655          if (inbound) {
 728  656                  IPSA_REFRELE(outassoc);
 729  657          } else {
 730  658                  IPSA_REFRELE(inassoc);
 731  659          }
 732  660  
 733  661          return (inrc && outrc);
 734  662  }
 735  663  
 736  664  /*
 737  665   * Do incoming NAT-T manipulations for packet.
 738  666   * Returns NULL if the mblk chain is consumed.
 739  667   */
 740  668  static mblk_t *
 741  669  esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
 742  670  {
 743  671          ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
 744  672          tcpha_t *tcpha;
 745  673          udpha_t *udpha;
 746  674          /* Initialize to our inbound cksum adjustment... */
 747  675          uint32_t sum = assoc->ipsa_inbound_cksum;
 748  676  
 749  677          switch (ipha->ipha_protocol) {
 750  678          case IPPROTO_TCP:
 751  679                  tcpha = (tcpha_t *)(data_mp->b_rptr +
 752  680                      IPH_HDR_LENGTH(ipha));
 753  681  
 754  682  #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) +       ((x) >> 16)
 755  683                  sum += ~ntohs(tcpha->tha_sum) & 0xFFFF;
 756  684                  DOWN_SUM(sum);
 757  685                  DOWN_SUM(sum);
 758  686                  tcpha->tha_sum = ~htons(sum);
 759  687                  break;
 760  688          case IPPROTO_UDP:
 761  689                  udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));
 762  690  
 763  691                  if (udpha->uha_checksum != 0) {
 764  692                          /* Adjust if the inbound one was not zero. */
 765  693                          sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
 766  694                          DOWN_SUM(sum);
 767  695                          DOWN_SUM(sum);
 768  696                          udpha->uha_checksum = ~htons(sum);
 769  697                          if (udpha->uha_checksum == 0)
 770  698                                  udpha->uha_checksum = 0xFFFF;
 771  699                  }
 772  700  #undef DOWN_SUM
 773  701                  break;
 774  702          case IPPROTO_IP:
 775  703                  /*
 776  704                   * This case is only an issue for self-encapsulated
 777  705                   * packets.  So for now, fall through.
 778  706                   * packets.  So for now, do nothing.
 779  707                  break;
 780  708          }
 781  709          return (data_mp);
 782  710  }
 783  711  
 784  712  
 785  713  /*
 786  714   * Strip ESP header, check padding, and fix IP header.
 787  715   * Returns B_TRUE on success, B_FALSE if an error occurred.
 788  716   */
 789  717  static boolean_t
 790  718  esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
 791  719      kstat_named_t **counter, ipsecesp_stack_t *espstack)
 792  720  {
 793  721          ipha_t *ipha;
 794  722          ip6_t *ip6h;
 795  723          uint_t divpoint;
 796  724          mblk_t *scratch;
 797  725          uint8_t nexthdr, padlen;
 798  726          uint8_t lastpad;
 799  727          ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
 800  728          uint8_t *lastbyte;
 801  729  
 802  730          /*
 803  731           * Strip ESP data and fix IP header.
 804  732           *
 805  733           * XXX In case the beginning of esp_inbound() changes to not do a
 806  734           * pullup, this part of the code can remain unchanged.
 807  735           */
 808  736          if (isv4) {
 809  737                  ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
 810  738                  ipha = (ipha_t *)data_mp->b_rptr;
 811  739                  ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
 812  740                      IPH_HDR_LENGTH(ipha));
 813  741                  divpoint = IPH_HDR_LENGTH(ipha);
 814  742          } else {
 815  743                  ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
 816  744                  ip6h = (ip6_t *)data_mp->b_rptr;
 817  745                  divpoint = ip_hdr_length_v6(data_mp, ip6h);
 818  746          }
 819  747  
 820  748          scratch = data_mp;
 821  749          while (scratch->b_cont != NULL)
 822  750                  scratch = scratch->b_cont;
 823  751  
 824  752          ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);
 825  753  
 826  754          /*
 827  755           * "Next header" and padding length are the last two bytes in the
 828  756           * ESP-protected datagram, thus the explicit - 1 and - 2.
 829  757           * lastpad is the last byte of the padding, which can be used for
 830  758           * a quick check to see if the padding is correct.
 831  759           */
 832  760          lastbyte = scratch->b_wptr - 1;
 833  761          nexthdr = *lastbyte--;
 834  762          padlen = *lastbyte--;
 835  763  
 836  764          if (isv4) {
 837  765                  /* Fix part of the IP header. */
 838  766                  ipha->ipha_protocol = nexthdr;
 839  767                  /*
 840  768                   * Reality check the padlen.  The explicit - 2 is for the
 841  769                   * padding length and the next-header bytes.
 842  770                   */
 843  771                  if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
 844  772                      sizeof (esph_t) - ivlen) {
 845  773                          ESP_BUMP_STAT(espstack, bad_decrypt);
 846  774                          ipsec_rl_strlog(espstack->ipsecesp_netstack,
 847  775                              info.mi_idnum, 0, 0,
 848  776                              SL_ERROR | SL_WARN,
 849  777                              "Corrupt ESP packet (padlen too big).\n");
 850  778                          esp1dbg(espstack, ("padlen (%d) is greater than:\n",
 851  779                              padlen));
 852  780                          esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp "
 853  781                              "hdr - ivlen(%d) = %d.\n",
 854  782                              ntohs(ipha->ipha_length), ivlen,
 855  783                              (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
 856  784                              2 - sizeof (esph_t) - ivlen)));
 857  785                          *counter = DROPPER(ipss, ipds_esp_bad_padlen);
 858  786                          return (B_FALSE);
 859  787                  }
 860  788  
 861  789                  /*
 862  790                   * Fix the rest of the header.  The explicit - 2 is for the
 863  791                   * padding length and the next-header bytes.
 864  792                   */
 865  793                  ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
 866  794                      2 - sizeof (esph_t) - ivlen);
 867  795                  ipha->ipha_hdr_checksum = 0;
 868  796                  ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
 869  797          } else {
 870  798                  if (ip6h->ip6_nxt == IPPROTO_ESP) {
 871  799                          ip6h->ip6_nxt = nexthdr;
 872  800                  } else {
 873  801                          ip_pkt_t ipp;
 874  802  
 875  803                          bzero(&ipp, sizeof (ipp));
 876  804                          (void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp,
 877  805                              NULL);
 878  806                          if (ipp.ipp_dstopts != NULL) {
 879  807                                  ipp.ipp_dstopts->ip6d_nxt = nexthdr;
 880  808                          } else if (ipp.ipp_rthdr != NULL) {
 881  809                                  ipp.ipp_rthdr->ip6r_nxt = nexthdr;
 882  810                          } else if (ipp.ipp_hopopts != NULL) {
 883  811                                  ipp.ipp_hopopts->ip6h_nxt = nexthdr;
 884  812                          } else {
 885  813                                  /* Panic a DEBUG kernel. */
 886  814                                  ASSERT(ipp.ipp_hopopts != NULL);
 887  815                                  /* Otherwise, pretend it's IP + ESP. */
 888  816                                  cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
 889  817                                  ip6h->ip6_nxt = nexthdr;
 890  818                          }
 891  819                  }
 892  820  
 893  821                  if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
 894  822                      ivlen) {
 895  823                          ESP_BUMP_STAT(espstack, bad_decrypt);
 896  824                          ipsec_rl_strlog(espstack->ipsecesp_netstack,
 897  825                              info.mi_idnum, 0, 0,
 898  826                              SL_ERROR | SL_WARN,
 899  827                              "Corrupt ESP packet (v6 padlen too big).\n");
 900  828                          esp1dbg(espstack, ("padlen (%d) is greater than:\n",
 901  829                              padlen));
 902  830                          esp1dbg(espstack,
 903  831                              ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = "
 904  832                              "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
 905  833                              + sizeof (ip6_t)), ivlen,
 906  834                              (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
 907  835                              sizeof (esph_t) - ivlen)));
 908  836                          *counter = DROPPER(ipss, ipds_esp_bad_padlen);
 909  837                          return (B_FALSE);
 910  838                  }
 911  839  
 912  840  
 913  841                  /*
 914  842                   * Fix the rest of the header.  The explicit - 2 is for the
 915  843                   * padding length and the next-header bytes.  IPv6 is nice,
 916  844                   * because there's no hdr checksum!
 917  845                   */
 918  846                  ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
 919  847                      2 - sizeof (esph_t) - ivlen);
 920  848          }
 921  849  
 922  850          if (espstack->ipsecesp_padding_check > 0 && padlen > 0) {
 923  851                  /*
 924  852                   * Weak padding check: compare last-byte to length, they
 925  853                   * should be equal.
 926  854                   */
 927  855                  lastpad = *lastbyte--;
 928  856  
 929  857                  if (padlen != lastpad) {
 930  858                          ipsec_rl_strlog(espstack->ipsecesp_netstack,
 931  859                              info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
 932  860                              "Corrupt ESP packet (lastpad != padlen).\n");
 933  861                          esp1dbg(espstack,
 934  862                              ("lastpad (%d) not equal to padlen (%d):\n",
 935  863                              lastpad, padlen));
 936  864                          ESP_BUMP_STAT(espstack, bad_padding);
 937  865                          *counter = DROPPER(ipss, ipds_esp_bad_padding);
 938  866                          return (B_FALSE);
 939  867                  }
 940  868  
 941  869                  /*
 942  870                   * Strong padding check: Check all pad bytes to see that
 943  871                   * they're ascending.  Go backwards using a descending counter
 944  872                   * to verify.  padlen == 1 is checked by previous block, so
 945  873                   * only bother if we've more than 1 byte of padding.
 946  874                   * Consequently, start the check one byte before the location
 947  875                   * of "lastpad".
 948  876                   */
 949  877                  if (espstack->ipsecesp_padding_check > 1) {
 950  878                          /*
 951  879                           * This assert may have to become an if and a pullup
 952  880                           * if we start accepting multi-dblk mblks. For now,
 953  881                           * though, any packet here will have been pulled up in
 954  882                           * esp_inbound.
 955  883                           */
 956  884                          ASSERT(MBLKL(scratch) >= lastpad + 3);
 957  885  
 958  886                          /*
 959  887                           * Use "--lastpad" because we already checked the very
 960  888                           * last pad byte previously.
 961  889                           */
 962  890                          while (--lastpad != 0) {
 963  891                                  if (lastpad != *lastbyte) {
 964  892                                          ipsec_rl_strlog(
 965  893                                              espstack->ipsecesp_netstack,
 966  894                                              info.mi_idnum, 0, 0,
 967  895                                              SL_ERROR | SL_WARN, "Corrupt ESP "
 968  896                                              "packet (bad padding).\n");
 969  897                                          esp1dbg(espstack,
 970  898                                              ("padding not in correct"
 971  899                                              " format:\n"));
 972  900                                          ESP_BUMP_STAT(espstack, bad_padding);
 973  901                                          *counter = DROPPER(ipss,
 974  902                                              ipds_esp_bad_padding);
 975  903                                          return (B_FALSE);
 976  904                                  }
 977  905                                  lastbyte--;
 978  906                          }
 979  907                  }
 980  908          }
 981  909  
 982  910          /* Trim off the padding. */
 983  911          ASSERT(data_mp->b_cont == NULL);
 984  912          data_mp->b_wptr -= (padlen + 2);
 985  913  
 986  914          /*
 987  915           * Remove the ESP header.
 988  916           *
 989  917           * The above assertions about data_mp's size will make this work.
 990  918           *
 991  919           * XXX  Question:  If I send up and get back a contiguous mblk,
 992  920           * would it be quicker to bcopy over, or keep doing the dupb stuff?
 993  921           * I go with copying for now.
 994  922           */
 995  923  
 996  924          if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
 997  925              IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
 998  926                  uint8_t *start = data_mp->b_rptr;
 999  927                  uint32_t *src, *dst;
1000  928  
1001  929                  src = (uint32_t *)(start + divpoint);
1002  930                  dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);
1003  931  
1004  932                  ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
1005  933                      IS_P2ALIGNED(src, sizeof (uint32_t)));
1006  934  
1007  935                  do {
1008  936                          src--;
1009  937                          dst--;
1010  938                          *dst = *src;
1011  939                  } while (src != (uint32_t *)start);
1012  940  
1013  941                  data_mp->b_rptr = (uchar_t *)dst;
1014  942          } else {
1015  943                  uint8_t *start = data_mp->b_rptr;
1016  944                  uint8_t *src, *dst;
1017  945  
1018  946                  src = start + divpoint;
1019  947                  dst = src + sizeof (esph_t) + ivlen;
1020  948  
1021  949                  do {
1022  950                          src--;
1023  951                          dst--;
1024  952                          *dst = *src;
1025  953                  } while (src != start);
1026  954  
1027  955                  data_mp->b_rptr = dst;
1028  956          }
1029  957  
1030  958          esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n"));
1031  959          esp2dbg(espstack, (dump_msg(data_mp)));
1032  960  
1033  961          return (B_TRUE);
1034  962  }
1035  963  
1036  964  /*
1037  965   * Updating use times can be tricky business if the ipsa_haspeer flag is
1038  966   * set.  This function is called once in an SA's lifetime.
1039  967   *
1040  968   * Caller has to REFRELE "assoc" which is passed in.  This function has
1041  969   * to REFRELE any peer SA that is obtained.
1042  970   */
1043  971  static void
1044  972  esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
1045  973  {
1046  974          ipsa_t *inassoc, *outassoc;
1047  975          isaf_t *bucket;
1048  976          sadb_t *sp;
1049  977          int outhash;
1050  978          boolean_t isv6;
1051  979          netstack_t              *ns = assoc->ipsa_netstack;
1052  980          ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
1053  981  
1054  982          /* No peer?  No problem! */
1055  983          if (!assoc->ipsa_haspeer) {
1056  984                  sadb_set_usetime(assoc);
1057  985                  return;
1058  986          }
1059  987  
1060  988          /*
1061  989           * Otherwise, we want to grab both the original assoc and its peer.
1062  990           * There might be a race for this, but if it's a real race, the times
1063  991           * will be out-of-synch by at most a second, and since our time
1064  992           * granularity is a second, this won't be a problem.
1065  993           *
1066  994           * If we need tight synchronization on the peer SA, then we need to
1067  995           * reconsider.
1068  996           */
1069  997  
1070  998          /* Use address family to select IPv6/IPv4 */
1071  999          isv6 = (assoc->ipsa_addrfam == AF_INET6);
1072 1000          sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
1073 1001  
1074 1002          if (inbound) {
1075 1003                  inassoc = assoc;
1076 1004                  if (isv6) {
1077 1005                          outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
1078 1006                              &inassoc->ipsa_dstaddr));
1079 1007                  } else {
1080 1008                          outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
1081 1009                              &inassoc->ipsa_dstaddr));
1082 1010                  }
1083 1011                  bucket = &sp->sdb_of[outhash];
1084 1012                  mutex_enter(&bucket->isaf_lock);
1085 1013                  outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1086 1014                      inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1087 1015                      inassoc->ipsa_addrfam);
1088 1016                  mutex_exit(&bucket->isaf_lock);
1089 1017                  if (outassoc == NULL) {
1090 1018                          /* Q: Do we wish to set haspeer == B_FALSE? */
1091 1019                          esp0dbg(("esp_set_usetime: "
1092 1020                              "can't find peer for inbound.\n"));
1093 1021                          sadb_set_usetime(inassoc);
1094 1022                          return;
1095 1023                  }
1096 1024          } else {
1097 1025                  outassoc = assoc;
1098 1026                  bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1099 1027                  mutex_enter(&bucket->isaf_lock);
1100 1028                  inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1101 1029                      outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1102 1030                      outassoc->ipsa_addrfam);
1103 1031                  mutex_exit(&bucket->isaf_lock);
1104 1032                  if (inassoc == NULL) {
1105 1033                          /* Q: Do we wish to set haspeer == B_FALSE? */
1106 1034                          esp0dbg(("esp_set_usetime: "
1107 1035                              "can't find peer for outbound.\n"));
1108 1036                          sadb_set_usetime(outassoc);
1109 1037                          return;
1110 1038                  }
1111 1039          }
1112 1040  
1113 1041          /* Update usetime on both. */
1114 1042          sadb_set_usetime(inassoc);
1115 1043          sadb_set_usetime(outassoc);
1116 1044  
1117 1045          /*
1118 1046           * REFRELE any peer SA.
1119 1047           *
1120 1048           * Because of the multi-line macro nature of IPSA_REFRELE, keep
1121 1049           * them in { }.
1122 1050           */
1123 1051          if (inbound) {
1124 1052                  IPSA_REFRELE(outassoc);
1125 1053          } else {
1126 1054                  IPSA_REFRELE(inassoc);
1127 1055          }
1128 1056  }
1129 1057  
1130 1058  /*
1131 1059   * Handle ESP inbound data for IPv4 and IPv6.
1132 1060   * Returns NULL when the packet is dropped (data_mp is consumed); otherwise
1133 1061   * returns the result of esp_submit_req_inbound().
1134 1062   */
1135 1063  mblk_t *
1136 1064  esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
1137 1065  {
1138 1066          esph_t *esph = (esph_t *)arg;
1139 1067          ipsa_t *ipsa = ira->ira_ipsec_esp_sa;
1140 1068          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
1141 1069          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1142 1070          ipsec_stack_t   *ipss = ns->netstack_ipsec;
1143 1071  
1144 1072          /*
1145 1073           * We may wish to check replay in-range-only here as an optimization.
1146 1074           * Include the reality check of ipsa->ipsa_replay >
1147 1075           * ipsa->ipsa_replay_wsize for times when it's the first N packets,
1148 1076           * where N == ipsa->ipsa_replay_wsize.
1149 1077           *
1150 1078           * Another check that may come here later is the "collision" check.
1151 1079           * If legitimate packets flow quickly enough, this won't be a problem,
1152 1080           * but collisions may cause authentication algorithm crunching to
1153 1081           * take place when it doesn't need to.
1154 1082           */
1155 1083          if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
1156 1084                  ESP_BUMP_STAT(espstack, replay_early_failures);
1157 1085                  IP_ESP_BUMP_STAT(ipss, in_discards);
1158 1086                  ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
1159 1087                      DROPPER(ipss, ipds_esp_early_replay),
1160 1088                      &espstack->esp_dropper);
1161 1089                  BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1162 1090                  return (NULL);
1163 1091          }
1164 1092  
1165 1093          /*
1166 1094           * Adjust the IP header's payload length to reflect the removal
1167 1095           * of the ICV.
1168 1096           */
1169 1097          if (!(ira->ira_flags & IRAF_IS_IPV4)) {
1170 1098                  ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
1171 1099                  ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
1172 1100                      ipsa->ipsa_mac_len);
1173 1101          } else {
  
    | 
      ↓ open down ↓ | 
    602 lines elided | 
    
      ↑ open up ↑ | 
  
1174 1102                  ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1175 1103                  ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
1176 1104                      ipsa->ipsa_mac_len);
1177 1105          }
1178 1106  
1179 1107          /* submit the request to the crypto framework */
1180 1108          return (esp_submit_req_inbound(data_mp, ira, ipsa,
1181 1109              (uint8_t *)esph - data_mp->b_rptr));
1182 1110  }
1183 1111  
1184      -/*
1185      - * Perform the really difficult work of inserting the proposed situation.
1186      - * Called while holding the algorithm lock.
1187      - */
1188      -static void
1189      -esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs,
1190      -    netstack_t *ns)
1191      -{
1192      -        sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
1193      -        ipsec_action_t *ap;
1194      -        ipsec_prot_t *prot;
1195      -        ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1196      -        ipsec_stack_t   *ipss = ns->netstack_ipsec;
1197      -
1198      -        ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
1199      -
1200      -        prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
1201      -        prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
1202      -        *(uint32_t *)(&prop->sadb_prop_replay) = 0;     /* Quick zero-out! */
1203      -
1204      -        prop->sadb_prop_replay = espstack->ipsecesp_replay_size;
1205      -
1206      -        /*
1207      -         * Based upon algorithm properties, and what-not, prioritize a
1208      -         * proposal, based on the ordering of the ESP algorithms in the
1209      -         * alternatives in the policy rule or socket that was placed
1210      -         * in the acquire record.
1211      -         *
1212      -         * For each action in policy list
1213      -         *   Add combination.  If I've hit limit, return.
1214      -         */
1215      -
1216      -        for (ap = acqrec->ipsacq_act; ap != NULL;
1217      -            ap = ap->ipa_next) {
1218      -                ipsec_alginfo_t *ealg = NULL;
1219      -                ipsec_alginfo_t *aalg = NULL;
1220      -
1221      -                if (ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
1222      -                        continue;
1223      -
1224      -                prot = &ap->ipa_act.ipa_apply;
1225      -
1226      -                if (!(prot->ipp_use_esp))
1227      -                        continue;
1228      -
1229      -                if (prot->ipp_esp_auth_alg != 0) {
1230      -                        aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
1231      -                            [prot->ipp_esp_auth_alg];
1232      -                        if (aalg == NULL || !ALG_VALID(aalg))
1233      -                                continue;
1234      -                }
1235      -
1236      -                ASSERT(prot->ipp_encr_alg > 0);
1237      -                ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
1238      -                    [prot->ipp_encr_alg];
1239      -                if (ealg == NULL || !ALG_VALID(ealg))
1240      -                        continue;
1241      -
1242      -                comb->sadb_comb_flags = 0;
1243      -                comb->sadb_comb_reserved = 0;
1244      -                comb->sadb_comb_encrypt = ealg->alg_id;
1245      -                comb->sadb_comb_encrypt_minbits =
1246      -                    MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
1247      -                comb->sadb_comb_encrypt_maxbits =
1248      -                    MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
1249      -
1250      -                if (aalg == NULL) {
1251      -                        comb->sadb_comb_auth = 0;
1252      -                        comb->sadb_comb_auth_minbits = 0;
1253      -                        comb->sadb_comb_auth_maxbits = 0;
1254      -                } else {
1255      -                        comb->sadb_comb_auth = aalg->alg_id;
1256      -                        comb->sadb_comb_auth_minbits =
1257      -                            MAX(prot->ipp_espa_minbits, aalg->alg_ef_minbits);
1258      -                        comb->sadb_comb_auth_maxbits =
1259      -                            MIN(prot->ipp_espa_maxbits, aalg->alg_ef_maxbits);
1260      -                }
1261      -
1262      -                /*
1263      -                 * The following may be based on algorithm
1264      -                 * properties, but in the meantime, we just pick
1265      -                 * some good, sensible numbers.  Key mgmt. can
1266      -                 * (and perhaps should) be the place to finalize
1267      -                 * such decisions.
1268      -                 */
1269      -
1270      -                /*
1271      -                 * No limits on allocations, since we really don't
1272      -                 * support that concept currently.
1273      -                 */
1274      -                comb->sadb_comb_soft_allocations = 0;
1275      -                comb->sadb_comb_hard_allocations = 0;
1276      -
1277      -                /*
1278      -                 * These may want to come from policy rule..
1279      -                 */
1280      -                comb->sadb_comb_soft_bytes =
1281      -                    espstack->ipsecesp_default_soft_bytes;
1282      -                comb->sadb_comb_hard_bytes =
1283      -                    espstack->ipsecesp_default_hard_bytes;
1284      -                comb->sadb_comb_soft_addtime =
1285      -                    espstack->ipsecesp_default_soft_addtime;
1286      -                comb->sadb_comb_hard_addtime =
1287      -                    espstack->ipsecesp_default_hard_addtime;
1288      -                comb->sadb_comb_soft_usetime =
1289      -                    espstack->ipsecesp_default_soft_usetime;
1290      -                comb->sadb_comb_hard_usetime =
1291      -                    espstack->ipsecesp_default_hard_usetime;
1292      -
1293      -                prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
1294      -                if (--combs == 0)
1295      -                        break;  /* out of space.. */
1296      -                comb++;
1297      -        }
1298      -}
1299      -
1300      -/*
1301      - * Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
1302      - */
1303      -static void
1304      -esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns)
1305      -{
1306      -        uint_t combs;
1307      -        sadb_msg_t *samsg;
1308      -        sadb_prop_t *prop;
1309      -        mblk_t *pfkeymp, *msgmp;
1310      -        ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1311      -        ipsec_stack_t   *ipss = ns->netstack_ipsec;
1312      -
1313      -        ESP_BUMP_STAT(espstack, acquire_requests);
1314      -
1315      -        if (espstack->esp_pfkey_q == NULL) {
1316      -                mutex_exit(&acqrec->ipsacq_lock);
1317      -                return;
1318      -        }
1319      -
1320      -        /* Set up ACQUIRE. */
1321      -        pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP,
1322      -            ns->netstack_ipsec);
1323      -        if (pfkeymp == NULL) {
1324      -                esp0dbg(("sadb_setup_acquire failed.\n"));
1325      -                mutex_exit(&acqrec->ipsacq_lock);
1326      -                return;
1327      -        }
1328      -        ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
1329      -        combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
1330      -            ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
1331      -        msgmp = pfkeymp->b_cont;
1332      -        samsg = (sadb_msg_t *)(msgmp->b_rptr);
1333      -
1334      -        /* Insert proposal here. */
1335      -
1336      -        prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len);
1337      -        esp_insert_prop(prop, acqrec, combs, ns);
1338      -        samsg->sadb_msg_len += prop->sadb_prop_len;
1339      -        msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
1340      -
1341      -        mutex_exit(&ipss->ipsec_alg_lock);
1342      -
1343      -        /*
1344      -         * Must mutex_exit() before sending PF_KEY message up, in
1345      -         * order to avoid recursive mutex_enter() if there are no registered
1346      -         * listeners.
1347      -         *
1348      -         * Once I've sent the message, I'm cool anyway.
1349      -         */
1350      -        mutex_exit(&acqrec->ipsacq_lock);
1351      -        if (extended != NULL) {
1352      -                putnext(espstack->esp_pfkey_q, extended);
1353      -        }
1354      -        putnext(espstack->esp_pfkey_q, pfkeymp);
1355      -}
1356      -
1357 1112  /* XXX refactor me */
1358 1113  /*
1359 1114   * Handle the SADB_GETSPI message.  Create a larval SA.
            *
            * A candidate SPI is drawn at random (through cl_inet_getspi() when that
            * hook is set, otherwise random_get_pseudo_bytes()) and handed to
            * sadb_getspi() to build the new SA.  The SA is inserted into the inbound
            * bucket unless the same SPI/address tuple is already present in either
            * the outbound or the inbound bucket, in which case EEXIST is reported.
            *
            * mp       - the request message; on success it is turned around and sent
            *            up esp_pfkey_q, on failure it is handed to sadb_pfkey_error().
            * ksi      - keysock_in_t header of the request; it is overwritten in place
            *            as a keysock_out_t for the reply.
            * espstack - per-netstack ESP state (SADB, pfkey queue, larval timeout).
1360 1115   */
1361 1116  static void
1362 1117  esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
1363 1118  {
1364 1119          ipsa_t *newbie, *target;
1365 1120          isaf_t *outbound, *inbound;
1366 1121          int rc, diagnostic;
1367 1122          sadb_sa_t *assoc;
1368 1123          keysock_out_t *kso;
1369 1124          uint32_t newspi;
1370 1125  
1371 1126          /*
1372 1127           * Randomly generate a proposed SPI value
1373 1128           */
1374 1129          if (cl_inet_getspi != NULL) {
1375 1130                  cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid,
1376 1131                      IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
1377 1132          } else {
1378 1133                  (void) random_get_pseudo_bytes((uint8_t *)&newspi,
1379 1134                      sizeof (uint32_t));
1380 1135          }
1381 1136          newbie = sadb_getspi(ksi, newspi, &diagnostic,
1382 1137              espstack->ipsecesp_netstack, IPPROTO_ESP);
1383 1138  
           /* NULL is reported as ENOMEM, the (ipsa_t *)-1 sentinel as EINVAL. */
1384 1139          if (newbie == NULL) {
1385 1140                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic,
1386 1141                      ksi->ks_in_serial);
1387 1142                  return;
1388 1143          } else if (newbie == (ipsa_t *)-1) {
1389 1144                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
1390 1145                      ksi->ks_in_serial);
1391 1146                  return;
1392 1147          }
1393 1148  
1394 1149          /*
1395 1150           * XXX - We may randomly collide.  We really should recover from this.
1396 1151           *       Unfortunately, that could require spending way-too-much-time
1397 1152           *       in here.  For now, let the user retry.
1398 1153           */
1399 1154  
           /*
            * Pick the outbound bucket from the first 32 bits of the destination
            * address and the inbound bucket from the SPI, in the table that
            * matches the SA's address family.
            */
1400 1155          if (newbie->ipsa_addrfam == AF_INET6) {
1401 1156                  outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6,
1402 1157                      *(uint32_t *)(newbie->ipsa_dstaddr));
1403 1158                  inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6,
1404 1159                      newbie->ipsa_spi);
1405 1160          } else {
1406 1161                  ASSERT(newbie->ipsa_addrfam == AF_INET);
1407 1162                  outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4,
1408 1163                      *(uint32_t *)(newbie->ipsa_dstaddr));
1409 1164                  inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4,
1410 1165                      newbie->ipsa_spi);
1411 1166          }
1412 1167  
           /* Both bucket locks are taken here: outbound first, then inbound. */
1413 1168          mutex_enter(&outbound->isaf_lock);
1414 1169          mutex_enter(&inbound->isaf_lock);
1415 1170  
1416 1171          /*
1417 1172           * Check for collisions (i.e. did sadb_getspi() return with something
1418 1173           * that already exists?).
1419 1174           *
1420 1175           * Try outbound first.  Even though SADB_GETSPI is traditionally
1421 1176           * for inbound SAs, you never know what a user might do.
1422 1177           */
1423 1178          target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
1424 1179              newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
1425 1180          if (target == NULL) {
1426 1181                  target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
1427 1182                      newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
1428 1183                      newbie->ipsa_addrfam);
1429 1184          }
1430 1185  
1431 1186          /*
1432 1187           * I don't have collisions elsewhere!
1433 1188           * (Nor will I because I'm still holding inbound/outbound locks.)
1434 1189           */
1435 1190  
1436 1191          if (target != NULL) {
1437 1192                  rc = EEXIST;
1438 1193                  IPSA_REFRELE(target);   /* drop the reference taken on the colliding SA */
1439 1194          } else {
1440 1195                  /*
1441 1196                   * sadb_insertassoc() also checks for collisions, so
1442 1197                   * if there's a colliding entry, rc will be set
1443 1198                   * to EEXIST.
                    *
                    * The larval hard-expire time (now + ipsecesp_larval_timeout)
                    * is stamped whether or not the insert succeeded; on failure
                    * newbie is released just below.
1444 1199                   */
1445 1200                  rc = sadb_insertassoc(newbie, inbound);
1446 1201                  newbie->ipsa_hardexpiretime = gethrestime_sec();
1447 1202                  newbie->ipsa_hardexpiretime +=
1448 1203                      espstack->ipsecesp_larval_timeout;
1449 1204          }
1450 1205  
1451 1206          /*
1452 1207           * Can exit outbound mutex.  Hold inbound until we're done
1453 1208           * with newbie.
1454 1209           */
1455 1210          mutex_exit(&outbound->isaf_lock);
1456 1211  
1457 1212          if (rc != 0) {
1458 1213                  mutex_exit(&inbound->isaf_lock);
1459 1214                  IPSA_REFRELE(newbie);
1460 1215                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc,
1461 1216                      SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1462 1217                  return;
1463 1218          }
1464 1219  
1465 1220  
1466 1221          /* Can write here because I'm still holding the bucket lock. */
1467 1222          newbie->ipsa_type = SADB_SATYPE_ESP;
1468 1223  
1469 1224          /*
1470 1225           * Construct successful return message. We have one thing going
1471 1226           * for us in PF_KEY v2.  That's the fact that
1472 1227           *      sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
            *
            * so the request's SPIRANGE extension is rewritten in place as the
            * reply's SA extension.  The 64-bit store below zeroes eight bytes
            * starting at sadb_sa_replay (presumably every sadb_sa_t field from
            * there on -- confirm against the structure layout).
1473 1228           */
1474 1229          assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
1475 1230          assoc->sadb_sa_exttype = SADB_EXT_SA;
1476 1231          assoc->sadb_sa_spi = newbie->ipsa_spi;
1477 1232          *((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
1478 1233          mutex_exit(&inbound->isaf_lock);
1479 1234  
1480 1235          /* Convert KEYSOCK_IN to KEYSOCK_OUT. */
1481 1236          kso = (keysock_out_t *)ksi;
1482 1237          kso->ks_out_len = sizeof (*kso);
1483 1238          kso->ks_out_serial = ksi->ks_in_serial;
1484 1239          kso->ks_out_type = KEYSOCK_OUT;
1485 1240  
1486 1241          /*
1487 1242           * Can safely putnext() to esp_pfkey_q, because this is a turnaround
1488 1243           * from the esp_pfkey_q.
1489 1244           */
1490 1245          putnext(espstack->esp_pfkey_q, mp);
1491 1246  }
1492 1247  
1493 1248  /*
1494 1249   * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
1495 1250   * allocated mblk with the ESP header in between the two.
            *
            * mp       - head of the mblk chain to splice into.
            * esp_mp   - mblk to link in (per the description above, the ESP header).
            * divpoint - byte offset, counted from the start of mp's data, at which
            *            esp_mp is inserted.
            * espstack - used here only for debug output.
            *
            * Returns B_TRUE once esp_mp is linked into the chain.  Returns B_FALSE,
            * with the chain left as it was, when the mblk holding the division point
            * needed splitting and dupb() failed.
1496 1251   */
1497 1252  static boolean_t
1498 1253  esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint,
1499 1254      ipsecesp_stack_t *espstack)
1500 1255  {
1501 1256          mblk_t *split_mp = mp;
1502 1257          uint_t wheretodiv = divpoint;
1503 1258  
           /*
            * Walk down the chain until the division point falls inside (or exactly
            * at the end of) split_mp; wheretodiv becomes the offset within it.
            */
1504 1259          while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
1505 1260                  wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
1506 1261                  split_mp = split_mp->b_cont;
1507 1262                  ASSERT(split_mp != NULL);       /* divpoint must lie within the chain */
1508 1263          }
1509 1264  
           /* Only split when the division point is strictly inside split_mp. */
1510 1265          if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
1511 1266                  mblk_t *scratch;
1512 1267  
1513 1268                  /* "scratch" is the 2nd half, split_mp is the first. */
1514 1269                  scratch = dupb(split_mp);
1515 1270                  if (scratch == NULL) {
1516 1271                          esp1dbg(espstack,
1517 1272                              ("esp_insert_esp: can't allocate scratch.\n"));
1518 1273                          return (B_FALSE);
1519 1274                  }
1520 1275                  /* NOTE:  dupb() doesn't set b_cont appropriately. */
1521 1276                  scratch->b_cont = split_mp->b_cont;
1522 1277                  scratch->b_rptr += wheretodiv;
1523 1278                  split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
1524 1279                  split_mp->b_cont = scratch;
1525 1280          }
1526 1281          /*
1527 1282           * At this point, split_mp is exactly "wheretodiv" bytes long, and
1528 1283           * holds the end of the pre-ESP part of the datagram.
1529 1284           */
1530 1285          esp_mp->b_cont = split_mp->b_cont;
1531 1286          split_mp->b_cont = esp_mp;
1532 1287  
1533 1288          return (B_TRUE);
1534 1289  }
1535 1290  
1536 1291  /*
1537 1292   * Section 7 of RFC 3947 says:
1538 1293   *
1539 1294   * 7.  Recovering from the Expiring NAT Mappings
1540 1295   *
1541 1296   *    There are cases where NAT box decides to remove mappings that are still
1542 1297   *    alive (for example, when the keepalive interval is too long, or when the
1543 1298   *    NAT box is rebooted).  To recover from this, ends that are NOT behind
1544 1299   *    NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
1545 1300   *    the other end to determine which IP and port addresses should be used.
1546 1301   *    The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
1547 1302   *    DoS attack possibility because the IP address or port of the other host
1548 1303   *    will not change (it is not behind NAT).
1549 1304   *
1550 1305   *    Keepalives cannot be used for these purposes, as they are not
1551 1306   *    authenticated, but any IKE authenticated IKE packet or ESP packet can be
1552 1307   *    used to detect whether the IP address or the port has changed.
1553 1308   *
1554 1309   * The following function will check an SA and its explicitly-set pair to see
1555 1310   * if the NAT-T remote port matches the received packet (which must have
1556 1311   * passed ESP authentication, see esp_in_done() for the caller context).  If
1557 1312   * there is a mismatch, the SAs are updated.  It is not important if we race
1558 1313   * with a transmitting thread, as if there is a transmitting thread, it will
1559 1314   * merely emit a packet that will most-likely be dropped.
1560 1315   *
1561 1316   * "ports" are ordered src,dst, and assoc is an inbound SA, where src should
1562 1317   * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
            *
            * FIRST_16() and NEXT_16() pick the 16 bits that sit first and second in
            * memory within the 32-bit "ports" word, whatever the host endianness, so
            * each half keeps the byte order it had in memory (the remote half is
            * compared against htons(IPPORT_IKE_NATT) below).
1563 1318   */
1564 1319  #ifdef _LITTLE_ENDIAN
1565 1320  #define FIRST_16(x) ((x) & 0xFFFF)
1566 1321  #define NEXT_16(x) (((x) >> 16) & 0xFFFF)
1567 1322  #else
1568 1323  #define FIRST_16(x) (((x) >> 16) & 0xFFFF)
1569 1324  #define NEXT_16(x) ((x) & 0xFFFF)
1570 1325  #endif
1571 1326  static void
1572 1327  esp_port_freshness(uint32_t ports, ipsa_t *assoc)
1573 1328  {
1574 1329          uint16_t remote = FIRST_16(ports);
1575 1330          uint16_t local = NEXT_16(ports);
1576 1331          ipsa_t *outbound_peer;
1577 1332          isaf_t *bucket;
1578 1333          ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
1579 1334  
1580 1335          /* We found a conn_t, therefore local != 0. */
1581 1336          ASSERT(local != 0);
1582 1337          /* Assume an IPv4 SA. */
1583 1338          ASSERT(assoc->ipsa_addrfam == AF_INET);
1584 1339  
1585 1340          /*
1586 1341           * On-the-wire rport == 0 means something's very wrong.
1587 1342           * An unpaired SA is also useless to us.
1588 1343           * If we are behind the NAT, don't bother.
1589 1344           * A zero stored remote NAT port stands for the default (4500), so a
1590 1345           * packet arriving from that port already matches; check that too.
1591 1346           * And, of course, if the ports already match, we don't need to bother.
1592 1347           */
1593 1348          if (remote == 0 || assoc->ipsa_otherspi == 0 ||
1594 1349              (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) ||
1595 1350              (assoc->ipsa_remote_nat_port == 0 &&
1596 1351              remote == htons(IPPORT_IKE_NATT)) ||
1597 1352              remote == assoc->ipsa_remote_nat_port)
1598 1353                  return;
1599 1354  
1600 1355          /*
            * Try and snag the peer.   NOTE:  Assume IPv4 for now.
            * The peer is looked up by assoc's paired SPI with assoc's source and
            * destination addresses swapped, in the outbound bucket selected by
            * assoc's source address.
            */
1601 1356          bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4),
1602 1357              assoc->ipsa_srcaddr[0]);
1603 1358          mutex_enter(&bucket->isaf_lock);
1604 1359          outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi,
1605 1360              assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET);
1606 1361          mutex_exit(&bucket->isaf_lock);
1607 1362  
1608 1363          /* We probably lost a race to a deleting or expiring thread. */
1609 1364          if (outbound_peer == NULL)
1610 1365                  return;
1611 1366  
1612 1367          /*
1613 1368           * Hold the mutexes for both SAs so we don't race another inbound
1614 1369           * thread.  A lock-entry order shouldn't matter, since all other
1615 1370           * per-ipsa locks are individually held-then-released.
1616 1371           *
1617 1372           * Luckily, this has nothing to do with the remote-NAT address,
1618 1373           * so we don't have to re-scribble the cached-checksum differential.
1619 1374           */
1620 1375          mutex_enter(&outbound_peer->ipsa_lock);
1621 1376          mutex_enter(&assoc->ipsa_lock);
1622 1377          outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port =
1623 1378              remote;
1624 1379          mutex_exit(&assoc->ipsa_lock);
1625 1380          mutex_exit(&outbound_peer->ipsa_lock);
1626 1381          IPSA_REFRELE(outbound_peer);    /* drop the reference from the lookup above */
1627 1382          ESP_BUMP_STAT(espstack, sa_port_renumbers);
1628 1383  }
1629 1384  /*
1630 1385   * Finish processing of an inbound ESP packet after processing by the
1631 1386   * crypto framework.
1632 1387   * - Remove the ESP header.
1633 1388   * - Send packet back to IP.
1634 1389   * If authentication was performed on the packet, this function is called
1635 1390   * only if the authentication succeeded.
1636 1391   *
1637 1392   * data_mp - the packet; b_rptr plus the crypto offset locates the ESP header.
            * ira     - receive attributes; ira_ipsec_esp_sa must be set (ASSERTed).
            * ic      - crypto request state supplying the offsets and lengths of the
            *           processed data.
            *
            * Returns the mblk chain to continue inbound processing with: data_mp, or
            * for NAT-T SAs whatever esp_fix_natt_checksums() returns.  Returns NULL
            * when the packet was dropped (replay check failure, byte-lifetime expiry,
            * esp_strip_header() failure, label replacement failure) or was handed to
            * sadb_buf_pkt() because the SA is in IPSA_STATE_IDLE.  In every NULL case
            * the caller must not use data_mp again.
1638 1393   */
1639 1394  static mblk_t *
1640 1395  esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
1641 1396  {
1642 1397          ipsa_t *assoc;
1643 1398          uint_t espstart;
1644 1399          uint32_t ivlen = 0;
1645 1400          uint_t processed_len;
1646 1401          esph_t *esph;
1647 1402          kstat_named_t *counter;
1648 1403          boolean_t is_natt;
1649 1404          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
1650 1405          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1651 1406          ipsec_stack_t   *ipss = ns->netstack_ipsec;
1652 1407  
1653 1408          assoc = ira->ira_ipsec_esp_sa;
1654 1409          ASSERT(assoc != NULL);
1655 1410  
1656 1411          is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
1657 1412  
1658 1413          /* get the pointer to the ESP header */
1659 1414          if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
1660 1415                  /* authentication-only ESP */
1661 1416                  espstart = ic->ic_crypto_data.cd_offset;
1662 1417                  processed_len = ic->ic_crypto_data.cd_length;
1663 1418          } else {
1664 1419                  /* encryption present */
1665 1420                  ivlen = assoc->ipsa_iv_len;
1666 1421                  if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
1667 1422                          /*
                            * encryption-only ESP: the crypto offset points past
                            * the ESP header and IV, so back up over both.
                            */
1668 1423                          espstart = ic->ic_crypto_data.cd_offset -
1669 1424                              sizeof (esph_t) - assoc->ipsa_iv_len;
1670 1425                          processed_len = ic->ic_crypto_data.cd_length +
1671 1426                              ivlen;
1672 1427                  } else {
1673 1428                          /* encryption with authentication */
1674 1429                          espstart = ic->ic_crypto_dual_data.dd_offset1;
1675 1430                          processed_len = ic->ic_crypto_dual_data.dd_len2 +
1676 1431                              ivlen;
1677 1432                  }
1678 1433          }
1679 1434  
1680 1435          esph = (esph_t *)(data_mp->b_rptr + espstart);
1681 1436  
1682 1437          if (assoc->ipsa_auth_alg != IPSA_AALG_NONE ||
1683 1438              (assoc->ipsa_flags & IPSA_F_COMBINED)) {
1684 1439                  /*
1685 1440                   * Authentication passed if we reach this point.
1686 1441                   * Packets with authentication will have the ICV
1687 1442                   * after the crypto data. Adjust b_wptr before
1688 1443                   * making padlen checks.
1689 1444                   */
1690 1445                  ESP_BUMP_STAT(espstack, good_auth);
1691 1446                  data_mp->b_wptr -= assoc->ipsa_mac_len;
1692 1447  
1693 1448                  /*
1694 1449                   * Check replay window here!
1695 1450                   * For right now, assume keysock will set the replay window
1696 1451                   * size to zero for SAs that have an unspecified sender.
1697 1452                   * This may change...
1698 1453                   */
1699 1454  
1700 1455                  if (!sadb_replay_check(assoc, esph->esph_replay)) {
1701 1456                          /*
1702 1457                           * Log the event. As of now we print out an event.
1703 1458                           * Do not print the replay failure number, or else
1704 1459                           * syslog cannot collate the error messages.  Printing
1705 1460                           * the replay number that failed opens a denial-of-
1706 1461                           * service attack.
1707 1462                           */
1708 1463                          ipsec_assocfailure(info.mi_idnum, 0, 0,
1709 1464                              SL_ERROR | SL_WARN,
1710 1465                              "Replay failed for ESP spi 0x%x, dst %s.\n",
1711 1466                              assoc->ipsa_spi, assoc->ipsa_dstaddr,
1712 1467                              assoc->ipsa_addrfam, espstack->ipsecesp_netstack);
1713 1468                          ESP_BUMP_STAT(espstack, replay_failures);
1714 1469                          counter = DROPPER(ipss, ipds_esp_replay);
1715 1470                          goto drop_and_bail;
1716 1471                  }
1717 1472  
                   /*
                    * Only an authenticated packet may refresh the peer's NAT-T
                    * port, which is why this sits inside the authenticated branch.
                    */
1718 1473                  if (is_natt) {
1719 1474                          ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS);
1720 1475                          ASSERT(ira->ira_esp_udp_ports != 0);
1721 1476                          esp_port_freshness(ira->ira_esp_udp_ports, assoc);
1722 1477                  }
1723 1478          }
1724 1479  
1725 1480          esp_set_usetime(assoc, B_TRUE);
1726 1481  
1727 1482          if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
1728 1483                  /* The ipsa has hit hard expiration, LOG and AUDIT. */
1729 1484                  ipsec_assocfailure(info.mi_idnum, 0, 0,
1730 1485                      SL_ERROR | SL_WARN,
1731 1486                      "ESP association 0x%x, dst %s had bytes expire.\n",
1732 1487                      assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1733 1488                      espstack->ipsecesp_netstack);
1734 1489                  ESP_BUMP_STAT(espstack, bytes_expired);
1735 1490                  counter = DROPPER(ipss, ipds_esp_bytes_expire);
1736 1491                  goto drop_and_bail;
1737 1492          }
1738 1493  
1739 1494          /*
1740 1495           * Remove ESP header and padding from packet.  I hope the compiler
1741 1496           * spews "branch, predict taken" code for this.
            *
            * On failure esp_strip_header() is expected to have filled in "counter"
            * (it is passed by address and then used at drop_and_bail) -- confirm
            * against esp_strip_header().
1742 1497           */
1743 1498  
1744 1499          if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4),
1745 1500              ivlen, &counter, espstack)) {
1746 1501  
1747 1502                  if (is_system_labeled() && assoc->ipsa_tsl != NULL) {
1748 1503                          if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) {
                                   /* Charge the drop to ESP's own counter, not AH's. */
1749 1504                                  ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
1750 1505                                      DROPPER(ipss, ipds_esp_nomem),
1751 1506                                      &espstack->esp_dropper);
1752 1507                                  BUMP_MIB(ira->ira_ill->ill_ip_mib,
1753 1508                                      ipIfStatsInDiscards);
1754 1509                                  return (NULL);
1755 1510                          }
1756 1511                  }
1757 1512                  if (is_natt)
1758 1513                          return (esp_fix_natt_checksums(data_mp, assoc));
1759 1514  
1760 1515                  if (assoc->ipsa_state == IPSA_STATE_IDLE) {
1761 1516                          /*
1762 1517                           * Cluster buffering case.  Tell caller that we're
1763 1518                           * handling the packet.
1764 1519                           */
1765 1520                          sadb_buf_pkt(assoc, data_mp, ira);
1766 1521                          return (NULL);
1767 1522                  }
1768 1523  
1769 1524                  return (data_mp);
1770 1525          }
1771 1526  
1772 1527          esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n"));
1773 1528  drop_and_bail:
1774 1529          IP_ESP_BUMP_STAT(ipss, in_discards);
1775 1530          ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter,
1776 1531              &espstack->esp_dropper);
1777 1532          BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1778 1533          return (NULL);
1779 1534  }
1780 1535  
1781 1536  /*
1782 1537   * Called upon failing the inbound ICV check. The message passed as
1783 1538   * argument is freed.
            *
            * mp  - the packet that failed authentication; handed to ip_drop_packet().
            * ira - receive attributes; supplies the SA (ira_ipsec_esp_sa) named in
            *       the log message and the ill used to reach the netstack.
            *
            * Bumps the ESP bad_auth and in_discards statistics and charges the drop
            * to the ipds_esp_bad_auth counter.
1784 1539   */
1785 1540  static void
1786 1541  esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira)
1787 1542  {
1788 1543          ipsa_t          *assoc = ira->ira_ipsec_esp_sa;
1789 1544          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
1790 1545          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1791 1546          ipsec_stack_t   *ipss = ns->netstack_ipsec;
1792 1547  
1793 1548          /*
1794 1549           * Log the event. Don't print to the console, block
1795 1550           * potential denial-of-service attack.
1796 1551           */
1797 1552          ESP_BUMP_STAT(espstack, bad_auth);
1798 1553  
1799 1554          ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
1800 1555              "ESP Authentication failed for spi 0x%x, dst %s.\n",
1801 1556              assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1802 1557              espstack->ipsecesp_netstack);
1803 1558  
1804 1559          IP_ESP_BUMP_STAT(ipss, in_discards);
1805 1560          ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1806 1561              DROPPER(ipss, ipds_esp_bad_auth),
1807 1562              &espstack->esp_dropper);
1808 1563  }
1809 1564  
1810 1565  
1811 1566  /*
1812 1567   * Invoked for outbound packets after ESP processing. If the packet
1813 1568   * also requires AH, performs the AH SA selection and AH processing.
1814 1569   *
1815 1570   * data_mp - the ESP-processed packet.
1816 1571   * ixa     - transmit attributes; supplies the IPsec action (or, when that is
1817 1572   *           NULL, the policy whose action is used) and the AH SA.
1818 1573   *
            * Returns data_mp unchanged when the action does not want AH.  Otherwise
            * returns whatever the AH SA's ipsa_output_func returns, or NULL when no
            * AH SA could be found and the packet was handed to sadb_acquire().
            * Returns data_mp unless data_mp was consumed/queued.
1819 1574   */
1820 1575  static mblk_t *
1821 1576  esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa)
1822 1577  {
1823 1578          ipsec_action_t *ap;
1824 1579  
1825 1580          ap = ixa->ixa_ipsec_action;
1826 1581          if (ap == NULL) {
                   /* NOTE(review): assumes ixa_ipsec_policy is non-NULL here -- confirm. */
1827 1582                  ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
1828 1583                  ap = pp->ipsp_act;
1829 1584          }
1830 1585  
1831 1586          if (!ap->ipa_want_ah)
1832 1587                  return (data_mp);
1833 1588  
1834 1589          /*
1835 1590           * Normally the AH SA would have already been put in place
1836 1591           * but it could have been flushed so we need to look for it.
1837 1592           */
1838 1593          if (ixa->ixa_ipsec_ah_sa == NULL) {
1839 1594                  if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) {
1840 1595                          sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE);
1841 1596                          return (NULL);
1842 1597                  }
1843 1598          }
1844 1599          ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
1845 1600  
1846 1601          data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa);
1847 1602          return (data_mp);
1848 1603  }
1849 1604  
1850 1605  
1851 1606  /*
1852 1607   * Kernel crypto framework callback invoked after completion of async
1853 1608   * crypto requests for outbound packets.
            *
            * arg    - the mblk chain given to the framework with the request; it
            *          carries the ipsec_crypto_t, the serialized ip_xmit_attr_t and
            *          the packet itself.
            * status - CRYPTO_* completion status.
            *
            * On CRYPTO_SUCCESS the packet continues through optional AH processing
            * and ip_output_post_ipsec(); on any other status it is dropped and
            * counted.  The transmit attributes and the crypto data are released on
            * every path.
1854 1609   */
1855 1610  static void
1856 1611  esp_kcf_callback_outbound(void *arg, int status)
1857 1612  {
1858 1613          mblk_t          *mp = (mblk_t *)arg;
1859 1614          mblk_t          *async_mp;
1860 1615          netstack_t      *ns;
1861 1616          ipsec_stack_t   *ipss;
1862 1617          ipsecesp_stack_t *espstack;
1863 1618          mblk_t          *data_mp;
1864 1619          ip_xmit_attr_t  ixas;
1865 1620          ipsec_crypto_t  *ic;
1866 1621          ill_t           *ill;
1867 1622  
1868 1623          /*
1869 1624           * First remove the ipsec_crypto_t mblk
1870 1625           * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
1871 1626           */
1872 1627          async_mp = ipsec_remove_crypto_data(mp, &ic);
1873 1628          ASSERT(async_mp != NULL);
1874 1629  
1875 1630          /*
1876 1631           * Extract the ip_xmit_attr_t from the first mblk.
1877 1632           * Verifies that the netstack and ill are still around; they could
1878 1633           * have vanished while the crypto framework was doing its work.
1879 1634           * On successful return we have a nce_t and the ill/ipst can't
1880 1635           * disappear until we do the nce_refrele in ixa_cleanup.
1881 1636           */
1882 1637          data_mp = async_mp->b_cont;
1883 1638          async_mp->b_cont = NULL;
1884 1639          if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
1885 1640                  /* Disappeared on us - no ill/ipst for MIB */
1886 1641                  /* We have nowhere to do stats since ixa_ipst could be NULL */
                   /*
                    * NOTE(review): ixas is read here and passed to ixa_cleanup() at
                    * "done" after a failed extraction; this relies on
                    * ip_xmit_attr_from_mblk() initializing ixas even on failure --
                    * confirm.
                    */
1887 1642                  if (ixas.ixa_nce != NULL) {
1888 1643                          ill = ixas.ixa_nce->nce_ill;
1889 1644                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1890 1645                          ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
1891 1646                  }
1892 1647                  freemsg(data_mp);
1893 1648                  goto done;
1894 1649          }
1895 1650          ns = ixas.ixa_ipst->ips_netstack;
1896 1651          espstack = ns->netstack_ipsecesp;
1897 1652          ipss = ns->netstack_ipsec;
1898 1653          ill = ixas.ixa_nce->nce_ill;
1899 1654  
1900 1655          if (status == CRYPTO_SUCCESS) {
1901 1656                  /*
1902 1657                   * If a ICV was computed, it was stored by the
1903 1658                   * crypto framework at the end of the packet.
1904 1659                   */
1905 1660                  ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1906 1661  
1907 1662                  esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE);
1908 1663                  /* NAT-T packet. */
1909 1664                  if (IPH_HDR_VERSION(ipha) == IP_VERSION &&
1910 1665                      ipha->ipha_protocol == IPPROTO_UDP)
1911 1666                          esp_prepare_udp(ns, data_mp, ipha);
1912 1667  
1913 1668                  /* do AH processing if needed */
1914 1669                  data_mp = esp_do_outbound_ah(data_mp, &ixas);
1915 1670                  if (data_mp == NULL)
1916 1671                          goto done;      /* packet was consumed or queued */
1917 1672  
1918 1673                  (void) ip_output_post_ipsec(data_mp, &ixas);
1919 1674          } else {
1920 1675                  /* Outbound shouldn't see invalid MAC */
1921 1676                  ASSERT(status != CRYPTO_INVALID_MAC);
1922 1677  
1923 1678                  esp1dbg(espstack,
1924 1679                      ("esp_kcf_callback_outbound: crypto failed with 0x%x\n",
1925 1680                      status));
1926 1681                  ESP_BUMP_STAT(espstack, crypto_failures);
1927 1682                  ESP_BUMP_STAT(espstack, out_discards);
1928 1683                  ip_drop_packet(data_mp, B_FALSE, ill,
1929 1684                      DROPPER(ipss, ipds_esp_crypto_failed),
1930 1685                      &espstack->esp_dropper);
1931 1686                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1932 1687          }
1933 1688  done:
1934 1689          ixa_cleanup(&ixas);
1935 1690          (void) ipsec_free_crypto_data(mp);
1936 1691  }
1937 1692  
1938 1693  /*
1939 1694   * Kernel crypto framework callback invoked after completion of async
1940 1695   * crypto requests for inbound packets.
            *
            * arg    - the mblk chain given to the framework with the request; it
            *          carries the ipsec_crypto_t, the serialized ip_recv_attr_t and
            *          the packet itself.
            * status - CRYPTO_* completion status.
            *
            * CRYPTO_SUCCESS finishes ESP processing in esp_in_done() and passes the
            * packet to ip_input_post_ipsec(); CRYPTO_INVALID_MAC is logged and
            * dropped by esp_log_bad_auth(); any other status is dropped as a crypto
            * failure.  The receive attributes and the crypto data are released on
            * every path.
1941 1696   */
1942 1697  static void
1943 1698  esp_kcf_callback_inbound(void *arg, int status)
1944 1699  {
1945 1700          mblk_t          *mp = (mblk_t *)arg;
1946 1701          mblk_t          *async_mp;
1947 1702          netstack_t      *ns;
1948 1703          ipsecesp_stack_t *espstack;
1949 1704          ipsec_stack_t   *ipss;
1950 1705          mblk_t          *data_mp;
1951 1706          ip_recv_attr_t  iras;
1952 1707          ipsec_crypto_t  *ic;
1953 1708  
1954 1709          /*
1955 1710           * First remove the ipsec_crypto_t mblk
1956 1711           * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
1957 1712           */
1958 1713          async_mp = ipsec_remove_crypto_data(mp, &ic);
1959 1714          ASSERT(async_mp != NULL);
1960 1715  
1961 1716          /*
1962 1717           * Extract the ip_recv_attr_t from the first mblk.
1963 1718           * Verifies that the netstack and ill are still around; they could
1964 1719           * have vanished while the crypto framework was doing its work.
1965 1720           */
1966 1721          data_mp = async_mp->b_cont;
1967 1722          async_mp->b_cont = NULL;
1968 1723          if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
1969 1724                  /* The ill or ip_stack_t disappeared on us */
                   /*
                    * NOTE(review): iras is passed to ira_cleanup() at "done" after
                    * a failed extraction; this relies on ip_recv_attr_from_mblk()
                    * initializing iras even on failure -- confirm.
                    */
1970 1725                  ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
1971 1726                  freemsg(data_mp);
1972 1727                  goto done;
1973 1728          }
1974 1729  
1975 1730          ns = iras.ira_ill->ill_ipst->ips_netstack;
1976 1731          espstack = ns->netstack_ipsecesp;
1977 1732          ipss = ns->netstack_ipsec;
1978 1733  
1979 1734          if (status == CRYPTO_SUCCESS) {
1980 1735                  data_mp = esp_in_done(data_mp, &iras, ic);
1981 1736                  if (data_mp == NULL)
1982 1737                          goto done;      /* packet was dropped or queued */
1983 1738  
1984 1739                  /* finish IPsec processing */
1985 1740                  ip_input_post_ipsec(data_mp, &iras);
1986 1741          } else if (status == CRYPTO_INVALID_MAC) {
1987 1742                  esp_log_bad_auth(data_mp, &iras);
1988 1743          } else {
1989 1744                  esp1dbg(espstack,
1990 1745                      ("esp_kcf_callback: crypto failed with 0x%x\n",
1991 1746                      status));
1992 1747                  ESP_BUMP_STAT(espstack, crypto_failures);
1993 1748                  IP_ESP_BUMP_STAT(ipss, in_discards);
1994 1749                  ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
1995 1750                      DROPPER(ipss, ipds_esp_crypto_failed),
1996 1751                      &espstack->esp_dropper);
1997 1752                  BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1998 1753          }
1999 1754  done:
2000 1755          ira_cleanup(&iras, B_TRUE);
2001 1756          (void) ipsec_free_crypto_data(mp);
2002 1757  }
2003 1758  
2004 1759  /*
2005 1760   * Invoked on crypto framework failure during inbound and outbound processing.
            *
            * Logs the failure code kef_rc, hands data_mp to ip_drop_packet() with the
            * ipds_esp_crypto_failed drop reason, and bumps the crypto_failures
            * counter together with the discard counter for the given direction.
            * ill is passed through to ip_drop_packet(); the outbound caller in this
            * file passes NULL.  The MIB discard counters are not touched here; the
            * callers bump them.
2006 1761   */
2007 1762  static void
2008 1763  esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
2009 1764      ill_t *ill, ipsecesp_stack_t *espstack)
2010 1765  {
2011 1766          ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
2012 1767  
2013 1768          esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
2014 1769              is_inbound ? "inbound" : "outbound", kef_rc));
2015 1770          ip_drop_packet(data_mp, is_inbound, ill,
2016 1771              DROPPER(ipss, ipds_esp_crypto_failed),
2017 1772              &espstack->esp_dropper);
2018 1773          ESP_BUMP_STAT(espstack, crypto_failures);
                   /*
                    * Inbound discards are counted through IP_ESP_BUMP_STAT on ipss,
                    * outbound discards through ESP_BUMP_STAT on espstack.
                    */
2019 1774          if (is_inbound)
2020 1775                  IP_ESP_BUMP_STAT(ipss, in_discards);
2021 1776          else
2022 1777                  ESP_BUMP_STAT(espstack, out_discards);
2023 1778  }
2024 1779  
2025 1780  /*
2026 1781   * A statement-equivalent macro, _cr MUST point to a modifiable
2027 1782   * crypto_call_req_t.
            *
            * Fills in the request flags (CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE), the
            * callback argument (_mp) and the callback function (_callback).
            *
            * NOTE(review): the expansion is three separate statements with no
            * enclosing braces or do/while(0), so it is only safe where all three
            * land in the same block (as in the braced "if (force)" bodies below).
2028 1783   */
2029 1784  #define ESP_INIT_CALLREQ(_cr, _mp, _callback)                           \
2030 1785          (_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE; \
2031 1786          (_cr)->cr_callback_arg = (_mp);                         \
2032 1787          (_cr)->cr_callback_func = (_callback)
2033 1788  
           /*
            * Describe the ICV buffer to the crypto framework: a raw buffer of icvlen
            * bytes starting at icvbuf, with offset zero.
            */
2034 1789  #define ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {                      \
2035 1790          (mac)->cd_format = CRYPTO_DATA_RAW;                             \
2036 1791          (mac)->cd_offset = 0;                                           \
2037 1792          (mac)->cd_length = icvlen;                                      \
2038 1793          (mac)->cd_raw.iov_base = (char *)icvbuf;                        \
2039 1794          (mac)->cd_raw.iov_len = icvlen;                                 \
2040 1795  }
2041 1796  
           /*
            * Describe len bytes of message data starting off bytes into mp.  If the
            * first mblk holds the whole [off, off + len) range it is described as a
            * raw buffer covering that mblk; otherwise the mblk chain itself is handed
            * to the framework.  mp, off and len are evaluated more than once.
            */
2042 1797  #define ESP_INIT_CRYPTO_DATA(data, mp, off, len) {                      \
2043 1798          if (MBLKL(mp) >= (len) + (off)) {                               \
2044 1799                  (data)->cd_format = CRYPTO_DATA_RAW;                    \
2045 1800                  (data)->cd_raw.iov_base = (char *)(mp)->b_rptr;         \
2046 1801                  (data)->cd_raw.iov_len = MBLKL(mp);                     \
2047 1802                  (data)->cd_offset = off;                                \
2048 1803          } else {                                                        \
2049 1804                  (data)->cd_format = CRYPTO_DATA_MBLK;                   \
2050 1805                  (data)->cd_mp = mp;                                     \
2051 1806                  (data)->cd_offset = off;                                \
2052 1807          }                                                               \
2053 1808          (data)->cd_length = len;                                        \
2054 1809  }
2055 1810  
           /*
            * Describe two (offset, length) regions of the mblk chain mp for a dual
            * (combined MAC and cipher) operation.  The data is always passed in
            * mblk format.
            */
2056 1811  #define ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {   \
2057 1812          (data)->dd_format = CRYPTO_DATA_MBLK;                           \
2058 1813          (data)->dd_mp = mp;                                             \
2059 1814          (data)->dd_len1 = len1;                                         \
2060 1815          (data)->dd_offset1 = off1;                                      \
2061 1816          (data)->dd_len2 = len2;                                         \
2062 1817          (data)->dd_offset2 = off2;                                      \
2063 1818  }
2064 1819  
2065 1820  /*
2066 1821   * Returns data_mp if successfully completed the request. Returns
2067 1822   * NULL if it failed (and increments InDiscards) or if it is pending.
            *
            * esp_mp is the inbound packet; all lengths are computed from
            * MBLKL(esp_mp), i.e. from its first mblk.  ira carries the receive
            * attributes, assoc is the SA to use, and esph_offset is the byte offset
            * of the ESP header from esp_mp->b_rptr.
            *
            * When the SA has IPSA_F_ASYNC set, the receive attributes and an
            * ipsec_crypto_t are placed in mblks linked in front of esp_mp and the
            * chain is registered as the argument of esp_kcf_callback_inbound().
            * Otherwise the ipsec_crypto_t lives on the stack and no callback is
            * registered.
            *
            * On every failure path esp_mp is disposed of here; the caller must not
            * touch it after a NULL return.
2068 1823   */
2069 1824  static mblk_t *
2070 1825  esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira,
2071 1826      ipsa_t *assoc, uint_t esph_offset)
2072 1827  {
2073 1828          uint_t auth_offset, msg_len, auth_len;
2074 1829          crypto_call_req_t call_req, *callrp;
2075 1830          mblk_t *mp;
2076 1831          esph_t *esph_ptr;
2077 1832          int kef_rc;
2078 1833          uint_t icv_len = assoc->ipsa_mac_len;
2079 1834          crypto_ctx_template_t auth_ctx_tmpl;
2080 1835          boolean_t do_auth, do_encr, force;
2081 1836          uint_t encr_offset, encr_len;
2082 1837          uint_t iv_len = assoc->ipsa_iv_len;
2083 1838          crypto_ctx_template_t encr_ctx_tmpl;
2084 1839          ipsec_crypto_t  *ic, icstack;
2085 1840          uchar_t *iv_ptr;
2086 1841          netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
2087 1842          ipsec_stack_t *ipss = ns->netstack_ipsec;
2088 1843          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2089 1844  
2090 1845          do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
2091 1846          do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
2092 1847          force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2093 1848  
                   /*
                    * Default result when neither do_auth nor do_encr triggers a
                    * framework call below.
                    */
2094 1849  #ifdef IPSEC_LATENCY_TEST
2095 1850          kef_rc = CRYPTO_SUCCESS;
2096 1851  #else
2097 1852          kef_rc = CRYPTO_FAILED;
2098 1853  #endif
2099 1854  
2100 1855          /*
2101 1856           * An inbound packet is of the form:
2102 1857           * [IP,options,ESP,IV,data,ICV,pad]
2103 1858           */
2104 1859          esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
2105 1860          iv_ptr = (uchar_t *)(esph_ptr + 1);
2106 1861          /* Packet length starting at IP header ending after ESP ICV. */
2107 1862          msg_len = MBLKL(esp_mp);
2108 1863  
2109 1864          encr_offset = esph_offset + sizeof (esph_t) + iv_len;
2110 1865          encr_len = msg_len - encr_offset;
2111 1866  
2112 1867          /*
2113 1868           * Counter mode algs need a nonce. This is setup in sadb_common_add().
2114 1869           * If for some reason we are using a SA which does not have a nonce
2115 1870           * then we must fail here.
2116 1871           */
2117 1872          if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
2118 1873              (assoc->ipsa_nonce == NULL)) {
2119 1874                  ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill,
2120 1875                      DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2121 1876                  return (NULL);
2122 1877          }
2123 1878  
2124 1879          if (force) {
2125 1880                  /* We are doing asynch; allocate mblks to hold state */
2126 1881                  if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
2127 1882                      (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2128 1883                          BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2129 1884                          ip_drop_input("ipIfStatsInDiscards", esp_mp,
2130 1885                              ira->ira_ill);
                                   /*
                                    * ip_drop_input() does not consume the message
                                    * (its other callers in this file follow it with
                                    * freemsg()), so free esp_mp here; the caller
                                    * gives up on the packet once we return NULL.
                                    */
                                   freemsg(esp_mp);
2131 1886                          return (NULL);
2132 1887                  }
                           /* Chain is now: crypto data -> recv attrs -> packet. */
2133 1888                  linkb(mp, esp_mp);
2134 1889                  callrp = &call_req;
2135 1890                  ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound);
2136 1891          } else {
2137 1892                  /*
2138 1893                   * If we know we are going to do sync then ipsec_crypto_t
2139 1894                   * should be on the stack.
2140 1895                   */
2141 1896                  ic = &icstack;
2142 1897                  bzero(ic, sizeof (*ic));
2143 1898                  callrp = NULL;
2144 1899          }
2145 1900  
2146 1901          if (do_auth) {
2147 1902                  /* authentication context template */
2148 1903                  IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
2149 1904                      auth_ctx_tmpl);
2150 1905  
2151 1906                  /* ICV to be verified */
2152 1907                  ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
2153 1908                      icv_len, esp_mp->b_wptr - icv_len);
2154 1909  
2155 1910                  /* authentication starts at the ESP header */
2156 1911                  auth_offset = esph_offset;
2157 1912                  auth_len = msg_len - auth_offset - icv_len;
2158 1913                  if (!do_encr) {
2159 1914                          /* authentication only */
2160 1915                          /* initialize input data argument */
2161 1916                          ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
2162 1917                              esp_mp, auth_offset, auth_len);
2163 1918  
2164 1919                          /* call the crypto framework */
2165 1920                          kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
2166 1921                              &ic->ic_crypto_data,
2167 1922                              &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
2168 1923                              &ic->ic_crypto_mac, callrp);
2169 1924                  }
2170 1925          }
2171 1926  
2172 1927          if (do_encr) {
2173 1928                  /* encryption template */
2174 1929                  IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
2175 1930                      encr_ctx_tmpl);
2176 1931  
2177 1932                  /* Call the nonce update function. Also passes in IV */
2178 1933                  (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len,
2179 1934                      iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);
2180 1935  
2181 1936                  if (!do_auth) {
2182 1937                          /* decryption only */
2183 1938                          /* initialize input data argument */
2184 1939                          ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
2185 1940                              esp_mp, encr_offset, encr_len);
2186 1941  
2187 1942                          /* call the crypto framework */
2188 1943                          kef_rc = crypto_decrypt((crypto_mechanism_t *)
2189 1944                              &ic->ic_cmm, &ic->ic_crypto_data,
2190 1945                              &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
2191 1946                              NULL, callrp);
2192 1947                  }
2193 1948          }
2194 1949  
2195 1950          if (do_auth && do_encr) {
2196 1951                  /* dual operation */
2197 1952                  /* initialize input data argument */
                           /* encr_len still includes the ICV here, hence the subtraction */
2198 1953                  ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
2199 1954                      esp_mp, auth_offset, auth_len,
2200 1955                      encr_offset, encr_len - icv_len);
2201 1956  
2202 1957                  /* specify IV */
2203 1958                  ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
2204 1959  
2205 1960                  /* call the framework */
2206 1961                  kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
2207 1962                      &assoc->ipsa_emech, &ic->ic_crypto_dual_data,
2208 1963                      &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
2209 1964                      auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac,
2210 1965                      NULL, callrp);
2211 1966          }
2212 1967  
2213 1968          switch (kef_rc) {
2214 1969          case CRYPTO_SUCCESS:
                           /* Completed synchronously, even if async was requested. */
2215 1970                  ESP_BUMP_STAT(espstack, crypto_sync);
2216 1971                  esp_mp = esp_in_done(esp_mp, ira, ic);
2217 1972                  if (force) {
2218 1973                          /* Free mp after we are done with ic */
2219 1974                          mp = ipsec_free_crypto_data(mp);
2220 1975                          (void) ip_recv_attr_free_mblk(mp);
2221 1976                  }
2222 1977                  return (esp_mp);
2223 1978          case CRYPTO_QUEUED:
2224 1979                  /* esp_kcf_callback_inbound() will be invoked on completion */
2225 1980                  ESP_BUMP_STAT(espstack, crypto_async);
2226 1981                  return (NULL);
2227 1982          case CRYPTO_INVALID_MAC:
                           /* Strip the async state mblks to get the packet back. */
2228 1983                  if (force) {
2229 1984                          mp = ipsec_free_crypto_data(mp);
2230 1985                          esp_mp = ip_recv_attr_free_mblk(mp);
2231 1986                  }
2232 1987                  ESP_BUMP_STAT(espstack, crypto_sync);
2233 1988                  BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2234 1989                  esp_log_bad_auth(esp_mp, ira);
2235 1990                  /* esp_mp was passed to ip_drop_packet */
2236 1991                  return (NULL);
2237 1992          }
2238 1993  
                   /* Any other return code is a crypto framework failure. */
2239 1994          if (force) {
2240 1995                  mp = ipsec_free_crypto_data(mp);
2241 1996                  esp_mp = ip_recv_attr_free_mblk(mp);
2242 1997          }
2243 1998          BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2244 1999          esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack);
2245 2000          /* esp_mp was passed to ip_drop_packet */
2246 2001          return (NULL);
2247 2002  }
2248 2003  
2249 2004  /*
2250 2005   * Compute the IP and UDP checksums -- common code for both keepalives and
2251 2006   * actual ESP-in-UDP packets.  Be flexible with multiple mblks because ESP
2252 2007   * uses mblk-insertion to insert the UDP header.
2253 2008   * TODO - If there is an easy way to prep a packet for HW checksums, make
2254 2009   * it happen here.
2255 2010   * Note that this is used both before calling ip_output_simple and
2256 2011   * in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the
2257 2012   * latter.
            *
            * mp is the packet and ipha its IPv4 header.  The IP header checksum is
            * always recomputed; the UDP checksum is only computed and stored when
            * the UDP stack's us_do_checksum is set.
2258 2013   */
2259 2014  static void
2260 2015  esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
2261 2016  {
2262 2017          int offset;
2263 2018          uint32_t cksum;
2264 2019          uint16_t *arr;
2265 2020          mblk_t *udpmp = mp;
2266 2021          uint_t hlen = IPH_HDR_LENGTH(ipha);
2267 2022  
2268 2023          ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2269 2024  
                   /* Zero the field first so it does not feed into its own checksum. */
2270 2025          ipha->ipha_hdr_checksum = 0;
2271 2026          ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
2272 2027  
2273 2028          if (ns->netstack_udp->us_do_checksum) {
2274 2029                  ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
2275 2030                  /* arr points to the IP header. */
2276 2031                  arr = (uint16_t *)ipha;
2277 2032                  IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
2278 2033                  IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes,
2279 2034                      ntohs(htons(ipha->ipha_length) - hlen));
2280 2035                  /* arr[6-9] are the IP addresses. */
                           /* Seed with the protocol, addresses and UDP length. */
2281 2036                  cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
2282 2037                      ntohs(htons(ipha->ipha_length) - hlen);
2283 2038                  cksum = IP_CSUM(mp, hlen, cksum);
2284 2039                  offset = hlen + UDP_CHECKSUM_OFFSET;
                           /*
                            * Walk the chain to the mblk that holds the UDP checksum
                            * field; offset ends up relative to that mblk's b_rptr.
                            */
2285 2040                  while (offset >= MBLKL(udpmp)) {
2286 2041                          offset -= MBLKL(udpmp);
2287 2042                          udpmp = udpmp->b_cont;
2288 2043                  }
2289 2044                  /* arr points to the UDP header's checksum field. */
2290 2045                  arr = (uint16_t *)(udpmp->b_rptr + offset);
2291 2046                  *arr = cksum;
2292 2047          }
2293 2048  }
2294 2049  
2295 2050  /*
2296 2051   * taskq handler so we can send the NAT-T keepalive on a separate thread.
            *
            * arg is the keepalive packet built by ipsecesp_send_keepalive(), which
            * stashes the netstack id in b_prev.  The id is looked up again here and
            * the packet is freed if that netstack can no longer be found.
2297 2052   */
2298 2053  static void
2299 2054  actually_send_keepalive(void *arg)
2300 2055  {
2301 2056          mblk_t *mp = (mblk_t *)arg;
2302 2057          ip_xmit_attr_t ixas;
2303 2058          netstack_t      *ns;
2304 2059          netstackid_t    stackid;
2305 2060  
                   /* Recover the stack id and clear b_prev before the mblk is used. */
2306 2061          stackid = (netstackid_t)(uintptr_t)mp->b_prev;
2307 2062          mp->b_prev = NULL;
2308 2063          ns = netstack_find_by_stackid(stackid);
2309 2064          if (ns == NULL) {
2310 2065                  /* Disappeared */
2311 2066                  ip_drop_output("ipIfStatsOutDiscards", mp, NULL);
2312 2067                  freemsg(mp);
2313 2068                  return;
2314 2069          }
2315 2070  
                   /* Build transmit attributes from scratch for ip_output_simple(). */
2316 2071          bzero(&ixas, sizeof (ixas));
2317 2072          ixas.ixa_zoneid = ALL_ZONES;
2318 2073          ixas.ixa_cred = kcred;
2319 2074          ixas.ixa_cpid = NOPID;
2320 2075          ixas.ixa_tsl = NULL;
2321 2076          ixas.ixa_ipst = ns->netstack_ip;
2322 2077          /* No ULP checksum; done by esp_prepare_udp */
2323 2078          ixas.ixa_flags = (IXAF_IS_IPV4 | IXAF_NO_IPSEC | IXAF_VERIFY_SOURCE);
2324 2079  
2325 2080          (void) ip_output_simple(mp, &ixas);
2326 2081          ixa_cleanup(&ixas);
                   /* Pairs with the successful netstack_find_by_stackid() above. */
2327 2082          netstack_rele(ns);
2328 2083  }
2329 2084  
2330 2085  /*
2331 2086   * Send a one-byte UDP NAT-T keepalive.
            *
            * Builds an IPv4 + UDP packet carrying the single byte 0xFF, using the
            * SA's first source/destination address words and its NAT ports (or
            * IPPORT_IKE_NATT when a port is zero), then dispatches the transmit to
            * esp_taskq.  Fails silently if the mblk cannot be allocated; drops the
            * packet if the taskq dispatch fails.
2332 2087   */
2333 2088  void
2334 2089  ipsecesp_send_keepalive(ipsa_t *assoc)
2335 2090  {
2336 2091          mblk_t          *mp;
2337 2092          ipha_t          *ipha;
2338 2093          udpha_t         *udpha;
2339 2094          netstack_t      *ns = assoc->ipsa_netstack;
2340 2095  
2341 2096          ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock));
2342 2097  
2343 2098          mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI);
2344 2099          if (mp == NULL)
2345 2100                  return;
                   /* IPv4 header with no options. */
2346 2101          ipha = (ipha_t *)mp->b_rptr;
2347 2102          ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
2348 2103          ipha->ipha_type_of_service = 0;
2349 2104          ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1);
2350 2105          /* Use the low-16 of the SPI so we have some clue where it came from. */
2351 2106          ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1);
2352 2107          ipha->ipha_fragment_offset_and_flags = 0;  /* Too small to fragment! */
2353 2108          ipha->ipha_ttl = 0xFF;
2354 2109          ipha->ipha_protocol = IPPROTO_UDP;
2355 2110          ipha->ipha_hdr_checksum = 0;
2356 2111          ipha->ipha_src = assoc->ipsa_srcaddr[0];
2357 2112          ipha->ipha_dst = assoc->ipsa_dstaddr[0];
                   /* UDP header directly follows the IP header. */
2358 2113          udpha = (udpha_t *)(ipha + 1);
2359 2114          udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
2360 2115              assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
2361 2116          udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
2362 2117              assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
2363 2118          udpha->uha_length = htons(sizeof (udpha_t) + 1);
2364 2119          udpha->uha_checksum = 0;
                   /* One-byte payload: 0xFF. */
2365 2120          mp->b_wptr = (uint8_t *)(udpha + 1);
2366 2121          *(mp->b_wptr++) = 0xFF;
2367 2122  
                   /* Fill in the IP header checksum and, if enabled, the UDP one. */
2368 2123          esp_prepare_udp(ns, mp, ipha);
2369 2124  
2370 2125          /*
2371 2126           * We're holding an isaf_t bucket lock, so pawn off the actual
2372 2127           * packet transmission to another thread.  Just in case syncq
2373 2128           * processing causes a same-bucket packet to be processed.
                    * The netstack id rides along in b_prev; actually_send_keepalive()
                    * reads it back and clears the field.
2374 2129           */
2375 2130          mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid;
2376 2131  
2377 2132          if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp,
2378 2133              TQ_NOSLEEP) == 0) {
2379 2134                  /* Assume no memory if taskq_dispatch() fails. */
2380 2135                  mp->b_prev = NULL;
2381 2136                  ip_drop_packet(mp, B_FALSE, NULL,
2382 2137                      DROPPER(ns->netstack_ipsec, ipds_esp_nomem),
2383 2138                      &ns->netstack_ipsecesp->esp_dropper);
2384 2139          }
2385 2140  }
2386 2141  
2387 2142  /*
2388 2143   * Returns mp if successfully completed the request. Returns
2389 2144   * NULL if it failed (the packet is dropped; ipIfStatsOutDiscards is bumped
            * on the allocation-failure and crypto-failure paths) or if it is pending.
            *
            * data_mp is the outbound packet, laid out as described in the body
            * below; ixa carries the transmit attributes and assoc is the SA to use.
            * icv_buf is where the computed ICV is stored when authentication is
            * done, and payload_len is the length of the data to be encrypted.
            *
            * When the SA has IPSA_F_ASYNC set, the transmit attributes and an
            * ipsec_crypto_t are placed in mblks linked in front of data_mp and the
            * chain is registered as the argument of esp_kcf_callback_outbound().
            * Otherwise the ipsec_crypto_t lives on the stack and no callback is
            * registered.
2390 2145   */
2391 2146  static mblk_t *
2392 2147  esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc,
2393 2148      uchar_t *icv_buf, uint_t payload_len)
2394 2149  {
2395 2150          uint_t auth_len;
2396 2151          crypto_call_req_t call_req, *callrp;
2397 2152          mblk_t *esp_mp;
2398 2153          esph_t *esph_ptr;
2399 2154          mblk_t *mp;
2400 2155          int kef_rc = CRYPTO_FAILED;
2401 2156          uint_t icv_len = assoc->ipsa_mac_len;
2402 2157          crypto_ctx_template_t auth_ctx_tmpl;
2403 2158          boolean_t do_auth, do_encr, force;
2404 2159          uint_t iv_len = assoc->ipsa_iv_len;
2405 2160          crypto_ctx_template_t encr_ctx_tmpl;
2406 2161          boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
2407 2162          size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
2408 2163          netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
2409 2164          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2410 2165          ipsec_crypto_t  *ic, icstack;
2411 2166          uchar_t         *iv_ptr;
2412 2167          crypto_data_t   *cd_ptr = NULL;
2413 2168          ill_t           *ill = ixa->ixa_nce->nce_ill;
2414 2169          ipsec_stack_t   *ipss = ns->netstack_ipsec;
2415 2170  
2416 2171          esp3dbg(espstack, ("esp_submit_req_outbound:%s",
2417 2172              is_natt ? "natt" : "not natt"));
2418 2173  
2419 2174          do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
2420 2175          do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
2421 2176          force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2422 2177  
                   /*
                    * Default result when neither do_auth nor do_encr triggers a
                    * framework call below (this overrides the initializer above).
                    */
2423 2178  #ifdef IPSEC_LATENCY_TEST
2424 2179          kef_rc = CRYPTO_SUCCESS;
2425 2180  #else
2426 2181          kef_rc = CRYPTO_FAILED;
2427 2182  #endif
2428 2183  
2429 2184          /*
2430 2185           * Outbound IPsec packets are of the form:
2431 2186           * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
2432 2187           * unless it's NATT, then it's
2433 2188           * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
2434 2189           * Get a pointer to the mblk containing the ESP header.
2435 2190           */
2436 2191          ASSERT(data_mp->b_cont != NULL);
2437 2192          esp_mp = data_mp->b_cont;
2438 2193          esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
2439 2194          iv_ptr = (uchar_t *)(esph_ptr + 1);
2440 2195  
2441 2196          /*
2442 2197           * Counter mode algs need a nonce. This is setup in sadb_common_add().
2443 2198           * If for some reason we are using a SA which does not have a nonce
2444 2199           * then we must fail here.
2445 2200           */
2446 2201          if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
2447 2202              (assoc->ipsa_nonce == NULL)) {
2448 2203                  ip_drop_packet(data_mp, B_FALSE, NULL,
2449 2204                      DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2450 2205                  return (NULL);
2451 2206          }
2452 2207  
2453 2208          if (force) {
2454 2209                  /* We are doing asynch; allocate mblks to hold state */
2455 2210                  if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
2456 2211                      (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2457 2212                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2458 2213                          ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
2459 2214                          freemsg(data_mp);
2460 2215                          return (NULL);
2461 2216                  }
2462 2217  
                           /* Chain is now: crypto data -> xmit attrs -> packet. */
2463 2218                  linkb(mp, data_mp);
2464 2219                  callrp = &call_req;
2465 2220                  ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound);
2466 2221          } else {
2467 2222                  /*
2468 2223                   * If we know we are going to do sync then ipsec_crypto_t
2469 2224                   * should be on the stack.
2470 2225                   */
2471 2226                  ic = &icstack;
2472 2227                  bzero(ic, sizeof (*ic));
2473 2228                  callrp = NULL;
2474 2229          }
2475 2230  
2476 2231  
2477 2232          if (do_auth) {
2478 2233                  /* authentication context template */
2479 2234                  IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
2480 2235                      auth_ctx_tmpl);
2481 2236  
2482 2237                  /* where to store the computed mac */
2483 2238                  ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
2484 2239                      icv_len, icv_buf);
2485 2240  
2486 2241                  /* authentication starts at the ESP header */
2487 2242                  auth_len = payload_len + iv_len + sizeof (esph_t);
2488 2243                  if (!do_encr) {
2489 2244                          /* authentication only */
2490 2245                          /* initialize input data argument */
2491 2246                          ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
2492 2247                              esp_mp, esph_offset, auth_len);
2493 2248  
2494 2249                          /* call the crypto framework */
2495 2250                          kef_rc = crypto_mac(&assoc->ipsa_amech,
2496 2251                              &ic->ic_crypto_data,
2497 2252                              &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
2498 2253                              &ic->ic_crypto_mac, callrp);
2499 2254                  }
2500 2255          }
2501 2256  
2502 2257          if (do_encr) {
2503 2258                  /* encryption context template */
2504 2259                  IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
2505 2260                      encr_ctx_tmpl);
2506 2261                  /* Call the nonce update function. */
2507 2262                  (assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len,
2508 2263                      iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);
2509 2264  
2510 2265                  if (!do_auth) {
2511 2266                          /* encryption only, skip mblk that contains ESP hdr */
2512 2267                          /* initialize input data argument */
2513 2268                          ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
2514 2269                              esp_mp->b_cont, 0, payload_len);
2515 2270  
2516 2271                          /*
2517 2272                           * For combined mode ciphers, the ciphertext is the same
2518 2273                           * size as the clear text, the ICV should follow the
2519 2274                           * ciphertext. To convince the kcf to allow in-line
2520 2275                           * encryption, with an ICV, use ic_crypto_mac
2521 2276                           * to point to the same buffer as the data. The calling
2522 2277                           * function need to ensure the buffer is large enough to
2523 2278                           * include the ICV.
2524 2279                           *
2525 2280                           * The IV is already written to the packet buffer, the
2526 2281                           * nonce setup function copied it to the params struct
2527 2282                           * for the cipher to use.
2528 2283                           */
2529 2284                          if (assoc->ipsa_flags & IPSA_F_COMBINED) {
2530 2285                                  bcopy(&ic->ic_crypto_data,
2531 2286                                      &ic->ic_crypto_mac,
2532 2287                                      sizeof (crypto_data_t));
2533 2288                                  ic->ic_crypto_mac.cd_length =
2534 2289                                      payload_len + icv_len;
2535 2290                                  cd_ptr = &ic->ic_crypto_mac;
2536 2291                          }
2537 2292  
2538 2293                          /* call the crypto framework */
                                   /* cd_ptr stays NULL unless the SA is combined mode */
2539 2294                          kef_rc = crypto_encrypt((crypto_mechanism_t *)
2540 2295                              &ic->ic_cmm, &ic->ic_crypto_data,
2541 2296                              &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
2542 2297                              cd_ptr, callrp);
2543 2298  
2544 2299                  }
2545 2300          }
2546 2301  
2547 2302          if (do_auth && do_encr) {
2548 2303                  /*
2549 2304                   * Encryption and authentication:
2550 2305                   * Pass the pointer to the mblk chain starting at the ESP
2551 2306                   * header to the framework. Skip the ESP header mblk
2552 2307                   * for encryption, which is reflected by an encryption
2553 2308                   * offset equal to the length of that mblk. Start
2554 2309                   * the authentication at the ESP header, i.e. use an
2555 2310                   * authentication offset of esph_offset (zero unless NAT-T
                            * places a UDP header in front of the ESP header).
2556 2311                   */
2557 2312                  ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
2558 2313                      esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);
2559 2314  
2560 2315                  /* specify IV */
2561 2316                  ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
2562 2317  
2563 2318                  /* call the framework */
2564 2319                  kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
2565 2320                      &assoc->ipsa_amech, NULL,
2566 2321                      &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
2567 2322                      encr_ctx_tmpl, auth_ctx_tmpl,
2568 2323                      &ic->ic_crypto_dual_data,
2569 2324                      &ic->ic_crypto_mac, callrp);
2570 2325          }
2571 2326  
2572 2327          switch (kef_rc) {
2573 2328          case CRYPTO_SUCCESS:
                           /* Completed synchronously, even if async was requested. */
2574 2329                  ESP_BUMP_STAT(espstack, crypto_sync);
2575 2330                  esp_set_usetime(assoc, B_FALSE);
2576 2331                  if (force) {
                                   /* Strip the async state mblks to get the packet back. */
2577 2332                          mp = ipsec_free_crypto_data(mp);
2578 2333                          data_mp = ip_xmit_attr_free_mblk(mp);
2579 2334                  }
2580 2335                  if (is_natt)
2581 2336                          esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr);
2582 2337                  return (data_mp);
2583 2338          case CRYPTO_QUEUED:
2584 2339                  /* esp_kcf_callback_outbound() will be invoked on completion */
2585 2340                  ESP_BUMP_STAT(espstack, crypto_async);
2586 2341                  return (NULL);
2587 2342          }
2588 2343  
                   /* Any other return code is a crypto framework failure. */
2589 2344          if (force) {
2590 2345                  mp = ipsec_free_crypto_data(mp);
2591 2346                  data_mp = ip_xmit_attr_free_mblk(mp);
2592 2347          }
2593 2348          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2594 2349          esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack);
2595 2350          /* data_mp was passed to ip_drop_packet */
2596 2351          return (NULL);
2597 2352  }
2598 2353  
2599 2354  /*
2600 2355   * Handle outbound IPsec processing for IPv4 and IPv6
2601 2356   *
2602 2357   * Returns data_mp if successfully completed the request. Returns
2603 2358   * NULL if it failed (and increments OutDiscards) or if it is pending.
2604 2359   */
2605 2360  static mblk_t *
2606 2361  esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
2607 2362  {
2608 2363          mblk_t *espmp, *tailmp;
2609 2364          ipha_t *ipha;
2610 2365          ip6_t *ip6h;
2611 2366          esph_t *esph_ptr, *iv_ptr;
2612 2367          uint_t af;
2613 2368          uint8_t *nhp;
2614 2369          uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
2615 2370          uintptr_t esplen = sizeof (esph_t);
2616 2371          uint8_t protocol;
2617 2372          ipsa_t *assoc;
2618 2373          uint_t iv_len, block_size, mac_len = 0;
2619 2374          uchar_t *icv_buf;
2620 2375          udpha_t *udpha;
2621 2376          boolean_t is_natt = B_FALSE;
2622 2377          netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
2623 2378          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2624 2379          ipsec_stack_t   *ipss = ns->netstack_ipsec;
2625 2380          ill_t           *ill = ixa->ixa_nce->nce_ill;
2626 2381          boolean_t       need_refrele = B_FALSE;
2627 2382  
2628 2383          ESP_BUMP_STAT(espstack, out_requests);
2629 2384  
2630 2385          /*
2631 2386           * <sigh> We have to copy the message here, because TCP (for example)
2632 2387           * keeps a dupb() of the message lying around for retransmission.
2633 2388           * Since ESP changes the whole of the datagram, we have to create our
2634 2389           * own copy lest we clobber TCP's data.  Since we have to copy anyway,
2635 2390           * we might as well make use of msgpullup() and get the mblk into one
2636 2391           * contiguous piece!
2637 2392           */
2638 2393          tailmp = msgpullup(data_mp, -1);
2639 2394          if (tailmp == NULL) {
2640 2395                  esp0dbg(("esp_outbound: msgpullup() failed, "
2641 2396                      "dropping packet.\n"));
2642 2397                  ip_drop_packet(data_mp, B_FALSE, ill,
2643 2398                      DROPPER(ipss, ipds_esp_nomem),
2644 2399                      &espstack->esp_dropper);
2645 2400                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2646 2401                  return (NULL);
2647 2402          }
2648 2403          freemsg(data_mp);
2649 2404          data_mp = tailmp;
2650 2405  
2651 2406          assoc = ixa->ixa_ipsec_esp_sa;
2652 2407          ASSERT(assoc != NULL);
2653 2408  
2654 2409          /*
2655 2410           * Get the outer IP header in shape to escape this system.
2656 2411           */
2657 2412          if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
2658 2413                  /*
2659 2414                   * Need to update packet with any CIPSO option and update
2660 2415                   * ixa_tsl to capture the new label.
2661 2416                   * We allocate a separate ixa for that purpose.
2662 2417                   */
2663 2418                  ixa = ip_xmit_attr_duplicate(ixa);
2664 2419                  if (ixa == NULL) {
2665 2420                          ip_drop_packet(data_mp, B_FALSE, ill,
2666 2421                              DROPPER(ipss, ipds_esp_nomem),
2667 2422                              &espstack->esp_dropper);
2668 2423                          return (NULL);
2669 2424                  }
2670 2425                  need_refrele = B_TRUE;
2671 2426  
2672 2427                  label_hold(assoc->ipsa_otsl);
2673 2428                  ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);
2674 2429  
2675 2430                  data_mp = sadb_whack_label(data_mp, assoc, ixa,
2676 2431                      DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2677 2432                  if (data_mp == NULL) {
2678 2433                          /* Packet dropped by sadb_whack_label */
2679 2434                          ixa_refrele(ixa);
2680 2435                          return (NULL);
2681 2436                  }
2682 2437          }
2683 2438  
2684 2439          /*
2685 2440           * Reality check....
2686 2441           */
2687 2442          ipha = (ipha_t *)data_mp->b_rptr;  /* So we can call esp_acquire(). */
2688 2443  
2689 2444          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2690 2445                  ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
2691 2446  
2692 2447                  af = AF_INET;
2693 2448                  divpoint = IPH_HDR_LENGTH(ipha);
2694 2449                  datalen = ntohs(ipha->ipha_length) - divpoint;
2695 2450                  nhp = (uint8_t *)&ipha->ipha_protocol;
2696 2451          } else {
2697 2452                  ip_pkt_t ipp;
2698 2453  
2699 2454                  ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
2700 2455  
2701 2456                  af = AF_INET6;
2702 2457                  ip6h = (ip6_t *)ipha;
2703 2458                  bzero(&ipp, sizeof (ipp));
2704 2459                  divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL);
2705 2460                  if (ipp.ipp_dstopts != NULL &&
2706 2461                      ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
2707 2462                          /*
2708 2463                           * Destination options are tricky.  If we get in here,
2709 2464                           * then we have a terminal header following the
2710 2465                           * destination options.  We need to adjust backwards
2711 2466                           * so we insert ESP BEFORE the destination options
2712 2467                           * bag.  (So that the dstopts get encrypted!)
2713 2468                           *
2714 2469                           * Since this is for outbound packets only, we know
2715 2470                           * that non-terminal destination options only precede
2716 2471                           * routing headers.
2717 2472                           */
2718 2473                          divpoint -= ipp.ipp_dstoptslen;
2719 2474                  }
2720 2475                  datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;
2721 2476  
2722 2477                  if (ipp.ipp_rthdr != NULL) {
2723 2478                          nhp = &ipp.ipp_rthdr->ip6r_nxt;
2724 2479                  } else if (ipp.ipp_hopopts != NULL) {
2725 2480                          nhp = &ipp.ipp_hopopts->ip6h_nxt;
2726 2481                  } else {
2727 2482                          ASSERT(divpoint == sizeof (ip6_t));
2728 2483                          /* It's probably IP + ESP. */
2729 2484                          nhp = &ip6h->ip6_nxt;
2730 2485                  }
2731 2486          }
2732 2487  
2733 2488          mac_len = assoc->ipsa_mac_len;
2734 2489  
2735 2490          if (assoc->ipsa_flags & IPSA_F_NATT) {
2736 2491                  /* wedge in UDP header */
2737 2492                  is_natt = B_TRUE;
2738 2493                  esplen += UDPH_SIZE;
2739 2494          }
2740 2495  
2741 2496          /*
2742 2497           * Set up ESP header and encryption padding for ENCR PI request.
2743 2498           */
2744 2499  
2745 2500          /* Determine the padding length.  Pad to 4-bytes for no-encryption. */
2746 2501          if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
2747 2502                  iv_len = assoc->ipsa_iv_len;
2748 2503                  block_size = assoc->ipsa_datalen;
2749 2504  
2750 2505                  /*
2751 2506                   * Pad the data to the length of the cipher block size.
2752 2507                   * Include the two additional bytes (hence the - 2) for the
2753 2508                   * padding length and the next header.  Take this into account
2754 2509                   * when calculating the actual length of the padding.
2755 2510                   */
2756 2511                  ASSERT(ISP2(iv_len));
2757 2512                  padlen = ((unsigned)(block_size - datalen - 2)) &
2758 2513                      (block_size - 1);
2759 2514          } else {
2760 2515                  iv_len = 0;
2761 2516                  padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) &
2762 2517                      (sizeof (uint32_t) - 1);
2763 2518          }
2764 2519  
2765 2520          /* Allocate ESP header and IV. */
2766 2521          esplen += iv_len;
2767 2522  
2768 2523          /*
2769 2524           * Update association byte-count lifetimes.  Don't forget to take
2770 2525           * into account the padding length and next-header (hence the + 2).
2771 2526           *
2772 2527           * Use the amount of data fed into the "encryption algorithm".  This
2773 2528           * is the IV, the data length, the padding length, and the final two
2774 2529           * bytes (padlen, and next-header).
2775 2530           *
2776 2531           */
2777 2532  
2778 2533          if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) {
2779 2534                  ip_drop_packet(data_mp, B_FALSE, ill,
2780 2535                      DROPPER(ipss, ipds_esp_bytes_expire),
2781 2536                      &espstack->esp_dropper);
2782 2537                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2783 2538                  if (need_refrele)
2784 2539                          ixa_refrele(ixa);
2785 2540                  return (NULL);
2786 2541          }
2787 2542  
2788 2543          espmp = allocb(esplen, BPRI_HI);
2789 2544          if (espmp == NULL) {
2790 2545                  ESP_BUMP_STAT(espstack, out_discards);
2791 2546                  esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n"));
2792 2547                  ip_drop_packet(data_mp, B_FALSE, ill,
2793 2548                      DROPPER(ipss, ipds_esp_nomem),
2794 2549                      &espstack->esp_dropper);
2795 2550                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2796 2551                  if (need_refrele)
2797 2552                          ixa_refrele(ixa);
2798 2553                  return (NULL);
2799 2554          }
2800 2555          espmp->b_wptr += esplen;
2801 2556          esph_ptr = (esph_t *)espmp->b_rptr;
2802 2557  
2803 2558          if (is_natt) {
2804 2559                  esp3dbg(espstack, ("esp_outbound: NATT"));
2805 2560  
2806 2561                  udpha = (udpha_t *)espmp->b_rptr;
2807 2562                  udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
2808 2563                      assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
2809 2564                  udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
2810 2565                      assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
2811 2566                  /*
2812 2567                   * Set the checksum to 0, so that the esp_prepare_udp() call
2813 2568                   * can do the right thing.
2814 2569                   */
2815 2570                  udpha->uha_checksum = 0;
2816 2571                  esph_ptr = (esph_t *)(udpha + 1);
2817 2572          }
2818 2573  
2819 2574          esph_ptr->esph_spi = assoc->ipsa_spi;
2820 2575  
2821 2576          esph_ptr->esph_replay = htonl(atomic_inc_32_nv(&assoc->ipsa_replay));
2822 2577          if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2823 2578                  /*
2824 2579                   * XXX We have replay counter wrapping.
2825 2580                   * We probably want to nuke this SA (and its peer).
2826 2581                   */
2827 2582                  ipsec_assocfailure(info.mi_idnum, 0, 0,
2828 2583                      SL_ERROR | SL_CONSOLE | SL_WARN,
2829 2584                      "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
2830 2585                      esph_ptr->esph_spi, assoc->ipsa_dstaddr, af,
2831 2586                      espstack->ipsecesp_netstack);
2832 2587  
2833 2588                  ESP_BUMP_STAT(espstack, out_discards);
2834 2589                  sadb_replay_delete(assoc);
2835 2590                  ip_drop_packet(data_mp, B_FALSE, ill,
2836 2591                      DROPPER(ipss, ipds_esp_replay),
2837 2592                      &espstack->esp_dropper);
2838 2593                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2839 2594                  if (need_refrele)
2840 2595                          ixa_refrele(ixa);
2841 2596                  return (NULL);
2842 2597          }
2843 2598  
2844 2599          iv_ptr = (esph_ptr + 1);
2845 2600          /*
2846 2601           * iv_ptr points to the mblk which will contain the IV once we have
2847 2602           * written it there. This mblk will be part of a mblk chain that
2848 2603           * will make up the packet.
2849 2604           *
2850 2605           * For counter mode algorithms, the IV is a 64 bit quantity, it
2851 2606           * must NEVER repeat in the lifetime of the SA, otherwise an
2852 2607           * attacker who had recorded enough packets might be able to
2853 2608           * determine some clear text.
2854 2609           *
2855 2610           * To ensure this does not happen, the IV is stored in the SA and
2856 2611           * incremented for each packet, the IV is then copied into the
2857 2612           * "packet" for transmission to the receiving system. The IV will
2858 2613           * also be copied into the nonce, when the packet is encrypted.
2859 2614           *
2860 2615           * CBC mode algorithms use a random IV for each packet. We do not
2861 2616           * require the highest quality random bits, but for best security
2862 2617           * with CBC mode ciphers, the value must be unlikely to repeat and
2863 2618           * must not be known in advance to an adversary capable of influencing
2864 2619           * the clear text.
2865 2620           */
2866 2621          if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc,
2867 2622              espstack)) {
2868 2623                  ip_drop_packet(data_mp, B_FALSE, ill,
2869 2624                      DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper);
2870 2625                  if (need_refrele)
2871 2626                          ixa_refrele(ixa);
2872 2627                  return (NULL);
2873 2628          }
2874 2629  
2875 2630          /* Fix the IP header. */
2876 2631          alloclen = padlen + 2 + mac_len;
2877 2632          adj = alloclen + (espmp->b_wptr - espmp->b_rptr);
2878 2633  
2879 2634          protocol = *nhp;
2880 2635  
2881 2636          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2882 2637                  ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
2883 2638                  if (is_natt) {
2884 2639                          *nhp = IPPROTO_UDP;
2885 2640                          udpha->uha_length = htons(ntohs(ipha->ipha_length) -
2886 2641                              IPH_HDR_LENGTH(ipha));
2887 2642                  } else {
2888 2643                          *nhp = IPPROTO_ESP;
2889 2644                  }
2890 2645                  ipha->ipha_hdr_checksum = 0;
2891 2646                  ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2892 2647          } else {
2893 2648                  ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
2894 2649                  *nhp = IPPROTO_ESP;
2895 2650          }
2896 2651  
2897 2652          /* I've got the two ESP mblks, now insert them. */
2898 2653  
2899 2654          esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n"));
2900 2655          esp2dbg(espstack, (dump_msg(data_mp)));
2901 2656  
2902 2657          if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) {
2903 2658                  ESP_BUMP_STAT(espstack, out_discards);
2904 2659                  /* NOTE:  esp_insert_esp() only fails if there's no memory. */
2905 2660                  ip_drop_packet(data_mp, B_FALSE, ill,
2906 2661                      DROPPER(ipss, ipds_esp_nomem),
2907 2662                      &espstack->esp_dropper);
2908 2663                  freeb(espmp);
2909 2664                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2910 2665                  if (need_refrele)
2911 2666                          ixa_refrele(ixa);
2912 2667                  return (NULL);
2913 2668          }
2914 2669  
2915 2670          /* Append padding (and leave room for ICV). */
2916 2671          for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
2917 2672                  ;
2918 2673          if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
2919 2674                  tailmp->b_cont = allocb(alloclen, BPRI_HI);
2920 2675                  if (tailmp->b_cont == NULL) {
2921 2676                          ESP_BUMP_STAT(espstack, out_discards);
2922 2677                          esp0dbg(("esp_outbound:  Can't allocate tailmp.\n"));
2923 2678                          ip_drop_packet(data_mp, B_FALSE, ill,
2924 2679                              DROPPER(ipss, ipds_esp_nomem),
2925 2680                              &espstack->esp_dropper);
2926 2681                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2927 2682                          if (need_refrele)
2928 2683                                  ixa_refrele(ixa);
2929 2684                          return (NULL);
2930 2685                  }
2931 2686                  tailmp = tailmp->b_cont;
2932 2687          }
2933 2688  
2934 2689          /*
2935 2690           * If there's padding, N bytes of padding must be of the form 0x1,
2936 2691           * 0x2, 0x3... 0xN.
2937 2692           */
2938 2693          for (i = 0; i < padlen; ) {
2939 2694                  i++;
2940 2695                  *tailmp->b_wptr++ = i;
2941 2696          }
2942 2697          *tailmp->b_wptr++ = i;
2943 2698          *tailmp->b_wptr++ = protocol;
2944 2699  
2945 2700          esp2dbg(espstack, ("data_Mp before encryption:\n"));
2946 2701          esp2dbg(espstack, (dump_msg(data_mp)));
2947 2702  
2948 2703          /*
2949 2704           * Okay.  I've set up the pre-encryption ESP.  Let's do it!
2950 2705           */
2951 2706  
2952 2707          if (mac_len > 0) {
2953 2708                  ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
2954 2709                  icv_buf = tailmp->b_wptr;
2955 2710                  tailmp->b_wptr += mac_len;
2956 2711          } else {
2957 2712                  icv_buf = NULL;
2958 2713          }
2959 2714  
2960 2715          data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf,
2961 2716              datalen + padlen + 2);
2962 2717          if (need_refrele)
2963 2718                  ixa_refrele(ixa);
2964 2719          return (data_mp);
2965 2720  }
2966 2721  
2967 2722  /*
2968 2723   * IP calls this to validate the ICMP errors that
2969 2724   * we got from the network.
2970 2725   */
2971 2726  mblk_t *
2972 2727  ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
2973 2728  {
2974 2729          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
2975 2730          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2976 2731          ipsec_stack_t   *ipss = ns->netstack_ipsec;
2977 2732  
2978 2733          /*
2979 2734           * Unless we get an entire packet back, this function is useless.
2980 2735           * Why?
2981 2736           *
2982 2737           * 1.)  Partial packets are useless, because the "next header"
2983 2738           *      is at the end of the decrypted ESP packet.  Without the
2984 2739           *      whole packet, this is useless.
2985 2740           *
2986 2741           * 2.)  If we ever use a stateful cipher, such as a stream or a
2987 2742           *      one-time pad, we can't do anything.
2988 2743           *
2989 2744           * Since the chances of us getting an entire packet back are very
2990 2745           * very small, we discard here.
2991 2746           */
2992 2747          IP_ESP_BUMP_STAT(ipss, in_discards);
2993 2748          ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
2994 2749              DROPPER(ipss, ipds_esp_icmp),
2995 2750              &espstack->esp_dropper);
2996 2751          return (NULL);
2997 2752  }
2998 2753  
2999 2754  /*
3000 2755   * Construct an SADB_REGISTER message with the current algorithms.
3001 2756   * This function gets called when 'ipsecalgs -s' is run or when
3002 2757   * in.iked (or other KMD) starts.
3003 2758   */
3004 2759  static boolean_t
3005 2760  esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
3006 2761      ipsecesp_stack_t *espstack, cred_t *cr)
3007 2762  {
3008 2763          mblk_t *pfkey_msg_mp, *keysock_out_mp;
3009 2764          sadb_msg_t *samsg;
3010 2765          sadb_supported_t *sasupp_auth = NULL;
3011 2766          sadb_supported_t *sasupp_encr = NULL;
3012 2767          sadb_alg_t *saalg;
3013 2768          uint_t allocsize = sizeof (*samsg);
3014 2769          uint_t i, numalgs_snap;
3015 2770          int current_aalgs;
3016 2771          ipsec_alginfo_t **authalgs;
3017 2772          uint_t num_aalgs;
3018 2773          int current_ealgs;
3019 2774          ipsec_alginfo_t **encralgs;
3020 2775          uint_t num_ealgs;
3021 2776          ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
3022 2777          sadb_sens_t *sens;
3023 2778          size_t sens_len = 0;
3024 2779          sadb_ext_t *nextext;
3025 2780          ts_label_t *sens_tsl = NULL;
3026 2781  
3027 2782          /* Allocate the KEYSOCK_OUT. */
3028 2783          keysock_out_mp = sadb_keysock_out(serial);
3029 2784          if (keysock_out_mp == NULL) {
3030 2785                  esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
3031 2786                  return (B_FALSE);
3032 2787          }
3033 2788  
3034 2789          if (is_system_labeled() && (cr != NULL)) {
3035 2790                  sens_tsl = crgetlabel(cr);
  
    | 
      ↓ open down ↓ | 
    1669 lines elided | 
    
      ↑ open up ↑ | 
  
3036 2791                  if (sens_tsl != NULL) {
3037 2792                          sens_len = sadb_sens_len_from_label(sens_tsl);
3038 2793                          allocsize += sens_len;
3039 2794                  }
3040 2795          }
3041 2796  
3042 2797          /*
3043 2798           * Allocate the PF_KEY message that follows KEYSOCK_OUT.
3044 2799           */
3045 2800  
3046      -        mutex_enter(&ipss->ipsec_alg_lock);
     2801 +        rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3047 2802          /*
3048 2803           * Fill SADB_REGISTER message's algorithm descriptors.  Hold
3049 2804           * down the lock while filling it.
3050 2805           *
3051 2806           * Return only valid algorithms, so the number of algorithms
3052 2807           * to send up may be less than the number of algorithm entries
3053 2808           * in the table.
3054 2809           */
3055 2810          authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
3056 2811          for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
3057 2812                  if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
3058 2813                          num_aalgs++;
3059 2814  
3060 2815          if (num_aalgs != 0) {
3061 2816                  allocsize += (num_aalgs * sizeof (*saalg));
3062 2817                  allocsize += sizeof (*sasupp_auth);
3063 2818          }
3064 2819          encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
  
    | 
      ↓ open down ↓ | 
    8 lines elided | 
    
      ↑ open up ↑ | 
  
3065 2820          for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
3066 2821                  if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
3067 2822                          num_ealgs++;
3068 2823  
3069 2824          if (num_ealgs != 0) {
3070 2825                  allocsize += (num_ealgs * sizeof (*saalg));
3071 2826                  allocsize += sizeof (*sasupp_encr);
3072 2827          }
3073 2828          keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
3074 2829          if (keysock_out_mp->b_cont == NULL) {
3075      -                mutex_exit(&ipss->ipsec_alg_lock);
     2830 +                rw_exit(&ipss->ipsec_alg_lock);
3076 2831                  freemsg(keysock_out_mp);
3077 2832                  return (B_FALSE);
3078 2833          }
3079 2834          pfkey_msg_mp = keysock_out_mp->b_cont;
3080 2835          pfkey_msg_mp->b_wptr += allocsize;
3081 2836  
3082 2837          nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));
3083 2838  
3084 2839          if (num_aalgs != 0) {
3085 2840                  sasupp_auth = (sadb_supported_t *)nextext;
3086 2841                  saalg = (sadb_alg_t *)(sasupp_auth + 1);
3087 2842  
3088 2843                  ASSERT(((ulong_t)saalg & 0x7) == 0);
3089 2844  
3090 2845                  numalgs_snap = 0;
3091 2846                  for (i = 0;
3092 2847                      ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
3093 2848                      i++) {
3094 2849                          if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
3095 2850                                  continue;
3096 2851  
3097 2852                          saalg->sadb_alg_id = authalgs[i]->alg_id;
3098 2853                          saalg->sadb_alg_ivlen = 0;
3099 2854                          saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
3100 2855                          saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
3101 2856                          saalg->sadb_x_alg_increment =
3102 2857                              authalgs[i]->alg_increment;
3103 2858                          saalg->sadb_x_alg_saltbits = SADB_8TO1(
3104 2859                              authalgs[i]->alg_saltlen);
3105 2860                          numalgs_snap++;
3106 2861                          saalg++;
3107 2862                  }
3108 2863                  ASSERT(numalgs_snap == num_aalgs);
3109 2864  #ifdef DEBUG
3110 2865                  /*
3111 2866                   * Reality check to make sure I snagged all of the
3112 2867                   * algorithms.
3113 2868                   */
3114 2869                  for (; i < IPSEC_MAX_ALGS; i++) {
3115 2870                          if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
3116 2871                                  cmn_err(CE_PANIC, "esp_register_out()! "
3117 2872                                      "Missed aalg #%d.\n", i);
3118 2873                          }
3119 2874                  }
3120 2875  #endif /* DEBUG */
3121 2876                  nextext = (sadb_ext_t *)saalg;
3122 2877          }
3123 2878  
3124 2879          if (num_ealgs != 0) {
3125 2880                  sasupp_encr = (sadb_supported_t *)nextext;
3126 2881                  saalg = (sadb_alg_t *)(sasupp_encr + 1);
3127 2882  
3128 2883                  numalgs_snap = 0;
3129 2884                  for (i = 0;
3130 2885                      ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
3131 2886                          if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
3132 2887                                  continue;
3133 2888                          saalg->sadb_alg_id = encralgs[i]->alg_id;
3134 2889                          saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen;
3135 2890                          saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits;
3136 2891                          saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits;
3137 2892                          /*
3138 2893                           * We could advertise the ICV length, except there
3139 2894                           * is not a value in sadb_x_algb to do this.
3140 2895                           * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
3141 2896                           */
3142 2897                          saalg->sadb_x_alg_increment =
3143 2898                              encralgs[i]->alg_increment;
3144 2899                          saalg->sadb_x_alg_saltbits =
3145 2900                              SADB_8TO1(encralgs[i]->alg_saltlen);
3146 2901  
3147 2902                          numalgs_snap++;
3148 2903                          saalg++;
3149 2904                  }
3150 2905                  ASSERT(numalgs_snap == num_ealgs);
3151 2906  #ifdef DEBUG
3152 2907                  /*
3153 2908                   * Reality check to make sure I snagged all of the
3154 2909                   * algorithms.
3155 2910                   */
3156 2911                  for (; i < IPSEC_MAX_ALGS; i++) {
3157 2912                          if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
3158 2913                                  cmn_err(CE_PANIC, "esp_register_out()! "
  
    | 
      ↓ open down ↓ | 
    73 lines elided | 
    
      ↑ open up ↑ | 
  
3159 2914                                      "Missed ealg #%d.\n", i);
3160 2915                          }
3161 2916                  }
3162 2917  #endif /* DEBUG */
3163 2918                  nextext = (sadb_ext_t *)saalg;
3164 2919          }
3165 2920  
3166 2921          current_aalgs = num_aalgs;
3167 2922          current_ealgs = num_ealgs;
3168 2923  
3169      -        mutex_exit(&ipss->ipsec_alg_lock);
     2924 +        rw_exit(&ipss->ipsec_alg_lock);
3170 2925  
3171 2926          if (sens_tsl != NULL) {
3172 2927                  sens = (sadb_sens_t *)nextext;
3173 2928                  sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
3174 2929                      sens_tsl, sens_len);
3175 2930  
3176 2931                  nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
3177 2932          }
3178 2933  
3179 2934          /* Now fill the rest of the SADB_REGISTER message. */
3180 2935  
3181 2936          samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
3182 2937          samsg->sadb_msg_version = PF_KEY_V2;
3183 2938          samsg->sadb_msg_type = SADB_REGISTER;
3184 2939          samsg->sadb_msg_errno = 0;
3185 2940          samsg->sadb_msg_satype = SADB_SATYPE_ESP;
3186 2941          samsg->sadb_msg_len = SADB_8TO64(allocsize);
3187 2942          samsg->sadb_msg_reserved = 0;
3188 2943          /*
3189 2944           * Assume caller has sufficient sequence/pid number info.  If it's one
3190 2945           * from me over a new alg., I could give two hoots about sequence.
3191 2946           */
3192 2947          samsg->sadb_msg_seq = sequence;
3193 2948          samsg->sadb_msg_pid = pid;
3194 2949  
3195 2950          if (sasupp_auth != NULL) {
3196 2951                  sasupp_auth->sadb_supported_len = SADB_8TO64(
3197 2952                      sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
3198 2953                  sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
3199 2954                  sasupp_auth->sadb_supported_reserved = 0;
3200 2955          }
3201 2956  
3202 2957          if (sasupp_encr != NULL) {
3203 2958                  sasupp_encr->sadb_supported_len = SADB_8TO64(
3204 2959                      sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
3205 2960                  sasupp_encr->sadb_supported_exttype =
3206 2961                      SADB_EXT_SUPPORTED_ENCRYPT;
3207 2962                  sasupp_encr->sadb_supported_reserved = 0;
3208 2963          }
3209 2964  
3210 2965          if (espstack->esp_pfkey_q != NULL)
3211 2966                  putnext(espstack->esp_pfkey_q, keysock_out_mp);
3212 2967          else {
3213 2968                  freemsg(keysock_out_mp);
3214 2969                  return (B_FALSE);
3215 2970          }
3216 2971  
3217 2972          return (B_TRUE);
3218 2973  }
3219 2974  
3220 2975  /*
3221 2976   * Invoked when the algorithm table changes. Causes SADB_REGISTER
3222 2977   * messages containing the current list of algorithms to be
3223 2978   * sent up to the ESP listeners.
3224 2979   */
3225 2980  void
3226 2981  ipsecesp_algs_changed(netstack_t *ns)
3227 2982  {
3228 2983          ipsecesp_stack_t        *espstack = ns->netstack_ipsecesp;
3229 2984  
3230 2985          /*
3231 2986           * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
3232 2987           * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
3233 2988           */
3234 2989          (void) esp_register_out(0, 0, 0, espstack, NULL);
3235 2990  }
3236 2991  
3237 2992  /*
3238 2993   * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
3239 2994   * and send() it into ESP and IP again.
3240 2995   */
3241 2996  static void
3242 2997  inbound_task(void *arg)
3243 2998  {
3244 2999          mblk_t          *mp = (mblk_t *)arg;
3245 3000          mblk_t          *async_mp;
3246 3001          ip_recv_attr_t  iras;
3247 3002  
3248 3003          async_mp = mp;
3249 3004          mp = async_mp->b_cont;
3250 3005          async_mp->b_cont = NULL;
3251 3006          if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
3252 3007                  /* The ill or ip_stack_t disappeared on us */
3253 3008                  ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
3254 3009                  freemsg(mp);
3255 3010                  goto done;
3256 3011          }
3257 3012  
3258 3013          esp_inbound_restart(mp, &iras);
3259 3014  done:
3260 3015          ira_cleanup(&iras, B_TRUE);
3261 3016  }
3262 3017  
3263 3018  /*
3264 3019   * Restart ESP after the SA has been added.
3265 3020   */
3266 3021  static void
3267 3022  esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
3268 3023  {
3269 3024          esph_t          *esph;
3270 3025          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
3271 3026          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3272 3027  
3273 3028          esp2dbg(espstack, ("in ESP inbound_task"));
3274 3029          ASSERT(espstack != NULL);
3275 3030  
3276 3031          mp = ipsec_inbound_esp_sa(mp, ira, &esph);
3277 3032          if (mp == NULL)
3278 3033                  return;
3279 3034  
3280 3035          ASSERT(esph != NULL);
3281 3036          ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3282 3037          ASSERT(ira->ira_ipsec_esp_sa != NULL);
3283 3038  
3284 3039          mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira);
3285 3040          if (mp == NULL) {
3286 3041                  /*
3287 3042                   * Either it failed or is pending. In the former case
3288 3043                   * ipIfStatsInDiscards was increased.
3289 3044                   */
3290 3045                  return;
3291 3046          }
3292 3047  
3293 3048          ip_input_post_ipsec(mp, ira);
3294 3049  }
3295 3050  
3296 3051  /*
3297 3052   * Now that weak-key passed, actually ADD the security association, and
3298 3053   * send back a reply ADD message.
3299 3054   */
3300 3055  static int
3301 3056  esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
3302 3057      int *diagnostic, ipsecesp_stack_t *espstack)
3303 3058  {
3304 3059          isaf_t *primary = NULL, *secondary;
3305 3060          boolean_t clone = B_FALSE, is_inbound = B_FALSE;
3306 3061          ipsa_t *larval = NULL;
3307 3062          ipsacq_t *acqrec;
3308 3063          iacqf_t *acq_bucket;
3309 3064          mblk_t *acq_msgs = NULL;
3310 3065          int rc;
3311 3066          mblk_t *lpkt;
3312 3067          int error;
3313 3068          ipsa_query_t sq;
3314 3069          ipsec_stack_t   *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
3315 3070  
3316 3071          /*
3317 3072           * Locate the appropriate table(s).
3318 3073           */
3319 3074          sq.spp = &espstack->esp_sadb;   /* XXX */
3320 3075          error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
3321 3076              IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
3322 3077              &sq, diagnostic);
3323 3078          if (error)
3324 3079                  return (error);
3325 3080  
3326 3081          /*
3327 3082           * Use the direction flags provided by the KMD to determine
3328 3083           * if the inbound or outbound table should be the primary
3329 3084           * for this SA. If these flags were absent then make this
3330 3085           * decision based on the addresses.
3331 3086           */
3332 3087          if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) {
3333 3088                  primary = sq.inbound;
3334 3089                  secondary = sq.outbound;
3335 3090                  is_inbound = B_TRUE;
3336 3091                  if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
3337 3092                          clone = B_TRUE;
3338 3093          } else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
3339 3094                  primary = sq.outbound;
3340 3095                  secondary = sq.inbound;
3341 3096          }
3342 3097  
3343 3098          if (primary == NULL) {
3344 3099                  /*
3345 3100                   * The KMD did not set a direction flag, determine which
3346 3101                   * table to insert the SA into based on addresses.
3347 3102                   */
3348 3103                  switch (ksi->ks_in_dsttype) {
3349 3104                  case KS_IN_ADDR_MBCAST:
3350 3105                          clone = B_TRUE; /* All mcast SAs can be bidirectional */
3351 3106                          sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
3352 3107                          /* FALLTHRU */
3353 3108                  /*
3354 3109                   * If the source address is either one of mine, or unspecified
3355 3110                   * (which is best summed up by saying "not 'not mine'"),
3356 3111                   * then the association is potentially bi-directional,
3357 3112                   * in that it can be used for inbound traffic and outbound
3358 3113                   * traffic.  The best example of such an SA is a multicast
3359 3114                   * SA (which allows me to receive the outbound traffic).
3360 3115                   */
3361 3116                  case KS_IN_ADDR_ME:
3362 3117                          sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
3363 3118                          primary = sq.inbound;
3364 3119                          secondary = sq.outbound;
3365 3120                          if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
3366 3121                                  clone = B_TRUE;
3367 3122                          is_inbound = B_TRUE;
3368 3123                          break;
3369 3124                  /*
3370 3125                   * If the source address is literally not mine (either
3371 3126                   * unspecified or not mine), then this SA may have an
3372 3127                   * address that WILL be mine after some configuration.
3373 3128                   * We pay the price for this by making it a bi-directional
3374 3129                   * SA.
3375 3130                   */
3376 3131                  case KS_IN_ADDR_NOTME:
3377 3132                          sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
3378 3133                          primary = sq.outbound;
3379 3134                          secondary = sq.inbound;
3380 3135                          if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
3381 3136                                  sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
3382 3137                                  clone = B_TRUE;
3383 3138                          }
3384 3139                          break;
3385 3140                  default:
3386 3141                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
3387 3142                          return (EINVAL);
3388 3143                  }
3389 3144          }
3390 3145  
3391 3146          /*
3392 3147           * Find an ACQUIRE list entry if possible.  If we've added an SA that
3393 3148           * suits the needs of an ACQUIRE list entry, we can eliminate the
3394 3149           * ACQUIRE list entry and transmit the enqueued packets.  Use the
3395 3150           * high-bit of the sequence number to queue it.  Key off destination
3396 3151           * addr, and change acqrec's state.
3397 3152           */
3398 3153  
3399 3154          if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
3400 3155                  acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
3401 3156                  mutex_enter(&acq_bucket->iacqf_lock);
3402 3157                  for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
3403 3158                      acqrec = acqrec->ipsacq_next) {
3404 3159                          mutex_enter(&acqrec->ipsacq_lock);
3405 3160                          /*
3406 3161                           * Q:  I only check sequence.  Should I check dst?
3407 3162                           * A: Yes, check dest because those are the packets
3408 3163                           *    that are queued up.
3409 3164                           */
3410 3165                          if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
3411 3166                              IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
3412 3167                              acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
3413 3168                                  break;
3414 3169                          mutex_exit(&acqrec->ipsacq_lock);
3415 3170                  }
3416 3171                  if (acqrec != NULL) {
3417 3172                          /*
3418 3173                           * AHA!  I found an ACQUIRE record for this SA.
3419 3174                           * Grab the msg list, and free the acquire record.
3420 3175                           * I already am holding the lock for this record,
3421 3176                           * so all I have to do is free it.
3422 3177                           */
3423 3178                          acq_msgs = acqrec->ipsacq_mp;
3424 3179                          acqrec->ipsacq_mp = NULL;
3425 3180                          mutex_exit(&acqrec->ipsacq_lock);
3426 3181                          sadb_destroy_acquire(acqrec,
3427 3182                              espstack->ipsecesp_netstack);
3428 3183                  }
3429 3184                  mutex_exit(&acq_bucket->iacqf_lock);
3430 3185          }
3431 3186  
3432 3187          /*
3433 3188           * Find PF_KEY message, and see if I'm an update.  If so, find entry
3434 3189           * in larval list (if there).
3435 3190           */
3436 3191          if (samsg->sadb_msg_type == SADB_UPDATE) {
3437 3192                  mutex_enter(&sq.inbound->isaf_lock);
3438 3193                  larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
3439 3194                      ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
3440 3195                  mutex_exit(&sq.inbound->isaf_lock);
3441 3196  
3442 3197                  if ((larval == NULL) ||
3443 3198                      (larval->ipsa_state != IPSA_STATE_LARVAL)) {
3444 3199                          *diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
3445 3200                          if (larval != NULL) {
3446 3201                                  IPSA_REFRELE(larval);
3447 3202                          }
3448 3203                          esp0dbg(("Larval update, but larval disappeared.\n"));
3449 3204                          return (ESRCH);
3450 3205                  } /* Else sadb_common_add unlinks it for me! */
3451 3206          }
3452 3207  
3453 3208          if (larval != NULL) {
3454 3209                  /*
3455 3210                   * Hold again, because sadb_common_add() consumes a reference,
3456 3211                   * and we don't want to clear_lpkt() without a reference.
3457 3212                   */
3458 3213                  IPSA_REFHOLD(larval);
3459 3214          }
3460 3215  
3461 3216          rc = sadb_common_add(espstack->esp_pfkey_q,
3462 3217              mp, samsg, ksi, primary, secondary, larval, clone, is_inbound,
3463 3218              diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb);
3464 3219  
3465 3220          if (larval != NULL) {
3466 3221                  if (rc == 0) {
3467 3222                          lpkt = sadb_clear_lpkt(larval);
3468 3223                          if (lpkt != NULL) {
3469 3224                                  rc = !taskq_dispatch(esp_taskq, inbound_task,
3470 3225                                      lpkt, TQ_NOSLEEP);
3471 3226                          }
3472 3227                  }
3473 3228                  IPSA_REFRELE(larval);
3474 3229          }
3475 3230  
3476 3231          /*
3477 3232           * How much more stack will I create with all of these
3478 3233           * esp_outbound() calls?
3479 3234           */
3480 3235  
3481 3236          /* Handle the packets queued waiting for the SA */
3482 3237          while (acq_msgs != NULL) {
3483 3238                  mblk_t          *asyncmp;
3484 3239                  mblk_t          *data_mp;
3485 3240                  ip_xmit_attr_t  ixas;
3486 3241                  ill_t           *ill;
3487 3242  
3488 3243                  asyncmp = acq_msgs;
3489 3244                  acq_msgs = acq_msgs->b_next;
3490 3245                  asyncmp->b_next = NULL;
3491 3246  
3492 3247                  /*
3493 3248                   * Extract the ip_xmit_attr_t from the first mblk.
3494 3249                   * Verifies that the netstack and ill is still around; could
3495 3250                   * have vanished while iked was doing its work.
3496 3251                   * On successful return we have an nce_t and the ill/ipst can't
3497 3252                   * disappear until we do the nce_refrele in ixa_cleanup.
3498 3253                   */
3499 3254                  data_mp = asyncmp->b_cont;
3500 3255                  asyncmp->b_cont = NULL;
3501 3256                  if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
3502 3257                          ESP_BUMP_STAT(espstack, out_discards);
3503 3258                          ip_drop_packet(data_mp, B_FALSE, NULL,
3504 3259                              DROPPER(ipss, ipds_sadb_acquire_timeout),
3505 3260                              &espstack->esp_dropper);
3506 3261                  } else if (rc != 0) {
3507 3262                          ill = ixas.ixa_nce->nce_ill;
3508 3263                          ESP_BUMP_STAT(espstack, out_discards);
3509 3264                          ip_drop_packet(data_mp, B_FALSE, ill,
3510 3265                              DROPPER(ipss, ipds_sadb_acquire_timeout),
3511 3266                              &espstack->esp_dropper);
3512 3267                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3513 3268                  } else {
3514 3269                          esp_outbound_finish(data_mp, &ixas);
3515 3270                  }
3516 3271                  ixa_cleanup(&ixas);
3517 3272          }
3518 3273  
3519 3274          return (rc);
3520 3275  }
3521 3276  
3522 3277  /*
3523 3278   * Process one of the queued messages (from ipsacq_mp) once the SA
3524 3279   * has been added.
3525 3280   */
3526 3281  static void
3527 3282  esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
3528 3283  {
3529 3284          netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
3530 3285          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3531 3286          ipsec_stack_t   *ipss = ns->netstack_ipsec;
3532 3287          ill_t           *ill = ixa->ixa_nce->nce_ill;
3533 3288  
3534 3289          if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) {
3535 3290                  ESP_BUMP_STAT(espstack, out_discards);
3536 3291                  ip_drop_packet(data_mp, B_FALSE, ill,
3537 3292                      DROPPER(ipss, ipds_sadb_acquire_timeout),
3538 3293                      &espstack->esp_dropper);
3539 3294                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3540 3295                  return;
3541 3296          }
3542 3297  
3543 3298          data_mp = esp_outbound(data_mp, ixa);
3544 3299          if (data_mp == NULL)
3545 3300                  return;
3546 3301  
3547 3302          /* do AH processing if needed */
3548 3303          data_mp = esp_do_outbound_ah(data_mp, ixa);
3549 3304          if (data_mp == NULL)
3550 3305                  return;
3551 3306  
3552 3307          (void) ip_output_post_ipsec(data_mp, ixa);
3553 3308  }
3554 3309  
3555 3310  /*
3556 3311   * Add new ESP security association.  This may become a generic AH/ESP
3557 3312   * routine eventually.
3558 3313   */
3559 3314  static int
3560 3315  esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
3561 3316  {
3562 3317          sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3563 3318          sadb_address_t *srcext =
3564 3319              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3565 3320          sadb_address_t *dstext =
3566 3321              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3567 3322          sadb_address_t *isrcext =
3568 3323              (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
3569 3324          sadb_address_t *idstext =
3570 3325              (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
3571 3326          sadb_address_t *nttext_loc =
3572 3327              (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
3573 3328          sadb_address_t *nttext_rem =
3574 3329              (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
3575 3330          sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3576 3331          sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3577 3332          struct sockaddr_in *src, *dst;
3578 3333          struct sockaddr_in *natt_loc, *natt_rem;
3579 3334          struct sockaddr_in6 *natt_loc6, *natt_rem6;
3580 3335          sadb_lifetime_t *soft =
3581 3336              (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3582 3337          sadb_lifetime_t *hard =
3583 3338              (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3584 3339          sadb_lifetime_t *idle =
3585 3340              (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
3586 3341          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3587 3342          ipsec_stack_t   *ipss = ns->netstack_ipsec;
3588 3343  
3589 3344  
3590 3345  
3591 3346          /* I need certain extensions present for an ADD message. */
3592 3347          if (srcext == NULL) {
3593 3348                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3594 3349                  return (EINVAL);
3595 3350          }
3596 3351          if (dstext == NULL) {
3597 3352                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3598 3353                  return (EINVAL);
3599 3354          }
3600 3355          if (isrcext == NULL && idstext != NULL) {
3601 3356                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3602 3357                  return (EINVAL);
3603 3358          }
3604 3359          if (isrcext != NULL && idstext == NULL) {
3605 3360                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
3606 3361                  return (EINVAL);
3607 3362          }
3608 3363          if (assoc == NULL) {
3609 3364                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3610 3365                  return (EINVAL);
3611 3366          }
3612 3367          if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
3613 3368                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
3614 3369                  return (EINVAL);
3615 3370          }
3616 3371  
3617 3372          src = (struct sockaddr_in *)(srcext + 1);
3618 3373          dst = (struct sockaddr_in *)(dstext + 1);
3619 3374          natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
3620 3375          natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
3621 3376          natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
3622 3377          natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);
3623 3378  
3624 3379          /* Sundry ADD-specific reality checks. */
3625 3380          /* XXX STATS :  Logging/stats here? */
3626 3381  
3627 3382          if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
3628 3383              (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3629 3384                  *diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
3630 3385                  return (EINVAL);
3631 3386          }
3632 3387          if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
3633 3388                  *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3634 3389                  return (EINVAL);
3635 3390          }
3636 3391  
3637 3392  #ifndef IPSEC_LATENCY_TEST
3638 3393          if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
3639 3394              assoc->sadb_sa_auth == SADB_AALG_NONE) {
3640 3395                  *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3641 3396                  return (EINVAL);
3642 3397          }
3643 3398  #endif
3644 3399  
3645 3400          if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) {
3646 3401                  *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
3647 3402                  return (EINVAL);
3648 3403          }
3649 3404  
3650 3405          if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
3651 3406                  return (EINVAL);
3652 3407          }
3653 3408          ASSERT(src->sin_family == dst->sin_family);
3654 3409  
3655 3410          if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
3656 3411                  if (nttext_loc == NULL) {
3657 3412                          *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3658 3413                          return (EINVAL);
3659 3414                  }
3660 3415  
3661 3416                  if (natt_loc->sin_family == AF_INET6 &&
3662 3417                      !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
3663 3418                          *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
3664 3419                          return (EINVAL);
3665 3420                  }
3666 3421          }
3667 3422  
3668 3423          if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
3669 3424                  if (nttext_rem == NULL) {
3670 3425                          *diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3671 3426                          return (EINVAL);
3672 3427                  }
3673 3428                  if (natt_rem->sin_family == AF_INET6 &&
3674 3429                      !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
3675 3430                          *diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
3676 3431                          return (EINVAL);
3677 3432                  }
3678 3433          }
3679 3434  
3680 3435  
3681 3436          /* Stuff I don't support, for now.  XXX Diagnostic? */
3682 3437          if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
3683 3438                  return (EOPNOTSUPP);
  
    | 
      ↓ open down ↓ | 
    504 lines elided | 
    
      ↑ open up ↑ | 
  
3684 3439  
3685 3440          if ((*diagnostic = sadb_labelchk(ksi)) != 0)
3686 3441                  return (EINVAL);
3687 3442  
3688 3443          /*
3689 3444           * XXX Policy :  I'm not checking identities at this time,
3690 3445           * but if I did, I'd do them here, before I sent
3691 3446           * the weak key check up to the algorithm.
3692 3447           */
3693 3448  
3694      -        mutex_enter(&ipss->ipsec_alg_lock);
     3449 +        rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3695 3450  
3696 3451          /*
3697 3452           * First locate the authentication algorithm.
3698 3453           */
3699 3454  #ifdef IPSEC_LATENCY_TEST
3700 3455          if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) {
3701 3456  #else
3702 3457          if (akey != NULL) {
3703 3458  #endif
3704 3459                  ipsec_alginfo_t *aalg;
3705 3460  
3706 3461                  aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3707 3462                      [assoc->sadb_sa_auth];
3708 3463                  if (aalg == NULL || !ALG_VALID(aalg)) {
3709      -                        mutex_exit(&ipss->ipsec_alg_lock);
     3464 +                        rw_exit(&ipss->ipsec_alg_lock);
3710 3465                          esp1dbg(espstack, ("Couldn't find auth alg #%d.\n",
3711 3466                              assoc->sadb_sa_auth));
3712 3467                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
3713 3468                          return (EINVAL);
3714 3469                  }
3715 3470  
3716 3471                  /*
3717 3472                   * Sanity check key sizes.
3718 3473                   * Note: It's not possible to use SADB_AALG_NONE because
3719 3474                   * this auth_alg is not defined with ALG_FLAG_VALID. If this
3720 3475                   * ever changes, the same check for SADB_AALG_NONE and
3721 3476                   * an auth_key != NULL should be made here (see below).
3722 3477                   */
3723 3478                  if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
3724      -                        mutex_exit(&ipss->ipsec_alg_lock);
     3479 +                        rw_exit(&ipss->ipsec_alg_lock);
3725 3480                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
3726 3481                          return (EINVAL);
3727 3482                  }
3728 3483                  ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3729 3484  
3730 3485                  /* check key and fix parity if needed */
3731 3486                  if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
3732 3487                      diagnostic) != 0) {
3733      -                        mutex_exit(&ipss->ipsec_alg_lock);
     3488 +                        rw_exit(&ipss->ipsec_alg_lock);
3734 3489                          return (EINVAL);
3735 3490                  }
3736 3491          }
3737 3492  
3738 3493          /*
3739 3494           * Then locate the encryption algorithm.
3740 3495           */
3741 3496          if (ekey != NULL) {
3742 3497                  uint_t keybits;
3743 3498                  ipsec_alginfo_t *ealg;
3744 3499  
3745 3500                  ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3746 3501                      [assoc->sadb_sa_encrypt];
3747 3502                  if (ealg == NULL || !ALG_VALID(ealg)) {
3748      -                        mutex_exit(&ipss->ipsec_alg_lock);
     3503 +                        rw_exit(&ipss->ipsec_alg_lock);
3749 3504                          esp1dbg(espstack, ("Couldn't find encr alg #%d.\n",
3750 3505                              assoc->sadb_sa_encrypt));
3751 3506                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
3752 3507                          return (EINVAL);
3753 3508                  }
3754 3509  
3755 3510                  /*
3756 3511                   * Sanity check key sizes. If the encryption algorithm is
3757 3512                   * SADB_EALG_NULL but the encryption key is NOT
3758 3513                   * NULL then complain.
3759 3514                   *
3760 3515                   * The keying material includes salt bits if required by
3761 3516                   * algorithm and optionally the Initial IV, check the
3762 3517                   * length of what's left.
3763 3518                   */
3764 3519                  keybits = ekey->sadb_key_bits;
3765 3520                  keybits -= ekey->sadb_key_reserved;
3766 3521                  keybits -= SADB_8TO1(ealg->alg_saltlen);
3767 3522                  if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
3768 3523                      (!ipsec_valid_key_size(keybits, ealg))) {
3769      -                        mutex_exit(&ipss->ipsec_alg_lock);
     3524 +                        rw_exit(&ipss->ipsec_alg_lock);
3770 3525                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
3771 3526                          return (EINVAL);
3772 3527                  }
3773 3528                  ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
3774 3529  
3775 3530                  /* check key */
3776 3531                  if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
3777 3532                      diagnostic) != 0) {
3778      -                        mutex_exit(&ipss->ipsec_alg_lock);
     3533 +                        rw_exit(&ipss->ipsec_alg_lock);
3779 3534                          return (EINVAL);
3780 3535                  }
3781 3536          }
3782      -        mutex_exit(&ipss->ipsec_alg_lock);
     3537 +        rw_exit(&ipss->ipsec_alg_lock);
3783 3538  
3784 3539          return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
3785 3540              diagnostic, espstack));
3786 3541  }
3787 3542  
3788 3543  /*
3789 3544   * Update a security association.  Updates come in two varieties.  The first
3790 3545   * is an update of lifetimes on a non-larval SA.  The second is an update of
3791 3546   * a larval SA, which ends up looking a lot more like an add.
3792 3547   */
3793 3548  static int
3794 3549  esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3795 3550      ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3796 3551  {
3797 3552          sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3798 3553          mblk_t    *buf_pkt;
3799 3554          int rcode;
3800 3555  
3801 3556          sadb_address_t *dstext =
3802 3557              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3803 3558  
3804 3559          if (dstext == NULL) {
3805 3560                  *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3806 3561                  return (EINVAL);
3807 3562          }
3808 3563  
3809 3564          rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb,
3810 3565              diagnostic, espstack->esp_pfkey_q, esp_add_sa,
3811 3566              espstack->ipsecesp_netstack, sadb_msg_type);
3812 3567  
3813 3568          if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
3814 3569              (rcode != 0)) {
3815 3570                  return (rcode);
3816 3571          }
3817 3572  
3818 3573          HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec,
3819 3574              espstack->esp_dropper, buf_pkt);
3820 3575  
3821 3576          return (rcode);
3822 3577  }
3823 3578  
3824 3579  /* XXX refactor me */
3825 3580  /*
3826 3581   * Delete a security association.  This is REALLY likely to be code common to
3827 3582   * both AH and ESP.  Find the association, then unlink it.
3828 3583   */
3829 3584  static int
3830 3585  esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3831 3586      ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3832 3587  {
3833 3588          sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3834 3589          sadb_address_t *dstext =
3835 3590              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3836 3591          sadb_address_t *srcext =
3837 3592              (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3838 3593          struct sockaddr_in *sin;
3839 3594  
3840 3595          if (assoc == NULL) {
3841 3596                  if (dstext != NULL) {
3842 3597                          sin = (struct sockaddr_in *)(dstext + 1);
3843 3598                  } else if (srcext != NULL) {
3844 3599                          sin = (struct sockaddr_in *)(srcext + 1);
3845 3600                  } else {
3846 3601                          *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3847 3602                          return (EINVAL);
3848 3603                  }
3849 3604                  return (sadb_purge_sa(mp, ksi,
3850 3605                      (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 :
3851 3606                      &espstack->esp_sadb.s_v4, diagnostic,
3852 3607                      espstack->esp_pfkey_q));
3853 3608          }
3854 3609  
3855 3610          return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic,
3856 3611              espstack->esp_pfkey_q, sadb_msg_type));
3857 3612  }
3858 3613  
3859 3614  /* XXX refactor me */
3860 3615  /*
3861 3616   * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
3862 3617   * messages.
3863 3618   */
3864 3619  static void
3865 3620  esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
3866 3621  {
3867 3622          int error;
3868 3623          sadb_msg_t *samsg;
3869 3624  
3870 3625          /*
3871 3626           * Dump each fanout, bailing if error is non-zero.
3872 3627           */
3873 3628  
3874 3629          error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3875 3630              &espstack->esp_sadb.s_v4);
3876 3631          if (error != 0)
3877 3632                  goto bail;
3878 3633  
3879 3634          error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3880 3635              &espstack->esp_sadb.s_v6);
3881 3636  bail:
3882 3637          ASSERT(mp->b_cont != NULL);
3883 3638          samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3884 3639          samsg->sadb_msg_errno = (uint8_t)error;
3885 3640          sadb_pfkey_echo(espstack->esp_pfkey_q, mp,
3886 3641              (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
3887 3642  }
3888 3643  
3889 3644  /*
3890 3645   * First-cut reality check for an inbound PF_KEY message.
3891 3646   */
3892 3647  static boolean_t
3893 3648  esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
3894 3649      ipsecesp_stack_t *espstack)
3895 3650  {
3896 3651          int diagnostic;
3897 3652  
3898 3653          if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
3899 3654                  diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
3900 3655                  goto badmsg;
3901 3656          }
3902 3657          if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
3903 3658              ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
3904 3659                  diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
3905 3660                  goto badmsg;
3906 3661          }
3907 3662          return (B_FALSE);       /* False ==> no failures */
3908 3663  
3909 3664  badmsg:
3910 3665          sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
3911 3666              ksi->ks_in_serial);
3912 3667          return (B_TRUE);        /* True ==> failures */
3913 3668  }
3914 3669  
3915 3670  /*
3916 3671   * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
3917 3672   * error cases.  What I receive is a fully-formed, syntactically legal
3918 3673   * PF_KEY message.  I then need to check semantics...
3919 3674   *
3920 3675   * This code may become common to AH and ESP.  Stay tuned.
3921 3676   *
3922 3677   * I also make the assumption that db_ref's are cool.  If this assumption
3923 3678   * is wrong, this means that someone other than keysock or me has been
3924 3679   * mucking with PF_KEY messages.
3925 3680   */
3926 3681  static void
3927 3682  esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack)
3928 3683  {
3929 3684          mblk_t *msg = mp->b_cont;
3930 3685          sadb_msg_t *samsg;
3931 3686          keysock_in_t *ksi;
3932 3687          int error;
3933 3688          int diagnostic = SADB_X_DIAGNOSTIC_NONE;
3934 3689  
3935 3690          ASSERT(msg != NULL);
3936 3691  
3937 3692          samsg = (sadb_msg_t *)msg->b_rptr;
3938 3693          ksi = (keysock_in_t *)mp->b_rptr;
3939 3694  
3940 3695          /*
3941 3696           * If applicable, convert unspecified AF_INET6 to unspecified
3942 3697           * AF_INET.  And do other address reality checks.
3943 3698           */
3944 3699          if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp,
3945 3700              espstack->ipsecesp_netstack) ||
3946 3701              esp_pfkey_reality_failures(mp, ksi, espstack)) {
3947 3702                  return;
3948 3703          }
3949 3704  
3950 3705          switch (samsg->sadb_msg_type) {
3951 3706          case SADB_ADD:
3952 3707                  error = esp_add_sa(mp, ksi, &diagnostic,
3953 3708                      espstack->ipsecesp_netstack);
3954 3709                  if (error != 0) {
3955 3710                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3956 3711                              diagnostic, ksi->ks_in_serial);
3957 3712                  }
3958 3713                  /* else esp_add_sa() took care of things. */
3959 3714                  break;
3960 3715          case SADB_DELETE:
3961 3716          case SADB_X_DELPAIR:
3962 3717          case SADB_X_DELPAIR_STATE:
3963 3718                  error = esp_del_sa(mp, ksi, &diagnostic, espstack,
3964 3719                      samsg->sadb_msg_type);
3965 3720                  if (error != 0) {
3966 3721                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3967 3722                              diagnostic, ksi->ks_in_serial);
3968 3723                  }
3969 3724                  /* Else esp_del_sa() took care of things. */
3970 3725                  break;
3971 3726          case SADB_GET:
3972 3727                  error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb,
3973 3728                      &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type);
3974 3729                  if (error != 0) {
3975 3730                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
3976 3731                              diagnostic, ksi->ks_in_serial);
3977 3732                  }
3978 3733                  /* Else sadb_get_sa() took care of things. */
3979 3734                  break;
3980 3735          case SADB_FLUSH:
3981 3736                  sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack);
3982 3737                  sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL);
3983 3738                  break;
3984 3739          case SADB_REGISTER:
3985 3740                  /*
3986 3741                   * Hmmm, let's do it!  Check for extensions (there should
3987 3742                   * be none), extract the fields, call esp_register_out(),
3988 3743                   * then either free or report an error.
3989 3744                   *
3990 3745                   * Keysock takes care of the PF_KEY bookkeeping for this.
3991 3746                   */
3992 3747                  if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
3993 3748                      ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) {
3994 3749                          freemsg(mp);
3995 3750                  } else {
3996 3751                          /*
3997 3752                           * Only way this path hits is if there is a memory
3998 3753                           * failure.  It will not return B_FALSE because of
3999 3754                           * lack of esp_pfkey_q if I am in wput().
4000 3755                           */
4001 3756                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM,
4002 3757                              diagnostic, ksi->ks_in_serial);
4003 3758                  }
4004 3759                  break;
4005 3760          case SADB_UPDATE:
4006 3761          case SADB_X_UPDATEPAIR:
4007 3762                  /*
4008 3763                   * Find a larval, if not there, find a full one and get
4009 3764                   * strict.
4010 3765                   */
4011 3766                  error = esp_update_sa(mp, ksi, &diagnostic, espstack,
4012 3767                      samsg->sadb_msg_type);
4013 3768                  if (error != 0) {
4014 3769                          sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
4015 3770                              diagnostic, ksi->ks_in_serial);
4016 3771                  }
4017 3772                  /* else esp_update_sa() took care of things. */
4018 3773                  break;
4019 3774          case SADB_GETSPI:
4020 3775                  /*
4021 3776                   * Reserve a new larval entry.
4022 3777                   */
4023 3778                  esp_getspi(mp, ksi, espstack);
4024 3779                  break;
4025 3780          case SADB_ACQUIRE:
4026 3781                  /*
4027 3782                   * Find larval and/or ACQUIRE record and kill it (them), I'm
4028 3783                   * most likely an error.  Inbound ACQUIRE messages should only
4029 3784                   * have the base header.
4030 3785                   */
4031 3786                  sadb_in_acquire(samsg, &espstack->esp_sadb,
4032 3787                      espstack->esp_pfkey_q, espstack->ipsecesp_netstack);
4033 3788                  freemsg(mp);
4034 3789                  break;
4035 3790          case SADB_DUMP:
4036 3791                  /*
4037 3792                   * Dump all entries.
4038 3793                   */
4039 3794                  esp_dump(mp, ksi, espstack);
4040 3795                  /* esp_dump will take care of the return message, etc. */
4041 3796                  break;
4042 3797          case SADB_EXPIRE:
4043 3798                  /* Should never reach me. */
4044 3799                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP,
4045 3800                      diagnostic, ksi->ks_in_serial);
4046 3801                  break;
4047 3802          default:
4048 3803                  sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL,
4049 3804                      SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
4050 3805                  break;
4051 3806          }
4052 3807  }
4053 3808  
4054 3809  /*
4055 3810   * Handle case where PF_KEY says it can't find a keysock for one of my
4056 3811   * ACQUIRE messages.
4057 3812   */
4058 3813  static void
4059 3814  esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack)
4060 3815  {
4061 3816          sadb_msg_t *samsg;
4062 3817          keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
4063 3818  
4064 3819          if (mp->b_cont == NULL) {
4065 3820                  freemsg(mp);
4066 3821                  return;
4067 3822          }
4068 3823          samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
4069 3824  
4070 3825          /*
4071 3826           * If keysock can't find any registered, delete the acquire record
4072 3827           * immediately, and handle errors.
4073 3828           */
4074 3829          if (samsg->sadb_msg_type == SADB_ACQUIRE) {
4075 3830                  samsg->sadb_msg_errno = kse->ks_err_errno;
4076 3831                  samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
4077 3832                  /*
4078 3833                   * Use the write-side of the esp_pfkey_q
4079 3834                   */
4080 3835                  sadb_in_acquire(samsg, &espstack->esp_sadb,
4081 3836                      WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack);
4082 3837          }
4083 3838  
4084 3839          freemsg(mp);
4085 3840  }
4086 3841  
4087 3842  /*
4088 3843   * ESP module write put routine.
4089 3844   */
4090 3845  static void
4091 3846  ipsecesp_wput(queue_t *q, mblk_t *mp)
4092 3847  {
4093 3848          ipsec_info_t *ii;
4094 3849          struct iocblk *iocp;
4095 3850          ipsecesp_stack_t        *espstack = (ipsecesp_stack_t *)q->q_ptr;
4096 3851  
4097 3852          esp3dbg(espstack, ("In esp_wput().\n"));
4098 3853  
4099 3854          /* NOTE: Each case must take care of freeing or passing mp. */
4100 3855          switch (mp->b_datap->db_type) {
4101 3856          case M_CTL:
4102 3857                  if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
4103 3858                          /* Not big enough message. */
4104 3859                          freemsg(mp);
4105 3860                          break;
4106 3861                  }
4107 3862                  ii = (ipsec_info_t *)mp->b_rptr;
4108 3863  
4109 3864                  switch (ii->ipsec_info_type) {
4110 3865                  case KEYSOCK_OUT_ERR:
4111 3866                          esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n"));
4112 3867                          esp_keysock_no_socket(mp, espstack);
4113 3868                          break;
4114 3869                  case KEYSOCK_IN:
4115 3870                          ESP_BUMP_STAT(espstack, keysock_in);
4116 3871                          esp3dbg(espstack, ("Got KEYSOCK_IN message.\n"));
4117 3872  
4118 3873                          /* Parse the message. */
4119 3874                          esp_parse_pfkey(mp, espstack);
4120 3875                          break;
4121 3876                  case KEYSOCK_HELLO:
4122 3877                          sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp,
4123 3878                              esp_ager, (void *)espstack, &espstack->esp_event,
4124 3879                              SADB_SATYPE_ESP);
4125 3880                          break;
4126 3881                  default:
4127 3882                          esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n",
4128 3883                              ii->ipsec_info_type));
4129 3884                          freemsg(mp);
4130 3885                          break;
4131 3886                  }
4132 3887                  break;
4133 3888          case M_IOCTL:
4134 3889                  iocp = (struct iocblk *)mp->b_rptr;
4135 3890                  switch (iocp->ioc_cmd) {
4136 3891                  case ND_SET:
4137 3892                  case ND_GET:
4138 3893                          if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) {
4139 3894                                  qreply(q, mp);
4140 3895                                  return;
4141 3896                          } else {
4142 3897                                  iocp->ioc_error = ENOENT;
4143 3898                          }
4144 3899                          /* FALLTHRU */
4145 3900                  default:
4146 3901                          /* We really don't support any other ioctls, do we? */
4147 3902  
4148 3903                          /* Return EINVAL */
4149 3904                          if (iocp->ioc_error != ENOENT)
4150 3905                                  iocp->ioc_error = EINVAL;
4151 3906                          iocp->ioc_count = 0;
4152 3907                          mp->b_datap->db_type = M_IOCACK;
4153 3908                          qreply(q, mp);
4154 3909                          return;
4155 3910                  }
4156 3911          default:
4157 3912                  esp3dbg(espstack,
4158 3913                      ("Got default message, type %d, passing to IP.\n",
4159 3914                      mp->b_datap->db_type));
4160 3915                  putnext(q, mp);
4161 3916          }
4162 3917  }
4163 3918  
4164 3919  /*
4165 3920   * Wrapper to allow IP to trigger an ESP association failure message
4166 3921   * during inbound SA selection.
4167 3922   */
4168 3923  void
4169 3924  ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
4170 3925      uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
4171 3926  {
4172 3927          netstack_t      *ns = ira->ira_ill->ill_ipst->ips_netstack;
4173 3928          ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4174 3929          ipsec_stack_t   *ipss = ns->netstack_ipsec;
4175 3930  
4176 3931          if (espstack->ipsecesp_log_unknown_spi) {
4177 3932                  ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
4178 3933                      addr, af, espstack->ipsecesp_netstack);
4179 3934          }
4180 3935  
4181 3936          ip_drop_packet(mp, B_TRUE, ira->ira_ill,
4182 3937              DROPPER(ipss, ipds_esp_no_sa),
4183 3938              &espstack->esp_dropper);
4184 3939  }
4185 3940  
4186 3941  /*
4187 3942   * Initialize the ESP input and output processing functions.
4188 3943   */
4189 3944  void
4190 3945  ipsecesp_init_funcs(ipsa_t *sa)
4191 3946  {
4192 3947          if (sa->ipsa_output_func == NULL)
4193 3948                  sa->ipsa_output_func = esp_outbound;
4194 3949          if (sa->ipsa_input_func == NULL)
4195 3950                  sa->ipsa_input_func = esp_inbound;
4196 3951  }
  
    | 
      ↓ open down ↓ | 
    404 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX