Print this page
Bayard's initial drop, needs finishing, or at least testing.

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/ip/sadb.c
          +++ new/usr/src/uts/common/inet/ip/sadb.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
       24 + * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
  24   25   */
  25   26  
  26   27  #include <sys/types.h>
  27   28  #include <sys/stream.h>
  28   29  #include <sys/stropts.h>
  29   30  #include <sys/strsubr.h>
  30   31  #include <sys/errno.h>
  31   32  #include <sys/ddi.h>
  32   33  #include <sys/debug.h>
  33   34  #include <sys/cmn_err.h>
↓ open down ↓ 31 lines elided ↑ open up ↑
  65   66  #include <inet/ipdrop.h>
  66   67  #include <inet/ipclassifier.h>
  67   68  #include <inet/sctp_ip.h>
  68   69  #include <sys/tsol/tnet.h>
  69   70  
  70   71  /*
  71   72   * This source file contains Security Association Database (SADB) common
  72   73   * routines.  They are linked in with the AH module.  Since AH has no chance
  73   74   * of falling under export control, it was safe to link it in there.
  74   75   */
  75      -
  76      -static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
  77      -    ipsec_action_t *, boolean_t, uint32_t, uint32_t, sadb_sens_t *,
  78      -    netstack_t *);
  79   76  static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *);
  80   77  static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
  81   78                              netstack_t *);
  82   79  static void sadb_destroy(sadb_t *, netstack_t *);
  83   80  static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
  84   81  static ts_label_t *sadb_label_from_sens(sadb_sens_t *, uint64_t *);
  85   82  static sadb_sens_t *sadb_make_sens_ext(ts_label_t *tsl, int *len);
       83 +/* Args named here, as the booleans can be hard to distinguish */
       84 +static mblk_t *sadb_construct_acqmsg(ipsacq_t *acqrec, ipsec_selector_t *sel,
       85 +    ipsec_action_t *ap, ipsec_policy_t *pp, netstack_t *ns, sadb_sens_t *sens,
       86 +    boolean_t need_esp, boolean_t tunnel_mode, boolean_t extended,
       87 +    boolean_t with_prop);
       88 +static uint8_t *sadb_construct_eprop(const ipsec_action_t *,
       89 +    const ipsec_policy_t *, netstack_t *, const uint8_t *, const uint8_t *);
       90 +static void sadb_insert_prop(sadb_prop_t *, const ipsec_action_t *,
       91 +    netstack_t *, uint_t, boolean_t);
  86   92  
  87   93  static time_t sadb_add_time(time_t, uint64_t);
  88   94  static void lifetime_fuzz(ipsa_t *);
  89   95  static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
  90   96  static int get_ipsa_pair(ipsa_query_t *, ipsap_t *, int *);
  91   97  static void init_ipsa_pair(ipsap_t *);
  92   98  static void destroy_ipsa_pair(ipsap_t *);
  93   99  static int update_pairing(ipsap_t *, ipsa_query_t *, keysock_in_t *, int *);
  94  100  static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
  95  101  
  96  102  /*
  97  103   * ipsacq_maxpackets is defined here to make it tunable
  98  104   * from /etc/system.
  99  105   */
 100  106  extern uint64_t ipsacq_maxpackets;
 101  107  
      108 +/*
      109 + * Allocation size for sin_t/sin6_t in address extensions. We allocate IPv6
      110 + * because it's the larger of the two, and we roundup because the type isn't
      111 + * defined to guarantee 64-bit alignment.
      112 + */
      113 +#define SADB_SOCKADDR_SIZE      (roundup(sizeof (sin6_t), sizeof (uint64_t)))
      114 +
 102  115  #define SET_EXPIRE(sa, delta, exp) {                            \
 103  116          if (((sa)->ipsa_ ## delta) != 0) {                              \
 104  117                  (sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime,  \
 105  118                          (sa)->ipsa_ ## delta);                          \
 106  119          }                                                               \
 107  120  }
 108  121  
 109  122  #define UPDATE_EXPIRE(sa, delta, exp) {                                 \
 110  123          if (((sa)->ipsa_ ## delta) != 0) {                              \
 111  124                  time_t tmp = sadb_add_time((sa)->ipsa_usetime,          \
 112  125                          (sa)->ipsa_ ## delta);                          \
 113  126                  if (((sa)->ipsa_ ## exp) == 0)                          \
 114  127                          (sa)->ipsa_ ## exp = tmp;                       \
 115  128                  else                                                    \
 116  129                          (sa)->ipsa_ ## exp =                            \
 117  130                              MIN((sa)->ipsa_ ## exp, tmp);               \
 118  131          }                                                               \
 119  132  }
 120  133  
      134 +/* Warning: watch for evaluation issues with complex args */
      135 +#define INITIALIZE_SAMSG(samsg, type)           \
      136 +        (samsg)->sadb_msg_version = PF_KEY_V2,  \
      137 +        (samsg)->sadb_msg_type = (type),        \
      138 +        (samsg)->sadb_msg_errno = 0,            \
      139 +        (samsg)->sadb_msg_reserved = 0
 121  140  
      141 +/* Warning: watch for evaluation issues with complex args */
      142 +#define ERRNO_SAMSG(samsg, errno)                               \
      143 +        (samsg)->sadb_msg_len = SADB_8TO64(sizeof (*samsg)),    \
      144 +        (samsg)->sadb_msg_errno = (errno),                      \
      145 +        (samsg)->sadb_x_msg_diagnostic = 0
      146 +
      147 +/*
      148 + * Warning: watch for evaluation issues with complex args. This is a rough,
      149 + * conservative calculation (e.g. combined mode encr algs can perform both
      150 + * encr/auth and ipsecconf drops auth algs in combinations). This is
      151 + * nevertheless reasonable, given that the kernel doesn't make or guarantee
      152 + * optimizations reducing the combination space.
      153 + */
      154 +#define CALC_COMBS(limit, ipss, need_esp)               {       \
      155 +        limit = (need_esp) ?                                    \
      156 +            (ipss)->ipsec_nalgs[IPSEC_ALG_AUTH] *               \
      157 +            (ipss)->ipsec_nalgs[IPSEC_ALG_ENCR]                 \
      158 +            : (ipss)->ipsec_nalgs[IPSEC_ALG_AUTH];              \
      159 +        ASSERT((limit) > 0);                                    \
      160 +}
      161 +
 122  162  /* wrap the macro so we can pass it as a function pointer */
 123  163  void
 124  164  sadb_sa_refrele(void *target)
 125  165  {
 126  166          IPSA_REFRELE(((ipsa_t *)target));
 127  167  }
 128  168  
 129  169  /*
 130  170   * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 131  171   * a signed type.
↓ open down ↓ 836 lines elided ↑ open up ↑
 968 1008  
 969 1009          if (error) {
 970 1010                  sadb_freeassoc(newbie);
 971 1011                  return (NULL);
 972 1012          }
 973 1013  
 974 1014          return (newbie);
 975 1015  }
 976 1016  
 977 1017  /*
 978      - * Initialize a SADB address extension at the address specified by addrext.
 979      - * Return a pointer to the end of the new address extension.
     1018 + * Takes two uint8_t (bounds on buffer in which to construct extension) and an
     1019 + * addr (address to write into extension) pointer, a uint16_t (type of address
     1020 + * in extension), and af, port, proto, and prefix values (further extension
     1021 + * content). Returns a pointer to the end of the extension (rounded up to a
     1022 + * 64-bit multiple; length varies with address family), or NULL if too small.
 980 1023   */
 981 1024  static uint8_t *
 982      -sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
     1025 +sadb_make_addr_ext(const uint8_t *start, const uint8_t *end, uint16_t exttype,
 983 1026      sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
 984 1027  {
 985 1028          struct sockaddr_in *sin;
 986 1029          struct sockaddr_in6 *sin6;
 987      -        uint8_t *cur = start;
     1030 +        uint8_t *cur = (uint8_t *)start;
 988 1031          int addrext_len;
 989      -        int sin_len;
 990 1032          sadb_address_t *addrext = (sadb_address_t *)cur;
 991 1033  
 992      -        if (cur == NULL)
 993      -                return (NULL);
     1034 +        ASSERT(cur != NULL && end != NULL);
 994 1035  
 995 1036          cur += sizeof (*addrext);
     1037 +        sin = (struct sockaddr_in *)cur;
     1038 +        sin6 = (struct sockaddr_in6 *)cur;
     1039 +        cur += (af == AF_INET) ? sizeof (*sin) : sizeof (*sin6);
     1040 +
     1041 +        addrext_len = roundup(cur - start, sizeof (uint64_t));
     1042 +        cur = (uint8_t *)start + addrext_len;
     1043 +
 996 1044          if (cur > end)
 997 1045                  return (NULL);
 998 1046  
 999 1047          addrext->sadb_address_proto = proto;
1000 1048          addrext->sadb_address_prefixlen = prefix;
1001 1049          addrext->sadb_address_reserved = 0;
1002 1050          addrext->sadb_address_exttype = exttype;
     1051 +        addrext->sadb_address_len = SADB_8TO64(addrext_len);
1003 1052  
1004 1053          switch (af) {
1005 1054          case AF_INET:
1006      -                sin = (struct sockaddr_in *)cur;
1007      -                sin_len = sizeof (*sin);
1008      -                cur += sin_len;
1009      -                if (cur > end)
1010      -                        return (NULL);
1011      -
1012 1055                  sin->sin_family = af;
1013 1056                  bzero(sin->sin_zero, sizeof (sin->sin_zero));
1014 1057                  sin->sin_port = port;
1015 1058                  IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1016 1059                  break;
1017 1060          case AF_INET6:
1018      -                sin6 = (struct sockaddr_in6 *)cur;
1019      -                sin_len = sizeof (*sin6);
1020      -                cur += sin_len;
1021      -                if (cur > end)
1022      -                        return (NULL);
1023      -
1024 1061                  bzero(sin6, sizeof (*sin6));
1025 1062                  sin6->sin6_family = af;
1026 1063                  sin6->sin6_port = port;
1027 1064                  IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1028 1065                  break;
1029 1066          }
1030 1067  
1031      -        addrext_len = roundup(cur - start, sizeof (uint64_t));
1032      -        addrext->sadb_address_len = SADB_8TO64(addrext_len);
     1068 +        return (cur);
     1069 +}
1033 1070  
1034      -        cur = start + addrext_len;
1035      -        if (cur > end)
1036      -                cur = NULL;
     1071 +/*
     1072 + * Takes ipsec_selector_t (address information used in forming addr
     1073 + * extensions), ipsec_policy_t (contains pointer to selector key used in
     1074 + * tunnel mode) and ipsec_action_t (checked for ipa_want_unique) pointers and
     1075 + * a tunnel mode boolean, and creates address extensions in the buffer bounded
     1076 + * by start and end. Returns new value for cur pointer or NULL on failure.
     1077 + * XXX TODO: Original packet contents go here.
     1078 + */
     1079 +static uint8_t *
     1080 +sadb_sel_to_addrexts(const ipsec_selector_t *sel, const ipsec_policy_t *pp,
     1081 +    const ipsec_action_t *ap, const uint8_t *start, const uint8_t *end,
     1082 +    boolean_t tunnel_mode)
     1083 +{
     1084 +        uint8_t         proto, pfxlen, *cur = (uint8_t *)start;
     1085 +        ipsec_selkey_t  *ipsl;
     1086 +        sa_family_t     af;
     1087 +        uint16_t        lport, rport;
     1088 +        uint32_t        *saddrptr, *daddrptr;
1037 1089  
     1090 +        if (tunnel_mode) {
     1091 +                /*
     1092 +                 * Form inner address extensions based NOT on the inner
     1093 +                 * selectors (i.e. the packet data), but on the policy's
     1094 +                 * selector key (i.e. the policy's selector information).
     1095 +                 *
     1096 +                 * NOTE:  The position of IPv4 and IPv6 addresses is the
     1097 +                 * same in ipsec_selkey_t (unless the compiler does very
     1098 +                 * strange things with unions, consult your local C language
     1099 +                 * lawyer for details).
     1100 +                 */
     1101 +                ASSERT(pp != NULL);
     1102 +
     1103 +                ipsl = &(pp->ipsp_sel->ipsl_key);
     1104 +                if (ipsl->ipsl_valid & IPSL_IPV4) {
     1105 +                        af = AF_INET;
     1106 +                        ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
     1107 +                        ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
     1108 +                } else {
     1109 +                        af = AF_INET6;
     1110 +                        ASSERT(sel->ips_protocol == IPPROTO_IPV6);
     1111 +                        ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
     1112 +                }
     1113 +
     1114 +                if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
     1115 +                        saddrptr = (uint32_t *)(&ipsl->ipsl_local);
     1116 +                        pfxlen = ipsl->ipsl_local_pfxlen;
     1117 +                } else {
     1118 +                        saddrptr = (uint32_t *)(&ipv6_all_zeros);
     1119 +                        pfxlen = 0;
     1120 +                }
     1121 +                /* XXX What about ICMP type/code? */
     1122 +                lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
     1123 +                    ipsl->ipsl_lport : 0;
     1124 +                proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
     1125 +                    ipsl->ipsl_proto : 0;
     1126 +
     1127 +                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
     1128 +                    af, saddrptr, lport, proto, pfxlen);
     1129 +                if (cur == NULL)
     1130 +                        goto done;
     1131 +
     1132 +                if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
     1133 +                        daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
     1134 +                        pfxlen = ipsl->ipsl_remote_pfxlen;
     1135 +                } else {
     1136 +                        daddrptr = (uint32_t *)(&ipv6_all_zeros);
     1137 +                        pfxlen = 0;
     1138 +                }
     1139 +                /* XXX What about ICMP type/code? */
     1140 +                rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
     1141 +                    ipsl->ipsl_rport : 0;
     1142 +
     1143 +                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
     1144 +                    af, daddrptr, rport, proto, pfxlen);
     1145 +                if (cur == NULL)
     1146 +                        goto done;
     1147 +
     1148 +                /*
     1149 +                 * TODO - if we go to RFC 3408's dream of transport mode
     1150 +                 * IP-in-IP _with_ inner-packet address selectors, we'll need
     1151 +                 * to further distinguish tunnel mode here.  For now, having
     1152 +                 * inner addresses and/or ports is sufficient.
     1153 +                 *
     1154 +                 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
     1155 +                 * outer addresses.
     1156 +                 */
     1157 +                proto = sel->ips_protocol;      /* Either _ENCAP or _IPV6 */
     1158 +                lport = rport = 0;
     1159 +        } else if ((ap != NULL) && (!ap->ipa_want_unique)) {
     1160 +                /* Not in tunnel mode, action doesn't want pop from pkt */
     1161 +                proto = 0;
     1162 +                lport = 0;
     1163 +                rport = 0;
     1164 +                if (pp != NULL) {
     1165 +                        ipsl = &(pp->ipsp_sel->ipsl_key);
     1166 +                        if (ipsl->ipsl_valid & IPSL_PROTOCOL)
     1167 +                                proto = ipsl->ipsl_proto;
     1168 +                        if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
     1169 +                                rport = ipsl->ipsl_rport;
     1170 +                        if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
     1171 +                                lport = ipsl->ipsl_lport;
     1172 +                }
     1173 +        } else {
     1174 +                /* Not in tunnel mode, action wants pop from pkt */
     1175 +                proto = sel->ips_protocol;
     1176 +                lport = sel->ips_local_port;
     1177 +                rport = sel->ips_remote_port;
     1178 +        }
     1179 +
     1180 +        af = sel->ips_isv4 ? AF_INET : AF_INET6;
     1181 +
     1182 +        /*
     1183 +         * NOTE:  The position of IPv4 and IPv6 addresses is the same
     1184 +         * in ipsec_selector_t.
     1185 +         */
     1186 +        cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
     1187 +            (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
     1188 +        if (cur == NULL)
     1189 +                goto done;
     1190 +
     1191 +        cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
     1192 +            (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
     1193 +done:
1038 1194          return (cur);
1039 1195  }
1040 1196  
1041 1197  /*
1042      - * Construct a key management cookie extension.
     1198 + * Use the buffer bounded by the start and end pointers to create a key
     1199 + * management cookie extension using the kmp and kmc uint32_t parameters.
1043 1200   */
1044      -
1045 1201  static uint8_t *
1046      -sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
     1202 +sadb_make_kmc_ext(const uint8_t *start, const uint8_t *end,
     1203 +    uint32_t kmp, uint32_t kmc)
1047 1204  {
1048      -        sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
     1205 +        uint8_t         *cur = (uint8_t *)start;
     1206 +        sadb_x_kmc_t    *kmcext = (sadb_x_kmc_t *)cur;
1049 1207  
1050      -        if (cur == NULL)
1051      -                return (NULL);
     1208 +        ASSERT(cur != NULL && end != NULL);
1052 1209  
1053 1210          cur += sizeof (*kmcext);
1054 1211  
1055 1212          if (cur > end)
1056 1213                  return (NULL);
1057 1214  
1058 1215          kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1059 1216          kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1060 1217          kmcext->sadb_x_kmc_proto = kmp;
1061 1218          kmcext->sadb_x_kmc_cookie = kmc;
↓ open down ↓ 2129 lines elided ↑ open up ↑
3191 3348                  bzero(akey + 1, newbie->ipsa_authkeylen);
3192 3349  
3193 3350                  /*
3194 3351                   * Pre-initialize the kernel crypto framework key
3195 3352                   * structure.
3196 3353                   */
3197 3354                  newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3198 3355                  newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3199 3356                  newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3200 3357  
3201      -                mutex_enter(&ipss->ipsec_alg_lock);
     3358 +                rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3202 3359                  alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3203 3360                      [newbie->ipsa_auth_alg];
3204 3361                  if (alg != NULL && ALG_VALID(alg)) {
3205 3362                          newbie->ipsa_amech.cm_type = alg->alg_mech_type;
3206 3363                          newbie->ipsa_amech.cm_param =
3207 3364                              (char *)&newbie->ipsa_mac_len;
3208 3365                          newbie->ipsa_amech.cm_param_len = sizeof (size_t);
3209 3366                          newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
3210 3367                  } else {
3211 3368                          newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3212 3369                  }
3213 3370                  error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3214      -                mutex_exit(&ipss->ipsec_alg_lock);
     3371 +                rw_exit(&ipss->ipsec_alg_lock);
3215 3372                  if (error != 0) {
3216 3373                          mutex_exit(&newbie->ipsa_lock);
3217 3374                          /*
 3218 3375                           * An error here indicates that alg is the wrong type
 3219 3376                           * (IE: not authentication), or it's not in the alg
 3220 3377                           * tables created by ipsecalgs(1m), or Kcf does not like
 3221 3378                           * the parameters passed in with this algorithm, which
 3222 3379                           * is probably a coding error!
3223 3380                           */
3224 3381                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3225 3382  
3226 3383                          goto error;
3227 3384                  }
3228 3385          }
3229 3386  
3230 3387          if (ekey != NULL) {
3231      -                mutex_enter(&ipss->ipsec_alg_lock);
     3388 +                rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3232 3389                  async = async || (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
3233 3390                      IPSEC_ALGS_EXEC_ASYNC);
3234 3391                  alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3235 3392                      [newbie->ipsa_encr_alg];
3236 3393  
3237 3394                  if (alg != NULL && ALG_VALID(alg)) {
3238 3395                          newbie->ipsa_emech.cm_type = alg->alg_mech_type;
3239 3396                          newbie->ipsa_datalen = alg->alg_datalen;
3240 3397                          if (alg->alg_flags & ALG_FLAG_COUNTERMODE)
3241 3398                                  newbie->ipsa_flags |= IPSA_F_COUNTERMODE;
↓ open down ↓ 12 lines elided ↑ open up ↑
3254 3411                          newbie->ipsa_saltlen = alg->alg_saltlen;
3255 3412                          newbie->ipsa_saltbits = SADB_8TO1(newbie->ipsa_saltlen);
3256 3413                          newbie->ipsa_iv_len = alg->alg_ivlen;
3257 3414                          newbie->ipsa_nonce_len = newbie->ipsa_saltlen +
3258 3415                              newbie->ipsa_iv_len;
3259 3416                          newbie->ipsa_emech.cm_param = NULL;
3260 3417                          newbie->ipsa_emech.cm_param_len = 0;
3261 3418                  } else {
3262 3419                          newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3263 3420                  }
3264      -                mutex_exit(&ipss->ipsec_alg_lock);
     3421 +                rw_exit(&ipss->ipsec_alg_lock);
3265 3422  
3266 3423                  /*
3267 3424                   * The byte stream following the sadb_key_t is made up of:
3268 3425                   * key bytes, [salt bytes], [IV initial value]
3269 3426                   * All of these have variable length. The IV is typically
3270 3427                   * randomly generated by this function and not passed in.
3271 3428                   * By supporting the injection of a known IV, the whole
3272 3429                   * IPsec subsystem and the underlying crypto subsystem
3273 3430                   * can be tested with known test vectors.
3274 3431                   *
↓ open down ↓ 91 lines elided ↑ open up ↑
3366 3523                  bzero((ekey + 1), SADB_1TO8(ekey->sadb_key_bits));
3367 3524  
3368 3525                  /*
3369 3526                   * Pre-initialize the kernel crypto framework key
3370 3527                   * structure.
3371 3528                   */
3372 3529                  newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3373 3530                  newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3374 3531                  newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3375 3532  
3376      -                mutex_enter(&ipss->ipsec_alg_lock);
     3533 +                rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3377 3534                  error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3378      -                mutex_exit(&ipss->ipsec_alg_lock);
     3535 +                rw_exit(&ipss->ipsec_alg_lock);
3379 3536                  if (error != 0) {
3380 3537                          mutex_exit(&newbie->ipsa_lock);
3381 3538                          /* See above for error explanation. */
3382 3539                          *diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3383 3540                          goto error;
3384 3541                  }
3385 3542          }
3386 3543  
3387 3544          if (async)
3388 3545                  newbie->ipsa_flags |= IPSA_F_ASYNC;
↓ open down ↓ 403 lines elided ↑ open up ↑
3792 3949                  /*      "sadb_expire_assoc: Can't allocate message.\n"); */
3793 3950                  return;
3794 3951          }
3795 3952  
3796 3953          mp1 = mp;
3797 3954          mp = mp->b_cont;
3798 3955          end = mp->b_wptr + alloclen;
3799 3956  
3800 3957          samsg = (sadb_msg_t *)mp->b_wptr;
3801 3958          mp->b_wptr += sizeof (*samsg);
3802      -        samsg->sadb_msg_version = PF_KEY_V2;
3803      -        samsg->sadb_msg_type = SADB_EXPIRE;
3804      -        samsg->sadb_msg_errno = 0;
     3959 +        INITIALIZE_SAMSG(samsg, SADB_EXPIRE);
3805 3960          samsg->sadb_msg_satype = assoc->ipsa_type;
3806 3961          samsg->sadb_msg_len = SADB_8TO64(alloclen);
3807      -        samsg->sadb_msg_reserved = 0;
3808 3962          samsg->sadb_msg_seq = 0;
3809 3963          samsg->sadb_msg_pid = 0;
3810 3964  
3811 3965          saext = (sadb_sa_t *)mp->b_wptr;
3812 3966          mp->b_wptr += sizeof (*saext);
3813 3967          saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3814 3968          saext->sadb_sa_exttype = SADB_EXT_SA;
3815 3969          saext->sadb_sa_spi = assoc->ipsa_spi;
3816 3970          saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3817 3971          saext->sadb_sa_state = assoc->ipsa_state;
↓ open down ↓ 271 lines elided ↑ open up ↑
4089 4243                      current - assoc->ipsa_lastuse, inbound);
4090 4244          }
4091 4245  
4092 4246          if (!dropped_mutex)
4093 4247                  mutex_exit(&assoc->ipsa_lock);
4094 4248          return (retval);
4095 4249  }
4096 4250  
4097 4251  /*
 4098 4252   * Called by a consumer protocol to do the dirty work of reaping dead
4099      - * Security Associations.
     4253 + * Security Associations and outstanding acquire records.
4100 4254   *
 4101 4255   * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removes
4102 4256   * SA's that are already marked DEAD, so expired SA's are only reaped
4103 4257   * the second time sadb_ager() runs.
4104 4258   */
4105 4259  void
4106 4260  sadb_ager(sadb_t *sp, queue_t *pfkey_q, int reap_delay, netstack_t *ns)
4107 4261  {
4108 4262          int i;
4109 4263          isaf_t *bucket;
↓ open down ↓ 693 lines elided ↑ open up ↑
4803 4957   */
4804 4958  static ipsacq_t *
4805 4959  sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4806 4960      uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4807 4961      uint64_t unique_id, ts_label_t *tsl)
4808 4962  {
4809 4963          ipsacq_t *walker;
4810 4964          sa_family_t fam;
4811 4965          uint32_t blank_address[4] = {0, 0, 0, 0};
4812 4966  
     4967 +        ASSERT(MUTEX_HELD(&bucket->iacqf_lock));
     4968 +
4813 4969          if (isrc == NULL) {
4814 4970                  ASSERT(idst == NULL);
4815 4971                  isrc = idst = blank_address;
4816 4972          }
4817 4973  
4818 4974          /*
4819 4975           * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4820 4976           *
4821 4977           * XXX May need search for duplicates based on other things too!
4822 4978           */
↓ open down ↓ 13 lines elided ↑ open up ↑
4836 4992                      (unique_id == walker->ipsacq_unique_id) &&
4837 4993                      (ipsec_label_match(tsl, walker->ipsacq_tsl)))
4838 4994                          break;                  /* everything matched */
4839 4995                  mutex_exit(&walker->ipsacq_lock);
4840 4996          }
4841 4997  
4842 4998          return (walker);
4843 4999  }
4844 5000  
4845 5001  /*
4846      - * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4847      - * of all of the same length.  Give up (and drop) if memory
4848      - * cannot be allocated for a new one; otherwise, invoke callback to
4849      - * send the acquire up..
     5002 + * Takes pointers to mblk_t (packet for which we need to acquire an SA) and
     5003 + * ip_xmit_attr_t (transmit attributes used to generate or retrieve acquire
     5004 + * record) and two booleans need_ah and need_esp, at least one of which
     5005 + * must be true. Acquire records are stored in hash buckets, and we assume
     5006 + * bucket contains addrs of all of the same length. If this is a new acquire
     5007 + * record, we generate an acquire samsg to send to protocol keysock layer,
     5008 + * which assumes ownership from there. If we run into problems along the way,
     5009 + * we generate errors if possible and drop packets if need be. Before sending
     5010 + * to keysock, we simply unlock the acquire record and let the ager deal with
     5011 + * releasing locks and freeing resources.
4850 5012   *
4851      - * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4852      - * list.  The ah_add_sa_finish() routines can look at the packet's attached
4853      - * attributes and handle this case specially.
     5013 + * This code is called by the IP stack when trying to send a packet for which
     5014 + * all necessary SAs can't be found to include in ip_xmit_attr_t. Be aware of
     5015 + * the following case: you need both ESP and AH and have SAs for neither. In
     5016 + * that case both need_esp and need_ah are true, but we go with need_esp, as
     5017 + * ESP will call us back for an AH acquire if it's successful and the AH SA
     5018 + * still missing. It can also be that the packet needs both, but an SA already
     5019 + * exists for one, in which case only the missing one will be flagged as
     5020 + * needed, although the ipsec_action_t has want flags for both.
4854 5021   */
4855 5022  void
4856 5023  sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
4857 5024      boolean_t need_esp)
4858 5025  {
4859      -        mblk_t  *asyncmp;
     5026 +        mblk_t          *asyncmp, *regular, *extended, *prop_m, *eprop_m;
4860 5027          sadbp_t *spp;
4861 5028          sadb_t *sp;
4862 5029          ipsacq_t *newbie;
4863 5030          iacqf_t *bucket;
4864      -        mblk_t *extended;
4865 5031          ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4866 5032          ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4867      -        uint32_t *src, *dst, *isrc, *idst;
     5033 +        uint32_t        seq, *src, *dst, *isrc, *idst;
4868 5034          ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
4869 5035          ipsec_action_t *ap = ixa->ixa_ipsec_action;
4870 5036          sa_family_t af;
4871      -        int hashoffset;
4872      -        uint32_t seq;
     5037 +        int             hashoffset, sens_len;
4873 5038          uint64_t unique_id = 0;
     5039 +        uint_t          propsize, epropsize, combs_limit;
     5040 +        uint8_t         *start, *end;
     5041 +        sadb_msg_t      *samsg;
     5042 +        sadb_prop_t     *prop, *eprop;
4874 5043          ipsec_selector_t sel;
4875 5044          boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
4876 5045          ts_label_t      *tsl = NULL;
4877 5046          netstack_t      *ns = ixa->ixa_ipst->ips_netstack;
4878 5047          ipsec_stack_t   *ipss = ns->netstack_ipsec;
4879      -        sadb_sens_t     *sens = NULL;
4880      -        int             sens_len;
     5048 +        sadb_sens_t     *sens = NULL;
     5049 +        ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
     5050 +        ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
     5051 +        queue_t         *q;
4881 5052  
4882      -        ASSERT((pp != NULL) || (ap != NULL));
     5053 +        ASSERT(need_ah || need_esp);
     5054 +        ASSERT((ap != NULL) || (pp != NULL));
4883 5055  
4884      -        ASSERT(need_ah != NULL || need_esp != NULL);
4885      -
4886      -        /* Assign sadb pointers */
4887      -        if (need_esp) { /* ESP for AH+ESP */
4888      -                ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4889      -
4890      -                spp = &espstack->esp_sadb;
4891      -        } else {
4892      -                ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
4893      -
4894      -                spp = &ahstack->ah_sadb;
4895      -        }
     5056 +        spp = need_esp ? &espstack->esp_sadb : &ahstack->ah_sadb;
4896 5057          sp = (ixa->ixa_flags & IXAF_IS_IPV4) ? &spp->s_v4 : &spp->s_v6;
4897 5058  
4898 5059          if (is_system_labeled())
4899 5060                  tsl = ixa->ixa_tsl;
4900 5061  
4901 5062          if (ap == NULL)
4902 5063                  ap = pp->ipsp_act;
4903      -
4904 5064          ASSERT(ap != NULL);
4905 5065  
4906 5066          if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4907 5067                  unique_id = SA_FORM_UNIQUE_ID(ixa);
4908 5068  
4909 5069          /*
4910 5070           * Set up an ACQUIRE record.
4911 5071           *
4912 5072           * Immediately, make sure the ACQUIRE sequence number doesn't slip
4913 5073           * below the lowest point allowed in the kernel.  (In other words,
4914 5074           * make sure the high bit on the sequence number is set.)
4915 5075           */
4916      -
4917 5076          seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
4918 5077  
4919 5078          if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4920 5079                  src = (uint32_t *)&ipha->ipha_src;
4921 5080                  dst = (uint32_t *)&ipha->ipha_dst;
4922 5081                  af = AF_INET;
     5082 +                ip6h = NULL;
4923 5083                  hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4924 5084                  ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
4925 5085          } else {
4926 5086                  ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4927 5087                  src = (uint32_t *)&ip6h->ip6_src;
4928 5088                  dst = (uint32_t *)&ip6h->ip6_dst;
4929 5089                  af = AF_INET6;
     5090 +                ipha = NULL;
4930 5091                  hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4931 5092                  ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
4932 5093          }
4933 5094  
4934 5095          if (tunnel_mode) {
4935 5096                  if (pp == NULL) {
4936 5097                          /*
4937 5098                           * Tunnel mode with no policy pointer means this is a
4938 5099                           * reflected ICMP (like a ECHO REQUEST) that came in
4939 5100                           * with self-encapsulated protection.  Until we better
↓ open down ↓ 5 lines elided ↑ open up ↑
4945 5106                          return;
4946 5107                  }
4947 5108                  /* Snag inner addresses. */
4948 5109                  isrc = ixa->ixa_ipsec_insrc;
4949 5110                  idst = ixa->ixa_ipsec_indst;
4950 5111          } else {
4951 5112                  isrc = idst = NULL;
4952 5113          }
4953 5114  
4954 5115          /*
4955      -         * Check buckets to see if there is an existing entry.  If so,
4956      -         * grab it.  sadb_checkacquire locks newbie if found.
     5116 +         * Check bucket for existing matching entry. If so, grab it. On match
     5117 +         * sadb_checkacquire returns locked newbie.
4957 5118           */
4958 5119          bucket = &(sp->sdb_acq[hashoffset]);
4959 5120          mutex_enter(&bucket->iacqf_lock);
4960 5121          newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4961 5122              unique_id, tsl);
4962 5123  
     5124 +        /* If not found, initialize a new one and insert into chain. */
4963 5125          if (newbie == NULL) {
4964      -                /*
4965      -                 * Otherwise, allocate a new one.
4966      -                 */
4967 5126                  newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4968 5127                  if (newbie == NULL) {
4969 5128                          mutex_exit(&bucket->iacqf_lock);
4970 5129                          ip_drop_packet(datamp, B_FALSE, NULL,
4971 5130                              DROPPER(ipss, ipds_sadb_acquire_nomem),
4972 5131                              &ipss->ipsec_sadb_dropper);
4973 5132                          return;
4974 5133                  }
4975 5134                  newbie->ipsacq_policy = pp;
4976 5135                  if (pp != NULL) {
↓ open down ↓ 16 lines elided ↑ open up ↑
4993 5152           * XXX MLS does it actually help us to drop the bucket lock here?
4994 5153           * we have inserted a half-built, locked acquire record into the
4995 5154           * bucket.  any competing thread will now be able to lock the bucket
4996 5155           * to scan it, but will immediately pile up on the new acquire
4997 5156           * record's lock; I don't think we gain anything here other than to
4998 5157           * disperse blame for lock contention.
4999 5158           *
5000 5159           * we might be able to dispense with acquire record locks entirely..
5001 5160           * just use the bucket locks..
5002 5161           */
5003      -
5004 5162          mutex_exit(&bucket->iacqf_lock);
5005 5163  
5006 5164          /*
5007 5165           * This assert looks silly for now, but we may need to enter newbie's
5008      -         * mutex during a search.
     5166 +         * mutex during a search. Confirms we got locked newbie from
     5167 +         * sadb_checkacquire.
5009 5168           */
5010 5169          ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5011 5170  
5012      -        /*
5013      -         * Make the ip_xmit_attr_t into something we can queue.
5014      -         * If no memory it frees datamp.
5015      -         */
     5171 +        /* Make ip_xmit_attr_t into message we can queue, link packet data. */
5016 5172          asyncmp = ip_xmit_attr_to_mblk(ixa);
5017 5173          if (asyncmp != NULL)
5018 5174                  linkb(asyncmp, datamp);
5019 5175  
5020      -        /* Queue up packet.  Use b_next. */
5021      -
     5176 +        /* Bump appropriate discard stat & free datamp if allocation failed. */
5022 5177          if (asyncmp == NULL) {
5023      -                /* Statistics for allocation failure */
5024 5178                  if (ixa->ixa_flags & IXAF_IS_IPV4) {
5025 5179                          BUMP_MIB(&ixa->ixa_ipst->ips_ip_mib,
5026 5180                              ipIfStatsOutDiscards);
5027 5181                  } else {
5028 5182                          BUMP_MIB(&ixa->ixa_ipst->ips_ip6_mib,
5029 5183                              ipIfStatsOutDiscards);
5030 5184                  }
5031 5185                  ip_drop_output("No memory for asyncmp", datamp, NULL);
5032 5186                  freemsg(datamp);
5033      -        } else if (newbie->ipsacq_numpackets == 0) {
5034      -                /* First one. */
     5187 +        } else if (newbie->ipsacq_numpackets == 0) { /* Pkt queue forms here. */
5035 5188                  newbie->ipsacq_mp = asyncmp;
5036 5189                  newbie->ipsacq_numpackets = 1;
5037 5190                  newbie->ipsacq_expire = gethrestime_sec();
5038      -                /*
5039      -                 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5040      -                 * value.
5041      -                 */
     5191 +                /* Extended ACQUIRE with AH+ESP uses ESP's timeout */
5042 5192                  newbie->ipsacq_expire += *spp->s_acquire_timeout;
5043 5193                  newbie->ipsacq_seq = seq;
5044 5194                  newbie->ipsacq_addrfam = af;
5045 5195  
5046 5196                  newbie->ipsacq_srcport = ixa->ixa_ipsec_src_port;
5047 5197                  newbie->ipsacq_dstport = ixa->ixa_ipsec_dst_port;
5048 5198                  newbie->ipsacq_icmp_type = ixa->ixa_ipsec_icmp_type;
5049 5199                  newbie->ipsacq_icmp_code = ixa->ixa_ipsec_icmp_code;
5050 5200                  if (tunnel_mode) {
5051 5201                          newbie->ipsacq_inneraddrfam = ixa->ixa_ipsec_inaf;
↓ open down ↓ 3 lines elided ↑ open up ↑
5055 5205                          newbie->ipsacq_innerdstpfx = ixa->ixa_ipsec_indstpfx;
5056 5206                          IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5057 5207                              ixa->ixa_ipsec_insrc, ixa->ixa_ipsec_inaf);
5058 5208                          IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5059 5209                              ixa->ixa_ipsec_indst, ixa->ixa_ipsec_inaf);
5060 5210                  } else {
5061 5211                          newbie->ipsacq_proto = ixa->ixa_ipsec_proto;
5062 5212                  }
5063 5213                  newbie->ipsacq_unique_id = unique_id;
5064 5214  
5065      -                if (ixa->ixa_tsl != NULL) {
5066      -                        label_hold(ixa->ixa_tsl);
5067      -                        newbie->ipsacq_tsl = ixa->ixa_tsl;
     5215 +                if (tsl != NULL) {
     5216 +                        label_hold(tsl);
     5217 +                        newbie->ipsacq_tsl = tsl;
5068 5218                  }
5069      -        } else {
5070      -                /* Scan to the end of the list & insert. */
     5219 +        } else { /* Attempt to join packet queue as b_next. */
5071 5220                  mblk_t *lastone = newbie->ipsacq_mp;
5072 5221  
5073 5222                  while (lastone->b_next != NULL)
5074 5223                          lastone = lastone->b_next;
5075 5224                  lastone->b_next = asyncmp;
     5225 +                /* Queue maxed: set counter to max, unchain, free & drop pkt */
5076 5226                  if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5077 5227                          newbie->ipsacq_numpackets = ipsacq_maxpackets;
5078 5228                          lastone = newbie->ipsacq_mp;
5079 5229                          newbie->ipsacq_mp = lastone->b_next;
5080 5230                          lastone->b_next = NULL;
5081 5231  
5082      -                        /* Freeing the async message */
5083 5232                          lastone = ip_xmit_attr_free_mblk(lastone);
5084 5233                          ip_drop_packet(lastone, B_FALSE, NULL,
5085 5234                              DROPPER(ipss, ipds_sadb_acquire_toofull),
5086 5235                              &ipss->ipsec_sadb_dropper);
5087      -                } else {
     5236 +                } else { /* Successfully queued */
5088 5237                          IP_ACQUIRE_STAT(ipss, qhiwater,
5089 5238                              newbie->ipsacq_numpackets);
5090 5239                  }
5091 5240          }
5092 5241  
5093 5242          /*
5094 5243           * Reset addresses.  Set them to the most recently added mblk chain,
5095 5244           * so that the address pointers in the acquire record will point
5096 5245           * at an mblk still attached to the acquire list.
5097 5246           */
5098 5247  
5099 5248          newbie->ipsacq_srcaddr = src;
5100 5249          newbie->ipsacq_dstaddr = dst;
5101 5250  
5102 5251          /*
5103      -         * If the acquire record has more than one queued packet, we've
5104      -         * already sent an ACQUIRE, and don't need to repeat ourself.
     5252 +         * Sequence number mismatch or previously populated packet queue means
     5253 +         * we retrieved an already-pending ACQUIRE record and needn't repeat
     5254 +         * ourself. Unlock and return.
5105 5255           */
5106      -        if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5107      -                /* I have an acquire outstanding already! */
5108      -                mutex_exit(&newbie->ipsacq_lock);
5109      -                return;
     5256 +        if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1)
     5257 +                goto unlock_acqrec;
     5258 +
     5259 +        /*
     5260 +         * Even if we fail before sending to keysock, starting with a NULL
     5261 +         * queue pointer, if it gets this far, it counts as an acquire request.
     5262 +         */
     5263 +        if (need_esp) {
     5264 +                ESP_BUMP_STAT(espstack, acquire_requests);
     5265 +                q = espstack->esp_pfkey_q;
     5266 +        } else {
     5267 +                AH_BUMP_STAT(ahstack, acquire_requests);
     5268 +                q = ahstack->ah_pfkey_q;
5110 5269          }
5111 5270  
5112      -        if (!keysock_extended_reg(ns))
5113      -                goto punt_extended;
     5271 +        if (q == NULL)
     5272 +                goto unlock_acqrec;
     5273 +
     5274 +        /* Initializes keysock M_CTL message for regular acquire. */
     5275 +        regular = sadb_keysock_out(0);
     5276 +        if (regular == NULL)
     5277 +                goto unlock_acqrec;
     5278 +
5114 5279          /*
5115      -         * Construct an extended ACQUIRE.  There are logging
5116      -         * opportunities here in failure cases.
     5280 +         * Consult keysock_extended_reg(). If it returns true, initialize an
     5281 +         * M_CTL msg for the extended acquire via sadb_keysock_out(). Otherwise
     5282 +         * set extended to NULL so we ignore it hereafter.
5117 5283           */
     5284 +        if (keysock_extended_reg(ns)) {
     5285 +                extended = sadb_keysock_out(0);
     5286 +                if (extended == NULL)
     5287 +                        goto bail_and_free_regular;
     5288 +        } else {
     5289 +                extended = NULL;
     5290 +        }
     5291 +
     5292 +        if (tsl != NULL) {
     5293 +                /*
     5294 +                 * XXX MLS correct condition here?
     5295 +                 * XXX MLS other credential attributes in acquire?
     5296 +                 * XXX malloc failure?  don't fall back to original?
     5297 +                 */
     5298 +                sens = sadb_make_sens_ext(tsl, &sens_len);
     5299 +
     5300 +                if (sens == NULL)
     5301 +                        goto bail_extended;
     5302 +        }
     5303 +        /* re-initialize selector using ixa and ipha */
5118 5304          bzero(&sel, sizeof (sel));
5119 5305          sel.ips_isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
5120 5306          if (tunnel_mode) {
5121 5307                  sel.ips_protocol = (ixa->ixa_ipsec_inaf == AF_INET) ?
5122 5308                      IPPROTO_ENCAP : IPPROTO_IPV6;
5123 5309          } else {
5124 5310                  sel.ips_protocol = ixa->ixa_ipsec_proto;
5125 5311                  sel.ips_local_port = ixa->ixa_ipsec_src_port;
5126 5312                  sel.ips_remote_port = ixa->ixa_ipsec_dst_port;
5127 5313          }
↓ open down ↓ 1 lines elided ↑ open up ↑
5129 5315          sel.ips_icmp_code = ixa->ixa_ipsec_icmp_code;
5130 5316          sel.ips_is_icmp_inv_acq = 0;
5131 5317          if (af == AF_INET) {
5132 5318                  sel.ips_local_addr_v4 = ipha->ipha_src;
5133 5319                  sel.ips_remote_addr_v4 = ipha->ipha_dst;
5134 5320          } else {
5135 5321                  sel.ips_local_addr_v6 = ip6h->ip6_src;
5136 5322                  sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5137 5323          }
5138 5324  
5139      -        extended = sadb_keysock_out(0);
5140      -        if (extended == NULL)
5141      -                goto punt_extended;
     5325 +        /* Tack message containing sadb_msg_t onto keysock regular M_CTL */
     5326 +        regular->b_cont = sadb_construct_acqmsg(newbie, &sel, ap, pp, ns, sens,
     5327 +            need_esp, tunnel_mode, B_FALSE, B_FALSE); /* regular, no props */
     5328 +        /* We have to do this, no matter the result of previous call */
     5329 +        if (sens != NULL)
     5330 +                kmem_free(sens, sens_len);
5142 5331  
5143      -        if (ixa->ixa_tsl != NULL) {
5144      -                /*
5145      -                 * XXX MLS correct condition here?
5146      -                 * XXX MLS other credential attributes in acquire?
5147      -                 * XXX malloc failure?  don't fall back to original?
5148      -                 */
5149      -                sens = sadb_make_sens_ext(ixa->ixa_tsl, &sens_len);
     5332 +        if (regular->b_cont == NULL)
     5333 +                goto bail_extended;
5150 5334  
5151      -                if (sens == NULL) {
5152      -                        freeb(extended);
5153      -                        goto punt_extended;
5154      -                }
     5335 +        /*
     5336 +         * If extended was set up, duplicate regular samsg, tacking it
     5337 +         * on as the b_cont of the keysock-generated extended M_CTL.
     5338 +         */
     5339 +        if (extended != NULL) {
     5340 +                extended->b_cont = dupb(regular->b_cont);
     5341 +                if (extended->b_cont == NULL)
     5342 +                        goto bail_extended;
5155 5343          }
5156 5344  
5157      -        extended->b_cont = sadb_extended_acquire(&sel, pp, ap, tunnel_mode,
5158      -            seq, 0, sens, ns);
     5345 +        rw_enter(&ipss->ipsec_alg_lock, RW_READER);
     5346 +        CALC_COMBS(combs_limit, ipss, need_esp);
     5347 +        propsize = sizeof (sadb_prop_t) + (combs_limit * sizeof (sadb_comb_t));
5159 5348  
5160      -        if (sens != NULL)
5161      -                kmem_free(sens, sens_len);
     5349 +        if ((prop_m = allocb(propsize, BPRI_HI)) == NULL)
     5350 +                goto bail_and_unlock;
5162 5351  
5163      -        if (extended->b_cont == NULL) {
5164      -                freeb(extended);
5165      -                goto punt_extended;
     5352 +        if (extended != NULL) {
     5353 +                epropsize = sizeof (sadb_prop_t)
     5354 +                    + (combs_limit * sizeof (sadb_x_ecomb_t));
     5355 +                if ((eprop_m = allocb(epropsize, BPRI_HI)) == NULL)
     5356 +                        goto bail_and_unlock;
5166 5357          }
5167 5358  
5168      -        /*
5169      -         * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
5170      -         * this new record.  The send-acquire callback assumes that acqrec is
5171      -         * already locked.
5172      -         */
5173      -        (*spp->s_acqfn)(newbie, extended, ns);
     5359 +        prop = (sadb_prop_t *)prop_m->b_rptr;
     5360 +        sadb_insert_prop(prop, ap, ns, combs_limit, need_esp);
     5361 +        if (prop == NULL) {
     5362 +                goto bail_and_unlock;
     5363 +        /* 0 length prop is error, mark regular samsg a dud, & freeb prop_m */
     5364 +        } else {
     5365 +                samsg = (sadb_msg_t *)regular->b_cont->b_rptr;
     5366 +
     5367 +                if (prop->sadb_prop_len == 0) {
     5368 +                        ERRNO_SAMSG(samsg, ENOENT);
     5369 +                        freeb(prop_m);
     5370 +                }
     5371 +                samsg->sadb_msg_len += prop->sadb_prop_len;
     5372 +                prop_m->b_wptr += SADB_64TO8(prop->sadb_prop_len);
     5373 +                regular->b_cont->b_cont = prop_m;
     5374 +        }
     5375 +
     5376 +        if (extended != NULL) {
     5377 +                start = (uint8_t *)eprop_m->b_rptr;
     5378 +                end = start + epropsize;
     5379 +                eprop =
     5380 +                    (sadb_prop_t *)sadb_construct_eprop(ap, pp, ns, start, end);
     5381 +                if (eprop == NULL)
     5382 +                        goto bail_and_unlock;
     5383 +                /* If 0 ecombs, mark extended samsg a dud, and freeb eprop_m */
     5384 +                else {
     5385 +                        samsg = (sadb_msg_t *)extended->b_cont->b_rptr;
     5386 +
     5387 +                        if (eprop->sadb_x_prop_numecombs == 0) {
     5388 +                                ERRNO_SAMSG(samsg, ENOENT);
     5389 +                                freeb(eprop_m);
     5390 +                        }
     5391 +                        samsg->sadb_msg_len += eprop->sadb_prop_len;
     5392 +                        eprop_m->b_wptr += SADB_64TO8(eprop->sadb_prop_len);
     5393 +                        extended->b_cont->b_cont = eprop_m;
     5394 +                }
     5395 +        }
     5396 +
     5397 +        rw_exit(&ipss->ipsec_alg_lock);
     5398 +        mutex_exit(&newbie->ipsacq_lock);
     5399 +
     5400 +        if (extended != NULL)
     5401 +                putnext(q, extended);
     5402 +        putnext(q, regular);
5174 5403          return;
5175 5404  
5176      -punt_extended:
5177      -        (*spp->s_acqfn)(newbie, NULL, ns);
     5405 +/* We used a lot of b_cont mblk chaining, so we need to use freemsg. */
     5406 +bail_and_unlock:
     5407 +        rw_exit(&ipss->ipsec_alg_lock);
     5408 +bail_extended:
     5409 +        if (extended != NULL)
     5410 +                freemsg(extended);
     5411 +bail_and_free_regular:
     5412 +        freemsg(regular);
     5413 +unlock_acqrec:
     5414 +        mutex_exit(&newbie->ipsacq_lock);
5178 5415  }
5179 5416  
5180 5417  /*
5181 5418   * Unlink and free an acquire record.
5182 5419   */
5183 5420  void
5184 5421  sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5185 5422  {
5186 5423          mblk_t          *mp;
5187 5424          ipsec_stack_t   *ipss = ns->netstack_ipsec;
5188 5425  
5189 5426          ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5190 5427  
     5428 +        /* XXX Should references be released before mutex is acquired? */
5191 5429          if (acqrec->ipsacq_policy != NULL) {
5192 5430                  IPPOL_REFRELE(acqrec->ipsacq_policy);
5193 5431          }
5194 5432          if (acqrec->ipsacq_act != NULL) {
5195 5433                  IPACT_REFRELE(acqrec->ipsacq_act);
5196 5434          }
5197 5435  
5198 5436          /* Unlink */
5199 5437          *(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5200 5438          if (acqrec->ipsacq_next != NULL)
↓ open down ↓ 54 lines elided ↑ open up ↑
5255 5493                  *listp = NULL;
5256 5494                  kmem_free(list, numentries * sizeof (*list));
5257 5495          }
5258 5496  }
5259 5497  
5260 5498  /*
5261 5499   * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5262 5500   * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5263 5501   */
5264 5502  static uint8_t *
5265      -sadb_new_algdesc(uint8_t *start, uint8_t *limit,
     5503 +sadb_new_algdesc(const uint8_t *start, const uint8_t *end,
5266 5504      sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5267 5505      uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5268 5506  {
5269      -        uint8_t *cur = start;
     5507 +        uint8_t *cur = (uint8_t *)start;
5270 5508          ipsec_alginfo_t *algp;
5271 5509          sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5272 5510  
     5511 +        ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
     5512 +
5273 5513          cur += sizeof (*algdesc);
5274      -        if (cur >= limit)
     5514 +        if (cur >= end)
5275 5515                  return (NULL);
5276 5516  
5277 5517          ecomb->sadb_x_ecomb_numalgs++;
5278 5518  
5279 5519          /*
5280 5520           * Normalize vs. crypto framework's limits.  This way, you can specify
5281 5521           * a stronger policy, and when the framework loads a stronger version,
5282 5522           * you can just keep plowing w/o rewhacking your SPD.
5283 5523           */
5284      -        mutex_enter(&ipss->ipsec_alg_lock);
5285 5524          algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5286 5525              IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5287      -        if (algp == NULL) {
5288      -                mutex_exit(&ipss->ipsec_alg_lock);
     5526 +        if (algp == NULL)
5289 5527                  return (NULL);  /* Algorithm doesn't exist.  Fail gracefully. */
5290      -        }
5291 5528          if (minbits < algp->alg_ef_minbits)
5292 5529                  minbits = algp->alg_ef_minbits;
5293 5530          if (maxbits > algp->alg_ef_maxbits)
5294 5531                  maxbits = algp->alg_ef_maxbits;
5295      -        mutex_exit(&ipss->ipsec_alg_lock);
5296 5532  
5297 5533          algdesc->sadb_x_algdesc_reserved = SADB_8TO1(algp->alg_saltlen);
5298 5534          algdesc->sadb_x_algdesc_satype = satype;
5299 5535          algdesc->sadb_x_algdesc_algtype = algtype;
5300 5536          algdesc->sadb_x_algdesc_alg = alg;
5301 5537          algdesc->sadb_x_algdesc_minbits = minbits;
5302 5538          algdesc->sadb_x_algdesc_maxbits = maxbits;
5303 5539  
5304 5540          return (cur);
5305 5541  }
5306 5542  
5307 5543  /*
5308      - * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5309      - * which must fit before *limit
5310      - *
5311      - * return NULL if we ran out of room or a pointer to the end of the ecomb.
     5544 + * Use buffer defined by byte-aligned pointers start and end to convert
     5545 + * ipsec_action_t pointer act into an ecomb, using alg data hanging off of
     5546 + * netstack_t pointer ns. Return NULL rather than overrun buffer, otherwise
     5547 + * pointer to end of ecomb (which should be exact size of buffer).
5312 5548   */
5313 5549  static uint8_t *
5314      -sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5315      -    netstack_t *ns)
     5550 +sadb_action_to_ecomb(const uint8_t *start, const uint8_t *end,
     5551 +    const ipsec_action_t *act, netstack_t *ns)
5316 5552  {
5317      -        uint8_t *cur = start;
     5553 +        uint8_t *cur = (uint8_t *)start;
5318 5554          sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5319 5555          ipsec_prot_t *ipp;
5320 5556          ipsec_stack_t *ipss = ns->netstack_ipsec;
5321 5557  
     5558 +        ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
     5559 +        ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
     5560 +
5322 5561          cur += sizeof (*ecomb);
5323      -        if (cur >= limit)
     5562 +        if (cur >= end)
5324 5563                  return (NULL);
5325 5564  
5326      -        ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
     5565 +        ipp = &((ipsec_action_t *)act)->ipa_act.ipa_apply;
5327 5566  
5328      -        ipp = &act->ipa_act.ipa_apply;
5329      -
5330 5567          ecomb->sadb_x_ecomb_numalgs = 0;
5331 5568          ecomb->sadb_x_ecomb_reserved = 0;
5332 5569          ecomb->sadb_x_ecomb_reserved2 = 0;
5333 5570          /*
5334 5571           * No limits on allocations, since we really don't support that
5335 5572           * concept currently.
5336 5573           */
5337 5574          ecomb->sadb_x_ecomb_soft_allocations = 0;
5338 5575          ecomb->sadb_x_ecomb_hard_allocations = 0;
5339 5576  
↓ open down ↓ 3 lines elided ↑ open up ↑
5343 5580           */
5344 5581          ecomb->sadb_x_ecomb_flags = 0;
5345 5582          ecomb->sadb_x_ecomb_soft_bytes = 0;
5346 5583          ecomb->sadb_x_ecomb_hard_bytes = 0;
5347 5584          ecomb->sadb_x_ecomb_soft_addtime = 0;
5348 5585          ecomb->sadb_x_ecomb_hard_addtime = 0;
5349 5586          ecomb->sadb_x_ecomb_soft_usetime = 0;
5350 5587          ecomb->sadb_x_ecomb_hard_usetime = 0;
5351 5588  
5352 5589          if (ipp->ipp_use_ah) {
5353      -                cur = sadb_new_algdesc(cur, limit, ecomb,
     5590 +                cur = sadb_new_algdesc(cur, end, ecomb,
5354 5591                      SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5355 5592                      ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5356 5593                  if (cur == NULL)
5357 5594                          return (NULL);
5358 5595                  ipsecah_fill_defs(ecomb, ns);
5359 5596          }
5360 5597  
5361 5598          if (ipp->ipp_use_esp) {
5362 5599                  if (ipp->ipp_use_espa) {
5363      -                        cur = sadb_new_algdesc(cur, limit, ecomb,
     5600 +                        cur = sadb_new_algdesc(cur, end, ecomb,
5364 5601                              SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5365 5602                              ipp->ipp_esp_auth_alg,
5366 5603                              ipp->ipp_espa_minbits,
5367 5604                              ipp->ipp_espa_maxbits, ipss);
5368 5605                          if (cur == NULL)
5369 5606                                  return (NULL);
5370 5607                  }
5371 5608  
5372      -                cur = sadb_new_algdesc(cur, limit, ecomb,
     5609 +                cur = sadb_new_algdesc(cur, end, ecomb,
5373 5610                      SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5374 5611                      ipp->ipp_encr_alg,
5375 5612                      ipp->ipp_espe_minbits,
5376 5613                      ipp->ipp_espe_maxbits, ipss);
5377 5614                  if (cur == NULL)
5378 5615                          return (NULL);
5379 5616                  /* Fill in lifetimes if and only if AH didn't already... */
5380 5617                  if (!ipp->ipp_use_ah)
5381 5618                          ipsecesp_fill_defs(ecomb, ns);
5382 5619          }
↓ open down ↓ 89 lines elided ↑ open up ↑
5472 5709                  return (NULL);
5473 5710  
5474 5711          if (sens->sadb_x_sens_flags & SADB_X_SENS_UNLABELED)
5475 5712                  tsl->tsl_flags |= TSLF_UNLABELED;
5476 5713          return (tsl);
5477 5714  }
5478 5715  
5479 5716  /* End XXX label-library-leakage */
5480 5717  
5481 5718  /*
5482      - * Construct an extended ACQUIRE message based on a selector and the resulting
5483      - * IPsec action.
5484      - *
5485      - * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5486      - * generation. As a consequence, expect this function to evolve
5487      - * rapidly.
     5719 + * Takes a pointer to sadb_prop_t (what we're initializing), ipsec_action_t
     5720 + * (first action in chain we need to walk of actions for each alg
     5721 + * combination), netstack_t ns (contains pointers to alg properties and
     5722 + * per-protocol settings), a combs_limit integer (maximum applicable
     5723 + * combinations derived from per-protocol netstack_t alg array), and need_esp
     5724 + * boolean_t. We distinguish between two error cases: we exceed combs_limit,
     5725 + * which should only be a kernel bug (ipsec_alg_lock is our shepherd), or we
     5726 + * have an alg ID with a NULL netstack member or member with the valid bit
     5727 + * flipped, both of which indicate the need to reset state, which we flag by
     5728 + * returning no combs. We return NULL if we exceed combs_limit and zero-length
     5729 + * prop if we run into an alg that can't be transferred into the prop.
5488 5730   */
5489      -static mblk_t *
5490      -sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5491      -    ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5492      -    sadb_sens_t *sens, netstack_t *ns)
     5731 +static void
     5732 +sadb_insert_prop(sadb_prop_t *prop, const ipsec_action_t *ap, netstack_t *ns,
     5733 +    uint_t combs_limit, boolean_t need_esp)
5493 5734  {
5494      -        mblk_t *mp;
5495      -        sadb_msg_t *samsg;
5496      -        uint8_t *start, *cur, *end;
5497      -        uint32_t *saddrptr, *daddrptr;
5498      -        sa_family_t af;
5499      -        sadb_prop_t *eprop;
5500      -        ipsec_action_t *ap, *an;
5501      -        ipsec_selkey_t *ipsl;
5502      -        uint8_t proto, pfxlen;
5503      -        uint16_t lport, rport;
5504      -        uint32_t kmp, kmc;
     5735 +        sadb_comb_t     *comb = (sadb_comb_t *)(prop + 1);
     5736 +        ipsec_action_t  *act = (ipsec_action_t *)ap;
     5737 +        ipsec_prot_t    *prot;
     5738 +        ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
     5739 +        ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
     5740 +        ipsec_stack_t   *ipss = ns->netstack_ipsec;
     5741 +        boolean_t       need_ah = !need_esp;
5505 5742  
5506      -        /*
5507      -         * Find the action we want sooner rather than later..
5508      -         */
5509      -        an = NULL;
5510      -        if (pol == NULL) {
5511      -                ap = act;
5512      -        } else {
5513      -                ap = pol->ipsp_act;
     5743 +        ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
     5744 +        ASSERT((need_esp && ap->ipa_want_esp) || (need_ah && ap->ipa_want_ah));
5514 5745  
5515      -                if (ap != NULL)
5516      -                        an = ap->ipa_next;
5517      -        }
     5746 +        prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
     5747 +        prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
     5748 +        *(uint32_t *)(&prop->sadb_prop_replay) = 0; /* Quick zero-out! */
     5749 +        prop->sadb_prop_replay = need_esp ?
     5750 +            espstack->ipsecesp_replay_size : ahstack->ipsecah_replay_size;
5518 5751  
5519      -        /*
5520      -         * Just take a swag for the allocation for now.  We can always
5521      -         * alter it later.
5522      -         */
5523      -#define SADB_EXTENDED_ACQUIRE_SIZE      4096
5524      -        mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5525      -        if (mp == NULL)
5526      -                return (NULL);
     5752 +        /* Prioritize a proposal, preserving policy order. */
     5753 +        for (; act != NULL; act = act->ipa_next) {
     5754 +                ipsec_alginfo_t *aalg = NULL;
     5755 +                ipsec_alginfo_t *ealg = NULL;
5527 5756  
5528      -        start = mp->b_rptr;
5529      -        end = start + SADB_EXTENDED_ACQUIRE_SIZE;
     5757 +                if ((act->ipa_act.ipa_type != IPSEC_POLICY_APPLY) ||
     5758 +                    (need_esp && !act->ipa_act.ipa_apply.ipp_use_esp) ||
     5759 +                    (need_ah && !act->ipa_act.ipa_apply.ipp_use_ah))
     5760 +                        continue;
5530 5761  
5531      -        cur = start;
     5762 +                if (--combs_limit == 0) {
     5763 +                        prop = NULL;
     5764 +                        return;
     5765 +                }
5532 5766  
5533      -        samsg = (sadb_msg_t *)cur;
5534      -        cur += sizeof (*samsg);
     5767 +                prot = &act->ipa_act.ipa_apply;
5535 5768  
5536      -        samsg->sadb_msg_version = PF_KEY_V2;
5537      -        samsg->sadb_msg_type = SADB_ACQUIRE;
5538      -        samsg->sadb_msg_errno = 0;
5539      -        samsg->sadb_msg_reserved = 0;
5540      -        samsg->sadb_msg_satype = 0;
5541      -        samsg->sadb_msg_seq = seq;
5542      -        samsg->sadb_msg_pid = pid;
5543      -
5544      -        if (tunnel_mode) {
5545 5769                  /*
5546      -                 * Form inner address extensions based NOT on the inner
5547      -                 * selectors (i.e. the packet data), but on the policy's
5548      -                 * selector key (i.e. the policy's selector information).
5549      -                 *
5550      -                 * NOTE:  The position of IPv4 and IPv6 addresses is the
5551      -                 * same in ipsec_selkey_t (unless the compiler does very
5552      -                 * strange things with unions, consult your local C language
5553      -                 * lawyer for details).
     5770 +                 * Alg ID 0 is none/any, which is valid only for ESP without
     5771 +                 * message integrity (ipp_esp_auth_alg). NULL encryption ESP
     5772 +                 * uses a distinct alg, non-zero ID.
5554 5773                   */
5555      -                ASSERT(pol != NULL);
5556      -
5557      -                ipsl = &(pol->ipsp_sel->ipsl_key);
5558      -                if (ipsl->ipsl_valid & IPSL_IPV4) {
5559      -                        af = AF_INET;
5560      -                        ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5561      -                        ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5562      -                } else {
5563      -                        af = AF_INET6;
5564      -                        ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5565      -                        ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
     5774 +                if ((need_esp && prot->ipp_esp_auth_alg != 0) || need_ah) {
     5775 +                        ASSERT(need_esp || (prot->ipp_auth_alg > 0));
     5776 +                        aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][need_esp ?
     5777 +                            prot->ipp_esp_auth_alg : prot->ipp_auth_alg];
     5778 +                        if (aalg == NULL || !ALG_VALID(aalg))
     5779 +                                goto failure;
5566 5780                  }
5567 5781  
5568      -                if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5569      -                        saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5570      -                        pfxlen = ipsl->ipsl_local_pfxlen;
5571      -                } else {
5572      -                        saddrptr = (uint32_t *)(&ipv6_all_zeros);
5573      -                        pfxlen = 0;
     5782 +                if (need_esp) {
     5783 +                        ASSERT(prot->ipp_encr_alg > 0);
     5784 +                        ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
     5785 +                            [prot->ipp_encr_alg];
     5786 +                        if (ealg == NULL || !ALG_VALID(ealg))
     5787 +                                goto failure;
5574 5788                  }
5575      -                /* XXX What about ICMP type/code? */
5576      -                lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5577      -                    ipsl->ipsl_lport : 0;
5578      -                proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5579      -                    ipsl->ipsl_proto : 0;
5580 5789  
5581      -                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5582      -                    af, saddrptr, lport, proto, pfxlen);
5583      -                if (cur == NULL) {
5584      -                        freeb(mp);
5585      -                        return (NULL);
5586      -                }
     5790 +                comb->sadb_comb_flags = 0;
     5791 +                comb->sadb_comb_reserved = 0;
5587 5792  
5588      -                if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5589      -                        daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5590      -                        pfxlen = ipsl->ipsl_remote_pfxlen;
     5793 +                if (ealg != NULL) {
     5794 +                        comb->sadb_comb_encrypt = ealg->alg_id;
     5795 +                        comb->sadb_comb_encrypt_minbits =
     5796 +                            MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
     5797 +                        comb->sadb_comb_encrypt_maxbits =
     5798 +                            MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
5591 5799                  } else {
5592      -                        daddrptr = (uint32_t *)(&ipv6_all_zeros);
5593      -                        pfxlen = 0;
     5800 +                        comb->sadb_comb_encrypt = 0;
     5801 +                        comb->sadb_comb_encrypt_minbits = 0;
     5802 +                        comb->sadb_comb_encrypt_maxbits = 0;
5594 5803                  }
5595      -                /* XXX What about ICMP type/code? */
5596      -                rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5597      -                    ipsl->ipsl_rport : 0;
5598 5804  
5599      -                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5600      -                    af, daddrptr, rport, proto, pfxlen);
5601      -                if (cur == NULL) {
5602      -                        freeb(mp);
5603      -                        return (NULL);
     5805 +                if (aalg != NULL) {
     5806 +                        uint16_t minbits, maxbits;
     5807 +                        minbits = need_esp ?
     5808 +                            prot->ipp_espa_minbits : prot->ipp_ah_minbits;
     5809 +                        maxbits = need_esp ?
     5810 +                            prot->ipp_espa_maxbits : prot->ipp_ah_maxbits;
     5811 +                        comb->sadb_comb_auth = aalg->alg_id;
     5812 +                        comb->sadb_comb_auth_minbits =
     5813 +                            MAX(minbits, aalg->alg_ef_minbits);
     5814 +                        comb->sadb_comb_auth_maxbits =
     5815 +                            MIN(maxbits, aalg->alg_ef_maxbits);
     5816 +                } else {
     5817 +                        comb->sadb_comb_auth = 0;
     5818 +                        comb->sadb_comb_auth_minbits = 0;
     5819 +                        comb->sadb_comb_auth_maxbits = 0;
5604 5820                  }
     5821 +
5605 5822                  /*
5606      -                 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5607      -                 * _with_ inner-packet address selectors, we'll need to further
5608      -                 * distinguish tunnel mode here.  For now, having inner
5609      -                 * addresses and/or ports is sufficient.
5610      -                 *
5611      -                 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5612      -                 * outer addresses.
     5823 +                 * The following may be based on algorithm properties, but in
     5824 +                 * the meantime, we just pick some good, sensible numbers.
     5825 +                 * Key mgmt. can (and perhaps should) be the place to finalize
     5826 +                 * such decisions.
5613 5827                   */
5614      -                proto = sel->ips_protocol;      /* Either _ENCAP or _IPV6 */
5615      -                lport = rport = 0;
5616      -        } else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5617      -                proto = 0;
5618      -                lport = 0;
5619      -                rport = 0;
5620      -                if (pol != NULL) {
5621      -                        ipsl = &(pol->ipsp_sel->ipsl_key);
5622      -                        if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5623      -                                proto = ipsl->ipsl_proto;
5624      -                        if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5625      -                                rport = ipsl->ipsl_rport;
5626      -                        if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5627      -                                lport = ipsl->ipsl_lport;
5628      -                }
5629      -        } else {
5630      -                proto = sel->ips_protocol;
5631      -                lport = sel->ips_local_port;
5632      -                rport = sel->ips_remote_port;
5633      -        }
5634 5828  
5635      -        af = sel->ips_isv4 ? AF_INET : AF_INET6;
     5829 +                /* 0 == unlimited == unsupported */
     5830 +                comb->sadb_comb_soft_allocations = 0;
     5831 +                comb->sadb_comb_hard_allocations = 0;
5636 5832  
5637      -        /*
5638      -         * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5639      -         * ipsec_selector_t.
5640      -         */
5641      -        cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5642      -            (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
     5833 +                /* These may want to come from policy rule. */
     5834 +                if (need_esp) {
     5835 +                        comb->sadb_comb_soft_bytes =
     5836 +                            espstack->ipsecesp_default_soft_bytes;
     5837 +                        comb->sadb_comb_hard_bytes =
     5838 +                            espstack->ipsecesp_default_hard_bytes;
     5839 +                        comb->sadb_comb_soft_addtime =
     5840 +                            espstack->ipsecesp_default_soft_addtime;
     5841 +                        comb->sadb_comb_hard_addtime =
     5842 +                            espstack->ipsecesp_default_hard_addtime;
     5843 +                        comb->sadb_comb_soft_usetime =
     5844 +                            espstack->ipsecesp_default_soft_usetime;
     5845 +                        comb->sadb_comb_hard_usetime =
     5846 +                            espstack->ipsecesp_default_hard_usetime;
     5847 +                } else {
     5848 +                        comb->sadb_comb_soft_bytes =
     5849 +                            ahstack->ipsecah_default_soft_bytes;
     5850 +                        comb->sadb_comb_hard_bytes =
     5851 +                            ahstack->ipsecah_default_hard_bytes;
     5852 +                        comb->sadb_comb_soft_addtime =
     5853 +                            ahstack->ipsecah_default_soft_addtime;
     5854 +                        comb->sadb_comb_hard_addtime =
     5855 +                            ahstack->ipsecah_default_hard_addtime;
     5856 +                        comb->sadb_comb_soft_usetime =
     5857 +                            ahstack->ipsecah_default_soft_usetime;
     5858 +                        comb->sadb_comb_hard_usetime =
     5859 +                            ahstack->ipsecah_default_hard_usetime;
     5860 +                }
5643 5861  
5644      -        if (cur == NULL) {
5645      -                freeb(mp);
5646      -                return (NULL);
     5862 +                prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
     5863 +                comb++;
5647 5864          }
5648 5865  
5649      -        cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5650      -            (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
     5866 +        return;
5651 5867  
5652      -        if (cur == NULL) {
5653      -                freeb(mp);
5654      -                return (NULL);
5655      -        }
     5868 +failure:
     5869 +        prop->sadb_prop_len = 0;
     5870 +}
5656 5871  
5657      -        if (sens != NULL) {
5658      -                uint8_t *sensext = cur;
5659      -                int senslen = SADB_64TO8(sens->sadb_sens_len);
     5872 +/*
     5873 + * Construct extended properties using ipsec_action_t, ipsec_policy_t, and
     5874 + * netstack_t pointers. Byte-aligned pointers cur and end are used for bounds
     5875 + * checking here and in called code. We don't set length if numecombs is 0, so
     5876 + * callers must check this for error handling.
     5877 + */
     5878 +static uint8_t *
     5879 +sadb_construct_eprop(const ipsec_action_t *act, const ipsec_policy_t *pp,
     5880 +    netstack_t *ns, const uint8_t *start, const uint8_t *end)
     5881 +{
     5882 +        uint8_t         *cur = (uint8_t *)start;
     5883 +        sadb_prop_t     *eprop = (sadb_prop_t *)cur;
     5884 +        ipsec_action_t  *an, *ap = (ipsec_action_t *)act;
     5885 +        ipsec_stack_t   *ipss = ns->netstack_ipsec;
5660 5886  
5661      -                cur += senslen;
5662      -                if (cur > end) {
5663      -                        freeb(mp);
5664      -                        return (NULL);
5665      -                }
5666      -                bcopy(sens, sensext, senslen);
5667      -        }
     5887 +        ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
5668 5888  
5669      -        /*
5670      -         * This section will change a lot as policy evolves.
5671      -         * For now, it'll be relatively simple.
5672      -         */
5673      -        eprop = (sadb_prop_t *)cur;
5674 5889          cur += sizeof (*eprop);
5675      -        if (cur > end) {
5676      -                /* no space left */
5677      -                freeb(mp);
     5890 +        if (cur > end)
5678 5891                  return (NULL);
5679      -        }
5680 5892  
5681 5893          eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5682 5894          eprop->sadb_x_prop_ereserved = 0;
5683 5895          eprop->sadb_x_prop_numecombs = 0;
5684 5896          eprop->sadb_prop_replay = 32;   /* default */
5685 5897  
5686      -        kmc = kmp = 0;
5687      -
5688 5898          for (; ap != NULL; ap = an) {
5689      -                an = (pol != NULL) ? ap->ipa_next : NULL;
5690      -
5691 5899                  /*
5692      -                 * Skip non-IPsec policies
     5900 +                 * XXX Don't walk past first ap if there's no pp. Not clear on
     5901 +                 * the rationale for this, but it's what extended path did.
5693 5902                   */
     5903 +                an = (pp != NULL) ? ap->ipa_next : NULL;
     5904 +
5694 5905                  if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5695 5906                          continue;
5696 5907  
5697      -                if (ap->ipa_act.ipa_apply.ipp_km_proto)
5698      -                        kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5699      -                if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5700      -                        kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5701 5908                  if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5702 5909                          eprop->sadb_prop_replay =
5703 5910                              ap->ipa_act.ipa_apply.ipp_replay_depth;
5704 5911                  }
5705 5912  
5706 5913                  cur = sadb_action_to_ecomb(cur, end, ap, ns);
5707      -                if (cur == NULL) { /* no space */
5708      -                        freeb(mp);
     5914 +                if (cur == NULL)
5709 5915                          return (NULL);
5710      -                }
5711 5916                  eprop->sadb_x_prop_numecombs++;
5712 5917          }
5713 5918  
5714      -        if (eprop->sadb_x_prop_numecombs == 0) {
     5919 +        /*
     5920 +         * This is an error. We return what we've got of eprops, caller needs
     5921 +         * to check for condition and pass it further up (e.g. by error samsg).
     5922 +         */
     5923 +        if (eprop->sadb_x_prop_numecombs == 0)
     5924 +                return (cur);
     5925 +
     5926 +        eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)start);
     5927 +
     5928 +        return (cur);
     5929 +bail:
     5930 +        return (NULL);
     5931 +}
     5932 +
     5933 +/*
     5934 + * Convert ipsec_policy_t and ipsec_action_t pointers to kmc extension. Byte-
     5935 + * aligned cur and end pointers used for bounds checking. sadb_x_kmcext_t
     5936 + * handling encapsulated in sadb_make_kmc_ext. Returns new value for cur,
     5937 + * NULL on failure.
     5938 + * We encapsulate for recursion since we have to walk ipsec_action_t.
     5939 + */
     5940 +static uint8_t *
     5941 +sadb_policy_to_kmcext(const ipsec_policy_t *pp, const ipsec_action_t *act,
     5942 +    const uint8_t *start, const uint8_t *end)
     5943 +{
     5944 +        uint8_t         *cur = (uint8_t *)start;
     5945 +        ipsec_action_t  *an, *ap = (ipsec_action_t *)act;
     5946 +        uint32_t        kmp = 0, kmc = 0;
     5947 +
     5948 +        for (; ap != NULL; ap = an) {
     5949 +                an = (pp != NULL) ? ap->ipa_next : NULL;
     5950 +
5715 5951                  /*
5716      -                 * This will happen if we fail to find a policy
5717      -                 * allowing for IPsec processing.
5718      -                 * Construct an error message.
     5952 +                 * Skip non-IPsec policies
5719 5953                   */
5720      -                samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5721      -                samsg->sadb_msg_errno = ENOENT;
5722      -                samsg->sadb_x_msg_diagnostic = 0;
5723      -                return (mp);
     5954 +                if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
     5955 +                        continue;
     5956 +
     5957 +                if (ap->ipa_act.ipa_apply.ipp_km_proto)
     5958 +                        kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
     5959 +                if (ap->ipa_act.ipa_apply.ipp_km_cookie)
     5960 +                        kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5724 5961          }
5725 5962  
5726      -        if ((kmp != 0) || (kmc != 0)) {
     5963 +        if ((kmp != 0) || (kmc != 0))
5727 5964                  cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5728      -                if (cur == NULL) {
5729      -                        freeb(mp);
5730      -                        return (NULL);
5731      -                }
5732      -        }
5733 5965  
5734      -        eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5735      -        samsg->sadb_msg_len = SADB_8TO64(cur - start);
5736      -        mp->b_wptr = cur;
5737      -
5738      -        return (mp);
     5966 +        return (cur);
5739 5967  }
5740 5968  
5741 5969  /*
5742      - * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5743      - *
5744      - * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5745      - * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5746      - * maximize code consolidation while preventing algorithm changes from messing
5747      - * with the callers finishing touches on the ACQUIRE itself.
     5970 + * Prepare the SADB_ACQUIRE message proper, which should be a b_cont to a
     5971 + * keysock registered M_CTL message. Takes a pointer to ipsacq_t (optional
     5972 + * acquire record for which we're sending message), ipsec_selector_t,
     5973 + * ipsec_action_t, ipsec_policy_t, netstack_t, and sens (required for callees
     5974 + * to generate the message), and booleans for need_esp, tunnel_mode,
     5975 + * extended, and with_prop (all of these should be self-explanatory). Because
     5976 + * extended messages set satype to SADB_SATYPE_UNSPEC, extended-only callers
     5977 + * can fudge need_esp.
5748 5978   */
5749      -mblk_t *
5750      -sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
     5979 +static mblk_t *
     5980 +sadb_construct_acqmsg(ipsacq_t *acqrec, ipsec_selector_t *sel,
     5981 +    ipsec_action_t *ap, ipsec_policy_t *pp, netstack_t *ns, sadb_sens_t *sens,
     5982 +    boolean_t need_esp, boolean_t tunnel_mode, boolean_t extended,
     5983 +    boolean_t with_prop)
5751 5984  {
5752      -        uint_t allocsize;
5753      -        mblk_t *pfkeymp, *msgmp;
5754      -        sa_family_t af;
5755      -        uint8_t *cur, *end;
5756      -        sadb_msg_t *samsg;
5757      -        uint16_t sport_typecode;
5758      -        uint16_t dport_typecode;
5759      -        uint8_t check_proto;
5760      -        boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
     5985 +        uint_t          combs_limit, allocsize;
     5986 +        uint8_t         *cur, *end;
     5987 +        sadb_msg_t      *samsg;
     5988 +        sadb_prop_t     *prop, *eprop;
     5989 +        mblk_t          *mp;
     5990 +        int             satype = extended ? SADB_SATYPE_UNSPEC
     5991 +            : (need_esp ? SADB_SATYPE_ESP : SADB_SATYPE_AH);
     5992 +        ipsec_stack_t   *ipss = ns->netstack_ipsec;
5761 5993  
5762      -        ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
     5994 +        ASSERT((acqrec == NULL) || (MUTEX_HELD(&acqrec->ipsacq_lock)));
     5995 +        ASSERT(ap != NULL);
     5996 +        ASSERT((pp == NULL) || (pp->ipsp_refs != 0));
     5997 +        ASSERT((ap == NULL) || (ap->ipa_refs != 0));
5763 5998  
5764      -        pfkeymp = sadb_keysock_out(0);
5765      -        if (pfkeymp == NULL)
5766      -                return (NULL);
     5999 +        /*
     6000 +         * Set the limit used to size [e]prop [e]combs array to as many
     6001 +         * algorithms as defined on the netstack (must hold ipsec_alg_lock
     6002 +         * from here to when done reading off netstack for [e]prop
     6003 +         * formation). need_esp may be fudged, so be generous to extended.
     6004 +         */
     6005 +        if (with_prop) {
     6006 +                if (extended)
     6007 +                        need_esp = B_TRUE;
     6008 +                rw_enter(&ipss->ipsec_alg_lock, RW_READER);
     6009 +                CALC_COMBS(combs_limit, ipss, need_esp);
     6010 +        }
5767 6011  
5768 6012          /*
5769      -         * First, allocate a basic ACQUIRE message
     6013 +         * If this code is right, we may not need cur & end for bounds
     6014 +         * checking, but we'll keep normal runtime checks until that statement
     6015 +         * looks credible rather than merely plausible, at which point checks
     6016 +         * can be moved to ASSERTs. sens is variably sized but already
     6017 +         * set. kmc is fixed size. Pointers into message are byte-aligned, so
     6018 +         * we're generally depending on all structures used in this
     6019 +         * calculation to be so, too (in fact, all sadb_*_t types used here
     6020 +         * are 64-bit aligned per PF_KEY requirements).
5770 6021           */
5771      -        allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5772      -            sizeof (sadb_address_t) + sizeof (sadb_prop_t);
     6022 +        allocsize = sizeof (sadb_msg_t) + sizeof (sadb_prop_t);
     6023 +        allocsize += ((tunnel_mode) ? 4 : 2) * (sizeof (sadb_address_t)
     6024 +            + SADB_SOCKADDR_SIZE);
     6025 +        if (sens != NULL)
     6026 +                allocsize += SADB_64TO8(sens->sadb_sens_len);
     6027 +        allocsize += sizeof (sadb_x_kmc_t);
     6028 +        /* If we need props, size combs/ecombs array using combs_limit */
     6029 +        if (with_prop)
     6030 +                allocsize += combs_limit * (extended ?
     6031 +                    sizeof (sadb_x_ecomb_t) : sizeof (sadb_comb_t));
5773 6032  
5774      -        /* Make sure there's enough to cover both AF_INET and AF_INET6. */
5775      -        allocsize += 2 * sizeof (struct sockaddr_in6);
     6033 +        ASSERT((allocsize & 0x7) == 0);
5776 6034  
5777      -        mutex_enter(&ipss->ipsec_alg_lock);
5778      -        /* NOTE:  The lock is now held through to this function's return. */
5779      -        allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5780      -            ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
     6035 +        mp = allocb(allocsize, BPRI_HI);
     6036 +        if (mp == NULL)
     6037 +                goto unlock_and_fail;
5781 6038  
5782      -        if (tunnel_mode) {
5783      -                /* Tunnel mode! */
5784      -                allocsize += 2 * sizeof (sadb_address_t);
5785      -                /* Enough to cover both AF_INET and AF_INET6. */
5786      -                allocsize += 2 * sizeof (struct sockaddr_in6);
5787      -        }
     6039 +        cur = mp->b_rptr;
     6040 +        end = cur + allocsize;
5788 6041  
5789      -        msgmp = allocb(allocsize, BPRI_HI);
5790      -        if (msgmp == NULL) {
5791      -                freeb(pfkeymp);
5792      -                mutex_exit(&ipss->ipsec_alg_lock);
5793      -                return (NULL);
     6042 +        samsg = (sadb_msg_t *)cur;
     6043 +        INITIALIZE_SAMSG(samsg, SADB_ACQUIRE);
     6044 +        samsg->sadb_msg_satype = satype;
     6045 +        samsg->sadb_msg_pid = 0;
     6046 +        samsg->sadb_msg_seq = (acqrec != NULL) ? acqrec->ipsacq_seq : 0;
     6047 +
     6048 +        /* CALC_COMBS asserts on zero limit; broken config still possible */
     6049 +        if (with_prop && (combs_limit == 0)) {
     6050 +                ERRNO_SAMSG(samsg, ENOENT);
     6051 +                goto unlock_and_bail;
5794 6052          }
5795 6053  
5796      -        pfkeymp->b_cont = msgmp;
5797      -        cur = msgmp->b_rptr;
5798      -        end = cur + allocsize;
5799      -        samsg = (sadb_msg_t *)cur;
5800 6054          cur += sizeof (sadb_msg_t);
5801 6055  
5802      -        af = acqrec->ipsacq_addrfam;
5803      -        switch (af) {
5804      -        case AF_INET:
5805      -                check_proto = IPPROTO_ICMP;
5806      -                break;
5807      -        case AF_INET6:
5808      -                check_proto = IPPROTO_ICMPV6;
5809      -                break;
5810      -        default:
5811      -                /* This should never happen unless we have kernel bugs. */
5812      -                cmn_err(CE_WARN,
5813      -                    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5814      -                ASSERT(0);
5815      -                mutex_exit(&ipss->ipsec_alg_lock);
5816      -                return (NULL);
5817      -        }
     6056 +        cur = sadb_sel_to_addrexts(sel, pp, ap, cur, end, tunnel_mode);
     6057 +        if (cur == NULL)
     6058 +                goto unlock_and_fail;
5818 6059  
5819      -        samsg->sadb_msg_version = PF_KEY_V2;
5820      -        samsg->sadb_msg_type = SADB_ACQUIRE;
5821      -        samsg->sadb_msg_satype = satype;
5822      -        samsg->sadb_msg_errno = 0;
5823      -        samsg->sadb_msg_pid = 0;
5824      -        samsg->sadb_msg_reserved = 0;
5825      -        samsg->sadb_msg_seq = acqrec->ipsacq_seq;
     6060 +        if (with_prop) {
     6061 +                if (extended) {
     6062 +                        cur = sadb_construct_eprop(ap, pp, ns, cur, end);
     6063 +                        if (cur == NULL)
     6064 +                                goto unlock_and_fail;
5826 6065  
5827      -        ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
     6066 +                        eprop = (sadb_prop_t *)cur;
     6067 +                        if (eprop->sadb_x_prop_numecombs == 0) {
     6068 +                                ERRNO_SAMSG(samsg, ENOENT);
     6069 +                                goto unlock_and_bail;
     6070 +                        }
     6071 +                } else {
     6072 +                        prop = (sadb_prop_t *)cur;
5828 6073  
5829      -        if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5830      -                sport_typecode = dport_typecode = 0;
5831      -        } else {
5832      -                sport_typecode = acqrec->ipsacq_srcport;
5833      -                dport_typecode = acqrec->ipsacq_dstport;
5834      -        }
     6074 +                        sadb_insert_prop(prop, ap, ns, combs_limit, need_esp);
     6075 +                        if (prop == NULL) {
     6076 +                                goto unlock_and_fail;
     6077 +                        } else if (prop->sadb_prop_len == 0) {
     6078 +                                ERRNO_SAMSG(samsg, ENOENT);
     6079 +                                goto unlock_and_bail;
     6080 +                        }
5835 6081  
5836      -        cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5837      -            acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
     6082 +                        cur += SADB_64TO8(prop->sadb_prop_len);
     6083 +                }
5838 6084  
5839      -        cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5840      -            acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
     6085 +                rw_exit(&ipss->ipsec_alg_lock);
     6086 +        }
5841 6087  
5842      -        if (tunnel_mode) {
5843      -                sport_typecode = acqrec->ipsacq_srcport;
5844      -                dport_typecode = acqrec->ipsacq_dstport;
5845      -                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5846      -                    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5847      -                    sport_typecode, acqrec->ipsacq_inner_proto,
5848      -                    acqrec->ipsacq_innersrcpfx);
5849      -                cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5850      -                    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5851      -                    dport_typecode, acqrec->ipsacq_inner_proto,
5852      -                    acqrec->ipsacq_innerdstpfx);
     6088 +        if (sens != NULL) {
     6089 +                uint8_t *sensext = cur;
     6090 +                int senslen = SADB_64TO8(sens->sadb_sens_len);
     6091 +
     6092 +                cur += senslen;
     6093 +                if (cur > end)
     6094 +                        goto freeb_bail;
     6095 +                bcopy(sens, sensext, senslen);
5853 6096          }
5854 6097  
5855      -        /* XXX Insert identity information here. */
     6098 +        cur = sadb_policy_to_kmcext(pp, ap, cur, end);
     6099 +        if (cur == NULL)
     6100 +                goto freeb_bail;
5856 6101  
5857      -        /* XXXMLS Insert sensitivity information here. */
     6102 +        samsg->sadb_msg_len = SADB_8TO64(cur - mp->b_rptr);
     6103 +        mp->b_wptr = cur;
5858 6104  
5859      -        if (cur != NULL)
5860      -                samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5861      -        else
5862      -                mutex_exit(&ipss->ipsec_alg_lock);
     6105 +        return (mp);
5863 6106  
5864      -        return (pfkeymp);
     6107 +freeb_bail:
     6108 +        /* This message isn't chained, so we can freeb. */
     6109 +        freeb(mp);
     6110 +        return (NULL);
     6111 +unlock_and_bail:
     6112 +        if (with_prop)
     6113 +                rw_exit(&ipss->ipsec_alg_lock);
     6114 +        return (mp);
     6115 +unlock_and_fail:
     6116 +        if (with_prop)
     6117 +                rw_exit(&ipss->ipsec_alg_lock);
     6118 +        return (NULL);
5865 6119  }
5866 6120  
5867 6121  /*
5868 6122   * Given an SADB_GETSPI message, find an appropriately ranged SA and
5869 6123   * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5870 6124   * If there was a memory allocation error, return NULL.  (Assume NULL !=
5871 6125   * (ipsa_t *)-1).
5872 6126   *
5873 6127   * master_spi is passed in host order.
5874 6128   */
↓ open down ↓ 364 lines elided ↑ open up ↑
6239 6493          char buf[INET6_ADDRSTRLEN];
6240 6494  
6241 6495          ASSERT(af == AF_INET6 || af == AF_INET);
6242 6496  
6243 6497          ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6244 6498              inet_ntop(af, addr, buf, sizeof (buf)));
6245 6499  }
6246 6500  
6247 6501  /*
6248 6502   * Fills in a reference to the policy, if any, from the conn, in *ppp.
     6503 + * If found, we hold a reference to the policy; caller must release it.
6249 6504   */
6250 6505  static void
6251 6506  ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6252 6507  {
6253 6508          ipsec_policy_t  *pp;
6254 6509          ipsec_latch_t   *ipl = connp->conn_latch;
6255 6510  
     6511 +        /* Use policy pointer already on conn_t if it's there. */
6256 6512          if ((ipl != NULL) && (connp->conn_ixa->ixa_ipsec_policy != NULL)) {
6257 6513                  pp = connp->conn_ixa->ixa_ipsec_policy;
6258 6514                  IPPOL_REFHOLD(pp);
6259      -        } else {
     6515 +        } else { /* otherwise query SPD */
     6516 +                /* This holds a reference for us if successful. */
6260 6517                  pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, sel,
6261 6518                      connp->conn_netstack);
6262 6519          }
6263 6520          *ppp = pp;
6264 6521  }
6265 6522  
6266 6523  /*
6267      - * The following functions scan through active conn_t structures
6268      - * and return a reference to the best-matching policy it can find.
6269      - * Caller must release the reference.
     6524 + * Takes ipsec_selector_t (for attributes to query), ipsec_policy_t (what we're
     6525 + * trying to find), and ip_stack_t (contains udp fanout we need to query). If we
     6526 + * find a matching connection, we return its policy settings.
6270 6527   */
6271 6528  static void
6272 6529  ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6273 6530  {
6274 6531          connf_t *connfp;
6275 6532          conn_t *connp = NULL;
6276 6533          ipsec_selector_t portonly;
6277 6534  
     6535 +        ASSERT(*ppp == NULL);
     6536 +
6278 6537          bzero((void *)&portonly, sizeof (portonly));
6279 6538  
6280 6539          if (sel->ips_local_port == 0)
6281 6540                  return;
6282 6541  
6283 6542          connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6284 6543              ipst)];
6285 6544          mutex_enter(&connfp->connf_lock);
6286 6545  
6287 6546          if (sel->ips_isv4) {
↓ open down ↓ 29 lines elided ↑ open up ↑
6317 6576                  }
6318 6577          }
6319 6578  
6320 6579          CONN_INC_REF(connp);
6321 6580          mutex_exit(&connfp->connf_lock);
6322 6581  
6323 6582          ipsec_conn_pol(sel, connp, ppp);
6324 6583          CONN_DEC_REF(connp);
6325 6584  }
6326 6585  
     6586 +/*
     6587 + * Takes ipsec_selector_t (connection attributes to form query) and ip_stack_t
     6588 + * (contains bind fanout we need to query) pointers to look up existing TCP
     6589 + * listener, returned via conn_t pointer. We return NULL on failure.
     6590 + * We increment reference count on match, caller must decrement.
     6591 + */
6327 6592  static conn_t *
6328      -ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
     6593 +ipsec_find_listen_conn(ipsec_selector_t *sel, ip_stack_t *ipst)
6329 6594  {
6330 6595          connf_t *connfp;
6331 6596          conn_t *connp = NULL;
6332 6597          const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6333 6598  
6334      -        if (sel->ips_local_port == 0)
6335      -                return (NULL);
     6599 +        /* XXX Sure about the second part? */
     6600 +        ASSERT(sel->ips_local_port != 0 && ipst != NULL);
6336 6601  
6337 6602          connfp = &ipst->ips_ipcl_bind_fanout[
6338 6603              IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6339 6604          mutex_enter(&connfp->connf_lock);
6340 6605  
6341 6606          if (sel->ips_isv4) {
6342 6607                  connp = connfp->connf_head;
6343 6608                  while (connp != NULL) {
6344 6609                          if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6345      -                            sel->ips_local_addr_v4, pptr[1]))
     6610 +                            sel->ips_local_addr_v4, sel->ips_local_port))
6346 6611                                  break;
6347 6612                          connp = connp->conn_next;
6348 6613                  }
6349 6614  
6350 6615                  if (connp == NULL) {
6351 6616                          /* Match to all-zeroes. */
6352 6617                          v6addrmatch = &ipv6_all_zeros;
6353 6618                  }
6354 6619          }
6355 6620  
6356 6621          if (connp == NULL) {
6357 6622                  connp = connfp->connf_head;
6358 6623                  while (connp != NULL) {
6359 6624                          if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6360      -                            *v6addrmatch, pptr[1]))
     6625 +                            *v6addrmatch, sel->ips_local_port))
6361 6626                                  break;
6362 6627                          connp = connp->conn_next;
6363 6628                  }
6364 6629  
6365 6630                  if (connp == NULL) {
6366 6631                          mutex_exit(&connfp->connf_lock);
6367 6632                          return (NULL);
6368 6633                  }
6369 6634          }
6370 6635  
6371 6636          CONN_INC_REF(connp);
6372 6637          mutex_exit(&connfp->connf_lock);
6373 6638          return (connp);
6374 6639  }
6375 6640  
     6641 +/*
     6642 + * Given ipsec_selector_t (contains attributes to query), ipsec_policy_t (what
     6643 + * we need to find), and ip_stack_t (contains connection state to query)
     6644 + * pointers, find a matching TCP connection or listener and return its policy.
     6645 + */
6376 6646  static void
6377 6647  ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6378 6648  {
6379 6649          connf_t         *connfp;
6380 6650          conn_t          *connp;
6381 6651          uint32_t        ports;
6382 6652          uint16_t        *pptr = (uint16_t *)&ports;
6383 6653  
     6654 +        ASSERT(sel->ips_local_port != 0 && *ppp == NULL);
     6655 +
6384 6656          /*
6385 6657           * Find TCP state in the following order:
6386      -         * 1.) Connected conns.
     6658 +         * 1.) Connected conns. (walk ipst connection fanout)
6387 6659           * 2.) Listeners.
6388 6660           *
6389 6661           * Even though #2 will be the common case for inbound traffic, only
6390 6662           * following this order ensures correctness.
6391 6663           */
6392 6664  
6393      -        if (sel->ips_local_port == 0)
6394      -                return;
6395 6665  
6396 6666          /*
6397      -         * 0 should be fport, 1 should be lport.  SRC is the local one here.
6398      -         * See ipsec_construct_inverse_acquire() for details.
     6667 +         * pptr makes an array of port values, 0 for fport, 1 for lport.  SRC is
     6668 +         * the local one here. Connection lookup macros want this instead of
     6669 +         * selector port members.
6399 6670           */
6400 6671          pptr[0] = sel->ips_remote_port;
6401 6672          pptr[1] = sel->ips_local_port;
6402 6673  
6403 6674          connfp = &ipst->ips_ipcl_conn_fanout[
6404 6675              IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6405 6676          mutex_enter(&connfp->connf_lock);
6406 6677          connp = connfp->connf_head;
6407 6678  
6408 6679          if (sel->ips_isv4) {
↓ open down ↓ 13 lines elided ↑ open up ↑
6422 6693                          connp = connp->conn_next;
6423 6694                  }
6424 6695          }
6425 6696  
6426 6697          if (connp != NULL) {
6427 6698                  CONN_INC_REF(connp);
6428 6699                  mutex_exit(&connfp->connf_lock);
6429 6700          } else {
6430 6701                  mutex_exit(&connfp->connf_lock);
6431 6702  
6432      -                /* Try the listen hash. */
6433      -                if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
     6703 +                /* Try the listen hash. If found, comes with incremented ref. */
     6704 +                if ((connp = ipsec_find_listen_conn(sel, ipst)) == NULL)
6434 6705                          return;
6435 6706          }
6436 6707  
6437 6708          ipsec_conn_pol(sel, connp, ppp);
6438 6709          CONN_DEC_REF(connp);
6439 6710  }
6440 6711  
     6712 +/*
     6713 + * Given ipsec_selector_t (connection attributes to form query), ipsec_policy_t
     6714 + * (populate with match), and ip_stack_t (connection state to query) pointers,
     6715 + * call into sctp to find an existing connection and return its policy.
     6716 + */
6441 6717  static void
6442      -ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6443      -    ip_stack_t *ipst)
     6718 +ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6444 6719  {
6445 6720          conn_t          *connp;
6446 6721          uint32_t        ports;
6447 6722          uint16_t        *pptr = (uint16_t *)&ports;
6448 6723  
     6724 +        ASSERT(sel->ips_local_port != 0 && *ppp == NULL);
     6725 +
6449 6726          /*
6450 6727           * Find SCTP state in the following order:
6451 6728           * 1.) Connected conns.
6452 6729           * 2.) Listeners.
6453 6730           *
6454 6731           * Even though #2 will be the common case for inbound traffic, only
6455 6732           * following this order ensures correctness.
6456 6733           */
6457 6734  
6458      -        if (sel->ips_local_port == 0)
6459      -                return;
6460      -
6461 6735          /*
6462      -         * 0 should be fport, 1 should be lport.  SRC is the local one here.
6463      -         * See ipsec_construct_inverse_acquire() for details.
     6736 +         * pptr makes an array of port values, 0 for fport, 1 for lport.  SRC is
     6737 +         * the local one here. Connection lookup macros want this instead of
     6738 +         * selector port members.
6464 6739           */
6465 6740          pptr[0] = sel->ips_remote_port;
6466 6741          pptr[1] = sel->ips_local_port;
6467 6742  
6468 6743          /*
6469 6744           * For labeled systems, there's no need to check the
6470 6745           * label here.  It's known to be good as we checked
6471 6746           * before allowing the connection to become bound.
6472 6747           */
6473 6748          if (sel->ips_isv4) {
↓ open down ↓ 8 lines elided ↑ open up ↑
6482 6757                      &sel->ips_local_addr_v6, ports, ALL_ZONES,
6483 6758                      0, ipst->ips_netstack->netstack_sctp);
6484 6759          }
6485 6760          if (connp == NULL)
6486 6761                  return;
6487 6762          ipsec_conn_pol(sel, connp, ppp);
6488 6763          CONN_DEC_REF(connp);
6489 6764  }
6490 6765  
6491 6766  /*
6492      - * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6493      - * Returns 0 or errno, and always sets *diagnostic to something appropriate
6494      - * to PF_KEY.
6495      - *
     6767 + * Takes ipsec_selector_t (what we're forming), two sadb_address_t (address
     6768 + * extensions needed to create selector), and diagnostic (what, if anything,
     6769 + * went wrong in PF_KEY terms) pointers, returns int (0 or errno).
6496 6770   * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6497 6771   * ignore prefix lengths in the address extension.  Since we match on first-
6498 6772   * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6499 6773   * set addresses to mask out the lower bits, we should get a suitable search
6500 6774   * key for the SPD anyway.  This is the function to change if the assumption
6501 6775   * about suitable search keys is wrong.
6502 6776   */
6503 6777  static int
6504 6778  ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6505 6779      sadb_address_t *dstext, int *diagnostic)
↓ open down ↓ 35 lines elided ↑ open up ↑
6541 6815                  } else {
6542 6816                          sel->ips_remote_port = dst->sin_port;
6543 6817                          sel->ips_local_port = src->sin_port;
6544 6818                  }
6545 6819                  sel->ips_isv4 = B_TRUE;
6546 6820          }
6547 6821          return (0);
6548 6822  }
6549 6823  
6550 6824  /*
6551      - * We have encapsulation.
6552      - * - Lookup tun_t by address and look for an associated
6553      - *   tunnel policy
6554      - * - If there are inner selectors
6555      - *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6556      - *   - Look up tunnel policy based on selectors
6557      - * - Else
6558      - *   - Sanity check the negotation
6559      - *   - If appropriate, fall through to global policy
     6825 + * We're passed pointers to ipsec_selector (inner info needed to form query),
     6826 + * ipsec_policy_t (what we're trying to populate), a pair of sadb_address_t
     6827 + * (extensions needed to reset selector), ipsec_tun_pol_t (tunnel policy that
     6828 + * may already be populated from previous SPD query), and integer (error detail
     6829 + * in PF_KEY2 terms, always 0). Return 0 or errno.
     6830 + * Caller may have fudged inner selector, so we need to reset it (via
     6831 + * ipsec_get_inverse_acquire_sel()) if we have to reuse it.
6560 6832   */
6561 6833  static int
6562 6834  ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6563 6835      sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6564 6836      int *diagnostic)
6565 6837  {
6566 6838          int err;
6567 6839          ipsec_policy_head_t *polhead;
6568 6840  
     6841 +        ASSERT(*ppp == NULL);
     6842 +
6569 6843          *diagnostic = 0;
6570 6844  
6571 6845          /* Check for inner selectors and act appropriately */
6572      -
6573 6846          if (innsrcext != NULL) {
6574      -                /* Inner selectors present */
6575      -                ASSERT(inndstext != NULL);
     6847 +                ASSERT(inndstext != NULL); /* Need a pair */
     6848 +                /*
     6849 +                 * If inner packet selectors, we must have negotiated tunnel and
     6850 +                 * active policy already. If the tunnel has transport-mode
     6851 +                 * policy set on it or no policy at all, fail.
     6852 +                 */
6576 6853                  if ((itp == NULL) ||
6577 6854                      (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6578 6855                      (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6579      -                        /*
6580      -                         * If inner packet selectors, we must have negotiate
6581      -                         * tunnel and active policy.  If the tunnel has
6582      -                         * transport-mode policy set on it, or has no policy,
6583      -                         * fail.
6584      -                         */
6585 6856                          return (ENOENT);
6586 6857                  } else {
6587 6858                          /*
6588      -                         * Reset "sel" to indicate inner selectors.  Pass
6589      -                         * inner PF_KEY address extensions for this to happen.
     6859 +                         * If we got a sane policy back from the SPD, reset the
     6860 +                         * possibly fudged selector for subsequent operations.
6590 6861                           */
6591 6862                          if ((err = ipsec_get_inverse_acquire_sel(sel,
6592      -                            innsrcext, inndstext, diagnostic)) != 0)
     6863 +                                    innsrcext, inndstext, diagnostic)) != 0)
6593 6864                                  return (err);
6594      -                        /*
6595      -                         * Now look for a tunnel policy based on those inner
6596      -                         * selectors.  (Common code is below.)
6597      -                         */
6598 6865                  }
6599      -        } else {
6600      -                /* No inner selectors present */
     6866 +        } else { /* No inner selectors present */
     6867 +
     6868 +                /*
     6869 +                 * Transport mode negotiation with no tunnel policy configured
     6870 +                 * - return to indicate a global policy check is needed.
     6871 +                 */
6601 6872                  if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6602      -                        /*
6603      -                         * Transport mode negotiation with no tunnel policy
6604      -                         * configured - return to indicate a global policy
6605      -                         * check is needed.
6606      -                         */
6607 6873                          return (0);
6608 6874                  } else if (itp->itp_flags & ITPF_P_TUNNEL) {
6609 6875                          /* Tunnel mode set with no inner selectors. */
6610 6876                          return (ENOENT);
6611 6877                  }
6612 6878                  /*
6613 6879                   * Else, this is a tunnel policy configured with ifconfig(1m)
6614 6880                   * or "negotiate transport" with ipsecconf(1m).  We have an
6615 6881                   * itp with policy set based on any match, so don't bother
6616 6882                   * changing fields in "sel".
↓ open down ↓ 11 lines elided ↑ open up ↑
6628 6894           * Don't default to global if we didn't find a matching policy entry.
6629 6895           * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6630 6896           */
6631 6897          if (*ppp == NULL)
6632 6898                  return (ENOENT);
6633 6899  
6634 6900          return (0);
6635 6901  }
6636 6902  
6637 6903  /*
6638      - * For sctp conn_faddr is the primary address, hence this is of limited
6639      - * use for sctp.
     6904 + * Takes ipsec_selector_t (data to form query), ipsec_policy_t (what we need
     6905 + * to populate), and ip_stack_t (contains state data to query) pointers. This is
     6906 + * a generic protocol look-up function to find a relevant connection that can be
     6907 + * converted into a policy.
     6908 + * XXX For sctp conn_faddr is the primary address, hence this is of limited
     6909 + * use for sctp. Do we care, given sctp has its own lookup?
6640 6910   */
6641 6911  static void
6642 6912  ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6643 6913      ip_stack_t *ipst)
6644 6914  {
6645 6915          boolean_t       isv4 = sel->ips_isv4;
6646 6916          connf_t         *connfp;
6647 6917          conn_t          *connp;
6648 6918  
     6919 +        ASSERT(*ppp == NULL);
     6920 +
6649 6921          if (isv4) {
6650 6922                  connfp = &ipst->ips_ipcl_proto_fanout_v4[sel->ips_protocol];
6651 6923          } else {
6652 6924                  connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6653 6925          }
6654 6926  
6655 6927          mutex_enter(&connfp->connf_lock);
6656 6928          for (connp = connfp->connf_head; connp != NULL;
6657 6929              connp = connp->conn_next) {
6658 6930                  if (isv4) {
↓ open down ↓ 18 lines elided ↑ open up ↑
6677 6949          }
6678 6950  
6679 6951          CONN_INC_REF(connp);
6680 6952          mutex_exit(&connfp->connf_lock);
6681 6953  
6682 6954          ipsec_conn_pol(sel, connp, ppp);
6683 6955          CONN_DEC_REF(connp);
6684 6956  }
6685 6957  
6686 6958  /*
6687      - * Construct an inverse ACQUIRE reply based on:
     6959 + * This code is called from keysock to handle inverse acquire messages.  We
     6960 + * are passed a pointer to sadb_msg_t, a fixed-size array of sadb_ext_t, and a
     6961 + * netstack_t pointer and return a mblk_t pointer, in which we attempt to
     6962 + * construct a return acquire message. In case of errors, we return a NULL
     6963 + * pointer and populate samsg->sadb_msg_errno and samsg->sadb_msg_diagnostic,
     6964 + * which is handled as an error at the keysock layer. Otherwise keysock does a
     6965 + * passup with our message.
     6966 + * Caller performs basic sanity checks such as NULL external addresses and
     6967 + * only one of two inner addrs being NULL. Remaining checks happen here.
6688 6968   *
6689      - * 1.) Current global policy.
6690      - * 2.) An conn_t match depending on what all was passed in the extv[].
6691      - * 3.) A tunnel's policy head.
6692      - * ...
6693      - * N.) Other stuff TBD (e.g. identities)
6694      - *
6695      - * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6696      - * in this function so the caller can extract them where appropriately.
6697      - *
6698      - * The SRC address is the local one - just like an outbound ACQUIRE message.
6699      - *
6700 6969   * XXX MLS: key management supplies a label which we just reflect back up
6701 6970   * again.  clearly we need to involve the label in the rest of the checks.
6702 6971   */
6703 6972  mblk_t *
6704 6973  ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6705 6974      netstack_t *ns)
6706 6975  {
6707 6976          int err;
6708 6977          int diagnostic;
6709 6978          sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6710 6979              *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6711 6980              *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6712 6981              *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6713 6982          sadb_sens_t *sens = (sadb_sens_t *)extv[SADB_EXT_SENSITIVITY];
6714 6983          struct sockaddr_in6 *src, *dst;
6715 6984          struct sockaddr_in6 *isrc, *idst;
6716 6985          ipsec_tun_pol_t *itp = NULL;
6717 6986          ipsec_policy_t *pp = NULL;
6718 6987          ipsec_selector_t sel, isel;
6719      -        mblk_t *retmp = NULL;
     6988 +        mblk_t *retmp;
6720 6989          ip_stack_t      *ipst = ns->netstack_ip;
     6990 +        sadb_msg_t *retmsg;
     6991 +        ipsec_action_t *ap;
     6992 +        boolean_t tunnel_mode = B_FALSE;
6721 6993  
6722      -
6723 6994          /* Normalize addresses */
6724 6995          if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6725 6996              == KS_IN_ADDR_UNKNOWN) {
6726 6997                  err = EINVAL;
6727 6998                  diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6728 6999                  goto bail;
6729 7000          }
6730 7001          src = (struct sockaddr_in6 *)(srcext + 1);
6731 7002          if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6732 7003              == KS_IN_ADDR_UNKNOWN) {
↓ open down ↓ 2 lines elided ↑ open up ↑
6735 7006                  goto bail;
6736 7007          }
6737 7008          dst = (struct sockaddr_in6 *)(dstext + 1);
6738 7009          if (src->sin6_family != dst->sin6_family) {
6739 7010                  err = EINVAL;
6740 7011                  diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6741 7012                  goto bail;
6742 7013          }
6743 7014  
6744 7015          /* Check for tunnel mode and act appropriately */
     7016 +        /*
     7017 +         * Note: keysock_inverse_acquire catches unbalanced extensions and
     7018 +         * makes them into keysock_error calls, so ASSERTs here to confirm.
     7019 +         */
6745 7020          if (innsrcext != NULL) {
6746      -                if (inndstext == NULL) {
6747      -                        err = EINVAL;
6748      -                        diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6749      -                        goto bail;
6750      -                }
     7021 +                ASSERT(inndstext != NULL);
6751 7022                  if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6752 7023                      (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6753 7024                          err = EINVAL;
6754 7025                          diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6755 7026                          goto bail;
6756 7027                  }
6757 7028                  isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6758 7029                  if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6759 7030                      (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6760 7031                          err = EINVAL;
↓ open down ↓ 5 lines elided ↑ open up ↑
6766 7037                          err = EINVAL;
6767 7038                          diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6768 7039                          goto bail;
6769 7040                  }
6770 7041                  if (isrc->sin6_family != AF_INET &&
6771 7042                      isrc->sin6_family != AF_INET6) {
6772 7043                          err = EINVAL;
6773 7044                          diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6774 7045                          goto bail;
6775 7046                  }
6776      -        } else if (inndstext != NULL) {
6777      -                err = EINVAL;
6778      -                diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6779      -                goto bail;
6780      -        }
     7047 +                tunnel_mode = B_TRUE;
     7048 +        } else
     7049 +                ASSERT(inndstext == NULL);
6781 7050  
6782      -        /* Get selectors first, based on outer addresses */
     7051 +        /* Convert address extensions into outer selector */
6783 7052          err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6784 7053          if (err != 0)
6785 7054                  goto bail;
6786 7055  
6787      -        /* Check for tunnel mode mismatches. */
6788      -        if (innsrcext != NULL &&
     7056 +        /* Sanity-check newfound outer selector for tunnel mode mismatches */
     7057 +        if (tunnel_mode &&
6789 7058              ((isrc->sin6_family == AF_INET &&
6790 7059              sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6791 7060              (isrc->sin6_family == AF_INET6 &&
6792 7061              sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6793 7062                  err = EPROTOTYPE;
6794 7063                  goto bail;
6795 7064          }
6796 7065  
6797 7066          /*
6798 7067           * Okay, we have the addresses and other selector information.
6799      -         * Let's first find a conn...
     7068 +         * If our selector is for a protocol on top of IP, we make protocol-
     7069 +         * specific queries that work through useful state (e.g. connections or
     7070 +         * listeners). If we get something back, a reference to it will already
     7071 +         * be held, and we need to release that reference.
6800 7072           */
6801      -        pp = NULL;
6802 7073          switch (sel.ips_protocol) {
6803 7074          case IPPROTO_TCP:
6804 7075                  ipsec_tcp_pol(&sel, &pp, ipst);
6805 7076                  break;
6806 7077          case IPPROTO_UDP:
6807 7078                  ipsec_udp_pol(&sel, &pp, ipst);
6808 7079                  break;
6809 7080          case IPPROTO_SCTP:
6810 7081                  ipsec_sctp_pol(&sel, &pp, ipst);
6811 7082                  break;
6812 7083          case IPPROTO_ENCAP:
6813 7084          case IPPROTO_IPV6:
6814 7085                  /*
6815      -                 * Assume sel.ips_remote_addr_* has the right address at
6816      -                 * that exact position.
     7086 +                 * These cases are IPv6 in IP or IP in IP. Revert to querying
     7087 +                 * SPD for tunnel policy, since there's no higher-level protocol
     7088 +                 * or stack state to assist. Assume sel.ips_remote_addr_* has
     7089 +                 * right address at exact position.
6817 7090                   */
6818 7091                  itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
6819 7092                      (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
6820 7093                      ipst);
6821 7094  
6822 7095                  if (innsrcext == NULL) {
6823 7096                          /*
6824 7097                           * Transport-mode tunnel, make sure we fake out isel
6825 7098                           * to contain something based on the outer protocol.
6826 7099                           */
6827 7100                          bzero(&isel, sizeof (isel));
6828 7101                          isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
     7102 +                        /* XXX does this make tunnel_mode true? */
6829 7103                  } /* Else isel is initialized by ipsec_tun_pol(). */
6830 7104                  err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6831 7105                      &diagnostic);
6832 7106                  /*
6833 7107                   * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6834 7108                   * may be.
6835 7109                   */
6836 7110                  if (err != 0)
6837 7111                          goto bail;
6838 7112                  break;
6839      -        default:
     7113 +        default: /* Fall through to generic lookup */
6840 7114                  ipsec_oth_pol(&sel, &pp, ipst);
6841 7115                  break;
6842 7116          }
6843 7117  
6844 7118          /*
6845      -         * If we didn't find a matching conn_t or other policy head, take a
6846      -         * look in the global policy.
     7119 +         * If we didn't find a matching conn_t or other policy head (pp retains
     7120 +         * initial NULL value), attempt to revert to the global policy.
6847 7121           */
6848 7122          if (pp == NULL) {
6849 7123                  pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, &sel, ns);
6850 7124                  if (pp == NULL) {
6851 7125                          /* There's no global policy. */
6852 7126                          err = ENOENT;
6853 7127                          diagnostic = 0;
6854 7128                          goto bail;
6855 7129                  }
6856 7130          }
6857 7131  
6858 7132          /*
6859 7133           * Now that we have a policy entry/widget, construct an ACQUIRE
6860 7134           * message based on that, fix fields where appropriate,
6861 7135           * and return the message.
6862 7136           */
6863      -        retmp = sadb_extended_acquire(&sel, pp, NULL,
6864      -            (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6865      -            samsg->sadb_msg_seq, samsg->sadb_msg_pid, sens, ns);
6866      -        if (pp != NULL) {
     7137 +        ap = pp->ipsp_act;
     7138 +        ASSERT(ap != NULL);
     7139 +
     7140 +        if (ap != NULL)
     7141 +                IPACT_REFHOLD(ap);
     7142 +
     7143 +        retmp = sadb_construct_acqmsg(NULL, &sel, ap, pp, ns, sens, 0,
     7144 +            tunnel_mode, B_TRUE, B_TRUE);
     7145 +        if (retmp == NULL)
     7146 +                goto nomem_bail;
     7147 +
     7148 +        retmsg = (sadb_msg_t *)retmp->b_rptr;
     7149 +        retmsg->sadb_msg_seq = samsg->sadb_msg_seq;
     7150 +        retmsg->sadb_msg_pid = samsg->sadb_msg_pid;
     7151 +
     7152 +        if (pp != NULL)
6867 7153                  IPPOL_REFRELE(pp);
6868      -        }
     7154 +        if (ap != NULL)
     7155 +                IPACT_REFRELE(ap);
     7156 +
     7157 +        return (retmp);
     7158 +
     7159 +nomem_bail:
     7160 +        if (pp != NULL)
     7161 +                IPPOL_REFRELE(pp);
     7162 +        if (ap != NULL)
     7163 +                IPACT_REFRELE(ap);
6869 7164          ASSERT(err == 0 && diagnostic == 0);
6870      -        if (retmp == NULL)
6871      -                err = ENOMEM;
     7165 +        err = ENOMEM;
6872 7166  bail:
6873 7167          if (itp != NULL) {
6874 7168                  ITP_REFRELE(itp, ns);
6875 7169          }
     7170 +        /*
     7171 +         * Write error info into original message, as we may not have resources
     7172 +         * for a proper reply.
     7173 +         */
6876 7174          samsg->sadb_msg_errno = (uint8_t)err;
6877 7175          samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6878 7176          return (retmp);
6879 7177  }
6880 7178  
6881 7179  /*
6882 7180   * ipsa_lpkt is a one-element queue, only manipulated by the next two
6883 7181   * functions.  They have to hold the ipsa_lock because of potential races
6884 7182   * between key management using SADB_UPDATE, and inbound packets that may
6885 7183   * queue up on the larval SA (hence the 'l' in "lpkt").
↓ open down ↓ 297 lines elided ↑ open up ↑
7183 7481  int
7184 7482  ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7185 7483  {
7186 7484          ipsec_alginfo_t *alg;
7187 7485          crypto_mechanism_t mech;
7188 7486          crypto_key_t *key;
7189 7487          crypto_ctx_template_t *sa_tmpl;
7190 7488          int rv;
7191 7489          ipsec_stack_t   *ipss = sa->ipsa_netstack->netstack_ipsec;
7192 7490  
7193      -        ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
     7491 +        ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
7194 7492          ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7195 7493  
7196 7494          /* get pointers to the algorithm info, context template, and key */
7197 7495          switch (alg_type) {
7198 7496          case IPSEC_ALG_AUTH:
7199 7497                  key = &sa->ipsa_kcfauthkey;
7200 7498                  sa_tmpl = &sa->ipsa_authtmpl;
7201 7499                  alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7202 7500                  break;
7203 7501          case IPSEC_ALG_ENCR:
↓ open down ↓ 644 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX