Print this page
Bayard's initial drop, needs finishing, or at least testing.
@@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
@@ -70,21 +71,26 @@
/*
* This source file contains Security Association Database (SADB) common
* routines. They are linked in with the AH module. Since AH has no chance
* of falling under export control, it was safe to link it in there.
*/
-
-static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
- ipsec_action_t *, boolean_t, uint32_t, uint32_t, sadb_sens_t *,
- netstack_t *);
static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *);
static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
netstack_t *);
static void sadb_destroy(sadb_t *, netstack_t *);
static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
static ts_label_t *sadb_label_from_sens(sadb_sens_t *, uint64_t *);
static sadb_sens_t *sadb_make_sens_ext(ts_label_t *tsl, int *len);
+/* Args named here, as the booleans can be hard to distinguish */
+static mblk_t *sadb_construct_acqmsg(ipsacq_t *acqrec, ipsec_selector_t *sel,
+ ipsec_action_t *ap, ipsec_policy_t *pp, netstack_t *ns, sadb_sens_t *sens,
+ boolean_t need_esp, boolean_t tunnel_mode, boolean_t extended,
+ boolean_t with_prop);
+static uint8_t *sadb_construct_eprop(const ipsec_action_t *,
+ const ipsec_policy_t *, netstack_t *, const uint8_t *, const uint8_t *);
+static void sadb_insert_prop(sadb_prop_t *, const ipsec_action_t *,
+ netstack_t *, uint_t, boolean_t);
static time_t sadb_add_time(time_t, uint64_t);
static void lifetime_fuzz(ipsa_t *);
static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
static int get_ipsa_pair(ipsa_query_t *, ipsap_t *, int *);
@@ -97,10 +103,17 @@
* ipsacq_maxpackets is defined here to make it tunable
* from /etc/system.
*/
extern uint64_t ipsacq_maxpackets;
+/*
+ * Allocation size for sin_t/sin6_t in address extensions. We allocate IPv6
+ * because it's the larger of the two, and we roundup because the type isn't
+ * defined to guarantee 64-bit alignment.
+ */
+#define SADB_SOCKADDR_SIZE (roundup(sizeof (sin6_t), sizeof (uint64_t)))
+
#define SET_EXPIRE(sa, delta, exp) { \
if (((sa)->ipsa_ ## delta) != 0) { \
(sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime, \
(sa)->ipsa_ ## delta); \
} \
@@ -116,11 +129,38 @@
(sa)->ipsa_ ## exp = \
MIN((sa)->ipsa_ ## exp, tmp); \
} \
}
+/* Warning: watch for evaluation issues with complex args */
+#define INITIALIZE_SAMSG(samsg, type) \
+ (samsg)->sadb_msg_version = PF_KEY_V2, \
+ (samsg)->sadb_msg_type = (type), \
+ (samsg)->sadb_msg_errno = 0, \
+ (samsg)->sadb_msg_reserved = 0
+/* Warning: watch for evaluation issues with complex args */
+#define ERRNO_SAMSG(samsg, errno) \
+ (samsg)->sadb_msg_len = SADB_8TO64(sizeof (*samsg)), \
+ (samsg)->sadb_msg_errno = (errno), \
+ (samsg)->sadb_x_msg_diagnostic = 0
+
+/*
+ * Warning: watch for evaluation issues with complex args. This is a rough,
+ * conservative calculation (e.g. combined mode encr algs can perform both
+ * encr/auth and ipsecconf drops auth algs in combinations). This is
+ * nevertheless reasonable, given that the kernel doesn't make or guarantee
+ * optimizations reducing the combination space.
+ */
+#define CALC_COMBS(limit, ipss, need_esp) { \
+ limit = (need_esp) ? \
+ (ipss)->ipsec_nalgs[IPSEC_ALG_AUTH] * \
+ (ipss)->ipsec_nalgs[IPSEC_ALG_ENCR] \
+ : (ipss)->ipsec_nalgs[IPSEC_ALG_AUTH]; \
+ ASSERT((limit) > 0); \
+}
+
/* wrap the macro so we can pass it as a function pointer */
void
sadb_sa_refrele(void *target)
{
IPSA_REFRELE(((ipsa_t *)target));
@@ -973,84 +1013,201 @@
return (newbie);
}
/*
- * Initialize a SADB address extension at the address specified by addrext.
- * Return a pointer to the end of the new address extension.
+ * Takes two uint8_t (bounds on buffer in which to construct extension) and an
+ * addr (address to write into extension) pointer, a uint16_t (type of address
+ * in extension), and af, port, proto, and prefix values (further extension
+ * content). Returns a pointer to the end of the extension (length varies by
+ * address family, rounded up to 64 bits), or NULL if it won't fit before end.
*/
static uint8_t *
-sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
+sadb_make_addr_ext(const uint8_t *start, const uint8_t *end, uint16_t exttype,
sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
{
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6;
- uint8_t *cur = start;
+ uint8_t *cur = (uint8_t *)start;
int addrext_len;
- int sin_len;
sadb_address_t *addrext = (sadb_address_t *)cur;
- if (cur == NULL)
- return (NULL);
+ ASSERT(cur != NULL && end != NULL);
cur += sizeof (*addrext);
+ sin = (struct sockaddr_in *)cur;
+ sin6 = (struct sockaddr_in6 *)cur;
+ cur += (af == AF_INET) ? sizeof (*sin) : sizeof (*sin6);
+
+ addrext_len = roundup(cur - start, sizeof (uint64_t));
+ cur = (uint8_t *)start + addrext_len;
+
if (cur > end)
return (NULL);
addrext->sadb_address_proto = proto;
addrext->sadb_address_prefixlen = prefix;
addrext->sadb_address_reserved = 0;
addrext->sadb_address_exttype = exttype;
+ addrext->sadb_address_len = SADB_8TO64(addrext_len);
switch (af) {
case AF_INET:
- sin = (struct sockaddr_in *)cur;
- sin_len = sizeof (*sin);
- cur += sin_len;
- if (cur > end)
- return (NULL);
-
sin->sin_family = af;
bzero(sin->sin_zero, sizeof (sin->sin_zero));
sin->sin_port = port;
IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
break;
case AF_INET6:
- sin6 = (struct sockaddr_in6 *)cur;
- sin_len = sizeof (*sin6);
- cur += sin_len;
- if (cur > end)
- return (NULL);
-
bzero(sin6, sizeof (*sin6));
sin6->sin6_family = af;
sin6->sin6_port = port;
IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
break;
}
- addrext_len = roundup(cur - start, sizeof (uint64_t));
- addrext->sadb_address_len = SADB_8TO64(addrext_len);
+ return (cur);
+}
- cur = start + addrext_len;
- if (cur > end)
- cur = NULL;
+/*
+ * Takes ipsec_selector_t (address information used in forming addr
+ * extensions) and ipsec_policy_t (contains pointer to selector key used in
+ * tunnel mode) pointers, tunnel mode boolean, and creates address extensions
+ * inside message contents bounds checked by byte-aligned start and end
+ * pointers. Returns new value for cur pointer or NULL on failure.
+ * XXX TODO: Original packet contents go here.
+ */
+static uint8_t *
+sadb_sel_to_addrexts(const ipsec_selector_t *sel, const ipsec_policy_t *pp,
+ const ipsec_action_t *ap, const uint8_t *start, const uint8_t *end,
+ boolean_t tunnel_mode)
+{
+ uint8_t proto, pfxlen, *cur = (uint8_t *)start;
+ ipsec_selkey_t *ipsl;
+ sa_family_t af;
+ uint16_t lport, rport;
+ uint32_t *saddrptr, *daddrptr;
+ if (tunnel_mode) {
+ /*
+ * Form inner address extensions based NOT on the inner
+ * selectors (i.e. the packet data), but on the policy's
+ * selector key (i.e. the policy's selector information).
+ *
+ * NOTE: The position of IPv4 and IPv6 addresses is the
+ * same in ipsec_selkey_t (unless the compiler does very
+ * strange things with unions, consult your local C language
+ * lawyer for details).
+ */
+ ASSERT(pp != NULL);
+
+ ipsl = &(pp->ipsp_sel->ipsl_key);
+ if (ipsl->ipsl_valid & IPSL_IPV4) {
+ af = AF_INET;
+ ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
+ ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
+ } else {
+ af = AF_INET6;
+ ASSERT(sel->ips_protocol == IPPROTO_IPV6);
+ ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
+ }
+
+ if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
+ saddrptr = (uint32_t *)(&ipsl->ipsl_local);
+ pfxlen = ipsl->ipsl_local_pfxlen;
+ } else {
+ saddrptr = (uint32_t *)(&ipv6_all_zeros);
+ pfxlen = 0;
+ }
+ /* XXX What about ICMP type/code? */
+ lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
+ ipsl->ipsl_lport : 0;
+ proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
+ ipsl->ipsl_proto : 0;
+
+ cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
+ af, saddrptr, lport, proto, pfxlen);
+ if (cur == NULL)
+ goto done;
+
+ if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
+ daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
+ pfxlen = ipsl->ipsl_remote_pfxlen;
+ } else {
+ daddrptr = (uint32_t *)(&ipv6_all_zeros);
+ pfxlen = 0;
+ }
+ /* XXX What about ICMP type/code? */
+ rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
+ ipsl->ipsl_rport : 0;
+
+ cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
+ af, daddrptr, rport, proto, pfxlen);
+ if (cur == NULL)
+ goto done;
+
+ /*
+ * TODO - if we go to RFC 3408's dream of transport mode
+ * IP-in-IP _with_ inner-packet address selectors, we'll need
+ * to further distinguish tunnel mode here. For now, having
+ * inner addresses and/or ports is sufficient.
+ *
+ * Meanwhile, whack proto/ports to reflect IP-in-IP for the
+ * outer addresses.
+ */
+ proto = sel->ips_protocol; /* Either _ENCAP or _IPV6 */
+ lport = rport = 0;
+ } else if ((ap != NULL) && (!ap->ipa_want_unique)) {
+ /* Not in tunnel mode, action doesn't want pop from pkt */
+ proto = 0;
+ lport = 0;
+ rport = 0;
+ if (pp != NULL) {
+ ipsl = &(pp->ipsp_sel->ipsl_key);
+ if (ipsl->ipsl_valid & IPSL_PROTOCOL)
+ proto = ipsl->ipsl_proto;
+ if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
+ rport = ipsl->ipsl_rport;
+ if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
+ lport = ipsl->ipsl_lport;
+ }
+ } else {
+ /* Not in tunnel mode, action wants pop from pkt */
+ proto = sel->ips_protocol;
+ lport = sel->ips_local_port;
+ rport = sel->ips_remote_port;
+ }
+
+ af = sel->ips_isv4 ? AF_INET : AF_INET6;
+
+ /*
+ * NOTE: The position of IPv4 and IPv6 addresses is the same
+ * in ipsec_selector_t.
+ */
+ cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
+ (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
+ if (cur == NULL)
+ goto done;
+
+ cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
+ (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
+done:
return (cur);
}
/*
- * Construct a key management cookie extension.
+ * Use byte aligned buffer defined by start and end pointers to create a key
+ * management extension using kmc and kmp uint32_t parameters.
*/
-
static uint8_t *
-sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
+sadb_make_kmc_ext(const uint8_t *start, const uint8_t *end,
+ uint32_t kmp, uint32_t kmc)
{
+ uint8_t *cur = (uint8_t *)start;
sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
- if (cur == NULL)
- return (NULL);
+ ASSERT(cur != NULL && end != NULL);
cur += sizeof (*kmcext);
if (cur > end)
return (NULL);
@@ -3196,11 +3353,11 @@
*/
newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
- mutex_enter(&ipss->ipsec_alg_lock);
+ rw_enter(&ipss->ipsec_alg_lock, RW_READER);
alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
[newbie->ipsa_auth_alg];
if (alg != NULL && ALG_VALID(alg)) {
newbie->ipsa_amech.cm_type = alg->alg_mech_type;
newbie->ipsa_amech.cm_param =
@@ -3209,11 +3366,11 @@
newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
} else {
newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
}
error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
- mutex_exit(&ipss->ipsec_alg_lock);
+ rw_exit(&ipss->ipsec_alg_lock);
if (error != 0) {
mutex_exit(&newbie->ipsa_lock);
/*
* An error here indicates that alg is the wrong type
* (IE: not authentication) or its not in the alg tables
@@ -3226,11 +3383,11 @@
goto error;
}
}
if (ekey != NULL) {
- mutex_enter(&ipss->ipsec_alg_lock);
+ rw_enter(&ipss->ipsec_alg_lock, RW_READER);
async = async || (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
IPSEC_ALGS_EXEC_ASYNC);
alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
[newbie->ipsa_encr_alg];
@@ -3259,11 +3416,11 @@
newbie->ipsa_emech.cm_param = NULL;
newbie->ipsa_emech.cm_param_len = 0;
} else {
newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
}
- mutex_exit(&ipss->ipsec_alg_lock);
+ rw_exit(&ipss->ipsec_alg_lock);
/*
* The byte stream following the sadb_key_t is made up of:
* key bytes, [salt bytes], [IV initial value]
* All of these have variable length. The IV is typically
@@ -3371,13 +3528,13 @@
*/
newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
- mutex_enter(&ipss->ipsec_alg_lock);
+ rw_enter(&ipss->ipsec_alg_lock, RW_READER);
error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
- mutex_exit(&ipss->ipsec_alg_lock);
+ rw_exit(&ipss->ipsec_alg_lock);
if (error != 0) {
mutex_exit(&newbie->ipsa_lock);
/* See above for error explanation. */
*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
goto error;
@@ -3797,16 +3954,13 @@
mp = mp->b_cont;
end = mp->b_wptr + alloclen;
samsg = (sadb_msg_t *)mp->b_wptr;
mp->b_wptr += sizeof (*samsg);
- samsg->sadb_msg_version = PF_KEY_V2;
- samsg->sadb_msg_type = SADB_EXPIRE;
- samsg->sadb_msg_errno = 0;
+ INITIALIZE_SAMSG(samsg, SADB_EXPIRE);
samsg->sadb_msg_satype = assoc->ipsa_type;
samsg->sadb_msg_len = SADB_8TO64(alloclen);
- samsg->sadb_msg_reserved = 0;
samsg->sadb_msg_seq = 0;
samsg->sadb_msg_pid = 0;
saext = (sadb_sa_t *)mp->b_wptr;
mp->b_wptr += sizeof (*saext);
@@ -4094,11 +4248,11 @@
return (retval);
}
/*
* Called by a consumer protocol to do ther dirty work of reaping dead
- * Security Associations.
+ * Security Associations and outstanding acquire records.
*
* NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
* SA's that are already marked DEAD, so expired SA's are only reaped
* the second time sadb_ager() runs.
*/
@@ -4808,10 +4962,12 @@
{
ipsacq_t *walker;
sa_family_t fam;
uint32_t blank_address[4] = {0, 0, 0, 0};
+ ASSERT(MUTEX_HELD(&bucket->iacqf_lock));
+
if (isrc == NULL) {
ASSERT(idst == NULL);
isrc = idst = blank_address;
}
@@ -4841,68 +4997,72 @@
return (walker);
}
/*
- * For this mblk, insert a new acquire record. Assume bucket contains addrs
- * of all of the same length. Give up (and drop) if memory
- * cannot be allocated for a new one; otherwise, invoke callback to
- * send the acquire up..
+ * Takes pointers to mblk_t (packet for which we need to acquire an SA) and
+ * ip_xmit_attr_t (transmit attributes used to generate or retrieve acquire
+ * record) and two booleans need_ah and need_esp, at least one of which
+ * must be true. Acquire records are stored in hash buckets, and we assume
+ * bucket contains addrs all of the same length. If this is a new acquire
+ * record, we generate an acquire samsg to send to protocol keysock layer,
+ * which assumes ownership from there. If we run into problems along the way,
+ * we generate errors if possible and drop packets if need be. Before sending
+ * to keysock, we simply unlock the acquire record and let the ager deal with
+ * releasing locks and freeing resources.
*
- * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
- * list. The ah_add_sa_finish() routines can look at the packet's attached
- * attributes and handle this case specially.
+ * This code is called by the IP stack when trying to send a packet for which
+ * all necessary SAs can't be found to include in ip_xmit_attr_t. Be aware of
+ * the following case: you need both ESP and AH and have SAs for neither. In
+ * that case both need_esp and need_ah are true, but we go with need_esp, as
+ * ESP will call us back for an AH acquire if it's successful and the AH SA is
+ * still missing. It can also be that the packet needs both, but an SA already
+ * exists for one, in which case only the missing one will be flagged as
+ * needed, although the ipsec_action_t has want flags for both.
*/
void
sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
boolean_t need_esp)
{
- mblk_t *asyncmp;
+ mblk_t *asyncmp, *regular, *extended, *prop_m, *eprop_m;
sadbp_t *spp;
sadb_t *sp;
ipsacq_t *newbie;
iacqf_t *bucket;
- mblk_t *extended;
ipha_t *ipha = (ipha_t *)datamp->b_rptr;
ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
- uint32_t *src, *dst, *isrc, *idst;
+ uint32_t seq, *src, *dst, *isrc, *idst;
ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
ipsec_action_t *ap = ixa->ixa_ipsec_action;
sa_family_t af;
- int hashoffset;
- uint32_t seq;
+ int hashoffset, sens_len;
uint64_t unique_id = 0;
+ uint_t propsize, epropsize, combs_limit;
+ uint8_t *start, *end;
+ sadb_msg_t *samsg;
+ sadb_prop_t *prop, *eprop;
ipsec_selector_t sel;
boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
ts_label_t *tsl = NULL;
netstack_t *ns = ixa->ixa_ipst->ips_netstack;
ipsec_stack_t *ipss = ns->netstack_ipsec;
sadb_sens_t *sens = NULL;
- int sens_len;
-
- ASSERT((pp != NULL) || (ap != NULL));
-
- ASSERT(need_ah != NULL || need_esp != NULL);
-
- /* Assign sadb pointers */
- if (need_esp) { /* ESP for AH+ESP */
ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
-
- spp = &espstack->esp_sadb;
- } else {
ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
+ queue_t *q;
- spp = &ahstack->ah_sadb;
- }
+ ASSERT(need_ah || need_esp);
+ ASSERT((ap != NULL) || (pp != NULL));
+
+ spp = need_esp ? &espstack->esp_sadb : &ahstack->ah_sadb;
sp = (ixa->ixa_flags & IXAF_IS_IPV4) ? &spp->s_v4 : &spp->s_v6;
if (is_system_labeled())
tsl = ixa->ixa_tsl;
if (ap == NULL)
ap = pp->ipsp_act;
-
ASSERT(ap != NULL);
if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
unique_id = SA_FORM_UNIQUE_ID(ixa);
@@ -4911,24 +5071,25 @@
*
* Immediately, make sure the ACQUIRE sequence number doesn't slip
* below the lowest point allowed in the kernel. (In other words,
* make sure the high bit on the sequence number is set.)
*/
-
seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
src = (uint32_t *)&ipha->ipha_src;
dst = (uint32_t *)&ipha->ipha_dst;
af = AF_INET;
+ ip6h = NULL;
hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
} else {
ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
src = (uint32_t *)&ip6h->ip6_src;
dst = (uint32_t *)&ip6h->ip6_dst;
af = AF_INET6;
+ ipha = NULL;
hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
}
if (tunnel_mode) {
@@ -4950,22 +5111,20 @@
} else {
isrc = idst = NULL;
}
/*
- * Check buckets to see if there is an existing entry. If so,
- * grab it. sadb_checkacquire locks newbie if found.
+ * Check bucket for existing matching entry. If so, grab it. On match
+ * sadb_checkacquire returns locked newbie.
*/
bucket = &(sp->sdb_acq[hashoffset]);
mutex_enter(&bucket->iacqf_lock);
newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
unique_id, tsl);
+ /* If not found, initialize a new one and insert into chain. */
if (newbie == NULL) {
- /*
- * Otherwise, allocate a new one.
- */
newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
if (newbie == NULL) {
mutex_exit(&bucket->iacqf_lock);
ip_drop_packet(datamp, B_FALSE, NULL,
DROPPER(ipss, ipds_sadb_acquire_nomem),
@@ -4998,49 +5157,40 @@
* disperse blame for lock contention.
*
* we might be able to dispense with acquire record locks entirely..
* just use the bucket locks..
*/
-
mutex_exit(&bucket->iacqf_lock);
/*
* This assert looks silly for now, but we may need to enter newbie's
- * mutex during a search.
+ * mutex during a search. Confirms we got locked newbie from
+ * sadb_checkacquire.
*/
ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
- /*
- * Make the ip_xmit_attr_t into something we can queue.
- * If no memory it frees datamp.
- */
+ /* Make ip_xmit_attr_t into message we can queue, link packet data. */
asyncmp = ip_xmit_attr_to_mblk(ixa);
if (asyncmp != NULL)
linkb(asyncmp, datamp);
- /* Queue up packet. Use b_next. */
-
+ /* Bump appropriate discard stat & free datamp if allocation failed. */
if (asyncmp == NULL) {
- /* Statistics for allocation failure */
if (ixa->ixa_flags & IXAF_IS_IPV4) {
BUMP_MIB(&ixa->ixa_ipst->ips_ip_mib,
ipIfStatsOutDiscards);
} else {
BUMP_MIB(&ixa->ixa_ipst->ips_ip6_mib,
ipIfStatsOutDiscards);
}
ip_drop_output("No memory for asyncmp", datamp, NULL);
freemsg(datamp);
- } else if (newbie->ipsacq_numpackets == 0) {
- /* First one. */
+ } else if (newbie->ipsacq_numpackets == 0) { /* Pkt queue forms here. */
newbie->ipsacq_mp = asyncmp;
newbie->ipsacq_numpackets = 1;
newbie->ipsacq_expire = gethrestime_sec();
- /*
- * Extended ACQUIRE with both AH+ESP will use ESP's timeout
- * value.
- */
+ /* Extended ACQUIRE with AH+ESP uses ESP's timeout */
newbie->ipsacq_expire += *spp->s_acquire_timeout;
newbie->ipsacq_seq = seq;
newbie->ipsacq_addrfam = af;
newbie->ipsacq_srcport = ixa->ixa_ipsec_src_port;
@@ -5060,33 +5210,32 @@
} else {
newbie->ipsacq_proto = ixa->ixa_ipsec_proto;
}
newbie->ipsacq_unique_id = unique_id;
- if (ixa->ixa_tsl != NULL) {
- label_hold(ixa->ixa_tsl);
- newbie->ipsacq_tsl = ixa->ixa_tsl;
+ if (tsl != NULL) {
+ label_hold(tsl);
+ newbie->ipsacq_tsl = tsl;
}
- } else {
- /* Scan to the end of the list & insert. */
+ } else { /* Attempt to join packet queue as b_next. */
mblk_t *lastone = newbie->ipsacq_mp;
while (lastone->b_next != NULL)
lastone = lastone->b_next;
lastone->b_next = asyncmp;
+ /* Queue maxed: set counter to max, unchain, free & drop pkt */
if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
newbie->ipsacq_numpackets = ipsacq_maxpackets;
lastone = newbie->ipsacq_mp;
newbie->ipsacq_mp = lastone->b_next;
lastone->b_next = NULL;
- /* Freeing the async message */
lastone = ip_xmit_attr_free_mblk(lastone);
ip_drop_packet(lastone, B_FALSE, NULL,
DROPPER(ipss, ipds_sadb_acquire_toofull),
&ipss->ipsec_sadb_dropper);
- } else {
+ } else { /* Successfully queued */
IP_ACQUIRE_STAT(ipss, qhiwater,
newbie->ipsacq_numpackets);
}
}
@@ -5098,25 +5247,62 @@
newbie->ipsacq_srcaddr = src;
newbie->ipsacq_dstaddr = dst;
/*
- * If the acquire record has more than one queued packet, we've
- * already sent an ACQUIRE, and don't need to repeat ourself.
+ * Sequence number mismatch or previously populated packet queue means
+ * we retrieved an already-pending ACQUIRE record and needn't repeat
+ * ourself. Unlock and return.
*/
- if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
- /* I have an acquire outstanding already! */
- mutex_exit(&newbie->ipsacq_lock);
- return;
+ if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1)
+ goto unlock_acqrec;
+
+ /*
+ * Even if we fail before sending to keysock, starting with a NULL
+ * queue pointer, if we get this far, it counts as an acquire request.
+ */
+ if (need_esp) {
+ ESP_BUMP_STAT(espstack, acquire_requests);
+ q = espstack->esp_pfkey_q;
+ } else {
+ AH_BUMP_STAT(ahstack, acquire_requests);
+ q = ahstack->ah_pfkey_q;
}
- if (!keysock_extended_reg(ns))
- goto punt_extended;
+ if (q == NULL)
+ goto unlock_acqrec;
+
+ /* Initializes keysock M_CTL message for regular acquire. */
+ regular = sadb_keysock_out(0);
+ if (regular == NULL)
+ goto unlock_acqrec;
+
/*
- * Construct an extended ACQUIRE. There are logging
- * opportunities here in failure cases.
+ * If keysock_extended_reg() is true for this stack, also allocate a
+ * keysock M_CTL message for an extended acquire. Otherwise set extended
+ * to NULL so that every later extended-acquire step is skipped.
*/
+ if (keysock_extended_reg(ns)) {
+ extended = sadb_keysock_out(0);
+ if (extended == NULL)
+ goto bail_and_free_regular;
+ } else {
+ extended = NULL;
+ }
+
+ if (tsl != NULL) {
+ /*
+ * XXX MLS correct condition here?
+ * XXX MLS other credential attributes in acquire?
+ * XXX malloc failure? don't fall back to original?
+ */
+ sens = sadb_make_sens_ext(tsl, &sens_len);
+
+ if (sens == NULL)
+ goto bail_extended;
+ }
+ /* re-initialize selector using ixa and ipha */
bzero(&sel, sizeof (sel));
sel.ips_isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
if (tunnel_mode) {
sel.ips_protocol = (ixa->ixa_ipsec_inaf == AF_INET) ?
IPPROTO_ENCAP : IPPROTO_IPV6;
@@ -5134,49 +5320,100 @@
} else {
sel.ips_local_addr_v6 = ip6h->ip6_src;
sel.ips_remote_addr_v6 = ip6h->ip6_dst;
}
- extended = sadb_keysock_out(0);
- if (extended == NULL)
- goto punt_extended;
+ /* Tack message containing sadb_msg_t onto keysock regular M_CTL */
+ regular->b_cont = sadb_construct_acqmsg(newbie, &sel, ap, pp, ns, sens,
+ need_esp, tunnel_mode, B_FALSE, B_FALSE); /* regular, no props */
+ /* We have to do this, no matter the result of previous call */
+ if (sens != NULL)
+ kmem_free(sens, sens_len);
- if (ixa->ixa_tsl != NULL) {
+ if (regular->b_cont == NULL)
+ goto bail_extended;
+
/*
- * XXX MLS correct condition here?
- * XXX MLS other credential attributes in acquire?
- * XXX malloc failure? don't fall back to original?
+ * If an extended acquire is being built, duplicate regular samsg, tacking
+ * it on as the b_cont of the keysock-generated extended M_CTL.
*/
- sens = sadb_make_sens_ext(ixa->ixa_tsl, &sens_len);
+ if (extended != NULL) {
+ extended->b_cont = dupb(regular->b_cont);
+ if (extended->b_cont == NULL)
+ goto bail_extended;
+ }
- if (sens == NULL) {
- freeb(extended);
- goto punt_extended;
+ rw_enter(&ipss->ipsec_alg_lock, RW_READER);
+ CALC_COMBS(combs_limit, ipss, need_esp);
+ propsize = sizeof (sadb_prop_t) + (combs_limit * sizeof (sadb_comb_t));
+
+ if ((prop_m = allocb(propsize, BPRI_HI)) == NULL)
+ goto bail_and_unlock;
+
+ if (extended != NULL) {
+ epropsize = sizeof (sadb_prop_t)
+ + (combs_limit * sizeof (sadb_x_ecomb_t));
+ if ((eprop_m = allocb(epropsize, BPRI_HI)) == NULL)
+ goto bail_and_unlock;
}
- }
- extended->b_cont = sadb_extended_acquire(&sel, pp, ap, tunnel_mode,
- seq, 0, sens, ns);
+ prop = (sadb_prop_t *)prop_m->b_rptr;
+ sadb_insert_prop(prop, ap, ns, combs_limit, need_esp);
+ if (prop == NULL) {
+ goto bail_and_unlock;
+ /* 0 length prop is error, mark regular samsg a dud, & freeb prop_m */
+ } else {
+ samsg = (sadb_msg_t *)regular->b_cont->b_rptr;
- if (sens != NULL)
- kmem_free(sens, sens_len);
+ if (prop->sadb_prop_len == 0) {
+ ERRNO_SAMSG(samsg, ENOENT);
+ freeb(prop_m);
+ }
+ samsg->sadb_msg_len += prop->sadb_prop_len;
+ prop_m->b_wptr += SADB_64TO8(prop->sadb_prop_len);
+ regular->b_cont->b_cont = prop_m;
+ }
- if (extended->b_cont == NULL) {
- freeb(extended);
- goto punt_extended;
+ if (extended != NULL) {
+ start = (uint8_t *)eprop_m->b_rptr;
+ end = start + epropsize;
+ eprop =
+ (sadb_prop_t *)sadb_construct_eprop(ap, pp, ns, start, end);
+ if (eprop == NULL)
+ goto bail_and_unlock;
+ /* If 0 ecombs, mark extended samsg a dud, and freeb eprop_m */
+ else {
+ samsg = (sadb_msg_t *)extended->b_cont->b_rptr;
+
+ if (eprop->sadb_x_prop_numecombs == 0) {
+ ERRNO_SAMSG(samsg, ENOENT);
+ freeb(eprop_m);
}
+ samsg->sadb_msg_len += eprop->sadb_prop_len;
+ eprop_m->b_wptr += SADB_64TO8(eprop->sadb_prop_len);
+ extended->b_cont->b_cont = eprop_m;
+ }
+ }
- /*
- * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
- * this new record. The send-acquire callback assumes that acqrec is
- * already locked.
- */
- (*spp->s_acqfn)(newbie, extended, ns);
+ rw_exit(&ipss->ipsec_alg_lock);
+ mutex_exit(&newbie->ipsacq_lock);
+
+ if (extended != NULL)
+ putnext(q, extended);
+ putnext(q, regular);
return;
-punt_extended:
- (*spp->s_acqfn)(newbie, NULL, ns);
+/* We used a lot of b_cont mblk chaining, so we need to use freemsg. */
+bail_and_unlock:
+ rw_exit(&ipss->ipsec_alg_lock);
+bail_extended:
+ if (extended != NULL)
+ freemsg(extended);
+bail_and_free_regular:
+ freemsg(regular);
+unlock_acqrec:
+ mutex_exit(&newbie->ipsacq_lock);
}
/*
* Unlink and free an acquire record.
*/
@@ -5186,10 +5423,11 @@
mblk_t *mp;
ipsec_stack_t *ipss = ns->netstack_ipsec;
ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
+ /* XXX Should references be released before mutex is acquired? */
if (acqrec->ipsacq_policy != NULL) {
IPPOL_REFRELE(acqrec->ipsacq_policy);
}
if (acqrec->ipsacq_act != NULL) {
IPACT_REFRELE(acqrec->ipsacq_act);
@@ -5260,41 +5498,39 @@
/*
* Create an algorithm descriptor for an extended ACQUIRE. Filter crypto
* framework's view of reality vs. IPsec's. EF's wins, BTW.
*/
static uint8_t *
-sadb_new_algdesc(uint8_t *start, uint8_t *limit,
+sadb_new_algdesc(const uint8_t *start, const uint8_t *end,
sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
{
- uint8_t *cur = start;
+ uint8_t *cur = (uint8_t *)start;
ipsec_alginfo_t *algp;
sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
+ ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
+
cur += sizeof (*algdesc);
- if (cur >= limit)
+ if (cur >= end)
return (NULL);
ecomb->sadb_x_ecomb_numalgs++;
/*
* Normalize vs. crypto framework's limits. This way, you can specify
* a stronger policy, and when the framework loads a stronger version,
* you can just keep plowing w/o rewhacking your SPD.
*/
- mutex_enter(&ipss->ipsec_alg_lock);
algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
- if (algp == NULL) {
- mutex_exit(&ipss->ipsec_alg_lock);
+ if (algp == NULL)
return (NULL); /* Algorithm doesn't exist. Fail gracefully. */
- }
if (minbits < algp->alg_ef_minbits)
minbits = algp->alg_ef_minbits;
if (maxbits > algp->alg_ef_maxbits)
maxbits = algp->alg_ef_maxbits;
- mutex_exit(&ipss->ipsec_alg_lock);
algdesc->sadb_x_algdesc_reserved = SADB_8TO1(algp->alg_saltlen);
algdesc->sadb_x_algdesc_satype = satype;
algdesc->sadb_x_algdesc_algtype = algtype;
algdesc->sadb_x_algdesc_alg = alg;
@@ -5303,32 +5539,33 @@
return (cur);
}
/*
- * Convert the given ipsec_action_t into an ecomb starting at *ecomb
- * which must fit before *limit
- *
- * return NULL if we ran out of room or a pointer to the end of the ecomb.
+ * Use buffer defined by byte-aligned pointers start and end to convert
+ * ipsec_action_t pointer act into an ecomb, using alg data hanging off of
+ * netstack_t pointer ns. Return NULL rather than overrun buffer, otherwise
+ * pointer to end of ecomb (which should be exact size of buffer).
*/
static uint8_t *
-sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
- netstack_t *ns)
+sadb_action_to_ecomb(const uint8_t *start, const uint8_t *end,
+ const ipsec_action_t *act, netstack_t *ns)
{
- uint8_t *cur = start;
+ uint8_t *cur = (uint8_t *)start;
sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
ipsec_prot_t *ipp;
ipsec_stack_t *ipss = ns->netstack_ipsec;
+ ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
+ ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
+
cur += sizeof (*ecomb);
- if (cur >= limit)
+ if (cur >= end)
return (NULL);
- ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
+ ipp = &((ipsec_action_t *)act)->ipa_act.ipa_apply;
- ipp = &act->ipa_act.ipa_apply;
-
ecomb->sadb_x_ecomb_numalgs = 0;
ecomb->sadb_x_ecomb_reserved = 0;
ecomb->sadb_x_ecomb_reserved2 = 0;
/*
* No limits on allocations, since we really don't support that
@@ -5348,30 +5585,30 @@
ecomb->sadb_x_ecomb_hard_addtime = 0;
ecomb->sadb_x_ecomb_soft_usetime = 0;
ecomb->sadb_x_ecomb_hard_usetime = 0;
if (ipp->ipp_use_ah) {
- cur = sadb_new_algdesc(cur, limit, ecomb,
+ cur = sadb_new_algdesc(cur, end, ecomb,
SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
if (cur == NULL)
return (NULL);
ipsecah_fill_defs(ecomb, ns);
}
if (ipp->ipp_use_esp) {
if (ipp->ipp_use_espa) {
- cur = sadb_new_algdesc(cur, limit, ecomb,
+ cur = sadb_new_algdesc(cur, end, ecomb,
SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
ipp->ipp_esp_auth_alg,
ipp->ipp_espa_minbits,
ipp->ipp_espa_maxbits, ipss);
if (cur == NULL)
return (NULL);
}
- cur = sadb_new_algdesc(cur, limit, ecomb,
+ cur = sadb_new_algdesc(cur, end, ecomb,
SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
ipp->ipp_encr_alg,
ipp->ipp_espe_minbits,
ipp->ipp_espe_maxbits, ipss);
if (cur == NULL)
@@ -5477,393 +5714,410 @@
}
/* End XXX label-library-leakage */
/*
- * Construct an extended ACQUIRE message based on a selector and the resulting
- * IPsec action.
- *
- * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
- * generation. As a consequence, expect this function to evolve
- * rapidly.
+ * Takes a pointer to sadb_prop_t (what we're initializing), ipsec_action_t
+ * (first action in chain we need to walk of actions for each alg
+ * combination), netstack_t (contains pointers to alg properties and
+ * per-protocol settings), a combs_limit integer (maximum applicable
+ * combinations derived from per-protocol netstack_t alg array), and need_esp
+ * boolean_t. We distinguish between two error cases: we exceed combs_limit,
+ * which should only be a kernel bug (ipsec_alg_lock is our shepherd), or we
+ * have an alg ID with a NULL netstack member or member with the valid bit
+ * flipped, both of which indicate the need to reset state, which we flag by
+ * returning no combs. We return NULL if we exceed combs_limit and zero-length
+ * prop if we run into an alg that can't be transferred into the prop.
*/
-static mblk_t *
-sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
- ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
- sadb_sens_t *sens, netstack_t *ns)
+static void
+sadb_insert_prop(sadb_prop_t *prop, const ipsec_action_t *ap, netstack_t *ns,
+ uint_t combs_limit, boolean_t need_esp)
{
- mblk_t *mp;
- sadb_msg_t *samsg;
- uint8_t *start, *cur, *end;
- uint32_t *saddrptr, *daddrptr;
- sa_family_t af;
- sadb_prop_t *eprop;
- ipsec_action_t *ap, *an;
- ipsec_selkey_t *ipsl;
- uint8_t proto, pfxlen;
- uint16_t lport, rport;
- uint32_t kmp, kmc;
+ sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
+ ipsec_action_t *act = (ipsec_action_t *)ap;
+ ipsec_prot_t *prot;
+ ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
+ ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
+ ipsec_stack_t *ipss = ns->netstack_ipsec;
+ boolean_t need_ah = !need_esp;
- /*
- * Find the action we want sooner rather than later..
- */
- an = NULL;
- if (pol == NULL) {
- ap = act;
- } else {
- ap = pol->ipsp_act;
+ ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
+ ASSERT((need_esp && ap->ipa_want_esp) || (need_ah && ap->ipa_want_ah));
- if (ap != NULL)
- an = ap->ipa_next;
- }
+ prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
+ prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
+ *(uint32_t *)(&prop->sadb_prop_replay) = 0; /* Quick zero-out! */
+ prop->sadb_prop_replay = need_esp ?
+ espstack->ipsecesp_replay_size : ahstack->ipsecah_replay_size;
- /*
- * Just take a swag for the allocation for now. We can always
- * alter it later.
- */
-#define SADB_EXTENDED_ACQUIRE_SIZE 4096
- mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
- if (mp == NULL)
- return (NULL);
+ /* Prioritize a proposal, preserving policy order. */
+ for (; act != NULL; act = act->ipa_next) {
+ ipsec_alginfo_t *aalg = NULL;
+ ipsec_alginfo_t *ealg = NULL;
- start = mp->b_rptr;
- end = start + SADB_EXTENDED_ACQUIRE_SIZE;
+ if ((act->ipa_act.ipa_type != IPSEC_POLICY_APPLY) ||
+ (need_esp && !act->ipa_act.ipa_apply.ipp_use_esp) ||
+ (need_ah && !act->ipa_act.ipa_apply.ipp_use_ah))
+ continue;
- cur = start;
+ if (--combs_limit == 0) {
+ prop = NULL;
+ return;
+ }
- samsg = (sadb_msg_t *)cur;
- cur += sizeof (*samsg);
+ prot = &act->ipa_act.ipa_apply;
- samsg->sadb_msg_version = PF_KEY_V2;
- samsg->sadb_msg_type = SADB_ACQUIRE;
- samsg->sadb_msg_errno = 0;
- samsg->sadb_msg_reserved = 0;
- samsg->sadb_msg_satype = 0;
- samsg->sadb_msg_seq = seq;
- samsg->sadb_msg_pid = pid;
-
- if (tunnel_mode) {
/*
- * Form inner address extensions based NOT on the inner
- * selectors (i.e. the packet data), but on the policy's
- * selector key (i.e. the policy's selector information).
- *
- * NOTE: The position of IPv4 and IPv6 addresses is the
- * same in ipsec_selkey_t (unless the compiler does very
- * strange things with unions, consult your local C language
- * lawyer for details).
+ * Alg ID 0 is none/any, which is valid only for ESP without
+ * message integrity (ipp_esp_auth_alg). NULL encryption ESP
+ * uses a distinct alg, non-zero ID.
*/
- ASSERT(pol != NULL);
-
- ipsl = &(pol->ipsp_sel->ipsl_key);
- if (ipsl->ipsl_valid & IPSL_IPV4) {
- af = AF_INET;
- ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
- ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
- } else {
- af = AF_INET6;
- ASSERT(sel->ips_protocol == IPPROTO_IPV6);
- ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
+ if ((need_esp && prot->ipp_esp_auth_alg != 0) || need_ah) {
+ ASSERT(need_esp || (prot->ipp_auth_alg > 0));
+ aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][need_esp ?
+ prot->ipp_esp_auth_alg : prot->ipp_auth_alg];
+ if (aalg == NULL || !ALG_VALID(aalg))
+ goto failure;
}
- if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
- saddrptr = (uint32_t *)(&ipsl->ipsl_local);
- pfxlen = ipsl->ipsl_local_pfxlen;
- } else {
- saddrptr = (uint32_t *)(&ipv6_all_zeros);
- pfxlen = 0;
+ if (need_esp) {
+ ASSERT(prot->ipp_encr_alg > 0);
+ ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
+ [prot->ipp_encr_alg];
+ if (ealg == NULL || !ALG_VALID(ealg))
+ goto failure;
}
- /* XXX What about ICMP type/code? */
- lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
- ipsl->ipsl_lport : 0;
- proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
- ipsl->ipsl_proto : 0;
- cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
- af, saddrptr, lport, proto, pfxlen);
- if (cur == NULL) {
- freeb(mp);
- return (NULL);
- }
+ comb->sadb_comb_flags = 0;
+ comb->sadb_comb_reserved = 0;
- if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
- daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
- pfxlen = ipsl->ipsl_remote_pfxlen;
+ if (ealg != NULL) {
+ comb->sadb_comb_encrypt = ealg->alg_id;
+ comb->sadb_comb_encrypt_minbits =
+ MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
+ comb->sadb_comb_encrypt_maxbits =
+ MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
} else {
- daddrptr = (uint32_t *)(&ipv6_all_zeros);
- pfxlen = 0;
+ comb->sadb_comb_encrypt = 0;
+ comb->sadb_comb_encrypt_minbits = 0;
+ comb->sadb_comb_encrypt_maxbits = 0;
}
- /* XXX What about ICMP type/code? */
- rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
- ipsl->ipsl_rport : 0;
- cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
- af, daddrptr, rport, proto, pfxlen);
- if (cur == NULL) {
- freeb(mp);
- return (NULL);
- }
- /*
- * TODO - if we go to 3408's dream of transport mode IP-in-IP
- * _with_ inner-packet address selectors, we'll need to further
- * distinguish tunnel mode here. For now, having inner
- * addresses and/or ports is sufficient.
- *
- * Meanwhile, whack proto/ports to reflect IP-in-IP for the
- * outer addresses.
- */
- proto = sel->ips_protocol; /* Either _ENCAP or _IPV6 */
- lport = rport = 0;
- } else if ((ap != NULL) && (!ap->ipa_want_unique)) {
- proto = 0;
- lport = 0;
- rport = 0;
- if (pol != NULL) {
- ipsl = &(pol->ipsp_sel->ipsl_key);
- if (ipsl->ipsl_valid & IPSL_PROTOCOL)
- proto = ipsl->ipsl_proto;
- if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
- rport = ipsl->ipsl_rport;
- if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
- lport = ipsl->ipsl_lport;
- }
+ if (aalg != NULL) {
+ uint16_t minbits, maxbits;
+ minbits = need_esp ?
+ prot->ipp_espa_minbits : prot->ipp_ah_minbits;
+ maxbits = need_esp ?
+ prot->ipp_espa_maxbits : prot->ipp_ah_maxbits;
+ comb->sadb_comb_auth = aalg->alg_id;
+ comb->sadb_comb_auth_minbits =
+ MAX(minbits, aalg->alg_ef_minbits);
+ comb->sadb_comb_auth_maxbits =
+ MIN(maxbits, aalg->alg_ef_maxbits);
} else {
- proto = sel->ips_protocol;
- lport = sel->ips_local_port;
- rport = sel->ips_remote_port;
+ comb->sadb_comb_auth = 0;
+ comb->sadb_comb_auth_minbits = 0;
+ comb->sadb_comb_auth_maxbits = 0;
}
- af = sel->ips_isv4 ? AF_INET : AF_INET6;
-
/*
- * NOTE: The position of IPv4 and IPv6 addresses is the same in
- * ipsec_selector_t.
+ * The following may be based on algorithm properties, but in
+ * the meantime, we just pick some good, sensible numbers.
+ * Key mgmt. can (and perhaps should) be the place to finalize
+ * such decisions.
*/
- cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
- (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
- if (cur == NULL) {
- freeb(mp);
- return (NULL);
+ /* 0 == unlimited == unsupported */
+ comb->sadb_comb_soft_allocations = 0;
+ comb->sadb_comb_hard_allocations = 0;
+
+ /* These may want to come from policy rule. */
+ if (need_esp) {
+ comb->sadb_comb_soft_bytes =
+ espstack->ipsecesp_default_soft_bytes;
+ comb->sadb_comb_hard_bytes =
+ espstack->ipsecesp_default_hard_bytes;
+ comb->sadb_comb_soft_addtime =
+ espstack->ipsecesp_default_soft_addtime;
+ comb->sadb_comb_hard_addtime =
+ espstack->ipsecesp_default_hard_addtime;
+ comb->sadb_comb_soft_usetime =
+ espstack->ipsecesp_default_soft_usetime;
+ comb->sadb_comb_hard_usetime =
+ espstack->ipsecesp_default_hard_usetime;
+ } else {
+ comb->sadb_comb_soft_bytes =
+ ahstack->ipsecah_default_soft_bytes;
+ comb->sadb_comb_hard_bytes =
+ ahstack->ipsecah_default_hard_bytes;
+ comb->sadb_comb_soft_addtime =
+ ahstack->ipsecah_default_soft_addtime;
+ comb->sadb_comb_hard_addtime =
+ ahstack->ipsecah_default_hard_addtime;
+ comb->sadb_comb_soft_usetime =
+ ahstack->ipsecah_default_soft_usetime;
+ comb->sadb_comb_hard_usetime =
+ ahstack->ipsecah_default_hard_usetime;
}
- cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
- (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
-
- if (cur == NULL) {
- freeb(mp);
- return (NULL);
+ prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
+ comb++;
}
- if (sens != NULL) {
- uint8_t *sensext = cur;
- int senslen = SADB_64TO8(sens->sadb_sens_len);
+ return;
- cur += senslen;
- if (cur > end) {
- freeb(mp);
- return (NULL);
- }
- bcopy(sens, sensext, senslen);
- }
+failure:
+ prop->sadb_prop_len = 0;
+}
- /*
- * This section will change a lot as policy evolves.
- * For now, it'll be relatively simple.
+/*
+ * Construct extended properties using ipsec_action_t, ipsec_policy_t, and
+ * netstack_t pointers. Byte-aligned pointers start and end are used for bounds
+ * checking here and in called code. We don't set length if numecombs is 0, so
+ * callers must check this for error handling.
*/
- eprop = (sadb_prop_t *)cur;
+static uint8_t *
+sadb_construct_eprop(const ipsec_action_t *act, const ipsec_policy_t *pp,
+ netstack_t *ns, const uint8_t *start, const uint8_t *end)
+{
+ uint8_t *cur = (uint8_t *)start;
+ sadb_prop_t *eprop = (sadb_prop_t *)cur;
+ ipsec_action_t *an, *ap = (ipsec_action_t *)act;
+ ipsec_stack_t *ipss = ns->netstack_ipsec;
+
+ ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
+
cur += sizeof (*eprop);
- if (cur > end) {
- /* no space left */
- freeb(mp);
+ if (cur > end)
return (NULL);
- }
eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
eprop->sadb_x_prop_ereserved = 0;
eprop->sadb_x_prop_numecombs = 0;
eprop->sadb_prop_replay = 32; /* default */
- kmc = kmp = 0;
-
for (; ap != NULL; ap = an) {
- an = (pol != NULL) ? ap->ipa_next : NULL;
-
/*
- * Skip non-IPsec policies
+ * XXX Don't walk past first ap if there's no pp. Not clear on
+ * the rationale for this, but it's what extended path did.
*/
+ an = (pp != NULL) ? ap->ipa_next : NULL;
+
if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
continue;
- if (ap->ipa_act.ipa_apply.ipp_km_proto)
- kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
- if (ap->ipa_act.ipa_apply.ipp_km_cookie)
- kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
eprop->sadb_prop_replay =
ap->ipa_act.ipa_apply.ipp_replay_depth;
}
cur = sadb_action_to_ecomb(cur, end, ap, ns);
- if (cur == NULL) { /* no space */
- freeb(mp);
+ if (cur == NULL)
return (NULL);
- }
eprop->sadb_x_prop_numecombs++;
}
- if (eprop->sadb_x_prop_numecombs == 0) {
/*
- * This will happen if we fail to find a policy
- * allowing for IPsec processing.
- * Construct an error message.
+ * This is an error. We return what we've got of eprops, caller needs
+ * to check for condition and pass it further up (e.g. by error samsg).
*/
- samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
- samsg->sadb_msg_errno = ENOENT;
- samsg->sadb_x_msg_diagnostic = 0;
- return (mp);
- }
+ if (eprop->sadb_x_prop_numecombs == 0)
+ return (cur);
- if ((kmp != 0) || (kmc != 0)) {
- cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
- if (cur == NULL) {
- freeb(mp);
+ eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)start);
+
+ return (cur);
+bail:
return (NULL);
+}
+
+/*
+ * Convert ipsec_policy_t and ipsec_action_t pointers to kmc extension. Byte-
+ * aligned start and end pointers used for bounds checking. sadb_x_kmc_t
+ * handling encapsulated in sadb_make_kmc_ext. Returns new value for cur,
+ * NULL on failure.
+ * We encapsulate for recursion since we have to walk ipsec_action_t.
+ */
+static uint8_t *
+sadb_policy_to_kmcext(const ipsec_policy_t *pp, const ipsec_action_t *act,
+ const uint8_t *start, const uint8_t *end)
+{
+ uint8_t *cur = (uint8_t *)start;
+ ipsec_action_t *an, *ap = (ipsec_action_t *)act;
+ uint32_t kmp = 0, kmc = 0;
+
+ for (; ap != NULL; ap = an) {
+ an = (pp != NULL) ? ap->ipa_next : NULL;
+
+ /*
+ * Skip non-IPsec policies
+ */
+ if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
+ continue;
+
+ if (ap->ipa_act.ipa_apply.ipp_km_proto)
+ kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
+ if (ap->ipa_act.ipa_apply.ipp_km_cookie)
+ kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
}
- }
- eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
- samsg->sadb_msg_len = SADB_8TO64(cur - start);
- mp->b_wptr = cur;
+ if ((kmp != 0) || (kmc != 0))
+ cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
- return (mp);
+ return (cur);
}
/*
- * Generic setup of an RFC 2367 ACQUIRE message. Caller sets satype.
- *
- * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
- * succeed (i.e. return non-NULL). Caller MUST release it. This is to
- * maximize code consolidation while preventing algorithm changes from messing
- * with the callers finishing touches on the ACQUIRE itself.
+ * Prepare the SADB_ACQUIRE message proper, which should be a b_cont to a
+ * keysock registered M_CTL message. Takes a pointer to ipsacq_t (optional
+ * acquire record for which we're sending message), ipsec_selector_t,
+ * ipsec_action_t, ipsec_policy_t, netstack_t, and sens (required for called
+ * code to generate the message), and booleans for need_esp, tunnel_mode,
+ * extended, and with_prop (all of these should be self-explanatory). Because
+ * extended messages set satype to SADB_SATYPE_UNSPEC, extended-only callers
+ * can fudge need_esp.
*/
-mblk_t *
-sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
+static mblk_t *
+sadb_construct_acqmsg(ipsacq_t *acqrec, ipsec_selector_t *sel,
+ ipsec_action_t *ap, ipsec_policy_t *pp, netstack_t *ns, sadb_sens_t *sens,
+ boolean_t need_esp, boolean_t tunnel_mode, boolean_t extended,
+ boolean_t with_prop)
{
- uint_t allocsize;
- mblk_t *pfkeymp, *msgmp;
- sa_family_t af;
+ uint_t combs_limit, allocsize;
uint8_t *cur, *end;
sadb_msg_t *samsg;
- uint16_t sport_typecode;
- uint16_t dport_typecode;
- uint8_t check_proto;
- boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
+ sadb_prop_t *prop, *eprop;
+ mblk_t *mp;
+ int satype = extended ? SADB_SATYPE_UNSPEC
+ : (need_esp ? SADB_SATYPE_ESP : SADB_SATYPE_AH);
+ ipsec_stack_t *ipss = ns->netstack_ipsec;
- ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
+ ASSERT((acqrec == NULL) || (MUTEX_HELD(&acqrec->ipsacq_lock)));
+ ASSERT(ap != NULL);
+ ASSERT((pp == NULL) || (pp->ipsp_refs != 0));
+ ASSERT((ap == NULL) || (ap->ipa_refs != 0));
- pfkeymp = sadb_keysock_out(0);
- if (pfkeymp == NULL)
- return (NULL);
+ /*
+ * Set the limit used to size [e]prop [e]combs array to as many
+ * algorithms as defined on the netstack (must hold ipsec_alg_lock
+ * from here to when done reading off netstack for [e]prop
+ * formation). need_esp may be fudged, so be generous to extended.
+ */
+ if (with_prop) {
+ if (extended)
+ need_esp = B_TRUE;
+ rw_enter(&ipss->ipsec_alg_lock, RW_READER);
+ CALC_COMBS(combs_limit, ipss, need_esp);
+ }
/*
- * First, allocate a basic ACQUIRE message
+ * If this code is right, we may not need cur & end for bounds
+ * checking, but we'll keep normal runtime checks until that statement
+ * looks credible rather than merely plausible, at which point checks
+ * can be moved to ASSERTs. sens is variably sized but already
+ * set. kmc is fixed size. Pointers into message are byte-aligned, so
+ * we're generally depending on all structures used in this
+ * calculation to be so, too (in fact, all sadb_*_t types used here
+ * are 64-bit aligned per PF_KEY requirements).
*/
- allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
- sizeof (sadb_address_t) + sizeof (sadb_prop_t);
+ allocsize = sizeof (sadb_msg_t) + sizeof (sadb_prop_t);
+ allocsize += ((tunnel_mode) ? 4 : 2) * (sizeof (sadb_address_t)
+ + SADB_SOCKADDR_SIZE);
+ if (sens != NULL)
+ allocsize += SADB_64TO8(sens->sadb_sens_len);
+ allocsize += sizeof (sadb_x_kmc_t);
+ /* If we need props, size combs/ecombs array using combs_limit */
+ if (with_prop)
+ allocsize += combs_limit * (extended ?
+ sizeof (sadb_x_ecomb_t) : sizeof (sadb_comb_t));
- /* Make sure there's enough to cover both AF_INET and AF_INET6. */
- allocsize += 2 * sizeof (struct sockaddr_in6);
+ ASSERT((allocsize & 0x7) == 0);
- mutex_enter(&ipss->ipsec_alg_lock);
- /* NOTE: The lock is now held through to this function's return. */
- allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
- ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
+ mp = allocb(allocsize, BPRI_HI);
+ if (mp == NULL)
+ goto unlock_and_fail;
- if (tunnel_mode) {
- /* Tunnel mode! */
- allocsize += 2 * sizeof (sadb_address_t);
- /* Enough to cover both AF_INET and AF_INET6. */
- allocsize += 2 * sizeof (struct sockaddr_in6);
- }
-
- msgmp = allocb(allocsize, BPRI_HI);
- if (msgmp == NULL) {
- freeb(pfkeymp);
- mutex_exit(&ipss->ipsec_alg_lock);
- return (NULL);
- }
-
- pfkeymp->b_cont = msgmp;
- cur = msgmp->b_rptr;
+ cur = mp->b_rptr;
end = cur + allocsize;
+
samsg = (sadb_msg_t *)cur;
- cur += sizeof (sadb_msg_t);
+ INITIALIZE_SAMSG(samsg, SADB_ACQUIRE);
+ samsg->sadb_msg_satype = satype;
+ samsg->sadb_msg_pid = 0;
+ samsg->sadb_msg_seq = (acqrec != NULL) ? acqrec->ipsacq_seq : 0;
- af = acqrec->ipsacq_addrfam;
- switch (af) {
- case AF_INET:
- check_proto = IPPROTO_ICMP;
- break;
- case AF_INET6:
- check_proto = IPPROTO_ICMPV6;
- break;
- default:
- /* This should never happen unless we have kernel bugs. */
- cmn_err(CE_WARN,
- "sadb_setup_acquire: corrupt ACQUIRE record.\n");
- ASSERT(0);
- mutex_exit(&ipss->ipsec_alg_lock);
- return (NULL);
+ /* CALC_COMBS asserts on zero limit; broken config still possible */
+ if (with_prop && (combs_limit == 0)) {
+ ERRNO_SAMSG(samsg, ENOENT);
+ goto unlock_and_bail;
}
- samsg->sadb_msg_version = PF_KEY_V2;
- samsg->sadb_msg_type = SADB_ACQUIRE;
- samsg->sadb_msg_satype = satype;
- samsg->sadb_msg_errno = 0;
- samsg->sadb_msg_pid = 0;
- samsg->sadb_msg_reserved = 0;
- samsg->sadb_msg_seq = acqrec->ipsacq_seq;
+ cur += sizeof (sadb_msg_t);
- ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
+ cur = sadb_sel_to_addrexts(sel, pp, ap, cur, end, tunnel_mode);
+ if (cur == NULL)
+ goto unlock_and_fail;
- if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
- sport_typecode = dport_typecode = 0;
+ if (with_prop) {
+ if (extended) {
+ cur = sadb_construct_eprop(ap, pp, ns, cur, end);
+ if (cur == NULL)
+ goto unlock_and_fail;
+
+ eprop = (sadb_prop_t *)cur;
+ if (eprop->sadb_x_prop_numecombs == 0) {
+ ERRNO_SAMSG(samsg, ENOENT);
+ goto unlock_and_bail;
+ }
} else {
- sport_typecode = acqrec->ipsacq_srcport;
- dport_typecode = acqrec->ipsacq_dstport;
+ prop = (sadb_prop_t *)cur;
+
+ sadb_insert_prop(prop, ap, ns, combs_limit, need_esp);
+ if (prop == NULL) {
+ goto unlock_and_fail;
+ } else if (prop->sadb_prop_len == 0) {
+ ERRNO_SAMSG(samsg, ENOENT);
+ goto unlock_and_bail;
}
- cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
- acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
+ cur += SADB_64TO8(prop->sadb_prop_len);
+ }
- cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
- acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
+ rw_exit(&ipss->ipsec_alg_lock);
+ }
- if (tunnel_mode) {
- sport_typecode = acqrec->ipsacq_srcport;
- dport_typecode = acqrec->ipsacq_dstport;
- cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
- acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
- sport_typecode, acqrec->ipsacq_inner_proto,
- acqrec->ipsacq_innersrcpfx);
- cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
- acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
- dport_typecode, acqrec->ipsacq_inner_proto,
- acqrec->ipsacq_innerdstpfx);
+ if (sens != NULL) {
+ uint8_t *sensext = cur;
+ int senslen = SADB_64TO8(sens->sadb_sens_len);
+
+ cur += senslen;
+ if (cur > end)
+ goto freeb_bail;
+ bcopy(sens, sensext, senslen);
}
- /* XXX Insert identity information here. */
+ cur = sadb_policy_to_kmcext(pp, ap, cur, end);
+ if (cur == NULL)
+ goto freeb_bail;
- /* XXXMLS Insert sensitivity information here. */
+ samsg->sadb_msg_len = SADB_8TO64(cur - mp->b_rptr);
+ mp->b_wptr = cur;
- if (cur != NULL)
- samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
- else
- mutex_exit(&ipss->ipsec_alg_lock);
+ return (mp);
- return (pfkeymp);
+freeb_bail:
+ /* This message isn't chained, so we can freeb. */
+ freeb(mp);
+ return (NULL);
+unlock_and_bail:
+ if (with_prop)
+ rw_exit(&ipss->ipsec_alg_lock);
+ return (mp);
+unlock_and_fail:
+ if (with_prop)
+ rw_exit(&ipss->ipsec_alg_lock);
+ return (NULL);
}
/*
* Given an SADB_GETSPI message, find an appropriately ranged SA and
* allocate an SA. If there are message improprieties, return (ipsa_t *)-1.
@@ -6244,39 +6498,44 @@
inet_ntop(af, addr, buf, sizeof (buf)));
}
/*
* Fills in a reference to the policy, if any, from the conn, in *ppp
+ * If found, we hold a reference to the policy, caller must release.
*/
static void
ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
{
ipsec_policy_t *pp;
ipsec_latch_t *ipl = connp->conn_latch;
+ /* Use policy pointer already on conn_t if it's there. */
if ((ipl != NULL) && (connp->conn_ixa->ixa_ipsec_policy != NULL)) {
pp = connp->conn_ixa->ixa_ipsec_policy;
IPPOL_REFHOLD(pp);
- } else {
+ } else { /* otherwise query SPD */
+ /* This holds a reference for us if successful. */
pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, sel,
connp->conn_netstack);
}
*ppp = pp;
}
/*
- * The following functions scan through active conn_t structures
- * and return a reference to the best-matching policy it can find.
- * Caller must release the reference.
+ * Takes ipsec_selector_t (for attributes to query), ipsec_policy_t (what we're
+ * trying to find), and ip_stack_t (contains udp fanout we need to query). If we
+ * find a matching connection, we return its policy settings.
*/
static void
ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
{
connf_t *connfp;
conn_t *connp = NULL;
ipsec_selector_t portonly;
+ ASSERT(*ppp == NULL);
+
bzero((void *)&portonly, sizeof (portonly));
if (sel->ips_local_port == 0)
return;
@@ -6322,29 +6581,35 @@
ipsec_conn_pol(sel, connp, ppp);
CONN_DEC_REF(connp);
}
+/*
+ * Takes ipsec_selector_t (connection attributes to form query) and ip_stack_t
+ * (contains bind fanout we need to query) pointers to look up existing TCP
+ * listener, returned via conn_t pointer. We return NULL on failure.
+ * We increment reference count on match, caller must decrement.
+ */
static conn_t *
-ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
+ipsec_find_listen_conn(ipsec_selector_t *sel, ip_stack_t *ipst)
{
connf_t *connfp;
conn_t *connp = NULL;
const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
- if (sel->ips_local_port == 0)
- return (NULL);
+ /* XXX Sure about the second part? */
+ ASSERT(sel->ips_local_port != 0 && ipst != NULL);
connfp = &ipst->ips_ipcl_bind_fanout[
IPCL_BIND_HASH(sel->ips_local_port, ipst)];
mutex_enter(&connfp->connf_lock);
if (sel->ips_isv4) {
connp = connfp->connf_head;
while (connp != NULL) {
if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
- sel->ips_local_addr_v4, pptr[1]))
+ sel->ips_local_addr_v4, sel->ips_local_port))
break;
connp = connp->conn_next;
}
if (connp == NULL) {
@@ -6355,11 +6620,11 @@
if (connp == NULL) {
connp = connfp->connf_head;
while (connp != NULL) {
if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
- *v6addrmatch, pptr[1]))
+ *v6addrmatch, sel->ips_local_port))
break;
connp = connp->conn_next;
}
if (connp == NULL) {
@@ -6371,33 +6636,39 @@
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
return (connp);
}
+/*
+ * Given ipsec_selector_t (contains attributes to query), ipsec_policy_t (what
+ * we need to find), and ip_stack_t pointer (contains connection state to
+ * query), find a matching TCP connection or listener and return its policy
+ * pointer.
+ */
static void
ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
{
connf_t *connfp;
conn_t *connp;
uint32_t ports;
uint16_t *pptr = (uint16_t *)&ports;
+ ASSERT(sel->ips_local_port != 0 && *ppp == NULL);
+
/*
* Find TCP state in the following order:
- * 1.) Connected conns.
+ * 1.) Connected conns. (walk ipst connection fanout)
* 2.) Listeners.
*
* Even though #2 will be the common case for inbound traffic, only
* following this order insures correctness.
*/
- if (sel->ips_local_port == 0)
- return;
/*
- * 0 should be fport, 1 should be lport. SRC is the local one here.
- * See ipsec_construct_inverse_acquire() for details.
+ * pptr makes an array of port values, 0 for fport, 1 for lport. SRC is
+ * the local one here. Connection lookup macros want this instead of
+ * selector port members.
*/
pptr[0] = sel->ips_remote_port;
pptr[1] = sel->ips_local_port;
connfp = &ipst->ips_ipcl_conn_fanout[
@@ -6427,42 +6698,46 @@
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
} else {
mutex_exit(&connfp->connf_lock);
- /* Try the listen hash. */
- if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
+ /* Try the listen hash. If found, comes with incremented ref. */
+ if ((connp = ipsec_find_listen_conn(sel, ipst)) == NULL)
return;
}
ipsec_conn_pol(sel, connp, ppp);
CONN_DEC_REF(connp);
}
+/*
+ * Given ipsec_selector_t (connection attributes to form query), ipsec_policy_t
+ * (populate with match), and ip_stack_t (connection state to query) pointers,
+ * call into sctp to find an existing connection and return its policy.
+ */
static void
-ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
- ip_stack_t *ipst)
+ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
{
conn_t *connp;
uint32_t ports;
uint16_t *pptr = (uint16_t *)&ports;
+ ASSERT(sel->ips_local_port != 0 && *ppp == NULL);
+
/*
* Find SCTP state in the following order:
* 1.) Connected conns.
* 2.) Listeners.
*
* Even though #2 will be the common case for inbound traffic, only
* following this order insures correctness.
*/
- if (sel->ips_local_port == 0)
- return;
-
/*
- * 0 should be fport, 1 should be lport. SRC is the local one here.
- * See ipsec_construct_inverse_acquire() for details.
+ * pptr makes an array of port values, 0 for fport, 1 for lport. SRC is
+ * the local one here. Connection lookup macros want this instead of
+ * selector port members.
*/
pptr[0] = sel->ips_remote_port;
pptr[1] = sel->ips_local_port;
/*
@@ -6487,14 +6762,13 @@
ipsec_conn_pol(sel, connp, ppp);
CONN_DEC_REF(connp);
}
/*
- * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
- * Returns 0 or errno, and always sets *diagnostic to something appropriate
- * to PF_KEY.
- *
+ * Takes ipsec_selector_t (what we're forming), two sadb_address_t (address
+ * extensions needed to create selector), and diagnostic (what, if anything,
+ * went wrong in PF_KEY terms) pointers, returns int (0 or errno).
* NOTE: For right now, this function (and ipsec_selector_t for that matter),
* ignore prefix lengths in the address extension. Since we match on first-
* entered policies, this shouldn't matter. Also, since we normalize prefix-
* set addresses to mask out the lower bits, we should get a suitable search
* key for the SPD anyway. This is the function to change if the assumption
@@ -6546,66 +6820,58 @@
}
return (0);
}
/*
- * We have encapsulation.
- * - Lookup tun_t by address and look for an associated
- * tunnel policy
- * - If there are inner selectors
- * - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
- * - Look up tunnel policy based on selectors
- * - Else
- * - Sanity check the negotation
- * - If appropriate, fall through to global policy
+ * We're passed pointers to ipsec_selector_t (inner info needed to form query),
+ * ipsec_policy_t (what we're trying to populate), a pair of sadb_address_t
+ * (extensions needed to reset selector), ipsec_tun_pol_t (tunnel policy that
+ * may already be populated from previous SPD query), and integer (error detail
+ * in PF_KEY2 terms, always 0). Return 0 or errno.
+ * Caller may have fudged inner selector, so we need to reset it if we have
+ * to reuse it.
*/
static int
ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
int *diagnostic)
{
int err;
ipsec_policy_head_t *polhead;
+ ASSERT(*ppp == NULL);
+
*diagnostic = 0;
/* Check for inner selectors and act appropriately */
-
if (innsrcext != NULL) {
- /* Inner selectors present */
- ASSERT(inndstext != NULL);
+ ASSERT(inndstext != NULL); /* Need a pair */
+ /*
+ * If inner packet selectors, we must have negotiated tunnel and
+ * active policy already. If the tunnel has transport-mode
+ * policy set on it or no policy at all, fail.
+ */
if ((itp == NULL) ||
(itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
(ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
- /*
- * If inner packet selectors, we must have negotiate
- * tunnel and active policy. If the tunnel has
- * transport-mode policy set on it, or has no policy,
- * fail.
- */
return (ENOENT);
} else {
/*
- * Reset "sel" to indicate inner selectors. Pass
- * inner PF_KEY address extensions for this to happen.
+ * If we got a sane policy back from the SPD, reset the
+ * possibly fudged selector for subsequent operations.
*/
if ((err = ipsec_get_inverse_acquire_sel(sel,
innsrcext, inndstext, diagnostic)) != 0)
return (err);
- /*
- * Now look for a tunnel policy based on those inner
- * selectors. (Common code is below.)
- */
}
- } else {
- /* No inner selectors present */
- if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
+ } else { /* No inner selectors present */
+
/*
- * Transport mode negotiation with no tunnel policy
- * configured - return to indicate a global policy
- * check is needed.
+ * Transport mode negotiation with no tunnel policy configured
+ * - return to indicate a global policy check is needed.
*/
+ if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
return (0);
} else if (itp->itp_flags & ITPF_P_TUNNEL) {
/* Tunnel mode set with no inner selectors. */
return (ENOENT);
}
@@ -6633,21 +6899,27 @@
return (0);
}
/*
- * For sctp conn_faddr is the primary address, hence this is of limited
- * use for sctp.
+ * Takes ipsec_selector_t (data to form query), ipsec_policy_t (what we need
+ * to populate), and ip_stack_t (contains state data to query) pointers. This is
+ * a generic protocol look-up function to find a relevant connection that can be
+ * converted into a policy.
+ * XXX For sctp conn_faddr is the primary address, hence this is of limited
+ * use for sctp. Do we care, given sctp has its own lookup?
*/
static void
ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
ip_stack_t *ipst)
{
boolean_t isv4 = sel->ips_isv4;
connf_t *connfp;
conn_t *connp;
+ ASSERT(*ppp == NULL);
+
if (isv4) {
connfp = &ipst->ips_ipcl_proto_fanout_v4[sel->ips_protocol];
} else {
connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
}
@@ -6682,23 +6954,20 @@
ipsec_conn_pol(sel, connp, ppp);
CONN_DEC_REF(connp);
}
/*
- * Construct an inverse ACQUIRE reply based on:
+ * This code is called from keysock to handle inverse acquire messages. We
+ * are passed a pointer to sadb_msg_t, a fixed-size array of sadb_ext_t, and a
+ * netstack_t pointer and return a mblk_t pointer, in which we attempt to
+ * construct a return acquire message. In case of errors, we return a NULL
+ * pointer and populate samsg->sadb_msg_errno and
+ * samsg->sadb_x_msg_diagnostic, which is handled as an error at the keysock
+ * layer. Otherwise keysock does a passup with our message.
+ * Caller performs basic sanity checks such as NULL external addresses and
+ * only one of two inner addrs being NULL. Remaining checks happen here.
*
- * 1.) Current global policy.
- * 2.) An conn_t match depending on what all was passed in the extv[].
- * 3.) A tunnel's policy head.
- * ...
- * N.) Other stuff TBD (e.g. identities)
- *
- * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
- * in this function so the caller can extract them where appropriately.
- *
- * The SRC address is the local one - just like an outbound ACQUIRE message.
- *
* XXX MLS: key management supplies a label which we just reflect back up
* again. clearly we need to involve the label in the rest of the checks.
*/
mblk_t *
ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
@@ -6714,14 +6983,16 @@
struct sockaddr_in6 *src, *dst;
struct sockaddr_in6 *isrc, *idst;
ipsec_tun_pol_t *itp = NULL;
ipsec_policy_t *pp = NULL;
ipsec_selector_t sel, isel;
- mblk_t *retmp = NULL;
+ mblk_t *retmp;
ip_stack_t *ipst = ns->netstack_ip;
+ sadb_msg_t *retmsg;
+ ipsec_action_t *ap;
+ boolean_t tunnel_mode = B_FALSE;
-
/* Normalize addresses */
if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
== KS_IN_ADDR_UNKNOWN) {
err = EINVAL;
diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
@@ -6740,16 +7011,16 @@
diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
goto bail;
}
/* Check for tunnel mode and act appropriately */
+ /*
+ * Note: keysock_inverse_acquire catches unbalanced extensions and
+ * makes them into keysock_error calls, so ASSERTs here to confirm.
+ */
if (innsrcext != NULL) {
- if (inndstext == NULL) {
- err = EINVAL;
- diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
- goto bail;
- }
+ ASSERT(inndstext != NULL);
if (sadb_addrcheck(NULL, (mblk_t *)samsg,
(sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
err = EINVAL;
diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
goto bail;
@@ -6771,23 +7042,21 @@
isrc->sin6_family != AF_INET6) {
err = EINVAL;
diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
goto bail;
}
- } else if (inndstext != NULL) {
- err = EINVAL;
- diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
- goto bail;
- }
+ tunnel_mode = B_TRUE;
+ } else
+ ASSERT(inndstext == NULL);
- /* Get selectors first, based on outer addresses */
+ /* Convert address extensions into outer selector */
err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
if (err != 0)
goto bail;
- /* Check for tunnel mode mismatches. */
- if (innsrcext != NULL &&
+ /* Sanity-check newfound outer selector for tunnel mode mismatches */
+ if (tunnel_mode &&
((isrc->sin6_family == AF_INET &&
sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
(isrc->sin6_family == AF_INET6 &&
sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
err = EPROTOTYPE;
@@ -6794,13 +7063,15 @@
goto bail;
}
/*
* Okay, we have the addresses and other selector information.
- * Let's first find a conn...
+ * If our selector is for a protocol on top of IP, we make protocol-
+ * specific queries that work through useful state (e.g. connections or
+ * listeners). If we get something back, a reference to it will already
+ * be held, and we need to release that reference.
*/
- pp = NULL;
switch (sel.ips_protocol) {
case IPPROTO_TCP:
ipsec_tcp_pol(&sel, &pp, ipst);
break;
case IPPROTO_UDP:
@@ -6810,12 +7081,14 @@
ipsec_sctp_pol(&sel, &pp, ipst);
break;
case IPPROTO_ENCAP:
case IPPROTO_IPV6:
/*
- * Assume sel.ips_remote_addr_* has the right address at
- * that exact position.
+ * These cases are IPv6 in IP or IP in IP. Revert to querying
+ * SPD for tunnel policy, since there's no higher-level protocol
+ * or stack state to assist. Assume sel.ips_remote_addr_* has
+ * right address at exact position.
*/
itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
(uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
ipst);
@@ -6824,10 +7097,11 @@
* Transport-mode tunnel, make sure we fake out isel
* to contain something based on the outer protocol.
*/
bzero(&isel, sizeof (isel));
isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
+ /* XXX does this make tunnel_mode true? */
} /* Else isel is initialized by ipsec_tun_pol(). */
err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
&diagnostic);
/*
* NOTE: isel isn't used for now, but in RFC 430x IPsec, it
@@ -6834,18 +7108,18 @@
* may be.
*/
if (err != 0)
goto bail;
break;
- default:
+ default: /* Fall through to generic lookup */
ipsec_oth_pol(&sel, &pp, ipst);
break;
}
/*
- * If we didn't find a matching conn_t or other policy head, take a
- * look in the global policy.
+ * If we didn't find a matching conn_t or other policy head (pp retains
+ * initial NULL value), attempt to revert to the global policy.
*/
if (pp == NULL) {
pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, &sel, ns);
if (pp == NULL) {
/* There's no global policy. */
@@ -6858,23 +7132,47 @@
/*
* Now that we have a policy entry/widget, construct an ACQUIRE
* message based on that, fix fields where appropriate,
* and return the message.
*/
- retmp = sadb_extended_acquire(&sel, pp, NULL,
- (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
- samsg->sadb_msg_seq, samsg->sadb_msg_pid, sens, ns);
- if (pp != NULL) {
+ ap = pp->ipsp_act;
+ ASSERT(ap != NULL);
+
+ if (ap != NULL)
+ IPACT_REFHOLD(ap);
+
+ retmp = sadb_construct_acqmsg(NULL, &sel, ap, pp, ns, sens, 0,
+ tunnel_mode, B_TRUE, B_TRUE);
+ if (retmp == NULL)
+ goto nomem_bail;
+
+ retmsg = (sadb_msg_t *)retmp->b_rptr;
+ retmsg->sadb_msg_seq = samsg->sadb_msg_seq;
+ retmsg->sadb_msg_pid = samsg->sadb_msg_pid;
+
+ if (pp != NULL)
IPPOL_REFRELE(pp);
- }
+ if (ap != NULL)
+ IPACT_REFRELE(ap);
+
+ return (retmp);
+
+nomem_bail:
+ if (pp != NULL)
+ IPPOL_REFRELE(pp);
+ if (ap != NULL)
+ IPACT_REFRELE(ap);
ASSERT(err == 0 && diagnostic == 0);
- if (retmp == NULL)
err = ENOMEM;
bail:
if (itp != NULL) {
ITP_REFRELE(itp, ns);
}
+ /*
+ * Write error info into original message, as we may not have resources
+ * for a proper reply.
+ */
samsg->sadb_msg_errno = (uint8_t)err;
samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
return (retmp);
}
@@ -7188,11 +7486,11 @@
crypto_key_t *key;
crypto_ctx_template_t *sa_tmpl;
int rv;
ipsec_stack_t *ipss = sa->ipsa_netstack->netstack_ipsec;
- ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
+ ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
ASSERT(MUTEX_HELD(&sa->ipsa_lock));
/* get pointers to the algorithm info, context template, and key */
switch (alg_type) {
case IPSEC_ALG_AUTH: