Print this page
11556 ip_attr.c functions need to not dereference conn_ixa directly after lock drop
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: Mike Gerdts <mgerdts@joyent.com>
Reviewed by: Andy Fiddaman <andy@omniosce.org>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/ip/ip_attr.c
+++ new/usr/src/uts/common/inet/ip/ip_attr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
|
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25 /* Copyright (c) 1990 Mentat Inc. */
26 26
27 +/*
28 + * Copyright 2019 Joyent, Inc.
29 + */
30 +
27 31 #include <sys/types.h>
28 32 #include <sys/stream.h>
29 33 #include <sys/strsun.h>
30 34 #include <sys/zone.h>
31 35 #include <sys/ddi.h>
32 36 #include <sys/sunddi.h>
33 37 #include <sys/cmn_err.h>
34 38 #include <sys/debug.h>
35 39 #include <sys/atomic.h>
36 40
37 41 #include <sys/systm.h>
38 42 #include <sys/param.h>
39 43 #include <sys/kmem.h>
40 44 #include <sys/sdt.h>
41 45 #include <sys/socket.h>
42 46 #include <sys/mac.h>
43 47 #include <net/if.h>
44 48 #include <net/if_arp.h>
45 49 #include <net/route.h>
46 50 #include <sys/sockio.h>
47 51 #include <netinet/in.h>
48 52 #include <net/if_dl.h>
49 53
50 54 #include <inet/common.h>
51 55 #include <inet/mi.h>
52 56 #include <inet/mib2.h>
53 57 #include <inet/nd.h>
54 58 #include <inet/arp.h>
55 59 #include <inet/snmpcom.h>
56 60 #include <inet/kstatcom.h>
57 61
58 62 #include <netinet/igmp_var.h>
59 63 #include <netinet/ip6.h>
60 64 #include <netinet/icmp6.h>
61 65 #include <netinet/sctp.h>
62 66
63 67 #include <inet/ip.h>
64 68 #include <inet/ip_impl.h>
65 69 #include <inet/ip6.h>
66 70 #include <inet/ip6_asp.h>
67 71 #include <inet/tcp.h>
68 72 #include <inet/ip_multi.h>
69 73 #include <inet/ip_if.h>
70 74 #include <inet/ip_ire.h>
71 75 #include <inet/ip_ftable.h>
72 76 #include <inet/ip_rts.h>
73 77 #include <inet/optcom.h>
74 78 #include <inet/ip_ndp.h>
75 79 #include <inet/ip_listutils.h>
76 80 #include <netinet/igmp.h>
77 81 #include <netinet/ip_mroute.h>
78 82 #include <inet/ipp_common.h>
79 83
80 84 #include <net/pfkeyv2.h>
81 85 #include <inet/sadb.h>
82 86 #include <inet/ipsec_impl.h>
83 87 #include <inet/ipdrop.h>
84 88 #include <inet/ip_netinfo.h>
85 89 #include <sys/squeue_impl.h>
86 90 #include <sys/squeue.h>
87 91
88 92 #include <inet/ipclassifier.h>
89 93 #include <inet/sctp_ip.h>
|
↓ open down ↓ |
53 lines elided |
↑ open up ↑ |
90 94 #include <inet/sctp/sctp_impl.h>
91 95 #include <inet/udp_impl.h>
92 96 #include <sys/sunddi.h>
93 97
94 98 #include <sys/tsol/label.h>
95 99 #include <sys/tsol/tnet.h>
96 100
97 101 /*
98 102 * Release a reference on ip_xmit_attr.
99 103 * The reference is acquired by conn_get_ixa()
104 + *
105 + * This macro has a lowercase function-call version for callers outside
106 + * this file.
100 107 */
101 108 #define IXA_REFRELE(ixa) \
102 109 { \
103 110 if (atomic_dec_32_nv(&(ixa)->ixa_refcnt) == 0) \
104 111 ixa_inactive(ixa); \
105 112 }
106 113
107 114 #define IXA_REFHOLD(ixa) \
108 115 { \
109 - ASSERT((ixa)->ixa_refcnt != 0); \
116 + ASSERT3U((ixa)->ixa_refcnt, !=, 0); \
110 117 atomic_inc_32(&(ixa)->ixa_refcnt); \
111 118 }
112 119
113 120 /*
114 121 * When we need to handle a transmit side asynchronous operation, then we need
115 122 * to save sufficient information so that we can call the fragment and postfrag
116 123 * functions. That information is captured in an mblk containing this structure.
117 124 *
118 125 * Since this is currently only used for IPsec, we include information for
119 126 * the kernel crypto framework.
120 127 */
121 128 typedef struct ixamblk_s {
122 129 boolean_t ixm_inbound; /* B_FALSE */
123 130 iaflags_t ixm_flags; /* ixa_flags */
124 131 netstackid_t ixm_stackid; /* Verify it didn't go away */
125 132 uint_t ixm_ifindex; /* Used to find the nce */
126 133 in6_addr_t ixm_nceaddr_v6; /* Used to find nce */
127 134 #define ixm_nceaddr_v4 V4_PART_OF_V6(ixm_nceaddr_v6)
128 135 uint32_t ixm_fragsize;
129 136 uint_t ixm_pktlen;
130 137 uint16_t ixm_ip_hdr_length; /* Points to ULP header */
131 138 uint8_t ixm_protocol; /* Protocol number for ULP cksum */
132 139 pfirepostfrag_t ixm_postfragfn;
133 140
134 141 zoneid_t ixm_zoneid; /* Needed for ipobs */
135 142 zoneid_t ixm_no_loop_zoneid; /* IXAF_NO_LOOP_ZONEID_SET */
136 143
137 144 uint_t ixm_scopeid; /* For IPv6 link-locals */
138 145
139 146 uint32_t ixm_ident; /* For IPv6 fragment header */
140 147 uint32_t ixm_xmit_hint;
141 148
142 149 uint64_t ixm_conn_id; /* Used by DTrace */
143 150 cred_t *ixm_cred; /* For getpeerucred - refhold if set */
|
↓ open down ↓ |
24 lines elided |
↑ open up ↑ |
144 151 pid_t ixm_cpid; /* For getpeerucred */
145 152
146 153 ts_label_t *ixm_tsl; /* Refhold if set. */
147 154
148 155 /*
149 156 * When the pointers below are set they have a refhold on the struct.
150 157 */
151 158 ipsec_latch_t *ixm_ipsec_latch;
152 159 struct ipsa_s *ixm_ipsec_ah_sa; /* SA for AH */
153 160 struct ipsa_s *ixm_ipsec_esp_sa; /* SA for ESP */
154 - struct ipsec_policy_s *ixm_ipsec_policy; /* why are we here? */
161 + struct ipsec_policy_s *ixm_ipsec_policy; /* why are we here? */
155 162 struct ipsec_action_s *ixm_ipsec_action; /* For reflected packets */
156 163
157 164 ipsa_ref_t ixm_ipsec_ref[2]; /* Soft reference to SA */
158 165
159 166 /* Need these while waiting for SA */
160 167 uint16_t ixm_ipsec_src_port; /* Source port number of d-gram. */
161 168 uint16_t ixm_ipsec_dst_port; /* Destination port number of d-gram. */
162 169 uint8_t ixm_ipsec_icmp_type; /* ICMP type of d-gram */
163 170 uint8_t ixm_ipsec_icmp_code; /* ICMP code of d-gram */
164 171
165 172 sa_family_t ixm_ipsec_inaf; /* Inner address family */
166 173 uint32_t ixm_ipsec_insrc[IXA_MAX_ADDRLEN]; /* Inner src address */
167 174 uint32_t ixm_ipsec_indst[IXA_MAX_ADDRLEN]; /* Inner dest address */
168 175 uint8_t ixm_ipsec_insrcpfx; /* Inner source prefix */
169 176 uint8_t ixm_ipsec_indstpfx; /* Inner destination prefix */
170 177
171 178 uint8_t ixm_ipsec_proto; /* IP protocol number for d-gram. */
172 179 } ixamblk_t;
173 180
174 181
175 182 /*
176 183 * When we need to handle a receive side asynchronous operation, then we need
177 184 * to save sufficient information so that we can call ip_fanout.
178 185 * That information is captured in an mblk containing this structure.
179 186 *
180 187 * Since this is currently only used for IPsec, we include information for
181 188 * the kernel crypto framework.
182 189 */
183 190 typedef struct iramblk_s {
184 191 boolean_t irm_inbound; /* B_TRUE */
185 192 iaflags_t irm_flags; /* ira_flags */
186 193 netstackid_t irm_stackid; /* Verify it didn't go away */
187 194 uint_t irm_ifindex; /* To find ira_ill */
188 195
189 196 uint_t irm_rifindex; /* ira_rifindex */
190 197 uint_t irm_ruifindex; /* ira_ruifindex */
191 198 uint_t irm_pktlen;
192 199 uint16_t irm_ip_hdr_length; /* Points to ULP header */
193 200 uint8_t irm_protocol; /* Protocol number for ULP cksum */
194 201 zoneid_t irm_zoneid; /* ALL_ZONES unless local delivery */
195 202
196 203 squeue_t *irm_sqp;
197 204 ill_rx_ring_t *irm_ring;
198 205
199 206 ipaddr_t irm_mroute_tunnel; /* IRAF_MROUTE_TUNNEL_SET */
200 207 zoneid_t irm_no_loop_zoneid; /* IRAF_NO_LOOP_ZONEID_SET */
201 208 uint32_t irm_esp_udp_ports; /* IRAF_ESP_UDP_PORTS */
202 209
203 210 char irm_l2src[IRA_L2SRC_SIZE]; /* If IRAF_L2SRC_SET */
204 211
205 212 cred_t *irm_cred; /* For getpeerucred - refhold if set */
206 213 pid_t irm_cpid; /* For getpeerucred */
207 214
208 215 ts_label_t *irm_tsl; /* Refhold if set. */
209 216
210 217 /*
211 218 * When set these correspond to a refhold on the object.
212 219 */
213 220 struct ipsa_s *irm_ipsec_ah_sa; /* SA for AH */
214 221 struct ipsa_s *irm_ipsec_esp_sa; /* SA for ESP */
215 222 struct ipsec_action_s *irm_ipsec_action; /* For reflected packets */
216 223 } iramblk_t;
217 224
218 225
219 226 /*
220 227 * Take the information in ip_xmit_attr_t and stick it in an mblk
221 228 * that can later be passed to ip_xmit_attr_from_mblk to recreate the
222 229 * ip_xmit_attr_t.
223 230 *
224 231 * Returns NULL on memory allocation failure.
225 232 */
226 233 mblk_t *
227 234 ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa)
228 235 {
229 236 mblk_t *ixamp;
230 237 ixamblk_t *ixm;
231 238 nce_t *nce = ixa->ixa_nce;
232 239
233 240 ASSERT(nce != NULL);
234 241 ixamp = allocb(sizeof (*ixm), BPRI_MED);
235 242 if (ixamp == NULL)
236 243 return (NULL);
237 244
238 245 ixamp->b_datap->db_type = M_BREAK;
239 246 ixamp->b_wptr += sizeof (*ixm);
240 247 ixm = (ixamblk_t *)ixamp->b_rptr;
241 248
242 249 bzero(ixm, sizeof (*ixm));
243 250 ixm->ixm_inbound = B_FALSE;
244 251 ixm->ixm_flags = ixa->ixa_flags;
245 252 ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid;
246 253 ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex;
247 254 ixm->ixm_nceaddr_v6 = nce->nce_addr;
248 255 ixm->ixm_fragsize = ixa->ixa_fragsize;
249 256 ixm->ixm_pktlen = ixa->ixa_pktlen;
250 257 ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length;
251 258 ixm->ixm_protocol = ixa->ixa_protocol;
252 259 ixm->ixm_postfragfn = ixa->ixa_postfragfn;
253 260 ixm->ixm_zoneid = ixa->ixa_zoneid;
254 261 ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid;
255 262 ixm->ixm_scopeid = ixa->ixa_scopeid;
256 263 ixm->ixm_ident = ixa->ixa_ident;
257 264 ixm->ixm_xmit_hint = ixa->ixa_xmit_hint;
258 265
259 266 if (ixa->ixa_tsl != NULL) {
260 267 ixm->ixm_tsl = ixa->ixa_tsl;
261 268 label_hold(ixm->ixm_tsl);
262 269 }
263 270 if (ixa->ixa_cred != NULL) {
264 271 ixm->ixm_cred = ixa->ixa_cred;
265 272 crhold(ixa->ixa_cred);
266 273 }
267 274 ixm->ixm_cpid = ixa->ixa_cpid;
268 275 ixm->ixm_conn_id = ixa->ixa_conn_id;
269 276
270 277 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
271 278 if (ixa->ixa_ipsec_ah_sa != NULL) {
272 279 ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa;
273 280 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
274 281 }
275 282 if (ixa->ixa_ipsec_esp_sa != NULL) {
276 283 ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa;
277 284 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
278 285 }
279 286 if (ixa->ixa_ipsec_policy != NULL) {
280 287 ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy;
281 288 IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
282 289 }
283 290 if (ixa->ixa_ipsec_action != NULL) {
284 291 ixm->ixm_ipsec_action = ixa->ixa_ipsec_action;
285 292 IPACT_REFHOLD(ixa->ixa_ipsec_action);
286 293 }
287 294 if (ixa->ixa_ipsec_latch != NULL) {
288 295 ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch;
289 296 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
290 297 }
291 298 ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0];
292 299 ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1];
293 300 ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port;
294 301 ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port;
295 302 ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type;
296 303 ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code;
297 304 ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf;
298 305 ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0];
299 306 ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1];
300 307 ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2];
301 308 ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3];
302 309 ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0];
303 310 ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1];
304 311 ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2];
305 312 ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3];
306 313 ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx;
307 314 ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx;
308 315 ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto;
309 316 }
310 317 return (ixamp);
311 318 }
312 319
313 320 /*
314 321 * Extract the ip_xmit_attr_t from the mblk, checking that the
315 322 * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is
316 323 * not the case.
317 324 *
318 325 * Otherwise ixa is updated.
319 326 * Caller needs to release references on the ixa by calling ixa_refrele()
 320 327   * which will immediately call ixa_inactive to release the references.
321 328 */
322 329 boolean_t
323 330 ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa)
324 331 {
325 332 ixamblk_t *ixm;
326 333 netstack_t *ns;
327 334 ip_stack_t *ipst;
328 335 ill_t *ill;
329 336 nce_t *nce;
330 337
331 338 /* We assume the caller hasn't initialized ixa */
332 339 bzero(ixa, sizeof (*ixa));
333 340
334 341 ASSERT(DB_TYPE(ixamp) == M_BREAK);
335 342 ASSERT(ixamp->b_cont == NULL);
336 343
337 344 ixm = (ixamblk_t *)ixamp->b_rptr;
338 345 ASSERT(!ixm->ixm_inbound);
339 346
340 347 /* Verify the netstack is still around */
341 348 ns = netstack_find_by_stackid(ixm->ixm_stackid);
342 349 if (ns == NULL) {
343 350 /* Disappeared on us */
344 351 (void) ip_xmit_attr_free_mblk(ixamp);
345 352 return (B_FALSE);
346 353 }
347 354 ipst = ns->netstack_ip;
348 355
349 356 /* Verify the ill is still around */
350 357 ill = ill_lookup_on_ifindex(ixm->ixm_ifindex,
351 358 !(ixm->ixm_flags & IXAF_IS_IPV4), ipst);
352 359
353 360 /* We have the ill, hence the netstack can't go away */
354 361 netstack_rele(ns);
355 362 if (ill == NULL) {
356 363 /* Disappeared on us */
357 364 (void) ip_xmit_attr_free_mblk(ixamp);
358 365 return (B_FALSE);
359 366 }
360 367 /*
361 368 * Find the nce. We don't load-spread (only lookup nce's on the ill)
362 369 * because we want to find the same nce as the one we had when
363 370 * ip_xmit_attr_to_mblk was called.
364 371 */
365 372 if (ixm->ixm_flags & IXAF_IS_IPV4) {
366 373 nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4);
367 374 } else {
368 375 nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6);
369 376 }
370 377
371 378 /* We have the nce, hence the ill can't go away */
372 379 ill_refrele(ill);
373 380 if (nce == NULL) {
374 381 /*
375 382 * Since this is unusual and we don't know what type of
376 383 * nce it was, we drop the packet.
377 384 */
378 385 (void) ip_xmit_attr_free_mblk(ixamp);
379 386 return (B_FALSE);
380 387 }
381 388
382 389 ixa->ixa_flags = ixm->ixm_flags;
383 390 ixa->ixa_refcnt = 1;
384 391 ixa->ixa_ipst = ipst;
385 392 ixa->ixa_fragsize = ixm->ixm_fragsize;
386 393 ixa->ixa_pktlen = ixm->ixm_pktlen;
387 394 ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length;
388 395 ixa->ixa_protocol = ixm->ixm_protocol;
389 396 ixa->ixa_nce = nce;
390 397 ixa->ixa_postfragfn = ixm->ixm_postfragfn;
391 398 ixa->ixa_zoneid = ixm->ixm_zoneid;
392 399 ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid;
393 400 ixa->ixa_scopeid = ixm->ixm_scopeid;
394 401 ixa->ixa_ident = ixm->ixm_ident;
395 402 ixa->ixa_xmit_hint = ixm->ixm_xmit_hint;
396 403
397 404 if (ixm->ixm_tsl != NULL) {
398 405 ixa->ixa_tsl = ixm->ixm_tsl;
399 406 ixa->ixa_free_flags |= IXA_FREE_TSL;
400 407 ixm->ixm_tsl = NULL;
401 408 }
402 409 if (ixm->ixm_cred != NULL) {
403 410 ixa->ixa_cred = ixm->ixm_cred;
404 411 ixa->ixa_free_flags |= IXA_FREE_CRED;
405 412 ixm->ixm_cred = NULL;
406 413 }
407 414 ixa->ixa_cpid = ixm->ixm_cpid;
408 415 ixa->ixa_conn_id = ixm->ixm_conn_id;
409 416
410 417 ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa;
411 418 ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa;
412 419 ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy;
413 420 ixa->ixa_ipsec_action = ixm->ixm_ipsec_action;
414 421 ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch;
415 422
416 423 ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0];
417 424 ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1];
418 425 ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port;
419 426 ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port;
420 427 ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type;
421 428 ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code;
422 429 ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf;
423 430 ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0];
424 431 ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1];
425 432 ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2];
426 433 ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3];
427 434 ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0];
428 435 ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1];
429 436 ixa->ixa_ipsec_indst[2] = ixm->ixm_ipsec_indst[2];
430 437 ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3];
431 438 ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx;
432 439 ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx;
433 440 ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto;
434 441
435 442 freeb(ixamp);
436 443 return (B_TRUE);
437 444 }
438 445
439 446 /*
440 447 * Free the ixm mblk and any references it holds
441 448 * Returns b_cont.
442 449 */
443 450 mblk_t *
444 451 ip_xmit_attr_free_mblk(mblk_t *ixamp)
445 452 {
446 453 ixamblk_t *ixm;
447 454 mblk_t *mp;
448 455
449 456 /* Consume mp */
450 457 ASSERT(DB_TYPE(ixamp) == M_BREAK);
451 458 mp = ixamp->b_cont;
452 459
453 460 ixm = (ixamblk_t *)ixamp->b_rptr;
454 461 ASSERT(!ixm->ixm_inbound);
455 462
456 463 if (ixm->ixm_ipsec_ah_sa != NULL) {
457 464 IPSA_REFRELE(ixm->ixm_ipsec_ah_sa);
458 465 ixm->ixm_ipsec_ah_sa = NULL;
459 466 }
460 467 if (ixm->ixm_ipsec_esp_sa != NULL) {
461 468 IPSA_REFRELE(ixm->ixm_ipsec_esp_sa);
462 469 ixm->ixm_ipsec_esp_sa = NULL;
463 470 }
464 471 if (ixm->ixm_ipsec_policy != NULL) {
465 472 IPPOL_REFRELE(ixm->ixm_ipsec_policy);
466 473 ixm->ixm_ipsec_policy = NULL;
467 474 }
468 475 if (ixm->ixm_ipsec_action != NULL) {
469 476 IPACT_REFRELE(ixm->ixm_ipsec_action);
470 477 ixm->ixm_ipsec_action = NULL;
471 478 }
472 479 if (ixm->ixm_ipsec_latch) {
473 480 IPLATCH_REFRELE(ixm->ixm_ipsec_latch);
474 481 ixm->ixm_ipsec_latch = NULL;
475 482 }
476 483
477 484 if (ixm->ixm_tsl != NULL) {
478 485 label_rele(ixm->ixm_tsl);
479 486 ixm->ixm_tsl = NULL;
480 487 }
481 488 if (ixm->ixm_cred != NULL) {
482 489 crfree(ixm->ixm_cred);
483 490 ixm->ixm_cred = NULL;
484 491 }
485 492 freeb(ixamp);
486 493 return (mp);
487 494 }
488 495
489 496 /*
490 497 * Take the information in ip_recv_attr_t and stick it in an mblk
491 498 * that can later be passed to ip_recv_attr_from_mblk to recreate the
492 499 * ip_recv_attr_t.
493 500 *
494 501 * Returns NULL on memory allocation failure.
495 502 */
496 503 mblk_t *
497 504 ip_recv_attr_to_mblk(ip_recv_attr_t *ira)
498 505 {
499 506 mblk_t *iramp;
500 507 iramblk_t *irm;
501 508 ill_t *ill = ira->ira_ill;
502 509
503 510 ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0);
504 511
505 512 iramp = allocb(sizeof (*irm), BPRI_MED);
506 513 if (iramp == NULL)
507 514 return (NULL);
508 515
509 516 iramp->b_datap->db_type = M_BREAK;
510 517 iramp->b_wptr += sizeof (*irm);
511 518 irm = (iramblk_t *)iramp->b_rptr;
512 519
513 520 bzero(irm, sizeof (*irm));
514 521 irm->irm_inbound = B_TRUE;
515 522 irm->irm_flags = ira->ira_flags;
516 523 if (ill != NULL) {
517 524 /* Internal to IP - preserve ip_stack_t, ill and rill */
518 525 irm->irm_stackid =
519 526 ill->ill_ipst->ips_netstack->netstack_stackid;
520 527 irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
521 528 ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex ==
522 529 ira->ira_rifindex);
523 530 } else {
524 531 /* Let ip_recv_attr_from_stackid know there isn't one */
525 532 irm->irm_stackid = -1;
526 533 }
527 534 irm->irm_rifindex = ira->ira_rifindex;
528 535 irm->irm_ruifindex = ira->ira_ruifindex;
529 536 irm->irm_pktlen = ira->ira_pktlen;
530 537 irm->irm_ip_hdr_length = ira->ira_ip_hdr_length;
531 538 irm->irm_protocol = ira->ira_protocol;
532 539
533 540 irm->irm_sqp = ira->ira_sqp;
534 541 irm->irm_ring = ira->ira_ring;
535 542
536 543 irm->irm_zoneid = ira->ira_zoneid;
537 544 irm->irm_mroute_tunnel = ira->ira_mroute_tunnel;
538 545 irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid;
539 546 irm->irm_esp_udp_ports = ira->ira_esp_udp_ports;
540 547
541 548 if (ira->ira_tsl != NULL) {
542 549 irm->irm_tsl = ira->ira_tsl;
543 550 label_hold(irm->irm_tsl);
544 551 }
545 552 if (ira->ira_cred != NULL) {
546 553 irm->irm_cred = ira->ira_cred;
547 554 crhold(ira->ira_cred);
548 555 }
549 556 irm->irm_cpid = ira->ira_cpid;
550 557
551 558 if (ira->ira_flags & IRAF_L2SRC_SET)
552 559 bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE);
553 560
554 561 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
555 562 if (ira->ira_ipsec_ah_sa != NULL) {
556 563 irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa;
557 564 IPSA_REFHOLD(ira->ira_ipsec_ah_sa);
558 565 }
559 566 if (ira->ira_ipsec_esp_sa != NULL) {
560 567 irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa;
561 568 IPSA_REFHOLD(ira->ira_ipsec_esp_sa);
562 569 }
563 570 if (ira->ira_ipsec_action != NULL) {
564 571 irm->irm_ipsec_action = ira->ira_ipsec_action;
565 572 IPACT_REFHOLD(ira->ira_ipsec_action);
566 573 }
567 574 }
568 575 return (iramp);
569 576 }
570 577
571 578 /*
572 579 * Extract the ip_recv_attr_t from the mblk. If we are used inside IP
573 580 * then irm_stackid is not -1, in which case we check that the
574 581 * ip_stack_t and ill_t still exist. Returns B_FALSE if that is
575 582 * not the case.
 576 583   * If irm_stackid is -1 then we are used by an ULP (e.g., squeue_enter)
577 584 * and we just proceed with ira_ill and ira_rill as NULL.
578 585 *
579 586 * The caller needs to release any references on the pointers inside the ire
580 587 * by calling ira_cleanup.
581 588 */
582 589 boolean_t
583 590 ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira)
584 591 {
585 592 iramblk_t *irm;
586 593 netstack_t *ns;
587 594 ip_stack_t *ipst = NULL;
588 595 ill_t *ill = NULL, *rill = NULL;
589 596
590 597 /* We assume the caller hasn't initialized ira */
591 598 bzero(ira, sizeof (*ira));
592 599
593 600 ASSERT(DB_TYPE(iramp) == M_BREAK);
594 601 ASSERT(iramp->b_cont == NULL);
595 602
596 603 irm = (iramblk_t *)iramp->b_rptr;
597 604 ASSERT(irm->irm_inbound);
598 605
599 606 if (irm->irm_stackid != -1) {
600 607 /* Verify the netstack is still around */
601 608 ns = netstack_find_by_stackid(irm->irm_stackid);
602 609 if (ns == NULL) {
603 610 /* Disappeared on us */
604 611 (void) ip_recv_attr_free_mblk(iramp);
605 612 return (B_FALSE);
606 613 }
607 614 ipst = ns->netstack_ip;
608 615
609 616 /* Verify the ill is still around */
610 617 ill = ill_lookup_on_ifindex(irm->irm_ifindex,
611 618 !(irm->irm_flags & IRAF_IS_IPV4), ipst);
612 619
613 620 if (irm->irm_ifindex == irm->irm_rifindex) {
614 621 rill = ill;
615 622 } else {
616 623 rill = ill_lookup_on_ifindex(irm->irm_rifindex,
617 624 !(irm->irm_flags & IRAF_IS_IPV4), ipst);
618 625 }
619 626
620 627 /* We have the ill, hence the netstack can't go away */
621 628 netstack_rele(ns);
622 629 if (ill == NULL || rill == NULL) {
623 630 /* Disappeared on us */
624 631 if (ill != NULL)
625 632 ill_refrele(ill);
626 633 if (rill != NULL && rill != ill)
627 634 ill_refrele(rill);
628 635 (void) ip_recv_attr_free_mblk(iramp);
629 636 return (B_FALSE);
630 637 }
631 638 }
632 639
633 640 ira->ira_flags = irm->irm_flags;
 634 641  	/* Caller must ill_refrele(ira_ill) by using ira_cleanup() */
635 642 ira->ira_ill = ill;
636 643 ira->ira_rill = rill;
637 644
638 645 ira->ira_rifindex = irm->irm_rifindex;
639 646 ira->ira_ruifindex = irm->irm_ruifindex;
640 647 ira->ira_pktlen = irm->irm_pktlen;
641 648 ira->ira_ip_hdr_length = irm->irm_ip_hdr_length;
642 649 ira->ira_protocol = irm->irm_protocol;
643 650
644 651 ira->ira_sqp = irm->irm_sqp;
645 652 /* The rest of IP assumes that the rings never go away. */
646 653 ira->ira_ring = irm->irm_ring;
647 654
648 655 ira->ira_zoneid = irm->irm_zoneid;
649 656 ira->ira_mroute_tunnel = irm->irm_mroute_tunnel;
650 657 ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid;
651 658 ira->ira_esp_udp_ports = irm->irm_esp_udp_ports;
652 659
653 660 if (irm->irm_tsl != NULL) {
654 661 ira->ira_tsl = irm->irm_tsl;
655 662 ira->ira_free_flags |= IRA_FREE_TSL;
656 663 irm->irm_tsl = NULL;
657 664 }
658 665 if (irm->irm_cred != NULL) {
659 666 ira->ira_cred = irm->irm_cred;
660 667 ira->ira_free_flags |= IRA_FREE_CRED;
661 668 irm->irm_cred = NULL;
662 669 }
663 670 ira->ira_cpid = irm->irm_cpid;
664 671
665 672 if (ira->ira_flags & IRAF_L2SRC_SET)
666 673 bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE);
667 674
668 675 ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa;
669 676 ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa;
670 677 ira->ira_ipsec_action = irm->irm_ipsec_action;
671 678
672 679 freeb(iramp);
673 680 return (B_TRUE);
674 681 }
675 682
676 683 /*
677 684 * Free the irm mblk and any references it holds
678 685 * Returns b_cont.
679 686 */
680 687 mblk_t *
681 688 ip_recv_attr_free_mblk(mblk_t *iramp)
682 689 {
683 690 iramblk_t *irm;
684 691 mblk_t *mp;
685 692
686 693 /* Consume mp */
687 694 ASSERT(DB_TYPE(iramp) == M_BREAK);
688 695 mp = iramp->b_cont;
689 696
690 697 irm = (iramblk_t *)iramp->b_rptr;
691 698 ASSERT(irm->irm_inbound);
692 699
693 700 if (irm->irm_ipsec_ah_sa != NULL) {
694 701 IPSA_REFRELE(irm->irm_ipsec_ah_sa);
695 702 irm->irm_ipsec_ah_sa = NULL;
696 703 }
697 704 if (irm->irm_ipsec_esp_sa != NULL) {
698 705 IPSA_REFRELE(irm->irm_ipsec_esp_sa);
699 706 irm->irm_ipsec_esp_sa = NULL;
700 707 }
701 708 if (irm->irm_ipsec_action != NULL) {
702 709 IPACT_REFRELE(irm->irm_ipsec_action);
703 710 irm->irm_ipsec_action = NULL;
704 711 }
705 712 if (irm->irm_tsl != NULL) {
706 713 label_rele(irm->irm_tsl);
707 714 irm->irm_tsl = NULL;
708 715 }
709 716 if (irm->irm_cred != NULL) {
710 717 crfree(irm->irm_cred);
711 718 irm->irm_cred = NULL;
712 719 }
713 720
714 721 freeb(iramp);
715 722 return (mp);
716 723 }
717 724
718 725 /*
719 726 * Returns true if the mblk contains an ip_recv_attr_t
720 727 * For now we just check db_type.
721 728 */
722 729 boolean_t
723 730 ip_recv_attr_is_mblk(mblk_t *mp)
724 731 {
725 732 /*
726 733 * Need to handle the various forms of tcp_timermp which are tagged
727 734 * with b_wptr and might have a NULL b_datap.
728 735 */
729 736 if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1)
730 737 return (B_FALSE);
731 738
732 739 #ifdef DEBUG
733 740 iramblk_t *irm;
734 741
735 742 if (DB_TYPE(mp) != M_BREAK)
736 743 return (B_FALSE);
737 744
738 745 irm = (iramblk_t *)mp->b_rptr;
|
↓ open down ↓ |
574 lines elided |
↑ open up ↑ |
739 746 ASSERT(irm->irm_inbound);
740 747 return (B_TRUE);
741 748 #else
742 749 return (DB_TYPE(mp) == M_BREAK);
743 750 #endif
744 751 }
745 752
746 753 static ip_xmit_attr_t *
747 754 conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag)
748 755 {
749 - ip_xmit_attr_t *ixa;
750 - ip_xmit_attr_t *oldixa;
756 + ip_xmit_attr_t *oldixa; /* Already attached to conn_t */
757 + ip_xmit_attr_t *ixa; /* New one, which we return. */
751 758
759 + /*
760 + * NOTE: If the marked-below common case isn't, move the
761 + * kmem_alloc() up here and put a free in what was marked as the
762 + * (not really) common case instead.
763 + */
764 +
752 765 mutex_enter(&connp->conn_lock);
753 - ixa = connp->conn_ixa;
766 + oldixa = connp->conn_ixa;
754 767
755 - /* At least one references for the conn_t */
756 - ASSERT(ixa->ixa_refcnt >= 1);
757 - if (atomic_inc_32_nv(&ixa->ixa_refcnt) == 2) {
758 - /* No other thread using conn_ixa */
768 + /* At least one reference for the conn_t */
769 + ASSERT3U(oldixa->ixa_refcnt, >=, 1);
770 + if (atomic_inc_32_nv(&oldixa->ixa_refcnt) == 2) {
771 + /* No other thread using conn_ixa (common case) */
759 772 mutex_exit(&connp->conn_lock);
760 - return (ixa);
773 + return (oldixa);
761 774 }
775 + /* Do allocation inside-the-conn_lock because it's less common. */
762 776 ixa = kmem_alloc(sizeof (*ixa), kmflag);
763 777 if (ixa == NULL) {
764 778 mutex_exit(&connp->conn_lock);
765 - ixa_refrele(connp->conn_ixa);
779 + IXA_REFRELE(oldixa);
766 780 return (NULL);
767 781 }
768 - ixa_safe_copy(connp->conn_ixa, ixa);
782 + ixa_safe_copy(oldixa, ixa);
769 783
770 784 /* Make sure we drop conn_lock before any refrele */
771 785 if (replace) {
772 786 ixa->ixa_refcnt++; /* No atomic needed - not visible */
773 - oldixa = connp->conn_ixa;
774 787 connp->conn_ixa = ixa;
775 788 mutex_exit(&connp->conn_lock);
776 789 IXA_REFRELE(oldixa); /* Undo refcnt from conn_t */
777 790 } else {
778 - oldixa = connp->conn_ixa;
779 791 mutex_exit(&connp->conn_lock);
780 792 }
781 793 IXA_REFRELE(oldixa); /* Undo above atomic_add_32_nv */
782 794
783 795 return (ixa);
784 796 }
785 797
786 798 /*
787 799 * Return an ip_xmit_attr_t to use with a conn_t that ensures that only
788 800 * the caller can access the ip_xmit_attr_t.
789 801 *
790 802 * If nobody else is using conn_ixa we return it.
791 803 * Otherwise we make a "safe" copy of conn_ixa
792 804 * and return it. The "safe" copy has the pointers set to NULL
793 805 * (since the pointers might be changed by another thread using
794 806 * conn_ixa). The caller needs to check for NULL pointers to see
795 807 * if ip_set_destination needs to be called to re-establish the pointers.
796 808 *
797 809 * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t.
798 810 * That is used when we connect() the ULP.
799 811 */
800 812 ip_xmit_attr_t *
801 813 conn_get_ixa(conn_t *connp, boolean_t replace)
802 814 {
803 815 return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP));
804 816 }
805 817
806 818 /*
807 819 * Used only when the option is to have the kernel hang due to not
808 820 * cleaning up ixa references on ills etc.
809 821 */
810 822 ip_xmit_attr_t *
811 823 conn_get_ixa_tryhard(conn_t *connp, boolean_t replace)
812 824 {
813 825 return (conn_get_ixa_impl(connp, replace, KM_SLEEP));
814 826 }
815 827
816 828 /*
817 829 * Replace conn_ixa with the ixa argument.
818 830 *
819 831 * The caller must hold conn_lock.
820 832 *
821 833 * We return the old ixa; the caller must ixa_refrele that after conn_lock
822 834 * has been dropped.
823 835 */
824 836 ip_xmit_attr_t *
825 837 conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa)
826 838 {
827 839 ip_xmit_attr_t *oldixa;
828 840
829 841 ASSERT(MUTEX_HELD(&connp->conn_lock));
830 842
831 843 oldixa = connp->conn_ixa;
832 844 IXA_REFHOLD(ixa);
833 845 ixa->ixa_conn_id = oldixa->ixa_conn_id;
834 846 connp->conn_ixa = ixa;
835 847 return (oldixa);
836 848 }
837 849
/*
 * Return a ip_xmit_attr_t to use with a conn_t that is based on but
 * separate from conn_ixa.
 *
 * This "safe" copy has the pointers set to NULL
 * (since the pointers might be changed by another thread using
 * conn_ixa). The caller needs to check for NULL pointers to see
 * if ip_set_destination needs to be called to re-establish the pointers.
 *
 * Returns NULL if the KM_NOSLEEP allocation fails.
 */
ip_xmit_attr_t *
conn_get_ixa_exclusive(conn_t *connp)
{
	ip_xmit_attr_t *oldixa;
	ip_xmit_attr_t *ixa;

	/*
	 * Allocate before taking conn_lock so that we never block, or
	 * bail out, while holding the lock.
	 */
	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP | KM_NORMALPRI);
	if (ixa == NULL)
		return (NULL);

	mutex_enter(&connp->conn_lock);

	/*
	 * Take a local reference on the current conn_ixa under conn_lock;
	 * after the lock is dropped we only use this local pointer, never
	 * connp->conn_ixa itself, since another thread may replace it.
	 */
	oldixa = connp->conn_ixa;
	IXA_REFHOLD(oldixa);

	ixa_safe_copy(oldixa, ixa);
	mutex_exit(&connp->conn_lock);
	IXA_REFRELE(oldixa);
	return (ixa);
}
872 879
/*
 * Copy *src into *ixa, producing a "safe" private copy: the refcnt is
 * reset to one and all pointers that other threads might change (IRE,
 * NCE, DCE, IPsec state) are cleared, with the generations set so they
 * will be re-verified/re-established by the user of the copy.
 */
void
ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
{
	bcopy(src, ixa, sizeof (*ixa));
	/* The copy starts with a single reference owned by the caller. */
	ixa->ixa_refcnt = 1;
	/*
	 * Clear any pointers that have references and might be changed
	 * by ip_set_destination or the ULP
	 */
	ixa->ixa_ire = NULL;
	ixa->ixa_nce = NULL;
	ixa->ixa_dce = NULL;
	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
#ifdef DEBUG
	ixa->ixa_curthread = NULL;
#endif
	/* Clear all the IPsec pointers and the flag as well. */
	ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;

	ixa->ixa_ipsec_latch = NULL;
	ixa->ixa_ipsec_ah_sa = NULL;
	ixa->ixa_ipsec_esp_sa = NULL;
	ixa->ixa_ipsec_policy = NULL;
	ixa->ixa_ipsec_action = NULL;

	/*
	 * We leave ixa_tsl unchanged, but if it has a refhold we need
	 * to get an extra refhold.
	 */
	if (ixa->ixa_free_flags & IXA_FREE_TSL)
		label_hold(ixa->ixa_tsl);

	/*
	 * We leave ixa_cred unchanged, but if it has a refhold we need
	 * to get an extra refhold.
	 */
	if (ixa->ixa_free_flags & IXA_FREE_CRED)
		crhold(ixa->ixa_cred);

	/*
	 * There is no cleanup in progress on this new copy.
	 */
	ixa->ixa_tcpcleanup = IXATC_IDLE;
}
918 925
/*
 * Duplicate an ip_xmit_attr_t.
 * Assumes that the caller controls the ixa, hence we do not need to use
 * a safe copy. We just have to increase the refcnt on any pointers.
 *
 * Returns NULL if the KM_NOSLEEP allocation fails.
 */
ip_xmit_attr_t *
ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa)
{
	ip_xmit_attr_t *ixa;

	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
	if (ixa == NULL)
		return (NULL);
	bcopy(src_ixa, ixa, sizeof (*ixa));
	/* The duplicate starts with a single reference of its own. */
	ixa->ixa_refcnt = 1;

	/* Take a reference for every pointer carried over by the bcopy. */
	if (ixa->ixa_ire != NULL)
		ire_refhold_notr(ixa->ixa_ire);
	if (ixa->ixa_nce != NULL)
		nce_refhold(ixa->ixa_nce);
	if (ixa->ixa_dce != NULL)
		dce_refhold_notr(ixa->ixa_dce);

#ifdef DEBUG
	ixa->ixa_curthread = NULL;
#endif

	if (ixa->ixa_ipsec_latch != NULL)
		IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
	if (ixa->ixa_ipsec_ah_sa != NULL)
		IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
	if (ixa->ixa_ipsec_esp_sa != NULL)
		IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
	if (ixa->ixa_ipsec_policy != NULL)
		IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
	if (ixa->ixa_ipsec_action != NULL)
		IPACT_REFHOLD(ixa->ixa_ipsec_action);

	/*
	 * Unlike the pointers above, tsl and cred use free_flags to record
	 * whether this ixa owns a reference that must be released later.
	 */
	if (ixa->ixa_tsl != NULL) {
		label_hold(ixa->ixa_tsl);
		ixa->ixa_free_flags |= IXA_FREE_TSL;
	}
	if (ixa->ixa_cred != NULL) {
		crhold(ixa->ixa_cred);
		ixa->ixa_free_flags |= IXA_FREE_CRED;
	}
	return (ixa);
}
967 974
968 975 /*
969 976 * Used to replace the ixa_label field.
970 977 * The caller should have a reference on the label, which we transfer to
971 978 * the attributes so that when the attribute is freed/cleaned up
972 979 * we will release that reference.
973 980 */
974 981 void
975 982 ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl)
976 983 {
977 984 ASSERT(tsl != NULL);
978 985
979 986 if (ixa->ixa_free_flags & IXA_FREE_TSL) {
980 987 ASSERT(ixa->ixa_tsl != NULL);
981 988 label_rele(ixa->ixa_tsl);
982 989 } else {
983 990 ixa->ixa_free_flags |= IXA_FREE_TSL;
984 991 }
985 992 ixa->ixa_tsl = tsl;
986 993 }
987 994
/*
 * Replace the ip_recv_attr_t's label.
 * Due to kernel RPC's use of db_credp we also need to replace ira_cred;
 * TCP/UDP uses ira_cred to set db_credp for non-socket users.
 * This can fail (and return B_FALSE) due to lack of memory.
 * Note that on failure the label has already been replaced; only the
 * cred update is abandoned.
 */
boolean_t
ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl)
{
	cred_t	*newcr;

	if (ira->ira_free_flags & IRA_FREE_TSL) {
		ASSERT(ira->ira_tsl != NULL);
		label_rele(ira->ira_tsl);
	}
	/* The ira owns a reference on the new label from here on. */
	label_hold(tsl);
	ira->ira_tsl = tsl;
	ira->ira_free_flags |= IRA_FREE_TSL;

	/*
	 * Reset zoneid if we have a shared address. That allows
	 * ip_fanout_tx_v4/v6 to determine the zoneid again.
	 */
	if (ira->ira_flags & IRAF_TX_SHARED_ADDR)
		ira->ira_zoneid = ALL_ZONES;

	/* We update ira_cred for RPC */
	newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP);
	if (newcr == NULL)
		return (B_FALSE);
	if (ira->ira_free_flags & IRA_FREE_CRED)
		crfree(ira->ira_cred);
	ira->ira_cred = newcr;
	ira->ira_free_flags |= IRA_FREE_CRED;
	return (B_TRUE);
}
1024 1031
1025 1032 /*
1026 1033 * This needs to be called after ip_set_destination/tsol_check_dest might
1027 1034 * have changed ixa_tsl to be specific for a destination, and we now want to
1028 1035 * send to a different destination.
1029 1036 * We have to restart with crgetlabel() since ip_set_destination/
1030 1037 * tsol_check_dest will start with ixa_tsl.
1031 1038 */
1032 1039 void
1033 1040 ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr)
1034 1041 {
1035 1042 if (!is_system_labeled())
1036 1043 return;
1037 1044
1038 1045 if (ixa->ixa_free_flags & IXA_FREE_TSL) {
1039 1046 ASSERT(ixa->ixa_tsl != NULL);
1040 1047 label_rele(ixa->ixa_tsl);
1041 1048 ixa->ixa_free_flags &= ~IXA_FREE_TSL;
1042 1049 }
1043 1050 ixa->ixa_tsl = crgetlabel(cr);
1044 1051 }
1045 1052
/*
 * Function wrapper around the IXA_REFRELE macro, for callers that need
 * a function pointer or wish to avoid the macro expansion.
 */
void
ixa_refrele(ip_xmit_attr_t *ixa)
{
	IXA_REFRELE(ixa);
}
1051 1058
/*
 * Called when the last reference on the ixa has been dropped: release
 * everything it points at and free the structure itself.
 */
void
ixa_inactive(ip_xmit_attr_t *ixa)
{
	ASSERT(ixa->ixa_refcnt == 0);

	ixa_cleanup(ixa);
	kmem_free(ixa, sizeof (*ixa));
}
1060 1067
/*
 * Release any references contained in the ixa.
 * Also clear any fields that are not controlled by ixa_flags.
 */
void
ixa_cleanup(ip_xmit_attr_t *ixa)
{
	if (ixa->ixa_ire != NULL) {
		ire_refrele_notr(ixa->ixa_ire);
		ixa->ixa_ire = NULL;
	}
	if (ixa->ixa_dce != NULL) {
		dce_refrele_notr(ixa->ixa_dce);
		ixa->ixa_dce = NULL;
	}
	if (ixa->ixa_nce != NULL) {
		nce_refrele(ixa->ixa_nce);
		ixa->ixa_nce = NULL;
	}
	/* Force re-verification if the ixa is ever reused for transmit. */
	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
	if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
		ipsec_out_release_refs(ixa);
	}
	/* tsl and cred references are only owned when the flag is set. */
	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
		ixa->ixa_free_flags &= ~IXA_FREE_TSL;
	}
	ixa->ixa_tsl = NULL;
	if (ixa->ixa_free_flags & IXA_FREE_CRED) {
		ASSERT(ixa->ixa_cred != NULL);
		crfree(ixa->ixa_cred);
		ixa->ixa_free_flags &= ~IXA_FREE_CRED;
	}
	ixa->ixa_cred = NULL;
	ixa->ixa_src_preferences = 0;
	ixa->ixa_ifindex = 0;
	ixa->ixa_multicast_ifindex = 0;
	ixa->ixa_multicast_ifaddr = INADDR_ANY;
}
1102 1109
/*
 * Release any references contained in the ira.
 * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second
 * argument.
 */
void
ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill)
{
	if (ira->ira_ill != NULL) {
		if (ira->ira_rill != ira->ira_ill) {
			/* Caused by async processing */
			ill_refrele(ira->ira_rill);
		}
		if (refrele_ill)
			ill_refrele(ira->ira_ill);
	}
	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		ipsec_in_release_refs(ira);
	}
	/* tsl and cred references are only owned when the flag is set. */
	if (ira->ira_free_flags & IRA_FREE_TSL) {
		ASSERT(ira->ira_tsl != NULL);
		label_rele(ira->ira_tsl);
		ira->ira_free_flags &= ~IRA_FREE_TSL;
	}
	ira->ira_tsl = NULL;
	if (ira->ira_free_flags & IRA_FREE_CRED) {
		ASSERT(ira->ira_cred != NULL);
		crfree(ira->ira_cred);
		ira->ira_free_flags &= ~IRA_FREE_CRED;
	}
	ira->ira_cred = NULL;
}
1135 1142
/*
 * Function to help release any IRE, NCE, or DCEs that
 * have been deleted and are marked as condemned.
 * The caller is responsible for any serialization which is different
 * for TCP, SCTP, and others.
 */
static void
ixa_cleanup_stale(ip_xmit_attr_t *ixa)
{
	ire_t	*ire;
	nce_t	*nce;
	dce_t	*dce;

	ire = ixa->ixa_ire;
	nce = ixa->ixa_nce;
	dce = ixa->ixa_dce;

	if (ire != NULL && IRE_IS_CONDEMNED(ire)) {
		/* Swap the condemned IRE for a blackhole placeholder. */
		ire_refrele_notr(ire);
		ire = ire_blackhole(ixa->ixa_ipst,
		    !(ixa->ixa_flags & IXAF_IS_IPV4));
		ASSERT(ire != NULL);
#ifdef DEBUG
		/* Exercise the tracing refhold/refrele paths. */
		ire_refhold_notr(ire);
		ire_refrele(ire);
#endif
		ixa->ixa_ire = ire;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}
	if (nce != NULL && nce->nce_is_condemned) {
		/* Can make it NULL as long as we set IRE_GENERATION_VERIFY */
		nce_refrele(nce);
		ixa->ixa_nce = NULL;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}
	if (dce != NULL && DCE_IS_CONDEMNED(dce)) {
		/* Swap the condemned DCE for the stack's default DCE. */
		dce_refrele_notr(dce);
		dce = dce_get_default(ixa->ixa_ipst);
		ASSERT(dce != NULL);
#ifdef DEBUG
		dce_refhold_notr(dce);
		dce_refrele(dce);
#endif
		ixa->ixa_dce = dce;
		ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
	}
}
1183 1190
/*
 * Acquire the per-stack shared cleanup mblk for this connection's cleanup,
 * waiting until (a) no cleanup is pending on this connection and (b) the
 * shared mblk is available.  On return the connection is marked
 * IXATC_INPROGRESS and the caller owns the mblk.
 */
static mblk_t *
tcp_ixa_cleanup_getmblk(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
	int need_retry;
	mblk_t *mp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	/*
	 * It's possible that someone else came in and started cleaning up
	 * another connection between the time we verified this one is not being
	 * cleaned up and the time we actually get the shared mblk. If that's
	 * the case, we've dropped the lock, and some other thread may have
	 * cleaned up this connection again, and is still waiting for
	 * notification of that cleanup's completion. Therefore we need to
	 * recheck.
	 */
	do {
		need_retry = 0;
		/* Wait for any in-flight cleanup of this connection. */
		while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) {
			cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}

		while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
			/*
			 * Multiple concurrent cleanups; need to have the last
			 * one run since it could be an unplumb.
			 */
			need_retry = 1;
			cv_wait(&tcps->tcps_ixa_cleanup_ready_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}
	} while (need_retry);

	/*
	 * We now have the lock and the mblk; now make sure that no one else can
	 * try to clean up this connection or enqueue it for cleanup, clear the
	 * mblk pointer for this stack, drop the lock, and return the mblk.
	 */
	ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock));
	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE);
	ASSERT(tcps->tcps_ixa_cleanup_mp == mp);
	ASSERT(mp != NULL);

	connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS;
	tcps->tcps_ixa_cleanup_mp = NULL;
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);

	return (mp);
}
1236 1243
/*
 * Used to run ixa_cleanup_stale inside the tcp squeue.
 * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
 * and waking up the caller.
 */
/* ARGSUSED2 */
static void
tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,
    ip_recv_attr_t *dummy)
{
	conn_t *connp = (conn_t *)arg;
	tcp_stack_t *tcps;

	tcps = connp->conn_netstack->netstack_tcp;

	ixa_cleanup_stale(connp->conn_ixa);

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);
	ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
	/* Mark done and return the shared mblk before signalling waiters. */
	connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE;
	tcps->tcps_ixa_cleanup_mp = mp;
	cv_signal(&tcps->tcps_ixa_cleanup_ready_cv);
	/*
	 * It is possible for any number of threads to be waiting for cleanup of
	 * different connections. Absent a per-connection (or per-IXA) CV, we
	 * need to wake them all up even though only one can be waiting on this
	 * particular cleanup.
	 */
	cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}
1268 1275
/*
 * Wait until tcp_ixa_cleanup() has completed for this connection, then
 * mark the connection idle again and wake any other waiters.
 * Counterpart of tcp_ixa_cleanup_getmblk().
 */
static void
tcp_ixa_cleanup_wait_and_finish(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE);

	while (connp->conn_ixa->ixa_tcpcleanup == IXATC_INPROGRESS) {
		cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
		    &tcps->tcps_ixa_cleanup_lock);
	}

	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE);
	connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE;
	/* Wake anyone blocked in tcp_ixa_cleanup_getmblk() on this conn. */
	cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);

	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}
1289 1296
/*
 * ipcl_walk() function to help release any IRE, NCE, or DCEs that
 * have been deleted and are marked as condemned.
 * Note that we can't cleanup the pointers since there can be threads
 * in conn_ip_output() sending while we are called.
 */
void
conn_ixa_cleanup(conn_t *connp, void *arg)
{
	boolean_t tryhard = (boolean_t)arg;

	if (IPCL_IS_TCP(connp)) {
		mblk_t		*mp;

		/* TCP serializes via the squeue and the shared cleanup mblk. */
		mp = tcp_ixa_cleanup_getmblk(connp);

		if (connp->conn_sqp->sq_run == curthread) {
			/* Already on squeue */
			tcp_ixa_cleanup(connp, mp, NULL, NULL);
		} else {
			CONN_INC_REF(connp);
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
			    connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);
		}
		tcp_ixa_cleanup_wait_and_finish(connp);
	} else if (IPCL_IS_SCTP(connp)) {
		sctp_t		*sctp;
		sctp_faddr_t	*fp;

		/* SCTP serializes by holding the association. */
		sctp = CONN2SCTP(connp);
		RUN_SCTP(sctp);
		ixa_cleanup_stale(connp->conn_ixa);
		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next)
			ixa_cleanup_stale(fp->sf_ixa);
		WAKE_SCTP(sctp);
	} else {
		ip_xmit_attr_t *ixa;

		/*
		 * If there is a different thread using conn_ixa then we get a
		 * new copy and cut the old one loose from conn_ixa. Otherwise
		 * we use conn_ixa and prevent any other thread from
		 * using/changing it. Anybody using conn_ixa (e.g., a thread in
		 * conn_ip_output) will do an ixa_refrele which will remove any
		 * references on the ire etc.
		 *
		 * Once we are done other threads can use conn_ixa since the
		 * refcnt will be back at one.
		 *
		 * We are called either because an ill is going away, or
		 * due to memory reclaim. In the former case we wait for
		 * memory since we must remove the refcnts on the ill.
		 */
		if (tryhard) {
			ixa = conn_get_ixa_tryhard(connp, B_TRUE);
			ASSERT(ixa != NULL);
		} else {
			ixa = conn_get_ixa(connp, B_TRUE);
			if (ixa == NULL) {
				/*
				 * Somebody else was using it and kmem_alloc
				 * failed! Next memory reclaim will try to
				 * clean up.
				 */
				DTRACE_PROBE1(conn__ixa__cleanup__bail,
				    conn_t *, connp);
				return;
			}
		}
		ixa_cleanup_stale(ixa);
		IXA_REFRELE(ixa);
	}
}
1363 1370
/*
 * Check whether transmit is still flow-controlled for the ixa's cookie
 * and, if so, insert the conn on the matching drain list.  Returns B_TRUE
 * if the conn was inserted.
 *
 * ixa needs to be an exclusive copy so that no one changes the cookie
 * or the ixa_nce.
 */
boolean_t
ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa)
{
	uintptr_t cookie = ixa->ixa_cookie;
	ill_dld_direct_t *idd;
	idl_tx_list_t *idl_txl;
	ill_t *ill = ixa->ixa_nce->nce_ill;
	boolean_t inserted = B_FALSE;

	idd = &(ill)->ill_dld_capab->idc_direct;
	idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
	mutex_enter(&idl_txl->txl_lock);

	/*
	 * If `cookie' is zero, ip_xmit() -> canputnext() failed -- i.e., flow
	 * control is asserted on an ill that does not support direct calls.
	 * Jump to insert.
	 */
	if (cookie == 0)
		goto tryinsert;

	ASSERT(ILL_DIRECT_CAPABLE(ill));

	if (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0) {
		/* Flow control has lifted; nothing to do. */
		DTRACE_PROBE1(ill__tx__not__blocked, uintptr_t, cookie);
	} else if (idl_txl->txl_cookie != (uintptr_t)NULL &&
	    idl_txl->txl_cookie != ixa->ixa_cookie) {
		DTRACE_PROBE2(ill__tx__cookie__collision, uintptr_t, cookie,
		    uintptr_t, idl_txl->txl_cookie);
		/* TODO: bump kstat for cookie collision */
	} else {
		/*
		 * Check/set conn_blocked under conn_lock. Note that txl_lock
		 * will not suffice since two separate UDP threads may be
		 * racing to send to different destinations that are
		 * associated with different cookies and thus may not be
		 * holding the same txl_lock. Further, since a given conn_t
		 * can only be on a single drain list, the conn_t will be
		 * enqueued on whichever thread wins this race.
		 */
tryinsert:	mutex_enter(&connp->conn_lock);
		if (connp->conn_blocked) {
			DTRACE_PROBE1(ill__tx__conn__already__blocked,
			    conn_t *, connp);
			mutex_exit(&connp->conn_lock);
		} else {
			connp->conn_blocked = B_TRUE;
			mutex_exit(&connp->conn_lock);
			idl_txl->txl_cookie = cookie;
			conn_drain_insert(connp, idl_txl);
			if (!IPCL_IS_NONSTR(connp))
				noenable(connp->conn_wq);
			inserted = B_TRUE;
		}
	}
	mutex_exit(&idl_txl->txl_lock);
	return (inserted);
}
|
↓ open down ↓ |
55 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX