Print this page
1915 IPsec kstats shouldn't be persistent
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/ip/ipsecesp.c
+++ new/usr/src/uts/common/inet/ip/ipsecesp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2017 Joyent, Inc.
26 26 */
27 27
28 28 #include <sys/types.h>
29 29 #include <sys/stream.h>
30 30 #include <sys/stropts.h>
31 31 #include <sys/errno.h>
32 32 #include <sys/strlog.h>
33 33 #include <sys/tihdr.h>
34 34 #include <sys/socket.h>
35 35 #include <sys/ddi.h>
36 36 #include <sys/sunddi.h>
37 37 #include <sys/kmem.h>
38 38 #include <sys/zone.h>
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/cmn_err.h>
41 41 #include <sys/vtrace.h>
42 42 #include <sys/debug.h>
43 43 #include <sys/atomic.h>
44 44 #include <sys/strsun.h>
45 45 #include <sys/random.h>
46 46 #include <netinet/in.h>
47 47 #include <net/if.h>
48 48 #include <netinet/ip6.h>
49 49 #include <net/pfkeyv2.h>
50 50 #include <net/pfpolicy.h>
51 51
52 52 #include <inet/common.h>
53 53 #include <inet/mi.h>
54 54 #include <inet/nd.h>
55 55 #include <inet/ip.h>
56 56 #include <inet/ip_impl.h>
57 57 #include <inet/ip6.h>
58 58 #include <inet/ip_if.h>
59 59 #include <inet/ip_ndp.h>
60 60 #include <inet/sadb.h>
61 61 #include <inet/ipsec_info.h>
62 62 #include <inet/ipsec_impl.h>
63 63 #include <inet/ipsecesp.h>
64 64 #include <inet/ipdrop.h>
65 65 #include <inet/tcp.h>
66 66 #include <sys/kstat.h>
67 67 #include <sys/policy.h>
68 68 #include <sys/strsun.h>
69 69 #include <sys/strsubr.h>
70 70 #include <inet/udp_impl.h>
71 71 #include <sys/taskq.h>
72 72 #include <sys/note.h>
73 73
74 74 #include <sys/tsol/tnet.h>
75 75
76 76 /*
77 77 * Table of ND variables supported by ipsecesp. These are loaded into
78 78 * ipsecesp_g_nd in ipsecesp_init_nd.
79 79 * All of these are alterable, within the min/max values given, at run time.
80 80 */
81 81 static ipsecespparam_t lcl_param_arr[] = {
82 82 /* min max value name */
83 83 { 0, 3, 0, "ipsecesp_debug"},
84 84 { 125, 32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
85 85 { 1, 10, 1, "ipsecesp_reap_delay"},
86 86 { 1, SADB_MAX_REPLAY, 64, "ipsecesp_replay_size"},
87 87 { 1, 300, 15, "ipsecesp_acquire_timeout"},
88 88 { 1, 1800, 90, "ipsecesp_larval_timeout"},
89 89 /* Default lifetime values for ACQUIRE messages. */
90 90 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_bytes"},
91 91 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_bytes"},
92 92 { 0, 0xffffffffU, 24000, "ipsecesp_default_soft_addtime"},
93 93 { 0, 0xffffffffU, 28800, "ipsecesp_default_hard_addtime"},
94 94 { 0, 0xffffffffU, 0, "ipsecesp_default_soft_usetime"},
95 95 { 0, 0xffffffffU, 0, "ipsecesp_default_hard_usetime"},
96 96 { 0, 1, 0, "ipsecesp_log_unknown_spi"},
97 97 { 0, 2, 1, "ipsecesp_padding_check"},
98 98 { 0, 600, 20, "ipsecesp_nat_keepalive_interval"},
99 99 };
100 100 /* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */
101 101
102 102 #define esp0dbg(a) printf a
103 103 /* NOTE: != 0 instead of > 0 so lint doesn't complain. */
104 104 #define esp1dbg(espstack, a) if (espstack->ipsecesp_debug != 0) printf a
105 105 #define esp2dbg(espstack, a) if (espstack->ipsecesp_debug > 1) printf a
106 106 #define esp3dbg(espstack, a) if (espstack->ipsecesp_debug > 2) printf a
107 107
108 108 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
109 109 static int ipsecesp_close(queue_t *);
110 110 static void ipsecesp_wput(queue_t *, mblk_t *);
111 111 static void *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
112 112 static void ipsecesp_stack_fini(netstackid_t stackid, void *arg);
113 113
114 114 static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
115 115 static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *);
116 116 static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *);
117 117
118 118 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
119 119 ipsecesp_stack_t *, cred_t *);
120 120 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
121 121 kstat_named_t **, ipsecesp_stack_t *);
122 122 static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *,
123 123 ipsa_t *, uint_t);
124 124 static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *,
125 125 ipsa_t *, uchar_t *, uint_t);
126 126
127 127 /* Setable in /etc/system */
128 128 uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;
129 129
130 130 static struct module_info info = {
131 131 5137, "ipsecesp", 0, INFPSZ, 65536, 1024
132 132 };
133 133
134 134 static struct qinit rinit = {
135 135 (pfi_t)putnext, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
136 136 NULL
137 137 };
138 138
139 139 static struct qinit winit = {
140 140 (pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
141 141 NULL
142 142 };
143 143
144 144 struct streamtab ipsecespinfo = {
145 145 &rinit, &winit, NULL, NULL
146 146 };
147 147
148 148 static taskq_t *esp_taskq;
149 149
150 150 /*
151 151 * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
152 152 *
153 153 * Question: Do I need this, given that all instance's esps->esps_wq point
154 154 * to IP?
155 155 *
156 156 * Answer: Yes, because I need to know which queue is BOUND to
|
↓ open down ↓ |
156 lines elided |
↑ open up ↑ |
157 157 * IPPROTO_ESP
158 158 */
159 159
160 160 static int esp_kstat_update(kstat_t *, int);
161 161
162 162 static boolean_t
163 163 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
164 164 {
165 165 espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
166 166 "net", KSTAT_TYPE_NAMED,
167 - sizeof (esp_kstats_t) / sizeof (kstat_named_t),
168 - KSTAT_FLAG_PERSISTENT, stackid);
167 + sizeof (esp_kstats_t) / sizeof (kstat_named_t), 0, stackid);
169 168
170 169 if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
171 170 return (B_FALSE);
172 171
173 172 espstack->esp_kstats = espstack->esp_ksp->ks_data;
174 173
175 174 espstack->esp_ksp->ks_update = esp_kstat_update;
176 175 espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid;
177 176
178 177 #define K64 KSTAT_DATA_UINT64
179 178 #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64)
180 179
181 180 KI(num_aalgs);
182 181 KI(num_ealgs);
183 182 KI(good_auth);
184 183 KI(bad_auth);
185 184 KI(bad_padding);
186 185 KI(replay_failures);
187 186 KI(replay_early_failures);
188 187 KI(keysock_in);
189 188 KI(out_requests);
190 189 KI(acquire_requests);
191 190 KI(bytes_expired);
192 191 KI(out_discards);
193 192 KI(crypto_sync);
194 193 KI(crypto_async);
195 194 KI(crypto_failures);
196 195 KI(bad_decrypt);
197 196 KI(sa_port_renumbers);
198 197
199 198 #undef KI
200 199 #undef K64
201 200
202 201 kstat_install(espstack->esp_ksp);
203 202
204 203 return (B_TRUE);
205 204 }
206 205
207 206 static int
208 207 esp_kstat_update(kstat_t *kp, int rw)
209 208 {
210 209 esp_kstats_t *ekp;
211 210 netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private;
212 211 netstack_t *ns;
213 212 ipsec_stack_t *ipss;
214 213
215 214 if ((kp == NULL) || (kp->ks_data == NULL))
216 215 return (EIO);
217 216
218 217 if (rw == KSTAT_WRITE)
219 218 return (EACCES);
220 219
221 220 ns = netstack_find_by_stackid(stackid);
222 221 if (ns == NULL)
223 222 return (-1);
224 223 ipss = ns->netstack_ipsec;
225 224 if (ipss == NULL) {
226 225 netstack_rele(ns);
227 226 return (-1);
228 227 }
229 228 ekp = (esp_kstats_t *)kp->ks_data;
230 229
231 230 rw_enter(&ipss->ipsec_alg_lock, RW_READER);
232 231 ekp->esp_stat_num_aalgs.value.ui64 =
233 232 ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
234 233 ekp->esp_stat_num_ealgs.value.ui64 =
235 234 ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
236 235 rw_exit(&ipss->ipsec_alg_lock);
237 236
238 237 netstack_rele(ns);
239 238 return (0);
240 239 }
241 240
242 241 #ifdef DEBUG
243 242 /*
244 243 * Debug routine, useful to see pre-encryption data.
245 244 */
246 245 static char *
247 246 dump_msg(mblk_t *mp)
248 247 {
249 248 char tmp_str[3], tmp_line[256];
250 249
251 250 while (mp != NULL) {
252 251 unsigned char *ptr;
253 252
254 253 printf("mblk address 0x%p, length %ld, db_ref %d "
255 254 "type %d, base 0x%p, lim 0x%p\n",
256 255 (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
257 256 mp->b_datap->db_ref, mp->b_datap->db_type,
258 257 (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
259 258 ptr = mp->b_rptr;
260 259
261 260 tmp_line[0] = '\0';
262 261 while (ptr < mp->b_wptr) {
263 262 uint_t diff;
264 263
265 264 diff = (ptr - mp->b_rptr);
266 265 if (!(diff & 0x1f)) {
267 266 if (strlen(tmp_line) > 0) {
268 267 printf("bytes: %s\n", tmp_line);
269 268 tmp_line[0] = '\0';
270 269 }
271 270 }
272 271 if (!(diff & 0x3))
273 272 (void) strcat(tmp_line, " ");
274 273 (void) sprintf(tmp_str, "%02x", *ptr);
275 274 (void) strcat(tmp_line, tmp_str);
276 275 ptr++;
277 276 }
278 277 if (strlen(tmp_line) > 0)
279 278 printf("bytes: %s\n", tmp_line);
280 279
281 280 mp = mp->b_cont;
282 281 }
283 282
284 283 return ("\n");
285 284 }
286 285
287 286 #else /* DEBUG */
288 287 static char *
289 288 dump_msg(mblk_t *mp)
290 289 {
291 290 printf("Find value of mp %p.\n", mp);
292 291 return ("\n");
293 292 }
294 293 #endif /* DEBUG */
295 294
296 295 /*
297 296 * Don't have to lock age_interval, as only one thread will access it at
298 297 * a time, because I control the one function that does with timeout().
299 298 */
300 299 static void
301 300 esp_ager(void *arg)
302 301 {
303 302 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
304 303 netstack_t *ns = espstack->ipsecesp_netstack;
305 304 hrtime_t begin = gethrtime();
306 305
307 306 sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q,
308 307 espstack->ipsecesp_reap_delay, ns);
309 308 sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q,
310 309 espstack->ipsecesp_reap_delay, ns);
311 310
312 311 espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q,
313 312 esp_ager, espstack,
314 313 &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max,
315 314 info.mi_idnum);
316 315 }
317 316
318 317 /*
319 318 * Get an ESP NDD parameter.
320 319 */
321 320 /* ARGSUSED */
322 321 static int
323 322 ipsecesp_param_get(
324 323 queue_t *q,
325 324 mblk_t *mp,
326 325 caddr_t cp,
327 326 cred_t *cr)
328 327 {
329 328 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
330 329 uint_t value;
331 330 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;
332 331
333 332 mutex_enter(&espstack->ipsecesp_param_lock);
334 333 value = ipsecesppa->ipsecesp_param_value;
335 334 mutex_exit(&espstack->ipsecesp_param_lock);
336 335
337 336 (void) mi_mpprintf(mp, "%u", value);
338 337 return (0);
339 338 }
340 339
341 340 /*
342 341 * This routine sets an NDD variable in a ipsecespparam_t structure.
343 342 */
344 343 /* ARGSUSED */
345 344 static int
346 345 ipsecesp_param_set(
347 346 queue_t *q,
348 347 mblk_t *mp,
349 348 char *value,
350 349 caddr_t cp,
351 350 cred_t *cr)
352 351 {
353 352 ulong_t new_value;
354 353 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
355 354 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;
356 355
357 356 /*
358 357 * Fail the request if the new value does not lie within the
359 358 * required bounds.
360 359 */
361 360 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
362 361 new_value < ipsecesppa->ipsecesp_param_min ||
363 362 new_value > ipsecesppa->ipsecesp_param_max) {
364 363 return (EINVAL);
365 364 }
366 365
367 366 /* Set the new value */
368 367 mutex_enter(&espstack->ipsecesp_param_lock);
369 368 ipsecesppa->ipsecesp_param_value = new_value;
370 369 mutex_exit(&espstack->ipsecesp_param_lock);
371 370 return (0);
372 371 }
373 372
374 373 /*
375 374 * Using lifetime NDD variables, fill in an extended combination's
376 375 * lifetime information.
377 376 */
378 377 void
379 378 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
380 379 {
381 380 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
382 381
383 382 ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes;
384 383 ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes;
385 384 ecomb->sadb_x_ecomb_soft_addtime =
386 385 espstack->ipsecesp_default_soft_addtime;
387 386 ecomb->sadb_x_ecomb_hard_addtime =
388 387 espstack->ipsecesp_default_hard_addtime;
389 388 ecomb->sadb_x_ecomb_soft_usetime =
390 389 espstack->ipsecesp_default_soft_usetime;
391 390 ecomb->sadb_x_ecomb_hard_usetime =
392 391 espstack->ipsecesp_default_hard_usetime;
393 392 }
394 393
/*
 * Initialize things for ESP at module load time.
 * Always returns B_TRUE.
 */
boolean_t
ipsecesp_ddi_init(void)
{
	/* One worker thread; sized by the shared IPsec taskq bounds. */
	esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
	    IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsecesp_stack_t's.
	 */
	netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL,
	    ipsecesp_stack_fini);

	return (B_TRUE);
}
414 413
415 414 /*
416 415 * Walk through the param array specified registering each element with the
417 416 * named dispatch handler.
418 417 */
419 418 static boolean_t
420 419 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt)
421 420 {
422 421 for (; cnt-- > 0; espp++) {
423 422 if (espp->ipsecesp_param_name != NULL &&
424 423 espp->ipsecesp_param_name[0]) {
425 424 if (!nd_load(ndp,
426 425 espp->ipsecesp_param_name,
427 426 ipsecesp_param_get, ipsecesp_param_set,
428 427 (caddr_t)espp)) {
429 428 nd_free(ndp);
430 429 return (B_FALSE);
431 430 }
432 431 }
433 432 }
434 433 return (B_TRUE);
435 434 }
436 435
437 436 /*
438 437 * Initialize things for ESP for each stack instance
439 438 */
440 439 static void *
441 440 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns)
442 441 {
443 442 ipsecesp_stack_t *espstack;
444 443 ipsecespparam_t *espp;
445 444
446 445 espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack),
447 446 KM_SLEEP);
448 447 espstack->ipsecesp_netstack = ns;
449 448
450 449 espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
451 450 espstack->ipsecesp_params = espp;
452 451 bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr));
453 452
454 453 (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp,
455 454 A_CNT(lcl_param_arr));
456 455
457 456 (void) esp_kstat_init(espstack, stackid);
458 457
459 458 espstack->esp_sadb.s_acquire_timeout =
460 459 &espstack->ipsecesp_acquire_timeout;
461 460 sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size,
462 461 espstack->ipsecesp_netstack);
463 462
464 463 mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);
465 464
466 465 ip_drop_register(&espstack->esp_dropper, "IPsec ESP");
467 466 return (espstack);
468 467 }
469 468
/*
 * Destroy things for ESP at module unload time.
 */
void
ipsecesp_ddi_destroy(void)
{
	/* Stop receiving stack create/destroy callbacks first. */
	netstack_unregister(NS_IPSECESP);
	taskq_destroy(esp_taskq);
}
479 478
/*
 * Destroy things for ESP for one stack instance.
 * Teardown mirrors ipsecesp_stack_init(): timers, SADBs, drop
 * registration, lock, NDD table, tunables, kstats, then the stack
 * structure itself.
 */
static void
ipsecesp_stack_fini(netstackid_t stackid, void *arg)
{
	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;

	/* Cancel the pending SA-aging timeout, if any was scheduled. */
	if (espstack->esp_pfkey_q != NULL) {
		(void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event);
	}
	espstack->esp_sadb.s_acquire_timeout = NULL;
	sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack);
	ip_drop_unregister(&espstack->esp_dropper);
	mutex_destroy(&espstack->ipsecesp_param_lock);
	nd_free(&espstack->ipsecesp_g_nd);

	kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr));
	espstack->ipsecesp_params = NULL;
	/* Remove the per-stack kstats created by esp_kstat_init(). */
	kstat_delete_netstack(espstack->esp_ksp, stackid);
	espstack->esp_ksp = NULL;
	espstack->esp_kstats = NULL;
	kmem_free(espstack, sizeof (*espstack));
}
504 503
505 504 /*
506 505 * ESP module open routine, which is here for keysock plumbing.
507 506 * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
508 507 * Days of export control, and fears that ESP would not be allowed
509 508 * to be shipped at all by default. Eventually, keysock should
510 509 * either access AH and ESP via modstubs or krtld dependencies, or
511 510 * perhaps be folded in with AH and ESP into a single IPsec/netsec
512 511 * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
513 512 */
514 513 /* ARGSUSED */
515 514 static int
516 515 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
517 516 {
518 517 netstack_t *ns;
519 518 ipsecesp_stack_t *espstack;
520 519
521 520 if (secpolicy_ip_config(credp, B_FALSE) != 0)
522 521 return (EPERM);
523 522
524 523 if (q->q_ptr != NULL)
525 524 return (0); /* Re-open of an already open instance. */
526 525
527 526 if (sflag != MODOPEN)
528 527 return (EINVAL);
529 528
530 529 ns = netstack_find_by_cred(credp);
531 530 ASSERT(ns != NULL);
532 531 espstack = ns->netstack_ipsecesp;
533 532 ASSERT(espstack != NULL);
534 533
535 534 q->q_ptr = espstack;
536 535 WR(q)->q_ptr = q->q_ptr;
537 536
538 537 qprocson(q);
539 538 return (0);
540 539 }
541 540
/*
 * ESP module close routine. Always returns 0.
 */
static int
ipsecesp_close(queue_t *q)
{
	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;

	/*
	 * Clean up q_ptr, if needed.
	 */
	qprocsoff(q);

	/* Keysock queue check is safe, because of OCEXCL perimeter. */

	if (q == espstack->esp_pfkey_q) {
		esp1dbg(espstack,
		    ("ipsecesp_close: Ummm... keysock is closing ESP.\n"));
		espstack->esp_pfkey_q = NULL;
		/* Detach qtimeouts. */
		(void) quntimeout(q, espstack->esp_event);
	}

	/*
	 * Release our netstack reference (presumably balances
	 * netstack_find_by_cred() in ipsecesp_open() -- confirm).
	 */
	netstack_rele(espstack->ipsecesp_netstack);
	return (0);
}
568 567
/*
 * Add a number of bytes to what the SA has protected so far. Return
 * B_TRUE if the SA can still protect that many bytes.
 *
 * Caller must REFRELE the passed-in assoc. This function must REFRELE
 * any obtained peer SA.
 */
static boolean_t
esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
{
	ipsa_t *inassoc, *outassoc;
	isaf_t *bucket;
	boolean_t inrc, outrc, isv6;
	sadb_t *sp;
	int outhash;
	netstack_t *ns = assoc->ipsa_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

	/* No peer? No problem! */
	if (!assoc->ipsa_haspeer) {
		return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes,
		    B_TRUE));
	}

	/*
	 * Otherwise, we want to grab both the original assoc and its peer.
	 * There might be a race for this, but if it's a real race, two
	 * expire messages may occur. We limit this by only sending the
	 * expire message on one of the peers, we'll pick the inbound
	 * arbitrarily.
	 *
	 * If we need tight synchronization on the peer SA, then we need to
	 * reconsider.
	 */

	/* Use address length to select IPv6/IPv4 */
	isv6 = (assoc->ipsa_addrfam == AF_INET6);
	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;

	if (inbound) {
		inassoc = assoc;
		/* Locate the matching outbound SA via the dst-addr hash. */
		if (isv6) {
			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
			    &inassoc->ipsa_dstaddr));
		} else {
			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
			    &inassoc->ipsa_dstaddr));
		}
		bucket = &sp->sdb_of[outhash];
		mutex_enter(&bucket->isaf_lock);
		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
		    inassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (outassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_age_bytes: "
			    "can't find peer for inbound.\n"));
			return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc,
			    bytes, B_TRUE));
		}
	} else {
		outassoc = assoc;
		/* Inbound SAs are hashed by SPI. */
		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
		mutex_enter(&bucket->isaf_lock);
		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
		    outassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (inassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_age_bytes: "
			    "can't find peer for outbound.\n"));
			return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc,
			    bytes, B_TRUE));
		}
	}

	/* Age both peers; only the inbound side gets the expire message. */
	inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE);
	outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE);

	/*
	 * REFRELE any peer SA.
	 *
	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
	 * them in { }.
	 */
	if (inbound) {
		IPSA_REFRELE(outassoc);
	} else {
		IPSA_REFRELE(inassoc);
	}

	return (inrc && outrc);
}
664 663
/*
 * Do incoming NAT-T manipulations for packet: patch the transport
 * checksum by the SA's precomputed inbound adjustment.
 * Returns NULL if the mblk chain is consumed.
 */
static mblk_t *
esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
{
	ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
	tcpha_t *tcpha;
	udpha_t *udpha;
	/* Initialize to our inbound cksum adjustment... */
	uint32_t sum = assoc->ipsa_inbound_cksum;

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
		tcpha = (tcpha_t *)(data_mp->b_rptr +
		    IPH_HDR_LENGTH(ipha));

/* Fold the 32-bit accumulator back into 16 bits (ones-complement). */
#define DOWN_SUM(x) (x) = ((x) & 0xFFFF) + ((x) >> 16)
		sum += ~ntohs(tcpha->tha_sum) & 0xFFFF;
		DOWN_SUM(sum);
		DOWN_SUM(sum);
		tcpha->tha_sum = ~htons(sum);
		break;
	case IPPROTO_UDP:
		udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));

		if (udpha->uha_checksum != 0) {
			/* Adjust if the inbound one was not zero. */
			sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
			DOWN_SUM(sum);
			DOWN_SUM(sum);
			udpha->uha_checksum = ~htons(sum);
			/* Zero means "no checksum" for UDP; use all-ones. */
			if (udpha->uha_checksum == 0)
				udpha->uha_checksum = 0xFFFF;
		}
#undef DOWN_SUM
		break;
	case IPPROTO_IP:
		/*
		 * This case is only an issue for self-encapsulated
		 * packets. So for now, fall through.
		 */
		break;
	}
	return (data_mp);
}
712 711
713 712
/*
 * Strip ESP header, check padding, and fix IP header.
 * Returns B_TRUE on success, B_FALSE if an error occured (in which
 * case *counter is set to the appropriate ip_drop counter).
 */
static boolean_t
esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
    kstat_named_t **counter, ipsecesp_stack_t *espstack)
{
	ipha_t *ipha;
	ip6_t *ip6h;
	uint_t divpoint;	/* Offset where the ESP header begins. */
	mblk_t *scratch;
	uint8_t nexthdr, padlen;
	uint8_t lastpad;
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
	uint8_t *lastbyte;

	/*
	 * Strip ESP data and fix IP header.
	 *
	 * XXX In case the beginning of esp_inbound() changes to not do a
	 * pullup, this part of the code can remain unchanged.
	 */
	if (isv4) {
		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
		ipha = (ipha_t *)data_mp->b_rptr;
		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
		    IPH_HDR_LENGTH(ipha));
		divpoint = IPH_HDR_LENGTH(ipha);
	} else {
		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
		ip6h = (ip6_t *)data_mp->b_rptr;
		divpoint = ip_hdr_length_v6(data_mp, ip6h);
	}

	/* Walk to the last mblk, which holds the ESP trailer. */
	scratch = data_mp;
	while (scratch->b_cont != NULL)
		scratch = scratch->b_cont;

	ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);

	/*
	 * "Next header" and padding length are the last two bytes in the
	 * ESP-protected datagram, thus the explicit - 1 and - 2.
	 * lastpad is the last byte of the padding, which can be used for
	 * a quick check to see if the padding is correct.
	 */
	lastbyte = scratch->b_wptr - 1;
	nexthdr = *lastbyte--;
	padlen = *lastbyte--;

	if (isv4) {
		/* Fix part of the IP header. */
		ipha->ipha_protocol = nexthdr;
		/*
		 * Reality check the padlen. The explicit - 2 is for the
		 * padding length and the next-header bytes.
		 */
		if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
		    sizeof (esph_t) - ivlen) {
			ESP_BUMP_STAT(espstack, bad_decrypt);
			ipsec_rl_strlog(espstack->ipsecesp_netstack,
			    info.mi_idnum, 0, 0,
			    SL_ERROR | SL_WARN,
			    "Corrupt ESP packet (padlen too big).\n");
			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
			    padlen));
			esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp "
			    "hdr - ivlen(%d) = %d.\n",
			    ntohs(ipha->ipha_length), ivlen,
			    (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
			    2 - sizeof (esph_t) - ivlen)));
			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
			return (B_FALSE);
		}

		/*
		 * Fix the rest of the header. The explicit - 2 is for the
		 * padding length and the next-header bytes.
		 */
		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
		    2 - sizeof (esph_t) - ivlen);
		ipha->ipha_hdr_checksum = 0;
		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
	} else {
		/*
		 * Patch the next-header field of whichever header
		 * immediately precedes the ESP header.
		 */
		if (ip6h->ip6_nxt == IPPROTO_ESP) {
			ip6h->ip6_nxt = nexthdr;
		} else {
			ip_pkt_t ipp;

			bzero(&ipp, sizeof (ipp));
			(void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp,
			    NULL);
			if (ipp.ipp_dstopts != NULL) {
				ipp.ipp_dstopts->ip6d_nxt = nexthdr;
			} else if (ipp.ipp_rthdr != NULL) {
				ipp.ipp_rthdr->ip6r_nxt = nexthdr;
			} else if (ipp.ipp_hopopts != NULL) {
				ipp.ipp_hopopts->ip6h_nxt = nexthdr;
			} else {
				/* Panic a DEBUG kernel. */
				ASSERT(ipp.ipp_hopopts != NULL);
				/* Otherwise, pretend it's IP + ESP. */
				cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
				ip6h->ip6_nxt = nexthdr;
			}
		}

		/* Reality check the padlen, as in the IPv4 case above. */
		if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
		    ivlen) {
			ESP_BUMP_STAT(espstack, bad_decrypt);
			ipsec_rl_strlog(espstack->ipsecesp_netstack,
			    info.mi_idnum, 0, 0,
			    SL_ERROR | SL_WARN,
			    "Corrupt ESP packet (v6 padlen too big).\n");
			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
			    padlen));
			esp1dbg(espstack,
			    ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = "
			    "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
			    + sizeof (ip6_t)), ivlen,
			    (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
			    sizeof (esph_t) - ivlen)));
			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
			return (B_FALSE);
		}


		/*
		 * Fix the rest of the header. The explicit - 2 is for the
		 * padding length and the next-header bytes. IPv6 is nice,
		 * because there's no hdr checksum!
		 */
		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
		    2 - sizeof (esph_t) - ivlen);
	}

	if (espstack->ipsecesp_padding_check > 0 && padlen > 0) {
		/*
		 * Weak padding check: compare last-byte to length, they
		 * should be equal.
		 */
		lastpad = *lastbyte--;

		if (padlen != lastpad) {
			ipsec_rl_strlog(espstack->ipsecesp_netstack,
			    info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
			    "Corrupt ESP packet (lastpad != padlen).\n");
			esp1dbg(espstack,
			    ("lastpad (%d) not equal to padlen (%d):\n",
			    lastpad, padlen));
			ESP_BUMP_STAT(espstack, bad_padding);
			*counter = DROPPER(ipss, ipds_esp_bad_padding);
			return (B_FALSE);
		}

		/*
		 * Strong padding check: Check all pad bytes to see that
		 * they're ascending. Go backwards using a descending counter
		 * to verify. padlen == 1 is checked by previous block, so
		 * only bother if we've more than 1 byte of padding.
		 * Consequently, start the check one byte before the location
		 * of "lastpad".
		 */
		if (espstack->ipsecesp_padding_check > 1) {
			/*
			 * This assert may have to become an if and a pullup
			 * if we start accepting multi-dblk mblks. For now,
			 * though, any packet here will have been pulled up in
			 * esp_inbound.
			 */
			ASSERT(MBLKL(scratch) >= lastpad + 3);

			/*
			 * Use "--lastpad" because we already checked the very
			 * last pad byte previously.
			 */
			while (--lastpad != 0) {
				if (lastpad != *lastbyte) {
					ipsec_rl_strlog(
					    espstack->ipsecesp_netstack,
					    info.mi_idnum, 0, 0,
					    SL_ERROR | SL_WARN, "Corrupt ESP "
					    "packet (bad padding).\n");
					esp1dbg(espstack,
					    ("padding not in correct"
					    " format:\n"));
					ESP_BUMP_STAT(espstack, bad_padding);
					*counter = DROPPER(ipss,
					    ipds_esp_bad_padding);
					return (B_FALSE);
				}
				lastbyte--;
			}
		}
	}

	/* Trim off the padding. */
	ASSERT(data_mp->b_cont == NULL);
	data_mp->b_wptr -= (padlen + 2);

	/*
	 * Remove the ESP header.
	 *
	 * The above assertions about data_mp's size will make this work.
	 *
	 * XXX Question: If I send up and get back a contiguous mblk,
	 * would it be quicker to bcopy over, or keep doing the dupb stuff?
	 * I go with copying for now.
	 */

	if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
	    IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
		/* Aligned case: slide the IP header up a word at a time. */
		uint8_t *start = data_mp->b_rptr;
		uint32_t *src, *dst;

		src = (uint32_t *)(start + divpoint);
		dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);

		ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
		    IS_P2ALIGNED(src, sizeof (uint32_t)));

		do {
			src--;
			dst--;
			*dst = *src;
		} while (src != (uint32_t *)start);

		data_mp->b_rptr = (uchar_t *)dst;
	} else {
		/* Unaligned case: same slide, one byte at a time. */
		uint8_t *start = data_mp->b_rptr;
		uint8_t *src, *dst;

		src = start + divpoint;
		dst = src + sizeof (esph_t) + ivlen;

		do {
			src--;
			dst--;
			*dst = *src;
		} while (src != start);

		data_mp->b_rptr = dst;
	}

	esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n"));
	esp2dbg(espstack, (dump_msg(data_mp)));

	return (B_TRUE);
}
964 963
/*
 * Updating use times can be tricky business if the ipsa_haspeer flag is
 * set. This function is called once in an SA's lifetime.
 *
 * Caller has to REFRELE "assoc" which is passed in. This function has
 * to REFRELE any peer SA that is obtained.
 */
static void
esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
{
	ipsa_t *inassoc, *outassoc;
	isaf_t *bucket;
	sadb_t *sp;
	int outhash;
	boolean_t isv6;
	netstack_t *ns = assoc->ipsa_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

	/* No peer? No problem! */
	if (!assoc->ipsa_haspeer) {
		sadb_set_usetime(assoc);
		return;
	}

	/*
	 * Otherwise, we want to grab both the original assoc and its peer.
	 * There might be a race for this, but if it's a real race, the times
	 * will be out-of-synch by at most a second, and since our time
	 * granularity is a second, this won't be a problem.
	 *
	 * If we need tight synchronization on the peer SA, then we need to
	 * reconsider.
	 */

	/* Use address length to select IPv6/IPv4 */
	isv6 = (assoc->ipsa_addrfam == AF_INET6);
	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;

	if (inbound) {
		/*
		 * We have the inbound SA; find its outbound peer in the
		 * outbound fanout, hashed by destination address.
		 */
		inassoc = assoc;
		if (isv6) {
			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
			    &inassoc->ipsa_dstaddr));
		} else {
			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
			    &inassoc->ipsa_dstaddr));
		}
		bucket = &sp->sdb_of[outhash];
		/* Bucket lock held only for the lookup, not the update. */
		mutex_enter(&bucket->isaf_lock);
		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
		    inassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (outassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_set_usetime: "
			    "can't find peer for inbound.\n"));
			/* Peer vanished; update the one SA we do hold. */
			sadb_set_usetime(inassoc);
			return;
		}
	} else {
		/* Mirror case: outbound SA in hand, find inbound by SPI. */
		outassoc = assoc;
		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
		mutex_enter(&bucket->isaf_lock);
		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
		    outassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (inassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_set_usetime: "
			    "can't find peer for outbound.\n"));
			sadb_set_usetime(outassoc);
			return;
		}
	}

	/* Update usetime on both. */
	sadb_set_usetime(inassoc);
	sadb_set_usetime(outassoc);

	/*
	 * REFRELE any peer SA.
	 *
	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
	 * them in { }.
	 */
	if (inbound) {
		IPSA_REFRELE(outassoc);
	} else {
		IPSA_REFRELE(inassoc);
	}
}
1058 1057
1059 1058 /*
1060 1059 * Handle ESP inbound data for IPv4 and IPv6.
1061 1060 * On success returns B_TRUE, on failure returns B_FALSE and frees the
1062 1061 * mblk chain data_mp.
1063 1062 */
1064 1063 mblk_t *
1065 1064 esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
1066 1065 {
1067 1066 esph_t *esph = (esph_t *)arg;
1068 1067 ipsa_t *ipsa = ira->ira_ipsec_esp_sa;
1069 1068 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
1070 1069 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1071 1070 ipsec_stack_t *ipss = ns->netstack_ipsec;
1072 1071
1073 1072 /*
1074 1073 * We may wish to check replay in-range-only here as an optimization.
1075 1074 * Include the reality check of ipsa->ipsa_replay >
1076 1075 * ipsa->ipsa_replay_wsize for times when it's the first N packets,
1077 1076 * where N == ipsa->ipsa_replay_wsize.
1078 1077 *
1079 1078 * Another check that may come here later is the "collision" check.
1080 1079 * If legitimate packets flow quickly enough, this won't be a problem,
1081 1080 * but collisions may cause authentication algorithm crunching to
1082 1081 * take place when it doesn't need to.
1083 1082 */
1084 1083 if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
1085 1084 ESP_BUMP_STAT(espstack, replay_early_failures);
1086 1085 IP_ESP_BUMP_STAT(ipss, in_discards);
1087 1086 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
1088 1087 DROPPER(ipss, ipds_esp_early_replay),
1089 1088 &espstack->esp_dropper);
1090 1089 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1091 1090 return (NULL);
1092 1091 }
1093 1092
1094 1093 /*
1095 1094 * Adjust the IP header's payload length to reflect the removal
1096 1095 * of the ICV.
1097 1096 */
1098 1097 if (!(ira->ira_flags & IRAF_IS_IPV4)) {
1099 1098 ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
1100 1099 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
1101 1100 ipsa->ipsa_mac_len);
1102 1101 } else {
1103 1102 ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
1104 1103 ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
1105 1104 ipsa->ipsa_mac_len);
1106 1105 }
1107 1106
1108 1107 /* submit the request to the crypto framework */
1109 1108 return (esp_submit_req_inbound(data_mp, ira, ipsa,
1110 1109 (uint8_t *)esph - data_mp->b_rptr));
1111 1110 }
1112 1111
/* XXX refactor me */
/*
 * Handle the SADB_GETSPI message. Create a larval SA.
 *
 * The larval SA is inserted into the inbound fanout with a hard expiry
 * of ipsecesp_larval_timeout seconds; key management must complete it
 * (SADB_UPDATE) before then. On success the request message is reused
 * as the PF_KEY reply and putnext()-ed up esp_pfkey_q.
 */
static void
esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
{
	ipsa_t *newbie, *target;
	isaf_t *outbound, *inbound;
	int rc, diagnostic;
	sadb_sa_t *assoc;
	keysock_out_t *kso;
	uint32_t newspi;

	/*
	 * Randomly generate a proposed SPI value
	 */
	/* In a cluster, delegate SPI selection so it is cluster-unique. */
	if (cl_inet_getspi != NULL) {
		cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid,
		    IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
	} else {
		(void) random_get_pseudo_bytes((uint8_t *)&newspi,
		    sizeof (uint32_t));
	}
	newbie = sadb_getspi(ksi, newspi, &diagnostic,
	    espstack->ipsecesp_netstack, IPPROTO_ESP);

	/* NULL => allocation failure; (ipsa_t *)-1 => malformed request. */
	if (newbie == NULL) {
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic,
		    ksi->ks_in_serial);
		return;
	} else if (newbie == (ipsa_t *)-1) {
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
		    ksi->ks_in_serial);
		return;
	}

	/*
	 * XXX - We may randomly collide. We really should recover from this.
	 * Unfortunately, that could require spending way-too-much-time
	 * in here. For now, let the user retry.
	 */

	if (newbie->ipsa_addrfam == AF_INET6) {
		outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6,
		    *(uint32_t *)(newbie->ipsa_dstaddr));
		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6,
		    newbie->ipsa_spi);
	} else {
		ASSERT(newbie->ipsa_addrfam == AF_INET);
		outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4,
		    *(uint32_t *)(newbie->ipsa_dstaddr));
		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4,
		    newbie->ipsa_spi);
	}

	/* Lock order: outbound bucket before inbound bucket. */
	mutex_enter(&outbound->isaf_lock);
	mutex_enter(&inbound->isaf_lock);

	/*
	 * Check for collisions (i.e. did sadb_getspi() return with something
	 * that already exists?).
	 *
	 * Try outbound first. Even though SADB_GETSPI is traditionally
	 * for inbound SAs, you never know what a user might do.
	 */
	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
	if (target == NULL) {
		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
		    newbie->ipsa_addrfam);
	}

	/*
	 * I don't have collisions elsewhere!
	 * (Nor will I because I'm still holding inbound/outbound locks.)
	 */

	if (target != NULL) {
		rc = EEXIST;
		IPSA_REFRELE(target);
	} else {
		/*
		 * sadb_insertassoc() also checks for collisions, so
		 * if there's a colliding entry, rc will be set
		 * to EEXIST.
		 */
		rc = sadb_insertassoc(newbie, inbound);
		/* Larval SA: expire hard unless key mgmt. completes it. */
		newbie->ipsa_hardexpiretime = gethrestime_sec();
		newbie->ipsa_hardexpiretime +=
		    espstack->ipsecesp_larval_timeout;
	}

	/*
	 * Can exit outbound mutex. Hold inbound until we're done
	 * with newbie.
	 */
	mutex_exit(&outbound->isaf_lock);

	if (rc != 0) {
		mutex_exit(&inbound->isaf_lock);
		IPSA_REFRELE(newbie);
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc,
		    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
		return;
	}


	/* Can write here because I'm still holding the bucket lock. */
	newbie->ipsa_type = SADB_SATYPE_ESP;

	/*
	 * Construct successful return message. We have one thing going
	 * for us in PF_KEY v2. That's the fact that
	 * sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
	 */
	/* Overwrite the SPIRANGE extension in place with an SA extension. */
	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
	assoc->sadb_sa_exttype = SADB_EXT_SA;
	assoc->sadb_sa_spi = newbie->ipsa_spi;
	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
	mutex_exit(&inbound->isaf_lock);

	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
	kso = (keysock_out_t *)ksi;
	kso->ks_out_len = sizeof (*kso);
	kso->ks_out_serial = ksi->ks_in_serial;
	kso->ks_out_type = KEYSOCK_OUT;

	/*
	 * Can safely putnext() to esp_pfkey_q, because this is a turnaround
	 * from the esp_pfkey_q.
	 */
	putnext(espstack->esp_pfkey_q, mp);
}
1248 1247
1249 1248 /*
1250 1249 * Insert the ESP header into a packet. Duplicate an mblk, and insert a newly
1251 1250 * allocated mblk with the ESP header in between the two.
1252 1251 */
1253 1252 static boolean_t
1254 1253 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint,
1255 1254 ipsecesp_stack_t *espstack)
1256 1255 {
1257 1256 mblk_t *split_mp = mp;
1258 1257 uint_t wheretodiv = divpoint;
1259 1258
1260 1259 while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
1261 1260 wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
1262 1261 split_mp = split_mp->b_cont;
1263 1262 ASSERT(split_mp != NULL);
1264 1263 }
1265 1264
1266 1265 if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
1267 1266 mblk_t *scratch;
1268 1267
1269 1268 /* "scratch" is the 2nd half, split_mp is the first. */
1270 1269 scratch = dupb(split_mp);
1271 1270 if (scratch == NULL) {
1272 1271 esp1dbg(espstack,
1273 1272 ("esp_insert_esp: can't allocate scratch.\n"));
1274 1273 return (B_FALSE);
1275 1274 }
1276 1275 /* NOTE: dupb() doesn't set b_cont appropriately. */
1277 1276 scratch->b_cont = split_mp->b_cont;
1278 1277 scratch->b_rptr += wheretodiv;
1279 1278 split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
1280 1279 split_mp->b_cont = scratch;
1281 1280 }
1282 1281 /*
1283 1282 * At this point, split_mp is exactly "wheretodiv" bytes long, and
1284 1283 * holds the end of the pre-ESP part of the datagram.
1285 1284 */
1286 1285 esp_mp->b_cont = split_mp->b_cont;
1287 1286 split_mp->b_cont = esp_mp;
1288 1287
1289 1288 return (B_TRUE);
1290 1289 }
1291 1290
/*
 * Section 7 of RFC 3947 says:
 *
 *     7.  Recovering from the Expiring NAT Mappings
 *
 *    There are cases where NAT box decides to remove mappings that are still
 *    alive (for example, when the keepalive interval is too long, or when the
 *    NAT box is rebooted).  To recover from this, ends that are NOT behind
 *    NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
 *    the other end to determine which IP and port addresses should be used.
 *    The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
 *    DoS attack possibility because the IP address or port of the other host
 *    will not change (it is not behind NAT).
 *
 *    Keepalives cannot be used for these purposes, as they are not
 *    authenticated, but any IKE authenticated IKE packet or ESP packet can be
 *    used to detect whether the IP address or the port has changed.
 *
 * The following function will check an SA and its explicitly-set pair to see
 * if the NAT-T remote port matches the received packet (which must have
 * passed ESP authentication, see esp_in_done() for the caller context).  If
 * there is a mismatch, the SAs are updated.  It is not important if we race
 * with a transmitting thread, as if there is a transmitting thread, it will
 * merely emit a packet that will most-likely be dropped.
 *
 * "ports" are ordered src,dst, and assoc is an inbound SA, where src should
 * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
 */
/*
 * Extract the first/second 16-bit halves of a 32-bit src,dst port pair
 * regardless of host byte order; results stay in network byte order.
 */
#ifdef _LITTLE_ENDIAN
#define	FIRST_16(x)	((x) & 0xFFFF)
#define	NEXT_16(x)	(((x) >> 16) & 0xFFFF)
#else
#define	FIRST_16(x)	(((x) >> 16) & 0xFFFF)
#define	NEXT_16(x)	((x) & 0xFFFF)
#endif
static void
esp_port_freshness(uint32_t ports, ipsa_t *assoc)
{
	uint16_t remote = FIRST_16(ports);
	uint16_t local = NEXT_16(ports);
	ipsa_t *outbound_peer;
	isaf_t *bucket;
	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;

	/* We found a conn_t, therefore local != 0. */
	ASSERT(local != 0);
	/* Assume an IPv4 SA. */
	ASSERT(assoc->ipsa_addrfam == AF_INET);

	/*
	 * On-the-wire rport == 0 means something's very wrong.
	 * An unpaired SA is also useless to us.
	 * If we are behind the NAT, don't bother.
	 * A zero local NAT port defaults to 4500, so check that too.
	 * And, of course, if the ports already match, we don't need to
	 * bother.
	 */
	if (remote == 0 || assoc->ipsa_otherspi == 0 ||
	    (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) ||
	    (assoc->ipsa_remote_nat_port == 0 &&
	    remote == htons(IPPORT_IKE_NATT)) ||
	    remote == assoc->ipsa_remote_nat_port)
		return;

	/* Try and snag the peer.   NOTE:  Assume IPv4 for now. */
	bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4),
	    assoc->ipsa_srcaddr[0]);
	mutex_enter(&bucket->isaf_lock);
	outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi,
	    assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET);
	mutex_exit(&bucket->isaf_lock);

	/* We probably lost a race to a deleting or expiring thread. */
	if (outbound_peer == NULL)
		return;

	/*
	 * Hold the mutexes for both SAs so we don't race another inbound
	 * thread.  A lock-entry order shouldn't matter, since all other
	 * per-ipsa locks are individually held-then-released.
	 *
	 * Luckily, this has nothing to do with the remote-NAT address,
	 * so we don't have to re-scribble the cached-checksum differential.
	 */
	mutex_enter(&outbound_peer->ipsa_lock);
	mutex_enter(&assoc->ipsa_lock);
	/* Re-point both SAs at the port the peer is now sending from. */
	outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port =
	    remote;
	mutex_exit(&assoc->ipsa_lock);
	mutex_exit(&outbound_peer->ipsa_lock);
	IPSA_REFRELE(outbound_peer);
	ESP_BUMP_STAT(espstack, sa_port_renumbers);
}
1385 1384 /*
1386 1385 * Finish processing of an inbound ESP packet after processing by the
1387 1386 * crypto framework.
1388 1387 * - Remove the ESP header.
1389 1388 * - Send packet back to IP.
1390 1389 * If authentication was performed on the packet, this function is called
1391 1390 * only if the authentication succeeded.
1392 1391 * On success returns B_TRUE, on failure returns B_FALSE and frees the
1393 1392 * mblk chain data_mp.
1394 1393 */
1395 1394 static mblk_t *
1396 1395 esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
1397 1396 {
1398 1397 ipsa_t *assoc;
1399 1398 uint_t espstart;
1400 1399 uint32_t ivlen = 0;
1401 1400 uint_t processed_len;
1402 1401 esph_t *esph;
1403 1402 kstat_named_t *counter;
1404 1403 boolean_t is_natt;
1405 1404 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
1406 1405 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1407 1406 ipsec_stack_t *ipss = ns->netstack_ipsec;
1408 1407
1409 1408 assoc = ira->ira_ipsec_esp_sa;
1410 1409 ASSERT(assoc != NULL);
1411 1410
1412 1411 is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
1413 1412
1414 1413 /* get the pointer to the ESP header */
1415 1414 if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
1416 1415 /* authentication-only ESP */
1417 1416 espstart = ic->ic_crypto_data.cd_offset;
1418 1417 processed_len = ic->ic_crypto_data.cd_length;
1419 1418 } else {
1420 1419 /* encryption present */
1421 1420 ivlen = assoc->ipsa_iv_len;
1422 1421 if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
1423 1422 /* encryption-only ESP */
1424 1423 espstart = ic->ic_crypto_data.cd_offset -
1425 1424 sizeof (esph_t) - assoc->ipsa_iv_len;
1426 1425 processed_len = ic->ic_crypto_data.cd_length +
1427 1426 ivlen;
1428 1427 } else {
1429 1428 /* encryption with authentication */
1430 1429 espstart = ic->ic_crypto_dual_data.dd_offset1;
1431 1430 processed_len = ic->ic_crypto_dual_data.dd_len2 +
1432 1431 ivlen;
1433 1432 }
1434 1433 }
1435 1434
1436 1435 esph = (esph_t *)(data_mp->b_rptr + espstart);
1437 1436
1438 1437 if (assoc->ipsa_auth_alg != IPSA_AALG_NONE ||
1439 1438 (assoc->ipsa_flags & IPSA_F_COMBINED)) {
1440 1439 /*
1441 1440 * Authentication passed if we reach this point.
1442 1441 * Packets with authentication will have the ICV
1443 1442 * after the crypto data. Adjust b_wptr before
1444 1443 * making padlen checks.
1445 1444 */
1446 1445 ESP_BUMP_STAT(espstack, good_auth);
1447 1446 data_mp->b_wptr -= assoc->ipsa_mac_len;
1448 1447
1449 1448 /*
1450 1449 * Check replay window here!
1451 1450 * For right now, assume keysock will set the replay window
1452 1451 * size to zero for SAs that have an unspecified sender.
1453 1452 * This may change...
1454 1453 */
1455 1454
1456 1455 if (!sadb_replay_check(assoc, esph->esph_replay)) {
1457 1456 /*
1458 1457 * Log the event. As of now we print out an event.
1459 1458 * Do not print the replay failure number, or else
1460 1459 * syslog cannot collate the error messages. Printing
1461 1460 * the replay number that failed opens a denial-of-
1462 1461 * service attack.
1463 1462 */
1464 1463 ipsec_assocfailure(info.mi_idnum, 0, 0,
1465 1464 SL_ERROR | SL_WARN,
1466 1465 "Replay failed for ESP spi 0x%x, dst %s.\n",
1467 1466 assoc->ipsa_spi, assoc->ipsa_dstaddr,
1468 1467 assoc->ipsa_addrfam, espstack->ipsecesp_netstack);
1469 1468 ESP_BUMP_STAT(espstack, replay_failures);
1470 1469 counter = DROPPER(ipss, ipds_esp_replay);
1471 1470 goto drop_and_bail;
1472 1471 }
1473 1472
1474 1473 if (is_natt) {
1475 1474 ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS);
1476 1475 ASSERT(ira->ira_esp_udp_ports != 0);
1477 1476 esp_port_freshness(ira->ira_esp_udp_ports, assoc);
1478 1477 }
1479 1478 }
1480 1479
1481 1480 esp_set_usetime(assoc, B_TRUE);
1482 1481
1483 1482 if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
1484 1483 /* The ipsa has hit hard expiration, LOG and AUDIT. */
1485 1484 ipsec_assocfailure(info.mi_idnum, 0, 0,
1486 1485 SL_ERROR | SL_WARN,
1487 1486 "ESP association 0x%x, dst %s had bytes expire.\n",
1488 1487 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1489 1488 espstack->ipsecesp_netstack);
1490 1489 ESP_BUMP_STAT(espstack, bytes_expired);
1491 1490 counter = DROPPER(ipss, ipds_esp_bytes_expire);
1492 1491 goto drop_and_bail;
1493 1492 }
1494 1493
1495 1494 /*
1496 1495 * Remove ESP header and padding from packet. I hope the compiler
1497 1496 * spews "branch, predict taken" code for this.
1498 1497 */
1499 1498
1500 1499 if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4),
1501 1500 ivlen, &counter, espstack)) {
1502 1501
1503 1502 if (is_system_labeled() && assoc->ipsa_tsl != NULL) {
1504 1503 if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) {
1505 1504 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
1506 1505 DROPPER(ipss, ipds_ah_nomem),
1507 1506 &espstack->esp_dropper);
1508 1507 BUMP_MIB(ira->ira_ill->ill_ip_mib,
1509 1508 ipIfStatsInDiscards);
1510 1509 return (NULL);
1511 1510 }
1512 1511 }
1513 1512 if (is_natt)
1514 1513 return (esp_fix_natt_checksums(data_mp, assoc));
1515 1514
1516 1515 if (assoc->ipsa_state == IPSA_STATE_IDLE) {
1517 1516 /*
1518 1517 * Cluster buffering case. Tell caller that we're
1519 1518 * handling the packet.
1520 1519 */
1521 1520 sadb_buf_pkt(assoc, data_mp, ira);
1522 1521 return (NULL);
1523 1522 }
1524 1523
1525 1524 return (data_mp);
1526 1525 }
1527 1526
1528 1527 esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n"));
1529 1528 drop_and_bail:
1530 1529 IP_ESP_BUMP_STAT(ipss, in_discards);
1531 1530 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter,
1532 1531 &espstack->esp_dropper);
1533 1532 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1534 1533 return (NULL);
1535 1534 }
1536 1535
1537 1536 /*
1538 1537 * Called upon failing the inbound ICV check. The message passed as
1539 1538 * argument is freed.
1540 1539 */
1541 1540 static void
1542 1541 esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira)
1543 1542 {
1544 1543 ipsa_t *assoc = ira->ira_ipsec_esp_sa;
1545 1544 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
1546 1545 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
1547 1546 ipsec_stack_t *ipss = ns->netstack_ipsec;
1548 1547
1549 1548 /*
1550 1549 * Log the event. Don't print to the console, block
1551 1550 * potential denial-of-service attack.
1552 1551 */
1553 1552 ESP_BUMP_STAT(espstack, bad_auth);
1554 1553
1555 1554 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
1556 1555 "ESP Authentication failed for spi 0x%x, dst %s.\n",
1557 1556 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
1558 1557 espstack->ipsecesp_netstack);
1559 1558
1560 1559 IP_ESP_BUMP_STAT(ipss, in_discards);
1561 1560 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
1562 1561 DROPPER(ipss, ipds_esp_bad_auth),
1563 1562 &espstack->esp_dropper);
1564 1563 }
1565 1564
1566 1565
1567 1566 /*
1568 1567 * Invoked for outbound packets after ESP processing. If the packet
1569 1568 * also requires AH, performs the AH SA selection and AH processing.
1570 1569 *
1571 1570 * Returns data_mp (possibly with AH added) unless data_mp was consumed
1572 1571 * due to an error, or queued due to async. crypto or an ACQUIRE trigger.
1573 1572 */
1574 1573 static mblk_t *
1575 1574 esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa)
1576 1575 {
1577 1576 ipsec_action_t *ap;
1578 1577
1579 1578 ap = ixa->ixa_ipsec_action;
1580 1579 if (ap == NULL) {
1581 1580 ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
1582 1581 ap = pp->ipsp_act;
1583 1582 }
1584 1583
1585 1584 if (!ap->ipa_want_ah)
1586 1585 return (data_mp);
1587 1586
1588 1587 /*
1589 1588 * Normally the AH SA would have already been put in place
1590 1589 * but it could have been flushed so we need to look for it.
1591 1590 */
1592 1591 if (ixa->ixa_ipsec_ah_sa == NULL) {
1593 1592 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) {
1594 1593 sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE);
1595 1594 return (NULL);
1596 1595 }
1597 1596 }
1598 1597 ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
1599 1598
1600 1599 data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa);
1601 1600 return (data_mp);
1602 1601 }
1603 1602
1604 1603
/*
 * Kernel crypto framework callback invoked after completion of async
 * crypto requests for outbound packets.
 *
 * "arg" is the mblk chain handed to the crypto framework (ipsec_crypto_t
 * mblk + attribute mblk + data); "status" is the CRYPTO_* result code.
 * Consumes the message on all paths.
 */
static void
esp_kcf_callback_outbound(void *arg, int status)
{
	mblk_t *mp = (mblk_t *)arg;
	mblk_t *async_mp;
	netstack_t *ns;
	ipsec_stack_t *ipss;
	ipsecesp_stack_t *espstack;
	mblk_t *data_mp;
	ip_xmit_attr_t ixas;
	ipsec_crypto_t *ic;
	ill_t *ill;

	/*
	 * First remove the ipsec_crypto_t mblk
	 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
	 */
	async_mp = ipsec_remove_crypto_data(mp, &ic);
	ASSERT(async_mp != NULL);

	/*
	 * Extract the ip_xmit_attr_t from the first mblk.
	 * Verifies that the netstack and ill is still around; could
	 * have vanished while kEf was doing its work.
	 * On succesful return we have a nce_t and the ill/ipst can't
	 * disappear until we do the nce_refrele in ixa_cleanup.
	 */
	data_mp = async_mp->b_cont;
	async_mp->b_cont = NULL;
	if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
		/* Disappeared on us - no ill/ipst for MIB */
		/* We have nowhere to do stats since ixa_ipst could be NULL */
		if (ixas.ixa_nce != NULL) {
			ill = ixas.ixa_nce->nce_ill;
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
		}
		freemsg(data_mp);
		goto done;
	}
	ns = ixas.ixa_ipst->ips_netstack;
	espstack = ns->netstack_ipsecesp;
	ipss = ns->netstack_ipsec;
	ill = ixas.ixa_nce->nce_ill;

	if (status == CRYPTO_SUCCESS) {
		/*
		 * If a ICV was computed, it was stored by the
		 * crypto framework at the end of the packet.
		 */
		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;

		esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE);
		/* NAT-T packet. */
		if (IPH_HDR_VERSION(ipha) == IP_VERSION &&
		    ipha->ipha_protocol == IPPROTO_UDP)
			esp_prepare_udp(ns, data_mp, ipha);

		/* do AH processing if needed */
		data_mp = esp_do_outbound_ah(data_mp, &ixas);
		if (data_mp == NULL)
			goto done;

		(void) ip_output_post_ipsec(data_mp, &ixas);
	} else {
		/* Outbound shouldn't see invalid MAC */
		ASSERT(status != CRYPTO_INVALID_MAC);

		esp1dbg(espstack,
		    ("esp_kcf_callback_outbound: crypto failed with 0x%x\n",
		    status));
		ESP_BUMP_STAT(espstack, crypto_failures);
		ESP_BUMP_STAT(espstack, out_discards);
		ip_drop_packet(data_mp, B_FALSE, ill,
		    DROPPER(ipss, ipds_esp_crypto_failed),
		    &espstack->esp_dropper);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
	}
done:
	/* ixa_cleanup() releases the nce ref taken by attr_from_mblk. */
	ixa_cleanup(&ixas);
	(void) ipsec_free_crypto_data(mp);
}
1691 1690
1692 1691 /*
1693 1692 * Kernel crypto framework callback invoked after completion of async
1694 1693 * crypto requests for inbound packets.
1695 1694 */
1696 1695 static void
1697 1696 esp_kcf_callback_inbound(void *arg, int status)
1698 1697 {
1699 1698 mblk_t *mp = (mblk_t *)arg;
1700 1699 mblk_t *async_mp;
1701 1700 netstack_t *ns;
1702 1701 ipsecesp_stack_t *espstack;
1703 1702 ipsec_stack_t *ipss;
1704 1703 mblk_t *data_mp;
1705 1704 ip_recv_attr_t iras;
1706 1705 ipsec_crypto_t *ic;
1707 1706
1708 1707 /*
1709 1708 * First remove the ipsec_crypto_t mblk
1710 1709 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
1711 1710 */
1712 1711 async_mp = ipsec_remove_crypto_data(mp, &ic);
1713 1712 ASSERT(async_mp != NULL);
1714 1713
1715 1714 /*
1716 1715 * Extract the ip_recv_attr_t from the first mblk.
1717 1716 * Verifies that the netstack and ill is still around; could
1718 1717 * have vanished while kEf was doing its work.
1719 1718 */
1720 1719 data_mp = async_mp->b_cont;
1721 1720 async_mp->b_cont = NULL;
1722 1721 if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
1723 1722 /* The ill or ip_stack_t disappeared on us */
1724 1723 ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
1725 1724 freemsg(data_mp);
1726 1725 goto done;
1727 1726 }
1728 1727
1729 1728 ns = iras.ira_ill->ill_ipst->ips_netstack;
1730 1729 espstack = ns->netstack_ipsecesp;
1731 1730 ipss = ns->netstack_ipsec;
1732 1731
1733 1732 if (status == CRYPTO_SUCCESS) {
1734 1733 data_mp = esp_in_done(data_mp, &iras, ic);
1735 1734 if (data_mp == NULL)
1736 1735 goto done;
1737 1736
1738 1737 /* finish IPsec processing */
1739 1738 ip_input_post_ipsec(data_mp, &iras);
1740 1739 } else if (status == CRYPTO_INVALID_MAC) {
1741 1740 esp_log_bad_auth(data_mp, &iras);
1742 1741 } else {
1743 1742 esp1dbg(espstack,
1744 1743 ("esp_kcf_callback: crypto failed with 0x%x\n",
1745 1744 status));
1746 1745 ESP_BUMP_STAT(espstack, crypto_failures);
1747 1746 IP_ESP_BUMP_STAT(ipss, in_discards);
1748 1747 ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
1749 1748 DROPPER(ipss, ipds_esp_crypto_failed),
1750 1749 &espstack->esp_dropper);
1751 1750 BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1752 1751 }
1753 1752 done:
1754 1753 ira_cleanup(&iras, B_TRUE);
1755 1754 (void) ipsec_free_crypto_data(mp);
1756 1755 }
1757 1756
1758 1757 /*
1759 1758 * Invoked on crypto framework failure during inbound and outbound processing.
1760 1759 */
1761 1760 static void
1762 1761 esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
1763 1762 ill_t *ill, ipsecesp_stack_t *espstack)
1764 1763 {
1765 1764 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
1766 1765
1767 1766 esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
1768 1767 is_inbound ? "inbound" : "outbound", kef_rc));
1769 1768 ip_drop_packet(data_mp, is_inbound, ill,
1770 1769 DROPPER(ipss, ipds_esp_crypto_failed),
1771 1770 &espstack->esp_dropper);
1772 1771 ESP_BUMP_STAT(espstack, crypto_failures);
1773 1772 if (is_inbound)
1774 1773 IP_ESP_BUMP_STAT(ipss, in_discards);
1775 1774 else
1776 1775 ESP_BUMP_STAT(espstack, out_discards);
1777 1776 }
1778 1777
1779 1778 /*
1780 1779 * A statement-equivalent macro, _cr MUST point to a modifiable
1781 1780 * crypto_call_req_t.
1782 1781 */
1783 1782 #define ESP_INIT_CALLREQ(_cr, _mp, _callback) \
1784 1783 (_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE; \
1785 1784 (_cr)->cr_callback_arg = (_mp); \
1786 1785 (_cr)->cr_callback_func = (_callback)
1787 1786
/*
 * Initialize a raw-format crypto_data_t describing an ICV buffer of
 * icvlen bytes starting at icvbuf.  do { } while (0) keeps the
 * expansion a single statement, so the caller's trailing semicolon
 * does not leave a dangling empty statement (which would break an
 * unbraced if/else).
 */
#define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) do {		\
	(mac)->cd_format = CRYPTO_DATA_RAW;			\
	(mac)->cd_offset = 0;					\
	(mac)->cd_length = icvlen;				\
	(mac)->cd_raw.iov_base = (char *)icvbuf;		\
	(mac)->cd_raw.iov_len = icvlen;				\
} while (0)
1795 1794
/*
 * Initialize a crypto_data_t covering (off, len) of packet mp.  When
 * the span fits entirely in mp's first mblk, describe it as a raw
 * buffer (cheaper for the crypto framework); otherwise hand over the
 * whole mblk chain.  do { } while (0) keeps the expansion a single
 * statement so the caller's trailing semicolon is harmless in an
 * unbraced if/else.
 */
#define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) do {		\
	if (MBLKL(mp) >= (len) + (off)) {			\
		(data)->cd_format = CRYPTO_DATA_RAW;		\
		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr;	\
		(data)->cd_raw.iov_len = MBLKL(mp);		\
		(data)->cd_offset = off;			\
	} else {						\
		(data)->cd_format = CRYPTO_DATA_MBLK;		\
		(data)->cd_mp = mp;				\
		(data)->cd_offset = off;			\
	}							\
	(data)->cd_length = len;				\
} while (0)
1809 1808
/*
 * Initialize a crypto_dual_data_t over packet mp with two spans:
 * (off1, len1) for the first (authentication) operation and
 * (off2, len2) for the second (encryption/decryption) operation.
 * do { } while (0) keeps the expansion a single statement so the
 * caller's trailing semicolon is harmless in an unbraced if/else.
 */
#define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) do { \
	(data)->dd_format = CRYPTO_DATA_MBLK;			\
	(data)->dd_mp = mp;					\
	(data)->dd_len1 = len1;					\
	(data)->dd_offset1 = off1;				\
	(data)->dd_len2 = len2;					\
	(data)->dd_offset2 = off2;				\
} while (0)
1818 1817
/*
 * Submit an inbound ESP packet to the crypto framework for ICV
 * verification and/or decryption.
 *
 * Returns esp_mp if it successfully completed the request.  Returns
 * NULL if it failed (and increments InDiscards) or if the request is
 * pending in the crypto framework (CRYPTO_QUEUED), in which case
 * esp_kcf_callback_inbound() finishes the work later.
 *
 * esp_mp	- packet of the form [IP,options,ESP,IV,data,ICV,pad];
 *		  assumed to be a single mblk (msg_len is MBLKL(esp_mp))
 * ira		- inbound receive attributes for the packet
 * assoc	- the ESP SA the packet arrived under
 * esph_offset	- offset of the ESP header from esp_mp->b_rptr
 */
static mblk_t *
esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira,
    ipsa_t *assoc, uint_t esph_offset)
{
	uint_t auth_offset, msg_len, auth_len;
	crypto_call_req_t call_req, *callrp;
	mblk_t *mp;
	esph_t *esph_ptr;
	int kef_rc;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_auth, do_encr, force;
	uint_t encr_offset, encr_len;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	ipsec_crypto_t *ic, icstack;
	uchar_t *iv_ptr;
	netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

	/* Which of the two operations (if not both) the SA requires. */
	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
	/* IPSA_F_ASYNC forces the asynchronous crypto path. */
	force = (assoc->ipsa_flags & IPSA_F_ASYNC);

#ifdef IPSEC_LATENCY_TEST
	kef_rc = CRYPTO_SUCCESS;
#else
	kef_rc = CRYPTO_FAILED;
#endif

	/*
	 * An inbound packet is of the form:
	 * [IP,options,ESP,IV,data,ICV,pad]
	 */
	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
	iv_ptr = (uchar_t *)(esph_ptr + 1);
	/* Packet length starting at IP header ending after ESP ICV. */
	msg_len = MBLKL(esp_mp);

	/* Ciphertext begins right after the IV and runs to end of mblk. */
	encr_offset = esph_offset + sizeof (esph_t) + iv_len;
	encr_len = msg_len - encr_offset;

	/*
	 * Counter mode algs need a nonce. This is setup in sadb_common_add().
	 * If for some reason we are using a SA which does not have a nonce
	 * then we must fail here.
	 */
	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
	    (assoc->ipsa_nonce == NULL)) {
		ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill,
		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
		return (NULL);
	}

	if (force) {
		/* We are doing asynch; allocate mblks to hold state */
		if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
			BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", esp_mp,
			    ira->ira_ill);
			return (NULL);
		}
		/* Chain the packet behind the state mblks for the callback. */
		linkb(mp, esp_mp);
		callrp = &call_req;
		ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound);
	} else {
		/*
		 * If we know we are going to do sync then ipsec_crypto_t
		 * should be on the stack.
		 */
		ic = &icstack;
		bzero(ic, sizeof (*ic));
		callrp = NULL;	/* NULL call_req => synchronous KCF call */
	}

	if (do_auth) {
		/* authentication context template */
		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
		    auth_ctx_tmpl);

		/* ICV to be verified (last icv_len bytes of the packet) */
		ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
		    icv_len, esp_mp->b_wptr - icv_len);

		/* authentication starts at the ESP header */
		auth_offset = esph_offset;
		auth_len = msg_len - auth_offset - icv_len;
		if (!do_encr) {
			/* authentication only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp, auth_offset, auth_len);

			/* call the crypto framework */
			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
			    &ic->ic_crypto_data,
			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
			    &ic->ic_crypto_mac, callrp);
		}
	}

	if (do_encr) {
		/* encryption template */
		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
		    encr_ctx_tmpl);

		/* Call the nonce update function. Also passes in IV */
		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len,
		    iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);

		if (!do_auth) {
			/* decryption only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp, encr_offset, encr_len);

			/* call the crypto framework */
			kef_rc = crypto_decrypt((crypto_mechanism_t *)
			    &ic->ic_cmm, &ic->ic_crypto_data,
			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
			    NULL, callrp);
		}
	}

	if (do_auth && do_encr) {
		/* dual operation */
		/*
		 * initialize input data argument; the second (decrypt)
		 * span excludes the trailing ICV (encr_len - icv_len).
		 */
		ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
		    esp_mp, auth_offset, auth_len,
		    encr_offset, encr_len - icv_len);

		/* specify IV */
		ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;

		/* call the framework */
		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
		    &assoc->ipsa_emech, &ic->ic_crypto_dual_data,
		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
		    auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac,
		    NULL, callrp);
	}

	switch (kef_rc) {
	case CRYPTO_SUCCESS:
		ESP_BUMP_STAT(espstack, crypto_sync);
		esp_mp = esp_in_done(esp_mp, ira, ic);
		if (force) {
			/* Free mp after we are done with ic */
			mp = ipsec_free_crypto_data(mp);
			(void) ip_recv_attr_free_mblk(mp);
		}
		return (esp_mp);
	case CRYPTO_QUEUED:
		/* esp_kcf_callback_inbound() will be invoked on completion */
		ESP_BUMP_STAT(espstack, crypto_async);
		return (NULL);
	case CRYPTO_INVALID_MAC:
		if (force) {
			/* Recover the original packet from the state chain. */
			mp = ipsec_free_crypto_data(mp);
			esp_mp = ip_recv_attr_free_mblk(mp);
		}
		ESP_BUMP_STAT(espstack, crypto_sync);
		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
		esp_log_bad_auth(esp_mp, ira);
		/* esp_mp was passed to ip_drop_packet (via esp_log_bad_auth) */
		return (NULL);
	}

	/* Any other KCF return code is treated as a crypto failure. */
	if (force) {
		mp = ipsec_free_crypto_data(mp);
		esp_mp = ip_recv_attr_free_mblk(mp);
	}
	BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
	esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack);
	/* esp_mp was passed to ip_drop_packet */
	return (NULL);
}
2002 2001
/*
 * Compute the IP and UDP checksums -- common code for both keepalives and
 * actual ESP-in-UDP packets. Be flexible with multiple mblks because ESP
 * uses mblk-insertion to insert the UDP header.
 * TODO - If there is an easy way to prep a packet for HW checksums, make
 * it happen here.
 * Note that this is used both before calling ip_output_simple and
 * in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the
 * latter.
 *
 * ipha must point at the IPv4 header inside mp's first mblk; the UDP
 * checksum is only filled in when the stack's us_do_checksum is set.
 */
static void
esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
{
	int offset;
	uint32_t cksum;
	uint16_t *arr;
	mblk_t *udpmp = mp;
	uint_t hlen = IPH_HDR_LENGTH(ipha);

	ASSERT(MBLKL(mp) >= sizeof (ipha_t));

	/* Recompute the IP header checksum from scratch. */
	ipha->ipha_hdr_checksum = 0;
	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);

	if (ns->netstack_udp->us_do_checksum) {
		ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
		/* arr points to the IP header. */
		arr = (uint16_t *)ipha;
		IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
		IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes,
		    ntohs(htons(ipha->ipha_length) - hlen));
		/* arr[6-9] are the IP addresses. */
		/* Seed with the UDP pseudo-header: addrs + UDP length. */
		cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
		    ntohs(htons(ipha->ipha_length) - hlen);
		/* Fold in the UDP header and payload, starting past IP. */
		cksum = IP_CSUM(mp, hlen, cksum);
		offset = hlen + UDP_CHECKSUM_OFFSET;
		/* Walk the chain to the mblk holding the checksum field. */
		while (offset >= MBLKL(udpmp)) {
			offset -= MBLKL(udpmp);
			udpmp = udpmp->b_cont;
		}
		/* arr points to the UDP header's checksum field. */
		arr = (uint16_t *)(udpmp->b_rptr + offset);
		*arr = cksum;
	}
}
2048 2047
2049 2048 /*
2050 2049 * taskq handler so we can send the NAT-T keepalive on a separate thread.
2051 2050 */
2052 2051 static void
2053 2052 actually_send_keepalive(void *arg)
2054 2053 {
2055 2054 mblk_t *mp = (mblk_t *)arg;
2056 2055 ip_xmit_attr_t ixas;
2057 2056 netstack_t *ns;
2058 2057 netstackid_t stackid;
2059 2058
2060 2059 stackid = (netstackid_t)(uintptr_t)mp->b_prev;
2061 2060 mp->b_prev = NULL;
2062 2061 ns = netstack_find_by_stackid(stackid);
2063 2062 if (ns == NULL) {
2064 2063 /* Disappeared */
2065 2064 ip_drop_output("ipIfStatsOutDiscards", mp, NULL);
2066 2065 freemsg(mp);
2067 2066 return;
2068 2067 }
2069 2068
2070 2069 bzero(&ixas, sizeof (ixas));
2071 2070 ixas.ixa_zoneid = ALL_ZONES;
2072 2071 ixas.ixa_cred = kcred;
2073 2072 ixas.ixa_cpid = NOPID;
2074 2073 ixas.ixa_tsl = NULL;
2075 2074 ixas.ixa_ipst = ns->netstack_ip;
2076 2075 /* No ULP checksum; done by esp_prepare_udp */
2077 2076 ixas.ixa_flags = (IXAF_IS_IPV4 | IXAF_NO_IPSEC | IXAF_VERIFY_SOURCE);
2078 2077
2079 2078 (void) ip_output_simple(mp, &ixas);
2080 2079 ixa_cleanup(&ixas);
2081 2080 netstack_rele(ns);
2082 2081 }
2083 2082
/*
 * Send a one-byte UDP NAT-T keepalive.
 *
 * Builds a minimal IPv4/UDP datagram carrying a single 0xFF payload
 * byte (the UDP-encapsulation keepalive convention), addressed with
 * the SA's endpoints and NAT-T ports, then dispatches it to a taskq
 * for actual transmission.  Best-effort: allocation or dispatch
 * failure just drops the keepalive.
 */
void
ipsecesp_send_keepalive(ipsa_t *assoc)
{
	mblk_t *mp;
	ipha_t *ipha;
	udpha_t *udpha;
	netstack_t *ns = assoc->ipsa_netstack;

	ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock));

	/* IP header + UDP header + one byte of payload. */
	mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI);
	if (mp == NULL)
		return;
	ipha = (ipha_t *)mp->b_rptr;
	ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
	ipha->ipha_type_of_service = 0;
	ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1);
	/* Use the low-16 of the SPI so we have some clue where it came from. */
	ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1);
	ipha->ipha_fragment_offset_and_flags = 0; /* Too small to fragment! */
	ipha->ipha_ttl = 0xFF;
	ipha->ipha_protocol = IPPROTO_UDP;
	ipha->ipha_hdr_checksum = 0;	/* Computed by esp_prepare_udp(). */
	ipha->ipha_src = assoc->ipsa_srcaddr[0];
	ipha->ipha_dst = assoc->ipsa_dstaddr[0];
	udpha = (udpha_t *)(ipha + 1);
	/* Fall back to the IKE NAT-T port if the SA has no explicit port. */
	udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
	    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
	udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
	    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
	udpha->uha_length = htons(sizeof (udpha_t) + 1);
	udpha->uha_checksum = 0;	/* Computed by esp_prepare_udp(). */
	mp->b_wptr = (uint8_t *)(udpha + 1);
	*(mp->b_wptr++) = 0xFF;		/* The one-byte keepalive payload. */

	esp_prepare_udp(ns, mp, ipha);

	/*
	 * We're holding an isaf_t bucket lock, so pawn off the actual
	 * packet transmission to another thread.  Just in case syncq
	 * processing causes a same-bucket packet to be processed.
	 */
	mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid;

	if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp,
	    TQ_NOSLEEP) == 0) {
		/* Assume no memory if taskq_dispatch() fails. */
		mp->b_prev = NULL;
		ip_drop_packet(mp, B_FALSE, NULL,
		    DROPPER(ns->netstack_ipsec, ipds_esp_nomem),
		    &ns->netstack_ipsecesp->esp_dropper);
	}
}
2140 2139
/*
 * Submit an outbound packet to the crypto framework for encryption
 * and/or ICV computation.
 *
 * Returns data_mp if successfully completed the request. Returns
 * NULL if it failed (and increments OutDiscards) or if the request
 * is pending (CRYPTO_QUEUED), in which case esp_kcf_callback_outbound()
 * finishes the work later.
 *
 * data_mp	- [IP,options] -> [(udp,)ESP,IV] -> [data] -> [pad,ICV]
 * ixa		- transmit attributes for the packet
 * assoc	- outbound ESP SA
 * icv_buf	- buffer where the computed ICV is to be written
 * payload_len	- length of the cleartext payload to be encrypted
 */
static mblk_t *
esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc,
    uchar_t *icv_buf, uint_t payload_len)
{
	uint_t auth_len;
	crypto_call_req_t call_req, *callrp;
	mblk_t *esp_mp;
	esph_t *esph_ptr;
	mblk_t *mp;
	int kef_rc = CRYPTO_FAILED;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_auth, do_encr, force;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
	size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
	netstack_t *ns = ixa->ixa_ipst->ips_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_crypto_t *ic, icstack;
	uchar_t *iv_ptr;
	crypto_data_t *cd_ptr = NULL;
	ill_t *ill = ixa->ixa_nce->nce_ill;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	esp3dbg(espstack, ("esp_submit_req_outbound:%s",
	    is_natt ? "natt" : "not natt"));

	/* Which of the two operations (if not both) the SA requires. */
	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
	/* IPSA_F_ASYNC forces the asynchronous crypto path. */
	force = (assoc->ipsa_flags & IPSA_F_ASYNC);

#ifdef IPSEC_LATENCY_TEST
	kef_rc = CRYPTO_SUCCESS;
#else
	kef_rc = CRYPTO_FAILED;
#endif

	/*
	 * Outbound IPsec packets are of the form:
	 * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
	 * unless it's NATT, then it's
	 * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
	 * Get a pointer to the mblk containing the ESP header.
	 */
	ASSERT(data_mp->b_cont != NULL);
	esp_mp = data_mp->b_cont;
	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
	iv_ptr = (uchar_t *)(esph_ptr + 1);

	/*
	 * Combined mode algs need a nonce. This is setup in sadb_common_add().
	 * If for some reason we are using a SA which does not have a nonce
	 * then we must fail here.
	 */
	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
	    (assoc->ipsa_nonce == NULL)) {
		ip_drop_packet(data_mp, B_FALSE, NULL,
		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
		return (NULL);
	}

	if (force) {
		/* We are doing asynch; allocate mblks to hold state */
		if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
			freemsg(data_mp);
			return (NULL);
		}

		/* Chain the packet behind the state mblks for the callback. */
		linkb(mp, data_mp);
		callrp = &call_req;
		ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound);
	} else {
		/*
		 * If we know we are going to do sync then ipsec_crypto_t
		 * should be on the stack.
		 */
		ic = &icstack;
		bzero(ic, sizeof (*ic));
		callrp = NULL;	/* NULL call_req => synchronous KCF call */
	}

	if (do_auth) {
		/* authentication context template */
		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
		    auth_ctx_tmpl);

		/* where to store the computed mac */
		ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
		    icv_len, icv_buf);

		/* authentication starts at the ESP header */
		auth_len = payload_len + iv_len + sizeof (esph_t);
		if (!do_encr) {
			/* authentication only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp, esph_offset, auth_len);

			/* call the crypto framework */
			kef_rc = crypto_mac(&assoc->ipsa_amech,
			    &ic->ic_crypto_data,
			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
			    &ic->ic_crypto_mac, callrp);
		}
	}

	if (do_encr) {
		/* encryption context template */
		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
		    encr_ctx_tmpl);
		/* Call the nonce update function. */
		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len,
		    iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);

		if (!do_auth) {
			/* encryption only, skip mblk that contains ESP hdr */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp->b_cont, 0, payload_len);

			/*
			 * For combined mode ciphers, the ciphertext is the same
			 * size as the clear text, the ICV should follow the
			 * ciphertext. To convince the kcf to allow in-line
			 * encryption, with an ICV, use ipsec_out_crypto_mac
			 * to point to the same buffer as the data. The calling
			 * function need to ensure the buffer is large enough to
			 * include the ICV.
			 *
			 * The IV is already written to the packet buffer, the
			 * nonce setup function copied it to the params struct
			 * for the cipher to use.
			 */
			if (assoc->ipsa_flags & IPSA_F_COMBINED) {
				bcopy(&ic->ic_crypto_data,
				    &ic->ic_crypto_mac,
				    sizeof (crypto_data_t));
				ic->ic_crypto_mac.cd_length =
				    payload_len + icv_len;
				cd_ptr = &ic->ic_crypto_mac;
			}

			/* call the crypto framework */
			kef_rc = crypto_encrypt((crypto_mechanism_t *)
			    &ic->ic_cmm, &ic->ic_crypto_data,
			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
			    cd_ptr, callrp);

		}
	}

	if (do_auth && do_encr) {
		/*
		 * Encryption and authentication:
		 * Pass the pointer to the mblk chain starting at the ESP
		 * header to the framework. Skip the ESP header mblk
		 * for encryption, which is reflected by an encryption
		 * offset equal to the length of that mblk. Start
		 * the authentication at the ESP header, i.e. use an
		 * authentication offset of zero.
		 */
		ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
		    esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);

		/* specify IV */
		ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;

		/* call the framework */
		kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
		    &assoc->ipsa_amech, NULL,
		    &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
		    encr_ctx_tmpl, auth_ctx_tmpl,
		    &ic->ic_crypto_dual_data,
		    &ic->ic_crypto_mac, callrp);
	}

	switch (kef_rc) {
	case CRYPTO_SUCCESS:
		ESP_BUMP_STAT(espstack, crypto_sync);
		esp_set_usetime(assoc, B_FALSE);
		if (force) {
			/* Recover the packet from the state mblk chain. */
			mp = ipsec_free_crypto_data(mp);
			data_mp = ip_xmit_attr_free_mblk(mp);
		}
		if (is_natt)
			esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr);
		return (data_mp);
	case CRYPTO_QUEUED:
		/* esp_kcf_callback_outbound() will be invoked on completion */
		ESP_BUMP_STAT(espstack, crypto_async);
		return (NULL);
	}

	/* Any other KCF return code is treated as a crypto failure. */
	if (force) {
		mp = ipsec_free_crypto_data(mp);
		data_mp = ip_xmit_attr_free_mblk(mp);
	}
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
	esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack);
	/* data_mp was passed to ip_drop_packet */
	return (NULL);
}
2352 2351
2353 2352 /*
2354 2353 * Handle outbound IPsec processing for IPv4 and IPv6
2355 2354 *
2356 2355 * Returns data_mp if successfully completed the request. Returns
2357 2356 * NULL if it failed (and increments InDiscards) or if it is pending.
2358 2357 */
2359 2358 static mblk_t *
2360 2359 esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
2361 2360 {
2362 2361 mblk_t *espmp, *tailmp;
2363 2362 ipha_t *ipha;
2364 2363 ip6_t *ip6h;
2365 2364 esph_t *esph_ptr, *iv_ptr;
2366 2365 uint_t af;
2367 2366 uint8_t *nhp;
2368 2367 uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
2369 2368 uintptr_t esplen = sizeof (esph_t);
2370 2369 uint8_t protocol;
2371 2370 ipsa_t *assoc;
2372 2371 uint_t iv_len, block_size, mac_len = 0;
2373 2372 uchar_t *icv_buf;
2374 2373 udpha_t *udpha;
2375 2374 boolean_t is_natt = B_FALSE;
2376 2375 netstack_t *ns = ixa->ixa_ipst->ips_netstack;
2377 2376 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2378 2377 ipsec_stack_t *ipss = ns->netstack_ipsec;
2379 2378 ill_t *ill = ixa->ixa_nce->nce_ill;
2380 2379 boolean_t need_refrele = B_FALSE;
2381 2380
2382 2381 ESP_BUMP_STAT(espstack, out_requests);
2383 2382
2384 2383 /*
2385 2384 * <sigh> We have to copy the message here, because TCP (for example)
2386 2385 * keeps a dupb() of the message lying around for retransmission.
2387 2386 * Since ESP changes the whole of the datagram, we have to create our
2388 2387 * own copy lest we clobber TCP's data. Since we have to copy anyway,
2389 2388 * we might as well make use of msgpullup() and get the mblk into one
2390 2389 * contiguous piece!
2391 2390 */
2392 2391 tailmp = msgpullup(data_mp, -1);
2393 2392 if (tailmp == NULL) {
2394 2393 esp0dbg(("esp_outbound: msgpullup() failed, "
2395 2394 "dropping packet.\n"));
2396 2395 ip_drop_packet(data_mp, B_FALSE, ill,
2397 2396 DROPPER(ipss, ipds_esp_nomem),
2398 2397 &espstack->esp_dropper);
2399 2398 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2400 2399 return (NULL);
2401 2400 }
2402 2401 freemsg(data_mp);
2403 2402 data_mp = tailmp;
2404 2403
2405 2404 assoc = ixa->ixa_ipsec_esp_sa;
2406 2405 ASSERT(assoc != NULL);
2407 2406
2408 2407 /*
2409 2408 * Get the outer IP header in shape to escape this system..
2410 2409 */
2411 2410 if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
2412 2411 /*
2413 2412 * Need to update packet with any CIPSO option and update
2414 2413 * ixa_tsl to capture the new label.
2415 2414 * We allocate a separate ixa for that purpose.
2416 2415 */
2417 2416 ixa = ip_xmit_attr_duplicate(ixa);
2418 2417 if (ixa == NULL) {
2419 2418 ip_drop_packet(data_mp, B_FALSE, ill,
2420 2419 DROPPER(ipss, ipds_esp_nomem),
2421 2420 &espstack->esp_dropper);
2422 2421 return (NULL);
2423 2422 }
2424 2423 need_refrele = B_TRUE;
2425 2424
2426 2425 label_hold(assoc->ipsa_otsl);
2427 2426 ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);
2428 2427
2429 2428 data_mp = sadb_whack_label(data_mp, assoc, ixa,
2430 2429 DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
2431 2430 if (data_mp == NULL) {
2432 2431 /* Packet dropped by sadb_whack_label */
2433 2432 ixa_refrele(ixa);
2434 2433 return (NULL);
2435 2434 }
2436 2435 }
2437 2436
2438 2437 /*
2439 2438 * Reality check....
2440 2439 */
2441 2440 ipha = (ipha_t *)data_mp->b_rptr; /* So we can call esp_acquire(). */
2442 2441
2443 2442 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2444 2443 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
2445 2444
2446 2445 af = AF_INET;
2447 2446 divpoint = IPH_HDR_LENGTH(ipha);
2448 2447 datalen = ntohs(ipha->ipha_length) - divpoint;
2449 2448 nhp = (uint8_t *)&ipha->ipha_protocol;
2450 2449 } else {
2451 2450 ip_pkt_t ipp;
2452 2451
2453 2452 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
2454 2453
2455 2454 af = AF_INET6;
2456 2455 ip6h = (ip6_t *)ipha;
2457 2456 bzero(&ipp, sizeof (ipp));
2458 2457 divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL);
2459 2458 if (ipp.ipp_dstopts != NULL &&
2460 2459 ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
2461 2460 /*
2462 2461 * Destination options are tricky. If we get in here,
2463 2462 * then we have a terminal header following the
2464 2463 * destination options. We need to adjust backwards
2465 2464 * so we insert ESP BEFORE the destination options
2466 2465 * bag. (So that the dstopts get encrypted!)
2467 2466 *
2468 2467 * Since this is for outbound packets only, we know
2469 2468 * that non-terminal destination options only precede
2470 2469 * routing headers.
2471 2470 */
2472 2471 divpoint -= ipp.ipp_dstoptslen;
2473 2472 }
2474 2473 datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;
2475 2474
2476 2475 if (ipp.ipp_rthdr != NULL) {
2477 2476 nhp = &ipp.ipp_rthdr->ip6r_nxt;
2478 2477 } else if (ipp.ipp_hopopts != NULL) {
2479 2478 nhp = &ipp.ipp_hopopts->ip6h_nxt;
2480 2479 } else {
2481 2480 ASSERT(divpoint == sizeof (ip6_t));
2482 2481 /* It's probably IP + ESP. */
2483 2482 nhp = &ip6h->ip6_nxt;
2484 2483 }
2485 2484 }
2486 2485
2487 2486 mac_len = assoc->ipsa_mac_len;
2488 2487
2489 2488 if (assoc->ipsa_flags & IPSA_F_NATT) {
2490 2489 /* wedge in UDP header */
2491 2490 is_natt = B_TRUE;
2492 2491 esplen += UDPH_SIZE;
2493 2492 }
2494 2493
2495 2494 /*
2496 2495 * Set up ESP header and encryption padding for ENCR PI request.
2497 2496 */
2498 2497
2499 2498 /* Determine the padding length. Pad to 4-bytes for no-encryption. */
2500 2499 if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
2501 2500 iv_len = assoc->ipsa_iv_len;
2502 2501 block_size = assoc->ipsa_datalen;
2503 2502
2504 2503 /*
2505 2504 * Pad the data to the length of the cipher block size.
2506 2505 * Include the two additional bytes (hence the - 2) for the
2507 2506 * padding length and the next header. Take this into account
2508 2507 * when calculating the actual length of the padding.
2509 2508 */
2510 2509 ASSERT(ISP2(iv_len));
2511 2510 padlen = ((unsigned)(block_size - datalen - 2)) &
2512 2511 (block_size - 1);
2513 2512 } else {
2514 2513 iv_len = 0;
2515 2514 padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) &
2516 2515 (sizeof (uint32_t) - 1);
2517 2516 }
2518 2517
2519 2518 /* Allocate ESP header and IV. */
2520 2519 esplen += iv_len;
2521 2520
2522 2521 /*
2523 2522 * Update association byte-count lifetimes. Don't forget to take
2524 2523 * into account the padding length and next-header (hence the + 2).
2525 2524 *
2526 2525 * Use the amount of data fed into the "encryption algorithm". This
2527 2526 * is the IV, the data length, the padding length, and the final two
2528 2527 * bytes (padlen, and next-header).
2529 2528 *
2530 2529 */
2531 2530
2532 2531 if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) {
2533 2532 ip_drop_packet(data_mp, B_FALSE, ill,
2534 2533 DROPPER(ipss, ipds_esp_bytes_expire),
2535 2534 &espstack->esp_dropper);
2536 2535 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2537 2536 if (need_refrele)
2538 2537 ixa_refrele(ixa);
2539 2538 return (NULL);
2540 2539 }
2541 2540
2542 2541 espmp = allocb(esplen, BPRI_HI);
2543 2542 if (espmp == NULL) {
2544 2543 ESP_BUMP_STAT(espstack, out_discards);
2545 2544 esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n"));
2546 2545 ip_drop_packet(data_mp, B_FALSE, ill,
2547 2546 DROPPER(ipss, ipds_esp_nomem),
2548 2547 &espstack->esp_dropper);
2549 2548 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2550 2549 if (need_refrele)
2551 2550 ixa_refrele(ixa);
2552 2551 return (NULL);
2553 2552 }
2554 2553 espmp->b_wptr += esplen;
2555 2554 esph_ptr = (esph_t *)espmp->b_rptr;
2556 2555
2557 2556 if (is_natt) {
2558 2557 esp3dbg(espstack, ("esp_outbound: NATT"));
2559 2558
2560 2559 udpha = (udpha_t *)espmp->b_rptr;
2561 2560 udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
2562 2561 assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
2563 2562 udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
2564 2563 assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
2565 2564 /*
2566 2565 * Set the checksum to 0, so that the esp_prepare_udp() call
2567 2566 * can do the right thing.
2568 2567 */
2569 2568 udpha->uha_checksum = 0;
2570 2569 esph_ptr = (esph_t *)(udpha + 1);
2571 2570 }
2572 2571
2573 2572 esph_ptr->esph_spi = assoc->ipsa_spi;
2574 2573
2575 2574 esph_ptr->esph_replay = htonl(atomic_inc_32_nv(&assoc->ipsa_replay));
2576 2575 if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2577 2576 /*
2578 2577 * XXX We have replay counter wrapping.
2579 2578 * We probably want to nuke this SA (and its peer).
2580 2579 */
2581 2580 ipsec_assocfailure(info.mi_idnum, 0, 0,
2582 2581 SL_ERROR | SL_CONSOLE | SL_WARN,
2583 2582 "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
2584 2583 esph_ptr->esph_spi, assoc->ipsa_dstaddr, af,
2585 2584 espstack->ipsecesp_netstack);
2586 2585
2587 2586 ESP_BUMP_STAT(espstack, out_discards);
2588 2587 sadb_replay_delete(assoc);
2589 2588 ip_drop_packet(data_mp, B_FALSE, ill,
2590 2589 DROPPER(ipss, ipds_esp_replay),
2591 2590 &espstack->esp_dropper);
2592 2591 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2593 2592 if (need_refrele)
2594 2593 ixa_refrele(ixa);
2595 2594 return (NULL);
2596 2595 }
2597 2596
2598 2597 iv_ptr = (esph_ptr + 1);
2599 2598 /*
2600 2599 * iv_ptr points to the mblk which will contain the IV once we have
2601 2600 * written it there. This mblk will be part of a mblk chain that
2602 2601 * will make up the packet.
2603 2602 *
2604 2603 * For counter mode algorithms, the IV is a 64 bit quantity, it
2605 2604 * must NEVER repeat in the lifetime of the SA, otherwise an
2606 2605 * attacker who had recorded enough packets might be able to
2607 2606 * determine some clear text.
2608 2607 *
2609 2608 * To ensure this does not happen, the IV is stored in the SA and
2610 2609 * incremented for each packet, the IV is then copied into the
2611 2610 * "packet" for transmission to the receiving system. The IV will
2612 2611 * also be copied into the nonce, when the packet is encrypted.
2613 2612 *
2614 2613 * CBC mode algorithms use a random IV for each packet. We do not
2615 2614 * require the highest quality random bits, but for best security
2616 2615 * with CBC mode ciphers, the value must be unlikely to repeat and
2617 2616 * must not be known in advance to an adversary capable of influencing
2618 2617 * the clear text.
2619 2618 */
2620 2619 if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc,
2621 2620 espstack)) {
2622 2621 ip_drop_packet(data_mp, B_FALSE, ill,
2623 2622 DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper);
2624 2623 if (need_refrele)
2625 2624 ixa_refrele(ixa);
2626 2625 return (NULL);
2627 2626 }
2628 2627
2629 2628 /* Fix the IP header. */
2630 2629 alloclen = padlen + 2 + mac_len;
2631 2630 adj = alloclen + (espmp->b_wptr - espmp->b_rptr);
2632 2631
2633 2632 protocol = *nhp;
2634 2633
2635 2634 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2636 2635 ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
2637 2636 if (is_natt) {
2638 2637 *nhp = IPPROTO_UDP;
2639 2638 udpha->uha_length = htons(ntohs(ipha->ipha_length) -
2640 2639 IPH_HDR_LENGTH(ipha));
2641 2640 } else {
2642 2641 *nhp = IPPROTO_ESP;
2643 2642 }
2644 2643 ipha->ipha_hdr_checksum = 0;
2645 2644 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2646 2645 } else {
2647 2646 ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
2648 2647 *nhp = IPPROTO_ESP;
2649 2648 }
2650 2649
2651 2650 /* I've got the two ESP mblks, now insert them. */
2652 2651
2653 2652 esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n"));
2654 2653 esp2dbg(espstack, (dump_msg(data_mp)));
2655 2654
2656 2655 if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) {
2657 2656 ESP_BUMP_STAT(espstack, out_discards);
2658 2657 /* NOTE: esp_insert_esp() only fails if there's no memory. */
2659 2658 ip_drop_packet(data_mp, B_FALSE, ill,
2660 2659 DROPPER(ipss, ipds_esp_nomem),
2661 2660 &espstack->esp_dropper);
2662 2661 freeb(espmp);
2663 2662 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2664 2663 if (need_refrele)
2665 2664 ixa_refrele(ixa);
2666 2665 return (NULL);
2667 2666 }
2668 2667
2669 2668 /* Append padding (and leave room for ICV). */
2670 2669 for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
2671 2670 ;
2672 2671 if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
2673 2672 tailmp->b_cont = allocb(alloclen, BPRI_HI);
2674 2673 if (tailmp->b_cont == NULL) {
2675 2674 ESP_BUMP_STAT(espstack, out_discards);
2676 2675 esp0dbg(("esp_outbound: Can't allocate tailmp.\n"));
2677 2676 ip_drop_packet(data_mp, B_FALSE, ill,
2678 2677 DROPPER(ipss, ipds_esp_nomem),
2679 2678 &espstack->esp_dropper);
2680 2679 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2681 2680 if (need_refrele)
2682 2681 ixa_refrele(ixa);
2683 2682 return (NULL);
2684 2683 }
2685 2684 tailmp = tailmp->b_cont;
2686 2685 }
2687 2686
2688 2687 /*
2689 2688 * If there's padding, N bytes of padding must be of the form 0x1,
2690 2689 * 0x2, 0x3... 0xN.
2691 2690 */
2692 2691 for (i = 0; i < padlen; ) {
2693 2692 i++;
2694 2693 *tailmp->b_wptr++ = i;
2695 2694 }
2696 2695 *tailmp->b_wptr++ = i;
2697 2696 *tailmp->b_wptr++ = protocol;
2698 2697
2699 2698 esp2dbg(espstack, ("data_Mp before encryption:\n"));
2700 2699 esp2dbg(espstack, (dump_msg(data_mp)));
2701 2700
2702 2701 /*
2703 2702 * Okay. I've set up the pre-encryption ESP. Let's do it!
2704 2703 */
2705 2704
2706 2705 if (mac_len > 0) {
2707 2706 ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
2708 2707 icv_buf = tailmp->b_wptr;
2709 2708 tailmp->b_wptr += mac_len;
2710 2709 } else {
2711 2710 icv_buf = NULL;
2712 2711 }
2713 2712
2714 2713 data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf,
2715 2714 datalen + padlen + 2);
2716 2715 if (need_refrele)
2717 2716 ixa_refrele(ixa);
2718 2717 return (data_mp);
2719 2718 }
2720 2719
2721 2720 /*
2722 2721 * IP calls this to validate the ICMP errors that
2723 2722 * we got from the network.
2724 2723 */
2725 2724 mblk_t *
2726 2725 ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
2727 2726 {
2728 2727 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
2729 2728 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2730 2729 ipsec_stack_t *ipss = ns->netstack_ipsec;
2731 2730
2732 2731 /*
2733 2732 * Unless we get an entire packet back, this function is useless.
2734 2733 * Why?
2735 2734 *
2736 2735 * 1.) Partial packets are useless, because the "next header"
2737 2736 * is at the end of the decrypted ESP packet. Without the
2738 2737 * whole packet, this is useless.
2739 2738 *
2740 2739 * 2.) If we every use a stateful cipher, such as a stream or a
2741 2740 * one-time pad, we can't do anything.
2742 2741 *
2743 2742 * Since the chances of us getting an entire packet back are very
2744 2743 * very small, we discard here.
2745 2744 */
2746 2745 IP_ESP_BUMP_STAT(ipss, in_discards);
2747 2746 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
2748 2747 DROPPER(ipss, ipds_esp_icmp),
2749 2748 &espstack->esp_dropper);
2750 2749 return (NULL);
2751 2750 }
2752 2751
/*
 * Construct an SADB_REGISTER message with the current algorithms.
 * This function gets called when 'ipsecalgs -s' is run or when
 * in.iked (or other KMD) starts.
 *
 * The message is a KEYSOCK_OUT mblk (addressed to `serial') followed by
 * the PF_KEY SADB_REGISTER payload: a supported-auth extension, a
 * supported-encrypt extension, and (on labeled systems with a caller
 * credential) a sensitivity extension.  `sequence' and `pid' are echoed
 * into the PF_KEY header.  Returns B_TRUE on success, B_FALSE if any
 * allocation fails or there is no PF_KEY queue to send on.
 */
static boolean_t
esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
    ipsecesp_stack_t *espstack, cred_t *cr)
{
	mblk_t *pfkey_msg_mp, *keysock_out_mp;
	sadb_msg_t *samsg;
	sadb_supported_t *sasupp_auth = NULL;
	sadb_supported_t *sasupp_encr = NULL;
	sadb_alg_t *saalg;
	uint_t allocsize = sizeof (*samsg);	/* header; extensions added below */
	uint_t i, numalgs_snap;
	int current_aalgs;
	ipsec_alginfo_t **authalgs;
	uint_t num_aalgs;
	int current_ealgs;
	ipsec_alginfo_t **encralgs;
	uint_t num_ealgs;
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
	sadb_sens_t *sens;
	size_t sens_len = 0;
	sadb_ext_t *nextext;
	ts_label_t *sens_tsl = NULL;

	/* Allocate the KEYSOCK_OUT. */
	keysock_out_mp = sadb_keysock_out(serial);
	if (keysock_out_mp == NULL) {
		esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
		return (B_FALSE);
	}

	/*
	 * On a labeled system, reserve room for a sensitivity extension
	 * derived from the caller's credential label.
	 */
	if (is_system_labeled() && (cr != NULL)) {
		sens_tsl = crgetlabel(cr);
		if (sens_tsl != NULL) {
			sens_len = sadb_sens_len_from_label(sens_tsl);
			allocsize += sens_len;
		}
	}

	/*
	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
	 */

	rw_enter(&ipss->ipsec_alg_lock, RW_READER);
	/*
	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
	 * down the lock while filling it.
	 *
	 * Return only valid algorithms, so the number of algorithms
	 * to send up may be less than the number of algorithm entries
	 * in the table.
	 */
	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
			num_aalgs++;

	if (num_aalgs != 0) {
		allocsize += (num_aalgs * sizeof (*saalg));
		allocsize += sizeof (*sasupp_auth);
	}
	encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
	for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
		if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
			num_ealgs++;

	if (num_ealgs != 0) {
		allocsize += (num_ealgs * sizeof (*saalg));
		allocsize += sizeof (*sasupp_encr);
	}
	keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
	if (keysock_out_mp->b_cont == NULL) {
		rw_exit(&ipss->ipsec_alg_lock);
		freemsg(keysock_out_mp);
		return (B_FALSE);
	}
	pfkey_msg_mp = keysock_out_mp->b_cont;
	pfkey_msg_mp->b_wptr += allocsize;

	/* First extension starts immediately after the sadb_msg header. */
	nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));

	if (num_aalgs != 0) {
		sasupp_auth = (sadb_supported_t *)nextext;
		saalg = (sadb_alg_t *)(sasupp_auth + 1);

		/* sadb_alg_t entries must be 8-byte aligned. */
		ASSERT(((ulong_t)saalg & 0x7) == 0);

		numalgs_snap = 0;
		for (i = 0;
		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
		    i++) {
			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
				continue;

			saalg->sadb_alg_id = authalgs[i]->alg_id;
			saalg->sadb_alg_ivlen = 0;
			saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
			saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
			saalg->sadb_x_alg_increment =
			    authalgs[i]->alg_increment;
			saalg->sadb_x_alg_saltbits = SADB_8TO1(
			    authalgs[i]->alg_saltlen);
			numalgs_snap++;
			saalg++;
		}
		ASSERT(numalgs_snap == num_aalgs);
#ifdef DEBUG
		/*
		 * Reality check to make sure I snagged all of the
		 * algorithms.
		 */
		for (; i < IPSEC_MAX_ALGS; i++) {
			if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
				cmn_err(CE_PANIC, "esp_register_out()! "
				    "Missed aalg #%d.\n", i);
			}
		}
#endif /* DEBUG */
		nextext = (sadb_ext_t *)saalg;
	}

	if (num_ealgs != 0) {
		sasupp_encr = (sadb_supported_t *)nextext;
		saalg = (sadb_alg_t *)(sasupp_encr + 1);

		numalgs_snap = 0;
		for (i = 0;
		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
			if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
				continue;
			saalg->sadb_alg_id = encralgs[i]->alg_id;
			saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen;
			saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits;
			saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits;
			/*
			 * We could advertise the ICV length, except there
			 * is not a value in sadb_x_algb to do this.
			 * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
			 */
			saalg->sadb_x_alg_increment =
			    encralgs[i]->alg_increment;
			saalg->sadb_x_alg_saltbits =
			    SADB_8TO1(encralgs[i]->alg_saltlen);

			numalgs_snap++;
			saalg++;
		}
		ASSERT(numalgs_snap == num_ealgs);
#ifdef DEBUG
		/*
		 * Reality check to make sure I snagged all of the
		 * algorithms.
		 */
		for (; i < IPSEC_MAX_ALGS; i++) {
			if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
				cmn_err(CE_PANIC, "esp_register_out()! "
				    "Missed ealg #%d.\n", i);
			}
		}
#endif /* DEBUG */
		nextext = (sadb_ext_t *)saalg;
	}

	/* Snapshot the counts before dropping the algorithm lock. */
	current_aalgs = num_aalgs;
	current_ealgs = num_ealgs;

	rw_exit(&ipss->ipsec_alg_lock);

	if (sens_tsl != NULL) {
		sens = (sadb_sens_t *)nextext;
		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
		    sens_tsl, sens_len);

		nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
	}

	/* Now fill the rest of the SADB_REGISTER message. */

	samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
	samsg->sadb_msg_version = PF_KEY_V2;
	samsg->sadb_msg_type = SADB_REGISTER;
	samsg->sadb_msg_errno = 0;
	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
	samsg->sadb_msg_len = SADB_8TO64(allocsize);
	samsg->sadb_msg_reserved = 0;
	/*
	 * Assume caller has sufficient sequence/pid number info.  If it's one
	 * from me over a new alg., I could give two hoots about sequence.
	 */
	samsg->sadb_msg_seq = sequence;
	samsg->sadb_msg_pid = pid;

	if (sasupp_auth != NULL) {
		sasupp_auth->sadb_supported_len = SADB_8TO64(
		    sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
		sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
		sasupp_auth->sadb_supported_reserved = 0;
	}

	if (sasupp_encr != NULL) {
		sasupp_encr->sadb_supported_len = SADB_8TO64(
		    sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
		sasupp_encr->sadb_supported_exttype =
		    SADB_EXT_SUPPORTED_ENCRYPT;
		sasupp_encr->sadb_supported_reserved = 0;
	}

	if (espstack->esp_pfkey_q != NULL)
		putnext(espstack->esp_pfkey_q, keysock_out_mp);
	else {
		freemsg(keysock_out_mp);
		return (B_FALSE);
	}

	return (B_TRUE);
}
2973 2972
/*
 * Invoked when the algorithm table changes.  Causes SADB_REGISTER
 * messages containing the current list of algorithms to be
 * sent up to the ESP listeners.
 */
2979 2978 void
2980 2979 ipsecesp_algs_changed(netstack_t *ns)
2981 2980 {
2982 2981 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
2983 2982
2984 2983 /*
2985 2984 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
2986 2985 * everywhere. (The function itself checks for NULL esp_pfkey_q.)
2987 2986 */
2988 2987 (void) esp_register_out(0, 0, 0, espstack, NULL);
2989 2988 }
2990 2989
2991 2990 /*
2992 2991 * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
2993 2992 * and send() it into ESP and IP again.
2994 2993 */
2995 2994 static void
2996 2995 inbound_task(void *arg)
2997 2996 {
2998 2997 mblk_t *mp = (mblk_t *)arg;
2999 2998 mblk_t *async_mp;
3000 2999 ip_recv_attr_t iras;
3001 3000
3002 3001 async_mp = mp;
3003 3002 mp = async_mp->b_cont;
3004 3003 async_mp->b_cont = NULL;
3005 3004 if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
3006 3005 /* The ill or ip_stack_t disappeared on us */
3007 3006 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
3008 3007 freemsg(mp);
3009 3008 goto done;
3010 3009 }
3011 3010
3012 3011 esp_inbound_restart(mp, &iras);
3013 3012 done:
3014 3013 ira_cleanup(&iras, B_TRUE);
3015 3014 }
3016 3015
3017 3016 /*
3018 3017 * Restart ESP after the SA has been added.
3019 3018 */
3020 3019 static void
3021 3020 esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
3022 3021 {
3023 3022 esph_t *esph;
3024 3023 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
3025 3024 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3026 3025
3027 3026 esp2dbg(espstack, ("in ESP inbound_task"));
3028 3027 ASSERT(espstack != NULL);
3029 3028
3030 3029 mp = ipsec_inbound_esp_sa(mp, ira, &esph);
3031 3030 if (mp == NULL)
3032 3031 return;
3033 3032
3034 3033 ASSERT(esph != NULL);
3035 3034 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3036 3035 ASSERT(ira->ira_ipsec_esp_sa != NULL);
3037 3036
3038 3037 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira);
3039 3038 if (mp == NULL) {
3040 3039 /*
3041 3040 * Either it failed or is pending. In the former case
3042 3041 * ipIfStatsInDiscards was increased.
3043 3042 */
3044 3043 return;
3045 3044 }
3046 3045
3047 3046 ip_input_post_ipsec(mp, ira);
3048 3047 }
3049 3048
/*
 * Now that weak-key passed, actually ADD the security association, and
 * send back a reply ADD message.
 *
 * Determines which fanout table (inbound/outbound) is primary for the
 * new SA, drains any ACQUIRE record whose sequence number matches this
 * ADD (transmitting its queued packets via esp_outbound_finish()), and
 * handles the larval-SA case for SADB_UPDATE.  Returns 0 on success or
 * an errno value, setting *diagnostic to a SADB_X_DIAGNOSTIC_* code
 * where applicable.
 */
static int
esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
    int *diagnostic, ipsecesp_stack_t *espstack)
{
	isaf_t *primary = NULL, *secondary;
	boolean_t clone = B_FALSE, is_inbound = B_FALSE;
	ipsa_t *larval = NULL;
	ipsacq_t *acqrec;
	iacqf_t *acq_bucket;
	mblk_t *acq_msgs = NULL;
	int rc;
	mblk_t *lpkt;
	int error;
	ipsa_query_t sq;
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;

	/*
	 * Locate the appropriate table(s).
	 */
	sq.spp = &espstack->esp_sadb;	/* XXX */
	error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
	    IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
	    &sq, diagnostic);
	if (error)
		return (error);

	/*
	 * Use the direction flags provided by the KMD to determine
	 * if the inbound or outbound table should be the primary
	 * for this SA. If these flags were absent then make this
	 * decision based on the addresses.
	 */
	if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) {
		primary = sq.inbound;
		secondary = sq.outbound;
		is_inbound = B_TRUE;
		if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
			clone = B_TRUE;
	} else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
		primary = sq.outbound;
		secondary = sq.inbound;
	}

	if (primary == NULL) {
		/*
		 * The KMD did not set a direction flag, determine which
		 * table to insert the SA into based on addresses.
		 */
		switch (ksi->ks_in_dsttype) {
		case KS_IN_ADDR_MBCAST:
			clone = B_TRUE;	/* All mcast SAs can be bidirectional */
			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
			/* FALLTHRU */
		/*
		 * If the source address is either one of mine, or unspecified
		 * (which is best summed up by saying "not 'not mine'"),
		 * then the association is potentially bi-directional,
		 * in that it can be used for inbound traffic and outbound
		 * traffic.  The best example of such an SA is a multicast
		 * SA (which allows me to receive the outbound traffic).
		 */
		case KS_IN_ADDR_ME:
			sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
			primary = sq.inbound;
			secondary = sq.outbound;
			if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
				clone = B_TRUE;
			is_inbound = B_TRUE;
			break;
		/*
		 * If the source address literally not mine (either
		 * unspecified or not mine), then this SA may have an
		 * address that WILL be mine after some configuration.
		 * We pay the price for this by making it a bi-directional
		 * SA.
		 */
		case KS_IN_ADDR_NOTME:
			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
			primary = sq.outbound;
			secondary = sq.inbound;
			if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
				sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
				clone = B_TRUE;
			}
			break;
		default:
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
			return (EINVAL);
		}
	}

	/*
	 * Find a ACQUIRE list entry if possible.  If we've added an SA that
	 * suits the needs of an ACQUIRE list entry, we can eliminate the
	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
	 * high-bit of the sequence number to queue it.  Key off destination
	 * addr, and change acqrec's state.
	 */

	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
		acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
		mutex_enter(&acq_bucket->iacqf_lock);
		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
		    acqrec = acqrec->ipsacq_next) {
			mutex_enter(&acqrec->ipsacq_lock);
			/*
			 * Q:  I only check sequence.  Should I check dst?
			 * A: Yes, check dest because those are the packets
			 *    that are queued up.
			 */
			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
			    IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
			    acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
				break;
			mutex_exit(&acqrec->ipsacq_lock);
		}
		if (acqrec != NULL) {
			/*
			 * AHA!  I found an ACQUIRE record for this SA.
			 * Grab the msg list, and free the acquire record.
			 * I already am holding the lock for this record,
			 * so all I have to do is free it.
			 */
			acq_msgs = acqrec->ipsacq_mp;
			acqrec->ipsacq_mp = NULL;
			mutex_exit(&acqrec->ipsacq_lock);
			sadb_destroy_acquire(acqrec,
			    espstack->ipsecesp_netstack);
		}
		mutex_exit(&acq_bucket->iacqf_lock);
	}

	/*
	 * Find PF_KEY message, and see if I'm an update.  If so, find entry
	 * in larval list (if there).
	 */
	if (samsg->sadb_msg_type == SADB_UPDATE) {
		mutex_enter(&sq.inbound->isaf_lock);
		larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
		    ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
		mutex_exit(&sq.inbound->isaf_lock);

		if ((larval == NULL) ||
		    (larval->ipsa_state != IPSA_STATE_LARVAL)) {
			*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
			if (larval != NULL) {
				IPSA_REFRELE(larval);
			}
			esp0dbg(("Larval update, but larval disappeared.\n"));
			return (ESRCH);
		} /* Else sadb_common_add unlinks it for me! */
	}

	if (larval != NULL) {
		/*
		 * Hold again, because sadb_common_add() consumes a reference,
		 * and we don't want to clear_lpkt() without a reference.
		 */
		IPSA_REFHOLD(larval);
	}

	rc = sadb_common_add(espstack->esp_pfkey_q,
	    mp, samsg, ksi, primary, secondary, larval, clone, is_inbound,
	    diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb);

	if (larval != NULL) {
		if (rc == 0) {
			lpkt = sadb_clear_lpkt(larval);
			if (lpkt != NULL) {
				/*
				 * Re-inject the packet that was waiting on
				 * this larval SA; rc becomes nonzero if the
				 * taskq dispatch fails.
				 */
				rc = !taskq_dispatch(esp_taskq, inbound_task,
				    lpkt, TQ_NOSLEEP);
			}
		}
		IPSA_REFRELE(larval);
	}

	/*
	 * How much more stack will I create with all of these
	 * esp_outbound() calls?
	 */

	/* Handle the packets queued waiting for the SA */
	while (acq_msgs != NULL) {
		mblk_t *asyncmp;
		mblk_t *data_mp;
		ip_xmit_attr_t ixas;
		ill_t *ill;

		asyncmp = acq_msgs;
		acq_msgs = acq_msgs->b_next;
		asyncmp->b_next = NULL;

		/*
		 * Extract the ip_xmit_attr_t from the first mblk.
		 * Verifies that the netstack and ill is still around; could
		 * have vanished while iked was doing its work.
		 * On successful return we have a nce_t and the ill/ipst can't
		 * disappear until we do the nce_refrele in ixa_cleanup.
		 */
		data_mp = asyncmp->b_cont;
		asyncmp->b_cont = NULL;
		if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
			ESP_BUMP_STAT(espstack, out_discards);
			ip_drop_packet(data_mp, B_FALSE, NULL,
			    DROPPER(ipss, ipds_sadb_acquire_timeout),
			    &espstack->esp_dropper);
		} else if (rc != 0) {
			/* The SA add itself failed; drop the queued packet. */
			ill = ixas.ixa_nce->nce_ill;
			ESP_BUMP_STAT(espstack, out_discards);
			ip_drop_packet(data_mp, B_FALSE, ill,
			    DROPPER(ipss, ipds_sadb_acquire_timeout),
			    &espstack->esp_dropper);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		} else {
			esp_outbound_finish(data_mp, &ixas);
		}
		ixa_cleanup(&ixas);
	}

	return (rc);
}
3275 3274
3276 3275 /*
3277 3276 * Process one of the queued messages (from ipsacq_mp) once the SA
3278 3277 * has been added.
3279 3278 */
3280 3279 static void
3281 3280 esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
3282 3281 {
3283 3282 netstack_t *ns = ixa->ixa_ipst->ips_netstack;
3284 3283 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3285 3284 ipsec_stack_t *ipss = ns->netstack_ipsec;
3286 3285 ill_t *ill = ixa->ixa_nce->nce_ill;
3287 3286
3288 3287 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) {
3289 3288 ESP_BUMP_STAT(espstack, out_discards);
3290 3289 ip_drop_packet(data_mp, B_FALSE, ill,
3291 3290 DROPPER(ipss, ipds_sadb_acquire_timeout),
3292 3291 &espstack->esp_dropper);
3293 3292 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3294 3293 return;
3295 3294 }
3296 3295
3297 3296 data_mp = esp_outbound(data_mp, ixa);
3298 3297 if (data_mp == NULL)
3299 3298 return;
3300 3299
3301 3300 /* do AH processing if needed */
3302 3301 data_mp = esp_do_outbound_ah(data_mp, ixa);
3303 3302 if (data_mp == NULL)
3304 3303 return;
3305 3304
3306 3305 (void) ip_output_post_ipsec(data_mp, ixa);
3307 3306 }
3308 3307
/*
 * Add new ESP security association.  This may become a generic AH/ESP
 * routine eventually.
 *
 * Validates the extensions of the incoming SADB message (source and
 * destination addresses, SA, inner/NAT-T addresses, lifetimes), then
 * sanity-checks the proposed authentication and encryption algorithms
 * and key sizes against the current algorithm tables before handing off
 * to esp_add_sa_finish().  Returns 0 or an errno value, setting
 * *diagnostic to a SADB_X_DIAGNOSTIC_* code on EINVAL.
 */
static int
esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
{
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	sadb_address_t *srcext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
	sadb_address_t *dstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
	sadb_address_t *isrcext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
	sadb_address_t *idstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
	sadb_address_t *nttext_loc =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
	sadb_address_t *nttext_rem =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
	struct sockaddr_in *src, *dst;
	struct sockaddr_in *natt_loc, *natt_rem;
	struct sockaddr_in6 *natt_loc6, *natt_rem6;
	sadb_lifetime_t *soft =
	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
	sadb_lifetime_t *hard =
	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
	sadb_lifetime_t *idle =
	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/* I need certain extensions present for an ADD message. */
	if (srcext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
		return (EINVAL);
	}
	if (dstext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
		return (EINVAL);
	}
	/* Inner addresses must come in pairs, if at all. */
	if (isrcext == NULL && idstext != NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
		return (EINVAL);
	}
	if (isrcext != NULL && idstext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
		return (EINVAL);
	}
	if (assoc == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
		return (EINVAL);
	}
	if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
		return (EINVAL);
	}

	/*
	 * The sockaddrs immediately follow their sadb_address_t headers.
	 * Note: the NAT-T pointers are only dereferenced after the
	 * corresponding extension has been checked for non-NULL below.
	 */
	src = (struct sockaddr_in *)(srcext + 1);
	dst = (struct sockaddr_in *)(dstext + 1);
	natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
	natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
	natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
	natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);

	/* Sundry ADD-specific reality checks. */
	/* XXX STATS : Logging/stats here? */

	if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
		return (EINVAL);
	}
	if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
		return (EINVAL);
	}

#ifndef IPSEC_LATENCY_TEST
	/* NULL encryption with no authentication is never acceptable. */
	if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
	    assoc->sadb_sa_auth == SADB_AALG_NONE) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
		return (EINVAL);
	}
#endif

	if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
		return (EINVAL);
	}

	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
		return (EINVAL);
	}
	ASSERT(src->sin_family == dst->sin_family);

	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
		if (nttext_loc == NULL) {
			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
			return (EINVAL);
		}

		/* NAT-T addresses must be IPv4 (possibly v4-mapped v6). */
		if (natt_loc->sin_family == AF_INET6 &&
		    !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
			return (EINVAL);
		}
	}

	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
		if (nttext_rem == NULL) {
			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
			return (EINVAL);
		}
		if (natt_rem->sin_family == AF_INET6 &&
		    !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
			return (EINVAL);
		}
	}

	/* Stuff I don't support, for now.  XXX Diagnostic? */
	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
		return (EOPNOTSUPP);

	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
		return (EINVAL);

	/*
	 * XXX Policy :  I'm not checking identities at this time,
	 * but if I did, I'd do them here, before I sent
	 * the weak key check up to the algorithm.
	 */

	rw_enter(&ipss->ipsec_alg_lock, RW_READER);

	/*
	 * First locate the authentication algorithm.
	 */
#ifdef IPSEC_LATENCY_TEST
	if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) {
#else
	if (akey != NULL) {
#endif
		ipsec_alginfo_t *aalg;

		aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
		    [assoc->sadb_sa_auth];
		if (aalg == NULL || !ALG_VALID(aalg)) {
			rw_exit(&ipss->ipsec_alg_lock);
			esp1dbg(espstack, ("Couldn't find auth alg #%d.\n",
			    assoc->sadb_sa_auth));
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
			return (EINVAL);
		}

		/*
		 * Sanity check key sizes.
		 * Note: It's not possible to use SADB_AALG_NONE because
		 * this auth_alg is not defined with ALG_FLAG_VALID. If this
		 * ever changes, the same check for SADB_AALG_NONE and
		 * a auth_key != NULL should be made here ( see below).
		 */
		if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
			rw_exit(&ipss->ipsec_alg_lock);
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
			return (EINVAL);
		}
		ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);

		/* check key and fix parity if needed */
		if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
		    diagnostic) != 0) {
			rw_exit(&ipss->ipsec_alg_lock);
			return (EINVAL);
		}
	}

	/*
	 * Then locate the encryption algorithm.
	 */
	if (ekey != NULL) {
		uint_t keybits;
		ipsec_alginfo_t *ealg;

		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
		    [assoc->sadb_sa_encrypt];
		if (ealg == NULL || !ALG_VALID(ealg)) {
			rw_exit(&ipss->ipsec_alg_lock);
			esp1dbg(espstack, ("Couldn't find encr alg #%d.\n",
			    assoc->sadb_sa_encrypt));
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
			return (EINVAL);
		}

		/*
		 * Sanity check key sizes. If the encryption algorithm is
		 * SADB_EALG_NULL but the encryption key is NOT
		 * NULL then complain.
		 *
		 * The keying material includes salt bits if required by
		 * algorithm and optionally the Initial IV, check the
		 * length of whats left.
		 */
		keybits = ekey->sadb_key_bits;
		keybits -= ekey->sadb_key_reserved;
		keybits -= SADB_8TO1(ealg->alg_saltlen);
		if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
		    (!ipsec_valid_key_size(keybits, ealg))) {
			rw_exit(&ipss->ipsec_alg_lock);
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
			return (EINVAL);
		}
		ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);

		/* check key */
		if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
		    diagnostic) != 0) {
			rw_exit(&ipss->ipsec_alg_lock);
			return (EINVAL);
		}
	}
	rw_exit(&ipss->ipsec_alg_lock);

	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
	    diagnostic, espstack));
}
3541 3540
3542 3541 /*
3543 3542 * Update a security association. Updates come in two varieties. The first
3544 3543 * is an update of lifetimes on a non-larval SA. The second is an update of
3545 3544 * a larval SA, which ends up looking a lot more like an add.
3546 3545 */
3547 3546 static int
3548 3547 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3549 3548 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3550 3549 {
3551 3550 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3552 3551 mblk_t *buf_pkt;
3553 3552 int rcode;
3554 3553
3555 3554 sadb_address_t *dstext =
3556 3555 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3557 3556
3558 3557 if (dstext == NULL) {
3559 3558 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3560 3559 return (EINVAL);
3561 3560 }
3562 3561
3563 3562 rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb,
3564 3563 diagnostic, espstack->esp_pfkey_q, esp_add_sa,
3565 3564 espstack->ipsecesp_netstack, sadb_msg_type);
3566 3565
3567 3566 if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
3568 3567 (rcode != 0)) {
3569 3568 return (rcode);
3570 3569 }
3571 3570
3572 3571 HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec,
3573 3572 espstack->esp_dropper, buf_pkt);
3574 3573
3575 3574 return (rcode);
3576 3575 }
3577 3576
3578 3577 /* XXX refactor me */
3579 3578 /*
3580 3579 * Delete a security association. This is REALLY likely to be code common to
3581 3580 * both AH and ESP. Find the association, then unlink it.
3582 3581 */
3583 3582 static int
3584 3583 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
3585 3584 ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
3586 3585 {
3587 3586 sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3588 3587 sadb_address_t *dstext =
3589 3588 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3590 3589 sadb_address_t *srcext =
3591 3590 (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3592 3591 struct sockaddr_in *sin;
3593 3592
3594 3593 if (assoc == NULL) {
3595 3594 if (dstext != NULL) {
3596 3595 sin = (struct sockaddr_in *)(dstext + 1);
3597 3596 } else if (srcext != NULL) {
3598 3597 sin = (struct sockaddr_in *)(srcext + 1);
3599 3598 } else {
3600 3599 *diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3601 3600 return (EINVAL);
3602 3601 }
3603 3602 return (sadb_purge_sa(mp, ksi,
3604 3603 (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 :
3605 3604 &espstack->esp_sadb.s_v4, diagnostic,
3606 3605 espstack->esp_pfkey_q));
3607 3606 }
3608 3607
3609 3608 return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic,
3610 3609 espstack->esp_pfkey_q, sadb_msg_type));
3611 3610 }
3612 3611
3613 3612 /* XXX refactor me */
3614 3613 /*
3615 3614 * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
3616 3615 * messages.
3617 3616 */
3618 3617 static void
3619 3618 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
3620 3619 {
3621 3620 int error;
3622 3621 sadb_msg_t *samsg;
3623 3622
3624 3623 /*
3625 3624 * Dump each fanout, bailing if error is non-zero.
3626 3625 */
3627 3626
3628 3627 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3629 3628 &espstack->esp_sadb.s_v4);
3630 3629 if (error != 0)
3631 3630 goto bail;
3632 3631
3633 3632 error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
3634 3633 &espstack->esp_sadb.s_v6);
3635 3634 bail:
3636 3635 ASSERT(mp->b_cont != NULL);
3637 3636 samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3638 3637 samsg->sadb_msg_errno = (uint8_t)error;
3639 3638 sadb_pfkey_echo(espstack->esp_pfkey_q, mp,
3640 3639 (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
3641 3640 }
3642 3641
3643 3642 /*
3644 3643 * First-cut reality check for an inbound PF_KEY message.
3645 3644 */
3646 3645 static boolean_t
3647 3646 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
3648 3647 ipsecesp_stack_t *espstack)
3649 3648 {
3650 3649 int diagnostic;
3651 3650
3652 3651 if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
3653 3652 diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
3654 3653 goto badmsg;
3655 3654 }
3656 3655 if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
3657 3656 ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
3658 3657 diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
3659 3658 goto badmsg;
3660 3659 }
3661 3660 return (B_FALSE); /* False ==> no failures */
3662 3661
3663 3662 badmsg:
3664 3663 sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
3665 3664 ksi->ks_in_serial);
3666 3665 return (B_TRUE); /* True ==> failures */
3667 3666 }
3668 3667
3669 3668 /*
3670 3669 * ESP parsing of PF_KEY messages. Keysock did most of the really silly
3671 3670 * error cases. What I receive is a fully-formed, syntactically legal
3672 3671 * PF_KEY message. I then need to check semantics...
3673 3672 *
3674 3673 * This code may become common to AH and ESP. Stay tuned.
3675 3674 *
3676 3675 * I also make the assumption that db_ref's are cool. If this assumption
3677 3676 * is wrong, this means that someone other than keysock or me has been
3678 3677 * mucking with PF_KEY messages.
3679 3678 */
/*
 * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
 * error cases.  What I receive is a fully-formed, syntactically legal
 * PF_KEY message.  I then need to check semantics...
 *
 * This code may become common to AH and ESP.  Stay tuned.
 *
 * I also make the assumption that db_ref's are cool.  If this assumption
 * is wrong, this means that someone other than keysock or me has been
 * mucking with PF_KEY messages.
 *
 * Ownership note: every path below consumes mp — either by handing it to
 * the per-message-type handler, echoing it back with sadb_pfkey_echo(),
 * replying with sadb_pfkey_error(), or freeing it directly.
 */
static void
esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack)
{
	mblk_t *msg = mp->b_cont;
	sadb_msg_t *samsg;
	keysock_in_t *ksi;
	int error;
	int diagnostic = SADB_X_DIAGNOSTIC_NONE;

	/* Keysock guarantees a KEYSOCK_IN block with the samsg behind it. */
	ASSERT(msg != NULL);

	samsg = (sadb_msg_t *)msg->b_rptr;
	ksi = (keysock_in_t *)mp->b_rptr;

	/*
	 * If applicable, convert unspecified AF_INET6 to unspecified
	 * AF_INET.  And do other address reality checks.
	 * Both helpers consume mp on failure, so just return.
	 */
	if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp,
	    espstack->ipsecesp_netstack) ||
	    esp_pfkey_reality_failures(mp, ksi, espstack)) {
		return;
	}

	/* Dispatch on PF_KEY message type; see RFC 2367 for semantics. */
	switch (samsg->sadb_msg_type) {
	case SADB_ADD:
		error = esp_add_sa(mp, ksi, &diagnostic,
		    espstack->ipsecesp_netstack);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* else esp_add_sa() took care of things. */
		break;
	case SADB_DELETE:
	case SADB_X_DELPAIR:
	case SADB_X_DELPAIR_STATE:
		error = esp_del_sa(mp, ksi, &diagnostic, espstack,
		    samsg->sadb_msg_type);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* Else esp_del_sa() took care of things. */
		break;
	case SADB_GET:
		error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb,
		    &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* Else sadb_get_sa() took care of things. */
		break;
	case SADB_FLUSH:
		/* Flush both the v4 and v6 fanouts, then echo the request. */
		sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack);
		sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL);
		break;
	case SADB_REGISTER:
		/*
		 * Hmmm, let's do it!  Check for extensions (there should
		 * be none), extract the fields, call esp_register_out(),
		 * then either free or report an error.
		 *
		 * Keysock takes care of the PF_KEY bookkeeping for this.
		 */
		if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
		    ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) {
			freemsg(mp);
		} else {
			/*
			 * Only way this path hits is if there is a memory
			 * failure.  It will not return B_FALSE because of
			 * lack of esp_pfkey_q if I am in wput().
			 */
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM,
			    diagnostic, ksi->ks_in_serial);
		}
		break;
	case SADB_UPDATE:
	case SADB_X_UPDATEPAIR:
		/*
		 * Find a larval, if not there, find a full one and get
		 * strict.
		 */
		error = esp_update_sa(mp, ksi, &diagnostic, espstack,
		    samsg->sadb_msg_type);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* else esp_update_sa() took care of things. */
		break;
	case SADB_GETSPI:
		/*
		 * Reserve a new larval entry.
		 */
		esp_getspi(mp, ksi, espstack);
		break;
	case SADB_ACQUIRE:
		/*
		 * Find larval and/or ACQUIRE record and kill it (them), I'm
		 * most likely an error.  Inbound ACQUIRE messages should only
		 * have the base header.
		 */
		sadb_in_acquire(samsg, &espstack->esp_sadb,
		    espstack->esp_pfkey_q, espstack->ipsecesp_netstack);
		freemsg(mp);
		break;
	case SADB_DUMP:
		/*
		 * Dump all entries.
		 */
		esp_dump(mp, ksi, espstack);
		/* esp_dump will take care of the return message, etc. */
		break;
	case SADB_EXPIRE:
		/* Should never reach me. */
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP,
		    diagnostic, ksi->ks_in_serial);
		break;
	default:
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL,
		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
		break;
	}
}
3807 3806
3808 3807 /*
3809 3808 * Handle case where PF_KEY says it can't find a keysock for one of my
3810 3809 * ACQUIRE messages.
3811 3810 */
3812 3811 static void
3813 3812 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack)
3814 3813 {
3815 3814 sadb_msg_t *samsg;
3816 3815 keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
3817 3816
3818 3817 if (mp->b_cont == NULL) {
3819 3818 freemsg(mp);
3820 3819 return;
3821 3820 }
3822 3821 samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
3823 3822
3824 3823 /*
3825 3824 * If keysock can't find any registered, delete the acquire record
3826 3825 * immediately, and handle errors.
3827 3826 */
3828 3827 if (samsg->sadb_msg_type == SADB_ACQUIRE) {
3829 3828 samsg->sadb_msg_errno = kse->ks_err_errno;
3830 3829 samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
3831 3830 /*
3832 3831 * Use the write-side of the esp_pfkey_q
3833 3832 */
3834 3833 sadb_in_acquire(samsg, &espstack->esp_sadb,
3835 3834 WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack);
3836 3835 }
3837 3836
3838 3837 freemsg(mp);
3839 3838 }
3840 3839
3841 3840 /*
3842 3841 * ESP module write put routine.
3843 3842 */
/*
 * ESP module write put routine.
 *
 * Handles three classes of messages from above:
 *   M_CTL   — keysock control messages (KEYSOCK_IN/OUT_ERR/HELLO);
 *   M_IOCTL — ND_GET/ND_SET tunable access via nd_getset();
 *   others  — passed through to IP with putnext().
 */
static void
ipsecesp_wput(queue_t *q, mblk_t *mp)
{
	ipsec_info_t *ii;
	struct iocblk *iocp;
	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;

	esp3dbg(espstack, ("In esp_wput().\n"));

	/* NOTE: Each case must take care of freeing or passing mp. */
	switch (mp->b_datap->db_type) {
	case M_CTL:
		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
			/* Not big enough message. */
			freemsg(mp);
			break;
		}
		ii = (ipsec_info_t *)mp->b_rptr;

		switch (ii->ipsec_info_type) {
		case KEYSOCK_OUT_ERR:
			esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n"));
			esp_keysock_no_socket(mp, espstack);
			break;
		case KEYSOCK_IN:
			ESP_BUMP_STAT(espstack, keysock_in);
			esp3dbg(espstack, ("Got KEYSOCK_IN message.\n"));

			/* Parse the message. */
			esp_parse_pfkey(mp, espstack);
			break;
		case KEYSOCK_HELLO:
			/* Records esp_pfkey_q and starts the ager timer. */
			sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp,
			    esp_ager, (void *)espstack, &espstack->esp_event,
			    SADB_SATYPE_ESP);
			break;
		default:
			esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n",
			    ii->ipsec_info_type));
			freemsg(mp);
			break;
		}
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case ND_SET:
		case ND_GET:
			/* nd_getset() formats the reply into mp on success. */
			if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) {
				qreply(q, mp);
				return;
			} else {
				iocp->ioc_error = ENOENT;
			}
			/* FALLTHRU */
		default:
			/* We really don't support any other ioctls, do we? */

			/* Return EINVAL */
			if (iocp->ioc_error != ENOENT)
				iocp->ioc_error = EINVAL;
			iocp->ioc_count = 0;
			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return;
		}
	default:
		esp3dbg(espstack,
		    ("Got default message, type %d, passing to IP.\n",
		    mp->b_datap->db_type));
		putnext(q, mp);
	}
}
3917 3916
3918 3917 /*
3919 3918 * Wrapper to allow IP to trigger an ESP association failure message
3920 3919 * during inbound SA selection.
3921 3920 */
3922 3921 void
3923 3922 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
3924 3923 uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
3925 3924 {
3926 3925 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
3927 3926 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
3928 3927 ipsec_stack_t *ipss = ns->netstack_ipsec;
3929 3928
3930 3929 if (espstack->ipsecesp_log_unknown_spi) {
3931 3930 ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
3932 3931 addr, af, espstack->ipsecesp_netstack);
3933 3932 }
3934 3933
3935 3934 ip_drop_packet(mp, B_TRUE, ira->ira_ill,
3936 3935 DROPPER(ipss, ipds_esp_no_sa),
3937 3936 &espstack->esp_dropper);
3938 3937 }
3939 3938
3940 3939 /*
3941 3940 * Initialize the ESP input and output processing functions.
3942 3941 */
3943 3942 void
3944 3943 ipsecesp_init_funcs(ipsa_t *sa)
3945 3944 {
3946 3945 if (sa->ipsa_output_func == NULL)
3947 3946 sa->ipsa_output_func = esp_outbound;
3948 3947 if (sa->ipsa_input_func == NULL)
3949 3948 sa->ipsa_input_func = esp_inbound;
3950 3949 }
|
↓ open down ↓ |
3772 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX