1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 */
15
16 /*
17 * Overlay device target cache management
18 *
19 * For more information, see the big theory statement in
20 * uts/common/io/overlay/overlay.c
21 */
22
23 #include <sys/types.h>
24 #include <sys/ethernet.h>
25 #include <sys/kmem.h>
26 #include <sys/policy.h>
27 #include <sys/sysmacros.h>
28 #include <sys/stream.h>
29 #include <sys/strsun.h>
30 #include <sys/strsubr.h>
31 #include <sys/mac_provider.h>
32 #include <sys/mac_client.h>
33 #include <sys/mac_client_priv.h>
34 #include <sys/vlan.h>
35 #include <sys/crc32.h>
36 #include <sys/cred.h>
37 #include <sys/file.h>
38 #include <sys/errno.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41
42 #include <sys/overlay_impl.h>
43 #include <sys/sdt.h>
44
45 /*
 * This is a total straw man, but at least it's a prime number. Here we're
 * going to have to go through and do a lot of evaluation and understanding as
 * to how these target caches should grow and shrink, as well as how to handle
 * memory pressure and evictions. This just gives us a starting point that'll
 * be 'good enough', until it's not.
51 */
52 #define OVERLAY_HSIZE 823
53
54 /*
 * We use this data structure to keep track of which lookup requests have been
 * handed out to a given open instance of the target device, so that we know
 * what to put back on the pending list if that instance goes away.
58 */
59 typedef struct overlay_target_hdl {
60 minor_t oth_minor; /* RO */
61 zoneid_t oth_zoneid; /* RO */
62 int oth_oflags; /* RO */
63 list_node_t oth_link; /* overlay_target_lock */
64 kmutex_t oth_lock;
65 list_t oth_outstanding; /* oth_lock */
66 } overlay_target_hdl_t;
67
68 typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
69 typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
70 typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);
71
typedef struct overlay_target_ioctl {
	int	oti_cmd;	/* ioctl id */
	boolean_t	oti_write;	/* ioctl requires FWRITE */
	boolean_t	oti_ncopyout;	/* copyout data? */
	overlay_target_copyin_f	oti_copyin;	/* copyin func */
	overlay_target_ioctl_f	oti_func;	/* function to call */
	overlay_target_copyout_f	oti_copyout;	/* copyout func */
	size_t	oti_size;	/* size of user level structure */
} overlay_target_ioctl_t;
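
/*
 * Each target ioctl is described by an entry in overlay_target_ioctab[] at
 * the bottom of this file. overlay_target_ioctl() walks that table: when
 * oti_copyin is NULL it performs a flat ddi_copyin() of oti_size bytes,
 * otherwise it calls the supplied copyin routine; it then invokes oti_func
 * and, if the handler succeeds and oti_ncopyout is B_TRUE, copies the result
 * back out, either with a flat ddi_copyout() or via the supplied oti_copyout
 * routine.
 */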
81
82 static kmem_cache_t *overlay_target_cache;
83 static kmem_cache_t *overlay_entry_cache;
84 static id_space_t *overlay_thdl_idspace;
85 static void *overlay_thdl_state;
86
87 /*
 * When we support overlay devices in the NGZ (non-global zone), all of these
 * will need to become zone-aware by plugging into the netstack engine and
 * becoming per-netstack data.
91 */
92 static list_t overlay_thdl_list;
93 static kmutex_t overlay_target_lock;
94 static kcondvar_t overlay_target_condvar;
95 static list_t overlay_target_list;
96 static boolean_t overlay_target_excl;
97
98 /*
 * Maximum number of bytes of packet data that may be queued on a single
 * pending hash table entry while a lookup is outstanding; anything beyond
 * this is dropped.
100 */
101 static int overlay_ent_size = 128 * 1024;
102
103 /* ARGSUSED */
104 static int
105 overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
106 {
107 overlay_target_t *ott = buf;
108
109 mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
110 cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
111 return (0);
112 }
113
114 /* ARGSUSED */
115 static void
116 overlay_target_cache_destructor(void *buf, void *arg)
117 {
118 overlay_target_t *ott = buf;
119
120 cv_destroy(&ott->ott_cond);
121 mutex_destroy(&ott->ott_lock);
122 }
123
124 /* ARGSUSED */
125 static int
126 overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
127 {
128 overlay_target_entry_t *ote = buf;
129
130 bzero(ote, sizeof (overlay_target_entry_t));
131 mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
132 return (0);
133 }
134
135 /* ARGSUSED */
136 static void
137 overlay_entry_cache_destructor(void *buf, void *arg)
138 {
139 overlay_target_entry_t *ote = buf;
140
141 mutex_destroy(&ote->ote_lock);
142 }
143
144 /* TODO: we will need to modify these to hash/cmp DCID + MAC */
145
146 static uint64_t
147 overlay_mac_hash(const void *v)
148 {
149 uint32_t crc;
150 CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
151 return (crc);
152 }
153
154 static int
155 overlay_mac_cmp(const void *a, const void *b)
156 {
157 return (bcmp(a, b, ETHERADDRL));
158 }
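
/*
 * A rough sketch of the DCID + MAC variants mentioned in the TODO above. The
 * key layout (a struct carrying a 32-bit DCID alongside the MAC) and all of
 * the names here are hypothetical; the only point is that the DCID can be
 * folded into the running CRC and compared before the MAC:
 *
 *	typedef struct overlay_target_vl2 {
 *		uint32_t	otv_dcid;
 *		uint8_t		otv_mac[ETHERADDRL];
 *	} overlay_target_vl2_t;
 *
 *	static uint64_t
 *	overlay_dcid_mac_hash(const void *v)
 *	{
 *		const overlay_target_vl2_t *key = v;
 *		uint32_t crc;
 *
 *		CRC32(crc, &key->otv_dcid, sizeof (uint32_t), -1U, crc32_table);
 *		CRC32(crc, key->otv_mac, ETHERADDRL, crc, crc32_table);
 *		return (crc);
 *	}
 *
 *	static int
 *	overlay_dcid_mac_cmp(const void *a, const void *b)
 *	{
 *		const overlay_target_vl2_t *l = a, *r = b;
 *
 *		if (l->otv_dcid != r->otv_dcid)
 *			return (1);
 *		return (bcmp(l->otv_mac, r->otv_mac, ETHERADDRL));
 *	}
 */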
159
160 /* ARGSUSED */
161 static void
162 overlay_target_entry_dtor(void *arg)
163 {
164 overlay_target_entry_t *ote = arg;
165
166 ote->ote_flags = 0;
167 bzero(ote->ote_addr, ETHERADDRL);
168 ote->ote_ott = NULL;
169 ote->ote_odd = NULL;
170 freemsgchain(ote->ote_chead);
171 ote->ote_chead = ote->ote_ctail = NULL;
172 ote->ote_mbsize = 0;
173 ote->ote_vtime = 0;
174 kmem_cache_free(overlay_entry_cache, ote);
175 }
176
177 static int
178 overlay_mac_avl(const void *a, const void *b)
179 {
180 int i;
181 const overlay_target_entry_t *l, *r;
182 l = a;
183 r = b;
184
185 for (i = 0; i < ETHERADDRL; i++) {
186 if (l->ote_addr[i] > r->ote_addr[i])
187 return (1);
188 else if (l->ote_addr[i] < r->ote_addr[i])
189 return (-1);
190 }
191
192 return (0);
193 }
194
195 void
196 overlay_target_init(void)
197 {
198 int ret;
199 ret = ddi_soft_state_init(&overlay_thdl_state,
200 sizeof (overlay_target_hdl_t), 1);
201 VERIFY(ret == 0);
202 overlay_target_cache = kmem_cache_create("overlay_target",
203 sizeof (overlay_target_t), 0, overlay_target_cache_constructor,
204 overlay_target_cache_destructor, NULL, NULL, NULL, 0);
205 overlay_entry_cache = kmem_cache_create("overlay_entry",
206 sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor,
207 overlay_entry_cache_destructor, NULL, NULL, NULL, 0);
208 mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL);
209 cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL);
210 list_create(&overlay_target_list, sizeof (overlay_target_entry_t),
211 offsetof(overlay_target_entry_t, ote_qlink));
212 list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t),
213 offsetof(overlay_target_hdl_t, oth_link));
214 overlay_thdl_idspace = id_space_create("overlay_target_minors",
215 1, INT32_MAX);
216 }
217
218 void
219 overlay_target_fini(void)
220 {
221 id_space_destroy(overlay_thdl_idspace);
222 list_destroy(&overlay_thdl_list);
223 list_destroy(&overlay_target_list);
224 cv_destroy(&overlay_target_condvar);
225 mutex_destroy(&overlay_target_lock);
226 kmem_cache_destroy(overlay_entry_cache);
227 kmem_cache_destroy(overlay_target_cache);
228 ddi_soft_state_fini(&overlay_thdl_state);
229 }
230
231 void
232 overlay_target_free(overlay_dev_t *odd)
233 {
234 if (odd->odd_target == NULL)
235 return;
236
237 if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
238 refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
239 avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
240 overlay_target_entry_t *ote;
241
242 /* TODO: remove from L3 trees */
243
244 /*
 * Our AVL tree and hashtable contain the same elements, so we just
 * remove the entries from the tree and let them be destroyed when
 * they are removed from the hash table (which happens through the
 * refhash dtor).
249 */
250 while ((ote = avl_first(ap)) != NULL)
251 avl_remove(ap, ote);
252
253 avl_destroy(ap);
254 for (ote = refhash_first(rp); ote != NULL;
255 ote = refhash_next(rp, ote)) {
256 refhash_remove(rp, ote);
257 }
258 refhash_destroy(rp);
259 }
260
261 ASSERT(odd->odd_target->ott_ocount == 0);
262 kmem_cache_free(overlay_target_cache, odd->odd_target);
263 }
264
265 int
overlay_target_busy(void)
267 {
268 int ret;
269
270 mutex_enter(&overlay_target_lock);
271 ret = !list_is_empty(&overlay_thdl_list);
272 mutex_exit(&overlay_target_lock);
273
274 return (ret);
275 }
276
277 static void
278 overlay_target_queue(overlay_target_entry_t *entry)
279 {
280 mutex_enter(&overlay_target_lock);
281 mutex_enter(&entry->ote_ott->ott_lock);
282 if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
283 mutex_exit(&entry->ote_ott->ott_lock);
284 mutex_exit(&overlay_target_lock);
285 return;
286 }
287 entry->ote_ott->ott_ocount++;
288 mutex_exit(&entry->ote_ott->ott_lock);
289 list_insert_tail(&overlay_target_list, entry);
290 cv_signal(&overlay_target_condvar);
291 mutex_exit(&overlay_target_lock);
292 }
293
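/*
 * ott_ocount tracks how many target entries from a given device are currently
 * sitting on the global lookup list or are outstanding with a target handle.
 * overlay_target_queue() refuses to queue new work once OVERLAY_T_TEARDOWN is
 * set, and overlay_target_quiesce() below waits for the count to drain to
 * zero before the target can safely be torn down.
 */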
294 void
295 overlay_target_quiesce(overlay_target_t *ott)
296 {
297 if (ott == NULL)
298 return;
299 mutex_enter(&ott->ott_lock);
300 ott->ott_flags |= OVERLAY_T_TEARDOWN;
301 while (ott->ott_ocount != 0)
302 cv_wait(&ott->ott_cond, &ott->ott_lock);
303 mutex_exit(&ott->ott_lock);
304 }
305
306 /*
 * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
 * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else
 * at this time, say for NVGRE, we drop all packets that don't match this.
310 *
311 * XXX: It might be better to replace the 'sock' argument with
312 * overlay_target_entry_t** and set it with the found entry in the case
313 * of OVERLAY_TARGET_OK.
314 */
315 int
316 overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
317 socklen_t *slenp)
318 {
319 int ret;
320 struct sockaddr_in6 *v6;
321 overlay_target_t *ott;
322 mac_header_info_t mhi;
323 overlay_target_entry_t *entry;
324
325 ASSERT(odd->odd_target != NULL);
326
327 /*
 * At this point, the overlay device is in a mux, which means that it has
 * been activated. Parts of the target, such as the mode and the
 * destination, are now read-only, so we don't have to worry about
 * synchronization for them.
332 */
333 ott = odd->odd_target;
334 if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
335 return (OVERLAY_TARGET_DROP);
336
337 v6 = (struct sockaddr_in6 *)sock;
338 bzero(v6, sizeof (struct sockaddr_in6));
339 v6->sin6_family = AF_INET6;
340
341 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
342 mutex_enter(&ott->ott_lock);
343 bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
344 sizeof (struct in6_addr));
345 v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
346 mutex_exit(&ott->ott_lock);
347 *slenp = sizeof (struct sockaddr_in6);
348
349 return (OVERLAY_TARGET_OK);
350 }
351
352 ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
353
354 /*
355 * Note we only want the MAC address here, therefore we won't bother
356 * using mac_vlan_header_info(). If any caller needs the vlan info at
357 * this point, this should change to a call to mac_vlan_header_info().
358 */
359 if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
360 return (OVERLAY_TARGET_DROP);
361
362 /*
363 * TODO: compare mhi.mhi_daddr with odd->macaddr.
364 * If match,
365 * get VL3 dest from mp
366 * lookup target using VL3 dest
367 * otherwise,
368 * lookup target using VL2 dest (existing refhash_lookup() call
369 * below)
370 */
371 mutex_enter(&ott->ott_lock);
372 entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
373 mhi.mhi_daddr);
374 if (entry == NULL) {
375 entry = kmem_cache_alloc(overlay_entry_cache,
376 KM_NOSLEEP | KM_NORMALPRI);
377 if (entry == NULL) {
378 mutex_exit(&ott->ott_lock);
379 return (OVERLAY_TARGET_DROP);
380 }
381 /*
		 * TODO: set entry->ote_dcid, if VL3 lookup, copy dst addr
		 * into entry->ote_ip. Probably zero out the address we're
		 * not looking up (VL2 or VL3) as well.
385 */
386 bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
387 entry->ote_chead = entry->ote_ctail = mp;
388 entry->ote_mbsize = msgsize(mp);
389 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
390 entry->ote_ott = ott;
391 entry->ote_odd = odd;
392 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
393 avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
394 mutex_exit(&ott->ott_lock);
395 overlay_target_queue(entry);
396 return (OVERLAY_TARGET_ASYNC);
397 }
398 refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
399 mutex_exit(&ott->ott_lock);
400
401 mutex_enter(&entry->ote_lock);
402 if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
403 ret = OVERLAY_TARGET_DROP;
404 } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
405 bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
406 sizeof (struct in6_addr));
407 v6->sin6_port = htons(entry->ote_dest.otp_port);
408 *slenp = sizeof (struct sockaddr_in6);
409 ret = OVERLAY_TARGET_OK;
410 } else {
411 size_t mlen = msgsize(mp);
412
413 if (mlen + entry->ote_mbsize > overlay_ent_size) {
414 ret = OVERLAY_TARGET_DROP;
415 } else {
416 if (entry->ote_ctail != NULL) {
417 ASSERT(entry->ote_ctail->b_next ==
418 NULL);
419 entry->ote_ctail->b_next = mp;
420 entry->ote_ctail = mp;
421 } else {
422 entry->ote_chead = mp;
423 entry->ote_ctail = mp;
424 }
425 entry->ote_mbsize += mlen;
426 if ((entry->ote_flags &
427 OVERLAY_ENTRY_F_PENDING) == 0) {
428 entry->ote_flags |=
429 OVERLAY_ENTRY_F_PENDING;
430 overlay_target_queue(entry);
431 }
432 ret = OVERLAY_TARGET_ASYNC;
433 }
434 }
435 mutex_exit(&entry->ote_lock);
436
437 mutex_enter(&ott->ott_lock);
438 refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
439 mutex_exit(&ott->ott_lock);
440
441 return (ret);
442 }
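
/*
 * A minimal sketch of how a transmit path might consume the return values of
 * overlay_target_lookup(). The real consumer is the overlay transmit path;
 * encap_and_send() and the surrounding context here are purely illustrative
 * assumptions. Note that OVERLAY_TARGET_ASYNC means the mblk chain has been
 * queued on the pending entry and must not be freed or reused by the caller:
 *
 *	struct sockaddr_in6 storage;
 *	struct sockaddr *sock = (struct sockaddr *)&storage;
 *	socklen_t slen;
 *
 *	switch (overlay_target_lookup(odd, mp, sock, &slen)) {
 *	case OVERLAY_TARGET_OK:
 *		(void) encap_and_send(odd, mp, sock, slen);
 *		break;
 *	case OVERLAY_TARGET_ASYNC:
 *		break;
 *	case OVERLAY_TARGET_DROP:
 *	default:
 *		freemsg(mp);
 *		break;
 *	}
 */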
443
444 /* ARGSUSED */
445 static int
446 overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
447 {
448 overlay_dev_t *odd;
449 overlay_targ_info_t *oti = arg;
450
451 odd = overlay_hold_by_dlid(oti->oti_linkid);
452 if (odd == NULL)
453 return (ENOENT);
454
455 mutex_enter(&odd->odd_lock);
456 oti->oti_flags = 0;
457 oti->oti_needs = odd->odd_plugin->ovp_dest;
458 if (odd->odd_flags & OVERLAY_F_DEGRADED)
459 oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
460 if (odd->odd_flags & OVERLAY_F_ACTIVATED)
461 oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
462 oti->oti_vnetid = odd->odd_vid;
463 oti->oti_dcid = odd->odd_dcid;
464 mutex_exit(&odd->odd_lock);
465 overlay_hold_rele(odd);
466 return (0);
467 }
468
469 /* ARGSUSED */
470 static int
471 overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
472 {
473 overlay_dev_t *odd;
474 overlay_target_t *ott;
475 overlay_targ_associate_t *ota = arg;
476
477 odd = overlay_hold_by_dlid(ota->ota_linkid);
478 if (odd == NULL)
479 return (ENOENT);
480
481 if (ota->ota_id == 0) {
482 overlay_hold_rele(odd);
483 return (EINVAL);
484 }
485
486 if (ota->ota_mode != OVERLAY_TARGET_POINT &&
487 ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
488 overlay_hold_rele(odd);
489 return (EINVAL);
490 }
491
492 if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
493 overlay_hold_rele(odd);
494 return (EINVAL);
495 }
496
497 if (ota->ota_mode == OVERLAY_TARGET_POINT) {
498 if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) {
499 if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) ||
500 IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) ||
501 IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) {
502 overlay_hold_rele(odd);
503 return (EINVAL);
504 }
505 }
506
507 if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) {
508 if (ota->ota_point.otp_port == 0) {
509 overlay_hold_rele(odd);
510 return (EINVAL);
511 }
512 }
513 }
514
515 ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
516 ott->ott_flags = 0;
517 ott->ott_ocount = 0;
518 ott->ott_mode = ota->ota_mode;
519 ott->ott_dest = ota->ota_provides;
520 ott->ott_id = ota->ota_id;
521
522 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
523 bcopy(&ota->ota_point, &ott->ott_u.ott_point,
524 sizeof (overlay_target_point_t));
525 } else {
526 ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
527 overlay_mac_hash, overlay_mac_cmp,
528 overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
529 offsetof(overlay_target_entry_t, ote_reflink),
530 offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
531 avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
532 sizeof (overlay_target_entry_t),
533 offsetof(overlay_target_entry_t, ote_avllink));
534 }
535 mutex_enter(&odd->odd_lock);
536 if (odd->odd_flags & OVERLAY_F_VARPD) {
537 mutex_exit(&odd->odd_lock);
538 kmem_cache_free(overlay_target_cache, ott);
539 overlay_hold_rele(odd);
540 return (EEXIST);
541 }
542
543 odd->odd_flags |= OVERLAY_F_VARPD;
544 odd->odd_target = ott;
545 mutex_exit(&odd->odd_lock);
546
547 overlay_hold_rele(odd);
548
550 return (0);
551 }
552
554 /* ARGSUSED */
555 static int
556 overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
557 {
558 overlay_dev_t *odd;
559 overlay_targ_degrade_t *otd = arg;
560
561 odd = overlay_hold_by_dlid(otd->otd_linkid);
562 if (odd == NULL)
563 return (ENOENT);
564
565 overlay_fm_degrade(odd, otd->otd_buf);
566 overlay_hold_rele(odd);
567 return (0);
568 }
569
570 /* ARGSUSED */
571 static int
572 overlay_target_restore(overlay_target_hdl_t *thdl, void *arg)
573 {
574 overlay_dev_t *odd;
575 overlay_targ_id_t *otid = arg;
576
577 odd = overlay_hold_by_dlid(otid->otid_linkid);
578 if (odd == NULL)
579 return (ENOENT);
580
581 overlay_fm_restore(odd);
582 overlay_hold_rele(odd);
583 return (0);
584 }
585
586 /* ARGSUSED */
587 static int
588 overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg)
589 {
590 overlay_dev_t *odd;
591 overlay_targ_id_t *otid = arg;
592
593 odd = overlay_hold_by_dlid(otid->otid_linkid);
594 if (odd == NULL)
595 return (ENOENT);
596
597 mutex_enter(&odd->odd_lock);
598 odd->odd_flags &= ~OVERLAY_F_VARPD;
599 mutex_exit(&odd->odd_lock);
600
601 overlay_hold_rele(odd);
602 return (0);
604 }
605
606 static int
607 overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
608 {
609 overlay_targ_lookup_t *otl = arg;
610 overlay_target_entry_t *entry;
611 clock_t ret, timeout;
612 mac_header_info_t mhi;
613
614 timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
615 again:
616 mutex_enter(&overlay_target_lock);
617 while (list_is_empty(&overlay_target_list)) {
618 ret = cv_timedwait(&overlay_target_condvar,
619 &overlay_target_lock, timeout);
620 if (ret == -1) {
621 mutex_exit(&overlay_target_lock);
622 return (ETIME);
623 }
624 }
625 entry = list_remove_head(&overlay_target_list);
626 mutex_exit(&overlay_target_lock);
627 mutex_enter(&entry->ote_lock);
628 if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
629 ASSERT(entry->ote_chead == NULL);
630 mutex_exit(&entry->ote_lock);
631 goto again;
632 }
633 ASSERT(entry->ote_chead != NULL);
634
635 /*
636 * If we have a bogon that doesn't have a valid mac header, drop it and
637 * try again.
638 */
639 if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
640 &mhi) != 0) {
641 boolean_t queue = B_FALSE;
642 mblk_t *mp = entry->ote_chead;
643 entry->ote_chead = mp->b_next;
644 mp->b_next = NULL;
645 if (entry->ote_ctail == mp)
646 entry->ote_ctail = entry->ote_chead;
647 entry->ote_mbsize -= msgsize(mp);
648 if (entry->ote_chead != NULL)
649 queue = B_TRUE;
650 mutex_exit(&entry->ote_lock);
651 if (queue == B_TRUE)
652 overlay_target_queue(entry);
653 freemsg(mp);
654 goto again;
655 }
656
657 /*
658 * TODO: If VL3 request,
659 * set otl->otl_l3req
660 * Fill in otl_{src,dst}ip
661 * Else
662 * clear otl->otl_l3req
663 */
664 otl->otl_dlid = entry->ote_odd->odd_linkid;
665 otl->otl_reqid = (uintptr_t)entry;
666 otl->otl_varpdid = entry->ote_ott->ott_id;
667 otl->otl_vnetid = entry->ote_odd->odd_vid;
668
669 otl->otl_hdrsize = mhi.mhi_hdrsize;
670 otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
671 bcopy(mhi.mhi_daddr, otl->otl_addru.otlu_l2.otl2_dstaddr, ETHERADDRL);
672 bcopy(mhi.mhi_saddr, otl->otl_addru.otlu_l2.otl2_srcaddr, ETHERADDRL);
673 otl->otl_addru.otlu_l2.otl2_dsttype = mhi.mhi_dsttype;
674 otl->otl_addru.otlu_l2.otl2_sap = mhi.mhi_bindsap;
675 otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
676 mutex_exit(&entry->ote_lock);
677
678 mutex_enter(&thdl->oth_lock);
679 list_insert_tail(&thdl->oth_outstanding, entry);
680 mutex_exit(&thdl->oth_lock);
681
682 return (0);
683 }
684
685 static int
686 overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
687 {
688 const overlay_targ_resp_t *otr = arg;
689 overlay_target_entry_t *entry;
690 mblk_t *mp;
691
692 mutex_enter(&thdl->oth_lock);
693 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
694 entry = list_next(&thdl->oth_outstanding, entry)) {
695 if ((uintptr_t)entry == otr->otr_reqid)
696 break;
697 }
698
699 if (entry == NULL) {
700 mutex_exit(&thdl->oth_lock);
701 return (EINVAL);
702 }
703 list_remove(&thdl->oth_outstanding, entry);
704 mutex_exit(&thdl->oth_lock);
705
706 mutex_enter(&entry->ote_lock);
707 bcopy(&otr->otr_answer, &entry->ote_dest,
708 sizeof (overlay_target_point_t));
709 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
710 entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
711 mp = entry->ote_chead;
712 entry->ote_chead = NULL;
713 entry->ote_ctail = NULL;
714 entry->ote_mbsize = 0;
715 entry->ote_vtime = gethrtime();
716 mutex_exit(&entry->ote_lock);
717
718 /*
719 * For now do an in-situ drain.
720 *
	 * TODO: overlay_m_tx() will need to perform remote fabric attachment
	 * checks, which may leave mblk_t's in the msg chain whose
	 * connectivity with the target entry is unknown. This will then need
	 * to deal with the leftovers.
725 */
726 mp = overlay_m_tx(entry->ote_odd, mp);
727 freemsgchain(mp);
728
729 mutex_enter(&entry->ote_ott->ott_lock);
730 entry->ote_ott->ott_ocount--;
731 cv_signal(&entry->ote_ott->ott_cond);
732 mutex_exit(&entry->ote_ott->ott_lock);
733
734 return (0);
735 }
736
737 static int
738 overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg)
739 {
740 const overlay_targ_resp_t *otr = arg;
741 overlay_target_entry_t *entry;
742 mblk_t *mp;
743 boolean_t queue = B_FALSE;
744
745 mutex_enter(&thdl->oth_lock);
746 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
747 entry = list_next(&thdl->oth_outstanding, entry)) {
748 if ((uintptr_t)entry == otr->otr_reqid)
749 break;
750 }
751
752 if (entry == NULL) {
753 mutex_exit(&thdl->oth_lock);
754 return (EINVAL);
755 }
756 list_remove(&thdl->oth_outstanding, entry);
757 mutex_exit(&thdl->oth_lock);
758
759 mutex_enter(&entry->ote_lock);
760
761 /* Safeguard against a confused varpd */
762 if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
763 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
764 DTRACE_PROBE1(overlay__target__valid__drop,
765 overlay_target_entry_t *, entry);
766 mutex_exit(&entry->ote_lock);
767 goto done;
768 }
769
770 /*
771 * TODO: This will need to be smarter. This drop can only apply to
772 * packets from the same source fabric as the first mblk_t in the
773 * chain. If the target exists, packets from other fabrics which
774 * are chained to this target entry may be able to be sent (if we
775 * already know they are attached), or we might need to query from
776 * those other source fabrics if we don't know if the two are
777 * attached.
778 */
779 mp = entry->ote_chead;
780 if (mp != NULL) {
781 entry->ote_chead = mp->b_next;
782 mp->b_next = NULL;
783 if (entry->ote_ctail == mp)
784 entry->ote_ctail = entry->ote_chead;
785 entry->ote_mbsize -= msgsize(mp);
786 }
787 if (entry->ote_chead != NULL) {
788 queue = B_TRUE;
789 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
790 } else {
791 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
792 }
793 mutex_exit(&entry->ote_lock);
794
795 if (queue == B_TRUE)
796 overlay_target_queue(entry);
797 freemsg(mp);
798
799 done:
800 mutex_enter(&entry->ote_ott->ott_lock);
801 entry->ote_ott->ott_ocount--;
802 cv_signal(&entry->ote_ott->ott_cond);
803 mutex_exit(&entry->ote_ott->ott_lock);
804
805 return (0);
806 }
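
/*
 * For reference, the lookup protocol that the three functions above implement
 * is driven from userland (normally varpd) roughly as follows. This is only a
 * sketch: fd is an open handle on the overlay target device, resolve() stands
 * in for whatever policy the daemon applies, and all error handling is
 * elided:
 *
 *	overlay_targ_lookup_t otl;
 *	overlay_targ_resp_t otr;
 *
 *	if (ioctl(fd, OVERLAY_TARG_LOOKUP, &otl) != 0)
 *		return;
 *
 *	otr.otr_reqid = otl.otl_reqid;
 *	if (resolve(&otl, &otr.otr_answer) == 0)
 *		(void) ioctl(fd, OVERLAY_TARG_RESPOND, &otr);
 *	else
 *		(void) ioctl(fd, OVERLAY_TARG_DROP, &otr);
 */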
807
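/*
 * The packet ioctls carry a user pointer (otp_buf) inside the request
 * structure, so the copyin/copyout routines below have to account for 32-bit
 * callers: ddi_model_convert_from() tells us whether to treat the buffer as a
 * caddr32_t and widen or narrow it accordingly.
 */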
808 /* ARGSUSED */
809 static int
810 overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize,
811 int flags)
812 {
813 overlay_targ_pkt_t *pkt;
814 overlay_targ_pkt32_t *pkt32;
815
816 pkt = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP);
817 *outp = pkt;
818 *bsize = sizeof (overlay_targ_pkt_t);
819 if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
820 uintptr_t addr;
821
822 if (ddi_copyin(ubuf, pkt, sizeof (overlay_targ_pkt32_t),
823 flags & FKIOCTL) != 0) {
824 kmem_free(pkt, *bsize);
825 return (EFAULT);
826 }
827 pkt32 = (overlay_targ_pkt32_t *)pkt;
828 addr = pkt32->otp_buf;
829 pkt->otp_buf = (void *)addr;
830 } else {
831 if (ddi_copyin(ubuf, pkt, *bsize, flags & FKIOCTL) != 0) {
832 kmem_free(pkt, *bsize);
833 return (EFAULT);
834 }
835 }
836 return (0);
837 }
838
839 static int
840 overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize,
841 int flags)
842 {
843 if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
844 overlay_targ_pkt_t *pkt = buf;
845 overlay_targ_pkt32_t *pkt32 = buf;
846 uintptr_t addr = (uintptr_t)pkt->otp_buf;
847 pkt32->otp_buf = (caddr32_t)addr;
848 if (ddi_copyout(buf, ubuf, sizeof (overlay_targ_pkt32_t),
849 flags & FKIOCTL) != 0)
850 return (EFAULT);
851 } else {
852 if (ddi_copyout(buf, ubuf, bufsize, flags & FKIOCTL) != 0)
853 return (EFAULT);
854 }
855 return (0);
856 }
857
858 static int
859 overlay_target_packet(overlay_target_hdl_t *thdl, void *arg)
860 {
861 overlay_targ_pkt_t *pkt = arg;
862 overlay_target_entry_t *entry;
863 mblk_t *mp;
864 size_t mlen;
865 size_t boff;
866
867 mutex_enter(&thdl->oth_lock);
868 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
869 entry = list_next(&thdl->oth_outstanding, entry)) {
870 if ((uintptr_t)entry == pkt->otp_reqid)
871 break;
872 }
873
874 if (entry == NULL) {
875 mutex_exit(&thdl->oth_lock);
876 return (EINVAL);
877 }
878 mutex_enter(&entry->ote_lock);
879 mutex_exit(&thdl->oth_lock);
880 mp = entry->ote_chead;
881 /* Protect against a rogue varpd */
882 if (mp == NULL) {
883 mutex_exit(&entry->ote_lock);
884 return (EINVAL);
885 }
886 mlen = MIN(msgsize(mp), pkt->otp_size);
887 pkt->otp_size = mlen;
888 boff = 0;
889 while (mlen > 0) {
890 size_t wlen = MIN(MBLKL(mp), mlen);
891 if (ddi_copyout(mp->b_rptr,
892 (void *)((uintptr_t)pkt->otp_buf + boff),
893 wlen, 0) != 0) {
894 mutex_exit(&entry->ote_lock);
895 return (EFAULT);
896 }
897 mlen -= wlen;
898 boff += wlen;
899 mp = mp->b_cont;
900 }
901 mutex_exit(&entry->ote_lock);
902 return (0);
903 }
904
905 static int
906 overlay_target_inject(overlay_target_hdl_t *thdl, void *arg)
907 {
908 overlay_targ_pkt_t *pkt = arg;
909 overlay_target_entry_t *entry;
910 overlay_dev_t *odd;
911 mblk_t *mp;
912
913 if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
914 return (EINVAL);
915
916 mp = allocb(pkt->otp_size, 0);
917 if (mp == NULL)
918 return (ENOMEM);
919
920 if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
921 freeb(mp);
922 return (EFAULT);
923 }
924 mp->b_wptr += pkt->otp_size;
925
926 if (pkt->otp_linkid != UINT64_MAX) {
927 odd = overlay_hold_by_dlid(pkt->otp_linkid);
928 if (odd == NULL) {
929 freeb(mp);
930 return (ENOENT);
931 }
932 } else {
933 mutex_enter(&thdl->oth_lock);
934 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
935 entry = list_next(&thdl->oth_outstanding, entry)) {
936 if ((uintptr_t)entry == pkt->otp_reqid)
937 break;
938 }
939
940 if (entry == NULL) {
941 mutex_exit(&thdl->oth_lock);
942 freeb(mp);
943 return (ENOENT);
944 }
945 odd = entry->ote_odd;
946 mutex_exit(&thdl->oth_lock);
947 }
948
949 mutex_enter(&odd->odd_lock);
950 overlay_io_start(odd, OVERLAY_F_IN_RX);
951 mutex_exit(&odd->odd_lock);
952
953 mac_rx(odd->odd_mh, NULL, mp);
954
955 mutex_enter(&odd->odd_lock);
956 overlay_io_done(odd, OVERLAY_F_IN_RX);
957 mutex_exit(&odd->odd_lock);
958
959 return (0);
960 }
961
962 static int
963 overlay_target_resend(overlay_target_hdl_t *thdl, void *arg)
964 {
965 overlay_targ_pkt_t *pkt = arg;
966 overlay_target_entry_t *entry;
967 overlay_dev_t *odd;
968 mblk_t *mp;
969
970 if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
971 return (EINVAL);
972
973 mp = allocb(pkt->otp_size, 0);
974 if (mp == NULL)
975 return (ENOMEM);
976
977 if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
978 freeb(mp);
979 return (EFAULT);
980 }
981 mp->b_wptr += pkt->otp_size;
982
983 if (pkt->otp_linkid != UINT64_MAX) {
984 odd = overlay_hold_by_dlid(pkt->otp_linkid);
985 if (odd == NULL) {
986 freeb(mp);
987 return (ENOENT);
988 }
989 } else {
990 mutex_enter(&thdl->oth_lock);
991 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
992 entry = list_next(&thdl->oth_outstanding, entry)) {
993 if ((uintptr_t)entry == pkt->otp_reqid)
994 break;
995 }
996
997 if (entry == NULL) {
998 mutex_exit(&thdl->oth_lock);
999 freeb(mp);
1000 return (ENOENT);
1001 }
1002 odd = entry->ote_odd;
1003 mutex_exit(&thdl->oth_lock);
1004 }
1005
1006 mp = overlay_m_tx(odd, mp);
1007 freemsgchain(mp);
1008
1009 return (0);
1010 }
1011
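/*
 * Internal state for OVERLAY_TARG_LIST. The ioctl is a two-step affair: a
 * caller first issues it with otl_nents set to zero, in which case we only
 * write back the number of overlay devices, and then issues it again with
 * otl_nents (and a suitably sized otl_ents array) filled in to retrieve the
 * datalink ids themselves.
 */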
1012 typedef struct overlay_targ_list_int {
1013 boolean_t otli_count;
1014 uint32_t otli_cur;
1015 uint32_t otli_nents;
1016 uint32_t otli_ents[];
1017 } overlay_targ_list_int_t;
1018
1019 static int
1020 overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize,
1021 int flags)
1022 {
1023 overlay_targ_list_t n;
1024 overlay_targ_list_int_t *otl;
1025
1026 if (ddi_copyin(ubuf, &n, sizeof (overlay_targ_list_t),
1027 flags & FKIOCTL) != 0)
1028 return (EFAULT);
1029
	/*
	 * Bound the number of entries so that the size calculation below
	 * cannot overflow.
	 */
1032 if (n.otl_nents >= INT32_MAX / sizeof (uint32_t))
1033 return (EINVAL);
1034 *bsize = sizeof (overlay_targ_list_int_t) +
1035 sizeof (uint32_t) * n.otl_nents;
1036 otl = kmem_zalloc(*bsize, KM_SLEEP);
1037 otl->otli_cur = 0;
1038 otl->otli_nents = n.otl_nents;
1039 if (otl->otli_nents != 0) {
1040 otl->otli_count = B_FALSE;
1041 if (ddi_copyin((void *)((uintptr_t)ubuf +
1042 offsetof(overlay_targ_list_t, otl_ents)),
1043 otl->otli_ents, n.otl_nents * sizeof (uint32_t),
1044 flags & FKIOCTL) != 0) {
1045 kmem_free(otl, *bsize);
1046 return (EFAULT);
1047 }
1048 } else {
1049 otl->otli_count = B_TRUE;
1050 }
1051
1052 *outp = otl;
1053 return (0);
1054 }
1055
1056 static int
1057 overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg)
1058 {
1059 overlay_targ_list_int_t *otl = arg;
1060
1061 if (otl->otli_cur < otl->otli_nents)
1062 otl->otli_ents[otl->otli_cur] = odd->odd_linkid;
1063 otl->otli_cur++;
1064 return (0);
1065 }
1066
1067 /* ARGSUSED */
1068 static int
1069 overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg)
1070 {
1071 overlay_dev_iter(overlay_target_ioctl_list_cb, arg);
1072 return (0);
1073 }
1074
1075 /* ARGSUSED */
1076 static int
1077 overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags)
1078 {
1079 overlay_targ_list_int_t *otl = buf;
1080
1081 if (ddi_copyout(&otl->otli_cur, ubuf, sizeof (uint32_t),
1082 flags & FKIOCTL) != 0)
1083 return (EFAULT);
1084
1085 if (otl->otli_count == B_FALSE) {
1086 if (ddi_copyout(otl->otli_ents,
1087 (void *)((uintptr_t)ubuf +
1088 offsetof(overlay_targ_list_t, otl_ents)),
1089 sizeof (uint32_t) * otl->otli_nents,
1090 flags & FKIOCTL) != 0)
1091 return (EFAULT);
1092 }
1093 return (0);
1094 }
1095
1096 /* ARGSUSED */
1097 static int
1098 overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg)
1099 {
1100 int ret = 0;
1101 overlay_dev_t *odd;
1102 overlay_target_t *ott;
1103 overlay_targ_cache_t *otc = arg;
1104
1105 odd = overlay_hold_by_dlid(otc->otc_linkid);
1106 if (odd == NULL)
1107 return (ENOENT);
1108
1109 mutex_enter(&odd->odd_lock);
1110 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1111 mutex_exit(&odd->odd_lock);
1112 overlay_hold_rele(odd);
1113 return (ENXIO);
1114 }
1115 ott = odd->odd_target;
1116 if (ott->ott_mode != OVERLAY_TARGET_POINT &&
1117 ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1118 mutex_exit(&odd->odd_lock);
1119 overlay_hold_rele(odd);
1120 return (ENOTSUP);
1121 }
1122 mutex_enter(&ott->ott_lock);
1123 mutex_exit(&odd->odd_lock);
1124
1125 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1126 otc->otc_entry.otce_flags = 0;
1127 bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
1128 sizeof (overlay_target_point_t));
1129 } else {
1130 overlay_target_entry_t *ote;
1131 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1132 otc->otc_entry.otce_mac);
1133 if (ote != NULL) {
1134 mutex_enter(&ote->ote_lock);
1135 if ((ote->ote_flags &
1136 OVERLAY_ENTRY_F_VALID_MASK) != 0) {
1137 if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
1138 otc->otc_entry.otce_flags =
1139 OVERLAY_TARGET_CACHE_DROP;
1140 } else {
1141 otc->otc_entry.otce_flags = 0;
1142 bcopy(&ote->ote_dest,
1143 &otc->otc_entry.otce_dest,
1144 sizeof (overlay_target_point_t));
1145 }
1146 ret = 0;
1147 } else {
1148 ret = ENOENT;
1149 }
1150 mutex_exit(&ote->ote_lock);
1151 } else {
1152 ret = ENOENT;
1153 }
1154 }
1155
1156 mutex_exit(&ott->ott_lock);
1157 overlay_hold_rele(odd);
1158
1159 return (ret);
1160 }
1161
1162 /* ARGSUSED */
1163 static int
1164 overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
1165 {
1166 overlay_dev_t *odd;
1167 overlay_target_t *ott;
1168 overlay_target_entry_t *ote;
1169 overlay_targ_cache_t *otc = arg;
1170 mblk_t *mp = NULL;
1171
1172 if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
1173 return (EINVAL);
1174
1175 odd = overlay_hold_by_dlid(otc->otc_linkid);
1176 if (odd == NULL)
1177 return (ENOENT);
1178
1179 mutex_enter(&odd->odd_lock);
1180 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1181 mutex_exit(&odd->odd_lock);
1182 overlay_hold_rele(odd);
1183 return (ENXIO);
1184 }
1185 ott = odd->odd_target;
1186 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1187 mutex_exit(&odd->odd_lock);
1188 overlay_hold_rele(odd);
1189 return (ENOTSUP);
1190 }
1191 mutex_enter(&ott->ott_lock);
1192 mutex_exit(&odd->odd_lock);
1193
1194 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1195 otc->otc_entry.otce_mac);
1196 if (ote == NULL) {
1197 ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
1198 bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
1199 ote->ote_chead = ote->ote_ctail = NULL;
1200 ote->ote_mbsize = 0;
1201 ote->ote_ott = ott;
1202 ote->ote_odd = odd;
1203 mutex_enter(&ote->ote_lock);
1204 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
1205 avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
1206 } else {
1207 mutex_enter(&ote->ote_lock);
1208 }
1209
1210 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
1211 ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
1212 } else {
1213 ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
1214 bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
1215 sizeof (overlay_target_point_t));
1216 mp = ote->ote_chead;
1217 ote->ote_chead = NULL;
1218 ote->ote_ctail = NULL;
1219 ote->ote_mbsize = 0;
1220 ote->ote_vtime = gethrtime();
1221 }
1222
1223 mutex_exit(&ote->ote_lock);
1224 mutex_exit(&ott->ott_lock);
1225
1226 if (mp != NULL) {
1227 mp = overlay_m_tx(ote->ote_odd, mp);
1228 freemsgchain(mp);
1229 }
1230
1231 overlay_hold_rele(odd);
1232
1233 return (0);
1234 }
1235
1236 /* ARGSUSED */
1237 static int
1238 overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg)
1239 {
1240 int ret = 0;
1241 overlay_dev_t *odd;
1242 overlay_target_t *ott;
1243 overlay_target_entry_t *ote;
1244 overlay_targ_cache_t *otc = arg;
1245
1246 odd = overlay_hold_by_dlid(otc->otc_linkid);
1247 if (odd == NULL)
1248 return (ENOENT);
1249
1250 mutex_enter(&odd->odd_lock);
1251 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1252 mutex_exit(&odd->odd_lock);
1253 overlay_hold_rele(odd);
1254 return (ENXIO);
1255 }
1256 ott = odd->odd_target;
1257 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1258 mutex_exit(&odd->odd_lock);
1259 overlay_hold_rele(odd);
1260 return (ENOTSUP);
1261 }
1262 mutex_enter(&ott->ott_lock);
1263 mutex_exit(&odd->odd_lock);
1264
1265 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1266 otc->otc_entry.otce_mac);
1267 if (ote != NULL) {
1268 mutex_enter(&ote->ote_lock);
1269 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
1270 mutex_exit(&ote->ote_lock);
1271 ret = 0;
1272 } else {
1273 ret = ENOENT;
1274 }
1275
1276 mutex_exit(&ott->ott_lock);
1277 overlay_hold_rele(odd);
1278
1279 return (ret);
1280 }
1281
1282 /* ARGSUSED */
1283 static int
1284 overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg)
1285 {
1286 avl_tree_t *avl;
1287 overlay_dev_t *odd;
1288 overlay_target_t *ott;
1289 overlay_target_entry_t *ote;
1290 overlay_targ_cache_t *otc = arg;
1291
1292 odd = overlay_hold_by_dlid(otc->otc_linkid);
1293 if (odd == NULL)
1294 return (ENOENT);
1295
1296 mutex_enter(&odd->odd_lock);
1297 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1298 mutex_exit(&odd->odd_lock);
1299 overlay_hold_rele(odd);
1300 return (ENXIO);
1301 }
1302 ott = odd->odd_target;
1303 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1304 mutex_exit(&odd->odd_lock);
1305 overlay_hold_rele(odd);
1306 return (ENOTSUP);
1307 }
1308 mutex_enter(&ott->ott_lock);
1309 mutex_exit(&odd->odd_lock);
1310 avl = &ott->ott_u.ott_dyn.ott_tree;
1311
1312 for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) {
1313 mutex_enter(&ote->ote_lock);
1314 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
1315 mutex_exit(&ote->ote_lock);
1316 }
1319
1320 mutex_exit(&ott->ott_lock);
1321 overlay_hold_rele(odd);
1322
1323 return (0);
1324 }
1325
1326 static int
1327 overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize,
1328 int flags)
1329 {
1330 overlay_targ_cache_iter_t base, *iter;
1331
1332 if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t),
1333 flags & FKIOCTL) != 0)
1334 return (EFAULT);
1335
1336 if (base.otci_count > OVERLAY_TARGET_ITER_MAX)
1337 return (E2BIG);
1338
1339 if (base.otci_count == 0)
1340 return (EINVAL);
1341
1342 *bsize = sizeof (overlay_targ_cache_iter_t) +
1343 base.otci_count * sizeof (overlay_targ_cache_entry_t);
1344 iter = kmem_alloc(*bsize, KM_SLEEP);
1345 bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t));
1346 *outp = iter;
1347
1348 return (0);
1349 }
1350
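/*
 * Cache iteration is resumable. The marker below lives in the otci_marker
 * bytes of the overlay_targ_cache_iter_t that userland hands back in on each
 * OVERLAY_TARG_CACHE_ITER call; it records the last MAC address we returned
 * and whether the walk has finished, and should be treated as opaque by the
 * caller.
 */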
1351 typedef struct overlay_targ_cache_marker {
1352 uint8_t otcm_mac[ETHERADDRL];
1353 uint16_t otcm_done;
1354 } overlay_targ_cache_marker_t;
1355
1356 /* ARGSUSED */
1357 static int
1358 overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg)
1359 {
1360 overlay_dev_t *odd;
1361 overlay_target_t *ott;
1362 overlay_target_entry_t lookup, *ent;
1363 overlay_targ_cache_marker_t *mark;
1364 avl_index_t where;
1365 avl_tree_t *avl;
1366 uint16_t written = 0;
1367
1368 overlay_targ_cache_iter_t *iter = arg;
1369 mark = (void *)&iter->otci_marker;
1370
1371 if (mark->otcm_done != 0) {
1372 iter->otci_count = 0;
1373 return (0);
1374 }
1375
1376 odd = overlay_hold_by_dlid(iter->otci_linkid);
1377 if (odd == NULL)
1378 return (ENOENT);
1379
1380 mutex_enter(&odd->odd_lock);
1381 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1382 mutex_exit(&odd->odd_lock);
1383 overlay_hold_rele(odd);
1384 return (ENXIO);
1385 }
1386 ott = odd->odd_target;
1387 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC &&
1388 ott->ott_mode != OVERLAY_TARGET_POINT) {
1389 mutex_exit(&odd->odd_lock);
1390 overlay_hold_rele(odd);
1391 return (ENOTSUP);
1392 }
1393
1394 /*
1395 * Holding this lock across the entire iteration probably isn't very
 * good. We should perhaps add an r/w lock for the avl tree. But we'll
 * wait until we know it's necessary before we do more.
1398 */
1399 mutex_enter(&ott->ott_lock);
1400 mutex_exit(&odd->odd_lock);
1401
1402 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1403 overlay_targ_cache_entry_t *out = &iter->otci_ents[0];
1404 bzero(out->otce_mac, ETHERADDRL);
1405 out->otce_flags = 0;
1406 bcopy(&ott->ott_u.ott_point, &out->otce_dest,
1407 sizeof (overlay_target_point_t));
1408 written++;
1409 mark->otcm_done = 1;
1410 }
1411
1412 avl = &ott->ott_u.ott_dyn.ott_tree;
1413 bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL);
1414 ent = avl_find(avl, &lookup, &where);
1415
1416 /*
1417 * NULL ent means that the entry does not exist, so we want to start
1418 * with the closest node in the tree. This means that we implicitly rely
 * on the tree's order and the first node will be the mac 00:00:00:00:00:00
 * and the last will be ff:ff:ff:ff:ff:ff.
1421 */
1422 if (ent == NULL) {
1423 ent = avl_nearest(avl, where, AVL_AFTER);
1424 if (ent == NULL) {
1425 mark->otcm_done = 1;
1426 goto done;
1427 }
1428 }
1429
1430 for (; ent != NULL && written < iter->otci_count;
1431 ent = AVL_NEXT(avl, ent)) {
1432 overlay_targ_cache_entry_t *out = &iter->otci_ents[written];
1433 mutex_enter(&ent->ote_lock);
1434 if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) {
1435 mutex_exit(&ent->ote_lock);
1436 continue;
1437 }
1438 bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL);
1439 out->otce_flags = 0;
1440 if (ent->ote_flags & OVERLAY_ENTRY_F_DROP)
1441 out->otce_flags |= OVERLAY_TARGET_CACHE_DROP;
1442 if (ent->ote_flags & OVERLAY_ENTRY_F_VALID)
1443 bcopy(&ent->ote_dest, &out->otce_dest,
1444 sizeof (overlay_target_point_t));
1445 written++;
1446 mutex_exit(&ent->ote_lock);
1447 }
1448
1449 if (ent != NULL) {
1450 bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL);
1451 } else {
1452 mark->otcm_done = 1;
1453 }
1454
1455 done:
1456 iter->otci_count = written;
1457 mutex_exit(&ott->ott_lock);
1458 overlay_hold_rele(odd);
1459
1460 return (0);
1461 }
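
/*
 * A minimal sketch of how a userland consumer might walk the cache with
 * OVERLAY_TARG_CACHE_ITER. A zeroed marker starts the walk from the beginning
 * and otci_count is reset to the buffer capacity before every call; fd,
 * linkid, process() and the batch size of 10 are illustrative assumptions:
 *
 *	overlay_targ_cache_iter_t *iter;
 *	size_t size = sizeof (*iter) +
 *	    10 * sizeof (overlay_targ_cache_entry_t);
 *
 *	iter = calloc(1, size);
 *	iter->otci_linkid = linkid;
 *	for (;;) {
 *		iter->otci_count = 10;
 *		if (ioctl(fd, OVERLAY_TARG_CACHE_ITER, iter) != 0)
 *			break;
 *		if (iter->otci_count == 0)
 *			break;
 *		process(iter->otci_ents, iter->otci_count);
 *	}
 *	free(iter);
 */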
1462
1463 /* ARGSUSED */
1464 static int
1465 overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize,
1466 int flags)
1467 {
1468 size_t outsize;
1469 const overlay_targ_cache_iter_t *iter = buf;
1470
1471 outsize = sizeof (overlay_targ_cache_iter_t) +
1472 iter->otci_count * sizeof (overlay_targ_cache_entry_t);
1473
1474 if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0)
1475 return (EFAULT);
1476
1477 return (0);
1478 }
1479
1480 static overlay_target_ioctl_t overlay_target_ioctab[] = {
1481 { OVERLAY_TARG_INFO, B_TRUE, B_TRUE,
1482 NULL, overlay_target_info,
1483 NULL, sizeof (overlay_targ_info_t) },
1484 { OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE,
1485 NULL, overlay_target_associate,
1486 NULL, sizeof (overlay_targ_associate_t) },
1487 { OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE,
1488 NULL, overlay_target_disassociate,
1489 NULL, sizeof (overlay_targ_id_t) },
1490 { OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE,
1491 NULL, overlay_target_degrade,
1492 NULL, sizeof (overlay_targ_degrade_t) },
1493 { OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE,
1494 NULL, overlay_target_restore,
1495 NULL, sizeof (overlay_targ_id_t) },
1496 { OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE,
1497 NULL, overlay_target_lookup_request,
1498 NULL, sizeof (overlay_targ_lookup_t) },
1499 { OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE,
1500 NULL, overlay_target_lookup_respond,
1501 NULL, sizeof (overlay_targ_resp_t) },
1502 { OVERLAY_TARG_DROP, B_TRUE, B_FALSE,
1503 NULL, overlay_target_lookup_drop,
1504 NULL, sizeof (overlay_targ_resp_t) },
1505 { OVERLAY_TARG_PKT, B_TRUE, B_TRUE,
1506 overlay_target_pkt_copyin,
1507 overlay_target_packet,
1508 overlay_target_pkt_copyout,
1509 sizeof (overlay_targ_pkt_t) },
1510 { OVERLAY_TARG_INJECT, B_TRUE, B_FALSE,
1511 overlay_target_pkt_copyin,
1512 overlay_target_inject,
1513 NULL, sizeof (overlay_targ_pkt_t) },
1514 { OVERLAY_TARG_RESEND, B_TRUE, B_FALSE,
1515 overlay_target_pkt_copyin,
1516 overlay_target_resend,
1517 NULL, sizeof (overlay_targ_pkt_t) },
1518 { OVERLAY_TARG_LIST, B_FALSE, B_TRUE,
1519 overlay_target_list_copyin,
1520 overlay_target_ioctl_list,
1521 overlay_target_list_copyout,
1522 sizeof (overlay_targ_list_t) },
1523 { OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE,
1524 NULL, overlay_target_cache_get,
1525 NULL, sizeof (overlay_targ_cache_t) },
1526 { OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE,
1527 NULL, overlay_target_cache_set,
1528 NULL, sizeof (overlay_targ_cache_t) },
1529 { OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE,
1530 NULL, overlay_target_cache_remove,
1531 NULL, sizeof (overlay_targ_cache_t) },
1532 { OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE,
1533 NULL, overlay_target_cache_flush,
1534 NULL, sizeof (overlay_targ_cache_t) },
1535 { OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE,
1536 overlay_target_cache_iter_copyin,
1537 overlay_target_cache_iter,
1538 overlay_target_cache_iter_copyout,
1539 sizeof (overlay_targ_cache_iter_t) },
1540 { 0 }
1541 };
1542
1543 int
1544 overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp)
1545 {
1546 minor_t mid;
1547 overlay_target_hdl_t *thdl;
1548
1549 if (secpolicy_dl_config(credp) != 0)
1550 return (EPERM);
1551
1552 if (getminor(*devp) != 0)
1553 return (ENXIO);
1554
1555 if (otype & OTYP_BLK)
1556 return (EINVAL);
1557
1558 if (flags & ~(FREAD | FWRITE | FEXCL))
1559 return (EINVAL);
1560
1561 if ((flags & FWRITE) &&
1562 !(flags & FEXCL))
1563 return (EINVAL);
1564
1565 if (!(flags & FREAD) && !(flags & FWRITE))
1566 return (EINVAL);
1567
1568 if (crgetzoneid(credp) != GLOBAL_ZONEID)
1569 return (EPERM);
1570
1571 mid = id_alloc(overlay_thdl_idspace);
1572 if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) {
1573 id_free(overlay_thdl_idspace, mid);
1574 return (ENXIO);
1575 }
1576
1577 thdl = ddi_get_soft_state(overlay_thdl_state, mid);
1578 VERIFY(thdl != NULL);
1579 thdl->oth_minor = mid;
1580 thdl->oth_zoneid = crgetzoneid(credp);
1581 thdl->oth_oflags = flags;
1582 mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL);
1583 list_create(&thdl->oth_outstanding, sizeof (overlay_target_entry_t),
1584 offsetof(overlay_target_entry_t, ote_qlink));
1585 *devp = makedevice(getmajor(*devp), mid);
1586
1587 mutex_enter(&overlay_target_lock);
1588 if ((flags & FEXCL) && overlay_target_excl == B_TRUE) {
1589 mutex_exit(&overlay_target_lock);
1590 list_destroy(&thdl->oth_outstanding);
1591 mutex_destroy(&thdl->oth_lock);
1592 ddi_soft_state_free(overlay_thdl_state, mid);
1593 id_free(overlay_thdl_idspace, mid);
1594 return (EEXIST);
1595 } else if ((flags & FEXCL) != 0) {
1596 VERIFY(overlay_target_excl == B_FALSE);
1597 overlay_target_excl = B_TRUE;
1598 }
1599 list_insert_tail(&overlay_thdl_list, thdl);
1600 mutex_exit(&overlay_target_lock);
1601
1602 return (0);
1603 }
1604
1605 /* ARGSUSED */
1606 int
1607 overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
1608 int *rvalp)
1609 {
1610 overlay_target_ioctl_t *ioc;
1611 overlay_target_hdl_t *thdl;
1612
1613 if (secpolicy_dl_config(credp) != 0)
1614 return (EPERM);
1615
1616 if ((thdl = ddi_get_soft_state(overlay_thdl_state,
1617 getminor(dev))) == NULL)
1618 return (ENXIO);
1619
1620 for (ioc = &overlay_target_ioctab[0]; ioc->oti_cmd != 0; ioc++) {
1621 int ret;
1622 caddr_t buf;
1623 size_t bufsize;
1624
1625 if (ioc->oti_cmd != cmd)
1626 continue;
1627
1628 if (ioc->oti_write == B_TRUE && !(mode & FWRITE))
1629 return (EBADF);
1630
1631 if (ioc->oti_copyin == NULL) {
1632 bufsize = ioc->oti_size;
1633 buf = kmem_alloc(bufsize, KM_SLEEP);
1634 if (ddi_copyin((void *)(uintptr_t)arg, buf, bufsize,
1635 mode & FKIOCTL) != 0) {
1636 kmem_free(buf, bufsize);
1637 return (EFAULT);
1638 }
1639 } else {
1640 if ((ret = ioc->oti_copyin((void *)(uintptr_t)arg,
1641 (void **)&buf, &bufsize, mode)) != 0)
1642 return (ret);
1643 }
1644
1645 ret = ioc->oti_func(thdl, buf);
1646 if (ret == 0 && ioc->oti_size != 0 &&
1647 ioc->oti_ncopyout == B_TRUE) {
1648 if (ioc->oti_copyout == NULL) {
1649 if (ddi_copyout(buf, (void *)(uintptr_t)arg,
1650 bufsize, mode & FKIOCTL) != 0)
1651 ret = EFAULT;
1652 } else {
1653 ret = ioc->oti_copyout((void *)(uintptr_t)arg,
1654 buf, bufsize, mode);
1655 }
1656 }
1657
1658 kmem_free(buf, bufsize);
1659 return (ret);
1660 }
1661
1662 return (ENOTTY);
1663 }
1664
1665 /* ARGSUSED */
1666 int
1667 overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp)
1668 {
1669 overlay_target_hdl_t *thdl;
1670 overlay_target_entry_t *entry;
1671 minor_t mid = getminor(dev);
1672
1673 if ((thdl = ddi_get_soft_state(overlay_thdl_state, mid)) == NULL)
1674 return (ENXIO);
1675
1676 mutex_enter(&overlay_target_lock);
1677 list_remove(&overlay_thdl_list, thdl);
1678 mutex_enter(&thdl->oth_lock);
1679 while ((entry = list_remove_head(&thdl->oth_outstanding)) != NULL)
1680 list_insert_tail(&overlay_target_list, entry);
1681 cv_signal(&overlay_target_condvar);
1682 mutex_exit(&thdl->oth_lock);
1683 if ((thdl->oth_oflags & FEXCL) != 0) {
1684 VERIFY(overlay_target_excl == B_TRUE);
1685 overlay_target_excl = B_FALSE;
1686 }
1687 mutex_exit(&overlay_target_lock);
1688
1689 list_destroy(&thdl->oth_outstanding);
1690 mutex_destroy(&thdl->oth_lock);
1691 mid = thdl->oth_minor;
1692 ddi_soft_state_free(overlay_thdl_state, mid);
1693 id_free(overlay_thdl_idspace, mid);
1694
1695 return (0);
1696 }