1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 /*
  17  * Overlay device target cache management
  18  *
  19  * For more information, see the big theory statement in
  20  * uts/common/io/overlay/overlay.c
  21  */
  22 
  23 #include <sys/types.h>
  24 #include <sys/ethernet.h>
  25 #include <sys/kmem.h>
  26 #include <sys/policy.h>
  27 #include <sys/sysmacros.h>
  28 #include <sys/stream.h>
  29 #include <sys/strsun.h>
  30 #include <sys/strsubr.h>
  31 #include <sys/mac_provider.h>
  32 #include <sys/mac_client.h>
  33 #include <sys/mac_client_priv.h>
  34 #include <sys/vlan.h>
  35 #include <sys/crc32.h>
  36 #include <sys/cred.h>
  37 #include <sys/file.h>
  38 #include <sys/errno.h>
  39 #include <sys/ddi.h>
  40 #include <sys/sunddi.h>
  41 
  42 #include <sys/overlay_impl.h>
  43 #include <sys/sdt.h>
  44 
/*
 * This is a total straw man, but at least it's a prime number. We will
 * eventually have to do a lot of evaluation and understanding of how these
 * target caches should grow and shrink, as well as how they respond to memory
 * pressure and evictions. This just gives us a starting point that'll be
 * 'good enough', until it's not.
 */
  52 #define OVERLAY_HSIZE   823
  53 
  54 /*
 * We use this data structure to keep track of which requests have been handed
 * out to a given handle instance, so we know what to put back on the pending
 * list.
  58  */
  59 typedef struct overlay_target_hdl {
  60         minor_t oth_minor;              /* RO */
  61         zoneid_t oth_zoneid;            /* RO */
  62         int oth_oflags;                 /* RO */
  63         list_node_t oth_link;           /* overlay_target_lock */
  64         kmutex_t oth_lock;
  65         list_t  oth_outstanding;        /* oth_lock */
  66 } overlay_target_hdl_t;
  67 
  68 typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
  69 typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
  70 typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);
  71 
typedef struct overlay_target_ioctl {
  73         int             oti_cmd;        /* ioctl id */
  74         boolean_t       oti_write;      /* ioctl requires FWRITE */
  75         boolean_t       oti_ncopyout;   /* copyout data? */
  76         overlay_target_copyin_f oti_copyin;     /* copyin func */
  77         overlay_target_ioctl_f oti_func; /* function to call */
        overlay_target_copyout_f oti_copyout;   /* copyout func */
  79         size_t          oti_size;       /* size of user level structure */
  80 } overlay_target_ioctl_t;
  81 
  82 static kmem_cache_t *overlay_target_cache;
  83 kmem_cache_t *overlay_entry_cache;
  84 static id_space_t *overlay_thdl_idspace;
  85 static void *overlay_thdl_state;
  86 
  87 /*
 * When we support overlay devices in non-global zones (NGZs), all of these
 * will need to become zone aware by plugging into the netstack engine and
 * becoming per-netstack data.
  91  */
  92 static list_t overlay_thdl_list;
  93 static kmutex_t overlay_target_lock;
  94 static kcondvar_t overlay_target_condvar;
  95 static list_t overlay_target_list;
  96 static boolean_t overlay_target_excl;
  97 
  98 /*
 * Maximum amount of queued (outstanding) packet data, in bytes, allowed per
 * hash table entry.
 100  */
 101 int overlay_ent_size = 128 * 1024;
 102 
 103 /* ARGSUSED */
 104 static int
 105 overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
 106 {
 107         overlay_target_t *ott = buf;
 108 
 109         mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
 110         cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
 111         return (0);
 112 }
 113 
 114 /* ARGSUSED */
 115 static void
 116 overlay_target_cache_destructor(void *buf, void *arg)
 117 {
 118         overlay_target_t *ott = buf;
 119 
 120         cv_destroy(&ott->ott_cond);
 121         mutex_destroy(&ott->ott_lock);
 122 }
 123 
 124 /* ARGSUSED */
 125 static int
 126 overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
 127 {
 128         overlay_target_entry_t *ote = buf;
 129 
 130         bzero(ote, sizeof (overlay_target_entry_t));
 131         mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
 132         return (0);
 133 }
 134 
 135 /* ARGSUSED */
 136 static void
 137 overlay_entry_cache_destructor(void *buf, void *arg)
 138 {
 139         overlay_target_entry_t *ote = buf;
 140 
 141         mutex_destroy(&ote->ote_lock);
 142 }
 143 
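/*
 * Hash and comparison functions for the VL2 (MAC address) target hash.
 */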
 144 static uint64_t
 145 overlay_mac_hash(const void *v)
 146 {
 147         uint32_t crc;
 148         CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
 149         return (crc);
 150 }
 151 
 152 static int
 153 overlay_mac_cmp(const void *a, const void *b)
 154 {
 155         return (bcmp(a, b, ETHERADDRL));
 156 }
 157 
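/*
 * Hash and comparison functions for the VL3 target hash, keyed on an entry's
 * IP address (ote_ip) and fabric (ote_fab).
 */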
 158 static uint64_t
 159 overlay_vl3_hash(const void *v)
 160 {
 161         const overlay_target_entry_t *ote = v;
 162         uint32_t crc;
 163 
 164         CRC32(crc, &ote->ote_ip, sizeof (ote->ote_ip), -1U, crc32_table);
 165         CRC32(crc, &ote->ote_fab, sizeof (ote->ote_fab), crc, crc32_table);
 166         return (crc);
 167 }
 168 
 169 static int
 170 overlay_vl3_cmp(const void *a, const void *b)
 171 {
 172         const overlay_target_entry_t *l = a;
 173         const overlay_target_entry_t *r = b;
 174 
 175         if (l->ote_fab != r->ote_fab ||
 176             bcmp(&l->ote_ip, &r->ote_ip, sizeof (struct in6_addr)) != 0)
 177                 return (1);
 178         return (0);
 179 }
 180 
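/*
 * AVL comparator that orders entries by fabric first, then by IP address.
 */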
 181 static int
 182 overlay_vl3_avl(const void *a, const void *b)
 183 {
 184         const overlay_target_entry_t *l = a;
 185         const overlay_target_entry_t *r = b;
 186 
 187         if (l->ote_fab < r->ote_fab)
 188                 return (-1);
 189         if (l->ote_fab > r->ote_fab)
 190                 return (1);
 191         return (memcmp(&l->ote_ip, &r->ote_ip, sizeof (struct in6_addr)));
 192 }
 193 
 194 /* ARGSUSED */
 195 void
 196 overlay_target_entry_null_dtor(void *arg)
 197 {
 198 }
 199 
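/*
 * Reset an entry and return it to the kmem cache.  Callers must already have
 * dropped every reference (ote_refcnt must be zero).
 */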
 200 /* ARGSUSED */
 201 void
 202 overlay_target_entry_dtor(void *arg)
 203 {
 204         overlay_target_entry_t *ote = arg;
 205 
 206         ASSERT3U(ote->ote_refcnt, ==, 0);
 207 
 208         ote->ote_flags = 0;
 209         bzero(ote->ote_addr, ETHERADDRL);
 210         bzero(&ote->ote_ip, sizeof (ote->ote_ip));
 211         ote->ote_ott = NULL;
 212         ote->ote_odd = NULL;
 213         ote->ote_fab = NULL;
 214         freemsgchain(ote->ote_chead);
 215         ote->ote_chead = ote->ote_ctail = NULL;
 216         ote->ote_mbsize = 0;
 217         ote->ote_vtime = 0;
 218         kmem_cache_free(overlay_entry_cache, ote);
 219 }
 220 
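/*
 * AVL comparator that orders entries lexicographically by MAC address.
 */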
 221 static int
 222 overlay_mac_avl(const void *a, const void *b)
 223 {
 224         int i;
 225         const overlay_target_entry_t *l, *r;
 226         l = a;
 227         r = b;
 228 
 229         for (i = 0; i < ETHERADDRL; i++) {
 230                 if (l->ote_addr[i] > r->ote_addr[i])
 231                         return (1);
 232                 else if (l->ote_addr[i] < r->ote_addr[i])
 233                         return (-1);
 234         }
 235 
 236         return (0);
 237 }
 238 
 239 void
 240 overlay_target_init(void)
 241 {
 242         int ret;
 243         ret = ddi_soft_state_init(&overlay_thdl_state,
 244             sizeof (overlay_target_hdl_t), 1);
 245         VERIFY(ret == 0);
 246         overlay_target_cache = kmem_cache_create("overlay_target",
 247             sizeof (overlay_target_t), 0, overlay_target_cache_constructor,
 248             overlay_target_cache_destructor, NULL, NULL, NULL, 0);
 249         overlay_entry_cache = kmem_cache_create("overlay_entry",
 250             sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor,
 251             overlay_entry_cache_destructor, NULL, NULL, NULL, 0);
 252         mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL);
 253         cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL);
 254         list_create(&overlay_target_list, sizeof (overlay_target_entry_t),
 255             offsetof(overlay_target_entry_t, ote_qlink));
 256         list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t),
 257             offsetof(overlay_target_hdl_t, oth_link));
 258         overlay_thdl_idspace = id_space_create("overlay_target_minors",
 259             1, INT32_MAX);
 260 }
 261 
 262 void
 263 overlay_target_fini(void)
 264 {
 265         id_space_destroy(overlay_thdl_idspace);
 266         list_destroy(&overlay_thdl_list);
 267         list_destroy(&overlay_target_list);
 268         cv_destroy(&overlay_target_condvar);
 269         mutex_destroy(&overlay_target_lock);
 270         kmem_cache_destroy(overlay_entry_cache);
 271         kmem_cache_destroy(overlay_target_cache);
 272         ddi_soft_state_fini(&overlay_thdl_state);
 273 }
 274 
 275 void
 276 overlay_target_free(overlay_dev_t *odd)
 277 {
 278         if (odd->odd_target == NULL)
 279                 return;
 280 
 281         if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
 282                 refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
 283                 refhash_t *r3p = odd->odd_target->ott_u.ott_dyn.ott_l3dhash;
 284                 avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
 285                 avl_tree_t *a3p = &odd->odd_target->ott_u.ott_dyn.ott_l3tree;
 286                 overlay_target_entry_t *ote;
 287 
                /*
                 * Our AVL trees and hash tables contain the same entries, and
                 * each structure holds its own reference on an entry.  Remove
                 * every entry from each structure and release the
                 * corresponding reference; dropping the final reference frees
                 * the entry.
                 */
 294                 while ((ote = avl_first(ap)) != NULL) {
 295                         avl_remove(ap, ote);
 296                         OVERLAY_TARG_ENTRY_REFRELE(ote);
 297                 }
 298                 avl_destroy(ap);
 299 
 300                 while ((ote = avl_first(a3p)) != NULL) {
 301                         avl_remove(a3p, ote);
 302                         OVERLAY_TARG_ENTRY_REFRELE(ote);
 303                 }
 304                 avl_destroy(a3p);
 305 
 306                 for (ote = refhash_first(rp); ote != NULL;
 307                     ote = refhash_next(rp, ote)) {
 308                         refhash_remove(rp, ote);
 309                         OVERLAY_TARG_ENTRY_REFRELE(ote);
 310                 }
 311                 refhash_destroy(rp);
 312 
 313                 for (ote = refhash_first(r3p); ote != NULL;
 314                     ote = refhash_next(r3p, ote)) {
 315                         refhash_remove(r3p, ote);
 316                         OVERLAY_TARG_ENTRY_REFRELE(ote);
 317                 }
 318                 refhash_destroy(r3p);
 319         }
 320 
 321         ASSERT(odd->odd_target->ott_ocount == 0);
 322         bzero(&odd->odd_target->ott_u, sizeof (odd->odd_target->ott_u));
 323         kmem_cache_free(overlay_target_cache, odd->odd_target);
 324         odd->odd_target = NULL;
 325 }
 326 
 327 int
 328 overlay_target_busy()
 329 {
 330         int ret;
 331 
 332         mutex_enter(&overlay_target_lock);
 333         ret = !list_is_empty(&overlay_thdl_list);
 334         mutex_exit(&overlay_target_lock);
 335 
 336         return (ret);
 337 }
 338 
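/*
 * Queue an entry on the global lookup list for varpd to service, unless the
 * target is being torn down.
 */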
 339 void
 340 overlay_target_queue(overlay_target_entry_t *entry)
 341 {
 342         mutex_enter(&overlay_target_lock);
 343         mutex_enter(&entry->ote_ott->ott_lock);
 344         if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
 345                 mutex_exit(&entry->ote_ott->ott_lock);
 346                 mutex_exit(&overlay_target_lock);
 347                 return;
 348         }
 349         entry->ote_ott->ott_ocount++;
 350         mutex_exit(&entry->ote_ott->ott_lock);
 351         list_insert_tail(&overlay_target_list, entry);
 352         cv_signal(&overlay_target_condvar);
 353         mutex_exit(&overlay_target_lock);
 354 }
 355 
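/*
 * Prevent new lookups from being queued and wait for all outstanding ones to
 * drain.
 */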
 356 void
 357 overlay_target_quiesce(overlay_target_t *ott)
 358 {
 359         if (ott == NULL)
 360                 return;
 361         mutex_enter(&ott->ott_lock);
 362         ott->ott_flags |= OVERLAY_T_TEARDOWN;
 363         while (ott->ott_ocount != 0)
 364                 cv_wait(&ott->ott_cond, &ott->ott_lock);
 365         mutex_exit(&ott->ott_lock);
 366 }
 367 
/*
 * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
 * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else
 * at this time, say for NVGRE, we drop all packets that don't match this.
 */
 373 int
 374 overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
 375     socklen_t *slenp, uint64_t *vidp)
 376 {
 377         int ret;
 378         struct sockaddr_in6 *v6;
 379         overlay_target_t *ott;
 380         mac_header_info_t mhi;
 381         overlay_target_entry_t *entry;
 382 
 383         ASSERT(odd->odd_target != NULL);
 384 
 385         *vidp = odd->odd_vid;
 386 
 387         /*
 388          * At this point, the overlay device is in a mux which means that it's
 389          * been activated. At this point, parts of the target, such as the mode
 390          * and the destination are now read-only and we don't have to worry
 391          * about synchronization for them.
 392          */
 393         ott = odd->odd_target;
 394         if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
 395                 return (OVERLAY_TARGET_DROP);
 396 
 397         v6 = (struct sockaddr_in6 *)sock;
 398         bzero(v6, sizeof (struct sockaddr_in6));
 399         v6->sin6_family = AF_INET6;
 400 
 401         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
 402                 mutex_enter(&ott->ott_lock);
 403                 bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
 404                     sizeof (struct in6_addr));
 405                 v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
 406                 mutex_exit(&ott->ott_lock);
 407                 *slenp = sizeof (struct sockaddr_in6);
 408 
 409                 return (OVERLAY_TARGET_OK);
 410         }
 411 
 412         ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
 413 
        /*
         * VL2 -> UL3 lookups only need the destination VL2 mac address;
         * however, if we end up having to route the packet, we will need
         * the source vlan as part of the destination selection.
         */
 419         if (mac_vlan_header_info(odd->odd_mh, mp, &mhi) != 0)
 420                 return (OVERLAY_TARGET_DROP);
 421 
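        /*
         * Dynamic mode: look up the destination MAC address in the VL2 hash.
         * If no entry exists yet, create a pending entry, queue the packet on
         * it, and hand it to the lookup list so varpd can resolve it
         * asynchronously.
         */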
 422         mutex_enter(&ott->ott_lock);
 423         entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
 424             mhi.mhi_daddr);
 425         if (entry == NULL) {
 426                 entry = kmem_cache_alloc(overlay_entry_cache,
 427                     KM_NOSLEEP | KM_NORMALPRI);
 428                 if (entry == NULL) {
 429                         mutex_exit(&ott->ott_lock);
 430                         return (OVERLAY_TARGET_DROP);
 431                 }
 432                 bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
 433                 entry->ote_chead = entry->ote_ctail = mp;
 434                 entry->ote_mbsize = msgsize(mp);
 435                 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
 436                 entry->ote_ott = ott;
 437                 entry->ote_odd = odd;
 438 
 439                 OVERLAY_TARG_ENTRY_REFHOLD(entry);
 440                 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
 441 
 442                 OVERLAY_TARG_ENTRY_REFHOLD(entry);
 443                 avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
 444 
 445                 mutex_exit(&ott->ott_lock);
 446                 overlay_target_queue(entry);
 447                 return (OVERLAY_TARGET_ASYNC);
 448         }
 449         OVERLAY_TARG_ENTRY_REFHOLD(entry);
 450         mutex_exit(&ott->ott_lock);
 451 
 452         mutex_enter(&entry->ote_lock);
 453         if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
 454                 ret = OVERLAY_TARGET_DROP;
 455         } else if (entry->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
 456                 ret = overlay_route_lookup(odd, mp, &mhi, sock, slenp, vidp);
 457         } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
 458                 bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
 459                     sizeof (struct in6_addr));
 460                 v6->sin6_port = htons(entry->ote_dest.otp_port);
 461                 *slenp = sizeof (struct sockaddr_in6);
 462                 ret = OVERLAY_TARGET_OK;
 463         } else {
 464                 size_t mlen = msgsize(mp);
 465 
 466                 if (mlen + entry->ote_mbsize > overlay_ent_size) {
 467                         ret = OVERLAY_TARGET_DROP;
 468                 } else {
 469                         if (entry->ote_ctail != NULL) {
 470                                 ASSERT(entry->ote_ctail->b_next ==
 471                                     NULL);
 472                                 entry->ote_ctail->b_next = mp;
 473                                 entry->ote_ctail = mp;
 474                         } else {
 475                                 entry->ote_chead = mp;
 476                                 entry->ote_ctail = mp;
 477                         }
 478                         entry->ote_mbsize += mlen;
 479                         if ((entry->ote_flags &
 480                             OVERLAY_ENTRY_F_PENDING) == 0) {
 481                                 entry->ote_flags |=
 482                                     OVERLAY_ENTRY_F_PENDING;
 483                                 overlay_target_queue(entry);
 484                         }
 485                         ret = OVERLAY_TARGET_ASYNC;
 486                 }
 487         }
 488         mutex_exit(&entry->ote_lock);
 489 
 490         mutex_enter(&ott->ott_lock);
 491         OVERLAY_TARG_ENTRY_REFRELE(entry);
 492         mutex_exit(&ott->ott_lock);
 493 
 494         return (ret);
 495 }
 496 
 497 /* ARGSUSED */
 498 static int
 499 overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
 500 {
 501         overlay_dev_t *odd;
 502         overlay_targ_info_t *oti = arg;
 503 
 504         odd = overlay_hold_by_dlid(oti->oti_linkid);
 505         if (odd == NULL)
 506                 return (ENOENT);
 507 
 508         mutex_enter(&odd->odd_lock);
 509         oti->oti_flags = 0;
 510         oti->oti_needs = odd->odd_plugin->ovp_dest;
 511         if (odd->odd_flags & OVERLAY_F_DEGRADED)
 512                 oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
 513         if (odd->odd_flags & OVERLAY_F_ACTIVATED)
 514                 oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
 515         oti->oti_vnetid = odd->odd_vid;
 516         oti->oti_dcid = odd->odd_dcid;
 517         mutex_exit(&odd->odd_lock);
 518         overlay_hold_rele(odd);
 519         return (0);
 520 }
 521 
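/*
 * Associate a varpd-provided target configuration with an overlay device,
 * validating the requested mode and destination against what the device's
 * plugin requires.
 */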
 522 /* ARGSUSED */
 523 static int
 524 overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
 525 {
 526         overlay_dev_t *odd;
 527         overlay_target_t *ott;
 528         overlay_targ_associate_t *ota = arg;
 529         overlay_router_t *ort;
 530 
 531         odd = overlay_hold_by_dlid(ota->ota_linkid);
 532         if (odd == NULL)
 533                 return (ENOENT);
 534 
 535         if (ota->ota_id == 0) {
 536                 overlay_hold_rele(odd);
 537                 return (EINVAL);
 538         }
 539 
 540         if (ota->ota_mode != OVERLAY_TARGET_POINT &&
 541             ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
 542                 overlay_hold_rele(odd);
 543                 return (EINVAL);
 544         }
 545 
 546         if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
 547                 overlay_hold_rele(odd);
 548                 return (EINVAL);
 549         }
 550 
 551         if (ota->ota_mode == OVERLAY_TARGET_POINT) {
 552                 if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) {
 553                         if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) ||
 554                             IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) ||
 555                             IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) {
 556                                 overlay_hold_rele(odd);
 557                                 return (EINVAL);
 558                         }
 559                 }
 560 
 561                 if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) {
 562                         if (ota->ota_point.otp_port == 0) {
 563                                 overlay_hold_rele(odd);
 564                                 return (EINVAL);
 565                         }
 566                 }
 567         }
 568 
 569         ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
 570         ott->ott_flags = 0;
 571         ott->ott_ocount = 0;
 572         ott->ott_mode = ota->ota_mode;
 573         ott->ott_dest = ota->ota_provides;
 574         ott->ott_id = ota->ota_id;
 575 
 576         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
 577                 bcopy(&ota->ota_point, &ott->ott_u.ott_point,
 578                     sizeof (overlay_target_point_t));
 579         } else {
 580                 ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
 581                     overlay_mac_hash, overlay_mac_cmp,
 582                     overlay_target_entry_null_dtor,
 583                     sizeof (overlay_target_entry_t),
 584                     offsetof(overlay_target_entry_t, ote_reflink),
 585                     offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
 586                 ott->ott_u.ott_dyn.ott_l3dhash = refhash_create(OVERLAY_HSIZE,
 587                     overlay_vl3_hash, overlay_vl3_cmp,
 588                     overlay_target_entry_null_dtor,
 589                     sizeof (overlay_target_entry_t),
 590                     offsetof(overlay_target_entry_t, ote_l3_reflink), 0,
 591                     KM_SLEEP);
 592                 avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
 593                     sizeof (overlay_target_entry_t),
 594                     offsetof(overlay_target_entry_t, ote_avllink));
 595                 avl_create(&ott->ott_u.ott_dyn.ott_l3tree, overlay_vl3_avl,
 596                     sizeof (overlay_target_entry_t),
 597                     offsetof(overlay_target_entry_t, ote_l3_avllink));
 598 
 599                 ort = kmem_zalloc(sizeof (*ort), KM_SLEEP);
 600                 mutex_init(&ort->otr_lock, NULL, MUTEX_DRIVER, NULL);
 601                 list_create(&ort->otr_tables, sizeof (overlay_route_table_t),
 602                     offsetof(overlay_route_table_t, ort_link));
 603                 avl_create(&ort->otr_tree, overlay_fabric_avl,
 604                     sizeof (overlay_fabric_entry_t),
 605                     offsetof(overlay_fabric_entry_t, ofe_avllink));
 606         }
 607         mutex_enter(&odd->odd_lock);
 608         if (odd->odd_flags & OVERLAY_F_VARPD) {
 609                 mutex_exit(&odd->odd_lock);
 610                 kmem_cache_free(overlay_target_cache, ott);
 611                 overlay_hold_rele(odd);
 612                 return (EEXIST);
 613         }
 614 
 615         odd->odd_flags |= OVERLAY_F_VARPD;
 616         odd->odd_target = ott;
 617         mutex_exit(&odd->odd_lock);
 618 
 619         overlay_hold_rele(odd);
 620 
 621         return (0);
 622 }
 623 
 624 
 625 /* ARGSUSED */
 626 static int
 627 overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
 628 {
 629         overlay_dev_t *odd;
 630         overlay_targ_degrade_t *otd = arg;
 631 
 632         odd = overlay_hold_by_dlid(otd->otd_linkid);
 633         if (odd == NULL)
 634                 return (ENOENT);
 635 
 636         overlay_fm_degrade(odd, otd->otd_buf);
 637         overlay_hold_rele(odd);
 638         return (0);
 639 }
 640 
 641 /* ARGSUSED */
 642 static int
 643 overlay_target_restore(overlay_target_hdl_t *thdl, void *arg)
 644 {
 645         overlay_dev_t *odd;
 646         overlay_targ_id_t *otid = arg;
 647 
 648         odd = overlay_hold_by_dlid(otid->otid_linkid);
 649         if (odd == NULL)
 650                 return (ENOENT);
 651 
 652         overlay_fm_restore(odd);
 653         overlay_hold_rele(odd);
 654         return (0);
 655 }
 656 
 657 /* ARGSUSED */
 658 static int
 659 overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg)
 660 {
 661         overlay_dev_t *odd;
 662         overlay_targ_id_t *otid = arg;
 663 
 664         odd = overlay_hold_by_dlid(otid->otid_linkid);
 665         if (odd == NULL)
 666                 return (ENOENT);
 667 
 668         mutex_enter(&odd->odd_lock);
 669         odd->odd_flags &= ~OVERLAY_F_VARPD;
 670         mutex_exit(&odd->odd_lock);
 671 
 672         overlay_hold_rele(odd);
 673         return (0);
 674 
 675 }
 676 
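/*
 * Wait up to a second for a pending lookup to appear on the global queue,
 * translate it into an overlay_targ_lookup_t for varpd, and track the entry on
 * this handle's outstanding list until varpd responds to or drops it.
 */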
 677 static int
 678 overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
 679 {
 680         overlay_targ_lookup_t *otl = arg;
 681         overlay_target_entry_t *entry;
 682         void *src, *dst;
 683         clock_t ret, timeout;
 684         mac_header_info_t mhi;

        timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
 686 again:
 687         mutex_enter(&overlay_target_lock);
 688         while (list_is_empty(&overlay_target_list)) {
 689                 ret = cv_timedwait(&overlay_target_condvar,
 690                     &overlay_target_lock, timeout);
 691                 if (ret == -1) {
 692                         mutex_exit(&overlay_target_lock);
 693                         return (ETIME);
 694                 }
 695         }
 696         entry = list_remove_head(&overlay_target_list);
 697         mutex_exit(&overlay_target_lock);
 698         mutex_enter(&entry->ote_lock);
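        /*
         * If neither pending flag is set, the entry was resolved (or torn
         * down) after it was queued, so there is nothing left to look up.
         */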
        if ((entry->ote_flags &
            (OVERLAY_ENTRY_F_PENDING | OVERLAY_ENTRY_F_VL3_PENDING)) == 0) {
 701                 ASSERT(entry->ote_chead == NULL);
 702                 mutex_exit(&entry->ote_lock);
 703                 goto again;
 704         }
 705         ASSERT(entry->ote_chead != NULL);
 706 
 707 
 708         otl->otl_l3req = (entry->ote_flags & OVERLAY_ENTRY_F_VL3_PENDING) ?
 709             B_TRUE : B_FALSE;
 710 
 711         if (otl->otl_l3req) {
 712                 src = &otl->otl_addru.otlu_l3.otl3_srcip;
 713                 dst = &otl->otl_addru.otlu_l3.otl3_dstip;
 714         } else {
 715                 src = &otl->otl_addru.otlu_l2.otl2_srcaddr;
 716                 dst = &otl->otl_addru.otlu_l2.otl2_dstaddr;
 717         }
 718 
 719         /*
 720          * If we have a bogon that doesn't have a valid mac header, or an
 721          * invalid IP header for IP requests, drop it and try again.
 722          */
 723         if ((mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
 724             &mhi) != 0) ||
 725             (otl->otl_l3req && overlay_mblk_vl3ip(entry->ote_chead, src,
 726             dst) != 0)) {
 727                 boolean_t queue = B_FALSE;
 728                 mblk_t *mp = entry->ote_chead;
 729                 entry->ote_chead = mp->b_next;
 730                 mp->b_next = NULL;
 731                 if (entry->ote_ctail == mp)
 732                         entry->ote_ctail = entry->ote_chead;
 733                 entry->ote_mbsize -= msgsize(mp);
 734                 if (entry->ote_chead != NULL)
 735                         queue = B_TRUE;
 736                 mutex_exit(&entry->ote_lock);
 737                 if (queue == B_TRUE)
 738                         overlay_target_queue(entry);
 739                 freemsg(mp);
 740                 goto again;
 741         }
 742 
 743         otl->otl_dlid = entry->ote_odd->odd_linkid;
 744         otl->otl_reqid = (uintptr_t)entry;
 745         otl->otl_varpdid = entry->ote_ott->ott_id;
 746         otl->otl_vnetid = entry->ote_odd->odd_vid;
 747 
 748         otl->otl_hdrsize = mhi.mhi_hdrsize;
 749         otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
 750         otl->otl_addru.otlu_l2.otl2_dsttype = mhi.mhi_dsttype;
 751         otl->otl_addru.otlu_l2.otl2_sap = mhi.mhi_bindsap;
 752         otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
 753 
        /*
         * The overlay_mblk_vl3ip() call above fills in dst & src for
         * VL3->UL3 requests, so we only need to handle VL2->UL3 here.
         */
 758         if (!otl->otl_l3req) {
 759                 bcopy(mhi.mhi_daddr, dst, ETHERADDRL);
 760                 bcopy(mhi.mhi_saddr, src, ETHERADDRL);
 761         }
 762         mutex_exit(&entry->ote_lock);
 763 
 764         mutex_enter(&thdl->oth_lock);
 765         list_insert_tail(&thdl->oth_outstanding, entry);
 766         mutex_exit(&thdl->oth_lock);
 767 
 768         return (0);
 769 }
 770 
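/*
 * varpd has answered an outstanding lookup: record the destination on the
 * entry, mark it valid (or as a router), and transmit any packets that were
 * queued while the lookup was pending.
 */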
 771 static int
 772 overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
 773 {
 774         const overlay_targ_resp_t *otr = arg;
 775         overlay_target_entry_t *entry;
 776         mblk_t *mp;
 777         boolean_t is_router = B_FALSE;
 778 
 779         /*
 780          * If we ever support a protocol that uses MAC addresses for the UL
 781          * destination addr, we probably should expand this to check that
 782          * all of otr is zero.
 783          */
 784         if (IN6_IS_ADDR_UNSPECIFIED(&otr->otr_answer.otp_ip) &&
 785             otr->otr_answer.otp_port == 0)
 786                 is_router = B_TRUE;
 787 
 788         mutex_enter(&thdl->oth_lock);
 789         for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
 790             entry = list_next(&thdl->oth_outstanding, entry)) {
 791                 if ((uintptr_t)entry == otr->otr_reqid)
 792                         break;
 793         }
 794 
 795         if (entry == NULL) {
 796                 mutex_exit(&thdl->oth_lock);
 797                 return (EINVAL);
 798         }
 799         list_remove(&thdl->oth_outstanding, entry);
 800         mutex_exit(&thdl->oth_lock);
 801 
 802         mutex_enter(&entry->ote_lock);
 803         bcopy(&otr->otr_answer, &entry->ote_dest,
 804             sizeof (overlay_target_point_t));
 805         entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
 806         entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
 807         if (is_router)
 808                 entry->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
 809         mp = entry->ote_chead;
 810         entry->ote_chead = NULL;
 811         entry->ote_ctail = NULL;
 812         entry->ote_mbsize = 0;
 813         entry->ote_vtime = gethrtime();
 814         mutex_exit(&entry->ote_lock);
 815 
        /*
         * For now do an in-situ drain.
         *
         * TODO: overlay_m_tx() will need to perform remote fabric attachment
         * checks, which may leave mblk_t's in the msg chain whose
         * connectivity with the target entry is unknown.  This code will then
         * need to deal with those leftovers.
         */
 824         mp = overlay_m_tx(entry->ote_odd, mp);
 825         freemsgchain(mp);
 826 
 827         mutex_enter(&entry->ote_ott->ott_lock);
 828         entry->ote_ott->ott_ocount--;
 829         cv_signal(&entry->ote_ott->ott_cond);
 830         mutex_exit(&entry->ote_ott->ott_lock);
 831 
 832         return (0);
 833 }
 834 
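/*
 * varpd has told us to drop the packet that triggered an outstanding lookup.
 * Free the head of the entry's queue and, if more packets remain, requeue the
 * entry for another lookup.
 */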
 835 static int
 836 overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg)
 837 {
 838         const overlay_targ_resp_t *otr = arg;
 839         overlay_target_entry_t *entry;
 840         mblk_t *mp;
 841         boolean_t queue = B_FALSE;
 842 
 843         mutex_enter(&thdl->oth_lock);
 844         for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
 845             entry = list_next(&thdl->oth_outstanding, entry)) {
 846                 if ((uintptr_t)entry == otr->otr_reqid)
 847                         break;
 848         }
 849 
 850         if (entry == NULL) {
 851                 mutex_exit(&thdl->oth_lock);
 852                 return (EINVAL);
 853         }
 854         list_remove(&thdl->oth_outstanding, entry);
 855         mutex_exit(&thdl->oth_lock);
 856 
 857         mutex_enter(&entry->ote_lock);
 858 
 859         /* Safeguard against a confused varpd */
 860         if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
 861                 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
 862                 DTRACE_PROBE1(overlay__target__valid__drop,
 863                     overlay_target_entry_t *, entry);
 864                 mutex_exit(&entry->ote_lock);
 865                 goto done;
 866         }
 867 
        /*
         * TODO: This will need to be smarter.  This drop can only apply to
         * packets from the same source fabric as the first mblk_t in the
         * chain.  If the target exists, packets from other fabrics that are
         * chained to this target entry might still be sendable (if we already
         * know those fabrics are attached), or we might need to issue queries
         * for those other source fabrics if we don't know whether the two are
         * attached.
         */
 877         mp = entry->ote_chead;
 878         if (mp != NULL) {
 879                 entry->ote_chead = mp->b_next;
 880                 mp->b_next = NULL;
 881                 if (entry->ote_ctail == mp)
 882                         entry->ote_ctail = entry->ote_chead;
 883                 entry->ote_mbsize -= msgsize(mp);
 884         }
 885         if (entry->ote_chead != NULL) {
 886                 queue = B_TRUE;
 887                 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
 888         } else {
 889                 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
 890         }
 891         mutex_exit(&entry->ote_lock);
 892 
 893         if (queue == B_TRUE)
 894                 overlay_target_queue(entry);
 895         freemsg(mp);
 896 
 897 done:
 898         mutex_enter(&entry->ote_ott->ott_lock);
 899         entry->ote_ott->ott_ocount--;
 900         cv_signal(&entry->ote_ott->ott_cond);
 901         mutex_exit(&entry->ote_ott->ott_lock);
 902 
 903         return (0);
 904 }
 905 
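/*
 * copyin/copyout handlers for packet ioctls, which carry a user buffer pointer
 * and therefore need ILP32/LP64 data model conversion.
 */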
 906 /* ARGSUSED */
 907 static int
 908 overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize,
 909     int flags)
 910 {
 911         overlay_targ_pkt_t *pkt;
 912         overlay_targ_pkt32_t *pkt32;
 913 
 914         pkt = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP);
 915         *outp = pkt;
 916         *bsize = sizeof (overlay_targ_pkt_t);
 917         if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
 918                 uintptr_t addr;
 919 
 920                 if (ddi_copyin(ubuf, pkt, sizeof (overlay_targ_pkt32_t),
 921                     flags & FKIOCTL) != 0) {
 922                         kmem_free(pkt, *bsize);
 923                         return (EFAULT);
 924                 }
 925                 pkt32 = (overlay_targ_pkt32_t *)pkt;
 926                 addr = pkt32->otp_buf;
 927                 pkt->otp_buf = (void *)addr;
 928         } else {
 929                 if (ddi_copyin(ubuf, pkt, *bsize, flags & FKIOCTL) != 0) {
 930                         kmem_free(pkt, *bsize);
 931                         return (EFAULT);
 932                 }
 933         }
 934         return (0);
 935 }
 936 
 937 static int
 938 overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize,
 939     int flags)
 940 {
 941         if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
 942                 overlay_targ_pkt_t *pkt = buf;
 943                 overlay_targ_pkt32_t *pkt32 = buf;
 944                 uintptr_t addr = (uintptr_t)pkt->otp_buf;
 945                 pkt32->otp_buf = (caddr32_t)addr;
 946                 if (ddi_copyout(buf, ubuf, sizeof (overlay_targ_pkt32_t),
 947                     flags & FKIOCTL) != 0)
 948                         return (EFAULT);
 949         } else {
 950                 if (ddi_copyout(buf, ubuf, bufsize, flags & FKIOCTL) != 0)
 951                         return (EFAULT);
 952         }
 953         return (0);
 954 }
 955 
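/*
 * Copy up to otp_size bytes of the first queued packet for an outstanding
 * lookup out to varpd's buffer.
 */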
 956 static int
 957 overlay_target_packet(overlay_target_hdl_t *thdl, void *arg)
 958 {
 959         overlay_targ_pkt_t *pkt = arg;
 960         overlay_target_entry_t *entry;
 961         mblk_t *mp;
 962         size_t mlen;
 963         size_t boff;
 964 
 965         mutex_enter(&thdl->oth_lock);
 966         for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
 967             entry = list_next(&thdl->oth_outstanding, entry)) {
 968                 if ((uintptr_t)entry == pkt->otp_reqid)
 969                         break;
 970         }
 971 
 972         if (entry == NULL) {
 973                 mutex_exit(&thdl->oth_lock);
 974                 return (EINVAL);
 975         }
 976         mutex_enter(&entry->ote_lock);
 977         mutex_exit(&thdl->oth_lock);
 978         mp = entry->ote_chead;
 979         /* Protect against a rogue varpd */
 980         if (mp == NULL) {
 981                 mutex_exit(&entry->ote_lock);
 982                 return (EINVAL);
 983         }
 984         mlen = MIN(msgsize(mp), pkt->otp_size);
 985         pkt->otp_size = mlen;
 986         boff = 0;
 987         while (mlen > 0) {
 988                 size_t wlen = MIN(MBLKL(mp), mlen);
 989                 if (ddi_copyout(mp->b_rptr,
 990                     (void *)((uintptr_t)pkt->otp_buf + boff),
 991                     wlen, 0) != 0) {
 992                         mutex_exit(&entry->ote_lock);
 993                         return (EFAULT);
 994                 }
 995                 mlen -= wlen;
 996                 boff += wlen;
 997                 mp = mp->b_cont;
 998         }
 999         mutex_exit(&entry->ote_lock);
1000         return (0);
1001 }
1002 
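/*
 * Take a packet from varpd and inject it into the overlay device's MAC
 * receive path.
 */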
1003 static int
1004 overlay_target_inject(overlay_target_hdl_t *thdl, void *arg)
1005 {
1006         overlay_targ_pkt_t *pkt = arg;
1007         overlay_target_entry_t *entry;
1008         overlay_dev_t *odd;
1009         mblk_t *mp;
1010 
1011         if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
1012                 return (EINVAL);
1013 
1014         mp = allocb(pkt->otp_size, 0);
1015         if (mp == NULL)
1016                 return (ENOMEM);
1017 
1018         if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
1019                 freeb(mp);
1020                 return (EFAULT);
1021         }
1022         mp->b_wptr += pkt->otp_size;
1023 
1024         if (pkt->otp_linkid != UINT64_MAX) {
1025                 odd = overlay_hold_by_dlid(pkt->otp_linkid);
1026                 if (odd == NULL) {
1027                         freeb(mp);
1028                         return (ENOENT);
1029                 }
1030         } else {
1031                 mutex_enter(&thdl->oth_lock);
1032                 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
1033                     entry = list_next(&thdl->oth_outstanding, entry)) {
1034                         if ((uintptr_t)entry == pkt->otp_reqid)
1035                                 break;
1036                 }
1037 
1038                 if (entry == NULL) {
1039                         mutex_exit(&thdl->oth_lock);
1040                         freeb(mp);
1041                         return (ENOENT);
1042                 }
1043                 odd = entry->ote_odd;
1044                 mutex_exit(&thdl->oth_lock);
1045         }
1046 
1047         mutex_enter(&odd->odd_lock);
1048         overlay_io_start(odd, OVERLAY_F_IN_RX);
1049         mutex_exit(&odd->odd_lock);
1050 
1051         mac_rx(odd->odd_mh, NULL, mp);
1052 
1053         mutex_enter(&odd->odd_lock);
1054         overlay_io_done(odd, OVERLAY_F_IN_RX);
1055         mutex_exit(&odd->odd_lock);
1056 
1057         return (0);
1058 }
1059 
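/*
 * Take a packet from varpd and send it back out through the overlay device's
 * transmit path.
 */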
1060 static int
1061 overlay_target_resend(overlay_target_hdl_t *thdl, void *arg)
1062 {
1063         overlay_targ_pkt_t *pkt = arg;
1064         overlay_target_entry_t *entry;
1065         overlay_dev_t *odd;
1066         mblk_t *mp;
1067 
1068         if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
1069                 return (EINVAL);
1070 
1071         mp = allocb(pkt->otp_size, 0);
1072         if (mp == NULL)
1073                 return (ENOMEM);
1074 
1075         if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
1076                 freeb(mp);
1077                 return (EFAULT);
1078         }
1079         mp->b_wptr += pkt->otp_size;
1080 
1081         if (pkt->otp_linkid != UINT64_MAX) {
1082                 odd = overlay_hold_by_dlid(pkt->otp_linkid);
1083                 if (odd == NULL) {
1084                         freeb(mp);
1085                         return (ENOENT);
1086                 }
1087         } else {
1088                 mutex_enter(&thdl->oth_lock);
1089                 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
1090                     entry = list_next(&thdl->oth_outstanding, entry)) {
1091                         if ((uintptr_t)entry == pkt->otp_reqid)
1092                                 break;
1093                 }
1094 
1095                 if (entry == NULL) {
1096                         mutex_exit(&thdl->oth_lock);
1097                         freeb(mp);
1098                         return (ENOENT);
1099                 }
1100                 odd = entry->ote_odd;
1101                 mutex_exit(&thdl->oth_lock);
1102         }
1103 
1104         mp = overlay_m_tx(odd, mp);
1105         freemsgchain(mp);
1106 
1107         return (0);
1108 }
1109 
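/*
 * Kernel-internal form of overlay_targ_list_t used while iterating overlay
 * devices.  otli_count indicates that the caller only asked how many devices
 * exist, not for their link ids.
 */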
1110 typedef struct overlay_targ_list_int {
1111         boolean_t       otli_count;
1112         uint32_t        otli_cur;
1113         uint32_t        otli_nents;
1114         uint32_t        otli_ents[];
1115 } overlay_targ_list_int_t;
1116 
1117 static int
1118 overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize,
1119     int flags)
1120 {
1121         overlay_targ_list_t n;
1122         overlay_targ_list_int_t *otl;
1123 
1124         if (ddi_copyin(ubuf, &n, sizeof (overlay_targ_list_t),
1125             flags & FKIOCTL) != 0)
1126                 return (EFAULT);
1127 
        /*
         * Sanity check the requested entry count so that the allocation
         * below cannot overflow or become absurdly large.
         */
1130         if (n.otl_nents >= INT32_MAX / sizeof (uint32_t))
1131                 return (EINVAL);
1132         *bsize = sizeof (overlay_targ_list_int_t) +
1133             sizeof (uint32_t) * n.otl_nents;
1134         otl = kmem_zalloc(*bsize, KM_SLEEP);
1135         otl->otli_cur = 0;
1136         otl->otli_nents = n.otl_nents;
1137         if (otl->otli_nents != 0) {
1138                 otl->otli_count = B_FALSE;
1139                 if (ddi_copyin((void *)((uintptr_t)ubuf +
1140                     offsetof(overlay_targ_list_t, otl_ents)),
1141                     otl->otli_ents, n.otl_nents * sizeof (uint32_t),
1142                     flags & FKIOCTL) != 0) {
1143                         kmem_free(otl, *bsize);
1144                         return (EFAULT);
1145                 }
1146         } else {
1147                 otl->otli_count = B_TRUE;
1148         }
1149 
1150         *outp = otl;
1151         return (0);
1152 }
1153 
1154 static int
1155 overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg)
1156 {
1157         overlay_targ_list_int_t *otl = arg;
1158 
1159         if (otl->otli_cur < otl->otli_nents)
1160                 otl->otli_ents[otl->otli_cur] = odd->odd_linkid;
1161         otl->otli_cur++;
1162         return (0);
1163 }
1164 
1165 /* ARGSUSED */
1166 static int
1167 overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg)
1168 {
1169         overlay_dev_iter(overlay_target_ioctl_list_cb, arg);
1170         return (0);
1171 }
1172 
1173 /* ARGSUSED */
1174 static int
1175 overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags)
1176 {
1177         overlay_targ_list_int_t *otl = buf;
1178 
1179         if (ddi_copyout(&otl->otli_cur, ubuf, sizeof (uint32_t),
1180             flags & FKIOCTL) != 0)
1181                 return (EFAULT);
1182 
1183         if (otl->otli_count == B_FALSE) {
1184                 if (ddi_copyout(otl->otli_ents,
1185                     (void *)((uintptr_t)ubuf +
1186                     offsetof(overlay_targ_list_t, otl_ents)),
1187                     sizeof (uint32_t) * otl->otli_nents,
1188                     flags & FKIOCTL) != 0)
1189                         return (EFAULT);
1190         }
1191         return (0);
1192 }
1193 
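/*
 * Look up the cache entry for a single MAC address (or return the point
 * target's destination) and copy its state out to the caller.
 */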
1194 /* ARGSUSED */
1195 static int
1196 overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg)
1197 {
1198         int ret = 0;
1199         overlay_dev_t *odd;
1200         overlay_target_t *ott;
1201         overlay_targ_cache_t *otc = arg;
1202 
1203         odd = overlay_hold_by_dlid(otc->otc_linkid);
1204         if (odd == NULL)
1205                 return (ENOENT);
1206 
1207         mutex_enter(&odd->odd_lock);
1208         if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1209                 mutex_exit(&odd->odd_lock);
1210                 overlay_hold_rele(odd);
1211                 return (ENXIO);
1212         }
1213         ott = odd->odd_target;
1214         if (ott->ott_mode != OVERLAY_TARGET_POINT &&
1215             ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1216                 mutex_exit(&odd->odd_lock);
1217                 overlay_hold_rele(odd);
1218                 return (ENOTSUP);
1219         }
1220         mutex_enter(&ott->ott_lock);
1221         mutex_exit(&odd->odd_lock);
1222 
1223         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1224                 otc->otc_entry.otce_flags = 0;
1225                 bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
1226                     sizeof (overlay_target_point_t));
1227         } else {
1228                 overlay_target_entry_t *ote;
1229                 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1230                     otc->otc_entry.otce_mac);
1231                 if (ote == NULL) {
1232                         ret = ENOENT;
1233                         goto done;
1234                 }
1235 
1236                 mutex_enter(&ote->ote_lock);
1237                 if ((ote->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) != 0) {
1238                         if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
1239                                 otc->otc_entry.otce_flags =
1240                                     OVERLAY_TARGET_CACHE_DROP;
1241                         } else if (ote->ote_flags & OVERLAY_ENTRY_F_ROUTER) {
1242                                 otc->otc_entry.otce_flags =
1243                                     OVERLAY_TARGET_CACHE_ROUTER;
1244                         } else {
1245                                 otc->otc_entry.otce_flags = 0;
1246                                 bcopy(&ote->ote_dest, &otc->otc_entry.otce_dest,
1247                                     sizeof (overlay_target_point_t));
1248                         }
1249                         ret = 0;
1250                 } else {
1251                         ret = ENOENT;
1252                 }
1253                 mutex_exit(&ote->ote_lock);
1254         }
1255 
1256 done:
1257         mutex_exit(&ott->ott_lock);
1258         overlay_hold_rele(odd);
1259 
1260         return (ret);
1261 }
1262 
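/*
 * Install or update the cache entry for a MAC address, creating it if needed,
 * and transmit any packets that were queued while the entry was unresolved.
 */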
1263 /* ARGSUSED */
1264 static int
1265 overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
1266 {
1267         overlay_dev_t *odd;
1268         overlay_target_t *ott;
1269         overlay_target_entry_t *ote;
1270         overlay_targ_cache_t *otc = arg;
1271         mblk_t *mp = NULL;
1272 
1273         if (otc->otc_entry.otce_flags &
1274             ~(OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
1275                 return (EINVAL);
1276 
1277         if (otc->otc_entry.otce_flags ==
1278             (OVERLAY_TARGET_CACHE_DROP | OVERLAY_TARGET_CACHE_ROUTER))
1279                 return (EINVAL);
1280 
1281         odd = overlay_hold_by_dlid(otc->otc_linkid);
1282         if (odd == NULL)
1283                 return (ENOENT);
1284 
1285         mutex_enter(&odd->odd_lock);
1286         if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1287                 mutex_exit(&odd->odd_lock);
1288                 overlay_hold_rele(odd);
1289                 return (ENXIO);
1290         }
1291         ott = odd->odd_target;
1292         if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1293                 mutex_exit(&odd->odd_lock);
1294                 overlay_hold_rele(odd);
1295                 return (ENOTSUP);
1296         }
1297         mutex_enter(&ott->ott_lock);
1298         mutex_exit(&odd->odd_lock);
1299 
1300         ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1301             otc->otc_entry.otce_mac);
1302         if (ote == NULL) {
1303                 ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
1304                 bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
1305                 ote->ote_chead = ote->ote_ctail = NULL;
1306                 ote->ote_mbsize = 0;
1307                 ote->ote_ott = ott;
1308                 ote->ote_odd = odd;
1309                 mutex_enter(&ote->ote_lock);
1310                 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
1311                 avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
1312         } else {
1313                 mutex_enter(&ote->ote_lock);
1314         }
1315 
1316         if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
1317                 ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
1318         } else {
1319                 ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
1320                 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_ROUTER)
1321                         ote->ote_flags |= OVERLAY_ENTRY_F_ROUTER;
1322                 bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
1323                     sizeof (overlay_target_point_t));
1324                 mp = ote->ote_chead;
1325                 ote->ote_chead = NULL;
1326                 ote->ote_ctail = NULL;
1327                 ote->ote_mbsize = 0;
1328                 ote->ote_vtime = gethrtime();
1329         }
1330 
1331         mutex_exit(&ote->ote_lock);
1332         mutex_exit(&ott->ott_lock);
1333 
1334         if (mp != NULL) {
1335                 mp = overlay_m_tx(ote->ote_odd, mp);
1336                 freemsgchain(mp);
1337         }
1338 
1339         overlay_hold_rele(odd);
1340 
1341         return (0);
1342 }
1343 
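/*
 * Invalidate the cache entry for a single MAC address.
 */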
1344 /* ARGSUSED */
1345 static int
1346 overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg)
1347 {
1348         int ret = 0;
1349         overlay_dev_t *odd;
1350         overlay_target_t *ott;
1351         overlay_target_entry_t *ote;
1352         overlay_targ_cache_t *otc = arg;
1353 
1354         odd = overlay_hold_by_dlid(otc->otc_linkid);
1355         if (odd == NULL)
1356                 return (ENOENT);
1357 
1358         mutex_enter(&odd->odd_lock);
1359         if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1360                 mutex_exit(&odd->odd_lock);
1361                 overlay_hold_rele(odd);
1362                 return (ENXIO);
1363         }
1364         ott = odd->odd_target;
1365         if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1366                 mutex_exit(&odd->odd_lock);
1367                 overlay_hold_rele(odd);
1368                 return (ENOTSUP);
1369         }
1370         mutex_enter(&ott->ott_lock);
1371         mutex_exit(&odd->odd_lock);
1372 
1373         ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1374             otc->otc_entry.otce_mac);
1375         if (ote != NULL) {
1376                 mutex_enter(&ote->ote_lock);
1377                 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
1378                 mutex_exit(&ote->ote_lock);
1379                 ret = 0;
1380         } else {
1381                 ret = ENOENT;
1382         }
1383 
1384         mutex_exit(&ott->ott_lock);
1385         overlay_hold_rele(odd);
1386 
1387         return (ret);
1388 }
1389 
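/*
 * Invalidate every entry in a device's target cache.
 */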
1390 /* ARGSUSED */
1391 static int
1392 overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg)
1393 {
1394         avl_tree_t *avl;
1395         overlay_dev_t *odd;
1396         overlay_target_t *ott;
1397         overlay_target_entry_t *ote;
1398         overlay_targ_cache_t *otc = arg;
1399 
1400         odd = overlay_hold_by_dlid(otc->otc_linkid);
1401         if (odd == NULL)
1402                 return (ENOENT);
1403 
1404         mutex_enter(&odd->odd_lock);
1405         if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1406                 mutex_exit(&odd->odd_lock);
1407                 overlay_hold_rele(odd);
1408                 return (ENXIO);
1409         }
1410         ott = odd->odd_target;
1411         if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1412                 mutex_exit(&odd->odd_lock);
1413                 overlay_hold_rele(odd);
1414                 return (ENOTSUP);
1415         }
1416         mutex_enter(&ott->ott_lock);
1417         mutex_exit(&odd->odd_lock);
1418         avl = &ott->ott_u.ott_dyn.ott_tree;
1419 
1420         for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) {
1421                 mutex_enter(&ote->ote_lock);
1422                 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
1423                 mutex_exit(&ote->ote_lock);
1424         }
1427 
1428         mutex_exit(&ott->ott_lock);
1429         overlay_hold_rele(odd);
1430 
1431         return (0);
1432 }
1433 
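/*
 * copyin handler for cache iteration: size the kernel buffer to hold as many
 * entries as the caller asked for.
 */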
1434 static int
1435 overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize,
1436     int flags)
1437 {
1438         overlay_targ_cache_iter_t base, *iter;
1439 
1440         if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t),
1441             flags & FKIOCTL) != 0)
1442                 return (EFAULT);
1443 
1444         if (base.otci_count > OVERLAY_TARGET_ITER_MAX)
1445                 return (E2BIG);
1446 
1447         if (base.otci_count == 0)
1448                 return (EINVAL);
1449 
1450         *bsize = sizeof (overlay_targ_cache_iter_t) +
1451             base.otci_count * sizeof (overlay_targ_cache_entry_t);
1452         iter = kmem_alloc(*bsize, KM_SLEEP);
1453         bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t));
1454         *outp = iter;
1455 
1456         return (0);
1457 }
1458 
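/*
 * Iteration state embedded in the ioctl's opaque marker: the last MAC address
 * visited and a flag indicating that the walk is complete.
 */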
1459 typedef struct overlay_targ_cache_marker {
1460         uint8_t         otcm_mac[ETHERADDRL];
1461         uint16_t        otcm_done;
1462 } overlay_targ_cache_marker_t;
1463 
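/*
 * Walk the target cache, resuming from the MAC address recorded in the
 * marker, and copy out up to otci_count entries per call.
 */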
1464 /* ARGSUSED */
1465 static int
1466 overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg)
1467 {
1468         overlay_dev_t *odd;
1469         overlay_target_t *ott;
1470         overlay_target_entry_t lookup, *ent;
1471         overlay_targ_cache_marker_t *mark;
1472         avl_index_t where;
1473         avl_tree_t *avl;
1474         uint16_t written = 0;
1475 
1476         overlay_targ_cache_iter_t *iter = arg;
1477         mark = (void *)&iter->otci_marker;
1478 
1479         if (mark->otcm_done != 0) {
1480                 iter->otci_count = 0;
1481                 return (0);
1482         }
1483 
1484         odd = overlay_hold_by_dlid(iter->otci_linkid);
1485         if (odd == NULL)
1486                 return (ENOENT);
1487 
1488         mutex_enter(&odd->odd_lock);
1489         if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1490                 mutex_exit(&odd->odd_lock);
1491                 overlay_hold_rele(odd);
1492                 return (ENXIO);
1493         }
1494         ott = odd->odd_target;
1495         if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC &&
1496             ott->ott_mode != OVERLAY_TARGET_POINT) {
1497                 mutex_exit(&odd->odd_lock);
1498                 overlay_hold_rele(odd);
1499                 return (ENOTSUP);
1500         }
1501 
        /*
         * Holding this lock across the entire iteration probably isn't very
         * good. We should perhaps add an r/w lock for the avl tree. But we'll
         * wait until we know it's necessary before we do more.
         */
1507         mutex_enter(&ott->ott_lock);
1508         mutex_exit(&odd->odd_lock);
1509 
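             /*
              * Point-to-point targets have exactly one destination and no
              * per-MAC state, so report a single synthetic entry and finish
              * the iteration.
              */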
1510         if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1511                 overlay_targ_cache_entry_t *out = &iter->otci_ents[0];
1512                 bzero(out->otce_mac, ETHERADDRL);
1513                 out->otce_flags = 0;
1514                 bcopy(&ott->ott_u.ott_point, &out->otce_dest,
1515                     sizeof (overlay_target_point_t));
1516                 written++;
1517                 mark->otcm_done = 1;
                     goto done;
1518         }
1519 
1520         avl = &ott->ott_u.ott_dyn.ott_tree;
1521         bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL);
1522         ent = avl_find(avl, &lookup, &where);
1523 
1524         /*
1525          * NULL ent means that the entry does not exist, so we want to start
1526          * with the closest node in the tree. This means that we implicitly rely
1527          * on the tree's order and the first node will be the MAC address
1528          * 00:00:00:00:00:00 and the last will be ff:ff:ff:ff:ff:ff.
1529          */
1530         if (ent == NULL) {
1531                 ent = avl_nearest(avl, where, AVL_AFTER);
1532                 if (ent == NULL) {
1533                         mark->otcm_done = 1;
1534                         goto done;
1535                 }
1536         }
1537 
1538         for (; ent != NULL && written < iter->otci_count;
1539             ent = AVL_NEXT(avl, ent)) {
1540                 overlay_targ_cache_entry_t *out = &iter->otci_ents[written];
1541                 mutex_enter(&ent->ote_lock);
1542                 if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) {
1543                         mutex_exit(&ent->ote_lock);
1544                         continue;
1545                 }
1546                 bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL);
1547                 out->otce_flags = 0;
1548                 if (ent->ote_flags & OVERLAY_ENTRY_F_DROP)
1549                         out->otce_flags |= OVERLAY_TARGET_CACHE_DROP;
1550                 if (ent->ote_flags & OVERLAY_ENTRY_F_VALID)
1551                         bcopy(&ent->ote_dest, &out->otce_dest,
1552                             sizeof (overlay_target_point_t));
1553                 written++;
1554                 mutex_exit(&ent->ote_lock);
1555         }
1556 
1557         if (ent != NULL) {
1558                 bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL);
1559         } else {
1560                 mark->otcm_done = 1;
1561         }
1562 
1563 done:
1564         iter->otci_count = written;
1565         mutex_exit(&ott->ott_lock);
1566         overlay_hold_rele(odd);
1567 
1568         return (0);
1569 }
1570 
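     /*
      * Copy back out only what the iterator actually produced: the request
      * header plus the otci_count entries that were filled in.
      */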
1571 /* ARGSUSED */
1572 static int
1573 overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize,
1574     int flags)
1575 {
1576         size_t outsize;
1577         const overlay_targ_cache_iter_t *iter = buf;
1578 
1579         outsize = sizeof (overlay_targ_cache_iter_t) +
1580             iter->otci_count * sizeof (overlay_targ_cache_entry_t);
1581 
1582         if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0)
1583                 return (EFAULT);
1584 
1585         return (0);
1586 }
1587 
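     /*
      * Table of target ioctls, searched by overlay_target_ioctl() below.
      * Entries with a NULL copyin or copyout routine use the default
      * fixed-size ddi_copyin()/ddi_copyout() handling based on oti_size.
      */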
1588 static overlay_target_ioctl_t overlay_target_ioctab[] = {
1589         { OVERLAY_TARG_INFO, B_TRUE, B_TRUE,
1590                 NULL, overlay_target_info,
1591                 NULL, sizeof (overlay_targ_info_t)      },
1592         { OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE,
1593                 NULL, overlay_target_associate,
1594                 NULL, sizeof (overlay_targ_associate_t) },
1595         { OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE,
1596                 NULL, overlay_target_disassociate,
1597                 NULL, sizeof (overlay_targ_id_t)        },
1598         { OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE,
1599                 NULL, overlay_target_degrade,
1600                 NULL, sizeof (overlay_targ_degrade_t)   },
1601         { OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE,
1602                 NULL, overlay_target_restore,
1603                 NULL, sizeof (overlay_targ_id_t)        },
1604         { OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE,
1605                 NULL, overlay_target_lookup_request,
1606                 NULL, sizeof (overlay_targ_lookup_t)    },
1607         { OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE,
1608                 NULL, overlay_target_lookup_respond,
1609                 NULL, sizeof (overlay_targ_resp_t)      },
1610         { OVERLAY_TARG_DROP, B_TRUE, B_FALSE,
1611                 NULL, overlay_target_lookup_drop,
1612                 NULL, sizeof (overlay_targ_resp_t)      },
1613         { OVERLAY_TARG_PKT, B_TRUE, B_TRUE,
1614                 overlay_target_pkt_copyin,
1615                 overlay_target_packet,
1616                 overlay_target_pkt_copyout,
1617                 sizeof (overlay_targ_pkt_t)             },
1618         { OVERLAY_TARG_INJECT, B_TRUE, B_FALSE,
1619                 overlay_target_pkt_copyin,
1620                 overlay_target_inject,
1621                 NULL, sizeof (overlay_targ_pkt_t)       },
1622         { OVERLAY_TARG_RESEND, B_TRUE, B_FALSE,
1623                 overlay_target_pkt_copyin,
1624                 overlay_target_resend,
1625                 NULL, sizeof (overlay_targ_pkt_t)       },
1626         { OVERLAY_TARG_LIST, B_FALSE, B_TRUE,
1627                 overlay_target_list_copyin,
1628                 overlay_target_ioctl_list,
1629                 overlay_target_list_copyout,
1630                 sizeof (overlay_targ_list_t)            },
1631         { OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE,
1632                 NULL, overlay_target_cache_get,
1633                 NULL, sizeof (overlay_targ_cache_t)     },
1634         { OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE,
1635                 NULL, overlay_target_cache_set,
1636                 NULL, sizeof (overlay_targ_cache_t)     },
1637         { OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE,
1638                 NULL, overlay_target_cache_remove,
1639                 NULL, sizeof (overlay_targ_cache_t)     },
1640         { OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE,
1641                 NULL, overlay_target_cache_flush,
1642                 NULL, sizeof (overlay_targ_cache_t)     },
1643         { OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE,
1644                 overlay_target_cache_iter_copyin,
1645                 overlay_target_cache_iter,
1646                 overlay_target_cache_iter_copyout,
1647                 sizeof (overlay_targ_cache_iter_t)              },
1648         { 0 }
1649 };
1650 
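     /*
      * Open the target management device.  The caller must pass
      * secpolicy_dl_config() and be in the global zone, and a writable open
      * must also be exclusive; only one exclusive open is permitted at any
      * time.
      */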
1651 int
1652 overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp)
1653 {
1654         minor_t mid;
1655         overlay_target_hdl_t *thdl;
1656 
1657         if (secpolicy_dl_config(credp) != 0)
1658                 return (EPERM);
1659 
1660         if (getminor(*devp) != 0)
1661                 return (ENXIO);
1662 
1663         if (otype & OTYP_BLK)
1664                 return (EINVAL);
1665 
1666         if (flags & ~(FREAD | FWRITE | FEXCL))
1667                 return (EINVAL);
1668 
1669         if ((flags & FWRITE) &&
1670             !(flags & FEXCL))
1671                 return (EINVAL);
1672 
1673         if (!(flags & FREAD) && !(flags & FWRITE))
1674                 return (EINVAL);
1675 
1676         if (crgetzoneid(credp) != GLOBAL_ZONEID)
1677                 return (EPERM);
1678 
1679         mid = id_alloc(overlay_thdl_idspace);
1680         if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) {
1681                 id_free(overlay_thdl_idspace, mid);
1682                 return (ENXIO);
1683         }
1684 
1685         thdl = ddi_get_soft_state(overlay_thdl_state, mid);
1686         VERIFY(thdl != NULL);
1687         thdl->oth_minor = mid;
1688         thdl->oth_zoneid = crgetzoneid(credp);
1689         thdl->oth_oflags = flags;
1690         mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL);
1691         list_create(&thdl->oth_outstanding, sizeof (overlay_target_entry_t),
1692             offsetof(overlay_target_entry_t, ote_qlink));
1693         *devp = makedevice(getmajor(*devp), mid);
1694 
1695         mutex_enter(&overlay_target_lock);
1696         if ((flags & FEXCL) && overlay_target_excl == B_TRUE) {
1697                 mutex_exit(&overlay_target_lock);
1698                 list_destroy(&thdl->oth_outstanding);
1699                 mutex_destroy(&thdl->oth_lock);
1700                 ddi_soft_state_free(overlay_thdl_state, mid);
1701                 id_free(overlay_thdl_idspace, mid);
1702                 return (EEXIST);
1703         } else if ((flags & FEXCL) != 0) {
1704                 VERIFY(overlay_target_excl == B_FALSE);
1705                 overlay_target_excl = B_TRUE;
1706         }
1707         list_insert_tail(&overlay_thdl_list, thdl);
1708         mutex_exit(&overlay_target_lock);
1709 
1710         return (0);
1711 }
1712 
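     /*
      * Generic ioctl entry point.  Look the command up in the dispatch table,
      * copy in the request (either the fixed-size structure or via the
      * command's own copyin routine), call the handler, and copy the result
      * back out if the command asks for it.
      */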
1713 /* ARGSUSED */
1714 int
1715 overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
1716     int *rvalp)
1717 {
1718         overlay_target_ioctl_t *ioc;
1719         overlay_target_hdl_t *thdl;
1720 
1721         if (secpolicy_dl_config(credp) != 0)
1722                 return (EPERM);
1723 
1724         if ((thdl = ddi_get_soft_state(overlay_thdl_state,
1725             getminor(dev))) == NULL)
1726                 return (ENXIO);
1727 
1728         for (ioc = &overlay_target_ioctab[0]; ioc->oti_cmd != 0; ioc++) {
1729                 int ret;
1730                 caddr_t buf;
1731                 size_t bufsize;
1732 
1733                 if (ioc->oti_cmd != cmd)
1734                         continue;
1735 
1736                 if (ioc->oti_write == B_TRUE && !(mode & FWRITE))
1737                         return (EBADF);
1738 
1739                 if (ioc->oti_copyin == NULL) {
1740                         bufsize = ioc->oti_size;
1741                         buf = kmem_alloc(bufsize, KM_SLEEP);
1742                         if (ddi_copyin((void *)(uintptr_t)arg, buf, bufsize,
1743                             mode & FKIOCTL) != 0) {
1744                                 kmem_free(buf, bufsize);
1745                                 return (EFAULT);
1746                         }
1747                 } else {
1748                         if ((ret = ioc->oti_copyin((void *)(uintptr_t)arg,
1749                             (void **)&buf, &bufsize, mode)) != 0)
1750                                 return (ret);
1751                 }
1752 
1753                 ret = ioc->oti_func(thdl, buf);
1754                 if (ret == 0 && ioc->oti_size != 0 &&
1755                     ioc->oti_ncopyout == B_TRUE) {
1756                         if (ioc->oti_copyout == NULL) {
1757                                 if (ddi_copyout(buf, (void *)(uintptr_t)arg,
1758                                     bufsize, mode & FKIOCTL) != 0)
1759                                         ret = EFAULT;
1760                         } else {
1761                                 ret = ioc->oti_copyout((void *)(uintptr_t)arg,
1762                                     buf, bufsize, mode);
1763                         }
1764                 }
1765 
1766                 kmem_free(buf, bufsize);
1767                 return (ret);
1768         }
1769 
1770         return (ENOTTY);
1771 }
1772 
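     /*
      * Close the target management device.  Any lookup requests that this
      * handle had claimed but not yet answered are returned to the global
      * pending list and the condition variable is signalled so that another
      * handle may claim them.  If this was the exclusive writer, that state
      * is cleared as well.
      */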
1773 /* ARGSUSED */
1774 int
1775 overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp)
1776 {
1777         overlay_target_hdl_t *thdl;
1778         overlay_target_entry_t *entry;
1779         minor_t mid = getminor(dev);
1780 
1781         if ((thdl = ddi_get_soft_state(overlay_thdl_state, mid)) == NULL)
1782                 return (ENXIO);
1783 
1784         mutex_enter(&overlay_target_lock);
1785         list_remove(&overlay_thdl_list, thdl);
1786         mutex_enter(&thdl->oth_lock);
1787         while ((entry = list_remove_head(&thdl->oth_outstanding)) != NULL)
1788                 list_insert_tail(&overlay_target_list, entry);
1789         cv_signal(&overlay_target_condvar);
1790         mutex_exit(&thdl->oth_lock);
1791         if ((thdl->oth_oflags & FEXCL) != 0) {
1792                 VERIFY(overlay_target_excl == B_TRUE);
1793                 overlay_target_excl = B_FALSE;
1794         }
1795         mutex_exit(&overlay_target_lock);
1796 
1797         list_destroy(&thdl->oth_outstanding);
1798         mutex_destroy(&thdl->oth_lock);
1799         mid = thdl->oth_minor;
1800         ddi_soft_state_free(overlay_thdl_state, mid);
1801         id_free(overlay_thdl_idspace, mid);
1802 
1803         return (0);
1804 }