1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2019 Joyent, Inc.
  25  */
  26 /*
  27  * Copyright (c) 2016 by Delphix. All rights reserved.
  28  */
  29 
  30 /*
  31  * Datalink management routines.
  32  */
  33 
  34 #include <sys/types.h>
  35 #include <sys/door.h>
  36 #include <sys/zone.h>
  37 #include <sys/modctl.h>
  38 #include <sys/file.h>
  39 #include <sys/modhash.h>
  40 #include <sys/kstat.h>
  41 #include <sys/vnode.h>
  42 #include <sys/cmn_err.h>
  43 #include <sys/softmac.h>
  44 #include <sys/dls.h>
  45 #include <sys/dls_impl.h>
  46 #include <sys/stropts.h>
  47 #include <sys/netstack.h>
  48 #include <inet/iptun/iptun_impl.h>
  49 
  50 /*
  51  * This vanity name management module is treated as part of the GLD framework
  52  * and we don't hold any GLD framework lock across a call to any mac
  53  * function that needs to acquire the mac perimeter. The hierarchy is
  54  * mac perimeter -> framework locks
  55  */
  56 
/*
 * Per-netstack DLS state.  An instance is allocated in dls_stack_init(),
 * consulted in dls_stack_shutdown() and freed in dls_stack_fini().
 */
typedef struct dls_stack {
        /* zone ID obtained from netstackid_to_zoneid() at stack creation */
        zoneid_t        dlss_zoneid;
} dls_stack_t;
  60 
/* kmem cache from which dls_devnet_t structures are allocated */
static kmem_cache_t     *i_dls_devnet_cachep;
/* held while reading or replacing the dls_mgmt_dh upcall door handle */
static kmutex_t         i_dls_mgmt_lock;
/* taken as writer by dls_devnet_set() while it works on the devnet hashes */
static krwlock_t        i_dls_devnet_lock;
/* hash of dls_devnet_t keyed by dd_linkid */
static mod_hash_t       *i_dls_devnet_id_hash;
/* hash of dls_devnet_t keyed by dd_mac */
static mod_hash_t       *i_dls_devnet_hash;

/*
 * Reset to B_FALSE by dls_mgmt_init().
 * NOTE(review): the code that sets and consumes this flag is not in this
 * part of the file -- confirm its meaning against the /dev/net consumers.
 */
boolean_t               devnet_need_rebuild;

/* number of buckets requested for both devnet hash tables */
#define VLAN_HASHSZ     67      /* prime */
  70 
  71 /*
  72  * The following macros take a link name without the trailing PPA as input.
  73  * Opening a /dev/net node with one of these names causes a tunnel link to be
  74  * implicitly created in dls_devnet_hold_by_name() for backward compatibility
  75  * with Solaris 10 and prior.
  76  */
  77 #define IS_IPV4_TUN(name)       (strcmp((name), "ip.tun") == 0)
  78 #define IS_IPV6_TUN(name)       (strcmp((name), "ip6.tun") == 0)
  79 #define IS_6TO4_TUN(name)       (strcmp((name), "ip.6to4tun") == 0)
  80 #define IS_IPTUN_LINK(name)     (                                       \
  81     IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name))
  82 
/*
 * Upcall door handle.  NULL until dls_mgmt_door_set() opens the daemon's
 * door; read and replaced only while i_dls_mgmt_lock is held.
 */
static door_handle_t    dls_mgmt_dh = NULL;
  85 
  86 /* dls_devnet_t dd_flags */
  87 #define DD_CONDEMNED            0x1
  88 #define DD_IMPLICIT_IPTUN       0x2 /* Implicitly-created ip*.*tun* tunnel */
  89 #define DD_INITIALIZING         0x4
  90 
  91 /*
  92  * If the link is marked as initializing or condemned then it should
  93  * not be visible outside of the DLS framework.
  94  */
  95 #define DD_NOT_VISIBLE(flags)   (                                       \
  96         (flags & (DD_CONDEMNED | DD_INITIALIZING)) != 0)
  97 
  98 /*
  99  * This structure is used to keep the <linkid, macname> mapping.
 100  * This structure itself is not protected by the mac perimeter, but is
 101  * protected by the dd_mutex and i_dls_devnet_lock. Thus most of the
 102  * functions manipulating this structure such as dls_devnet_set/unset etc.
 103  * may be called while not holding the mac perimeter.
 104  */
typedef struct dls_devnet_s {
        /* link ID; key of this entry in i_dls_devnet_id_hash */
        datalink_id_t   dd_linkid;
        /* link name; also used to name the "link" kstats */
        char            dd_linkname[MAXLINKNAMELEN];
        /* mac name; key of this entry in i_dls_devnet_hash */
        char            dd_mac[MAXNAMELEN];
        kstat_t         *dd_ksp;        /* kstat in owner_zid */
        kstat_t         *dd_zone_ksp;   /* in dd_zid if != owner_zid */
        /* hold count; must be zero when the object is destroyed */
        uint32_t        dd_ref;
        /* protects dd_tref and the property-task fields below */
        kmutex_t        dd_mutex;
        /*
         * Signalled when dd_tref drops to zero and when the property
         * loading task completes.
         */
        kcondvar_t      dd_cv;
        /* count of temporary holds, dropped by dls_devnet_rele_tmp() */
        uint32_t        dd_tref;
        /* DD_* flags */
        uint_t          dd_flags;
        zoneid_t        dd_owner_zid;   /* zone where node was created */
        zoneid_t        dd_zid;         /* current zone */
        /* set to B_TRUE once dls_devnet_prop_task() has run */
        boolean_t       dd_prop_loaded;
        /* non-zero while the property loading task is outstanding */
        taskqid_t       dd_prop_taskid;
        boolean_t       dd_transient;   /* link goes away when zone does */
} dls_devnet_t;
 122 
 123 static int i_dls_devnet_create_iptun(const char *, const char *,
 124     datalink_id_t *);
 125 static int i_dls_devnet_destroy_iptun(datalink_id_t);
 126 static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
 127 static int dls_devnet_unset(mac_handle_t, datalink_id_t *, boolean_t);
 128 
 129 /*ARGSUSED*/
 130 static int
 131 i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
 132 {
 133         dls_devnet_t    *ddp = buf;
 134 
 135         bzero(buf, sizeof (dls_devnet_t));
 136         mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
 137         cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
 138         return (0);
 139 }
 140 
 141 /*ARGSUSED*/
 142 static void
 143 i_dls_devnet_destructor(void *buf, void *arg)
 144 {
 145         dls_devnet_t    *ddp = buf;
 146 
 147         VERIFY(ddp->dd_ksp == NULL);
 148         VERIFY(ddp->dd_ref == 0);
 149         VERIFY(ddp->dd_tref == 0);
 150         mutex_destroy(&ddp->dd_mutex);
 151         cv_destroy(&ddp->dd_cv);
 152 }
 153 
 154 /* ARGSUSED */
 155 static int
 156 dls_zone_remove(datalink_id_t linkid, void *arg)
 157 {
 158         dls_devnet_t *ddp;
 159 
 160         if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
 161                 /*
 162                  * Don't bother moving transient links back to the global zone
 163                  * since we will simply delete them in dls_devnet_unset.
 164                  */
 165                 if (!ddp->dd_transient)
 166                         (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
 167                 dls_devnet_rele_tmp(ddp);
 168         }
 169         return (0);
 170 }
 171 
 172 /* ARGSUSED */
 173 static void *
 174 dls_stack_init(netstackid_t stackid, netstack_t *ns)
 175 {
 176         dls_stack_t *dlss;
 177 
 178         dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
 179         dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
 180         return (dlss);
 181 }
 182 
 183 /* ARGSUSED */
 184 static void
 185 dls_stack_shutdown(netstackid_t stackid, void *arg)
 186 {
 187         dls_stack_t     *dlss = (dls_stack_t *)arg;
 188 
 189         /* Move remaining datalinks in this zone back to the global zone. */
 190         (void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
 191 }
 192 
 193 /* ARGSUSED */
 194 static void
 195 dls_stack_fini(netstackid_t stackid, void *arg)
 196 {
 197         dls_stack_t     *dlss = (dls_stack_t *)arg;
 198 
 199         kmem_free(dlss, sizeof (*dlss));
 200 }
 201 
 202 /*
 203  * Module initialization and finalization functions.
 204  */
 205 void
 206 dls_mgmt_init(void)
 207 {
 208         mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
 209         rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL);
 210 
 211         /*
 212          * Create a kmem_cache of dls_devnet_t structures.
 213          */
 214         i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
 215             sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
 216             i_dls_devnet_destructor, NULL, NULL, NULL, 0);
 217         ASSERT(i_dls_devnet_cachep != NULL);
 218 
 219         /*
 220          * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
 221          */
 222         i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
 223             VLAN_HASHSZ, mod_hash_null_valdtor);
 224 
 225         /*
 226          * Create a hash table, keyed by dd_mac
 227          */
 228         i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
 229             VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
 230             mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
 231 
 232         devnet_need_rebuild = B_FALSE;
 233 
 234         netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
 235             dls_stack_fini);
 236 }
 237 
/*
 * Undo dls_mgmt_init(): unregister the netstack callbacks first, then
 * destroy the hash tables, the kmem cache and finally the locks.
 */
void
dls_mgmt_fini(void)
{
        /* Stop receiving netstack callbacks before tearing state down. */
        netstack_unregister(NS_DLS);
        mod_hash_destroy_hash(i_dls_devnet_hash);
        mod_hash_destroy_hash(i_dls_devnet_id_hash);
        kmem_cache_destroy(i_dls_devnet_cachep);
        rw_destroy(&i_dls_devnet_lock);
        mutex_destroy(&i_dls_mgmt_lock);
}
 248 
 249 int
 250 dls_mgmt_door_set(boolean_t start)
 251 {
 252         int     err;
 253 
 254         /* handle daemon restart */
 255         mutex_enter(&i_dls_mgmt_lock);
 256         if (dls_mgmt_dh != NULL) {
 257                 door_ki_rele(dls_mgmt_dh);
 258                 dls_mgmt_dh = NULL;
 259         }
 260 
 261         if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
 262                 mutex_exit(&i_dls_mgmt_lock);
 263                 return (err);
 264         }
 265 
 266         mutex_exit(&i_dls_mgmt_lock);
 267 
 268         /*
 269          * Create and associate <link name, linkid> mapping for network devices
 270          * which are already attached before the daemon is started.
 271          */
 272         if (start)
 273                 softmac_recreate();
 274         return (0);
 275 }
 276 
 277 static boolean_t
 278 i_dls_mgmt_door_revoked(door_handle_t dh)
 279 {
 280         struct door_info info;
 281         extern int sys_shutdown;
 282 
 283         ASSERT(dh != NULL);
 284 
 285         if (sys_shutdown) {
 286                 cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
 287                 return (B_TRUE);
 288         }
 289 
 290         if (door_ki_info(dh, &info) != 0)
 291                 return (B_TRUE);
 292 
 293         return ((info.di_attributes & DOOR_REVOKED) != 0);
 294 }
 295 
/*
 * Upcall to the datalink management daemon (dlmgmtd).
 *
 * arg/asize describe the request; rbuf/rsize describe the caller-supplied
 * reply buffer.  Returns EBADF when no usable door handle is available,
 * the door error when the upcall itself fails, ENOSPC when the reply did
 * not fit in rbuf, EINVAL when the reply size differs from rsize, and
 * otherwise the lr_err value found in the reply.
 */
static int
i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
{
        door_arg_t                      darg, save_arg;
        door_handle_t                   dh;
        int                             err;
        /* number of upcall attempts so far, across all retry paths */
        int                             retry = 0;

#define MAXRETRYNUM     3

        ASSERT(arg);
        darg.data_ptr = arg;
        darg.data_size = asize;
        darg.desc_ptr = NULL;
        darg.desc_num = 0;
        darg.rbuf = rbuf;
        darg.rsize = rsize;
        /* Pristine copy, used to reset darg after a failed attempt. */
        save_arg = darg;

retry:
        /*
         * Pick up the current door handle under the lock and take a hold
         * on it so that it can be used after the lock is dropped.
         */
        mutex_enter(&i_dls_mgmt_lock);
        dh = dls_mgmt_dh;
        if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
                mutex_exit(&i_dls_mgmt_lock);
                return (EBADF);
        }
        door_ki_hold(dh);
        mutex_exit(&i_dls_mgmt_lock);

        for (;;) {
                retry++;
                if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
                    SIZE_MAX, 0)) == 0)
                        break;

                /*
                 * handle door call errors
                 */
                darg = save_arg;
                switch (err) {
                case EINTR:
                        /*
                         * If the operation which caused this door upcall gets
                         * interrupted, return directly.
                         */
                        goto done;
                case EAGAIN:
                        /*
                         * Repeat upcall if the maximum attempt limit has not
                         * been reached.
                         */
                        if (retry < MAXRETRYNUM) {
                                delay(2 * hz);
                                break;
                        }
                        cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
                        goto done;
                default:
                        /* A fatal door error */
                        if (i_dls_mgmt_door_revoked(dh)) {
                                cmn_err(CE_NOTE,
                                    "dls: dlmgmtd door service revoked\n");

                                /*
                                 * Drop the hold on the revoked handle and
                                 * start over with whatever handle is
                                 * installed now.
                                 */
                                if (retry < MAXRETRYNUM) {
                                        door_ki_rele(dh);
                                        goto retry;
                                }
                        }
                        cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
                        goto done;
                }
        }

        if (darg.rbuf != rbuf) {
                /*
                 * The size of the input rbuf was not big enough, so the
                 * upcall allocated the rbuf itself.  If this happens, assume
                 * that this was an invalid door call request.
                 */
                kmem_free(darg.rbuf, darg.rsize);
                err = ENOSPC;
                goto done;
        }

        /* The reply must be exactly as large as the caller expected. */
        if (darg.rsize != rsize) {
                err = EINVAL;
                goto done;
        }

        /* Hand back the status carried in the reply itself. */
        err = ((dlmgmt_retval_t *)rbuf)->lr_err;

done:
        door_ki_rele(dh);
        return (err);
}
 394 
 395 /*
 396  * Request the datalink management daemon to create a link with the attributes
 397  * below.  Upon success, zero is returned and linkidp contains the linkid for
 398  * the new link; otherwise, an errno is returned.
 399  *
 400  *     - dev            physical dev_t.  required for all physical links,
 401  *                      including GLDv3 links.  It will be used to force the
 402  *                      attachment of a physical device, hence the
 403  *                      registration of its mac
 404  *     - class          datalink class
 405  *     - media type     media type; DL_OTHER means unknown
 406  *     - persist        whether to persist the datalink
 407  */
 408 int
 409 dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
 410     uint32_t media, boolean_t persist, datalink_id_t *linkidp)
 411 {
 412         dlmgmt_upcall_arg_create_t      create;
 413         dlmgmt_create_retval_t          retval;
 414         int                             err;
 415 
 416         create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
 417         create.ld_class = class;
 418         create.ld_media = media;
 419         create.ld_phymaj = getmajor(dev);
 420         create.ld_phyinst = getminor(dev);
 421         create.ld_persist = persist;
 422         if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
 423             sizeof (create.ld_devname))
 424                 return (EINVAL);
 425 
 426         if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
 427             sizeof (retval))) == 0) {
 428                 *linkidp = retval.lr_linkid;
 429         }
 430         return (err);
 431 }
 432 
 433 /*
 434  * Request the datalink management daemon to destroy the specified link.
 435  * Returns zero upon success, or an errno upon failure.
 436  */
 437 int
 438 dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
 439 {
 440         dlmgmt_upcall_arg_destroy_t     destroy;
 441         dlmgmt_destroy_retval_t         retval;
 442 
 443         destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
 444         destroy.ld_linkid = linkid;
 445         destroy.ld_persist = persist;
 446 
 447         return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
 448             &retval, sizeof (retval)));
 449 }
 450 
 451 /*
 452  * Request the datalink management daemon to verify/update the information
 453  * for a physical link.  Upon success, get its linkid.
 454  *
 455  *     - media type     media type
 456  *     - novanity       whether this physical datalink supports vanity naming.
 457  *                      physical links that do not use the GLDv3 MAC plugin
 *                      cannot support vanity naming
 459  *
 460  * This function could fail with ENOENT or EEXIST.  Two cases return EEXIST:
 461  *
 462  * 1. A link with devname already exists, but the media type does not match.
 *    In this case, mediap will be set to the media type of the existing link.
 464  * 2. A link with devname already exists, but its link name does not match
 465  *    the device name, although this link does not support vanity naming.
 466  */
 467 int
 468 dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
 469     uint32_t *mediap, datalink_id_t *linkidp)
 470 {
 471         dlmgmt_upcall_arg_update_t      update;
 472         dlmgmt_update_retval_t          retval;
 473         int                             err;
 474 
 475         update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
 476 
 477         if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
 478             sizeof (update.ld_devname))
 479                 return (EINVAL);
 480 
 481         update.ld_media = media;
 482         update.ld_novanity = novanity;
 483 
 484         if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
 485             sizeof (retval))) == EEXIST) {
 486                 *linkidp = retval.lr_linkid;
 487                 *mediap = retval.lr_media;
 488         } else if (err == 0) {
 489                 *linkidp = retval.lr_linkid;
 490         }
 491 
 492         return (err);
 493 }
 494 
 495 /*
 496  * Request the datalink management daemon to get the information for a link.
 497  * Returns zero upon success, or an errno upon failure.
 498  *
 499  * Only fills in information for argument pointers that are non-NULL.
 500  * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
 501  */
 502 int
 503 dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
 504     datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
 505 {
 506         dlmgmt_door_getname_t   getname;
 507         dlmgmt_getname_retval_t retval;
 508         int                     err, len;
 509 
 510         getname.ld_cmd = DLMGMT_CMD_GETNAME;
 511         getname.ld_linkid = linkid;
 512 
 513         if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
 514             sizeof (retval))) != 0) {
 515                 return (err);
 516         }
 517 
 518         len = strlen(retval.lr_link);
 519         if (len <= 1 || len >= MAXLINKNAMELEN)
 520                 return (EINVAL);
 521 
 522         if (link != NULL)
 523                 (void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
 524         if (classp != NULL)
 525                 *classp = retval.lr_class;
 526         if (mediap != NULL)
 527                 *mediap = retval.lr_media;
 528         if (flagsp != NULL)
 529                 *flagsp = retval.lr_flags;
 530         return (0);
 531 }
 532 
 533 /*
 534  * Request the datalink management daemon to get the linkid for a link.
 535  * Returns a non-zero error code on failure.  The linkid argument is only
 536  * set on success (when zero is returned.)
 537  */
 538 int
 539 dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
 540 {
 541         dlmgmt_door_getlinkid_t         getlinkid;
 542         dlmgmt_getlinkid_retval_t       retval;
 543         int                             err;
 544 
 545         getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
 546         (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
 547         getlinkid.ld_zoneid = getzoneid();
 548 
 549         if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
 550             sizeof (retval))) == 0) {
 551                 *linkid = retval.lr_linkid;
 552         }
 553         return (err);
 554 }
 555 
 556 int
 557 dls_mgmt_get_linkid_in_zone(const char *link, datalink_id_t *linkid,
 558     zoneid_t zid)
 559 {
 560         dlmgmt_door_getlinkid_t         getlinkid;
 561         dlmgmt_getlinkid_retval_t       retval;
 562         int                             err;
 563 
 564         ASSERT(getzoneid() == GLOBAL_ZONEID || zid == getzoneid());
 565         getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
 566         (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
 567         getlinkid.ld_zoneid = zid;
 568 
 569         if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
 570             sizeof (retval))) == 0) {
 571                 *linkid = retval.lr_linkid;
 572         }
 573         return (err);
 574 }
 575 
 576 
 577 datalink_id_t
 578 dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
 579     datalink_media_t dmedia, uint32_t flags)
 580 {
 581         dlmgmt_door_getnext_t   getnext;
 582         dlmgmt_getnext_retval_t retval;
 583 
 584         getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
 585         getnext.ld_class = class;
 586         getnext.ld_dmedia = dmedia;
 587         getnext.ld_flags = flags;
 588         getnext.ld_linkid = linkid;
 589 
 590         if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
 591             sizeof (retval)) != 0) {
 592                 return (DATALINK_INVALID_LINKID);
 593         }
 594 
 595         return (retval.lr_linkid);
 596 }
 597 
 598 static int
 599 i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
 600     void *attrval, size_t *attrszp)
 601 {
 602         dlmgmt_upcall_arg_getattr_t     getattr;
 603         dlmgmt_getattr_retval_t         retval;
 604         int                             err;
 605 
 606         getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
 607         getattr.ld_linkid = linkid;
 608         (void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
 609 
 610         if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
 611             sizeof (retval))) == 0) {
 612                 if (*attrszp < retval.lr_attrsz)
 613                         return (EINVAL);
 614                 *attrszp = retval.lr_attrsz;
 615                 bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
 616         }
 617 
 618         return (err);
 619 }
 620 
 621 /*
 622  * Note that this function can only get devp successfully for non-VLAN link.
 623  */
 624 int
 625 dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
 626 {
 627         uint64_t        maj, inst;
 628         size_t          attrsz = sizeof (uint64_t);
 629 
 630         if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
 631             attrsz != sizeof (uint64_t) ||
 632             i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
 633             attrsz != sizeof (uint64_t)) {
 634                 return (EINVAL);
 635         }
 636 
 637         *devp = makedevice((major_t)maj, (minor_t)inst);
 638         return (0);
 639 }
 640 
 641 /*
 642  * Request the datalink management daemon to push in
 643  * all properties associated with the link.
 644  * Returns a non-zero error code on failure.
 645  */
 646 int
 647 dls_mgmt_linkprop_init(datalink_id_t linkid)
 648 {
 649         dlmgmt_door_linkprop_init_t     li;
 650         dlmgmt_linkprop_init_retval_t   retval;
 651         int                             err;
 652 
 653         li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
 654         li.ld_linkid = linkid;
 655 
 656         err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
 657         return (err);
 658 }
 659 
 660 static void
 661 dls_devnet_prop_task(void *arg)
 662 {
 663         dls_devnet_t            *ddp = arg;
 664 
 665         (void) dls_mgmt_linkprop_init(ddp->dd_linkid);
 666 
 667         mutex_enter(&ddp->dd_mutex);
 668         ddp->dd_prop_loaded = B_TRUE;
 669         ddp->dd_prop_taskid = 0;
 670         cv_broadcast(&ddp->dd_cv);
 671         mutex_exit(&ddp->dd_mutex);
 672 }
 673 
 674 /*
 675  * Ensure property loading task is completed.
 676  */
 677 void
 678 dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
 679 {
 680         mutex_enter(&ddp->dd_mutex);
 681         while (ddp->dd_prop_taskid != 0)
 682                 cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
 683         mutex_exit(&ddp->dd_mutex);
 684 }
 685 
 686 void
 687 dls_devnet_rele_tmp(dls_dl_handle_t dlh)
 688 {
 689         dls_devnet_t            *ddp = dlh;
 690 
 691         mutex_enter(&ddp->dd_mutex);
 692         ASSERT(ddp->dd_tref != 0);
 693         if (--ddp->dd_tref == 0)
 694                 cv_signal(&ddp->dd_cv);
 695         mutex_exit(&ddp->dd_mutex);
 696 }
 697 
 698 int
 699 dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
 700     dls_link_t **dlpp)
 701 {
 702         dls_dl_handle_t dlh;
 703         dls_link_t      *dlp;
 704         int             err;
 705 
 706         if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
 707                 return (err);
 708 
 709         if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
 710                 dls_devnet_rele_tmp(dlh);
 711                 return (err);
 712         }
 713 
 714         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 715 
 716         *ddhp = dlh;
 717         *dlpp = dlp;
 718         return (0);
 719 }
 720 
/*
 * Release the holds obtained by dls_devnet_hold_link(): first the
 * dls_link_t hold, then the temporary devnet hold.  The mac perimeter of
 * the link is expected to be held by the caller.
 */
void
dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
{
        ASSERT(MAC_PERIM_HELD(dlp->dl_mh));

        dls_link_rele(dlp);
        dls_devnet_rele_tmp(dlh);
}
 729 
 730 /*
 731  * "link" kstats related functions.
 732  */
 733 
 734 /*
 735  * Query the "link" kstats.
 736  *
 737  * We may be called from the kstat subsystem in an arbitrary context.
 738  * If the caller is the stack, the context could be an upcall data
 739  * thread. Hence we can't acquire the mac perimeter in this function
 740  * for fear of deadlock.
 741  */
 742 static int
 743 dls_devnet_stat_update(kstat_t *ksp, int rw)
 744 {
 745         datalink_id_t   linkid = (datalink_id_t)(uintptr_t)ksp->ks_private;
 746         dls_devnet_t    *ddp;
 747         dls_link_t      *dlp;
 748         int             err;
 749 
 750         if ((err = dls_devnet_hold_tmp(linkid, &ddp)) != 0) {
 751                 return (err);
 752         }
 753 
 754         /*
 755          * If a device detach happens at this time, it will block in
 756          * dls_devnet_unset since the dd_tref has been bumped in
 757          * dls_devnet_hold_tmp(). So the access to 'dlp' is safe even though
 758          * we don't hold the mac perimeter.
 759          */
 760         if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
 761             (mod_hash_val_t *)&dlp) != 0) {
 762                 dls_devnet_rele_tmp(ddp);
 763                 return (ENOENT);
 764         }
 765 
 766         err = dls_stat_update(ksp, dlp, rw);
 767 
 768         dls_devnet_rele_tmp(ddp);
 769         return (err);
 770 }
 771 
 772 /*
 773  * Create the "link" kstats.
 774  */
 775 static void
 776 dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid)
 777 {
 778         kstat_t *ksp;
 779         char    *nm;
 780         char    kname[MAXLINKNAMELEN];
 781 
 782         if (zoneid != newzoneid) {
 783                 ASSERT(zoneid == GLOBAL_ZONEID);
 784                 (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid,
 785                     ddp->dd_linkname);
 786                 nm = kname;
 787         } else {
 788                 nm = ddp->dd_linkname;
 789         }
 790 
 791         if (dls_stat_create("link", 0, nm, zoneid,
 792             dls_devnet_stat_update, (void *)(uintptr_t)ddp->dd_linkid,
 793             &ksp, newzoneid) == 0) {
 794                 ASSERT(ksp != NULL);
 795                 if (zoneid == ddp->dd_owner_zid) {
 796                         ASSERT(ddp->dd_ksp == NULL);
 797                         ddp->dd_ksp = ksp;
 798                 } else {
 799                         ASSERT(ddp->dd_zone_ksp == NULL);
 800                         ddp->dd_zone_ksp = ksp;
 801                 }
 802         }
 803 }
 804 
 805 /*
 806  * Destroy the "link" kstats.
 807  */
 808 static void
 809 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
 810 {
 811         if (zoneid == ddp->dd_owner_zid) {
 812                 if (ddp->dd_ksp != NULL) {
 813                         dls_stat_delete(ddp->dd_ksp);
 814                         ddp->dd_ksp = NULL;
 815                 }
 816         } else {
 817                 if (ddp->dd_zone_ksp != NULL) {
 818                         dls_stat_delete(ddp->dd_zone_ksp);
 819                         ddp->dd_zone_ksp = NULL;
 820                 }
 821         }
 822 }
 823 
 824 /*
 825  * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
 826  * and create the new set using the new name.
 827  */
 828 static void
 829 dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit)
 830 {
 831         if (ddp->dd_ksp != NULL) {
 832                 dls_stat_delete(ddp->dd_ksp);
 833                 ddp->dd_ksp = NULL;
 834         }
 835         if (zoneinit && ddp->dd_zone_ksp != NULL) {
 836                 dls_stat_delete(ddp->dd_zone_ksp);
 837                 ddp->dd_zone_ksp = NULL;
 838         }
 839         /*
 840          * We can't rename a link while it's assigned to a non-global zone
 841          * unless we're first initializing the zone while readying it.
 842          */
 843         ASSERT(ddp->dd_zone_ksp == NULL);
 844         dls_devnet_stat_create(ddp, ddp->dd_owner_zid,
 845             (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid));
 846         if (zoneinit)
 847                 dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid);
 848 }
 849 
 850 /*
 851  * Associate the linkid with the link identified by macname. If this
 852  * is called on behalf of a physical link then linkid may be
 853  * DATALINK_INVALID_LINKID. Otherwise, if called on behalf of a
 854  * virtual link, linkid must have a value.
 855  */
 856 static int
 857 dls_devnet_set(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid,
 858     dls_devnet_t **ddpp)
 859 {
 860         const char              *macname = mac_name(mh);
 861         dls_devnet_t            *ddp = NULL;
 862         datalink_class_t        class;
 863         int                     err;
 864         boolean_t               stat_create = B_FALSE;
 865         char                    linkname[MAXLINKNAMELEN];
 866 
 867         rw_enter(&i_dls_devnet_lock, RW_WRITER);
 868 
 869         /*
 870          * Don't allow callers to set a link name with a linkid that already
 871          * has a name association (that's what rename is for).
 872          */
 873         if (linkid != DATALINK_INVALID_LINKID) {
 874                 if (mod_hash_find(i_dls_devnet_id_hash,
 875                     (mod_hash_key_t)(uintptr_t)linkid,
 876                     (mod_hash_val_t *)&ddp) == 0) {
 877                         err = EEXIST;
 878                         goto done;
 879                 }
 880                 if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
 881                     NULL, NULL)) != 0)
 882                         goto done;
 883         }
 884 
 885         if ((err = mod_hash_find(i_dls_devnet_hash,
 886             (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
 887                 if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
 888                         err = EEXIST;
 889                         goto done;
 890                 }
 891 
 892                 /*
 893                  * If we arrive here we know we are attempting to set
 894                  * the linkid on a physical link. A virtual link
 895                  * should never arrive here because it should never
 896                  * call this function without a linkid. Virtual links
 897                  * are created through dlgmtmd and thus we know
 898                  * dlmgmtd is alive to assign it a linkid (search for
 899                  * uses of dladm_create_datalink_id() to prove this to
 900                  * yourself); we don't have the same guarantee for a
 901                  * physical link which may perform an upcall for a
 902                  * linkid while dlmgmtd is down but will continue
 903                  * creating a devnet without the linkid (see
 904                  * softmac_create_datalink() to see how physical link
 905                  * creation works). That is why there is no entry in
 906                  * the id hash but there is one in the macname hash --
 907                  * softmac couldn't acquire a linkid the first time it
 908                  * called this function.
 909                  *
 910                  * Because of the check above, we also know that
 911                  * ddp->dd_linkid is not set. Following this, the link
 912                  * must still be in the DD_INITIALIZING state because
 913                  * that flag is removed IFF dd_linkid is set. This is
 914                  * why we can ASSERT the DD_INITIALIZING flag below if
 915                  * the call to i_dls_devnet_setzid() fails.
 916                  */
 917                 if (linkid == DATALINK_INVALID_LINKID ||
 918                     class != DATALINK_CLASS_PHYS) {
 919                         err = EINVAL;
 920                         goto done;
 921                 }
 922 
 923                 ASSERT(ddp->dd_flags & DD_INITIALIZING);
 924 
 925         } else {
 926                 ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
 927                 ddp->dd_flags = DD_INITIALIZING;
 928                 ddp->dd_tref = 0;
 929                 ddp->dd_ref++;
 930                 ddp->dd_owner_zid = zoneid;
 931                 /*
 932                  * If we are creating a new devnet which will be owned by a NGZ
 933                  * then mark it as transient. This link has never been in the
 934                  * GZ, the GZ will not have a hold on its reference, and we do
 935                  * not want to return it to the GZ when the zone halts.
 936                  */
 937                 if (zoneid != GLOBAL_ZONEID)
 938                         ddp->dd_transient = B_TRUE;
 939                 (void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
 940                 VERIFY(mod_hash_insert(i_dls_devnet_hash,
 941                     (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
 942         }
 943 
 944         if (linkid != DATALINK_INVALID_LINKID) {
 945                 ddp->dd_linkid = linkid;
 946                 (void) strlcpy(ddp->dd_linkname, linkname,
 947                     sizeof (ddp->dd_linkname));
 948                 VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
 949                     (mod_hash_key_t)(uintptr_t)linkid,
 950                     (mod_hash_val_t)ddp) == 0);
 951                 devnet_need_rebuild = B_TRUE;
 952                 stat_create = B_TRUE;
 953         }
 954         err = 0;
 955 done:
 956         /*
 957          * It is safe to drop the i_dls_devnet_lock at this point. In the case
 958          * of physical devices, the softmac framework will fail the device
 959          * detach based on the smac_state or smac_hold_cnt. Other cases like
 960          * vnic and aggr use their own scheme to serialize creates and deletes
 961          * and ensure that *ddp is valid.
 962          */
 963         rw_exit(&i_dls_devnet_lock);
 964 
 965         if (err == 0 && zoneid != GLOBAL_ZONEID) {
 966                 /*
 967                  * If this link is being created directly within a non-global
 968                  * zone, then flag it as transient so that it will be cleaned
 969                  * up when the zone is shut down.
 970                  */
 971                 err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE, B_TRUE);
 972                 if (err != 0) {
 973                         /*
 974                          * At this point the link is marked as
 975                          * DD_INITIALIZING -- there can be no
 976                          * outstanding temp refs and therefore no need
 977                          * to wait for them.
 978                          */
 979                         ASSERT(ddp->dd_flags & DD_INITIALIZING);
 980                         (void) dls_devnet_unset(mh, &linkid, B_FALSE);
 981                         return (err);
 982                 }
 983         }
 984 
 985         if (err == 0) {
 986                 if (zoneid != GLOBAL_ZONEID &&
 987                     (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
 988                     B_FALSE)) != 0) {
 989                         /*
 990                          * At this point the link is marked as
 991                          * DD_INITIALIZING -- there can be no
 992                          * outstanding temp refs and therefore no need
 993                          * to wait for them.
 994                          */
 995                         ASSERT(ddp->dd_flags & DD_INITIALIZING);
 996                         (void) dls_devnet_unset(mh, &linkid, B_FALSE);
 997                         return (err);
 998                 }
 999 
1000                 /*
1001                  * The kstat subsystem holds its own locks (rather perimeter)
1002                  * before calling the ks_update (dls_devnet_stat_update) entry
1003                  * point which in turn grabs the i_dls_devnet_lock. So the
1004                  * lock hierarchy is kstat locks -> i_dls_devnet_lock.
1005                  */
1006                 if (stat_create)
1007                         dls_devnet_stat_create(ddp, zoneid, zoneid);
1008                 if (ddpp != NULL)
1009                         *ddpp = ddp;
1010 
1011                 mutex_enter(&ddp->dd_mutex);
1012                 if (linkid != DATALINK_INVALID_LINKID &&
1013                     !ddp->dd_prop_loaded && ddp->dd_prop_taskid == 0) {
1014                         ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1015                             dls_devnet_prop_task, ddp, TQ_SLEEP);
1016                 }
1017                 mutex_exit(&ddp->dd_mutex);
1018 
1019         }
1020         return (err);
1021 }
1022 
1023 /*
1024  * Disassociate the linkid from the link identified by macname. If
1025  * wait is B_TRUE, wait until all temporary refs are released and the
1026  * prop task is finished.
1027  *
1028  * If waiting then you SHOULD NOT call this from inside the MAC perim
1029  * as deadlock will ensue. Otherwise, this function is safe to call
1030  * from inside or outside the MAC perim.
1031  */
1032 static int
1033 dls_devnet_unset(mac_handle_t mh, datalink_id_t *id, boolean_t wait)
1034 {
1035         const char      *macname = mac_name(mh);
1036         dls_devnet_t    *ddp;
1037         int             err;
1038         mod_hash_val_t  val;
1039 
1040         rw_enter(&i_dls_devnet_lock, RW_WRITER);
1041         if ((err = mod_hash_find(i_dls_devnet_hash,
1042             (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
1043                 ASSERT(err == MH_ERR_NOTFOUND);
1044                 rw_exit(&i_dls_devnet_lock);
1045                 return (ENOENT);
1046         }
1047 
1048         mutex_enter(&ddp->dd_mutex);
1049 
1050         /*
1051          * Make sure downcalls into softmac_create or softmac_destroy from
1052          * devfs don't cv_wait on any devfs related condition for fear of
1053          * deadlock. Return EBUSY if the asynchronous thread started for
1054          * property loading as part of the post attach hasn't yet completed.
1055          */
1056         VERIFY(ddp->dd_ref != 0);
1057         if ((ddp->dd_ref != 1) || (!wait &&
1058             (ddp->dd_tref != 0 || ddp->dd_prop_taskid != 0))) {
1059                 int zstatus = 0;
1060 
1061                 /*
1062                  * There are a couple of alternatives that might be going on
1063                  * here; a) the zone is shutting down and it has a transient
1064                  * link assigned, in which case we want to clean it up instead
1065                  * of moving it back to the global zone, or b) its possible
1066                  * that we're trying to clean up an orphaned vnic that was
1067                  * delegated to a zone and which wasn't cleaned up properly
1068                  * when the zone went away.  Check for either of these cases
1069                  * before we simply return EBUSY.
1070                  *
1071                  * zstatus indicates which situation we are dealing with:
1072                  *       0 - means return EBUSY
1073                  *       1 - means case (a), cleanup transient link
1074                  *      -1 - means case (b), orphaned VNIC
1075                  */
1076                 if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
1077                         zone_t  *zp;
1078 
1079                         if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
1080                                 zstatus = -1;
1081                         } else {
1082                                 if (ddp->dd_transient) {
1083                                         zone_status_t s = zone_status_get(zp);
1084 
1085                                         if (s >= ZONE_IS_SHUTTING_DOWN)
1086                                                 zstatus = 1;
1087                                 }
1088                                 zone_rele(zp);
1089                         }
1090                 }
1091 
1092                 if (zstatus == 0) {
1093                         mutex_exit(&ddp->dd_mutex);
1094                         rw_exit(&i_dls_devnet_lock);
1095                         return (EBUSY);
1096                 }
1097 
1098                 /*
1099                  * We want to delete the link, reset ref to 1;
1100                  */
1101                 if (zstatus == -1) {
1102                         /* Log a warning, but continue in this case */
1103                         cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
1104                             ddp->dd_linkname);
1105                 }
1106                 ddp->dd_ref = 1;
1107         }
1108 
1109         ddp->dd_flags |= DD_CONDEMNED;
1110         ddp->dd_ref--;
1111         *id = ddp->dd_linkid;
1112 
1113         /*
1114          * Remove this dls_devnet_t from the hash table.
1115          */
1116         VERIFY(mod_hash_remove(i_dls_devnet_hash,
1117             (mod_hash_key_t)ddp->dd_mac, &val) == 0);
1118 
1119         if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
1120                 VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
1121                     (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);
1122 
1123                 devnet_need_rebuild = B_TRUE;
1124         }
1125         rw_exit(&i_dls_devnet_lock);
1126 
1127         /*
1128          * It is important to call i_dls_devnet_setzid() WITHOUT the
1129          * i_dls_devnet_lock held. The setzid call grabs the MAC
1130          * perim; thus causing DLS -> MAC lock ordering if performed
1131          * with the i_dls_devnet_lock held. This forces consumers to
1132          * grab the MAC perim before calling dls_devnet_unset() (the
1133          * locking rules state MAC -> DLS order). By performing the
1134          * setzid outside of the i_dls_devnet_lock consumers can
1135          * safely call dls_devnet_unset() outside the MAC perim.
1136          */
1137         if (ddp->dd_zid != GLOBAL_ZONEID) {
1138                 /*
1139                  * We need to release the dd_mutex before we try and destroy the
1140                  * stat. When we destroy it, we'll need to grab the lock for the
1141                  * kstat but if there's a concurrent reader of the kstat, we'll
1142                  * be blocked on it. This will lead to deadlock because these
1143                  * kstats employ a ks_update function (dls_devnet_stat_update)
1144                  * which needs the dd_mutex that we currently hold.
1145                  *
1146                  * Because we've already flagged the dls_devnet_t as
1147                  * DD_CONDEMNED and we still have a write lock on
1148                  * i_dls_devnet_lock, we should be able to release the dd_mutex.
1149                  */
1150                 mutex_exit(&ddp->dd_mutex);
1151                 dls_devnet_stat_destroy(ddp, ddp->dd_zid);
1152                 mutex_enter(&ddp->dd_mutex);
1153                 (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
1154                     B_FALSE);
1155         }
1156 
1157         if (wait) {
1158                 /*
1159                  * Wait until all temporary references are released.
1160                  * The holders of the tref need the MAC perim to
1161                  * perform their work and release the tref. To avoid
1162                  * deadlock, assert that the perim is never held here.
1163                  */
1164                 ASSERT0(MAC_PERIM_HELD(mh));
1165                 while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != 0))
1166                         cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
1167         } else {
1168                 VERIFY(ddp->dd_tref == 0);
1169                 VERIFY(ddp->dd_prop_taskid == 0);
1170         }
1171 
1172         if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
1173                 dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
1174         }
1175 
1176         ddp->dd_prop_loaded = B_FALSE;
1177         ddp->dd_linkid = DATALINK_INVALID_LINKID;
1178         ddp->dd_flags = 0;
1179         mutex_exit(&ddp->dd_mutex);
1180         kmem_cache_free(i_dls_devnet_cachep, ddp);
1181 
1182         return (0);
1183 }
1184 
1185 /*
1186  * This is a private hold routine used when we already have the dls_link_t, thus
1187  * we know that it cannot go away.
1188  */
1189 int
1190 dls_devnet_hold_tmp_by_link(dls_link_t *dlp, dls_dl_handle_t *ddhp)
1191 {
1192         int err;
1193         dls_devnet_t *ddp = NULL;
1194 
1195         rw_enter(&i_dls_devnet_lock, RW_WRITER);
1196         if ((err = mod_hash_find(i_dls_devnet_hash,
1197             (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t *)&ddp)) != 0) {
1198                 ASSERT(err == MH_ERR_NOTFOUND);
1199                 rw_exit(&i_dls_devnet_lock);
1200                 return (ENOENT);
1201         }
1202 
1203         mutex_enter(&ddp->dd_mutex);
1204         VERIFY(ddp->dd_ref > 0);
1205         if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1206                 mutex_exit(&ddp->dd_mutex);
1207                 rw_exit(&i_dls_devnet_lock);
1208                 return (ENOENT);
1209         }
1210         ddp->dd_tref++;
1211         mutex_exit(&ddp->dd_mutex);
1212         rw_exit(&i_dls_devnet_lock);
1213 
1214         *ddhp = ddp;
1215         return (0);
1216 }
1217 
1218 static int
1219 dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
1220     boolean_t tmp_hold)
1221 {
1222         dls_devnet_t            *ddp;
1223         int                     err;
1224 
1225         rw_enter(&i_dls_devnet_lock, RW_READER);
1226         if ((err = mod_hash_find(i_dls_devnet_id_hash,
1227             (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
1228                 ASSERT(err == MH_ERR_NOTFOUND);
1229                 rw_exit(&i_dls_devnet_lock);
1230                 return (ENOENT);
1231         }
1232 
1233         mutex_enter(&ddp->dd_mutex);
1234         VERIFY(ddp->dd_ref > 0);
1235         if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1236                 mutex_exit(&ddp->dd_mutex);
1237                 rw_exit(&i_dls_devnet_lock);
1238                 return (ENOENT);
1239         }
1240         if (tmp_hold)
1241                 ddp->dd_tref++;
1242         else
1243                 ddp->dd_ref++;
1244         mutex_exit(&ddp->dd_mutex);
1245         rw_exit(&i_dls_devnet_lock);
1246 
1247         *ddpp = ddp;
1248         return (0);
1249 }
1250 
1251 int
1252 dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
1253 {
1254         return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
1255 }
1256 
1257 /*
1258  * Hold the vanity naming structure (dls_devnet_t) temporarily.  The request to
1259  * delete the dls_devnet_t will wait until the temporary reference is released.
1260  */
1261 int
1262 dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
1263 {
1264         return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
1265 }
1266 
1267 /*
1268  * This funtion is called when a DLS client tries to open a device node.
1269  * This dev_t could be a result of a /dev/net node access (returned by
1270  * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1271  * In both cases, this function bumps up the reference count of the
1272  * dls_devnet_t structure. The reference is held as long as the device node
1273  * is open. In the case of /dev/net while it is true that the initial reference
1274  * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1275  * initial reference is released immediately in devnet_inactive_callback ->
1276  * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1277  * after dld_open completes, not when the /dev/net node is being closed).
1278  * To undo this function, call dls_devnet_rele()
1279  */
1280 int
1281 dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1282 {
1283         char                    name[MAXNAMELEN];
1284         char                    *drv;
1285         dls_devnet_t            *ddp;
1286         int                     err;
1287 
1288         if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1289                 return (EINVAL);
1290 
1291         (void) snprintf(name, sizeof (name), "%s%d", drv,
1292             DLS_MINOR2INST(getminor(dev)));
1293 
1294         rw_enter(&i_dls_devnet_lock, RW_READER);
1295         if ((err = mod_hash_find(i_dls_devnet_hash,
1296             (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1297                 ASSERT(err == MH_ERR_NOTFOUND);
1298                 rw_exit(&i_dls_devnet_lock);
1299                 return (ENOENT);
1300         }
1301         mutex_enter(&ddp->dd_mutex);
1302         VERIFY(ddp->dd_ref > 0);
1303         if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1304                 mutex_exit(&ddp->dd_mutex);
1305                 rw_exit(&i_dls_devnet_lock);
1306                 return (ENOENT);
1307         }
1308         ddp->dd_ref++;
1309         mutex_exit(&ddp->dd_mutex);
1310         rw_exit(&i_dls_devnet_lock);
1311 
1312         *ddhp = ddp;
1313         return (0);
1314 }
1315 
1316 void
1317 dls_devnet_rele(dls_devnet_t *ddp)
1318 {
1319         mutex_enter(&ddp->dd_mutex);
1320         VERIFY(ddp->dd_ref > 1);
1321         ddp->dd_ref--;
1322         if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1323                 mutex_exit(&ddp->dd_mutex);
1324                 if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1325                         ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1326                 return;
1327         }
1328         mutex_exit(&ddp->dd_mutex);
1329 }
1330 
1331 static int
1332 dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp, zoneid_t zid)
1333 {
1334         char                    drv[MAXLINKNAMELEN];
1335         uint_t                  ppa;
1336         major_t                 major;
1337         dev_t                   phy_dev, tmp_dev;
1338         datalink_id_t           linkid;
1339         dls_dev_handle_t        ddh;
1340         int                     err;
1341 
1342         if ((err = dls_mgmt_get_linkid_in_zone(link, &linkid, zid)) == 0)
1343                 return (dls_devnet_hold(linkid, ddpp));
1344 
1345         /*
1346          * If we failed to get the link's linkid because the dlmgmtd daemon
1347          * has not been started, return ENOENT so that the application can
1348          * fallback to open the /dev node.
1349          */
1350         if (err == EBADF)
1351                 return (ENOENT);
1352 
1353         if (err != ENOENT)
1354                 return (err);
1355 
1356         /*
1357          * If we reach this point it means dlmgmtd is up but has no
1358          * mapping for the link name.
1359          */
1360         if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS)
1361                 return (ENOENT);
1362 
1363         if (IS_IPTUN_LINK(drv)) {
1364                 if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0)
1365                         return (err);
1366                 /*
1367                  * At this point, an IP tunnel MAC has registered, which
1368                  * resulted in a link being created.
1369                  */
1370                 err = dls_devnet_hold(linkid, ddpp);
1371                 if (err != 0) {
1372                         VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1373                         return (err);
1374                 }
1375                 /*
1376                  * dls_devnet_rele() will know to destroy the implicit IP
1377                  * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1378                  * set.
1379                  */
1380                 (*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1381                 return (0);
1382         }
1383 
1384         /*
1385          * If this link:
1386          * (a) is a physical device, (b) this is the first boot, (c) the MAC
1387          * is not registered yet, and (d) we cannot find its linkid, then the
1388          * linkname is the same as the devname.
1389          *
1390          * First filter out invalid names.
1391          */
1392         if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1393                 return (ENOENT);
1394 
1395         phy_dev = makedevice(major, DLS_PPA2MINOR(ppa));
1396         if (softmac_hold_device(phy_dev, &ddh) != 0)
1397                 return (ENOENT);
1398 
1399         /*
1400          * At this time, the MAC should be registered, check its phy_dev using
1401          * the given name.
1402          */
1403         if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1404             (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1405                 softmac_rele_device(ddh);
1406                 return (err);
1407         }
1408         if (tmp_dev != phy_dev) {
1409                 softmac_rele_device(ddh);
1410                 return (ENOENT);
1411         }
1412 
1413         err = dls_devnet_hold(linkid, ddpp);
1414         softmac_rele_device(ddh);
1415         return (err);
1416 }
1417 
1418 int
1419 dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1420 {
1421         dls_devnet_t    *ddp;
1422 
1423         rw_enter(&i_dls_devnet_lock, RW_READER);
1424         if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1425             (mod_hash_val_t *)&ddp) != 0) {
1426                 rw_exit(&i_dls_devnet_lock);
1427                 return (ENOENT);
1428         }
1429 
1430         *linkidp = ddp->dd_linkid;
1431         rw_exit(&i_dls_devnet_lock);
1432         return (0);
1433 }
1434 
1435 /*
1436  * Get linkid for the given dev.
1437  */
1438 int
1439 dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1440 {
1441         char    macname[MAXNAMELEN];
1442         char    *drv;
1443 
1444         if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1445                 return (EINVAL);
1446 
1447         (void) snprintf(macname, sizeof (macname), "%s%d", drv,
1448             DLS_MINOR2INST(getminor(dev)));
1449         return (dls_devnet_macname2linkid(macname, linkidp));
1450 }
1451 
1452 /*
1453  * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1454  * link this VLAN is created on.
1455  */
1456 int
1457 dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1458 {
1459         dls_devnet_t    *ddp;
1460         int             err;
1461 
1462         if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1463                 return (err);
1464 
1465         err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1466         dls_devnet_rele_tmp(ddp);
1467         return (err);
1468 }
1469 
1470 /*
1471  * Handle the renaming requests.  There are two rename cases:
1472  *
1473  * 1. Request to rename a valid link (id1) to an non-existent link name
1474  *    (id2). In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
1475  *    id1 is held by any applications.
1476  *
1477  *    In this case, the link's kstats need to be updated using the given name.
1478  *
1479  * 2. Request to rename a valid link (id1) to the name of a REMOVED
1480  *    physical link (id2). In this case, check that id1 and its associated
1481  *    mac is not held by any application, and update the link's linkid to id2.
1482  *
1483  *    This case does not change the <link name, linkid> mapping, so the link's
1484  *    kstats need to be updated with using name associated the given id2.
1485  *
1486  * The zoneinit parameter is used to allow us to create a VNIC in the global
1487  * zone which is assigned to a non-global zone.  Since there is a race condition
1488  * in the create process if two VNICs have the same name, we need to rename it
1489  * after it has been assigned to the zone.
1490  */
1491 int
1492 dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link,
1493     boolean_t zoneinit)
1494 {
1495         dls_dev_handle_t        ddh = NULL;
1496         int                     err = 0;
1497         dev_t                   phydev = 0;
1498         dls_devnet_t            *ddp;
1499         mac_perim_handle_t      mph = NULL;
1500         mac_handle_t            mh;
1501         mod_hash_val_t          val;
1502 
1503         /*
1504          * In the second case, id2 must be a REMOVED physical link.
1505          */
1506         if ((id2 != DATALINK_INVALID_LINKID) &&
1507             (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
1508             softmac_hold_device(phydev, &ddh) == 0) {
1509                 softmac_rele_device(ddh);
1510                 return (EEXIST);
1511         }
1512 
1513         /*
1514          * Hold id1 to prevent it from being detached (if a physical link).
1515          */
1516         if (dls_mgmt_get_phydev(id1, &phydev) == 0)
1517                 (void) softmac_hold_device(phydev, &ddh);
1518 
1519         /*
1520          * The framework does not hold hold locks across calls to the
1521          * mac perimeter, hence enter the perimeter first. This also waits
1522          * for the property loading to finish.
1523          */
1524         if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
1525                 softmac_rele_device(ddh);
1526                 return (err);
1527         }
1528 
1529         rw_enter(&i_dls_devnet_lock, RW_WRITER);
1530         if ((err = mod_hash_find(i_dls_devnet_id_hash,
1531             (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
1532                 ASSERT(err == MH_ERR_NOTFOUND);
1533                 err = ENOENT;
1534                 goto done;
1535         }
1536 
1537         mutex_enter(&ddp->dd_mutex);
1538         if (!zoneinit) {
1539                 if (ddp->dd_ref > 1) {
1540                         mutex_exit(&ddp->dd_mutex);
1541                         err = EBUSY;
1542                         goto done;
1543                 }
1544         }
1545         mutex_exit(&ddp->dd_mutex);
1546 
1547         if (id2 == DATALINK_INVALID_LINKID) {
1548                 (void) strlcpy(ddp->dd_linkname, link,
1549                     sizeof (ddp->dd_linkname));
1550 
1551                 /* rename mac client name and its flow if exists */
1552                 if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1553                         goto done;
1554                 if (zoneinit) {
1555                         char tname[MAXLINKNAMELEN];
1556 
1557                         (void) snprintf(tname, sizeof (tname), "z%d_%s",
1558                             ddp->dd_zid, link);
1559                         (void) mac_rename_primary(mh, tname);
1560                 } else {
1561                         (void) mac_rename_primary(mh, link);
1562                 }
1563                 mac_close(mh);
1564                 goto done;
1565         }
1566 
1567         /*
1568          * The second case, check whether the MAC is used by any MAC
1569          * user.  This must be a physical link so ddh must not be NULL.
1570          */
1571         if (ddh == NULL) {
1572                 err = EINVAL;
1573                 goto done;
1574         }
1575 
1576         if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1577                 goto done;
1578 
1579         /*
1580          * We release the reference of the MAC which mac_open() is
1581          * holding. Note that this mac will not be unregistered
1582          * because the physical device is held.
1583          */
1584         mac_close(mh);
1585 
1586         /*
1587          * Check if there is any other MAC clients, if not, hold this mac
1588          * exclusively until we are done.
1589          */
1590         if ((err = mac_mark_exclusive(mh)) != 0)
1591                 goto done;
1592 
1593         /*
1594          * Update the link's linkid.
1595          */
1596         if ((err = mod_hash_find(i_dls_devnet_id_hash,
1597             (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
1598                 mac_unmark_exclusive(mh);
1599                 err = EEXIST;
1600                 goto done;
1601         }
1602 
1603         err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
1604         if (err != 0) {
1605                 mac_unmark_exclusive(mh);
1606                 goto done;
1607         }
1608 
1609         (void) mod_hash_remove(i_dls_devnet_id_hash,
1610             (mod_hash_key_t)(uintptr_t)id1, &val);
1611 
1612         ddp->dd_linkid = id2;
1613         (void) mod_hash_insert(i_dls_devnet_id_hash,
1614             (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);
1615 
1616         mac_unmark_exclusive(mh);
1617 
1618         /* load properties for new id */
1619         mutex_enter(&ddp->dd_mutex);
1620         ddp->dd_prop_loaded = B_FALSE;
1621         ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1622             dls_devnet_prop_task, ddp, TQ_SLEEP);
1623         mutex_exit(&ddp->dd_mutex);
1624 
1625 done:
1626         rw_exit(&i_dls_devnet_lock);
1627 
1628         if (err == 0)
1629                 dls_devnet_stat_rename(ddp, zoneinit);
1630 
1631         if (mph != NULL)
1632                 mac_perim_exit(mph);
1633         softmac_rele_device(ddh);
1634         return (err);
1635 }
1636 
1637 static int
1638 i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
1639     boolean_t transient)
1640 {
1641         int                     err;
1642         mac_perim_handle_t      mph;
1643         boolean_t               upcall_done = B_FALSE;
1644         datalink_id_t           linkid = ddp->dd_linkid;
1645         zoneid_t                old_zoneid = ddp->dd_zid;
1646         dlmgmt_door_setzoneid_t setzid;
1647         dlmgmt_setzoneid_retval_t retval;
1648 
1649         if (old_zoneid == new_zoneid)
1650                 return (0);
1651 
1652         if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1653                 return (err);
1654 
1655         /*
1656          * When changing the zoneid of an existing link, we need to tell
1657          * dlmgmtd about it.  dlmgmtd already knows the zoneid associated with
1658          * newly created links.
1659          */
1660         if (setprop) {
1661                 setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1662                 setzid.ld_linkid = linkid;
1663                 setzid.ld_zoneid = new_zoneid;
1664                 err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1665                     sizeof (retval));
1666                 if (err != 0)
1667                         goto done;
1668 
1669                 /*
1670                  * We set upcall_done only if the upcall is
1671                  * successful. This way, if dls_link_setzid() fails,
1672                  * we know another upcall must be done to reset the
1673                  * dlmgmtd state.
1674                  */
1675                 upcall_done = B_TRUE;
1676         }
1677         if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1678                 ddp->dd_zid = new_zoneid;
1679                 ddp->dd_transient = transient;
1680                 devnet_need_rebuild = B_TRUE;
1681         }
1682 
1683 done:
1684         if (err != 0 && upcall_done) {
1685                 setzid.ld_zoneid = old_zoneid;
1686                 (void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1687                     sizeof (retval));
1688         }
1689         mac_perim_exit(mph);
1690         return (err);
1691 }
1692 
1693 int
1694 dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient)
1695 {
1696         dls_devnet_t    *ddp;
1697         int             err;
1698         zoneid_t        old_zid;
1699         boolean_t       refheld = B_FALSE;
1700 
1701         old_zid = ddh->dd_zid;
1702 
1703         if (old_zid == new_zid)
1704                 return (0);
1705 
1706         /*
1707          * Acquire an additional reference to the link if it is being assigned
1708          * to a non-global zone from the global zone.
1709          */
1710         if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1711                 if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1712                         return (err);
1713                 refheld = B_TRUE;
1714         }
1715 
1716         if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) {
1717                 if (refheld)
1718                         dls_devnet_rele(ddp);
1719                 return (err);
1720         }
1721 
1722         /*
1723          * Release the additional reference if the link is returning to the
1724          * global zone from a non-global zone.
1725          */
1726         if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1727                 dls_devnet_rele(ddh);
1728 
1729         /* Re-create kstats in the appropriate zones. */
1730         if (old_zid != GLOBAL_ZONEID)
1731                 dls_devnet_stat_destroy(ddh, old_zid);
1732         if (new_zid != GLOBAL_ZONEID)
1733                 dls_devnet_stat_create(ddh, new_zid, new_zid);
1734 
1735         return (0);
1736 }
1737 
1738 zoneid_t
1739 dls_devnet_getzid(dls_dl_handle_t ddh)
1740 {
1741         return (((dls_devnet_t *)ddh)->dd_zid);
1742 }
1743 
1744 zoneid_t
1745 dls_devnet_getownerzid(dls_dl_handle_t ddh)
1746 {
1747         return (((dls_devnet_t *)ddh)->dd_owner_zid);
1748 }
1749 
1750 /*
1751  * Is linkid visible from zoneid?  A link is visible if it was created in the
1752  * zone, or if it is currently assigned to the zone.
1753  */
1754 boolean_t
1755 dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1756 {
1757         dls_devnet_t    *ddp;
1758         boolean_t       result;
1759 
1760         if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1761                 return (B_FALSE);
1762         result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1763         dls_devnet_rele_tmp(ddp);
1764         return (result);
1765 }
1766 
1767 /*
1768  * Access a vanity naming node.
1769  */
1770 int
1771 dls_devnet_open_in_zone(const char *link, dls_dl_handle_t *dhp, dev_t *devp,
1772     zoneid_t zid)
1773 {
1774         dls_devnet_t    *ddp;
1775         dls_link_t      *dlp;
1776         zoneid_t        czid = getzoneid();
1777         int             err;
1778         mac_perim_handle_t      mph;
1779 
1780         if (czid != GLOBAL_ZONEID && czid != zid)
1781                 return (ENOENT);
1782 
1783         if ((err = dls_devnet_hold_by_name(link, &ddp, zid)) != 0)
1784                 return (err);
1785 
1786         dls_devnet_prop_task_wait(ddp);
1787 
1788         /*
1789          * Opening a link that does not belong to the current non-global zone
1790          * is not allowed.
1791          */
1792         if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1793                 dls_devnet_rele(ddp);
1794                 return (ENOENT);
1795         }
1796 
1797         err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1798         if (err != 0) {
1799                 dls_devnet_rele(ddp);
1800                 return (err);
1801         }
1802 
1803         err = dls_link_hold_create(ddp->dd_mac, &dlp);
1804         mac_perim_exit(mph);
1805 
1806         if (err != 0) {
1807                 dls_devnet_rele(ddp);
1808                 return (err);
1809         }
1810 
1811         *dhp = ddp;
1812         *devp = dls_link_dev(dlp);
1813         return (0);
1814 }
1815 
1816 int
1817 dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1818 {
1819         return (dls_devnet_open_in_zone(link, dhp, devp, getzoneid()));
1820 }
1821 
1822 /*
1823  * Close access to a vanity naming node.
1824  */
1825 void
1826 dls_devnet_close(dls_dl_handle_t dlh)
1827 {
1828         dls_devnet_t    *ddp = dlh;
1829         dls_link_t      *dlp;
1830         mac_perim_handle_t      mph;
1831 
1832         VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
1833         VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);
1834 
1835         /*
1836          * One rele for the hold placed in dls_devnet_open, another for
1837          * the hold done just above
1838          */
1839         dls_link_rele(dlp);
1840         dls_link_rele(dlp);
1841         mac_perim_exit(mph);
1842 
1843         dls_devnet_rele(ddp);
1844 }
1845 
1846 /*
1847  * This is used by /dev/net to rebuild the nodes for readdir().  It is not
1848  * critical and no protection is needed.
1849  */
1850 boolean_t
1851 dls_devnet_rebuild()
1852 {
1853         boolean_t updated = devnet_need_rebuild;
1854 
1855         devnet_need_rebuild = B_FALSE;
1856         return (updated);
1857 }
1858 
1859 int
1860 dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1861 {
1862         dls_link_t      *dlp;
1863         dls_devnet_t    *ddp;
1864         int             err;
1865         mac_perim_handle_t mph;
1866 
1867         /*
1868          * Holding the mac perimeter ensures that the downcall from the
1869          * dlmgmt daemon which does the property loading does not proceed
1870          * until we relinquish the perimeter.
1871          */
1872         mac_perim_enter_by_mh(mh, &mph);
1873         /*
1874          * Make this association before we call dls_link_hold_create as
1875          * we need to use the linkid to get the user name for the link
1876          * when we create the MAC client.
1877          */
1878         if ((err = dls_devnet_set(mh, linkid, zoneid, &ddp)) == 0) {
1879                 if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1880                         mac_perim_exit(mph);
1881                         (void) dls_devnet_unset(mh, &linkid, B_FALSE);
1882                         return (err);
1883                 }
1884 
1885                 /*
1886                  * If dd_linkid is set then the link was successfully
1887                  * initialized. In this case we can remove the
1888                  * initializing flag and make the link visible to the
1889                  * rest of the system.
1890                  *
1891                  * If not set then we were called by softmac and it
1892                  * was unable to obtain a linkid for the physical link
1893                  * because dlmgmtd is down. In that case softmac will
1894                  * eventually obtain a linkid and call
1895                  * dls_devnet_recreate() to complete initialization.
1896                  */
1897                 mutex_enter(&ddp->dd_mutex);
1898                 if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1899                         ddp->dd_flags &= ~DD_INITIALIZING;
1900                 mutex_exit(&ddp->dd_mutex);
1901 
1902         }
1903 
1904         mac_perim_exit(mph);
1905         return (err);
1906 }
1907 
1908 /*
1909  * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1910  * This is called in the case that the dlmgmtd daemon is started later than
1911  * the physical devices get attached, and the linkid is only known after the
1912  * daemon starts.
1913  */
1914 int
1915 dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1916 {
1917         dls_devnet_t    *ddp;
1918         int             err;
1919 
1920         VERIFY(linkid != DATALINK_INVALID_LINKID);
1921         if ((err = dls_devnet_set(mh, linkid, GLOBAL_ZONEID, &ddp)) == 0) {
1922                 mutex_enter(&ddp->dd_mutex);
1923                 if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1924                         ddp->dd_flags &= ~DD_INITIALIZING;
1925                 mutex_exit(&ddp->dd_mutex);
1926         }
1927 
1928         return (err);
1929 
1930 }
1931 
1932 int
1933 dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1934 {
1935         int                     err;
1936         mac_perim_handle_t      mph;
1937 
1938         *idp = DATALINK_INVALID_LINKID;
1939         err = dls_devnet_unset(mh, idp, wait);
1940 
1941         /*
1942          * We continue on in the face of ENOENT because the devnet
1943          * unset and DLS link release are not atomic and we may have a
1944          * scenario where there is no entry in i_dls_devnet_hash for
1945          * the MAC name but there is an entry in i_dls_link_hash. For
1946          * example, if the following occurred:
1947          *
1948          * 1. dls_devnet_unset() returns success, and
1949          *
1950          * 2. dls_link_rele_by_name() fails with ENOTEMPTY because
1951          *    flows still exist, and
1952          *
1953          * 3. dls_devnet_set() fails to set the zone id and calls
1954          *    dls_devnet_unset() -- leaving an entry in
1955          *    i_dls_link_hash but no corresponding entry in
1956          *    i_dls_devnet_hash.
1957          *
1958          * Even if #3 wasn't true the dls_devnet_set() may fail for
1959          * different reasons in the future; the point is that it _can_
1960          * fail as part of its contract. We can't rely on it working
1961          * so we must assume that these two pieces of state (devnet
1962          * and link hashes), which should always be in sync, can get
1963          * out of sync and thus even if we get ENOENT from the devnet
1964          * hash we should still try to delete from the link hash just
1965          * in case.
1966          *
1967          * We could prevent the ENOTEMPTY from dls_link_rele_by_name()
1968          * by calling mac_disable() before calling
1969          * dls_devnet_destroy() but that's not currently possible due
1970          * to a long-standing bug. OpenSolaris 6791335: The semantics
1971          * of mac_disable() were modified by Crossbow such that
1972          * dls_devnet_destroy() needs to be called before
1973          * mac_disable() can succeed. This is because of the implicit
1974          * reference that dls has on the mac_impl_t.
1975          */
1976         if (err != 0 && err != ENOENT)
1977                 return (err);
1978 
1979         mac_perim_enter_by_mh(mh, &mph);
1980         err = dls_link_rele_by_name(mac_name(mh));
1981         if (err != 0) {
1982                 dls_devnet_t    *ddp;
1983 
1984                 /*
1985                  * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1986                  * be called to re-set the link when destroy fails.  The
1987                  * zoneid below will be incorrect if this function is ever
1988                  * called from kernel context or from a zone other than that
1989                  * which initially created the link.
1990                  */
1991                 (void) dls_devnet_set(mh, *idp, crgetzoneid(CRED()), &ddp);
1992 
1993                 /*
1994                  * You might think dd_linkid should always be set
1995                  * here, but in the case where dls_devnet_unset()
1996                  * returns ENOENT it will be DATALINK_INVALID_LINKID.
1997                  * Stay consistent with the rest of DLS and only
1998                  * remove the initializing flag if linkid is set.
1999                  */
2000                 mutex_enter(&ddp->dd_mutex);
2001                 if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
2002                         ddp->dd_flags &= ~DD_INITIALIZING;
2003                 mutex_exit(&ddp->dd_mutex);
2004         }
2005 
2006         mac_perim_exit(mph);
2007         return (err);
2008 }
2009 
2010 /*
2011  * Implicitly create an IP tunnel link.
2012  */
2013 static int
2014 i_dls_devnet_create_iptun(const char *linkname, const char *drvname,
2015     datalink_id_t *linkid)
2016 {
2017         int             err;
2018         iptun_kparams_t ik;
2019         uint32_t        media;
2020         netstack_t      *ns;
2021         major_t         iptun_major;
2022         dev_info_t      *iptun_dip;
2023 
2024         /* First ensure that the iptun device is attached. */
2025         if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
2026                 return (EINVAL);
2027         if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
2028                 return (EINVAL);
2029 
2030         if (IS_IPV4_TUN(drvname)) {
2031                 ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
2032                 media = DL_IPV4;
2033         } else if (IS_6TO4_TUN(drvname)) {
2034                 ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
2035                 media = DL_6TO4;
2036         } else if (IS_IPV6_TUN(drvname)) {
2037                 ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
2038                 media = DL_IPV6;
2039         }
2040         ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
2041 
2042         /* Obtain a datalink id for this tunnel. */
2043         err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media,
2044             B_FALSE, &ik.iptun_kparam_linkid);
2045         if (err != 0) {
2046                 ddi_release_devi(iptun_dip);
2047                 return (err);
2048         }
2049 
2050         ns = netstack_get_current();
2051         err = iptun_create(&ik, CRED());
2052         netstack_rele(ns);
2053 
2054         if (err != 0)
2055                 VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
2056         else
2057                 *linkid = ik.iptun_kparam_linkid;
2058 
2059         ddi_release_devi(iptun_dip);
2060         return (err);
2061 }
2062 
2063 static int
2064 i_dls_devnet_destroy_iptun(datalink_id_t linkid)
2065 {
2066         int err;
2067 
2068         /*
2069          * Note the use of zone_kcred() here as opposed to CRED().  This is
2070          * because the process that does the last close of this /dev/net node
2071          * may not have necessary privileges to delete this IP tunnel, but the
2072          * tunnel must always be implicitly deleted on last close.
2073          */
2074         if ((err = iptun_delete(linkid, zone_kcred())) == 0)
2075                 (void) dls_mgmt_destroy(linkid, B_FALSE);
2076         return (err);
2077 }
2078 
2079 const char *
2080 dls_devnet_link(dls_dl_handle_t ddh)
2081 {
2082         return (ddh->dd_linkname);
2083 }
2084 
2085 const char *
2086 dls_devnet_mac(dls_dl_handle_t ddh)
2087 {
2088         return (ddh->dd_mac);
2089 }
2090 
2091 datalink_id_t
2092 dls_devnet_linkid(dls_dl_handle_t ddh)
2093 {
2094         return (ddh->dd_linkid);
2095 }