Print this page
OS-1885 deadlock between vnic destroy and kstat read
OS-676 debug kernel blew assertion in dls_devnet_stat_create()
OS-428 add link zonename kstat
OS-406
OS-327
OS-276 global zone duplicate kstat when two zones have same vnic name
OS-249

@@ -19,10 +19,11 @@
  * CDDL HEADER END
  */
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ * Copyright (c) 2013 Joyent, Inc.  All rights reserved.
  */
 
 /*
  * Datalink management routines.
  */

@@ -103,16 +104,17 @@
         uint_t          dd_flags;
         zoneid_t        dd_owner_zid;   /* zone where node was created */
         zoneid_t        dd_zid;         /* current zone */
         boolean_t       dd_prop_loaded;
         taskqid_t       dd_prop_taskid;
+        boolean_t       dd_transient;   /* link goes away when zone does */
 } dls_devnet_t;
 
 static int i_dls_devnet_create_iptun(const char *, const char *,
     datalink_id_t *);
 static int i_dls_devnet_destroy_iptun(datalink_id_t);
-static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
+static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
 static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
 
 /*ARGSUSED*/
 static int
 i_dls_devnet_constructor(void *buf, void *arg, int kmflag)

@@ -143,11 +145,16 @@
 dls_zone_remove(datalink_id_t linkid, void *arg)
 {
         dls_devnet_t *ddp;
 
         if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
-                (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
+                /*
+                 * Don't bother moving transient links back to the global zone
+                 * since we will simply delete them in dls_devnet_unset.
+                 */
+                if (!ddp->dd_transient)
+                        (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
                 dls_devnet_rele_tmp(ddp);
         }
         return (0);
 }
 

@@ -524,10 +531,11 @@
         dlmgmt_getlinkid_retval_t       retval;
         int                             err;
 
         getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
         (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+        getlinkid.ld_zoneid = getzoneid();
 
         if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
             sizeof (retval))) == 0) {
                 *linkid = retval.lr_linkid;
         }

@@ -738,16 +746,27 @@
 
 /*
  * Create the "link" kstats.
  */
 static void
-dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
+dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid)
 {
         kstat_t *ksp;
+        char    *nm;
+        char    kname[MAXLINKNAMELEN];
 
-        if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
-            dls_devnet_stat_update, ddp, &ksp) == 0) {
+        if (zoneid != newzoneid) {
+                ASSERT(zoneid == GLOBAL_ZONEID);
+                (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid,
+                    ddp->dd_linkname);
+                nm = kname;
+        } else {
+                nm = ddp->dd_linkname;
+        }
+
+        if (dls_stat_create("link", 0, nm, zoneid,
+            dls_devnet_stat_update, ddp, &ksp, newzoneid) == 0) {
                 ASSERT(ksp != NULL);
                 if (zoneid == ddp->dd_owner_zid) {
                         ASSERT(ddp->dd_ksp == NULL);
                         ddp->dd_ksp = ksp;
                 } else {

@@ -763,16 +782,16 @@
 static void
 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
 {
         if (zoneid == ddp->dd_owner_zid) {
                 if (ddp->dd_ksp != NULL) {
-                        kstat_delete(ddp->dd_ksp);
+                        dls_stat_delete(ddp->dd_ksp);
                         ddp->dd_ksp = NULL;
                 }
         } else {
                 if (ddp->dd_zone_ksp != NULL) {
-                        kstat_delete(ddp->dd_zone_ksp);
+                        dls_stat_delete(ddp->dd_zone_ksp);
                         ddp->dd_zone_ksp = NULL;
                 }
         }
 }
 

@@ -779,19 +798,29 @@
 /*
  * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
  * and create the new set using the new name.
  */
 static void
-dls_devnet_stat_rename(dls_devnet_t *ddp)
+dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit)
 {
         if (ddp->dd_ksp != NULL) {
-                kstat_delete(ddp->dd_ksp);
+                dls_stat_delete(ddp->dd_ksp);
                 ddp->dd_ksp = NULL;
         }
-        /* We can't rename a link while it's assigned to a non-global zone. */
+        if (zoneinit && ddp->dd_zone_ksp != NULL) {
+                dls_stat_delete(ddp->dd_zone_ksp);
+                ddp->dd_zone_ksp = NULL;
+        }
+        /*
+         * We can't rename a link while it's assigned to a non-global zone
+         * unless we're first initializing the zone while readying it.
+         */
         ASSERT(ddp->dd_zone_ksp == NULL);
-        dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
+        dls_devnet_stat_create(ddp, ddp->dd_owner_zid,
+            (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid));
+        if (zoneinit)
+                dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid);
 }
 
 /*
  * Associate a linkid with a given link (identified by macname)
  */

@@ -876,20 +905,21 @@
          * and ensure that *ddp is valid.
          */
         rw_exit(&i_dls_devnet_lock);
         if (err == 0) {
                 if (zoneid != GLOBAL_ZONEID &&
-                    (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
+                    (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
+                    B_FALSE)) != 0)
                         (void) dls_devnet_unset(macname, &linkid, B_TRUE);
                 /*
                  * The kstat subsystem holds its own locks (rather perimeter)
                  * before calling the ks_update (dls_devnet_stat_update) entry
                  * point which in turn grabs the i_dls_devnet_lock. So the
                  * lock hierarchy is kstat locks -> i_dls_devnet_lock.
                  */
                 if (stat_create)
-                        dls_devnet_stat_create(ddp, zoneid);
+                        dls_devnet_stat_create(ddp, zoneid, zoneid);
                 if (ddpp != NULL)
                         *ddpp = ddp;
         }
         return (err);
 }

@@ -922,21 +952,82 @@
          * property loading as part of the post attach hasn't yet completed.
          */
         ASSERT(ddp->dd_ref != 0);
         if ((ddp->dd_ref != 1) || (!wait &&
             (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
+                int zstatus = 0;
+
+                /*
+                 * There are a couple of alternatives that might be going on
+                 * here; a) the zone is shutting down and it has a transient
+                 * link assigned, in which case we want to clean it up instead
+                 * of moving it back to the global zone, or b) its possible
+                 * that we're trying to clean up an orphaned vnic that was
+                 * delegated to a zone and which wasn't cleaned up properly
+                 * when the zone went away.  Check for either of these cases
+                 * before we simply return EBUSY.
+                 *
+                 * zstatus indicates which situation we are dealing with:
+                 *       0 - means return EBUSY
+                 *       1 - means case (a), cleanup transient link
+                 *      -1 - means case (b), orphained VNIC
+                 */
+                if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
+                        zone_t  *zp;
+
+                        if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
+                                zstatus = -1;
+                        } else {
+                                if (ddp->dd_transient) {
+                                        zone_status_t s = zone_status_get(zp);
+
+                                        if (s >= ZONE_IS_SHUTTING_DOWN)
+                                                zstatus = 1;
+                                }
+                                zone_rele(zp);
+                        }
+                }
+
+                if (zstatus == 0) {
                 mutex_exit(&ddp->dd_mutex);
                 rw_exit(&i_dls_devnet_lock);
                 return (EBUSY);
         }
 
+                /*
+                 * We want to delete the link, reset ref to 1;
+                 */
+                if (zstatus == -1)
+                        /* Log a warning, but continue in this case */
+                        cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
+                            ddp->dd_linkname);
+                ddp->dd_ref = 1;
+        }
+
         ddp->dd_flags |= DD_CONDEMNED;
         ddp->dd_ref--;
         *id = ddp->dd_linkid;
 
-        if (ddp->dd_zid != GLOBAL_ZONEID)
-                (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
+        if (ddp->dd_zid != GLOBAL_ZONEID) {
+                /*
+                 * We need to release the dd_mutex before we try and destroy the
+                 * stat. When we destroy it, we'll need to grab the lock for the
+                 * kstat but if there's a concurrent reader of the kstat, we'll
+                 * be blocked on it. This will lead to deadlock because these
+                 * kstats employ a ks_update function (dls_devnet_stat_update)
+                 * which needs the dd_mutex that we currently hold.
+                 *
+                 * Because we've already flagged the dls_devnet_t as
+                 * DD_CONDEMNED and we still have a write lock on
+                 * i_dls_devnet_lock, we should be able to release the dd_mutex.
+                 */
+                mutex_exit(&ddp->dd_mutex);
+                dls_devnet_stat_destroy(ddp, ddp->dd_zid);
+                mutex_enter(&ddp->dd_mutex);
+                (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
+                    B_FALSE);
+        }
 
         /*
          * Remove this dls_devnet_t from the hash table.
          */
         VERIFY(mod_hash_remove(i_dls_devnet_hash,

@@ -958,13 +1049,20 @@
                         cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
         } else {
                 ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
         }
 
-        if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
+        if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
+                /*
+                 * See the earlier call in this function for an explanation.
+                 */
+                mutex_exit(&ddp->dd_mutex);
                 dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
+                mutex_enter(&ddp->dd_mutex);
+        }
 
+
         ddp->dd_prop_loaded = B_FALSE;
         ddp->dd_linkid = DATALINK_INVALID_LINKID;
         ddp->dd_flags = 0;
         mutex_exit(&ddp->dd_mutex);
         kmem_cache_free(i_dls_devnet_cachep, ddp);

@@ -1259,13 +1357,19 @@
  *    physical link (id2). In this case, check that id1 and its associated
  *    mac is not held by any application, and update the link's linkid to id2.
  *
  *    This case does not change the <link name, linkid> mapping, so the link's
  *    kstats need to be updated with using name associated the given id2.
+ *
+ * The zonename parameter is used to allow us to create a VNIC in the global
+ * zone which is assigned to a non-global zone.  Since there is a race condition
+ * in the create process if two VNICs have the same name, we need to rename it
+ * after it has been assigned to the zone.
  */
 int
-dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
+dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link,
+    boolean_t zoneinit)
 {
         dls_dev_handle_t        ddh = NULL;
         int                     err = 0;
         dev_t                   phydev = 0;
         dls_devnet_t            *ddp;

@@ -1311,18 +1415,21 @@
         /*
          * Return EBUSY if any applications have this link open, if any thread
          * is currently accessing the link kstats, or if the link is on-loan
          * to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
          * prevent any access to the kstats while we delete and recreate
-         * kstats below.
+         * kstats below.  However, we skip this check if we're renaming the
+         * vnic as part of bringing it up for a zone.
          */
         mutex_enter(&ddp->dd_mutex);
+        if (!zoneinit) {
         if (ddp->dd_ref > 1) {
                 mutex_exit(&ddp->dd_mutex);
                 err = EBUSY;
                 goto done;
         }
+        }
 
         ddp->dd_flags |= DD_KSTAT_CHANGING;
         clear_dd_flag = B_TRUE;
         mutex_exit(&ddp->dd_mutex);
 

@@ -1331,11 +1438,19 @@
                     sizeof (ddp->dd_linkname));
 
                 /* rename mac client name and its flow if exists */
                 if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
                         goto done;
+                if (zoneinit) {
+                        char tname[MAXLINKNAMELEN];
+
+                        (void) snprintf(tname, sizeof (tname), "z%d_%s",
+                            ddp->dd_zid, link);
+                        (void) mac_rename_primary(mh, tname);
+                } else {
                 (void) mac_rename_primary(mh, link);
+                }
                 mac_close(mh);
                 goto done;
         }
 
         /*

@@ -1404,11 +1519,11 @@
          * function prevents any access to the dd_ksp while we delete and
          * recreate it below.
          */
         rw_exit(&i_dls_devnet_lock);
         if (err == 0)
-                dls_devnet_stat_rename(ddp);
+                dls_devnet_stat_rename(ddp, zoneinit);
 
         if (clear_dd_flag) {
                 mutex_enter(&ddp->dd_mutex);
                 ddp->dd_flags &= ~DD_KSTAT_CHANGING;
                 mutex_exit(&ddp->dd_mutex);

@@ -1419,11 +1534,12 @@
         softmac_rele_device(ddh);
         return (err);
 }
 
 static int
-i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
+i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
+    boolean_t transient)
 {
         int                     err;
         mac_perim_handle_t      mph;
         boolean_t               upcall_done = B_FALSE;
         datalink_id_t           linkid = ddp->dd_linkid;

@@ -1452,10 +1568,11 @@
                         goto done;
                 upcall_done = B_TRUE;
         }
         if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
                 ddp->dd_zid = new_zoneid;
+                ddp->dd_transient = transient;
                 devnet_need_rebuild = B_TRUE;
         }
 
 done:
         if (err != 0 && upcall_done) {

@@ -1466,11 +1583,11 @@
         mac_perim_exit(mph);
         return (err);
 }
 
 int
-dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
+dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient)
 {
         dls_devnet_t    *ddp;
         int             err;
         zoneid_t        old_zid;
         boolean_t       refheld = B_FALSE;

@@ -1488,11 +1605,11 @@
                 if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
                         return (err);
                 refheld = B_TRUE;
         }
 
-        if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
+        if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) {
                 if (refheld)
                         dls_devnet_rele(ddp);
                 return (err);
         }
 

@@ -1505,11 +1622,11 @@
 
         /* Re-create kstats in the appropriate zones. */
         if (old_zid != GLOBAL_ZONEID)
                 dls_devnet_stat_destroy(ddh, old_zid);
         if (new_zid != GLOBAL_ZONEID)
-                dls_devnet_stat_create(ddh, new_zid);
+                dls_devnet_stat_create(ddh, new_zid, new_zid);
 
         return (0);
 }
 
 zoneid_t