Print this page
OS-1885 deadlock between vnic destroy and kstat read
OS-676 debug kernel blew assertion in dls_devnet_stat_create()
OS-428 add link zonename kstat
OS-406
OS-327
OS-276 global zone duplicate kstat when two zones have same vnic name
OS-249
@@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
*/
/*
* Datalink management routines.
*/
@@ -103,16 +104,17 @@
uint_t dd_flags;
zoneid_t dd_owner_zid; /* zone where node was created */
zoneid_t dd_zid; /* current zone */
boolean_t dd_prop_loaded;
taskqid_t dd_prop_taskid;
+ boolean_t dd_transient; /* link goes away when zone does */
} dls_devnet_t;
static int i_dls_devnet_create_iptun(const char *, const char *,
datalink_id_t *);
static int i_dls_devnet_destroy_iptun(datalink_id_t);
-static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
+static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
/*ARGSUSED*/
static int
i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
@@ -143,11 +145,16 @@
dls_zone_remove(datalink_id_t linkid, void *arg)
{
dls_devnet_t *ddp;
if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
- (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
+ /*
+ * Don't bother moving transient links back to the global zone
+ * since we will simply delete them in dls_devnet_unset.
+ */
+ if (!ddp->dd_transient)
+ (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
dls_devnet_rele_tmp(ddp);
}
return (0);
}
@@ -524,10 +531,11 @@
dlmgmt_getlinkid_retval_t retval;
int err;
getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ getlinkid.ld_zoneid = getzoneid();
if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
sizeof (retval))) == 0) {
*linkid = retval.lr_linkid;
}
@@ -738,16 +746,27 @@
/*
* Create the "link" kstats.
*/
static void
-dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
+dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid)
{
kstat_t *ksp;
+ char *nm;
+ char kname[MAXLINKNAMELEN];
- if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
- dls_devnet_stat_update, ddp, &ksp) == 0) {
+ if (zoneid != newzoneid) {
+ ASSERT(zoneid == GLOBAL_ZONEID);
+ (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid,
+ ddp->dd_linkname);
+ nm = kname;
+ } else {
+ nm = ddp->dd_linkname;
+ }
+
+ if (dls_stat_create("link", 0, nm, zoneid,
+ dls_devnet_stat_update, ddp, &ksp, newzoneid) == 0) {
ASSERT(ksp != NULL);
if (zoneid == ddp->dd_owner_zid) {
ASSERT(ddp->dd_ksp == NULL);
ddp->dd_ksp = ksp;
} else {
@@ -763,16 +782,16 @@
static void
dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
{
if (zoneid == ddp->dd_owner_zid) {
if (ddp->dd_ksp != NULL) {
- kstat_delete(ddp->dd_ksp);
+ dls_stat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
} else {
if (ddp->dd_zone_ksp != NULL) {
- kstat_delete(ddp->dd_zone_ksp);
+ dls_stat_delete(ddp->dd_zone_ksp);
ddp->dd_zone_ksp = NULL;
}
}
}
@@ -779,19 +798,29 @@
/*
* The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
* and create the new set using the new name.
*/
static void
-dls_devnet_stat_rename(dls_devnet_t *ddp)
+dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit)
{
if (ddp->dd_ksp != NULL) {
- kstat_delete(ddp->dd_ksp);
+ dls_stat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
- /* We can't rename a link while it's assigned to a non-global zone. */
+ if (zoneinit && ddp->dd_zone_ksp != NULL) {
+ dls_stat_delete(ddp->dd_zone_ksp);
+ ddp->dd_zone_ksp = NULL;
+ }
+ /*
+ * We can't rename a link while it's assigned to a non-global zone
+ * unless we're first initializing the zone while readying it.
+ */
ASSERT(ddp->dd_zone_ksp == NULL);
- dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
+ dls_devnet_stat_create(ddp, ddp->dd_owner_zid,
+ (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid));
+ if (zoneinit)
+ dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid);
}
/*
* Associate a linkid with a given link (identified by macname)
*/
@@ -876,20 +905,21 @@
* and ensure that *ddp is valid.
*/
rw_exit(&i_dls_devnet_lock);
if (err == 0) {
if (zoneid != GLOBAL_ZONEID &&
- (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
+ (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
+ B_FALSE)) != 0)
(void) dls_devnet_unset(macname, &linkid, B_TRUE);
/*
* The kstat subsystem holds its own locks (rather perimeter)
* before calling the ks_update (dls_devnet_stat_update) entry
* point which in turn grabs the i_dls_devnet_lock. So the
* lock hierarchy is kstat locks -> i_dls_devnet_lock.
*/
if (stat_create)
- dls_devnet_stat_create(ddp, zoneid);
+ dls_devnet_stat_create(ddp, zoneid, zoneid);
if (ddpp != NULL)
*ddpp = ddp;
}
return (err);
}
@@ -922,21 +952,82 @@
* property loading as part of the post attach hasn't yet completed.
*/
ASSERT(ddp->dd_ref != 0);
if ((ddp->dd_ref != 1) || (!wait &&
(ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
+ int zstatus = 0;
+
+ /*
+ * There are a couple of alternatives that might be going on
+ * here; a) the zone is shutting down and it has a transient
+ * link assigned, in which case we want to clean it up instead
+ * of moving it back to the global zone, or b) its possible
+ * that we're trying to clean up an orphaned vnic that was
+ * delegated to a zone and which wasn't cleaned up properly
+ * when the zone went away. Check for either of these cases
+ * before we simply return EBUSY.
+ *
+ * zstatus indicates which situation we are dealing with:
+ * 0 - means return EBUSY
+ * 1 - means case (a), cleanup transient link
+ * -1 - means case (b), orphained VNIC
+ */
+ if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
+ zone_t *zp;
+
+ if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
+ zstatus = -1;
+ } else {
+ if (ddp->dd_transient) {
+ zone_status_t s = zone_status_get(zp);
+
+ if (s >= ZONE_IS_SHUTTING_DOWN)
+ zstatus = 1;
+ }
+ zone_rele(zp);
+ }
+ }
+
+ if (zstatus == 0) {
mutex_exit(&ddp->dd_mutex);
rw_exit(&i_dls_devnet_lock);
return (EBUSY);
}
+ /*
+ * We want to delete the link, reset ref to 1;
+ */
+ if (zstatus == -1)
+ /* Log a warning, but continue in this case */
+ cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
+ ddp->dd_linkname);
+ ddp->dd_ref = 1;
+ }
+
ddp->dd_flags |= DD_CONDEMNED;
ddp->dd_ref--;
*id = ddp->dd_linkid;
- if (ddp->dd_zid != GLOBAL_ZONEID)
- (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
+ if (ddp->dd_zid != GLOBAL_ZONEID) {
+ /*
+ * We need to release the dd_mutex before we try and destroy the
+ * stat. When we destroy it, we'll need to grab the lock for the
+ * kstat but if there's a concurrent reader of the kstat, we'll
+ * be blocked on it. This will lead to deadlock because these
+ * kstats employ a ks_update function (dls_devnet_stat_update)
+ * which needs the dd_mutex that we currently hold.
+ *
+ * Because we've already flagged the dls_devnet_t as
+ * DD_CONDEMNED and we still have a write lock on
+ * i_dls_devnet_lock, we should be able to release the dd_mutex.
+ */
+ mutex_exit(&ddp->dd_mutex);
+ dls_devnet_stat_destroy(ddp, ddp->dd_zid);
+ mutex_enter(&ddp->dd_mutex);
+ (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
+ B_FALSE);
+ }
/*
* Remove this dls_devnet_t from the hash table.
*/
VERIFY(mod_hash_remove(i_dls_devnet_hash,
@@ -958,13 +1049,20 @@
cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
} else {
ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
}
- if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
+ if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
+ /*
+ * See the earlier call in this function for an explanation.
+ */
+ mutex_exit(&ddp->dd_mutex);
dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
+ mutex_enter(&ddp->dd_mutex);
+ }
+
ddp->dd_prop_loaded = B_FALSE;
ddp->dd_linkid = DATALINK_INVALID_LINKID;
ddp->dd_flags = 0;
mutex_exit(&ddp->dd_mutex);
kmem_cache_free(i_dls_devnet_cachep, ddp);
@@ -1259,13 +1357,19 @@
* physical link (id2). In this case, check that id1 and its associated
* mac is not held by any application, and update the link's linkid to id2.
*
* This case does not change the <link name, linkid> mapping, so the link's
* kstats need to be updated with using name associated the given id2.
+ *
+ * The zonename parameter is used to allow us to create a VNIC in the global
+ * zone which is assigned to a non-global zone. Since there is a race condition
+ * in the create process if two VNICs have the same name, we need to rename it
+ * after it has been assigned to the zone.
*/
int
-dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
+dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link,
+ boolean_t zoneinit)
{
dls_dev_handle_t ddh = NULL;
int err = 0;
dev_t phydev = 0;
dls_devnet_t *ddp;
@@ -1311,18 +1415,21 @@
/*
* Return EBUSY if any applications have this link open, if any thread
* is currently accessing the link kstats, or if the link is on-loan
* to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
* prevent any access to the kstats while we delete and recreate
- * kstats below.
+ * kstats below. However, we skip this check if we're renaming the
+ * vnic as part of bringing it up for a zone.
*/
mutex_enter(&ddp->dd_mutex);
+ if (!zoneinit) {
if (ddp->dd_ref > 1) {
mutex_exit(&ddp->dd_mutex);
err = EBUSY;
goto done;
}
+ }
ddp->dd_flags |= DD_KSTAT_CHANGING;
clear_dd_flag = B_TRUE;
mutex_exit(&ddp->dd_mutex);
@@ -1331,11 +1438,19 @@
sizeof (ddp->dd_linkname));
/* rename mac client name and its flow if exists */
if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
goto done;
+ if (zoneinit) {
+ char tname[MAXLINKNAMELEN];
+
+ (void) snprintf(tname, sizeof (tname), "z%d_%s",
+ ddp->dd_zid, link);
+ (void) mac_rename_primary(mh, tname);
+ } else {
(void) mac_rename_primary(mh, link);
+ }
mac_close(mh);
goto done;
}
/*
@@ -1404,11 +1519,11 @@
* function prevents any access to the dd_ksp while we delete and
* recreate it below.
*/
rw_exit(&i_dls_devnet_lock);
if (err == 0)
- dls_devnet_stat_rename(ddp);
+ dls_devnet_stat_rename(ddp, zoneinit);
if (clear_dd_flag) {
mutex_enter(&ddp->dd_mutex);
ddp->dd_flags &= ~DD_KSTAT_CHANGING;
mutex_exit(&ddp->dd_mutex);
@@ -1419,11 +1534,12 @@
softmac_rele_device(ddh);
return (err);
}
static int
-i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
+i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
+ boolean_t transient)
{
int err;
mac_perim_handle_t mph;
boolean_t upcall_done = B_FALSE;
datalink_id_t linkid = ddp->dd_linkid;
@@ -1452,10 +1568,11 @@
goto done;
upcall_done = B_TRUE;
}
if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
ddp->dd_zid = new_zoneid;
+ ddp->dd_transient = transient;
devnet_need_rebuild = B_TRUE;
}
done:
if (err != 0 && upcall_done) {
@@ -1466,11 +1583,11 @@
mac_perim_exit(mph);
return (err);
}
int
-dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
+dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient)
{
dls_devnet_t *ddp;
int err;
zoneid_t old_zid;
boolean_t refheld = B_FALSE;
@@ -1488,11 +1605,11 @@
if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
return (err);
refheld = B_TRUE;
}
- if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
+ if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) {
if (refheld)
dls_devnet_rele(ddp);
return (err);
}
@@ -1505,11 +1622,11 @@
/* Re-create kstats in the appropriate zones. */
if (old_zid != GLOBAL_ZONEID)
dls_devnet_stat_destroy(ddh, old_zid);
if (new_zid != GLOBAL_ZONEID)
- dls_devnet_stat_create(ddh, new_zid);
+ dls_devnet_stat_create(ddh, new_zid, new_zid);
return (0);
}
zoneid_t