Print this page
OS-1885 deadlock between vnic destroy and kstat read
OS-676 debug kernel blew assertion in dls_devnet_stat_create()
OS-428 add link zonename kstat
OS-406
OS-327
OS-276 global zone duplicate kstat when two zones have same vnic name
OS-249
*** 19,28 ****
--- 19,29 ----
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
*/
/*
* Datalink management routines.
*/
*** 103,118 ****
uint_t dd_flags;
zoneid_t dd_owner_zid; /* zone where node was created */
zoneid_t dd_zid; /* current zone */
boolean_t dd_prop_loaded;
taskqid_t dd_prop_taskid;
} dls_devnet_t;
static int i_dls_devnet_create_iptun(const char *, const char *,
datalink_id_t *);
static int i_dls_devnet_destroy_iptun(datalink_id_t);
! static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
/*ARGSUSED*/
static int
i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
--- 104,120 ----
uint_t dd_flags;
zoneid_t dd_owner_zid; /* zone where node was created */
zoneid_t dd_zid; /* current zone */
boolean_t dd_prop_loaded;
taskqid_t dd_prop_taskid;
+ boolean_t dd_transient; /* link goes away when zone does */
} dls_devnet_t;
static int i_dls_devnet_create_iptun(const char *, const char *,
datalink_id_t *);
static int i_dls_devnet_destroy_iptun(datalink_id_t);
! static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
/*ARGSUSED*/
static int
i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
*** 143,153 ****
dls_zone_remove(datalink_id_t linkid, void *arg)
{
dls_devnet_t *ddp;
if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
! (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
dls_devnet_rele_tmp(ddp);
}
return (0);
}
--- 145,160 ----
dls_zone_remove(datalink_id_t linkid, void *arg)
{
dls_devnet_t *ddp;
if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
! /*
! * Don't bother moving transient links back to the global zone
! * since we will simply delete them in dls_devnet_unset.
! */
! if (!ddp->dd_transient)
! (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
dls_devnet_rele_tmp(ddp);
}
return (0);
}
*** 524,533 ****
--- 531,541 ----
dlmgmt_getlinkid_retval_t retval;
int err;
getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ getlinkid.ld_zoneid = getzoneid();
if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
sizeof (retval))) == 0) {
*linkid = retval.lr_linkid;
}
*** 738,753 ****
/*
* Create the "link" kstats.
*/
static void
! dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
{
kstat_t *ksp;
! if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
! dls_devnet_stat_update, ddp, &ksp) == 0) {
ASSERT(ksp != NULL);
if (zoneid == ddp->dd_owner_zid) {
ASSERT(ddp->dd_ksp == NULL);
ddp->dd_ksp = ksp;
} else {
--- 746,772 ----
/*
* Create the "link" kstats.
*/
static void
! dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid)
{
kstat_t *ksp;
+ char *nm;
+ char kname[MAXLINKNAMELEN];
! if (zoneid != newzoneid) {
! ASSERT(zoneid == GLOBAL_ZONEID);
! (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid,
! ddp->dd_linkname);
! nm = kname;
! } else {
! nm = ddp->dd_linkname;
! }
!
! if (dls_stat_create("link", 0, nm, zoneid,
! dls_devnet_stat_update, ddp, &ksp, newzoneid) == 0) {
ASSERT(ksp != NULL);
if (zoneid == ddp->dd_owner_zid) {
ASSERT(ddp->dd_ksp == NULL);
ddp->dd_ksp = ksp;
} else {
*** 763,778 ****
static void
dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
{
if (zoneid == ddp->dd_owner_zid) {
if (ddp->dd_ksp != NULL) {
! kstat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
} else {
if (ddp->dd_zone_ksp != NULL) {
! kstat_delete(ddp->dd_zone_ksp);
ddp->dd_zone_ksp = NULL;
}
}
}
--- 782,797 ----
static void
dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
{
if (zoneid == ddp->dd_owner_zid) {
if (ddp->dd_ksp != NULL) {
! dls_stat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
} else {
if (ddp->dd_zone_ksp != NULL) {
! dls_stat_delete(ddp->dd_zone_ksp);
ddp->dd_zone_ksp = NULL;
}
}
}
*** 779,797 ****
/*
* The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
* and create the new set using the new name.
*/
static void
! dls_devnet_stat_rename(dls_devnet_t *ddp)
{
if (ddp->dd_ksp != NULL) {
! kstat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
! /* We can't rename a link while it's assigned to a non-global zone. */
ASSERT(ddp->dd_zone_ksp == NULL);
! dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
}
/*
* Associate a linkid with a given link (identified by macname)
*/
--- 798,826 ----
/*
* The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
* and create the new set using the new name.
*/
static void
! dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit)
{
if (ddp->dd_ksp != NULL) {
! dls_stat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
! if (zoneinit && ddp->dd_zone_ksp != NULL) {
! dls_stat_delete(ddp->dd_zone_ksp);
! ddp->dd_zone_ksp = NULL;
! }
! /*
! * We can't rename a link while it's assigned to a non-global zone
! * unless we're first initializing the zone while readying it.
! */
ASSERT(ddp->dd_zone_ksp == NULL);
! dls_devnet_stat_create(ddp, ddp->dd_owner_zid,
! (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid));
! if (zoneinit)
! dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid);
}
/*
* Associate a linkid with a given link (identified by macname)
*/
*** 876,895 ****
* and ensure that *ddp is valid.
*/
rw_exit(&i_dls_devnet_lock);
if (err == 0) {
if (zoneid != GLOBAL_ZONEID &&
! (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
(void) dls_devnet_unset(macname, &linkid, B_TRUE);
/*
* The kstat subsystem holds its own locks (rather perimeter)
* before calling the ks_update (dls_devnet_stat_update) entry
* point which in turn grabs the i_dls_devnet_lock. So the
* lock hierarchy is kstat locks -> i_dls_devnet_lock.
*/
if (stat_create)
! dls_devnet_stat_create(ddp, zoneid);
if (ddpp != NULL)
*ddpp = ddp;
}
return (err);
}
--- 905,925 ----
* and ensure that *ddp is valid.
*/
rw_exit(&i_dls_devnet_lock);
if (err == 0) {
if (zoneid != GLOBAL_ZONEID &&
! (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
! B_FALSE)) != 0)
(void) dls_devnet_unset(macname, &linkid, B_TRUE);
/*
* The kstat subsystem holds its own locks (rather perimeter)
* before calling the ks_update (dls_devnet_stat_update) entry
* point which in turn grabs the i_dls_devnet_lock. So the
* lock hierarchy is kstat locks -> i_dls_devnet_lock.
*/
if (stat_create)
! dls_devnet_stat_create(ddp, zoneid, zoneid);
if (ddpp != NULL)
*ddpp = ddp;
}
return (err);
}
*** 922,942 ****
* property loading as part of the post attach hasn't yet completed.
*/
ASSERT(ddp->dd_ref != 0);
if ((ddp->dd_ref != 1) || (!wait &&
(ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
mutex_exit(&ddp->dd_mutex);
rw_exit(&i_dls_devnet_lock);
return (EBUSY);
}
ddp->dd_flags |= DD_CONDEMNED;
ddp->dd_ref--;
*id = ddp->dd_linkid;
! if (ddp->dd_zid != GLOBAL_ZONEID)
! (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
/*
* Remove this dls_devnet_t from the hash table.
*/
VERIFY(mod_hash_remove(i_dls_devnet_hash,
--- 952,1033 ----
* property loading as part of the post attach hasn't yet completed.
*/
ASSERT(ddp->dd_ref != 0);
if ((ddp->dd_ref != 1) || (!wait &&
(ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
+ int zstatus = 0;
+
+ /*
+ * There are a couple of alternatives that might be going on
+ * here; a) the zone is shutting down and it has a transient
+ * link assigned, in which case we want to clean it up instead
+ * of moving it back to the global zone, or b) it's possible
+ * that we're trying to clean up an orphaned vnic that was
+ * delegated to a zone and which wasn't cleaned up properly
+ * when the zone went away. Check for either of these cases
+ * before we simply return EBUSY.
+ *
+ * zstatus indicates which situation we are dealing with:
+ * 0 - means return EBUSY
+ * 1 - means case (a), cleanup transient link
+ * -1 - means case (b), orphaned VNIC
+ */
+ if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
+ zone_t *zp;
+
+ if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
+ zstatus = -1;
+ } else {
+ if (ddp->dd_transient) {
+ zone_status_t s = zone_status_get(zp);
+
+ if (s >= ZONE_IS_SHUTTING_DOWN)
+ zstatus = 1;
+ }
+ zone_rele(zp);
+ }
+ }
+
+ if (zstatus == 0) {
mutex_exit(&ddp->dd_mutex);
rw_exit(&i_dls_devnet_lock);
return (EBUSY);
}
+ /*
+ * We want to delete the link, so reset ref to 1.
+ */
+ if (zstatus == -1)
+ /* Log a warning, but continue in this case */
+ cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
+ ddp->dd_linkname);
+ ddp->dd_ref = 1;
+ }
+
ddp->dd_flags |= DD_CONDEMNED;
ddp->dd_ref--;
*id = ddp->dd_linkid;
! if (ddp->dd_zid != GLOBAL_ZONEID) {
! /*
! * We need to release the dd_mutex before we try and destroy the
! * stat. When we destroy it, we'll need to grab the lock for the
! * kstat but if there's a concurrent reader of the kstat, we'll
! * be blocked on it. This will lead to deadlock because these
! * kstats employ a ks_update function (dls_devnet_stat_update)
! * which needs the dd_mutex that we currently hold.
! *
! * Because we've already flagged the dls_devnet_t as
! * DD_CONDEMNED and we still have a write lock on
! * i_dls_devnet_lock, we should be able to release the dd_mutex.
! */
! mutex_exit(&ddp->dd_mutex);
! dls_devnet_stat_destroy(ddp, ddp->dd_zid);
! mutex_enter(&ddp->dd_mutex);
! (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
! B_FALSE);
! }
/*
* Remove this dls_devnet_t from the hash table.
*/
VERIFY(mod_hash_remove(i_dls_devnet_hash,
*** 958,970 ****
cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
} else {
ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
}
! if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
ddp->dd_prop_loaded = B_FALSE;
ddp->dd_linkid = DATALINK_INVALID_LINKID;
ddp->dd_flags = 0;
mutex_exit(&ddp->dd_mutex);
kmem_cache_free(i_dls_devnet_cachep, ddp);
--- 1049,1068 ----
cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
} else {
ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
}
! if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
! /*
! * See the earlier call in this function for an explanation.
! */
! mutex_exit(&ddp->dd_mutex);
dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
+ mutex_enter(&ddp->dd_mutex);
+ }
+
ddp->dd_prop_loaded = B_FALSE;
ddp->dd_linkid = DATALINK_INVALID_LINKID;
ddp->dd_flags = 0;
mutex_exit(&ddp->dd_mutex);
kmem_cache_free(i_dls_devnet_cachep, ddp);
*** 1259,1271 ****
* physical link (id2). In this case, check that id1 and its associated
* mac is not held by any application, and update the link's linkid to id2.
*
* This case does not change the <link name, linkid> mapping, so the link's
* kstats need to be updated with using name associated the given id2.
*/
int
! dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
{
dls_dev_handle_t ddh = NULL;
int err = 0;
dev_t phydev = 0;
dls_devnet_t *ddp;
--- 1357,1375 ----
* physical link (id2). In this case, check that id1 and its associated
* mac is not held by any application, and update the link's linkid to id2.
*
* This case does not change the <link name, linkid> mapping, so the link's
* kstats need to be updated with using name associated the given id2.
+ *
+ * The zoneinit parameter is used to allow us to create a VNIC in the global
+ * zone which is assigned to a non-global zone. Since there is a race condition
+ * in the create process if two VNICs have the same name, we need to rename it
+ * after it has been assigned to the zone.
*/
int
! dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link,
! boolean_t zoneinit)
{
dls_dev_handle_t ddh = NULL;
int err = 0;
dev_t phydev = 0;
dls_devnet_t *ddp;
*** 1311,1328 ****
/*
* Return EBUSY if any applications have this link open, if any thread
* is currently accessing the link kstats, or if the link is on-loan
* to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
* prevent any access to the kstats while we delete and recreate
! * kstats below.
*/
mutex_enter(&ddp->dd_mutex);
if (ddp->dd_ref > 1) {
mutex_exit(&ddp->dd_mutex);
err = EBUSY;
goto done;
}
ddp->dd_flags |= DD_KSTAT_CHANGING;
clear_dd_flag = B_TRUE;
mutex_exit(&ddp->dd_mutex);
--- 1415,1435 ----
/*
* Return EBUSY if any applications have this link open, if any thread
* is currently accessing the link kstats, or if the link is on-loan
* to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
* prevent any access to the kstats while we delete and recreate
! * kstats below. However, we skip this check if we're renaming the
! * vnic as part of bringing it up for a zone.
*/
mutex_enter(&ddp->dd_mutex);
+ if (!zoneinit) {
if (ddp->dd_ref > 1) {
mutex_exit(&ddp->dd_mutex);
err = EBUSY;
goto done;
}
+ }
ddp->dd_flags |= DD_KSTAT_CHANGING;
clear_dd_flag = B_TRUE;
mutex_exit(&ddp->dd_mutex);
*** 1331,1341 ****
--- 1438,1456 ----
sizeof (ddp->dd_linkname));
/* rename mac client name and its flow if exists */
if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
goto done;
+ if (zoneinit) {
+ char tname[MAXLINKNAMELEN];
+
+ (void) snprintf(tname, sizeof (tname), "z%d_%s",
+ ddp->dd_zid, link);
+ (void) mac_rename_primary(mh, tname);
+ } else {
(void) mac_rename_primary(mh, link);
+ }
mac_close(mh);
goto done;
}
/*
*** 1404,1414 ****
* function prevents any access to the dd_ksp while we delete and
* recreate it below.
*/
rw_exit(&i_dls_devnet_lock);
if (err == 0)
! dls_devnet_stat_rename(ddp);
if (clear_dd_flag) {
mutex_enter(&ddp->dd_mutex);
ddp->dd_flags &= ~DD_KSTAT_CHANGING;
mutex_exit(&ddp->dd_mutex);
--- 1519,1529 ----
* function prevents any access to the dd_ksp while we delete and
* recreate it below.
*/
rw_exit(&i_dls_devnet_lock);
if (err == 0)
! dls_devnet_stat_rename(ddp, zoneinit);
if (clear_dd_flag) {
mutex_enter(&ddp->dd_mutex);
ddp->dd_flags &= ~DD_KSTAT_CHANGING;
mutex_exit(&ddp->dd_mutex);
*** 1419,1429 ****
softmac_rele_device(ddh);
return (err);
}
static int
! i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
{
int err;
mac_perim_handle_t mph;
boolean_t upcall_done = B_FALSE;
datalink_id_t linkid = ddp->dd_linkid;
--- 1534,1545 ----
softmac_rele_device(ddh);
return (err);
}
static int
! i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
! boolean_t transient)
{
int err;
mac_perim_handle_t mph;
boolean_t upcall_done = B_FALSE;
datalink_id_t linkid = ddp->dd_linkid;
*** 1452,1461 ****
--- 1568,1578 ----
goto done;
upcall_done = B_TRUE;
}
if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
ddp->dd_zid = new_zoneid;
+ ddp->dd_transient = transient;
devnet_need_rebuild = B_TRUE;
}
done:
if (err != 0 && upcall_done) {
*** 1466,1476 ****
mac_perim_exit(mph);
return (err);
}
int
! dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
{
dls_devnet_t *ddp;
int err;
zoneid_t old_zid;
boolean_t refheld = B_FALSE;
--- 1583,1593 ----
mac_perim_exit(mph);
return (err);
}
int
! dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient)
{
dls_devnet_t *ddp;
int err;
zoneid_t old_zid;
boolean_t refheld = B_FALSE;
*** 1488,1498 ****
if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
return (err);
refheld = B_TRUE;
}
! if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
if (refheld)
dls_devnet_rele(ddp);
return (err);
}
--- 1605,1615 ----
if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
return (err);
refheld = B_TRUE;
}
! if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) {
if (refheld)
dls_devnet_rele(ddp);
return (err);
}
*** 1505,1515 ****
/* Re-create kstats in the appropriate zones. */
if (old_zid != GLOBAL_ZONEID)
dls_devnet_stat_destroy(ddh, old_zid);
if (new_zid != GLOBAL_ZONEID)
! dls_devnet_stat_create(ddh, new_zid);
return (0);
}
zoneid_t
--- 1622,1632 ----
/* Re-create kstats in the appropriate zones. */
if (old_zid != GLOBAL_ZONEID)
dls_devnet_stat_destroy(ddh, old_zid);
if (new_zid != GLOBAL_ZONEID)
! dls_devnet_stat_create(ddh, new_zid, new_zid);
return (0);
}
zoneid_t