Print this page
OS-1885 deadlock between vnic destroy and kstat read
OS-676 debug kernel blew assertion in dls_devnet_stat_create()
OS-428 add link zonename kstat
OS-406
OS-327
OS-276 global zone duplicate kstat when two zones have same vnic name
OS-249

*** 19,28 **** --- 19,29 ---- * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2013 Joyent, Inc. All rights reserved. */ /* * Datalink management routines. */
*** 103,118 **** uint_t dd_flags; zoneid_t dd_owner_zid; /* zone where node was created */ zoneid_t dd_zid; /* current zone */ boolean_t dd_prop_loaded; taskqid_t dd_prop_taskid; } dls_devnet_t; static int i_dls_devnet_create_iptun(const char *, const char *, datalink_id_t *); static int i_dls_devnet_destroy_iptun(datalink_id_t); ! static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t); static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t); /*ARGSUSED*/ static int i_dls_devnet_constructor(void *buf, void *arg, int kmflag) --- 104,120 ---- uint_t dd_flags; zoneid_t dd_owner_zid; /* zone where node was created */ zoneid_t dd_zid; /* current zone */ boolean_t dd_prop_loaded; taskqid_t dd_prop_taskid; + boolean_t dd_transient; /* link goes away when zone does */ } dls_devnet_t; static int i_dls_devnet_create_iptun(const char *, const char *, datalink_id_t *); static int i_dls_devnet_destroy_iptun(datalink_id_t); ! static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t); static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t); /*ARGSUSED*/ static int i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
*** 143,153 **** dls_zone_remove(datalink_id_t linkid, void *arg) { dls_devnet_t *ddp; if (dls_devnet_hold_tmp(linkid, &ddp) == 0) { ! (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID); dls_devnet_rele_tmp(ddp); } return (0); } --- 145,160 ---- dls_zone_remove(datalink_id_t linkid, void *arg) { dls_devnet_t *ddp; if (dls_devnet_hold_tmp(linkid, &ddp) == 0) { ! /* ! * Don't bother moving transient links back to the global zone ! * since we will simply delete them in dls_devnet_unset. ! */ ! if (!ddp->dd_transient) ! (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE); dls_devnet_rele_tmp(ddp); } return (0); }
*** 524,533 **** --- 531,541 ---- dlmgmt_getlinkid_retval_t retval; int err; getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID; (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN); + getlinkid.ld_zoneid = getzoneid(); if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval, sizeof (retval))) == 0) { *linkid = retval.lr_linkid; }
*** 738,753 **** /* * Create the "link" kstats. */ static void ! dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid) { kstat_t *ksp; ! if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid, ! dls_devnet_stat_update, ddp, &ksp) == 0) { ASSERT(ksp != NULL); if (zoneid == ddp->dd_owner_zid) { ASSERT(ddp->dd_ksp == NULL); ddp->dd_ksp = ksp; } else { --- 746,772 ---- /* * Create the "link" kstats. */ static void ! dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid) { kstat_t *ksp; + char *nm; + char kname[MAXLINKNAMELEN]; ! if (zoneid != newzoneid) { ! ASSERT(zoneid == GLOBAL_ZONEID); ! (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid, ! ddp->dd_linkname); ! nm = kname; ! } else { ! nm = ddp->dd_linkname; ! } ! ! if (dls_stat_create("link", 0, nm, zoneid, ! dls_devnet_stat_update, ddp, &ksp, newzoneid) == 0) { ASSERT(ksp != NULL); if (zoneid == ddp->dd_owner_zid) { ASSERT(ddp->dd_ksp == NULL); ddp->dd_ksp = ksp; } else {
*** 763,778 **** static void dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid) { if (zoneid == ddp->dd_owner_zid) { if (ddp->dd_ksp != NULL) { ! kstat_delete(ddp->dd_ksp); ddp->dd_ksp = NULL; } } else { if (ddp->dd_zone_ksp != NULL) { ! kstat_delete(ddp->dd_zone_ksp); ddp->dd_zone_ksp = NULL; } } } --- 782,797 ---- static void dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid) { if (zoneid == ddp->dd_owner_zid) { if (ddp->dd_ksp != NULL) { ! dls_stat_delete(ddp->dd_ksp); ddp->dd_ksp = NULL; } } else { if (ddp->dd_zone_ksp != NULL) { ! dls_stat_delete(ddp->dd_zone_ksp); ddp->dd_zone_ksp = NULL; } } }
*** 779,797 **** /* * The link has been renamed. Destroy the old non-legacy kstats ("link kstats") * and create the new set using the new name. */ static void ! dls_devnet_stat_rename(dls_devnet_t *ddp) { if (ddp->dd_ksp != NULL) { ! kstat_delete(ddp->dd_ksp); ddp->dd_ksp = NULL; } ! /* We can't rename a link while it's assigned to a non-global zone. */ ASSERT(ddp->dd_zone_ksp == NULL); ! dls_devnet_stat_create(ddp, ddp->dd_owner_zid); } /* * Associate a linkid with a given link (identified by macname) */ --- 798,826 ---- /* * The link has been renamed. Destroy the old non-legacy kstats ("link kstats") * and create the new set using the new name. */ static void ! dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit) { if (ddp->dd_ksp != NULL) { ! dls_stat_delete(ddp->dd_ksp); ddp->dd_ksp = NULL; } ! if (zoneinit && ddp->dd_zone_ksp != NULL) { ! dls_stat_delete(ddp->dd_zone_ksp); ! ddp->dd_zone_ksp = NULL; ! } ! /* ! * We can't rename a link while it's assigned to a non-global zone ! * unless we're first initializing the zone while readying it. ! */ ASSERT(ddp->dd_zone_ksp == NULL); ! dls_devnet_stat_create(ddp, ddp->dd_owner_zid, ! (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid)); ! if (zoneinit) ! dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid); } /* * Associate a linkid with a given link (identified by macname) */
*** 876,895 **** * and ensure that *ddp is valid. */ rw_exit(&i_dls_devnet_lock); if (err == 0) { if (zoneid != GLOBAL_ZONEID && ! (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0) (void) dls_devnet_unset(macname, &linkid, B_TRUE); /* * The kstat subsystem holds its own locks (rather perimeter) * before calling the ks_update (dls_devnet_stat_update) entry * point which in turn grabs the i_dls_devnet_lock. So the * lock hierarchy is kstat locks -> i_dls_devnet_lock. */ if (stat_create) ! dls_devnet_stat_create(ddp, zoneid); if (ddpp != NULL) *ddpp = ddp; } return (err); } --- 905,925 ---- * and ensure that *ddp is valid. */ rw_exit(&i_dls_devnet_lock); if (err == 0) { if (zoneid != GLOBAL_ZONEID && ! (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE, ! B_FALSE)) != 0) (void) dls_devnet_unset(macname, &linkid, B_TRUE); /* * The kstat subsystem holds its own locks (rather perimeter) * before calling the ks_update (dls_devnet_stat_update) entry * point which in turn grabs the i_dls_devnet_lock. So the * lock hierarchy is kstat locks -> i_dls_devnet_lock. */ if (stat_create) ! dls_devnet_stat_create(ddp, zoneid, zoneid); if (ddpp != NULL) *ddpp = ddp; } return (err); }
*** 922,942 **** * property loading as part of the post attach hasn't yet completed. */ ASSERT(ddp->dd_ref != 0); if ((ddp->dd_ref != 1) || (!wait && (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) { mutex_exit(&ddp->dd_mutex); rw_exit(&i_dls_devnet_lock); return (EBUSY); } ddp->dd_flags |= DD_CONDEMNED; ddp->dd_ref--; *id = ddp->dd_linkid; ! if (ddp->dd_zid != GLOBAL_ZONEID) ! (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE); /* * Remove this dls_devnet_t from the hash table. */ VERIFY(mod_hash_remove(i_dls_devnet_hash, --- 952,1033 ---- * property loading as part of the post attach hasn't yet completed. */ ASSERT(ddp->dd_ref != 0); if ((ddp->dd_ref != 1) || (!wait && (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) { + int zstatus = 0; + + /* + * There are a couple of alternatives that might be going on + * here; a) the zone is shutting down and it has a transient + * link assigned, in which case we want to clean it up instead + * of moving it back to the global zone, or b) it's possible + * that we're trying to clean up an orphaned vnic that was + * delegated to a zone and which wasn't cleaned up properly + * when the zone went away. Check for either of these cases + * before we simply return EBUSY. 
+ * + * zstatus indicates which situation we are dealing with: + * 0 - means return EBUSY + * 1 - means case (a), cleanup transient link + * -1 - means case (b), orphaned VNIC + */ + if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) { + zone_t *zp; + + if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) { + zstatus = -1; + } else { + if (ddp->dd_transient) { + zone_status_t s = zone_status_get(zp); + + if (s >= ZONE_IS_SHUTTING_DOWN) + zstatus = 1; + } + zone_rele(zp); + } + } + + if (zstatus == 0) { mutex_exit(&ddp->dd_mutex); rw_exit(&i_dls_devnet_lock); return (EBUSY); } + /* + * We want to delete the link, reset ref to 1. + */ + if (zstatus == -1) + /* Log a warning, but continue in this case */ + cmn_err(CE_WARN, "clear orphaned datalink: %s\n", + ddp->dd_linkname); + ddp->dd_ref = 1; + } + ddp->dd_flags |= DD_CONDEMNED; ddp->dd_ref--; *id = ddp->dd_linkid; ! if (ddp->dd_zid != GLOBAL_ZONEID) { ! /* ! * We need to release the dd_mutex before we try and destroy the ! * stat. When we destroy it, we'll need to grab the lock for the ! * kstat but if there's a concurrent reader of the kstat, we'll ! * be blocked on it. This will lead to deadlock because these ! * kstats employ a ks_update function (dls_devnet_stat_update) ! * which needs the dd_mutex that we currently hold. ! * ! * Because we've already flagged the dls_devnet_t as ! * DD_CONDEMNED and we still have a write lock on ! * i_dls_devnet_lock, we should be able to release the dd_mutex. ! */ ! mutex_exit(&ddp->dd_mutex); ! dls_devnet_stat_destroy(ddp, ddp->dd_zid); ! mutex_enter(&ddp->dd_mutex); ! (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE, ! B_FALSE); ! } /* * Remove this dls_devnet_t from the hash table. */ VERIFY(mod_hash_remove(i_dls_devnet_hash,
*** 958,970 **** cv_wait(&ddp->dd_cv, &ddp->dd_mutex); } else { ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL); } ! if (ddp->dd_linkid != DATALINK_INVALID_LINKID) dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid); ddp->dd_prop_loaded = B_FALSE; ddp->dd_linkid = DATALINK_INVALID_LINKID; ddp->dd_flags = 0; mutex_exit(&ddp->dd_mutex); kmem_cache_free(i_dls_devnet_cachep, ddp); --- 1049,1068 ---- cv_wait(&ddp->dd_cv, &ddp->dd_mutex); } else { ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL); } ! if (ddp->dd_linkid != DATALINK_INVALID_LINKID) { ! /* ! * See the earlier call in this function for an explanation. ! */ ! mutex_exit(&ddp->dd_mutex); dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid); + mutex_enter(&ddp->dd_mutex); + } + ddp->dd_prop_loaded = B_FALSE; ddp->dd_linkid = DATALINK_INVALID_LINKID; ddp->dd_flags = 0; mutex_exit(&ddp->dd_mutex); kmem_cache_free(i_dls_devnet_cachep, ddp);
*** 1259,1271 **** * physical link (id2). In this case, check that id1 and its associated * mac is not held by any application, and update the link's linkid to id2. * * This case does not change the <link name, linkid> mapping, so the link's * kstats need to be updated with using name associated the given id2. */ int ! dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link) { dls_dev_handle_t ddh = NULL; int err = 0; dev_t phydev = 0; dls_devnet_t *ddp; --- 1357,1375 ---- * physical link (id2). In this case, check that id1 and its associated * mac is not held by any application, and update the link's linkid to id2. * * This case does not change the <link name, linkid> mapping, so the link's * kstats need to be updated with using name associated the given id2. + * + * The zoneinit parameter is used to allow us to create a VNIC in the global + * zone which is assigned to a non-global zone. Since there is a race condition + * in the create process if two VNICs have the same name, we need to rename it + * after it has been assigned to the zone. */ int ! dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link, ! boolean_t zoneinit) { dls_dev_handle_t ddh = NULL; int err = 0; dev_t phydev = 0; dls_devnet_t *ddp;
*** 1311,1328 **** /* * Return EBUSY if any applications have this link open, if any thread * is currently accessing the link kstats, or if the link is on-loan * to a non-global zone. Then set the DD_KSTAT_CHANGING flag to * prevent any access to the kstats while we delete and recreate ! * kstats below. */ mutex_enter(&ddp->dd_mutex); if (ddp->dd_ref > 1) { mutex_exit(&ddp->dd_mutex); err = EBUSY; goto done; } ddp->dd_flags |= DD_KSTAT_CHANGING; clear_dd_flag = B_TRUE; mutex_exit(&ddp->dd_mutex); --- 1415,1435 ---- /* * Return EBUSY if any applications have this link open, if any thread * is currently accessing the link kstats, or if the link is on-loan * to a non-global zone. Then set the DD_KSTAT_CHANGING flag to * prevent any access to the kstats while we delete and recreate ! * kstats below. However, we skip this check if we're renaming the ! * vnic as part of bringing it up for a zone. */ mutex_enter(&ddp->dd_mutex); + if (!zoneinit) { if (ddp->dd_ref > 1) { mutex_exit(&ddp->dd_mutex); err = EBUSY; goto done; } + } ddp->dd_flags |= DD_KSTAT_CHANGING; clear_dd_flag = B_TRUE; mutex_exit(&ddp->dd_mutex);
*** 1331,1341 **** --- 1438,1456 ---- sizeof (ddp->dd_linkname)); /* rename mac client name and its flow if exists */ if ((err = mac_open(ddp->dd_mac, &mh)) != 0) goto done; + if (zoneinit) { + char tname[MAXLINKNAMELEN]; + + (void) snprintf(tname, sizeof (tname), "z%d_%s", + ddp->dd_zid, link); + (void) mac_rename_primary(mh, tname); + } else { (void) mac_rename_primary(mh, link); + } mac_close(mh); goto done; } /*
*** 1404,1414 **** * function prevents any access to the dd_ksp while we delete and * recreate it below. */ rw_exit(&i_dls_devnet_lock); if (err == 0) ! dls_devnet_stat_rename(ddp); if (clear_dd_flag) { mutex_enter(&ddp->dd_mutex); ddp->dd_flags &= ~DD_KSTAT_CHANGING; mutex_exit(&ddp->dd_mutex); --- 1519,1529 ---- * function prevents any access to the dd_ksp while we delete and * recreate it below. */ rw_exit(&i_dls_devnet_lock); if (err == 0) ! dls_devnet_stat_rename(ddp, zoneinit); if (clear_dd_flag) { mutex_enter(&ddp->dd_mutex); ddp->dd_flags &= ~DD_KSTAT_CHANGING; mutex_exit(&ddp->dd_mutex);
*** 1419,1429 **** softmac_rele_device(ddh); return (err); } static int ! i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop) { int err; mac_perim_handle_t mph; boolean_t upcall_done = B_FALSE; datalink_id_t linkid = ddp->dd_linkid; --- 1534,1545 ---- softmac_rele_device(ddh); return (err); } static int ! i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop, ! boolean_t transient) { int err; mac_perim_handle_t mph; boolean_t upcall_done = B_FALSE; datalink_id_t linkid = ddp->dd_linkid;
*** 1452,1461 **** --- 1568,1578 ---- goto done; upcall_done = B_TRUE; } if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) { ddp->dd_zid = new_zoneid; + ddp->dd_transient = transient; devnet_need_rebuild = B_TRUE; } done: if (err != 0 && upcall_done) {
*** 1466,1476 **** mac_perim_exit(mph); return (err); } int ! dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid) { dls_devnet_t *ddp; int err; zoneid_t old_zid; boolean_t refheld = B_FALSE; --- 1583,1593 ---- mac_perim_exit(mph); return (err); } int ! dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient) { dls_devnet_t *ddp; int err; zoneid_t old_zid; boolean_t refheld = B_FALSE;
*** 1488,1498 **** if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0) return (err); refheld = B_TRUE; } ! if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) { if (refheld) dls_devnet_rele(ddp); return (err); } --- 1605,1615 ---- if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0) return (err); refheld = B_TRUE; } ! if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) { if (refheld) dls_devnet_rele(ddp); return (err); }
*** 1505,1515 **** /* Re-create kstats in the appropriate zones. */ if (old_zid != GLOBAL_ZONEID) dls_devnet_stat_destroy(ddh, old_zid); if (new_zid != GLOBAL_ZONEID) ! dls_devnet_stat_create(ddh, new_zid); return (0); } zoneid_t --- 1622,1632 ---- /* Re-create kstats in the appropriate zones. */ if (old_zid != GLOBAL_ZONEID) dls_devnet_stat_destroy(ddh, old_zid); if (new_zid != GLOBAL_ZONEID) ! dls_devnet_stat_create(ddh, new_zid, new_zid); return (0); } zoneid_t