Print this page
OS-1885 deadlock between vnic destroy and kstat read
OS-676 debug kernel blew assertion in dls_devnet_stat_create()
OS-428 add link zonename kstat
OS-406
OS-327
OS-276 global zone duplicate kstat when two zones have same vnic name
OS-249
        
*** 19,28 ****
--- 19,29 ----
   * CDDL HEADER END
   */
  /*
   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
   * Use is subject to license terms.
+  * Copyright (c) 2013 Joyent, Inc.  All rights reserved.
   */
  
  /*
   * Datalink management routines.
   */
*** 103,118 ****
          uint_t          dd_flags;
          zoneid_t        dd_owner_zid;   /* zone where node was created */
          zoneid_t        dd_zid;         /* current zone */
          boolean_t       dd_prop_loaded;
          taskqid_t       dd_prop_taskid;
  } dls_devnet_t;
  
  static int i_dls_devnet_create_iptun(const char *, const char *,
      datalink_id_t *);
  static int i_dls_devnet_destroy_iptun(datalink_id_t);
! static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
  static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
  
  /*ARGSUSED*/
  static int
  i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
--- 104,120 ----
          uint_t          dd_flags;
          zoneid_t        dd_owner_zid;   /* zone where node was created */
          zoneid_t        dd_zid;         /* current zone */
          boolean_t       dd_prop_loaded;
          taskqid_t       dd_prop_taskid;
+         boolean_t       dd_transient;   /* link goes away when zone does */
  } dls_devnet_t;
  
  static int i_dls_devnet_create_iptun(const char *, const char *,
      datalink_id_t *);
  static int i_dls_devnet_destroy_iptun(datalink_id_t);
! static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
  static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
  
  /*ARGSUSED*/
  static int
  i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
*** 143,153 ****
  dls_zone_remove(datalink_id_t linkid, void *arg)
  {
          dls_devnet_t *ddp;
  
          if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
!                 (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
                  dls_devnet_rele_tmp(ddp);
          }
          return (0);
  }
  
--- 145,160 ----
  dls_zone_remove(datalink_id_t linkid, void *arg)
  {
          dls_devnet_t *ddp;
  
          if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
!                 /*
!                  * Don't bother moving transient links back to the global zone
!                  * since we will simply delete them in dls_devnet_unset.
!                  */
!                 if (!ddp->dd_transient)
!                         (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
                  dls_devnet_rele_tmp(ddp);
          }
          return (0);
  }
  
*** 524,533 ****
--- 531,541 ----
          dlmgmt_getlinkid_retval_t       retval;
          int                             err;
  
          getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
          (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+         getlinkid.ld_zoneid = getzoneid();
  
          if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
              sizeof (retval))) == 0) {
                  *linkid = retval.lr_linkid;
          }
*** 738,753 ****
  
  /*
   * Create the "link" kstats.
   */
  static void
! dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
  {
          kstat_t *ksp;
  
!         if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
!             dls_devnet_stat_update, ddp, &ksp) == 0) {
                  ASSERT(ksp != NULL);
                  if (zoneid == ddp->dd_owner_zid) {
                          ASSERT(ddp->dd_ksp == NULL);
                          ddp->dd_ksp = ksp;
                  } else {
--- 746,772 ----
  
  /*
   * Create the "link" kstats.
   */
  static void
! dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid)
  {
          kstat_t *ksp;
+         char    *nm;
+         char    kname[MAXLINKNAMELEN];
  
!         if (zoneid != newzoneid) {
!                 ASSERT(zoneid == GLOBAL_ZONEID);
!                 (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid,
!                     ddp->dd_linkname);
!                 nm = kname;
!         } else {
!                 nm = ddp->dd_linkname;
!         }
! 
!         if (dls_stat_create("link", 0, nm, zoneid,
!             dls_devnet_stat_update, ddp, &ksp, newzoneid) == 0) {
                  ASSERT(ksp != NULL);
                  if (zoneid == ddp->dd_owner_zid) {
                          ASSERT(ddp->dd_ksp == NULL);
                          ddp->dd_ksp = ksp;
                  } else {
*** 763,778 ****
  static void
  dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
  {
          if (zoneid == ddp->dd_owner_zid) {
                  if (ddp->dd_ksp != NULL) {
!                         kstat_delete(ddp->dd_ksp);
                          ddp->dd_ksp = NULL;
                  }
          } else {
                  if (ddp->dd_zone_ksp != NULL) {
!                         kstat_delete(ddp->dd_zone_ksp);
                          ddp->dd_zone_ksp = NULL;
                  }
          }
  }
  
--- 782,797 ----
  static void
  dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
  {
          if (zoneid == ddp->dd_owner_zid) {
                  if (ddp->dd_ksp != NULL) {
!                         dls_stat_delete(ddp->dd_ksp);
                          ddp->dd_ksp = NULL;
                  }
          } else {
                  if (ddp->dd_zone_ksp != NULL) {
!                         dls_stat_delete(ddp->dd_zone_ksp);
                          ddp->dd_zone_ksp = NULL;
                  }
          }
  }
  
*** 779,797 ****
  /*
   * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
   * and create the new set using the new name.
   */
  static void
! dls_devnet_stat_rename(dls_devnet_t *ddp)
  {
          if (ddp->dd_ksp != NULL) {
!                 kstat_delete(ddp->dd_ksp);
                  ddp->dd_ksp = NULL;
          }
!         /* We can't rename a link while it's assigned to a non-global zone. */
          ASSERT(ddp->dd_zone_ksp == NULL);
!         dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
  }
  
  /*
   * Associate a linkid with a given link (identified by macname)
   */
--- 798,826 ----
  /*
   * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
   * and create the new set using the new name.
   */
  static void
! dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit)
  {
          if (ddp->dd_ksp != NULL) {
!                 dls_stat_delete(ddp->dd_ksp);
                  ddp->dd_ksp = NULL;
          }
!         if (zoneinit && ddp->dd_zone_ksp != NULL) {
!                 dls_stat_delete(ddp->dd_zone_ksp);
!                 ddp->dd_zone_ksp = NULL;
!         }
!         /*
!          * We can't rename a link while it's assigned to a non-global zone
!          * unless we're first initializing the zone while readying it.
!          */
          ASSERT(ddp->dd_zone_ksp == NULL);
!         dls_devnet_stat_create(ddp, ddp->dd_owner_zid,
!             (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid));
!         if (zoneinit)
!                 dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid);
  }
  
  /*
   * Associate a linkid with a given link (identified by macname)
   */
*** 876,895 ****
           * and ensure that *ddp is valid.
           */
          rw_exit(&i_dls_devnet_lock);
          if (err == 0) {
                  if (zoneid != GLOBAL_ZONEID &&
!                     (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
                          (void) dls_devnet_unset(macname, &linkid, B_TRUE);
                  /*
                   * The kstat subsystem holds its own locks (rather perimeter)
                   * before calling the ks_update (dls_devnet_stat_update) entry
                   * point which in turn grabs the i_dls_devnet_lock. So the
                   * lock hierarchy is kstat locks -> i_dls_devnet_lock.
                   */
                  if (stat_create)
!                         dls_devnet_stat_create(ddp, zoneid);
                  if (ddpp != NULL)
                          *ddpp = ddp;
          }
          return (err);
  }
--- 905,925 ----
           * and ensure that *ddp is valid.
           */
          rw_exit(&i_dls_devnet_lock);
          if (err == 0) {
                  if (zoneid != GLOBAL_ZONEID &&
!                     (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
!                     B_FALSE)) != 0)
                          (void) dls_devnet_unset(macname, &linkid, B_TRUE);
                  /*
                   * The kstat subsystem holds its own locks (rather perimeter)
                   * before calling the ks_update (dls_devnet_stat_update) entry
                   * point which in turn grabs the i_dls_devnet_lock. So the
                   * lock hierarchy is kstat locks -> i_dls_devnet_lock.
                   */
                  if (stat_create)
!                         dls_devnet_stat_create(ddp, zoneid, zoneid);
                  if (ddpp != NULL)
                          *ddpp = ddp;
          }
          return (err);
  }
*** 922,942 ****
           * property loading as part of the post attach hasn't yet completed.
           */
          ASSERT(ddp->dd_ref != 0);
          if ((ddp->dd_ref != 1) || (!wait &&
              (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
                  mutex_exit(&ddp->dd_mutex);
                  rw_exit(&i_dls_devnet_lock);
                  return (EBUSY);
          }
  
          ddp->dd_flags |= DD_CONDEMNED;
          ddp->dd_ref--;
          *id = ddp->dd_linkid;
  
!         if (ddp->dd_zid != GLOBAL_ZONEID)
!                 (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
  
          /*
           * Remove this dls_devnet_t from the hash table.
           */
          VERIFY(mod_hash_remove(i_dls_devnet_hash,
--- 952,1033 ----
           * property loading as part of the post attach hasn't yet completed.
           */
          ASSERT(ddp->dd_ref != 0);
          if ((ddp->dd_ref != 1) || (!wait &&
              (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
+                 int zstatus = 0;
+ 
+                 /*
+                  * There are a couple of alternatives that might be going on
+                  * here; a) the zone is shutting down and it has a transient
+                  * link assigned, in which case we want to clean it up instead
+                  * of moving it back to the global zone, or b) it's possible
+                  * that we're trying to clean up an orphaned vnic that was
+                  * delegated to a zone and which wasn't cleaned up properly
+                  * when the zone went away.  Check for either of these cases
+                  * before we simply return EBUSY.
+                  *
+                  * zstatus indicates which situation we are dealing with:
+                  *       0 - means return EBUSY
+                  *       1 - means case (a), cleanup transient link
+                  *      -1 - means case (b), orphaned VNIC
+                  */
+                 if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
+                         zone_t  *zp;
+ 
+                         if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
+                                 zstatus = -1;
+                         } else {
+                                 if (ddp->dd_transient) {
+                                         zone_status_t s = zone_status_get(zp);
+ 
+                                         if (s >= ZONE_IS_SHUTTING_DOWN)
+                                                 zstatus = 1;
+                                 }
+                                 zone_rele(zp);
+                         }
+                 }
+ 
+                 if (zstatus == 0) {
                          mutex_exit(&ddp->dd_mutex);
                          rw_exit(&i_dls_devnet_lock);
                          return (EBUSY);
                  }
  
+                 /*
+                  * We want to delete the link, reset ref to 1;
+                  */
+                 if (zstatus == -1)
+                         /* Log a warning, but continue in this case */
+                         cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
+                             ddp->dd_linkname);
+                 ddp->dd_ref = 1;
+         }
+ 
          ddp->dd_flags |= DD_CONDEMNED;
          ddp->dd_ref--;
          *id = ddp->dd_linkid;
  
!         if (ddp->dd_zid != GLOBAL_ZONEID) {
!                 /*
!                  * We need to release the dd_mutex before we try and destroy the
!                  * stat. When we destroy it, we'll need to grab the lock for the
!                  * kstat but if there's a concurrent reader of the kstat, we'll
!                  * be blocked on it. This will lead to deadlock because these
!                  * kstats employ a ks_update function (dls_devnet_stat_update)
!                  * which needs the dd_mutex that we currently hold.
!                  *
!                  * Because we've already flagged the dls_devnet_t as
!                  * DD_CONDEMNED and we still have a write lock on
!                  * i_dls_devnet_lock, we should be able to release the dd_mutex.
!                  */
!                 mutex_exit(&ddp->dd_mutex);
!                 dls_devnet_stat_destroy(ddp, ddp->dd_zid);
!                 mutex_enter(&ddp->dd_mutex);
!                 (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
!                     B_FALSE);
!         }
  
          /*
           * Remove this dls_devnet_t from the hash table.
           */
          VERIFY(mod_hash_remove(i_dls_devnet_hash,
*** 958,970 ****
                          cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
          } else {
                  ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
          }
  
!         if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
                  dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
  
          ddp->dd_prop_loaded = B_FALSE;
          ddp->dd_linkid = DATALINK_INVALID_LINKID;
          ddp->dd_flags = 0;
          mutex_exit(&ddp->dd_mutex);
          kmem_cache_free(i_dls_devnet_cachep, ddp);
--- 1049,1068 ----
                          cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
          } else {
                  ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
          }
  
!         if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
!                 /*
!                  * See the earlier call in this function for an explanation.
!                  */
!                 mutex_exit(&ddp->dd_mutex);
                  dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
+                 mutex_enter(&ddp->dd_mutex);
+         }
  
+ 
          ddp->dd_prop_loaded = B_FALSE;
          ddp->dd_linkid = DATALINK_INVALID_LINKID;
          ddp->dd_flags = 0;
          mutex_exit(&ddp->dd_mutex);
          kmem_cache_free(i_dls_devnet_cachep, ddp);
*** 1259,1271 ****
   *    physical link (id2). In this case, check that id1 and its associated
   *    mac is not held by any application, and update the link's linkid to id2.
   *
   *    This case does not change the <link name, linkid> mapping, so the link's
   *    kstats need to be updated with using name associated the given id2.
   */
  int
! dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
  {
          dls_dev_handle_t        ddh = NULL;
          int                     err = 0;
          dev_t                   phydev = 0;
          dls_devnet_t            *ddp;
--- 1357,1375 ----
   *    physical link (id2). In this case, check that id1 and its associated
   *    mac is not held by any application, and update the link's linkid to id2.
   *
   *    This case does not change the <link name, linkid> mapping, so the link's
   *    kstats need to be updated with using name associated the given id2.
+  *
+  * The zoneinit parameter is used to allow us to create a VNIC in the global
+  * zone which is assigned to a non-global zone.  Since there is a race condition
+  * in the create process if two VNICs have the same name, we need to rename it
+  * after it has been assigned to the zone.
   */
  int
! dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link,
!     boolean_t zoneinit)
  {
          dls_dev_handle_t        ddh = NULL;
          int                     err = 0;
          dev_t                   phydev = 0;
          dls_devnet_t            *ddp;
*** 1311,1328 ****
          /*
           * Return EBUSY if any applications have this link open, if any thread
           * is currently accessing the link kstats, or if the link is on-loan
           * to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
           * prevent any access to the kstats while we delete and recreate
!          * kstats below.
           */
          mutex_enter(&ddp->dd_mutex);
          if (ddp->dd_ref > 1) {
                  mutex_exit(&ddp->dd_mutex);
                  err = EBUSY;
                  goto done;
          }
  
          ddp->dd_flags |= DD_KSTAT_CHANGING;
          clear_dd_flag = B_TRUE;
          mutex_exit(&ddp->dd_mutex);
  
--- 1415,1435 ----
          /*
           * Return EBUSY if any applications have this link open, if any thread
           * is currently accessing the link kstats, or if the link is on-loan
           * to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
           * prevent any access to the kstats while we delete and recreate
!          * kstats below.  However, we skip this check if we're renaming the
!          * vnic as part of bringing it up for a zone.
           */
          mutex_enter(&ddp->dd_mutex);
+         if (!zoneinit) {
                  if (ddp->dd_ref > 1) {
                          mutex_exit(&ddp->dd_mutex);
                          err = EBUSY;
                          goto done;
                  }
+         }
  
          ddp->dd_flags |= DD_KSTAT_CHANGING;
          clear_dd_flag = B_TRUE;
          mutex_exit(&ddp->dd_mutex);
  
*** 1331,1341 ****
--- 1438,1456 ----
                      sizeof (ddp->dd_linkname));
  
                  /* rename mac client name and its flow if exists */
                  if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
                          goto done;
+                 if (zoneinit) {
+                         char tname[MAXLINKNAMELEN];
+ 
+                         (void) snprintf(tname, sizeof (tname), "z%d_%s",
+                             ddp->dd_zid, link);
+                         (void) mac_rename_primary(mh, tname);
+                 } else {
                          (void) mac_rename_primary(mh, link);
+                 }
                  mac_close(mh);
                  goto done;
          }
  
          /*
*** 1404,1414 ****
           * function prevents any access to the dd_ksp while we delete and
           * recreate it below.
           */
          rw_exit(&i_dls_devnet_lock);
          if (err == 0)
!                 dls_devnet_stat_rename(ddp);
  
          if (clear_dd_flag) {
                  mutex_enter(&ddp->dd_mutex);
                  ddp->dd_flags &= ~DD_KSTAT_CHANGING;
                  mutex_exit(&ddp->dd_mutex);
--- 1519,1529 ----
           * function prevents any access to the dd_ksp while we delete and
           * recreate it below.
           */
          rw_exit(&i_dls_devnet_lock);
          if (err == 0)
!                 dls_devnet_stat_rename(ddp, zoneinit);
  
          if (clear_dd_flag) {
                  mutex_enter(&ddp->dd_mutex);
                  ddp->dd_flags &= ~DD_KSTAT_CHANGING;
                  mutex_exit(&ddp->dd_mutex);
*** 1419,1429 ****
          softmac_rele_device(ddh);
          return (err);
  }
  
  static int
! i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
  {
          int                     err;
          mac_perim_handle_t      mph;
          boolean_t               upcall_done = B_FALSE;
          datalink_id_t           linkid = ddp->dd_linkid;
--- 1534,1545 ----
          softmac_rele_device(ddh);
          return (err);
  }
  
  static int
! i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
!     boolean_t transient)
  {
          int                     err;
          mac_perim_handle_t      mph;
          boolean_t               upcall_done = B_FALSE;
          datalink_id_t           linkid = ddp->dd_linkid;
*** 1452,1461 ****
--- 1568,1578 ----
                          goto done;
                  upcall_done = B_TRUE;
          }
          if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
                  ddp->dd_zid = new_zoneid;
+                 ddp->dd_transient = transient;
                  devnet_need_rebuild = B_TRUE;
          }
  
  done:
          if (err != 0 && upcall_done) {
*** 1466,1476 ****
          mac_perim_exit(mph);
          return (err);
  }
  
  int
! dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
  {
          dls_devnet_t    *ddp;
          int             err;
          zoneid_t        old_zid;
          boolean_t       refheld = B_FALSE;
--- 1583,1593 ----
          mac_perim_exit(mph);
          return (err);
  }
  
  int
! dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient)
  {
          dls_devnet_t    *ddp;
          int             err;
          zoneid_t        old_zid;
          boolean_t       refheld = B_FALSE;
*** 1488,1498 ****
                  if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
                          return (err);
                  refheld = B_TRUE;
          }
  
!         if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
                  if (refheld)
                          dls_devnet_rele(ddp);
                  return (err);
          }
  
--- 1605,1615 ----
                  if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
                          return (err);
                  refheld = B_TRUE;
          }
  
!         if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) {
                  if (refheld)
                          dls_devnet_rele(ddp);
                  return (err);
          }
  
*** 1505,1515 ****
  
          /* Re-create kstats in the appropriate zones. */
          if (old_zid != GLOBAL_ZONEID)
                  dls_devnet_stat_destroy(ddh, old_zid);
          if (new_zid != GLOBAL_ZONEID)
!                 dls_devnet_stat_create(ddh, new_zid);
  
          return (0);
  }
  
  zoneid_t
--- 1622,1632 ----
  
          /* Re-create kstats in the appropriate zones. */
          if (old_zid != GLOBAL_ZONEID)
                  dls_devnet_stat_destroy(ddh, old_zid);
          if (new_zid != GLOBAL_ZONEID)
!                 dls_devnet_stat_create(ddh, new_zid, new_zid);
  
          return (0);
  }
  
  zoneid_t