Print this page
Reduce lint
dlmgmt mismerge
OS-3839 dlmgmtd clobbers its cachefile with excessive use of /native (fix lx)
OS-3839 dlmgmtd clobbers its cachefile with excessive use of /native
OS-3342 dlmgmtd needs to be mindful of lock ordering
OS-2608 dlmgmtd needs to record zone identifiers
OS-3492 zone_free asserts to its destruction when dlmgmtd has fallen
OS-3494 zoneadmd tears down networking too soon when boot fails
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3007 dlmgmtd needs to work with non-native zones
        
*** 19,28 ****
--- 19,29 ----
   * CDDL HEADER END
   */
  
  /*
   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+  * Copyright 2015, Joyent Inc.
   */
  
  #include <assert.h>
  #include <ctype.h>
  #include <errno.h>
*** 41,50 ****
--- 42,53 ----
  #include <unistd.h>
  #include <wait.h>
  #include <libcontract.h>
  #include <libcontract_priv.h>
  #include <sys/contract/process.h>
+ #include <sys/vnic.h>
+ #include <zone.h>
  #include "dlmgmt_impl.h"
  
  typedef enum dlmgmt_db_op {
          DLMGMT_DB_OP_WRITE,
          DLMGMT_DB_OP_DELETE,
*** 712,730 ****
--- 715,735 ----
          int                     i, len;
          char                    *curr;
          char                    attr_name[MAXLINKATTRLEN];
          size_t                  attr_buf_len = 0;
          void                    *attr_buf = NULL;
+         boolean_t               rename;
  
          curr = buf;
          len = strlen(buf);
          attr_name[0] = '\0';
          for (i = 0; i < len; i++) {
                  char            c = buf[i];
                  boolean_t       match = (c == '=' ||
                      (c == ',' && !found_type) || c == ';');
  
+                 rename = B_FALSE;
                  /*
                   * Move to the next character if there is no match and
                   * if we have not reached the last character.
                   */
                  if (!match && i != len - 1)
*** 766,775 ****
--- 771,795 ----
                          } else if (strcmp(attr_name, "media") == 0) {
                                  if (read_int64(curr, &attr_buf) == 0)
                                          goto parse_fail;
                                  linkp->ll_media =
                                      (uint32_t)*(int64_t *)attr_buf;
+                         } else if (strcmp(attr_name, "zone") == 0) {
+                                 if (read_str(curr, &attr_buf) == 0)
+                                         goto parse_fail;
+                                 linkp->ll_zoneid = getzoneidbyname(attr_buf);
+                                 if (linkp->ll_zoneid == -1) {
+                                         if (errno == EFAULT)
+                                                 abort();
+                                         /*
+                                          * If we can't find the zone, assign the
+                                          * link to the GZ and mark it for being
+                                          * renamed.
+                                          */
+                                         linkp->ll_zoneid = 0;
+                                         rename = B_TRUE;
+                                 }
                          } else {
                                  attr_buf_len = translators[type].read_func(curr,
                                      &attr_buf);
                                  if (attr_buf_len == 0)
                                          goto parse_fail;
*** 809,818 ****
--- 829,848 ----
                          if (c != '=')
                                  goto parse_fail;
  
                          (void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
                  }
+ 
+                 /*
+                  * The zone that this link belongs to has died; we are
+                  * reparenting it to the GZ and renaming it to avoid name
+                  * collisions.
+                  */
+                 if (rename == B_TRUE) {
+                         (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
+                             "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
+                 }
                  curr = buf + i + 1;
          }
  
          /* Correct any erroneous IPTUN datalink class constant in the file */
          if (linkp->ll_class == 0x60) {
*** 1220,1236 ****
          dlmgmt_linkattr_t       *cur_p = NULL;
          uint64_t                u64;
  
          ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
          if (!persist) {
                  /*
!                  * We store the linkid in the active database so that dlmgmtd
!                  * can recover in the event that it is restarted.
                   */
                  u64 = linkp->ll_linkid;
                  ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
          }
          u64 = linkp->ll_class;
          ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
          u64 = linkp->ll_media;
          ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
  
--- 1250,1273 ----
          dlmgmt_linkattr_t       *cur_p = NULL;
          uint64_t                u64;
  
          ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
          if (!persist) {
+                 char zname[ZONENAME_MAX];
                  /*
!                  * We store the linkid and the zone name in the active database
!                  * so that dlmgmtd can recover in the event that it is
!                  * restarted.
                   */
                  u64 = linkp->ll_linkid;
                  ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
+ 
+                 if (getzonenamebyid(linkp->ll_zoneid, zname,
+                     sizeof (zname)) != -1) {
+                         ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
                  }
+         }
          u64 = linkp->ll_class;
          ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
          u64 = linkp->ll_media;
          ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
  
*** 1380,1416 ****
                          func(linkp);
          }
  }
  
  /*
   * Initialize the datalink <link name, linkid> mapping and the link's
   * attributes list based on the configuration file /etc/dladm/datalink.conf
   * and the active configuration cache file
   * /etc/svc/volatile/dladm/datalink-management:default.cache.
   */
  int
! dlmgmt_db_init(zoneid_t zoneid)
  {
          dlmgmt_db_req_t *req;
          int             err;
          boolean_t       boot = B_FALSE;
  
          if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
              DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
                  return (err);
  
          if ((err = dlmgmt_process_db_req(req)) != 0) {
                  /*
                   * If we get back ENOENT, that means that the active
!                  * configuration file doesn't exist yet, and is not an error.
!                  * We'll create it down below after we've loaded the
!                  * persistent configuration.
                   */
                  if (err != ENOENT)
                          goto done;
                  boot = B_TRUE;
          }
  
          req->ls_flags = DLMGMT_PERSIST;
          err = dlmgmt_process_db_req(req);
          if (err != 0 && err != ENOENT)
                  goto done;
--- 1417,1501 ----
                          func(linkp);
          }
  }
  
  /*
+  * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
+  *
+  * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
+  * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
+  * Because of the upcall architecture of dlmgmtd this can lead to deadlock
+  * with the following scenario:
+  *    a) the thread preparing to fork will have acquired the malloc locks
+  *       then attempt to suspend every thread in preparation to fork.
+  *    b) all of the upcalls will be blocked in door_ucred() trying to malloc()
+  *       and get the credentials of their caller.
+  *    c) we can't suspend the in-kernel thread making the upcall.
+  *
+  * Thus, we cannot serve door requests because we're blocked in malloc()
+  * which fork() owns, but fork() is in turn blocked on the in-kernel thread
+  * making the door upcall.  This is a fundamental architectural problem with
+  * any server handling upcalls and also trying to fork().
+  *
+  * To minimize the chance of this deadlock occurring, we check ahead of time to
+  * see if the file we want to read actually exists in the zone (which it almost
+  * never does), so we don't need to fork in that case (i.e. rarely to never).
+  */
+ static boolean_t
+ zone_file_exists(char *zoneroot, char *filename)
+ {
+         struct stat     sb;
+         char            fname[MAXPATHLEN];
+ 
+         (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
+ 
+         if (stat(fname, &sb) == -1)
+                 return (B_FALSE);
+ 
+         return (B_TRUE);
+ }
+ 
+ /*
   * Initialize the datalink <link name, linkid> mapping and the link's
   * attributes list based on the configuration file /etc/dladm/datalink.conf
   * and the active configuration cache file
   * /etc/svc/volatile/dladm/datalink-management:default.cache.
   */
  int
! dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
  {
          dlmgmt_db_req_t *req;
          int             err;
          boolean_t       boot = B_FALSE;
+         char            tdir[MAXPATHLEN];
+         char            *path = cachefile;
  
          if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
              DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
                  return (err);
  
+         /* Handle running in a non-native branded zone (i.e. has /native) */
+         if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
+                 (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
+                 path = tdir;
+         }
+ 
+         if (zone_file_exists(zoneroot, path)) {
                  if ((err = dlmgmt_process_db_req(req)) != 0) {
                          /*
                           * If we get back ENOENT, that means that the active
!                          * configuration file doesn't exist yet, and is not an
!                          * error.  We'll create it down below after we've
!                          * loaded the persistent configuration.
                           */
                          if (err != ENOENT)
                                  goto done;
                          boot = B_TRUE;
                  }
+         } else {
+                 boot = B_TRUE;
+         }
  
          req->ls_flags = DLMGMT_PERSIST;
          err = dlmgmt_process_db_req(req);
          if (err != 0 && err != ENOENT)
                  goto done;
*** 1440,1459 ****
--- 1525,1605 ----
          return (err);
  }
  
  /*
   * Remove all links in the given zoneid.
+  *
+  * We do this work in two different passes. In the first pass, we remove any
+  * entry that hasn't been loaned and mark every entry that has been loaned as
+  * something that is going to be tombstoned. In the second pass, we drop the
+  * table lock for every entry and remove the tombstoned entry for our zone.
   */
  void
  dlmgmt_db_fini(zoneid_t zoneid)
  {
          dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp;
  
          while (linkp != NULL) {
                  next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
                  if (linkp->ll_zoneid == zoneid) {
+                         boolean_t onloan = linkp->ll_onloan;
+ 
+                         /*
+                          * Clean up any VNICs that were loaned to the zone
+                          * before the zone goes away and we can no longer
+                          * refer to the VNIC by the name/zoneid.
+                          */
+                         if (onloan) {
+                                 (void) dlmgmt_delete_db_entry(linkp,
+                                     DLMGMT_ACTIVE);
+                                 linkp->ll_tomb = B_TRUE;
+                         } else {
                                  (void) dlmgmt_destroy_common(linkp,
                                      DLMGMT_ACTIVE | DLMGMT_PERSIST);
                          }
+                 }
                  linkp = next_linkp;
          }
+ 
+ again:
+         linkp = avl_first(&dlmgmt_name_avl);
+         while (linkp != NULL) {
+                 vnic_ioc_delete_t ioc;
+ 
+                 next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
+ 
+                 if (linkp->ll_zoneid != zoneid) {
+                         linkp = next_linkp;
+                         continue;
+                 }
+                 ioc.vd_vnic_id = linkp->ll_linkid;
+                 if (linkp->ll_tomb != B_TRUE)
+                         abort();
+ 
+                 /*
+                  * We have to drop the table lock while going up into the
+                  * kernel. If we hold the table lock while deleting a vnic, we
+                  * may get blocked on the mac perimeter and the holder of it may
+                  * want something from dlmgmtd.
+                  */
+                 dlmgmt_table_unlock();
+ 
+                 if (ioctl(dladm_dld_fd(dld_handle),
+                     VNIC_IOC_DELETE, &ioc) < 0)
+                         dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
+                             "delete VNIC ioctl failed %d %d",
+                             ioc.vd_vnic_id, errno);
+ 
+                 /*
+                  * Even though we've dropped the lock, we know that nothing else
+                  * could have removed us. Therefore, it should be safe to go
+                  * through and delete ourselves, but do nothing else. We'll have
+                  * to restart iteration from the beginning. This can be painful.
+                  */
+                 dlmgmt_table_lock(B_TRUE);
+ 
+                 (void) dlmgmt_destroy_common(linkp,
+                     DLMGMT_ACTIVE | DLMGMT_PERSIST);
+                 goto again;
+         }
+ 
  }