Print this page
Reduce lint
dlmgmt mismerge
OS-3839 dlmgmtd clobbers its cachefile with excessive use of /native (fix lx)
OS-3839 dlmgmtd clobbers its cachefile with excessive use of /native
OS-3342 dlmgmtd needs to be mindful of lock ordering
OS-2608 dlmgmtd needs to record zone identifiers
OS-3492 zone_free asserts to its destruction when dlmgmtd has fallen
OS-3494 zoneadmd tears down networking too soon when boot fails
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3007 dlmgmtd needs to work with non-native zones
        
@@ -19,10 +19,11 @@
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent Inc.
  */
 
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
@@ -41,10 +42,12 @@
 #include <unistd.h>
 #include <wait.h>
 #include <libcontract.h>
 #include <libcontract_priv.h>
 #include <sys/contract/process.h>
+#include <sys/vnic.h>
+#include <zone.h>
 #include "dlmgmt_impl.h"
 
 typedef enum dlmgmt_db_op {
         DLMGMT_DB_OP_WRITE,
         DLMGMT_DB_OP_DELETE,
@@ -712,19 +715,21 @@
         int                     i, len;
         char                    *curr;
         char                    attr_name[MAXLINKATTRLEN];
         size_t                  attr_buf_len = 0;
         void                    *attr_buf = NULL;
+        boolean_t               rename;
 
         curr = buf;
         len = strlen(buf);
         attr_name[0] = '\0';
         for (i = 0; i < len; i++) {
                 char            c = buf[i];
                 boolean_t       match = (c == '=' ||
                     (c == ',' && !found_type) || c == ';');
 
+                rename = B_FALSE;
                 /*
                  * Move to the next character if there is no match and
                  * if we have not reached the last character.
                  */
                 if (!match && i != len - 1)
@@ -766,10 +771,25 @@
                         } else if (strcmp(attr_name, "media") == 0) {
                                 if (read_int64(curr, &attr_buf) == 0)
                                         goto parse_fail;
                                 linkp->ll_media =
                                     (uint32_t)*(int64_t *)attr_buf;
+                        } else if (strcmp(attr_name, "zone") == 0) {
+                                if (read_str(curr, &attr_buf) == 0)
+                                        goto parse_fail;
+                                linkp->ll_zoneid = getzoneidbyname(attr_buf);
+                                if (linkp->ll_zoneid == -1) {
+                                        if (errno == EFAULT)
+                                                abort();
+                                        /*
+                                         * If we can't find the zone, assign the
+                                         * link to the GZ and mark it for being
+                                         * renamed.
+                                         */
+                                        linkp->ll_zoneid = 0;
+                                        rename = B_TRUE;
+                                }
                         } else {
                                 attr_buf_len = translators[type].read_func(curr,
                                     &attr_buf);
                                 if (attr_buf_len == 0)
                                         goto parse_fail;
@@ -809,10 +829,20 @@
                         if (c != '=')
                                 goto parse_fail;
 
                         (void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
                 }
+
+                /*
+                 * The zone that this link belongs to has died; we are
+                 * reparenting it to the GZ and renaming it to avoid name
+                 * collisions.
+                 */
+                if (rename == B_TRUE) {
+                        (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
+                            "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
+                }
                 curr = buf + i + 1;
         }
 
         /* Correct any erroneous IPTUN datalink class constant in the file */
         if (linkp->ll_class == 0x60) {
@@ -1220,17 +1250,24 @@
         dlmgmt_linkattr_t       *cur_p = NULL;
         uint64_t                u64;
 
         ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
         if (!persist) {
+                char zname[ZONENAME_MAX];
                 /*
-                 * We store the linkid in the active database so that dlmgmtd
-                 * can recover in the event that it is restarted.
+                 * We store the linkid and the zone name in the active database
+                 * so that dlmgmtd can recover in the event that it is
+                 * restarted.
                  */
                 u64 = linkp->ll_linkid;
                 ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
+
+                if (getzonenamebyid(linkp->ll_zoneid, zname,
+                    sizeof (zname)) != -1) {
+                        ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
         }
+        }
         u64 = linkp->ll_class;
         ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
         u64 = linkp->ll_media;
         ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
 
@@ -1380,37 +1417,85 @@
                         func(linkp);
         }
 }
 
 /*
+ * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
+ *
+ * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
+ * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
+ * Because of the upcall architecture of dlmgmtd this can lead to deadlock
+ * with the following scenario:
+ *    a) the thread preparing to fork will have acquired the malloc locks
+ *       then attempt to suspend every thread in preparation to fork.
+ *    b) all of the upcalls will be blocked in door_ucred() trying to malloc()
+ *       and get the credentials of their caller.
+ *    c) we can't suspend the in-kernel thread making the upcall.
+ *
+ * Thus, we cannot serve door requests because we're blocked in malloc()
+ * which fork() owns, but fork() is in turn blocked on the in-kernel thread
+ * making the door upcall.  This is a fundamental architectural problem with
+ * any server handling upcalls and also trying to fork().
+ *
+ * To minimize the chance of this deadlock occurring, we check ahead of time
+ * to see if the file we want to read actually exists in the zone (which it
+ * almost never does), so we don't need to fork in that case (i.e. rarely to
+ * never).
+ *
+ * Returns B_TRUE when stat(2) succeeds on "<zoneroot>/<filename>" and B_FALSE
+ * on any stat(2) failure, not only ENOENT.  The joined path is built in a
+ * MAXPATHLEN buffer and the snprintf() result is not checked, so an overlong
+ * path is silently truncated before the stat(2).
+ */
+static boolean_t
+zone_file_exists(char *zoneroot, char *filename)
+{
+        struct stat     sb;
+        char            fname[MAXPATHLEN];
+
+        (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
+
+        if (stat(fname, &sb) == -1)
+                return (B_FALSE);
+
+        return (B_TRUE);
+}
+
+/*
  * Initialize the datalink <link name, linkid> mapping and the link's
  * attributes list based on the configuration file /etc/dladm/datalink.conf
  * and the active configuration cache file
  * /etc/svc/volatile/dladm/datalink-management:default.cache.
  */
 int
-dlmgmt_db_init(zoneid_t zoneid)
+dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
 {
         dlmgmt_db_req_t *req;
         int             err;
         boolean_t       boot = B_FALSE;
+        char            tdir[MAXPATHLEN];
+        char            *path = cachefile;
 
         if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
             DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
                 return (err);
 
+        /* Handle running in a non-native branded zone (i.e. has /native) */
+        if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
+                (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
+                path = tdir;
+        }
+
+        if (zone_file_exists(zoneroot, path)) {
         if ((err = dlmgmt_process_db_req(req)) != 0) {
                 /*
                  * If we get back ENOENT, that means that the active
-                 * configuration file doesn't exist yet, and is not an error.
-                 * We'll create it down below after we've loaded the
-                 * persistent configuration.
+                         * configuration file doesn't exist yet, and is not an
+                         * error.  We'll create it down below after we've
+                         * loaded the persistent configuration.
                  */
                 if (err != ENOENT)
                         goto done;
                 boot = B_TRUE;
         }
+        } else {
+                boot = B_TRUE;
+        }
 
         req->ls_flags = DLMGMT_PERSIST;
         err = dlmgmt_process_db_req(req);
         if (err != 0 && err != ENOENT)
                 goto done;
@@ -1440,20 +1525,81 @@
         return (err);
 }
 
 /*
  * Remove all links in the given zoneid.
+ *
+ * We do this work in two different passes. In the first pass, we remove any
+ * entry that hasn't been loaned and mark every entry that has been loaned as
+ * something that is going to be tombstoned. In the second pass, we drop the
+ * table lock for every tombstoned entry, ask the kernel to delete the VNIC,
+ * retake the lock and then remove the tombstoned entry for our zone.
+ *
+ * NOTE(review): the second pass unlocks and then re-locks the table with
+ * dlmgmt_table_lock(B_TRUE), so this presumably expects to be entered with
+ * the table lock already held as writer -- confirm against the callers.
  */
 void
 dlmgmt_db_fini(zoneid_t zoneid)
 {
         dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp;
 
         while (linkp != NULL) {
                 next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
                 if (linkp->ll_zoneid == zoneid) {
+                        boolean_t onloan = linkp->ll_onloan;
+
+                        /*
+                         * Links that were loaned to the zone are VNICs that
+                         * must be cleaned up before the zone goes away and
+                         * we can no longer refer to them by name/zoneid.
+                         * Here we only remove the link's active database
+                         * entry and mark it as a tombstone; the VNIC itself
+                         * is deleted in the second pass below. Links that
+                         * are not on loan are destroyed immediately.
+                         */
+                        if (onloan) {
+                                (void) dlmgmt_delete_db_entry(linkp,
+                                    DLMGMT_ACTIVE);
+                                linkp->ll_tomb = B_TRUE;
+                        } else {
                         (void) dlmgmt_destroy_common(linkp,
                             DLMGMT_ACTIVE | DLMGMT_PERSIST);
                 }
+                }
                 linkp = next_linkp;
         }
+
+again:
+        linkp = avl_first(&dlmgmt_name_avl);
+        while (linkp != NULL) {
+                vnic_ioc_delete_t ioc;
+
+                next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
+
+                if (linkp->ll_zoneid != zoneid) {
+                        linkp = next_linkp;
+                        continue;
+                }
+                ioc.vd_vnic_id = linkp->ll_linkid;
+                if (linkp->ll_tomb != B_TRUE)
+                        abort();
+
+                /*
+                 * We have to drop the table lock while going up into the
+                 * kernel. If we hold the table lock while deleting a vnic, we
+                 * may get blocked on the mac perimeter and the holder of it may
+                 * want something from dlmgmtd.
+                 */
+                dlmgmt_table_unlock();
+
+                if (ioctl(dladm_dld_fd(dld_handle),
+                    VNIC_IOC_DELETE, &ioc) < 0)
+                        dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
+                            "delete VNIC ioctl failed %d %d",
+                            ioc.vd_vnic_id, errno);
+
+                /*
+                 * Even though we've dropped the lock, we know that nothing else
+                 * could have removed us. Therefore, it should be safe to go
+                 * through and delete ourselves, but do nothing else. The
+                 * next_linkp captured before the unlock is not used past
+                 * this point, so we have to restart iteration from the
+                 * beginning. This can be painful. A failed ioctl is only
+                 * logged; the entry is destroyed either way.
+                 */
+                dlmgmt_table_lock(B_TRUE);
+
+                (void) dlmgmt_destroy_common(linkp,
+                    DLMGMT_ACTIVE | DLMGMT_PERSIST);
+                goto again;
+        }
+
 }