Print this page
Fixes to allow compilation on OmniOS and OI
OS-3342+co
OS-3007 dlmgmtd needs to work with non-native zones
OS-375 i_dls_mgmt_upcall()/dlmgmt_zfop() deadlock in dlmgmtd
OS-383 dladm rename-link doesn't update /etc/svc/volatile/dladm/network-datalink-management:default.cache
OS-249
*** 19,28 ****
--- 19,29 ----
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014, Joyent Inc. All rights reserved.
*/
#include <assert.h>
#include <ctype.h>
#include <errno.h>
*** 41,50 ****
--- 42,53 ----
#include <unistd.h>
#include <wait.h>
#include <libcontract.h>
#include <libcontract_priv.h>
#include <sys/contract/process.h>
+ #include <sys/vnic.h>
+ #include <zone.h>
#include "dlmgmt_impl.h"
typedef enum dlmgmt_db_op {
DLMGMT_DB_OP_WRITE,
DLMGMT_DB_OP_DELETE,
*** 550,559 ****
--- 553,566 ----
if ((req = dlmgmt_db_req_alloc(op, entryname, linkp->ll_linkid,
linkp->ll_zoneid, flags, &err)) == NULL)
return (err);
+ /* If transient op and onloan, use the global zone cache file. */
+ if (flags == DLMGMT_ACTIVE && linkp->ll_onloan)
+ req->ls_zoneid = GLOBAL_ZONEID;
+
/*
* If the return error is EINPROGRESS, this request is handled
* asynchronously; return success.
*/
err = dlmgmt_process_db_req(req);
*** 712,730 ****
--- 719,740 ----
int i, len;
char *curr;
char attr_name[MAXLINKATTRLEN];
size_t attr_buf_len = 0;
void *attr_buf = NULL;
+ boolean_t rename;
curr = buf;
len = strlen(buf);
attr_name[0] = '\0';
for (i = 0; i < len; i++) {
char c = buf[i];
boolean_t match = (c == '=' ||
(c == ',' && !found_type) || c == ';');
+ rename = B_FALSE;
+
/*
* Move to the next character if there is no match and
* if we have not reached the last character.
*/
if (!match && i != len - 1)
*** 766,775 ****
--- 776,800 ----
} else if (strcmp(attr_name, "media") == 0) {
if (read_int64(curr, &attr_buf) == 0)
goto parse_fail;
linkp->ll_media =
(uint32_t)*(int64_t *)attr_buf;
+ } else if (strcmp(attr_name, "zone") == 0) {
+ if (read_str(curr, &attr_buf) == 0)
+ goto parse_fail;
+ linkp->ll_zoneid = getzoneidbyname(attr_buf);
+ if (linkp->ll_zoneid == -1) {
+ if (errno == EFAULT)
+ abort();
+ /*
+ * If we can't find the zone, assign the
+ * link to the GZ and mark it for being
+ * renamed.
+ */
+ linkp->ll_zoneid = 0;
+ rename = B_TRUE;
+ }
} else {
attr_buf_len = translators[type].read_func(curr,
&attr_buf);
if (attr_buf_len == 0)
goto parse_fail;
*** 809,818 ****
--- 834,853 ----
if (c != '=')
goto parse_fail;
(void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
}
+
+ /*
+ * The zone that this link belongs to has died, we are
+ * reparenting it to the GZ and renaming it to avoid name
+ * collisions.
+ */
+ if (rename == B_TRUE) {
+ (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
+ "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
+ }
curr = buf + i + 1;
}
/* Correct any erroneous IPTUN datalink class constant in the file */
if (linkp->ll_class == 0x60) {
*** 1220,1236 ****
dlmgmt_linkattr_t *cur_p = NULL;
uint64_t u64;
ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
if (!persist) {
/*
! * We store the linkid in the active database so that dlmgmtd
! * can recover in the event that it is restarted.
*/
u64 = linkp->ll_linkid;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
}
u64 = linkp->ll_class;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
u64 = linkp->ll_media;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
--- 1255,1278 ----
dlmgmt_linkattr_t *cur_p = NULL;
uint64_t u64;
ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
if (!persist) {
+ char zname[ZONENAME_MAX];
/*
! * We store the linkid and the zone name in the active database
! * so that dlmgmtd can recover in the event that it is
! * restarted.
*/
u64 = linkp->ll_linkid;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
+
+ if (getzonenamebyid(linkp->ll_zoneid, zname,
+ sizeof (zname)) != -1) {
+ ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
}
+ }
u64 = linkp->ll_class;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
u64 = linkp->ll_media;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
*** 1380,1421 ****
func(linkp);
}
}
/*
* Initialize the datalink <link name, linkid> mapping and the link's
* attributes list based on the configuration file /etc/dladm/datalink.conf
* and the active configuration cache file
* /etc/svc/volatile/dladm/datalink-management:default.cache.
*/
int
! dlmgmt_db_init(zoneid_t zoneid)
{
dlmgmt_db_req_t *req;
int err;
boolean_t boot = B_FALSE;
if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
return (err);
if ((err = dlmgmt_process_db_req(req)) != 0) {
/*
* If we get back ENOENT, that means that the active
! * configuration file doesn't exist yet, and is not an error.
! * We'll create it down below after we've loaded the
! * persistent configuration.
*/
if (err != ENOENT)
goto done;
boot = B_TRUE;
}
req->ls_flags = DLMGMT_PERSIST;
err = dlmgmt_process_db_req(req);
if (err != 0 && err != ENOENT)
goto done;
err = 0;
if (rewrite_needed) {
/*
* First update links in memory, then dump the entire db to
* disk.
--- 1422,1513 ----
func(linkp);
}
}
/*
+ * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
+ *
+ * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
+ * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
+ * Because of the upcall architecture of dlmgmtd this can lead to deadlock
+ * with the following scenario:
+ * a) the thread preparing to fork will have acquired the malloc locks
+ * then attempt to suspend every thread in preparation to fork.
+ * b) all of the upcalls will be blocked in door_ucred() trying to malloc()
+ * and get the credentials of their caller.
+ * c) we can't suspend the in-kernel thread making the upcall.
+ *
+ * Thus, we cannot serve door requests because we're blocked in malloc()
+ * which fork() owns, but fork() is in turn blocked on the in-kernel thread
+ * making the door upcall. This is a fundamental architectural problem with
+ * any server handling upcalls and also trying to fork().
+ *
+ * To minimize the chance of this deadlock occurring, we check ahead of time to
+ * see if the file we want to read actually exists in the zone (which it almost
+ * never does), so we don't need to fork in that case (i.e. rarely to never).
+ */
+ static boolean_t	/* B_TRUE iff stat() succeeds on zoneroot/filename */
+ zone_file_exists(char *zoneroot, char *filename)
+ {
+ struct stat sb;
+ char fname[MAXPATHLEN];
+
+ (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);	/* join the two parts with a slash; truncation is not checked */
+
+ if (stat(fname, &sb) == -1)	/* any stat() failure, not only ENOENT, is reported as missing */
+ return (B_FALSE);
+
+ return (B_TRUE);
+ }
+
+ /*
* Initialize the datalink <link name, linkid> mapping and the link's
* attributes list based on the configuration file /etc/dladm/datalink.conf
* and the active configuration cache file
* /etc/svc/volatile/dladm/datalink-management:default.cache.
*/
int
! dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
{
dlmgmt_db_req_t *req;
int err;
boolean_t boot = B_FALSE;
if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
return (err);
+ /* Handle running in a non-native branded zone (i.e. has /native) */
+ if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
+ char tdir[MAXPATHLEN];
+
+ (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
+ (void) strlcpy(cachefile, tdir, sizeof (cachefile));
+ }
+
+ if (zone_file_exists(zoneroot, cachefile)) {
if ((err = dlmgmt_process_db_req(req)) != 0) {
/*
* If we get back ENOENT, that means that the active
! * configuration file doesn't exist yet, and is not an
! * error. We'll create it down below after we've
! * loaded the persistent configuration.
*/
if (err != ENOENT)
goto done;
boot = B_TRUE;
}
+ } else {
+ boot = B_TRUE;
+ }
+ if (zone_file_exists(zoneroot, DLMGMT_PERSISTENT_DB_PATH)) {
req->ls_flags = DLMGMT_PERSIST;
err = dlmgmt_process_db_req(req);
if (err != 0 && err != ENOENT)
goto done;
+ }
err = 0;
if (rewrite_needed) {
/*
* First update links in memory, then dump the entire db to
* disk.
*** 1440,1459 ****
--- 1532,1613 ----
return (err);
}
/*
* Remove all links in the given zoneid.
+ *
+ * We do this work in two different passes. In the first pass, we remove any
+ * entry that hasn't been loaned and mark every entry that has been loaned as
+ * something that is going to be tombstoned. In the second pass, we drop the
+ * table lock for every entry and remove the tombstoned entry for our zone.
*/
void
dlmgmt_db_fini(zoneid_t zoneid)
{
dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp;
while (linkp != NULL) {	/* pass one: walk every link in the name tree */
next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);	/* read the successor first; presumably dlmgmt_destroy_common() may unlink linkp - confirm */
if (linkp->ll_zoneid == zoneid) {
+ boolean_t onloan = linkp->ll_onloan;
+
+ /*
+ * Cleanup any VNICs that were loaned to the zone
+ * before the zone goes away and we can no longer
+ * refer to the VNIC by the name/zoneid.
+ */
+ if (onloan) {
+ (void) dlmgmt_delete_db_entry(linkp,
+ DLMGMT_ACTIVE);
+ linkp->ll_tomb = B_TRUE;	/* mark for pass two, which deletes the VNIC and destroys the link */
+ } else {
(void) dlmgmt_destroy_common(linkp,
DLMGMT_ACTIVE | DLMGMT_PERSIST);
}
+
+ }
linkp = next_linkp;
}
+
+ again:
+ linkp = avl_first(&dlmgmt_name_avl);	/* pass two: every iteration that finds a match restarts from here */
+ while (linkp != NULL) {
+ vnic_ioc_delete_t ioc;
+
+ next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
+
+ if (linkp->ll_zoneid != zoneid) {
+ linkp = next_linkp;
+ continue;
+ }
+ ioc.vd_vnic_id = linkp->ll_linkid;	/* the link id is passed as the VNIC id to VNIC_IOC_DELETE */
+ if (linkp->ll_tomb != B_TRUE)	/* pass one is expected to have destroyed or tombstoned every link of this zone */
+ abort();
+
+ /*
+ * We have to drop the table lock while going up into the
+ * kernel. If we hold the table lock while deleting a vnic, we
+ * may get blocked on the mac perimeter and the holder of it may
+ * want something from dlmgmtd.
+ */
+ dlmgmt_table_unlock();	/* NOTE(review): implies the caller entered with the table lock held - confirm */
+
+ if (ioctl(dladm_dld_fd(dld_handle),
+ VNIC_IOC_DELETE, &ioc) < 0)
+ dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
+ "delete VNIC ioctl failed %d %d",
+ ioc.vd_vnic_id, errno);	/* failure is only logged; the link is still destroyed below */
+
+ /*
+ * Even though we've dropped the lock, we know that nothing else
+ * could have removed us. Therefore, it should be safe to go
+ * through and delete ourselves, but do nothing else. We'll have
+ * to restart iteration from the beginning. This can be painful.
+ */
+ dlmgmt_table_lock(B_TRUE);
+
+ (void) dlmgmt_destroy_common(linkp,
+ DLMGMT_ACTIVE | DLMGMT_PERSIST);
+ goto again;	/* next_linkp was read before the lock was dropped, so rescan from the head */
+ }
+
}