Print this page
Fixes to allow compilation on OmniOS and OI
OS-3342+co
OS-3007 dlmgmtd needs to work with non-native zones
OS-375 i_dls_mgmt_upcall()/dlmgmt_zfop() deadlock in dlmgmtd
OS-383 dladm rename-link doesn't update /etc/svc/volatile/dladm/network-datalink-management:default.cache
OS-249

*** 19,28 **** --- 19,29 ---- * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2014, Joyent Inc. All rights reserved. */ #include <assert.h> #include <ctype.h> #include <errno.h>
*** 41,50 **** --- 42,53 ---- #include <unistd.h> #include <wait.h> #include <libcontract.h> #include <libcontract_priv.h> #include <sys/contract/process.h> + #include <sys/vnic.h> + #include <zone.h> #include "dlmgmt_impl.h" typedef enum dlmgmt_db_op { DLMGMT_DB_OP_WRITE, DLMGMT_DB_OP_DELETE,
*** 550,559 **** --- 553,566 ---- if ((req = dlmgmt_db_req_alloc(op, entryname, linkp->ll_linkid, linkp->ll_zoneid, flags, &err)) == NULL) return (err); + /* If transient op and onloan, use the global zone cache file. */ + if (flags == DLMGMT_ACTIVE && linkp->ll_onloan) + req->ls_zoneid = GLOBAL_ZONEID; + /* * If the return error is EINPROGRESS, this request is handled * asynchronously; return success. */ err = dlmgmt_process_db_req(req);
*** 712,730 **** --- 719,740 ---- int i, len; char *curr; char attr_name[MAXLINKATTRLEN]; size_t attr_buf_len = 0; void *attr_buf = NULL; + boolean_t rename; curr = buf; len = strlen(buf); attr_name[0] = '\0'; for (i = 0; i < len; i++) { char c = buf[i]; boolean_t match = (c == '=' || (c == ',' && !found_type) || c == ';'); + rename = B_FALSE; + /* * Move to the next character if there is no match and * if we have not reached the last character. */ if (!match && i != len - 1)
*** 766,775 **** --- 776,800 ---- } else if (strcmp(attr_name, "media") == 0) { if (read_int64(curr, &attr_buf) == 0) goto parse_fail; linkp->ll_media = (uint32_t)*(int64_t *)attr_buf; + } else if (strcmp(attr_name, "zone") == 0) { + if (read_str(curr, &attr_buf) == 0) + goto parse_fail; + linkp->ll_zoneid = getzoneidbyname(attr_buf); + if (linkp->ll_zoneid == -1) { + if (errno == EFAULT) + abort(); + /* + * If we can't find the zone, assign the + * link to the GZ and mark it for being + * renamed. + */ + linkp->ll_zoneid = 0; + rename = B_TRUE; + } } else { attr_buf_len = translators[type].read_func(curr, &attr_buf); if (attr_buf_len == 0) goto parse_fail;
*** 809,818 **** --- 834,853 ---- if (c != '=') goto parse_fail; (void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr); } + + /* + * The zone that this link belongs to has died, we are + * reparenting it to the GZ and renaming it to avoid name + * collisions. + */ + if (rename == B_TRUE) { + (void) snprintf(linkp->ll_link, MAXLINKNAMELEN, + "SUNWorphan%u", (uint16_t)(gethrtime() / 1000)); + } curr = buf + i + 1; } /* Correct any erroneous IPTUN datalink class constant in the file */ if (linkp->ll_class == 0x60) {
*** 1220,1236 **** dlmgmt_linkattr_t *cur_p = NULL; uint64_t u64; ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link); if (!persist) { /* ! * We store the linkid in the active database so that dlmgmtd ! * can recover in the event that it is restarted. */ u64 = linkp->ll_linkid; ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64); } u64 = linkp->ll_class; ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64); u64 = linkp->ll_media; ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64); --- 1255,1278 ---- dlmgmt_linkattr_t *cur_p = NULL; uint64_t u64; ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link); if (!persist) { + char zname[ZONENAME_MAX]; /* ! * We store the linkid and the zone name in the active database ! * so that dlmgmtd can recover in the event that it is ! * restarted. */ u64 = linkp->ll_linkid; ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64); + + if (getzonenamebyid(linkp->ll_zoneid, zname, + sizeof (zname)) != -1) { + ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname); } + } u64 = linkp->ll_class; ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64); u64 = linkp->ll_media; ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
*** 1380,1421 **** func(linkp); } } /* * Initialize the datalink <link name, linkid> mapping and the link's * attributes list based on the configuration file /etc/dladm/datalink.conf * and the active configuration cache file * /etc/svc/volatile/dladm/datalink-management:default.cache. */ int ! dlmgmt_db_init(zoneid_t zoneid) { dlmgmt_db_req_t *req; int err; boolean_t boot = B_FALSE; if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL, DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL) return (err); if ((err = dlmgmt_process_db_req(req)) != 0) { /* * If we get back ENOENT, that means that the active ! * configuration file doesn't exist yet, and is not an error. ! * We'll create it down below after we've loaded the ! * persistent configuration. */ if (err != ENOENT) goto done; boot = B_TRUE; } req->ls_flags = DLMGMT_PERSIST; err = dlmgmt_process_db_req(req); if (err != 0 && err != ENOENT) goto done; err = 0; if (rewrite_needed) { /* * First update links in memory, then dump the entire db to * disk. --- 1422,1513 ---- func(linkp); } } /* + * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture. + * + * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to + * dlmgmt_zfop() which tries to fork, enter the zone and read the file. + * Because of the upcall architecture of dlmgmtd this can lead to deadlock + * with the following scenario: + * a) the thread preparing to fork will have acquired the malloc locks + * then attempt to suspend every thread in preparation to fork. + * b) all of the upcalls will be blocked in door_ucred() trying to malloc() + * and get the credentials of their caller. + * c) we can't suspend the in-kernel thread making the upcall. + * + * Thus, we cannot serve door requests because we're blocked in malloc() + * which fork() owns, but fork() is in turn blocked on the in-kernel thread + * making the door upcall. 
This is a fundamental architectural problem with + * any server handling upcalls and also trying to fork(). + * + * To minimize the chance of this deadlock occurring, we check ahead of time to + * see if the file we want to read actually exists in the zone (which it almost + * never does), so we don't need to fork in that case (i.e. rarely to never). + */ + static boolean_t + zone_file_exists(char *zoneroot, char *filename) + { + struct stat sb; + char fname[MAXPATHLEN]; + + (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename); + + if (stat(fname, &sb) == -1) + return (B_FALSE); + + return (B_TRUE); + } + + /* * Initialize the datalink <link name, linkid> mapping and the link's * attributes list based on the configuration file /etc/dladm/datalink.conf * and the active configuration cache file * /etc/svc/volatile/dladm/datalink-management:default.cache. */ int ! dlmgmt_db_init(zoneid_t zoneid, char *zoneroot) { dlmgmt_db_req_t *req; int err; boolean_t boot = B_FALSE; if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL, DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL) return (err); + /* Handle running in a non-native branded zone (i.e. has /native) */ + if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) { + char tdir[MAXPATHLEN]; + + (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile); + (void) strlcpy(cachefile, tdir, sizeof (cachefile)); + } + + if (zone_file_exists(zoneroot, cachefile)) { if ((err = dlmgmt_process_db_req(req)) != 0) { /* * If we get back ENOENT, that means that the active ! * configuration file doesn't exist yet, and is not an ! * error. We'll create it down below after we've ! * loaded the persistent configuration. 
*/ if (err != ENOENT) goto done; boot = B_TRUE; } + } else { + boot = B_TRUE; + } + if (zone_file_exists(zoneroot, DLMGMT_PERSISTENT_DB_PATH)) { req->ls_flags = DLMGMT_PERSIST; err = dlmgmt_process_db_req(req); if (err != 0 && err != ENOENT) goto done; + } err = 0; if (rewrite_needed) { /* * First update links in memory, then dump the entire db to * disk.
*** 1440,1459 **** --- 1532,1613 ---- return (err); } /* * Remove all links in the given zoneid. + * + * We do this work in two different passes. In the first pass, we remove any + * entry that hasn't been loaned and mark every entry that has been loaned as + * something that is going to be tombstoned. In the second pass, we drop the + * table lock for every entry and remove the tombstoned entry for our zone. */ void dlmgmt_db_fini(zoneid_t zoneid) { dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp; while (linkp != NULL) { next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp); if (linkp->ll_zoneid == zoneid) { + boolean_t onloan = linkp->ll_onloan; + + /* + * Cleanup any VNICs that were loaned to the zone + * before the zone goes away and we can no longer + * refer to the VNIC by the name/zoneid. + */ + if (onloan) { + (void) dlmgmt_delete_db_entry(linkp, + DLMGMT_ACTIVE); + linkp->ll_tomb = B_TRUE; + } else { (void) dlmgmt_destroy_common(linkp, DLMGMT_ACTIVE | DLMGMT_PERSIST); } + + } linkp = next_linkp; } + + again: + linkp = avl_first(&dlmgmt_name_avl); + while (linkp != NULL) { + vnic_ioc_delete_t ioc; + + next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp); + + if (linkp->ll_zoneid != zoneid) { + linkp = next_linkp; + continue; + } + ioc.vd_vnic_id = linkp->ll_linkid; + if (linkp->ll_tomb != B_TRUE) + abort(); + + /* + * We have to drop the table lock while going up into the + * kernel. If we hold the table lock while deleting a vnic, we + * may get blocked on the mac perimeter and the holder of it may + * want something from dlmgmtd. + */ + dlmgmt_table_unlock(); + + if (ioctl(dladm_dld_fd(dld_handle), + VNIC_IOC_DELETE, &ioc) < 0) + dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini " + "delete VNIC ioctl failed %d %d", + ioc.vd_vnic_id, errno); + + /* + * Even though we've dropped the lock, we know that nothing else + * could have removed us. Therefore, it should be safe to go + * through and delete ourselves, but do nothing else. 
We'll have + * to restart iteration from the beginning. This can be painful. + */ + dlmgmt_table_lock(B_TRUE); + + (void) dlmgmt_destroy_common(linkp, + DLMGMT_ACTIVE | DLMGMT_PERSIST); + goto again; + } + }