Print this page
Reduce lint
dlmgmt mismerge
OS-3839 dlmgmtd clobbers its cachefile with excessive use of /native (fix lx)
OS-3839 dlmgmtd clobbers its cachefile with excessive use of /native
OS-3342 dlmgmtd needs to be mindful of lock ordering
OS-2608 dlmgmtd needs to record zone identifiers
OS-3492 zone_free asserts to its destruction when dlmgmtd has fallen
OS-3494 zoneadmd tears down networking too soon when boot fails
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3007 dlmgmtd needs to work with non-native zones

Split Close
Expand all
Collapse all
          --- old/usr/src/cmd/dlmgmtd/dlmgmt_db.c
          +++ new/usr/src/cmd/dlmgmtd/dlmgmt_db.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright 2015, Joyent Inc.
  24   25   */
  25   26  
  26   27  #include <assert.h>
  27   28  #include <ctype.h>
  28   29  #include <errno.h>
  29   30  #include <fcntl.h>
  30   31  #include <stdio.h>
  31   32  #include <stdlib.h>
  32   33  #include <string.h>
  33   34  #include <strings.h>
↓ open down ↓ 2 lines elided ↑ open up ↑
  36   37  #include <sys/types.h>
  37   38  #include <sys/stat.h>
  38   39  #include <stropts.h>
  39   40  #include <sys/conf.h>
  40   41  #include <pthread.h>
  41   42  #include <unistd.h>
  42   43  #include <wait.h>
  43   44  #include <libcontract.h>
  44   45  #include <libcontract_priv.h>
  45   46  #include <sys/contract/process.h>
       47 +#include <sys/vnic.h>
       48 +#include <zone.h>
  46   49  #include "dlmgmt_impl.h"
  47   50  
  48   51  typedef enum dlmgmt_db_op {
  49   52          DLMGMT_DB_OP_WRITE,
  50   53          DLMGMT_DB_OP_DELETE,
  51   54          DLMGMT_DB_OP_READ
  52   55  } dlmgmt_db_op_t;
  53   56  
  54   57  typedef struct dlmgmt_db_req_s {
  55   58          struct dlmgmt_db_req_s  *ls_next;
↓ open down ↓ 651 lines elided ↑ open up ↑
 707  710  static int
 708  711  parse_linkprops(char *buf, dlmgmt_link_t *linkp)
 709  712  {
 710  713          boolean_t               found_type = B_FALSE;
 711  714          dladm_datatype_t        type = DLADM_TYPE_STR;
 712  715          int                     i, len;
 713  716          char                    *curr;
 714  717          char                    attr_name[MAXLINKATTRLEN];
 715  718          size_t                  attr_buf_len = 0;
 716  719          void                    *attr_buf = NULL;
      720 +        boolean_t               rename;
 717  721  
 718  722          curr = buf;
 719  723          len = strlen(buf);
 720  724          attr_name[0] = '\0';
 721  725          for (i = 0; i < len; i++) {
 722  726                  char            c = buf[i];
 723  727                  boolean_t       match = (c == '=' ||
 724  728                      (c == ',' && !found_type) || c == ';');
 725  729  
      730 +                rename = B_FALSE;
 726  731                  /*
 727  732                   * Move to the next character if there is no match and
 728  733                   * if we have not reached the last character.
 729  734                   */
 730  735                  if (!match && i != len - 1)
 731  736                          continue;
 732  737  
 733  738                  if (match) {
 734  739                          /*
 735  740                           * NUL-terminate the string pointed to by 'curr'.
↓ open down ↓ 25 lines elided ↑ open up ↑
 761  766                          } else if (strcmp(attr_name, "class") == 0) {
 762  767                                  if (read_int64(curr, &attr_buf) == 0)
 763  768                                          goto parse_fail;
 764  769                                  linkp->ll_class =
 765  770                                      (datalink_class_t)*(int64_t *)attr_buf;
 766  771                          } else if (strcmp(attr_name, "media") == 0) {
 767  772                                  if (read_int64(curr, &attr_buf) == 0)
 768  773                                          goto parse_fail;
 769  774                                  linkp->ll_media =
 770  775                                      (uint32_t)*(int64_t *)attr_buf;
      776 +                        } else if (strcmp(attr_name, "zone") == 0) {
      777 +                                if (read_str(curr, &attr_buf) == 0)
      778 +                                        goto parse_fail;
      779 +                                linkp->ll_zoneid = getzoneidbyname(attr_buf);
      780 +                                if (linkp->ll_zoneid == -1) {
      781 +                                        if (errno == EFAULT)
      782 +                                                abort();
      783 +                                        /*
      784 +                                         * If we can't find the zone, assign the
      785 +                                         * link to the GZ and mark it for being
      786 +                                         * renamed.
      787 +                                         */
      788 +                                        linkp->ll_zoneid = 0;
      789 +                                        rename = B_TRUE;
      790 +                                }
 771  791                          } else {
 772  792                                  attr_buf_len = translators[type].read_func(curr,
 773  793                                      &attr_buf);
 774  794                                  if (attr_buf_len == 0)
 775  795                                          goto parse_fail;
 776  796  
 777  797                                  if (linkattr_set(&(linkp->ll_head), attr_name,
 778  798                                      attr_buf, attr_buf_len, type) != 0) {
 779  799                                          free(attr_buf);
 780  800                                          goto parse_fail;
↓ open down ↓ 23 lines elided ↑ open up ↑
 804  824                  } else {
 805  825                          /*
 806  826                           * A zero length attr_name indicates we are looking
 807  827                           * at the beginning of a link attribute.
 808  828                           */
 809  829                          if (c != '=')
 810  830                                  goto parse_fail;
 811  831  
 812  832                          (void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
 813  833                  }
      834 +
      835 +                /*
      836 +                 * The zone that this link belongs to has died; we are
      837 +                 * reparenting it to the GZ and renaming it to avoid name
      838 +                 * collisions.
      839 +                 */
      840 +                if (rename == B_TRUE) {
      841 +                        (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
      842 +                            "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
      843 +                }
 814  844                  curr = buf + i + 1;
 815  845          }
 816  846  
 817  847          /* Correct any erroneous IPTUN datalink class constant in the file */
 818  848          if (linkp->ll_class == 0x60) {
 819  849                  linkp->ll_class = DATALINK_CLASS_IPTUN;
 820  850                  rewrite_needed = B_TRUE;
 821  851          }
 822  852  
 823  853          return (0);
↓ open down ↓ 391 lines elided ↑ open up ↑
1215 1245  generate_link_line(dlmgmt_link_t *linkp, boolean_t persist, char *buf)
1216 1246  {
1217 1247          char                    tmpbuf[MAXLINELEN];
1218 1248          char                    *ptr = tmpbuf;
1219 1249          char                    *lim = tmpbuf + MAXLINELEN;
1220 1250          dlmgmt_linkattr_t       *cur_p = NULL;
1221 1251          uint64_t                u64;
1222 1252  
1223 1253          ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
1224 1254          if (!persist) {
     1255 +                char zname[ZONENAME_MAX];
1225 1256                  /*
1226      -                 * We store the linkid in the active database so that dlmgmtd
1227      -                 * can recover in the event that it is restarted.
     1257 +                 * We store the linkid and the zone name in the active database
     1258 +                 * so that dlmgmtd can recover in the event that it is
     1259 +                 * restarted.
1228 1260                   */
1229 1261                  u64 = linkp->ll_linkid;
1230 1262                  ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
     1263 +
     1264 +                if (getzonenamebyid(linkp->ll_zoneid, zname,
     1265 +                    sizeof (zname)) != -1) {
     1266 +                        ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
     1267 +                }
1231 1268          }
1232 1269          u64 = linkp->ll_class;
1233 1270          ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
1234 1271          u64 = linkp->ll_media;
1235 1272          ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
1236 1273  
1237 1274          /*
1238 1275           * The daemon does not keep any active link attribute. Only store the
1239 1276           * attributes if this request is for persistent configuration.
1240 1277           */
↓ open down ↓ 134 lines elided ↑ open up ↑
1375 1412          dlmgmt_link_t *linkp;
1376 1413  
1377 1414          for (linkp = avl_first(&dlmgmt_id_avl); linkp != NULL;
1378 1415              linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) {
1379 1416                  if (linkp->ll_zoneid == zoneid && (linkp->ll_class & class))
1380 1417                          func(linkp);
1381 1418          }
1382 1419  }
1383 1420  
1384 1421  /*
     1422 + * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
     1423 + *
     1424 + * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
     1425 + * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
     1426 + * Because of the upcall architecture of dlmgmtd this can lead to deadlock
     1427 + * with the following scenario:
     1428 + *    a) the thread preparing to fork will have acquired the malloc locks
     1429 + *       then attempt to suspend every thread in preparation to fork.
     1430 + *    b) all of the upcalls will be blocked in door_ucred() trying to malloc()
     1431 + *       and get the credentials of their caller.
     1432 + *    c) we can't suspend the in-kernel thread making the upcall.
     1433 + *
     1434 + * Thus, we cannot serve door requests because we're blocked in malloc()
     1435 + * which fork() owns, but fork() is in turn blocked on the in-kernel thread
     1436 + * making the door upcall.  This is a fundamental architectural problem with
     1437 + * any server handling upcalls and also trying to fork().
     1438 + *
     1439 + * To minimize the chance of this deadlock occurring, we check ahead of time to
     1440 + * see if the file we want to read actually exists in the zone (which it almost
     1441 + * never does), so we don't need fork in that case (i.e. rarely to never).
     1442 + */
     1443 +static boolean_t
     1444 +zone_file_exists(char *zoneroot, char *filename)
     1445 +{
     1446 +        struct stat     sb;
     1447 +        char            fname[MAXPATHLEN];
     1448 +
     1449 +        (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
     1450 +
     1451 +        if (stat(fname, &sb) == -1)
     1452 +                return (B_FALSE);
     1453 +
     1454 +        return (B_TRUE);
     1455 +}
     1456 +
     1457 +/*
1385 1458   * Initialize the datalink <link name, linkid> mapping and the link's
1386 1459   * attributes list based on the configuration file /etc/dladm/datalink.conf
1387 1460   * and the active configuration cache file
1388 1461   * /etc/svc/volatile/dladm/datalink-management:default.cache.
1389 1462   */
1390 1463  int
1391      -dlmgmt_db_init(zoneid_t zoneid)
     1464 +dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
1392 1465  {
1393 1466          dlmgmt_db_req_t *req;
1394 1467          int             err;
1395 1468          boolean_t       boot = B_FALSE;
     1469 +        char            tdir[MAXPATHLEN];
     1470 +        char            *path = cachefile;
1396 1471  
1397 1472          if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
1398 1473              DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
1399 1474                  return (err);
1400 1475  
1401      -        if ((err = dlmgmt_process_db_req(req)) != 0) {
1402      -                /*
1403      -                 * If we get back ENOENT, that means that the active
1404      -                 * configuration file doesn't exist yet, and is not an error.
1405      -                 * We'll create it down below after we've loaded the
1406      -                 * persistent configuration.
1407      -                 */
1408      -                if (err != ENOENT)
1409      -                        goto done;
     1476 +        /* Handle running in a non-native branded zone (i.e. has /native) */
     1477 +        if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
     1478 +                (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
     1479 +                path = tdir;
     1480 +        }
     1481 +
     1482 +        if (zone_file_exists(zoneroot, path)) {
     1483 +                if ((err = dlmgmt_process_db_req(req)) != 0) {
     1484 +                        /*
     1485 +                         * If we get back ENOENT, that means that the active
     1486 +                         * configuration file doesn't exist yet, and is not an
     1487 +                         * error.  We'll create it down below after we've
     1488 +                         * loaded the persistent configuration.
     1489 +                         */
     1490 +                        if (err != ENOENT)
     1491 +                                goto done;
     1492 +                        boot = B_TRUE;
     1493 +                }
     1494 +        } else {
1410 1495                  boot = B_TRUE;
1411 1496          }
1412 1497  
1413 1498          req->ls_flags = DLMGMT_PERSIST;
1414 1499          err = dlmgmt_process_db_req(req);
1415 1500          if (err != 0 && err != ENOENT)
1416 1501                  goto done;
1417 1502          err = 0;
1418 1503          if (rewrite_needed) {
1419 1504                  /*
↓ open down ↓ 15 lines elided ↑ open up ↑
1435 1520  done:
1436 1521          if (err == EINPROGRESS)
1437 1522                  err = 0;
1438 1523          else
1439 1524                  free(req);
1440 1525          return (err);
1441 1526  }
1442 1527  
1443 1528  /*
1444 1529   * Remove all links in the given zoneid.
     1530 + *
     1531 + * We do this work in two different passes. In the first pass, we remove any
     1532 + * entry that hasn't been loaned and mark every entry that has been loaned as
     1533 + * something that is going to be tombstoned. In the second pass, we drop the
     1534 + * table lock for every entry and remove the tombstoned entry for our zone.
1445 1535   */
1446 1536  void
1447 1537  dlmgmt_db_fini(zoneid_t zoneid)
1448 1538  {
1449 1539          dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp;
1450 1540  
1451 1541          while (linkp != NULL) {
1452 1542                  next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
1453 1543                  if (linkp->ll_zoneid == zoneid) {
1454      -                        (void) dlmgmt_destroy_common(linkp,
1455      -                            DLMGMT_ACTIVE | DLMGMT_PERSIST);
     1544 +                        boolean_t onloan = linkp->ll_onloan;
     1545 +
     1546 +                        /*
     1547 +                         * Cleanup any VNICs that were loaned to the zone
     1548 +                         * before the zone goes away and we can no longer
     1549 +                         * refer to the VNIC by the name/zoneid.
     1550 +                         */
     1551 +                        if (onloan) {
     1552 +                                (void) dlmgmt_delete_db_entry(linkp,
     1553 +                                    DLMGMT_ACTIVE);
     1554 +                                linkp->ll_tomb = B_TRUE;
     1555 +                        } else {
     1556 +                                (void) dlmgmt_destroy_common(linkp,
     1557 +                                    DLMGMT_ACTIVE | DLMGMT_PERSIST);
     1558 +                        }
1456 1559                  }
1457 1560                  linkp = next_linkp;
1458 1561          }
     1562 +
     1563 +again:
     1564 +        linkp = avl_first(&dlmgmt_name_avl);
     1565 +        while (linkp != NULL) {
     1566 +                vnic_ioc_delete_t ioc;
     1567 +
     1568 +                next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
     1569 +
     1570 +                if (linkp->ll_zoneid != zoneid) {
     1571 +                        linkp = next_linkp;
     1572 +                        continue;
     1573 +                }
     1574 +                ioc.vd_vnic_id = linkp->ll_linkid;
     1575 +                if (linkp->ll_tomb != B_TRUE)
     1576 +                        abort();
     1577 +
     1578 +                /*
     1579 +                 * We have to drop the table lock while going up into the
     1580 +                 * kernel. If we hold the table lock while deleting a vnic, we
     1581 +                 * may get blocked on the mac perimeter and the holder of it may
     1582 +                 * want something from dlmgmtd.
     1583 +                 */
     1584 +                dlmgmt_table_unlock();
     1585 +
     1586 +                if (ioctl(dladm_dld_fd(dld_handle),
     1587 +                    VNIC_IOC_DELETE, &ioc) < 0)
     1588 +                        dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
     1589 +                            "delete VNIC ioctl failed %d %d",
     1590 +                            ioc.vd_vnic_id, errno);
     1591 +
     1592 +                /*
     1593 +                 * Even though we've dropped the lock, we know that nothing else
     1594 +                 * could have removed us. Therefore, it should be safe to go
     1595 +                 * through and delete ourselves, but do nothing else. We'll have
     1596 +                 * to restart iteration from the beginning. This can be painful.
     1597 +                 */
     1598 +                dlmgmt_table_lock(B_TRUE);
     1599 +
     1600 +                (void) dlmgmt_destroy_common(linkp,
     1601 +                    DLMGMT_ACTIVE | DLMGMT_PERSIST);
     1602 +                goto again;
     1603 +        }
     1604 +
1459 1605  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX