Print this page
Fixes to allow compilation on OmniOS and OI
OS-3342+co
OS-3007 dlmgmtd needs to work with non-native zones
OS-375 i_dls_mgmt_upcall()/dlmgmt_zfop() deadlock in dlmgmtd
OS-383 dladm rename-link doesn't update /etc/svc/volatile/dladm/network-datalink-management:default.cache
OS-249

Split Close
Expand all
Collapse all
          --- old/usr/src/cmd/dlmgmtd/dlmgmt_db.c
          +++ new/usr/src/cmd/dlmgmtd/dlmgmt_db.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright 2014, Joyent Inc. All rights reserved.
  24   25   */
  25   26  
  26   27  #include <assert.h>
  27   28  #include <ctype.h>
  28   29  #include <errno.h>
  29   30  #include <fcntl.h>
  30   31  #include <stdio.h>
  31   32  #include <stdlib.h>
  32   33  #include <string.h>
  33   34  #include <strings.h>
↓ open down ↓ 2 lines elided ↑ open up ↑
  36   37  #include <sys/types.h>
  37   38  #include <sys/stat.h>
  38   39  #include <stropts.h>
  39   40  #include <sys/conf.h>
  40   41  #include <pthread.h>
  41   42  #include <unistd.h>
  42   43  #include <wait.h>
  43   44  #include <libcontract.h>
  44   45  #include <libcontract_priv.h>
  45   46  #include <sys/contract/process.h>
       47 +#include <sys/vnic.h>
       48 +#include <zone.h>
  46   49  #include "dlmgmt_impl.h"
  47   50  
  48   51  typedef enum dlmgmt_db_op {
  49   52          DLMGMT_DB_OP_WRITE,
  50   53          DLMGMT_DB_OP_DELETE,
  51   54          DLMGMT_DB_OP_READ
  52   55  } dlmgmt_db_op_t;
  53   56  
  54   57  typedef struct dlmgmt_db_req_s {
  55   58          struct dlmgmt_db_req_s  *ls_next;
↓ open down ↓ 489 lines elided ↑ open up ↑
 545  548          dlmgmt_db_req_t *req;
 546  549          int             err;
 547  550  
 548  551          /* It is either a persistent request or an active request, not both. */
 549  552          assert((flags == DLMGMT_PERSIST) || (flags == DLMGMT_ACTIVE));
 550  553  
 551  554          if ((req = dlmgmt_db_req_alloc(op, entryname, linkp->ll_linkid,
 552  555              linkp->ll_zoneid, flags, &err)) == NULL)
 553  556                  return (err);
 554  557  
      558 +        /* If transient op and onloan, use the global zone cache file. */
      559 +        if (flags == DLMGMT_ACTIVE && linkp->ll_onloan)
      560 +                req->ls_zoneid = GLOBAL_ZONEID;
      561 +
 555  562          /*
 556  563           * If the return error is EINPROGRESS, this request is handled
 557  564           * asynchronously; return success.
 558  565           */
 559  566          err = dlmgmt_process_db_req(req);
 560  567          if (err != EINPROGRESS)
 561  568                  free(req);
 562  569          else
 563  570                  err = 0;
 564  571          return (err);
↓ open down ↓ 142 lines elided ↑ open up ↑
 707  714  static int
 708  715  parse_linkprops(char *buf, dlmgmt_link_t *linkp)
 709  716  {
 710  717          boolean_t               found_type = B_FALSE;
 711  718          dladm_datatype_t        type = DLADM_TYPE_STR;
 712  719          int                     i, len;
 713  720          char                    *curr;
 714  721          char                    attr_name[MAXLINKATTRLEN];
 715  722          size_t                  attr_buf_len = 0;
 716  723          void                    *attr_buf = NULL;
      724 +        boolean_t               rename;
 717  725  
 718  726          curr = buf;
 719  727          len = strlen(buf);
 720  728          attr_name[0] = '\0';
 721  729          for (i = 0; i < len; i++) {
 722  730                  char            c = buf[i];
 723  731                  boolean_t       match = (c == '=' ||
 724  732                      (c == ',' && !found_type) || c == ';');
 725  733  
      734 +                rename = B_FALSE;
      735 +
 726  736                  /*
 727  737                   * Move to the next character if there is no match and
 728  738                   * if we have not reached the last character.
 729  739                   */
 730  740                  if (!match && i != len - 1)
 731  741                          continue;
 732  742  
 733  743                  if (match) {
 734  744                          /*
 735  745                           * NUL-terminate the string pointed to by 'curr'.
↓ open down ↓ 25 lines elided ↑ open up ↑
 761  771                          } else if (strcmp(attr_name, "class") == 0) {
 762  772                                  if (read_int64(curr, &attr_buf) == 0)
 763  773                                          goto parse_fail;
 764  774                                  linkp->ll_class =
 765  775                                      (datalink_class_t)*(int64_t *)attr_buf;
 766  776                          } else if (strcmp(attr_name, "media") == 0) {
 767  777                                  if (read_int64(curr, &attr_buf) == 0)
 768  778                                          goto parse_fail;
 769  779                                  linkp->ll_media =
 770  780                                      (uint32_t)*(int64_t *)attr_buf;
      781 +                        } else if (strcmp(attr_name, "zone") == 0) {
      782 +                                if (read_str(curr, &attr_buf) == 0)
      783 +                                        goto parse_fail;
      784 +                                linkp->ll_zoneid = getzoneidbyname(attr_buf);
      785 +                                if (linkp->ll_zoneid == -1) {
      786 +                                        if (errno == EFAULT)
      787 +                                                abort();
      788 +                                        /*
      789 +                                         * If we can't find the zone, assign the
      790 +                                         * link to the GZ and mark it for being
      791 +                                         * renamed.
      792 +                                         */
      793 +                                        linkp->ll_zoneid = 0;
      794 +                                        rename = B_TRUE;
      795 +                                }
 771  796                          } else {
 772  797                                  attr_buf_len = translators[type].read_func(curr,
 773  798                                      &attr_buf);
 774  799                                  if (attr_buf_len == 0)
 775  800                                          goto parse_fail;
 776  801  
 777  802                                  if (linkattr_set(&(linkp->ll_head), attr_name,
 778  803                                      attr_buf, attr_buf_len, type) != 0) {
 779  804                                          free(attr_buf);
 780  805                                          goto parse_fail;
↓ open down ↓ 23 lines elided ↑ open up ↑
 804  829                  } else {
 805  830                          /*
 806  831                           * A zero length attr_name indicates we are looking
 807  832                           * at the beginning of a link attribute.
 808  833                           */
 809  834                          if (c != '=')
 810  835                                  goto parse_fail;
 811  836  
 812  837                          (void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
 813  838                  }
      839 +
      840 +                /*
      841 +                 * The zone that this link belongs to has died, we are
      842 +                 * reparenting it to the GZ and renaming it to avoid name
      843 +                 * collisions.
      844 +                 */
      845 +                if (rename == B_TRUE) {
      846 +                        (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
      847 +                            "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
      848 +                }
 814  849                  curr = buf + i + 1;
 815  850          }
 816  851  
 817  852          /* Correct any erroneous IPTUN datalink class constant in the file */
 818  853          if (linkp->ll_class == 0x60) {
 819  854                  linkp->ll_class = DATALINK_CLASS_IPTUN;
 820  855                  rewrite_needed = B_TRUE;
 821  856          }
 822  857  
 823  858          return (0);
↓ open down ↓ 391 lines elided ↑ open up ↑
1215 1250  generate_link_line(dlmgmt_link_t *linkp, boolean_t persist, char *buf)
1216 1251  {
1217 1252          char                    tmpbuf[MAXLINELEN];
1218 1253          char                    *ptr = tmpbuf;
1219 1254          char                    *lim = tmpbuf + MAXLINELEN;
1220 1255          dlmgmt_linkattr_t       *cur_p = NULL;
1221 1256          uint64_t                u64;
1222 1257  
1223 1258          ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
1224 1259          if (!persist) {
     1260 +                char zname[ZONENAME_MAX];
1225 1261                  /*
1226      -                 * We store the linkid in the active database so that dlmgmtd
1227      -                 * can recover in the event that it is restarted.
     1262 +                 * We store the linkid and the zone name in the active database
     1263 +                 * so that dlmgmtd can recover in the event that it is
     1264 +                 * restarted.
1228 1265                   */
1229 1266                  u64 = linkp->ll_linkid;
1230 1267                  ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
     1268 +
     1269 +                if (getzonenamebyid(linkp->ll_zoneid, zname,
     1270 +                    sizeof (zname)) != -1) {
     1271 +                        ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
     1272 +                }
1231 1273          }
1232 1274          u64 = linkp->ll_class;
1233 1275          ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
1234 1276          u64 = linkp->ll_media;
1235 1277          ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
1236 1278  
1237 1279          /*
1238 1280           * The daemon does not keep any active link attribute. Only store the
1239 1281           * attributes if this request is for persistent configuration,
1240 1282           */
↓ open down ↓ 134 lines elided ↑ open up ↑
1375 1417          dlmgmt_link_t *linkp;
1376 1418  
1377 1419          for (linkp = avl_first(&dlmgmt_id_avl); linkp != NULL;
1378 1420              linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) {
1379 1421                  if (linkp->ll_zoneid == zoneid && (linkp->ll_class & class))
1380 1422                          func(linkp);
1381 1423          }
1382 1424  }
1383 1425  
1384 1426  /*
     1427 + * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
     1428 + *
     1429 + * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
     1430 + * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
     1431 + * Because of the upcall architecture of dlmgmtd this can lead to deadlock
     1432 + * with the following scenario:
     1433 + *    a) the thread preparing to fork will have acquired the malloc locks
     1434 + *       then attempt to suspend every thread in preparation to fork.
     1435 + *    b) all of the upcalls will be blocked in door_ucred() trying to malloc()
     1436 + *       and get the credentials of their caller.
     1437 + *    c) we can't suspend the in-kernel thread making the upcall.
     1438 + *
     1439 + * Thus, we cannot serve door requests because we're blocked in malloc()
     1440 + * which fork() owns, but fork() is in turn blocked on the in-kernel thread
     1441 + * making the door upcall.  This is a fundamental architectural problem with
     1442 + * any server handling upcalls and also trying to fork().
     1443 + *
     1444 + * To minimize the chance of this deadlock occurring, we check ahead of time to
     1445 + * see if the file we want to read actually exists in the zone (which it almost
     1446 + * never does), so we don't need to fork in that case (i.e. rarely to never).
     1447 + */
     1448 +static boolean_t
     1449 +zone_file_exists(char *zoneroot, char *filename)
     1450 +{
     1451 +        struct stat     sb;
     1452 +        char            fname[MAXPATHLEN];
     1453 +
     1454 +        (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
     1455 +
     1456 +        if (stat(fname, &sb) == -1)
     1457 +                return (B_FALSE);
     1458 +
     1459 +        return (B_TRUE);
     1460 +}
     1461 +
     1462 +/*
1385 1463   * Initialize the datalink <link name, linkid> mapping and the link's
1386 1464   * attributes list based on the configuration file /etc/dladm/datalink.conf
1387 1465   * and the active configuration cache file
1388 1466   * /etc/svc/volatile/dladm/datalink-management:default.cache.
1389 1467   */
1390 1468  int
1391      -dlmgmt_db_init(zoneid_t zoneid)
     1469 +dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
1392 1470  {
1393 1471          dlmgmt_db_req_t *req;
1394 1472          int             err;
1395 1473          boolean_t       boot = B_FALSE;
1396 1474  
1397 1475          if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
1398 1476              DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
1399 1477                  return (err);
1400 1478  
1401      -        if ((err = dlmgmt_process_db_req(req)) != 0) {
1402      -                /*
1403      -                 * If we get back ENOENT, that means that the active
1404      -                 * configuration file doesn't exist yet, and is not an error.
1405      -                 * We'll create it down below after we've loaded the
1406      -                 * persistent configuration.
1407      -                 */
1408      -                if (err != ENOENT)
1409      -                        goto done;
     1479 +        /* Handle running in a non-native branded zone (i.e. has /native) */
     1480 +        if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
     1481 +                char tdir[MAXPATHLEN];
     1482 +
     1483 +                (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
     1484 +                (void) strlcpy(cachefile, tdir, sizeof (cachefile));
     1485 +        }
     1486 +
     1487 +        if (zone_file_exists(zoneroot, cachefile)) {
     1488 +                if ((err = dlmgmt_process_db_req(req)) != 0) {
     1489 +                        /*
     1490 +                         * If we get back ENOENT, that means that the active
     1491 +                         * configuration file doesn't exist yet, and is not an
     1492 +                         * error.  We'll create it down below after we've
     1493 +                         * loaded the persistent configuration.
     1494 +                         */
     1495 +                        if (err != ENOENT)
     1496 +                                goto done;
     1497 +                        boot = B_TRUE;
     1498 +                }
     1499 +        } else {
1410 1500                  boot = B_TRUE;
1411 1501          }
1412 1502  
1413      -        req->ls_flags = DLMGMT_PERSIST;
1414      -        err = dlmgmt_process_db_req(req);
1415      -        if (err != 0 && err != ENOENT)
1416      -                goto done;
     1503 +        if (zone_file_exists(zoneroot, DLMGMT_PERSISTENT_DB_PATH)) {
     1504 +                req->ls_flags = DLMGMT_PERSIST;
     1505 +                err = dlmgmt_process_db_req(req);
     1506 +                if (err != 0 && err != ENOENT)
     1507 +                        goto done;
     1508 +        }
1417 1509          err = 0;
1418 1510          if (rewrite_needed) {
1419 1511                  /*
1420 1512                   * First update links in memory, then dump the entire db to
1421 1513                   * disk.
1422 1514                   */
1423 1515                  dlmgmt_db_walk(zoneid, DATALINK_CLASS_ALL, dlmgmt_db_upgrade);
1424 1516                  req->ls_op = DLMGMT_DB_OP_WRITE;
1425 1517                  req->ls_linkid = DATALINK_ALL_LINKID;
1426 1518                  if ((err = dlmgmt_process_db_req(req)) != 0 &&
↓ open down ↓ 8 lines elided ↑ open up ↑
1435 1527  done:
1436 1528          if (err == EINPROGRESS)
1437 1529                  err = 0;
1438 1530          else
1439 1531                  free(req);
1440 1532          return (err);
1441 1533  }
1442 1534  
1443 1535  /*
1444 1536   * Remove all links in the given zoneid.
     1537 + *
     1538 + * We do this work in two different passes. In the first pass, we remove any
     1539 + * entry that hasn't been loaned and mark every entry that has been loaned as
     1540 + * something that is going to be tombstoned. In the second pass, we drop the
     1541 + * table lock for every entry and remove the tombstoned entry for our zone.
1445 1542   */
1446 1543  void
1447 1544  dlmgmt_db_fini(zoneid_t zoneid)
1448 1545  {
1449 1546          dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp;
1450 1547  
1451 1548          while (linkp != NULL) {
1452 1549                  next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
1453 1550                  if (linkp->ll_zoneid == zoneid) {
1454      -                        (void) dlmgmt_destroy_common(linkp,
1455      -                            DLMGMT_ACTIVE | DLMGMT_PERSIST);
     1551 +                        boolean_t onloan = linkp->ll_onloan;
     1552 +
     1553 +                        /*
     1554 +                         * Cleanup any VNICs that were loaned to the zone
     1555 +                         * before the zone goes away and we can no longer
     1556 +                         * refer to the VNIC by the name/zoneid.
     1557 +                         */
     1558 +                        if (onloan) {
     1559 +                                (void) dlmgmt_delete_db_entry(linkp,
     1560 +                                    DLMGMT_ACTIVE);
     1561 +                                linkp->ll_tomb = B_TRUE;
     1562 +                        } else {
     1563 +                                (void) dlmgmt_destroy_common(linkp,
     1564 +                                    DLMGMT_ACTIVE | DLMGMT_PERSIST);
     1565 +                        }
     1566 +
1456 1567                  }
1457 1568                  linkp = next_linkp;
1458 1569          }
     1570 +
     1571 +again:
     1572 +        linkp = avl_first(&dlmgmt_name_avl);
     1573 +        while (linkp != NULL) {
     1574 +                vnic_ioc_delete_t ioc;
     1575 +
     1576 +                next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
     1577 +
     1578 +                if (linkp->ll_zoneid != zoneid) {
     1579 +                        linkp = next_linkp;
     1580 +                        continue;
     1581 +                }
     1582 +                ioc.vd_vnic_id = linkp->ll_linkid;
     1583 +                if (linkp->ll_tomb != B_TRUE)
     1584 +                        abort();
     1585 +
     1586 +                /*
     1587 +                 * We have to drop the table lock while going up into the
     1588 +                 * kernel. If we hold the table lock while deleting a vnic, we
     1589 +                 * may get blocked on the mac perimeter and the holder of it may
     1590 +                 * want something from dlmgmtd.
     1591 +                 */
     1592 +                dlmgmt_table_unlock();
     1593 +
     1594 +                if (ioctl(dladm_dld_fd(dld_handle),
     1595 +                    VNIC_IOC_DELETE, &ioc) < 0)
     1596 +                        dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
     1597 +                            "delete VNIC ioctl failed %d %d",
     1598 +                            ioc.vd_vnic_id, errno);
     1599 +
     1600 +                /*
     1601 +                 * Even though we've dropped the lock, we know that nothing else
     1602 +                 * could have removed us. Therefore, it should be safe to go
     1603 +                 * through and delete ourselves, but do nothing else. We'll have
     1604 +                 * to restart iteration from the beginning. This can be painful.
     1605 +                 */
     1606 +                dlmgmt_table_lock(B_TRUE);
     1607 +
     1608 +                (void) dlmgmt_destroy_common(linkp,
     1609 +                    DLMGMT_ACTIVE | DLMGMT_PERSIST);
     1610 +                goto again;
     1611 +        }
     1612 +
1459 1613  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX