ZoL PR 9145

          --- old/usr/src/uts/common/fs/zfs/dnode.c
          +++ new/usr/src/uts/common/fs/zfs/dnode.c
↓ open down ↓ 48 lines elided ↑ open up ↑
  49   49          { "dnode_hold_alloc_interior",          KSTAT_DATA_UINT64 },
  50   50          { "dnode_hold_alloc_lock_retry",        KSTAT_DATA_UINT64 },
  51   51          { "dnode_hold_alloc_lock_misses",       KSTAT_DATA_UINT64 },
  52   52          { "dnode_hold_alloc_type_none",         KSTAT_DATA_UINT64 },
  53   53          { "dnode_hold_free_hits",               KSTAT_DATA_UINT64 },
  54   54          { "dnode_hold_free_misses",             KSTAT_DATA_UINT64 },
  55   55          { "dnode_hold_free_lock_misses",        KSTAT_DATA_UINT64 },
  56   56          { "dnode_hold_free_lock_retry",         KSTAT_DATA_UINT64 },
  57   57          { "dnode_hold_free_overflow",           KSTAT_DATA_UINT64 },
  58   58          { "dnode_hold_free_refcount",           KSTAT_DATA_UINT64 },
  59      -        { "dnode_hold_free_txg",                KSTAT_DATA_UINT64 },
  60   59          { "dnode_free_interior_lock_retry",     KSTAT_DATA_UINT64 },
  61   60          { "dnode_allocate",                     KSTAT_DATA_UINT64 },
  62   61          { "dnode_reallocate",                   KSTAT_DATA_UINT64 },
  63   62          { "dnode_buf_evict",                    KSTAT_DATA_UINT64 },
  64   63          { "dnode_alloc_next_chunk",             KSTAT_DATA_UINT64 },
  65   64          { "dnode_alloc_race",                   KSTAT_DATA_UINT64 },
  66   65          { "dnode_alloc_next_block",             KSTAT_DATA_UINT64 },
  67   66          { "dnode_move_invalid",                 KSTAT_DATA_UINT64 },
  68   67          { "dnode_move_recheck1",                KSTAT_DATA_UINT64 },
  69   68          { "dnode_move_recheck2",                KSTAT_DATA_UINT64 },
↓ open down ↓ 1183 lines elided ↑ open up ↑
1253 1252   * hold the dnode being created. The slots parameter is also used to ensure
1254 1253   * a dnode does not span multiple dnode blocks. In both of these cases, if
1255 1254   * a failure occurs, ENOSPC is returned. Keep in mind, these failure cases
1256 1255   * are only possible when using DNODE_MUST_BE_FREE.
1257 1256   *
1258 1257   * If the DNODE_MUST_BE_ALLOCATED flag is set, "slots" must be 0.
1259 1258   * dnode_hold_impl() will check if the requested dnode is already consumed
1261 1260   * as an extra dnode slot by a large dnode, in which case it returns
1261 1260   * ENOENT.
1262 1261   *
     1262 + * If the DNODE_DRY_RUN flag is set, we don't actually hold the dnode, just
     1263 + * return whether the hold would succeed or not. tag and dnp should be set to
     1264 + * NULL in this case.
     1265 + *
1263 1266   * errors:
1264 1267   * EINVAL - invalid object number or flags.
1265 1268   * ENOSPC - hole too small to fulfill "slots" request (DNODE_MUST_BE_FREE)
1266 1269   * EEXIST - Refers to an allocated dnode (DNODE_MUST_BE_FREE)
1267 1270   *        - Refers to a freeing dnode (DNODE_MUST_BE_FREE)
1268 1271   *        - Refers to an interior dnode slot (DNODE_MUST_BE_ALLOCATED)
1269 1272   * ENOENT - The requested dnode is not allocated (DNODE_MUST_BE_ALLOCATED)
1270 1273   *        - The requested dnode is being freed (DNODE_MUST_BE_ALLOCATED)
1271 1274   * EIO    - i/o error when reading the meta dnode dbuf.
1272 1275   * succeeds even for free dnodes.
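
For illustration, given the DNODE_DRY_RUN semantics described in the comment above, a minimal sketch of a dry-run caller might look like the following. dnode_probe_allocated() is a hypothetical helper name, not part of this change; it assumes the dnode_hold_impl() signature used in this file. Because no hold is taken, there is no matching dnode_rele().

/*
 * Hypothetical sketch: check whether `object` is currently allocated
 * without taking a hold on it.  DNODE_MUST_BE_ALLOCATED requires
 * slots == 0, and DNODE_DRY_RUN requires tag and dnp to be NULL.
 * Returns 0 if a real hold would succeed, otherwise an error such as
 * ENOENT, as documented above.
 */
static int
dnode_probe_allocated(objset_t *os, uint64_t object)
{
        return (dnode_hold_impl(os, object,
            DNODE_MUST_BE_ALLOCATED | DNODE_DRY_RUN, 0, NULL, NULL));
}
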
↓ open down ↓ 7 lines elided ↑ open up ↑
1280 1283          int type;
1281 1284          uint64_t blk;
1282 1285          dnode_t *mdn, *dn;
1283 1286          dmu_buf_impl_t *db;
1284 1287          dnode_children_t *dnc;
1285 1288          dnode_phys_t *dn_block;
1286 1289          dnode_handle_t *dnh;
1287 1290  
1288 1291          ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0));
1289 1292          ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0));
     1293 +        IMPLY(flag & DNODE_DRY_RUN, (tag == NULL) && (dnp == NULL));
1290 1294  
1291 1295          /*
1292 1296           * If you are holding the spa config lock as writer, you shouldn't
1293 1297           * be asking the DMU to do *anything* unless it's the root pool
1294 1298           * which may require us to read from the root filesystem while
1295 1299           * holding some (not all) of the locks as writer.
1296 1300           */
1297 1301          ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 ||
1298 1302              (spa_is_root(os->os_spa) &&
1299 1303              spa_config_held(os->os_spa, SCL_STATE, RW_WRITER)));
↓ open down ↓ 9 lines elided ↑ open up ↑
1309 1313                  else
1310 1314                          dn = DMU_PROJECTUSED_DNODE(os);
1311 1315                  if (dn == NULL)
1312 1316                          return (SET_ERROR(ENOENT));
1313 1317                  type = dn->dn_type;
1314 1318                  if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE)
1315 1319                          return (SET_ERROR(ENOENT));
1316 1320                  if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
1317 1321                          return (SET_ERROR(EEXIST));
1318 1322                  DNODE_VERIFY(dn);
1319      -                (void) zfs_refcount_add(&dn->dn_holds, tag);
1320      -                *dnp = dn;
     1323 +                /* Don't actually hold if dry run, just return 0 */
     1324 +                if (!(flag & DNODE_DRY_RUN)) {
     1325 +                        (void) zfs_refcount_add(&dn->dn_holds, tag);
     1326 +                        *dnp = dn;
     1327 +                }
1321 1328                  return (0);
1322 1329          }
1323 1330  
1324 1331          if (object == 0 || object >= DN_MAX_OBJECT)
1325 1332                  return (SET_ERROR(EINVAL));
1326 1333  
1327 1334          mdn = DMU_META_DNODE(os);
1328 1335          ASSERT(mdn->dn_object == DMU_META_DNODE_OBJECT);
1329 1336  
1330 1337          DNODE_VERIFY(mdn);
↓ open down ↓ 124 lines elided ↑ open up ↑
1455 1462  
1456 1463                  mutex_enter(&dn->dn_mtx);
1457 1464                  if (dn->dn_type == DMU_OT_NONE || dn->dn_free_txg != 0) {
1458 1465                          DNODE_STAT_BUMP(dnode_hold_alloc_type_none);
1459 1466                          mutex_exit(&dn->dn_mtx);
1460 1467                          dnode_slots_rele(dnc, idx, slots);
1461 1468                          dbuf_rele(db, FTAG);
1462 1469                          return (SET_ERROR(ENOENT));
1463 1470                  }
1464 1471  
     1472 +                /* Don't actually hold if dry run, just return 0 */
     1473 +                if (flag & DNODE_DRY_RUN) {
     1474 +                        mutex_exit(&dn->dn_mtx);
     1475 +                        dnode_slots_rele(dnc, idx, slots);
     1476 +                        dbuf_rele(db, FTAG);
     1477 +                        return (0);
     1478 +                }
     1479 +
1465 1480                  DNODE_STAT_BUMP(dnode_hold_alloc_hits);
1466 1481          } else if (flag & DNODE_MUST_BE_FREE) {
1467 1482  
1468 1483                  if (idx + slots - 1 >= DNODES_PER_BLOCK) {
1469 1484                          DNODE_STAT_BUMP(dnode_hold_free_overflow);
1470 1485                          dbuf_rele(db, FTAG);
1471 1486                          return (SET_ERROR(ENOSPC));
1472 1487                  }
1473 1488  
1474 1489                  while (dn == DN_SLOT_UNINIT) {
↓ open down ↓ 39 lines elided ↑ open up ↑
1514 1529  
1515 1530                  mutex_enter(&dn->dn_mtx);
1516 1531                  if (!zfs_refcount_is_zero(&dn->dn_holds) || dn->dn_free_txg) {
1517 1532                          DNODE_STAT_BUMP(dnode_hold_free_refcount);
1518 1533                          mutex_exit(&dn->dn_mtx);
1519 1534                          dnode_slots_rele(dnc, idx, slots);
1520 1535                          dbuf_rele(db, FTAG);
1521 1536                          return (SET_ERROR(EEXIST));
1522 1537                  }
1523 1538  
     1539 +                /* Don't actually hold if dry run, just return 0 */
     1540 +                if (flag & DNODE_DRY_RUN) {
     1541 +                        mutex_exit(&dn->dn_mtx);
     1542 +                        dnode_slots_rele(dnc, idx, slots);
     1543 +                        dbuf_rele(db, FTAG);
     1544 +                        return (0);
     1545 +                }
     1546 +
1524 1547                  dnode_set_slots(dnc, idx + 1, slots - 1, DN_SLOT_INTERIOR);
1525 1548                  DNODE_STAT_BUMP(dnode_hold_free_hits);
1526 1549          } else {
1527 1550                  dbuf_rele(db, FTAG);
1528 1551                  return (SET_ERROR(EINVAL));
1529 1552          }
1530 1553  
1531      -        if (dn->dn_free_txg) {
1532      -                DNODE_STAT_BUMP(dnode_hold_free_txg);
1533      -                type = dn->dn_type;
1534      -                mutex_exit(&dn->dn_mtx);
1535      -                dnode_slots_rele(dnc, idx, slots);
1536      -                dbuf_rele(db, FTAG);
1537      -                return (SET_ERROR((flag & DNODE_MUST_BE_ALLOCATED) ?
1538      -                    ENOENT : EEXIST));
1539      -        }
     1554 +        ASSERT0(dn->dn_free_txg);
1540 1555  
1541 1556          if (zfs_refcount_add(&dn->dn_holds, tag) == 1)
1542 1557                  dbuf_add_ref(db, dnh);
1543 1558  
1544 1559          mutex_exit(&dn->dn_mtx);
1545 1560  
1546 1561          /* Now we can rely on the hold to prevent the dnode from moving. */
1547 1562          dnode_slots_rele(dnc, idx, slots);
1548 1563  
1549 1564          DNODE_VERIFY(dn);
↓ open down ↓ 70 lines elided ↑ open up ↑
1620 1635                   * hold on the parent dbuf prevents the handle from being
1621 1636                   * destroyed, the hold on the handle is OK. We can't yet assert
1622 1637                   * that the handle has zero references, but that will be
1623 1638                   * asserted anyway when the handle gets destroyed.
1624 1639                   */
1625 1640                  mutex_enter(&db->db_mtx);
1626 1641                  dbuf_rele_and_unlock(db, dnh, evicting);
1627 1642          }
1628 1643  }
1629 1644  
     1645 +/*
     1646 + * Test whether we can create a dnode at the specified location.
     1647 + */
     1648 +int
     1649 +dnode_try_claim(objset_t *os, uint64_t object, int slots)
     1650 +{
     1651 +        return (dnode_hold_impl(os, object, DNODE_MUST_BE_FREE | DNODE_DRY_RUN,
     1652 +            slots, NULL, NULL));
     1653 +}
     1654 +
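
A hedged usage sketch for the new helper follows; object_slot_is_free() is a hypothetical wrapper, not part of this change. A caller that wants to know whether object number obj, together with the nslots dnode slots it would occupy, is still available can probe with dnode_try_claim() before attempting the real allocation.

/*
 * Hypothetical sketch: returns B_TRUE if `obj` and the `nslots` slots it
 * needs are free, i.e. a later claim at this object number is expected
 * to succeed.  nslots must be >= 1 (DNODE_MUST_BE_FREE requires a
 * nonzero slot count).  EEXIST or ENOSPC means the location is taken or
 * the hole is too small; no hold is taken in any case, so there is
 * nothing to release.
 */
static boolean_t
object_slot_is_free(objset_t *os, uint64_t obj, int nslots)
{
        return (dnode_try_claim(os, obj, nslots) == 0);
}
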
1630 1655  void
1631 1656  dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
1632 1657  {
1633 1658          objset_t *os = dn->dn_objset;
1634 1659          uint64_t txg = tx->tx_txg;
1635 1660  
1636 1661          if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
1637 1662                  dsl_dataset_dirty(os->os_dsl_dataset, tx);
1638 1663                  return;
1639 1664          }
↓ open down ↓ 840 lines elided ↑ open up ↑