Print this page
701 UNMAP support for COMSTAR
Contributed by: Sumit Gupta <sumit.gupta@nexenta.com>
Reviewed by: Garrett D'Amore <garrett@nexenta.com>
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
Reviewed by: George Wilson <gwilson@zfsmail.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/zvol.c
          +++ new/usr/src/uts/common/fs/zfs/zvol.c
↓ open down ↓ 12 lines elided ↑ open up ↑
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + *
       24 + * Portions Copyright 2010 Robert Milkowski
       25 + *
       26 + * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23   27   */
  24   28  
  25      -/* Portions Copyright 2010 Robert Milkowski */
  26      -
  27   29  /*
  28   30   * ZFS volume emulation driver.
  29   31   *
  30   32   * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
  31   33   * Volumes are accessed through the symbolic links named:
  32   34   *
  33   35   * /dev/zvol/dsk/<pool_name>/<dataset_name>
  34   36   * /dev/zvol/rdsk/<pool_name>/<dataset_name>
  35   37   *
  36   38   * These links are created by the /dev filesystem (sdev_zvolops.c).
↓ open down ↓ 298 lines elided ↑ open up ↑
 335  337  
 336  338          error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
 337  339              DMU_OT_NONE, 0, tx);
 338  340          ASSERT(error == 0);
 339  341  
 340  342          error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
 341  343          ASSERT(error == 0);
 342  344  }
 343  345  
 344  346  /*
      347 + * Replay a TX_TRUNCATE ZIL transaction if asked.  TX_TRUNCATE is how we
      348 + * implement DKIOCFREE/free-long-range.
      349 + */
      350 +static int
      351 +zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap)
      352 +{
      353 +        uint64_t offset, length;
      354 +
      355 +        if (byteswap)
      356 +                byteswap_uint64_array(lr, sizeof (*lr));
      357 +
      358 +        offset = lr->lr_offset;
      359 +        length = lr->lr_length;
      360 +
      361 +        return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
      362 +}
      363 +
      364 +/*
 345  365   * Replay a TX_WRITE ZIL transaction that didn't get committed
 346  366   * after a system failure
 347  367   */
 348  368  static int
 349  369  zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
 350  370  {
 351  371          objset_t *os = zv->zv_objset;
 352  372          char *data = (char *)(lr + 1);  /* data follows lr_write_t */
 353  373          uint64_t offset, length;
 354  374          dmu_tx_t *tx;
↓ open down ↓ 29 lines elided ↑ open up ↑
 384  404  
 385  405  /* ARGSUSED */
 386  406  static int
 387  407  zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
 388  408  {
 389  409          return (ENOTSUP);
 390  410  }
 391  411  
 392  412  /*
 393  413   * Callback vectors for replaying records.
 394      - * Only TX_WRITE is needed for zvol.
      414 + * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
 395  415   */
 396  416  zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
 397  417          zvol_replay_err,        /* 0 no such transaction type */
 398  418          zvol_replay_err,        /* TX_CREATE */
 399  419          zvol_replay_err,        /* TX_MKDIR */
 400  420          zvol_replay_err,        /* TX_MKXATTR */
 401  421          zvol_replay_err,        /* TX_SYMLINK */
 402  422          zvol_replay_err,        /* TX_REMOVE */
 403  423          zvol_replay_err,        /* TX_RMDIR */
 404  424          zvol_replay_err,        /* TX_LINK */
 405  425          zvol_replay_err,        /* TX_RENAME */
 406  426          zvol_replay_write,      /* TX_WRITE */
 407      -        zvol_replay_err,        /* TX_TRUNCATE */
      427 +        zvol_replay_truncate,   /* TX_TRUNCATE */
 408  428          zvol_replay_err,        /* TX_SETATTR */
 409  429          zvol_replay_err,        /* TX_ACL */
 410  430          zvol_replay_err,        /* TX_CREATE_ACL */
 411  431          zvol_replay_err,        /* TX_CREATE_ATTR */
 412  432          zvol_replay_err,        /* TX_CREATE_ACL_ATTR */
 413  433          zvol_replay_err,        /* TX_MKDIR_ACL */
 414  434          zvol_replay_err,        /* TX_MKDIR_ATTR */
 415  435          zvol_replay_err,        /* TX_MKDIR_ACL_ATTR */
 416  436          zvol_replay_err,        /* TX_WRITE2 */
 417  437  };
↓ open down ↓ 1087 lines elided ↑ open up ↑
1505 1525  {
1506 1526          zvol_state_t *zv = minor_hdl;
1507 1527  
1508 1528          zvol_log_write(zv, tx, off, resid, sync);
1509 1529  }
1510 1530  /*
1511 1531   * END entry points to allow external callers access to the volume.
1512 1532   */
1513 1533  
1514 1534  /*
     1535 + * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
     1536 + */
     1537 +static void
     1538 +zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
     1539 +    boolean_t sync)
     1540 +{
     1541 +        itx_t *itx;
     1542 +        lr_truncate_t *lr;
     1543 +        zilog_t *zilog = zv->zv_zilog;
     1544 +
     1545 +        if (zil_replaying(zilog, tx))
     1546 +                return;
     1547 +
     1548 +        itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
     1549 +        lr = (lr_truncate_t *)&itx->itx_lr;
     1550 +        lr->lr_foid = ZVOL_OBJ;
     1551 +        lr->lr_offset = off;
     1552 +        lr->lr_length = len;
     1553 +
     1554 +        itx->itx_sync = sync;
     1555 +        zil_itx_assign(zilog, itx, tx);
     1556 +}
     1557 +
     1558 +/*
1515 1559   * Dirtbag ioctls to support mkfs(1M) for UFS filesystems.  See dkio(7I).
     1560 + * Also a dirtbag dkio ioctl for unmap/free-block functionality.
1516 1561   */
1517 1562  /*ARGSUSED*/
1518 1563  int
1519 1564  zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
1520 1565  {
1521 1566          zvol_state_t *zv;
1522 1567          struct dk_cinfo dki;
1523 1568          struct dk_minfo dkm;
1524 1569          struct dk_callback *dkc;
1525 1570          int error = 0;
↓ open down ↓ 98 lines elided ↑ open up ↑
1624 1669  
1625 1670          case DKIOCDUMPFINI:
1626 1671                  if (!(zv->zv_flags & ZVOL_DUMPIFIED))
1627 1672                          break;
1628 1673                  rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1629 1674                      RL_WRITER);
1630 1675                  error = zvol_dump_fini(zv);
1631 1676                  zfs_range_unlock(rl);
1632 1677                  break;
1633 1678  
     1679 +        case DKIOCFREE:
     1680 +        {
     1681 +                dkioc_free_t df;
     1682 +                dmu_tx_t *tx;
     1683 +
     1684 +                if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
     1685 +                        error = EFAULT;
     1686 +                        break;
     1687 +                }
     1688 +
     1689 +                /*
     1690 +                 * Apply Postel's Law to length-checking.  If the request
     1691 +                 * overshoots the end of the volume, just free until the
     1692 +                 * end, if there's a need to free anything at all.
     1693 +                 */
     1694 +                if (df.df_start >= zv->zv_volsize)
     1695 +                        break;  /* No need to do anything... */
     1696 +                if (df.df_start + df.df_length > zv->zv_volsize)
     1697 +                        df.df_length = DMU_OBJECT_END;
     1698 +
     1699 +                rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
     1700 +                    RL_WRITER);
     1701 +                tx = dmu_tx_create(zv->zv_objset);
     1702 +                error = dmu_tx_assign(tx, TXG_WAIT);
     1703 +                if (error != 0) {
     1704 +                        dmu_tx_abort(tx);
     1705 +                } else {
     1706 +                        zvol_log_truncate(zv, tx, df.df_start,
     1707 +                            df.df_length, B_TRUE);
     1708 +                        error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
     1709 +                            df.df_start, df.df_length);
     1710 +                        dmu_tx_commit(tx);
     1711 +                }
     1712 +
     1713 +                zfs_range_unlock(rl);
     1714 +
     1715 +                if (error == 0) {
     1716 +                        /*
     1717 +                         * If the write-cache is disabled or 'sync' property
     1718 +                         * is set to 'always' then treat this as a synchronous
     1719 +                         * operation (i.e. commit to zil).
     1720 +                         */
     1721 +                        if (!(zv->zv_flags & ZVOL_WCE) ||
     1722 +                            (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS))
     1723 +                                zil_commit(zv->zv_zilog, ZVOL_OBJ);
     1724 +
     1725 +                        /*
     1726 +                         * If the caller set DF_WAIT_SYNC, this is a
     1727 +                         * request to wait for the free: don't return
     1728 +                         * until txg_wait_synced() has completed.
     1729 +                         */
     1730 +                        if (df.df_flags & DF_WAIT_SYNC) {
     1731 +                                txg_wait_synced(
     1732 +                                    dmu_objset_pool(zv->zv_objset), 0);
     1733 +                        }
     1734 +                }
     1735 +                break;
     1736 +        }
     1737 +
1634 1738          default:
1635 1739                  error = ENOTTY;
1636 1740                  break;
1637 1741  
1638 1742          }
1639 1743          mutex_exit(&zfsdev_state_lock);
1640 1744          return (error);
1641 1745  }
1642 1746  
1643 1747  int
↓ open down ↓ 251 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX