3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /* Portions Copyright 2010 Robert Milkowski */
26
27 /*
28 * ZFS volume emulation driver.
29 *
30 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
31 * Volumes are accessed through the symbolic links named:
32 *
33 * /dev/zvol/dsk/<pool_name>/<dataset_name>
34 * /dev/zvol/rdsk/<pool_name>/<dataset_name>
35 *
36 * These links are created by the /dev filesystem (sdev_zvolops.c).
37 * Volumes are persistent through reboot. No user command needs to be
38 * run before opening and using a device.
39 */
40
41 #include <sys/types.h>
42 #include <sys/param.h>
43 #include <sys/errno.h>
44 #include <sys/uio.h>
45 #include <sys/buf.h>
46 #include <sys/modctl.h>
325 * property setting step won't apply to them.
326 */
327 VERIFY(nvlist_remove_all(nvprops,
328 zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
329 (void) nvlist_remove_all(nvprops,
330 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
331
332 error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
333 DMU_OT_NONE, 0, tx);
334 ASSERT(error == 0);
335
336 error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
337 DMU_OT_NONE, 0, tx);
338 ASSERT(error == 0);
339
340 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
341 ASSERT(error == 0);
342 }
343
344 /*
345 * Replay a TX_WRITE ZIL transaction that didn't get committed
346 * after a system failure
347 */
348 static int
349 zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
350 {
351 objset_t *os = zv->zv_objset;
352 char *data = (char *)(lr + 1); /* data follows lr_write_t */
353 uint64_t offset, length;
354 dmu_tx_t *tx;
355 int error;
356
357 if (byteswap)
358 byteswap_uint64_array(lr, sizeof (*lr));
359
360 offset = lr->lr_offset;
361 length = lr->lr_length;
362
363 /* If it's a dmu_sync() block, write the whole block */
364 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
374 error = dmu_tx_assign(tx, TXG_WAIT);
375 if (error) {
376 dmu_tx_abort(tx);
377 } else {
378 dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
379 dmu_tx_commit(tx);
380 }
381
382 return (error);
383 }
384
385 /* ARGSUSED */
386 static int
387 zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
388 {
389 return (ENOTSUP);
390 }
391
392 /*
393 * Callback vectors for replaying records.
394 * Only TX_WRITE is needed for zvol.
395 */
396 zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
397 zvol_replay_err, /* 0 no such transaction type */
398 zvol_replay_err, /* TX_CREATE */
399 zvol_replay_err, /* TX_MKDIR */
400 zvol_replay_err, /* TX_MKXATTR */
401 zvol_replay_err, /* TX_SYMLINK */
402 zvol_replay_err, /* TX_REMOVE */
403 zvol_replay_err, /* TX_RMDIR */
404 zvol_replay_err, /* TX_LINK */
405 zvol_replay_err, /* TX_RENAME */
406 zvol_replay_write, /* TX_WRITE */
407 zvol_replay_err, /* TX_TRUNCATE */
408 zvol_replay_err, /* TX_SETATTR */
409 zvol_replay_err, /* TX_ACL */
410 zvol_replay_err, /* TX_CREATE_ACL */
411 zvol_replay_err, /* TX_CREATE_ATTR */
412 zvol_replay_err, /* TX_CREATE_ACL_ATTR */
413 zvol_replay_err, /* TX_MKDIR_ACL */
414 zvol_replay_err, /* TX_MKDIR_ATTR */
415 zvol_replay_err, /* TX_MKDIR_ACL_ATTR */
416 zvol_replay_err, /* TX_WRITE2 */
417 };
418
419 int
420 zvol_name2minor(const char *name, minor_t *minor)
421 {
422 zvol_state_t *zv;
423
424 mutex_enter(&zfsdev_state_lock);
425 zv = zvol_minor_lookup(name);
426 if (minor && zv)
427 *minor = zv->zv_minor;
1495
1496 return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0);
1497 }
1498
1499 /*
1500 * Entry point for external callers to zvol_log_write
1501 */
1502 void
1503 zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
1504 boolean_t sync)
1505 {
1506 zvol_state_t *zv = minor_hdl;
1507
1508 zvol_log_write(zv, tx, off, resid, sync);
1509 }
1510 /*
1511 * END entry points to allow external callers access to the volume.
1512 */
1513
1514 /*
1515 * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I).
1516 */
1517 /*ARGSUSED*/
1518 int
1519 zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
1520 {
1521 zvol_state_t *zv;
1522 struct dk_cinfo dki;
1523 struct dk_minfo dkm;
1524 struct dk_callback *dkc;
1525 int error = 0;
1526 rl_t *rl;
1527
1528 mutex_enter(&zfsdev_state_lock);
1529
1530 zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL);
1531
1532 if (zv == NULL) {
1533 mutex_exit(&zfsdev_state_lock);
1534 return (ENXIO);
1535 }
1614 */
1615 error = ENOTSUP;
1616 break;
1617
1618 case DKIOCDUMPINIT:
1619 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1620 RL_WRITER);
1621 error = zvol_dumpify(zv);
1622 zfs_range_unlock(rl);
1623 break;
1624
1625 case DKIOCDUMPFINI:
1626 if (!(zv->zv_flags & ZVOL_DUMPIFIED))
1627 break;
1628 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1629 RL_WRITER);
1630 error = zvol_dump_fini(zv);
1631 zfs_range_unlock(rl);
1632 break;
1633
1634 default:
1635 error = ENOTTY;
1636 break;
1637
1638 }
1639 mutex_exit(&zfsdev_state_lock);
1640 return (error);
1641 }
1642
1643 int
1644 zvol_busy(void)
1645 {
1646 return (zvol_minors != 0);
1647 }
1648
1649 void
1650 zvol_init(void)
1651 {
1652 VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t),
1653 1) == 0);
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 *
24 * Portions Copyright 2010 Robert Milkowski
25 *
26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 /*
30 * ZFS volume emulation driver.
31 *
32 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
33 * Volumes are accessed through the symbolic links named:
34 *
35 * /dev/zvol/dsk/<pool_name>/<dataset_name>
36 * /dev/zvol/rdsk/<pool_name>/<dataset_name>
37 *
38 * These links are created by the /dev filesystem (sdev_zvolops.c).
39 * Volumes are persistent through reboot. No user command needs to be
40 * run before opening and using a device.
41 */
42
43 #include <sys/types.h>
44 #include <sys/param.h>
45 #include <sys/errno.h>
46 #include <sys/uio.h>
47 #include <sys/buf.h>
48 #include <sys/modctl.h>
327 * property setting step won't apply to them.
328 */
329 VERIFY(nvlist_remove_all(nvprops,
330 zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
331 (void) nvlist_remove_all(nvprops,
332 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
333
334 error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
335 DMU_OT_NONE, 0, tx);
336 ASSERT(error == 0);
337
338 error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
339 DMU_OT_NONE, 0, tx);
340 ASSERT(error == 0);
341
342 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
343 ASSERT(error == 0);
344 }
345
346 /*
347 * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we
348 * implement DKIOCFREE/free-long-range.
349 */
350 static int
351 zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap)
352 {
353 uint64_t offset, length;
354
355 if (byteswap)
356 byteswap_uint64_array(lr, sizeof (*lr));
357
358 offset = lr->lr_offset;
359 length = lr->lr_length;
360
361 return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
362 }
363
364 /*
365 * Replay a TX_WRITE ZIL transaction that didn't get committed
366 * after a system failure
367 */
368 static int
369 zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
370 {
371 objset_t *os = zv->zv_objset;
372 char *data = (char *)(lr + 1); /* data follows lr_write_t */
373 uint64_t offset, length;
374 dmu_tx_t *tx;
375 int error;
376
377 if (byteswap)
378 byteswap_uint64_array(lr, sizeof (*lr));
379
380 offset = lr->lr_offset;
381 length = lr->lr_length;
382
383 /* If it's a dmu_sync() block, write the whole block */
384 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
394 error = dmu_tx_assign(tx, TXG_WAIT);
395 if (error) {
396 dmu_tx_abort(tx);
397 } else {
398 dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
399 dmu_tx_commit(tx);
400 }
401
402 return (error);
403 }
404
405 /* ARGSUSED */
406 static int
407 zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
408 {
409 return (ENOTSUP);
410 }
411
412 /*
413 * Callback vectors for replaying records.
414 * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
415 */
416 zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
417 zvol_replay_err, /* 0 no such transaction type */
418 zvol_replay_err, /* TX_CREATE */
419 zvol_replay_err, /* TX_MKDIR */
420 zvol_replay_err, /* TX_MKXATTR */
421 zvol_replay_err, /* TX_SYMLINK */
422 zvol_replay_err, /* TX_REMOVE */
423 zvol_replay_err, /* TX_RMDIR */
424 zvol_replay_err, /* TX_LINK */
425 zvol_replay_err, /* TX_RENAME */
426 zvol_replay_write, /* TX_WRITE */
427 zvol_replay_truncate, /* TX_TRUNCATE */
428 zvol_replay_err, /* TX_SETATTR */
429 zvol_replay_err, /* TX_ACL */
430 zvol_replay_err, /* TX_CREATE_ACL */
431 zvol_replay_err, /* TX_CREATE_ATTR */
432 zvol_replay_err, /* TX_CREATE_ACL_ATTR */
433 zvol_replay_err, /* TX_MKDIR_ACL */
434 zvol_replay_err, /* TX_MKDIR_ATTR */
435 zvol_replay_err, /* TX_MKDIR_ACL_ATTR */
436 zvol_replay_err, /* TX_WRITE2 */
437 };
438
439 int
440 zvol_name2minor(const char *name, minor_t *minor)
441 {
442 zvol_state_t *zv;
443
444 mutex_enter(&zfsdev_state_lock);
445 zv = zvol_minor_lookup(name);
446 if (minor && zv)
447 *minor = zv->zv_minor;
1515
1516 return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0);
1517 }
1518
1519 /*
1520 * Entry point for external callers to zvol_log_write
1521 */
1522 void
1523 zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
1524 boolean_t sync)
1525 {
1526 zvol_state_t *zv = minor_hdl;
1527
1528 zvol_log_write(zv, tx, off, resid, sync);
1529 }
1530 /*
1531 * END entry points to allow external callers access to the volume.
1532 */
1533
1534 /*
1535 * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
1536 */
1537 static void
1538 zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
1539 boolean_t sync)
1540 {
1541 itx_t *itx;
1542 lr_truncate_t *lr;
1543 zilog_t *zilog = zv->zv_zilog;
1544
1545 if (zil_replaying(zilog, tx))
1546 return;
1547
1548 itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
1549 lr = (lr_truncate_t *)&itx->itx_lr;
1550 lr->lr_foid = ZVOL_OBJ;
1551 lr->lr_offset = off;
1552 lr->lr_length = len;
1553
1554 itx->itx_sync = sync;
1555 zil_itx_assign(zilog, itx, tx);
1556 }
1557
1558 /*
1559 * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I).
1560 * Also a dirtbag dkio ioctl for unmap/free-block functionality.
1561 */
1562 /*ARGSUSED*/
1563 int
1564 zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
1565 {
1566 zvol_state_t *zv;
1567 struct dk_cinfo dki;
1568 struct dk_minfo dkm;
1569 struct dk_callback *dkc;
1570 int error = 0;
1571 rl_t *rl;
1572
1573 mutex_enter(&zfsdev_state_lock);
1574
1575 zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL);
1576
1577 if (zv == NULL) {
1578 mutex_exit(&zfsdev_state_lock);
1579 return (ENXIO);
1580 }
1659 */
1660 error = ENOTSUP;
1661 break;
1662
1663 case DKIOCDUMPINIT:
1664 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1665 RL_WRITER);
1666 error = zvol_dumpify(zv);
1667 zfs_range_unlock(rl);
1668 break;
1669
1670 case DKIOCDUMPFINI:
1671 if (!(zv->zv_flags & ZVOL_DUMPIFIED))
1672 break;
1673 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1674 RL_WRITER);
1675 error = zvol_dump_fini(zv);
1676 zfs_range_unlock(rl);
1677 break;
1678
1679 case DKIOCFREE:
1680 {
1681 dkioc_free_t df;
1682 dmu_tx_t *tx;
1683
1684 if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
1685 error = EFAULT;
1686 break;
1687 }
1688
1689 /*
1690 * Apply Postel's Law to length-checking. If they overshoot,
1691 * just blank out until the end, if there's a need to blank
1692 * out anything.
1693 */
1694 if (df.df_start >= zv->zv_volsize)
1695 break; /* No need to do anything... */
1696 if (df.df_start + df.df_length > zv->zv_volsize)
1697 df.df_length = DMU_OBJECT_END;
1698
1699 rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
1700 RL_WRITER);
1701 tx = dmu_tx_create(zv->zv_objset);
1702 error = dmu_tx_assign(tx, TXG_WAIT);
1703 if (error != 0) {
1704 dmu_tx_abort(tx);
1705 } else {
1706 zvol_log_truncate(zv, tx, df.df_start,
1707 df.df_length, B_TRUE);
1708 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
1709 df.df_start, df.df_length);
1710 dmu_tx_commit(tx);
1711 }
1712
1713 zfs_range_unlock(rl);
1714
1715 if (error == 0) {
1716 /*
1717 * If the write-cache is disabled or 'sync' property
1718 * is set to 'always' then treat this as a synchronous
1719 * operation (i.e. commit to zil).
1720 */
1721 if (!(zv->zv_flags & ZVOL_WCE) ||
1722 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS))
1723 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1724
1725 /*
1726 * If the caller really wants synchronous writes, and
1727 * can't wait for them, don't return until the write
1728 * is done.
1729 */
1730 if (df.df_flags & DF_WAIT_SYNC) {
1731 txg_wait_synced(
1732 dmu_objset_pool(zv->zv_objset), 0);
1733 }
1734 }
1735 break;
1736 }
1737
1738 default:
1739 error = ENOTTY;
1740 break;
1741
1742 }
1743 mutex_exit(&zfsdev_state_lock);
1744 return (error);
1745 }
1746
1747 int
1748 zvol_busy(void)
1749 {
1750 return (zvol_minors != 0);
1751 }
1752
1753 void
1754 zvol_init(void)
1755 {
1756 VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t),
1757 1) == 0);
|