9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 *
24 * Portions Copyright 2010 Robert Milkowski
25 *
26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
28 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29 * Copyright (c) 2014 Integros [integros.com]
30 */
31
32 /*
33 * ZFS volume emulation driver.
34 *
35 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
36 * Volumes are accessed through the symbolic links named:
37 *
38 * /dev/zvol/dsk/<pool_name>/<dataset_name>
39 * /dev/zvol/rdsk/<pool_name>/<dataset_name>
40 *
41 * These links are created by the /dev filesystem (sdev_zvolops.c).
42 * Volumes are persistent through reboot. No user command needs to be
43 * run before opening and using a device.
44 */
45
46 #include <sys/types.h>
47 #include <sys/param.h>
48 #include <sys/errno.h>
71 #include <sys/crc32.h>
72 #include <sys/dirent.h>
73 #include <sys/policy.h>
74 #include <sys/fs/zfs.h>
75 #include <sys/zfs_ioctl.h>
76 #include <sys/mkdev.h>
77 #include <sys/zil.h>
78 #include <sys/refcount.h>
79 #include <sys/zfs_znode.h>
80 #include <sys/zfs_rlock.h>
81 #include <sys/vdev_disk.h>
82 #include <sys/vdev_impl.h>
83 #include <sys/vdev_raidz.h>
84 #include <sys/zvol.h>
85 #include <sys/dumphdr.h>
86 #include <sys/zil_impl.h>
87 #include <sys/dbuf.h>
88 #include <sys/dmu_tx.h>
89 #include <sys/zfeature.h>
90 #include <sys/zio_checksum.h>
91 #include <sys/zil_impl.h>
92
93 #include "zfs_namecheck.h"
94
95 void *zfsdev_state;
96 static char *zvol_tag = "zvol_tag";
97
98 #define ZVOL_DUMPSIZE "dumpsize"
99
100 /*
101 * This lock protects the zfsdev_state structure from being modified
102 * while it's being used, e.g. an open that comes in before a create
103 * finishes. It also protects temporary opens of the dataset so that,
104 * e.g., an open doesn't get a spurious EBUSY.
105 */
106 kmutex_t zfsdev_state_lock;
107 static uint32_t zvol_minors;
108
109 typedef struct zvol_extent {
110 list_node_t ze_node;
955 /*
956 * If the open count is zero, this is a spurious close.
957 * That indicates a bug in the kernel / DDI framework.
958 */
959 ASSERT(zv->zv_open_count[otyp] != 0);
960 ASSERT(zv->zv_total_opens != 0);
961
962 /*
963 * You may get multiple opens, but only one close.
964 */
965 zv->zv_open_count[otyp]--;
966 zv->zv_total_opens--;
967
968 if (zv->zv_total_opens == 0)
969 zvol_last_close(zv);
970
971 mutex_exit(&zfsdev_state_lock);
972 return (error);
973 }
974
975 static void
976 zvol_get_done(zgd_t *zgd, int error)
977 {
978 if (zgd->zgd_db)
979 dmu_buf_rele(zgd->zgd_db, zgd);
980
981 zfs_range_unlock(zgd->zgd_rl);
982
983 if (error == 0 && zgd->zgd_bp)
984 zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
985
986 kmem_free(zgd, sizeof (zgd_t));
987 }
988
989 /*
990 * Get data to generate a TX_WRITE intent log record.
991 */
static int
zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
{
	zvol_state_t *zv = arg;
	objset_t *os = zv->zv_objset;
	uint64_t object = ZVOL_OBJ;	/* zvols store all data in one object */
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;	/* length of user data */
	dmu_buf_t *db;
	zgd_t *zgd;
	int error;

	/* Callers must supply the lwb and zio this record belongs to. */
	ASSERT3P(lwb, !=, NULL);
	ASSERT3P(zio, !=, NULL);

	/* zvol_get_done() drops the dbuf hold and range lock, frees zgd. */
	zvol_get_done(zgd, error);

	return (error);
}
1057
1058 /*
1059 * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
1060 *
1061 * We store data in the log buffers if it's small enough.
1062 * Otherwise we will later flush the data out via dmu_sync().
1063 */
1064 ssize_t zvol_immediate_write_sz = 32768;
1065
static void
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
    boolean_t sync)
{
	uint32_t blocksize = zv->zv_volblocksize;
	zilog_t *zilog = zv->zv_zilog;
	itx_wr_state_t write_state;

	/* If we are replaying the log, the records are already there. */
	if (zil_replaying(zilog, tx))
		return;

	/*
	 * Decide how the data travels with the itx:
	 *  WR_INDIRECT  - logbias=throughput, or a whole-block write with
	 *                 a large volblocksize and no slog: the data is
	 *                 flushed later via dmu_sync().
	 *  WR_COPIED    - synchronous write small enough to embed the data
	 *                 directly in the log record.
	 *  WR_NEED_COPY - asynchronous write: data is copied only if/when
	 *                 the itx is actually committed.
	 */
	if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
		write_state = WR_INDIRECT;
	else if (!spa_has_slogs(zilog->zl_spa) &&
	    resid >= blocksize && blocksize > zvol_immediate_write_sz)
		write_state = WR_INDIRECT;
	else if (sync)
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

	/* Emit one TX_WRITE itx per chunk until the range is covered. */
	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		itx_wr_state_t wr_state = write_state;
		ssize_t len = resid;

		/* Too large to embed in the record: defer the copy. */
		if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
			wr_state = WR_NEED_COPY;
		else if (wr_state == WR_INDIRECT)
			len = MIN(blocksize - P2PHASE(off, blocksize), resid);

		/* WR_COPIED records carry the payload inline after the lr. */
		itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
		    (wr_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (wr_state == WR_COPIED && dmu_read(zv->zv_objset,
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = zv;
		itx->itx_sync = sync;

		zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}
1125
1126 static int
1127 zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset,
1128 uint64_t size, boolean_t doread, boolean_t isdump)
1129 {
1130 vdev_disk_t *dvd;
1131 int c;
1132 int numerrors = 0;
1133
1134 if (vd->vdev_ops == &vdev_mirror_ops ||
1135 vd->vdev_ops == &vdev_replacing_ops ||
1136 vd->vdev_ops == &vdev_spare_ops) {
1137 for (c = 0; c < vd->vdev_children; c++) {
1138 int err = zvol_dumpio_vdev(vd->vdev_child[c],
1139 addr, offset, origoffset, size, doread, isdump);
1140 if (err != 0) {
1141 numerrors++;
1142 } else if (doread) {
1143 break;
1144 }
1145 }
1146 }
1147
1148 if (!vd->vdev_ops->vdev_op_leaf && vd->vdev_ops != &vdev_raidz_ops)
1149 return (numerrors < vd->vdev_children ? 0 : EIO);
1150
1151 if (doread && !vdev_readable(vd))
1152 return (SET_ERROR(EIO));
1153 else if (!doread && !vdev_writeable(vd))
1154 return (SET_ERROR(EIO));
1155
1156 if (vd->vdev_ops == &vdev_raidz_ops) {
1157 return (vdev_raidz_physio(vd,
1158 addr, size, offset, origoffset, doread, isdump));
1159 }
1160
1161 offset += VDEV_LABEL_START_SIZE;
1162
1163 if (ddi_in_panic() || isdump) {
1164 ASSERT(!doread);
1165 if (doread)
1166 return (SET_ERROR(EIO));
1167 dvd = vd->vdev_tsd;
1168 ASSERT3P(dvd, !=, NULL);
1169 return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
1170 lbtodb(size)));
1171 } else {
1172 dvd = vd->vdev_tsd;
1173 ASSERT3P(dvd, !=, NULL);
1174 return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size,
1175 offset, doread ? B_READ : B_WRITE));
1176 }
1177 }
1178
1179 static int
1180 zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
1181 boolean_t doread, boolean_t isdump)
1182 {
1183 vdev_t *vd;
1184 int error;
1185 zvol_extent_t *ze;
1186 spa_t *spa = dmu_objset_spa(zv->zv_objset);
1187
	/* Must be sector aligned, and not straddle a block boundary. */
1189 if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) ||
1190 P2BOUNDARY(offset, size, zv->zv_volblocksize)) {
1191 return (SET_ERROR(EINVAL));
1192 }
1193 ASSERT(size <= zv->zv_volblocksize);
1194
1195 /* Locate the extent this belongs to */
1196 ze = list_head(&zv->zv_extents);
1763 break;
1764
1765 case DKIOCDUMPINIT:
1766 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1767 RL_WRITER);
1768 error = zvol_dumpify(zv);
1769 zfs_range_unlock(rl);
1770 break;
1771
1772 case DKIOCDUMPFINI:
1773 if (!(zv->zv_flags & ZVOL_DUMPIFIED))
1774 break;
1775 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1776 RL_WRITER);
1777 error = zvol_dump_fini(zv);
1778 zfs_range_unlock(rl);
1779 break;
1780
1781 case DKIOCFREE:
1782 {
1783 dkioc_free_t df;
1784 dmu_tx_t *tx;
1785
1786 if (!zvol_unmap_enabled)
1787 break;
1788
1789 if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
1790 error = SET_ERROR(EFAULT);
1791 break;
1792 }
1793
1794 /*
1795 * Apply Postel's Law to length-checking. If they overshoot,
1796 * just blank out until the end, if there's a need to blank
1797 * out anything.
1798 */
1799 if (df.df_start >= zv->zv_volsize)
1800 break; /* No need to do anything... */
1801
1802 mutex_exit(&zfsdev_state_lock);
1803
1804 rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
1805 RL_WRITER);
1806 tx = dmu_tx_create(zv->zv_objset);
1807 dmu_tx_mark_netfree(tx);
1808 error = dmu_tx_assign(tx, TXG_WAIT);
1809 if (error != 0) {
1810 dmu_tx_abort(tx);
1811 } else {
1812 zvol_log_truncate(zv, tx, df.df_start,
1813 df.df_length, B_TRUE);
1814 dmu_tx_commit(tx);
1815 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
1816 df.df_start, df.df_length);
1817 }
1818
1819 zfs_range_unlock(rl);
1820
1821 /*
1822 * If the write-cache is disabled, 'sync' property
1823 * is set to 'always', or if the caller is asking for
1824 * a synchronous free, commit this operation to the zil.
1825 * This will sync any previous uncommitted writes to the
1826 * zvol object.
1827 * Can be overridden by the zvol_unmap_sync_enabled tunable.
1828 */
1829 if ((error == 0) && zvol_unmap_sync_enabled &&
1830 (!(zv->zv_flags & ZVOL_WCE) ||
1831 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
1832 (df.df_flags & DF_WAIT_SYNC))) {
1833 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1834 }
1835
1836 return (error);
1837 }
1838
1839 default:
1840 error = SET_ERROR(ENOTTY);
1841 break;
1842
1843 }
1844 mutex_exit(&zfsdev_state_lock);
1845 return (error);
1846 }
1847
1848 int
1849 zvol_busy(void)
1850 {
1851 return (zvol_minors != 0);
1852 }
1853
1854 void
1855 zvol_init(void)
|
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 *
24 * Portions Copyright 2010 Robert Milkowski
25 *
26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
28 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29 * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
30 * Copyright (c) 2014 Integros [integros.com]
31 */
32
33 /*
34 * ZFS volume emulation driver.
35 *
36 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
37 * Volumes are accessed through the symbolic links named:
38 *
39 * /dev/zvol/dsk/<pool_name>/<dataset_name>
40 * /dev/zvol/rdsk/<pool_name>/<dataset_name>
41 *
42 * These links are created by the /dev filesystem (sdev_zvolops.c).
43 * Volumes are persistent through reboot. No user command needs to be
44 * run before opening and using a device.
45 */
46
47 #include <sys/types.h>
48 #include <sys/param.h>
49 #include <sys/errno.h>
72 #include <sys/crc32.h>
73 #include <sys/dirent.h>
74 #include <sys/policy.h>
75 #include <sys/fs/zfs.h>
76 #include <sys/zfs_ioctl.h>
77 #include <sys/mkdev.h>
78 #include <sys/zil.h>
79 #include <sys/refcount.h>
80 #include <sys/zfs_znode.h>
81 #include <sys/zfs_rlock.h>
82 #include <sys/vdev_disk.h>
83 #include <sys/vdev_impl.h>
84 #include <sys/vdev_raidz.h>
85 #include <sys/zvol.h>
86 #include <sys/dumphdr.h>
87 #include <sys/zil_impl.h>
88 #include <sys/dbuf.h>
89 #include <sys/dmu_tx.h>
90 #include <sys/zfeature.h>
91 #include <sys/zio_checksum.h>
92 #include <sys/dkioc_free_util.h>
93 #include <sys/zil_impl.h>
94
95 #include "zfs_namecheck.h"
96
97 void *zfsdev_state;
98 static char *zvol_tag = "zvol_tag";
99
100 #define ZVOL_DUMPSIZE "dumpsize"
101
102 /*
103 * This lock protects the zfsdev_state structure from being modified
104 * while it's being used, e.g. an open that comes in before a create
105 * finishes. It also protects temporary opens of the dataset so that,
106 * e.g., an open doesn't get a spurious EBUSY.
107 */
108 kmutex_t zfsdev_state_lock;
109 static uint32_t zvol_minors;
110
111 typedef struct zvol_extent {
112 list_node_t ze_node;
957 /*
958 * If the open count is zero, this is a spurious close.
959 * That indicates a bug in the kernel / DDI framework.
960 */
961 ASSERT(zv->zv_open_count[otyp] != 0);
962 ASSERT(zv->zv_total_opens != 0);
963
964 /*
965 * You may get multiple opens, but only one close.
966 */
967 zv->zv_open_count[otyp]--;
968 zv->zv_total_opens--;
969
970 if (zv->zv_total_opens == 0)
971 zvol_last_close(zv);
972
973 mutex_exit(&zfsdev_state_lock);
974 return (error);
975 }
976
977 /* ARGSUSED */
978 static void
979 zvol_get_done(zgd_t *zgd, int error)
980 {
981 if (zgd->zgd_db)
982 dmu_buf_rele(zgd->zgd_db, zgd);
983
984 zfs_range_unlock(zgd->zgd_rl);
985
986 kmem_free(zgd, sizeof (zgd_t));
987 }
988
989 /*
990 * Get data to generate a TX_WRITE intent log record.
991 */
static int
zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
{
	zvol_state_t *zv = arg;
	objset_t *os = zv->zv_objset;
	uint64_t object = ZVOL_OBJ;	/* zvols store all data in one object */
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;	/* length of user data */
	dmu_buf_t *db;
	zgd_t *zgd;
	int error;

	/* Callers must supply the lwb and zio this record belongs to. */
	ASSERT3P(lwb, !=, NULL);
	ASSERT3P(zio, !=, NULL);

	/* zvol_get_done() drops the dbuf hold and range lock, frees zgd. */
	zvol_get_done(zgd, error);

	return (error);
}
1057
1058 /*
1059 * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
1060 *
1061 * We store data in the log buffers if it's small enough.
1062 * Otherwise we will later flush the data out via dmu_sync().
1063 */
1064 ssize_t zvol_immediate_write_sz = 32768;
1065
static void
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
    boolean_t sync)
{
	uint32_t blocksize = zv->zv_volblocksize;
	zilog_t *zilog = zv->zv_zilog;
	spa_t *spa = zilog->zl_spa;
	spa_meta_placement_t *mp = &spa->spa_meta_policy;
	boolean_t slogging, zil_to_special, write_to_special;
	ssize_t immediate_write_sz;
	itx_wr_state_t write_state;

	/* If we are replaying the log, the records are already there. */
	if (zil_replaying(zilog, tx))
		return;

	/*
	 * See comments in zfs_log_write()
	 */

	/* logbias=throughput disables immediate (embedded) writes. */
	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
	    ? 0 : zvol_immediate_write_sz;

	/* Whether ZIL blocks may be placed on a special vdev (no slog). */
	zil_to_special = !spa_has_slogs(spa) &&
	    spa_can_special_be_used(spa) &&
	    mp->spa_sync_to_special != SYNC_TO_SPECIAL_DISABLED;

	/*
	 * Whether data should go to the special vdev: always, or, in
	 * balanced mode, probabilistically based on the load rotor.
	 */
	write_to_special = !spa_has_slogs(spa) &&
	    spa_write_data_to_special(spa, zilog->zl_os) &&
	    (mp->spa_sync_to_special == SYNC_TO_SPECIAL_ALWAYS ||
	    (mp->spa_sync_to_special == SYNC_TO_SPECIAL_BALANCED &&
	    spa->spa_avg_stat_rotor % 100 < spa->spa_special_to_normal_ratio));

	/* Treat a usable slog or special vdev as "slogging" for latency. */
	slogging = (spa_has_slogs(spa) || zil_to_special) &&
	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);

	/*
	 * Decide how the data travels with the itx:
	 *  WR_INDIRECT  - large block-aligned writes without slogging,
	 *                 logbias=throughput, or special-vdev writes:
	 *                 the data is flushed later via dmu_sync().
	 *  WR_COPIED    - synchronous write small enough to embed the data
	 *                 directly in the log record.
	 *  WR_NEED_COPY - asynchronous write: data is copied only if/when
	 *                 the itx is actually committed.
	 */
	if (blocksize > immediate_write_sz && !slogging &&
	    resid >= blocksize && off % blocksize == 0)
		write_state = WR_INDIRECT;
	else if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
		write_state = WR_INDIRECT;
	else if (!spa_has_slogs(zilog->zl_spa) &&
	    resid >= blocksize && blocksize > zvol_immediate_write_sz)
		write_state = WR_INDIRECT;
	else if (write_to_special)
		write_state = WR_INDIRECT;
	else if (sync)
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

	/* Emit one TX_WRITE itx per chunk until the range is covered. */
	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		itx_wr_state_t wr_state = write_state;
		ssize_t len = resid;

		/* Too large to embed in the record: defer the copy. */
		if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
			wr_state = WR_NEED_COPY;
		else if (wr_state == WR_INDIRECT)
			len = MIN(blocksize - P2PHASE(off, blocksize), resid);

		/* WR_COPIED records carry the payload inline after the lr. */
		itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
		    (wr_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (wr_state == WR_COPIED && dmu_read(zv->zv_objset,
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = zv;
		itx->itx_sync = sync;

		zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}
1154
/*
 * Perform raw physical I/O for a dumpified zvol directly against the
 * underlying vdevs, bypassing the DMU.  Mirror-like vdevs recurse into
 * their children (one readable child suffices for reads); raidz is
 * handed off to vdev_raidz_physio(); leaves go through LDI.  The
 * vdev_tsd_lock read lock keeps the disk handle from disappearing
 * while it is in use.
 *
 * Returns 0 on success, or an errno (EIO on unreadable/unwriteable
 * vdevs, ENXIO when the disk handle has gone away).
 */
static int
zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset,
    uint64_t size, boolean_t doread, boolean_t isdump)
{
	vdev_disk_t *dvd;
	int c, rc;
	int numerrors = 0;

	if (vd->vdev_ops == &vdev_mirror_ops ||
	    vd->vdev_ops == &vdev_replacing_ops ||
	    vd->vdev_ops == &vdev_spare_ops) {
		for (c = 0; c < vd->vdev_children; c++) {
			int err = zvol_dumpio_vdev(vd->vdev_child[c],
			    addr, offset, origoffset, size, doread, isdump);
			if (err != 0) {
				numerrors++;
			} else if (doread) {
				/* One good copy is enough for a read. */
				break;
			}
		}
	}

	if (!vd->vdev_ops->vdev_op_leaf && vd->vdev_ops != &vdev_raidz_ops)
		return (numerrors < vd->vdev_children ? 0 : EIO);

	if (doread && !vdev_readable(vd))
		return (SET_ERROR(EIO));
	else if (!doread && !vdev_writeable(vd))
		return (SET_ERROR(EIO));

	if (vd->vdev_ops == &vdev_raidz_ops) {
		return (vdev_raidz_physio(vd,
		    addr, size, offset, origoffset, doread, isdump));
	}

	/* Skip past the front vdev label to reach the data region. */
	offset += VDEV_LABEL_START_SIZE;

	/*
	 * Hold vdev_tsd stable for the duration of the I/O.
	 * NOTE(review): rw_enter() is also reached in panic context
	 * (ddi_in_panic()); confirm the lock cannot be held by another
	 * thread at panic time, or this could hang the dump.
	 */
	rw_enter(&vd->vdev_tsd_lock, RW_READER);
	dvd = vd->vdev_tsd;
	if (ddi_in_panic() || isdump) {
		/* Dump context only ever writes. */
		ASSERT(!doread);
		if (doread) {
			rw_exit(&vd->vdev_tsd_lock);
			return (SET_ERROR(EIO));
		}
		/* We assume here dvd is not NULL */
		ASSERT3P(dvd, !=, NULL);

		/* If our assumption is wrong, we do not want to crash */
		if (dvd != NULL && dvd->vd_lh != NULL) {
			rc = ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
			    lbtodb(size));
		} else {
			rc = SET_ERROR(ENXIO);
		}
	} else {
		/* We assume here dvd is not NULL */
		ASSERT3P(dvd, !=, NULL);

		/* If our assumption is wrong, we do not want to crash */
		if (dvd != NULL && dvd->vd_lh != NULL) {
			rc = vdev_disk_ldi_physio(dvd->vd_lh, addr, size,
			    offset, doread ? B_READ : B_WRITE);
		} else {
			rc = SET_ERROR(ENXIO);
		}
	}
	rw_exit(&vd->vdev_tsd_lock);
	return (rc);
}
1225
1226 static int
1227 zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
1228 boolean_t doread, boolean_t isdump)
1229 {
1230 vdev_t *vd;
1231 int error;
1232 zvol_extent_t *ze;
1233 spa_t *spa = dmu_objset_spa(zv->zv_objset);
1234
	/* Must be sector aligned, and not straddle a block boundary. */
1236 if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) ||
1237 P2BOUNDARY(offset, size, zv->zv_volblocksize)) {
1238 return (SET_ERROR(EINVAL));
1239 }
1240 ASSERT(size <= zv->zv_volblocksize);
1241
1242 /* Locate the extent this belongs to */
1243 ze = list_head(&zv->zv_extents);
1810 break;
1811
1812 case DKIOCDUMPINIT:
1813 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1814 RL_WRITER);
1815 error = zvol_dumpify(zv);
1816 zfs_range_unlock(rl);
1817 break;
1818
1819 case DKIOCDUMPFINI:
1820 if (!(zv->zv_flags & ZVOL_DUMPIFIED))
1821 break;
1822 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1823 RL_WRITER);
1824 error = zvol_dump_fini(zv);
1825 zfs_range_unlock(rl);
1826 break;
1827
1828 case DKIOCFREE:
1829 {
1830 dkioc_free_list_t *dfl;
1831 dmu_tx_t *tx;
1832
1833 mutex_exit(&zfsdev_state_lock);
1834
1835 if (!zvol_unmap_enabled)
1836 break;
1837
1838 if (!(flag & FKIOCTL)) {
1839 dfl = dfl_copyin((void *)arg, flag, KM_SLEEP);
1840 if (dfl == NULL) {
1841 error = SET_ERROR(EFAULT);
1842 break;
1843 }
1844 } else {
1845 dfl = (dkioc_free_list_t *)arg;
1846 }
1847
1848 for (int i = 0; i < dfl->dfl_num_exts; i++) {
1849 uint64_t start = dfl->dfl_exts[i].dfle_start,
1850 length = dfl->dfl_exts[i].dfle_length,
1851 end = start + length;
1852
1853 /*
1854 * Apply Postel's Law to length-checking. If they
1855 * overshoot, just blank out until the end, if there's
1856 * a need to blank out anything.
1857 */
1858 if (start >= zv->zv_volsize)
1859 continue; /* No need to do anything... */
1860 if (end > zv->zv_volsize) {
1861 end = DMU_OBJECT_END;
1862 length = end - start;
1863 }
1864
1865 rl = zfs_range_lock(&zv->zv_znode, start, length,
1866 RL_WRITER);
1867 tx = dmu_tx_create(zv->zv_objset);
1868 error = dmu_tx_assign(tx, TXG_WAIT);
1869 if (error != 0) {
1870 dmu_tx_abort(tx);
1871 } else {
1872 zvol_log_truncate(zv, tx, start, length,
1873 B_TRUE);
1874 dmu_tx_commit(tx);
1875 error = dmu_free_long_range(zv->zv_objset,
1876 ZVOL_OBJ, start, length);
1877 }
1878
1879 zfs_range_unlock(rl);
1880
1881 if (error != 0)
1882 break;
1883 }
1884
1885 /*
1886 * If the write-cache is disabled, 'sync' property
1887 * is set to 'always', or if the caller is asking for
1888 * a synchronous free, commit this operation to the zil.
1889 * This will sync any previous uncommitted writes to the
1890 * zvol object.
1891 * Can be overridden by the zvol_unmap_sync_enabled tunable.
1892 */
1893 if ((error == 0) && zvol_unmap_sync_enabled &&
1894 (!(zv->zv_flags & ZVOL_WCE) ||
1895 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
1896 (dfl->dfl_flags & DF_WAIT_SYNC))) {
1897 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1898 }
1899
1900 if (!(flag & FKIOCTL))
1901 dfl_free(dfl);
1902
1903 return (error);
1904 }
1905
1906 default:
1907 error = SET_ERROR(ENOTTY);
1908 break;
1909
1910 }
1911 mutex_exit(&zfsdev_state_lock);
1912 return (error);
1913 }
1914
1915 int
1916 zvol_busy(void)
1917 {
1918 return (zvol_minors != 0);
1919 }
1920
1921 void
1922 zvol_init(void)
|