4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28 #include <sys/sysmacros.h>
29 #include <sys/conf.h>
30 #include <sys/file.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/modctl.h>
34 #include <sys/scsi/scsi.h>
35 #include <sys/scsi/impl/scsi_reset_notify.h>
36 #include <sys/disp.h>
37 #include <sys/byteorder.h>
38 #include <sys/pathname.h>
39 #include <sys/atomic.h>
40 #include <sys/nvpair.h>
41 #include <sys/fs/zfs.h>
42 #include <sys/sdt.h>
43 #include <sys/dkio.h>
44 #include <sys/zfs_ioctl.h>
45
46 #include <sys/stmf.h>
47 #include <sys/lpif.h>
48 #include <sys/stmf_ioctl.h>
49 #include <sys/stmf_sbd_ioctl.h>
50
51 #include "stmf_sbd.h"
52 #include "sbd_impl.h"
53
54 #define SBD_IS_ZVOL(zvol) (strncmp("/dev/zvol", zvol, 9))
55
56 extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
57 extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
58 extern void sbd_pgr_reset(sbd_lu_t *sl);
59
60 static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
61 void **result);
62 static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
63 static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
64 static int sbd_open(dev_t *devp, int flag, int otype, cred_t *credp);
65 static int sbd_close(dev_t dev, int flag, int otype, cred_t *credp);
66 static int stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
67 cred_t *credp, int *rval);
68 void sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags);
69 stmf_status_t sbd_proxy_reg_lu(uint8_t *luid, void *proxy_reg_arg,
70 uint32_t proxy_reg_arg_len);
71 stmf_status_t sbd_proxy_dereg_lu(uint8_t *luid, void *proxy_reg_arg,
72 uint32_t proxy_reg_arg_len);
73 stmf_status_t sbd_proxy_msg(uint8_t *luid, void *proxy_arg,
74 uint32_t proxy_arg_len, uint32_t type);
75 int sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
76 uint32_t *err_ret);
77 int sbd_create_standby_lu(sbd_create_standby_lu_t *slu, uint32_t *err_ret);
78 int sbd_set_lu_standby(sbd_set_lu_standby_t *stlu, uint32_t *err_ret);
92 uint32_t *err_ret);
93 sbd_status_t sbd_create_zfs_meta_object(sbd_lu_t *sl);
94 sbd_status_t sbd_open_zfs_meta(sbd_lu_t *sl);
95 sbd_status_t sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
96 uint64_t off);
97 sbd_status_t sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
98 uint64_t off);
99 sbd_status_t sbd_update_zfs_prop(sbd_lu_t *sl);
100 int sbd_is_zvol(char *path);
101 int sbd_zvolget(char *zvol_name, char **comstarprop);
102 int sbd_zvolset(char *zvol_name, char *comstarprop);
103 char sbd_ctoi(char c);
104 void sbd_close_lu(sbd_lu_t *sl);
105
106 static ldi_ident_t sbd_zfs_ident;
107 static stmf_lu_provider_t *sbd_lp;
108 static sbd_lu_t *sbd_lu_list = NULL;
109 static kmutex_t sbd_lock;
110 static dev_info_t *sbd_dip;
111 static uint32_t sbd_lu_count = 0;
112
113 /* Global property settings for the logical unit */
114 char sbd_vendor_id[] = "SUN ";
115 char sbd_product_id[] = "COMSTAR ";
116 char sbd_revision[] = "1.0 ";
117 char *sbd_mgmt_url = NULL;
118 uint16_t sbd_mgmt_url_alloc_size = 0;
119 krwlock_t sbd_global_prop_lock;
120
121 static char sbd_name[] = "sbd";
122
123 static struct cb_ops sbd_cb_ops = {
124 sbd_open, /* open */
125 sbd_close, /* close */
126 nodev, /* strategy */
127 nodev, /* print */
128 nodev, /* dump */
129 nodev, /* read */
130 nodev, /* write */
131 stmf_sbd_ioctl, /* ioctl */
132 nodev, /* devmap */
133 nodev, /* mmap */
134 nodev, /* segmap */
138 D_NEW | D_MP, /* cb_flag */
139 CB_REV, /* rev */
140 nodev, /* aread */
141 nodev /* awrite */
142 };
143
/* Device operations vector for the sbd pseudo driver. */
static struct dev_ops sbd_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	sbd_getinfo,	/* getinfo */
	nulldev,	/* identify */
	nulldev,	/* probe */
	sbd_attach,	/* attach */
	sbd_detach,	/* detach */
	nodev,		/* reset */
	&sbd_cb_ops,	/* cb_ops */
	NULL,		/* bus_ops */
	NULL		/* power */
};
157
158 #define SBD_NAME "COMSTAR SBD"
159
/* Module linkage for a device driver: type, description, dev_ops. */
static struct modldrv modldrv = {
	&mod_driverops,	/* drv_modops: this module is a device driver */
	SBD_NAME,	/* drv_linkinfo: module description string */
	&sbd_ops	/* drv_dev_ops */
};
165
/* Single-linkage wrapper passed to mod_install()/mod_remove(). */
static struct modlinkage modlinkage = {
	MODREV_1,	/* ml_rev */
	&modldrv,	/* ml_linkage[0]: the one driver linkage */
	NULL		/* terminator */
};
171
172 int
173 _init(void)
174 {
175 int ret;
176
177 ret = mod_install(&modlinkage);
178 if (ret)
179 return (ret);
180 sbd_lp = (stmf_lu_provider_t *)stmf_alloc(STMF_STRUCT_LU_PROVIDER,
181 0, 0);
182 sbd_lp->lp_lpif_rev = LPIF_REV_2;
183 sbd_lp->lp_instance = 0;
184 sbd_lp->lp_name = sbd_name;
185 sbd_lp->lp_cb = sbd_lp_cb;
186 sbd_lp->lp_alua_support = 1;
187 sbd_lp->lp_proxy_msg = sbd_proxy_msg;
188 sbd_zfs_ident = ldi_ident_from_anon();
189
190 if (stmf_register_lu_provider(sbd_lp) != STMF_SUCCESS) {
191 (void) mod_remove(&modlinkage);
192 stmf_free(sbd_lp);
193 return (EINVAL);
194 }
195 mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
196 rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
197 return (0);
198 }
199
200 int
201 _fini(void)
202 {
203 int ret;
204
205 /*
206 * If we have registered lus, then make sure they are all offline
207 * if so then deregister them. This should drop the sbd_lu_count
208 * to zero.
209 */
210 if (sbd_lu_count) {
211 sbd_lu_t *slu;
212
213 /* See if all of them are offline */
214 mutex_enter(&sbd_lock);
215 for (slu = sbd_lu_list; slu != NULL; slu = slu->sl_next) {
216 if ((slu->sl_state != STMF_STATE_OFFLINE) ||
255 static int
256 sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
257 {
258 switch (cmd) {
259 case DDI_INFO_DEVT2DEVINFO:
260 *result = sbd_dip;
261 break;
262 case DDI_INFO_DEVT2INSTANCE:
263 *result = (void *)(uintptr_t)ddi_get_instance(sbd_dip);
264 break;
265 default:
266 return (DDI_FAILURE);
267 }
268
269 return (DDI_SUCCESS);
270 }
271
272 static int
273 sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
274 {
275 switch (cmd) {
276 case DDI_ATTACH:
277 sbd_dip = dip;
278
279 if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
280 DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
281 break;
282 }
283 ddi_report_dev(dip);
284 return (DDI_SUCCESS);
285 }
286
287 return (DDI_FAILURE);
288 }
289
290 static int
291 sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
292 {
293 switch (cmd) {
294 case DDI_DETACH:
295 ddi_remove_minor_node(dip, 0);
296 return (DDI_SUCCESS);
297 }
298
299 return (DDI_FAILURE);
300 }
301
302 /* ARGSUSED */
303 static int
1379 sli->sli_data_blocksize_shift = sl->sl_data_blocksize_shift;
1380 sli->sli_data_order = SMS_DATA_ORDER;
1381 bcopy(sl->sl_device_id, sli->sli_device_id, 20);
1382
1383 sli->sli_sms_header.sms_size = sizeof (*sli) + s;
1384 sli->sli_sms_header.sms_id = SMS_ID_LU_INFO_1_1;
1385 sli->sli_sms_header.sms_data_order = SMS_DATA_ORDER;
1386
1387 mutex_exit(&sl->sl_lock);
1388 ret = sbd_write_meta_section(sl, (sm_section_hdr_t *)sli);
1389 kmem_free(sli, sizeof (*sli) + s);
1390 return (ret);
1391 }
1392
1393 /*
1394 * Will scribble SL_UNMAP_ENABLED into sl_flags if we succeed.
1395 */
1396 static void
1397 do_unmap_setup(sbd_lu_t *sl)
1398 {
1399 ASSERT((sl->sl_flags & SL_UNMAP_ENABLED) == 0);
1400
1401 if ((sl->sl_flags & SL_ZFS_META) == 0)
1402 return; /* No UNMAP for you. */
1403
1404 sl->sl_flags |= SL_UNMAP_ENABLED;
1405 }
1406
1407 int
1408 sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret)
1409 {
1410 stmf_lu_t *lu = sl->sl_lu;
1411 stmf_status_t ret;
1412
1413 do_unmap_setup(sl);
1414
1415 lu->lu_id = (scsi_devid_desc_t *)sl->sl_device_id;
1416 if (sl->sl_alias) {
1417 lu->lu_alias = sl->sl_alias;
1418 } else {
1419 lu->lu_alias = sl->sl_name;
1424 if (ret != STMF_SUCCESS) {
1425 *err_ret = SBD_RET_ACCESS_STATE_FAILED;
1426 return (EIO);
1427 }
1428 }
1429 /* set proxy_reg_cb_arg to meta filename */
1430 if (sl->sl_meta_filename) {
1431 lu->lu_proxy_reg_arg = sl->sl_meta_filename;
1432 lu->lu_proxy_reg_arg_len = strlen(sl->sl_meta_filename) + 1;
1433 } else {
1434 lu->lu_proxy_reg_arg = sl->sl_data_filename;
1435 lu->lu_proxy_reg_arg_len = strlen(sl->sl_data_filename) + 1;
1436 }
1437 lu->lu_lp = sbd_lp;
1438 lu->lu_task_alloc = sbd_task_alloc;
1439 lu->lu_new_task = sbd_new_task;
1440 lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
1441 lu->lu_send_status_done = sbd_send_status_done;
1442 lu->lu_task_free = sbd_task_free;
1443 lu->lu_abort = sbd_abort;
1444 lu->lu_dbuf_free = sbd_dbuf_free;
1445 lu->lu_ctl = sbd_ctl;
1446 lu->lu_info = sbd_info;
1447 sl->sl_state = STMF_STATE_OFFLINE;
1448
1449 if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
1450 stmf_trace(0, "Failed to register with framework, ret=%llx",
1451 ret);
1452 if (ret == STMF_ALREADY) {
1453 *err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
1454 }
1455 return (EIO);
1456 }
1457
1458 *err_ret = 0;
1459 return (0);
1460 }
1461
1462 int
1463 sbd_open_data_file(sbd_lu_t *sl, uint32_t *err_ret, int lu_size_valid,
1464 int vp_valid, int keep_open)
1465 {
1466 int ret;
1467 int flag;
1468 ulong_t nbits;
1469 uint64_t supported_size;
1470 vattr_t vattr;
1471 enum vtype vt;
1472 struct dk_cinfo dki;
1473 int unused;
1474
1475 mutex_enter(&sl->sl_lock);
1476 if (vp_valid) {
1477 goto odf_over_open;
1544 *err_ret = SBD_RET_SIZE_NOT_SUPPORTED_BY_FS;
1545 ret = EINVAL;
1546 goto odf_close_data_and_exit;
1547 }
1548 }
1549 } else {
1550 sl->sl_total_data_size = vattr.va_size;
1551 if (sl->sl_flags & SL_SHARED_META) {
1552 if (vattr.va_size > SHARED_META_DATA_SIZE) {
1553 sl->sl_lu_size = vattr.va_size -
1554 SHARED_META_DATA_SIZE;
1555 } else {
1556 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1557 ret = EINVAL;
1558 goto odf_close_data_and_exit;
1559 }
1560 } else {
1561 sl->sl_lu_size = vattr.va_size;
1562 }
1563 }
1564 if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
1565 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1566 ret = EINVAL;
1567 goto odf_close_data_and_exit;
1568 }
1569 if (sl->sl_lu_size &
1570 ((((uint64_t)1) << sl->sl_data_blocksize_shift) - 1)) {
1571 *err_ret = SBD_RET_FILE_ALIGN_ERROR;
1572 ret = EINVAL;
1573 goto odf_close_data_and_exit;
1574 }
1575 /*
1576 * Get the minor device for direct zvol access
1577 */
1578 if (sl->sl_flags & SL_ZFS_META) {
1579 if ((ret = VOP_IOCTL(sl->sl_data_vp, DKIOCINFO, (intptr_t)&dki,
1580 FKIOCTL, kcred, &unused, NULL)) != 0) {
1581 cmn_err(CE_WARN, "ioctl(DKIOCINFO) failed %d", ret);
1582 /* zvol reserves 0, so this would fail later */
1583 sl->sl_zvol_minor = 0;
1820 sl->sl_serial_no_size = slu->slu_serial_size;
1821 p += slu->slu_serial_size;
1822 }
1823 kmem_free(namebuf, sz);
1824 if (slu->slu_vid_valid) {
1825 bcopy(slu->slu_vid, sl->sl_vendor_id, 8);
1826 sl->sl_flags |= SL_VID_VALID;
1827 }
1828 if (slu->slu_pid_valid) {
1829 bcopy(slu->slu_pid, sl->sl_product_id, 16);
1830 sl->sl_flags |= SL_PID_VALID;
1831 }
1832 if (slu->slu_rev_valid) {
1833 bcopy(slu->slu_rev, sl->sl_revision, 4);
1834 sl->sl_flags |= SL_REV_VALID;
1835 }
1836 if (slu->slu_write_protected) {
1837 sl->sl_flags |= SL_WRITE_PROTECTED;
1838 }
1839 if (slu->slu_blksize_valid) {
1840 if (!ISP2(slu->slu_blksize) ||
1841 (slu->slu_blksize > (32 * 1024)) ||
1842 (slu->slu_blksize == 0)) {
1843 *err_ret = SBD_RET_INVALID_BLKSIZE;
1844 ret = EINVAL;
1845 goto scm_err_out;
1846 }
1847 while ((1 << sl->sl_data_blocksize_shift) != slu->slu_blksize) {
1848 sl->sl_data_blocksize_shift++;
1849 }
1850 } else {
1851 sl->sl_data_blocksize_shift = 9; /* 512 by default */
1852 slu->slu_blksize = 512;
1853 }
1854
1855 /* Now lets start creating meta */
1856 sl->sl_trans_op = SL_OP_CREATE_REGISTER_LU;
1857 if (sbd_link_lu(sl) != SBD_SUCCESS) {
1858 *err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
1859 ret = EALREADY;
1860 goto scm_err_out;
2980 return (ENOENT);
2981 }
2982 return (EIO);
2983 }
2984
2985 ssi.st_rflags = STMF_RFLAG_USER_REQUEST;
2986 ssi.st_additional_info = "sbd_delete_lu call (ioctl)";
2987 ret = sbd_delete_locked_lu(sl, err_ret, &ssi);
2988
2989 if (ret) {
2990 /* Once its locked, no need to grab mutex again */
2991 sl->sl_trans_op = SL_OP_NONE;
2992 }
2993 return (ret);
2994 }
2995
2996 sbd_status_t
2997 sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
2998 uint64_t offset, uint64_t size, uint8_t *buf)
2999 {
3000 int ret;
3001 long resid;
3002
3003 if ((offset + size) > sl->sl_lu_size) {
3004 return (SBD_IO_PAST_EOF);
3005 }
3006
3007 offset += sl->sl_data_offset;
3008
3009 if ((offset + size) > sl->sl_data_readable_size) {
3010 uint64_t store_end;
3011 if (offset > sl->sl_data_readable_size) {
3012 bzero(buf, size);
3013 return (SBD_SUCCESS);
3014 }
3015 store_end = sl->sl_data_readable_size - offset;
3016 bzero(buf + store_end, size - store_end);
3017 size = store_end;
3018 }
3019
3020 DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
3021 uint8_t *, buf, uint64_t, size, uint64_t, offset,
3022 scsi_task_t *, task);
3023
3024 /*
3025 * Don't proceed if the device has been closed
3026 * This can occur on an access state change to standby or
3027 * a delete. The writer lock is acquired before closing the
3028 * lu.
3029 */
3030 rw_enter(&sl->sl_access_state_lock, RW_READER);
3031 if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3032 rw_exit(&sl->sl_access_state_lock);
3033 return (SBD_FAILURE);
3034 }
3035 ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3036 (offset_t)offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(),
3037 &resid);
3038 rw_exit(&sl->sl_access_state_lock);
3039
3040 DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
3041 uint8_t *, buf, uint64_t, size, uint64_t, offset,
3042 int, ret, scsi_task_t *, task);
3043
3044 over_sl_data_read:
3045 if (ret || resid) {
3046 stmf_trace(0, "UIO_READ failed, ret = %d, resid = %d", ret,
3047 resid);
3048 return (SBD_FAILURE);
3049 }
3050
3051 return (SBD_SUCCESS);
3052 }
3053
3054 sbd_status_t
3055 sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
3056 uint64_t offset, uint64_t size, uint8_t *buf)
3057 {
3058 int ret;
3059 long resid;
3060 sbd_status_t sret = SBD_SUCCESS;
3061 int ioflag;
3062
3063 if ((offset + size) > sl->sl_lu_size) {
3064 return (SBD_IO_PAST_EOF);
3065 }
3066
3067 offset += sl->sl_data_offset;
3068
3069 if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3070 (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
3071 ioflag = FSYNC;
3072 } else {
3073 ioflag = 0;
3074 }
3075
3076 DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
3077 uint8_t *, buf, uint64_t, size, uint64_t, offset,
3078 scsi_task_t *, task);
3079
3080 /*
3081 * Don't proceed if the device has been closed
3082 * This can occur on an access state change to standby or
3083 * a delete. The writer lock is acquired before closing the
3084 * lu.
3085 */
3086 rw_enter(&sl->sl_access_state_lock, RW_READER);
3087 if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3088 rw_exit(&sl->sl_access_state_lock);
3089 return (SBD_FAILURE);
3090 }
3091 ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3092 (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3093 &resid);
3094 rw_exit(&sl->sl_access_state_lock);
3095
3096 DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
3097 uint8_t *, buf, uint64_t, size, uint64_t, offset,
3098 int, ret, scsi_task_t *, task);
3099
3100 if ((ret == 0) && (resid == 0) &&
3101 (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3102 (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
3103 sret = sbd_flush_data_cache(sl, 1);
3104 }
3105 over_sl_data_write:
3106
3107 if ((ret || resid) || (sret != SBD_SUCCESS)) {
3108 return (SBD_FAILURE);
3109 } else if ((offset + size) > sl->sl_data_readable_size) {
3110 uint64_t old_size, new_size;
3111
3112 do {
3113 old_size = sl->sl_data_readable_size;
3114 if ((offset + size) <= old_size)
3115 break;
3116 new_size = offset + size;
3117 } while (atomic_cas_64(&sl->sl_data_readable_size, old_size,
3118 new_size) != old_size);
3119 }
3120
3121 return (SBD_SUCCESS);
3122 }
3123
3124 int
3125 sbd_get_global_props(sbd_global_props_t *oslp, uint32_t oslp_sz,
3126 uint32_t *err_ret)
3622 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3623 size = newsize;
3624 goto again;
3625 } else if (rc != 0) {
3626 goto out;
3627 }
3628 rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
3629 zc->zc_nvlist_dst_size, &nv, 0);
3630 ASSERT(rc == 0); /* nvlist_unpack should not fail */
3631 if ((rc = nvlist_lookup_nvlist(nv, "stmf_sbd_lu", &nv2)) == 0) {
3632 rc = nvlist_lookup_string(nv2, ZPROP_VALUE, &ptr);
3633 if (rc != 0) {
3634 cmn_err(CE_WARN, "couldn't get value");
3635 } else {
3636 *comstarprop = kmem_alloc(strlen(ptr) + 1,
3637 KM_SLEEP);
3638 (void) strcpy(*comstarprop, ptr);
3639 }
3640 }
3641 out:
3642 nvlist_free(nv);
3643 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3644 kmem_free(zc, sizeof (zfs_cmd_t));
3645 (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3646
3647 return (rc);
3648 }
3649
3650 int
3651 sbd_zvolset(char *zvol_name, char *comstarprop)
3652 {
3653 ldi_handle_t zfs_lh;
3654 nvlist_t *nv;
3655 char *packed = NULL;
3656 size_t len;
3657 zfs_cmd_t *zc;
3658 int unused;
3659 int rc;
3660
3661 if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
3672 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3673 (void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
3674 zc->zc_nvlist_src = (uint64_t)(intptr_t)packed;
3675 zc->zc_nvlist_src_size = len;
3676 rc = ldi_ioctl(zfs_lh, ZFS_IOC_SET_PROP, (intptr_t)zc,
3677 FKIOCTL, kcred, &unused);
3678 if (rc != 0) {
3679 cmn_err(CE_NOTE, "ioctl failed %d", rc);
3680 }
3681 kmem_free(zc, sizeof (zfs_cmd_t));
3682 if (packed)
3683 kmem_free(packed, len);
3684 out:
3685 nvlist_free(nv);
3686 (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3687 return (rc);
3688 }
3689
3690 /*
3691 * Unmap a region in a volume. Currently only supported for zvols.
3692 */
3693 int
3694 sbd_unmap(sbd_lu_t *sl, uint64_t offset, uint64_t length)
3695 {
3696 vnode_t *vp;
3697 int unused;
3698 dkioc_free_t df;
3699
3700 /* Right now, we only support UNMAP on zvols. */
3701 if (!(sl->sl_flags & SL_ZFS_META))
3702 return (EIO);
3703
3704 df.df_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
3705 DF_WAIT_SYNC : 0;
3706 df.df_start = offset;
3707 df.df_length = length;
3708
3709 /* Use the data vnode we have to send a fop_ioctl(). */
3710 vp = sl->sl_data_vp;
3711 if (vp == NULL) {
3712 cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
3713 return (EIO);
3714 }
3715
3716 return (VOP_IOCTL(vp, DKIOCFREE, (intptr_t)(&df), FKIOCTL, kcred,
3717 &unused, NULL));
3718 }
|
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013 by Delphix. All rights reserved.
25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
26 */
27
28 #include <sys/conf.h>
29 #include <sys/list.h>
30 #include <sys/file.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/modctl.h>
34 #include <sys/scsi/scsi.h>
35 #include <sys/scsi/impl/scsi_reset_notify.h>
36 #include <sys/disp.h>
37 #include <sys/byteorder.h>
38 #include <sys/pathname.h>
39 #include <sys/atomic.h>
40 #include <sys/nvpair.h>
41 #include <sys/fs/zfs.h>
42 #include <sys/sdt.h>
43 #include <sys/dkio.h>
44 #include <sys/zfs_ioctl.h>
45
46 #include <sys/stmf.h>
47 #include <sys/lpif.h>
48 #include <sys/stmf_ioctl.h>
49 #include <sys/stmf_sbd_ioctl.h>
50
51 #include "stmf_sbd.h"
52 #include "sbd_impl.h"
53
54 #define SBD_IS_ZVOL(zvol) (strncmp("/dev/zvol", zvol, 9))
55
56 extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
57 extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
58 extern void sbd_pgr_reset(sbd_lu_t *sl);
59 extern int HardwareAcceleratedLocking;
60 extern int HardwareAcceleratedInit;
61 extern int HardwareAcceleratedMove;
62 extern uint8_t sbd_unmap_enable;
63
64 static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
65 void **result);
66 static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
67 static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
68 static int sbd_open(dev_t *devp, int flag, int otype, cred_t *credp);
69 static int sbd_close(dev_t dev, int flag, int otype, cred_t *credp);
70 static int stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
71 cred_t *credp, int *rval);
72 void sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags);
73 stmf_status_t sbd_proxy_reg_lu(uint8_t *luid, void *proxy_reg_arg,
74 uint32_t proxy_reg_arg_len);
75 stmf_status_t sbd_proxy_dereg_lu(uint8_t *luid, void *proxy_reg_arg,
76 uint32_t proxy_reg_arg_len);
77 stmf_status_t sbd_proxy_msg(uint8_t *luid, void *proxy_arg,
78 uint32_t proxy_arg_len, uint32_t type);
79 int sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
80 uint32_t *err_ret);
81 int sbd_create_standby_lu(sbd_create_standby_lu_t *slu, uint32_t *err_ret);
82 int sbd_set_lu_standby(sbd_set_lu_standby_t *stlu, uint32_t *err_ret);
96 uint32_t *err_ret);
97 sbd_status_t sbd_create_zfs_meta_object(sbd_lu_t *sl);
98 sbd_status_t sbd_open_zfs_meta(sbd_lu_t *sl);
99 sbd_status_t sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
100 uint64_t off);
101 sbd_status_t sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
102 uint64_t off);
103 sbd_status_t sbd_update_zfs_prop(sbd_lu_t *sl);
104 int sbd_is_zvol(char *path);
105 int sbd_zvolget(char *zvol_name, char **comstarprop);
106 int sbd_zvolset(char *zvol_name, char *comstarprop);
107 char sbd_ctoi(char c);
108 void sbd_close_lu(sbd_lu_t *sl);
109
110 static ldi_ident_t sbd_zfs_ident;
111 static stmf_lu_provider_t *sbd_lp;
112 static sbd_lu_t *sbd_lu_list = NULL;
113 static kmutex_t sbd_lock;
114 static dev_info_t *sbd_dip;
115 static uint32_t sbd_lu_count = 0;
116 uint8_t sbd_enable_unmap_sync = 0;
117
118 /* Global property settings for the logical unit */
119 char sbd_vendor_id[] = "NEXENTA ";
120 char sbd_product_id[] = "COMSTAR ";
121 char sbd_revision[] = "1.0 ";
122 char *sbd_mgmt_url = NULL;
123 uint16_t sbd_mgmt_url_alloc_size = 0;
124 krwlock_t sbd_global_prop_lock;
125
126 static char sbd_name[] = "sbd";
127
128 static struct cb_ops sbd_cb_ops = {
129 sbd_open, /* open */
130 sbd_close, /* close */
131 nodev, /* strategy */
132 nodev, /* print */
133 nodev, /* dump */
134 nodev, /* read */
135 nodev, /* write */
136 stmf_sbd_ioctl, /* ioctl */
137 nodev, /* devmap */
138 nodev, /* mmap */
139 nodev, /* segmap */
143 D_NEW | D_MP, /* cb_flag */
144 CB_REV, /* rev */
145 nodev, /* aread */
146 nodev /* awrite */
147 };
148
/* Device operations vector for the sbd pseudo driver. */
static struct dev_ops sbd_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	sbd_getinfo,	/* getinfo */
	nulldev,	/* identify */
	nulldev,	/* probe */
	sbd_attach,	/* attach */
	sbd_detach,	/* detach */
	nodev,		/* reset */
	&sbd_cb_ops,	/* cb_ops */
	NULL,		/* bus_ops */
	NULL		/* power */
};
162
163 #ifdef DEBUG
164 #define SBD_NAME "COMSTAR SBD+ " __DATE__ " " __TIME__ " DEBUG"
165 #else
166 #define SBD_NAME "COMSTAR SBD+"
167 #endif
168
/* Module linkage for a device driver: type, description, dev_ops. */
static struct modldrv modldrv = {
	&mod_driverops,	/* drv_modops: this module is a device driver */
	SBD_NAME,	/* drv_linkinfo: module description string */
	&sbd_ops	/* drv_dev_ops */
};
174
/* Single-linkage wrapper passed to mod_install()/mod_remove(). */
static struct modlinkage modlinkage = {
	MODREV_1,	/* ml_rev */
	&modldrv,	/* ml_linkage[0]: the one driver linkage */
	NULL		/* terminator */
};
180
181 int
182 _init(void)
183 {
184 int ret;
185
186 ret = mod_install(&modlinkage);
187 if (ret)
188 return (ret);
189 sbd_lp = (stmf_lu_provider_t *)stmf_alloc(STMF_STRUCT_LU_PROVIDER,
190 0, 0);
191 sbd_lp->lp_lpif_rev = LPIF_REV_2;
192 sbd_lp->lp_instance = 0;
193 sbd_lp->lp_name = sbd_name;
194 sbd_lp->lp_cb = sbd_lp_cb;
195 sbd_lp->lp_alua_support = 1;
196 sbd_lp->lp_proxy_msg = sbd_proxy_msg;
197 sbd_zfs_ident = ldi_ident_from_anon();
198
199 if (stmf_register_lu_provider(sbd_lp) != STMF_SUCCESS) {
200 (void) mod_remove(&modlinkage);
201 stmf_free(sbd_lp);
202 return (EINVAL);
203 }
204 mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
205 rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
206
207 if (HardwareAcceleratedLocking == 0)
208 cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled");
209 if (HardwareAcceleratedMove == 0)
210 cmn_err(CE_NOTE, "HardwareAcceleratedMove Disabled");
211 if (HardwareAcceleratedInit == 0)
212 cmn_err(CE_NOTE, "HardwareAcceleratedInit Disabled");
213
214 return (0);
215 }
216
217 int
218 _fini(void)
219 {
220 int ret;
221
222 /*
223 * If we have registered lus, then make sure they are all offline
224 * if so then deregister them. This should drop the sbd_lu_count
225 * to zero.
226 */
227 if (sbd_lu_count) {
228 sbd_lu_t *slu;
229
230 /* See if all of them are offline */
231 mutex_enter(&sbd_lock);
232 for (slu = sbd_lu_list; slu != NULL; slu = slu->sl_next) {
233 if ((slu->sl_state != STMF_STATE_OFFLINE) ||
272 static int
273 sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
274 {
275 switch (cmd) {
276 case DDI_INFO_DEVT2DEVINFO:
277 *result = sbd_dip;
278 break;
279 case DDI_INFO_DEVT2INSTANCE:
280 *result = (void *)(uintptr_t)ddi_get_instance(sbd_dip);
281 break;
282 default:
283 return (DDI_FAILURE);
284 }
285
286 return (DDI_SUCCESS);
287 }
288
289 static int
290 sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
291 {
292 char *prop;
293
294 switch (cmd) {
295 case DDI_ATTACH:
296 sbd_dip = dip;
297
298 if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
299 DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
300 break;
301 }
302 ddi_report_dev(dip);
303
304 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
305 DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) {
306 (void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, "");
307 ddi_prop_free(prop);
308 }
309 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
310 DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) {
311 (void) snprintf(sbd_product_id, 17, "%s%16s", prop, "");
312 ddi_prop_free(prop);
313 }
314 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
315 DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) {
316 (void) snprintf(sbd_revision, 5, "%s%4s", prop, "");
317 ddi_prop_free(prop);
318 }
319
320 return (DDI_SUCCESS);
321 }
322
323 return (DDI_FAILURE);
324 }
325
326 static int
327 sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
328 {
329 switch (cmd) {
330 case DDI_DETACH:
331 ddi_remove_minor_node(dip, 0);
332 return (DDI_SUCCESS);
333 }
334
335 return (DDI_FAILURE);
336 }
337
338 /* ARGSUSED */
339 static int
1415 sli->sli_data_blocksize_shift = sl->sl_data_blocksize_shift;
1416 sli->sli_data_order = SMS_DATA_ORDER;
1417 bcopy(sl->sl_device_id, sli->sli_device_id, 20);
1418
1419 sli->sli_sms_header.sms_size = sizeof (*sli) + s;
1420 sli->sli_sms_header.sms_id = SMS_ID_LU_INFO_1_1;
1421 sli->sli_sms_header.sms_data_order = SMS_DATA_ORDER;
1422
1423 mutex_exit(&sl->sl_lock);
1424 ret = sbd_write_meta_section(sl, (sm_section_hdr_t *)sli);
1425 kmem_free(sli, sizeof (*sli) + s);
1426 return (ret);
1427 }
1428
1429 /*
1430 * Will scribble SL_UNMAP_ENABLED into sl_flags if we succeed.
1431 */
1432 static void
1433 do_unmap_setup(sbd_lu_t *sl)
1434 {
1435 if (sbd_unmap_enable == 0) {
1436 sl->sl_flags &= ~(SL_UNMAP_ENABLED);
1437 return;
1438 }
1439
1440 if ((sl->sl_flags & SL_ZFS_META) == 0)
1441 return; /* No UNMAP for you. */
1442
1443 sl->sl_flags |= SL_UNMAP_ENABLED;
1444 }
1445
1446 int
1447 sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret)
1448 {
1449 stmf_lu_t *lu = sl->sl_lu;
1450 stmf_status_t ret;
1451
1452 do_unmap_setup(sl);
1453
1454 lu->lu_id = (scsi_devid_desc_t *)sl->sl_device_id;
1455 if (sl->sl_alias) {
1456 lu->lu_alias = sl->sl_alias;
1457 } else {
1458 lu->lu_alias = sl->sl_name;
1463 if (ret != STMF_SUCCESS) {
1464 *err_ret = SBD_RET_ACCESS_STATE_FAILED;
1465 return (EIO);
1466 }
1467 }
1468 /* set proxy_reg_cb_arg to meta filename */
1469 if (sl->sl_meta_filename) {
1470 lu->lu_proxy_reg_arg = sl->sl_meta_filename;
1471 lu->lu_proxy_reg_arg_len = strlen(sl->sl_meta_filename) + 1;
1472 } else {
1473 lu->lu_proxy_reg_arg = sl->sl_data_filename;
1474 lu->lu_proxy_reg_arg_len = strlen(sl->sl_data_filename) + 1;
1475 }
1476 lu->lu_lp = sbd_lp;
1477 lu->lu_task_alloc = sbd_task_alloc;
1478 lu->lu_new_task = sbd_new_task;
1479 lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
1480 lu->lu_send_status_done = sbd_send_status_done;
1481 lu->lu_task_free = sbd_task_free;
1482 lu->lu_abort = sbd_abort;
1483 lu->lu_task_poll = sbd_task_poll;
1484 lu->lu_dbuf_free = sbd_dbuf_free;
1485 lu->lu_ctl = sbd_ctl;
1486 lu->lu_task_done = sbd_ats_remove_by_task;
1487 lu->lu_info = sbd_info;
1488 sl->sl_state = STMF_STATE_OFFLINE;
1489
1490 if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
1491 stmf_trace(0, "Failed to register with framework, ret=%llx",
1492 ret);
1493 if (ret == STMF_ALREADY) {
1494 *err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
1495 }
1496 return (EIO);
1497 }
1498
1499 /*
1500 * setup the ATS (compare and write) lists to handle multiple
1501 * ATS commands simultaneously
1502 */
1503 list_create(&sl->sl_ats_io_list, sizeof (ats_state_t),
1504 offsetof(ats_state_t, as_next));
1505 *err_ret = 0;
1506 return (0);
1507 }
1508
1509 int
1510 sbd_open_data_file(sbd_lu_t *sl, uint32_t *err_ret, int lu_size_valid,
1511 int vp_valid, int keep_open)
1512 {
1513 int ret;
1514 int flag;
1515 ulong_t nbits;
1516 uint64_t supported_size;
1517 vattr_t vattr;
1518 enum vtype vt;
1519 struct dk_cinfo dki;
1520 int unused;
1521
1522 mutex_enter(&sl->sl_lock);
1523 if (vp_valid) {
1524 goto odf_over_open;
1591 *err_ret = SBD_RET_SIZE_NOT_SUPPORTED_BY_FS;
1592 ret = EINVAL;
1593 goto odf_close_data_and_exit;
1594 }
1595 }
1596 } else {
1597 sl->sl_total_data_size = vattr.va_size;
1598 if (sl->sl_flags & SL_SHARED_META) {
1599 if (vattr.va_size > SHARED_META_DATA_SIZE) {
1600 sl->sl_lu_size = vattr.va_size -
1601 SHARED_META_DATA_SIZE;
1602 } else {
1603 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1604 ret = EINVAL;
1605 goto odf_close_data_and_exit;
1606 }
1607 } else {
1608 sl->sl_lu_size = vattr.va_size;
1609 }
1610 }
1611
1612 if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
1613 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1614 ret = EINVAL;
1615 goto odf_close_data_and_exit;
1616 }
1617 if (sl->sl_lu_size &
1618 ((((uint64_t)1) << sl->sl_data_blocksize_shift) - 1)) {
1619 *err_ret = SBD_RET_FILE_ALIGN_ERROR;
1620 ret = EINVAL;
1621 goto odf_close_data_and_exit;
1622 }
1623 /*
1624 * Get the minor device for direct zvol access
1625 */
1626 if (sl->sl_flags & SL_ZFS_META) {
1627 if ((ret = VOP_IOCTL(sl->sl_data_vp, DKIOCINFO, (intptr_t)&dki,
1628 FKIOCTL, kcred, &unused, NULL)) != 0) {
1629 cmn_err(CE_WARN, "ioctl(DKIOCINFO) failed %d", ret);
1630 /* zvol reserves 0, so this would fail later */
1631 sl->sl_zvol_minor = 0;
1868 sl->sl_serial_no_size = slu->slu_serial_size;
1869 p += slu->slu_serial_size;
1870 }
1871 kmem_free(namebuf, sz);
1872 if (slu->slu_vid_valid) {
1873 bcopy(slu->slu_vid, sl->sl_vendor_id, 8);
1874 sl->sl_flags |= SL_VID_VALID;
1875 }
1876 if (slu->slu_pid_valid) {
1877 bcopy(slu->slu_pid, sl->sl_product_id, 16);
1878 sl->sl_flags |= SL_PID_VALID;
1879 }
1880 if (slu->slu_rev_valid) {
1881 bcopy(slu->slu_rev, sl->sl_revision, 4);
1882 sl->sl_flags |= SL_REV_VALID;
1883 }
1884 if (slu->slu_write_protected) {
1885 sl->sl_flags |= SL_WRITE_PROTECTED;
1886 }
1887 if (slu->slu_blksize_valid) {
1888 if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
1889 (slu->slu_blksize > (32 * 1024)) ||
1890 (slu->slu_blksize == 0)) {
1891 *err_ret = SBD_RET_INVALID_BLKSIZE;
1892 ret = EINVAL;
1893 goto scm_err_out;
1894 }
1895 while ((1 << sl->sl_data_blocksize_shift) != slu->slu_blksize) {
1896 sl->sl_data_blocksize_shift++;
1897 }
1898 } else {
1899 sl->sl_data_blocksize_shift = 9; /* 512 by default */
1900 slu->slu_blksize = 512;
1901 }
1902
1903 /* Now lets start creating meta */
1904 sl->sl_trans_op = SL_OP_CREATE_REGISTER_LU;
1905 if (sbd_link_lu(sl) != SBD_SUCCESS) {
1906 *err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
1907 ret = EALREADY;
1908 goto scm_err_out;
3028 return (ENOENT);
3029 }
3030 return (EIO);
3031 }
3032
3033 ssi.st_rflags = STMF_RFLAG_USER_REQUEST;
3034 ssi.st_additional_info = "sbd_delete_lu call (ioctl)";
3035 ret = sbd_delete_locked_lu(sl, err_ret, &ssi);
3036
3037 if (ret) {
3038 /* Once its locked, no need to grab mutex again */
3039 sl->sl_trans_op = SL_OP_NONE;
3040 }
3041 return (ret);
3042 }
3043
3044 sbd_status_t
3045 sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
3046 uint64_t offset, uint64_t size, uint8_t *buf)
3047 {
3048 int ret, ioflag = 0;
3049 long resid;
3050 hrtime_t xfer_start;
3051 uint8_t op = task->task_cdb[0];
3052
3053 if ((offset + size) > sl->sl_lu_size) {
3054 return (SBD_IO_PAST_EOF);
3055 }
3056
3057 offset += sl->sl_data_offset;
3058
3059 /*
3060 * Check to see if the command is READ(10), READ(12), or READ(16).
3061 * If it is then check for bit 3 being set to indicate if Forced
3062 * Unit Access is being requested. If so, the FSYNC flag will be set
3063 * on the read.
3064 */
3065 if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
3066 (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) {
3067 ioflag = FSYNC;
3068 }
3069 if ((offset + size) > sl->sl_data_readable_size) {
3070 uint64_t store_end;
3071 if (offset > sl->sl_data_readable_size) {
3072 bzero(buf, size);
3073 return (SBD_SUCCESS);
3074 }
3075 store_end = sl->sl_data_readable_size - offset;
3076 bzero(buf + store_end, size - store_end);
3077 size = store_end;
3078 }
3079
3080 xfer_start = gethrtime();
3081 DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
3082 uint8_t *, buf, uint64_t, size, uint64_t, offset,
3083 scsi_task_t *, task);
3084
3085 /*
3086 * Don't proceed if the device has been closed
3087 * This can occur on an access state change to standby or
3088 * a delete. The writer lock is acquired before closing the
3089 * lu.
3090 */
3091 rw_enter(&sl->sl_access_state_lock, RW_READER);
3092 if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3093 rw_exit(&sl->sl_access_state_lock);
3094 return (SBD_FAILURE);
3095 }
3096
3097 ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3098 (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3099 &resid);
3100 rw_exit(&sl->sl_access_state_lock);
3101
3102 stmf_lu_xfer_done(task, B_TRUE /* read */,
3103 (gethrtime() - xfer_start));
3104 DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
3105 uint8_t *, buf, uint64_t, size, uint64_t, offset,
3106 int, ret, scsi_task_t *, task);
3107
3108 over_sl_data_read:
3109 if (ret || resid) {
3110 stmf_trace(0, "UIO_READ failed, ret = %d, resid = %d", ret,
3111 resid);
3112 return (SBD_FAILURE);
3113 }
3114
3115 return (SBD_SUCCESS);
3116 }
3117
3118 sbd_status_t
3119 sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
3120 uint64_t offset, uint64_t size, uint8_t *buf)
3121 {
3122 int ret;
3123 long resid;
3124 sbd_status_t sret = SBD_SUCCESS;
3125 int ioflag;
3126 hrtime_t xfer_start;
3127 uint8_t op = task->task_cdb[0];
3128 boolean_t fua_bit = B_FALSE;
3129
3130 if ((offset + size) > sl->sl_lu_size) {
3131 return (SBD_IO_PAST_EOF);
3132 }
3133
3134 offset += sl->sl_data_offset;
3135
3136 /*
3137 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
3138 * If it is then check for bit 3 being set to indicate if Forced
3139 * Unit Access is being requested. If so, the FSYNC flag will be set
3140 * on the write.
3141 */
3142 if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
3143 (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
3144 fua_bit = B_TRUE;
3145 }
3146 if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3147 (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) {
3148 ioflag = FSYNC;
3149 } else {
3150 ioflag = 0;
3151 }
3152
3153 xfer_start = gethrtime();
3154 DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
3155 uint8_t *, buf, uint64_t, size, uint64_t, offset,
3156 scsi_task_t *, task);
3157
3158 /*
3159 * Don't proceed if the device has been closed
3160 * This can occur on an access state change to standby or
3161 * a delete. The writer lock is acquired before closing the
3162 * lu.
3163 */
3164 rw_enter(&sl->sl_access_state_lock, RW_READER);
3165 if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3166 rw_exit(&sl->sl_access_state_lock);
3167 return (SBD_FAILURE);
3168 }
3169 ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3170 (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3171 &resid);
3172 rw_exit(&sl->sl_access_state_lock);
3173
3174 stmf_lu_xfer_done(task, B_FALSE /* write */,
3175 (gethrtime() - xfer_start));
3176 DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
3177 uint8_t *, buf, uint64_t, size, uint64_t, offset,
3178 int, ret, scsi_task_t *, task);
3179
3180 if ((ret == 0) && (resid == 0) &&
3181 (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3182 (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
3183 sret = sbd_flush_data_cache(sl, 1);
3184 }
3185 over_sl_data_write:
3186 if ((ret || resid) || (sret != SBD_SUCCESS)) {
3187 return (SBD_FAILURE);
3188 } else if ((offset + size) > sl->sl_data_readable_size) {
3189 uint64_t old_size, new_size;
3190
3191 do {
3192 old_size = sl->sl_data_readable_size;
3193 if ((offset + size) <= old_size)
3194 break;
3195 new_size = offset + size;
3196 } while (atomic_cas_64(&sl->sl_data_readable_size, old_size,
3197 new_size) != old_size);
3198 }
3199
3200 return (SBD_SUCCESS);
3201 }
3202
3203 int
3204 sbd_get_global_props(sbd_global_props_t *oslp, uint32_t oslp_sz,
3205 uint32_t *err_ret)
3701 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3702 size = newsize;
3703 goto again;
3704 } else if (rc != 0) {
3705 goto out;
3706 }
3707 rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
3708 zc->zc_nvlist_dst_size, &nv, 0);
3709 ASSERT(rc == 0); /* nvlist_unpack should not fail */
3710 if ((rc = nvlist_lookup_nvlist(nv, "stmf_sbd_lu", &nv2)) == 0) {
3711 rc = nvlist_lookup_string(nv2, ZPROP_VALUE, &ptr);
3712 if (rc != 0) {
3713 cmn_err(CE_WARN, "couldn't get value");
3714 } else {
3715 *comstarprop = kmem_alloc(strlen(ptr) + 1,
3716 KM_SLEEP);
3717 (void) strcpy(*comstarprop, ptr);
3718 }
3719 }
3720 out:
3721 if (nv != NULL)
3722 nvlist_free(nv);
3723 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3724 kmem_free(zc, sizeof (zfs_cmd_t));
3725 (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3726
3727 return (rc);
3728 }
3729
3730 int
3731 sbd_zvolset(char *zvol_name, char *comstarprop)
3732 {
3733 ldi_handle_t zfs_lh;
3734 nvlist_t *nv;
3735 char *packed = NULL;
3736 size_t len;
3737 zfs_cmd_t *zc;
3738 int unused;
3739 int rc;
3740
3741 if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
3752 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3753 (void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
3754 zc->zc_nvlist_src = (uint64_t)(intptr_t)packed;
3755 zc->zc_nvlist_src_size = len;
3756 rc = ldi_ioctl(zfs_lh, ZFS_IOC_SET_PROP, (intptr_t)zc,
3757 FKIOCTL, kcred, &unused);
3758 if (rc != 0) {
3759 cmn_err(CE_NOTE, "ioctl failed %d", rc);
3760 }
3761 kmem_free(zc, sizeof (zfs_cmd_t));
3762 if (packed)
3763 kmem_free(packed, len);
3764 out:
3765 nvlist_free(nv);
3766 (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3767 return (rc);
3768 }
3769
3770 /*
3771 * Unmap a region in a volume. Currently only supported for zvols.
3772 * The list of extents to be freed is passed in a dkioc_free_list_t
3773 * which the caller is responsible for destroying.
3774 */
3775 int
3776 sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
3777 {
3778 vnode_t *vp;
3779 int unused, ret;
3780
3781 /* Nothing to do */
3782 if (dfl->dfl_num_exts == 0)
3783 return (0);
3784
3785 /*
3786 * TODO: unmap performance may be improved by not doing the synchronous
3787 * removal of the blocks and writing of the metadata. The
3788 * transaction is in the zil so the state should be stable.
3789 */
3790 dfl->dfl_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
3791 DF_WAIT_SYNC : 0;
3792
3793 /* Use the data vnode we have to send a fop_ioctl(). */
3794 vp = sl->sl_data_vp;
3795 if (vp == NULL) {
3796 cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
3797 return (EIO);
3798 }
3799
3800 ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
3801 &unused, NULL);
3802
3803 return (ret);
3804 }
3805
3806 /*
3807 * Check if this lu belongs to sbd or some other lu
3808 * provider. A simple check for one of the module
3809 * entry points is sufficient.
3810 */
3811 int
3812 sbd_is_valid_lu(stmf_lu_t *lu)
3813 {
3814 if (lu->lu_new_task == sbd_new_task)
3815 return (1);
3816 return (0);
3817 }
3818
3819 uint8_t
3820 sbd_get_lbasize_shift(stmf_lu_t *lu)
3821 {
3822 sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3823
3824 return (sl->sl_data_blocksize_shift);
3825 }
|