Print this page
NEX-6832 fcsm module's debug level default should be 0 (cstyle fix)
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-7503 backport illumos #7307
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-7048 COMSTAR MODE_SENSE support is broken
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-5428 Backout the 5.0 changes
NEX-2937 Continuous write_same starves all other commands
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-4707 memory leak in stmf_sbd`sbd_attach() on successful property lookup
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
    usr/src/uts/common/io/scsi/targets/sd.c
    usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-3111 Comstar does not pass cstyle and hdrchk
        Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
        Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
        Reviewed by: Tony Nguyen <tony.nguyen@nexenta.com>
NEX-3023 Panics and hangs when using write_same and compare_and_write
Review by: Bayard Bell <bayard.bell@nexenta.com>
Review by: Rick McNeal <rick.mcneal@nexenta.com>
Review by: Jean McCormack <jean.mccormack@nexenta.com>
Approved by: Jean McCormack <jean.mccormack@nexenta.com>
Related bug: NEX-2723 Kernel panic in xfer_completion code for write_same (0x93) and compare_and_write (0x89)
NEX-1965 Page fault at netbios_first_level_name_decode+0xbb
Support simultaneous compare_and_write operations for VAAI
Bug IDs SUP-505
                SUP-1768
                SUP-1928
Code Reviewers:
        Sarah Jelinek
        Jeffry Molanus
        Albert Lee
        Harold Shaw
NEX-988 itask_lu_[read|write]_time was inadvertently removed by the Illumos 3862 fix
re #12618 rb4053 Creating LU unconditionally enables write cache on backing store device
re #7936 rb3706 Support for COMSTAR/OEM
re #8002 rb3706 Allow setting iSCSI vendor ID via stmf_sbd.conf
re #11454 rb3750 Fix inconsistent vid/pid in stmf
Re #6790 backspace should perform delete on console
VAAI (XXX ATS support for COMSTAR, YYY Block-copy support for COMSTAR)


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.

  26  */
  27 
  28 #include <sys/sysmacros.h>
  29 #include <sys/conf.h>

  30 #include <sys/file.h>
  31 #include <sys/ddi.h>
  32 #include <sys/sunddi.h>
  33 #include <sys/modctl.h>
  34 #include <sys/scsi/scsi.h>
  35 #include <sys/scsi/impl/scsi_reset_notify.h>
  36 #include <sys/disp.h>
  37 #include <sys/byteorder.h>
  38 #include <sys/pathname.h>
  39 #include <sys/atomic.h>
  40 #include <sys/nvpair.h>
  41 #include <sys/fs/zfs.h>
  42 #include <sys/sdt.h>
  43 #include <sys/dkio.h>
  44 #include <sys/zfs_ioctl.h>
  45 
  46 #include <sys/stmf.h>
  47 #include <sys/lpif.h>
  48 #include <sys/stmf_ioctl.h>
  49 #include <sys/stmf_sbd_ioctl.h>
  50 
  51 #include "stmf_sbd.h"
  52 #include "sbd_impl.h"
  53 
     /*
      * NOTE: despite the predicate-style name, this expands to the raw
      * strncmp() result of comparing the first 9 bytes against "/dev/zvol".
      * It therefore evaluates to 0 when the path DOES start with "/dev/zvol"
      * and to non-zero when it does not; compare the result against 0.
      */
  54 #define SBD_IS_ZVOL(zvol)       (strncmp("/dev/zvol", zvol, 9))
  55 
  56 extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
  57 extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
  58 extern void sbd_pgr_reset(sbd_lu_t *sl);




  59 
  60 static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
  61     void **result);
  62 static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
  63 static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
  64 static int sbd_open(dev_t *devp, int flag, int otype, cred_t *credp);
  65 static int sbd_close(dev_t dev, int flag, int otype, cred_t *credp);
  66 static int stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
  67     cred_t *credp, int *rval);
  68 void sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags);
  69 stmf_status_t sbd_proxy_reg_lu(uint8_t *luid, void *proxy_reg_arg,
  70     uint32_t proxy_reg_arg_len);
  71 stmf_status_t sbd_proxy_dereg_lu(uint8_t *luid, void *proxy_reg_arg,
  72     uint32_t proxy_reg_arg_len);
  73 stmf_status_t sbd_proxy_msg(uint8_t *luid, void *proxy_arg,
  74     uint32_t proxy_arg_len, uint32_t type);
  75 int sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
  76     uint32_t *err_ret);
  77 int sbd_create_standby_lu(sbd_create_standby_lu_t *slu, uint32_t *err_ret);
  78 int sbd_set_lu_standby(sbd_set_lu_standby_t *stlu, uint32_t *err_ret);


  92     uint32_t *err_ret);
  93 sbd_status_t sbd_create_zfs_meta_object(sbd_lu_t *sl);
  94 sbd_status_t sbd_open_zfs_meta(sbd_lu_t *sl);
  95 sbd_status_t sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
  96     uint64_t off);
  97 sbd_status_t sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
  98     uint64_t off);
  99 sbd_status_t sbd_update_zfs_prop(sbd_lu_t *sl);
 100 int sbd_is_zvol(char *path);
 101 int sbd_zvolget(char *zvol_name, char **comstarprop);
 102 int sbd_zvolset(char *zvol_name, char *comstarprop);
 103 char sbd_ctoi(char c);
 104 void sbd_close_lu(sbd_lu_t *sl);
 105 
 106 static ldi_ident_t      sbd_zfs_ident;
 107 static stmf_lu_provider_t *sbd_lp;
 108 static sbd_lu_t         *sbd_lu_list = NULL;
 109 static kmutex_t         sbd_lock;
 110 static dev_info_t       *sbd_dip;
 111 static uint32_t         sbd_lu_count = 0;

 112 
 113 /* Global property settings for the logical unit */
 114 char sbd_vendor_id[]    = "SUN     ";
 115 char sbd_product_id[]   = "COMSTAR         ";
 116 char sbd_revision[]     = "1.0 ";
 117 char *sbd_mgmt_url = NULL;
 118 uint16_t sbd_mgmt_url_alloc_size = 0;
 119 krwlock_t sbd_global_prop_lock;
 120 
 121 static char sbd_name[] = "sbd";
 122 
 123 static struct cb_ops sbd_cb_ops = {
 124         sbd_open,                       /* open */
 125         sbd_close,                      /* close */
 126         nodev,                          /* strategy */
 127         nodev,                          /* print */
 128         nodev,                          /* dump */
 129         nodev,                          /* read */
 130         nodev,                          /* write */
 131         stmf_sbd_ioctl,                 /* ioctl */
 132         nodev,                          /* devmap */
 133         nodev,                          /* mmap */
 134         nodev,                          /* segmap */


 138         D_NEW | D_MP,                   /* cb_flag */
 139         CB_REV,                         /* rev */
 140         nodev,                          /* aread */
 141         nodev                           /* awrite */
 142 };
 143 
     /*
      * Device operations vector for the sbd driver.  It is referenced by
      * modldrv below and points at sbd_cb_ops for the character-device
      * entry points.
      */
 144 static struct dev_ops sbd_ops = {
 145         DEVO_REV,               /* rev */
 146         0,                      /* refcnt */
 147         sbd_getinfo,            /* getinfo */
 148         nulldev,                /* identify */
 149         nulldev,                /* probe */
 150         sbd_attach,             /* attach */
 151         sbd_detach,             /* detach */
 152         nodev,                  /* reset */
 153         &sbd_cb_ops,            /* cb_ops */
 154         NULL,                   /* bus_ops */
 155         NULL                    /* power */
 156 };
 157 
 158 #define SBD_NAME        "COMSTAR SBD"




 159 
     /*
      * Loadable-driver linkage: ties the SBD_NAME description string and the
      * sbd_ops vector together for the module framework.
      */
 160 static struct modldrv modldrv = {
 161         &mod_driverops,         /* module type ops */
 162         SBD_NAME,               /* description string */
 163         &sbd_ops                /* driver ops */
 164 };
 165 
     /*
      * Module linkage handed to mod_install() in _init(); it lists the single
      * modldrv above.
      */
 166 static struct modlinkage modlinkage = {
 167         MODREV_1,               /* linkage revision */
 168         &modldrv,               /* the only linkage element */
 169         NULL                    /* end of linkage list */
 170 };
 171 
 172 int
 173 _init(void)
 174 {
 175         int ret;
 176 
 177         ret = mod_install(&modlinkage);
 178         if (ret)
 179                 return (ret);
 180         sbd_lp = (stmf_lu_provider_t *)stmf_alloc(STMF_STRUCT_LU_PROVIDER,
 181             0, 0);
 182         sbd_lp->lp_lpif_rev = LPIF_REV_2;
 183         sbd_lp->lp_instance = 0;
 184         sbd_lp->lp_name = sbd_name;
 185         sbd_lp->lp_cb = sbd_lp_cb;
 186         sbd_lp->lp_alua_support = 1;
 187         sbd_lp->lp_proxy_msg = sbd_proxy_msg;
 188         sbd_zfs_ident = ldi_ident_from_anon();
 189 
 190         if (stmf_register_lu_provider(sbd_lp) != STMF_SUCCESS) {
 191                 (void) mod_remove(&modlinkage);
 192                 stmf_free(sbd_lp);
 193                 return (EINVAL);
 194         }
 195         mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
 196         rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);








 197         return (0);
 198 }
 199 
 200 int
 201 _fini(void)
 202 {
 203         int ret;
 204 
 205         /*
 206          * If we have registered lus, then make sure they are all offline
 207          * if so then deregister them. This should drop the sbd_lu_count
 208          * to zero.
 209          */
 210         if (sbd_lu_count) {
 211                 sbd_lu_t *slu;
 212 
 213                 /* See if all of them are offline */
 214                 mutex_enter(&sbd_lock);
 215                 for (slu = sbd_lu_list; slu != NULL; slu = slu->sl_next) {
 216                         if ((slu->sl_state != STMF_STATE_OFFLINE) ||


 255 static int
 256 sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
 257 {
 258         switch (cmd) {
 259         case DDI_INFO_DEVT2DEVINFO:
 260                 *result = sbd_dip;
 261                 break;
 262         case DDI_INFO_DEVT2INSTANCE:
 263                 *result = (void *)(uintptr_t)ddi_get_instance(sbd_dip);
 264                 break;
 265         default:
 266                 return (DDI_FAILURE);
 267         }
 268 
 269         return (DDI_SUCCESS);
 270 }
 271 
 272 static int
 273 sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 274 {


 275         switch (cmd) {
 276         case DDI_ATTACH:
 277                 sbd_dip = dip;
 278 
 279                 if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
 280                     DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
 281                         break;
 282                 }
 283                 ddi_report_dev(dip);

















 284                 return (DDI_SUCCESS);
 285         }
 286 
 287         return (DDI_FAILURE);
 288 }
 289 
 290 static int
 291 sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 292 {
 293         switch (cmd) {
 294         case DDI_DETACH:
 295                 ddi_remove_minor_node(dip, 0);
 296                 return (DDI_SUCCESS);
 297         }
 298 
 299         return (DDI_FAILURE);
 300 }
 301 
 302 /* ARGSUSED */
 303 static int


1379         sli->sli_data_blocksize_shift = sl->sl_data_blocksize_shift;
1380         sli->sli_data_order = SMS_DATA_ORDER;
1381         bcopy(sl->sl_device_id, sli->sli_device_id, 20);
1382 
1383         sli->sli_sms_header.sms_size = sizeof (*sli) + s;
1384         sli->sli_sms_header.sms_id = SMS_ID_LU_INFO_1_1;
1385         sli->sli_sms_header.sms_data_order = SMS_DATA_ORDER;
1386 
1387         mutex_exit(&sl->sl_lock);
1388         ret = sbd_write_meta_section(sl, (sm_section_hdr_t *)sli);
1389         kmem_free(sli, sizeof (*sli) + s);
1390         return (ret);
1391 }
1392 
1393 /*
1394  * Will scribble SL_UNMAP_ENABLED into sl_flags if we succeed.
1395  */
1396 static void
1397 do_unmap_setup(sbd_lu_t *sl)
1398 {
1399         ASSERT((sl->sl_flags & SL_UNMAP_ENABLED) == 0);



1400 
1401         if ((sl->sl_flags & SL_ZFS_META) == 0)
1402                 return; /* No UNMAP for you. */
1403 
1404         sl->sl_flags |= SL_UNMAP_ENABLED;
1405 }
1406 
1407 int
1408 sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret)
1409 {
1410         stmf_lu_t *lu = sl->sl_lu;
1411         stmf_status_t ret;
1412 
1413         do_unmap_setup(sl);
1414 
1415         lu->lu_id = (scsi_devid_desc_t *)sl->sl_device_id;
1416         if (sl->sl_alias) {
1417                 lu->lu_alias = sl->sl_alias;
1418         } else {
1419                 lu->lu_alias = sl->sl_name;


1424                 if (ret != STMF_SUCCESS) {
1425                         *err_ret = SBD_RET_ACCESS_STATE_FAILED;
1426                         return (EIO);
1427                 }
1428         }
1429         /* set proxy_reg_cb_arg to meta filename */
1430         if (sl->sl_meta_filename) {
1431                 lu->lu_proxy_reg_arg = sl->sl_meta_filename;
1432                 lu->lu_proxy_reg_arg_len = strlen(sl->sl_meta_filename) + 1;
1433         } else {
1434                 lu->lu_proxy_reg_arg = sl->sl_data_filename;
1435                 lu->lu_proxy_reg_arg_len = strlen(sl->sl_data_filename) + 1;
1436         }
1437         lu->lu_lp = sbd_lp;
1438         lu->lu_task_alloc = sbd_task_alloc;
1439         lu->lu_new_task = sbd_new_task;
1440         lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
1441         lu->lu_send_status_done = sbd_send_status_done;
1442         lu->lu_task_free = sbd_task_free;
1443         lu->lu_abort = sbd_abort;

1444         lu->lu_dbuf_free = sbd_dbuf_free;
1445         lu->lu_ctl = sbd_ctl;

1446         lu->lu_info = sbd_info;
1447         sl->sl_state = STMF_STATE_OFFLINE;
1448 
1449         if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
1450                 stmf_trace(0, "Failed to register with framework, ret=%llx",
1451                     ret);
1452                 if (ret == STMF_ALREADY) {
1453                         *err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
1454                 }
1455                 return (EIO);
1456         }
1457 






1458         *err_ret = 0;
1459         return (0);
1460 }
1461 
1462 int
1463 sbd_open_data_file(sbd_lu_t *sl, uint32_t *err_ret, int lu_size_valid,
1464     int vp_valid, int keep_open)
1465 {
1466         int ret;
1467         int flag;
1468         ulong_t nbits;
1469         uint64_t supported_size;
1470         vattr_t vattr;
1471         enum vtype vt;
1472         struct dk_cinfo dki;
1473         int unused;
1474 
1475         mutex_enter(&sl->sl_lock);
1476         if (vp_valid) {
1477                 goto odf_over_open;


1544                                 *err_ret = SBD_RET_SIZE_NOT_SUPPORTED_BY_FS;
1545                                 ret = EINVAL;
1546                                 goto odf_close_data_and_exit;
1547                         }
1548                 }
1549         } else {
1550                 sl->sl_total_data_size = vattr.va_size;
1551                 if (sl->sl_flags & SL_SHARED_META) {
1552                         if (vattr.va_size > SHARED_META_DATA_SIZE) {
1553                                 sl->sl_lu_size = vattr.va_size -
1554                                     SHARED_META_DATA_SIZE;
1555                         } else {
1556                                 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1557                                 ret = EINVAL;
1558                                 goto odf_close_data_and_exit;
1559                         }
1560                 } else {
1561                         sl->sl_lu_size = vattr.va_size;
1562                 }
1563         }

1564         if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
1565                 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1566                 ret = EINVAL;
1567                 goto odf_close_data_and_exit;
1568         }
1569         if (sl->sl_lu_size &
1570             ((((uint64_t)1) << sl->sl_data_blocksize_shift) - 1)) {
1571                 *err_ret = SBD_RET_FILE_ALIGN_ERROR;
1572                 ret = EINVAL;
1573                 goto odf_close_data_and_exit;
1574         }
1575         /*
1576          * Get the minor device for direct zvol access
1577          */
1578         if (sl->sl_flags & SL_ZFS_META) {
1579                 if ((ret = VOP_IOCTL(sl->sl_data_vp, DKIOCINFO, (intptr_t)&dki,
1580                     FKIOCTL, kcred, &unused, NULL)) != 0) {
1581                         cmn_err(CE_WARN, "ioctl(DKIOCINFO) failed %d", ret);
1582                         /* zvol reserves 0, so this would fail later */
1583                         sl->sl_zvol_minor = 0;


1820                 sl->sl_serial_no_size = slu->slu_serial_size;
1821                 p += slu->slu_serial_size;
1822         }
1823         kmem_free(namebuf, sz);
1824         if (slu->slu_vid_valid) {
1825                 bcopy(slu->slu_vid, sl->sl_vendor_id, 8);
1826                 sl->sl_flags |= SL_VID_VALID;
1827         }
1828         if (slu->slu_pid_valid) {
1829                 bcopy(slu->slu_pid, sl->sl_product_id, 16);
1830                 sl->sl_flags |= SL_PID_VALID;
1831         }
1832         if (slu->slu_rev_valid) {
1833                 bcopy(slu->slu_rev, sl->sl_revision, 4);
1834                 sl->sl_flags |= SL_REV_VALID;
1835         }
1836         if (slu->slu_write_protected) {
1837                 sl->sl_flags |= SL_WRITE_PROTECTED;
1838         }
1839         if (slu->slu_blksize_valid) {
1840                 if (!ISP2(slu->slu_blksize) ||
1841                     (slu->slu_blksize > (32 * 1024)) ||
1842                     (slu->slu_blksize == 0)) {
1843                         *err_ret = SBD_RET_INVALID_BLKSIZE;
1844                         ret = EINVAL;
1845                         goto scm_err_out;
1846                 }
1847                 while ((1 << sl->sl_data_blocksize_shift) != slu->slu_blksize) {
1848                         sl->sl_data_blocksize_shift++;
1849                 }
1850         } else {
1851                 sl->sl_data_blocksize_shift = 9;     /* 512 by default */
1852                 slu->slu_blksize = 512;
1853         }
1854 
1855         /* Now lets start creating meta */
1856         sl->sl_trans_op = SL_OP_CREATE_REGISTER_LU;
1857         if (sbd_link_lu(sl) != SBD_SUCCESS) {
1858                 *err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
1859                 ret = EALREADY;
1860                 goto scm_err_out;


2980                         return (ENOENT);
2981                 }
2982                 return (EIO);
2983         }
2984 
2985         ssi.st_rflags = STMF_RFLAG_USER_REQUEST;
2986         ssi.st_additional_info = "sbd_delete_lu call (ioctl)";
2987         ret = sbd_delete_locked_lu(sl, err_ret, &ssi);
2988 
2989         if (ret) {
2990                 /* Once its locked, no need to grab mutex again */
2991                 sl->sl_trans_op = SL_OP_NONE;
2992         }
2993         return (ret);
2994 }
2995 
2996 sbd_status_t
2997 sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
2998     uint64_t offset, uint64_t size, uint8_t *buf)
2999 {
3000         int ret;
3001         long resid;


3002 
3003         if ((offset + size) > sl->sl_lu_size) {
3004                 return (SBD_IO_PAST_EOF);
3005         }
3006 
3007         offset += sl->sl_data_offset;
3008 










3009         if ((offset + size) > sl->sl_data_readable_size) {
3010                 uint64_t store_end;
3011                 if (offset > sl->sl_data_readable_size) {
3012                         bzero(buf, size);
3013                         return (SBD_SUCCESS);
3014                 }
3015                 store_end = sl->sl_data_readable_size - offset;
3016                 bzero(buf + store_end, size - store_end);
3017                 size = store_end;
3018         }
3019 

3020         DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
3021             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3022             scsi_task_t *, task);
3023 
3024         /*
3025          * Don't proceed if the device has been closed
3026          * This can occur on an access state change to standby or
3027          * a delete. The writer lock is acquired before closing the
3028          * lu.
3029          */
3030         rw_enter(&sl->sl_access_state_lock, RW_READER);
3031         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3032                 rw_exit(&sl->sl_access_state_lock);
3033                 return (SBD_FAILURE);
3034         }

3035         ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3036             (offset_t)offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(),
3037             &resid);
3038         rw_exit(&sl->sl_access_state_lock);
3039 


3040         DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
3041             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3042             int, ret, scsi_task_t *, task);
3043 
3044 over_sl_data_read:
3045         if (ret || resid) {
3046                 stmf_trace(0, "UIO_READ failed, ret = %d, resid = %d", ret,
3047                     resid);
3048                 return (SBD_FAILURE);
3049         }
3050 
3051         return (SBD_SUCCESS);
3052 }
3053 
3054 sbd_status_t
3055 sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
3056     uint64_t offset, uint64_t size, uint8_t *buf)
3057 {
3058         int ret;
3059         long resid;
3060         sbd_status_t sret = SBD_SUCCESS;
3061         int ioflag;



3062 
3063         if ((offset + size) > sl->sl_lu_size) {
3064                 return (SBD_IO_PAST_EOF);
3065         }
3066 
3067         offset += sl->sl_data_offset;
3068 
3069         if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3070             (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {










3071                 ioflag = FSYNC;
3072         } else {
3073                 ioflag = 0;
3074         }
3075 

3076         DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
3077             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3078             scsi_task_t *, task);
3079 
3080         /*
3081          * Don't proceed if the device has been closed
3082          * This can occur on an access state change to standby or
3083          * a delete. The writer lock is acquired before closing the
3084          * lu.
3085          */
3086         rw_enter(&sl->sl_access_state_lock, RW_READER);
3087         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3088                 rw_exit(&sl->sl_access_state_lock);
3089                 return (SBD_FAILURE);
3090         }
3091         ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3092             (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3093             &resid);
3094         rw_exit(&sl->sl_access_state_lock);
3095 


3096         DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
3097             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3098             int, ret, scsi_task_t *, task);
3099 
3100         if ((ret == 0) && (resid == 0) &&
3101             (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3102             (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
3103                 sret = sbd_flush_data_cache(sl, 1);
3104         }
3105 over_sl_data_write:
3106 
3107         if ((ret || resid) || (sret != SBD_SUCCESS)) {
3108                 return (SBD_FAILURE);
3109         } else if ((offset + size) > sl->sl_data_readable_size) {
3110                 uint64_t old_size, new_size;
3111 
3112                 do {
3113                         old_size = sl->sl_data_readable_size;
3114                         if ((offset + size) <= old_size)
3115                                 break;
3116                         new_size = offset + size;
3117                 } while (atomic_cas_64(&sl->sl_data_readable_size, old_size,
3118                     new_size) != old_size);
3119         }
3120 
3121         return (SBD_SUCCESS);
3122 }
3123 
3124 int
3125 sbd_get_global_props(sbd_global_props_t *oslp, uint32_t oslp_sz,
3126     uint32_t *err_ret)


3622                 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3623                 size = newsize;
3624                 goto again;
3625         } else if (rc != 0) {
3626                 goto out;
3627         }
3628         rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
3629             zc->zc_nvlist_dst_size, &nv, 0);
3630         ASSERT(rc == 0);        /* nvlist_unpack should not fail */
3631         if ((rc = nvlist_lookup_nvlist(nv, "stmf_sbd_lu", &nv2)) == 0) {
3632                 rc = nvlist_lookup_string(nv2, ZPROP_VALUE, &ptr);
3633                 if (rc != 0) {
3634                         cmn_err(CE_WARN, "couldn't get value");
3635                 } else {
3636                         *comstarprop = kmem_alloc(strlen(ptr) + 1,
3637                             KM_SLEEP);
3638                         (void) strcpy(*comstarprop, ptr);
3639                 }
3640         }
3641 out:

3642         nvlist_free(nv);
3643         kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3644         kmem_free(zc, sizeof (zfs_cmd_t));
3645         (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3646 
3647         return (rc);
3648 }
3649 
3650 int
3651 sbd_zvolset(char *zvol_name, char *comstarprop)
3652 {
3653         ldi_handle_t    zfs_lh;
3654         nvlist_t        *nv;
3655         char            *packed = NULL;
3656         size_t          len;
3657         zfs_cmd_t       *zc;
3658         int unused;
3659         int rc;
3660 
3661         if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,


3672         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3673         (void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
3674         zc->zc_nvlist_src = (uint64_t)(intptr_t)packed;
3675         zc->zc_nvlist_src_size = len;
3676         rc = ldi_ioctl(zfs_lh, ZFS_IOC_SET_PROP, (intptr_t)zc,
3677             FKIOCTL, kcred, &unused);
3678         if (rc != 0) {
3679                 cmn_err(CE_NOTE, "ioctl failed %d", rc);
3680         }
3681         kmem_free(zc, sizeof (zfs_cmd_t));
3682         if (packed)
3683                 kmem_free(packed, len);
3684 out:
3685         nvlist_free(nv);
3686         (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3687         return (rc);
3688 }
3689 
3690 /*
3691  * Unmap a region in a volume.  Currently only supported for zvols.


3692  */
3693 int
3694 sbd_unmap(sbd_lu_t *sl, uint64_t offset, uint64_t length)
3695 {
3696         vnode_t *vp;
3697         int unused;
3698         dkioc_free_t df;
3699 
3700         /* Right now, we only support UNMAP on zvols. */
3701         if (!(sl->sl_flags & SL_ZFS_META))
3702                 return (EIO);
3703 
3704         df.df_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?





3705             DF_WAIT_SYNC : 0;
3706         df.df_start = offset;
3707         df.df_length = length;
3708 
3709         /* Use the data vnode we have to send a fop_ioctl(). */
3710         vp = sl->sl_data_vp;
3711         if (vp == NULL) {
3712                 cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
3713                 return (EIO);
3714         }
3715 
3716         return (VOP_IOCTL(vp, DKIOCFREE, (intptr_t)(&df), FKIOCTL, kcred,
3717             &unused, NULL));























3718 }


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.

  24  * Copyright (c) 2013 by Delphix. All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  26  */
  27 

  28 #include <sys/conf.h>
  29 #include <sys/list.h>
  30 #include <sys/file.h>
  31 #include <sys/ddi.h>
  32 #include <sys/sunddi.h>
  33 #include <sys/modctl.h>
  34 #include <sys/scsi/scsi.h>
  35 #include <sys/scsi/impl/scsi_reset_notify.h>
  36 #include <sys/disp.h>
  37 #include <sys/byteorder.h>
  38 #include <sys/pathname.h>
  39 #include <sys/atomic.h>
  40 #include <sys/nvpair.h>
  41 #include <sys/fs/zfs.h>
  42 #include <sys/sdt.h>
  43 #include <sys/dkio.h>
  44 #include <sys/zfs_ioctl.h>
  45 
  46 #include <sys/stmf.h>
  47 #include <sys/lpif.h>
  48 #include <sys/stmf_ioctl.h>
  49 #include <sys/stmf_sbd_ioctl.h>
  50 
  51 #include "stmf_sbd.h"
  52 #include "sbd_impl.h"
  53 
     /*
      * NOTE: despite the predicate-style name, this expands to the raw
      * strncmp() result of comparing the first 9 bytes against "/dev/zvol".
      * It therefore evaluates to 0 when the path DOES start with "/dev/zvol"
      * and to non-zero when it does not; compare the result against 0.
      */
  54 #define SBD_IS_ZVOL(zvol)       (strncmp("/dev/zvol", zvol, 9))
  55 
  56 extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
  57 extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
  58 extern void sbd_pgr_reset(sbd_lu_t *sl);
  59 extern int HardwareAcceleratedLocking;
  60 extern int HardwareAcceleratedInit;
  61 extern int HardwareAcceleratedMove;
  62 extern uint8_t sbd_unmap_enable;
  63 
  64 static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
  65     void **result);
  66 static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
  67 static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
  68 static int sbd_open(dev_t *devp, int flag, int otype, cred_t *credp);
  69 static int sbd_close(dev_t dev, int flag, int otype, cred_t *credp);
  70 static int stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
  71     cred_t *credp, int *rval);
  72 void sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags);
  73 stmf_status_t sbd_proxy_reg_lu(uint8_t *luid, void *proxy_reg_arg,
  74     uint32_t proxy_reg_arg_len);
  75 stmf_status_t sbd_proxy_dereg_lu(uint8_t *luid, void *proxy_reg_arg,
  76     uint32_t proxy_reg_arg_len);
  77 stmf_status_t sbd_proxy_msg(uint8_t *luid, void *proxy_arg,
  78     uint32_t proxy_arg_len, uint32_t type);
  79 int sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
  80     uint32_t *err_ret);
  81 int sbd_create_standby_lu(sbd_create_standby_lu_t *slu, uint32_t *err_ret);
  82 int sbd_set_lu_standby(sbd_set_lu_standby_t *stlu, uint32_t *err_ret);


  96     uint32_t *err_ret);
  97 sbd_status_t sbd_create_zfs_meta_object(sbd_lu_t *sl);
  98 sbd_status_t sbd_open_zfs_meta(sbd_lu_t *sl);
  99 sbd_status_t sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
 100     uint64_t off);
 101 sbd_status_t sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
 102     uint64_t off);
 103 sbd_status_t sbd_update_zfs_prop(sbd_lu_t *sl);
 104 int sbd_is_zvol(char *path);
 105 int sbd_zvolget(char *zvol_name, char **comstarprop);
 106 int sbd_zvolset(char *zvol_name, char *comstarprop);
 107 char sbd_ctoi(char c);
 108 void sbd_close_lu(sbd_lu_t *sl);
 109 
     /*
      * Module-wide state.
      *
      * sbd_zfs_ident  LDI identifier obtained in _init().
      * sbd_lp         LU provider structure allocated and registered in _init().
      * sbd_lu_list    head of the list of logical units, linked through
      *                sl_next; _fini() walks it while holding sbd_lock.
      * sbd_lock       mutex initialized in _init().
      * sbd_dip        dev_info pointer recorded by sbd_attach() and handed
      *                back by sbd_getinfo().
      * sbd_lu_count   tested by _fini() before it inspects sbd_lu_list.
      */
 110 static ldi_ident_t      sbd_zfs_ident;
 111 static stmf_lu_provider_t *sbd_lp;
 112 static sbd_lu_t         *sbd_lu_list = NULL;
 113 static kmutex_t         sbd_lock;
 114 static dev_info_t       *sbd_dip;
 115 static uint32_t         sbd_lu_count = 0;
 116 uint8_t sbd_enable_unmap_sync = 0;
 117 
 118 /*
      * Global property settings for the logical unit.
      *
      * The vendor, product and revision defaults are blank padded to 8, 16
      * and 4 characters.  sbd_attach() may rewrite them in place, writing at
      * most 9, 17 and 5 bytes (terminating NUL included), so these
      * initializers must not be made any shorter.
      *
      * sbd_global_prop_lock is initialized in _init().
      * NOTE(review): presumably it protects the settings below -- confirm
      * against the code that takes it.
      */
 119 char sbd_vendor_id[]    = "NEXENTA ";
 120 char sbd_product_id[]   = "COMSTAR         ";
 121 char sbd_revision[]     = "1.0 ";
 122 char *sbd_mgmt_url = NULL;
 123 uint16_t sbd_mgmt_url_alloc_size = 0;
 124 krwlock_t sbd_global_prop_lock;
 125 
 126 static char sbd_name[] = "sbd";
 127 
 128 static struct cb_ops sbd_cb_ops = {
 129         sbd_open,                       /* open */
 130         sbd_close,                      /* close */
 131         nodev,                          /* strategy */
 132         nodev,                          /* print */
 133         nodev,                          /* dump */
 134         nodev,                          /* read */
 135         nodev,                          /* write */
 136         stmf_sbd_ioctl,                 /* ioctl */
 137         nodev,                          /* devmap */
 138         nodev,                          /* mmap */
 139         nodev,                          /* segmap */


 143         D_NEW | D_MP,                   /* cb_flag */
 144         CB_REV,                         /* rev */
 145         nodev,                          /* aread */
 146         nodev                           /* awrite */
 147 };
 148 
     /*
      * Device operations vector for the driver.  Only getinfo, attach and
      * detach are implemented here; the character entry points are supplied
      * through sbd_cb_ops.
      */
 149 static struct dev_ops sbd_ops = {
 150         DEVO_REV,               /* rev */
 151         0,                      /* refcnt */
 152         sbd_getinfo,            /* getinfo */
 153         nulldev,                /* identify */
 154         nulldev,                /* probe */
 155         sbd_attach,             /* attach */
 156         sbd_detach,             /* detach */
 157         nodev,                  /* reset */
 158         &sbd_cb_ops,            /* cb_ops */
 159         NULL,                   /* bus_ops */
 160         NULL                    /* power */
 161 };
 162 
     /*
      * Module description string stored in modldrv below.  DEBUG builds
      * append the compile date and time to it.
      */
 163 #ifdef DEBUG
 164 #define SBD_NAME        "COMSTAR SBD+ " __DATE__ " " __TIME__ " DEBUG"
 165 #else
 166 #define SBD_NAME        "COMSTAR SBD+"
 167 #endif
 168 
     /* Driver linkage: ties the description string to sbd_ops. */
 169 static struct modldrv modldrv = {
 170         &mod_driverops,
 171         SBD_NAME,
 172         &sbd_ops
 173 };
 174 
     /* Linkage handed to mod_install() and mod_remove() in _init(). */
 175 static struct modlinkage modlinkage = {
 176         MODREV_1,
 177         &modldrv,
 178         NULL
 179 };
 180 
     /*
      * Module load entry point.
      *
      * Installs the module linkage, then allocates the STMF logical unit
      * provider structure, fills it in, obtains an anonymous LDI identifier
      * and registers the provider with the framework.  If registration fails
      * the module is removed again and everything acquired so far -- the
      * provider structure and the LDI identifier -- is released.
      *
      * On success the global locks are initialized and a notice is logged
      * for each HardwareAccelerated* tunable that is zero.
      *
      * Returns 0 on success, the mod_install() error if installation fails,
      * or EINVAL if provider registration fails.
      *
      * NOTE(review): sbd_lock and sbd_global_prop_lock are only initialized
      * after mod_install() and stmf_register_lu_provider() have succeeded;
      * confirm nothing can take them before that point.
      */
 181 int
 182 _init(void)
 183 {
 184         int ret;
 185 
 186         ret = mod_install(&modlinkage);
 187         if (ret)
 188                 return (ret);
 189         sbd_lp = (stmf_lu_provider_t *)stmf_alloc(STMF_STRUCT_LU_PROVIDER,
 190             0, 0);
 191         sbd_lp->lp_lpif_rev = LPIF_REV_2;
 192         sbd_lp->lp_instance = 0;
 193         sbd_lp->lp_name = sbd_name;
 194         sbd_lp->lp_cb = sbd_lp_cb;
 195         sbd_lp->lp_alua_support = 1;
 196         sbd_lp->lp_proxy_msg = sbd_proxy_msg;
 197         sbd_zfs_ident = ldi_ident_from_anon();
 198 
 199         if (stmf_register_lu_provider(sbd_lp) != STMF_SUCCESS) {
 200                 (void) mod_remove(&modlinkage);
 201                 stmf_free(sbd_lp);
                     /* Do not leak the identifier obtained above. */
                     ldi_ident_release(sbd_zfs_ident);
 202                 return (EINVAL);
 203         }
 204         mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
 205         rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
 206 
 207         if (HardwareAcceleratedLocking == 0)
 208                 cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled");
 209         if (HardwareAcceleratedMove == 0)
 210                 cmn_err(CE_NOTE, "HardwareAcceleratedMove  Disabled");
 211         if (HardwareAcceleratedInit == 0)
 212                 cmn_err(CE_NOTE, "HardwareAcceleratedInit  Disabled");
 213 
 214         return (0);
 215 }
 216 
 217 int
 218 _fini(void)
 219 {
 220         int ret;
 221 
 222         /*
 223          * If we have registered lus, then make sure they are all offline
 224          * if so then deregister them. This should drop the sbd_lu_count
 225          * to zero.
 226          */
 227         if (sbd_lu_count) {
 228                 sbd_lu_t *slu;
 229 
 230                 /* See if all of them are offline */
 231                 mutex_enter(&sbd_lock);
 232                 for (slu = sbd_lu_list; slu != NULL; slu = slu->sl_next) {
 233                         if ((slu->sl_state != STMF_STATE_OFFLINE) ||


 272 static int
 273 sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
 274 {
             /*
              * getinfo entry point.  Both supported requests are answered
              * from the single dev_info pointer saved in sbd_dip by
              * sbd_attach(); the dip and arg parameters are not consulted.
              * Returns DDI_SUCCESS for the two commands handled below and
              * DDI_FAILURE for anything else.
              */
 275         switch (cmd) {
 276         case DDI_INFO_DEVT2DEVINFO:
 277                 *result = sbd_dip;
 278                 break;
 279         case DDI_INFO_DEVT2INSTANCE:
                     /* The instance number is returned in the pointer itself. */
 280                 *result = (void *)(uintptr_t)ddi_get_instance(sbd_dip);
 281                 break;
 282         default:
 283                 return (DDI_FAILURE);
 284         }
 285 
 286         return (DDI_SUCCESS);
 287 }
 288 
     /*
      * attach entry point.
      *
      * For DDI_ATTACH: records the dev_info pointer in sbd_dip, creates the
      * "admin" character minor node and then replaces the default vendor,
      * product and revision strings with the "vendor-id", "product-id" and
      * "revision" string properties when they are present.  Each replacement
      * is followed by blank padding and cut off by the snprintf() limit, so
      * the result keeps the fixed 8, 16 or 4 character width.
      *
      * Returns DDI_SUCCESS after a successful DDI_ATTACH.  Returns
      * DDI_FAILURE if the minor node cannot be created, or for any other
      * command.
      */
 289 static int
 290 sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 291 {
 292         char    *prop;
 293 
 294         switch (cmd) {
 295         case DDI_ATTACH:
 296                 sbd_dip = dip;
 297 
 298                 if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
 299                     DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
                             /* Falls out of the switch to return DDI_FAILURE. */
 300                         break;
 301                 }
 302                 ddi_report_dev(dip);
 303 
                     /*
                      * Every string returned by a successful lookup below is
                      * released with ddi_prop_free() once it has been copied.
                      */
 304                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
 305                     DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) {
 306                         (void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, "");
 307                         ddi_prop_free(prop);
 308                 }
 309                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
 310                     DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) {
 311                         (void) snprintf(sbd_product_id, 17, "%s%16s", prop, "");
 312                         ddi_prop_free(prop);
 313                 }
 314                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
 315                     DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) {
 316                         (void) snprintf(sbd_revision, 5, "%s%4s", prop, "");
 317                         ddi_prop_free(prop);
 318                 }
 319 
 320                 return (DDI_SUCCESS);
 321         }
 322 
 323         return (DDI_FAILURE);
 324 }
 325 
     /*
      * detach entry point.  DDI_DETACH removes the minor node created at
      * attach time and succeeds; every other command fails.
      */
 326 static int
 327 sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 328 {
 329         switch (cmd) {
 330         case DDI_DETACH:
                     /* A null name removes all of this node's minor nodes. */
 331                 ddi_remove_minor_node(dip, 0);
 332                 return (DDI_SUCCESS);
 333         }
 334 
 335         return (DDI_FAILURE);
 336 }
 337 
 338 /* ARGSUSED */
 339 static int


1415         sli->sli_data_blocksize_shift = sl->sl_data_blocksize_shift;
1416         sli->sli_data_order = SMS_DATA_ORDER;
1417         bcopy(sl->sl_device_id, sli->sli_device_id, 20);
1418 
1419         sli->sli_sms_header.sms_size = sizeof (*sli) + s;
1420         sli->sli_sms_header.sms_id = SMS_ID_LU_INFO_1_1;
1421         sli->sli_sms_header.sms_data_order = SMS_DATA_ORDER;
1422 
1423         mutex_exit(&sl->sl_lock);
1424         ret = sbd_write_meta_section(sl, (sm_section_hdr_t *)sli);
1425         kmem_free(sli, sizeof (*sli) + s);
1426         return (ret);
1427 }
1428 
1429 /*
1430  * Set or clear SL_UNMAP_ENABLED in sl_flags for this logical unit.
      *
      * If the sbd_unmap_enable tunable is zero the flag is cleared.
      * Otherwise the flag is set only when SL_ZFS_META is already set in
      * sl_flags; when it is not, sl_flags is left untouched.
1431  */
1432 static void
1433 do_unmap_setup(sbd_lu_t *sl)
1434 {
1435         if (sbd_unmap_enable == 0) {
1436                 sl->sl_flags &= ~(SL_UNMAP_ENABLED);
1437                 return;
1438         }
1439 
1440         if ((sl->sl_flags & SL_ZFS_META) == 0)
1441                 return; /* No UNMAP for you. */
1442 
1443         sl->sl_flags |= SL_UNMAP_ENABLED;
1444 }
1445 
1446 int
1447 sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret)
1448 {
1449         stmf_lu_t *lu = sl->sl_lu;
1450         stmf_status_t ret;
1451 
1452         do_unmap_setup(sl);
1453 
1454         lu->lu_id = (scsi_devid_desc_t *)sl->sl_device_id;
1455         if (sl->sl_alias) {
1456                 lu->lu_alias = sl->sl_alias;
1457         } else {
1458                 lu->lu_alias = sl->sl_name;


1463                 if (ret != STMF_SUCCESS) {
1464                         *err_ret = SBD_RET_ACCESS_STATE_FAILED;
1465                         return (EIO);
1466                 }
1467         }
1468         /* set proxy_reg_cb_arg to meta filename */
1469         if (sl->sl_meta_filename) {
1470                 lu->lu_proxy_reg_arg = sl->sl_meta_filename;
1471                 lu->lu_proxy_reg_arg_len = strlen(sl->sl_meta_filename) + 1;
1472         } else {
1473                 lu->lu_proxy_reg_arg = sl->sl_data_filename;
1474                 lu->lu_proxy_reg_arg_len = strlen(sl->sl_data_filename) + 1;
1475         }
1476         lu->lu_lp = sbd_lp;
1477         lu->lu_task_alloc = sbd_task_alloc;
1478         lu->lu_new_task = sbd_new_task;
1479         lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
1480         lu->lu_send_status_done = sbd_send_status_done;
1481         lu->lu_task_free = sbd_task_free;
1482         lu->lu_abort = sbd_abort;
1483         lu->lu_task_poll = sbd_task_poll;
1484         lu->lu_dbuf_free = sbd_dbuf_free;
1485         lu->lu_ctl = sbd_ctl;
1486         lu->lu_task_done = sbd_ats_remove_by_task;
1487         lu->lu_info = sbd_info;
1488         sl->sl_state = STMF_STATE_OFFLINE;
1489 
1490         if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
1491                 stmf_trace(0, "Failed to register with framework, ret=%llx",
1492                     ret);
1493                 if (ret == STMF_ALREADY) {
1494                         *err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
1495                 }
1496                 return (EIO);
1497         }
1498 
1499         /*
1500          * setup the ATS (compare and write) lists to handle multiple
1501          * ATS commands simultaneously
1502          */
1503         list_create(&sl->sl_ats_io_list, sizeof (ats_state_t),
1504             offsetof(ats_state_t, as_next));
1505         *err_ret = 0;
1506         return (0);
1507 }
1508 
1509 int
1510 sbd_open_data_file(sbd_lu_t *sl, uint32_t *err_ret, int lu_size_valid,
1511     int vp_valid, int keep_open)
1512 {
1513         int ret;
1514         int flag;
1515         ulong_t nbits;
1516         uint64_t supported_size;
1517         vattr_t vattr;
1518         enum vtype vt;
1519         struct dk_cinfo dki;
1520         int unused;
1521 
1522         mutex_enter(&sl->sl_lock);
1523         if (vp_valid) {
1524                 goto odf_over_open;


1591                                 *err_ret = SBD_RET_SIZE_NOT_SUPPORTED_BY_FS;
1592                                 ret = EINVAL;
1593                                 goto odf_close_data_and_exit;
1594                         }
1595                 }
1596         } else {
1597                 sl->sl_total_data_size = vattr.va_size;
1598                 if (sl->sl_flags & SL_SHARED_META) {
1599                         if (vattr.va_size > SHARED_META_DATA_SIZE) {
1600                                 sl->sl_lu_size = vattr.va_size -
1601                                     SHARED_META_DATA_SIZE;
1602                         } else {
1603                                 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1604                                 ret = EINVAL;
1605                                 goto odf_close_data_and_exit;
1606                         }
1607                 } else {
1608                         sl->sl_lu_size = vattr.va_size;
1609                 }
1610         }
1611 
1612         if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
1613                 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1614                 ret = EINVAL;
1615                 goto odf_close_data_and_exit;
1616         }
1617         if (sl->sl_lu_size &
1618             ((((uint64_t)1) << sl->sl_data_blocksize_shift) - 1)) {
1619                 *err_ret = SBD_RET_FILE_ALIGN_ERROR;
1620                 ret = EINVAL;
1621                 goto odf_close_data_and_exit;
1622         }
1623         /*
1624          * Get the minor device for direct zvol access
1625          */
1626         if (sl->sl_flags & SL_ZFS_META) {
1627                 if ((ret = VOP_IOCTL(sl->sl_data_vp, DKIOCINFO, (intptr_t)&dki,
1628                     FKIOCTL, kcred, &unused, NULL)) != 0) {
1629                         cmn_err(CE_WARN, "ioctl(DKIOCINFO) failed %d", ret);
1630                         /* zvol reserves 0, so this would fail later */
1631                         sl->sl_zvol_minor = 0;


1868                 sl->sl_serial_no_size = slu->slu_serial_size;
1869                 p += slu->slu_serial_size;
1870         }
1871         kmem_free(namebuf, sz);
1872         if (slu->slu_vid_valid) {
1873                 bcopy(slu->slu_vid, sl->sl_vendor_id, 8);
1874                 sl->sl_flags |= SL_VID_VALID;
1875         }
1876         if (slu->slu_pid_valid) {
1877                 bcopy(slu->slu_pid, sl->sl_product_id, 16);
1878                 sl->sl_flags |= SL_PID_VALID;
1879         }
1880         if (slu->slu_rev_valid) {
1881                 bcopy(slu->slu_rev, sl->sl_revision, 4);
1882                 sl->sl_flags |= SL_REV_VALID;
1883         }
1884         if (slu->slu_write_protected) {
1885                 sl->sl_flags |= SL_WRITE_PROTECTED;
1886         }
1887         if (slu->slu_blksize_valid) {
1888                 if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
1889                     (slu->slu_blksize > (32 * 1024)) ||
1890                     (slu->slu_blksize == 0)) {
1891                         *err_ret = SBD_RET_INVALID_BLKSIZE;
1892                         ret = EINVAL;
1893                         goto scm_err_out;
1894                 }
1895                 while ((1 << sl->sl_data_blocksize_shift) != slu->slu_blksize) {
1896                         sl->sl_data_blocksize_shift++;
1897                 }
1898         } else {
1899                 sl->sl_data_blocksize_shift = 9;     /* 512 by default */
1900                 slu->slu_blksize = 512;
1901         }
1902 
1903         /* Now lets start creating meta */
1904         sl->sl_trans_op = SL_OP_CREATE_REGISTER_LU;
1905         if (sbd_link_lu(sl) != SBD_SUCCESS) {
1906                 *err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
1907                 ret = EALREADY;
1908                 goto scm_err_out;


3028                         return (ENOENT);
3029                 }
3030                 return (EIO);
3031         }
3032 
3033         ssi.st_rflags = STMF_RFLAG_USER_REQUEST;
3034         ssi.st_additional_info = "sbd_delete_lu call (ioctl)";
3035         ret = sbd_delete_locked_lu(sl, err_ret, &ssi);
3036 
3037         if (ret) {
3038                 /* Once its locked, no need to grab mutex again */
3039                 sl->sl_trans_op = SL_OP_NONE;
3040         }
3041         return (ret);
3042 }
3043 
     /*
      * Read size bytes, starting at byte offset `offset' within the logical
      * unit, from the backing store into buf.
      *
      * A request that extends beyond sl_lu_size is rejected with
      * SBD_IO_PAST_EOF.  Any part of the request that lies beyond
      * sl_data_readable_size is satisfied with zeroes instead of being read
      * from the backing store.
      *
      * Returns SBD_SUCCESS on success, SBD_IO_PAST_EOF as described above,
      * or SBD_FAILURE if the media is not loaded or the read fails or comes
      * up short.
      */
3044 sbd_status_t
3045 sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
3046     uint64_t offset, uint64_t size, uint8_t *buf)
3047 {
3048         int ret, ioflag = 0;
3049         long resid;
3050         hrtime_t xfer_start;
3051         uint8_t op = task->task_cdb[0];
3052 
3053         if ((offset + size) > sl->sl_lu_size) {
3054                 return (SBD_IO_PAST_EOF);
3055         }
3056 
             /* From here on offset is relative to the backing store. */
3057         offset += sl->sl_data_offset;
3058 
3059         /*
3060          * Check to see if the command is READ(10), READ(12), or READ(16).
3061          * If it is then check for bit 3 being set to indicate if Forced
3062          * Unit Access is being requested. If so, the FSYNC flag will be set
3063          * on the read.
3064          */
3065         if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
3066             (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) {
3067                 ioflag = FSYNC;
3068         }
3069         if ((offset + size) > sl->sl_data_readable_size) {
3070                 uint64_t store_end;
                     /* Entirely past the readable data: return all zeroes. */
3071                 if (offset > sl->sl_data_readable_size) {
3072                         bzero(buf, size);
3073                         return (SBD_SUCCESS);
3074                 }
                     /* Zero the tail and read only the readable part. */
3075                 store_end = sl->sl_data_readable_size - offset;
3076                 bzero(buf + store_end, size - store_end);
3077                 size = store_end;
3078         }
3079 
3080         xfer_start = gethrtime();
3081         DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
3082             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3083             scsi_task_t *, task);
3084 
3085         /*
3086          * Don't proceed if the device has been closed
3087          * This can occur on an access state change to standby or
3088          * a delete. The writer lock is acquired before closing the
3089          * lu.
3090          */
3091         rw_enter(&sl->sl_access_state_lock, RW_READER);
3092         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3093                 rw_exit(&sl->sl_access_state_lock);
3094                 return (SBD_FAILURE);
3095         }
3096 
3097         ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3098             (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3099             &resid);
3100         rw_exit(&sl->sl_access_state_lock);
3101 
3102         stmf_lu_xfer_done(task, B_TRUE /* read */,
3103             (gethrtime() - xfer_start));
3104         DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
3105             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3106             int, ret, scsi_task_t *, task);
3107 
3109         if (ret || resid) {
                     /* resid is a long, so it is printed with %ld. */
3110                 stmf_trace(0, "UIO_READ failed, ret = %d, resid = %ld", ret,
3111                     resid);
3112                 return (SBD_FAILURE);
3113         }
3114 
3115         return (SBD_SUCCESS);
3116 }
3117 
     /*
      * Write size bytes from buf to the backing store, starting at byte
      * offset `offset' within the logical unit.
      *
      * A request that extends beyond sl_lu_size is rejected with
      * SBD_IO_PAST_EOF.  The write is issued with FSYNC when Forced Unit
      * Access is requested, or when both SL_WRITEBACK_CACHE_DISABLE and
      * SL_FLUSH_ON_DISABLED_WRITECACHE are set; in the latter case
      * sbd_flush_data_cache() is also called after a complete write.
      *
      * After a successful write that ends beyond sl_data_readable_size, that
      * field is raised to the end of the write.
      *
      * Returns SBD_SUCCESS on success, SBD_IO_PAST_EOF as described above,
      * or SBD_FAILURE if the media is not loaded, the write fails or comes
      * up short, or the cache flush fails.
      */
3118 sbd_status_t
3119 sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
3120     uint64_t offset, uint64_t size, uint8_t *buf)
3121 {
3122         int ret;
3123         long resid;
3124         sbd_status_t sret = SBD_SUCCESS;
3125         int ioflag;
3126         hrtime_t xfer_start;
3127         uint8_t op = task->task_cdb[0];
3128         boolean_t fua_bit = B_FALSE;
3129 
3130         if ((offset + size) > sl->sl_lu_size) {
3131                 return (SBD_IO_PAST_EOF);
3132         }
3133 
             /* From here on offset is relative to the backing store. */
3134         offset += sl->sl_data_offset;
3135 
3136         /*
3137          * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
3138          * If it is then check for bit 3 being set to indicate if Forced
3139          * Unit Access is being requested. If so, the FSYNC flag will be set
3140          * on the write.
3141          */
3142         if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
3143             (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
3144                 fua_bit = B_TRUE;
3145         }
3146         if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3147             (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) {
3148                 ioflag = FSYNC;
3149         } else {
3150                 ioflag = 0;
3151         }
3152 
3153         xfer_start = gethrtime();
3154         DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
3155             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3156             scsi_task_t *, task);
3157 
3158         /*
3159          * Don't proceed if the device has been closed
3160          * This can occur on an access state change to standby or
3161          * a delete. The writer lock is acquired before closing the
3162          * lu.
3163          */
3164         rw_enter(&sl->sl_access_state_lock, RW_READER);
3165         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3166                 rw_exit(&sl->sl_access_state_lock);
3167                 return (SBD_FAILURE);
3168         }
3169         ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3170             (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3171             &resid);
3172         rw_exit(&sl->sl_access_state_lock);
3173 
3174         stmf_lu_xfer_done(task, B_FALSE /* write */,
3175             (gethrtime() - xfer_start));
3176         DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
3177             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3178             int, ret, scsi_task_t *, task);
3179 
3180         if ((ret == 0) && (resid == 0) &&
3181             (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3182             (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
3183                 sret = sbd_flush_data_cache(sl, 1);
3184         }
3186         if ((ret || resid) || (sret != SBD_SUCCESS)) {
3187                 return (SBD_FAILURE);
3188         } else if ((offset + size) > sl->sl_data_readable_size) {
3189                 uint64_t old_size, new_size;
3190 
                     /*
                      * Raise sl_data_readable_size to the end of this write.
                      * The compare-and-swap is retried until it succeeds or
                      * the field already covers the end of this write.
                      */
3191                 do {
3192                         old_size = sl->sl_data_readable_size;
3193                         if ((offset + size) <= old_size)
3194                                 break;
3195                         new_size = offset + size;
3196                 } while (atomic_cas_64(&sl->sl_data_readable_size, old_size,
3197                     new_size) != old_size);
3198         }
3199 
3200         return (SBD_SUCCESS);
3201 }
3202 
3203 int
3204 sbd_get_global_props(sbd_global_props_t *oslp, uint32_t oslp_sz,
3205     uint32_t *err_ret)


3701                 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3702                 size = newsize;
3703                 goto again;
3704         } else if (rc != 0) {
3705                 goto out;
3706         }
3707         rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
3708             zc->zc_nvlist_dst_size, &nv, 0);
3709         ASSERT(rc == 0);        /* nvlist_unpack should not fail */
3710         if ((rc = nvlist_lookup_nvlist(nv, "stmf_sbd_lu", &nv2)) == 0) {
3711                 rc = nvlist_lookup_string(nv2, ZPROP_VALUE, &ptr);
3712                 if (rc != 0) {
3713                         cmn_err(CE_WARN, "couldn't get value");
3714                 } else {
3715                         *comstarprop = kmem_alloc(strlen(ptr) + 1,
3716                             KM_SLEEP);
3717                         (void) strcpy(*comstarprop, ptr);
3718                 }
3719         }
3720 out:
3721         if (nv != NULL)
3722                 nvlist_free(nv);
3723         kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3724         kmem_free(zc, sizeof (zfs_cmd_t));
3725         (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3726 
3727         return (rc);
3728 }
3729 
3730 int
3731 sbd_zvolset(char *zvol_name, char *comstarprop)
3732 {
3733         ldi_handle_t    zfs_lh;
3734         nvlist_t        *nv;
3735         char            *packed = NULL;
3736         size_t          len;
3737         zfs_cmd_t       *zc;
3738         int unused;
3739         int rc;
3740 
3741         if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,


3752         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3753         (void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
3754         zc->zc_nvlist_src = (uint64_t)(intptr_t)packed;
3755         zc->zc_nvlist_src_size = len;
3756         rc = ldi_ioctl(zfs_lh, ZFS_IOC_SET_PROP, (intptr_t)zc,
3757             FKIOCTL, kcred, &unused);
3758         if (rc != 0) {
3759                 cmn_err(CE_NOTE, "ioctl failed %d", rc);
3760         }
3761         kmem_free(zc, sizeof (zfs_cmd_t));
3762         if (packed)
3763                 kmem_free(packed, len);
3764 out:
3765         nvlist_free(nv);
3766         (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3767         return (rc);
3768 }
3769 
3770 /*
3771  * Unmap a region in a volume.  Currently only supported for zvols.
3772  * The list of extents to be freed is passed in a dkioc_free_list_t
3773  * which the caller is responsible for destroying.
      *
      * dfl_flags in the caller's list is overwritten here: DF_WAIT_SYNC when
      * SL_WRITEBACK_CACHE_DISABLE is set on the logical unit, 0 otherwise.
      *
      * Returns 0 when the list holds no extents, EIO when the logical unit
      * has no data vnode, and otherwise the result of the DKIOCFREE ioctl.
      *
      * NOTE(review): unlike sbd_data_read() and sbd_data_write(), sl_data_vp
      * is used here without taking sl_access_state_lock or testing
      * SL_MEDIA_LOADED -- confirm the callers provide that protection.
3774  */
3775 int
3776 sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
3777 {
3778         vnode_t *vp;
3779         int unused, ret;

3780 
3781         /* Nothing to do */
3782         if (dfl->dfl_num_exts == 0)
3783                 return (0);
3784 
3785         /*
3786          * TODO: unmap performance may be improved by not doing the synchronous
3787          * removal of the blocks and writing of the metadata.  The
3788          * transaction is in the zil so the state should be stable.
3789          */
3790         dfl->dfl_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
3791             DF_WAIT_SYNC : 0;


3792 
3793         /* Use the data vnode we have to send a fop_ioctl(). */
3794         vp = sl->sl_data_vp;
3795         if (vp == NULL) {
3796                 cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
3797                 return (EIO);
3798         }
3799 
3800         ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
3801             &unused, NULL);
3802 
3803         return (ret);
3804 }
3805 
3806 /*
3807  * Check if this lu belongs to sbd or some other lu
3808  * provider. A simple check for one of the module
3809  * entry points is sufficient.
      *
      * Returns 1 when the lu's lu_new_task entry point is sbd_new_task,
      * 0 otherwise.
3810  */
3811 int
3812 sbd_is_valid_lu(stmf_lu_t *lu)
3813 {
3814         if (lu->lu_new_task == sbd_new_task)
3815                 return (1);
3816         return (0);
3817 }
3818 
     /*
      * Return the data block size shift (log2 of the block size in bytes,
      * e.g. 9 for 512) recorded for this logical unit.
      *
      * lu_provider_private is cast to an sbd_lu_t without any check, so the
      * lu passed in must belong to sbd.
      */
3819 uint8_t
3820 sbd_get_lbasize_shift(stmf_lu_t *lu)
3821 {
3822         sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3823 
3824         return (sl->sl_data_blocksize_shift);
3825 }