1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013 by Delphix. All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  26  */
  27 
  28 #include <sys/conf.h>
  29 #include <sys/list.h>
  30 #include <sys/file.h>
  31 #include <sys/ddi.h>
  32 #include <sys/sunddi.h>
  33 #include <sys/modctl.h>
  34 #include <sys/scsi/scsi.h>
  35 #include <sys/scsi/impl/scsi_reset_notify.h>
  36 #include <sys/disp.h>
  37 #include <sys/byteorder.h>
  38 #include <sys/pathname.h>
  39 #include <sys/atomic.h>
  40 #include <sys/nvpair.h>
  41 #include <sys/fs/zfs.h>
  42 #include <sys/sdt.h>
  43 #include <sys/dkio.h>
  44 #include <sys/zfs_ioctl.h>
  45 
  46 #include <sys/stmf.h>
  47 #include <sys/lpif.h>
  48 #include <sys/stmf_ioctl.h>
  49 #include <sys/stmf_sbd_ioctl.h>
  50 
  51 #include "stmf_sbd.h"
  52 #include "sbd_impl.h"
  53 
/*
 * Compare the first 9 bytes of the path `zvol' against "/dev/zvol".
 *
 * NOTE: despite the name, this expands to the raw strncmp() result, so it
 * evaluates to 0 when the path DOES start with "/dev/zvol" and to a
 * non-zero value when it does not.  Callers must compare against 0.
 */
#define SBD_IS_ZVOL(zvol)       (strncmp("/dev/zvol", zvol, 9))
  55 
/*
 * Symbols defined outside this file.  The three HardwareAccelerated*
 * tunables are only inspected by _init() in this file, which logs a notice
 * for each one that is zero.
 */
extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
extern void sbd_pgr_reset(sbd_lu_t *sl);
extern int HardwareAcceleratedLocking;
extern int HardwareAcceleratedInit;
extern int HardwareAcceleratedMove;
extern uint8_t sbd_unmap_enable;
  63 
/*
 * Forward declarations.
 *
 * The static functions below are the driver entry points referenced from
 * the sbd_cb_ops and sbd_ops tables later in this file.
 */
static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int sbd_open(dev_t *devp, int flag, int otype, cred_t *credp);
static int sbd_close(dev_t dev, int flag, int otype, cred_t *credp);
static int stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
    cred_t *credp, int *rval);
/* STMF provider callback and proxy hooks (sbd_lp_cb/sbd_proxy_msg: _init()) */
void sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags);
stmf_status_t sbd_proxy_reg_lu(uint8_t *luid, void *proxy_reg_arg,
    uint32_t proxy_reg_arg_len);
stmf_status_t sbd_proxy_dereg_lu(uint8_t *luid, void *proxy_reg_arg,
    uint32_t proxy_reg_arg_len);
stmf_status_t sbd_proxy_msg(uint8_t *luid, void *proxy_arg,
    uint32_t proxy_arg_len, uint32_t type);
/* Workers for the individual SBD_IOCTL_* commands (see stmf_sbd_ioctl()) */
int sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
    uint32_t *err_ret);
int sbd_create_standby_lu(sbd_create_standby_lu_t *slu, uint32_t *err_ret);
int sbd_set_lu_standby(sbd_set_lu_standby_t *stlu, uint32_t *err_ret);
int sbd_import_lu(sbd_import_lu_t *ilu, int struct_sz, uint32_t *err_ret,
    int no_register, sbd_lu_t **slr);
int sbd_import_active_lu(sbd_import_lu_t *ilu, sbd_lu_t *sl, uint32_t *err_ret);
int sbd_delete_lu(sbd_delete_lu_t *dlu, int struct_sz, uint32_t *err_ret);
int sbd_modify_lu(sbd_modify_lu_t *mlu, int struct_sz, uint32_t *err_ret);
int sbd_set_global_props(sbd_global_props_t *mlu, int struct_sz,
    uint32_t *err_ret);
int sbd_get_global_props(sbd_global_props_t *oslp, uint32_t oslp_sz,
    uint32_t *err_ret);
int sbd_get_lu_props(sbd_lu_props_t *islp, uint32_t islp_sz,
    sbd_lu_props_t *oslp, uint32_t oslp_sz, uint32_t *err_ret);
static char *sbd_get_zvol_name(sbd_lu_t *);
static int sbd_get_unmap_props(sbd_unmap_props_t *sup, sbd_unmap_props_t *osup,
    uint32_t *err_ret);
/* ZFS-backed metadata helpers (used by sbd_read_meta()/sbd_write_meta()) */
sbd_status_t sbd_create_zfs_meta_object(sbd_lu_t *sl);
sbd_status_t sbd_open_zfs_meta(sbd_lu_t *sl);
sbd_status_t sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
    uint64_t off);
sbd_status_t sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
    uint64_t off);
sbd_status_t sbd_update_zfs_prop(sbd_lu_t *sl);
int sbd_is_zvol(char *path);
int sbd_zvolget(char *zvol_name, char **comstarprop);
int sbd_zvolset(char *zvol_name, char *comstarprop);
char sbd_ctoi(char c);
void sbd_close_lu(sbd_lu_t *sl);
 109 
/*
 * Module-wide state.
 *
 * sbd_zfs_ident - LDI identity obtained in _init(), released in _fini().
 * sbd_lp        - STMF LU provider handle allocated/registered in _init().
 * sbd_lu_list   - singly linked list (via sl_next) of linked LUs; walked
 *                 and modified with sbd_lock held.
 * sbd_lock      - mutex taken around every sbd_lu_list traversal in this
 *                 file.
 * sbd_dip       - devinfo node recorded by sbd_attach().
 * sbd_lu_count  - number of LUs reported by SBD_IOCTL_GET_LU_LIST; a
 *                 non-zero value makes _fini() return EBUSY.
 */
static ldi_ident_t      sbd_zfs_ident;
static stmf_lu_provider_t *sbd_lp;
static sbd_lu_t         *sbd_lu_list = NULL;
static kmutex_t         sbd_lock;
static dev_info_t       *sbd_dip;
static uint32_t         sbd_lu_count = 0;
uint8_t sbd_enable_unmap_sync = 0;

/*
 * Global property settings for the logical unit.
 * The three space-padded ID strings are defaults; sbd_attach() overwrites
 * them from the "vendor-id", "product-id" and "revision" driver properties
 * when those are present.
 */
char sbd_vendor_id[]    = "NEXENTA ";
char sbd_product_id[]   = "COMSTAR         ";
char sbd_revision[]     = "1.0 ";
char *sbd_mgmt_url = NULL;
uint16_t sbd_mgmt_url_alloc_size = 0;
krwlock_t sbd_global_prop_lock;

/* Provider name handed to STMF as lp_name in _init(). */
static char sbd_name[] = "sbd";
 127 
/*
 * Character/block entry points.  Only open, close and ioctl are
 * implemented; every other operation is routed to the nodev/nochpoll
 * stubs.
 */
static struct cb_ops sbd_cb_ops = {
        sbd_open,                       /* open */
        sbd_close,                      /* close */
        nodev,                          /* strategy */
        nodev,                          /* print */
        nodev,                          /* dump */
        nodev,                          /* read */
        nodev,                          /* write */
        stmf_sbd_ioctl,                 /* ioctl */
        nodev,                          /* devmap */
        nodev,                          /* mmap */
        nodev,                          /* segmap */
        nochpoll,                       /* chpoll */
        ddi_prop_op,                    /* cb_prop_op */
        0,                              /* streamtab */
        D_NEW | D_MP,                   /* cb_flag */
        CB_REV,                         /* rev */
        nodev,                          /* aread */
        nodev                           /* awrite */
};
 148 
/*
 * Device operations table handed to the module framework through modldrv.
 * Field names in the comments follow the positional order of struct
 * dev_ops (see dev_ops(9S)).
 */
static struct dev_ops sbd_ops = {
        DEVO_REV,               /* rev */
        0,                      /* refcnt */
        sbd_getinfo,            /* getinfo */
        nulldev,                /* identify */
        nulldev,                /* probe */
        sbd_attach,             /* attach */
        sbd_detach,             /* detach */
        nodev,                  /* reset */
        &sbd_cb_ops,            /* cb_ops */
        NULL,                   /* bus_ops */
        NULL                    /* power */
};
 162 
/*
 * Module description string.  DEBUG builds append the compile date and
 * time plus a "DEBUG" marker so the loaded binary can be identified.
 */
#ifdef DEBUG
#define SBD_NAME        "COMSTAR SBD+ " __DATE__ " " __TIME__ " DEBUG"
#else
#define SBD_NAME        "COMSTAR SBD+"
#endif

/* Driver linkage: ties the description string to the sbd_ops table. */
static struct modldrv modldrv = {
        &mod_driverops,
        SBD_NAME,
        &sbd_ops
};
 174 
/* Module linkage passed to mod_install()/mod_remove()/mod_info(). */
static struct modlinkage modlinkage = {
        MODREV_1,
        &modldrv,
        NULL
};
 180 
 181 int
 182 _init(void)
 183 {
 184         int ret;
 185 
 186         ret = mod_install(&modlinkage);
 187         if (ret)
 188                 return (ret);
 189         sbd_lp = (stmf_lu_provider_t *)stmf_alloc(STMF_STRUCT_LU_PROVIDER,
 190             0, 0);
 191         sbd_lp->lp_lpif_rev = LPIF_REV_2;
 192         sbd_lp->lp_instance = 0;
 193         sbd_lp->lp_name = sbd_name;
 194         sbd_lp->lp_cb = sbd_lp_cb;
 195         sbd_lp->lp_alua_support = 1;
 196         sbd_lp->lp_proxy_msg = sbd_proxy_msg;
 197         sbd_zfs_ident = ldi_ident_from_anon();
 198 
 199         if (stmf_register_lu_provider(sbd_lp) != STMF_SUCCESS) {
 200                 (void) mod_remove(&modlinkage);
 201                 stmf_free(sbd_lp);
 202                 return (EINVAL);
 203         }
 204         mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
 205         rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
 206 
 207         if (HardwareAcceleratedLocking == 0)
 208                 cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled");
 209         if (HardwareAcceleratedMove == 0)
 210                 cmn_err(CE_NOTE, "HardwareAcceleratedMove  Disabled");
 211         if (HardwareAcceleratedInit == 0)
 212                 cmn_err(CE_NOTE, "HardwareAcceleratedInit  Disabled");
 213 
 214         return (0);
 215 }
 216 
 217 int
 218 _fini(void)
 219 {
 220         int ret;
 221 
 222         /*
 223          * If we have registered lus, then make sure they are all offline
 224          * if so then deregister them. This should drop the sbd_lu_count
 225          * to zero.
 226          */
 227         if (sbd_lu_count) {
 228                 sbd_lu_t *slu;
 229 
 230                 /* See if all of them are offline */
 231                 mutex_enter(&sbd_lock);
 232                 for (slu = sbd_lu_list; slu != NULL; slu = slu->sl_next) {
 233                         if ((slu->sl_state != STMF_STATE_OFFLINE) ||
 234                             slu->sl_state_not_acked) {
 235                                 mutex_exit(&sbd_lock);
 236                                 return (EBUSY);
 237                         }
 238                 }
 239                 mutex_exit(&sbd_lock);
 240 
 241 #if 0
 242                 /* ok start deregistering them */
 243                 while (sbd_lu_list) {
 244                         sbd_store_t *sst = sbd_lu_list->sl_sst;
 245                         if (sst->sst_deregister_lu(sst) != STMF_SUCCESS)
 246                                 return (EBUSY);
 247                 }
 248 #endif
 249                 return (EBUSY);
 250         }
 251         if (stmf_deregister_lu_provider(sbd_lp) != STMF_SUCCESS)
 252                 return (EBUSY);
 253         ret = mod_remove(&modlinkage);
 254         if (ret != 0) {
 255                 (void) stmf_register_lu_provider(sbd_lp);
 256                 return (ret);
 257         }
 258         stmf_free(sbd_lp);
 259         mutex_destroy(&sbd_lock);
 260         rw_destroy(&sbd_global_prop_lock);
 261         ldi_ident_release(sbd_zfs_ident);
 262         return (0);
 263 }
 264 
 265 int
 266 _info(struct modinfo *modinfop)
 267 {
 268         return (mod_info(&modlinkage, modinfop));
 269 }
 270 
 271 /* ARGSUSED */
 272 static int
 273 sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
 274 {
 275         switch (cmd) {
 276         case DDI_INFO_DEVT2DEVINFO:
 277                 *result = sbd_dip;
 278                 break;
 279         case DDI_INFO_DEVT2INSTANCE:
 280                 *result = (void *)(uintptr_t)ddi_get_instance(sbd_dip);
 281                 break;
 282         default:
 283                 return (DDI_FAILURE);
 284         }
 285 
 286         return (DDI_SUCCESS);
 287 }
 288 
 289 static int
 290 sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 291 {
 292         char    *prop;
 293 
 294         switch (cmd) {
 295         case DDI_ATTACH:
 296                 sbd_dip = dip;
 297 
 298                 if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
 299                     DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
 300                         break;
 301                 }
 302                 ddi_report_dev(dip);
 303 
 304                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
 305                     DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) {
 306                         (void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, "");
 307                         ddi_prop_free(prop);
 308                 }
 309                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
 310                     DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) {
 311                         (void) snprintf(sbd_product_id, 17, "%s%16s", prop, "");
 312                         ddi_prop_free(prop);
 313                 }
 314                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
 315                     DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) {
 316                         (void) snprintf(sbd_revision, 5, "%s%4s", prop, "");
 317                         ddi_prop_free(prop);
 318                 }
 319 
 320                 return (DDI_SUCCESS);
 321         }
 322 
 323         return (DDI_FAILURE);
 324 }
 325 
 326 static int
 327 sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 328 {
 329         switch (cmd) {
 330         case DDI_DETACH:
 331                 ddi_remove_minor_node(dip, 0);
 332                 return (DDI_SUCCESS);
 333         }
 334 
 335         return (DDI_FAILURE);
 336 }
 337 
 338 /* ARGSUSED */
 339 static int
 340 sbd_open(dev_t *devp, int flag, int otype, cred_t *credp)
 341 {
 342         if (otype != OTYP_CHR)
 343                 return (EINVAL);
 344         return (0);
 345 }
 346 
 347 /* ARGSUSED */
 348 static int
 349 sbd_close(dev_t dev, int flag, int otype, cred_t *credp)
 350 {
 351         return (0);
 352 }
 353 
/*
 * ioctl(9E) entry point for the "admin" node.
 *
 * The caller must pass drv_priv(); otherwise EPERM is returned.  The
 * request descriptor and its input/output buffers are brought into the
 * kernel by stmf_copyin_iocdata(), the command is dispatched on `cmd',
 * and the descriptor plus output buffer are copied back out when the
 * command succeeded, or when it failed but set iocd->stmf_error.  All
 * three kernel allocations are freed before returning.
 *
 * Buffer-size validation convention used by the cases below: an input
 * buffer that is too small yields EFAULT, an unexpected or undersized
 * output buffer yields EINVAL.  Several checks use "sizeof (type) - 8";
 * presumably those structures end in a variable-length field declared
 * with 8 bytes (sbd_lp_cb() sizes sbd_import_lu_t the same way) -- TODO
 * confirm against stmf_sbd_ioctl.h.
 *
 * Returns 0 or an errno value; unknown commands return ENOTTY.
 */
/* ARGSUSED */
static int
stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
    cred_t *credp, int *rval)
{
        stmf_iocdata_t          *iocd;
        void                    *ibuf   = NULL;
        void                    *obuf   = NULL;
        sbd_lu_t                *nsl;
        int                     i;
        int                     ret;

        if (drv_priv(credp) != 0) {
                return (EPERM);
        }

        ret = stmf_copyin_iocdata(data, mode, &iocd, &ibuf, &obuf);
        if (ret)
                return (ret);
        iocd->stmf_error = 0;

        switch (cmd) {
        case SBD_IOCTL_CREATE_AND_REGISTER_LU:
                if (iocd->stmf_ibuf_size <
                    (sizeof (sbd_create_and_reg_lu_t) - 8)) {
                        ret = EFAULT;
                        break;
                }
                /* obuf must exist and must not be larger than ibuf */
                if ((iocd->stmf_obuf_size == 0) ||
                    (iocd->stmf_obuf_size > iocd->stmf_ibuf_size)) {
                        ret = EINVAL;
                        break;
                }
                ret = sbd_create_register_lu((sbd_create_and_reg_lu_t *)
                    ibuf, iocd->stmf_ibuf_size, &iocd->stmf_error);
                /* the (possibly updated) request is echoed back to the user */
                bcopy(ibuf, obuf, iocd->stmf_obuf_size);
                break;
        case SBD_IOCTL_SET_LU_STANDBY:
                if (iocd->stmf_ibuf_size < sizeof (sbd_set_lu_standby_t)) {
                        ret = EFAULT;
                        break;
                }
                if (iocd->stmf_obuf_size) {
                        ret = EINVAL;
                        break;
                }
                ret = sbd_set_lu_standby((sbd_set_lu_standby_t *)ibuf,
                    &iocd->stmf_error);
                break;
        case SBD_IOCTL_IMPORT_LU:
                if (iocd->stmf_ibuf_size <
                    (sizeof (sbd_import_lu_t) - 8)) {
                        ret = EFAULT;
                        break;
                }
                /* obuf must exist and must not be larger than ibuf */
                if ((iocd->stmf_obuf_size == 0) ||
                    (iocd->stmf_obuf_size > iocd->stmf_ibuf_size)) {
                        ret = EINVAL;
                        break;
                }
                ret = sbd_import_lu((sbd_import_lu_t *)ibuf,
                    iocd->stmf_ibuf_size, &iocd->stmf_error, 0, NULL);
                /* the (possibly updated) request is echoed back to the user */
                bcopy(ibuf, obuf, iocd->stmf_obuf_size);
                break;
        case SBD_IOCTL_DELETE_LU:
                if (iocd->stmf_ibuf_size < (sizeof (sbd_delete_lu_t) - 8)) {
                        ret = EFAULT;
                        break;
                }
                if (iocd->stmf_obuf_size) {
                        ret = EINVAL;
                        break;
                }
                ret = sbd_delete_lu((sbd_delete_lu_t *)ibuf,
                    iocd->stmf_ibuf_size, &iocd->stmf_error);
                break;
        case SBD_IOCTL_MODIFY_LU:
                if (iocd->stmf_ibuf_size < (sizeof (sbd_modify_lu_t) - 8)) {
                        ret = EFAULT;
                        break;
                }
                if (iocd->stmf_obuf_size) {
                        ret = EINVAL;
                        break;
                }
                ret = sbd_modify_lu((sbd_modify_lu_t *)ibuf,
                    iocd->stmf_ibuf_size, &iocd->stmf_error);
                break;
        case SBD_IOCTL_SET_GLOBAL_LU:
                if (iocd->stmf_ibuf_size < (sizeof (sbd_global_props_t) - 8)) {
                        ret = EFAULT;
                        break;
                }
                if (iocd->stmf_obuf_size) {
                        ret = EINVAL;
                        break;
                }
                ret = sbd_set_global_props((sbd_global_props_t *)ibuf,
                    iocd->stmf_ibuf_size, &iocd->stmf_error);
                break;
        case SBD_IOCTL_GET_GLOBAL_LU:
                /* pure query: no input buffer is allowed */
                if (iocd->stmf_ibuf_size) {
                        ret = EINVAL;
                        break;
                }
                if (iocd->stmf_obuf_size < sizeof (sbd_global_props_t)) {
                        ret = EINVAL;
                        break;
                }
                ret = sbd_get_global_props((sbd_global_props_t *)obuf,
                    iocd->stmf_obuf_size, &iocd->stmf_error);
                break;
        case SBD_IOCTL_GET_LU_PROPS:
                if (iocd->stmf_ibuf_size < (sizeof (sbd_lu_props_t) - 8)) {
                        ret = EFAULT;
                        break;
                }
                if (iocd->stmf_obuf_size < sizeof (sbd_lu_props_t)) {
                        ret = EINVAL;
                        break;
                }
                ret = sbd_get_lu_props((sbd_lu_props_t *)ibuf,
                    iocd->stmf_ibuf_size, (sbd_lu_props_t *)obuf,
                    iocd->stmf_obuf_size, &iocd->stmf_error);
                break;
        case SBD_IOCTL_GET_LU_LIST:
                /*
                 * Each entry is 16 bytes taken from sl_device_id + 4.  The
                 * total LU count is always reported; only as many entries
                 * as fit in the output buffer are copied.
                 */
                mutex_enter(&sbd_lock);
                iocd->stmf_obuf_max_nentries = sbd_lu_count;
                iocd->stmf_obuf_nentries = min((iocd->stmf_obuf_size >> 4),
                    sbd_lu_count);
                for (nsl = sbd_lu_list, i = 0; nsl &&
                    (i < iocd->stmf_obuf_nentries); i++, nsl = nsl->sl_next) {
                        bcopy(nsl->sl_device_id + 4,
                            &(((uint8_t *)obuf)[i << 4]), 16);
                }
                mutex_exit(&sbd_lock);
                ret = 0;
                iocd->stmf_error = 0;
                break;
        case SBD_IOCTL_GET_UNMAP_PROPS:
                if (iocd->stmf_ibuf_size < sizeof (sbd_unmap_props_t)) {
                        ret = EFAULT;
                        break;
                }
                if (iocd->stmf_obuf_size < sizeof (sbd_unmap_props_t)) {
                        ret = EINVAL;
                        break;
                }
                ret = sbd_get_unmap_props((sbd_unmap_props_t *)ibuf,
                    (sbd_unmap_props_t *)obuf, &iocd->stmf_error);
                break;
        default:
                ret = ENOTTY;
        }

        /*
         * On success the copyout result becomes the return value.  On
         * failure the copyout is best effort and only done when there is a
         * detailed stmf_error to report; the original errno is kept.
         */
        if (ret == 0) {
                ret = stmf_copyout_iocdata(data, mode, iocd, obuf);
        } else if (iocd->stmf_error) {
                (void) stmf_copyout_iocdata(data, mode, iocd, obuf);
        }
        if (obuf) {
                kmem_free(obuf, iocd->stmf_obuf_size);
                obuf = NULL;
        }
        if (ibuf) {
                kmem_free(ibuf, iocd->stmf_ibuf_size);
                ibuf = NULL;
        }
        kmem_free(iocd, sizeof (stmf_iocdata_t));
        return (ret);
}
 525 
 526 /* ARGSUSED */
 527 void
 528 sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags)
 529 {
 530         nvpair_t        *np;
 531         char            *s;
 532         sbd_import_lu_t *ilu;
 533         uint32_t        ilu_sz;
 534         uint32_t        struct_sz;
 535         uint32_t        err_ret;
 536         int             iret;
 537 
 538         if ((cmd != STMF_PROVIDER_DATA_UPDATED) || (arg == NULL)) {
 539                 return;
 540         }
 541 
 542         if ((flags & (STMF_PCB_STMF_ONLINING | STMF_PCB_PREG_COMPLETE)) == 0) {
 543                 return;
 544         }
 545 
 546         np = NULL;
 547         ilu_sz = 1024;
 548         ilu = (sbd_import_lu_t *)kmem_zalloc(ilu_sz, KM_SLEEP);
 549         while ((np = nvlist_next_nvpair((nvlist_t *)arg, np)) != NULL) {
 550                 if (nvpair_type(np) != DATA_TYPE_STRING) {
 551                         continue;
 552                 }
 553                 if (nvpair_value_string(np, &s) != 0) {
 554                         continue;
 555                 }
 556                 struct_sz = max(8, strlen(s) + 1);
 557                 struct_sz += sizeof (sbd_import_lu_t) - 8;
 558                 if (struct_sz > ilu_sz) {
 559                         kmem_free(ilu, ilu_sz);
 560                         ilu_sz = struct_sz + 32;
 561                         ilu = (sbd_import_lu_t *)kmem_zalloc(ilu_sz, KM_SLEEP);
 562                 }
 563                 ilu->ilu_struct_size = struct_sz;
 564                 (void) strcpy(ilu->ilu_meta_fname, s);
 565                 iret = sbd_import_lu(ilu, struct_sz, &err_ret, 0, NULL);
 566                 if (iret) {
 567                         stmf_trace(0, "sbd_lp_cb: import_lu failed, ret = %d, "
 568                             "err_ret = %d", iret, err_ret);
 569                 } else {
 570                         stmf_trace(0, "Imported the LU %s", nvpair_name(np));
 571                 }
 572         }
 573 
 574         if (ilu) {
 575                 kmem_free(ilu, ilu_sz);
 576                 ilu = NULL;
 577         }
 578 }
 579 
 580 sbd_status_t
 581 sbd_link_lu(sbd_lu_t *sl)
 582 {
 583         sbd_lu_t *nsl;
 584 
 585         mutex_enter(&sbd_lock);
 586         mutex_enter(&sl->sl_lock);
 587         ASSERT(sl->sl_trans_op != SL_OP_NONE);
 588 
 589         if (sl->sl_flags & SL_LINKED) {
 590                 mutex_exit(&sbd_lock);
 591                 mutex_exit(&sl->sl_lock);
 592                 return (SBD_ALREADY);
 593         }
 594         for (nsl = sbd_lu_list; nsl; nsl = nsl->sl_next) {
 595                 if (strcmp(nsl->sl_name, sl->sl_name) == 0)
 596                         break;
 597         }
 598         if (nsl) {
 599                 mutex_exit(&sbd_lock);
 600                 mutex_exit(&sl->sl_lock);
 601                 return (SBD_ALREADY);
 602         }
 603         sl->sl_next = sbd_lu_list;
 604         sbd_lu_list = sl;
 605         sl->sl_flags |= SL_LINKED;
 606         mutex_exit(&sbd_lock);
 607         mutex_exit(&sl->sl_lock);
 608         return (SBD_SUCCESS);
 609 }
 610 
 611 void
 612 sbd_unlink_lu(sbd_lu_t *sl)
 613 {
 614         sbd_lu_t **ppnsl;
 615 
 616         mutex_enter(&sbd_lock);
 617         mutex_enter(&sl->sl_lock);
 618         ASSERT(sl->sl_trans_op != SL_OP_NONE);
 619 
 620         ASSERT(sl->sl_flags & SL_LINKED);
 621         for (ppnsl = &sbd_lu_list; *ppnsl; ppnsl = &((*ppnsl)->sl_next)) {
 622                 if (*ppnsl == sl)
 623                         break;
 624         }
 625         ASSERT(*ppnsl);
 626         *ppnsl = (*ppnsl)->sl_next;
 627         sl->sl_flags &= ~SL_LINKED;
 628         mutex_exit(&sbd_lock);
 629         mutex_exit(&sl->sl_lock);
 630 }
 631 
 632 sbd_status_t
 633 sbd_find_and_lock_lu(uint8_t *guid, uint8_t *meta_name, uint8_t op,
 634     sbd_lu_t **ppsl)
 635 {
 636         sbd_lu_t *sl;
 637         int found = 0;
 638         sbd_status_t sret;
 639 
 640         mutex_enter(&sbd_lock);
 641         for (sl = sbd_lu_list; sl; sl = sl->sl_next) {
 642                 if (guid) {
 643                         found = bcmp(sl->sl_device_id + 4, guid, 16) == 0;
 644                 } else {
 645                         found = strcmp(sl->sl_name, (char *)meta_name) == 0;
 646                 }
 647                 if (found)
 648                         break;
 649         }
 650         if (!found) {
 651                 mutex_exit(&sbd_lock);
 652                 return (SBD_NOT_FOUND);
 653         }
 654         mutex_enter(&sl->sl_lock);
 655         if (sl->sl_trans_op == SL_OP_NONE) {
 656                 sl->sl_trans_op = op;
 657                 *ppsl = sl;
 658                 sret = SBD_SUCCESS;
 659         } else {
 660                 sret = SBD_BUSY;
 661         }
 662         mutex_exit(&sl->sl_lock);
 663         mutex_exit(&sbd_lock);
 664         return (sret);
 665 }
 666 
 667 sbd_status_t
 668 sbd_read_meta(sbd_lu_t *sl, uint64_t offset, uint64_t size, uint8_t *buf)
 669 {
 670         uint64_t        meta_align;
 671         uint64_t        starting_off;
 672         uint64_t        data_off;
 673         uint64_t        ending_off;
 674         uint64_t        io_size;
 675         uint8_t         *io_buf;
 676         vnode_t         *vp;
 677         sbd_status_t    ret;
 678         ssize_t         resid;
 679         int             vret;
 680 
 681         ASSERT(sl->sl_flags & SL_META_OPENED);
 682         if (sl->sl_flags & SL_SHARED_META) {
 683                 meta_align = (((uint64_t)1) << sl->sl_data_blocksize_shift) - 1;
 684                 vp = sl->sl_data_vp;
 685                 ASSERT(vp);
 686         } else {
 687                 meta_align = (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
 688                 if ((sl->sl_flags & SL_ZFS_META) == 0) {
 689                         vp = sl->sl_meta_vp;
 690                         ASSERT(vp);
 691                 }
 692         }
 693         starting_off = offset & ~(meta_align);
 694         data_off = offset & meta_align;
 695         ending_off = (offset + size + meta_align) & (~meta_align);
 696         if (ending_off > sl->sl_meta_size_used) {
 697                 bzero(buf, size);
 698                 if (starting_off >= sl->sl_meta_size_used) {
 699                         return (SBD_SUCCESS);
 700                 }
 701                 ending_off = (sl->sl_meta_size_used + meta_align) &
 702                     (~meta_align);
 703                 if (size > (ending_off - (starting_off + data_off))) {
 704                         size = ending_off - (starting_off + data_off);
 705                 }
 706         }
 707         io_size = ending_off - starting_off;
 708         io_buf = (uint8_t *)kmem_zalloc(io_size, KM_SLEEP);
 709         ASSERT((starting_off + io_size) <= sl->sl_total_meta_size);
 710 
 711         /*
 712          * Don't proceed if the device has been closed
 713          * This can occur on an access state change to standby or
 714          * a delete. The writer lock is acquired before closing the
 715          * lu. If importing, reading the metadata is valid, hence
 716          * the check on SL_OP_IMPORT_LU.
 717          */
 718         rw_enter(&sl->sl_access_state_lock, RW_READER);
 719         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 &&
 720             sl->sl_trans_op != SL_OP_IMPORT_LU) {
 721                 rw_exit(&sl->sl_access_state_lock);
 722                 ret = SBD_FILEIO_FAILURE;
 723                 goto sbd_read_meta_failure;
 724         }
 725         if (sl->sl_flags & SL_ZFS_META) {
 726                 if ((ret = sbd_read_zfs_meta(sl, io_buf, io_size,
 727                     starting_off)) != SBD_SUCCESS) {
 728                         rw_exit(&sl->sl_access_state_lock);
 729                         goto sbd_read_meta_failure;
 730                 }
 731         } else {
 732                 vret = vn_rdwr(UIO_READ, vp, (caddr_t)io_buf, (ssize_t)io_size,
 733                     (offset_t)starting_off, UIO_SYSSPACE, FRSYNC,
 734                     RLIM64_INFINITY, CRED(), &resid);
 735 
 736                 if (vret || resid) {
 737                         ret = SBD_FILEIO_FAILURE | vret;
 738                         rw_exit(&sl->sl_access_state_lock);
 739                         goto sbd_read_meta_failure;
 740                 }
 741         }
 742         rw_exit(&sl->sl_access_state_lock);
 743 
 744         bcopy(io_buf + data_off, buf, size);
 745         ret = SBD_SUCCESS;
 746 
 747 sbd_read_meta_failure:
 748         kmem_free(io_buf, io_size);
 749         return (ret);
 750 }
 751 
 752 sbd_status_t
 753 sbd_write_meta(sbd_lu_t *sl, uint64_t offset, uint64_t size, uint8_t *buf)
 754 {
 755         uint64_t        meta_align;
 756         uint64_t        starting_off;
 757         uint64_t        data_off;
 758         uint64_t        ending_off;
 759         uint64_t        io_size;
 760         uint8_t         *io_buf;
 761         vnode_t         *vp;
 762         sbd_status_t    ret;
 763         ssize_t         resid;
 764         int             vret;
 765 
 766         ASSERT(sl->sl_flags & SL_META_OPENED);
 767         if (sl->sl_flags & SL_SHARED_META) {
 768                 meta_align = (((uint64_t)1) << sl->sl_data_blocksize_shift) - 1;
 769                 vp = sl->sl_data_vp;
 770                 ASSERT(vp);
 771         } else {
 772                 meta_align = (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
 773                 if ((sl->sl_flags & SL_ZFS_META) == 0) {
 774                         vp = sl->sl_meta_vp;
 775                         ASSERT(vp);
 776                 }
 777         }
 778         starting_off = offset & ~(meta_align);
 779         data_off = offset & meta_align;
 780         ending_off = (offset + size + meta_align) & (~meta_align);
 781         io_size = ending_off - starting_off;
 782         io_buf = (uint8_t *)kmem_zalloc(io_size, KM_SLEEP);
 783         ret = sbd_read_meta(sl, starting_off, io_size, io_buf);
 784         if (ret != SBD_SUCCESS) {
 785                 goto sbd_write_meta_failure;
 786         }
 787         bcopy(buf, io_buf + data_off, size);
 788         /*
 789          * Don't proceed if the device has been closed
 790          * This can occur on an access state change to standby or
 791          * a delete. The writer lock is acquired before closing the
 792          * lu. If importing, reading the metadata is valid, hence
 793          * the check on SL_OP_IMPORT_LU.
 794          */
 795         rw_enter(&sl->sl_access_state_lock, RW_READER);
 796         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 &&
 797             sl->sl_trans_op != SL_OP_IMPORT_LU) {
 798                 rw_exit(&sl->sl_access_state_lock);
 799                 ret = SBD_FILEIO_FAILURE;
 800                 goto sbd_write_meta_failure;
 801         }
 802         if (sl->sl_flags & SL_ZFS_META) {
 803                 if ((ret = sbd_write_zfs_meta(sl, io_buf, io_size,
 804                     starting_off)) != SBD_SUCCESS) {
 805                         rw_exit(&sl->sl_access_state_lock);
 806                         goto sbd_write_meta_failure;
 807                 }
 808         } else {
 809                 vret = vn_rdwr(UIO_WRITE, vp, (caddr_t)io_buf, (ssize_t)io_size,
 810                     (offset_t)starting_off, UIO_SYSSPACE, FDSYNC,
 811                     RLIM64_INFINITY, CRED(), &resid);
 812 
 813                 if (vret || resid) {
 814                         ret = SBD_FILEIO_FAILURE | vret;
 815                         rw_exit(&sl->sl_access_state_lock);
 816                         goto sbd_write_meta_failure;
 817                 }
 818         }
 819         rw_exit(&sl->sl_access_state_lock);
 820 
 821         ret = SBD_SUCCESS;
 822 
 823 sbd_write_meta_failure:
 824         kmem_free(io_buf, io_size);
 825         return (ret);
 826 }
 827 
 828 uint8_t
 829 sbd_calc_sum(uint8_t *buf, int size)
 830 {
 831         uint8_t s = 0;
 832 
 833         while (size > 0)
 834                 s += buf[--size];
 835 
 836         return (s);
 837 }
 838 
 839 uint8_t
 840 sbd_calc_section_sum(sm_section_hdr_t *sm, uint32_t sz)
 841 {
 842         uint8_t s, o;
 843 
 844         o = sm->sms_chksum;
 845         sm->sms_chksum = 0;
 846         s = sbd_calc_sum((uint8_t *)sm, sz);
 847         sm->sms_chksum = o;
 848 
 849         return (s);
 850 }
 851 
 852 uint32_t
 853 sbd_strlen(char *str, uint32_t maxlen)
 854 {
 855         uint32_t i;
 856 
 857         for (i = 0; i < maxlen; i++) {
 858                 if (str[i] == 0)
 859                         return (i);
 860         }
 861         return (i);
 862 }
 863 
 864 void
 865 sbd_swap_meta_start(sbd_meta_start_t *sm)
 866 {
 867         if (sm->sm_magic == SBD_MAGIC)
 868                 return;
 869         sm->sm_magic         = BSWAP_64(sm->sm_magic);
 870         sm->sm_meta_size     = BSWAP_64(sm->sm_meta_size);
 871         sm->sm_meta_size_used        = BSWAP_64(sm->sm_meta_size_used);
 872         sm->sm_ver_major     = BSWAP_16(sm->sm_ver_major);
 873         sm->sm_ver_minor     = BSWAP_16(sm->sm_ver_minor);
 874         sm->sm_ver_subminor  = BSWAP_16(sm->sm_ver_subminor);
 875 }
 876 
 877 void
 878 sbd_swap_section_hdr(sm_section_hdr_t *sm)
 879 {
 880         if (sm->sms_data_order == SMS_DATA_ORDER)
 881                 return;
 882         sm->sms_offset               = BSWAP_64(sm->sms_offset);
 883         sm->sms_size         = BSWAP_32(sm->sms_size);
 884         sm->sms_id           = BSWAP_16(sm->sms_id);
 885         sm->sms_chksum               += SMS_DATA_ORDER - sm->sms_data_order;
 886         sm->sms_data_order   = SMS_DATA_ORDER;
 887 }
 888 
 889 void
 890 sbd_swap_lu_info_1_0(sbd_lu_info_1_0_t *sli)
 891 {
 892         sbd_swap_section_hdr(&sli->sli_sms_header);
 893         if (sli->sli_data_order == SMS_DATA_ORDER)
 894                 return;
 895         sli->sli_sms_header.sms_chksum       += SMS_DATA_ORDER - sli->sli_data_order;
 896         sli->sli_data_order          = SMS_DATA_ORDER;
 897         sli->sli_total_store_size    = BSWAP_64(sli->sli_total_store_size);
 898         sli->sli_total_meta_size     = BSWAP_64(sli->sli_total_meta_size);
 899         sli->sli_lu_data_offset              = BSWAP_64(sli->sli_lu_data_offset);
 900         sli->sli_lu_data_size                = BSWAP_64(sli->sli_lu_data_size);
 901         sli->sli_flags                       = BSWAP_32(sli->sli_flags);
 902         sli->sli_blocksize           = BSWAP_16(sli->sli_blocksize);
 903 }
 904 
 905 void
 906 sbd_swap_lu_info_1_1(sbd_lu_info_1_1_t *sli)
 907 {
 908         sbd_swap_section_hdr(&sli->sli_sms_header);
 909         if (sli->sli_data_order == SMS_DATA_ORDER)
 910                 return;
 911         sli->sli_sms_header.sms_chksum       += SMS_DATA_ORDER - sli->sli_data_order;
 912         sli->sli_data_order          = SMS_DATA_ORDER;
 913         sli->sli_flags                       = BSWAP_32(sli->sli_flags);
 914         sli->sli_lu_size             = BSWAP_64(sli->sli_lu_size);
 915         sli->sli_meta_fname_offset   = BSWAP_64(sli->sli_meta_fname_offset);
 916         sli->sli_data_fname_offset   = BSWAP_64(sli->sli_data_fname_offset);
 917         sli->sli_serial_offset               = BSWAP_64(sli->sli_serial_offset);
 918         sli->sli_alias_offset                = BSWAP_64(sli->sli_alias_offset);
 919         sli->sli_mgmt_url_offset     = BSWAP_64(sli->sli_mgmt_url_offset);
 920 }
 921 
 922 sbd_status_t
 923 sbd_load_section_hdr(sbd_lu_t *sl, sm_section_hdr_t *sms)
 924 {
 925         sm_section_hdr_t        h;
 926         uint64_t                st;
 927         sbd_status_t            ret;
 928 
 929         for (st = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
 930             st < sl->sl_meta_size_used; st += h.sms_size) {
 931                 if ((ret = sbd_read_meta(sl, st, sizeof (sm_section_hdr_t),
 932                     (uint8_t *)&h)) != SBD_SUCCESS) {
 933                         return (ret);
 934                 }
 935                 if (h.sms_data_order != SMS_DATA_ORDER) {
 936                         sbd_swap_section_hdr(&h);
 937                 }
 938                 if ((h.sms_data_order != SMS_DATA_ORDER) ||
 939                     (h.sms_offset != st) || (h.sms_size < sizeof (h)) ||
 940                     ((st + h.sms_size) > sl->sl_meta_size_used)) {
 941                         return (SBD_META_CORRUPTED);
 942                 }
 943                 if (h.sms_id == sms->sms_id) {
 944                         bcopy(&h, sms, sizeof (h));
 945                         return (SBD_SUCCESS);
 946                 }
 947         }
 948 
 949         return (SBD_NOT_FOUND);
 950 }
 951 
 952 sbd_status_t
 953 sbd_load_meta_start(sbd_lu_t *sl)
 954 {
 955         sbd_meta_start_t *sm;
 956         sbd_status_t ret;
 957 
 958         /* Fake meta params initially */
 959         sl->sl_total_meta_size = (uint64_t)-1;
 960         sl->sl_meta_size_used = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
 961 
 962         sm = kmem_zalloc(sizeof (*sm), KM_SLEEP);
 963         ret = sbd_read_meta(sl, sl->sl_meta_offset, sizeof (*sm),
 964             (uint8_t *)sm);
 965         if (ret != SBD_SUCCESS) {
 966                 goto load_meta_start_failed;
 967         }
 968 
 969         if (sm->sm_magic != SBD_MAGIC) {
 970                 sbd_swap_meta_start(sm);
 971         }
 972 
 973         if ((sm->sm_magic != SBD_MAGIC) || (sbd_calc_sum((uint8_t *)sm,
 974             sizeof (*sm) - 1) != sm->sm_chksum)) {
 975                 ret = SBD_META_CORRUPTED;
 976                 goto load_meta_start_failed;
 977         }
 978 
 979         if (sm->sm_ver_major != SBD_VER_MAJOR) {
 980                 ret = SBD_NOT_SUPPORTED;
 981                 goto load_meta_start_failed;
 982         }
 983 
 984         sl->sl_total_meta_size = sm->sm_meta_size;
 985         sl->sl_meta_size_used = sm->sm_meta_size_used;
 986         ret = SBD_SUCCESS;
 987 
 988 load_meta_start_failed:
 989         kmem_free(sm, sizeof (*sm));
 990         return (ret);
 991 }
 992 
 993 sbd_status_t
 994 sbd_write_meta_start(sbd_lu_t *sl, uint64_t meta_size, uint64_t meta_size_used)
 995 {
 996         sbd_meta_start_t *sm;
 997         sbd_status_t ret;
 998 
 999         sm = (sbd_meta_start_t *)kmem_zalloc(sizeof (sbd_meta_start_t),
1000             KM_SLEEP);
1001 
1002         sm->sm_magic = SBD_MAGIC;
1003         sm->sm_meta_size = meta_size;
1004         sm->sm_meta_size_used = meta_size_used;
1005         sm->sm_ver_major = SBD_VER_MAJOR;
1006         sm->sm_ver_minor = SBD_VER_MINOR;
1007         sm->sm_ver_subminor = SBD_VER_SUBMINOR;
1008         sm->sm_chksum = sbd_calc_sum((uint8_t *)sm, sizeof (*sm) - 1);
1009 
1010         ret = sbd_write_meta(sl, sl->sl_meta_offset, sizeof (*sm),
1011             (uint8_t *)sm);
1012         kmem_free(sm, sizeof (*sm));
1013 
1014         return (ret);
1015 }
1016 
1017 sbd_status_t
1018 sbd_read_meta_section(sbd_lu_t *sl, sm_section_hdr_t **ppsms, uint16_t sms_id)
1019 {
1020         sbd_status_t ret;
1021         sm_section_hdr_t sms;
1022         int alloced = 0;
1023 
1024         mutex_enter(&sl->sl_metadata_lock);
1025         if (((*ppsms) == NULL) || ((*ppsms)->sms_offset == 0)) {
1026                 bzero(&sms, sizeof (sm_section_hdr_t));
1027                 sms.sms_id = sms_id;
1028                 if ((ret = sbd_load_section_hdr(sl, &sms)) != SBD_SUCCESS) {
1029                         mutex_exit(&sl->sl_metadata_lock);
1030                         return (ret);
1031                 } else {
1032                         if ((*ppsms) == NULL) {
1033                                 *ppsms = (sm_section_hdr_t *)kmem_zalloc(
1034                                     sms.sms_size, KM_SLEEP);
1035                                 alloced = 1;
1036                         }
1037                         bcopy(&sms, *ppsms, sizeof (sm_section_hdr_t));
1038                 }
1039         }
1040 
1041         ret = sbd_read_meta(sl, (*ppsms)->sms_offset, (*ppsms)->sms_size,
1042             (uint8_t *)(*ppsms));
1043         if (ret == SBD_SUCCESS) {
1044                 uint8_t s;
1045                 if ((*ppsms)->sms_data_order != SMS_DATA_ORDER)
1046                         sbd_swap_section_hdr(*ppsms);
1047                 if ((*ppsms)->sms_id != SMS_ID_UNUSED) {
1048                         s = sbd_calc_section_sum(*ppsms, (*ppsms)->sms_size);
1049                         if (s != (*ppsms)->sms_chksum)
1050                                 ret = SBD_META_CORRUPTED;
1051                 }
1052         }
1053         mutex_exit(&sl->sl_metadata_lock);
1054 
1055         if ((ret != SBD_SUCCESS) && alloced)
1056                 kmem_free(*ppsms, sms.sms_size);
1057         return (ret);
1058 }
1059 
1060 sbd_status_t
1061 sbd_load_section_hdr_unbuffered(sbd_lu_t *sl, sm_section_hdr_t *sms)
1062 {
1063         sbd_status_t    ret;
1064 
1065         /*
1066          * Bypass buffering and re-read the meta data from permanent storage.
1067          */
1068         if (sl->sl_flags & SL_ZFS_META) {
1069                 if ((ret = sbd_open_zfs_meta(sl)) != SBD_SUCCESS) {
1070                         return (ret);
1071                 }
1072         }
1073         /* Re-get the meta sizes into sl */
1074         if ((ret = sbd_load_meta_start(sl)) != SBD_SUCCESS) {
1075                 return (ret);
1076         }
1077         return (sbd_load_section_hdr(sl, sms));
1078 }
1079 
1080 sbd_status_t
1081 sbd_write_meta_section(sbd_lu_t *sl, sm_section_hdr_t *sms)
1082 {
1083         sm_section_hdr_t t;
1084         uint64_t off, s;
1085         uint64_t unused_start;
1086         sbd_status_t ret;
1087         sbd_status_t write_meta_ret = SBD_SUCCESS;
1088         uint8_t *cb;
1089         int meta_size_changed = 0;
1090         sm_section_hdr_t sms_before_unused = {0};
1091 
1092         mutex_enter(&sl->sl_metadata_lock);
1093 write_meta_section_again:
1094         if (sms->sms_offset) {
1095                 /*
1096                  * If the section already exists and the size is the
1097                  * same as this new data then overwrite in place. If
1098                  * the sizes are different then mark the existing as
1099                  * unused and look for free space.
1100                  */
1101                 ret = sbd_read_meta(sl, sms->sms_offset, sizeof (t),
1102                     (uint8_t *)&t);
1103                 if (ret != SBD_SUCCESS) {
1104                         mutex_exit(&sl->sl_metadata_lock);
1105                         return (ret);
1106                 }
1107                 if (t.sms_data_order != SMS_DATA_ORDER) {
1108                         sbd_swap_section_hdr(&t);
1109                 }
1110                 if (t.sms_id != sms->sms_id) {
1111                         mutex_exit(&sl->sl_metadata_lock);
1112                         return (SBD_INVALID_ARG);
1113                 }
1114                 if (t.sms_size == sms->sms_size) {
1115                         ret = sbd_write_meta(sl, sms->sms_offset,
1116                             sms->sms_size, (uint8_t *)sms);
1117                         mutex_exit(&sl->sl_metadata_lock);
1118                         return (ret);
1119                 }
1120                 sms_before_unused = t;
1121 
1122                 t.sms_id = SMS_ID_UNUSED;
1123                 /*
1124                  * For unused sections we only use chksum of the header. for
1125                  * all other sections, the chksum is for the entire section.
1126                  */
1127                 t.sms_chksum = sbd_calc_section_sum(&t, sizeof (t));
1128                 ret = sbd_write_meta(sl, t.sms_offset, sizeof (t),
1129                     (uint8_t *)&t);
1130                 if (ret != SBD_SUCCESS) {
1131                         mutex_exit(&sl->sl_metadata_lock);
1132                         return (ret);
1133                 }
1134                 sms->sms_offset = 0;
1135         } else {
1136                 /* Section location is unknown, search for it. */
1137                 t.sms_id = sms->sms_id;
1138                 t.sms_data_order = SMS_DATA_ORDER;
1139                 ret = sbd_load_section_hdr(sl, &t);
1140                 if (ret == SBD_SUCCESS) {
1141                         sms->sms_offset = t.sms_offset;
1142                         sms->sms_chksum =
1143                             sbd_calc_section_sum(sms, sms->sms_size);
1144                         goto write_meta_section_again;
1145                 } else if (ret != SBD_NOT_FOUND) {
1146                         mutex_exit(&sl->sl_metadata_lock);
1147                         return (ret);
1148                 }
1149         }
1150 
1151         /*
1152          * At this point we know that section does not already exist.
1153          * Find space large enough to hold the section or grow meta if
1154          * possible.
1155          */
1156         unused_start = 0;
1157         s = 0;  /* size of space found */
1158 
1159         /*
1160          * Search all sections for unused space of sufficient size.
1161          * The first one found is taken. Contiguous unused sections
1162          * will be combined.
1163          */
1164         for (off = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
1165             off < sl->sl_meta_size_used; off += t.sms_size) {
1166                 ret = sbd_read_meta(sl, off, sizeof (t), (uint8_t *)&t);
1167                 if (ret != SBD_SUCCESS) {
1168                         mutex_exit(&sl->sl_metadata_lock);
1169                         return (ret);
1170                 }
1171                 if (t.sms_data_order != SMS_DATA_ORDER)
1172                         sbd_swap_section_hdr(&t);
1173                 if (t.sms_size == 0) {
1174                         mutex_exit(&sl->sl_metadata_lock);
1175                         return (SBD_META_CORRUPTED);
1176                 }
1177                 if (t.sms_id == SMS_ID_UNUSED) {
1178                         if (unused_start == 0)
1179                                 unused_start = off;
1180                         /*
1181                          * Calculate size of the unused space, break out
1182                          * if it satisfies the requirement.
1183                          */
1184                         s = t.sms_size - unused_start + off;
1185                         if ((s == sms->sms_size) || (s >= (sms->sms_size +
1186                             sizeof (t)))) {
1187                                 break;
1188                         } else {
1189                                 s = 0;
1190                         }
1191                 } else {
1192                         unused_start = 0;
1193                 }
1194         }
1195 
1196         off = (unused_start == 0) ? sl->sl_meta_size_used : unused_start;
1197         /*
1198          * If none found, how much room is at the end?
1199          * See if the data can be expanded.
1200          */
1201         if (s == 0) {
1202                 s = sl->sl_total_meta_size - off;
1203                 if (s >= sms->sms_size || !(sl->sl_flags & SL_SHARED_META)) {
1204                         s = sms->sms_size;
1205                         meta_size_changed = 1;
1206                 } else {
1207                         s = 0;
1208                 }
1209         }
1210 
1211         if (s == 0) {
1212                 mutex_exit(&sl->sl_metadata_lock);
1213                 return (SBD_ALLOC_FAILURE);
1214         }
1215 
1216         sms->sms_offset = off;
1217         sms->sms_chksum = sbd_calc_section_sum(sms, sms->sms_size);
1218         /*
1219          * Since we may have to write more than one section (current +
1220          * any unused), use a combined buffer.
1221          */
1222         cb = kmem_zalloc(s, KM_SLEEP);
1223         bcopy(sms, cb, sms->sms_size);
1224         if (s > sms->sms_size) {
1225                 t.sms_offset = off + sms->sms_size;
1226                 t.sms_size = s - sms->sms_size;
1227                 t.sms_id = SMS_ID_UNUSED;
1228                 t.sms_data_order = SMS_DATA_ORDER;
1229                 t.sms_chksum = sbd_calc_section_sum(&t, sizeof (t));
1230                 bcopy(&t, cb + sms->sms_size, sizeof (t));
1231         }
1232         /*
1233          * Two write events & statuses take place. Failure writing the
1234          * meta section takes precedence, can possibly be rolled back,
1235          * & gets reported. Else return status from writing the meta start.
1236          */
1237         ret = SBD_SUCCESS; /* Set a default, it's not always loaded below. */
1238         if (meta_size_changed) {
1239                 uint64_t old_meta_size;
1240                 uint64_t old_sz_used = sl->sl_meta_size_used; /* save a copy */
1241                 old_meta_size = sl->sl_total_meta_size; /* save a copy */
1242 
1243                 write_meta_ret = sbd_write_meta(sl, off, s, cb);
1244                 if (write_meta_ret == SBD_SUCCESS) {
1245                         sl->sl_meta_size_used = off + s;
1246                         if (sl->sl_total_meta_size < sl->sl_meta_size_used) {
1247                                 uint64_t meta_align =
1248                                     (((uint64_t)1) <<
1249                                     sl->sl_meta_blocksize_shift) - 1;
1250                                 sl->sl_total_meta_size =
1251                                     (sl->sl_meta_size_used + meta_align) &
1252                                     (~meta_align);
1253                         }
1254                         ret = sbd_write_meta_start(sl, sl->sl_total_meta_size,
1255                             sl->sl_meta_size_used);
1256                         if (ret != SBD_SUCCESS) {
1257                                 sl->sl_meta_size_used = old_sz_used;
1258                                 sl->sl_total_meta_size = old_meta_size;
1259                         }
1260                 } else {
1261                         sl->sl_meta_size_used = old_sz_used;
1262                         sl->sl_total_meta_size = old_meta_size;
1263                 }
1264         } else {
1265                 write_meta_ret = sbd_write_meta(sl, off, s, cb);
1266         }
1267         if ((write_meta_ret != SBD_SUCCESS) &&
1268             (sms_before_unused.sms_offset != 0)) {
1269                 sm_section_hdr_t new_sms;
1270                 sm_section_hdr_t *unused_sms;
1271                 /*
1272                  * On failure writing the meta section attempt to undo
1273                  * the change to unused.
1274                  * Re-read the meta data from permanent storage.
1275                  * The section id can't exist for undo to be possible.
1276                  * Read what should be the entire old section data and
1277                  * insure the old data's still present by validating
1278                  * against it's old checksum.
1279                  */
1280                 new_sms.sms_id = sms->sms_id;
1281                 new_sms.sms_data_order = SMS_DATA_ORDER;
1282                 if (sbd_load_section_hdr_unbuffered(sl, &new_sms) !=
1283                     SBD_NOT_FOUND) {
1284                         goto done;
1285                 }
1286                 unused_sms = kmem_zalloc(sms_before_unused.sms_size, KM_SLEEP);
1287                 if (sbd_read_meta(sl, sms_before_unused.sms_offset,
1288                     sms_before_unused.sms_size,
1289                     (uint8_t *)unused_sms) != SBD_SUCCESS) {
1290                         goto done;
1291                 }
1292                 if (unused_sms->sms_data_order != SMS_DATA_ORDER) {
1293                         sbd_swap_section_hdr(unused_sms);
1294                 }
1295                 if (unused_sms->sms_id != SMS_ID_UNUSED) {
1296                         goto done;
1297                 }
1298                 if (unused_sms->sms_offset != sms_before_unused.sms_offset) {
1299                         goto done;
1300                 }
1301                 if (unused_sms->sms_size != sms_before_unused.sms_size) {
1302                         goto done;
1303                 }
1304                 unused_sms->sms_id = sms_before_unused.sms_id;
1305                 if (sbd_calc_section_sum(unused_sms,
1306                     sizeof (sm_section_hdr_t)) !=
1307                     sbd_calc_section_sum(&sms_before_unused,
1308                     sizeof (sm_section_hdr_t))) {
1309                         goto done;
1310                 }
1311                 unused_sms->sms_chksum =
1312                     sbd_calc_section_sum(unused_sms, unused_sms->sms_size);
1313                 if (unused_sms->sms_chksum != sms_before_unused.sms_chksum) {
1314                         goto done;
1315                 }
1316                 (void) sbd_write_meta(sl, unused_sms->sms_offset,
1317                     sizeof (sm_section_hdr_t), (uint8_t *)unused_sms);
1318         }
1319 done:
1320         mutex_exit(&sl->sl_metadata_lock);
1321         kmem_free(cb, s);
1322         if (write_meta_ret != SBD_SUCCESS) {
1323                 return (write_meta_ret);
1324         }
1325         return (ret);
1326 }
1327 
1328 sbd_status_t
1329 sbd_write_lu_info(sbd_lu_t *sl)
1330 {
1331         sbd_lu_info_1_1_t *sli;
1332         int s;
1333         uint8_t *p;
1334         char *zvol_name = NULL;
1335         sbd_status_t ret;
1336 
1337         mutex_enter(&sl->sl_lock);
1338 
1339         s = sl->sl_serial_no_size;
1340         if ((sl->sl_flags & (SL_SHARED_META | SL_ZFS_META)) == 0) {
1341                 if (sl->sl_data_filename) {
1342                         s += strlen(sl->sl_data_filename) + 1;
1343                 }
1344         }
1345         if (sl->sl_flags & SL_ZFS_META) {
1346                 zvol_name = sbd_get_zvol_name(sl);
1347                 s += strlen(zvol_name) + 1;
1348         }
1349         if (sl->sl_alias) {
1350                 s += strlen(sl->sl_alias) + 1;
1351         }
1352         if (sl->sl_mgmt_url) {
1353                 s += strlen(sl->sl_mgmt_url) + 1;
1354         }
1355         sli = (sbd_lu_info_1_1_t *)kmem_zalloc(sizeof (*sli) + s, KM_SLEEP);
1356         p = sli->sli_buf;
1357         if ((sl->sl_flags & (SL_SHARED_META | SL_ZFS_META)) == 0) {
1358                 sli->sli_flags |= SLI_SEPARATE_META;
1359                 (void) strcpy((char *)p, sl->sl_data_filename);
1360                 sli->sli_data_fname_offset =
1361                     (uintptr_t)p - (uintptr_t)sli->sli_buf;
1362                 sli->sli_flags |= SLI_DATA_FNAME_VALID;
1363                 p += strlen(sl->sl_data_filename) + 1;
1364         }
1365         if (sl->sl_flags & SL_ZFS_META) {
1366                 (void) strcpy((char *)p, zvol_name);
1367                 sli->sli_meta_fname_offset =
1368                     (uintptr_t)p - (uintptr_t)sli->sli_buf;
1369                 sli->sli_flags |= SLI_META_FNAME_VALID | SLI_ZFS_META;
1370                 p += strlen(zvol_name) + 1;
1371                 kmem_free(zvol_name, strlen(zvol_name) + 1);
1372                 zvol_name = NULL;
1373         }
1374         if (sl->sl_alias) {
1375                 (void) strcpy((char *)p, sl->sl_alias);
1376                 sli->sli_alias_offset =
1377                     (uintptr_t)p - (uintptr_t)sli->sli_buf;
1378                 sli->sli_flags |= SLI_ALIAS_VALID;
1379                 p += strlen(sl->sl_alias) + 1;
1380         }
1381         if (sl->sl_mgmt_url) {
1382                 (void) strcpy((char *)p, sl->sl_mgmt_url);
1383                 sli->sli_mgmt_url_offset =
1384                     (uintptr_t)p - (uintptr_t)sli->sli_buf;
1385                 sli->sli_flags |= SLI_MGMT_URL_VALID;
1386                 p += strlen(sl->sl_mgmt_url) + 1;
1387         }
1388         if (sl->sl_flags & SL_WRITE_PROTECTED) {
1389                 sli->sli_flags |= SLI_WRITE_PROTECTED;
1390         }
1391         if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
1392                 sli->sli_flags |= SLI_WRITEBACK_CACHE_DISABLE;
1393         }
1394         if (sl->sl_flags & SL_VID_VALID) {
1395                 bcopy(sl->sl_vendor_id, sli->sli_vid, 8);
1396                 sli->sli_flags |= SLI_VID_VALID;
1397         }
1398         if (sl->sl_flags & SL_PID_VALID) {
1399                 bcopy(sl->sl_product_id, sli->sli_pid, 16);
1400                 sli->sli_flags |= SLI_PID_VALID;
1401         }
1402         if (sl->sl_flags & SL_REV_VALID) {
1403                 bcopy(sl->sl_revision, sli->sli_rev, 4);
1404                 sli->sli_flags |= SLI_REV_VALID;
1405         }
1406         if (sl->sl_serial_no_size) {
1407                 bcopy(sl->sl_serial_no, p, sl->sl_serial_no_size);
1408                 sli->sli_serial_size = sl->sl_serial_no_size;
1409                 sli->sli_serial_offset =
1410                     (uintptr_t)p - (uintptr_t)sli->sli_buf;
1411                 sli->sli_flags |= SLI_SERIAL_VALID;
1412                 p += sli->sli_serial_size;
1413         }
1414         sli->sli_lu_size = sl->sl_lu_size;
1415         sli->sli_data_blocksize_shift = sl->sl_data_blocksize_shift;
1416         sli->sli_data_order = SMS_DATA_ORDER;
1417         bcopy(sl->sl_device_id, sli->sli_device_id, 20);
1418 
1419         sli->sli_sms_header.sms_size = sizeof (*sli) + s;
1420         sli->sli_sms_header.sms_id = SMS_ID_LU_INFO_1_1;
1421         sli->sli_sms_header.sms_data_order = SMS_DATA_ORDER;
1422 
1423         mutex_exit(&sl->sl_lock);
1424         ret = sbd_write_meta_section(sl, (sm_section_hdr_t *)sli);
1425         kmem_free(sli, sizeof (*sli) + s);
1426         return (ret);
1427 }
1428 
1429 /*
1430  * Will scribble SL_UNMAP_ENABLED into sl_flags if we succeed.
1431  */
1432 static void
1433 do_unmap_setup(sbd_lu_t *sl)
1434 {
1435         if (sbd_unmap_enable == 0) {
1436                 sl->sl_flags &= ~(SL_UNMAP_ENABLED);
1437                 return;
1438         }
1439 
1440         if ((sl->sl_flags & SL_ZFS_META) == 0)
1441                 return; /* No UNMAP for you. */
1442 
1443         sl->sl_flags |= SL_UNMAP_ENABLED;
1444 }
1445 
1446 int
1447 sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret)
1448 {
1449         stmf_lu_t *lu = sl->sl_lu;
1450         stmf_status_t ret;
1451 
1452         do_unmap_setup(sl);
1453 
1454         lu->lu_id = (scsi_devid_desc_t *)sl->sl_device_id;
1455         if (sl->sl_alias) {
1456                 lu->lu_alias = sl->sl_alias;
1457         } else {
1458                 lu->lu_alias = sl->sl_name;
1459         }
1460         if (sl->sl_access_state == SBD_LU_STANDBY) {
1461                 /* call set access state */
1462                 ret = stmf_set_lu_access(lu, STMF_LU_STANDBY);
1463                 if (ret != STMF_SUCCESS) {
1464                         *err_ret = SBD_RET_ACCESS_STATE_FAILED;
1465                         return (EIO);
1466                 }
1467         }
1468         /* set proxy_reg_cb_arg to meta filename */
1469         if (sl->sl_meta_filename) {
1470                 lu->lu_proxy_reg_arg = sl->sl_meta_filename;
1471                 lu->lu_proxy_reg_arg_len = strlen(sl->sl_meta_filename) + 1;
1472         } else {
1473                 lu->lu_proxy_reg_arg = sl->sl_data_filename;
1474                 lu->lu_proxy_reg_arg_len = strlen(sl->sl_data_filename) + 1;
1475         }
1476         lu->lu_lp = sbd_lp;
1477         lu->lu_task_alloc = sbd_task_alloc;
1478         lu->lu_new_task = sbd_new_task;
1479         lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
1480         lu->lu_send_status_done = sbd_send_status_done;
1481         lu->lu_task_free = sbd_task_free;
1482         lu->lu_abort = sbd_abort;
1483         lu->lu_task_poll = sbd_task_poll;
1484         lu->lu_dbuf_free = sbd_dbuf_free;
1485         lu->lu_ctl = sbd_ctl;
1486         lu->lu_task_done = sbd_ats_remove_by_task;
1487         lu->lu_info = sbd_info;
1488         sl->sl_state = STMF_STATE_OFFLINE;
1489 
1490         if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
1491                 stmf_trace(0, "Failed to register with framework, ret=%llx",
1492                     ret);
1493                 if (ret == STMF_ALREADY) {
1494                         *err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
1495                 }
1496                 return (EIO);
1497         }
1498 
1499         /*
1500          * setup the ATS (compare and write) lists to handle multiple
1501          * ATS commands simultaneously
1502          */
1503         list_create(&sl->sl_ats_io_list, sizeof (ats_state_t),
1504             offsetof(ats_state_t, as_next));
1505         *err_ret = 0;
1506         return (0);
1507 }
1508 
1509 int
1510 sbd_open_data_file(sbd_lu_t *sl, uint32_t *err_ret, int lu_size_valid,
1511     int vp_valid, int keep_open)
1512 {
1513         int ret;
1514         int flag;
1515         ulong_t nbits;
1516         uint64_t supported_size;
1517         vattr_t vattr;
1518         enum vtype vt;
1519         struct dk_cinfo dki;
1520         int unused;
1521 
1522         mutex_enter(&sl->sl_lock);
1523         if (vp_valid) {
1524                 goto odf_over_open;
1525         }
1526         if (sl->sl_data_filename[0] != '/') {
1527                 *err_ret = SBD_RET_DATA_PATH_NOT_ABSOLUTE;
1528                 mutex_exit(&sl->sl_lock);
1529                 return (EINVAL);
1530         }
1531         if ((ret = lookupname(sl->sl_data_filename, UIO_SYSSPACE, FOLLOW,
1532             NULLVPP, &sl->sl_data_vp)) != 0) {
1533                 *err_ret = SBD_RET_DATA_FILE_LOOKUP_FAILED;
1534                 mutex_exit(&sl->sl_lock);
1535                 return (ret);
1536         }
1537         sl->sl_data_vtype = vt = sl->sl_data_vp->v_type;
1538         VN_RELE(sl->sl_data_vp);
1539         if ((vt != VREG) && (vt != VCHR) && (vt != VBLK)) {
1540                 *err_ret = SBD_RET_WRONG_DATA_FILE_TYPE;
1541                 mutex_exit(&sl->sl_lock);
1542                 return (EINVAL);
1543         }
1544         if (sl->sl_flags & SL_WRITE_PROTECTED) {
1545                 flag = FREAD | FOFFMAX;
1546         } else {
1547                 flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1548         }
1549         if ((ret = vn_open(sl->sl_data_filename, UIO_SYSSPACE, flag, 0,
1550             &sl->sl_data_vp, 0, 0)) != 0) {
1551                 *err_ret = SBD_RET_DATA_FILE_OPEN_FAILED;
1552                 mutex_exit(&sl->sl_lock);
1553                 return (ret);
1554         }
1555 odf_over_open:
1556         vattr.va_mask = AT_SIZE;
1557         if ((ret = VOP_GETATTR(sl->sl_data_vp, &vattr, 0, CRED(), NULL)) != 0) {
1558                 *err_ret = SBD_RET_DATA_FILE_GETATTR_FAILED;
1559                 goto odf_close_data_and_exit;
1560         }
1561         if ((vt != VREG) && (vattr.va_size == 0)) {
1562                 /*
1563                  * Its a zero byte block or char device. This cannot be
1564                  * a raw disk.
1565                  */
1566                 *err_ret = SBD_RET_WRONG_DATA_FILE_TYPE;
1567                 ret = EINVAL;
1568                 goto odf_close_data_and_exit;
1569         }
1570         /* sl_data_readable size includes any metadata. */
1571         sl->sl_data_readable_size = vattr.va_size;
1572 
1573         if (VOP_PATHCONF(sl->sl_data_vp, _PC_FILESIZEBITS, &nbits,
1574             CRED(), NULL) != 0) {
1575                 nbits = 0;
1576         }
1577         /* nbits cannot be greater than 64 */
1578         sl->sl_data_fs_nbits = (uint8_t)nbits;
1579         if (lu_size_valid) {
1580                 sl->sl_total_data_size = sl->sl_lu_size;
1581                 if (sl->sl_flags & SL_SHARED_META) {
1582                         sl->sl_total_data_size += SHARED_META_DATA_SIZE;
1583                 }
1584                 if ((nbits > 0) && (nbits < 64)) {
1585                         /*
1586                          * The expression below is correct only if nbits is
1587                          * positive and less than 64.
1588                          */
1589                         supported_size = (((uint64_t)1) << nbits) - 1;
1590                         if (sl->sl_total_data_size > supported_size) {
1591                                 *err_ret = SBD_RET_SIZE_NOT_SUPPORTED_BY_FS;
1592                                 ret = EINVAL;
1593                                 goto odf_close_data_and_exit;
1594                         }
1595                 }
1596         } else {
1597                 sl->sl_total_data_size = vattr.va_size;
1598                 if (sl->sl_flags & SL_SHARED_META) {
1599                         if (vattr.va_size > SHARED_META_DATA_SIZE) {
1600                                 sl->sl_lu_size = vattr.va_size -
1601                                     SHARED_META_DATA_SIZE;
1602                         } else {
1603                                 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1604                                 ret = EINVAL;
1605                                 goto odf_close_data_and_exit;
1606                         }
1607                 } else {
1608                         sl->sl_lu_size = vattr.va_size;
1609                 }
1610         }
1611 
1612         if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
1613                 *err_ret = SBD_RET_FILE_SIZE_ERROR;
1614                 ret = EINVAL;
1615                 goto odf_close_data_and_exit;
1616         }
1617         if (sl->sl_lu_size &
1618             ((((uint64_t)1) << sl->sl_data_blocksize_shift) - 1)) {
1619                 *err_ret = SBD_RET_FILE_ALIGN_ERROR;
1620                 ret = EINVAL;
1621                 goto odf_close_data_and_exit;
1622         }
1623         /*
1624          * Get the minor device for direct zvol access
1625          */
1626         if (sl->sl_flags & SL_ZFS_META) {
1627                 if ((ret = VOP_IOCTL(sl->sl_data_vp, DKIOCINFO, (intptr_t)&dki,
1628                     FKIOCTL, kcred, &unused, NULL)) != 0) {
1629                         cmn_err(CE_WARN, "ioctl(DKIOCINFO) failed %d", ret);
1630                         /* zvol reserves 0, so this would fail later */
1631                         sl->sl_zvol_minor = 0;
1632                 } else {
1633                         sl->sl_zvol_minor = dki.dki_unit;
1634                         if (sbd_zvol_get_volume_params(sl) == 0)
1635                                 sl->sl_flags |= SL_CALL_ZVOL;
1636                 }
1637         }
1638         sl->sl_flags |= SL_MEDIA_LOADED;
1639         mutex_exit(&sl->sl_lock);
1640         return (0);
1641 
1642 odf_close_data_and_exit:
1643         if (!keep_open) {
1644                 (void) VOP_CLOSE(sl->sl_data_vp, flag, 1, 0, CRED(), NULL);
1645                 VN_RELE(sl->sl_data_vp);
1646         }
1647         mutex_exit(&sl->sl_lock);
1648         return (ret);
1649 }
1650 
1651 void
1652 sbd_close_lu(sbd_lu_t *sl)
1653 {
1654         int flag;
1655 
1656         if (((sl->sl_flags & SL_SHARED_META) == 0) &&
1657             (sl->sl_flags & SL_META_OPENED)) {
1658                 if (sl->sl_flags & SL_ZFS_META) {
1659                         rw_destroy(&sl->sl_zfs_meta_lock);
1660                         if (sl->sl_zfs_meta) {
1661                                 kmem_free(sl->sl_zfs_meta, ZAP_MAXVALUELEN / 2);
1662                                 sl->sl_zfs_meta = NULL;
1663                         }
1664                 } else {
1665                         flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1666                         (void) VOP_CLOSE(sl->sl_meta_vp, flag, 1, 0,
1667                             CRED(), NULL);
1668                         VN_RELE(sl->sl_meta_vp);
1669                 }
1670                 sl->sl_flags &= ~SL_META_OPENED;
1671         }
1672         if (sl->sl_flags & SL_MEDIA_LOADED) {
1673                 if (sl->sl_flags & SL_WRITE_PROTECTED) {
1674                         flag = FREAD | FOFFMAX;
1675                 } else {
1676                         flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1677                 }
1678                 (void) VOP_CLOSE(sl->sl_data_vp, flag, 1, 0, CRED(), NULL);
1679                 VN_RELE(sl->sl_data_vp);
1680                 sl->sl_flags &= ~SL_MEDIA_LOADED;
1681                 if (sl->sl_flags & SL_SHARED_META) {
1682                         sl->sl_flags &= ~SL_META_OPENED;
1683                 }
1684         }
1685 }
1686 
1687 int
1688 sbd_set_lu_standby(sbd_set_lu_standby_t *stlu, uint32_t *err_ret)
1689 {
1690         sbd_lu_t *sl;
1691         sbd_status_t sret;
1692         stmf_status_t stret;
1693         uint8_t old_access_state;
1694 
1695         sret = sbd_find_and_lock_lu(stlu->stlu_guid, NULL,
1696             SL_OP_MODIFY_LU, &sl);
1697         if (sret != SBD_SUCCESS) {
1698                 if (sret == SBD_BUSY) {
1699                         *err_ret = SBD_RET_LU_BUSY;
1700                         return (EBUSY);
1701                 } else if (sret == SBD_NOT_FOUND) {
1702                         *err_ret = SBD_RET_NOT_FOUND;
1703                         return (ENOENT);
1704                 }
1705                 *err_ret = SBD_RET_ACCESS_STATE_FAILED;
1706                 return (EIO);
1707         }
1708 
1709         old_access_state = sl->sl_access_state;
1710         sl->sl_access_state = SBD_LU_TRANSITION_TO_STANDBY;
1711         stret = stmf_set_lu_access((stmf_lu_t *)sl->sl_lu, STMF_LU_STANDBY);
1712         if (stret != STMF_SUCCESS) {
1713                 sl->sl_trans_op = SL_OP_NONE;
1714                 *err_ret = SBD_RET_ACCESS_STATE_FAILED;
1715                 sl->sl_access_state = old_access_state;
1716                 return (EIO);
1717         }
1718 
1719         /*
1720          * acquire the writer lock here to ensure we're not pulling
1721          * the rug from the vn_rdwr to the backing store
1722          */
1723         rw_enter(&sl->sl_access_state_lock, RW_WRITER);
1724         sbd_close_lu(sl);
1725         rw_exit(&sl->sl_access_state_lock);
1726 
1727         sl->sl_trans_op = SL_OP_NONE;
1728         return (0);
1729 }
1730 
1731 int
1732 sbd_close_delete_lu(sbd_lu_t *sl, int ret)
1733 {
1734 
1735         /*
1736          * acquire the writer lock here to ensure we're not pulling
1737          * the rug from the vn_rdwr to the backing store
1738          */
1739         rw_enter(&sl->sl_access_state_lock, RW_WRITER);
1740         sbd_close_lu(sl);
1741         rw_exit(&sl->sl_access_state_lock);
1742 
1743         if (sl->sl_flags & SL_LINKED)
1744                 sbd_unlink_lu(sl);
1745         mutex_destroy(&sl->sl_metadata_lock);
1746         mutex_destroy(&sl->sl_lock);
1747         rw_destroy(&sl->sl_pgr->pgr_lock);
1748         rw_destroy(&sl->sl_access_state_lock);
1749         if (sl->sl_serial_no_alloc_size) {
1750                 kmem_free(sl->sl_serial_no, sl->sl_serial_no_alloc_size);
1751         }
1752         if (sl->sl_data_fname_alloc_size) {
1753                 kmem_free(sl->sl_data_filename, sl->sl_data_fname_alloc_size);
1754         }
1755         if (sl->sl_alias_alloc_size) {
1756                 kmem_free(sl->sl_alias, sl->sl_alias_alloc_size);
1757         }
1758         if (sl->sl_mgmt_url_alloc_size) {
1759                 kmem_free(sl->sl_mgmt_url, sl->sl_mgmt_url_alloc_size);
1760         }
1761         stmf_free(sl->sl_lu);
1762         return (ret);
1763 }
1764 
/*
 * Create a new logical unit from a create-and-register request, write its
 * initial metadata and register it.
 *
 * slu        request; its variable-length slu_buf holds the data filename
 *            and, when the matching *_valid fields are set, the meta
 *            filename, alias, management URL and serial number at the
 *            given offsets. Several fields are also written back:
 *            slu_ret_filesize_nbits, slu_lu_size, slu_blksize (when not
 *            supplied), slu_company_id (when not supplied) and slu_guid
 *            (when a GUID had to be generated).
 * struct_sz  total size of the request, including slu_buf.
 * err_ret    receives an SBD_RET_* detail code on failure; cleared to 0
 *            on entry.
 *
 * Returns 0 on success, otherwise an errno value. The offset check and
 * the stmf_alloc() failure return directly; every later failure goes
 * through scm_err_out, which tears the partially built LU down with
 * sbd_close_delete_lu().
 */
int
sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
    uint32_t *err_ret)
{
        char *namebuf;
        sbd_lu_t *sl;
        stmf_lu_t *lu;
        char *p;
        int sz;
        int alloc_sz;
        int ret = EIO;
        int flag;
        int wcd = 0;
        uint32_t hid = 0;
        enum vtype vt;

        /*
         * Size of the private copy of slu_buf, including one extra byte
         * for a forced NUL terminator. The "+ 8" presumably re-adds the
         * declared size of slu_buf that sizeof already counted -- confirm
         * against the structure definition.
         */
        sz = struct_sz - sizeof (sbd_create_and_reg_lu_t) + 8 + 1;

        *err_ret = 0;

        /* Let's validate the various offsets against the buffer size */
        if (((slu->slu_meta_fname_valid) &&
            (slu->slu_meta_fname_off >= sz)) ||
            (slu->slu_data_fname_off >= sz) ||
            ((slu->slu_alias_valid) &&
            (slu->slu_alias_off >= sz)) ||
            ((slu->slu_mgmt_url_valid) &&
            (slu->slu_mgmt_url_off >= sz)) ||
            ((slu->slu_serial_valid) &&
            ((slu->slu_serial_off + slu->slu_serial_size) >= sz))) {
                return (EINVAL);
        }

        /*
         * Work on a NUL-terminated copy of the caller's buffer so that the
         * strlen()/strcpy() calls below cannot run past its end.
         */
        namebuf = kmem_zalloc(sz, KM_SLEEP);
        bcopy(slu->slu_buf, namebuf, sz - 1);
        namebuf[sz - 1] = 0;

        /*
         * The sbd_lu_t, its sbd_pgr_t and every variable-length string are
         * packed into a single allocation; add up the space needed.
         */
        alloc_sz = sizeof (sbd_lu_t) + sizeof (sbd_pgr_t);
        if (slu->slu_meta_fname_valid) {
                alloc_sz += strlen(namebuf + slu->slu_meta_fname_off) + 1;
        }
        alloc_sz += strlen(namebuf + slu->slu_data_fname_off) + 1;
        if (slu->slu_alias_valid) {
                alloc_sz += strlen(namebuf + slu->slu_alias_off) + 1;
        }
        if (slu->slu_mgmt_url_valid) {
                alloc_sz += strlen(namebuf + slu->slu_mgmt_url_off) + 1;
        }
        if (slu->slu_serial_valid) {
                alloc_sz += slu->slu_serial_size;
        }

        lu = (stmf_lu_t *)stmf_alloc(STMF_STRUCT_STMF_LU, alloc_sz, 0);
        if (lu == NULL) {
                kmem_free(namebuf, sz);
                return (ENOMEM);
        }
        sl = (sbd_lu_t *)lu->lu_provider_private;
        bzero(sl, alloc_sz);
        sl->sl_lu = lu;
        sl->sl_alloc_size = alloc_sz;
        /* The sbd_pgr_t sits directly behind the sbd_lu_t. */
        sl->sl_pgr = (sbd_pgr_t *)(sl + 1);
        rw_init(&sl->sl_pgr->pgr_lock, NULL, RW_DRIVER, NULL);
        mutex_init(&sl->sl_lock, NULL, MUTEX_DRIVER, NULL);
        mutex_init(&sl->sl_metadata_lock, NULL, MUTEX_DRIVER, NULL);
        rw_init(&sl->sl_access_state_lock, NULL, RW_DRIVER, NULL);
        /* p walks the string area that follows the two structures. */
        p = ((char *)sl) + sizeof (sbd_lu_t) + sizeof (sbd_pgr_t);
        sl->sl_data_filename = p;
        (void) strcpy(sl->sl_data_filename, namebuf + slu->slu_data_fname_off);
        p += strlen(sl->sl_data_filename) + 1;
        sl->sl_meta_offset = SBD_META_OFFSET;
        sl->sl_access_state = SBD_LU_ACTIVE;
        if (slu->slu_meta_fname_valid) {
                /* Separate meta file: it also names the LU by default. */
                sl->sl_alias = sl->sl_name = sl->sl_meta_filename = p;
                (void) strcpy(sl->sl_meta_filename, namebuf +
                    slu->slu_meta_fname_off);
                p += strlen(sl->sl_meta_filename) + 1;
        } else {
                /*
                 * No meta file given: the LU is named after its data file
                 * and the metadata is kept either with the zvol
                 * (SL_ZFS_META) or in a region at the front of the data
                 * file (SL_SHARED_META).
                 */
                sl->sl_alias = sl->sl_name = sl->sl_data_filename;
                if (sbd_is_zvol(sl->sl_data_filename)) {
                        sl->sl_flags |= SL_ZFS_META;
                        sl->sl_meta_offset = 0;
                } else {
                        sl->sl_flags |= SL_SHARED_META;
                        sl->sl_data_offset = SHARED_META_DATA_SIZE;
                        sl->sl_total_meta_size = SHARED_META_DATA_SIZE;
                        sl->sl_meta_size_used = 0;
                }
        }
        /* An explicit alias overrides the default chosen above. */
        if (slu->slu_alias_valid) {
                sl->sl_alias = p;
                (void) strcpy(p, namebuf + slu->slu_alias_off);
                p += strlen(sl->sl_alias) + 1;
        }
        if (slu->slu_mgmt_url_valid) {
                sl->sl_mgmt_url = p;
                (void) strcpy(p, namebuf + slu->slu_mgmt_url_off);
                p += strlen(sl->sl_mgmt_url) + 1;
        }
        /* The serial number is raw bytes with an explicit length. */
        if (slu->slu_serial_valid) {
                sl->sl_serial_no = (uint8_t *)p;
                bcopy(namebuf + slu->slu_serial_off, sl->sl_serial_no,
                    slu->slu_serial_size);
                sl->sl_serial_no_size = slu->slu_serial_size;
                p += slu->slu_serial_size;
        }
        kmem_free(namebuf, sz);
        if (slu->slu_vid_valid) {
                bcopy(slu->slu_vid, sl->sl_vendor_id, 8);
                sl->sl_flags |= SL_VID_VALID;
        }
        if (slu->slu_pid_valid) {
                bcopy(slu->slu_pid, sl->sl_product_id, 16);
                sl->sl_flags |= SL_PID_VALID;
        }
        if (slu->slu_rev_valid) {
                bcopy(slu->slu_rev, sl->sl_revision, 4);
                sl->sl_flags |= SL_REV_VALID;
        }
        if (slu->slu_write_protected) {
                sl->sl_flags |= SL_WRITE_PROTECTED;
        }
        if (slu->slu_blksize_valid) {
                /* Must be a non-zero power of two no larger than 32K. */
                if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
                    (slu->slu_blksize > (32 * 1024)) ||
                    (slu->slu_blksize == 0)) {
                        *err_ret = SBD_RET_INVALID_BLKSIZE;
                        ret = EINVAL;
                        goto scm_err_out;
                }
                /*
                 * Turn the block size into a shift count; the shift starts
                 * at zero because the structure was bzero'ed above.
                 */
                while ((1 << sl->sl_data_blocksize_shift) != slu->slu_blksize) {
                        sl->sl_data_blocksize_shift++;
                }
        } else {
                sl->sl_data_blocksize_shift = 9;     /* 512 by default */
                slu->slu_blksize = 512;
        }

        /* Now let's start creating the meta */
        sl->sl_trans_op = SL_OP_CREATE_REGISTER_LU;
        if (sbd_link_lu(sl) != SBD_SUCCESS) {
                *err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
                ret = EALREADY;
                goto scm_err_out;
        }

        /* 1st focus on the data store */
        if (slu->slu_lu_size_valid) {
                sl->sl_lu_size = slu->slu_lu_size;
        }
        ret = sbd_open_data_file(sl, err_ret, slu->slu_lu_size_valid, 0, 0);
        /* These are reported back to the caller even if the open failed. */
        slu->slu_ret_filesize_nbits = sl->sl_data_fs_nbits;
        slu->slu_lu_size = sl->sl_lu_size;
        if (ret) {
                goto scm_err_out;
        }

        /*
         * Check if we were explicitly asked to disable/enable write
         * cache on the device, otherwise get current device setting.
         */
        if (slu->slu_writeback_cache_disable_valid) {
                if (slu->slu_writeback_cache_disable) {
                        /*
                         * Set write cache disable on the device. If it fails,
                         * we'll support it using sync/flush.
                         */
                        (void) sbd_wcd_set(1, sl);
                        wcd = 1;
                } else {
                        /*
                         * Set write cache enable on the device. If it fails,
                         * return an error.
                         */
                        if (sbd_wcd_set(0, sl) != SBD_SUCCESS) {
                                *err_ret = SBD_RET_WRITE_CACHE_SET_FAILED;
                                ret = EFAULT;
                                goto scm_err_out;
                        }
                }
        } else {
                sbd_wcd_get(&wcd, sl);
        }

        if (wcd) {
                sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE |
                    SL_SAVED_WRITE_CACHE_DISABLE;
        }

        /* Shared meta needs no separate open; its sizes were set above. */
        if (sl->sl_flags & SL_SHARED_META) {
                goto over_meta_open;
        }
        if (sl->sl_flags & SL_ZFS_META) {
                if (sbd_create_zfs_meta_object(sl) != SBD_SUCCESS) {
                        *err_ret = SBD_RET_ZFS_META_CREATE_FAILED;
                        ret = ENOMEM;
                        goto scm_err_out;
                }
                sl->sl_meta_blocksize_shift = 0;
                goto over_meta_create;
        }
        /*
         * Separate meta file: look it up once just to learn its vnode
         * type, drop that hold, then open it for real below.
         */
        if ((ret = lookupname(sl->sl_meta_filename, UIO_SYSSPACE, FOLLOW,
            NULLVPP, &sl->sl_meta_vp)) != 0) {
                *err_ret = SBD_RET_META_FILE_LOOKUP_FAILED;
                goto scm_err_out;
        }
        sl->sl_meta_vtype = vt = sl->sl_meta_vp->v_type;
        VN_RELE(sl->sl_meta_vp);
        if ((vt != VREG) && (vt != VCHR) && (vt != VBLK)) {
                *err_ret = SBD_RET_WRONG_META_FILE_TYPE;
                ret = EINVAL;
                goto scm_err_out;
        }
        /* Regular files use byte granularity, devices 512-byte blocks. */
        if (vt == VREG) {
                sl->sl_meta_blocksize_shift = 0;
        } else {
                sl->sl_meta_blocksize_shift = 9;
        }
        flag = FREAD | FWRITE | FOFFMAX | FEXCL;
        if ((ret = vn_open(sl->sl_meta_filename, UIO_SYSSPACE, flag, 0,
            &sl->sl_meta_vp, 0, 0)) != 0) {
                *err_ret = SBD_RET_META_FILE_OPEN_FAILED;
                goto scm_err_out;
        }
over_meta_create:
        /*
         * Initial meta size: the start record placed at sl_meta_offset,
         * rounded up to a whole number of meta blocks.
         */
        sl->sl_total_meta_size = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
        sl->sl_total_meta_size +=
            (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
        sl->sl_total_meta_size &=
            ~((((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1);
        sl->sl_meta_size_used = 0;
over_meta_open:
        /* From here on sbd_close_lu() will release the meta store. */
        sl->sl_flags |= SL_META_OPENED;

        /*
         * Build the device id: a 4-byte header followed by the 16-byte
         * GUID, either taken from the request or generated here and
         * copied back into the request.
         */
        sl->sl_device_id[3] = 16;
        if (slu->slu_guid_valid) {
                sl->sl_device_id[0] = 0xf1;
                sl->sl_device_id[1] = 3;
                sl->sl_device_id[2] = 0;
                bcopy(slu->slu_guid, sl->sl_device_id + 4, 16);
        } else {
                if (slu->slu_host_id_valid)
                        hid = slu->slu_host_id;
                if (!slu->slu_company_id_valid)
                        slu->slu_company_id = COMPANY_ID_SUN;
                if (stmf_scsilib_uniq_lu_id2(slu->slu_company_id, hid,
                    (scsi_devid_desc_t *)&sl->sl_device_id[0]) !=
                    STMF_SUCCESS) {
                        *err_ret = SBD_RET_META_CREATION_FAILED;
                        ret = EIO;
                        goto scm_err_out;
                }
                bcopy(sl->sl_device_id + 4, slu->slu_guid, 16);
        }

        /* Let's create the meta now */
        mutex_enter(&sl->sl_metadata_lock);
        if (sbd_write_meta_start(sl, sl->sl_total_meta_size,
            sizeof (sbd_meta_start_t)) != SBD_SUCCESS) {
                mutex_exit(&sl->sl_metadata_lock);
                *err_ret = SBD_RET_META_CREATION_FAILED;
                ret = EIO;
                goto scm_err_out;
        }
        mutex_exit(&sl->sl_metadata_lock);
        sl->sl_meta_size_used = sl->sl_meta_offset + sizeof (sbd_meta_start_t);

        if (sbd_write_lu_info(sl) != SBD_SUCCESS) {
                *err_ret = SBD_RET_META_CREATION_FAILED;
                ret = EIO;
                goto scm_err_out;
        }

        if (sbd_pgr_meta_init(sl) != SBD_SUCCESS) {
                *err_ret = SBD_RET_META_CREATION_FAILED;
                ret = EIO;
                goto scm_err_out;
        }

        /*
         * Update the zvol separately as this need only be called upon
         * completion of the metadata initialization.
         */
        if (sl->sl_flags & SL_ZFS_META) {
                if (sbd_update_zfs_prop(sl) != SBD_SUCCESS) {
                        *err_ret = SBD_RET_META_CREATION_FAILED;
                        ret = EIO;
                        goto scm_err_out;
                }
        }

        ret = sbd_populate_and_register_lu(sl, err_ret);
        if (ret) {
                goto scm_err_out;
        }

        /* Done: release the transition op and count the new LU. */
        sl->sl_trans_op = SL_OP_NONE;
        atomic_inc_32(&sbd_lu_count);
        return (0);

scm_err_out:
        /* Closes whatever was opened, frees the LU and returns ret. */
        return (sbd_close_delete_lu(sl, ret));
}
2068 
2069 stmf_status_t
2070 sbd_proxy_msg(uint8_t *luid, void *proxy_arg, uint32_t proxy_arg_len,
2071     uint32_t type)
2072 {
2073         switch (type) {
2074                 case STMF_MSG_LU_ACTIVE:
2075                         return (sbd_proxy_reg_lu(luid, proxy_arg,
2076                             proxy_arg_len));
2077                 case STMF_MSG_LU_REGISTER:
2078                         return (sbd_proxy_reg_lu(luid, proxy_arg,
2079                             proxy_arg_len));
2080                 case STMF_MSG_LU_DEREGISTER:
2081                         return (sbd_proxy_dereg_lu(luid, proxy_arg,
2082                             proxy_arg_len));
2083                 default:
2084                         return (STMF_INVALID_ARG);
2085         }
2086 }
2087 
2088 
2089 /*
2090  * register a standby logical unit
2091  * proxy_reg_arg contains the meta filename
2092  */
2093 stmf_status_t
2094 sbd_proxy_reg_lu(uint8_t *luid, void *proxy_reg_arg, uint32_t proxy_reg_arg_len)
2095 {
2096         sbd_lu_t *sl;
2097         sbd_status_t sret;
2098         sbd_create_standby_lu_t *stlu;
2099         int alloc_sz;
2100         uint32_t err_ret = 0;
2101         stmf_status_t stret = STMF_SUCCESS;
2102 
2103         if (luid == NULL) {
2104                 return (STMF_INVALID_ARG);
2105         }
2106 
2107         do {
2108                 sret = sbd_find_and_lock_lu(luid, NULL, SL_OP_MODIFY_LU, &sl);
2109         } while (sret == SBD_BUSY);
2110 
2111         if (sret == SBD_NOT_FOUND) {
2112                 alloc_sz = sizeof (*stlu) + proxy_reg_arg_len - 8;
2113                 stlu = (sbd_create_standby_lu_t *)kmem_zalloc(alloc_sz,
2114                     KM_SLEEP);
2115                 bcopy(luid, stlu->stlu_guid, 16);
2116                 if (proxy_reg_arg_len) {
2117                         bcopy(proxy_reg_arg, stlu->stlu_meta_fname,
2118                             proxy_reg_arg_len);
2119                         stlu->stlu_meta_fname_size = proxy_reg_arg_len;
2120                 }
2121                 if (sbd_create_standby_lu(stlu, &err_ret) != 0) {
2122                         cmn_err(CE_WARN,
2123                             "Unable to create standby logical unit for %s",
2124                             stlu->stlu_meta_fname);
2125                         stret = STMF_FAILURE;
2126                 }
2127                 kmem_free(stlu, alloc_sz);
2128                 return (stret);
2129         } else if (sret == SBD_SUCCESS) {
2130                 /*
2131                  * if the lu is already registered, then the lu should now
2132                  * be in standby mode
2133                  */
2134                 sbd_it_data_t *it;
2135                 if (sl->sl_access_state != SBD_LU_STANDBY) {
2136                         mutex_enter(&sl->sl_lock);
2137                         sl->sl_access_state = SBD_LU_STANDBY;
2138                         for (it = sl->sl_it_list; it != NULL;
2139                             it = it->sbd_it_next) {
2140                                 it->sbd_it_ua_conditions |=
2141                                     SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
2142                                 it->sbd_it_flags &=
2143                                     ~SBD_IT_HAS_SCSI2_RESERVATION;
2144                                 sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
2145                         }
2146                         mutex_exit(&sl->sl_lock);
2147                         sbd_pgr_reset(sl);
2148                 }
2149                 sl->sl_trans_op = SL_OP_NONE;
2150         } else {
2151                 cmn_err(CE_WARN, "could not find and lock logical unit");
2152                 stret = STMF_FAILURE;
2153         }
2154 out:
2155         return (stret);
2156 }
2157 
2158 /* ARGSUSED */
2159 stmf_status_t
2160 sbd_proxy_dereg_lu(uint8_t *luid, void *proxy_reg_arg,
2161     uint32_t proxy_reg_arg_len)
2162 {
2163         sbd_delete_lu_t dlu = {0};
2164         uint32_t err_ret;
2165 
2166         if (luid == NULL) {
2167                 cmn_err(CE_WARN, "de-register lu request had null luid");
2168                 return (STMF_INVALID_ARG);
2169         }
2170 
2171         bcopy(luid, &dlu.dlu_guid, 16);
2172 
2173         if (sbd_delete_lu(&dlu, (int)sizeof (dlu), &err_ret) != 0) {
2174                 cmn_err(CE_WARN, "failed to delete de-register lu request");
2175                 return (STMF_FAILURE);
2176         }
2177 
2178         return (STMF_SUCCESS);
2179 }
2180 
2181 int
2182 sbd_create_standby_lu(sbd_create_standby_lu_t *slu, uint32_t *err_ret)
2183 {
2184         sbd_lu_t *sl;
2185         stmf_lu_t *lu;
2186         int ret = EIO;
2187         int alloc_sz;
2188 
2189         alloc_sz = sizeof (sbd_lu_t) + sizeof (sbd_pgr_t) +
2190             slu->stlu_meta_fname_size;
2191         lu = (stmf_lu_t *)stmf_alloc(STMF_STRUCT_STMF_LU, alloc_sz, 0);
2192         if (lu == NULL) {
2193                 return (ENOMEM);
2194         }
2195         sl = (sbd_lu_t *)lu->lu_provider_private;
2196         bzero(sl, alloc_sz);
2197         sl->sl_lu = lu;
2198         sl->sl_alloc_size = alloc_sz;
2199 
2200         sl->sl_pgr = (sbd_pgr_t *)(sl + 1);
2201         sl->sl_meta_filename = ((char *)sl) + sizeof (sbd_lu_t) +
2202             sizeof (sbd_pgr_t);
2203 
2204         if (slu->stlu_meta_fname_size > 0) {
2205                 (void) strcpy(sl->sl_meta_filename, slu->stlu_meta_fname);
2206         }
2207         sl->sl_name = sl->sl_meta_filename;
2208 
2209         sl->sl_device_id[3] = 16;
2210         sl->sl_device_id[0] = 0xf1;
2211         sl->sl_device_id[1] = 3;
2212         sl->sl_device_id[2] = 0;
2213         bcopy(slu->stlu_guid, sl->sl_device_id + 4, 16);
2214         lu->lu_id = (scsi_devid_desc_t *)sl->sl_device_id;
2215         sl->sl_access_state = SBD_LU_STANDBY;
2216 
2217         rw_init(&sl->sl_pgr->pgr_lock, NULL, RW_DRIVER, NULL);
2218         mutex_init(&sl->sl_lock, NULL, MUTEX_DRIVER, NULL);
2219         mutex_init(&sl->sl_metadata_lock, NULL, MUTEX_DRIVER, NULL);
2220         rw_init(&sl->sl_access_state_lock, NULL, RW_DRIVER, NULL);
2221 
2222         sl->sl_trans_op = SL_OP_CREATE_REGISTER_LU;
2223 
2224         if (sbd_link_lu(sl) != SBD_SUCCESS) {
2225                 *err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
2226                 ret = EALREADY;
2227                 goto scs_err_out;
2228         }
2229 
2230         ret = sbd_populate_and_register_lu(sl, err_ret);
2231         if (ret) {
2232                 goto scs_err_out;
2233         }
2234 
2235         sl->sl_trans_op = SL_OP_NONE;
2236         atomic_inc_32(&sbd_lu_count);
2237         return (0);
2238 
2239 scs_err_out:
2240         return (sbd_close_delete_lu(sl, ret));
2241 }
2242 
2243 int
2244 sbd_load_sli_1_0(sbd_lu_t *sl, uint32_t *err_ret)
2245 {
2246         sbd_lu_info_1_0_t *sli = NULL;
2247         sbd_status_t sret;
2248 
2249         sret = sbd_read_meta_section(sl, (sm_section_hdr_t **)&sli,
2250             SMS_ID_LU_INFO_1_0);
2251 
2252         if (sret != SBD_SUCCESS) {
2253                 *err_ret = SBD_RET_NO_META;
2254                 return (EIO);
2255         }
2256         if (sli->sli_data_order != SMS_DATA_ORDER) {
2257                 sbd_swap_lu_info_1_0(sli);
2258                 if (sli->sli_data_order != SMS_DATA_ORDER) {
2259                         kmem_free(sli, sli->sli_sms_header.sms_size);
2260                         *err_ret = SBD_RET_NO_META;
2261                         return (EIO);
2262                 }
2263         }
2264 
2265         sl->sl_flags |= SL_SHARED_META;
2266         sl->sl_data_blocksize_shift = 9;
2267         sl->sl_data_offset = SHARED_META_DATA_SIZE;
2268         sl->sl_lu_size = sli->sli_total_store_size - SHARED_META_DATA_SIZE;
2269         sl->sl_total_data_size = SHARED_META_DATA_SIZE + sl->sl_lu_size;
2270         bcopy(sli->sli_lu_devid, sl->sl_device_id, 20);
2271 
2272         kmem_free(sli, sli->sli_sms_header.sms_size);
2273         return (0);
2274 }
2275 
2276 int
2277 sbd_import_lu(sbd_import_lu_t *ilu, int struct_sz, uint32_t *err_ret,
2278     int no_register, sbd_lu_t **slr)
2279 {
2280         stmf_lu_t *lu;
2281         sbd_lu_t *sl;
2282         sbd_lu_info_1_1_t *sli = NULL;
2283         int asz;
2284         int ret = 0;
2285         stmf_status_t stret;
2286         int flag;
2287         int wcd = 0;
2288         int data_opened;
2289         uint16_t sli_buf_sz;
2290         uint8_t *sli_buf_copy = NULL;
2291         enum vtype vt;
2292         int standby = 0;
2293         sbd_status_t sret;
2294 
2295         if (no_register && slr == NULL) {
2296                 return (EINVAL);
2297         }
2298         ilu->ilu_meta_fname[struct_sz - sizeof (*ilu) + 8 - 1] = 0;
2299         /*
2300          * check whether logical unit is already registered ALUA
2301          * For a standby logical unit, the meta filename is set. Use
2302          * that to search for an existing logical unit.
2303          */
2304         sret = sbd_find_and_lock_lu(NULL, (uint8_t *)&(ilu->ilu_meta_fname),
2305             SL_OP_IMPORT_LU, &sl);
2306 
2307         if (sret == SBD_SUCCESS) {
2308                 if (sl->sl_access_state != SBD_LU_ACTIVE) {
2309                         no_register = 1;
2310                         standby = 1;
2311                         lu = sl->sl_lu;
2312                         if (sl->sl_alias_alloc_size) {
2313                                 kmem_free(sl->sl_alias,
2314                                     sl->sl_alias_alloc_size);
2315                                 sl->sl_alias_alloc_size = 0;
2316                                 sl->sl_alias = NULL;
2317                                 lu->lu_alias = NULL;
2318                         }
2319                         if (sl->sl_meta_filename == NULL) {
2320                                 sl->sl_meta_filename = sl->sl_data_filename;
2321                         } else if (sl->sl_data_fname_alloc_size) {
2322                                 kmem_free(sl->sl_data_filename,
2323                                     sl->sl_data_fname_alloc_size);
2324                                 sl->sl_data_fname_alloc_size = 0;
2325                         }
2326                         if (sl->sl_serial_no_alloc_size) {
2327                                 kmem_free(sl->sl_serial_no,
2328                                     sl->sl_serial_no_alloc_size);
2329                                 sl->sl_serial_no_alloc_size = 0;
2330                         }
2331                         if (sl->sl_mgmt_url_alloc_size) {
2332                                 kmem_free(sl->sl_mgmt_url,
2333                                     sl->sl_mgmt_url_alloc_size);
2334                                 sl->sl_mgmt_url_alloc_size = 0;
2335                         }
2336                 } else {
2337                         *err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
2338                         bcopy(sl->sl_device_id + 4, ilu->ilu_ret_guid, 16);
2339                         sl->sl_trans_op = SL_OP_NONE;
2340                         return (EALREADY);
2341                 }
2342         } else if (sret == SBD_NOT_FOUND) {
2343                 asz = strlen(ilu->ilu_meta_fname) + 1;
2344 
2345                 lu = (stmf_lu_t *)stmf_alloc(STMF_STRUCT_STMF_LU,
2346                     sizeof (sbd_lu_t) + sizeof (sbd_pgr_t) + asz, 0);
2347                 if (lu == NULL) {
2348                         return (ENOMEM);
2349                 }
2350                 sl = (sbd_lu_t *)lu->lu_provider_private;
2351                 bzero(sl, sizeof (*sl));
2352                 sl->sl_lu = lu;
2353                 sl->sl_pgr = (sbd_pgr_t *)(sl + 1);
2354                 sl->sl_meta_filename = ((char *)sl) + sizeof (*sl) +
2355                     sizeof (sbd_pgr_t);
2356                 (void) strcpy(sl->sl_meta_filename, ilu->ilu_meta_fname);
2357                 sl->sl_name = sl->sl_meta_filename;
2358                 rw_init(&sl->sl_pgr->pgr_lock, NULL, RW_DRIVER, NULL);
2359                 rw_init(&sl->sl_access_state_lock, NULL, RW_DRIVER, NULL);
2360                 mutex_init(&sl->sl_lock, NULL, MUTEX_DRIVER, NULL);
2361                 mutex_init(&sl->sl_metadata_lock, NULL, MUTEX_DRIVER, NULL);
2362                 sl->sl_trans_op = SL_OP_IMPORT_LU;
2363         } else {
2364                 *err_ret = SBD_RET_META_FILE_LOOKUP_FAILED;
2365                 return (EIO);
2366         }
2367 
2368         /* we're only loading the metadata */
2369         if (!no_register) {
2370                 if (sbd_link_lu(sl) != SBD_SUCCESS) {
2371                         *err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
2372                         bcopy(sl->sl_device_id + 4, ilu->ilu_ret_guid, 16);
2373                         ret = EALREADY;
2374                         goto sim_err_out;
2375                 }
2376         }
2377         if ((ret = lookupname(sl->sl_meta_filename, UIO_SYSSPACE, FOLLOW,
2378             NULLVPP, &sl->sl_meta_vp)) != 0) {
2379                 *err_ret = SBD_RET_META_FILE_LOOKUP_FAILED;
2380                 goto sim_err_out;
2381         }
2382         if (sbd_is_zvol(sl->sl_meta_filename)) {
2383                 sl->sl_flags |= SL_ZFS_META;
2384                 sl->sl_data_filename = sl->sl_meta_filename;
2385         }
2386         sl->sl_meta_vtype = vt = sl->sl_meta_vp->v_type;
2387         VN_RELE(sl->sl_meta_vp);
2388         if ((vt != VREG) && (vt != VCHR) && (vt != VBLK)) {
2389                 *err_ret = SBD_RET_WRONG_META_FILE_TYPE;
2390                 ret = EINVAL;
2391                 goto sim_err_out;
2392         }
2393         if (sl->sl_flags & SL_ZFS_META) {
2394                 if (sbd_open_zfs_meta(sl) != SBD_SUCCESS) {
2395                         /* let see if metadata is in the 64k block */
2396                         sl->sl_flags &= ~SL_ZFS_META;
2397                 }
2398         }
2399         if (!(sl->sl_flags & SL_ZFS_META)) {
2400                 /* metadata is always writable */
2401                 flag = FREAD | FWRITE | FOFFMAX | FEXCL;
2402                 if ((ret = vn_open(sl->sl_meta_filename, UIO_SYSSPACE, flag, 0,
2403                     &sl->sl_meta_vp, 0, 0)) != 0) {
2404                         *err_ret = SBD_RET_META_FILE_OPEN_FAILED;
2405                         goto sim_err_out;
2406                 }
2407         }
2408         if ((sl->sl_flags & SL_ZFS_META) || (vt == VREG)) {
2409                 sl->sl_meta_blocksize_shift = 0;
2410         } else {
2411                 sl->sl_meta_blocksize_shift = 9;
2412         }
2413         sl->sl_meta_offset = (sl->sl_flags & SL_ZFS_META) ? 0 : SBD_META_OFFSET;
2414         sl->sl_flags |= SL_META_OPENED;
2415 
2416         mutex_enter(&sl->sl_metadata_lock);
2417         sret = sbd_load_meta_start(sl);
2418         mutex_exit(&sl->sl_metadata_lock);
2419         if (sret != SBD_SUCCESS) {
2420                 if (sret == SBD_META_CORRUPTED) {
2421                         *err_ret = SBD_RET_NO_META;
2422                 } else if (sret == SBD_NOT_SUPPORTED) {
2423                         *err_ret = SBD_RET_VERSION_NOT_SUPPORTED;
2424                 } else {
2425                         *err_ret = SBD_RET_NO_META;
2426                 }
2427                 ret = EINVAL;
2428                 goto sim_err_out;
2429         }
2430 
2431         /* Now lets see if we can read the most recent LU info */
2432         sret = sbd_read_meta_section(sl, (sm_section_hdr_t **)&sli,
2433             SMS_ID_LU_INFO_1_1);
2434         if ((sret == SBD_NOT_FOUND) && ((sl->sl_flags & SL_ZFS_META) == 0)) {
2435                 ret = sbd_load_sli_1_0(sl, err_ret);
2436                 if (ret) {
2437                         goto sim_err_out;
2438                 }
2439                 goto sim_sli_loaded;
2440         }
2441         if (sret != SBD_SUCCESS) {
2442                 *err_ret = SBD_RET_NO_META;
2443                 ret = EIO;
2444                 goto sim_err_out;
2445         }
2446         /* load sli 1.1 */
2447         if (sli->sli_data_order != SMS_DATA_ORDER) {
2448                 sbd_swap_lu_info_1_1(sli);
2449                 if (sli->sli_data_order != SMS_DATA_ORDER) {
2450                         *err_ret = SBD_RET_NO_META;
2451                         ret = EIO;
2452                         goto sim_err_out;
2453                 }
2454         }
2455 
2456         sli_buf_sz = sli->sli_sms_header.sms_size -
2457             sizeof (sbd_lu_info_1_1_t) + 8;
2458         sli_buf_copy = kmem_alloc(sli_buf_sz + 1, KM_SLEEP);
2459         bcopy(sli->sli_buf, sli_buf_copy, sli_buf_sz);
2460         sli_buf_copy[sli_buf_sz] = 0;
2461 
2462         /* Make sure all the offsets are within limits */
2463         if (((sli->sli_flags & SLI_META_FNAME_VALID) &&
2464             (sli->sli_meta_fname_offset > sli_buf_sz)) ||
2465             ((sli->sli_flags & SLI_DATA_FNAME_VALID) &&
2466             (sli->sli_data_fname_offset > sli_buf_sz)) ||
2467             ((sli->sli_flags & SLI_MGMT_URL_VALID) &&
2468             (sli->sli_mgmt_url_offset > sli_buf_sz)) ||
2469             ((sli->sli_flags & SLI_SERIAL_VALID) &&
2470             ((sli->sli_serial_offset + sli->sli_serial_size) > sli_buf_sz)) ||
2471             ((sli->sli_flags & SLI_ALIAS_VALID) &&
2472             (sli->sli_alias_offset > sli_buf_sz))) {
2473                 *err_ret = SBD_RET_NO_META;
2474                 ret = EIO;
2475                 goto sim_err_out;
2476         }
2477 
2478         sl->sl_lu_size = sli->sli_lu_size;
2479         sl->sl_data_blocksize_shift = sli->sli_data_blocksize_shift;
2480         bcopy(sli->sli_device_id, sl->sl_device_id, 20);
2481         if (sli->sli_flags & SLI_SERIAL_VALID) {
2482                 sl->sl_serial_no_size = sl->sl_serial_no_alloc_size =
2483                     sli->sli_serial_size;
2484                 sl->sl_serial_no = kmem_zalloc(sli->sli_serial_size, KM_SLEEP);
2485                 bcopy(sli_buf_copy + sli->sli_serial_offset, sl->sl_serial_no,
2486                     sl->sl_serial_no_size);
2487         }
2488         if (sli->sli_flags & SLI_SEPARATE_META) {
2489                 sl->sl_total_data_size = sl->sl_lu_size;
2490                 if (sli->sli_flags & SLI_DATA_FNAME_VALID) {
2491                         sl->sl_data_fname_alloc_size = strlen((char *)
2492                             sli_buf_copy + sli->sli_data_fname_offset) + 1;
2493                         sl->sl_data_filename = kmem_zalloc(
2494                             sl->sl_data_fname_alloc_size, KM_SLEEP);
2495                         (void) strcpy(sl->sl_data_filename,
2496                             (char *)sli_buf_copy + sli->sli_data_fname_offset);
2497                 }
2498         } else {
2499                 if (sl->sl_flags & SL_ZFS_META) {
2500                         sl->sl_total_data_size = sl->sl_lu_size;
2501                         sl->sl_data_offset = 0;
2502                 } else {
2503                         sl->sl_total_data_size =
2504                             sl->sl_lu_size + SHARED_META_DATA_SIZE;
2505                         sl->sl_data_offset = SHARED_META_DATA_SIZE;
2506                         sl->sl_flags |= SL_SHARED_META;
2507                 }
2508         }
2509         if (sli->sli_flags & SLI_ALIAS_VALID) {
2510                 sl->sl_alias_alloc_size = strlen((char *)sli_buf_copy +
2511                     sli->sli_alias_offset) + 1;
2512                 sl->sl_alias = kmem_alloc(sl->sl_alias_alloc_size, KM_SLEEP);
2513                 (void) strcpy(sl->sl_alias, (char *)sli_buf_copy +
2514                     sli->sli_alias_offset);
2515         }
2516         if (sli->sli_flags & SLI_MGMT_URL_VALID) {
2517                 sl->sl_mgmt_url_alloc_size = strlen((char *)sli_buf_copy +
2518                     sli->sli_mgmt_url_offset) + 1;
2519                 sl->sl_mgmt_url = kmem_alloc(sl->sl_mgmt_url_alloc_size,
2520                     KM_SLEEP);
2521                 (void) strcpy(sl->sl_mgmt_url, (char *)sli_buf_copy +
2522                     sli->sli_mgmt_url_offset);
2523         }
2524         if (sli->sli_flags & SLI_WRITE_PROTECTED) {
2525                 sl->sl_flags |= SL_WRITE_PROTECTED;
2526         }
2527         if (sli->sli_flags & SLI_VID_VALID) {
2528                 sl->sl_flags |= SL_VID_VALID;
2529                 bcopy(sli->sli_vid, sl->sl_vendor_id, 8);
2530         }
2531         if (sli->sli_flags & SLI_PID_VALID) {
2532                 sl->sl_flags |= SL_PID_VALID;
2533                 bcopy(sli->sli_pid, sl->sl_product_id, 16);
2534         }
2535         if (sli->sli_flags & SLI_REV_VALID) {
2536                 sl->sl_flags |= SL_REV_VALID;
2537                 bcopy(sli->sli_rev, sl->sl_revision, 4);
2538         }
2539         if (sli->sli_flags & SLI_WRITEBACK_CACHE_DISABLE) {
2540                 sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2541         }
2542 sim_sli_loaded:
2543         if ((sl->sl_flags & SL_SHARED_META) == 0) {
2544                 data_opened = 0;
2545         } else {
2546                 data_opened = 1;
2547                 sl->sl_data_filename = sl->sl_meta_filename;
2548                 sl->sl_data_vp = sl->sl_meta_vp;
2549                 sl->sl_data_vtype = sl->sl_meta_vtype;
2550         }
2551 
2552         sret = sbd_pgr_meta_load(sl);
2553         if (sret != SBD_SUCCESS) {
2554                 *err_ret = SBD_RET_NO_META;
2555                 ret = EIO;
2556                 goto sim_err_out;
2557         }
2558 
2559         ret = sbd_open_data_file(sl, err_ret, 1, data_opened, 0);
2560         if (ret) {
2561                 goto sim_err_out;
2562         }
2563 
2564         /*
2565          * set write cache disable on the device
2566          * Note: this shouldn't fail on import unless the cache capabilities
2567          * of the device changed. If that happened, modify will need to
2568          * be used to set the cache flag appropriately after import is done.
2569          */
2570         if (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) {
2571                 (void) sbd_wcd_set(1, sl);
2572                 wcd = 1;
2573         /*
2574          * if not explicitly set, attempt to set it to enable, if that fails
2575          * get the current setting and use that
2576          */
2577         } else {
2578                 sret = sbd_wcd_set(0, sl);
2579                 if (sret != SBD_SUCCESS) {
2580                         sbd_wcd_get(&wcd, sl);
2581                 }
2582         }
2583 
2584         if (wcd) {
2585                 sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE |
2586                     SL_SAVED_WRITE_CACHE_DISABLE;
2587         }
2588 
2589         /* we're only loading the metadata */
2590         if (!no_register) {
2591                 ret = sbd_populate_and_register_lu(sl, err_ret);
2592                 if (ret) {
2593                         goto sim_err_out;
2594                 }
2595                 atomic_inc_32(&sbd_lu_count);
2596         }
2597 
2598         bcopy(sl->sl_device_id + 4, ilu->ilu_ret_guid, 16);
2599         sl->sl_trans_op = SL_OP_NONE;
2600 
2601         if (sli) {
2602                 kmem_free(sli, sli->sli_sms_header.sms_size);
2603                 sli = NULL;
2604         }
2605         if (sli_buf_copy) {
2606                 kmem_free(sli_buf_copy, sli_buf_sz + 1);
2607                 sli_buf_copy = NULL;
2608         }
2609         if (no_register && !standby) {
2610                 *slr = sl;
2611         }
2612 
2613         /*
2614          * if this was imported from standby, set the access state
2615          * to active.
2616          */
2617         if (standby) {
2618                 sbd_it_data_t *it;
2619                 mutex_enter(&sl->sl_lock);
2620                 sl->sl_access_state = SBD_LU_ACTIVE;
2621                 for (it = sl->sl_it_list; it != NULL;
2622                     it = it->sbd_it_next) {
2623                         it->sbd_it_ua_conditions |=
2624                             SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
2625                         it->sbd_it_ua_conditions |= SBD_UA_POR;
2626                         it->sbd_it_flags |=  SBD_IT_PGR_CHECK_FLAG;
2627                 }
2628                 mutex_exit(&sl->sl_lock);
2629                 /* call set access state */
2630                 stret = stmf_set_lu_access(lu, STMF_LU_ACTIVE);
2631                 if (stret != STMF_SUCCESS) {
2632                         *err_ret = SBD_RET_ACCESS_STATE_FAILED;
2633                         sl->sl_access_state = SBD_LU_STANDBY;
2634                         goto sim_err_out;
2635                 }
2636                 if (sl->sl_alias) {
2637                         lu->lu_alias = sl->sl_alias;
2638                 } else {
2639                         lu->lu_alias = sl->sl_name;
2640                 }
2641         }
2642         sl->sl_access_state = SBD_LU_ACTIVE;
2643         return (0);
2644 
2645 sim_err_out:
2646         if (sli) {
2647                 kmem_free(sli, sli->sli_sms_header.sms_size);
2648                 sli = NULL;
2649         }
2650         if (sli_buf_copy) {
2651                 kmem_free(sli_buf_copy, sli_buf_sz + 1);
2652                 sli_buf_copy = NULL;
2653         }
2654 
2655         if (standby) {
2656                 *err_ret = SBD_RET_ACCESS_STATE_FAILED;
2657                 sl->sl_trans_op = SL_OP_NONE;
2658                 return (EIO);
2659         } else {
2660                 return (sbd_close_delete_lu(sl, ret));
2661         }
2662 }
2663 
2664 int
2665 sbd_modify_lu(sbd_modify_lu_t *mlu, int struct_sz, uint32_t *err_ret)
2666 {
2667         sbd_lu_t *sl = NULL;
2668         uint16_t alias_sz;
2669         int ret = 0;
2670         sbd_it_data_t *it;
2671         sbd_status_t sret;
2672         uint64_t old_size;
2673         int modify_unregistered = 0;
2674         int ua = 0;
2675         sbd_import_lu_t *ilu;
2676         stmf_lu_t *lu;
2677         uint32_t ilu_sz;
2678         uint32_t sz;
2679 
2680         sz = struct_sz - sizeof (*mlu) + 8 + 1;
2681 
2682         /* if there is data in the buf, null terminate it */
2683         if (struct_sz > sizeof (*mlu)) {
2684                 mlu->mlu_buf[struct_sz - sizeof (*mlu) + 8 - 1] = 0;
2685         }
2686 
2687         *err_ret = 0;
2688 
2689         /* Lets validate offsets */
2690         if (((mlu->mlu_alias_valid) &&
2691             (mlu->mlu_alias_off >= sz)) ||
2692             ((mlu->mlu_mgmt_url_valid) &&
2693             (mlu->mlu_mgmt_url_off >= sz)) ||
2694             (mlu->mlu_by_fname) &&
2695             (mlu->mlu_fname_off >= sz)) {
2696                 return (EINVAL);
2697         }
2698 
2699         /*
2700          * We'll look for the device but if we don't find it registered,
2701          * we'll still try to modify the unregistered device.
2702          */
2703         if (mlu->mlu_by_guid) {
2704                 sret = sbd_find_and_lock_lu(mlu->mlu_input_guid, NULL,
2705                     SL_OP_MODIFY_LU, &sl);
2706         } else if (mlu->mlu_by_fname) {
2707                 sret = sbd_find_and_lock_lu(NULL,
2708                     (uint8_t *)&(mlu->mlu_buf[mlu->mlu_fname_off]),
2709                     SL_OP_MODIFY_LU, &sl);
2710         } else {
2711                 return (EINVAL);
2712         }
2713 
2714 
2715         if (sret != SBD_SUCCESS) {
2716                 if (sret == SBD_BUSY) {
2717                         *err_ret = SBD_RET_LU_BUSY;
2718                         return (EBUSY);
2719                 } else if (sret != SBD_NOT_FOUND) {
2720                         return (EIO);
2721                 } else if (!mlu->mlu_by_fname) {
2722                         return (EINVAL);
2723                 }
2724                 /* Okay, try to import the device */
2725                 struct_sz = max(8, strlen(&(mlu->mlu_buf[mlu->mlu_fname_off]))
2726                     + 1);
2727                 struct_sz += sizeof (sbd_import_lu_t) - 8;
2728                 ilu_sz = struct_sz;
2729                 ilu = (sbd_import_lu_t *)kmem_zalloc(ilu_sz, KM_SLEEP);
2730                 ilu->ilu_struct_size = struct_sz;
2731                 (void) strcpy(ilu->ilu_meta_fname,
2732                     &(mlu->mlu_buf[mlu->mlu_fname_off]));
2733                 ret = sbd_import_lu(ilu, struct_sz, err_ret, 1, &sl);
2734                 kmem_free(ilu, ilu_sz);
2735                 if (ret != SBD_SUCCESS) {
2736                         return (ENOENT);
2737                 }
2738                 modify_unregistered = 1;
2739         }
2740 
2741         if (sl->sl_access_state != SBD_LU_ACTIVE) {
2742                 *err_ret = SBD_RET_ACCESS_STATE_FAILED;
2743                 ret = EINVAL;
2744                 goto smm_err_out;
2745         }
2746 
2747         /* check for write cache change */
2748         if (mlu->mlu_writeback_cache_disable_valid) {
2749                 /* set wce on device */
2750                 sret = sbd_wcd_set(mlu->mlu_writeback_cache_disable, sl);
2751                 if (!mlu->mlu_writeback_cache_disable && sret != SBD_SUCCESS) {
2752                         *err_ret = SBD_RET_WRITE_CACHE_SET_FAILED;
2753                         ret = EFAULT;
2754                         goto smm_err_out;
2755                 }
2756                 mutex_enter(&sl->sl_lock);
2757                 if (!mlu->mlu_writeback_cache_disable) {
2758                         if (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) {
2759                                 ua = 1;
2760                                 sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2761                                 sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2762                         }
2763                 } else {
2764                         if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
2765                                 ua = 1;
2766                                 sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2767                                 sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2768                         }
2769                 }
2770                 for (it = sl->sl_it_list; ua && it != NULL;
2771                     it = it->sbd_it_next) {
2772                         it->sbd_it_ua_conditions |=
2773                             SBD_UA_MODE_PARAMETERS_CHANGED;
2774                 }
2775                 mutex_exit(&sl->sl_lock);
2776         }
2777         ua = 0;
2778 
2779         if (mlu->mlu_alias_valid) {
2780                 alias_sz = strlen((char *)mlu->mlu_buf +
2781                     mlu->mlu_alias_off) + 1;
2782                 /*
2783                  * Use the allocated buffer or alloc a new one.
2784                  * Don't copy into sl_alias if sl_alias_alloc_size is 0
2785                  * otherwise or you'll be writing over the data/metadata
2786                  * filename.
2787                  */
2788                 mutex_enter(&sl->sl_lock);
2789                 if (sl->sl_alias_alloc_size > 0 &&
2790                     sl->sl_alias_alloc_size < alias_sz) {
2791                         kmem_free(sl->sl_alias,
2792                             sl->sl_alias_alloc_size);
2793                         sl->sl_alias_alloc_size = 0;
2794                 }
2795                 if (sl->sl_alias_alloc_size == 0) {
2796                         sl->sl_alias = kmem_alloc(alias_sz, KM_SLEEP);
2797                         sl->sl_alias_alloc_size = alias_sz;
2798                 }
2799                 (void) strcpy(sl->sl_alias, (char *)mlu->mlu_buf +
2800                     mlu->mlu_alias_off);
2801                 lu = sl->sl_lu;
2802                 lu->lu_alias = sl->sl_alias;
2803                 mutex_exit(&sl->sl_lock);
2804         }
2805 
2806         if (mlu->mlu_mgmt_url_valid) {
2807                 uint16_t url_sz;
2808 
2809                 url_sz = strlen((char *)mlu->mlu_buf + mlu->mlu_mgmt_url_off);
2810                 if (url_sz > 0)
2811                         url_sz++;
2812 
2813                 mutex_enter(&sl->sl_lock);
2814                 if (sl->sl_mgmt_url_alloc_size > 0 &&
2815                     (url_sz == 0 || sl->sl_mgmt_url_alloc_size < url_sz)) {
2816                         kmem_free(sl->sl_mgmt_url, sl->sl_mgmt_url_alloc_size);
2817                         sl->sl_mgmt_url = NULL;
2818                         sl->sl_mgmt_url_alloc_size = 0;
2819                 }
2820                 if (url_sz > 0) {
2821                         if (sl->sl_mgmt_url_alloc_size == 0) {
2822                                 sl->sl_mgmt_url = kmem_alloc(url_sz, KM_SLEEP);
2823                                 sl->sl_mgmt_url_alloc_size = url_sz;
2824                         }
2825                         (void) strcpy(sl->sl_mgmt_url, (char *)mlu->mlu_buf +
2826                             mlu->mlu_mgmt_url_off);
2827                 }
2828                 for (it = sl->sl_it_list; it != NULL;
2829                     it = it->sbd_it_next) {
2830                         it->sbd_it_ua_conditions |=
2831                             SBD_UA_MODE_PARAMETERS_CHANGED;
2832                 }
2833                 mutex_exit(&sl->sl_lock);
2834         }
2835 
2836         if (mlu->mlu_write_protected_valid) {
2837                 mutex_enter(&sl->sl_lock);
2838                 if (mlu->mlu_write_protected) {
2839                         if ((sl->sl_flags & SL_WRITE_PROTECTED) == 0) {
2840                                 ua = 1;
2841                                 sl->sl_flags |= SL_WRITE_PROTECTED;
2842                         }
2843                 } else {
2844                         if (sl->sl_flags & SL_WRITE_PROTECTED) {
2845                                 ua = 1;
2846                                 sl->sl_flags &= ~SL_WRITE_PROTECTED;
2847                         }
2848                 }
2849                 for (it = sl->sl_it_list; ua && it != NULL;
2850                     it = it->sbd_it_next) {
2851                         it->sbd_it_ua_conditions |=
2852                             SBD_UA_MODE_PARAMETERS_CHANGED;
2853                 }
2854                 mutex_exit(&sl->sl_lock);
2855         }
2856 
2857         if (mlu->mlu_lu_size_valid) {
2858                 /*
2859                  * validate lu size and set
2860                  * For open file only (registered lu)
2861                  */
2862                 mutex_enter(&sl->sl_lock);
2863                 old_size = sl->sl_lu_size;
2864                 sl->sl_lu_size = mlu->mlu_lu_size;
2865                 mutex_exit(&sl->sl_lock);
2866                 ret = sbd_open_data_file(sl, err_ret, 1, 1, 1);
2867                 if (ret) {
2868                         mutex_enter(&sl->sl_lock);
2869                         sl->sl_lu_size = old_size;
2870                         mutex_exit(&sl->sl_lock);
2871                         goto smm_err_out;
2872                 }
2873                 if (old_size != mlu->mlu_lu_size) {
2874                         mutex_enter(&sl->sl_lock);
2875                         for (it = sl->sl_it_list; it != NULL;
2876                             it = it->sbd_it_next) {
2877                                 it->sbd_it_ua_conditions |=
2878                                     SBD_UA_CAPACITY_CHANGED;
2879                         }
2880                         mutex_exit(&sl->sl_lock);
2881                 }
2882         }
2883 
2884         if (sbd_write_lu_info(sl) != SBD_SUCCESS) {
2885                 *err_ret = SBD_RET_META_CREATION_FAILED;
2886                 ret = EIO;
2887         }
2888 
2889 smm_err_out:
2890         if (modify_unregistered) {
2891                 (void) sbd_close_delete_lu(sl, 0);
2892         } else {
2893                 sl->sl_trans_op = SL_OP_NONE;
2894         }
2895         return (ret);
2896 }
2897 
2898 int
2899 sbd_set_global_props(sbd_global_props_t *mlu, int struct_sz,
2900     uint32_t *err_ret)
2901 {
2902         sbd_lu_t *sl = NULL;
2903         int ret = 0;
2904         sbd_it_data_t *it;
2905         uint32_t sz;
2906 
2907         sz = struct_sz - sizeof (*mlu) + 8 + 1;
2908 
2909         /* if there is data in the buf, null terminate it */
2910         if (struct_sz > sizeof (*mlu)) {
2911                 mlu->mlu_buf[struct_sz - sizeof (*mlu) + 8 - 1] = 0;
2912         }
2913 
2914         *err_ret = 0;
2915 
2916         /* Lets validate offsets */
2917         if (((mlu->mlu_mgmt_url_valid) &&
2918             (mlu->mlu_mgmt_url_off >= sz))) {
2919                 return (EINVAL);
2920         }
2921 
2922         if (mlu->mlu_mgmt_url_valid) {
2923                 uint16_t url_sz;
2924 
2925                 url_sz = strlen((char *)mlu->mlu_buf + mlu->mlu_mgmt_url_off);
2926                 if (url_sz > 0)
2927                         url_sz++;
2928 
2929                 rw_enter(&sbd_global_prop_lock, RW_WRITER);
2930                 if (sbd_mgmt_url_alloc_size > 0 &&
2931                     (url_sz == 0 || sbd_mgmt_url_alloc_size < url_sz)) {
2932                         kmem_free(sbd_mgmt_url, sbd_mgmt_url_alloc_size);
2933                         sbd_mgmt_url = NULL;
2934                         sbd_mgmt_url_alloc_size = 0;
2935                 }
2936                 if (url_sz > 0) {
2937                         if (sbd_mgmt_url_alloc_size == 0) {
2938                                 sbd_mgmt_url = kmem_alloc(url_sz, KM_SLEEP);
2939                                 sbd_mgmt_url_alloc_size = url_sz;
2940                         }
2941                         (void) strcpy(sbd_mgmt_url, (char *)mlu->mlu_buf +
2942                             mlu->mlu_mgmt_url_off);
2943                 }
2944                 /*
2945                  * check each lu to determine whether a UA is needed.
2946                  */
2947                 mutex_enter(&sbd_lock);
2948                 for (sl = sbd_lu_list; sl; sl = sl->sl_next) {
2949                         if (sl->sl_mgmt_url) {
2950                                 continue;
2951                         }
2952                         mutex_enter(&sl->sl_lock);
2953                         for (it = sl->sl_it_list; it != NULL;
2954                             it = it->sbd_it_next) {
2955                                 it->sbd_it_ua_conditions |=
2956                                     SBD_UA_MODE_PARAMETERS_CHANGED;
2957                         }
2958                         mutex_exit(&sl->sl_lock);
2959                 }
2960                 mutex_exit(&sbd_lock);
2961                 rw_exit(&sbd_global_prop_lock);
2962         }
2963         return (ret);
2964 }
2965 
2966 /* ARGSUSED */
2967 int
2968 sbd_delete_locked_lu(sbd_lu_t *sl, uint32_t *err_ret,
2969     stmf_state_change_info_t *ssi)
2970 {
2971         int i;
2972         stmf_status_t ret;
2973 
2974         if ((sl->sl_state == STMF_STATE_OFFLINE) &&
2975             !sl->sl_state_not_acked) {
2976                 goto sdl_do_dereg;
2977         }
2978 
2979         if ((sl->sl_state != STMF_STATE_ONLINE) ||
2980             sl->sl_state_not_acked) {
2981                 return (EBUSY);
2982         }
2983 
2984         ret = stmf_ctl(STMF_CMD_LU_OFFLINE, sl->sl_lu, ssi);
2985         if ((ret != STMF_SUCCESS) && (ret != STMF_ALREADY)) {
2986                 return (EBUSY);
2987         }
2988 
2989         for (i = 0; i < 500; i++) {
2990                 if ((sl->sl_state == STMF_STATE_OFFLINE) &&
2991                     !sl->sl_state_not_acked) {
2992                         goto sdl_do_dereg;
2993                 }
2994                 delay(drv_usectohz(10000));
2995         }
2996         return (EBUSY);
2997 
2998 sdl_do_dereg:;
2999         if (stmf_deregister_lu(sl->sl_lu) != STMF_SUCCESS)
3000                 return (EBUSY);
3001         atomic_dec_32(&sbd_lu_count);
3002 
3003         return (sbd_close_delete_lu(sl, 0));
3004 }
3005 
3006 int
3007 sbd_delete_lu(sbd_delete_lu_t *dlu, int struct_sz, uint32_t *err_ret)
3008 {
3009         sbd_lu_t *sl;
3010         sbd_status_t sret;
3011         stmf_state_change_info_t ssi;
3012         int ret;
3013 
3014         if (dlu->dlu_by_meta_name) {
3015                 ((char *)dlu)[struct_sz - 1] = 0;
3016                 sret = sbd_find_and_lock_lu(NULL, dlu->dlu_meta_name,
3017                     SL_OP_DELETE_LU, &sl);
3018         } else {
3019                 sret = sbd_find_and_lock_lu(dlu->dlu_guid, NULL,
3020                     SL_OP_DELETE_LU, &sl);
3021         }
3022         if (sret != SBD_SUCCESS) {
3023                 if (sret == SBD_BUSY) {
3024                         *err_ret = SBD_RET_LU_BUSY;
3025                         return (EBUSY);
3026                 } else if (sret == SBD_NOT_FOUND) {
3027                         *err_ret = SBD_RET_NOT_FOUND;
3028                         return (ENOENT);
3029                 }
3030                 return (EIO);
3031         }
3032 
3033         ssi.st_rflags = STMF_RFLAG_USER_REQUEST;
3034         ssi.st_additional_info = "sbd_delete_lu call (ioctl)";
3035         ret = sbd_delete_locked_lu(sl, err_ret, &ssi);
3036 
3037         if (ret) {
3038                 /* Once its locked, no need to grab mutex again */
3039                 sl->sl_trans_op = SL_OP_NONE;
3040         }
3041         return (ret);
3042 }
3043 
3044 sbd_status_t
3045 sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
3046     uint64_t offset, uint64_t size, uint8_t *buf)
3047 {
3048         int ret, ioflag = 0;
3049         long resid;
3050         hrtime_t xfer_start;
3051         uint8_t op = task->task_cdb[0];
3052 
3053         if ((offset + size) > sl->sl_lu_size) {
3054                 return (SBD_IO_PAST_EOF);
3055         }
3056 
3057         offset += sl->sl_data_offset;
3058 
3059         /*
3060          * Check to see if the command is READ(10), READ(12), or READ(16).
3061          * If it is then check for bit 3 being set to indicate if Forced
3062          * Unit Access is being requested. If so, the FSYNC flag will be set
3063          * on the read.
3064          */
3065         if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
3066             (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) {
3067                 ioflag = FSYNC;
3068         }
3069         if ((offset + size) > sl->sl_data_readable_size) {
3070                 uint64_t store_end;
3071                 if (offset > sl->sl_data_readable_size) {
3072                         bzero(buf, size);
3073                         return (SBD_SUCCESS);
3074                 }
3075                 store_end = sl->sl_data_readable_size - offset;
3076                 bzero(buf + store_end, size - store_end);
3077                 size = store_end;
3078         }
3079 
3080         xfer_start = gethrtime();
3081         DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
3082             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3083             scsi_task_t *, task);
3084 
3085         /*
3086          * Don't proceed if the device has been closed
3087          * This can occur on an access state change to standby or
3088          * a delete. The writer lock is acquired before closing the
3089          * lu.
3090          */
3091         rw_enter(&sl->sl_access_state_lock, RW_READER);
3092         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3093                 rw_exit(&sl->sl_access_state_lock);
3094                 return (SBD_FAILURE);
3095         }
3096 
3097         ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3098             (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3099             &resid);
3100         rw_exit(&sl->sl_access_state_lock);
3101 
3102         stmf_lu_xfer_done(task, B_TRUE /* read */,
3103             (gethrtime() - xfer_start));
3104         DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
3105             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3106             int, ret, scsi_task_t *, task);
3107 
3108 over_sl_data_read:
3109         if (ret || resid) {
3110                 stmf_trace(0, "UIO_READ failed, ret = %d, resid = %d", ret,
3111                     resid);
3112                 return (SBD_FAILURE);
3113         }
3114 
3115         return (SBD_SUCCESS);
3116 }
3117 
3118 sbd_status_t
3119 sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
3120     uint64_t offset, uint64_t size, uint8_t *buf)
3121 {
3122         int ret;
3123         long resid;
3124         sbd_status_t sret = SBD_SUCCESS;
3125         int ioflag;
3126         hrtime_t xfer_start;
3127         uint8_t op = task->task_cdb[0];
3128         boolean_t fua_bit = B_FALSE;
3129 
3130         if ((offset + size) > sl->sl_lu_size) {
3131                 return (SBD_IO_PAST_EOF);
3132         }
3133 
3134         offset += sl->sl_data_offset;
3135 
3136         /*
3137          * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
3138          * If it is then check for bit 3 being set to indicate if Forced
3139          * Unit Access is being requested. If so, the FSYNC flag will be set
3140          * on the write.
3141          */
3142         if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
3143             (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
3144                 fua_bit = B_TRUE;
3145         }
3146         if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3147             (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) {
3148                 ioflag = FSYNC;
3149         } else {
3150                 ioflag = 0;
3151         }
3152 
3153         xfer_start = gethrtime();
3154         DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
3155             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3156             scsi_task_t *, task);
3157 
3158         /*
3159          * Don't proceed if the device has been closed
3160          * This can occur on an access state change to standby or
3161          * a delete. The writer lock is acquired before closing the
3162          * lu.
3163          */
3164         rw_enter(&sl->sl_access_state_lock, RW_READER);
3165         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3166                 rw_exit(&sl->sl_access_state_lock);
3167                 return (SBD_FAILURE);
3168         }
3169         ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3170             (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3171             &resid);
3172         rw_exit(&sl->sl_access_state_lock);
3173 
3174         stmf_lu_xfer_done(task, B_FALSE /* write */,
3175             (gethrtime() - xfer_start));
3176         DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
3177             uint8_t *, buf, uint64_t, size, uint64_t, offset,
3178             int, ret, scsi_task_t *, task);
3179 
3180         if ((ret == 0) && (resid == 0) &&
3181             (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3182             (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
3183                 sret = sbd_flush_data_cache(sl, 1);
3184         }
3185 over_sl_data_write:
3186         if ((ret || resid) || (sret != SBD_SUCCESS)) {
3187                 return (SBD_FAILURE);
3188         } else if ((offset + size) > sl->sl_data_readable_size) {
3189                 uint64_t old_size, new_size;
3190 
3191                 do {
3192                         old_size = sl->sl_data_readable_size;
3193                         if ((offset + size) <= old_size)
3194                                 break;
3195                         new_size = offset + size;
3196                 } while (atomic_cas_64(&sl->sl_data_readable_size, old_size,
3197                     new_size) != old_size);
3198         }
3199 
3200         return (SBD_SUCCESS);
3201 }
3202 
3203 int
3204 sbd_get_global_props(sbd_global_props_t *oslp, uint32_t oslp_sz,
3205     uint32_t *err_ret)
3206 {
3207         uint32_t sz = 0;
3208         uint16_t off;
3209 
3210         rw_enter(&sbd_global_prop_lock, RW_READER);
3211         if (sbd_mgmt_url) {
3212                 sz += strlen(sbd_mgmt_url) + 1;
3213         }
3214         bzero(oslp, sizeof (*oslp) - 8);
3215         oslp->mlu_buf_size_needed = sz;
3216 
3217         if (sz > (oslp_sz - sizeof (*oslp) + 8)) {
3218                 *err_ret = SBD_RET_INSUFFICIENT_BUF_SPACE;
3219                 rw_exit(&sbd_global_prop_lock);
3220                 return (ENOMEM);
3221         }
3222 
3223         off = 0;
3224         if (sbd_mgmt_url) {
3225                 oslp->mlu_mgmt_url_valid = 1;
3226                 oslp->mlu_mgmt_url_off = off;
3227                 (void) strcpy((char *)&oslp->mlu_buf[off], sbd_mgmt_url);
3228                 off += strlen(sbd_mgmt_url) + 1;
3229         }
3230 
3231         rw_exit(&sbd_global_prop_lock);
3232         return (0);
3233 }
3234 
3235 static int
3236 sbd_get_unmap_props(sbd_unmap_props_t *sup,
3237     sbd_unmap_props_t *osup, uint32_t *err_ret)
3238 {
3239         sbd_status_t sret;
3240         sbd_lu_t *sl = NULL;
3241 
3242         if (sup->sup_guid_valid) {
3243                 sret = sbd_find_and_lock_lu(sup->sup_guid,
3244                     NULL, SL_OP_LU_PROPS, &sl);
3245         } else {
3246                 sret = sbd_find_and_lock_lu(NULL,
3247                     (uint8_t *)sup->sup_zvol_path, SL_OP_LU_PROPS,
3248                     &sl);
3249         }
3250         if (sret != SBD_SUCCESS) {
3251                 if (sret == SBD_BUSY) {
3252                         *err_ret = SBD_RET_LU_BUSY;
3253                         return (EBUSY);
3254                 } else if (sret == SBD_NOT_FOUND) {
3255                         *err_ret = SBD_RET_NOT_FOUND;
3256                         return (ENOENT);
3257                 }
3258                 return (EIO);
3259         }
3260 
3261         sup->sup_found_lu = 1;
3262         sup->sup_guid_valid = 1;
3263         bcopy(sl->sl_device_id + 4, sup->sup_guid, 16);
3264         if (sl->sl_flags & SL_UNMAP_ENABLED)
3265                 sup->sup_unmap_enabled = 1;
3266         else
3267                 sup->sup_unmap_enabled = 0;
3268 
3269         *osup = *sup;
3270         sl->sl_trans_op = SL_OP_NONE;
3271 
3272         return (0);
3273 }
3274 
3275 int
3276 sbd_get_lu_props(sbd_lu_props_t *islp, uint32_t islp_sz,
3277     sbd_lu_props_t *oslp, uint32_t oslp_sz, uint32_t *err_ret)
3278 {
3279         sbd_status_t sret;
3280         sbd_lu_t *sl = NULL;
3281         uint32_t sz;
3282         uint16_t off;
3283 
3284         if (islp->slp_input_guid) {
3285                 sret = sbd_find_and_lock_lu(islp->slp_guid, NULL,
3286                     SL_OP_LU_PROPS, &sl);
3287         } else {
3288                 ((char *)islp)[islp_sz - 1] = 0;
3289                 sret = sbd_find_and_lock_lu(NULL, islp->slp_buf,
3290                     SL_OP_LU_PROPS, &sl);
3291         }
3292         if (sret != SBD_SUCCESS) {
3293                 if (sret == SBD_BUSY) {
3294                         *err_ret = SBD_RET_LU_BUSY;
3295                         return (EBUSY);
3296                 } else if (sret == SBD_NOT_FOUND) {
3297                         *err_ret = SBD_RET_NOT_FOUND;
3298                         return (ENOENT);
3299                 }
3300                 return (EIO);
3301         }
3302 
3303         sz = strlen(sl->sl_name) + 1;
3304         if ((sl->sl_flags & (SL_ZFS_META | SL_SHARED_META)) == 0) {
3305                 if (sl->sl_data_filename) {
3306                         sz += strlen(sl->sl_data_filename) + 1;
3307                 }
3308         }
3309         sz += sl->sl_serial_no_size;
3310         if (sl->sl_alias) {
3311                 sz += strlen(sl->sl_alias) + 1;
3312         }
3313 
3314         rw_enter(&sbd_global_prop_lock, RW_READER);
3315         if (sl->sl_mgmt_url) {
3316                 sz += strlen(sl->sl_mgmt_url) + 1;
3317         } else if (sbd_mgmt_url) {
3318                 sz += strlen(sbd_mgmt_url) + 1;
3319         }
3320         bzero(oslp, sizeof (*oslp) - 8);
3321         oslp->slp_buf_size_needed = sz;
3322 
3323         if (sz > (oslp_sz - sizeof (*oslp) + 8)) {
3324                 sl->sl_trans_op = SL_OP_NONE;
3325                 *err_ret = SBD_RET_INSUFFICIENT_BUF_SPACE;
3326                 rw_exit(&sbd_global_prop_lock);
3327                 return (ENOMEM);
3328         }
3329 
3330         off = 0;
3331         (void) strcpy((char *)oslp->slp_buf, sl->sl_name);
3332         oslp->slp_meta_fname_off = off;
3333         off += strlen(sl->sl_name) + 1;
3334         if ((sl->sl_flags & (SL_ZFS_META | SL_SHARED_META)) == 0) {
3335                 oslp->slp_meta_fname_valid = 1;
3336                 oslp->slp_separate_meta = 1;
3337                 if (sl->sl_data_filename) {
3338                         oslp->slp_data_fname_valid = 1;
3339                         oslp->slp_data_fname_off = off;
3340                         (void) strcpy((char *)&oslp->slp_buf[off],
3341                             sl->sl_data_filename);
3342                         off += strlen(sl->sl_data_filename) + 1;
3343                 }
3344         } else {
3345                 oslp->slp_data_fname_valid = 1;
3346                 oslp->slp_data_fname_off = oslp->slp_meta_fname_off;
3347                 if (sl->sl_flags & SL_ZFS_META) {
3348                         oslp->slp_zfs_meta = 1;
3349                 }
3350         }
3351         if (sl->sl_alias) {
3352                 oslp->slp_alias_valid = 1;
3353                 oslp->slp_alias_off = off;
3354                 (void) strcpy((char *)&oslp->slp_buf[off], sl->sl_alias);
3355                 off += strlen(sl->sl_alias) + 1;
3356         }
3357         if (sl->sl_mgmt_url) {
3358                 oslp->slp_mgmt_url_valid = 1;
3359                 oslp->slp_mgmt_url_off = off;
3360                 (void) strcpy((char *)&oslp->slp_buf[off], sl->sl_mgmt_url);
3361                 off += strlen(sl->sl_mgmt_url) + 1;
3362         } else if (sbd_mgmt_url) {
3363                 oslp->slp_mgmt_url_valid = 1;
3364                 oslp->slp_mgmt_url_off = off;
3365                 (void) strcpy((char *)&oslp->slp_buf[off], sbd_mgmt_url);
3366                 off += strlen(sbd_mgmt_url) + 1;
3367         }
3368         if (sl->sl_serial_no_size) {
3369                 oslp->slp_serial_off = off;
3370                 bcopy(sl->sl_serial_no, &oslp->slp_buf[off],
3371                     sl->sl_serial_no_size);
3372                 oslp->slp_serial_size = sl->sl_serial_no_size;
3373                 oslp->slp_serial_valid = 1;
3374                 off += sl->sl_serial_no_size;
3375         }
3376 
3377         oslp->slp_lu_size = sl->sl_lu_size;
3378         oslp->slp_blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
3379 
3380         oslp->slp_access_state = sl->sl_access_state;
3381 
3382         if (sl->sl_flags & SL_VID_VALID) {
3383                 oslp->slp_lu_vid = 1;
3384                 bcopy(sl->sl_vendor_id, oslp->slp_vid, 8);
3385         } else {
3386                 bcopy(sbd_vendor_id, oslp->slp_vid, 8);
3387         }
3388         if (sl->sl_flags & SL_PID_VALID) {
3389                 oslp->slp_lu_pid = 1;
3390                 bcopy(sl->sl_product_id, oslp->slp_pid, 16);
3391         } else {
3392                 bcopy(sbd_product_id, oslp->slp_pid, 16);
3393         }
3394         if (sl->sl_flags & SL_REV_VALID) {
3395                 oslp->slp_lu_rev = 1;
3396                 bcopy(sl->sl_revision, oslp->slp_rev, 4);
3397         } else {
3398                 bcopy(sbd_revision, oslp->slp_rev, 4);
3399         }
3400         bcopy(sl->sl_device_id + 4, oslp->slp_guid, 16);
3401 
3402         if (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE)
3403                 oslp->slp_writeback_cache_disable_cur = 1;
3404         if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE)
3405                 oslp->slp_writeback_cache_disable_saved = 1;
3406         if (sl->sl_flags & SL_WRITE_PROTECTED)
3407                 oslp->slp_write_protected = 1;
3408 
3409         sl->sl_trans_op = SL_OP_NONE;
3410 
3411         rw_exit(&sbd_global_prop_lock);
3412         return (0);
3413 }
3414 
3415 /*
3416  * Returns an allocated string with the "<pool>/..." form of the zvol name.
3417  */
3418 static char *
3419 sbd_get_zvol_name(sbd_lu_t *sl)
3420 {
3421         char *src;
3422         char *p;
3423 
3424         if (sl->sl_data_filename)
3425                 src = sl->sl_data_filename;
3426         else
3427                 src = sl->sl_meta_filename;
3428         /* There has to be a better way */
3429         if (SBD_IS_ZVOL(src) != 0) {
3430                 ASSERT(0);
3431         }
3432         src += 14;      /* Past /dev/zvol/dsk/ */
3433         if (*src == '/')
3434                 src++;  /* or /dev/zvol/rdsk/ */
3435         p = (char *)kmem_alloc(strlen(src) + 1, KM_SLEEP);
3436         (void) strcpy(p, src);
3437         return (p);
3438 }
3439 
3440 /*
3441  * this function creates a local metadata zvol property
3442  */
3443 sbd_status_t
3444 sbd_create_zfs_meta_object(sbd_lu_t *sl)
3445 {
3446         /*
3447          * -allocate 1/2 the property size, the zfs property
3448          *  is 8k in size and stored as ascii hex string, all
3449          *  we needed is 4k buffer to store the binary data.
3450          * -initialize reader/write lock
3451          */
3452         if ((sl->sl_zfs_meta = kmem_zalloc(ZAP_MAXVALUELEN / 2, KM_SLEEP))
3453             == NULL)
3454                 return (SBD_FAILURE);
3455         rw_init(&sl->sl_zfs_meta_lock, NULL, RW_DRIVER, NULL);
3456         return (SBD_SUCCESS);
3457 }
3458 
/*
 * Convert one ascii hex digit ('0'-'9', 'A'-'F' or 'a'-'f') to its
 * numeric value 0-15.  Any other character yields -1.
 */
char
sbd_ctoi(char c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	return (-1);
}
3472 
3473 /*
3474  * read zvol property and convert to binary
3475  */
3476 sbd_status_t
3477 sbd_open_zfs_meta(sbd_lu_t *sl)
3478 {
3479         char            *meta = NULL, cl, ch;
3480         int             i;
3481         char            *tmp, *ptr;
3482         uint64_t        rc = SBD_SUCCESS;
3483         int             len;
3484         char            *file;
3485 
3486         if (sl->sl_zfs_meta == NULL) {
3487                 if (sbd_create_zfs_meta_object(sl) == SBD_FAILURE)
3488                         return (SBD_FAILURE);
3489         } else {
3490                 bzero(sl->sl_zfs_meta, (ZAP_MAXVALUELEN / 2));
3491         }
3492 
3493         rw_enter(&sl->sl_zfs_meta_lock, RW_WRITER);
3494         file = sbd_get_zvol_name(sl);
3495         if (sbd_zvolget(file, &meta)) {
3496                 rc = SBD_FAILURE;
3497                 goto done;
3498         }
3499         tmp = meta;
3500         /* convert ascii hex to binary meta */
3501         len = strlen(meta);
3502         ptr = sl->sl_zfs_meta;
3503         for (i = 0; i < len; i += 2) {
3504                 ch = sbd_ctoi(*tmp++);
3505                 cl = sbd_ctoi(*tmp++);
3506                 if (ch == -1 || cl == -1) {
3507                         rc = SBD_FAILURE;
3508                         break;
3509                 }
3510                 *ptr++ = (ch << 4) + cl;
3511         }
3512 done:
3513         rw_exit(&sl->sl_zfs_meta_lock);
3514         if (meta)
3515                 kmem_free(meta, len + 1);
3516         kmem_free(file, strlen(file) + 1);
3517         return (rc);
3518 }
3519 
3520 sbd_status_t
3521 sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz, uint64_t off)
3522 {
3523         ASSERT(sl->sl_zfs_meta);
3524         rw_enter(&sl->sl_zfs_meta_lock, RW_READER);
3525         bcopy(&sl->sl_zfs_meta[off], buf, sz);
3526         rw_exit(&sl->sl_zfs_meta_lock);
3527         return (SBD_SUCCESS);
3528 }
3529 
3530 sbd_status_t
3531 sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz, uint64_t off)
3532 {
3533         ASSERT(sl->sl_zfs_meta);
3534         if ((off + sz) > (ZAP_MAXVALUELEN / 2 - 1)) {
3535                 return (SBD_META_CORRUPTED);
3536         }
3537         if ((off + sz) > sl->sl_meta_size_used) {
3538                 sl->sl_meta_size_used = off + sz;
3539                 if (sl->sl_total_meta_size < sl->sl_meta_size_used) {
3540                         uint64_t meta_align =
3541                             (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
3542                         sl->sl_total_meta_size = (sl->sl_meta_size_used +
3543                             meta_align) & (~meta_align);
3544                 }
3545         }
3546         rw_enter(&sl->sl_zfs_meta_lock, RW_WRITER);
3547         bcopy(buf, &sl->sl_zfs_meta[off], sz);
3548         rw_exit(&sl->sl_zfs_meta_lock);
3549         /*
3550          * During creation of a logical unit, sbd_update_zfs_prop will be
3551          * called separately to avoid multiple calls as each meta section
3552          * create/update will result in a call to sbd_write_zfs_meta().
3553          * We only need to update the zvol once during create.
3554          */
3555         mutex_enter(&sl->sl_lock);
3556         if (sl->sl_trans_op != SL_OP_CREATE_REGISTER_LU) {
3557                 mutex_exit(&sl->sl_lock);
3558                 return (sbd_update_zfs_prop(sl));
3559         }
3560         mutex_exit(&sl->sl_lock);
3561         return (SBD_SUCCESS);
3562 }
3563 
3564 sbd_status_t
3565 sbd_update_zfs_prop(sbd_lu_t *sl)
3566 {
3567         char    *ptr, *ah_meta;
3568         char    *dp = NULL;
3569         int     i, num;
3570         char    *file;
3571         sbd_status_t ret = SBD_SUCCESS;
3572 
3573         ASSERT(sl->sl_zfs_meta);
3574         ptr = ah_meta = kmem_zalloc(ZAP_MAXVALUELEN, KM_SLEEP);
3575         rw_enter(&sl->sl_zfs_meta_lock, RW_READER);
3576         /* convert local copy to ascii hex */
3577         dp = sl->sl_zfs_meta;
3578         for (i = 0; i < sl->sl_total_meta_size; i++, dp++) {
3579                 num = ((*dp) >> 4) & 0xF;
3580                 *ah_meta++ = (num < 10) ? (num + '0') : (num + ('a' - 10));
3581                 num = (*dp) & 0xF;
3582                 *ah_meta++ = (num < 10) ? (num + '0') : (num + ('a' - 10));
3583         }
3584         *ah_meta = NULL;
3585         file = sbd_get_zvol_name(sl);
3586         if (sbd_zvolset(file, (char *)ptr)) {
3587                 ret = SBD_META_CORRUPTED;
3588         }
3589         rw_exit(&sl->sl_zfs_meta_lock);
3590         kmem_free(ptr, ZAP_MAXVALUELEN);
3591         kmem_free(file, strlen(file) + 1);
3592         return (ret);
3593 }
3594 
/*
 * Returns 1 when `path' starts with "/dev/zvol" (as tested by
 * SBD_IS_ZVOL) and 0 otherwise.
 */
int
sbd_is_zvol(char *path)
{
	return ((SBD_IS_ZVOL(path) == 0) ? 1 : 0);
}
3605 
3606 /*
3607  * set write cache disable
3608  * wcd - 1 = disable, 0 = enable
3609  */
3610 sbd_status_t
3611 sbd_wcd_set(int wcd, sbd_lu_t *sl)
3612 {
3613         /* translate to wce bit */
3614         int wce = wcd ? 0 : 1;
3615         int ret;
3616         sbd_status_t sret = SBD_SUCCESS;
3617 
3618         mutex_enter(&sl->sl_lock);
3619         sl->sl_flags &= ~SL_WRITEBACK_CACHE_SET_UNSUPPORTED;
3620 
3621         if (sl->sl_data_vp->v_type == VREG) {
3622                 sl->sl_flags |= SL_FLUSH_ON_DISABLED_WRITECACHE;
3623                 goto done;
3624         }
3625 
3626         ret = VOP_IOCTL(sl->sl_data_vp, DKIOCSETWCE, (intptr_t)&wce, FKIOCTL,
3627             kcred, NULL, NULL);
3628         if (ret == 0) {
3629                 sl->sl_flags &= ~SL_WRITEBACK_CACHE_SET_UNSUPPORTED;
3630                 sl->sl_flags &= ~SL_FLUSH_ON_DISABLED_WRITECACHE;
3631         } else {
3632                 sl->sl_flags |= SL_WRITEBACK_CACHE_SET_UNSUPPORTED;
3633                 sl->sl_flags |= SL_FLUSH_ON_DISABLED_WRITECACHE;
3634                 sret = SBD_FAILURE;
3635                 goto done;
3636         }
3637 
3638 done:
3639         mutex_exit(&sl->sl_lock);
3640         return (sret);
3641 }
3642 
3643 /*
3644  * get write cache disable
3645  * wcd - 1 = disable, 0 = enable
3646  */
3647 void
3648 sbd_wcd_get(int *wcd, sbd_lu_t *sl)
3649 {
3650         int wce;
3651         int ret;
3652 
3653         if (sl->sl_data_vp->v_type == VREG) {
3654                 *wcd = 0;
3655                 return;
3656         }
3657 
3658         ret = VOP_IOCTL(sl->sl_data_vp, DKIOCGETWCE, (intptr_t)&wce, FKIOCTL,
3659             kcred, NULL, NULL);
3660         /* if write cache get failed, assume disabled */
3661         if (ret) {
3662                 *wcd = 1;
3663         } else {
3664                 /* translate to wcd bit */
3665                 *wcd = wce ? 0 : 1;
3666         }
3667 }
3668 
3669 int
3670 sbd_zvolget(char *zvol_name, char **comstarprop)
3671 {
3672         ldi_handle_t    zfs_lh;
3673         nvlist_t        *nv = NULL, *nv2;
3674         zfs_cmd_t       *zc;
3675         char            *ptr;
3676         int size = 1024;
3677         int unused;
3678         int rc;
3679 
3680         if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
3681             &zfs_lh, sbd_zfs_ident)) != 0) {
3682                 cmn_err(CE_WARN, "ldi_open %d", rc);
3683                 return (ENXIO);
3684         }
3685 
3686         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3687         (void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
3688 again:
3689         zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size,
3690             KM_SLEEP);
3691         zc->zc_nvlist_dst_size = size;
3692         rc = ldi_ioctl(zfs_lh, ZFS_IOC_OBJSET_STATS, (intptr_t)zc,
3693             FKIOCTL, kcred, &unused);
3694         /*
3695          * ENOMEM means the list is larger than what we've allocated
3696          * ldi_ioctl will fail with ENOMEM only once
3697          */
3698         if (rc == ENOMEM) {
3699                 int newsize;
3700                 newsize = zc->zc_nvlist_dst_size;
3701                 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3702                 size = newsize;
3703                 goto again;
3704         } else if (rc != 0) {
3705                 goto out;
3706         }
3707         rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
3708             zc->zc_nvlist_dst_size, &nv, 0);
3709         ASSERT(rc == 0);        /* nvlist_unpack should not fail */
3710         if ((rc = nvlist_lookup_nvlist(nv, "stmf_sbd_lu", &nv2)) == 0) {
3711                 rc = nvlist_lookup_string(nv2, ZPROP_VALUE, &ptr);
3712                 if (rc != 0) {
3713                         cmn_err(CE_WARN, "couldn't get value");
3714                 } else {
3715                         *comstarprop = kmem_alloc(strlen(ptr) + 1,
3716                             KM_SLEEP);
3717                         (void) strcpy(*comstarprop, ptr);
3718                 }
3719         }
3720 out:
3721         if (nv != NULL)
3722                 nvlist_free(nv);
3723         kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3724         kmem_free(zc, sizeof (zfs_cmd_t));
3725         (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3726 
3727         return (rc);
3728 }
3729 
3730 int
3731 sbd_zvolset(char *zvol_name, char *comstarprop)
3732 {
3733         ldi_handle_t    zfs_lh;
3734         nvlist_t        *nv;
3735         char            *packed = NULL;
3736         size_t          len;
3737         zfs_cmd_t       *zc;
3738         int unused;
3739         int rc;
3740 
3741         if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
3742             &zfs_lh, sbd_zfs_ident)) != 0) {
3743                 cmn_err(CE_WARN, "ldi_open %d", rc);
3744                 return (ENXIO);
3745         }
3746         (void) nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP);
3747         (void) nvlist_add_string(nv, "stmf_sbd_lu", comstarprop);
3748         if ((rc = nvlist_pack(nv, &packed, &len, NV_ENCODE_NATIVE, KM_SLEEP))) {
3749                 goto out;
3750         }
3751 
3752         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3753         (void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
3754         zc->zc_nvlist_src = (uint64_t)(intptr_t)packed;
3755         zc->zc_nvlist_src_size = len;
3756         rc = ldi_ioctl(zfs_lh, ZFS_IOC_SET_PROP, (intptr_t)zc,
3757             FKIOCTL, kcred, &unused);
3758         if (rc != 0) {
3759                 cmn_err(CE_NOTE, "ioctl failed %d", rc);
3760         }
3761         kmem_free(zc, sizeof (zfs_cmd_t));
3762         if (packed)
3763                 kmem_free(packed, len);
3764 out:
3765         nvlist_free(nv);
3766         (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3767         return (rc);
3768 }
3769 
3770 /*
3771  * Unmap a region in a volume.  Currently only supported for zvols.
3772  * The list of extents to be freed is passed in a dkioc_free_list_t
3773  * which the caller is responsible for destroying.
3774  */
3775 int
3776 sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
3777 {
3778         vnode_t *vp;
3779         int unused, ret;
3780 
3781         /* Nothing to do */
3782         if (dfl->dfl_num_exts == 0)
3783                 return (0);
3784 
3785         /*
3786          * TODO: unmap performance may be improved by not doing the synchronous
3787          * removal of the blocks and writing of the metadata.  The
3788          * transaction is in the zil so the state should be stable.
3789          */
3790         dfl->dfl_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
3791             DF_WAIT_SYNC : 0;
3792 
3793         /* Use the data vnode we have to send a fop_ioctl(). */
3794         vp = sl->sl_data_vp;
3795         if (vp == NULL) {
3796                 cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
3797                 return (EIO);
3798         }
3799 
3800         ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
3801             &unused, NULL);
3802 
3803         return (ret);
3804 }
3805 
3806 /*
3807  * Check if this lu belongs to sbd or some other lu
3808  * provider. A simple check for one of the module
3809  * entry points is sufficient.
3810  */
3811 int
3812 sbd_is_valid_lu(stmf_lu_t *lu)
3813 {
3814         if (lu->lu_new_task == sbd_new_task)
3815                 return (1);
3816         return (0);
3817 }
3818 
3819 uint8_t
3820 sbd_get_lbasize_shift(stmf_lu_t *lu)
3821 {
3822         sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3823 
3824         return (sl->sl_data_blocksize_shift);
3825 }