1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 /*
  25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright (c) 2016 by Delphix. All rights reserved.
  27  */
  28 
  29 /*
  30  * Multiplexed I/O SCSI vHCI implementation
  31  */
  32 
  33 #include <sys/conf.h>
  34 #include <sys/file.h>
  35 #include <sys/ddi.h>
  36 #include <sys/sunddi.h>
  37 #include <sys/scsi/scsi.h>
  38 #include <sys/scsi/impl/scsi_reset_notify.h>
  39 #include <sys/scsi/impl/services.h>
  40 #include <sys/sunmdi.h>
  41 #include <sys/mdi_impldefs.h>
  42 #include <sys/scsi/adapters/scsi_vhci.h>
  43 #include <sys/disp.h>
  44 #include <sys/byteorder.h>
  45 
  46 extern uintptr_t scsi_callback_id;
  47 extern ddi_dma_attr_t scsi_alloc_attr;
  48 
  49 #ifdef  DEBUG
  50 int     vhci_debug = VHCI_DEBUG_DEFAULT_VAL;
  51 #endif
  52 
  53 /* retry for the vhci_do_prout command when a not ready is returned */
  54 int vhci_prout_not_ready_retry = 180;
  55 
  56 /*
  57  * These values are defined to support the internal retry of
  58  * SCSI packets for better sense code handling.
  59  */
  60 #define VHCI_CMD_CMPLT  0
  61 #define VHCI_CMD_RETRY  1
  62 #define VHCI_CMD_ERROR  -1
  63 
  64 #define PROPFLAGS (DDI_PROP_DONTPASS | DDI_PROP_NOTPROM)
  65 #define VHCI_SCSI_PERR          0x47
  66 #define VHCI_PGR_ILLEGALOP      -2
  67 #define VHCI_NUM_UPDATE_TASKQ   8
/* changed to 132 to accommodate HDS */
  69 
  70 /*
  71  * Version Macros
  72  */
  73 #define VHCI_NAME_VERSION       "SCSI VHCI Driver"
  74 char            vhci_version_name[] = VHCI_NAME_VERSION;
  75 
  76 int             vhci_first_time = 0;
  77 clock_t         vhci_to_ticks = 0;
  78 int             vhci_init_wait_timeout = VHCI_INIT_WAIT_TIMEOUT;
  79 kcondvar_t      vhci_cv;
  80 kmutex_t        vhci_global_mutex;
  81 void            *vhci_softstate = NULL; /* for soft state */
  82 
  83 /*
  84  * Flag to delay the retry of the reserve command
  85  */
  86 int             vhci_reserve_delay = 100000;
  87 static int      vhci_path_quiesce_timeout = 60;
  88 static uchar_t  zero_key[MHIOC_RESV_KEY_SIZE];
  89 
  90 /* uscsi delay for a TRAN_BUSY */
  91 static int vhci_uscsi_delay = 100000;
  92 static int vhci_uscsi_retry_count = 180;
  93 /* uscsi_restart_sense timeout id in case it needs to get canceled */
  94 static timeout_id_t vhci_restart_timeid = 0;
  95 
  96 static int      vhci_bus_config_debug = 0;
  97 
  98 /*
  99  * Bidirectional map of 'target-port' to port id <pid> for support of
 100  * iostat(1M) '-Xx' and '-Yx' output.
 101  */
 102 static kmutex_t         vhci_targetmap_mutex;
 103 static uint_t           vhci_targetmap_pid = 1;
 104 static mod_hash_t       *vhci_targetmap_bypid;  /* <pid> -> 'target-port' */
 105 static mod_hash_t       *vhci_targetmap_byport; /* 'target-port' -> <pid> */
 106 
 107 /*
 108  * functions exported by scsi_vhci struct cb_ops
 109  */
 110 static int vhci_open(dev_t *, int, int, cred_t *);
 111 static int vhci_close(dev_t, int, int, cred_t *);
 112 static int vhci_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 113 
 114 /*
 115  * functions exported by scsi_vhci struct dev_ops
 116  */
 117 static int vhci_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
 118 static int vhci_attach(dev_info_t *, ddi_attach_cmd_t);
 119 static int vhci_detach(dev_info_t *, ddi_detach_cmd_t);
 120 
 121 /*
 122  * functions exported by scsi_vhci scsi_hba_tran_t transport table
 123  */
 124 static int vhci_scsi_tgt_init(dev_info_t *, dev_info_t *,
 125     scsi_hba_tran_t *, struct scsi_device *);
 126 static void vhci_scsi_tgt_free(dev_info_t *, dev_info_t *, scsi_hba_tran_t *,
 127     struct scsi_device *);
 128 static int vhci_pgr_register_start(scsi_vhci_lun_t *, struct scsi_pkt *);
 129 static int vhci_scsi_start(struct scsi_address *, struct scsi_pkt *);
 130 static int vhci_scsi_abort(struct scsi_address *, struct scsi_pkt *);
 131 static int vhci_scsi_reset(struct scsi_address *, int);
 132 static int vhci_scsi_reset_target(struct scsi_address *, int level,
 133     uint8_t select_path);
 134 static int vhci_scsi_reset_bus(struct scsi_address *);
 135 static int vhci_scsi_getcap(struct scsi_address *, char *, int);
 136 static int vhci_scsi_setcap(struct scsi_address *, char *, int, int);
 137 static int vhci_commoncap(struct scsi_address *, char *, int, int, int);
 138 static int vhci_pHCI_cap(struct scsi_address *ap, char *cap, int val, int whom,
 139     mdi_pathinfo_t *pip);
 140 static struct scsi_pkt *vhci_scsi_init_pkt(struct scsi_address *,
 141     struct scsi_pkt *, struct buf *, int, int, int, int, int (*)(), caddr_t);
 142 static void vhci_scsi_destroy_pkt(struct scsi_address *, struct scsi_pkt *);
 143 static void vhci_scsi_dmafree(struct scsi_address *, struct scsi_pkt *);
 144 static void vhci_scsi_sync_pkt(struct scsi_address *, struct scsi_pkt *);
 145 static int vhci_scsi_reset_notify(struct scsi_address *, int, void (*)(caddr_t),
 146     caddr_t);
 147 static int vhci_scsi_get_bus_addr(struct scsi_device *, char *, int);
 148 static int vhci_scsi_get_name(struct scsi_device *, char *, int);
 149 static int vhci_scsi_bus_power(dev_info_t *, void *, pm_bus_power_op_t,
 150     void *, void *);
 151 static int vhci_scsi_bus_config(dev_info_t *, uint_t, ddi_bus_config_op_t,
 152     void *, dev_info_t **);
 153 static int vhci_scsi_bus_unconfig(dev_info_t *, uint_t, ddi_bus_config_op_t,
 154     void *);
 155 static struct scsi_failover_ops *vhci_dev_fo(dev_info_t *, struct scsi_device *,
 156     void **, char **);
 157 
 158 /*
 159  * functions registered with the mpxio framework via mdi_vhci_ops_t
 160  */
 161 static int vhci_pathinfo_init(dev_info_t *, mdi_pathinfo_t *, int);
 162 static int vhci_pathinfo_uninit(dev_info_t *, mdi_pathinfo_t *, int);
 163 static int vhci_pathinfo_state_change(dev_info_t *, mdi_pathinfo_t *,
 164                 mdi_pathinfo_state_t, uint32_t, int);
 165 static int vhci_pathinfo_online(dev_info_t *, mdi_pathinfo_t *, int);
 166 static int vhci_pathinfo_offline(dev_info_t *, mdi_pathinfo_t *, int);
 167 static int vhci_failover(dev_info_t *, dev_info_t *, int);
 168 static void vhci_client_attached(dev_info_t *);
 169 static int vhci_is_dev_supported(dev_info_t *, dev_info_t *, void *);
 170 
 171 static int vhci_ctl(dev_t, int, intptr_t, int, cred_t *, int *);
 172 static int vhci_devctl(dev_t, int, intptr_t, int, cred_t *, int *);
 173 static int vhci_ioc_get_phci_path(sv_iocdata_t *, caddr_t, int, caddr_t);
 174 static int vhci_ioc_get_client_path(sv_iocdata_t *, caddr_t, int, caddr_t);
 175 static int vhci_ioc_get_paddr(sv_iocdata_t *, caddr_t, int, caddr_t);
 176 static int vhci_ioc_send_client_path(caddr_t, sv_iocdata_t *, int, caddr_t);
 177 static void vhci_ioc_devi_to_path(dev_info_t *, caddr_t);
 178 static int vhci_get_phci_path_list(dev_info_t *, sv_path_info_t *, uint_t);
 179 static int vhci_get_client_path_list(dev_info_t *, sv_path_info_t *, uint_t);
 180 static int vhci_get_iocdata(const void *, sv_iocdata_t *, int, caddr_t);
 181 static int vhci_get_iocswitchdata(const void *, sv_switch_to_cntlr_iocdata_t *,
 182     int, caddr_t);
 183 static int vhci_ioc_alloc_pathinfo(sv_path_info_t **, sv_path_info_t **,
 184     uint_t, sv_iocdata_t *, int, caddr_t);
 185 static void vhci_ioc_free_pathinfo(sv_path_info_t *, sv_path_info_t *, uint_t);
 186 static int vhci_ioc_send_pathinfo(sv_path_info_t *, sv_path_info_t *, uint_t,
 187     sv_iocdata_t *, int, caddr_t);
 188 static int vhci_handle_ext_fo(struct scsi_pkt *, int);
 189 static int vhci_efo_watch_cb(caddr_t, struct scsi_watch_result *);
 190 static int vhci_quiesce_lun(struct scsi_vhci_lun *);
 191 static int vhci_pgr_validate_and_register(scsi_vhci_priv_t *);
 192 static void vhci_dispatch_scsi_start(void *);
 193 static void vhci_efo_done(void *);
 194 static void vhci_initiate_auto_failback(void *);
 195 static void vhci_update_pHCI_pkt(struct vhci_pkt *, struct scsi_pkt *);
 196 static int vhci_update_pathinfo(struct scsi_device *, mdi_pathinfo_t *,
 197     struct scsi_failover_ops *, scsi_vhci_lun_t *, struct scsi_vhci *);
 198 static void vhci_kstat_create_pathinfo(mdi_pathinfo_t *);
 199 static int vhci_quiesce_paths(dev_info_t *, dev_info_t *,
 200     scsi_vhci_lun_t *, char *, char *);
 201 
 202 static char *vhci_devnm_to_guid(char *);
 203 static int vhci_bind_transport(struct scsi_address *, struct vhci_pkt *,
 204     int, int (*func)(caddr_t));
 205 static void vhci_intr(struct scsi_pkt *);
 206 static int vhci_do_prout(scsi_vhci_priv_t *);
 207 static void vhci_run_cmd(void *);
 208 static int vhci_do_prin(struct vhci_pkt **);
 209 static struct scsi_pkt *vhci_create_retry_pkt(struct vhci_pkt *);
 210 static struct vhci_pkt *vhci_sync_retry_pkt(struct vhci_pkt *);
 211 static struct scsi_vhci_lun *vhci_lun_lookup(dev_info_t *);
 212 static struct scsi_vhci_lun *vhci_lun_lookup_alloc(dev_info_t *, char *, int *);
 213 static void vhci_lun_free(struct scsi_vhci_lun *dvlp, struct scsi_device *sd);
 214 static int vhci_recovery_reset(scsi_vhci_lun_t *, struct scsi_address *,
 215     uint8_t, uint8_t);
 216 void vhci_update_pathstates(void *);
 217 
 218 #ifdef DEBUG
 219 static void vhci_print_prin_keys(vhci_prin_readkeys_t *, int);
 220 static void vhci_print_cdb(dev_info_t *dip, uint_t level,
 221     char *title, uchar_t *cdb);
 222 static void vhci_clean_print(dev_info_t *dev, uint_t level,
 223     char *title, uchar_t *data, int len);
 224 #endif
 225 static void vhci_print_prout_keys(scsi_vhci_lun_t *, char *);
 226 static void vhci_uscsi_iodone(struct scsi_pkt *pkt);
 227 static void vhci_invalidate_mpapi_lu(struct scsi_vhci *, scsi_vhci_lun_t *);
 228 
 229 /*
 230  * MP-API related functions
 231  */
 232 extern int vhci_mpapi_init(struct scsi_vhci *);
 233 extern void vhci_mpapi_add_dev_prod(struct scsi_vhci *, char *);
 234 extern int vhci_mpapi_ctl(dev_t, int, intptr_t, int, cred_t *, int *);
 235 extern void vhci_update_mpapi_data(struct scsi_vhci *,
 236     scsi_vhci_lun_t *, mdi_pathinfo_t *);
 237 extern void* vhci_get_mpapi_item(struct scsi_vhci *, mpapi_list_header_t *,
 238     uint8_t, void*);
 239 extern void vhci_mpapi_set_path_state(dev_info_t *, mdi_pathinfo_t *, int);
 240 extern int vhci_mpapi_update_tpg_acc_state_for_lu(struct scsi_vhci *,
 241     scsi_vhci_lun_t *);
 242 
 243 #define VHCI_DMA_MAX_XFER_CAP   INT_MAX
 244 
 245 #define VHCI_MAX_PGR_RETRIES    3
 246 
 247 /*
 248  * Macros for the device-type mpxio options
 249  */
 250 #define LOAD_BALANCE_OPTIONS            "load-balance-options"
 251 #define LOGICAL_BLOCK_REGION_SIZE       "region-size"
 252 #define MPXIO_OPTIONS_LIST              "device-type-mpxio-options-list"
 253 #define DEVICE_TYPE_STR                 "device-type"
 254 #define isdigit(ch)                     ((ch) >= '0' && (ch) <= '9')
 255 
 256 static struct cb_ops vhci_cb_ops = {
 257         vhci_open,                      /* open */
 258         vhci_close,                     /* close */
 259         nodev,                          /* strategy */
 260         nodev,                          /* print */
 261         nodev,                          /* dump */
 262         nodev,                          /* read */
 263         nodev,                          /* write */
 264         vhci_ioctl,                     /* ioctl */
 265         nodev,                          /* devmap */
 266         nodev,                          /* mmap */
 267         nodev,                          /* segmap */
 268         nochpoll,                       /* chpoll */
 269         ddi_prop_op,                    /* cb_prop_op */
 270         0,                              /* streamtab */
 271         D_NEW | D_MP,                   /* cb_flag */
 272         CB_REV,                         /* rev */
 273         nodev,                          /* aread */
 274         nodev                           /* awrite */
 275 };
 276 
 277 static struct dev_ops vhci_ops = {
 278         DEVO_REV,
 279         0,
 280         vhci_getinfo,
 281         nulldev,                /* identify */
 282         nulldev,                /* probe */
 283         vhci_attach,            /* attach and detach are mandatory */
 284         vhci_detach,
 285         nodev,                  /* reset */
 286         &vhci_cb_ops,               /* cb_ops */
 287         NULL,                   /* bus_ops */
 288         NULL,                   /* power */
 289         ddi_quiesce_not_needed, /* quiesce */
 290 };
 291 
 292 extern struct mod_ops mod_driverops;
 293 
 294 static struct modldrv modldrv = {
 295         &mod_driverops,
 296         vhci_version_name,      /* module name */
 297         &vhci_ops
 298 };
 299 
 300 static struct modlinkage modlinkage = {
 301         MODREV_1,
 302         &modldrv,
 303         NULL
 304 };
 305 
 306 static mdi_vhci_ops_t vhci_opinfo = {
 307         MDI_VHCI_OPS_REV,
 308         vhci_pathinfo_init,             /* Pathinfo node init callback */
 309         vhci_pathinfo_uninit,           /* Pathinfo uninit callback */
 310         vhci_pathinfo_state_change,     /* Pathinfo node state change */
 311         vhci_failover,                  /* failover callback */
 312         vhci_client_attached,           /* client attached callback     */
 313         vhci_is_dev_supported           /* is device supported by mdi */
 314 };
 315 
 316 /*
 317  * The scsi_failover table defines an ordered set of 'fops' modules supported
 318  * by scsi_vhci.  Currently, initialize this table from the 'ddi-forceload'
 319  * property specified in scsi_vhci.conf.
 320  */
 321 static struct scsi_failover {
 322         ddi_modhandle_t                 sf_mod;
 323         struct scsi_failover_ops        *sf_sfo;
 324 } *scsi_failover_table;
 325 static uint_t   scsi_nfailover;
 326 
 327 int
 328 _init(void)
 329 {
 330         int     rval;
 331 
 332         /*
 333          * Allocate soft state and prepare to do ddi_soft_state_zalloc()
 334          * before registering with the transport first.
 335          */
 336         if ((rval = ddi_soft_state_init(&vhci_softstate,
 337             sizeof (struct scsi_vhci), 1)) != 0) {
 338                 VHCI_DEBUG(1, (CE_NOTE, NULL,
 339                     "!_init:soft state init failed\n"));
 340                 return (rval);
 341         }
 342 
 343         if ((rval = scsi_hba_init(&modlinkage)) != 0) {
 344                 VHCI_DEBUG(1, (CE_NOTE, NULL,
 345                     "!_init: scsi hba init failed\n"));
 346                 ddi_soft_state_fini(&vhci_softstate);
 347                 return (rval);
 348         }
 349 
 350         mutex_init(&vhci_global_mutex, NULL, MUTEX_DRIVER, NULL);
 351         cv_init(&vhci_cv, NULL, CV_DRIVER, NULL);
 352 
 353         mutex_init(&vhci_targetmap_mutex, NULL, MUTEX_DRIVER, NULL);
 354         vhci_targetmap_byport = mod_hash_create_strhash(
 355             "vhci_targetmap_byport", 256, mod_hash_null_valdtor);
 356         vhci_targetmap_bypid = mod_hash_create_idhash(
 357             "vhci_targetmap_bypid", 256, mod_hash_null_valdtor);
 358 
 359         if ((rval = mod_install(&modlinkage)) != 0) {
 360                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!_init: mod_install failed\n"));
 361                 if (vhci_targetmap_bypid)
 362                         mod_hash_destroy_idhash(vhci_targetmap_bypid);
 363                 if (vhci_targetmap_byport)
 364                         mod_hash_destroy_strhash(vhci_targetmap_byport);
 365                 mutex_destroy(&vhci_targetmap_mutex);
 366                 cv_destroy(&vhci_cv);
 367                 mutex_destroy(&vhci_global_mutex);
 368                 scsi_hba_fini(&modlinkage);
 369                 ddi_soft_state_fini(&vhci_softstate);
 370         }
 371         return (rval);
 372 }
 373 
 374 
 375 /*
 376  * the system is done with us as a driver, so clean up
 377  */
 378 int
 379 _fini(void)
 380 {
 381         int rval;
 382 
 383         /*
 384          * don't start cleaning up until we know that the module remove
 385          * has worked  -- if this works, then we know that each instance
 386          * has successfully been DDI_DETACHed
 387          */
 388         if ((rval = mod_remove(&modlinkage)) != 0) {
 389                 VHCI_DEBUG(4, (CE_NOTE, NULL, "!_fini: mod_remove failed\n"));
 390                 return (rval);
 391         }
 392 
 393         if (vhci_targetmap_bypid)
 394                 mod_hash_destroy_idhash(vhci_targetmap_bypid);
 395         if (vhci_targetmap_byport)
 396                 mod_hash_destroy_strhash(vhci_targetmap_byport);
 397         mutex_destroy(&vhci_targetmap_mutex);
 398         cv_destroy(&vhci_cv);
 399         mutex_destroy(&vhci_global_mutex);
 400         scsi_hba_fini(&modlinkage);
 401         ddi_soft_state_fini(&vhci_softstate);
 402 
 403         return (rval);
 404 }
 405 
 406 int
 407 _info(struct modinfo *modinfop)
 408 {
 409         return (mod_info(&modlinkage, modinfop));
 410 }
 411 
 412 /*
 413  * Lookup scsi_failover by "short name" of failover module.
 414  */
 415 struct scsi_failover_ops *
 416 vhci_failover_ops_by_name(char *name)
 417 {
 418         struct scsi_failover    *sf;
 419 
 420         for (sf = scsi_failover_table; sf->sf_mod; sf++) {
 421                 if (sf->sf_sfo == NULL)
 422                         continue;
 423                 if (strcmp(sf->sf_sfo->sfo_name, name) == 0)
 424                         return (sf->sf_sfo);
 425         }
 426         return (NULL);
 427 }
 428 
 429 /*
 430  * Load all scsi_failover_ops 'fops' modules.
 431  */
 432 static void
 433 vhci_failover_modopen(struct scsi_vhci *vhci)
 434 {
 435         char                    **module;
 436         int                     i;
 437         struct scsi_failover    *sf;
 438         char                    **dt;
 439         int                     e;
 440 
 441         if (scsi_failover_table)
 442                 return;
 443 
 444         /* Get the list of modules from scsi_vhci.conf */
 445         if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY,
 446             vhci->vhci_dip, DDI_PROP_DONTPASS, "ddi-forceload",
 447             &module, &scsi_nfailover) != DDI_PROP_SUCCESS) {
 448                 cmn_err(CE_WARN, "scsi_vhci: "
 449                     "scsi_vhci.conf is missing 'ddi-forceload'");
 450                 return;
 451         }
 452         if (scsi_nfailover == 0) {
 453                 cmn_err(CE_WARN, "scsi_vhci: "
 454                     "scsi_vhci.conf has empty 'ddi-forceload'");
 455                 ddi_prop_free(module);
 456                 return;
 457         }
 458 
 459         /* allocate failover table based on number of modules */
 460         scsi_failover_table = (struct scsi_failover *)
 461             kmem_zalloc(sizeof (struct scsi_failover) * (scsi_nfailover + 1),
 462             KM_SLEEP);
 463 
 464         /* loop over modules specified in scsi_vhci.conf and open each module */
 465         for (i = 0, sf = scsi_failover_table; i < scsi_nfailover; i++) {
 466                 if (module[i] == NULL)
 467                         continue;
 468 
 469                 sf->sf_mod = ddi_modopen(module[i], KRTLD_MODE_FIRST, &e);
 470                 if (sf->sf_mod == NULL) {
 471                         /*
 472                          * A module returns EEXIST if other software is
 473                          * supporting the intended function: for example
 474                          * the scsi_vhci_f_sum_emc module returns EEXIST
 475                          * from _init if EMC powerpath software is installed.
 476                          */
 477                         if (e != EEXIST)
 478                                 cmn_err(CE_WARN, "scsi_vhci: unable to open "
 479                                     "module '%s', error %d", module[i], e);
 480                         continue;
 481                 }
 482                 sf->sf_sfo = ddi_modsym(sf->sf_mod,
 483                     "scsi_vhci_failover_ops", &e);
 484                 if (sf->sf_sfo == NULL) {
 485                         cmn_err(CE_WARN, "scsi_vhci: "
 486                             "unable to import 'scsi_failover_ops' from '%s', "
 487                             "error %d", module[i], e);
 488                         (void) ddi_modclose(sf->sf_mod);
 489                         sf->sf_mod = NULL;
 490                         continue;
 491                 }
 492 
 493                 /* register vid/pid of devices supported with mpapi */
 494                 for (dt = sf->sf_sfo->sfo_devices; *dt; dt++)
 495                         vhci_mpapi_add_dev_prod(vhci, *dt);
 496                 sf++;
 497         }
 498 
 499         /* verify that at least the "well-known" modules were there */
 500         if (vhci_failover_ops_by_name(SFO_NAME_SYM) == NULL)
 501                 cmn_err(CE_WARN, "scsi_vhci: well-known module \""
 502                     SFO_NAME_SYM "\" not defined in scsi_vhci.conf's "
 503                     "'ddi-forceload'");
 504         if (vhci_failover_ops_by_name(SFO_NAME_TPGS) == NULL)
 505                 cmn_err(CE_WARN, "scsi_vhci: well-known module \""
 506                     SFO_NAME_TPGS "\" not defined in scsi_vhci.conf's "
 507                     "'ddi-forceload'");
 508 
 509         /* call sfo_init for modules that need it */
 510         for (sf = scsi_failover_table; sf->sf_mod; sf++) {
 511                 if (sf->sf_sfo && sf->sf_sfo->sfo_init)
 512                         sf->sf_sfo->sfo_init();
 513         }
 514 
 515         ddi_prop_free(module);
 516 }
 517 
 518 /*
 519  * unload all loaded scsi_failover_ops modules
 520  */
 521 static void
 522 vhci_failover_modclose()
 523 {
 524         struct scsi_failover    *sf;
 525 
 526         for (sf = scsi_failover_table; sf->sf_mod; sf++) {
 527                 if ((sf->sf_mod == NULL) || (sf->sf_sfo == NULL))
 528                         continue;
 529                 (void) ddi_modclose(sf->sf_mod);
 530                 sf->sf_mod = NULL;
 531                 sf->sf_sfo = NULL;
 532         }
 533 
 534         if (scsi_failover_table && scsi_nfailover)
 535                 kmem_free(scsi_failover_table,
 536                     sizeof (struct scsi_failover) * (scsi_nfailover + 1));
 537         scsi_failover_table = NULL;
 538         scsi_nfailover = 0;
 539 }
 540 
 541 /* ARGSUSED */
 542 static int
 543 vhci_open(dev_t *devp, int flag, int otype, cred_t *credp)
 544 {
 545         struct scsi_vhci        *vhci;
 546 
 547         if (otype != OTYP_CHR) {
 548                 return (EINVAL);
 549         }
 550 
 551         vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(*devp)));
 552         if (vhci == NULL) {
 553                 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_open: failed ENXIO\n"));
 554                 return (ENXIO);
 555         }
 556 
 557         mutex_enter(&vhci->vhci_mutex);
 558         if ((flag & FEXCL) && (vhci->vhci_state & VHCI_STATE_OPEN)) {
 559                 mutex_exit(&vhci->vhci_mutex);
 560                 vhci_log(CE_NOTE, vhci->vhci_dip,
 561                     "!vhci%d: Already open\n", getminor(*devp));
 562                 return (EBUSY);
 563         }
 564 
 565         vhci->vhci_state |= VHCI_STATE_OPEN;
 566         mutex_exit(&vhci->vhci_mutex);
 567         return (0);
 568 }
 569 
 570 
 571 /* ARGSUSED */
 572 static int
 573 vhci_close(dev_t dev, int flag, int otype, cred_t *credp)
 574 {
 575         struct scsi_vhci        *vhci;
 576 
 577         if (otype != OTYP_CHR) {
 578                 return (EINVAL);
 579         }
 580 
 581         vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(dev)));
 582         if (vhci == NULL) {
 583                 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_close: failed ENXIO\n"));
 584                 return (ENXIO);
 585         }
 586 
 587         mutex_enter(&vhci->vhci_mutex);
 588         vhci->vhci_state &= ~VHCI_STATE_OPEN;
 589         mutex_exit(&vhci->vhci_mutex);
 590 
 591         return (0);
 592 }
 593 
 594 /* ARGSUSED */
 595 static int
 596 vhci_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
 597         cred_t *credp, int *rval)
 598 {
 599         if (IS_DEVCTL(cmd)) {
 600                 return (vhci_devctl(dev, cmd, data, mode, credp, rval));
 601         } else if (cmd == MP_CMD) {
 602                 return (vhci_mpapi_ctl(dev, cmd, data, mode, credp, rval));
 603         } else {
 604                 return (vhci_ctl(dev, cmd, data, mode, credp, rval));
 605         }
 606 }
 607 
 608 /*
 609  * attach the module
 610  */
 611 static int
 612 vhci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 613 {
 614         int                     rval = DDI_FAILURE;
 615         int                     scsi_hba_attached = 0;
 616         int                     vhci_attached = 0;
 617         int                     mutex_initted = 0;
 618         int                     instance;
 619         struct scsi_vhci        *vhci;
 620         scsi_hba_tran_t         *tran;
 621         char                    cache_name_buf[64];
 622         char                    *data;
 623 
 624         VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_attach: cmd=0x%x\n", cmd));
 625 
 626         instance = ddi_get_instance(dip);
 627 
 628         switch (cmd) {
 629         case DDI_ATTACH:
 630                 break;
 631 
 632         case DDI_RESUME:
 633         case DDI_PM_RESUME:
 634                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_attach: resume not yet"
 635                     "implemented\n"));
 636                 return (rval);
 637 
 638         default:
 639                 VHCI_DEBUG(1, (CE_NOTE, NULL,
 640                     "!vhci_attach: unknown ddi command\n"));
 641                 return (rval);
 642         }
 643 
 644         /*
 645          * Allocate vhci data structure.
 646          */
 647         if (ddi_soft_state_zalloc(vhci_softstate, instance) != DDI_SUCCESS) {
 648                 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
 649                     "soft state alloc failed\n"));
 650                 return (DDI_FAILURE);
 651         }
 652 
 653         if ((vhci = ddi_get_soft_state(vhci_softstate, instance)) == NULL) {
 654                 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
 655                     "bad soft state\n"));
 656                 ddi_soft_state_free(vhci_softstate, instance);
 657                 return (DDI_FAILURE);
 658         }
 659 
 660         /* Allocate packet cache */
 661         (void) snprintf(cache_name_buf, sizeof (cache_name_buf),
 662             "vhci%d_cache", instance);
 663 
 664         mutex_init(&vhci->vhci_mutex, NULL, MUTEX_DRIVER, NULL);
 665         mutex_initted++;
 666 
 667         /*
 668          * Allocate a transport structure
 669          */
 670         tran = scsi_hba_tran_alloc(dip, SCSI_HBA_CANSLEEP);
 671         ASSERT(tran != NULL);
 672 
 673         vhci->vhci_tran              = tran;
 674         vhci->vhci_dip               = dip;
 675         vhci->vhci_instance  = instance;
 676 
 677         tran->tran_hba_private       = vhci;
 678         tran->tran_tgt_init  = vhci_scsi_tgt_init;
 679         tran->tran_tgt_probe = NULL;
 680         tran->tran_tgt_free  = vhci_scsi_tgt_free;
 681 
 682         tran->tran_start     = vhci_scsi_start;
 683         tran->tran_abort     = vhci_scsi_abort;
 684         tran->tran_reset     = vhci_scsi_reset;
 685         tran->tran_getcap    = vhci_scsi_getcap;
 686         tran->tran_setcap    = vhci_scsi_setcap;
 687         tran->tran_init_pkt  = vhci_scsi_init_pkt;
 688         tran->tran_destroy_pkt       = vhci_scsi_destroy_pkt;
 689         tran->tran_dmafree   = vhci_scsi_dmafree;
 690         tran->tran_sync_pkt  = vhci_scsi_sync_pkt;
 691         tran->tran_reset_notify = vhci_scsi_reset_notify;
 692 
 693         tran->tran_get_bus_addr      = vhci_scsi_get_bus_addr;
 694         tran->tran_get_name  = vhci_scsi_get_name;
 695         tran->tran_bus_reset = NULL;
 696         tran->tran_quiesce   = NULL;
 697         tran->tran_unquiesce = NULL;
 698 
 699         /*
 700          * register event notification routines with scsa
 701          */
 702         tran->tran_get_eventcookie = NULL;
 703         tran->tran_add_eventcall = NULL;
 704         tran->tran_remove_eventcall = NULL;
 705         tran->tran_post_event        = NULL;
 706 
 707         tran->tran_bus_power = vhci_scsi_bus_power;
 708 
 709         tran->tran_bus_config        = vhci_scsi_bus_config;
 710         tran->tran_bus_unconfig      = vhci_scsi_bus_unconfig;
 711 
 712         /*
 713          * Attach this instance with the mpxio framework
 714          */
 715         if (mdi_vhci_register(MDI_HCI_CLASS_SCSI, dip, &vhci_opinfo, 0)
 716             != MDI_SUCCESS) {
 717                 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
 718                     "mdi_vhci_register failed\n"));
 719                 goto attach_fail;
 720         }
 721         vhci_attached++;
 722 
 723         /*
 724          * Attach this instance of the hba.
 725          *
 726          * Regarding dma attributes: Since scsi_vhci is a virtual scsi HBA
 727          * driver, it has nothing to do with DMA. However, when calling
 728          * scsi_hba_attach_setup() we need to pass something valid in the
 729          * dma attributes parameter. So we just use scsi_alloc_attr.
 730          * SCSA itself seems to care only for dma_attr_minxfer and
 731          * dma_attr_burstsizes fields of dma attributes structure.
         * It expects those fields to be non-zero.
 733          */
 734         if (scsi_hba_attach_setup(dip, &scsi_alloc_attr, tran,
 735             SCSI_HBA_ADDR_COMPLEX) != DDI_SUCCESS) {
 736                 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
 737                     "hba attach failed\n"));
 738                 goto attach_fail;
 739         }
 740         scsi_hba_attached++;
 741 
 742         if (ddi_create_minor_node(dip, "devctl", S_IFCHR,
 743             INST2DEVCTL(instance), DDI_NT_SCSI_NEXUS, 0) != DDI_SUCCESS) {
 744                 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
 745                     " ddi_create_minor_node failed\n"));
 746                 goto attach_fail;
 747         }
 748 
 749         /*
 750          * Set pm-want-child-notification property for
 751          * power management of the phci and client
 752          */
 753         if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
 754             "pm-want-child-notification?", NULL, NULL) != DDI_PROP_SUCCESS) {
 755                 cmn_err(CE_WARN,
 756                     "%s%d fail to create pm-want-child-notification? prop",
 757                     ddi_driver_name(dip), ddi_get_instance(dip));
 758                 goto attach_fail;
 759         }
 760 
 761         vhci->vhci_taskq = taskq_create("vhci_taskq", 1, MINCLSYSPRI, 1, 4, 0);
 762         vhci->vhci_update_pathstates_taskq =
 763             taskq_create("vhci_update_pathstates", VHCI_NUM_UPDATE_TASKQ,
 764             MINCLSYSPRI, 1, 4, 0);
 765         ASSERT(vhci->vhci_taskq);
 766         ASSERT(vhci->vhci_update_pathstates_taskq);
 767 
 768         /*
 769          * Set appropriate configuration flags based on options set in
 770          * conf file.
 771          */
 772         vhci->vhci_conf_flags = 0;
 773         if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, PROPFLAGS,
 774             "auto-failback", &data) == DDI_SUCCESS) {
 775                 if (strcmp(data, "enable") == 0)
 776                         vhci->vhci_conf_flags |= VHCI_CONF_FLAGS_AUTO_FAILBACK;
 777                 ddi_prop_free(data);
 778         }
 779 
 780         if (!(vhci->vhci_conf_flags & VHCI_CONF_FLAGS_AUTO_FAILBACK))
 781                 vhci_log(CE_NOTE, dip, "!Auto-failback capability "
 782                     "disabled through scsi_vhci.conf file.");
 783 
 784         /*
 785          * Allocate an mpapi private structure
 786          */
 787         vhci->mp_priv = kmem_zalloc(sizeof (mpapi_priv_t), KM_SLEEP);
 788         if (vhci_mpapi_init(vhci) != 0) {
 789                 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_attach: "
 790                     "vhci_mpapi_init() failed"));
 791         }
 792 
 793         vhci_failover_modopen(vhci);            /* load failover modules */
 794 
 795         ddi_report_dev(dip);
 796         return (DDI_SUCCESS);
 797 
 798 attach_fail:
 799         if (vhci_attached)
 800                 (void) mdi_vhci_unregister(dip, 0);
 801 
 802         if (scsi_hba_attached)
 803                 (void) scsi_hba_detach(dip);
 804 
 805         if (vhci->vhci_tran)
 806                 scsi_hba_tran_free(vhci->vhci_tran);
 807 
 808         if (mutex_initted) {
 809                 mutex_destroy(&vhci->vhci_mutex);
 810         }
 811 
 812         ddi_soft_state_free(vhci_softstate, instance);
 813         return (DDI_FAILURE);
 814 }
 815 
 816 
 817 /*ARGSUSED*/
 818 static int
 819 vhci_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 820 {
 821         int                     instance = ddi_get_instance(dip);
 822         scsi_hba_tran_t         *tran;
 823         struct scsi_vhci        *vhci;
 824 
 825         VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_detach: cmd=0x%x\n", cmd));
 826 
 827         if ((tran = ddi_get_driver_private(dip)) == NULL)
 828                 return (DDI_FAILURE);
 829 
 830         vhci = TRAN2HBAPRIVATE(tran);
 831         if (!vhci) {
 832                 return (DDI_FAILURE);
 833         }
 834 
 835         switch (cmd) {
 836         case DDI_DETACH:
 837                 break;
 838 
 839         case DDI_SUSPEND:
 840         case DDI_PM_SUSPEND:
 841                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_detach: suspend/pm not yet"
 842                     "implemented\n"));
 843                 return (DDI_FAILURE);
 844 
 845         default:
 846                 VHCI_DEBUG(1, (CE_NOTE, NULL,
 847                     "!vhci_detach: unknown ddi command\n"));
 848                 return (DDI_FAILURE);
 849         }
 850 
 851         (void) mdi_vhci_unregister(dip, 0);
 852         (void) scsi_hba_detach(dip);
 853         scsi_hba_tran_free(tran);
 854 
 855         if (ddi_prop_remove(DDI_DEV_T_NONE, dip,
 856             "pm-want-child-notification?") != DDI_PROP_SUCCESS) {
 857                 cmn_err(CE_WARN,
 858                     "%s%d unable to remove prop pm-want_child_notification?",
 859                     ddi_driver_name(dip), ddi_get_instance(dip));
 860         }
 861         if (vhci_restart_timeid != 0) {
 862                 (void) untimeout(vhci_restart_timeid);
 863         }
 864         vhci_restart_timeid = 0;
 865 
 866         mutex_destroy(&vhci->vhci_mutex);
 867         vhci->vhci_dip = NULL;
 868         vhci->vhci_tran = NULL;
 869         taskq_destroy(vhci->vhci_taskq);
 870         taskq_destroy(vhci->vhci_update_pathstates_taskq);
 871         ddi_remove_minor_node(dip, NULL);
 872         ddi_soft_state_free(vhci_softstate, instance);
 873 
 874         vhci_failover_modclose();               /* unload failover modules */
 875         return (DDI_SUCCESS);
 876 }
 877 
 878 /*
 879  * vhci_getinfo()
 880  * Given the device number, return the devinfo pointer or the
 881  * instance number.
 882  * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
 883  */
 884 
 885 /*ARGSUSED*/
 886 static int
 887 vhci_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
 888 {
 889         struct scsi_vhci        *vhcip;
 890         int                     instance = MINOR2INST(getminor((dev_t)arg));
 891 
 892         switch (cmd) {
 893         case DDI_INFO_DEVT2DEVINFO:
 894                 vhcip = ddi_get_soft_state(vhci_softstate, instance);
 895                 if (vhcip != NULL)
 896                         *result = vhcip->vhci_dip;
 897                 else {
 898                         *result = NULL;
 899                         return (DDI_FAILURE);
 900                 }
 901                 break;
 902 
 903         case DDI_INFO_DEVT2INSTANCE:
 904                 *result = (void *)(uintptr_t)instance;
 905                 break;
 906 
 907         default:
 908                 return (DDI_FAILURE);
 909         }
 910 
 911         return (DDI_SUCCESS);
 912 }
 913 
 914 /*ARGSUSED*/
 915 static int
 916 vhci_scsi_tgt_init(dev_info_t *hba_dip, dev_info_t *tgt_dip,
 917         scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
 918 {
 919         char                    *guid;
 920         scsi_vhci_lun_t         *vlun;
 921         struct scsi_vhci        *vhci;
 922         clock_t                 from_ticks;
 923         mdi_pathinfo_t          *pip;
 924         int                     rval;
 925 
 926         ASSERT(hba_dip != NULL);
 927         ASSERT(tgt_dip != NULL);
 928 
 929         if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip, PROPFLAGS,
 930             MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
 931                 /*
 932                  * This must be the .conf node without GUID property.
 933                  * The node under fp already inserts a delay, so we
 934                  * just return from here. We rely on this delay to have
 935                  * all dips be posted to the ndi hotplug thread's newdev
 936                  * list. This is necessary for the deferred attach
 937                  * mechanism to work and opens() done soon after boot to
 938                  * succeed.
 939                  */
 940                 VHCI_DEBUG(4, (CE_WARN, hba_dip, "tgt_init: lun guid "
 941                     "property failed"));
 942                 return (DDI_NOT_WELL_FORMED);
 943         }
 944 
 945         if (ndi_dev_is_persistent_node(tgt_dip) == 0) {
 946                 /*
 947                  * This must be .conf node with the GUID property. We don't
 948                  * merge property by ndi_merge_node() here  because the
 949                  * devi_addr_buf of .conf node is "" always according the
 950                  * implementation of vhci_scsi_get_name_bus_addr().
 951                  */
 952                 ddi_set_name_addr(tgt_dip, NULL);
 953                 return (DDI_FAILURE);
 954         }
 955 
 956         vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(hba_dip));
 957         ASSERT(vhci != NULL);
 958 
 959         VHCI_DEBUG(4, (CE_NOTE, hba_dip,
 960             "!tgt_init: called for %s (instance %d)\n",
 961             ddi_driver_name(tgt_dip), ddi_get_instance(tgt_dip)));
 962 
 963         vlun = vhci_lun_lookup(tgt_dip);
 964 
 965         mutex_enter(&vhci_global_mutex);
 966 
 967         from_ticks = ddi_get_lbolt();
 968         if (vhci_to_ticks == 0) {
 969                 vhci_to_ticks = from_ticks +
 970                     drv_usectohz(vhci_init_wait_timeout);
 971         }
 972 
 973 #if DEBUG
 974         if (vlun) {
 975                 VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
 976                     "vhci_scsi_tgt_init: guid %s : found vlun 0x%p "
 977                     "from_ticks %lx to_ticks %lx",
 978                     guid, (void *)vlun, from_ticks, vhci_to_ticks));
 979         } else {
 980                 VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
 981                     "vhci_scsi_tgt_init: guid %s : vlun not found "
 982                     "from_ticks %lx to_ticks %lx", guid, from_ticks,
 983                     vhci_to_ticks));
 984         }
 985 #endif
 986 
 987         rval = mdi_select_path(tgt_dip, NULL,
 988             (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH), NULL, &pip);
 989         if (rval == MDI_SUCCESS) {
 990                 mdi_rele_path(pip);
 991         }
 992 
 993         /*
 994          * Wait for the following conditions :
 995          *      1. no vlun available yet
 996          *      2. no path established
 997          *      3. timer did not expire
 998          */
 999         while ((vlun == NULL) || (mdi_client_get_path_count(tgt_dip) == 0) ||
1000             (rval != MDI_SUCCESS)) {
1001                 if (vlun && vlun->svl_not_supported) {
1002                         VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
1003                             "vlun 0x%p lun guid %s not supported!",
1004                             (void *)vlun, guid));
1005                         mutex_exit(&vhci_global_mutex);
1006                         ddi_prop_free(guid);
1007                         return (DDI_NOT_WELL_FORMED);
1008                 }
1009                 if ((vhci_first_time == 0) && (from_ticks >= vhci_to_ticks)) {
1010                         vhci_first_time = 1;
1011                 }
1012                 if (vhci_first_time == 1) {
1013                         VHCI_DEBUG(1, (CE_WARN, hba_dip, "vhci_scsi_tgt_init: "
1014                             "no wait for %s. from_tick %lx, to_tick %lx",
1015                             guid, from_ticks, vhci_to_ticks));
1016                         mutex_exit(&vhci_global_mutex);
1017                         ddi_prop_free(guid);
1018                         return (DDI_NOT_WELL_FORMED);
1019                 }
1020 
1021                 if (cv_timedwait(&vhci_cv,
1022                     &vhci_global_mutex, vhci_to_ticks) == -1) {
1023                         /* Timed out */
1024 #ifdef DEBUG
1025                         if (vlun == NULL) {
1026                                 VHCI_DEBUG(1, (CE_WARN, hba_dip,
1027                                     "tgt_init: no vlun for %s!", guid));
1028                         } else if (mdi_client_get_path_count(tgt_dip) == 0) {
1029                                 VHCI_DEBUG(1, (CE_WARN, hba_dip,
1030                                     "tgt_init: client path count is "
1031                                     "zero for %s!", guid));
1032                         } else {
1033                                 VHCI_DEBUG(1, (CE_WARN, hba_dip,
1034                                     "tgt_init: client path not "
1035                                     "available yet for %s!", guid));
1036                         }
1037 #endif /* DEBUG */
1038                         mutex_exit(&vhci_global_mutex);
1039                         ddi_prop_free(guid);
1040                         return (DDI_NOT_WELL_FORMED);
1041                 }
1042                 vlun = vhci_lun_lookup(tgt_dip);
1043                 rval = mdi_select_path(tgt_dip, NULL,
1044                     (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
1045                     NULL, &pip);
1046                 if (rval == MDI_SUCCESS) {
1047                         mdi_rele_path(pip);
1048                 }
1049                 from_ticks = ddi_get_lbolt();
1050         }
1051         mutex_exit(&vhci_global_mutex);
1052 
1053         ASSERT(vlun != NULL);
1054         ddi_prop_free(guid);
1055 
1056         scsi_device_hba_private_set(sd, vlun);
1057 
1058         return (DDI_SUCCESS);
1059 }
1060 
1061 /*ARGSUSED*/
1062 static void
1063 vhci_scsi_tgt_free(dev_info_t *hba_dip, dev_info_t *tgt_dip,
1064         scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
1065 {
1066         struct scsi_vhci_lun *dvlp;
1067         ASSERT(mdi_client_get_path_count(tgt_dip) <= 0);
1068         dvlp = (struct scsi_vhci_lun *)scsi_device_hba_private_get(sd);
1069         ASSERT(dvlp != NULL);
1070 
1071         vhci_lun_free(dvlp, sd);
1072 }
1073 
1074 /*
1075  * a PGR register command has started; copy the info we need
1076  */
1077 int
1078 vhci_pgr_register_start(scsi_vhci_lun_t *vlun, struct scsi_pkt *pkt)
1079 {
1080         struct vhci_pkt         *vpkt = TGTPKT2VHCIPKT(pkt);
1081         void                    *addr;
1082 
1083         if (!vpkt->vpkt_tgt_init_bp)
1084                 return (TRAN_BADPKT);
1085 
1086         addr = bp_mapin_common(vpkt->vpkt_tgt_init_bp,
1087             (vpkt->vpkt_flags & CFLAG_NOWAIT) ? VM_NOSLEEP : VM_SLEEP);
1088         if (addr == NULL)
1089                 return (TRAN_BUSY);
1090 
1091         mutex_enter(&vlun->svl_mutex);
1092 
1093         vhci_print_prout_keys(vlun, "v_pgr_reg_start: before bcopy:");
1094 
1095         bcopy(addr, &vlun->svl_prout, sizeof (vhci_prout_t) -
1096             (2 * MHIOC_RESV_KEY_SIZE*sizeof (char)));
1097         bcopy(pkt->pkt_cdbp, vlun->svl_cdb, sizeof (vlun->svl_cdb));
1098 
1099         vhci_print_prout_keys(vlun, "v_pgr_reg_start: after bcopy:");
1100 
1101         vlun->svl_time = pkt->pkt_time;
1102         vlun->svl_bcount = vpkt->vpkt_tgt_init_bp->b_bcount;
1103         vlun->svl_first_path = vpkt->vpkt_path;
1104         mutex_exit(&vlun->svl_mutex);
1105         return (0);
1106 }
1107 
1108 /*
1109  * Function name : vhci_scsi_start()
1110  *
1111  * Return Values : TRAN_FATAL_ERROR     - vhci has been shutdown
1112  *                                        or other fatal failure
1113  *                                        preventing packet transportation
1114  *                 TRAN_BUSY            - request queue is full
1115  *                 TRAN_ACCEPT          - pkt has been submitted to phci
1116  *                                        (or is held in the waitQ)
1117  * Description   : Implements SCSA's tran_start() entry point for
1118  *                 packet transport
1119  *
1120  */
1121 static int
1122 vhci_scsi_start(struct scsi_address *ap, struct scsi_pkt *pkt)
1123 {
1124         int                     rval = TRAN_ACCEPT;
1125         int                     instance, held;
1126         struct scsi_vhci        *vhci = ADDR2VHCI(ap);
1127         struct scsi_vhci_lun    *vlun = ADDR2VLUN(ap);
1128         struct vhci_pkt         *vpkt = TGTPKT2VHCIPKT(pkt);
1129         int                     flags = 0;
1130         scsi_vhci_priv_t        *svp, *svp_resrv;
1131         dev_info_t              *cdip;
1132         client_lb_t             lbp;
1133         int                     restore_lbp = 0;
1134         /* set if pkt is SCSI-II RESERVE cmd */
1135         int                     pkt_reserve_cmd = 0;
1136         int                     reserve_failed = 0;
1137         int                     resrv_instance = 0;
1138         mdi_pathinfo_t          *pip;
1139         struct scsi_pkt         *rel_pkt;
1140 
1141         ASSERT(vhci != NULL);
1142         ASSERT(vpkt != NULL);
1143         ASSERT(vpkt->vpkt_state != VHCI_PKT_ISSUED);
1144         cdip = ADDR2DIP(ap);
1145 
1146         /*
1147          * Block IOs if LUN is held or QUIESCED for IOs.
1148          */
1149         if ((VHCI_LUN_IS_HELD(vlun)) ||
1150             ((vlun->svl_flags & VLUN_QUIESCED_FLG) == VLUN_QUIESCED_FLG)) {
1151                 return (TRAN_BUSY);
1152         }
1153 
1154         /*
1155          * vhci_lun needs to be quiesced before SCSI-II RESERVE command
1156          * can be issued.  This may require a cv_timedwait, which is
1157          * dangerous to perform in an interrupt context.  So if this
1158          * is a RESERVE command a taskq is dispatched to service it.
1159          * This taskq shall again call vhci_scsi_start, but we shall be
 1160          * sure it's not in an interrupt context.
1161          */
1162         if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
1163             (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
1164                 if (!(vpkt->vpkt_state & VHCI_PKT_THRU_TASKQ)) {
1165                         if (taskq_dispatch(vhci->vhci_taskq,
1166                             vhci_dispatch_scsi_start, (void *) vpkt,
1167                             KM_NOSLEEP)) {
1168                                 return (TRAN_ACCEPT);
1169                         } else {
1170                                 return (TRAN_BUSY);
1171                         }
1172                 }
1173 
1174                 /*
1175                  * Here we ensure that simultaneous SCSI-II RESERVE cmds don't
1176                  * get serviced for a lun.
1177                  */
1178                 VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
1179                 if (!held) {
1180                         return (TRAN_BUSY);
1181                 } else if ((vlun->svl_flags & VLUN_QUIESCED_FLG) ==
1182                     VLUN_QUIESCED_FLG) {
1183                         VHCI_RELEASE_LUN(vlun);
1184                         return (TRAN_BUSY);
1185                 }
1186 
1187                 /*
1188                  * To ensure that no IOs occur for this LUN for the duration
1189                  * of this pkt set the VLUN_QUIESCED_FLG.
1190                  * In case this routine needs to exit on error make sure that
1191                  * this flag is cleared.
1192                  */
1193                 vlun->svl_flags |= VLUN_QUIESCED_FLG;
1194                 pkt_reserve_cmd = 1;
1195 
1196                 /*
1197                  * if this is a SCSI-II RESERVE command, set load balancing
1198                  * policy to be ALTERNATE PATH to ensure that all subsequent
1199                  * IOs are routed on the same path.  This is because if commands
1200                  * are routed across multiple paths then IOs on paths other than
1201                  * the one on which the RESERVE was executed will get a
1202                  * RESERVATION CONFLICT
1203                  */
1204                 lbp = mdi_get_lb_policy(cdip);
1205                 if (lbp != LOAD_BALANCE_NONE) {
1206                         if (vhci_quiesce_lun(vlun) != 1) {
1207                                 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1208                                 VHCI_RELEASE_LUN(vlun);
1209                                 return (TRAN_FATAL_ERROR);
1210                         }
1211                         vlun->svl_lb_policy_save = lbp;
1212                         if (mdi_set_lb_policy(cdip, LOAD_BALANCE_NONE) !=
1213                             MDI_SUCCESS) {
1214                                 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1215                                 VHCI_RELEASE_LUN(vlun);
1216                                 return (TRAN_FATAL_ERROR);
1217                         }
1218                         restore_lbp = 1;
1219                 }
1220 
1221                 VHCI_DEBUG(2, (CE_NOTE, vhci->vhci_dip,
1222                     "!vhci_scsi_start: sending SCSI-2 RESERVE, vlun 0x%p, "
1223                     "svl_resrv_pip 0x%p, svl_flags: %x, lb_policy %x",
1224                     (void *)vlun, (void *)vlun->svl_resrv_pip, vlun->svl_flags,
1225                     mdi_get_lb_policy(cdip)));
1226 
1227                 /*
1228                  * See comments for VLUN_RESERVE_ACTIVE_FLG in scsi_vhci.h
1229                  * To narrow this window where a reserve command may be sent
1230                  * down an inactive path the path states first need to be
1231                  * updated.  Before calling vhci_update_pathstates reset
1232                  * VLUN_RESERVE_ACTIVE_FLG, just in case it was already set
1233                  * for this lun.  This shall prevent an unnecessary reset
1234                  * from being sent out.  Also remember currently reserved path
1235                  * just for a case the new reservation will go to another path.
1236                  */
1237                 if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
1238                         resrv_instance = mdi_pi_get_path_instance(
1239                             vlun->svl_resrv_pip);
1240                 }
1241                 vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
1242                 vhci_update_pathstates((void *)vlun);
1243         }
1244 
1245         instance = ddi_get_instance(vhci->vhci_dip);
1246 
1247         /*
1248          * If the command is PRIN with action of zero, then the cmd
1249          * is reading PR keys which requires filtering on completion.
1250          * Data cache sync must be guaranteed.
1251          */
1252         if ((pkt->pkt_cdbp[0] == SCMD_PRIN) && (pkt->pkt_cdbp[1] == 0) &&
1253             (vpkt->vpkt_org_vpkt == NULL)) {
1254                 vpkt->vpkt_tgt_init_pkt_flags |= PKT_CONSISTENT;
1255         }
1256 
1257         /*
1258          * Do not defer bind for PKT_DMA_PARTIAL
1259          */
1260         if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1261 
1262                 /* This is a non pkt_dma_partial case */
1263                 if ((rval = vhci_bind_transport(
1264                     ap, vpkt, vpkt->vpkt_tgt_init_pkt_flags, NULL_FUNC))
1265                     != TRAN_ACCEPT) {
1266                         VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1267                             "!vhci%d %x: failed to bind transport: "
1268                             "vlun 0x%p pkt_reserved %x restore_lbp %x,"
1269                             "lbp %x", instance, rval, (void *)vlun,
1270                             pkt_reserve_cmd, restore_lbp, lbp));
1271                         if (restore_lbp)
1272                                 (void) mdi_set_lb_policy(cdip, lbp);
1273                         if (pkt_reserve_cmd)
1274                                 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1275                         return (rval);
1276                 }
1277                 VHCI_DEBUG(8, (CE_NOTE, NULL,
1278                     "vhci_scsi_start: v_b_t called 0x%p\n", (void *)vpkt));
1279         }
1280         ASSERT(vpkt->vpkt_hba_pkt != NULL);
1281         ASSERT(vpkt->vpkt_path != NULL);
1282 
1283         /*
1284          * This is the chance to adjust the pHCI's pkt and other information
1285          * from target driver's pkt.
1286          */
1287         VHCI_DEBUG(8, (CE_NOTE, vhci->vhci_dip, "vhci_scsi_start vpkt %p\n",
1288             (void *)vpkt));
1289         vhci_update_pHCI_pkt(vpkt, pkt);
1290 
1291         if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
1292                 if (vpkt->vpkt_path != vlun->svl_resrv_pip) {
1293                         VHCI_DEBUG(1, (CE_WARN, vhci->vhci_dip,
1294                             "!vhci_bind: reserve flag set for vlun 0x%p, but, "
1295                             "pktpath 0x%p resrv path 0x%p differ. lb_policy %x",
1296                             (void *)vlun, (void *)vpkt->vpkt_path,
1297                             (void *)vlun->svl_resrv_pip,
1298                             mdi_get_lb_policy(cdip)));
1299                         reserve_failed = 1;
1300                 }
1301         }
1302 
1303         svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
1304         if (svp == NULL || reserve_failed) {
1305                 if (pkt_reserve_cmd) {
1306                         VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1307                             "!vhci_bind returned null svp vlun 0x%p",
1308                             (void *)vlun));
1309                         vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1310                         if (restore_lbp)
1311                                 (void) mdi_set_lb_policy(cdip, lbp);
1312                 }
1313 pkt_cleanup:
1314                 if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1315                         scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1316                         vpkt->vpkt_hba_pkt = NULL;
1317                         if (vpkt->vpkt_path) {
1318                                 mdi_rele_path(vpkt->vpkt_path);
1319                                 vpkt->vpkt_path = NULL;
1320                         }
1321                 }
1322                 if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
1323                     (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
1324                     ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
1325                         sema_v(&vlun->svl_pgr_sema);
1326                 }
1327                 return (TRAN_BUSY);
1328         }
1329 
1330         if ((resrv_instance != 0) && (resrv_instance !=
1331             mdi_pi_get_path_instance(vpkt->vpkt_path))) {
1332                 /*
1333                  * This is an attempt to reserve vpkt->vpkt_path.  But the
1334                  * previously reserved path referred by resrv_instance might
1335                  * still be reserved.  Hence we will send a release command
1336                  * there in order to avoid a reservation conflict.
1337                  */
1338                 VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip, "!vhci_scsi_start: "
1339                     "conflicting reservation on another path, vlun 0x%p, "
1340                     "reserved instance %d, new instance: %d, pip: 0x%p",
1341                     (void *)vlun, resrv_instance,
1342                     mdi_pi_get_path_instance(vpkt->vpkt_path),
1343                     (void *)vpkt->vpkt_path));
1344 
1345                 /*
1346                  * In rare cases, the path referred by resrv_instance could
1347                  * disappear in the meantime. Calling mdi_select_path() below
1348                  * is an attempt to find out if the path still exists. It also
1349                  * ensures that the path will be held when the release is sent.
1350                  */
1351                 rval = mdi_select_path(cdip, NULL, MDI_SELECT_PATH_INSTANCE,
1352                     (void *)(intptr_t)resrv_instance, &pip);
1353 
1354                 if ((rval == MDI_SUCCESS) && (pip != NULL)) {
1355                         svp_resrv = (scsi_vhci_priv_t *)
1356                             mdi_pi_get_vhci_private(pip);
1357                         rel_pkt = scsi_init_pkt(&svp_resrv->svp_psd->sd_address,
1358                             NULL, NULL, CDB_GROUP0,
1359                             sizeof (struct scsi_arq_status), 0, 0, SLEEP_FUNC,
1360                             NULL);
1361 
1362                         if (rel_pkt == NULL) {
1363                                 char    *p_path;
1364 
1365                                 /*
1366                                  * This is very unlikely.
1367                                  * scsi_init_pkt(SLEEP_FUNC) does not fail
1368                                  * because of resources. But in theory it could
1369                                  * fail for some other reason. There is not an
1370                                  * easy way how to recover though. Log a warning
1371                                  * and return.
1372                                  */
1373                                 p_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1374                                 vhci_log(CE_WARN, vhci->vhci_dip, "!Sending "
1375                                     "RELEASE(6) to %s failed, a potential "
1376                                     "reservation conflict ahead.",
1377                                     ddi_pathname(mdi_pi_get_phci(pip), p_path));
1378                                 kmem_free(p_path, MAXPATHLEN);
1379 
1380                                 if (restore_lbp)
1381                                         (void) mdi_set_lb_policy(cdip, lbp);
1382 
1383                                 /* no need to check pkt_reserve_cmd here */
1384                                 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1385                                 return (TRAN_FATAL_ERROR);
1386                         }
1387 
1388                         rel_pkt->pkt_cdbp[0] = SCMD_RELEASE;
1389                         rel_pkt->pkt_time = 60;
1390 
1391                         /*
1392                          * Ignore the return value.  If it will fail
1393                          * then most likely it is no longer reserved
1394                          * anyway.
1395                          */
1396                         (void) vhci_do_scsi_cmd(rel_pkt);
1397                         VHCI_DEBUG(1, (CE_NOTE, NULL,
1398                             "!vhci_scsi_start: path 0x%p, issued SCSI-2"
1399                             " RELEASE\n", (void *)pip));
1400                         scsi_destroy_pkt(rel_pkt);
1401                         mdi_rele_path(pip);
1402                 }
1403         }
1404 
1405         VHCI_INCR_PATH_CMDCOUNT(svp);
1406 
1407         /*
1408          * Ensure that no other IOs raced ahead, while a RESERVE cmd was
1409          * QUIESCING the same lun.
1410          */
1411         if ((!pkt_reserve_cmd) &&
1412             ((vlun->svl_flags & VLUN_QUIESCED_FLG) == VLUN_QUIESCED_FLG)) {
1413                 VHCI_DECR_PATH_CMDCOUNT(svp);
1414                 goto pkt_cleanup;
1415         }
1416 
1417         if ((pkt->pkt_cdbp[0] == SCMD_PRIN) ||
1418             (pkt->pkt_cdbp[0] == SCMD_PROUT)) {
1419                 /*
1420                  * currently this thread only handles running PGR
1421                  * commands, so don't bother creating it unless
1422                  * something interesting is going to happen (like
1423                  * either a PGR out, or a PGR in with enough space
1424                  * to hold the keys that are getting returned)
1425                  */
1426                 mutex_enter(&vlun->svl_mutex);
1427                 if (((vlun->svl_flags & VLUN_TASK_D_ALIVE_FLG) == 0) &&
1428                     (pkt->pkt_cdbp[0] == SCMD_PROUT)) {
1429                         vlun->svl_taskq = taskq_create("vlun_pgr_task_daemon",
1430                             1, MINCLSYSPRI, 1, 4, 0);
1431                         vlun->svl_flags |= VLUN_TASK_D_ALIVE_FLG;
1432                 }
1433                 mutex_exit(&vlun->svl_mutex);
1434                 if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
1435                     (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
1436                     ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
1437                         if (rval = vhci_pgr_register_start(vlun, pkt)) {
1438                                 /* an error */
1439                                 sema_v(&vlun->svl_pgr_sema);
1440                                 return (rval);
1441                         }
1442                 }
1443         }
1444 
1445         /*
1446          * SCSI-II RESERVE cmd is not expected in polled mode.
1447          * If this changes it needs to be handled for the polled scenario.
1448          */
1449         flags = vpkt->vpkt_hba_pkt->pkt_flags;
1450 
1451         /*
1452          * Set the path_instance *before* sending the scsi_pkt down the path
1453          * to mpxio's pHCI so that additional path abstractions at a pHCI
1454          * level (like maybe iSCSI at some point in the future) can update
1455          * the path_instance.
1456          */
1457         if (scsi_pkt_allocated_correctly(vpkt->vpkt_hba_pkt))
1458                 vpkt->vpkt_hba_pkt->pkt_path_instance =
1459                     mdi_pi_get_path_instance(vpkt->vpkt_path);
1460 
1461         rval = scsi_transport(vpkt->vpkt_hba_pkt);
1462         if (rval == TRAN_ACCEPT) {
1463                 if (flags & FLAG_NOINTR) {
1464                         struct scsi_pkt *tpkt = vpkt->vpkt_tgt_pkt;
1465                         struct scsi_pkt *pkt = vpkt->vpkt_hba_pkt;
1466 
1467                         ASSERT(tpkt != NULL);
1468                         *(tpkt->pkt_scbp) = *(pkt->pkt_scbp);
1469                         tpkt->pkt_resid = pkt->pkt_resid;
1470                         tpkt->pkt_state = pkt->pkt_state;
1471                         tpkt->pkt_statistics = pkt->pkt_statistics;
1472                         tpkt->pkt_reason = pkt->pkt_reason;
1473 
1474                         if ((*(pkt->pkt_scbp) == STATUS_CHECK) &&
1475                             (pkt->pkt_state & STATE_ARQ_DONE)) {
1476                                 bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
1477                                     vpkt->vpkt_tgt_init_scblen);
1478                         }
1479 
1480                         VHCI_DECR_PATH_CMDCOUNT(svp);
1481                         if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1482                                 scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1483                                 vpkt->vpkt_hba_pkt = NULL;
1484                                 if (vpkt->vpkt_path) {
1485                                         mdi_rele_path(vpkt->vpkt_path);
1486                                         vpkt->vpkt_path = NULL;
1487                                 }
1488                         }
1489                         /*
1490                          * This path will not automatically retry pkts
1491                          * internally, therefore, vpkt_org_vpkt should
1492                          * never be set.
1493                          */
1494                         ASSERT(vpkt->vpkt_org_vpkt == NULL);
1495                         scsi_hba_pkt_comp(tpkt);
1496                 }
1497                 return (rval);
1498         } else if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
1499             (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
1500             ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
1501                 /* the command exited with bad status */
1502                 sema_v(&vlun->svl_pgr_sema);
1503         } else if (vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PRIN) {
1504                 /* the command exited with bad status */
1505                 sema_v(&vlun->svl_pgr_sema);
1506         } else if (pkt_reserve_cmd) {
1507                 VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1508                     "!vhci_scsi_start: reserve failed vlun 0x%p",
1509                     (void *)vlun));
1510                 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1511                 if (restore_lbp)
1512                         (void) mdi_set_lb_policy(cdip, lbp);
1513         }
1514 
1515         ASSERT(vpkt->vpkt_hba_pkt != NULL);
1516         VHCI_DECR_PATH_CMDCOUNT(svp);
1517 
1518         /* Do not destroy phci packet information for PKT_DMA_PARTIAL */
1519         if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1520                 scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1521                 vpkt->vpkt_hba_pkt = NULL;
1522                 if (vpkt->vpkt_path) {
1523                         MDI_PI_ERRSTAT(vpkt->vpkt_path, MDI_PI_TRANSERR);
1524                         mdi_rele_path(vpkt->vpkt_path);
1525                         vpkt->vpkt_path = NULL;
1526                 }
1527         }
1528         return (TRAN_BUSY);
1529 }
1530 
1531 /*
1532  * Function name : vhci_scsi_reset()
1533  *
1534  * Return Values : 0 - reset failed
1535  *                 1 - reset succeeded
1536  */
1537 
1538 /* ARGSUSED */
1539 static int
1540 vhci_scsi_reset(struct scsi_address *ap, int level)
1541 {
1542         int rval = 0;
1543 
1544         cmn_err(CE_WARN, "!vhci_scsi_reset 0x%x", level);
1545         if ((level == RESET_TARGET) || (level == RESET_LUN)) {
1546                 return (vhci_scsi_reset_target(ap, level, TRUE));
1547         } else if (level == RESET_ALL) {
1548                 return (vhci_scsi_reset_bus(ap));
1549         }
1550 
1551         return (rval);
1552 }
1553 
1554 /*
1555  * vhci_recovery_reset:
1556  *      Issues reset to the device
1557  * Input:
1558  *      vlun - vhci lun pointer of the device
1559  *      ap - address of the device
1560  *      select_path:
1561  *              If select_path is FALSE, then the address specified in ap is
1562  *              the path on which reset will be issued.
1563  *              If select_path is TRUE, then path is obtained by calling
1564  *              mdi_select_path.
1565  *
1566  *      recovery_depth:
1567  *              Caller can specify the level of reset.
1568  *              VHCI_DEPTH_LUN -
1569  *                      Issues LUN RESET if device supports lun reset.
1570  *              VHCI_DEPTH_TARGET -
1571  *                      If Lun Reset fails or the device does not support
1572  *                      Lun Reset, issues TARGET RESET
1573  *              VHCI_DEPTH_ALL -
1574  *                      If Lun Reset fails or the device does not support
1575  *                      Lun Reset, issues TARGET RESET.
1576  *                      If TARGET RESET does not succeed, issues Bus Reset.
1577  */
1578 
1579 static int
1580 vhci_recovery_reset(scsi_vhci_lun_t *vlun, struct scsi_address *ap,
1581         uint8_t select_path, uint8_t recovery_depth)
1582 {
1583         int     ret = 0;
1584 
1585         ASSERT(ap != NULL);
1586 
1587         if (vlun && vlun->svl_support_lun_reset == 1) {
1588                 ret = vhci_scsi_reset_target(ap, RESET_LUN,
1589                     select_path);
1590         }
1591 
1592         recovery_depth--;
1593 
1594         if ((ret == 0) && recovery_depth) {
1595                 ret = vhci_scsi_reset_target(ap, RESET_TARGET,
1596                     select_path);
1597                 recovery_depth--;
1598         }
1599 
1600         if ((ret == 0) && recovery_depth) {
1601                 (void) scsi_reset(ap, RESET_ALL);
1602         }
1603 
1604         return (ret);
1605 }
1606 
1607 /*
1608  * Note: The scsi_address passed to this routine could be the scsi_address
1609  * for the virtual device or the physical device. No assumptions should be
1610  * made in this routine about the contents of the ap structure.
1611  * Further, note that the child dip would be the dip of the ssd node regardless
1612  * of the scsi_address passed in.
1613  */
1614 static int
1615 vhci_scsi_reset_target(struct scsi_address *ap, int level, uint8_t select_path)
1616 {
1617         dev_info_t              *vdip, *cdip;
1618         mdi_pathinfo_t          *pip = NULL;
1619         mdi_pathinfo_t          *npip = NULL;
1620         int                     rval = -1;
1621         scsi_vhci_priv_t        *svp = NULL;
1622         struct scsi_address     *pap = NULL;
1623         scsi_hba_tran_t         *hba = NULL;
1624         int                     sps;
1625         struct scsi_vhci        *vhci = NULL;
1626 
1627         if (select_path != TRUE) {
1628                 ASSERT(ap != NULL);
1629                 if (level == RESET_LUN) {
1630                         hba = ap->a_hba_tran;
1631                         ASSERT(hba != NULL);
1632                         return (hba->tran_reset(ap, RESET_LUN));
1633                 }
1634                 return (scsi_reset(ap, level));
1635         }
1636 
1637         cdip = ADDR2DIP(ap);
1638         ASSERT(cdip != NULL);
1639         vdip = ddi_get_parent(cdip);
1640         ASSERT(vdip != NULL);
1641         vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
1642         ASSERT(vhci != NULL);
1643 
1644         rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH, NULL, &pip);
1645         if ((rval != MDI_SUCCESS) || (pip == NULL)) {
1646                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
1647                     "Unable to get a path, dip 0x%p", (void *)cdip));
1648                 return (0);
1649         }
1650 again:
1651         svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
1652         if (svp == NULL) {
1653                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
1654                     "priv is NULL, pip 0x%p", (void *)pip));
1655                 mdi_rele_path(pip);
1656                 return (0);
1657         }
1658 
1659         if (svp->svp_psd == NULL) {
1660                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
1661                     "psd is NULL, pip 0x%p, svp 0x%p",
1662                     (void *)pip, (void *)svp));
1663                 mdi_rele_path(pip);
1664                 return (0);
1665         }
1666 
1667         pap = &svp->svp_psd->sd_address;
1668         hba = pap->a_hba_tran;
1669 
1670         ASSERT(pap != NULL);
1671         ASSERT(hba != NULL);
1672 
1673         if (hba->tran_reset != NULL) {
1674                 if (hba->tran_reset(pap, level) == 0) {
1675                         vhci_log(CE_WARN, vdip, "!%s%d: "
1676                             "path %s, reset %d failed",
1677                             ddi_driver_name(cdip), ddi_get_instance(cdip),
1678                             mdi_pi_spathname(pip), level);
1679 
1680                         /*
1681                          * Select next path and issue the reset, repeat
1682                          * until all paths are exhausted
1683                          */
1684                         sps = mdi_select_path(cdip, NULL,
1685                             MDI_SELECT_ONLINE_PATH, pip, &npip);
1686                         if ((sps != MDI_SUCCESS) || (npip == NULL)) {
1687                                 mdi_rele_path(pip);
1688                                 return (0);
1689                         }
1690                         mdi_rele_path(pip);
1691                         pip = npip;
1692                         goto again;
1693                 }
1694                 mdi_rele_path(pip);
1695                 mutex_enter(&vhci->vhci_mutex);
1696                 scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
1697                     &vhci->vhci_reset_notify_listf);
1698                 mutex_exit(&vhci->vhci_mutex);
1699                 VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_scsi_reset_target: "
1700                     "reset %d sent down pip:%p for cdip:%p\n", level,
1701                     (void *)pip, (void *)cdip));
1702                 return (1);
1703         }
1704         mdi_rele_path(pip);
1705         return (0);
1706 }
1707 
1708 
/*
 * vhci_scsi_reset_bus:
 *      Bus-level reset entry used by vhci_scsi_reset() for RESET_ALL.
 *      No reset is issued here; the request is unconditionally reported
 *      as successful (1).
 */
/* ARGSUSED */
static int
vhci_scsi_reset_bus(struct scsi_address *ap)
{
        int     reset_ok = 1;

        return (reset_ok);
}
1715 
1716 
1717 /*
1718  * called by vhci_getcap and vhci_setcap to get and set (respectively)
1719  * SCSI capabilities
1720  */
1721 /* ARGSUSED */
1722 static int
1723 vhci_commoncap(struct scsi_address *ap, char *cap,
1724     int val, int tgtonly, int doset)
1725 {
1726         struct scsi_vhci                *vhci = ADDR2VHCI(ap);
1727         struct scsi_vhci_lun            *vlun = ADDR2VLUN(ap);
1728         int                     cidx;
1729         int                     rval = 0;
1730 
1731         if (cap == (char *)0) {
1732                 VHCI_DEBUG(3, (CE_WARN, vhci->vhci_dip,
1733                     "!vhci_commoncap: invalid arg"));
1734                 return (rval);
1735         }
1736 
1737         if (vlun == NULL) {
1738                 VHCI_DEBUG(3, (CE_WARN, vhci->vhci_dip,
1739                     "!vhci_commoncap: vlun is null"));
1740                 return (rval);
1741         }
1742 
1743         if ((cidx = scsi_hba_lookup_capstr(cap)) == -1) {
1744                 return (UNDEFINED);
1745         }
1746 
1747         /*
1748          * Process setcap request.
1749          */
1750         if (doset) {
1751                 /*
1752                  * At present, we can only set binary (0/1) values
1753                  */
1754                 switch (cidx) {
1755                 case SCSI_CAP_ARQ:
1756                         if (val == 0) {
1757                                 rval = 0;
1758                         } else {
1759                                 rval = 1;
1760                         }
1761                         break;
1762 
1763                 case SCSI_CAP_LUN_RESET:
1764                         if (tgtonly == 0) {
1765                                 VHCI_DEBUG(1, (CE_WARN, vhci->vhci_dip,
1766                                     "scsi_vhci_setcap: "
1767                                     "Returning error since whom = 0"));
1768                                 rval = -1;
1769                                 break;
1770                         }
1771                         /*
1772                          * Set the capability accordingly.
1773                          */
1774                         mutex_enter(&vlun->svl_mutex);
1775                         vlun->svl_support_lun_reset = val;
1776                         rval = val;
1777                         mutex_exit(&vlun->svl_mutex);
1778                         break;
1779 
1780                 case SCSI_CAP_SECTOR_SIZE:
1781                         mutex_enter(&vlun->svl_mutex);
1782                         vlun->svl_sector_size = val;
1783                         vlun->svl_setcap_done = 1;
1784                         mutex_exit(&vlun->svl_mutex);
1785                         (void) vhci_pHCI_cap(ap, cap, val, tgtonly, NULL);
1786 
1787                         /* Always return success */
1788                         rval = 1;
1789                         break;
1790 
1791                 default:
1792                         VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1793                             "!vhci_setcap: unsupported %d", cidx));
1794                         rval = UNDEFINED;
1795                         break;
1796                 }
1797 
1798                 VHCI_DEBUG(6, (CE_NOTE, vhci->vhci_dip,
1799                     "!set cap: cap=%s, val/tgtonly/doset/rval = "
1800                     "0x%x/0x%x/0x%x/%d\n",
1801                     cap, val, tgtonly, doset, rval));
1802 
1803         } else {
1804                 /*
1805                  * Process getcap request.
1806                  */
1807                 switch (cidx) {
1808                 case SCSI_CAP_DMA_MAX:
1809                         /*
1810                          * For X86 this capability is caught in scsi_ifgetcap().
1811                          * XXX Should this be getting the value from the pHCI?
1812                          */
1813                         rval = (int)VHCI_DMA_MAX_XFER_CAP;
1814                         break;
1815 
1816                 case SCSI_CAP_INITIATOR_ID:
1817                         rval = 0x00;
1818                         break;
1819 
1820                 case SCSI_CAP_ARQ:
1821                 case SCSI_CAP_RESET_NOTIFICATION:
1822                 case SCSI_CAP_TAGGED_QING:
1823                         rval = 1;
1824                         break;
1825 
1826                 case SCSI_CAP_SCSI_VERSION:
1827                         rval = 3;
1828                         break;
1829 
1830                 case SCSI_CAP_INTERCONNECT_TYPE:
1831                         rval = INTERCONNECT_FABRIC;
1832                         break;
1833 
1834                 case SCSI_CAP_LUN_RESET:
1835                         /*
1836                          * scsi_vhci will always return success for LUN reset.
1837                          * When request for doing LUN reset comes
1838                          * through scsi_reset entry point, at that time attempt
1839                          * will be made to do reset through all the possible
1840                          * paths.
1841                          */
1842                         mutex_enter(&vlun->svl_mutex);
1843                         rval = vlun->svl_support_lun_reset;
1844                         mutex_exit(&vlun->svl_mutex);
1845                         VHCI_DEBUG(4, (CE_WARN, vhci->vhci_dip,
1846                             "scsi_vhci_getcap:"
1847                             "Getting the Lun reset capability %d", rval));
1848                         break;
1849 
1850                 case SCSI_CAP_SECTOR_SIZE:
1851                         mutex_enter(&vlun->svl_mutex);
1852                         rval = vlun->svl_sector_size;
1853                         mutex_exit(&vlun->svl_mutex);
1854                         break;
1855 
1856                 case SCSI_CAP_CDB_LEN:
1857                         rval = VHCI_SCSI_CDB_SIZE;
1858                         break;
1859 
1860                 case SCSI_CAP_DMA_MAX_ARCH:
1861                         /*
1862                          * For X86 this capability is caught in scsi_ifgetcap().
1863                          * XXX Should this be getting the value from the pHCI?
1864                          */
1865                         rval = 0;
1866                         break;
1867 
1868                 default:
1869                         VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1870                             "!vhci_getcap: unsupported %d", cidx));
1871                         rval = UNDEFINED;
1872                         break;
1873                 }
1874 
1875                 VHCI_DEBUG(6, (CE_NOTE, vhci->vhci_dip,
1876                     "!get cap: cap=%s, val/tgtonly/doset/rval = "
1877                     "0x%x/0x%x/0x%x/%d\n",
1878                     cap, val, tgtonly, doset, rval));
1879         }
1880         return (rval);
1881 }
1882 
1883 
/*
 * Function name : vhci_scsi_getcap()
 *
 * Thin wrapper that asks vhci_commoncap() to look up a capability
 * (doset == 0); the value argument is unused for a get and passed as 0.
 */
static int
vhci_scsi_getcap(struct scsi_address *ap, char *cap, int whom)
{
        int     capval;

        capval = vhci_commoncap(ap, cap, 0, whom, 0);
        return (capval);
}
1893 
/*
 * Function name : vhci_scsi_setcap()
 *
 * Thin wrapper that asks vhci_commoncap() to set a capability to value
 * (doset == 1) and returns its result unchanged.
 */
static int
vhci_scsi_setcap(struct scsi_address *ap, char *cap, int value, int whom)
{
        int     setrv;

        setrv = vhci_commoncap(ap, cap, value, whom, 1);
        return (setrv);
}
1899 
/*
 * Function name : vhci_scsi_abort()
 *
 * Abort entry point.  Nothing is aborted here; the function always
 * returns 0 regardless of its arguments.
 */
/* ARGSUSED */
static int
vhci_scsi_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
{
        int     abort_rv = 0;

        return (abort_rv);
}
1909 
1910 /*
1911  * Function name : vhci_scsi_init_pkt
1912  *
1913  * Return Values : pointer to scsi_pkt, or NULL
1914  */
1915 /* ARGSUSED */
1916 static struct scsi_pkt *
1917 vhci_scsi_init_pkt(struct scsi_address *ap, struct scsi_pkt *pkt,
1918         struct buf *bp, int cmdlen, int statuslen, int tgtlen,
1919         int flags, int (*callback)(caddr_t), caddr_t arg)
1920 {
1921         struct scsi_vhci        *vhci = ADDR2VHCI(ap);
1922         struct vhci_pkt         *vpkt;
1923         int                     rval;
1924         int                     newpkt = 0;
1925         struct scsi_pkt         *pktp;
1926 
1927 
1928         if (pkt == NULL) {
1929                 if (cmdlen > VHCI_SCSI_CDB_SIZE) {
1930                         if ((cmdlen != VHCI_SCSI_OSD_CDB_SIZE) ||
1931                             ((flags & VHCI_SCSI_OSD_PKT_FLAGS) !=
1932                             VHCI_SCSI_OSD_PKT_FLAGS)) {
1933                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
1934                                     "!init pkt: cdb size not supported\n"));
1935                                 return (NULL);
1936                         }
1937                 }
1938 
1939                 pktp = scsi_hba_pkt_alloc(vhci->vhci_dip,
1940                     ap, cmdlen, statuslen, tgtlen, sizeof (*vpkt), callback,
1941                     arg);
1942 
1943                 if (pktp == NULL) {
1944                         return (NULL);
1945                 }
1946 
1947                 /* Get the vhci's private structure */
1948                 vpkt = (struct vhci_pkt *)(pktp->pkt_ha_private);
1949                 ASSERT(vpkt);
1950 
1951                 /* Save the target driver's packet */
1952                 vpkt->vpkt_tgt_pkt = pktp;
1953 
1954                 /*
1955                  * Save pkt_tgt_init_pkt fields if deferred binding
1956                  * is needed or for other purposes.
1957                  */
1958                 vpkt->vpkt_tgt_init_pkt_flags = flags;
1959                 vpkt->vpkt_flags = (callback == NULL_FUNC) ? CFLAG_NOWAIT : 0;
1960                 vpkt->vpkt_state = VHCI_PKT_IDLE;
1961                 vpkt->vpkt_tgt_init_cdblen = cmdlen;
1962                 vpkt->vpkt_tgt_init_scblen = statuslen;
1963                 newpkt = 1;
1964         } else { /* pkt not NULL */
1965                 vpkt = pkt->pkt_ha_private;
1966         }
1967 
1968         VHCI_DEBUG(8, (CE_NOTE, NULL, "vhci_scsi_init_pkt "
1969             "vpkt %p flags %x\n", (void *)vpkt, flags));
1970 
1971         /* Clear any stale error flags */
1972         if (bp) {
1973                 bioerror(bp, 0);
1974         }
1975 
1976         vpkt->vpkt_tgt_init_bp = bp;
1977 
1978         if (flags & PKT_DMA_PARTIAL) {
1979 
1980                 /*
1981                  * Immediate binding is needed.
1982                  * Target driver may not set this flag in next invocation.
1983                  * vhci has to remember this flag was set during first
1984                  * invocation of vhci_scsi_init_pkt.
1985                  */
1986                 vpkt->vpkt_flags |= CFLAG_DMA_PARTIAL;
1987         }
1988 
1989         if (vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) {
1990 
1991                 /*
1992                  * Re-initialize some of the target driver packet state
1993                  * information.
1994                  */
1995                 vpkt->vpkt_tgt_pkt->pkt_state = 0;
1996                 vpkt->vpkt_tgt_pkt->pkt_statistics = 0;
1997                 vpkt->vpkt_tgt_pkt->pkt_reason = 0;
1998 
1999                 /*
2000                  * Binding a vpkt->vpkt_path for this IO at init_time.
2001                  * If an IO error happens later, target driver will clear
2002                  * this vpkt->vpkt_path binding before re-init IO again.
2003                  */
2004                 VHCI_DEBUG(8, (CE_NOTE, NULL,
2005                     "vhci_scsi_init_pkt: calling v_b_t %p, newpkt %d\n",
2006                     (void *)vpkt, newpkt));
2007                 if (pkt && vpkt->vpkt_hba_pkt) {
2008                         VHCI_DEBUG(4, (CE_NOTE, NULL,
2009                             "v_s_i_p calling update_pHCI_pkt resid %ld\n",
2010                             pkt->pkt_resid));
2011                         vhci_update_pHCI_pkt(vpkt, pkt);
2012                 }
2013                 if (callback == SLEEP_FUNC) {
2014                         rval = vhci_bind_transport(
2015                             ap, vpkt, flags, callback);
2016                 } else {
2017                         rval = vhci_bind_transport(
2018                             ap, vpkt, flags, NULL_FUNC);
2019                 }
2020                 VHCI_DEBUG(8, (CE_NOTE, NULL,
2021                     "vhci_scsi_init_pkt: v_b_t called 0x%p rval 0x%x\n",
2022                     (void *)vpkt, rval));
2023                 if (bp) {
2024                         if (rval == TRAN_FATAL_ERROR) {
2025                                 /*
2026                                  * No paths available. Could not bind
2027                                  * any pHCI. Setting EFAULT as a way
2028                                  * to indicate no DMA is mapped.
2029                                  */
2030                                 bioerror(bp, EFAULT);
2031                         } else {
2032                                 /*
2033                                  * Do not indicate any pHCI errors to
2034                                  * target driver otherwise.
2035                                  */
2036                                 bioerror(bp, 0);
2037                         }
2038                 }
2039                 if (rval != TRAN_ACCEPT) {
2040                         VHCI_DEBUG(8, (CE_NOTE, NULL,
2041                             "vhci_scsi_init_pkt: "
2042                             "v_b_t failed 0x%p newpkt %x\n",
2043                             (void *)vpkt, newpkt));
2044                         if (newpkt) {
2045                                 scsi_hba_pkt_free(ap,
2046                                     vpkt->vpkt_tgt_pkt);
2047                         }
2048                         return (NULL);
2049                 }
2050                 ASSERT(vpkt->vpkt_hba_pkt != NULL);
2051                 ASSERT(vpkt->vpkt_path != NULL);
2052 
2053                 /* Update the resid for the target driver */
2054                 vpkt->vpkt_tgt_pkt->pkt_resid =
2055                     vpkt->vpkt_hba_pkt->pkt_resid;
2056         }
2057 
2058         return (vpkt->vpkt_tgt_pkt);
2059 }
2060 
2061 /*
2062  * Function name : vhci_scsi_destroy_pkt
2063  *
2064  * Return Values : none
2065  */
2066 static void
2067 vhci_scsi_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
2068 {
2069         struct vhci_pkt         *vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
2070 
2071         VHCI_DEBUG(8, (CE_NOTE, NULL,
2072             "vhci_scsi_destroy_pkt: vpkt 0x%p\n", (void *)vpkt));
2073 
2074         vpkt->vpkt_tgt_init_pkt_flags = 0;
2075         if (vpkt->vpkt_hba_pkt) {
2076                 scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
2077                 vpkt->vpkt_hba_pkt = NULL;
2078         }
2079         if (vpkt->vpkt_path) {
2080                 mdi_rele_path(vpkt->vpkt_path);
2081                 vpkt->vpkt_path = NULL;
2082         }
2083 
2084         ASSERT(vpkt->vpkt_state != VHCI_PKT_ISSUED);
2085         scsi_hba_pkt_free(ap, vpkt->vpkt_tgt_pkt);
2086 }
2087 
2088 /*
2089  * Function name : vhci_scsi_dmafree()
2090  *
2091  * Return Values : none
2092  */
2093 /*ARGSUSED*/
2094 static void
2095 vhci_scsi_dmafree(struct scsi_address *ap, struct scsi_pkt *pkt)
2096 {
2097         struct vhci_pkt *vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
2098 
2099         VHCI_DEBUG(6, (CE_NOTE, NULL,
2100             "vhci_scsi_dmafree: vpkt 0x%p\n", (void *)vpkt));
2101 
2102         ASSERT(vpkt != NULL);
2103         if (vpkt->vpkt_hba_pkt) {
2104                 scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
2105                 vpkt->vpkt_hba_pkt = NULL;
2106         }
2107         if (vpkt->vpkt_path) {
2108                 mdi_rele_path(vpkt->vpkt_path);
2109                 vpkt->vpkt_path = NULL;
2110         }
2111 }
2112 
2113 /*
2114  * Function name : vhci_scsi_sync_pkt()
2115  *
2116  * Return Values : none
2117  */
2118 /*ARGSUSED*/
2119 static void
2120 vhci_scsi_sync_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
2121 {
2122         struct vhci_pkt *vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
2123 
2124         ASSERT(vpkt != NULL);
2125         if (vpkt->vpkt_hba_pkt) {
2126                 scsi_sync_pkt(vpkt->vpkt_hba_pkt);
2127         }
2128 }
2129 
2130 /*
2131  * routine for reset notification setup, to register or cancel.
2132  */
2133 static int
2134 vhci_scsi_reset_notify(struct scsi_address *ap, int flag,
2135     void (*callback)(caddr_t), caddr_t arg)
2136 {
2137         struct scsi_vhci *vhci = ADDR2VHCI(ap);
2138         return (scsi_hba_reset_notify_setup(ap, flag, callback, arg,
2139             &vhci->vhci_mutex, &vhci->vhci_reset_notify_listf));
2140 }
2141 
2142 static int
2143 vhci_scsi_get_name_bus_addr(struct scsi_device *sd,
2144     char *name, int len, int bus_addr)
2145 {
2146         dev_info_t              *cdip;
2147         char                    *guid;
2148         scsi_vhci_lun_t         *vlun;
2149 
2150         ASSERT(sd != NULL);
2151         ASSERT(name != NULL);
2152 
2153         *name = 0;
2154         cdip = sd->sd_dev;
2155 
2156         ASSERT(cdip != NULL);
2157 
2158         if (mdi_component_is_client(cdip, NULL) != MDI_SUCCESS)
2159                 return (1);
2160 
2161         if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, PROPFLAGS,
2162             MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS)
2163                 return (1);
2164 
2165         /*
2166          * Message is "sd# at scsi_vhci0: unit-address <guid>: <bus_addr>".
2167          *      <guid>            bus_addr argument == 0
2168          *      <bus_addr>        bus_addr argument != 0
2169          * Since the <guid> is already provided with unit-address, we just
2170          * provide failover module in <bus_addr> to keep output shorter.
2171          */
2172         vlun = ADDR2VLUN(&sd->sd_address);
2173         if (bus_addr == 0) {
2174                 /* report the guid:  */
2175                 (void) snprintf(name, len, "g%s", guid);
2176         } else if (vlun && vlun->svl_fops_name) {
2177                 /* report the name of the failover module */
2178                 (void) snprintf(name, len, "%s", vlun->svl_fops_name);
2179         }
2180 
2181         ddi_prop_free(guid);
2182         return (1);
2183 }
2184 
/*
 * vhci_scsi_get_bus_addr:
 *      Bus-address flavor of vhci_scsi_get_name_bus_addr() (bus_addr == 1).
 */
static int
vhci_scsi_get_bus_addr(struct scsi_device *sd, char *name, int len)
{
        int     rv;

        rv = vhci_scsi_get_name_bus_addr(sd, name, len, 1);
        return (rv);
}
2190 
/*
 * vhci_scsi_get_name:
 *      Name (guid) flavor of vhci_scsi_get_name_bus_addr() (bus_addr == 0).
 */
static int
vhci_scsi_get_name(struct scsi_device *sd, char *name, int len)
{
        int     rv;

        rv = vhci_scsi_get_name_bus_addr(sd, name, len, 0);
        return (rv);
}
2196 
/*
 * Return a pointer to the guid part of the devnm.
 * devnm format is "nodename@busaddr", busaddr format is "gGUID".
 *
 * The returned pointer refers into devnm itself (just past the "@g").
 * NULL is returned when devnm is NULL, has no '@', or the character after
 * the first '@' is not 'g'.
 */
static char *
vhci_devnm_to_guid(char *devnm)
{
        char    *at;

        if (devnm == NULL)
                return (NULL);

        /* Advance to the first '@', or to the terminator if there is none */
        for (at = devnm; *at != '\0'; at++) {
                if (*at == '@')
                        break;
        }

        /* at[1] is only examined once at[0] is known to be '@' */
        if ((at[0] != '@') || (at[1] != 'g'))
                return (NULL);

        return (&at[2]);
}
2215 
2216 static int
2217 vhci_bind_transport(struct scsi_address *ap, struct vhci_pkt *vpkt, int flags,
2218     int (*func)(caddr_t))
2219 {
2220         struct scsi_vhci        *vhci = ADDR2VHCI(ap);
2221         dev_info_t              *cdip = ADDR2DIP(ap);
2222         mdi_pathinfo_t          *pip = NULL;
2223         mdi_pathinfo_t          *npip = NULL;
2224         scsi_vhci_priv_t        *svp = NULL;
2225         struct scsi_device      *psd = NULL;
2226         struct scsi_address     *address = NULL;
2227         struct scsi_pkt         *pkt = NULL;
2228         int                     rval = -1;
2229         int                     pgr_sema_held = 0;
2230         int                     held;
2231         int                     mps_flag = MDI_SELECT_ONLINE_PATH;
2232         struct scsi_vhci_lun    *vlun;
2233         int                     path_instance = 0;
2234 
2235         vlun = ADDR2VLUN(ap);
2236         ASSERT(vlun != 0);
2237 
2238         if ((vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PROUT) &&
2239             (((vpkt->vpkt_tgt_pkt->pkt_cdbp[1] & 0x1f) ==
2240             VHCI_PROUT_REGISTER) ||
2241             ((vpkt->vpkt_tgt_pkt->pkt_cdbp[1] & 0x1f) ==
2242             VHCI_PROUT_R_AND_IGNORE))) {
2243                 if (!sema_tryp(&vlun->svl_pgr_sema))
2244                         return (TRAN_BUSY);
2245                 pgr_sema_held = 1;
2246                 if (vlun->svl_first_path != NULL) {
2247                         rval = mdi_select_path(cdip, NULL,
2248                             MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH,
2249                             NULL, &pip);
2250                         if ((rval != MDI_SUCCESS) || (pip == NULL)) {
2251                                 VHCI_DEBUG(4, (CE_NOTE, NULL,
2252                                     "vhci_bind_transport: path select fail\n"));
2253                         } else {
2254                                 npip = pip;
2255                                 do {
2256                                         if (npip == vlun->svl_first_path) {
2257                                                 VHCI_DEBUG(4, (CE_NOTE, NULL,
2258                                                     "vhci_bind_transport: "
2259                                                     "valid first path 0x%p\n",
2260                                                     (void *)
2261                                                     vlun->svl_first_path));
2262                                                 pip = vlun->svl_first_path;
2263                                                 goto bind_path;
2264                                         }
2265                                         pip = npip;
2266                                         rval = mdi_select_path(cdip, NULL,
2267                                             MDI_SELECT_ONLINE_PATH |
2268                                             MDI_SELECT_STANDBY_PATH,
2269                                             pip, &npip);
2270                                         mdi_rele_path(pip);
2271                                 } while ((rval == MDI_SUCCESS) &&
2272                                     (npip != NULL));
2273                         }
2274                 }
2275 
2276                 if (vlun->svl_first_path) {
2277                         VHCI_DEBUG(4, (CE_NOTE, NULL,
2278                             "vhci_bind_transport: invalid first path 0x%p\n",
2279                             (void *)vlun->svl_first_path));
2280                         vlun->svl_first_path = NULL;
2281                 }
2282         } else if (vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PRIN) {
2283                 if ((vpkt->vpkt_state & VHCI_PKT_THRU_TASKQ) == 0) {
2284                         if (!sema_tryp(&vlun->svl_pgr_sema))
2285                                 return (TRAN_BUSY);
2286                 }
2287                 pgr_sema_held = 1;
2288         }
2289 
2290         /*
2291          * If the path is already bound for PKT_PARTIAL_DMA case,
2292          * try to use the same path.
2293          */
2294         if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) && vpkt->vpkt_path) {
2295                 VHCI_DEBUG(4, (CE_NOTE, NULL,
2296                     "vhci_bind_transport: PKT_PARTIAL_DMA "
2297                     "vpkt 0x%p, path 0x%p\n",
2298                     (void *)vpkt, (void *)vpkt->vpkt_path));
2299                 pip = vpkt->vpkt_path;
2300                 goto bind_path;
2301         }
2302 
2303         /*
2304          * Get path_instance. Non-zero with FLAG_PKT_PATH_INSTANCE set
2305          * indicates that mdi_select_path should be called to select a
2306          * specific instance.
2307          *
2308          * NB: Condition pkt_path_instance reference on proper allocation.
2309          */
2310         if ((vpkt->vpkt_tgt_pkt->pkt_flags & FLAG_PKT_PATH_INSTANCE) &&
2311             scsi_pkt_allocated_correctly(vpkt->vpkt_tgt_pkt)) {
2312                 path_instance = vpkt->vpkt_tgt_pkt->pkt_path_instance;
2313         }
2314 
2315         /*
2316          * If reservation is active bind the transport directly to the pip
2317          * with the reservation.
2318          */
2319         if (vpkt->vpkt_hba_pkt == NULL) {
2320                 if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
2321                         if (MDI_PI_IS_ONLINE(vlun->svl_resrv_pip)) {
2322                                 pip = vlun->svl_resrv_pip;
2323                                 mdi_hold_path(pip);
2324                                 vlun->svl_waiting_for_activepath = 0;
2325                                 rval = MDI_SUCCESS;
2326                                 goto bind_path;
2327                         } else {
2328                                 if (pgr_sema_held) {
2329                                         sema_v(&vlun->svl_pgr_sema);
2330                                 }
2331                                 return (TRAN_BUSY);
2332                         }
2333                 }
2334 try_again:
2335                 rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
2336                     path_instance ? MDI_SELECT_PATH_INSTANCE : 0,
2337                     (void *)(intptr_t)path_instance, &pip);
2338                 if (rval == MDI_BUSY) {
2339                         if (pgr_sema_held) {
2340                                 sema_v(&vlun->svl_pgr_sema);
2341                         }
2342                         return (TRAN_BUSY);
2343                 } else if (rval == MDI_DEVI_ONLINING) {
2344                         /*
2345                          * if we are here then we are in the midst of
2346                          * an attach/probe of the client device.
2347                          * We attempt to bind to ONLINE path if available,
2348                          * else it is OK to bind to a STANDBY path (instead
2349                          * of triggering a failover) because IO associated
2350                          * with attach/probe (eg. INQUIRY, block 0 read)
2351                          * are completed by targets even on passive paths
2352                          * If no ONLINE paths available, it is important
2353                          * to set svl_waiting_for_activepath for two
2354                          * reasons: (1) avoid sense analysis in the
2355                          * "external failure detection" codepath in
2356                          * vhci_intr().  Failure to do so will result in
2357                          * infinite loop (unless an ONLINE path becomes
2358                          * available at some point) (2) avoid
2359                          * unnecessary failover (see "---Waiting For Active
2360                          * Path---" comment below).
2361                          */
2362                         VHCI_DEBUG(1, (CE_NOTE, NULL, "!%p in onlining "
2363                             "state\n", (void *)cdip));
2364                         pip = NULL;
2365                         rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
2366                             mps_flag, NULL, &pip);
2367                         if ((rval != MDI_SUCCESS) || (pip == NULL)) {
2368                                 if (vlun->svl_waiting_for_activepath == 0) {
2369                                         vlun->svl_waiting_for_activepath = 1;
2370                                         vlun->svl_wfa_time = gethrtime();
2371                                 }
2372                                 mps_flag |= MDI_SELECT_STANDBY_PATH;
2373                                 rval = mdi_select_path(cdip,
2374                                     vpkt->vpkt_tgt_init_bp,
2375                                     mps_flag, NULL, &pip);
2376                                 if ((rval != MDI_SUCCESS) || (pip == NULL)) {
2377                                         if (pgr_sema_held) {
2378                                                 sema_v(&vlun->svl_pgr_sema);
2379                                         }
2380                                         return (TRAN_FATAL_ERROR);
2381                                 }
2382                                 goto bind_path;
2383                         }
2384                 } else if ((rval == MDI_FAILURE) ||
2385                     ((rval == MDI_NOPATH) && (path_instance))) {
2386                         if (pgr_sema_held) {
2387                                 sema_v(&vlun->svl_pgr_sema);
2388                         }
2389                         return (TRAN_FATAL_ERROR);
2390                 }
2391 
2392                 if ((pip == NULL) || (rval == MDI_NOPATH)) {
2393                         while (vlun->svl_waiting_for_activepath) {
2394                                 /*
2395                                  * ---Waiting For Active Path---
2396                                  * This device was discovered across a
2397                                  * passive path; lets wait for a little
2398                                  * bit, hopefully an active path will
2399                                  * show up obviating the need for a
2400                                  * failover
2401                                  */
2402                                 if ((gethrtime() - vlun->svl_wfa_time) >=
2403                                     (60 * NANOSEC)) {
2404                                         vlun->svl_waiting_for_activepath = 0;
2405                                 } else {
2406                                         drv_usecwait(1000);
2407                                         if (vlun->svl_waiting_for_activepath
2408                                             == 0) {
2409                                                 /*
2410                                                  * an active path has come
2411                                                  * online!
2412                                                  */
2413                                                 goto try_again;
2414                                         }
2415                                 }
2416                         }
2417                         VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
2418                         if (!held) {
2419                                 VHCI_DEBUG(4, (CE_NOTE, NULL,
2420                                     "!Lun not held\n"));
2421                                 if (pgr_sema_held) {
2422                                         sema_v(&vlun->svl_pgr_sema);
2423                                 }
2424                                 return (TRAN_BUSY);
2425                         }
2426                         /*
2427                          * now that the LUN is stable, one last check
2428                          * to make sure no other changes sneaked in
2429                          * (like a path coming online or a
2430                          * failover initiated by another thread)
2431                          */
2432                         pip = NULL;
2433                         rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
2434                             0, NULL, &pip);
2435                         if (pip != NULL) {
2436                                 VHCI_RELEASE_LUN(vlun);
2437                                 vlun->svl_waiting_for_activepath = 0;
2438                                 goto bind_path;
2439                         }
2440 
2441                         /*
2442                          * Check if there is an ONLINE path OR a STANDBY path
2443                          * available. If none is available, do not attempt
2444                          * to do a failover, just return a fatal error at this
2445                          * point.
2446                          */
2447                         npip = NULL;
2448                         rval = mdi_select_path(cdip, NULL,
2449                             (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
2450                             NULL, &npip);
2451                         if ((npip == NULL) || (rval != MDI_SUCCESS)) {
2452                                 /*
2453                                  * No paths available, jus return FATAL error.
2454                                  */
2455                                 VHCI_RELEASE_LUN(vlun);
2456                                 if (pgr_sema_held) {
2457                                         sema_v(&vlun->svl_pgr_sema);
2458                                 }
2459                                 return (TRAN_FATAL_ERROR);
2460                         }
2461                         mdi_rele_path(npip);
2462                         if (!(vpkt->vpkt_state & VHCI_PKT_IN_FAILOVER)) {
2463                                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!invoking "
2464                                     "mdi_failover\n"));
2465                                 rval = mdi_failover(vhci->vhci_dip, cdip,
2466                                     MDI_FAILOVER_ASYNC);
2467                         } else {
2468                                 rval = vlun->svl_failover_status;
2469                         }
2470                         if (rval == MDI_FAILURE) {
2471                                 VHCI_RELEASE_LUN(vlun);
2472                                 if (pgr_sema_held) {
2473                                         sema_v(&vlun->svl_pgr_sema);
2474                                 }
2475                                 return (TRAN_FATAL_ERROR);
2476                         } else if (rval == MDI_BUSY) {
2477                                 VHCI_RELEASE_LUN(vlun);
2478                                 if (pgr_sema_held) {
2479                                         sema_v(&vlun->svl_pgr_sema);
2480                                 }
2481                                 return (TRAN_BUSY);
2482                         } else {
2483                                 if (pgr_sema_held) {
2484                                         sema_v(&vlun->svl_pgr_sema);
2485                                 }
2486                                 vpkt->vpkt_state |= VHCI_PKT_IN_FAILOVER;
2487                                 return (TRAN_BUSY);
2488                         }
2489                 }
2490                 vlun->svl_waiting_for_activepath = 0;
2491 bind_path:
2492                 vpkt->vpkt_path = pip;
2493                 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
2494                 ASSERT(svp != NULL);
2495 
2496                 psd = svp->svp_psd;
2497                 ASSERT(psd != NULL);
2498                 address = &psd->sd_address;
2499         } else {
2500                 pkt = vpkt->vpkt_hba_pkt;
2501                 address = &pkt->pkt_address;
2502         }
2503 
2504         /* Verify match of specified path_instance and selected path_instance */
2505         ASSERT((path_instance == 0) ||
2506             (path_instance == mdi_pi_get_path_instance(vpkt->vpkt_path)));
2507 
2508         /*
2509          * For PKT_PARTIAL_DMA case, call pHCI's scsi_init_pkt whenever
2510          * target driver calls vhci_scsi_init_pkt.
2511          */
2512         if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) &&
2513             vpkt->vpkt_path && vpkt->vpkt_hba_pkt) {
2514                 VHCI_DEBUG(4, (CE_NOTE, NULL,
2515                     "vhci_bind_transport: PKT_PARTIAL_DMA "
2516                     "vpkt 0x%p, path 0x%p hba_pkt 0x%p\n",
2517                     (void *)vpkt, (void *)vpkt->vpkt_path, (void *)pkt));
2518                 pkt = vpkt->vpkt_hba_pkt;
2519                 address = &pkt->pkt_address;
2520         }
2521 
2522         if (pkt == NULL || (vpkt->vpkt_flags & CFLAG_DMA_PARTIAL)) {
2523                 pkt = scsi_init_pkt(address, pkt,
2524                     vpkt->vpkt_tgt_init_bp, vpkt->vpkt_tgt_init_cdblen,
2525                     vpkt->vpkt_tgt_init_scblen, 0, flags, func, NULL);
2526 
2527                 if (pkt == NULL) {
2528                         VHCI_DEBUG(4, (CE_NOTE, NULL,
2529                             "!bind transport: 0x%p 0x%p 0x%p\n",
2530                             (void *)vhci, (void *)psd, (void *)vpkt));
2531                         if ((vpkt->vpkt_hba_pkt == NULL) && vpkt->vpkt_path) {
2532                                 MDI_PI_ERRSTAT(vpkt->vpkt_path,
2533                                     MDI_PI_TRANSERR);
2534                                 mdi_rele_path(vpkt->vpkt_path);
2535                                 vpkt->vpkt_path = NULL;
2536                         }
2537                         if (pgr_sema_held) {
2538                                 sema_v(&vlun->svl_pgr_sema);
2539                         }
2540                         /*
2541                          * Consider it a fatal error if b_error is
2542                          * set as a result of DMA binding failure
2543                          * vs. a condition of being temporarily out of
2544                          * some resource
2545                          */
2546                         if (vpkt->vpkt_tgt_init_bp == NULL ||
2547                             geterror(vpkt->vpkt_tgt_init_bp))
2548                                 return (TRAN_FATAL_ERROR);
2549                         else
2550                                 return (TRAN_BUSY);
2551                 }
2552         }
2553 
2554         pkt->pkt_private = vpkt;
2555         vpkt->vpkt_hba_pkt = pkt;
2556         return (TRAN_ACCEPT);
2557 }
2558 
2559 
2560 /*PRINTFLIKE3*/
2561 void
2562 vhci_log(int level, dev_info_t *dip, const char *fmt, ...)
2563 {
2564         char            buf[256];
2565         va_list         ap;
2566 
2567         va_start(ap, fmt);
2568         (void) vsprintf(buf, fmt, ap);
2569         va_end(ap);
2570 
2571         scsi_log(dip, "scsi_vhci", level, buf);
2572 }
2573 
2574 /* do a PGR out with the information we've saved away */
2575 static int
2576 vhci_do_prout(scsi_vhci_priv_t *svp)
2577 {
2578 
2579         struct scsi_pkt                 *new_pkt;
2580         struct buf                      *bp;
2581         scsi_vhci_lun_t                 *vlun = svp->svp_svl;
2582         int                             rval, retry, nr_retry, ua_retry;
2583         uint8_t                         *sns, skey;
2584 
2585         bp = getrbuf(KM_SLEEP);
2586         bp->b_flags = B_WRITE;
2587         bp->b_resid = 0;
2588         bp->b_un.b_addr = (caddr_t)&vlun->svl_prout;
2589         bp->b_bcount = vlun->svl_bcount;
2590 
2591         VHCI_INCR_PATH_CMDCOUNT(svp);
2592 
2593         new_pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
2594             CDB_GROUP1, sizeof (struct scsi_arq_status), 0, 0,
2595             SLEEP_FUNC, NULL);
2596         if (new_pkt == NULL) {
2597                 VHCI_DECR_PATH_CMDCOUNT(svp);
2598                 freerbuf(bp);
2599                 cmn_err(CE_WARN, "!vhci_do_prout: scsi_init_pkt failed");
2600                 return (0);
2601         }
2602         mutex_enter(&vlun->svl_mutex);
2603         bp->b_un.b_addr = (caddr_t)&vlun->svl_prout;
2604         bp->b_bcount = vlun->svl_bcount;
2605         bcopy(vlun->svl_cdb, new_pkt->pkt_cdbp,
2606             sizeof (vlun->svl_cdb));
2607         new_pkt->pkt_time = vlun->svl_time;
2608         mutex_exit(&vlun->svl_mutex);
2609         new_pkt->pkt_flags = FLAG_NOINTR;
2610 
2611         ua_retry = nr_retry = retry = 0;
2612 again:
2613         rval = vhci_do_scsi_cmd(new_pkt);
2614         if (rval != 1) {
2615                 if ((new_pkt->pkt_reason == CMD_CMPLT) &&
2616                     (SCBP_C(new_pkt) == STATUS_CHECK) &&
2617                     (new_pkt->pkt_state & STATE_ARQ_DONE)) {
2618                         sns = (uint8_t *)
2619                             &(((struct scsi_arq_status *)(uintptr_t)
2620                             (new_pkt->pkt_scbp))->sts_sensedata);
2621                         skey = scsi_sense_key(sns);
2622                         if ((skey == KEY_UNIT_ATTENTION) ||
2623                             (skey == KEY_NOT_READY)) {
2624                                 int max_retry;
2625                                 struct scsi_failover_ops *fops;
2626                                 fops = vlun->svl_fops;
2627                                 rval = fops->sfo_analyze_sense(svp->svp_psd,
2628                                     sns, vlun->svl_fops_ctpriv);
2629                                 if (rval == SCSI_SENSE_NOT_READY) {
2630                                         max_retry = vhci_prout_not_ready_retry;
2631                                         retry = nr_retry++;
2632                                         delay(1*drv_usectohz(1000000));
2633                                 } else {
2634                                         /* chk for state change and update */
2635                                         if (rval == SCSI_SENSE_STATE_CHANGED) {
2636                                                 int held;
2637                                                 VHCI_HOLD_LUN(vlun,
2638                                                     VH_NOSLEEP, held);
2639                                                 if (!held) {
2640                                                         rval = TRAN_BUSY;
2641                                                 } else {
2642                                                         /* chk for alua first */
2643                                                         vhci_update_pathstates(
2644                                                             (void *)vlun);
2645                                                 }
2646                                         }
2647                                         retry = ua_retry++;
2648                                         max_retry = VHCI_MAX_PGR_RETRIES;
2649                                 }
2650                                 if (retry < max_retry) {
2651                                         VHCI_DEBUG(4, (CE_WARN, NULL,
2652                                             "!vhci_do_prout retry 0x%x "
2653                                             "(0x%x 0x%x 0x%x)",
2654                                             SCBP_C(new_pkt),
2655                                             new_pkt->pkt_cdbp[0],
2656                                             new_pkt->pkt_cdbp[1],
2657                                             new_pkt->pkt_cdbp[2]));
2658                                         goto again;
2659                                 }
2660                                 rval = 0;
2661                                 VHCI_DEBUG(4, (CE_WARN, NULL,
2662                                     "!vhci_do_prout 0x%x "
2663                                     "(0x%x 0x%x 0x%x)",
2664                                     SCBP_C(new_pkt),
2665                                     new_pkt->pkt_cdbp[0],
2666                                     new_pkt->pkt_cdbp[1],
2667                                     new_pkt->pkt_cdbp[2]));
2668                         } else if (skey == KEY_ILLEGAL_REQUEST)
2669                                 rval = VHCI_PGR_ILLEGALOP;
2670                 }
2671         } else {
2672                 rval = 1;
2673         }
2674         scsi_destroy_pkt(new_pkt);
2675         VHCI_DECR_PATH_CMDCOUNT(svp);
2676         freerbuf(bp);
2677         return (rval);
2678 }
2679 
2680 static void
2681 vhci_run_cmd(void *arg)
2682 {
2683         struct scsi_pkt         *pkt = (struct scsi_pkt *)arg;
2684         struct scsi_pkt         *tpkt;
2685         scsi_vhci_priv_t        *svp;
2686         mdi_pathinfo_t          *pip, *npip;
2687         scsi_vhci_lun_t         *vlun;
2688         dev_info_t              *cdip;
2689         scsi_vhci_priv_t        *nsvp;
2690         int                     fail = 0;
2691         int                     rval;
2692         struct vhci_pkt         *vpkt;
2693         uchar_t                 cdb_1;
2694         vhci_prout_t            *prout;
2695 
2696         vpkt = (struct vhci_pkt *)pkt->pkt_private;
2697         tpkt = vpkt->vpkt_tgt_pkt;
2698         pip = vpkt->vpkt_path;
2699         svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
2700         if (svp == NULL) {
2701                 tpkt->pkt_reason = CMD_TRAN_ERR;
2702                 tpkt->pkt_statistics = STAT_ABORTED;
2703                 goto done;
2704         }
2705         vlun = svp->svp_svl;
2706         prout = &vlun->svl_prout;
2707         if (SCBP_C(pkt) != STATUS_GOOD)
2708                 fail++;
2709         cdip = vlun->svl_dip;
2710         pip = npip = NULL;
2711         rval = mdi_select_path(cdip, NULL,
2712             MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH, NULL, &npip);
2713         if ((rval != MDI_SUCCESS) || (npip == NULL)) {
2714                 VHCI_DEBUG(4, (CE_NOTE, NULL,
2715                     "vhci_run_cmd: no path! 0x%p\n", (void *)svp));
2716                 tpkt->pkt_reason = CMD_TRAN_ERR;
2717                 tpkt->pkt_statistics = STAT_ABORTED;
2718                 goto done;
2719         }
2720 
2721         cdb_1 = vlun->svl_cdb[1];
2722         vlun->svl_cdb[1] &= 0xe0;
2723         vlun->svl_cdb[1] |= VHCI_PROUT_R_AND_IGNORE;
2724 
2725         do {
2726                 nsvp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(npip);
2727                 if (nsvp == NULL) {
2728                         VHCI_DEBUG(4, (CE_NOTE, NULL,
2729                             "vhci_run_cmd: no "
2730                             "client priv! 0x%p offlined?\n",
2731                             (void *)npip));
2732                         goto next_path;
2733                 }
2734                 if (vlun->svl_first_path == npip) {
2735                         goto next_path;
2736                 } else {
2737                         if (vhci_do_prout(nsvp) != 1)
2738                                 fail++;
2739                 }
2740 next_path:
2741                 pip = npip;
2742                 rval = mdi_select_path(cdip, NULL,
2743                     MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH,
2744                     pip, &npip);
2745                 mdi_rele_path(pip);
2746         } while ((rval == MDI_SUCCESS) && (npip != NULL));
2747 
2748         vlun->svl_cdb[1] = cdb_1;
2749 
2750         if (fail) {
2751                 VHCI_DEBUG(4, (CE_WARN, NULL, "%s%d: key registration failed, "
2752                     "couldn't be replicated on all paths",
2753                     ddi_driver_name(cdip), ddi_get_instance(cdip)));
2754                 vhci_print_prout_keys(vlun, "vhci_run_cmd: ");
2755 
2756                 if (SCBP_C(pkt) != STATUS_GOOD) {
2757                         tpkt->pkt_reason = CMD_TRAN_ERR;
2758                         tpkt->pkt_statistics = STAT_ABORTED;
2759                 }
2760         } else {
2761                 vlun->svl_pgr_active = 1;
2762                 vhci_print_prout_keys(vlun, "vhci_run_cmd: before bcopy:");
2763 
2764                 bcopy((const void *)prout->service_key,
2765                     (void *)prout->active_service_key, MHIOC_RESV_KEY_SIZE);
2766                 bcopy((const void *)prout->res_key,
2767                     (void *)prout->active_res_key, MHIOC_RESV_KEY_SIZE);
2768 
2769                 vhci_print_prout_keys(vlun, "vhci_run_cmd: after bcopy:");
2770         }
2771 done:
2772         if (SCBP_C(pkt) == STATUS_GOOD)
2773                 vlun->svl_first_path = NULL;
2774 
2775         if (svp)
2776                 VHCI_DECR_PATH_CMDCOUNT(svp);
2777 
2778         if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
2779                 scsi_destroy_pkt(pkt);
2780                 vpkt->vpkt_hba_pkt = NULL;
2781                 if (vpkt->vpkt_path) {
2782                         mdi_rele_path(vpkt->vpkt_path);
2783                         vpkt->vpkt_path = NULL;
2784                 }
2785         }
2786 
2787         sema_v(&vlun->svl_pgr_sema);
2788         /*
2789          * The PROUT commands are not included in the automatic retry
2790          * mechanism, therefore, vpkt_org_vpkt should never be set here.
2791          */
2792         ASSERT(vpkt->vpkt_org_vpkt == NULL);
2793         scsi_hba_pkt_comp(tpkt);
2794 }
2795 
2796 /*
2797  * Get the keys registered with this target.  Since we will have
2798  * registered the same key with multiple initiators, strip out
2799  * any duplicate keys.
2800  *
2801  * The pointers which will be used to filter the registered keys from
2802  * the device will be stored in filter_prin and filter_pkt.  If the
2803  * allocation length of the buffer was sufficient for the number of
2804  * parameter data bytes available to be returned by the device then the
2805  * key filtering will use the keylist returned from the original
2806  * request.  If the allocation length of the buffer was not sufficient,
2807  * then the filtering will use the keylist returned from the request
2808  * that is resent below.
2809  *
2810  * If the device returns an additional length field that is greater than
2811  * the allocation length of the buffer, then allocate a new buffer which
2812  * can accommodate the number of parameter data bytes available to be
 * returned.  Resend the scsi PRIN command, filter out the duplicate
 * keys, return as many of the unique keys found as were originally
 * requested, and set the additional length field equal to the data bytes
 * of unique reservation keys available to be returned.
2817  *
2818  * If the device returns an additional length field that is less than or
2819  * equal to the allocation length of the buffer, then all the available
2820  * keys registered were returned by the device.  Filter out the
2821  * duplicate keys and return all of the unique keys found and set the
2822  * additional length field equal to the data bytes of the reservation
2823  * keys to be returned.
2824  */
2825 
/*
 * Combined size, in bytes, of the length and generation fields of the
 * PRIN data referenced through `prin`.
 *
 * NOTE: this macro expands to an expression that names a variable called
 * `prin`, so it can only be used where such a variable is in scope.
 */
#define VHCI_PRIN_HEADER_SZ (sizeof (prin->length) + sizeof (prin->generation))
2827 
2828 static int
2829 vhci_do_prin(struct vhci_pkt **intr_vpkt)
2830 {
2831         scsi_vhci_priv_t *svp;
2832         struct vhci_pkt *vpkt = *intr_vpkt;
2833         vhci_prin_readkeys_t *prin;
2834         scsi_vhci_lun_t *vlun;
2835         struct scsi_vhci *vhci = ADDR2VHCI(&vpkt->vpkt_tgt_pkt->pkt_address);
2836 
2837         struct buf              *new_bp = NULL;
2838         struct scsi_pkt         *new_pkt = NULL;
2839         struct vhci_pkt         *new_vpkt = NULL;
2840         uint32_t                needed_length;
2841         int                     rval = VHCI_CMD_CMPLT;
2842         uint32_t                prin_length = 0;
2843         uint32_t                svl_prin_length = 0;
2844 
2845         ASSERT(vpkt->vpkt_path);
2846         svp = mdi_pi_get_vhci_private(vpkt->vpkt_path);
2847         ASSERT(svp);
2848         vlun = svp->svp_svl;
2849         ASSERT(vlun);
2850 
2851         /*
2852          * If the caller only asked for an amount of data that would not
2853          * be enough to include any key data it is likely that they will
2854          * send the next command with a buffer size based on the information
2855          * from this header. Doing recovery on this would be a duplication
2856          * of efforts.
2857          */
2858         if (vpkt->vpkt_tgt_init_bp->b_bcount <= VHCI_PRIN_HEADER_SZ) {
2859                 rval = VHCI_CMD_CMPLT;
2860                 goto exit;
2861         }
2862 
2863         if (vpkt->vpkt_org_vpkt == NULL) {
2864                 /*
2865                  * Can fail as sleep is not allowed.
2866                  */
2867                 prin = (vhci_prin_readkeys_t *)
2868                     bp_mapin_common(vpkt->vpkt_tgt_init_bp, VM_NOSLEEP);
2869         } else {
2870                 /*
2871                  * The retry buf doesn't need to be mapped in.
2872                  */
2873                 prin = (vhci_prin_readkeys_t *)
2874                     vpkt->vpkt_tgt_init_bp->b_un.b_daddr;
2875         }
2876 
2877         if (prin == NULL) {
2878                 VHCI_DEBUG(5, (CE_WARN, NULL,
2879                     "vhci_do_prin: bp_mapin_common failed."));
2880                 rval = VHCI_CMD_ERROR;
2881                 goto fail;
2882         }
2883 
2884         prin_length = BE_32(prin->length);
2885 
2886         /*
2887          * According to SPC-3r22, sec 4.3.4.6: "If the amount of
2888          * information to be transferred exceeds the maximum value
2889          * that the ALLOCATION LENGTH field is capable of specifying,
2890          * the device server shall...terminate the command with CHECK
2891          * CONDITION status".  The ALLOCATION LENGTH field of the
2892          * PERSISTENT RESERVE IN command is 2 bytes. We should never
2893          * get here with an ADDITIONAL LENGTH greater than 0xFFFF
2894          * so if we do, then it is an error!
2895          */
2896 
2897 
2898         if ((prin_length + VHCI_PRIN_HEADER_SZ) > 0xFFFF) {
2899                 VHCI_DEBUG(5, (CE_NOTE, NULL,
2900                     "vhci_do_prin: Device returned invalid "
2901                     "length 0x%x\n", prin_length));
2902                 rval = VHCI_CMD_ERROR;
2903                 goto fail;
2904         }
2905         needed_length = prin_length + VHCI_PRIN_HEADER_SZ;
2906 
2907         /*
2908          * If prin->length is greater than the byte count allocated in the
2909          * original buffer, then resend the request with enough buffer
2910          * allocated to get all of the available registered keys.
2911          */
2912         if ((vpkt->vpkt_tgt_init_bp->b_bcount < needed_length) &&
2913             (vpkt->vpkt_org_vpkt == NULL)) {
2914 
2915                 new_pkt = vhci_create_retry_pkt(vpkt);
2916                 if (new_pkt == NULL) {
2917                         rval = VHCI_CMD_ERROR;
2918                         goto fail;
2919                 }
2920                 new_vpkt = TGTPKT2VHCIPKT(new_pkt);
2921 
2922                 /*
2923                  * This is the buf with buffer pointer
2924                  * where the prin readkeys will be
2925                  * returned from the device
2926                  */
2927                 new_bp = scsi_alloc_consistent_buf(&svp->svp_psd->sd_address,
2928                     NULL, needed_length, B_READ, NULL_FUNC, NULL);
2929                 if ((new_bp == NULL) || (new_bp->b_un.b_addr == NULL)) {
2930                         if (new_bp) {
2931                                 scsi_free_consistent_buf(new_bp);
2932                         }
2933                         vhci_scsi_destroy_pkt(&new_pkt->pkt_address, new_pkt);
2934                         rval = VHCI_CMD_ERROR;
2935                         goto fail;
2936                 }
2937                 new_bp->b_bcount = needed_length;
2938                 new_pkt->pkt_cdbp[7] = (uchar_t)(needed_length >> 8);
2939                 new_pkt->pkt_cdbp[8] = (uchar_t)needed_length;
2940 
2941                 rval = VHCI_CMD_RETRY;
2942 
2943                 new_vpkt->vpkt_tgt_init_bp = new_bp;
2944         }
2945 
2946         if (rval == VHCI_CMD_RETRY) {
2947 
2948                 /*
2949                  * There were more keys then the original request asked for.
2950                  */
2951                 mdi_pathinfo_t *path_holder = vpkt->vpkt_path;
2952 
2953                 /*
2954                  * Release the old path because it does not matter which path
2955                  * this command is sent down.  This allows the normal bind
2956                  * transport mechanism to be used.
2957                  */
2958                 if (vpkt->vpkt_path != NULL) {
2959                         mdi_rele_path(vpkt->vpkt_path);
2960                         vpkt->vpkt_path = NULL;
2961                 }
2962 
2963                 /*
2964                  * Dispatch the retry command
2965                  */
2966                 if (taskq_dispatch(vhci->vhci_taskq, vhci_dispatch_scsi_start,
2967                     (void *) new_vpkt, KM_NOSLEEP) == NULL) {
2968                         if (path_holder) {
2969                                 vpkt->vpkt_path = path_holder;
2970                                 mdi_hold_path(path_holder);
2971                         }
2972                         scsi_free_consistent_buf(new_bp);
2973                         vhci_scsi_destroy_pkt(&new_pkt->pkt_address, new_pkt);
2974                         rval = VHCI_CMD_ERROR;
2975                         goto fail;
2976                 }
2977 
2978                 /*
2979                  * If we return VHCI_CMD_RETRY, that means the caller
2980                  * is going to bail and wait for the reissued command
2981                  * to complete.  In that case, we need to decrement
2982                  * the path command count right now.  In any other
2983                  * case, it'll be decremented by the caller.
2984                  */
2985                 VHCI_DECR_PATH_CMDCOUNT(svp);
2986                 goto exit;
2987 
2988         }
2989 
2990         if (rval == VHCI_CMD_CMPLT) {
2991                 /*
2992                  * The original request got all of the keys or the recovery
2993                  * packet returns.
2994                  */
2995                 int new;
2996                 int old;
2997                 int num_keys = prin_length / MHIOC_RESV_KEY_SIZE;
2998 
2999                 VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_do_prin: %d keys read\n",
3000                     num_keys));
3001 
3002 #ifdef DEBUG
3003                 VHCI_DEBUG(5, (CE_NOTE, NULL, "vhci_do_prin: from storage\n"));
3004                 if (vhci_debug == 5)
3005                         vhci_print_prin_keys(prin, num_keys);
3006                 VHCI_DEBUG(5, (CE_NOTE, NULL,
3007                     "vhci_do_prin: MPxIO old keys:\n"));
3008                 if (vhci_debug == 5)
3009                         vhci_print_prin_keys(&vlun->svl_prin, num_keys);
3010 #endif
3011 
3012                 /*
3013                  * Filter out all duplicate keys returned from the device
3014                  * We know that we use a different key for every host, so we
3015                  * can simply strip out duplicates. Otherwise we would need to
3016                  * do more bookkeeping to figure out which keys to strip out.
3017                  */
3018 
3019                 new = 0;
3020 
3021                 /*
3022                  * If we got at least 1 key copy it.
3023                  */
3024                 if (num_keys > 0) {
3025                         vlun->svl_prin.keylist[0] = prin->keylist[0];
3026                         new++;
3027                 }
3028 
3029                 /*
3030                  * find next unique key.
3031                  */
3032                 for (old = 1; old < num_keys; old++) {
3033                         int j;
3034                         int match = 0;
3035 
3036                         if (new >= VHCI_NUM_RESV_KEYS)
3037                                 break;
3038                         for (j = 0; j < new; j++) {
3039                                 if (bcmp(&prin->keylist[old],
3040                                     &vlun->svl_prin.keylist[j],
3041                                     sizeof (mhioc_resv_key_t)) == 0) {
3042                                         match = 1;
3043                                         break;
3044                                 }
3045                         }
3046                         if (!match) {
3047                                 vlun->svl_prin.keylist[new] =
3048                                     prin->keylist[old];
3049                                 new++;
3050                         }
3051                 }
3052 
3053                 /* Stored Big Endian */
3054                 vlun->svl_prin.generation = prin->generation;
3055                 svl_prin_length = new * sizeof (mhioc_resv_key_t);
3056                 /* Stored Big Endian */
3057                 vlun->svl_prin.length = BE_32(svl_prin_length);
3058                 svl_prin_length += VHCI_PRIN_HEADER_SZ;
3059 
3060                 /*
3061                  * If we arrived at this point after issuing a retry, make sure
3062                  * that we put everything back the way it originally was so
3063                  * that the target driver can complete the command correctly.
3064                  */
3065                 if (vpkt->vpkt_org_vpkt != NULL) {
3066                         new_bp = vpkt->vpkt_tgt_init_bp;
3067 
3068                         scsi_free_consistent_buf(new_bp);
3069 
3070                         vpkt = vhci_sync_retry_pkt(vpkt);
3071                         *intr_vpkt = vpkt;
3072 
3073                         /*
3074                          * Make sure the original buffer is mapped into kernel
3075                          * space before we try to copy the filtered keys into
3076                          * it.
3077                          */
3078                         prin = (vhci_prin_readkeys_t *)bp_mapin_common(
3079                             vpkt->vpkt_tgt_init_bp, VM_NOSLEEP);
3080                 }
3081 
3082                 /*
3083                  * Now copy the desired number of prin keys into the original
3084                  * target buffer.
3085                  */
3086                 if (svl_prin_length <= vpkt->vpkt_tgt_init_bp->b_bcount) {
3087                         /*
3088                          * It is safe to return all of the available unique
3089                          * keys
3090                          */
3091                         bcopy(&vlun->svl_prin, prin, svl_prin_length);
3092                 } else {
3093                         /*
3094                          * Not all of the available keys were requested by the
3095                          * original command.
3096                          */
3097                         bcopy(&vlun->svl_prin, prin,
3098                             vpkt->vpkt_tgt_init_bp->b_bcount);
3099                 }
3100 #ifdef DEBUG
3101                 VHCI_DEBUG(5, (CE_NOTE, NULL,
3102                     "vhci_do_prin: To Application:\n"));
3103                 if (vhci_debug == 5)
3104                         vhci_print_prin_keys(prin, new);
3105                 VHCI_DEBUG(5, (CE_NOTE, NULL,
3106                     "vhci_do_prin: MPxIO new keys:\n"));
3107                 if (vhci_debug == 5)
3108                         vhci_print_prin_keys(&vlun->svl_prin, new);
3109 #endif
3110         }
3111 fail:
3112         if (rval == VHCI_CMD_ERROR) {
3113                 /*
3114                  * If we arrived at this point after issuing a
3115                  * retry, make sure that we put everything back
3116                  * the way it originally was so that ssd can
3117                  * complete the command correctly.
3118                  */
3119 
3120                 if (vpkt->vpkt_org_vpkt != NULL) {
3121                         new_bp = vpkt->vpkt_tgt_init_bp;
3122                         if (new_bp != NULL) {
3123                                 scsi_free_consistent_buf(new_bp);
3124                         }
3125 
3126                         new_vpkt = vpkt;
3127                         vpkt = vpkt->vpkt_org_vpkt;
3128 
3129                         vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
3130                             new_vpkt->vpkt_tgt_pkt);
3131                 }
3132 
3133                 /*
3134                  * Mark this command completion as having an error so that
3135                  * ssd will retry the command.
3136                  */
3137 
3138                 vpkt->vpkt_tgt_pkt->pkt_reason = CMD_ABORTED;
3139                 vpkt->vpkt_tgt_pkt->pkt_statistics |= STAT_ABORTED;
3140 
3141                 rval = VHCI_CMD_CMPLT;
3142         }
3143 exit:
3144         /*
3145          * Make sure that the semaphore is only released once.
3146          */
3147         if (rval == VHCI_CMD_CMPLT) {
3148                 sema_v(&vlun->svl_pgr_sema);
3149         }
3150 
3151         return (rval);
3152 }
3153 
3154 static void
3155 vhci_intr(struct scsi_pkt *pkt)
3156 {
3157         struct vhci_pkt         *vpkt = (struct vhci_pkt *)pkt->pkt_private;
3158         struct scsi_pkt         *tpkt;
3159         scsi_vhci_priv_t        *svp;
3160         scsi_vhci_lun_t         *vlun;
3161         int                     rval, held;
3162         struct scsi_failover_ops        *fops;
3163         uint8_t                 *sns, skey, asc, ascq;
3164         mdi_pathinfo_t          *lpath;
3165         static char             *timeout_err = "Command Timeout";
3166         static char             *parity_err = "Parity Error";
3167         char                    *err_str = NULL;
3168         dev_info_t              *vdip, *cdip;
3169         char                    *cpath;
3170 
3171         ASSERT(vpkt != NULL);
3172         tpkt = vpkt->vpkt_tgt_pkt;
3173         ASSERT(tpkt != NULL);
3174         svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
3175         ASSERT(svp != NULL);
3176         vlun = svp->svp_svl;
3177         ASSERT(vlun != NULL);
3178         lpath = vpkt->vpkt_path;
3179 
3180         /*
3181          * sync up the target driver's pkt with the pkt that
3182          * we actually used
3183          */
3184         *(tpkt->pkt_scbp) = *(pkt->pkt_scbp);
3185         tpkt->pkt_resid = pkt->pkt_resid;
3186         tpkt->pkt_state = pkt->pkt_state;
3187         tpkt->pkt_statistics = pkt->pkt_statistics;
3188         tpkt->pkt_reason = pkt->pkt_reason;
3189 
3190         /* Return path_instance information back to the target driver. */
3191         if (scsi_pkt_allocated_correctly(tpkt)) {
3192                 if (scsi_pkt_allocated_correctly(pkt)) {
3193                         /*
3194                          * If both packets were correctly allocated,
3195                          * return path returned by pHCI.
3196                          */
3197                         tpkt->pkt_path_instance = pkt->pkt_path_instance;
3198                 } else {
3199                         /* Otherwise return path of pHCI we used */
3200                         tpkt->pkt_path_instance =
3201                             mdi_pi_get_path_instance(lpath);
3202                 }
3203         }
3204 
3205         if (pkt->pkt_cdbp[0] == SCMD_PROUT &&
3206             ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
3207             ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE)) {
3208                 if ((SCBP_C(pkt) != STATUS_GOOD) ||
3209                     (pkt->pkt_reason != CMD_CMPLT)) {
3210                         sema_v(&vlun->svl_pgr_sema);
3211                 }
3212         } else if (pkt->pkt_cdbp[0] == SCMD_PRIN) {
3213                 if (pkt->pkt_reason != CMD_CMPLT ||
3214                     (SCBP_C(pkt) != STATUS_GOOD)) {
3215                         sema_v(&vlun->svl_pgr_sema);
3216                 }
3217         }
3218 
3219         switch (pkt->pkt_reason) {
3220         case CMD_CMPLT:
3221                 /*
3222                  * cmd completed successfully, check for scsi errors
3223                  */
3224                 switch (*(pkt->pkt_scbp)) {
3225                 case STATUS_CHECK:
3226                         if (pkt->pkt_state & STATE_ARQ_DONE) {
3227                                 sns = (uint8_t *)
3228                                     &(((struct scsi_arq_status *)(uintptr_t)
3229                                     (pkt->pkt_scbp))->sts_sensedata);
3230                                 skey = scsi_sense_key(sns);
3231                                 asc = scsi_sense_asc(sns);
3232                                 ascq = scsi_sense_ascq(sns);
3233                                 fops = vlun->svl_fops;
3234                                 ASSERT(fops != NULL);
3235                                 VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_intr: "
3236                                     "Received sns key %x  esc %x  escq %x\n",
3237                                     skey, asc, ascq));
3238 
3239                                 if (vlun->svl_waiting_for_activepath == 1) {
3240                                         /*
3241                                          * if we are here it means we are
3242                                          * in the midst of a probe/attach
3243                                          * through a passive path; this
3244                                          * case is exempt from sense analysis
3245                                          * for detection of ext. failover
3246                                          * because that would unnecessarily
3247                                          * increase attach time.
3248                                          */
3249                                         bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3250                                             vpkt->vpkt_tgt_init_scblen);
3251                                         break;
3252                                 }
3253                                 if (asc == VHCI_SCSI_PERR) {
3254                                         /*
3255                                          * parity error
3256                                          */
3257                                         err_str = parity_err;
3258                                         bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3259                                             vpkt->vpkt_tgt_init_scblen);
3260                                         break;
3261                                 }
3262                                 rval = fops->sfo_analyze_sense(svp->svp_psd,
3263                                     sns, vlun->svl_fops_ctpriv);
3264                                 if ((rval == SCSI_SENSE_NOFAILOVER) ||
3265                                     (rval == SCSI_SENSE_UNKNOWN) ||
3266                                     (rval == SCSI_SENSE_NOT_READY)) {
3267                                         bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3268                                             vpkt->vpkt_tgt_init_scblen);
3269                                         break;
3270                                 } else if (rval == SCSI_SENSE_STATE_CHANGED) {
3271                                         struct scsi_vhci        *vhci;
3272                                         vhci = ADDR2VHCI(&tpkt->pkt_address);
3273                                         VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
3274                                         if (!held) {
3275                                                 /*
3276                                                  * looks like some other thread
3277                                                  * has already detected this
3278                                                  * condition
3279                                                  */
3280                                                 tpkt->pkt_state &=
3281                                                     ~STATE_ARQ_DONE;
3282                                                 *(tpkt->pkt_scbp) =
3283                                                     STATUS_BUSY;
3284                                                 break;
3285                                         }
3286                                         (void) taskq_dispatch(
3287                                             vhci->vhci_update_pathstates_taskq,
3288                                             vhci_update_pathstates,
3289                                             (void *)vlun, KM_SLEEP);
3290                                 } else {
3291                                         /*
3292                                          * externally initiated failover
3293                                          * has occurred or is in progress
3294                                          */
3295                                         VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
3296                                         if (!held) {
3297                                                 /*
3298                                                  * looks like some other thread
3299                                                  * has already detected this
3300                                                  * condition
3301                                                  */
3302                                                 tpkt->pkt_state &=
3303                                                     ~STATE_ARQ_DONE;
3304                                                 *(tpkt->pkt_scbp) =
3305                                                     STATUS_BUSY;
3306                                                 break;
3307                                         } else {
3308                                                 rval = vhci_handle_ext_fo
3309                                                     (pkt, rval);
3310                                                 if (rval == BUSY_RETURN) {
3311                                                         tpkt->pkt_state &=
3312                                                             ~STATE_ARQ_DONE;
3313                                                         *(tpkt->pkt_scbp) =
3314                                                             STATUS_BUSY;
3315                                                         break;
3316                                                 }
3317                                                 bcopy(pkt->pkt_scbp,
3318                                                     tpkt->pkt_scbp,
3319                                                     vpkt->vpkt_tgt_init_scblen);
3320                                                 break;
3321                                         }
3322                                 }
3323                         }
3324                         break;
3325 
3326                 /*
3327                  * If this is a good SCSI-II RELEASE cmd completion then restore
3328                  * the load balancing policy and reset VLUN_RESERVE_ACTIVE_FLG.
3329                  * If this is a good SCSI-II RESERVE cmd completion then set
3330                  * VLUN_RESERVE_ACTIVE_FLG.
3331                  */
3332                 case STATUS_GOOD:
3333                         if ((pkt->pkt_cdbp[0] == SCMD_RELEASE) ||
3334                             (pkt->pkt_cdbp[0] == SCMD_RELEASE_G1)) {
3335                                 (void) mdi_set_lb_policy(vlun->svl_dip,
3336                                     vlun->svl_lb_policy_save);
3337                                 vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
3338                                 VHCI_DEBUG(1, (CE_WARN, NULL,
3339                                     "!vhci_intr: vlun 0x%p release path 0x%p",
3340                                     (void *)vlun, (void *)vpkt->vpkt_path));
3341                         }
3342 
3343                         if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3344                             (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3345                                 vlun->svl_flags |= VLUN_RESERVE_ACTIVE_FLG;
3346                                 vlun->svl_resrv_pip = vpkt->vpkt_path;
3347                                 VHCI_DEBUG(1, (CE_WARN, NULL,
3348                                     "!vhci_intr: vlun 0x%p reserved path 0x%p",
3349                                     (void *)vlun, (void *)vpkt->vpkt_path));
3350                         }
3351                         break;
3352 
3353                 case STATUS_RESERVATION_CONFLICT:
3354                         VHCI_DEBUG(1, (CE_WARN, NULL,
3355                             "!vhci_intr: vlun 0x%p "
3356                             "reserve conflict on path 0x%p",
3357                             (void *)vlun, (void *)vpkt->vpkt_path));
3358                         /* FALLTHROUGH */
3359                 default:
3360                         break;
3361                 }
3362 
3363                 /*
3364                  * Update I/O completion statistics for the path
3365                  */
3366                 mdi_pi_kstat_iosupdate(vpkt->vpkt_path, vpkt->vpkt_tgt_init_bp);
3367 
3368                 /*
3369                  * Command completed successfully, release the dma binding and
3370                  * destroy the transport side of the packet.
3371                  */
3372                 if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
3373                     (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
3374                     ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
3375                         if (SCBP_C(pkt) == STATUS_GOOD) {
3376                                 ASSERT(vlun->svl_taskq);
3377                                 svp->svp_last_pkt_reason = pkt->pkt_reason;
3378                                 (void) taskq_dispatch(vlun->svl_taskq,
3379                                     vhci_run_cmd, pkt, KM_SLEEP);
3380                                 return;
3381                         }
3382                 }
3383                 if ((SCBP_C(pkt) == STATUS_GOOD) &&
3384                     (pkt->pkt_cdbp[0] == SCMD_PRIN) && vpkt->vpkt_tgt_init_bp) {
3385                         /*
3386                          * If the action (value in byte 1 of the cdb) is zero,
3387                          * we're reading keys, and that's the only condition
3388                          * where we need to be concerned with filtering keys
3389                          * and potential retries.  Otherwise, we simply signal
3390                          * the semaphore and move on.
3391                          */
3392                         if (pkt->pkt_cdbp[1] == 0) {
3393                                 /*
3394                                  * If this is the completion of an internal
3395                                  * retry then we need to make sure that the
3396                                  * pkt and tpkt pointers are readjusted so
3397                                  * the calls to scsi_destroy_pkt and pkt_comp
3398                                  * below work * correctly.
3399                                  */
3400                                 if (vpkt->vpkt_org_vpkt != NULL) {
3401                                         pkt = vpkt->vpkt_org_vpkt->vpkt_hba_pkt;
3402                                         tpkt = vpkt->vpkt_org_vpkt->
3403                                             vpkt_tgt_pkt;
3404 
3405                                         /*
3406                                          * If this command was issued through
3407                                          * the taskq then we need to clear
3408                                          * this flag for proper processing in
3409                                          * the case of a retry from the target
3410                                          * driver.
3411                                          */
3412                                         vpkt->vpkt_state &=
3413                                             ~VHCI_PKT_THRU_TASKQ;
3414                                 }
3415 
3416                                 /*
3417                                  * if vhci_do_prin returns VHCI_CMD_CMPLT then
3418                                  * vpkt will contain the address of the
3419                                  * original vpkt
3420                                  */
3421                                 if (vhci_do_prin(&vpkt) == VHCI_CMD_RETRY) {
3422                                         /*
3423                                          * The command has been resent to get
3424                                          * all the keys from the device.  Don't
3425                                          * complete the command with ssd until
3426                                          * the retry completes.
3427                                          */
3428                                         return;
3429                                 }
3430                         } else {
3431                                 sema_v(&vlun->svl_pgr_sema);
3432                         }
3433                 }
3434 
3435                 break;
3436 
3437         case CMD_TIMEOUT:
3438                 if ((pkt->pkt_statistics &
3439                     (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
3440 
3441                         VHCI_DEBUG(1, (CE_NOTE, NULL,
3442                             "!scsi vhci timeout invoked\n"));
3443 
3444                         (void) vhci_recovery_reset(vlun, &pkt->pkt_address,
3445                             FALSE, VHCI_DEPTH_ALL);
3446                 }
3447                 MDI_PI_ERRSTAT(lpath, MDI_PI_TRANSERR);
3448                 tpkt->pkt_statistics |= STAT_ABORTED;
3449                 err_str = timeout_err;
3450                 break;
3451 
3452         case CMD_TRAN_ERR:
3453                 /*
3454                  * This status is returned if the transport has sent the cmd
3455                  * down the link to the target and then some error occurs.
3456                  * In case of SCSI-II RESERVE cmd, we don't know if the
3457                  * reservation been accepted by the target or not, so we need
3458                  * to clear the reservation.
3459                  */
3460                 if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3461                     (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3462                         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_intr received"
3463                             " cmd_tran_err for scsi-2 reserve cmd\n"));
3464                         if (!vhci_recovery_reset(vlun, &pkt->pkt_address,
3465                             TRUE, VHCI_DEPTH_TARGET)) {
3466                                 VHCI_DEBUG(1, (CE_WARN, NULL,
3467                                     "!vhci_intr cmd_tran_err reset failed!"));
3468                         }
3469                 }
3470                 break;
3471 
3472         case CMD_DEV_GONE:
3473                 /*
3474                  * If this is the last path then report CMD_DEV_GONE to the
3475                  * target driver, otherwise report BUSY to triggger retry.
3476                  */
3477                 if (vlun->svl_dip &&
3478                     (mdi_client_get_path_count(vlun->svl_dip) <= 1)) {
3479                         struct scsi_vhci        *vhci;
3480                         vhci = ADDR2VHCI(&tpkt->pkt_address);
3481                         VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_intr received "
3482                             "cmd_dev_gone on last path\n"));
3483                         (void) vhci_invalidate_mpapi_lu(vhci, vlun);
3484                         break;
3485                 }
3486 
3487                 /* Report CMD_CMPLT-with-BUSY to cause retry. */
3488                 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_intr received "
3489                     "cmd_dev_gone\n"));
3490                 tpkt->pkt_reason = CMD_CMPLT;
3491                 tpkt->pkt_state = STATE_GOT_BUS |
3492                     STATE_GOT_TARGET | STATE_SENT_CMD |
3493                     STATE_GOT_STATUS;
3494                 *(tpkt->pkt_scbp) = STATUS_BUSY;
3495                 break;
3496 
3497         default:
3498                 break;
3499         }
3500 
3501         /*
3502          * SCSI-II RESERVE cmd has been serviced by the lower layers clear
3503          * the flag so the lun is not QUIESCED any longer.
3504          * Also clear the VHCI_PKT_THRU_TASKQ flag, to ensure that if this pkt
3505          * is retried, a taskq shall again be dispatched to service it.  Else
3506          * it may lead to a system hang if the retry is within interrupt
3507          * context.
3508          */
3509         if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3510             (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3511                 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
3512                 vpkt->vpkt_state &= ~VHCI_PKT_THRU_TASKQ;
3513         }
3514 
3515         /*
3516          * vpkt_org_vpkt should always be NULL here if the retry command
3517          * has been successfully processed.  If vpkt_org_vpkt != NULL at
3518          * this point, it is an error so restore the original vpkt and
3519          * return an error to the target driver so it can retry the
3520          * command as appropriate.
3521          */
3522         if (vpkt->vpkt_org_vpkt != NULL) {
3523                 struct vhci_pkt *new_vpkt = vpkt;
3524                 vpkt = vpkt->vpkt_org_vpkt;
3525 
3526                 vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
3527                     new_vpkt->vpkt_tgt_pkt);
3528 
3529                 /*
3530                  * Mark this command completion as having an error so that
3531                  * ssd will retry the command.
3532                  */
3533                 vpkt->vpkt_tgt_pkt->pkt_reason = CMD_ABORTED;
3534                 vpkt->vpkt_tgt_pkt->pkt_statistics |= STAT_ABORTED;
3535 
3536                 pkt = vpkt->vpkt_hba_pkt;
3537                 tpkt = vpkt->vpkt_tgt_pkt;
3538         }
3539 
3540         if ((err_str != NULL) && (pkt->pkt_reason !=
3541             svp->svp_last_pkt_reason)) {
3542                 cdip = vlun->svl_dip;
3543                 vdip = ddi_get_parent(cdip);
3544                 cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3545                 vhci_log(CE_WARN, vdip, "!%s (%s%d): %s on path %s",
3546                     ddi_pathname(cdip, cpath), ddi_driver_name(cdip),
3547                     ddi_get_instance(cdip), err_str,
3548                     mdi_pi_spathname(vpkt->vpkt_path));
3549                 kmem_free(cpath, MAXPATHLEN);
3550         }
3551         svp->svp_last_pkt_reason = pkt->pkt_reason;
3552         VHCI_DECR_PATH_CMDCOUNT(svp);
3553 
3554         /*
3555          * For PARTIAL_DMA, vhci should not free the path.
3556          * Target driver will call into vhci_scsi_dmafree or
3557          * destroy pkt to release this path.
3558          */
3559         if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
3560                 scsi_destroy_pkt(pkt);
3561                 vpkt->vpkt_hba_pkt = NULL;
3562                 if (vpkt->vpkt_path) {
3563                         mdi_rele_path(vpkt->vpkt_path);
3564                         vpkt->vpkt_path = NULL;
3565                 }
3566         }
3567 
3568         scsi_hba_pkt_comp(tpkt);
3569 }
3570 
3571 /*
3572  * two possibilities: (1) failover has completed
3573  * or (2) is in progress; update our path states for
3574  * the former case; for the latter case,
3575  * initiate a scsi_watch request to
3576  * determine when failover completes - vlun is HELD
3577  * until failover completes; BUSY is returned to upper
3578  * layer in both the cases
3579  */
3580 static int
3581 vhci_handle_ext_fo(struct scsi_pkt *pkt, int fostat)
3582 {
3583         struct vhci_pkt         *vpkt = (struct vhci_pkt *)pkt->pkt_private;
3584         struct scsi_pkt         *tpkt;
3585         scsi_vhci_priv_t        *svp;
3586         scsi_vhci_lun_t         *vlun;
3587         struct scsi_vhci        *vhci;
3588         scsi_vhci_swarg_t       *swarg;
3589         char                    *path;
3590 
3591         ASSERT(vpkt != NULL);
3592         tpkt = vpkt->vpkt_tgt_pkt;
3593         ASSERT(tpkt != NULL);
3594         svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
3595         ASSERT(svp != NULL);
3596         vlun = svp->svp_svl;
3597         ASSERT(vlun != NULL);
3598         ASSERT(VHCI_LUN_IS_HELD(vlun));
3599 
3600         vhci = ADDR2VHCI(&tpkt->pkt_address);
3601 
3602         if (fostat == SCSI_SENSE_INACTIVE) {
3603                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!Failover "
3604                     "detected for %s; updating path states...\n",
3605                     vlun->svl_lun_wwn));
3606                 /*
3607                  * set the vlun flag to indicate to the task that the target
3608                  * port group needs updating
3609                  */
3610                 vlun->svl_flags |= VLUN_UPDATE_TPG;
3611                 (void) taskq_dispatch(vhci->vhci_update_pathstates_taskq,
3612                     vhci_update_pathstates, (void *)vlun, KM_SLEEP);
3613         } else {
3614                 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3615                 vhci_log(CE_NOTE, ddi_get_parent(vlun->svl_dip),
3616                     "!%s (%s%d): Waiting for externally initiated failover "
3617                     "to complete", ddi_pathname(vlun->svl_dip, path),
3618                     ddi_driver_name(vlun->svl_dip),
3619                     ddi_get_instance(vlun->svl_dip));
3620                 kmem_free(path, MAXPATHLEN);
3621                 swarg = kmem_alloc(sizeof (*swarg), KM_NOSLEEP);
3622                 if (swarg == NULL) {
3623                         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_handle_ext_fo: "
3624                             "request packet allocation for %s failed....\n",
3625                             vlun->svl_lun_wwn));
3626                         VHCI_RELEASE_LUN(vlun);
3627                         return (PKT_RETURN);
3628                 }
3629                 swarg->svs_svp = svp;
3630                 swarg->svs_tos = gethrtime();
3631                 swarg->svs_pi = vpkt->vpkt_path;
3632                 swarg->svs_release_lun = 0;
3633                 swarg->svs_done = 0;
3634                 /*
3635                  * place a hold on the path...we don't want it to
3636                  * vanish while scsi_watch is in progress
3637                  */
3638                 mdi_hold_path(vpkt->vpkt_path);
3639                 svp->svp_sw_token = scsi_watch_request_submit(svp->svp_psd,
3640                     VHCI_FOWATCH_INTERVAL, SENSE_LENGTH, vhci_efo_watch_cb,
3641                     (caddr_t)swarg);
3642         }
3643         return (BUSY_RETURN);
3644 }
3645 
3646 /*
3647  * vhci_efo_watch_cb:
3648  *      Callback from scsi_watch request to check the failover status.
3649  *      Completion is either due to successful failover or timeout.
3650  *      Upon successful completion, vhci_update_path_states is called.
3651  *      For timeout condition, vhci_efo_done is called.
3652  *      Always returns 0 to scsi_watch to keep retrying till vhci_efo_done
3653  *      terminates this request properly in a separate thread.
3654  */
3655 
3656 static int
3657 vhci_efo_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
3658 {
3659         struct scsi_status              *statusp = resultp->statusp;
3660         uint8_t                         *sensep = (uint8_t *)resultp->sensep;
3661         struct scsi_pkt                 *pkt = resultp->pkt;
3662         scsi_vhci_swarg_t               *swarg;
3663         scsi_vhci_priv_t                *svp;
3664         scsi_vhci_lun_t                 *vlun;
3665         struct scsi_vhci                *vhci;
3666         dev_info_t                      *vdip;
3667         int                             rval, updt_paths;
3668 
3669         swarg = (scsi_vhci_swarg_t *)(uintptr_t)arg;
3670         svp = swarg->svs_svp;
3671         if (swarg->svs_done) {
3672                 /*
3673                  * Already completed failover or timedout.
3674                  * Waiting for vhci_efo_done to terminate this scsi_watch.
3675                  */
3676                 return (0);
3677         }
3678 
3679         ASSERT(svp != NULL);
3680         vlun = svp->svp_svl;
3681         ASSERT(vlun != NULL);
3682         ASSERT(VHCI_LUN_IS_HELD(vlun));
3683         vlun->svl_efo_update_path = 0;
3684         vdip = ddi_get_parent(vlun->svl_dip);
3685         vhci = ddi_get_soft_state(vhci_softstate,
3686             ddi_get_instance(vdip));
3687 
3688         updt_paths = 0;
3689 
3690         if (pkt->pkt_reason != CMD_CMPLT) {
3691                 if ((gethrtime() - swarg->svs_tos) >= VHCI_EXTFO_TIMEOUT) {
3692                         swarg->svs_release_lun = 1;
3693                         goto done;
3694                 }
3695                 return (0);
3696         }
3697         if (*((unsigned char *)statusp) == STATUS_CHECK) {
3698                 rval = vlun->svl_fops->sfo_analyze_sense(svp->svp_psd, sensep,
3699                     vlun->svl_fops_ctpriv);
3700                 switch (rval) {
3701                         /*
3702                          * Only update path states in case path is definitely
3703                          * inactive, or no failover occurred.  For all other
3704                          * check conditions continue pinging.  A unexpected
3705                          * check condition shouldn't cause pinging to complete
3706                          * prematurely.
3707                          */
3708                         case SCSI_SENSE_INACTIVE:
3709                         case SCSI_SENSE_NOFAILOVER:
3710                                 updt_paths = 1;
3711                                 break;
3712                         default:
3713                                 if ((gethrtime() - swarg->svs_tos)
3714                                     >= VHCI_EXTFO_TIMEOUT) {
3715                                         swarg->svs_release_lun = 1;
3716                                         goto done;
3717                                 }
3718                                 return (0);
3719                 }
3720         } else if (*((unsigned char *)statusp) ==
3721             STATUS_RESERVATION_CONFLICT) {
3722                 updt_paths = 1;
3723         } else if ((*((unsigned char *)statusp)) &
3724             (STATUS_BUSY | STATUS_QFULL)) {
3725                 return (0);
3726         }
3727         if ((*((unsigned char *)statusp) == STATUS_GOOD) ||
3728             (updt_paths == 1)) {
3729                 /*
3730                  * we got here because we had detected an
3731                  * externally initiated failover; things
3732                  * have settled down now, so let's
3733                  * start up a task to update the
3734                  * path states and target port group
3735                  */
3736                 vlun->svl_efo_update_path = 1;
3737                 swarg->svs_done = 1;
3738                 vlun->svl_swarg = swarg;
3739                 vlun->svl_flags |= VLUN_UPDATE_TPG;
3740                 (void) taskq_dispatch(vhci->vhci_update_pathstates_taskq,
3741                     vhci_update_pathstates, (void *)vlun,
3742                     KM_SLEEP);
3743                 return (0);
3744         }
3745         if ((gethrtime() - swarg->svs_tos) >= VHCI_EXTFO_TIMEOUT) {
3746                 swarg->svs_release_lun = 1;
3747                 goto done;
3748         }
3749         return (0);
3750 done:
3751         swarg->svs_done = 1;
3752         (void) taskq_dispatch(vhci->vhci_taskq,
3753             vhci_efo_done, (void *)swarg, KM_SLEEP);
3754         return (0);
3755 }
3756 
3757 /*
3758  * vhci_efo_done:
3759  *      cleanly terminates scsi_watch and free up resources.
3760  *      Called as taskq function in vhci_efo_watch_cb for EFO timeout condition
3761  *      or by vhci_update_path_states invoked during external initiated
3762  *      failover completion.
3763  */
3764 static void
3765 vhci_efo_done(void *arg)
3766 {
3767         scsi_vhci_lun_t                 *vlun;
3768         scsi_vhci_swarg_t               *swarg = (scsi_vhci_swarg_t *)arg;
3769         scsi_vhci_priv_t                *svp = swarg->svs_svp;
3770         ASSERT(svp);
3771 
3772         vlun = svp->svp_svl;
3773         ASSERT(vlun);
3774 
3775         /* Wait for clean termination of scsi_watch */
3776         (void) scsi_watch_request_terminate(svp->svp_sw_token,
3777             SCSI_WATCH_TERMINATE_ALL_WAIT);
3778         svp->svp_sw_token = NULL;
3779 
3780         /* release path and freeup resources to indicate failover completion */
3781         mdi_rele_path(swarg->svs_pi);
3782         if (swarg->svs_release_lun) {
3783                 VHCI_RELEASE_LUN(vlun);
3784         }
3785         kmem_free((void *)swarg, sizeof (*swarg));
3786 }
3787 
3788 /*
3789  * Update the path states
3790  * vlun should be HELD when this is invoked.
3791  * Calls vhci_efo_done to cleanup resources allocated for EFO.
3792  */
3793 void
3794 vhci_update_pathstates(void *arg)
3795 {
3796         mdi_pathinfo_t                  *pip, *npip;
3797         dev_info_t                      *dip;
3798         struct scsi_failover_ops        *fo;
3799         struct scsi_vhci_priv           *svp;
3800         struct scsi_device              *psd;
3801         struct scsi_path_opinfo         opinfo;
3802         char                            *pclass, *tptr;
3803         struct scsi_vhci_lun            *vlun = (struct scsi_vhci_lun *)arg;
3804         int                             sps; /* mdi_select_path() status */
3805         char                            *cpath;
3806         struct scsi_vhci                *vhci;
3807         struct scsi_pkt                 *pkt;
3808         struct buf                      *bp;
3809         struct scsi_vhci_priv           *svp_conflict = NULL;
3810 
3811         ASSERT(VHCI_LUN_IS_HELD(vlun));
3812         dip  = vlun->svl_dip;
3813         pip = npip = NULL;
3814 
3815         vhci = ddi_get_soft_state(vhci_softstate,
3816             ddi_get_instance(ddi_get_parent(dip)));
3817 
3818         sps = mdi_select_path(dip, NULL, (MDI_SELECT_ONLINE_PATH |
3819             MDI_SELECT_STANDBY_PATH | MDI_SELECT_NO_PREFERRED), NULL, &npip);
3820         if ((npip == NULL) || (sps != MDI_SUCCESS)) {
3821                 goto done;
3822         }
3823 
3824         fo = vlun->svl_fops;
3825         do {
3826                 pip = npip;
3827                 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
3828                 psd = svp->svp_psd;
3829                 if (fo->sfo_path_get_opinfo(psd, &opinfo,
3830                     vlun->svl_fops_ctpriv) != 0) {
3831                         sps = mdi_select_path(dip, NULL,
3832                             (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH |
3833                             MDI_SELECT_NO_PREFERRED), pip, &npip);
3834                         mdi_rele_path(pip);
3835                         continue;
3836                 }
3837 
3838                 if (mdi_prop_lookup_string(pip, "path-class", &pclass) !=
3839                     MDI_SUCCESS) {
3840                         VHCI_DEBUG(1, (CE_NOTE, NULL,
3841                             "!vhci_update_pathstates: prop lookup failed for "
3842                             "path 0x%p\n", (void *)pip));
3843                         sps = mdi_select_path(dip, NULL,
3844                             (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH |
3845                             MDI_SELECT_NO_PREFERRED), pip, &npip);
3846                         mdi_rele_path(pip);
3847                         continue;
3848                 }
3849 
3850                 /*
3851                  * Need to update the "path-class" property
3852                  * value in the device tree if different
3853                  * from the existing value.
3854                  */
3855                 if (strcmp(pclass, opinfo.opinfo_path_attr) != 0) {
3856                         (void) mdi_prop_update_string(pip, "path-class",
3857                             opinfo.opinfo_path_attr);
3858                 }
3859 
3860                 /*
3861                  * Only change the state if needed. i.e. Don't call
3862                  * mdi_pi_set_state to ONLINE a path if its already
3863                  * ONLINE. Same for STANDBY paths.
3864                  */
3865 
3866                 if ((opinfo.opinfo_path_state == SCSI_PATH_ACTIVE ||
3867                     opinfo.opinfo_path_state == SCSI_PATH_ACTIVE_NONOPT)) {
3868                         if (!(MDI_PI_IS_ONLINE(pip))) {
3869                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
3870                                     "!vhci_update_pathstates: marking path"
3871                                     " 0x%p as ONLINE\n", (void *)pip));
3872                                 cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3873                                 vhci_log(CE_NOTE, ddi_get_parent(dip), "!%s "
3874                                     "(%s%d): path %s "
3875                                     "is now ONLINE because of "
3876                                     "an externally initiated failover",
3877                                     ddi_pathname(dip, cpath),
3878                                     ddi_driver_name(dip),
3879                                     ddi_get_instance(dip),
3880                                     mdi_pi_spathname(pip));
3881                                 kmem_free(cpath, MAXPATHLEN);
3882                                 mdi_pi_set_state(pip,
3883                                     MDI_PATHINFO_STATE_ONLINE);
3884                                 mdi_pi_set_preferred(pip,
3885                                     opinfo.opinfo_preferred);
3886                                 tptr = kmem_alloc(strlen
3887                                     (opinfo.opinfo_path_attr)+1, KM_SLEEP);
3888                                 (void) strlcpy(tptr, opinfo.opinfo_path_attr,
3889                                     (strlen(opinfo.opinfo_path_attr)+1));
3890                                 mutex_enter(&vlun->svl_mutex);
3891                                 if (vlun->svl_active_pclass != NULL) {
3892                                         kmem_free(vlun->svl_active_pclass,
3893                                             strlen(vlun->svl_active_pclass)+1);
3894                                 }
3895                                 vlun->svl_active_pclass = tptr;
3896                                 if (vlun->svl_waiting_for_activepath) {
3897                                         vlun->svl_waiting_for_activepath = 0;
3898                                 }
3899                                 mutex_exit(&vlun->svl_mutex);
3900                         } else if (MDI_PI_IS_ONLINE(pip)) {
3901                                 if (strcmp(pclass, opinfo.opinfo_path_attr)
3902                                     != 0) {
3903                                         mdi_pi_set_preferred(pip,
3904                                             opinfo.opinfo_preferred);
3905                                         mutex_enter(&vlun->svl_mutex);
3906                                         if (vlun->svl_active_pclass == NULL ||
3907                                             strcmp(opinfo.opinfo_path_attr,
3908                                             vlun->svl_active_pclass) != 0) {
3909                                                 mutex_exit(&vlun->svl_mutex);
3910                                                 tptr = kmem_alloc(strlen
3911                                                     (opinfo.opinfo_path_attr)+1,
3912                                                     KM_SLEEP);
3913                                                 (void) strlcpy(tptr,
3914                                                     opinfo.opinfo_path_attr,
3915                                                     (strlen
3916                                                     (opinfo.opinfo_path_attr)
3917                                                     +1));
3918                                                 mutex_enter(&vlun->svl_mutex);
3919                                         } else {
3920                                                 /*
3921                                                  * No need to update
3922                                                  * svl_active_pclass
3923                                                  */
3924                                                 tptr = NULL;
3925                                                 mutex_exit(&vlun->svl_mutex);
3926                                         }
3927                                         if (tptr) {
3928                                                 if (vlun->svl_active_pclass
3929                                                     != NULL) {
3930                                                         kmem_free(vlun->
3931                                                             svl_active_pclass,
3932                                                             strlen(vlun->
3933                                                             svl_active_pclass)
3934                                                             +1);
3935                                                 }
3936                                                 vlun->svl_active_pclass = tptr;
3937                                                 mutex_exit(&vlun->svl_mutex);
3938                                         }
3939                                 }
3940                         }
3941 
3942                         /* Check for Reservation Conflict */
3943                         bp = scsi_alloc_consistent_buf(
3944                             &svp->svp_psd->sd_address, (struct buf *)NULL,
3945                             DEV_BSIZE, B_READ, NULL, NULL);
3946                         if (!bp) {
3947                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
3948                                     "!vhci_update_pathstates: No resources "
3949                                     "(buf)\n"));
3950                                 mdi_rele_path(pip);
3951                                 goto done;
3952                         }
3953                         pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
3954                             CDB_GROUP1, sizeof (struct scsi_arq_status), 0,
3955                             PKT_CONSISTENT, NULL, NULL);
3956                         if (pkt) {
3957                                 (void) scsi_setup_cdb((union scsi_cdb *)
3958                                     (uintptr_t)pkt->pkt_cdbp, SCMD_READ, 1, 1,
3959                                     0);
3960                                 pkt->pkt_time = 3*30;
3961                                 pkt->pkt_flags = FLAG_NOINTR;
3962                                 pkt->pkt_path_instance =
3963                                     mdi_pi_get_path_instance(pip);
3964 
3965                                 if ((scsi_transport(pkt) == TRAN_ACCEPT) &&
3966                                     (pkt->pkt_reason == CMD_CMPLT) &&
3967                                     (SCBP_C(pkt) ==
3968                                     STATUS_RESERVATION_CONFLICT)) {
3969                                         VHCI_DEBUG(1, (CE_NOTE, NULL,
3970                                             "!vhci_update_pathstates: reserv. "
3971                                             "conflict to be resolved on 0x%p\n",
3972                                             (void *)pip));
3973                                         svp_conflict = svp;
3974                                 }
3975                                 scsi_destroy_pkt(pkt);
3976                         }
3977                         scsi_free_consistent_buf(bp);
3978                 } else if ((opinfo.opinfo_path_state == SCSI_PATH_INACTIVE) &&
3979                     !(MDI_PI_IS_STANDBY(pip))) {
3980                         VHCI_DEBUG(1, (CE_NOTE, NULL,
3981                             "!vhci_update_pathstates: marking path"
3982                             " 0x%p as STANDBY\n", (void *)pip));
3983                         cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3984                         vhci_log(CE_NOTE, ddi_get_parent(dip), "!%s "
3985                             "(%s%d): path %s "
3986                             "is now STANDBY because of "
3987                             "an externally initiated failover",
3988                             ddi_pathname(dip, cpath),
3989                             ddi_driver_name(dip),
3990                             ddi_get_instance(dip),
3991                             mdi_pi_spathname(pip));
3992                         kmem_free(cpath, MAXPATHLEN);
3993                         mdi_pi_set_state(pip,
3994                             MDI_PATHINFO_STATE_STANDBY);
3995                         mdi_pi_set_preferred(pip,
3996                             opinfo.opinfo_preferred);
3997                         mutex_enter(&vlun->svl_mutex);
3998                         if (vlun->svl_active_pclass != NULL) {
3999                                 if (strcmp(vlun->svl_active_pclass,
4000                                     opinfo.opinfo_path_attr) == 0) {
4001                                         kmem_free(vlun->
4002                                             svl_active_pclass,
4003                                             strlen(vlun->
4004                                             svl_active_pclass)+1);
4005                                         vlun->svl_active_pclass = NULL;
4006                                 }
4007                         }
4008                         mutex_exit(&vlun->svl_mutex);
4009                 }
4010                 (void) mdi_prop_free(pclass);
4011                 sps = mdi_select_path(dip, NULL,
4012                     (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH |
4013                     MDI_SELECT_NO_PREFERRED), pip, &npip);
4014                 mdi_rele_path(pip);
4015 
4016         } while ((npip != NULL) && (sps == MDI_SUCCESS));
4017 
4018         /*
4019          * Check to see if this vlun has an active SCSI-II RESERVE.  If so
4020          * clear the reservation by sending a reset, so the host doesn't
4021          * receive a reservation conflict.  The reset has to be sent via a
4022          * working path.  Let's use a path referred to by svp_conflict as it
4023          * should be working.
4024          * Reset VLUN_RESERVE_ACTIVE_FLG for this vlun.  Also notify ssd
4025          * of the reset, explicitly.
4026          */
4027         if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
4028                 if (svp_conflict && (vlun->svl_xlf_capable == 0)) {
4029                         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_update_pathstates:"
4030                             " sending recovery reset on 0x%p, path_state: %x",
4031                             svp_conflict->svp_psd->sd_private,
4032                             mdi_pi_get_state((mdi_pathinfo_t *)
4033                             svp_conflict->svp_psd->sd_private)));
4034 
4035                         (void) vhci_recovery_reset(vlun,
4036                             &svp_conflict->svp_psd->sd_address, FALSE,
4037                             VHCI_DEPTH_TARGET);
4038                 }
4039                 vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
4040                 mutex_enter(&vhci->vhci_mutex);
4041                 scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
4042                     &vhci->vhci_reset_notify_listf);
4043                 mutex_exit(&vhci->vhci_mutex);
4044         }
4045         if (vlun->svl_flags & VLUN_UPDATE_TPG) {
4046                 /*
4047                  * Update the AccessState of related MP-API TPGs
4048                  */
4049                 (void) vhci_mpapi_update_tpg_acc_state_for_lu(vhci, vlun);
4050                 vlun->svl_flags &= ~VLUN_UPDATE_TPG;
4051         }
4052 done:
4053         if (vlun->svl_efo_update_path) {
4054                 vlun->svl_efo_update_path = 0;
4055                 vhci_efo_done(vlun->svl_swarg);
4056                 vlun->svl_swarg = 0;
4057         }
4058         VHCI_RELEASE_LUN(vlun);
4059 }
4060 
4061 /* ARGSUSED */
4062 static int
4063 vhci_pathinfo_init(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
4064 {
4065         scsi_hba_tran_t         *hba = NULL;
4066         struct scsi_device      *psd = NULL;
4067         scsi_vhci_lun_t         *vlun = NULL;
4068         dev_info_t              *pdip = NULL;
4069         dev_info_t              *tgt_dip;
4070         struct scsi_vhci        *vhci;
4071         char                    *guid;
4072         scsi_vhci_priv_t        *svp = NULL;
4073         int                     rval = MDI_FAILURE;
4074         int                     vlun_alloced = 0;
4075 
4076         ASSERT(vdip != NULL);
4077         ASSERT(pip != NULL);
4078 
4079         vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
4080         ASSERT(vhci != NULL);
4081 
4082         pdip = mdi_pi_get_phci(pip);
4083         ASSERT(pdip != NULL);
4084 
4085         hba = ddi_get_driver_private(pdip);
4086         ASSERT(hba != NULL);
4087 
4088         tgt_dip = mdi_pi_get_client(pip);
4089         ASSERT(tgt_dip != NULL);
4090 
4091         if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip, PROPFLAGS,
4092             MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
4093                 VHCI_DEBUG(1, (CE_WARN, NULL,
4094                     "vhci_pathinfo_init: lun guid property failed"));
4095                 goto failure;
4096         }
4097 
4098         vlun = vhci_lun_lookup_alloc(tgt_dip, guid, &vlun_alloced);
4099         ddi_prop_free(guid);
4100 
4101         vlun->svl_dip = tgt_dip;
4102 
4103         svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
4104         svp->svp_svl = vlun;
4105 
4106         /*
4107          * Initialize svl_lb_policy_save only for newly allocated vlun. Writing
4108          * to svl_lb_policy_save later could accidentally overwrite saved lb
4109          * policy.
4110          */
4111         if (vlun_alloced) {
4112                 vlun->svl_lb_policy_save = mdi_get_lb_policy(tgt_dip);
4113         }
4114 
4115         mutex_init(&svp->svp_mutex, NULL, MUTEX_DRIVER, NULL);
4116         cv_init(&svp->svp_cv, NULL, CV_DRIVER, NULL);
4117 
4118         psd = kmem_zalloc(sizeof (*psd), KM_SLEEP);
4119         mutex_init(&psd->sd_mutex, NULL, MUTEX_DRIVER, NULL);
4120 
4121         if (hba->tran_hba_flags & SCSI_HBA_ADDR_COMPLEX) {
4122                 /*
4123                  * For a SCSI_HBA_ADDR_COMPLEX transport we store a pointer to
4124                  * scsi_device in the scsi_address structure.  This allows an
4125                  * an HBA driver to find its scsi_device(9S) and
4126                  * per-scsi_device(9S) HBA private data given a
4127                  * scsi_address(9S) by using scsi_address_device(9F) and
4128                  * scsi_device_hba_private_get(9F)).
4129                  */
4130                 psd->sd_address.a.a_sd = psd;
4131         } else if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
4132                 /*
4133                  * Clone transport structure if requested, so
4134                  * Self enumerating HBAs always need to use cloning
4135                  */
4136                 scsi_hba_tran_t *clone =
4137                     kmem_alloc(sizeof (scsi_hba_tran_t), KM_SLEEP);
4138                 bcopy(hba, clone, sizeof (scsi_hba_tran_t));
4139                 hba = clone;
4140                 hba->tran_sd = psd;
4141         } else {
4142                 /*
4143                  * SPI pHCI unit-address. If we ever need to support this
4144                  * we could set a.spi.a_target/a.spi.a_lun based on pathinfo
4145                  * node unit-address properties.  For now we fail...
4146                  */
4147                 goto failure;
4148         }
4149 
4150         psd->sd_dev = tgt_dip;
4151         psd->sd_address.a_hba_tran = hba;
4152 
4153         /*
4154          * Mark scsi_device as being associated with a pathinfo node. For
4155          * a scsi_device structure associated with a devinfo node,
4156          * scsi_ctlops_initchild sets this field to NULL.
4157          */
4158         psd->sd_pathinfo = pip;
4159 
4160         /*
4161          * LEGACY: sd_private: set for older mpxio-capable pHCI drivers with
4162          * too much scsi_vhci/mdi/ndi knowledge. Remove this code when all
4163          * mpxio-capable pHCI drivers use SCSA enumeration services (or at
4164          * least have been changed to use sd_pathinfo instead).
4165          */
4166         psd->sd_private = (caddr_t)pip;
4167 
4168         /* See scsi_hba.c for info on sd_tran_safe kludge */
4169         psd->sd_tran_safe = hba;
4170 
4171         svp->svp_psd = psd;
4172         mdi_pi_set_vhci_private(pip, (caddr_t)svp);
4173 
4174         /*
4175          * call hba's target init entry point if it exists
4176          */
4177         if (hba->tran_tgt_init != NULL) {
4178                 psd->sd_tran_tgt_free_done = 0;
4179                 if ((rval = (*hba->tran_tgt_init)(pdip, tgt_dip,
4180                     hba, psd)) != DDI_SUCCESS) {
4181                         VHCI_DEBUG(1, (CE_WARN, pdip,
4182                             "!vhci_pathinfo_init: tran_tgt_init failed for "
4183                             "path=0x%p rval=%x", (void *)pip, rval));
4184                         goto failure;
4185                 }
4186         }
4187 
4188         svp->svp_new_path = 1;
4189 
4190         VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_pathinfo_init: path:%p\n",
4191             (void *)pip));
4192         return (MDI_SUCCESS);
4193 
4194 failure:
4195         if (psd) {
4196                 mutex_destroy(&psd->sd_mutex);
4197                 kmem_free(psd, sizeof (*psd));
4198         }
4199         if (svp) {
4200                 mdi_pi_set_vhci_private(pip, NULL);
4201                 mutex_destroy(&svp->svp_mutex);
4202                 cv_destroy(&svp->svp_cv);
4203                 kmem_free(svp, sizeof (*svp));
4204         }
4205         if (hba && (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE))
4206                 kmem_free(hba, sizeof (scsi_hba_tran_t));
4207 
4208         if (vlun_alloced)
4209                 vhci_lun_free(vlun, NULL);
4210 
4211         return (rval);
4212 }
4213 
4214 /* ARGSUSED */
4215 static int
4216 vhci_pathinfo_uninit(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
4217 {
4218         scsi_hba_tran_t         *hba = NULL;
4219         struct scsi_device      *psd = NULL;
4220         dev_info_t              *pdip = NULL;
4221         dev_info_t              *cdip = NULL;
4222         scsi_vhci_priv_t        *svp = NULL;
4223 
4224         ASSERT(vdip != NULL);
4225         ASSERT(pip != NULL);
4226 
4227         pdip = mdi_pi_get_phci(pip);
4228         ASSERT(pdip != NULL);
4229 
4230         cdip = mdi_pi_get_client(pip);
4231         ASSERT(cdip != NULL);
4232 
4233         hba = ddi_get_driver_private(pdip);
4234         ASSERT(hba != NULL);
4235 
4236         vhci_mpapi_set_path_state(vdip, pip, MP_DRVR_PATH_STATE_UNINIT);
4237         svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
4238         if (svp == NULL) {
4239                 /* path already freed. Nothing to do. */
4240                 return (MDI_SUCCESS);
4241         }
4242 
4243         psd = svp->svp_psd;
4244         ASSERT(psd != NULL);
4245 
4246         if (hba->tran_hba_flags & SCSI_HBA_ADDR_COMPLEX) {
4247                 /* Verify plumbing */
4248                 ASSERT(psd->sd_address.a_hba_tran == hba);
4249                 ASSERT(psd->sd_address.a.a_sd == psd);
4250         } else if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
4251                 /* Switch to cloned scsi_hba_tran(9S) structure */
4252                 hba = psd->sd_address.a_hba_tran;
4253                 ASSERT(hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE);
4254                 ASSERT(hba->tran_sd == psd);
4255         }
4256 
4257         if ((hba->tran_tgt_free != NULL) && !psd->sd_tran_tgt_free_done) {
4258                 (*hba->tran_tgt_free) (pdip, cdip, hba, psd);
4259                 psd->sd_tran_tgt_free_done = 1;
4260         }
4261         mutex_destroy(&psd->sd_mutex);
4262         if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
4263                 kmem_free(hba, sizeof (*hba));
4264         }
4265 
4266         mdi_pi_set_vhci_private(pip, NULL);
4267 
4268         /*
4269          * Free the pathinfo related scsi_device inquiry data. Note that this
4270          * matches what happens for scsi_hba.c devinfo case at uninitchild time.
4271          */
4272         if (psd->sd_inq)
4273                 kmem_free((caddr_t)psd->sd_inq, sizeof (struct scsi_inquiry));
4274         kmem_free((caddr_t)psd, sizeof (*psd));
4275 
4276         mutex_destroy(&svp->svp_mutex);
4277         cv_destroy(&svp->svp_cv);
4278         kmem_free((caddr_t)svp, sizeof (*svp));
4279 
4280         VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_pathinfo_uninit: path=0x%p\n",
4281             (void *)pip));
4282         return (MDI_SUCCESS);
4283 }
4284 
4285 /* ARGSUSED */
4286 static int
4287 vhci_pathinfo_state_change(dev_info_t *vdip, mdi_pathinfo_t *pip,
4288     mdi_pathinfo_state_t state, uint32_t ext_state, int flags)
4289 {
4290         int                     rval = MDI_SUCCESS;
4291         scsi_vhci_priv_t        *svp;
4292         scsi_vhci_lun_t         *vlun;
4293         int                     held;
4294         int                     op = (flags & 0xf00) >> 8;
4295         struct scsi_vhci        *vhci;
4296 
4297         vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
4298 
4299         if (flags & MDI_EXT_STATE_CHANGE) {
4300                 /*
4301                  * We do not want to issue any commands down the path in case
4302                  * sync flag is set. Lower layers might not be ready to accept
4303                  * any I/O commands.
4304                  */
4305                 if (op == DRIVER_DISABLE)
4306                         return (MDI_SUCCESS);
4307 
4308                 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
4309                 if (svp == NULL) {
4310                         return (MDI_FAILURE);
4311                 }
4312                 vlun = svp->svp_svl;
4313 
4314                 if (flags & MDI_BEFORE_STATE_CHANGE) {
4315                         /*
4316                          * Hold the LUN.
4317                          */
4318                         VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
4319                         if (flags & MDI_DISABLE_OP)  {
4320                                 /*
4321                                  * Issue scsi reset if it happens to be
4322                                  * reserved path.
4323                                  */
4324                                 if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
4325                                         /*
4326                                          * if reservation pending on
4327                                          * this path, dont' mark the
4328                                          * path busy
4329                                          */
4330                                         if (op == DRIVER_DISABLE_TRANSIENT) {
4331                                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
4332                                                     "!vhci_pathinfo"
4333                                                     "_state_change (pip:%p): "
4334                                                     " reservation: fail busy\n",
4335                                                     (void *)pip));
4336                                                 return (MDI_FAILURE);
4337                                         }
4338                                         if (pip == vlun->svl_resrv_pip) {
4339                                                 if (vhci_recovery_reset(
4340                                                     svp->svp_svl,
4341                                                     &svp->svp_psd->sd_address,
4342                                                     TRUE,
4343                                                     VHCI_DEPTH_TARGET) == 0) {
4344                                                         VHCI_DEBUG(1,
4345                                                             (CE_NOTE, NULL,
4346                                                             "!vhci_pathinfo"
4347                                                             "_state_change "
4348                                                             " (pip:%p): "
4349                                                             "reset failed, "
4350                                                             "give up!\n",
4351                                                             (void *)pip));
4352                                                 }
4353                                                 vlun->svl_flags &=
4354                                                     ~VLUN_RESERVE_ACTIVE_FLG;
4355                                         }
4356                                 }
4357                         } else if (flags & MDI_ENABLE_OP)  {
4358                                 if (((vhci->vhci_conf_flags &
4359                                     VHCI_CONF_FLAGS_AUTO_FAILBACK) ==
4360                                     VHCI_CONF_FLAGS_AUTO_FAILBACK) &&
4361                                     MDI_PI_IS_USER_DISABLE(pip) &&
4362                                     MDI_PI_IS_STANDBY(pip)) {
4363                                         struct scsi_failover_ops        *fo;
4364                                         char *best_pclass, *pclass = NULL;
4365                                         int  best_class, rv;
4366                                         /*
4367                                          * Failback if enabling a standby path
4368                                          * and it is the primary class or
4369                                          * preferred class
4370                                          */
4371                                         best_class = mdi_pi_get_preferred(pip);
4372                                         if (best_class == 0) {
4373                                                 /*
4374                                                  * if not preferred - compare
4375                                                  * path-class with class
4376                                                  */
4377                                                 fo = vlun->svl_fops;
4378                                                 (void) fo->sfo_pathclass_next(
4379                                                     NULL, &best_pclass,
4380                                                     vlun->svl_fops_ctpriv);
4381                                                 pclass = NULL;
4382                                                 rv = mdi_prop_lookup_string(pip,
4383                                                     "path-class", &pclass);
4384                                                 if (rv != MDI_SUCCESS ||
4385                                                     pclass == NULL) {
4386                                                         vhci_log(CE_NOTE, vdip,
4387                                                             "!path-class "
4388                                                             " lookup "
4389                                                             "failed. rv: %d"
4390                                                             "class: %p", rv,
4391                                                             (void *)pclass);
4392                                                 } else if (strncmp(pclass,
4393                                                     best_pclass,
4394                                                     strlen(best_pclass)) == 0) {
4395                                                         best_class = 1;
4396                                                 }
4397                                                 if (rv == MDI_SUCCESS &&
4398                                                     pclass != NULL) {
4399                                                         rv = mdi_prop_free(
4400                                                             pclass);
4401                                                         if (rv !=
4402                                                             DDI_PROP_SUCCESS) {
4403                                                                 vhci_log(
4404                                                                     CE_NOTE,
4405                                                                     vdip,
4406                                                                     "!path-"
4407                                                                     "class"
4408                                                                     " free"
4409                                                                     " failed"
4410                                                                     " rv: %d"
4411                                                                     " class: "
4412                                                                     "%p",
4413                                                                     rv,
4414                                                                     (void *)
4415                                                                     pclass);
4416                                                         }
4417                                                 }
4418                                         }
4419                                         if (best_class == 1) {
4420                                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
4421                                                     "preferred path: %p "
4422                                                     "USER_DISABLE->USER_ENABLE "
4423                                                     "transition for lun %s\n",
4424                                                     (void *)pip,
4425                                                     vlun->svl_lun_wwn));
4426                                                 (void) taskq_dispatch(
4427                                                     vhci->vhci_taskq,
4428                                                     vhci_initiate_auto_failback,
4429                                                     (void *) vlun, KM_SLEEP);
4430                                         }
4431                                 }
4432                                 /*
4433                                  * if PGR is active, revalidate key and
4434                                  * register on this path also, if key is
4435                                  * still valid
4436                                  */
4437                                 sema_p(&vlun->svl_pgr_sema);
4438                                 if (vlun->svl_pgr_active)
4439                                         (void)
4440                                             vhci_pgr_validate_and_register(svp);
4441                                 sema_v(&vlun->svl_pgr_sema);
4442                                 /*
4443                                  * Inform target driver about any
4444                                  * reservations to be reinstated if target
4445                                  * has dropped reservation during the busy
4446                                  * period.
4447                                  */
4448                                 mutex_enter(&vhci->vhci_mutex);
4449                                 scsi_hba_reset_notify_callback(
4450                                     &vhci->vhci_mutex,
4451                                     &vhci->vhci_reset_notify_listf);
4452                                 mutex_exit(&vhci->vhci_mutex);
4453                         }
4454                 }
4455                 if (flags & MDI_AFTER_STATE_CHANGE) {
4456                         if (flags & MDI_ENABLE_OP)  {
4457                                 mutex_enter(&vhci_global_mutex);
4458                                 cv_broadcast(&vhci_cv);
4459                                 mutex_exit(&vhci_global_mutex);
4460                         }
4461                         if (vlun->svl_setcap_done) {
4462                                 (void) vhci_pHCI_cap(&svp->svp_psd->sd_address,
4463                                     "sector-size", vlun->svl_sector_size,
4464                                     1, pip);
4465                         }
4466 
4467                         /*
4468                          * Release the LUN
4469                          */
4470                         VHCI_RELEASE_LUN(vlun);
4471 
4472                         /*
4473                          * Path transition is complete.
4474                          * Run callback to indicate target driver to
4475                          * retry to prevent IO starvation.
4476                          */
4477                         if (scsi_callback_id != 0) {
4478                                 ddi_run_callback(&scsi_callback_id);
4479                         }
4480                 }
4481         } else {
4482                 switch (state) {
4483                 case MDI_PATHINFO_STATE_ONLINE:
4484                         rval = vhci_pathinfo_online(vdip, pip, flags);
4485                         break;
4486 
4487                 case MDI_PATHINFO_STATE_OFFLINE:
4488                         rval = vhci_pathinfo_offline(vdip, pip, flags);
4489                         break;
4490 
4491                 default:
4492                         break;
4493                 }
4494                 /*
4495                  * Path transition is complete.
4496                  * Run callback to indicate target driver to
4497                  * retry to prevent IO starvation.
4498                  */
4499                 if ((rval == MDI_SUCCESS) && (scsi_callback_id != 0)) {
4500                         ddi_run_callback(&scsi_callback_id);
4501                 }
4502                 return (rval);
4503         }
4504 
4505         return (MDI_SUCCESS);
4506 }
4507 
4508 /*
4509  * Parse the mpxio load balancing options. The datanameptr
4510  * will point to a string containing the load-balance-options value.
4511  * The load-balance-options value will be a property that
4512  * defines the load-balance algorithm and any arguments to that
4513  * algorithm.
4514  * For example:
4515  * device-type-mpxio-options-list=
4516  * "device-type=SUN    SENA", "load-balance-options=logical-block-options"
4517  * "device-type=SUN     SE6920", "round-robin-options";
4518  * logical-block-options="load-balance=logical-block", "region-size=15";
4519  * round-robin-options="load-balance=round-robin";
4520  *
4521  * If the load-balance is not defined the load balance algorithm will
4522  * default to the global setting. There will be default values assigned
4523  * to the arguments (region-size=18) and if an argument is one
4524  * that is not known, it will be ignored.
4525  */
4526 static void
4527 vhci_parse_mpxio_lb_options(dev_info_t *dip, dev_info_t *cdip,
4528         caddr_t datanameptr)
4529 {
4530         char                    *dataptr, *next_entry;
4531         caddr_t                 config_list     = NULL;
4532         int                     config_list_len = 0, list_len = 0;
4533         int                     region_size = -1;
4534         client_lb_t             load_balance;
4535 
4536         if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, datanameptr,
4537             (caddr_t)&config_list, &config_list_len) != DDI_PROP_SUCCESS) {
4538                 return;
4539         }
4540 
4541         list_len = config_list_len;
4542         next_entry = config_list;
4543         while (config_list_len > 0) {
4544                 dataptr = next_entry;
4545 
4546                 if (strncmp(mdi_load_balance, dataptr,
4547                     strlen(mdi_load_balance)) == 0) {
4548                         /* get the load-balance scheme */
4549                         dataptr += strlen(mdi_load_balance) + 1;
4550                         if (strcmp(dataptr, LOAD_BALANCE_PROP_RR) == 0) {
4551                                 (void) mdi_set_lb_policy(cdip, LOAD_BALANCE_RR);
4552                                 load_balance = LOAD_BALANCE_RR;
4553                         } else if (strcmp(dataptr,
4554                             LOAD_BALANCE_PROP_LBA) == 0) {
4555                                 (void) mdi_set_lb_policy(cdip,
4556                                     LOAD_BALANCE_LBA);
4557                                 load_balance = LOAD_BALANCE_LBA;
4558                         } else if (strcmp(dataptr,
4559                             LOAD_BALANCE_PROP_NONE) == 0) {
4560                                 (void) mdi_set_lb_policy(cdip,
4561                                     LOAD_BALANCE_NONE);
4562                                 load_balance = LOAD_BALANCE_NONE;
4563                         }
4564                 } else if (strncmp(dataptr, LOGICAL_BLOCK_REGION_SIZE,
4565                     strlen(LOGICAL_BLOCK_REGION_SIZE)) == 0) {
4566                         int     i = 0;
4567                         char    *ptr;
4568                         char    *tmp;
4569 
4570                         tmp = dataptr + (strlen(LOGICAL_BLOCK_REGION_SIZE) + 1);
4571                         /* check for numeric value */
4572                         for (ptr = tmp; i < strlen(tmp); i++, ptr++) {
4573                                 if (!isdigit(*ptr)) {
4574                                         cmn_err(CE_WARN,
4575                                             "Illegal region size: %s."
4576                                             " Setting to default value: %d",
4577                                             tmp,
4578                                             LOAD_BALANCE_DEFAULT_REGION_SIZE);
4579                                         region_size =
4580                                             LOAD_BALANCE_DEFAULT_REGION_SIZE;
4581                                         break;
4582                                 }
4583                         }
4584                         if (i >= strlen(tmp)) {
4585                                 region_size = stoi(&tmp);
4586                         }
4587                         (void) mdi_set_lb_region_size(cdip, region_size);
4588                 }
4589                 config_list_len -= (strlen(next_entry) + 1);
4590                 next_entry += strlen(next_entry) + 1;
4591         }
4592 #ifdef DEBUG
4593         if ((region_size >= 0) && (load_balance != LOAD_BALANCE_LBA)) {
4594                 VHCI_DEBUG(1, (CE_NOTE, dip,
4595                     "!vhci_parse_mpxio_lb_options: region-size: %d"
4596                     "only valid for load-balance=logical-block\n",
4597                     region_size));
4598         }
4599 #endif
4600         if ((region_size == -1) && (load_balance == LOAD_BALANCE_LBA)) {
4601                 VHCI_DEBUG(1, (CE_NOTE, dip,
4602                     "!vhci_parse_mpxio_lb_options: No region-size"
4603                     " defined load-balance=logical-block."
4604                     " Default to: %d\n", LOAD_BALANCE_DEFAULT_REGION_SIZE));
4605                 (void) mdi_set_lb_region_size(cdip,
4606                     LOAD_BALANCE_DEFAULT_REGION_SIZE);
4607         }
4608         if (list_len > 0) {
4609                 kmem_free(config_list, list_len);
4610         }
4611 }
4612 
4613 /*
4614  * Parse the device-type-mpxio-options-list looking for the key of
4615  * "load-balance-options". If found, parse the load balancing options.
4616  * Check the comment of the vhci_get_device_type_mpxio_options()
4617  * for the device-type-mpxio-options-list.
4618  */
4619 static void
4620 vhci_parse_mpxio_options(dev_info_t *dip, dev_info_t *cdip,
4621                 caddr_t datanameptr, int list_len)
4622 {
4623         char            *dataptr;
4624         int             len;
4625 
4626         /*
4627          * get the data list
4628          */
4629         dataptr = datanameptr;
4630         len = 0;
4631         while (len < list_len &&
4632             strncmp(dataptr, DEVICE_TYPE_STR, strlen(DEVICE_TYPE_STR))
4633             != 0) {
4634                 if (strncmp(dataptr, LOAD_BALANCE_OPTIONS,
4635                     strlen(LOAD_BALANCE_OPTIONS)) == 0) {
4636                         len += strlen(LOAD_BALANCE_OPTIONS) + 1;
4637                         dataptr += strlen(LOAD_BALANCE_OPTIONS) + 1;
4638                         vhci_parse_mpxio_lb_options(dip, cdip, dataptr);
4639                 }
4640                 len += strlen(dataptr) + 1;
4641                 dataptr += strlen(dataptr) + 1;
4642         }
4643 }
4644 
4645 /*
4646  * Check the inquriy string returned from the device with the device-type
4647  * Check for the existence of the device-type-mpxio-options-list and
4648  * if found parse the list checking for a match with the device-type
4649  * value and the inquiry string returned from the device. If a match
4650  * is found, parse the mpxio options list. The format of the
4651  * device-type-mpxio-options-list is:
4652  * device-type-mpxio-options-list=
4653  * "device-type=SUN    SENA", "load-balance-options=logical-block-options"
4654  * "device-type=SUN     SE6920", "round-robin-options";
4655  * logical-block-options="load-balance=logical-block", "region-size=15";
4656  * round-robin-options="load-balance=round-robin";
4657  */
4658 void
4659 vhci_get_device_type_mpxio_options(dev_info_t *dip, dev_info_t *cdip,
4660         struct scsi_device *devp)
4661 {
4662 
4663         caddr_t                 config_list     = NULL;
4664         caddr_t                 vidptr, datanameptr;
4665         int                     vidlen, dupletlen = 0;
4666         int                     config_list_len = 0, len;
4667         struct scsi_inquiry     *inq = devp->sd_inq;
4668 
4669         /*
4670          * look up the device-type-mpxio-options-list and walk thru
4671          * the list compare the vendor ids of the earlier inquiry command and
4672          * with those vids in the list if there is a match, lookup
4673          * the mpxio-options value
4674          */
4675         if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4676             MPXIO_OPTIONS_LIST,
4677             (caddr_t)&config_list, &config_list_len) == DDI_PROP_SUCCESS) {
4678 
4679                 /*
4680                  * Compare vids in each duplet - if it matches,
4681                  * parse the mpxio options list.
4682                  */
4683                 for (len = config_list_len, vidptr = config_list; len > 0;
4684                     len -= dupletlen) {
4685 
4686                         dupletlen = 0;
4687 
4688                         if (strlen(vidptr) != 0 &&
4689                             strncmp(vidptr, DEVICE_TYPE_STR,
4690                             strlen(DEVICE_TYPE_STR)) == 0) {
4691                                 /* point to next duplet */
4692                                 datanameptr = vidptr + strlen(vidptr) + 1;
4693                                 /* add len of this duplet */
4694                                 dupletlen += strlen(vidptr) + 1;
4695                                 /* get to device type */
4696                                 vidptr += strlen(DEVICE_TYPE_STR) + 1;
4697                                 vidlen = strlen(vidptr);
4698                                 if ((vidlen != 0) &&
4699                                     bcmp(inq->inq_vid, vidptr, vidlen) == 0) {
4700                                         vhci_parse_mpxio_options(dip, cdip,
4701                                             datanameptr, len - dupletlen);
4702                                         break;
4703                                 }
4704                                 /* get to next duplet */
4705                                 vidptr += strlen(vidptr) + 1;
4706                         }
4707                         /* get to the next device-type */
4708                         while (len - dupletlen > 0 &&
4709                             strlen(vidptr) != 0 &&
4710                             strncmp(vidptr, DEVICE_TYPE_STR,
4711                             strlen(DEVICE_TYPE_STR)) != 0) {
4712                                 dupletlen += strlen(vidptr) + 1;
4713                                 vidptr += strlen(vidptr) + 1;
4714                         }
4715                 }
4716                 if (config_list_len > 0) {
4717                         kmem_free(config_list, config_list_len);
4718                 }
4719         }
4720 }
4721 
4722 static int
4723 vhci_update_pathinfo(struct scsi_device *psd,  mdi_pathinfo_t *pip,
4724         struct scsi_failover_ops *fo,
4725         scsi_vhci_lun_t         *vlun,
4726         struct scsi_vhci        *vhci)
4727 {
4728         struct scsi_path_opinfo         opinfo;
4729         char                            *pclass, *best_pclass;
4730         char                            *resrv_pclass = NULL;
4731         int                             force_rereserve = 0;
4732         int                             update_pathinfo_done = 0;
4733 
4734         if (fo->sfo_path_get_opinfo(psd, &opinfo, vlun->svl_fops_ctpriv) != 0) {
4735                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_update_pathinfo: "
4736                     "Failed to get operation info for path:%p\n", (void *)pip));
4737                 return (MDI_FAILURE);
4738         }
4739         /* set the xlf capable flag in the vlun for future use */
4740         vlun->svl_xlf_capable = opinfo.opinfo_xlf_capable;
4741         (void) mdi_prop_update_string(pip, "path-class",
4742             opinfo.opinfo_path_attr);
4743 
4744         pclass = opinfo.opinfo_path_attr;
4745         if (opinfo.opinfo_path_state == SCSI_PATH_ACTIVE) {
4746                 mutex_enter(&vlun->svl_mutex);
4747                 if (vlun->svl_active_pclass != NULL) {
4748                         if (strcmp(vlun->svl_active_pclass, pclass) != 0) {
4749                                 mutex_exit(&vlun->svl_mutex);
4750                                 /*
4751                                  * Externally initiated failover has happened;
4752                                  * force the path state to be STANDBY/ONLINE,
4753                                  * next IO will trigger failover and thus
4754                                  * sync-up the pathstates.  Reason we don't
4755                                  * sync-up immediately by invoking
4756                                  * vhci_update_pathstates() is because it
4757                                  * needs a VHCI_HOLD_LUN() and we don't
4758                                  * want to block here.
4759                                  *
4760                                  * Further, if the device is an ALUA device,
4761                                  * then failure to exactly match 'pclass' and
4762                                  * 'svl_active_pclass'(as is the case here)
4763                                  * indicates that the currently active path
4764                                  * is a 'non-optimized' path - which means
4765                                  * that 'svl_active_pclass' needs to be
                                 * replaced with opinfo.opinfo_path_attr
4767                                  * value.
4768                                  */
4769 
4770                                 if (SCSI_FAILOVER_IS_TPGS(vlun->svl_fops)) {
4771                                         char    *tptr;
4772 
4773                                         /*
4774                                          * The device is ALUA compliant. The
4775                                          * state need to be changed to online
4776                                          * rather than standby state which is
4777                                          * done typically for a asymmetric
4778                                          * device that is non ALUA compliant.
4779                                          */
4780                                         mdi_pi_set_state(pip,
4781                                             MDI_PATHINFO_STATE_ONLINE);
4782                                         tptr = kmem_alloc(strlen
4783                                             (opinfo.opinfo_path_attr)+1,
4784                                             KM_SLEEP);
4785                                         (void) strlcpy(tptr,
4786                                             opinfo.opinfo_path_attr,
4787                                             (strlen(opinfo.opinfo_path_attr)
4788                                             +1));
4789                                         mutex_enter(&vlun->svl_mutex);
4790                                         kmem_free(vlun->svl_active_pclass,
4791                                             strlen(vlun->svl_active_pclass)+1);
4792                                         vlun->svl_active_pclass = tptr;
4793                                         mutex_exit(&vlun->svl_mutex);
4794                                 } else {
4795                                         /*
4796                                          * Non ALUA device case.
4797                                          */
4798                                         mdi_pi_set_state(pip,
4799                                             MDI_PATHINFO_STATE_STANDBY);
4800                                 }
4801                                 vlun->svl_fo_support = opinfo.opinfo_mode;
4802                                 mdi_pi_set_preferred(pip,
4803                                     opinfo.opinfo_preferred);
4804                                 update_pathinfo_done = 1;
4805                         }
4806 
4807                         /*
4808                          * Find out a class of currently reserved path if there
4809                          * is any.
4810                          */
4811                         if ((vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) &&
4812                             mdi_prop_lookup_string(vlun->svl_resrv_pip,
4813                             "path-class", &resrv_pclass) != MDI_SUCCESS) {
4814                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
4815                                     "!vhci_update_pathinfo: prop lookup "
4816                                     "failed for path 0x%p\n",
4817                                     (void *)vlun->svl_resrv_pip));
4818                                 /*
4819                                  * Something is wrong with the reserved path.
4820                                  * We can't do much with that right here. Just
4821                                  * force re-reservation to another path.
4822                                  */
4823                                 force_rereserve = 1;
4824                         }
4825 
4826                         (void) fo->sfo_pathclass_next(NULL, &best_pclass,
4827                             vlun->svl_fops_ctpriv);
4828                         if ((force_rereserve == 1) || ((resrv_pclass != NULL) &&
4829                             (strcmp(pclass, best_pclass) == 0) &&
4830                             (strcmp(resrv_pclass, best_pclass) != 0))) {
4831                                 /*
4832                                  * Inform target driver that a reservation
4833                                  * should be reinstated because the reserved
4834                                  * path is not the most preferred one.
4835                                  */
4836                                 mutex_enter(&vhci->vhci_mutex);
4837                                 scsi_hba_reset_notify_callback(
4838                                     &vhci->vhci_mutex,
4839                                     &vhci->vhci_reset_notify_listf);
4840                                 mutex_exit(&vhci->vhci_mutex);
4841                         }
4842 
4843                         if (update_pathinfo_done == 1) {
4844                                 return (MDI_SUCCESS);
4845                         }
4846                 } else {
4847                         char    *tptr;
4848 
4849                         /*
4850                          * lets release the mutex before we try to
4851                          * allocate since the potential to sleep is
4852                          * possible.
4853                          */
4854                         mutex_exit(&vlun->svl_mutex);
4855                         tptr = kmem_alloc(strlen(pclass)+1, KM_SLEEP);
4856                         (void) strlcpy(tptr, pclass, (strlen(pclass)+1));
4857                         mutex_enter(&vlun->svl_mutex);
4858                         vlun->svl_active_pclass = tptr;
4859                 }
4860                 mutex_exit(&vlun->svl_mutex);
4861                 mdi_pi_set_state(pip, MDI_PATHINFO_STATE_ONLINE);
4862                 vlun->svl_waiting_for_activepath = 0;
4863         } else if (opinfo.opinfo_path_state == SCSI_PATH_ACTIVE_NONOPT) {
4864                 mutex_enter(&vlun->svl_mutex);
4865                 if (vlun->svl_active_pclass == NULL) {
4866                         char    *tptr;
4867 
4868                         mutex_exit(&vlun->svl_mutex);
4869                         tptr = kmem_alloc(strlen(pclass)+1, KM_SLEEP);
4870                         (void) strlcpy(tptr, pclass, (strlen(pclass)+1));
4871                         mutex_enter(&vlun->svl_mutex);
4872                         vlun->svl_active_pclass = tptr;
4873                 }
4874                 mutex_exit(&vlun->svl_mutex);
4875                 mdi_pi_set_state(pip, MDI_PATHINFO_STATE_ONLINE);
4876                 vlun->svl_waiting_for_activepath = 0;
4877         } else if (opinfo.opinfo_path_state == SCSI_PATH_INACTIVE) {
4878                 mutex_enter(&vlun->svl_mutex);
4879                 if (vlun->svl_active_pclass != NULL) {
4880                         if (strcmp(vlun->svl_active_pclass, pclass) == 0) {
4881                                 mutex_exit(&vlun->svl_mutex);
4882                                 /*
4883                                  * externally initiated failover has happened;
4884                                  * force state to ONLINE (see comment above)
4885                                  */
4886                                 mdi_pi_set_state(pip,
4887                                     MDI_PATHINFO_STATE_ONLINE);
4888                                 vlun->svl_fo_support = opinfo.opinfo_mode;
4889                                 mdi_pi_set_preferred(pip,
4890                                     opinfo.opinfo_preferred);
4891                                 return (MDI_SUCCESS);
4892                         }
4893                 }
4894                 mutex_exit(&vlun->svl_mutex);
4895                 mdi_pi_set_state(pip, MDI_PATHINFO_STATE_STANDBY);
4896 
4897                 /*
4898                  * Initiate auto-failback, if enabled, for path if path-state
4899                  * is transitioning from OFFLINE->STANDBY and pathclass is the
4900                  * preferred pathclass for this storage.
4901                  * NOTE: In case where opinfo_path_state is SCSI_PATH_ACTIVE
4902                  * (above), where the pi state is set to STANDBY, we don't
4903                  * initiate auto-failback as the next IO shall take care of.
4904                  * this. See comment above.
4905                  */
4906                 (void) fo->sfo_pathclass_next(NULL, &best_pclass,
4907                     vlun->svl_fops_ctpriv);
4908                 if (((vhci->vhci_conf_flags & VHCI_CONF_FLAGS_AUTO_FAILBACK) ==
4909                     VHCI_CONF_FLAGS_AUTO_FAILBACK) &&
4910                     (strcmp(pclass, best_pclass) == 0) &&
4911                     ((MDI_PI_OLD_STATE(pip) == MDI_PATHINFO_STATE_OFFLINE)||
4912                     (MDI_PI_OLD_STATE(pip) == MDI_PATHINFO_STATE_INIT))) {
4913                         VHCI_DEBUG(1, (CE_NOTE, NULL, "%s pathclass path: %p"
4914                             " OFFLINE->STANDBY transition for lun %s\n",
4915                             best_pclass, (void *)pip, vlun->svl_lun_wwn));
4916                         (void) taskq_dispatch(vhci->vhci_taskq,
4917                             vhci_initiate_auto_failback, (void *) vlun,
4918                             KM_SLEEP);
4919                 }
4920         }
4921         vlun->svl_fo_support = opinfo.opinfo_mode;
4922         mdi_pi_set_preferred(pip, opinfo.opinfo_preferred);
4923 
4924         VHCI_DEBUG(8, (CE_NOTE, NULL, "vhci_update_pathinfo: opinfo_rev = %x,"
4925             " opinfo_path_state = %x opinfo_preferred = %x, opinfo_mode = %x\n",
4926             opinfo.opinfo_rev, opinfo.opinfo_path_state,
4927             opinfo.opinfo_preferred, opinfo.opinfo_mode));
4928 
4929         return (MDI_SUCCESS);
4930 }
4931 
4932 /*
4933  * Form the kstat name and and call mdi_pi_kstat_create()
4934  */
4935 void
4936 vhci_kstat_create_pathinfo(mdi_pathinfo_t *pip)
4937 {
4938         dev_info_t      *tgt_dip;
4939         dev_info_t      *pdip;
4940         char            *guid;
4941         char            *target_port, *target_port_dup;
4942         char            ks_name[KSTAT_STRLEN];
4943         uint_t          pid;
4944         int             by_id;
4945         mod_hash_val_t  hv;
4946 
4947 
4948         /* return if we have already allocated kstats */
4949         if (mdi_pi_kstat_exists(pip))
4950                 return;
4951 
4952         /*
4953          * We need instance numbers to create a kstat name, return if we don't
4954          * have instance numbers assigned yet.
4955          */
4956         tgt_dip = mdi_pi_get_client(pip);
4957         pdip = mdi_pi_get_phci(pip);
4958         if ((ddi_get_instance(tgt_dip) == -1) || (ddi_get_instance(pdip) == -1))
4959                 return;
4960 
4961         /*
4962          * A path oriented kstat has a ks_name of the form:
4963          *
4964          * <client-driver><instance>.t<pid>.<pHCI-driver><instance>
4965          *
4966          * We maintain a bidirectional 'target-port' to <pid> map,
4967          * called targetmap. All pathinfo nodes with the same
4968          * 'target-port' map to the same <pid>. The iostat(1M) code,
4969          * when parsing a path oriented kstat name, uses the <pid> as
4970          * a SCSI_VHCI_GET_TARGET_LONGNAME ioctl argument in order
4971          * to get the 'target-port'. For KSTAT_FLAG_PERSISTENT kstats,
4972          * this ioctl needs to translate a <pid> to a 'target-port'
4973          * even after all pathinfo nodes associated with the
4974          * 'target-port' have been destroyed. This is needed to support
4975          * consistent first-iteration activity-since-boot iostat(1M)
4976          * output. Because of this requirement, the mapping can't be
4977          * based on pathinfo information in a devinfo snapshot.
4978          */
4979 
4980         /* determine 'target-port' */
4981         if (mdi_prop_lookup_string(pip,
4982             SCSI_ADDR_PROP_TARGET_PORT, &target_port) == MDI_SUCCESS) {
4983                 target_port_dup = i_ddi_strdup(target_port, KM_SLEEP);
4984                 (void) mdi_prop_free(target_port);
4985                 by_id = 1;
4986         } else {
4987                 /*
4988                  * If the pHCI did not set up 'target-port' on this
4989                  * pathinfo node, assume that our client is the only
4990                  * one with paths to the device by using the guid
4991                  * value as the 'target-port'. Since no other client
4992                  * will have the same guid, no other client will use
4993                  * the same <pid>.  NOTE: a client with an instance
4994                  * number always has a guid.
4995                  */
4996                 (void) ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip,
4997                     PROPFLAGS, MDI_CLIENT_GUID_PROP, &guid);
4998                 target_port_dup = i_ddi_strdup(guid, KM_SLEEP);
4999                 ddi_prop_free(guid);
5000 
5001                 /*
5002                  * For this type of mapping we don't want the
5003                  * <id> -> 'target-port' mapping to be made.  This
5004                  * will cause the SCSI_VHCI_GET_TARGET_LONGNAME ioctl
5005                  * to fail, and the iostat(1M) long '-n' output will
5006                  * still use the <pid>.  We do this because we just
5007                  * made up the 'target-port' using the guid, and we
5008                  * don't want to expose that fact in iostat output.
5009                  */
5010                 by_id = 0;
5011         }
5012 
5013         /* find/establish <pid> given 'target-port' */
5014         mutex_enter(&vhci_targetmap_mutex);
5015         if (mod_hash_find(vhci_targetmap_byport,
5016             (mod_hash_key_t)target_port_dup, &hv) == 0) {
5017                 pid = (int)(intptr_t)hv;        /* mapping exists */
5018         } else {
5019                 pid = vhci_targetmap_pid++;     /* new mapping */
5020 
5021                 (void) mod_hash_insert(vhci_targetmap_byport,
5022                     (mod_hash_key_t)target_port_dup,
5023                     (mod_hash_val_t)(intptr_t)pid);
5024                 if (by_id) {
5025                         (void) mod_hash_insert(vhci_targetmap_bypid,
5026                             (mod_hash_key_t)(uintptr_t)pid,
5027                             (mod_hash_val_t)(uintptr_t)target_port_dup);
5028                 }
5029                 target_port_dup = NULL;         /* owned by hash */
5030         }
5031         mutex_exit(&vhci_targetmap_mutex);
5032 
5033         /* form kstat name */
5034         (void) snprintf(ks_name, KSTAT_STRLEN, "%s%d.t%d.%s%d",
5035             ddi_driver_name(tgt_dip), ddi_get_instance(tgt_dip),
5036             pid, ddi_driver_name(pdip), ddi_get_instance(pdip));
5037 
5038         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_path_online: path:%p "
5039             "kstat %s: pid %x <-> port %s\n", (void *)pip,
5040             ks_name, pid, target_port_dup));
5041         if (target_port_dup)
5042                 kmem_free(target_port_dup, strlen(target_port_dup) + 1);
5043 
5044         /* call mdi to create kstats with the name we built */
5045         (void) mdi_pi_kstat_create(pip, ks_name);
5046 }
5047 
5048 /* ARGSUSED */
5049 static int
5050 vhci_pathinfo_online(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
5051 {
5052         scsi_hba_tran_t                 *hba = NULL;
5053         struct scsi_device              *psd = NULL;
5054         scsi_vhci_lun_t                 *vlun = NULL;
5055         dev_info_t                      *pdip = NULL;
5056         dev_info_t                      *cdip;
5057         dev_info_t                      *tgt_dip;
5058         struct scsi_vhci                *vhci;
5059         char                            *guid;
5060         struct scsi_failover_ops        *sfo;
5061         scsi_vhci_priv_t                *svp = NULL;
5062         struct scsi_address             *ap;
5063         struct scsi_pkt                 *pkt;
5064         int                             rval = MDI_FAILURE;
5065         mpapi_item_list_t               *list_ptr;
5066         mpapi_lu_data_t                 *ld;
5067 
5068         ASSERT(vdip != NULL);
5069         ASSERT(pip != NULL);
5070 
5071         vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
5072         ASSERT(vhci != NULL);
5073 
5074         pdip = mdi_pi_get_phci(pip);
5075         hba = ddi_get_driver_private(pdip);
5076         ASSERT(hba != NULL);
5077 
5078         svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
5079         ASSERT(svp != NULL);
5080 
5081         cdip = mdi_pi_get_client(pip);
5082         ASSERT(cdip != NULL);
5083         if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, PROPFLAGS,
5084             MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
5085                 VHCI_DEBUG(1, (CE_WARN, NULL, "vhci_path_online: lun guid "
5086                     "property failed"));
5087                 goto failure;
5088         }
5089 
5090         vlun = vhci_lun_lookup(cdip);
5091         ASSERT(vlun != NULL);
5092 
5093         ddi_prop_free(guid);
5094 
5095         vlun->svl_dip = mdi_pi_get_client(pip);
5096         ASSERT(vlun->svl_dip != NULL);
5097 
5098         psd = svp->svp_psd;
5099         ASSERT(psd != NULL);
5100 
5101         ap = &psd->sd_address;
5102 
5103         /*
5104          * Get inquiry data into pathinfo related scsi_device structure.
5105          * Free sq_inq when pathinfo related scsi_device structure is destroyed
5106          * by vhci_pathinfo_uninit(). In other words, vhci maintains its own
5107          * copy of scsi_device and scsi_inquiry data on a per-path basis.
5108          */
5109         if (scsi_probe(psd, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
5110                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_pathinfo_online: "
5111                     "scsi_probe failed path:%p rval:%x\n", (void *)pip, rval));
5112                 rval = MDI_FAILURE;
5113                 goto failure;
5114         }
5115 
5116         /*
5117          * See if we have a failover module to support the device.
5118          *
5119          * We re-probe to determine the failover ops for each path. This
5120          * is done in case there are any path-specific side-effects associated
5121          * with the sfo_device_probe implementation.
5122          *
5123          * Give the first successfull sfo_device_probe the opportunity to
5124          * establish 'ctpriv', vlun/client private data. The ctpriv will
5125          * then be passed into the failover module on all other sfo_device_*()
5126          * operations (and must be freed by sfo_device_unprobe implementation).
5127          *
5128          * NOTE: While sfo_device_probe is done once per path,
5129          * sfo_device_unprobe only occurs once - when the vlun is destroyed.
5130          *
5131          * NOTE: We don't currently support per-path fops private data
5132          * mechanism.
5133          */
5134         sfo = vhci_dev_fo(vdip, psd,
5135             &vlun->svl_fops_ctpriv, &vlun->svl_fops_name);
5136 
5137         /* check path configuration result with current vlun state */
5138         if (((sfo && vlun->svl_fops) && (sfo != vlun->svl_fops)) ||
5139             (sfo && vlun->svl_not_supported) ||
5140             ((sfo == NULL) && vlun->svl_fops)) {
5141                 /* Getting different results for different paths. */
5142                 VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip,
5143                     "!vhci_pathinfo_online: dev (path 0x%p) contradiction\n",
5144                     (void *)pip));
5145                 cmn_err(CE_WARN, "scsi_vhci: failover contradiction: "
5146                     "'%s'.vs.'%s': path %s\n",
5147                     vlun->svl_fops ? vlun->svl_fops->sfo_name : "NULL",
5148                     sfo ? sfo->sfo_name : "NULL", mdi_pi_pathname(pip));
5149                 vlun->svl_not_supported = 1;
5150                 rval = MDI_NOT_SUPPORTED;
5151                 goto done;
5152         } else if (sfo == NULL) {
5153                 /* No failover module - device not supported under vHCI.  */
5154                 VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip,
5155                     "!vhci_pathinfo_online: dev (path 0x%p) not "
5156                     "supported\n", (void *)pip));
5157 
5158                 /* XXX does this contradict vhci_is_dev_supported ? */
5159                 vlun->svl_not_supported = 1;
5160                 rval = MDI_NOT_SUPPORTED;
5161                 goto done;
5162         }
5163 
5164         /* failover supported for device - save failover_ops in vlun */
5165         vlun->svl_fops = sfo;
5166         ASSERT(vlun->svl_fops_name != NULL);
5167 
5168         /*
5169          * Obtain the device-type based mpxio options as specified in
5170          * scsi_vhci.conf file.
5171          *
5172          * NOTE: currently, the end result is a call to
5173          * mdi_set_lb_region_size().
5174          */
5175         tgt_dip = psd->sd_dev;
5176         ASSERT(tgt_dip != NULL);
5177         vhci_get_device_type_mpxio_options(vdip, tgt_dip, psd);
5178 
5179         /*
5180          * if PGR is active, revalidate key and register on this path also,
5181          * if key is still valid
5182          */
5183         sema_p(&vlun->svl_pgr_sema);
5184         if (vlun->svl_pgr_active) {
5185                 rval = vhci_pgr_validate_and_register(svp);
5186                 if (rval != 1) {
5187                         rval = MDI_FAILURE;
5188                         sema_v(&vlun->svl_pgr_sema);
5189                         goto failure;
5190                 }
5191         }
5192         sema_v(&vlun->svl_pgr_sema);
5193 
5194         if (svp->svp_new_path) {
5195                 /*
5196                  * Last chance to perform any cleanup operations on this
5197                  * new path before making this path completely online.
5198                  */
5199                 svp->svp_new_path = 0;
5200 
5201                 /*
5202                  * If scsi_vhci knows the lun is alread RESERVE'd,
5203                  * then skip the issue of RELEASE on new path.
5204                  */
5205                 if ((vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) == 0) {
5206                         /*
5207                          * Issue SCSI-2 RELEASE only for the first time on
5208                          * a new path just in case the host rebooted and
5209                          * a reservation is still pending on this path.
5210                          * IBM Shark storage does not clear RESERVE upon
5211                          * host reboot.
5212                          */
5213                         pkt = scsi_init_pkt(ap, NULL, NULL, CDB_GROUP0,
5214                             sizeof (struct scsi_arq_status), 0, 0,
5215                             SLEEP_FUNC, NULL);
5216                         if (pkt == NULL) {
5217                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
5218                                     "!vhci_pathinfo_online: "
5219                                     "Release init_pkt failed :%p\n",
5220                                     (void *)pip));
5221                                 rval = MDI_FAILURE;
5222                                 goto failure;
5223                         }
5224                         pkt->pkt_cdbp[0] = SCMD_RELEASE;
5225                         pkt->pkt_time = 60;
5226 
5227                         VHCI_DEBUG(1, (CE_NOTE, NULL,
5228                             "!vhci_path_online: path:%p "
5229                             "Issued SCSI-2 RELEASE\n", (void *)pip));
5230 
5231                         /* Ignore the return value */
5232                         (void) vhci_do_scsi_cmd(pkt);
5233                         scsi_destroy_pkt(pkt);
5234                 }
5235         }
5236 
5237         rval = vhci_update_pathinfo(psd, pip, sfo, vlun, vhci);
5238         if (rval == MDI_FAILURE) {
5239                 goto failure;
5240         }
5241 
5242         /* Initialize MP-API data */
5243         vhci_update_mpapi_data(vhci, vlun, pip);
5244 
5245         /*
5246          * MP-API also needs the Inquiry data to be maintained in the
5247          * mp_vendor_prop_t structure, so find the lun and update its
5248          * structure with this data.
5249          */
5250         list_ptr = (mpapi_item_list_t *)vhci_get_mpapi_item(vhci, NULL,
5251             MP_OBJECT_TYPE_MULTIPATH_LU, (void *)vlun);
5252         ld = (mpapi_lu_data_t *)list_ptr->item->idata;
5253         if (ld != NULL) {
5254                 bcopy(psd->sd_inq->inq_vid, ld->prop.prodInfo.vendor, 8);
5255                 bcopy(psd->sd_inq->inq_pid, ld->prop.prodInfo.product, 16);
5256                 bcopy(psd->sd_inq->inq_revision, ld->prop.prodInfo.revision, 4);
5257         } else {
5258                 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_pathinfo_online: "
5259                     "mpapi_lu_data_t is NULL"));
5260         }
5261 
5262         /* create kstats for path */
5263         vhci_kstat_create_pathinfo(pip);
5264 
5265 done:
5266         mutex_enter(&vhci_global_mutex);
5267         cv_broadcast(&vhci_cv);
5268         mutex_exit(&vhci_global_mutex);
5269 
5270         if (vlun->svl_setcap_done) {
5271                 (void) vhci_pHCI_cap(ap, "sector-size",
5272                     vlun->svl_sector_size, 1, pip);
5273         }
5274 
5275         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_path_online: path:%p\n",
5276             (void *)pip));
5277 
5278 failure:
5279         return (rval);
5280 }
5281 
5282 /*
5283  * path offline handler.  Release all bindings that will not be
5284  * released by the normal packet transport/completion code path.
5285  * Since we don't (presently) keep any bindings alive outside of
5286  * the in-transport packets (which will be released on completion)
5287  * there is not much to do here.
5288  */
5289 /* ARGSUSED */
5290 static int
5291 vhci_pathinfo_offline(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
5292 {
5293         scsi_hba_tran_t         *hba = NULL;
5294         struct scsi_device      *psd = NULL;
5295         dev_info_t              *pdip = NULL;
5296         dev_info_t              *cdip = NULL;
5297         scsi_vhci_priv_t        *svp = NULL;
5298 
5299         ASSERT(vdip != NULL);
5300         ASSERT(pip != NULL);
5301 
5302         pdip = mdi_pi_get_phci(pip);
5303         ASSERT(pdip != NULL);
5304         if (pdip == NULL) {
5305                 VHCI_DEBUG(1, (CE_WARN, vdip, "Invalid path 0x%p: NULL "
5306                     "phci dip", (void *)pip));
5307                 return (MDI_FAILURE);
5308         }
5309 
5310         cdip = mdi_pi_get_client(pip);
5311         ASSERT(cdip != NULL);
5312         if (cdip == NULL) {
5313                 VHCI_DEBUG(1, (CE_WARN, vdip, "Invalid path 0x%p: NULL "
5314                     "client dip", (void *)pip));
5315                 return (MDI_FAILURE);
5316         }
5317 
5318         hba = ddi_get_driver_private(pdip);
5319         ASSERT(hba != NULL);
5320 
5321         svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
5322         if (svp == NULL) {
5323                 /*
5324                  * mdi_pathinfo node in INIT state can have vHCI private
5325                  * information set to null
5326                  */
5327                 VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5328                     "svp is NULL for pip 0x%p\n", (void *)pip));
5329                 return (MDI_SUCCESS);
5330         }
5331 
5332         psd = svp->svp_psd;
5333         ASSERT(psd != NULL);
5334 
5335         mutex_enter(&svp->svp_mutex);
5336 
5337         VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5338             "%d cmds pending on path: 0x%p\n", svp->svp_cmds, (void *)pip));
5339         while (svp->svp_cmds != 0) {
5340                 if (cv_reltimedwait(&svp->svp_cv, &svp->svp_mutex,
5341                     drv_usectohz(vhci_path_quiesce_timeout * 1000000),
5342                     TR_CLOCK_TICK) == -1) {
5343                         /*
5344                          * The timeout time reached without the condition
5345                          * being signaled.
5346                          */
5347                         VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5348                             "Timeout reached on path 0x%p without the cond\n",
5349                             (void *)pip));
5350                         VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5351                             "%d cmds still pending on path: 0x%p\n",
5352                             svp->svp_cmds, (void *)pip));
5353                         break;
5354                 }
5355         }
5356         mutex_exit(&svp->svp_mutex);
5357 
5358         /*
5359          * Check to see if this vlun has an active SCSI-II RESERVE. And this
5360          * is the pip for the path that has been reserved.
5361          * If so clear the reservation by sending a reset, so the host will not
5362          * get a reservation conflict.  Reset the flag VLUN_RESERVE_ACTIVE_FLG
5363          * for this lun.  Also a reset notify is sent to the target driver
5364          * just in case the POR check condition is cleared by some other layer
5365          * in the stack.
5366          */
5367         if (svp->svp_svl->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
5368                 if (pip == svp->svp_svl->svl_resrv_pip) {
5369                         if (vhci_recovery_reset(svp->svp_svl,
5370                             &svp->svp_psd->sd_address, TRUE,
5371                             VHCI_DEPTH_TARGET) == 0) {
5372                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
5373                                     "!vhci_pathinfo_offline (pip:%p):"
5374                                     "reset failed, retrying\n", (void *)pip));
5375                                 delay(1*drv_usectohz(1000000));
5376                                 if (vhci_recovery_reset(svp->svp_svl,
5377                                     &svp->svp_psd->sd_address, TRUE,
5378                                     VHCI_DEPTH_TARGET) == 0) {
5379                                         VHCI_DEBUG(1, (CE_NOTE, NULL,
5380                                             "!vhci_pathinfo_offline "
5381                                             "(pip:%p): reset failed, "
5382                                             "giving up!\n", (void *)pip));
5383                                 }
5384                         }
5385                         svp->svp_svl->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
5386                 }
5387         }
5388 
5389         mdi_pi_set_state(pip, MDI_PATHINFO_STATE_OFFLINE);
5390         vhci_mpapi_set_path_state(vdip, pip, MP_DRVR_PATH_STATE_REMOVED);
5391 
5392         VHCI_DEBUG(1, (CE_NOTE, NULL,
5393             "!vhci_pathinfo_offline: offlined path 0x%p\n", (void *)pip));
5394         return (MDI_SUCCESS);
5395 }
5396 
5397 
5398 /*
5399  * routine for SCSI VHCI IOCTL implementation.
5400  */
5401 /* ARGSUSED */
5402 static int
5403 vhci_ctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *credp, int *rval)
5404 {
5405         struct scsi_vhci                *vhci;
5406         dev_info_t                      *vdip;
5407         mdi_pathinfo_t                  *pip;
5408         int                             instance, held;
5409         int                             retval = 0;
5410         caddr_t                         phci_path = NULL, client_path = NULL;
5411         caddr_t                         paddr = NULL;
5412         sv_iocdata_t                    ioc;
5413         sv_iocdata_t                    *pioc = &ioc;
5414         sv_switch_to_cntlr_iocdata_t    iocsc;
5415         sv_switch_to_cntlr_iocdata_t    *piocsc = &iocsc;
5416         caddr_t                         s;
5417         scsi_vhci_lun_t                 *vlun;
5418         struct scsi_failover_ops        *fo;
5419         char                            *pclass;
5420 
5421         /* Check for validity of vhci structure */
5422         vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(dev)));
5423         if (vhci == NULL) {
5424                 return (ENXIO);
5425         }
5426 
5427         mutex_enter(&vhci->vhci_mutex);
5428         if ((vhci->vhci_state & VHCI_STATE_OPEN) == 0) {
5429                 mutex_exit(&vhci->vhci_mutex);
5430                 return (ENXIO);
5431         }
5432         mutex_exit(&vhci->vhci_mutex);
5433 
5434         /* Get the vhci dip */
5435         vdip = vhci->vhci_dip;
5436         ASSERT(vdip != NULL);
5437         instance = ddi_get_instance(vdip);
5438 
5439         /* Allocate memory for getting parameters from userland */
5440         phci_path       = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
5441         client_path     = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
5442         paddr           = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
5443 
5444         /*
5445          * Set a local variable indicating the ioctl name. Used for
5446          * printing debug strings.
5447          */
5448         switch (cmd) {
5449         case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5450                 s = "GET_CLIENT_MULTIPATH_INFO";
5451                 break;
5452 
5453         case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5454                 s = "GET_PHCI_MULTIPATH_INFO";
5455                 break;
5456 
5457         case SCSI_VHCI_GET_CLIENT_NAME:
5458                 s = "GET_CLIENT_NAME";
5459                 break;
5460 
5461         case SCSI_VHCI_PATH_ONLINE:
5462                 s = "PATH_ONLINE";
5463                 break;
5464 
5465         case SCSI_VHCI_PATH_OFFLINE:
5466                 s = "PATH_OFFLINE";
5467                 break;
5468 
5469         case SCSI_VHCI_PATH_STANDBY:
5470                 s = "PATH_STANDBY";
5471                 break;
5472 
5473         case SCSI_VHCI_PATH_TEST:
5474                 s = "PATH_TEST";
5475                 break;
5476 
5477         case SCSI_VHCI_SWITCH_TO_CNTLR:
5478                 s = "SWITCH_TO_CNTLR";
5479                 break;
5480         case SCSI_VHCI_PATH_DISABLE:
5481                 s = "PATH_DISABLE";
5482                 break;
5483         case SCSI_VHCI_PATH_ENABLE:
5484                 s = "PATH_ENABLE";
5485                 break;
5486 
5487         case SCSI_VHCI_GET_TARGET_LONGNAME:
5488                 s = "GET_TARGET_LONGNAME";
5489                 break;
5490 
5491 #ifdef  DEBUG
5492         case SCSI_VHCI_CONFIGURE_PHCI:
5493                 s = "CONFIGURE_PHCI";
5494                 break;
5495 
5496         case SCSI_VHCI_UNCONFIGURE_PHCI:
5497                 s = "UNCONFIGURE_PHCI";
5498                 break;
5499 #endif
5500 
5501         default:
5502                 s = "Unknown";
5503                 vhci_log(CE_NOTE, vdip,
5504                     "!vhci%d: ioctl %x (unsupported ioctl)", instance, cmd);
5505                 retval = ENOTSUP;
5506                 break;
5507         }
5508         if (retval != 0) {
5509                 goto end;
5510         }
5511 
5512         VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci%d: ioctl <%s>", instance, s));
5513 
5514         /*
5515          * Get IOCTL parameters from userland
5516          */
5517         switch (cmd) {
5518         case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5519         case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5520         case SCSI_VHCI_GET_CLIENT_NAME:
5521         case SCSI_VHCI_PATH_ONLINE:
5522         case SCSI_VHCI_PATH_OFFLINE:
5523         case SCSI_VHCI_PATH_STANDBY:
5524         case SCSI_VHCI_PATH_TEST:
5525         case SCSI_VHCI_PATH_DISABLE:
5526         case SCSI_VHCI_PATH_ENABLE:
5527         case SCSI_VHCI_GET_TARGET_LONGNAME:
5528 #ifdef  DEBUG
5529         case SCSI_VHCI_CONFIGURE_PHCI:
5530         case SCSI_VHCI_UNCONFIGURE_PHCI:
5531 #endif
5532                 retval = vhci_get_iocdata((const void *)data, pioc, mode, s);
5533                 break;
5534 
5535         case SCSI_VHCI_SWITCH_TO_CNTLR:
5536                 retval = vhci_get_iocswitchdata((const void *)data, piocsc,
5537                     mode, s);
5538                 break;
5539         }
5540         if (retval != 0) {
5541                 goto end;
5542         }
5543 
5544 
5545         /*
5546          * Process the IOCTL
5547          */
5548         switch (cmd) {
5549         case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5550         {
5551                 uint_t          num_paths;      /* Num paths to client dev */
5552                 sv_path_info_t  *upibuf = NULL; /* To keep userland values */
5553                 sv_path_info_t  *kpibuf = NULL; /* Kernel data for ioctls */
5554                 dev_info_t      *cdip;          /* Client device dip */
5555 
5556                 if (pioc->ret_elem == NULL) {
5557                         retval = EINVAL;
5558                         break;
5559                 }
5560 
5561                 /* Get client device path from user land */
5562                 if (vhci_ioc_get_client_path(pioc, client_path, mode, s)) {
5563                         retval = EFAULT;
5564                         break;
5565                 }
5566 
5567                 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5568                     "client <%s>", s, client_path));
5569 
5570                 /* Get number of paths to this client device */
5571                 if ((cdip = mdi_client_path2devinfo(vdip, client_path))
5572                     == NULL) {
5573                         retval = ENXIO;
5574                         VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5575                             "client dip doesn't exist. invalid path <%s>",
5576                             s, client_path));
5577                         break;
5578                 }
5579                 num_paths = mdi_client_get_path_count(cdip);
5580 
5581                 if (ddi_copyout(&num_paths, pioc->ret_elem,
5582                     sizeof (num_paths), mode)) {
5583                         VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5584                             "num_paths copyout failed", s));
5585                         retval = EFAULT;
5586                         break;
5587                 }
5588 
5589                 /* If  user just wanted num_paths, then return */
5590                 if (pioc->buf_elem == 0 || pioc->ret_buf == NULL ||
5591                     num_paths == 0) {
5592                         break;
5593                 }
5594 
5595                 /* Set num_paths to value as much as can be sent to userland */
5596                 if (num_paths > pioc->buf_elem) {
5597                         num_paths = pioc->buf_elem;
5598                 }
5599 
5600                 /* Allocate memory and get userland pointers */
5601                 if (vhci_ioc_alloc_pathinfo(&upibuf, &kpibuf, num_paths,
5602                     pioc, mode, s) != 0) {
5603                         retval = EFAULT;
5604                         break;
5605                 }
5606                 ASSERT(upibuf != NULL);
5607                 ASSERT(kpibuf != NULL);
5608 
5609                 /*
5610                  * Get the path information and send it to userland.
5611                  */
5612                 if (vhci_get_client_path_list(cdip, kpibuf, num_paths)
5613                     != MDI_SUCCESS) {
5614                         retval = ENXIO;
5615                         vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5616                         break;
5617                 }
5618 
5619                 if (vhci_ioc_send_pathinfo(upibuf, kpibuf, num_paths,
5620                     pioc, mode, s)) {
5621                         retval = EFAULT;
5622                         vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5623                         break;
5624                 }
5625 
5626                 /* Free the memory allocated for path information */
5627                 vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5628                 break;
5629         }
5630 
5631         case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5632         {
5633                 uint_t          num_paths;      /* Num paths to client dev */
5634                 sv_path_info_t  *upibuf = NULL; /* To keep userland values */
5635                 sv_path_info_t  *kpibuf = NULL; /* Kernel data for ioctls */
5636                 dev_info_t      *pdip;          /* PHCI device dip */
5637 
5638                 if (pioc->ret_elem == NULL) {
5639                         retval = EINVAL;
5640                         break;
5641                 }
5642 
5643                 /* Get PHCI device path from user land */
5644                 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
5645                         retval = EFAULT;
5646                         break;
5647                 }
5648 
5649                 VHCI_DEBUG(6, (CE_WARN, vdip,
5650                     "!vhci_ioctl: ioctl <%s> phci <%s>", s, phci_path));
5651 
5652                 /* Get number of devices associated with this PHCI device */
5653                 if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5654                         VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5655                             "phci dip doesn't exist. invalid path <%s>",
5656                             s, phci_path));
5657                         retval = ENXIO;
5658                         break;
5659                 }
5660 
5661                 num_paths = mdi_phci_get_path_count(pdip);
5662 
5663                 if (ddi_copyout(&num_paths, pioc->ret_elem,
5664                     sizeof (num_paths), mode)) {
5665                         VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5666                             "num_paths copyout failed", s));
5667                         retval = EFAULT;
5668                         break;
5669                 }
5670 
5671                 /* If  user just wanted num_paths, then return */
5672                 if (pioc->buf_elem == 0 || pioc->ret_buf == NULL ||
5673                     num_paths == 0) {
5674                         break;
5675                 }
5676 
5677                 /* Set num_paths to value as much as can be sent to userland */
5678                 if (num_paths > pioc->buf_elem) {
5679                         num_paths = pioc->buf_elem;
5680                 }
5681 
5682                 /* Allocate memory and get userland pointers */
5683                 if (vhci_ioc_alloc_pathinfo(&upibuf, &kpibuf, num_paths,
5684                     pioc, mode, s) != 0) {
5685                         retval = EFAULT;
5686                         break;
5687                 }
5688                 ASSERT(upibuf != NULL);
5689                 ASSERT(kpibuf != NULL);
5690 
5691                 /*
5692                  * Get the path information and send it to userland.
5693                  */
5694                 if (vhci_get_phci_path_list(pdip, kpibuf, num_paths)
5695                     != MDI_SUCCESS) {
5696                         retval = ENXIO;
5697                         vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5698                         break;
5699                 }
5700 
5701                 if (vhci_ioc_send_pathinfo(upibuf, kpibuf, num_paths,
5702                     pioc, mode, s)) {
5703                         retval = EFAULT;
5704                         vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5705                         break;
5706                 }
5707 
5708                 /* Free the memory allocated for path information */
5709                 vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5710                 break;
5711         }
5712 
5713         case SCSI_VHCI_GET_CLIENT_NAME:
5714         {
5715                 dev_info_t              *cdip, *pdip;
5716 
5717                 /* Get PHCI path and device address from user land */
5718                 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s) ||
5719                     vhci_ioc_get_paddr(pioc, paddr, mode, s)) {
5720                         retval = EFAULT;
5721                         break;
5722                 }
5723 
5724                 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5725                     "phci <%s>, paddr <%s>", s, phci_path, paddr));
5726 
5727                 /* Get the PHCI dip */
5728                 if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5729                         VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5730                             "phci dip doesn't exist. invalid path <%s>",
5731                             s, phci_path));
5732                         retval = ENXIO;
5733                         break;
5734                 }
5735 
5736                 if ((pip = mdi_pi_find(pdip, NULL, paddr)) == NULL) {
5737                         VHCI_DEBUG(1, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5738                             "pathinfo doesn't exist. invalid device addr", s));
5739                         retval = ENXIO;
5740                         break;
5741                 }
5742 
5743                 /* Get the client device pathname and send to userland */
5744                 cdip = mdi_pi_get_client(pip);
5745                 vhci_ioc_devi_to_path(cdip, client_path);
5746 
5747                 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5748                     "client <%s>", s, client_path));
5749 
5750                 if (vhci_ioc_send_client_path(client_path, pioc, mode, s)) {
5751                         retval = EFAULT;
5752                         break;
5753                 }
5754                 break;
5755         }
5756 
5757         case SCSI_VHCI_PATH_ONLINE:
5758         case SCSI_VHCI_PATH_OFFLINE:
5759         case SCSI_VHCI_PATH_STANDBY:
5760         case SCSI_VHCI_PATH_TEST:
5761         {
5762                 dev_info_t              *pdip;  /* PHCI dip */
5763 
5764                 /* Get PHCI path and device address from user land */
5765                 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s) ||
5766                     vhci_ioc_get_paddr(pioc, paddr, mode, s)) {
5767                         retval = EFAULT;
5768                         break;
5769                 }
5770 
5771                 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5772                     "phci <%s>, paddr <%s>", s, phci_path, paddr));
5773 
5774                 /* Get the PHCI dip */
5775                 if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5776                         VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5777                             "phci dip doesn't exist. invalid path <%s>",
5778                             s, phci_path));
5779                         retval = ENXIO;
5780                         break;
5781                 }
5782 
5783                 if ((pip = mdi_pi_find(pdip, NULL, paddr)) == NULL) {
5784                         VHCI_DEBUG(1, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5785                             "pathinfo doesn't exist. invalid device addr", s));
5786                         retval = ENXIO;
5787                         break;
5788                 }
5789 
5790                 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5791                     "Calling MDI function to change device state", s));
5792 
5793                 switch (cmd) {
5794                 case SCSI_VHCI_PATH_ONLINE:
5795                         retval = mdi_pi_online(pip, 0);
5796                         break;
5797 
5798                 case SCSI_VHCI_PATH_OFFLINE:
5799                         retval = mdi_pi_offline(pip, 0);
5800                         break;
5801 
5802                 case SCSI_VHCI_PATH_STANDBY:
5803                         retval = mdi_pi_standby(pip, 0);
5804                         break;
5805 
5806                 case SCSI_VHCI_PATH_TEST:
5807                         break;
5808                 }
5809                 break;
5810         }
5811 
5812         case SCSI_VHCI_SWITCH_TO_CNTLR:
5813         {
5814                 dev_info_t *cdip;
5815                 struct scsi_device *devp;
5816 
5817                 /* Get the client device pathname */
5818                 if (ddi_copyin(piocsc->client, client_path,
5819                     MAXPATHLEN, mode)) {
5820                         VHCI_DEBUG(2, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5821                             "client_path copyin failed", s));
5822                         retval = EFAULT;
5823                         break;
5824                 }
5825 
5826                 /* Get the path class to which user wants to switch */
5827                 if (ddi_copyin(piocsc->class, paddr, MAXNAMELEN, mode)) {
5828                         VHCI_DEBUG(2, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5829                             "controller_class copyin failed", s));
5830                         retval = EFAULT;
5831                         break;
5832                 }
5833 
5834                 /* Perform validity checks */
5835                 if ((cdip = mdi_client_path2devinfo(vdip,
5836                     client_path)) == NULL) {
5837                         VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5838                             "client dip doesn't exist. invalid path <%s>",
5839                             s, client_path));
5840                         retval = ENXIO;
5841                         break;
5842                 }
5843 
5844                 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: Calling MDI func "
5845                     "to switch controller"));
5846                 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: client <%s> "
5847                     "class <%s>", client_path, paddr));
5848 
5849                 if (strcmp(paddr, PCLASS_PRIMARY) &&
5850                     strcmp(paddr, PCLASS_SECONDARY)) {
5851                         VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5852                             "invalid path class <%s>", s, paddr));
5853                         retval = ENXIO;
5854                         break;
5855                 }
5856 
5857                 devp = ddi_get_driver_private(cdip);
5858                 if (devp == NULL) {
5859                         VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5860                             "invalid scsi device <%s>", s, client_path));
5861                         retval = ENXIO;
5862                         break;
5863                 }
5864                 vlun = ADDR2VLUN(&devp->sd_address);
5865                 ASSERT(vlun);
5866 
5867                 /*
5868                  * Checking to see if device has only one pclass, PRIMARY.
5869                  * If so this device doesn't support failovers.  Assumed
5870                  * that the devices with one pclass is PRIMARY, as thats the
5871                  * case today.  If this is not true and in future other
5872                  * symmetric devices are supported with other pclass, this
5873                  * IOCTL shall have to be overhauled anyways as now the only
5874                  * arguments it accepts are PRIMARY and SECONDARY.
5875                  */
5876                 fo = vlun->svl_fops;
5877                 if (fo->sfo_pathclass_next(PCLASS_PRIMARY, &pclass,
5878                     vlun->svl_fops_ctpriv)) {
5879                         retval = ENOTSUP;
5880                         break;
5881                 }
5882 
5883                 VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
5884                 mutex_enter(&vlun->svl_mutex);
5885                 if (vlun->svl_active_pclass != NULL) {
5886                         if (strcmp(vlun->svl_active_pclass, paddr) == 0) {
5887                                 mutex_exit(&vlun->svl_mutex);
5888                                 retval = EALREADY;
5889                                 VHCI_RELEASE_LUN(vlun);
5890                                 break;
5891                         }
5892                 }
5893                 mutex_exit(&vlun->svl_mutex);
5894                 /* Call mdi function to cause  a switch over */
5895                 retval = mdi_failover(vdip, cdip, MDI_FAILOVER_SYNC);
5896                 if (retval == MDI_SUCCESS) {
5897                         retval = 0;
5898                 } else if (retval == MDI_BUSY) {
5899                         retval = EBUSY;
5900                 } else {
5901                         retval = EIO;
5902                 }
5903                 VHCI_RELEASE_LUN(vlun);
5904                 break;
5905         }
5906 
5907         case SCSI_VHCI_PATH_ENABLE:
5908         case SCSI_VHCI_PATH_DISABLE:
5909         {
5910                 dev_info_t      *cdip, *pdip;
5911 
5912                 /*
5913                  * Get client device path from user land
5914                  */
5915                 if (vhci_ioc_get_client_path(pioc, client_path, mode, s)) {
5916                         retval = EFAULT;
5917                         break;
5918                 }
5919 
5920                 /*
5921                  * Get Phci device path from user land
5922                  */
5923                 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
5924                         retval = EFAULT;
5925                         break;
5926                 }
5927 
5928                 /*
5929                  * Get the devinfo for the Phci.
5930                  */
5931                 if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5932                         VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5933                             "phci dip doesn't exist. invalid path <%s>",
5934                             s, phci_path));
5935                         retval = ENXIO;
5936                         break;
5937                 }
5938 
5939                 /*
5940                  * If the client path is set to /scsi_vhci then we need
5941                  * to do the operation on all the clients so set cdip to NULL.
5942                  * Else, try to get the client dip.
5943                  */
5944                 if (strcmp(client_path, "/scsi_vhci") == 0) {
5945                         cdip = NULL;
5946                 } else {
5947                         if ((cdip = mdi_client_path2devinfo(vdip,
5948                             client_path)) == NULL) {
5949                                 retval = ENXIO;
5950                                 VHCI_DEBUG(1, (CE_WARN, NULL,
5951                                     "!vhci_ioctl: ioctl <%s> client dip "
5952                                     "doesn't exist. invalid path <%s>",
5953                                     s, client_path));
5954                                 break;
5955                         }
5956                 }
5957 
5958                 if (cmd == SCSI_VHCI_PATH_ENABLE)
5959                         retval = mdi_pi_enable(cdip, pdip, USER_DISABLE);
5960                 else
5961                         retval = mdi_pi_disable(cdip, pdip, USER_DISABLE);
5962 
5963                 break;
5964         }
5965 
5966         case SCSI_VHCI_GET_TARGET_LONGNAME:
5967         {
5968                 uint_t          pid = pioc->buf_elem;
5969                 char            *target_port;
5970                 mod_hash_val_t  hv;
5971 
5972                 /* targetmap lookup of 'target-port' by <pid> */
5973                 if (mod_hash_find(vhci_targetmap_bypid,
5974                     (mod_hash_key_t)(uintptr_t)pid, &hv) != 0) {
5975                         /*
5976                          * NOTE: failure to find the mapping is OK for guid
5977                          * based 'target-port' values.
5978                          */
5979                         VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5980                             "targetport mapping doesn't exist: pid %d",
5981                             s, pid));
5982                         retval = ENXIO;
5983                         break;
5984                 }
5985 
5986                 /* copyout 'target-port' result */
5987                 target_port = (char *)hv;
5988                 if (copyoutstr(target_port, pioc->addr, MAXNAMELEN, NULL)) {
5989                         VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5990                             "targetport copyout failed: len: %d",
5991                             s, (int)strlen(target_port)));
5992                         retval = EFAULT;
5993                 }
5994                 break;
5995         }
5996 
5997 #ifdef  DEBUG
5998         case SCSI_VHCI_CONFIGURE_PHCI:
5999         {
6000                 dev_info_t              *pdip;
6001 
6002                 /* Get PHCI path and device address from user land */
6003                 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
6004                         retval = EFAULT;
6005                         break;
6006                 }
6007 
6008                 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
6009                     "phci <%s>", s, phci_path));
6010 
6011                 /* Get the PHCI dip */
6012                 if ((pdip = e_ddi_hold_devi_by_path(phci_path, 0)) == NULL) {
6013                         VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
6014                             "phci dip doesn't exist. invalid path <%s>",
6015                             s, phci_path));
6016                         retval = ENXIO;
6017                         break;
6018                 }
6019 
6020                 if (ndi_devi_config(pdip,
6021                     NDI_DEVFS_CLEAN|NDI_DEVI_PERSIST) != NDI_SUCCESS) {
6022                         retval = EIO;
6023                 }
6024 
6025                 ddi_release_devi(pdip);
6026                 break;
6027         }
6028 
6029         case SCSI_VHCI_UNCONFIGURE_PHCI:
6030         {
6031                 dev_info_t              *pdip;
6032 
6033                 /* Get PHCI path and device address from user land */
6034                 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
6035                         retval = EFAULT;
6036                         break;
6037                 }
6038 
6039                 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
6040                     "phci <%s>", s, phci_path));
6041 
6042                 /* Get the PHCI dip */
6043                 if ((pdip = e_ddi_hold_devi_by_path(phci_path, 0)) == NULL) {
6044                         VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
6045                             "phci dip doesn't exist. invalid path <%s>",
6046                             s, phci_path));
6047                         retval = ENXIO;
6048                         break;
6049                 }
6050 
6051                 if (ndi_devi_unconfig(pdip,
6052                     NDI_DEVI_REMOVE|NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
6053                         retval = EBUSY;
6054                 }
6055 
6056                 ddi_release_devi(pdip);
6057                 break;
6058         }
6059 #endif
6060         }
6061 
6062 end:
6063         /* Free the memory allocated above */
6064         if (phci_path != NULL) {
6065                 kmem_free(phci_path, MAXPATHLEN);
6066         }
6067         if (client_path != NULL) {
6068                 kmem_free(client_path, MAXPATHLEN);
6069         }
6070         if (paddr != NULL) {
6071                 kmem_free(paddr, MAXNAMELEN);
6072         }
6073         return (retval);
6074 }
6075 
6076 /*
6077  * devctl IOCTL support for client device DR
6078  */
6079 /* ARGSUSED */
6080 int
6081 vhci_devctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
6082     int *rvalp)
6083 {
6084         dev_info_t *self;
6085         dev_info_t *child;
6086         scsi_hba_tran_t *hba;
6087         struct devctl_iocdata *dcp;
6088         struct scsi_vhci *vhci;
6089         int rv = 0;
6090         int retval = 0;
6091         scsi_vhci_priv_t *svp;
6092         mdi_pathinfo_t  *pip;
6093 
6094         if ((vhci = ddi_get_soft_state(vhci_softstate,
6095             MINOR2INST(getminor(dev)))) == NULL)
6096                 return (ENXIO);
6097 
6098         /*
6099          * check if :devctl minor device has been opened
6100          */
6101         mutex_enter(&vhci->vhci_mutex);
6102         if ((vhci->vhci_state & VHCI_STATE_OPEN) == 0) {
6103                 mutex_exit(&vhci->vhci_mutex);
6104                 return (ENXIO);
6105         }
6106         mutex_exit(&vhci->vhci_mutex);
6107 
6108         self = vhci->vhci_dip;
6109         hba = ddi_get_driver_private(self);
6110         if (hba == NULL)
6111                 return (ENXIO);
6112 
6113         /*
6114          * We can use the generic implementation for these ioctls
6115          */
6116         switch (cmd) {
6117         case DEVCTL_DEVICE_GETSTATE:
6118         case DEVCTL_DEVICE_ONLINE:
6119         case DEVCTL_DEVICE_OFFLINE:
6120         case DEVCTL_DEVICE_REMOVE:
6121         case DEVCTL_BUS_GETSTATE:
6122                 return (ndi_devctl_ioctl(self, cmd, arg, mode, 0));
6123         }
6124 
6125         /*
6126          * read devctl ioctl data
6127          */
6128         if (ndi_dc_allochdl((void *)arg, &dcp) != NDI_SUCCESS)
6129                 return (EFAULT);
6130 
6131         switch (cmd) {
6132 
6133         case DEVCTL_DEVICE_RESET:
6134                 /*
6135                  * lookup and hold child device
6136                  */
6137                 if ((child = ndi_devi_find(self, ndi_dc_getname(dcp),
6138                     ndi_dc_getaddr(dcp))) == NULL) {
6139                         rv = ENXIO;
6140                         break;
6141                 }
6142                 retval = mdi_select_path(child, NULL,
6143                     (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
6144                     NULL, &pip);
6145                 if ((retval != MDI_SUCCESS) || (pip == NULL)) {
6146                         VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl:"
6147                             "Unable to get a path, dip 0x%p", (void *)child));
6148                         rv = ENXIO;
6149                         break;
6150                 }
6151                 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
6152                 if (vhci_recovery_reset(svp->svp_svl,
6153                     &svp->svp_psd->sd_address, TRUE,
6154                     VHCI_DEPTH_TARGET) == 0) {
6155                         VHCI_DEBUG(1, (CE_NOTE, NULL,
6156                             "!vhci_ioctl(pip:%p): "
6157                             "reset failed\n", (void *)pip));
6158                         rv = ENXIO;
6159                 }
6160                 mdi_rele_path(pip);
6161                 break;
6162 
6163         case DEVCTL_BUS_QUIESCE:
6164         case DEVCTL_BUS_UNQUIESCE:
6165         case DEVCTL_BUS_RESET:
6166         case DEVCTL_BUS_RESETALL:
6167 #ifdef  DEBUG
6168         case DEVCTL_BUS_CONFIGURE:
6169         case DEVCTL_BUS_UNCONFIGURE:
6170 #endif
6171                 rv = ENOTSUP;
6172                 break;
6173 
6174         default:
6175                 rv = ENOTTY;
6176         } /* end of outer switch */
6177 
6178         ndi_dc_freehdl(dcp);
6179         return (rv);
6180 }
6181 
6182 /*
6183  * Routine to get the PHCI pathname from ioctl structures in userland
6184  */
6185 /* ARGSUSED */
6186 static int
6187 vhci_ioc_get_phci_path(sv_iocdata_t *pioc, caddr_t phci_path,
6188         int mode, caddr_t s)
6189 {
6190         int retval = 0;
6191 
6192         if (ddi_copyin(pioc->phci, phci_path, MAXPATHLEN, mode)) {
6193                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_phci: ioctl <%s> "
6194                     "phci_path copyin failed", s));
6195                 retval = EFAULT;
6196         }
6197         return (retval);
6198 
6199 }
6200 
6201 
6202 /*
6203  * Routine to get the Client device pathname from ioctl structures in userland
6204  */
6205 /* ARGSUSED */
6206 static int
6207 vhci_ioc_get_client_path(sv_iocdata_t *pioc, caddr_t client_path,
6208         int mode, caddr_t s)
6209 {
6210         int retval = 0;
6211 
6212         if (ddi_copyin(pioc->client, client_path, MAXPATHLEN, mode)) {
6213                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_client: "
6214                     "ioctl <%s> client_path copyin failed", s));
6215                 retval = EFAULT;
6216         }
6217         return (retval);
6218 }
6219 
6220 
6221 /*
6222  * Routine to get physical device address from ioctl structure in userland
6223  */
6224 /* ARGSUSED */
6225 static int
6226 vhci_ioc_get_paddr(sv_iocdata_t *pioc, caddr_t paddr, int mode, caddr_t s)
6227 {
6228         int retval = 0;
6229 
6230         if (ddi_copyin(pioc->addr, paddr, MAXNAMELEN, mode)) {
6231                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_paddr: "
6232                     "ioctl <%s> device addr copyin failed", s));
6233                 retval = EFAULT;
6234         }
6235         return (retval);
6236 }
6237 
6238 
6239 /*
6240  * Routine to send client device pathname to userland.
6241  */
6242 /* ARGSUSED */
6243 static int
6244 vhci_ioc_send_client_path(caddr_t client_path, sv_iocdata_t *pioc,
6245         int mode, caddr_t s)
6246 {
6247         int retval = 0;
6248 
6249         if (ddi_copyout(client_path, pioc->client, MAXPATHLEN, mode)) {
6250                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_send_client: "
6251                     "ioctl <%s> client_path copyout failed", s));
6252                 retval = EFAULT;
6253         }
6254         return (retval);
6255 }
6256 
6257 
6258 /*
6259  * Routine to translated dev_info pointer (dip) to device pathname.
6260  */
6261 static void
6262 vhci_ioc_devi_to_path(dev_info_t *dip, caddr_t path)
6263 {
6264         (void) ddi_pathname(dip, path);
6265 }
6266 
6267 
6268 /*
6269  * vhci_get_phci_path_list:
6270  *              get information about devices associated with a
6271  *              given PHCI device.
6272  *
6273  * Return Values:
6274  *              path information elements
6275  */
6276 int
6277 vhci_get_phci_path_list(dev_info_t *pdip, sv_path_info_t *pibuf,
6278         uint_t num_elems)
6279 {
6280         uint_t                  count, done;
6281         mdi_pathinfo_t          *pip;
6282         sv_path_info_t          *ret_pip;
6283         int                     status;
6284         size_t                  prop_size;
6285         int                     circular;
6286 
6287         /*
6288          * Get the PHCI structure and retrieve the path information
6289          * from the GUID hash table.
6290          */
6291 
6292         ret_pip = pibuf;
6293         count = 0;
6294 
6295         ndi_devi_enter(pdip, &circular);
6296 
6297         done = (count >= num_elems);
6298         pip = mdi_get_next_client_path(pdip, NULL);
6299         while (pip && !done) {
6300                 mdi_pi_lock(pip);
6301                 (void) ddi_pathname(mdi_pi_get_phci(pip),
6302                     ret_pip->device.ret_phci);
6303                 (void) strcpy(ret_pip->ret_addr, mdi_pi_get_addr(pip));
6304                 (void) mdi_pi_get_state2(pip, &ret_pip->ret_state,
6305                     &ret_pip->ret_ext_state);
6306 
6307                 status = mdi_prop_size(pip, &prop_size);
6308                 if (status == MDI_SUCCESS && ret_pip->ret_prop.ret_buf_size) {
6309                         *ret_pip->ret_prop.ret_buf_size = (uint_t)prop_size;
6310                 }
6311 
6312 #ifdef DEBUG
6313                 if (status != MDI_SUCCESS) {
6314                         VHCI_DEBUG(2, (CE_WARN, NULL,
6315                             "!vhci_get_phci_path_list: "
6316                             "phci <%s>, prop size failure 0x%x",
6317                             ret_pip->device.ret_phci, status));
6318                 }
6319 #endif /* DEBUG */
6320 
6321 
6322                 if (status == MDI_SUCCESS && ret_pip->ret_prop.buf &&
6323                     prop_size && ret_pip->ret_prop.buf_size >= prop_size) {
6324                         status = mdi_prop_pack(pip,
6325                             &ret_pip->ret_prop.buf,
6326                             ret_pip->ret_prop.buf_size);
6327 
6328 #ifdef DEBUG
6329                         if (status != MDI_SUCCESS) {
6330                                 VHCI_DEBUG(2, (CE_WARN, NULL,
6331                                     "!vhci_get_phci_path_list: "
6332                                     "phci <%s>, prop pack failure 0x%x",
6333                                     ret_pip->device.ret_phci, status));
6334                         }
6335 #endif /* DEBUG */
6336                 }
6337 
6338                 mdi_pi_unlock(pip);
6339                 pip = mdi_get_next_client_path(pdip, pip);
6340                 ret_pip++;
6341                 count++;
6342                 done = (count >= num_elems);
6343         }
6344 
6345         ndi_devi_exit(pdip, circular);
6346 
6347         return (MDI_SUCCESS);
6348 }
6349 
6350 
6351 /*
6352  * vhci_get_client_path_list:
6353  *              get information about various paths associated with a
6354  *              given client device.
6355  *
6356  * Return Values:
6357  *              path information elements
6358  */
6359 int
6360 vhci_get_client_path_list(dev_info_t *cdip, sv_path_info_t *pibuf,
6361         uint_t num_elems)
6362 {
6363         uint_t                  count, done;
6364         mdi_pathinfo_t          *pip;
6365         sv_path_info_t          *ret_pip;
6366         int                     status;
6367         size_t                  prop_size;
6368         int                     circular;
6369 
6370         ret_pip = pibuf;
6371         count = 0;
6372 
6373         ndi_devi_enter(cdip, &circular);
6374 
6375         done = (count >= num_elems);
6376         pip = mdi_get_next_phci_path(cdip, NULL);
6377         while (pip && !done) {
6378                 mdi_pi_lock(pip);
6379                 (void) ddi_pathname(mdi_pi_get_phci(pip),
6380                     ret_pip->device.ret_phci);
6381                 (void) strcpy(ret_pip->ret_addr, mdi_pi_get_addr(pip));
6382                 (void) mdi_pi_get_state2(pip, &ret_pip->ret_state,
6383                     &ret_pip->ret_ext_state);
6384 
6385                 status = mdi_prop_size(pip, &prop_size);
6386                 if (status == MDI_SUCCESS && ret_pip->ret_prop.ret_buf_size) {
6387                         *ret_pip->ret_prop.ret_buf_size = (uint_t)prop_size;
6388                 }
6389 
6390 #ifdef DEBUG
6391                 if (status != MDI_SUCCESS) {
6392                         VHCI_DEBUG(2, (CE_WARN, NULL,
6393                             "!vhci_get_client_path_list: "
6394                             "phci <%s>, prop size failure 0x%x",
6395                             ret_pip->device.ret_phci, status));
6396                 }
6397 #endif /* DEBUG */
6398 
6399 
6400                 if (status == MDI_SUCCESS && ret_pip->ret_prop.buf &&
6401                     prop_size && ret_pip->ret_prop.buf_size >= prop_size) {
6402                         status = mdi_prop_pack(pip,
6403                             &ret_pip->ret_prop.buf,
6404                             ret_pip->ret_prop.buf_size);
6405 
6406 #ifdef DEBUG
6407                         if (status != MDI_SUCCESS) {
6408                                 VHCI_DEBUG(2, (CE_WARN, NULL,
6409                                     "!vhci_get_client_path_list: "
6410                                     "phci <%s>, prop pack failure 0x%x",
6411                                     ret_pip->device.ret_phci, status));
6412                         }
6413 #endif /* DEBUG */
6414                 }
6415 
6416                 mdi_pi_unlock(pip);
6417                 pip = mdi_get_next_phci_path(cdip, pip);
6418                 ret_pip++;
6419                 count++;
6420                 done = (count >= num_elems);
6421         }
6422 
6423         ndi_devi_exit(cdip, circular);
6424 
6425         return (MDI_SUCCESS);
6426 }
6427 
6428 
6429 /*
6430  * Routine to get ioctl argument structure from userland.
6431  */
6432 /* ARGSUSED */
6433 static int
6434 vhci_get_iocdata(const void *data, sv_iocdata_t *pioc, int mode, caddr_t s)
6435 {
6436         int     retval = 0;
6437 
6438 #ifdef  _MULTI_DATAMODEL
6439         switch (ddi_model_convert_from(mode & FMODELS)) {
6440         case DDI_MODEL_ILP32:
6441         {
6442                 sv_iocdata32_t  ioc32;
6443 
6444                 if (ddi_copyin(data, &ioc32, sizeof (ioc32), mode)) {
6445                         retval = EFAULT;
6446                         break;
6447                 }
6448                 pioc->client = (caddr_t)(uintptr_t)ioc32.client;
6449                 pioc->phci   = (caddr_t)(uintptr_t)ioc32.phci;
6450                 pioc->addr   = (caddr_t)(uintptr_t)ioc32.addr;
6451                 pioc->buf_elem       = (uint_t)ioc32.buf_elem;
6452                 pioc->ret_buf        = (sv_path_info_t *)(uintptr_t)ioc32.ret_buf;
6453                 pioc->ret_elem       = (uint_t *)(uintptr_t)ioc32.ret_elem;
6454                 break;
6455         }
6456 
6457         case DDI_MODEL_NONE:
6458                 if (ddi_copyin(data, pioc, sizeof (*pioc), mode)) {
6459                         retval = EFAULT;
6460                         break;
6461                 }
6462                 break;
6463         }
6464 #else   /* _MULTI_DATAMODEL */
6465         if (ddi_copyin(data, pioc, sizeof (*pioc), mode)) {
6466                 retval = EFAULT;
6467         }
6468 #endif  /* _MULTI_DATAMODEL */
6469 
6470 #ifdef DEBUG
6471         if (retval) {
6472                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: cmd <%s> "
6473                     "iocdata copyin failed", s));
6474         }
6475 #endif
6476 
6477         return (retval);
6478 }
6479 
6480 
6481 /*
6482  * Routine to get the ioctl argument for ioctl causing controller switchover.
6483  */
6484 /* ARGSUSED */
6485 static int
6486 vhci_get_iocswitchdata(const void *data, sv_switch_to_cntlr_iocdata_t *piocsc,
6487     int mode, caddr_t s)
6488 {
6489         int     retval = 0;
6490 
6491 #ifdef  _MULTI_DATAMODEL
6492         switch (ddi_model_convert_from(mode & FMODELS)) {
6493         case DDI_MODEL_ILP32:
6494         {
6495                 sv_switch_to_cntlr_iocdata32_t  ioc32;
6496 
6497                 if (ddi_copyin(data, &ioc32, sizeof (ioc32), mode)) {
6498                         retval = EFAULT;
6499                         break;
6500                 }
6501                 piocsc->client       = (caddr_t)(uintptr_t)ioc32.client;
6502                 piocsc->class        = (caddr_t)(uintptr_t)ioc32.class;
6503                 break;
6504         }
6505 
6506         case DDI_MODEL_NONE:
6507                 if (ddi_copyin(data, piocsc, sizeof (*piocsc), mode)) {
6508                         retval = EFAULT;
6509                 }
6510                 break;
6511         }
6512 #else   /* _MULTI_DATAMODEL */
6513         if (ddi_copyin(data, piocsc, sizeof (*piocsc), mode)) {
6514                 retval = EFAULT;
6515         }
6516 #endif  /* _MULTI_DATAMODEL */
6517 
6518 #ifdef DEBUG
6519         if (retval) {
6520                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: cmd <%s> "
6521                     "switch_to_cntlr_iocdata copyin failed", s));
6522         }
6523 #endif
6524 
6525         return (retval);
6526 }
6527 
6528 
6529 /*
6530  * Routine to allocate memory for the path information structures.
6531  * It allocates two chunks of memory - one for keeping userland
6532  * pointers/values for path information and path properties, second for
6533  * keeping allocating kernel memory for path properties. These path
6534  * properties are finally copied to userland.
6535  */
6536 /* ARGSUSED */
6537 static int
6538 vhci_ioc_alloc_pathinfo(sv_path_info_t **upibuf, sv_path_info_t **kpibuf,
6539     uint_t num_paths, sv_iocdata_t *pioc, int mode, caddr_t s)
6540 {
6541         sv_path_info_t  *pi;
6542         uint_t          bufsize;
6543         int             retval = 0;
6544         int             index;
6545 
6546         /* Allocate memory */
6547         *upibuf = (sv_path_info_t *)
6548             kmem_zalloc(sizeof (sv_path_info_t) * num_paths, KM_SLEEP);
6549         ASSERT(*upibuf != NULL);
6550         *kpibuf = (sv_path_info_t *)
6551             kmem_zalloc(sizeof (sv_path_info_t) * num_paths, KM_SLEEP);
6552         ASSERT(*kpibuf != NULL);
6553 
6554         /*
6555          * Get the path info structure from the user space.
6556          * We are interested in the following fields:
6557          *      - user size of buffer for per path properties.
6558          *      - user address of buffer for path info properties.
6559          *      - user pointer for returning actual buffer size
6560          * Keep these fields in the 'upibuf' structures.
6561          * Allocate buffer for per path info properties in kernel
6562          * structure ('kpibuf').
6563          * Size of these buffers will be equal to the size of buffers
6564          * in the user space.
6565          */
6566 #ifdef  _MULTI_DATAMODEL
6567         switch (ddi_model_convert_from(mode & FMODELS)) {
6568         case DDI_MODEL_ILP32:
6569         {
6570                 sv_path_info32_t        *src;
6571                 sv_path_info32_t        pi32;
6572 
6573                 src  = (sv_path_info32_t *)pioc->ret_buf;
6574                 pi = (sv_path_info_t *)*upibuf;
6575                 for (index = 0; index < num_paths; index++, src++, pi++) {
6576                         if (ddi_copyin(src, &pi32, sizeof (pi32), mode)) {
6577                                 retval = EFAULT;
6578                                 break;
6579                         }
6580 
6581                         pi->ret_prop.buf_size        =
6582                             (uint_t)pi32.ret_prop.buf_size;
6583                         pi->ret_prop.ret_buf_size =
6584                             (uint_t *)(uintptr_t)pi32.ret_prop.ret_buf_size;
6585                         pi->ret_prop.buf     =
6586                             (caddr_t)(uintptr_t)pi32.ret_prop.buf;
6587                 }
6588                 break;
6589         }
6590 
6591         case DDI_MODEL_NONE:
6592                 if (ddi_copyin(pioc->ret_buf, *upibuf,
6593                     sizeof (sv_path_info_t) * num_paths, mode)) {
6594                         retval = EFAULT;
6595                 }
6596                 break;
6597         }
6598 #else   /* _MULTI_DATAMODEL */
6599         if (ddi_copyin(pioc->ret_buf, *upibuf,
6600             sizeof (sv_path_info_t) * num_paths, mode)) {
6601                 retval = EFAULT;
6602         }
6603 #endif  /* _MULTI_DATAMODEL */
6604 
6605         if (retval != 0) {
6606                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_alloc_path_info: "
6607                     "ioctl <%s> normal: path_info copyin failed", s));
6608                 kmem_free(*upibuf, sizeof (sv_path_info_t) * num_paths);
6609                 kmem_free(*kpibuf, sizeof (sv_path_info_t) * num_paths);
6610                 *upibuf = NULL;
6611                 *kpibuf = NULL;
6612                 return (retval);
6613         }
6614 
6615         /*
6616          * Allocate memory for per path properties.
6617          */
6618         for (index = 0, pi = *kpibuf; index < num_paths; index++, pi++) {
6619                 bufsize = (*upibuf)[index].ret_prop.buf_size;
6620 
6621                 if (bufsize && bufsize <= SV_PROP_MAX_BUF_SIZE) {
6622                         pi->ret_prop.buf_size = bufsize;
6623                         pi->ret_prop.buf = (caddr_t)
6624                             kmem_zalloc(bufsize, KM_SLEEP);
6625                         ASSERT(pi->ret_prop.buf != NULL);
6626                 } else {
6627                         pi->ret_prop.buf_size = 0;
6628                         pi->ret_prop.buf = NULL;
6629                 }
6630 
6631                 if ((*upibuf)[index].ret_prop.ret_buf_size != NULL) {
6632                         pi->ret_prop.ret_buf_size = (uint_t *)kmem_zalloc(
6633                             sizeof (*pi->ret_prop.ret_buf_size), KM_SLEEP);
6634                         ASSERT(pi->ret_prop.ret_buf_size != NULL);
6635                 } else {
6636                         pi->ret_prop.ret_buf_size = NULL;
6637                 }
6638         }
6639 
6640         return (0);
6641 }
6642 
6643 
6644 /*
6645  * Routine to free memory for the path information structures.
6646  * This is the memory which was allocated earlier.
6647  */
6648 /* ARGSUSED */
6649 static void
6650 vhci_ioc_free_pathinfo(sv_path_info_t *upibuf, sv_path_info_t *kpibuf,
6651     uint_t num_paths)
6652 {
6653         sv_path_info_t  *pi;
6654         int             index;
6655 
6656         /* Free memory for per path properties */
6657         for (index = 0, pi = kpibuf; index < num_paths; index++, pi++) {
6658                 if (pi->ret_prop.ret_buf_size != NULL) {
6659                         kmem_free(pi->ret_prop.ret_buf_size,
6660                             sizeof (*pi->ret_prop.ret_buf_size));
6661                 }
6662 
6663                 if (pi->ret_prop.buf != NULL) {
6664                         kmem_free(pi->ret_prop.buf, pi->ret_prop.buf_size);
6665                 }
6666         }
6667 
6668         /* Free memory for path info structures */
6669         kmem_free(upibuf, sizeof (sv_path_info_t) * num_paths);
6670         kmem_free(kpibuf, sizeof (sv_path_info_t) * num_paths);
6671 }
6672 
6673 
6674 /*
6675  * Routine to copy path information and path properties to userland.
6676  */
6677 /* ARGSUSED */
6678 static int
6679 vhci_ioc_send_pathinfo(sv_path_info_t *upibuf, sv_path_info_t *kpibuf,
6680     uint_t num_paths, sv_iocdata_t *pioc, int mode, caddr_t s)
6681 {
6682         int                     retval = 0, index;
6683         sv_path_info_t          *upi_ptr;
6684         sv_path_info32_t        *upi32_ptr;
6685 
6686 #ifdef  _MULTI_DATAMODEL
6687         switch (ddi_model_convert_from(mode & FMODELS)) {
6688         case DDI_MODEL_ILP32:
6689                 goto copy_32bit;
6690 
6691         case DDI_MODEL_NONE:
6692                 goto copy_normal;
6693         }
6694 #else   /* _MULTI_DATAMODEL */
6695 
6696         goto copy_normal;
6697 
6698 #endif  /* _MULTI_DATAMODEL */
6699 
6700 copy_normal:
6701 
6702         /*
6703          * Copy path information and path properties to user land.
6704          * Pointer fields inside the path property structure were
6705          * saved in the 'upibuf' structure earlier.
6706          */
6707         upi_ptr = pioc->ret_buf;
6708         for (index = 0; index < num_paths; index++) {
6709                 if (ddi_copyout(kpibuf[index].device.ret_ct,
6710                     upi_ptr[index].device.ret_ct, MAXPATHLEN, mode)) {
6711                         retval = EFAULT;
6712                         break;
6713                 }
6714 
6715                 if (ddi_copyout(kpibuf[index].ret_addr,
6716                     upi_ptr[index].ret_addr, MAXNAMELEN, mode)) {
6717                         retval = EFAULT;
6718                         break;
6719                 }
6720 
6721                 if (ddi_copyout(&kpibuf[index].ret_state,
6722                     &upi_ptr[index].ret_state, sizeof (kpibuf[index].ret_state),
6723                     mode)) {
6724                         retval = EFAULT;
6725                         break;
6726                 }
6727 
6728                 if (ddi_copyout(&kpibuf[index].ret_ext_state,
6729                     &upi_ptr[index].ret_ext_state,
6730                     sizeof (kpibuf[index].ret_ext_state), mode)) {
6731                         retval = EFAULT;
6732                         break;
6733                 }
6734 
6735                 if ((kpibuf[index].ret_prop.ret_buf_size != NULL) &&
6736                     ddi_copyout(kpibuf[index].ret_prop.ret_buf_size,
6737                     upibuf[index].ret_prop.ret_buf_size,
6738                     sizeof (*upibuf[index].ret_prop.ret_buf_size), mode)) {
6739                         retval = EFAULT;
6740                         break;
6741                 }
6742 
6743                 if ((kpibuf[index].ret_prop.buf != NULL) &&
6744                     ddi_copyout(kpibuf[index].ret_prop.buf,
6745                     upibuf[index].ret_prop.buf,
6746                     upibuf[index].ret_prop.buf_size, mode)) {
6747                         retval = EFAULT;
6748                         break;
6749                 }
6750         }
6751 
6752 #ifdef DEBUG
6753         if (retval) {
6754                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: ioctl <%s> "
6755                     "normal: path_info copyout failed", s));
6756         }
6757 #endif
6758 
6759         return (retval);
6760 
6761 copy_32bit:
6762         /*
6763          * Copy path information and path properties to user land.
6764          * Pointer fields inside the path property structure were
6765          * saved in the 'upibuf' structure earlier.
6766          */
6767         upi32_ptr = (sv_path_info32_t *)pioc->ret_buf;
6768         for (index = 0; index < num_paths; index++) {
6769                 if (ddi_copyout(kpibuf[index].device.ret_ct,
6770                     upi32_ptr[index].device.ret_ct, MAXPATHLEN, mode)) {
6771                         retval = EFAULT;
6772                         break;
6773                 }
6774 
6775                 if (ddi_copyout(kpibuf[index].ret_addr,
6776                     upi32_ptr[index].ret_addr, MAXNAMELEN, mode)) {
6777                         retval = EFAULT;
6778                         break;
6779                 }
6780 
6781                 if (ddi_copyout(&kpibuf[index].ret_state,
6782                     &upi32_ptr[index].ret_state,
6783                     sizeof (kpibuf[index].ret_state), mode)) {
6784                         retval = EFAULT;
6785                         break;
6786                 }
6787 
6788                 if (ddi_copyout(&kpibuf[index].ret_ext_state,
6789                     &upi32_ptr[index].ret_ext_state,
6790                     sizeof (kpibuf[index].ret_ext_state), mode)) {
6791                         retval = EFAULT;
6792                         break;
6793                 }
6794                 if ((kpibuf[index].ret_prop.ret_buf_size != NULL) &&
6795                     ddi_copyout(kpibuf[index].ret_prop.ret_buf_size,
6796                     upibuf[index].ret_prop.ret_buf_size,
6797                     sizeof (*upibuf[index].ret_prop.ret_buf_size), mode)) {
6798                         retval = EFAULT;
6799                         break;
6800                 }
6801 
6802                 if ((kpibuf[index].ret_prop.buf != NULL) &&
6803                     ddi_copyout(kpibuf[index].ret_prop.buf,
6804                     upibuf[index].ret_prop.buf,
6805                     upibuf[index].ret_prop.buf_size, mode)) {
6806                         retval = EFAULT;
6807                         break;
6808                 }
6809         }
6810 
6811 #ifdef DEBUG
6812         if (retval) {
6813                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: ioctl <%s> "
6814                     "normal: path_info copyout failed", s));
6815         }
6816 #endif
6817 
6818         return (retval);
6819 }
6820 
6821 
6822 /*
6823  * vhci_failover()
6824  * This routine expects VHCI_HOLD_LUN before being invoked.  It can be invoked
6825  * as MDI_FAILOVER_ASYNC or MDI_FAILOVER_SYNC.  For Asynchronous failovers
6826  * this routine shall VHCI_RELEASE_LUN on exiting.  For synchronous failovers
6827  * it is the callers responsibility to release lun.
6828  */
6829 
6830 /* ARGSUSED */
6831 static int
6832 vhci_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
6833 {
6834         char                    *guid;
6835         scsi_vhci_lun_t         *vlun = NULL;
6836         struct scsi_vhci        *vhci;
6837         mdi_pathinfo_t          *pip, *npip;
6838         char                    *s_pclass, *pclass1, *pclass2, *pclass;
6839         char                    active_pclass_copy[255], *active_pclass_ptr;
6840         char                    *ptr1, *ptr2;
6841         mdi_pathinfo_state_t    pi_state;
6842         uint32_t                pi_ext_state;
6843         scsi_vhci_priv_t        *svp;
6844         struct scsi_device      *sd;
6845         struct scsi_failover_ops        *sfo;
6846         int                     sps; /* mdi_select_path() status */
6847         int                     activation_done = 0;
6848         int                     rval, retval = MDI_FAILURE;
6849         int                     reserve_pending, check_condition, UA_condition;
6850         struct scsi_pkt         *pkt;
6851         struct buf              *bp;
6852 
6853         vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
6854         sd = ddi_get_driver_private(cdip);
6855         vlun = ADDR2VLUN(&sd->sd_address);
6856         ASSERT(vlun != 0);
6857         ASSERT(VHCI_LUN_IS_HELD(vlun));
6858         guid = vlun->svl_lun_wwn;
6859         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(1): guid %s\n", guid));
6860         vhci_log(CE_NOTE, vdip, "!Initiating failover for device %s "
6861             "(GUID %s)", ddi_node_name(cdip), guid);
6862 
6863         /*
6864          * Lets maintain a local copy of the vlun->svl_active_pclass
6865          * for the rest of the processing. Accessing the field
6866          * directly in the loop below causes loop logic to break
6867          * especially when the field gets updated by other threads
6868          * update path status etc and causes 'paths are not currently
6869          * available' condition to be declared prematurely.
6870          */
6871         mutex_enter(&vlun->svl_mutex);
6872         if (vlun->svl_active_pclass != NULL) {
6873                 (void) strlcpy(active_pclass_copy, vlun->svl_active_pclass,
6874                     sizeof (active_pclass_copy));
6875                 active_pclass_ptr = &active_pclass_copy[0];
6876                 mutex_exit(&vlun->svl_mutex);
6877                 if (vhci_quiesce_paths(vdip, cdip, vlun, guid,
6878                     active_pclass_ptr) != 0) {
6879                         retval = MDI_FAILURE;
6880                 }
6881         } else {
6882                 /*
6883                  * can happen only when the available path to device
6884                  * discovered is a STANDBY path.
6885                  */
6886                 mutex_exit(&vlun->svl_mutex);
6887                 active_pclass_copy[0] = '\0';
6888                 active_pclass_ptr = NULL;
6889         }
6890 
6891         sfo = vlun->svl_fops;
6892         ASSERT(sfo != NULL);
6893         pclass1 = s_pclass = active_pclass_ptr;
6894         VHCI_DEBUG(1, (CE_NOTE, NULL, "!(%s)failing over from %s\n", guid,
6895             (s_pclass == NULL ? "<none>" : s_pclass)));
6896 
6897 next_pathclass:
6898 
6899         rval = sfo->sfo_pathclass_next(pclass1, &pclass2,
6900             vlun->svl_fops_ctpriv);
6901         if (rval == ENOENT) {
6902                 if (s_pclass == NULL) {
6903                         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(4)(%s): "
6904                             "failed, no more pathclasses\n", guid));
6905                         goto done;
6906                 } else {
6907                         (void) sfo->sfo_pathclass_next(NULL, &pclass2,
6908                             vlun->svl_fops_ctpriv);
6909                 }
6910         } else if (rval == EINVAL) {
6911                 vhci_log(CE_NOTE, vdip, "!Failover operation failed for "
6912                     "device %s (GUID %s): Invalid path-class %s",
6913                     ddi_node_name(cdip), guid,
6914                     ((pclass1 == NULL) ? "<none>" : pclass1));
6915                 goto done;
6916         }
6917         if ((s_pclass != NULL) && (strcmp(pclass2, s_pclass) == 0)) {
6918                 /*
6919                  * paths are not currently available
6920                  */
6921                 vhci_log(CE_NOTE, vdip, "!Failover path currently unavailable"
6922                     " for device %s (GUID %s)",
6923                     ddi_node_name(cdip), guid);
6924                 goto done;
6925         }
6926         pip = npip = NULL;
6927         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(5.2)(%s): considering "
6928             "%s as failover destination\n", guid, pclass2));
6929         sps = mdi_select_path(cdip, NULL, MDI_SELECT_STANDBY_PATH, NULL, &npip);
6930         if ((npip == NULL) || (sps != MDI_SUCCESS)) {
6931                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(%s): no "
6932                     "STANDBY paths found (status:%x)!\n", guid, sps));
6933                 pclass1 = pclass2;
6934                 goto next_pathclass;
6935         }
6936         do {
6937                 pclass = NULL;
6938                 if ((mdi_prop_lookup_string(npip, "path-class",
6939                     &pclass) != MDI_SUCCESS) || (strcmp(pclass2,
6940                     pclass) != 0)) {
6941                         VHCI_DEBUG(1, (CE_NOTE, NULL,
6942                             "!vhci_failover(5.5)(%s): skipping path "
6943                             "%p(%s)...\n", guid, (void *)npip, pclass));
6944                         pip = npip;
6945                         sps = mdi_select_path(cdip, NULL,
6946                             MDI_SELECT_STANDBY_PATH, pip, &npip);
6947                         mdi_rele_path(pip);
6948                         (void) mdi_prop_free(pclass);
6949                         continue;
6950                 }
6951                 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(npip);
6952 
6953                 /*
6954                  * Issue READ at non-zer block on this STANDBY path.
6955                  * Purple returns
6956                  * 1. RESERVATION_CONFLICT if reservation is pending
6957                  * 2. POR check condition if it reset happened.
6958                  * 2. failover Check Conditions if one is already in progress.
6959                  */
6960                 reserve_pending = 0;
6961                 check_condition = 0;
6962                 UA_condition = 0;
6963 
6964                 bp = scsi_alloc_consistent_buf(&svp->svp_psd->sd_address,
6965                     (struct buf *)NULL, DEV_BSIZE, B_READ, NULL, NULL);
6966                 if (!bp) {
6967                         VHCI_DEBUG(1, (CE_NOTE, NULL,
6968                             "vhci_failover !No resources (buf)\n"));
6969                         mdi_rele_path(npip);
6970                         goto done;
6971                 }
6972                 pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
6973                     CDB_GROUP1, sizeof (struct scsi_arq_status), 0,
6974                     PKT_CONSISTENT, NULL, NULL);
6975                 if (pkt) {
6976                         (void) scsi_setup_cdb((union scsi_cdb *)(uintptr_t)
6977                             pkt->pkt_cdbp, SCMD_READ, 1, 1, 0);
6978                         pkt->pkt_flags = FLAG_NOINTR;
6979 check_path_again:
6980                         pkt->pkt_path_instance = mdi_pi_get_path_instance(npip);
6981                         pkt->pkt_time = 3*30;
6982 
6983                         if (scsi_transport(pkt) == TRAN_ACCEPT) {
6984                                 switch (pkt->pkt_reason) {
6985                                 case CMD_CMPLT:
6986                                         switch (SCBP_C(pkt)) {
6987                                         case STATUS_GOOD:
6988                                                 /* Already failed over */
6989                                                 activation_done = 1;
6990                                                 break;
6991                                         case STATUS_RESERVATION_CONFLICT:
6992                                                 reserve_pending = 1;
6993                                                 break;
6994                                         case STATUS_CHECK:
6995                                                 check_condition = 1;
6996                                                 break;
6997                                         }
6998                                 }
6999                         }
7000                         if (check_condition &&
7001                             (pkt->pkt_state & STATE_ARQ_DONE)) {
7002                                 uint8_t *sns, skey, asc, ascq;
7003                                 sns = (uint8_t *)
7004                                     &(((struct scsi_arq_status *)(uintptr_t)
7005                                     (pkt->pkt_scbp))->sts_sensedata);
7006                                 skey = scsi_sense_key(sns);
7007                                 asc = scsi_sense_asc(sns);
7008                                 ascq = scsi_sense_ascq(sns);
7009                                 if (skey == KEY_UNIT_ATTENTION &&
7010                                     asc == 0x29) {
7011                                         /* Already failed over */
7012                                         VHCI_DEBUG(1, (CE_NOTE, NULL,
7013                                             "!vhci_failover(7)(%s): "
7014                                             "path 0x%p POR UA condition\n",
7015                                             guid, (void *)npip));
7016                                         if (UA_condition == 0) {
7017                                                 UA_condition = 1;
7018                                                 goto check_path_again;
7019                                         }
7020                                 } else {
7021                                         activation_done = 0;
7022                                         VHCI_DEBUG(1, (CE_NOTE, NULL,
7023                                             "!vhci_failover(%s): path 0x%p "
7024                                             "unhandled chkcond %x %x %x\n",
7025                                             guid, (void *)npip, skey,
7026                                             asc, ascq));
7027                                 }
7028                         }
7029                         scsi_destroy_pkt(pkt);
7030                 }
7031                 scsi_free_consistent_buf(bp);
7032 
7033                 if (activation_done) {
7034                         mdi_rele_path(npip);
7035                         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(7)(%s): "
7036                             "path 0x%p already failedover\n", guid,
7037                             (void *)npip));
7038                         break;
7039                 }
7040                 if (reserve_pending && (vlun->svl_xlf_capable == 0)) {
7041                         (void) vhci_recovery_reset(vlun,
7042                             &svp->svp_psd->sd_address,
7043                             FALSE, VHCI_DEPTH_ALL);
7044                 }
7045                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(6)(%s): "
7046                     "activating path 0x%p(psd:%p)\n", guid, (void *)npip,
7047                     (void *)svp->svp_psd));
7048                 if (sfo->sfo_path_activate(svp->svp_psd, pclass2,
7049                     vlun->svl_fops_ctpriv) == 0) {
7050                         activation_done = 1;
7051                         mdi_rele_path(npip);
7052                         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(7)(%s): "
7053                             "path 0x%p successfully activated\n", guid,
7054                             (void *)npip));
7055                         break;
7056                 }
7057                 pip = npip;
7058                 sps = mdi_select_path(cdip, NULL, MDI_SELECT_STANDBY_PATH,
7059                     pip, &npip);
7060                 mdi_rele_path(pip);
7061         } while ((npip != NULL) && (sps == MDI_SUCCESS));
7062         if (activation_done == 0) {
7063                 pclass1 = pclass2;
7064                 goto next_pathclass;
7065         }
7066 
7067         /*
7068          * if we are here, we have succeeded in activating path npip of
7069          * pathclass pclass2; let us validate all paths of pclass2 by
7070          * "ping"-ing each one and mark the good ones ONLINE
7071          * Also, set the state of the paths belonging to the previously
7072          * active pathclass to STANDBY
7073          */
7074         pip = npip = NULL;
7075         sps = mdi_select_path(cdip, NULL, (MDI_SELECT_ONLINE_PATH |
7076             MDI_SELECT_STANDBY_PATH | MDI_SELECT_USER_DISABLE_PATH),
7077             NULL, &npip);
7078         if (npip == NULL || sps != MDI_SUCCESS) {
7079                 VHCI_DEBUG(1, (CE_NOTE, NULL, "!Failover operation failed for "
7080                     "device %s (GUID %s): paths may be busy\n",
7081                     ddi_node_name(cdip), guid));
7082                 goto done;
7083         }
7084         do {
7085                 (void) mdi_pi_get_state2(npip, &pi_state, &pi_ext_state);
7086                 if (mdi_prop_lookup_string(npip, "path-class", &pclass)
7087                     != MDI_SUCCESS) {
7088                         pip = npip;
7089                         sps = mdi_select_path(cdip, NULL,
7090                             (MDI_SELECT_ONLINE_PATH |
7091                             MDI_SELECT_STANDBY_PATH |
7092                             MDI_SELECT_USER_DISABLE_PATH),
7093                             pip, &npip);
7094                         mdi_rele_path(pip);
7095                         continue;
7096                 }
7097                 if (strcmp(pclass, pclass2) == 0) {
7098                         if (pi_state == MDI_PATHINFO_STATE_STANDBY) {
7099                                 svp = (scsi_vhci_priv_t *)
7100                                     mdi_pi_get_vhci_private(npip);
7101                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
7102                                     "!vhci_failover(8)(%s): "
7103                                     "pinging path 0x%p\n",
7104                                     guid, (void *)npip));
7105                                 if (sfo->sfo_path_ping(svp->svp_psd,
7106                                     vlun->svl_fops_ctpriv) == 1) {
7107                                         mdi_pi_set_state(npip,
7108                                             MDI_PATHINFO_STATE_ONLINE);
7109                                         VHCI_DEBUG(1, (CE_NOTE, NULL,
7110                                             "!vhci_failover(9)(%s): "
7111                                             "path 0x%p ping successful, "
7112                                             "marked online\n", guid,
7113                                             (void *)npip));
7114                                         MDI_PI_ERRSTAT(npip, MDI_PI_FAILTO);
7115                                 }
7116                         }
7117                 } else if ((s_pclass != NULL) && (strcmp(pclass, s_pclass)
7118                     == 0)) {
7119                         if (pi_state == MDI_PATHINFO_STATE_ONLINE) {
7120                                 mdi_pi_set_state(npip,
7121                                     MDI_PATHINFO_STATE_STANDBY);
7122                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
7123                                     "!vhci_failover(10)(%s): path 0x%p marked "
7124                                     "STANDBY\n", guid, (void *)npip));
7125                                 MDI_PI_ERRSTAT(npip, MDI_PI_FAILFROM);
7126                         }
7127                 }
7128                 (void) mdi_prop_free(pclass);
7129                 pip = npip;
7130                 sps = mdi_select_path(cdip, NULL, (MDI_SELECT_ONLINE_PATH |
7131                     MDI_SELECT_STANDBY_PATH|MDI_SELECT_USER_DISABLE_PATH),
7132                     pip, &npip);
7133                 mdi_rele_path(pip);
7134         } while ((npip != NULL) && (sps == MDI_SUCCESS));
7135 
7136         /*
7137          * Update the AccessState of related MP-API TPGs
7138          */
7139         (void) vhci_mpapi_update_tpg_acc_state_for_lu(vhci, vlun);
7140 
7141         vhci_log(CE_NOTE, vdip, "!Failover operation completed successfully "
7142             "for device %s (GUID %s): failed over from %s to %s",
7143             ddi_node_name(cdip), guid, ((s_pclass == NULL) ? "<none>" :
7144             s_pclass), pclass2);
7145         ptr1 = kmem_alloc(strlen(pclass2)+1, KM_SLEEP);
7146         (void) strlcpy(ptr1, pclass2, (strlen(pclass2)+1));
7147         mutex_enter(&vlun->svl_mutex);
7148         ptr2 = vlun->svl_active_pclass;
7149         vlun->svl_active_pclass = ptr1;
7150         mutex_exit(&vlun->svl_mutex);
7151         if (ptr2) {
7152                 kmem_free(ptr2, strlen(ptr2)+1);
7153         }
7154         mutex_enter(&vhci->vhci_mutex);
7155         scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
7156             &vhci->vhci_reset_notify_listf);
7157         /* All reservations are cleared upon these resets. */
7158         vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
7159         mutex_exit(&vhci->vhci_mutex);
7160         VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(11): DONE! Active "
7161             "pathclass for %s is now %s\n", guid, pclass2));
7162         retval = MDI_SUCCESS;
7163 
7164 done:
7165         vlun->svl_failover_status = retval;
7166         if (flags == MDI_FAILOVER_ASYNC) {
7167                 VHCI_RELEASE_LUN(vlun);
7168                 VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_failover(12): DONE! "
7169                     "releasing lun, as failover was ASYNC\n"));
7170         } else {
7171                 VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_failover(12): DONE! "
7172                     "NOT releasing lun, as failover was SYNC\n"));
7173         }
7174         return (retval);
7175 }
7176 
7177 /*
7178  * vhci_client_attached is called after the successful attach of a
7179  * client devinfo node.
7180  */
7181 static void
7182 vhci_client_attached(dev_info_t *cdip)
7183 {
7184         mdi_pathinfo_t  *pip;
7185         int             circular;
7186 
7187         /*
7188          * At this point the client has attached and it's instance number is
7189          * valid, so we can set up kstats.  We need to do this here because it
7190          * is possible for paths to go online prior to client attach, in which
7191          * case the call to vhci_kstat_create_pathinfo in vhci_pathinfo_online
7192          * was a noop.
7193          */
7194         ndi_devi_enter(cdip, &circular);
7195         for (pip = mdi_get_next_phci_path(cdip, NULL); pip;
7196             pip = mdi_get_next_phci_path(cdip, pip))
7197                 vhci_kstat_create_pathinfo(pip);
7198         ndi_devi_exit(cdip, circular);
7199 }
7200 
7201 /*
7202  * quiesce all of the online paths
7203  */
7204 static int
7205 vhci_quiesce_paths(dev_info_t *vdip, dev_info_t *cdip, scsi_vhci_lun_t *vlun,
7206         char *guid, char *active_pclass_ptr)
7207 {
7208         scsi_vhci_priv_t        *svp;
7209         char                    *s_pclass = NULL;
7210         mdi_pathinfo_t          *npip, *pip;
7211         int                     sps;
7212 
7213         /* quiesce currently active paths */
7214         s_pclass = NULL;
7215         pip = npip = NULL;
7216         sps = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH, NULL, &npip);
7217         if ((npip == NULL) || (sps != MDI_SUCCESS)) {
7218                 return (1);
7219         }
7220         do {
7221                 if (mdi_prop_lookup_string(npip, "path-class",
7222                     &s_pclass) != MDI_SUCCESS) {
7223                         mdi_rele_path(npip);
7224                         vhci_log(CE_NOTE, vdip, "!Failover operation failed "
7225                             "for device %s (GUID %s) due to an internal "
7226                             "error", ddi_node_name(cdip), guid);
7227                         return (1);
7228                 }
7229                 if (strcmp(s_pclass, active_pclass_ptr) == 0) {
7230                         /*
7231                          * quiesce path. Free s_pclass since
7232                          * we don't need it anymore
7233                          */
7234                         VHCI_DEBUG(1, (CE_NOTE, NULL,
7235                             "!vhci_failover(2)(%s): failing over "
7236                             "from %s; quiescing path %p\n",
7237                             guid, s_pclass, (void *)npip));
7238                         (void) mdi_prop_free(s_pclass);
7239                         svp = (scsi_vhci_priv_t *)
7240                             mdi_pi_get_vhci_private(npip);
7241                         if (svp == NULL) {
7242                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
7243                                     "!vhci_failover(2.5)(%s): no "
7244                                     "client priv! %p offlined?\n",
7245                                     guid, (void *)npip));
7246                                 pip = npip;
7247                                 sps = mdi_select_path(cdip, NULL,
7248                                     MDI_SELECT_ONLINE_PATH, pip, &npip);
7249                                 mdi_rele_path(pip);
7250                                 continue;
7251                         }
7252                         if (scsi_abort(&svp->svp_psd->sd_address, NULL)
7253                             == 0) {
7254                                 (void) vhci_recovery_reset(vlun,
7255                                     &svp->svp_psd->sd_address, FALSE,
7256                                     VHCI_DEPTH_TARGET);
7257                         }
7258                         mutex_enter(&svp->svp_mutex);
7259                         if (svp->svp_cmds == 0) {
7260                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
7261                                     "!vhci_failover(3)(%s):"
7262                                     "quiesced path %p\n", guid, (void *)npip));
7263                         } else {
7264                                 while (svp->svp_cmds != 0) {
7265                                         cv_wait(&svp->svp_cv, &svp->svp_mutex);
7266                                         VHCI_DEBUG(1, (CE_NOTE, NULL,
7267                                             "!vhci_failover(3.cv)(%s):"
7268                                             "quiesced path %p\n", guid,
7269                                             (void *)npip));
7270                                 }
7271                         }
7272                         mutex_exit(&svp->svp_mutex);
7273                 } else {
7274                         /*
7275                          * make sure we freeup the memory
7276                          */
7277                         (void) mdi_prop_free(s_pclass);
7278                 }
7279                 pip = npip;
7280                 sps = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH,
7281                     pip, &npip);
7282                 mdi_rele_path(pip);
7283         } while ((npip != NULL) && (sps == MDI_SUCCESS));
7284         return (0);
7285 }
7286 
7287 static struct scsi_vhci_lun *
7288 vhci_lun_lookup(dev_info_t *tgt_dip)
7289 {
7290         return ((struct scsi_vhci_lun *)
7291             mdi_client_get_vhci_private(tgt_dip));
7292 }
7293 
7294 static struct scsi_vhci_lun *
7295 vhci_lun_lookup_alloc(dev_info_t *tgt_dip, char *guid, int *didalloc)
7296 {
7297         struct scsi_vhci_lun *svl;
7298 
7299         if (svl = vhci_lun_lookup(tgt_dip)) {
7300                 return (svl);
7301         }
7302 
7303         svl = kmem_zalloc(sizeof (*svl), KM_SLEEP);
7304         svl->svl_lun_wwn = kmem_zalloc(strlen(guid)+1, KM_SLEEP);
7305         (void) strcpy(svl->svl_lun_wwn,  guid);
7306         mutex_init(&svl->svl_mutex, NULL, MUTEX_DRIVER, NULL);
7307         cv_init(&svl->svl_cv, NULL, CV_DRIVER, NULL);
7308         sema_init(&svl->svl_pgr_sema, 1, NULL, SEMA_DRIVER, NULL);
7309         svl->svl_waiting_for_activepath = 1;
7310         svl->svl_sector_size = 1;
7311         mdi_client_set_vhci_private(tgt_dip, svl);
7312         *didalloc = 1;
7313         VHCI_DEBUG(1, (CE_NOTE, NULL,
7314             "vhci_lun_lookup_alloc: guid %s vlun 0x%p\n",
7315             guid, (void *)svl));
7316         return (svl);
7317 }
7318 
7319 static void
7320 vhci_lun_free(struct scsi_vhci_lun *dvlp, struct scsi_device *sd)
7321 {
7322         char *guid;
7323 
7324         guid = dvlp->svl_lun_wwn;
7325         ASSERT(guid != NULL);
7326         VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_lun_free: %s\n", guid));
7327 
7328         mutex_enter(&dvlp->svl_mutex);
7329         if (dvlp->svl_active_pclass != NULL) {
7330                 kmem_free(dvlp->svl_active_pclass,
7331                     strlen(dvlp->svl_active_pclass)+1);
7332         }
7333         dvlp->svl_active_pclass = NULL;
7334         mutex_exit(&dvlp->svl_mutex);
7335 
7336         if (dvlp->svl_lun_wwn != NULL) {
7337                 kmem_free(dvlp->svl_lun_wwn, strlen(dvlp->svl_lun_wwn)+1);
7338         }
7339         dvlp->svl_lun_wwn = NULL;
7340 
7341         if (dvlp->svl_fops_name) {
7342                 kmem_free(dvlp->svl_fops_name, strlen(dvlp->svl_fops_name)+1);
7343         }
7344         dvlp->svl_fops_name = NULL;
7345 
7346         if (dvlp->svl_fops_ctpriv != NULL &&
7347             dvlp->svl_fops != NULL) {
7348                 dvlp->svl_fops->sfo_device_unprobe(sd, dvlp->svl_fops_ctpriv);
7349         }
7350 
7351         if (dvlp->svl_flags & VLUN_TASK_D_ALIVE_FLG)
7352                 taskq_destroy(dvlp->svl_taskq);
7353 
7354         mutex_destroy(&dvlp->svl_mutex);
7355         cv_destroy(&dvlp->svl_cv);
7356         sema_destroy(&dvlp->svl_pgr_sema);
7357         kmem_free(dvlp, sizeof (*dvlp));
7358         /*
7359          * vhci_lun_free may be called before the tgt_dip
7360          * initialization so check if the sd is NULL.
7361          */
7362         if (sd != NULL)
7363                 scsi_device_hba_private_set(sd, NULL);
7364 }
7365 
7366 int
7367 vhci_do_scsi_cmd(struct scsi_pkt *pkt)
7368 {
7369         int     err = 0;
7370         int     retry_cnt = 0;
7371         uint8_t *sns, skey;
7372 
7373 #ifdef DEBUG
7374         if (vhci_debug > 5) {
7375                 vhci_print_cdb(pkt->pkt_address.a_hba_tran->tran_hba_dip,
7376                     CE_WARN, "Vhci command", pkt->pkt_cdbp);
7377         }
7378 #endif
7379 
7380 retry:
7381         err = scsi_poll(pkt);
7382         if (err) {
7383                 if (pkt->pkt_cdbp[0] == SCMD_RELEASE) {
7384                         if (SCBP_C(pkt) == STATUS_RESERVATION_CONFLICT) {
7385                                 VHCI_DEBUG(1, (CE_NOTE, NULL,
7386                                     "!v_s_do_s_c: RELEASE conflict\n"));
7387                                 return (0);
7388                         }
7389                 }
7390                 if (retry_cnt++ < 6) {
7391                         VHCI_DEBUG(1, (CE_WARN, NULL,
7392                             "!v_s_do_s_c:retry packet 0x%p "
7393                             "status 0x%x reason %s",
7394                             (void *)pkt, SCBP_C(pkt),
7395                             scsi_rname(pkt->pkt_reason)));
7396                         if ((pkt->pkt_reason == CMD_CMPLT) &&
7397                             (SCBP_C(pkt) == STATUS_CHECK) &&
7398                             (pkt->pkt_state & STATE_ARQ_DONE)) {
7399                                 sns = (uint8_t *)
7400                                     &(((struct scsi_arq_status *)(uintptr_t)
7401                                     (pkt->pkt_scbp))->sts_sensedata);
7402                                 skey = scsi_sense_key(sns);
7403                                 VHCI_DEBUG(1, (CE_WARN, NULL,
7404                                     "!v_s_do_s_c:retry "
7405                                     "packet 0x%p  sense data %s", (void *)pkt,
7406                                     scsi_sname(skey)));
7407                         }
7408                         goto retry;
7409                 }
7410                 VHCI_DEBUG(1, (CE_WARN, NULL,
7411                     "!v_s_do_s_c: failed transport 0x%p 0x%x",
7412                     (void *)pkt, SCBP_C(pkt)));
7413                 return (0);
7414         }
7415 
7416         switch (pkt->pkt_reason) {
7417                 case CMD_TIMEOUT:
7418                         VHCI_DEBUG(1, (CE_WARN, NULL, "!pkt timed "
7419                             "out (pkt 0x%p)", (void *)pkt));
7420                         return (0);
7421                 case CMD_CMPLT:
7422                         switch (SCBP_C(pkt)) {
7423                                 case STATUS_GOOD:
7424                                         break;
7425                                 case STATUS_CHECK:
7426                                         if (pkt->pkt_state & STATE_ARQ_DONE) {
7427                                                 sns = (uint8_t *)&(((
7428                                                     struct scsi_arq_status *)
7429                                                     (uintptr_t)
7430                                                     (pkt->pkt_scbp))->
7431                                                     sts_sensedata);
7432                                                 skey = scsi_sense_key(sns);
7433                                                 if ((skey ==
7434                                                     KEY_UNIT_ATTENTION) ||
7435                                                     (skey ==
7436                                                     KEY_NOT_READY)) {
7437                                                         /*
7438                                                          * clear unit attn.
7439                                                          */
7440 
7441                                                         VHCI_DEBUG(1,
7442                                                             (CE_WARN, NULL,
7443                                                             "!v_s_do_s_c: "
7444                                                             "retry "
7445                                                             "packet 0x%p sense "
7446                                                             "data %s",
7447                                                             (void *)pkt,
7448                                                             scsi_sname
7449                                                             (skey)));
7450                                                         goto retry;
7451                                                 }
7452                                                 VHCI_DEBUG(4, (CE_WARN, NULL,
7453                                                     "!ARQ while "
7454                                                     "transporting "
7455                                                     "(pkt 0x%p)",
7456                                                     (void *)pkt));
7457                                                 return (0);
7458                                         }
7459                                         return (0);
7460                                 default:
7461                                         VHCI_DEBUG(1, (CE_WARN, NULL,
7462                                             "!Bad status returned "
7463                                             "(pkt 0x%p, status %x)",
7464                                             (void *)pkt, SCBP_C(pkt)));
7465                                         return (0);
7466                         }
7467                         break;
7468                 case CMD_INCOMPLETE:
7469                 case CMD_RESET:
7470                 case CMD_ABORTED:
7471                 case CMD_TRAN_ERR:
7472                         if (retry_cnt++ < 1) {
7473                                 VHCI_DEBUG(1, (CE_WARN, NULL,
7474                                     "!v_s_do_s_c: retry packet 0x%p %s",
7475                                     (void *)pkt, scsi_rname(pkt->pkt_reason)));
7476                                 goto retry;
7477                         }
7478                         /* FALLTHROUGH */
7479                 default:
7480                         VHCI_DEBUG(1, (CE_WARN, NULL, "!pkt did not "
7481                             "complete successfully (pkt 0x%p,"
7482                             "reason %x)", (void *)pkt, pkt->pkt_reason));
7483                         return (0);
7484         }
7485         return (1);
7486 }
7487 
7488 static int
7489 vhci_quiesce_lun(struct scsi_vhci_lun *vlun)
7490 {
7491         mdi_pathinfo_t          *pip, *spip;
7492         dev_info_t              *cdip;
7493         struct scsi_vhci_priv   *svp;
7494         mdi_pathinfo_state_t    pstate;
7495         uint32_t                p_ext_state;
7496         int                     circular;
7497 
7498         cdip = vlun->svl_dip;
7499         pip = spip = NULL;
7500         ndi_devi_enter(cdip, &circular);
7501         pip = mdi_get_next_phci_path(cdip, NULL);
7502         while (pip != NULL) {
7503                 (void) mdi_pi_get_state2(pip, &pstate, &p_ext_state);
7504                 if (pstate != MDI_PATHINFO_STATE_ONLINE) {
7505                         spip = pip;
7506                         pip = mdi_get_next_phci_path(cdip, spip);
7507                         continue;
7508                 }
7509                 mdi_hold_path(pip);
7510                 ndi_devi_exit(cdip, circular);
7511                 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
7512                 mutex_enter(&svp->svp_mutex);
7513                 while (svp->svp_cmds != 0) {
7514                         if (cv_reltimedwait(&svp->svp_cv, &svp->svp_mutex,
7515                             drv_usectohz(vhci_path_quiesce_timeout * 1000000),
7516                             TR_CLOCK_TICK) == -1) {
7517                                 mutex_exit(&svp->svp_mutex);
7518                                 mdi_rele_path(pip);
7519                                 VHCI_DEBUG(1, (CE_WARN, NULL,
7520                                     "Quiesce of lun is not successful "
7521                                     "vlun: 0x%p.", (void *)vlun));
7522                                 return (0);
7523                         }
7524                 }
7525                 mutex_exit(&svp->svp_mutex);
7526                 ndi_devi_enter(cdip, &circular);
7527                 spip = pip;
7528                 pip = mdi_get_next_phci_path(cdip, spip);
7529                 mdi_rele_path(spip);
7530         }
7531         ndi_devi_exit(cdip, circular);
7532         return (1);
7533 }
7534 
7535 static int
7536 vhci_pgr_validate_and_register(scsi_vhci_priv_t *svp)
7537 {
7538         scsi_vhci_lun_t         *vlun;
7539         vhci_prout_t            *prout;
7540         int                     rval, success;
7541         mdi_pathinfo_t          *pip, *npip;
7542         scsi_vhci_priv_t        *osvp;
7543         dev_info_t              *cdip;
7544         uchar_t                 cdb_1;
7545         uchar_t                 temp_res_key[MHIOC_RESV_KEY_SIZE];
7546 
7547 
7548         /*
7549          * see if there are any other paths available; if none,
7550          * then there is nothing to do.
7551          */
7552         cdip = svp->svp_svl->svl_dip;
7553         rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH |
7554             MDI_SELECT_STANDBY_PATH, NULL, &pip);
7555         if ((rval != MDI_SUCCESS) || (pip == NULL)) {
7556                 VHCI_DEBUG(4, (CE_NOTE, NULL,
7557                     "%s%d: vhci_pgr_validate_and_register: first path\n",
7558                     ddi_driver_name(cdip), ddi_get_instance(cdip)));
7559                 return (1);
7560         }
7561 
7562         vlun = svp->svp_svl;
7563         prout = &vlun->svl_prout;
7564         ASSERT(vlun->svl_pgr_active != 0);
7565 
7566         /*
7567          * When the path was busy/offlined, some other host might have
7568          * cleared this key. Validate key on some other path first.
7569          * If it fails, return failure.
7570          */
7571 
7572         npip = pip;
7573         pip = NULL;
7574         success = 0;
7575 
7576         /* Save the res key */
7577         bcopy(prout->res_key, temp_res_key, MHIOC_RESV_KEY_SIZE);
7578 
7579         /*
7580          * Sometimes CDB from application can be a Register_And_Ignore.
7581          * Instead of validation, this cdb would result in force registration.
7582          * Convert it to normal cdb for validation.
7583          * After that be sure to restore the cdb.
7584          */
7585         cdb_1 = vlun->svl_cdb[1];
7586         vlun->svl_cdb[1] &= 0xe0;
7587 
7588         do {
7589                 osvp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(npip);
7590                 if (osvp == NULL) {
7591                         VHCI_DEBUG(4, (CE_NOTE, NULL,
7592                             "vhci_pgr_validate_and_register: no "
7593                             "client priv! 0x%p offlined?\n",
7594                             (void *)npip));
7595                         goto next_path_1;
7596                 }
7597 
7598                 if (osvp == svp) {
7599                         VHCI_DEBUG(4, (CE_NOTE, NULL,
7600                             "vhci_pgr_validate_and_register: same svp 0x%p"
7601                             " npip 0x%p vlun 0x%p\n",
7602                             (void *)svp, (void *)npip, (void *)vlun));
7603                         goto next_path_1;
7604                 }
7605 
7606                 VHCI_DEBUG(4, (CE_NOTE, NULL,
7607                     "vhci_pgr_validate_and_register: First validate on"
7608                     " osvp 0x%p being done. vlun 0x%p thread 0x%p Before bcopy"
7609                     " cdb1 %x\n", (void *)osvp, (void *)vlun,
7610                     (void *)curthread, vlun->svl_cdb[1]));
7611                 vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy:");
7612 
7613                 bcopy(prout->service_key, prout->res_key, MHIOC_RESV_KEY_SIZE);
7614 
7615                 VHCI_DEBUG(4, (CE_WARN, NULL, "vlun 0x%p After bcopy",
7616                     (void *)vlun));
7617                 vhci_print_prout_keys(vlun, "v_pgr_val_reg: after bcopy: ");
7618 
7619                 rval = vhci_do_prout(osvp);
7620                 if (rval == 1) {
7621                         VHCI_DEBUG(4, (CE_NOTE, NULL,
7622                             "%s%d: vhci_pgr_validate_and_register: key"
7623                             " validated thread 0x%p\n", ddi_driver_name(cdip),
7624                             ddi_get_instance(cdip), (void *)curthread));
7625                         pip = npip;
7626                         success = 1;
7627                         break;
7628                 } else {
7629                         VHCI_DEBUG(4, (CE_NOTE, NULL,
7630                             "vhci_pgr_validate_and_register: First validation"
7631                             " on osvp 0x%p failed %x\n", (void *)osvp, rval));
7632                         vhci_print_prout_keys(vlun, "v_pgr_val_reg: failed:");
7633                 }
7634 
7635                 /*
7636                  * Try other paths
7637                  */
7638 next_path_1:
7639                 pip = npip;
7640                 rval = mdi_select_path(cdip, NULL,
7641                     MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH,
7642                     pip, &npip);
7643                 mdi_rele_path(pip);
7644         } while ((rval == MDI_SUCCESS) && (npip != NULL));
7645 
7646 
7647         /* Be sure to restore original cdb */
7648         vlun->svl_cdb[1] = cdb_1;
7649 
7650         /* Restore the res_key */
7651         bcopy(temp_res_key, prout->res_key, MHIOC_RESV_KEY_SIZE);
7652 
7653         /*
7654          * If key could not be registered on any path for the first time,
7655          * return success as online should still continue.
7656          */
7657         if (success == 0) {
7658                 return (1);
7659         }
7660 
7661         ASSERT(pip != NULL);
7662 
7663         /*
7664          * Force register on new path
7665          */
7666         cdb_1 = vlun->svl_cdb[1];            /* store the cdb */
7667 
7668         vlun->svl_cdb[1] &= 0xe0;
7669         vlun->svl_cdb[1] |= VHCI_PROUT_R_AND_IGNORE;
7670 
7671         vhci_print_prout_keys(vlun, "v_pgr_val_reg: keys before bcopy: ");
7672 
7673         bcopy(prout->active_service_key, prout->service_key,
7674             MHIOC_RESV_KEY_SIZE);
7675         bcopy(prout->active_res_key, prout->res_key, MHIOC_RESV_KEY_SIZE);
7676 
7677         vhci_print_prout_keys(vlun, "v_pgr_val_reg:keys after bcopy: ");
7678 
7679         rval = vhci_do_prout(svp);
7680         vlun->svl_cdb[1] = cdb_1;            /* restore the cdb */
7681         if (rval != 1) {
7682                 VHCI_DEBUG(4, (CE_NOTE, NULL,
7683                     "vhci_pgr_validate_and_register: register on new"
7684                     " path 0x%p svp 0x%p failed %x\n",
7685                     (void *)pip, (void *)svp, rval));
7686                 vhci_print_prout_keys(vlun, "v_pgr_val_reg: reg failed: ");
7687                 mdi_rele_path(pip);
7688                 return (0);
7689         }
7690 
7691         if (bcmp(prout->service_key, zero_key, MHIOC_RESV_KEY_SIZE) == 0) {
7692                 VHCI_DEBUG(4, (CE_NOTE, NULL,
7693                     "vhci_pgr_validate_and_register: zero service key\n"));
7694                 mdi_rele_path(pip);
7695                 return (rval);
7696         }
7697 
7698         /*
7699          * While the key was force registered, some other host might have
7700          * cleared the key. Re-validate key on another pre-existing path
7701          * before declaring success.
7702          */
7703         npip = pip;
7704         pip = NULL;
7705 
7706         /*
7707          * Sometimes CDB from application can be Register and Ignore.
7708          * Instead of validation, it would result in force registration.
7709          * Convert it to normal cdb for validation.
7710          * After that be sure to restore the cdb.
7711          */
7712         cdb_1 = vlun->svl_cdb[1];
7713         vlun->svl_cdb[1] &= 0xe0;
7714         success = 0;
7715 
7716         do {
7717                 osvp = (scsi_vhci_priv_t *)
7718                     mdi_pi_get_vhci_private(npip);
7719                 if (osvp == NULL) {
7720                         VHCI_DEBUG(4, (CE_NOTE, NULL,
7721                             "vhci_pgr_validate_and_register: no "
7722                             "client priv! 0x%p offlined?\n",
7723                             (void *)npip));
7724                         goto next_path_2;
7725                 }
7726 
7727                 if (osvp == svp) {
7728                         VHCI_DEBUG(4, (CE_NOTE, NULL,
7729                             "vhci_pgr_validate_and_register: same osvp 0x%p"
7730                             " npip 0x%p vlun 0x%p\n",
7731                             (void *)svp, (void *)npip, (void *)vlun));
7732                         goto next_path_2;
7733                 }
7734 
7735                 VHCI_DEBUG(4, (CE_NOTE, NULL,
7736                     "vhci_pgr_validate_and_register: Re-validation on"
7737                     " osvp 0x%p being done. vlun 0x%p Before bcopy cdb1 %x\n",
7738                     (void *)osvp, (void *)vlun, vlun->svl_cdb[1]));
7739                 vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy: ");
7740 
7741                 bcopy(prout->service_key, prout->res_key, MHIOC_RESV_KEY_SIZE);
7742 
7743                 vhci_print_prout_keys(vlun, "v_pgr_val_reg: after bcopy: ");
7744 
7745                 rval = vhci_do_prout(osvp);
7746                 if (rval == 1) {
7747                         VHCI_DEBUG(4, (CE_NOTE, NULL,
7748                             "%s%d: vhci_pgr_validate_and_register: key"
7749                             " validated thread 0x%p\n", ddi_driver_name(cdip),
7750                             ddi_get_instance(cdip), (void *)curthread));
7751                         pip = npip;
7752                         success = 1;
7753                         break;
7754                 } else {
7755                         VHCI_DEBUG(4, (CE_NOTE, NULL,
7756                             "vhci_pgr_validate_and_register: Re-validation on"
7757                             " osvp 0x%p failed %x\n", (void *)osvp, rval));
7758                         vhci_print_prout_keys(vlun,
7759                             "v_pgr_val_reg: reval failed: ");
7760                 }
7761 
7762                 /*
7763                  * Try other paths
7764                  */
7765 next_path_2:
7766                 pip = npip;
7767                 rval = mdi_select_path(cdip, NULL,
7768                     MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH,
7769                     pip, &npip);
7770                 mdi_rele_path(pip);
7771         } while ((rval == MDI_SUCCESS) && (npip != NULL));
7772 
7773         /* Be sure to restore original cdb */
7774         vlun->svl_cdb[1] = cdb_1;
7775 
7776         if (success == 1) {
7777                 /* Successfully validated registration */
7778                 mdi_rele_path(pip);
7779                 return (1);
7780         }
7781 
7782         VHCI_DEBUG(4, (CE_WARN, NULL, "key validation failed"));
7783 
7784         /*
7785          * key invalid, back out by registering key value of 0
7786          */
7787         VHCI_DEBUG(4, (CE_NOTE, NULL,
7788             "vhci_pgr_validate_and_register: backout on"
7789             " svp 0x%p being done\n", (void *)svp));
7790         vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy: ");
7791 
7792         bcopy(prout->service_key, prout->res_key, MHIOC_RESV_KEY_SIZE);
7793         bzero(prout->service_key, MHIOC_RESV_KEY_SIZE);
7794 
7795         vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy: ");
7796 
7797         /*
7798          * Get a new path
7799          */
7800         rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH |
7801             MDI_SELECT_STANDBY_PATH, NULL, &pip);
7802         if ((rval != MDI_SUCCESS) || (pip == NULL)) {
7803                 VHCI_DEBUG(4, (CE_NOTE, NULL,
7804                     "%s%d: vhci_pgr_validate_and_register: no valid pip\n",
7805                     ddi_driver_name(cdip), ddi_get_instance(cdip)));
7806                 return (0);
7807         }
7808 
7809         if ((rval = vhci_do_prout(svp)) != 1) {
7810                 VHCI_DEBUG(4, (CE_NOTE, NULL,
7811                     "vhci_pgr_validate_and_register: backout on"
7812                     " svp 0x%p failed\n", (void *)svp));
7813                 vhci_print_prout_keys(vlun, "backout failed");
7814 
7815                 VHCI_DEBUG(4, (CE_WARN, NULL,
7816                     "%s%d: vhci_pgr_validate_and_register: key"
7817                     " validation and backout failed", ddi_driver_name(cdip),
7818                     ddi_get_instance(cdip)));
7819                 if (rval == VHCI_PGR_ILLEGALOP) {
7820                         VHCI_DEBUG(4, (CE_WARN, NULL,
7821                             "%s%d: vhci_pgr_validate_and_register: key"
7822                             " already cleared", ddi_driver_name(cdip),
7823                             ddi_get_instance(cdip)));
7824                         rval = 1;
7825                 } else
7826                         rval = 0;
7827         } else {
7828                 VHCI_DEBUG(4, (CE_NOTE, NULL,
7829                     "%s%d: vhci_pgr_validate_and_register: key"
7830                     " validation failed, key backed out\n",
7831                     ddi_driver_name(cdip), ddi_get_instance(cdip)));
7832                 vhci_print_prout_keys(vlun, "v_pgr_val_reg: key backed out: ");
7833         }
7834         mdi_rele_path(pip);
7835 
7836         return (rval);
7837 }
7838 
7839 /*
7840  * taskq routine to dispatch a scsi cmd to vhci_scsi_start.  This ensures
7841  * that vhci_scsi_start is not called in interrupt context.
7842  * As the upper layer gets TRAN_ACCEPT when the command is dispatched, we
7843  * need to complete the command if something goes wrong.
7844  */
7845 static void
7846 vhci_dispatch_scsi_start(void *arg)
7847 {
7848         struct vhci_pkt *vpkt   = (struct vhci_pkt *)arg;
7849         struct scsi_pkt *tpkt   = vpkt->vpkt_tgt_pkt;
7850         int rval                = TRAN_BUSY;
7851 
7852         VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_dispatch_scsi_start: sending"
7853             " scsi-2 reserve for 0x%p\n",
7854             (void *)ADDR2DIP(&(vpkt->vpkt_tgt_pkt->pkt_address))));
7855 
7856         /*
7857          * To prevent the taskq from being called recursively we set the
7858          * the VHCI_PKT_THRU_TASKQ bit in the vhci_pkt_states.
7859          */
7860         vpkt->vpkt_state |= VHCI_PKT_THRU_TASKQ;
7861 
7862         /*
7863          * Wait for the transport to get ready to send packets
7864          * and if it times out, it will return something other than
7865          * TRAN_BUSY. The vhci_reserve_delay may want to
7866          * get tuned for other transports and is therefore a global.
7867          * Using delay since this routine is called by taskq dispatch
7868          * and not called during interrupt context.
7869          */
7870         while ((rval = vhci_scsi_start(&(vpkt->vpkt_tgt_pkt->pkt_address),
7871             vpkt->vpkt_tgt_pkt)) == TRAN_BUSY) {
7872                 delay(drv_usectohz(vhci_reserve_delay));
7873         }
7874 
7875         switch (rval) {
7876         case TRAN_ACCEPT:
7877                 return;
7878 
7879         default:
7880                 /*
7881                  * This pkt shall be retried, and to ensure another taskq
7882                  * is dispatched for it, clear the VHCI_PKT_THRU_TASKQ
7883                  * flag.
7884                  */
7885                 vpkt->vpkt_state &= ~VHCI_PKT_THRU_TASKQ;
7886 
7887                 /* Ensure that the pkt is retried without a reset */
7888                 tpkt->pkt_reason = CMD_ABORTED;
7889                 tpkt->pkt_statistics |= STAT_ABORTED;
7890                 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_dispatch_scsi_start: "
7891                     "TRAN_rval %d returned for dip 0x%p", rval,
7892                     (void *)ADDR2DIP(&(vpkt->vpkt_tgt_pkt->pkt_address))));
7893                 break;
7894         }
7895 
7896         /*
7897          * vpkt_org_vpkt should always be NULL here if the retry command
7898          * has been successfully dispatched.  If vpkt_org_vpkt != NULL at
7899          * this point, it is an error so restore the original vpkt and
7900          * return an error to the target driver so it can retry the
7901          * command as appropriate.
7902          */
7903         if (vpkt->vpkt_org_vpkt != NULL) {
7904                 struct vhci_pkt         *new_vpkt = vpkt;
7905                 scsi_vhci_priv_t        *svp = (scsi_vhci_priv_t *)
7906                     mdi_pi_get_vhci_private(vpkt->vpkt_path);
7907 
7908                 vpkt = vpkt->vpkt_org_vpkt;
7909 
7910                 vpkt->vpkt_tgt_pkt->pkt_reason = tpkt->pkt_reason;
7911                 vpkt->vpkt_tgt_pkt->pkt_statistics = tpkt->pkt_statistics;
7912 
7913                 vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
7914                     new_vpkt->vpkt_tgt_pkt);
7915 
7916                 tpkt = vpkt->vpkt_tgt_pkt;
7917         }
7918 
7919         scsi_hba_pkt_comp(tpkt);
7920 }
7921 
7922 static void
7923 vhci_initiate_auto_failback(void *arg)
7924 {
7925         struct scsi_vhci_lun    *vlun = (struct scsi_vhci_lun *)arg;
7926         dev_info_t              *vdip, *cdip;
7927         int                     held;
7928 
7929         cdip = vlun->svl_dip;
7930         vdip = ddi_get_parent(cdip);
7931 
7932         VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
7933 
7934         /*
7935          * Perform a final check to see if the active path class is indeed
7936          * not the preferred path class.  As in the time the auto failback
7937          * was dispatched, an external failover could have been detected.
7938          * [Some other host could have detected this condition and triggered
7939          *  the auto failback before].
7940          * In such a case if we go ahead with failover we will be negating the
7941          * whole purpose of auto failback.
7942          */
7943         mutex_enter(&vlun->svl_mutex);
7944         if (vlun->svl_active_pclass != NULL) {
7945                 char                            *best_pclass;
7946                 struct scsi_failover_ops        *fo;
7947 
7948                 fo = vlun->svl_fops;
7949 
7950                 (void) fo->sfo_pathclass_next(NULL, &best_pclass,
7951                     vlun->svl_fops_ctpriv);
7952                 if (strcmp(vlun->svl_active_pclass, best_pclass) == 0) {
7953                         mutex_exit(&vlun->svl_mutex);
7954                         VHCI_RELEASE_LUN(vlun);
7955                         VHCI_DEBUG(1, (CE_NOTE, NULL, "Not initiating "
7956                             "auto failback for %s as %s pathclass already "
7957                             "active.\n", vlun->svl_lun_wwn, best_pclass));
7958                         return;
7959                 }
7960         }
7961         mutex_exit(&vlun->svl_mutex);
7962         if (mdi_failover(vdip, vlun->svl_dip, MDI_FAILOVER_SYNC)
7963             == MDI_SUCCESS) {
7964                 vhci_log(CE_NOTE, vdip, "!Auto failback operation "
7965                     "succeeded for device %s (GUID %s)",
7966                     ddi_node_name(cdip), vlun->svl_lun_wwn);
7967         } else {
7968                 vhci_log(CE_NOTE, vdip, "!Auto failback operation "
7969                     "failed for device %s (GUID %s)",
7970                     ddi_node_name(cdip), vlun->svl_lun_wwn);
7971         }
7972         VHCI_RELEASE_LUN(vlun);
7973 }
7974 
7975 #ifdef DEBUG
7976 static void
7977 vhci_print_prin_keys(vhci_prin_readkeys_t *prin, int numkeys)
7978 {
7979         vhci_clean_print(NULL, 5, "Current PGR Keys",
7980             (uchar_t *)prin, numkeys * 8);
7981 }
7982 #endif
7983 
7984 static void
7985 vhci_print_prout_keys(scsi_vhci_lun_t *vlun, char *msg)
7986 {
7987         int                     i;
7988         vhci_prout_t            *prout;
7989         char                    buf1[4*MHIOC_RESV_KEY_SIZE + 1];
7990         char                    buf2[4*MHIOC_RESV_KEY_SIZE + 1];
7991         char                    buf3[4*MHIOC_RESV_KEY_SIZE + 1];
7992         char                    buf4[4*MHIOC_RESV_KEY_SIZE + 1];
7993 
7994         prout = &vlun->svl_prout;
7995 
7996         for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
7997                 (void) sprintf(&buf1[4*i], "[%02x]", prout->res_key[i]);
7998         for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
7999                 (void) sprintf(&buf2[(4*i)], "[%02x]", prout->service_key[i]);
8000         for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
8001                 (void) sprintf(&buf3[4*i], "[%02x]", prout->active_res_key[i]);
8002         for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
8003                 (void) sprintf(&buf4[4*i], "[%02x]",
8004                     prout->active_service_key[i]);
8005 
8006         /* Printing all in one go. Otherwise it will jumble up */
8007         VHCI_DEBUG(5, (CE_CONT, NULL, "%s vlun 0x%p, thread 0x%p\n"
8008             "res_key:          : %s\n"
8009             "service_key       : %s\n"
8010             "active_res_key    : %s\n"
8011             "active_service_key: %s\n",
8012             msg, (void *)vlun, (void *)curthread, buf1, buf2, buf3, buf4));
8013 }
8014 
8015 /*
8016  * Called from vhci_scsi_start to update the pHCI pkt with target packet.
8017  */
8018 static void
8019 vhci_update_pHCI_pkt(struct vhci_pkt *vpkt, struct scsi_pkt *pkt)
8020 {
8021 
8022         ASSERT(vpkt->vpkt_hba_pkt);
8023 
8024         vpkt->vpkt_hba_pkt->pkt_flags = pkt->pkt_flags;
8025         vpkt->vpkt_hba_pkt->pkt_flags |= FLAG_NOQUEUE;
8026 
8027         if ((vpkt->vpkt_hba_pkt->pkt_flags & FLAG_NOINTR) ||
8028             MDI_PI_IS_SUSPENDED(vpkt->vpkt_path)) {
8029                 /*
8030                  * Polled Command is requested or HBA is in
8031                  * suspended state
8032                  */
8033                 vpkt->vpkt_hba_pkt->pkt_flags |= FLAG_NOINTR;
8034                 vpkt->vpkt_hba_pkt->pkt_comp = NULL;
8035         } else {
8036                 vpkt->vpkt_hba_pkt->pkt_comp = vhci_intr;
8037         }
8038         vpkt->vpkt_hba_pkt->pkt_time = pkt->pkt_time;
8039         bcopy(pkt->pkt_cdbp, vpkt->vpkt_hba_pkt->pkt_cdbp,
8040             vpkt->vpkt_tgt_init_cdblen);
8041         vpkt->vpkt_hba_pkt->pkt_resid = pkt->pkt_resid;
8042 
8043         /* Re-initialize the following pHCI packet state information */
8044         vpkt->vpkt_hba_pkt->pkt_state = 0;
8045         vpkt->vpkt_hba_pkt->pkt_statistics = 0;
8046         vpkt->vpkt_hba_pkt->pkt_reason = 0;
8047 }
8048 
8049 static int
8050 vhci_scsi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
8051     void *arg, void *result)
8052 {
8053         int ret = DDI_SUCCESS;
8054 
8055         /*
8056          * Generic processing in MPxIO framework
8057          */
8058         ret = mdi_bus_power(parent, impl_arg, op, arg, result);
8059 
8060         switch (ret) {
8061         case MDI_SUCCESS:
8062                 ret = DDI_SUCCESS;
8063                 break;
8064         case MDI_FAILURE:
8065                 ret = DDI_FAILURE;
8066                 break;
8067         default:
8068                 break;
8069         }
8070 
8071         return (ret);
8072 }
8073 
8074 static int
8075 vhci_pHCI_cap(struct scsi_address *ap, char *cap, int val, int whom,
8076     mdi_pathinfo_t *pip)
8077 {
8078         dev_info_t              *cdip;
8079         mdi_pathinfo_t          *npip = NULL;
8080         scsi_vhci_priv_t        *svp = NULL;
8081         struct scsi_address     *pap = NULL;
8082         scsi_hba_tran_t         *hba = NULL;
8083         int                     sps;
8084         int                     mps_flag;
8085         int                     rval = 0;
8086 
8087         mps_flag = (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH);
8088         if (pip) {
8089                 /*
8090                  * If the call is from vhci_pathinfo_state_change,
8091                  * then this path was busy and is becoming ready to accept IO.
8092                  */
8093                 ASSERT(ap != NULL);
8094                 hba = ap->a_hba_tran;
8095                 ASSERT(hba != NULL);
8096                 rval = scsi_ifsetcap(ap, cap, val, whom);
8097 
8098                 VHCI_DEBUG(2, (CE_NOTE, NULL,
8099                     "!vhci_pHCI_cap: only on path %p, ap %p, rval %x\n",
8100                     (void *)pip, (void *)ap, rval));
8101 
8102                 return (rval);
8103         }
8104 
8105         /*
8106          * Set capability on all the pHCIs.
8107          * If any path is busy, then the capability would be set by
8108          * vhci_pathinfo_state_change.
8109          */
8110 
8111         cdip = ADDR2DIP(ap);
8112         ASSERT(cdip != NULL);
8113         sps = mdi_select_path(cdip, NULL, mps_flag, NULL, &pip);
8114         if ((sps != MDI_SUCCESS) || (pip == NULL)) {
8115                 VHCI_DEBUG(2, (CE_WARN, NULL,
8116                     "!vhci_pHCI_cap: Unable to get a path, dip 0x%p",
8117                     (void *)cdip));
8118                 return (0);
8119         }
8120 
8121 again:
8122         svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
8123         if (svp == NULL) {
8124                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_pHCI_cap: "
8125                     "priv is NULL, pip 0x%p", (void *)pip));
8126                 mdi_rele_path(pip);
8127                 return (rval);
8128         }
8129 
8130         if (svp->svp_psd == NULL) {
8131                 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_pHCI_cap: "
8132                     "psd is NULL, pip 0x%p, svp 0x%p",
8133                     (void *)pip, (void *)svp));
8134                 mdi_rele_path(pip);
8135                 return (rval);
8136         }
8137 
8138         pap = &svp->svp_psd->sd_address;
8139         ASSERT(pap != NULL);
8140         hba = pap->a_hba_tran;
8141         ASSERT(hba != NULL);
8142 
8143         if (hba->tran_setcap != NULL) {
8144                 rval = scsi_ifsetcap(pap, cap, val, whom);
8145 
8146                 VHCI_DEBUG(2, (CE_NOTE, NULL,
8147                     "!vhci_pHCI_cap: path %p, ap %p, rval %x\n",
8148                     (void *)pip, (void *)ap, rval));
8149 
8150                 /*
8151                  * Select next path and issue the setcap, repeat
8152                  * until all paths are exhausted
8153                  */
8154                 sps = mdi_select_path(cdip, NULL, mps_flag, pip, &npip);
8155                 if ((sps != MDI_SUCCESS) || (npip == NULL)) {
8156                         mdi_rele_path(pip);
8157                         return (1);
8158                 }
8159                 mdi_rele_path(pip);
8160                 pip = npip;
8161                 goto again;
8162         }
8163         mdi_rele_path(pip);
8164         return (rval);
8165 }
8166 
8167 static int
8168 vhci_scsi_bus_config(dev_info_t *pdip, uint_t flags, ddi_bus_config_op_t op,
8169     void *arg, dev_info_t **child)
8170 {
8171         char *guid;
8172 
8173         if (vhci_bus_config_debug)
8174                 flags |= NDI_DEVI_DEBUG;
8175 
8176         if (op == BUS_CONFIG_ONE || op == BUS_UNCONFIG_ONE)
8177                 guid = vhci_devnm_to_guid((char *)arg);
8178         else
8179                 guid = NULL;
8180 
8181         if (mdi_vhci_bus_config(pdip, flags, op, arg, child, guid)
8182             == MDI_SUCCESS)
8183                 return (NDI_SUCCESS);
8184         else
8185                 return (NDI_FAILURE);
8186 }
8187 
8188 static int
8189 vhci_scsi_bus_unconfig(dev_info_t *pdip, uint_t flags, ddi_bus_config_op_t op,
8190     void *arg)
8191 {
8192         if (vhci_bus_config_debug)
8193                 flags |= NDI_DEVI_DEBUG;
8194 
8195         return (ndi_busop_bus_unconfig(pdip, flags, op, arg));
8196 }
8197 
8198 /*
8199  * Take the original vhci_pkt, create a duplicate of the pkt for resending
8200  * as though it originated in ssd.
8201  */
8202 static struct scsi_pkt *
8203 vhci_create_retry_pkt(struct vhci_pkt *vpkt)
8204 {
8205         struct vhci_pkt *new_vpkt = NULL;
8206         struct scsi_pkt *pkt = NULL;
8207 
8208         scsi_vhci_priv_t *svp = (scsi_vhci_priv_t *)
8209             mdi_pi_get_vhci_private(vpkt->vpkt_path);
8210 
8211         /*
8212          * Ensure consistent data at completion time by setting PKT_CONSISTENT
8213          */
8214         pkt = vhci_scsi_init_pkt(&svp->svp_psd->sd_address, pkt,
8215             vpkt->vpkt_tgt_init_bp, vpkt->vpkt_tgt_init_cdblen,
8216             vpkt->vpkt_tgt_init_scblen, 0, PKT_CONSISTENT, NULL_FUNC, NULL);
8217         if (pkt != NULL) {
8218                 new_vpkt = TGTPKT2VHCIPKT(pkt);
8219 
8220                 pkt->pkt_address = vpkt->vpkt_tgt_pkt->pkt_address;
8221                 pkt->pkt_flags = vpkt->vpkt_tgt_pkt->pkt_flags;
8222                 pkt->pkt_time = vpkt->vpkt_tgt_pkt->pkt_time;
8223                 pkt->pkt_comp = vpkt->vpkt_tgt_pkt->pkt_comp;
8224 
8225                 pkt->pkt_resid = 0;
8226                 pkt->pkt_statistics = 0;
8227                 pkt->pkt_reason = 0;
8228 
8229                 bcopy(vpkt->vpkt_tgt_pkt->pkt_cdbp,
8230                     pkt->pkt_cdbp, vpkt->vpkt_tgt_init_cdblen);
8231 
8232                 /*
8233                  * Save a pointer to the original vhci_pkt
8234                  */
8235                 new_vpkt->vpkt_org_vpkt = vpkt;
8236         }
8237 
8238         return (pkt);
8239 }
8240 
8241 /*
8242  * Copy the successful completion information from the hba packet into
8243  * the original target pkt from the upper layer.  Returns the original
8244  * vpkt and destroys the new vpkt from the internal retry.
8245  */
8246 static struct vhci_pkt *
8247 vhci_sync_retry_pkt(struct vhci_pkt *vpkt)
8248 {
8249         struct vhci_pkt         *ret_vpkt = NULL;
8250         struct scsi_pkt         *tpkt = NULL;
8251         struct scsi_pkt         *hba_pkt = NULL;
8252         scsi_vhci_priv_t        *svp = (scsi_vhci_priv_t *)
8253             mdi_pi_get_vhci_private(vpkt->vpkt_path);
8254 
8255         ASSERT(vpkt->vpkt_org_vpkt != NULL);
8256         VHCI_DEBUG(0, (CE_NOTE, NULL, "vhci_sync_retry_pkt: Retry pkt "
8257             "completed successfully!\n"));
8258 
8259         ret_vpkt = vpkt->vpkt_org_vpkt;
8260         tpkt = ret_vpkt->vpkt_tgt_pkt;
8261         hba_pkt = vpkt->vpkt_hba_pkt;
8262 
8263         /*
8264          * Copy the good status into the target driver's packet
8265          */
8266         *(tpkt->pkt_scbp) = *(hba_pkt->pkt_scbp);
8267         tpkt->pkt_resid = hba_pkt->pkt_resid;
8268         tpkt->pkt_state = hba_pkt->pkt_state;
8269         tpkt->pkt_statistics = hba_pkt->pkt_statistics;
8270         tpkt->pkt_reason = hba_pkt->pkt_reason;
8271 
8272         /*
8273          * Destroy the internally created vpkt for the retry
8274          */
8275         vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
8276             vpkt->vpkt_tgt_pkt);
8277 
8278         return (ret_vpkt);
8279 }
8280 
8281 /* restart the request sense request */
8282 static void
8283 vhci_uscsi_restart_sense(void *arg)
8284 {
8285         struct buf      *rqbp;
8286         struct buf      *bp;
8287         struct scsi_pkt *rqpkt = (struct scsi_pkt *)arg;
8288         mp_uscsi_cmd_t  *mp_uscmdp;
8289 
8290         VHCI_DEBUG(4, (CE_WARN, NULL,
8291             "vhci_uscsi_restart_sense: enter: rqpkt: %p", (void *)rqpkt));
8292 
8293         if (scsi_transport(rqpkt) != TRAN_ACCEPT) {
8294                 /* if it fails - need to wakeup the original command */
8295                 mp_uscmdp = rqpkt->pkt_private;
8296                 bp = mp_uscmdp->cmdbp;
8297                 rqbp = mp_uscmdp->rqbp;
8298                 ASSERT(mp_uscmdp && bp && rqbp);
8299                 scsi_free_consistent_buf(rqbp);
8300                 scsi_destroy_pkt(rqpkt);
8301                 bp->b_resid = bp->b_bcount;
8302                 bioerror(bp, EIO);
8303                 biodone(bp);
8304         }
8305 }
8306 
8307 /*
8308  * auto-rqsense is not enabled so we have to retrieve the request sense
8309  * manually.
8310  */
8311 static int
8312 vhci_uscsi_send_sense(struct scsi_pkt *pkt, mp_uscsi_cmd_t *mp_uscmdp)
8313 {
8314         struct buf              *rqbp, *cmdbp;
8315         struct scsi_pkt         *rqpkt;
8316         int                     rval = 0;
8317 
8318         cmdbp = mp_uscmdp->cmdbp;
8319         ASSERT(cmdbp != NULL);
8320 
8321         VHCI_DEBUG(4, (CE_WARN, NULL,
8322             "vhci_uscsi_send_sense: enter: bp: %p pkt: %p scmd: %p",
8323             (void *)cmdbp, (void *)pkt, (void *)mp_uscmdp));
8324         /* set up the packet information and cdb */
8325         if ((rqbp = scsi_alloc_consistent_buf(mp_uscmdp->ap, NULL,
8326             SENSE_LENGTH, B_READ, NULL, NULL)) == NULL) {
8327                 return (-1);
8328         }
8329 
8330         if ((rqpkt = scsi_init_pkt(mp_uscmdp->ap, NULL, rqbp,
8331             CDB_GROUP0, 1, 0, PKT_CONSISTENT, NULL, NULL)) == NULL) {
8332                 scsi_free_consistent_buf(rqbp);
8333                 return (-1);
8334         }
8335 
8336         (void) scsi_setup_cdb((union scsi_cdb *)(intptr_t)rqpkt->pkt_cdbp,
8337             SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
8338 
8339         mp_uscmdp->rqbp = rqbp;
8340         rqbp->b_private = mp_uscmdp;
8341         rqpkt->pkt_flags |= FLAG_SENSING;
8342         rqpkt->pkt_time = 60;
8343         rqpkt->pkt_comp = vhci_uscsi_iodone;
8344         rqpkt->pkt_private = mp_uscmdp;
8345 
8346         /*
8347          * NOTE: This code path is related to MPAPI uscsi(7I), so path
8348          * selection is not based on path_instance.
8349          */
8350         if (scsi_pkt_allocated_correctly(rqpkt))
8351                 rqpkt->pkt_path_instance = 0;
8352 
8353         switch (scsi_transport(rqpkt)) {
8354         case TRAN_ACCEPT:
8355                 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_uscsi_send_sense: "
8356                     "transport accepted."));
8357                 break;
8358         case TRAN_BUSY:
8359                 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_uscsi_send_sense: "
8360                     "transport busy, setting timeout."));
8361                 vhci_restart_timeid = timeout(vhci_uscsi_restart_sense, rqpkt,
8362                     (drv_usectohz(5 * 1000000)));
8363                 break;
8364         default:
8365                 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_uscsi_send_sense: "
8366                     "transport failed"));
8367                 scsi_free_consistent_buf(rqbp);
8368                 scsi_destroy_pkt(rqpkt);
8369                 rval = -1;
8370         }
8371 
8372         return (rval);
8373 }
8374 
8375 /*
8376  * done routine for the mpapi uscsi command - this is behaving as though
8377  * FLAG_DIAGNOSE is set meaning there are no retries except for a manual
8378  * request sense.
8379  */
8380 void
8381 vhci_uscsi_iodone(struct scsi_pkt *pkt)
8382 {
8383         struct buf                      *bp;
8384         mp_uscsi_cmd_t                  *mp_uscmdp;
8385         struct uscsi_cmd                *uscmdp;
8386         struct scsi_arq_status          *arqstat;
8387         int                             err;
8388 
8389         mp_uscmdp = (mp_uscsi_cmd_t *)pkt->pkt_private;
8390         uscmdp = mp_uscmdp->uscmdp;
8391         bp = mp_uscmdp->cmdbp;
8392         ASSERT(bp != NULL);
8393         VHCI_DEBUG(4, (CE_WARN, NULL,
8394             "vhci_uscsi_iodone: enter: bp: %p pkt: %p scmd: %p",
8395             (void *)bp, (void *)pkt, (void *)mp_uscmdp));
8396         /* Save the status and the residual into the uscsi_cmd struct */
8397         uscmdp->uscsi_status = ((*(pkt)->pkt_scbp) & STATUS_MASK);
8398         uscmdp->uscsi_resid = bp->b_resid;
8399 
8400         /* return on a very successful command */
8401         if (pkt->pkt_reason == CMD_CMPLT &&
8402             SCBP_C(pkt) == 0 && ((pkt->pkt_flags & FLAG_SENSING) == 0) &&
8403             pkt->pkt_resid == 0) {
8404                 mdi_pi_kstat_iosupdate(mp_uscmdp->pip, bp);
8405                 scsi_destroy_pkt(pkt);
8406                 biodone(bp);
8407                 return;
8408         }
8409         VHCI_DEBUG(4, (CE_NOTE, NULL, "iodone: reason=0x%x "
8410             " pkt_resid=%ld pkt_state: 0x%x b_count: %ld b_resid: %ld",
8411             pkt->pkt_reason, pkt->pkt_resid,
8412             pkt->pkt_state, bp->b_bcount, bp->b_resid));
8413 
8414         err = EIO;
8415 
8416         arqstat = (struct scsi_arq_status *)(intptr_t)(pkt->pkt_scbp);
8417         if (pkt->pkt_reason != CMD_CMPLT) {
8418                 /*
8419                  * The command did not complete.
8420                  */
8421                 VHCI_DEBUG(4, (CE_NOTE, NULL,
8422                     "vhci_uscsi_iodone: command did not complete."
8423                     " reason: %x flag: %x", pkt->pkt_reason, pkt->pkt_flags));
8424                 if (pkt->pkt_flags & FLAG_SENSING) {
8425                         MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_TRANSERR);
8426                 } else if (pkt->pkt_reason == CMD_TIMEOUT) {
8427                         MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_HARDERR);
8428                         err = ETIMEDOUT;
8429                 }
8430         } else if (pkt->pkt_state & STATE_ARQ_DONE && mp_uscmdp->arq_enabled) {
8431                 /*
8432                  * The auto-rqsense happened, and the packet has a filled-in
8433                  * scsi_arq_status structure, pointed to by pkt_scbp.
8434                  */
8435                 VHCI_DEBUG(4, (CE_NOTE, NULL,
8436                     "vhci_uscsi_iodone: received auto-requested sense"));
8437                 if (uscmdp->uscsi_flags & USCSI_RQENABLE) {
8438                         /* get the amount of data to copy into rqbuf */
8439                         int rqlen = SENSE_LENGTH - arqstat->sts_rqpkt_resid;
8440                         rqlen = min(((int)uscmdp->uscsi_rqlen), rqlen);
8441                         uscmdp->uscsi_rqresid = uscmdp->uscsi_rqlen - rqlen;
8442                         uscmdp->uscsi_rqstatus =
8443                             *((char *)&arqstat->sts_rqpkt_status);
8444                         if (uscmdp->uscsi_rqbuf && uscmdp->uscsi_rqlen &&
8445                             rqlen != 0) {
8446                                 bcopy(&(arqstat->sts_sensedata),
8447                                     uscmdp->uscsi_rqbuf, rqlen);
8448                         }
8449                         mdi_pi_kstat_iosupdate(mp_uscmdp->pip, bp);
8450                         VHCI_DEBUG(4, (CE_NOTE, NULL,
8451                             "vhci_uscsi_iodone: ARQ "
8452                             "uscsi_rqstatus=0x%x uscsi_rqresid=%d rqlen: %d "
8453                             "xfer: %d rqpkt_resid: %d\n",
8454                             uscmdp->uscsi_rqstatus, uscmdp->uscsi_rqresid,
8455                             uscmdp->uscsi_rqlen, rqlen,
8456                             arqstat->sts_rqpkt_resid));
8457                 }
8458         } else if (pkt->pkt_flags & FLAG_SENSING) {
8459                 struct buf *rqbp;
8460                 struct scsi_status *rqstatus;
8461 
8462                 rqstatus = (struct scsi_status *)pkt->pkt_scbp;
8463                 /* a manual request sense was done - get the information */
8464                 if (uscmdp->uscsi_flags & USCSI_RQENABLE) {
8465                         int rqlen = SENSE_LENGTH - pkt->pkt_resid;
8466 
8467                         rqbp = mp_uscmdp->rqbp;
8468                         /* get the amount of data to copy into rqbuf */
8469                         rqlen = min(((int)uscmdp->uscsi_rqlen), rqlen);
8470                         uscmdp->uscsi_rqresid = uscmdp->uscsi_rqlen - rqlen;
8471                         uscmdp->uscsi_rqstatus = *((char *)rqstatus);
8472                         if (uscmdp->uscsi_rqlen && uscmdp->uscsi_rqbuf) {
8473                                 bcopy(rqbp->b_un.b_addr, uscmdp->uscsi_rqbuf,
8474                                     rqlen);
8475                         }
8476                         MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_TRANSERR);
8477                         scsi_free_consistent_buf(rqbp);
8478                 }
8479                 VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_uscsi_iodone: FLAG_SENSING"
8480                     "uscsi_rqstatus=0x%x uscsi_rqresid=%d\n",
8481                     uscmdp->uscsi_rqstatus, uscmdp->uscsi_rqresid));
8482         } else {
8483                 struct scsi_status *status =
8484                     (struct scsi_status *)pkt->pkt_scbp;
8485                 /*
8486                  * Command completed and we're not getting sense. Check for
8487                  * errors and decide what to do next.
8488                  */
8489                 VHCI_DEBUG(4, (CE_NOTE, NULL,
8490                     "vhci_uscsi_iodone: command appears complete: reason: %x",
8491                     pkt->pkt_reason));
8492                 if (status->sts_chk) {
8493                         /* need to manually get the request sense */
8494                         if (vhci_uscsi_send_sense(pkt, mp_uscmdp) == 0) {
8495                                 scsi_destroy_pkt(pkt);
8496                                 return;
8497                         }
8498                 } else {
8499                         VHCI_DEBUG(4, (CE_NOTE, NULL,
8500                             "vhci_chk_err: appears complete"));
8501                         err = 0;
8502                         mdi_pi_kstat_iosupdate(mp_uscmdp->pip, bp);
8503                         if (pkt->pkt_resid) {
8504                                 bp->b_resid += pkt->pkt_resid;
8505                         }
8506                 }
8507         }
8508 
8509         if (err) {
8510                 if (bp->b_resid == 0)
8511                         bp->b_resid = bp->b_bcount;
8512                 bioerror(bp, err);
8513                 bp->b_flags |= B_ERROR;
8514         }
8515 
8516         scsi_destroy_pkt(pkt);
8517         biodone(bp);
8518 
8519         VHCI_DEBUG(4, (CE_WARN, NULL, "vhci_uscsi_iodone: exit"));
8520 }
8521 
8522 /*
8523  * start routine for the mpapi uscsi command
8524  */
8525 int
8526 vhci_uscsi_iostart(struct buf *bp)
8527 {
8528         struct scsi_pkt         *pkt;
8529         struct uscsi_cmd        *uscmdp;
8530         mp_uscsi_cmd_t          *mp_uscmdp;
8531         int                     stat_size, rval;
8532         int                     retry = 0;
8533 
8534         ASSERT(bp->b_private != NULL);
8535 
8536         mp_uscmdp = (mp_uscsi_cmd_t *)bp->b_private;
8537         uscmdp = mp_uscmdp->uscmdp;
8538         if (uscmdp->uscsi_flags & USCSI_RQENABLE) {
8539                 stat_size = SENSE_LENGTH;
8540         } else {
8541                 stat_size = 1;
8542         }
8543 
8544         pkt = scsi_init_pkt(mp_uscmdp->ap, NULL, bp, uscmdp->uscsi_cdblen,
8545             stat_size, 0, 0, SLEEP_FUNC, NULL);
8546         if (pkt == NULL) {
8547                 VHCI_DEBUG(4, (CE_NOTE, NULL,
8548                     "vhci_uscsi_iostart: rval: EINVAL"));
8549                 bp->b_resid = bp->b_bcount;
8550                 uscmdp->uscsi_resid = bp->b_bcount;
8551                 bioerror(bp, EINVAL);
8552                 biodone(bp);
8553                 return (EINVAL);
8554         }
8555 
8556         pkt->pkt_time = uscmdp->uscsi_timeout;
8557         bcopy(uscmdp->uscsi_cdb, pkt->pkt_cdbp, (size_t)uscmdp->uscsi_cdblen);
8558         pkt->pkt_comp = vhci_uscsi_iodone;
8559         pkt->pkt_private = mp_uscmdp;
8560         if (uscmdp->uscsi_flags & USCSI_SILENT)
8561                 pkt->pkt_flags |= FLAG_SILENT;
8562         if (uscmdp->uscsi_flags & USCSI_ISOLATE)
8563                 pkt->pkt_flags |= FLAG_ISOLATE;
8564         if (uscmdp->uscsi_flags & USCSI_DIAGNOSE)
8565                 pkt->pkt_flags |= FLAG_DIAGNOSE;
8566         if (uscmdp->uscsi_flags & USCSI_RENEGOT) {
8567                 pkt->pkt_flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
8568         }
8569         VHCI_DEBUG(4, (CE_WARN, NULL,
8570             "vhci_uscsi_iostart: ap: %p pkt: %p pcdbp: %p uscmdp: %p"
8571             " ucdbp: %p pcdblen: %d bp: %p count: %ld pip: %p"
8572             " stat_size: %d",
8573             (void *)mp_uscmdp->ap, (void *)pkt, (void *)pkt->pkt_cdbp,
8574             (void *)uscmdp, (void *)uscmdp->uscsi_cdb, pkt->pkt_cdblen,
8575             (void *)bp, bp->b_bcount, (void *)mp_uscmdp->pip, stat_size));
8576 
8577         /*
8578          * NOTE: This code path is related to MPAPI uscsi(7I), so path
8579          * selection is not based on path_instance.
8580          */
8581         if (scsi_pkt_allocated_correctly(pkt))
8582                 pkt->pkt_path_instance = 0;
8583 
8584         while (((rval = scsi_transport(pkt)) == TRAN_BUSY) &&
8585             retry < vhci_uscsi_retry_count) {
8586                 delay(drv_usectohz(vhci_uscsi_delay));
8587                 retry++;
8588         }
8589         if (retry >= vhci_uscsi_retry_count) {
8590                 VHCI_DEBUG(4, (CE_NOTE, NULL,
8591                     "vhci_uscsi_iostart: tran_busy - retry: %d", retry));
8592         }
8593         switch (rval) {
8594         case TRAN_ACCEPT:
8595                 rval =  0;
8596                 break;
8597 
8598         default:
8599                 VHCI_DEBUG(4, (CE_NOTE, NULL,
8600                     "vhci_uscsi_iostart: rval: %d count: %ld res: %ld",
8601                     rval, bp->b_bcount, bp->b_resid));
8602                 bp->b_resid = bp->b_bcount;
8603                 uscmdp->uscsi_resid = bp->b_bcount;
8604                 bioerror(bp, EIO);
8605                 scsi_destroy_pkt(pkt);
8606                 biodone(bp);
8607                 rval = EIO;
8608                 MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_TRANSERR);
8609                 break;
8610         }
8611         VHCI_DEBUG(4, (CE_NOTE, NULL,
8612             "vhci_uscsi_iostart: exit: rval: %d", rval));
8613         return (rval);
8614 }
8615 
8616 /* ARGSUSED */
8617 static struct scsi_failover_ops *
8618 vhci_dev_fo(dev_info_t *vdip, struct scsi_device *psd,
8619     void **ctprivp, char **fo_namep)
8620 {
8621         struct scsi_failover_ops        *sfo;
8622         char                            *sfo_name;
8623         char                            *override;
8624         struct scsi_failover            *sf;
8625 
8626         ASSERT(psd && psd->sd_inq);
8627         if ((psd == NULL) || (psd->sd_inq == NULL)) {
8628                 VHCI_DEBUG(1, (CE_NOTE, NULL,
8629                     "!vhci_dev_fo:return NULL no scsi_device or inquiry"));
8630                 return (NULL);
8631         }
8632 
8633         /*
8634          * Determine if device is supported under scsi_vhci, and select
8635          * failover module.
8636          *
8637          * See if there is a scsi_vhci.conf file override for this devices's
8638          * VID/PID. The following values can be returned:
8639          *
8640          * NULL         If the NULL is returned then there is no scsi_vhci.conf
8641          *              override.  For NULL, we determine the failover_ops for
8642          *              this device by checking the sfo_device_probe entry
8643          *              point for each 'fops' module, in order.
8644          *
8645          *              NOTE: Correct operation may depend on module ordering
8646          *              of 'specific' (failover modules that are completely
8647          *              VID/PID table based) to 'generic' (failover modules
8648          *              that based on T10 standards like TPGS).  Currently,
8649          *              the value of 'ddi-forceload' in scsi_vhci.conf is used
8650          *              to establish the module list and probe order.
8651          *
8652          * "NONE"       If value "NONE" is returned then there is a
8653          *              scsi_vhci.conf VID/PID override to indicate the device
8654          *              should not be supported under scsi_vhci (even if there
8655          *              is an 'fops' module supporting the device).
8656          *
8657          * "<other>"      If another value is returned then that value is the
8658          *              name of the 'fops' module that should be used.
8659          */
8660         sfo = NULL;     /* "NONE" */
8661         override = scsi_get_device_type_string(
8662             "scsi-vhci-failover-override", vdip, psd);
8663         if (override == NULL) {
8664                 /* NULL: default: select based on sfo_device_probe results */
8665                 for (sf = scsi_failover_table; sf->sf_mod; sf++) {
8666                         if ((sf->sf_sfo == NULL) ||
8667                             sf->sf_sfo->sfo_device_probe(psd, psd->sd_inq,
8668                             ctprivp) == SFO_DEVICE_PROBE_PHCI)
8669                                 continue;
8670 
8671                         /* found failover module, supported under scsi_vhci */
8672                         sfo = sf->sf_sfo;
8673                         if (fo_namep && (*fo_namep == NULL)) {
8674                                 sfo_name = i_ddi_strdup(sfo->sfo_name,
8675                                     KM_SLEEP);
8676                                 *fo_namep = sfo_name;
8677                         }
8678                         break;
8679                 }
8680         } else if (strcasecmp(override, "NONE")) {
8681                 /* !"NONE": select based on driver.conf specified name */
8682                 for (sf = scsi_failover_table, sfo = NULL; sf->sf_mod; sf++) {
8683                         if ((sf->sf_sfo == NULL) ||
8684                             (sf->sf_sfo->sfo_name == NULL) ||
8685                             strcmp(override, sf->sf_sfo->sfo_name))
8686                                 continue;
8687 
8688                         /*
8689                          * NOTE: If sfo_device_probe() has side-effects,
8690                          * including setting *ctprivp, these are not going
8691                          * to occur with override config.
8692                          */
8693 
8694                         /* found failover module, supported under scsi_vhci */
8695                         sfo = sf->sf_sfo;
8696                         if (fo_namep && (*fo_namep == NULL)) {
8697                                 sfo_name = kmem_alloc(strlen("conf ") +
8698                                     strlen(sfo->sfo_name) + 1, KM_SLEEP);
8699                                 (void) sprintf(sfo_name, "conf %s",
8700                                     sfo->sfo_name);
8701                                 *fo_namep = sfo_name;
8702                         }
8703                         break;
8704                 }
8705         }
8706         if (override)
8707                 kmem_free(override, strlen(override) + 1);
8708         return (sfo);
8709 }
8710 
8711 /*
8712  * Determine the device described by cinfo should be enumerated under
8713  * the vHCI or the pHCI - if there is a failover ops then device is
8714  * supported under vHCI.  By agreement with SCSA cinfo is a pointer
8715  * to a scsi_device structure associated with a decorated pHCI probe node.
8716  */
8717 /* ARGSUSED */
8718 int
8719 vhci_is_dev_supported(dev_info_t *vdip, dev_info_t *pdip, void *cinfo)
8720 {
8721         struct scsi_device      *psd = (struct scsi_device *)cinfo;
8722 
8723         return (vhci_dev_fo(vdip, psd, NULL, NULL) ? MDI_SUCCESS : MDI_FAILURE);
8724 }
8725 
8726 
8727 #ifdef DEBUG
8728 extern struct scsi_key_strings scsi_cmds[];
8729 
8730 static char *
8731 vhci_print_scsi_cmd(char cmd)
8732 {
8733         char tmp[64];
8734         char *cpnt;
8735 
8736         cpnt = scsi_cmd_name(cmd, scsi_cmds, tmp);
8737         /* tmp goes out of scope on return and caller sees garbage */
8738         if (cpnt == tmp) {
8739                 cpnt = "Unknown Command";
8740         }
8741         return (cpnt);
8742 }
8743 
8744 extern uchar_t  scsi_cdb_size[];
8745 
8746 static void
8747 vhci_print_cdb(dev_info_t *dip, uint_t level, char *title, uchar_t *cdb)
8748 {
8749         int len = scsi_cdb_size[CDB_GROUPID(cdb[0])];
8750         char buf[256];
8751 
8752         if (level == CE_NOTE) {
8753                 vhci_log(level, dip, "path cmd %s\n",
8754                     vhci_print_scsi_cmd(*cdb));
8755                 return;
8756         }
8757 
8758         (void) sprintf(buf, "%s for cmd(%s)", title, vhci_print_scsi_cmd(*cdb));
8759         vhci_clean_print(dip, level, buf, cdb, len);
8760 }
8761 
8762 static void
8763 vhci_clean_print(dev_info_t *dev, uint_t level, char *title, uchar_t *data,
8764     int len)
8765 {
8766         int     i;
8767         int     c;
8768         char    *format;
8769         char    buf[256];
8770         uchar_t byte;
8771 
8772         (void) sprintf(buf, "%s:\n", title);
8773         vhci_log(level, dev, "%s", buf);
8774         level = CE_CONT;
8775         for (i = 0; i < len; ) {
8776                 buf[0] = 0;
8777                 for (c = 0; c < 8 && i < len; c++, i++) {
8778                         byte = (uchar_t)data[i];
8779                         if (byte < 0x10)
8780                                 format = "0x0%x ";
8781                         else
8782                                 format = "0x%x ";
8783                         (void) sprintf(&buf[(int)strlen(buf)], format, byte);
8784                 }
8785                 (void) sprintf(&buf[(int)strlen(buf)], "\n");
8786 
8787                 vhci_log(level, dev, "%s\n", buf);
8788         }
8789 }
8790 #endif
8791 static void
8792 vhci_invalidate_mpapi_lu(struct scsi_vhci *vhci, scsi_vhci_lun_t *vlun)
8793 {
8794         char                    *svl_wwn;
8795         mpapi_item_list_t       *ilist;
8796         mpapi_lu_data_t         *ld;
8797 
8798         if (vlun == NULL) {
8799                 return;
8800         } else {
8801                 svl_wwn = vlun->svl_lun_wwn;
8802         }
8803 
8804         ilist = vhci->mp_priv->obj_hdr_list[MP_OBJECT_TYPE_MULTIPATH_LU]->head;
8805 
8806         while (ilist != NULL) {
8807                 ld = (mpapi_lu_data_t *)(ilist->item->idata);
8808                 if ((ld != NULL) && (strncmp(ld->prop.name, svl_wwn,
8809                     strlen(svl_wwn)) == 0)) {
8810                         ld->valid = 0;
8811                         VHCI_DEBUG(6, (CE_WARN, NULL,
8812                             "vhci_invalidate_mpapi_lu: "
8813                             "Invalidated LU(%s)", svl_wwn));
8814                         return;
8815                 }
8816                 ilist = ilist->next;
8817         }
8818         VHCI_DEBUG(6, (CE_WARN, NULL, "vhci_invalidate_mpapi_lu: "
8819             "Could not find LU(%s) to invalidate.", svl_wwn));
8820 }