1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2018 Nexenta Systems, Inc.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Multiplexed I/O SCSI vHCI implementation
30 */
31
32 #include <sys/conf.h>
33 #include <sys/file.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/scsi/scsi.h>
37 #include <sys/scsi/impl/scsi_reset_notify.h>
38 #include <sys/scsi/impl/services.h>
39 #include <sys/sunmdi.h>
40 #include <sys/mdi_impldefs.h>
41 #include <sys/scsi/adapters/scsi_vhci.h>
42 #include <sys/disp.h>
43 #include <sys/byteorder.h>
44
45 extern uintptr_t scsi_callback_id;
46 extern ddi_dma_attr_t scsi_alloc_attr;
47
48 #ifdef DEBUG
49 int vhci_debug = VHCI_DEBUG_DEFAULT_VAL;
50 #endif
51
52 /* retry for the vhci_do_prout command when a not ready is returned */
53 int vhci_prout_not_ready_retry = 180;
54
55 /*
56 * Timeout in seconds for SCSI commands used by vHCI.
57 */
58 int vhci_io_time = 30;
59
60 /*
61 * These values are defined to support the internal retry of
62 * SCSI packets for better sense code handling.
63 */
64 #define VHCI_CMD_CMPLT 0
65 #define VHCI_CMD_RETRY 1
66 #define VHCI_CMD_ERROR -1
67
68 #define PROPFLAGS (DDI_PROP_DONTPASS | DDI_PROP_NOTPROM)
69 #define VHCI_SCSI_PERR 0x47
70 #define VHCI_PGR_ILLEGALOP -2
71 #define VHCI_NUM_UPDATE_TASKQ 8
72 /* changed to 132 to accomodate HDS */
73
74 /*
75 * Version Macros
76 */
77 #define VHCI_NAME_VERSION "SCSI VHCI Driver"
78 char vhci_version_name[] = VHCI_NAME_VERSION;
79
80 int vhci_first_time = 0;
81 clock_t vhci_to_ticks = 0;
82 int vhci_init_wait_timeout = VHCI_INIT_WAIT_TIMEOUT;
83 kcondvar_t vhci_cv;
84 kmutex_t vhci_global_mutex;
85 void *vhci_softstate = NULL; /* for soft state */
86
87 /*
88 * Flag to delay the retry of the reserve command
89 */
90 int vhci_reserve_delay = 100000;
91 static int vhci_path_quiesce_timeout = 60;
92 static uchar_t zero_key[MHIOC_RESV_KEY_SIZE];
93
94 /* uscsi delay for a TRAN_BUSY */
95 static int vhci_uscsi_delay = 100000;
96 static int vhci_uscsi_retry_count = 180;
97 /* uscsi_restart_sense timeout id in case it needs to get canceled */
98 static timeout_id_t vhci_restart_timeid = 0;
99
100 static int vhci_bus_config_debug = 0;
101
102 /*
103 * Bidirectional map of 'target-port' to port id <pid> for support of
104 * iostat(1M) '-Xx' and '-Yx' output.
105 */
106 static kmutex_t vhci_targetmap_mutex;
107 static uint_t vhci_targetmap_pid = 1;
108 static mod_hash_t *vhci_targetmap_bypid; /* <pid> -> 'target-port' */
109 static mod_hash_t *vhci_targetmap_byport; /* 'target-port' -> <pid> */
110
111 /*
112 * functions exported by scsi_vhci struct cb_ops
113 */
114 static int vhci_open(dev_t *, int, int, cred_t *);
115 static int vhci_close(dev_t, int, int, cred_t *);
116 static int vhci_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
117
118 /*
119 * functions exported by scsi_vhci struct dev_ops
120 */
121 static int vhci_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
122 static int vhci_attach(dev_info_t *, ddi_attach_cmd_t);
123 static int vhci_detach(dev_info_t *, ddi_detach_cmd_t);
124
125 /*
126 * functions exported by scsi_vhci scsi_hba_tran_t transport table
127 */
128 static int vhci_scsi_tgt_init(dev_info_t *, dev_info_t *,
129 scsi_hba_tran_t *, struct scsi_device *);
130 static void vhci_scsi_tgt_free(dev_info_t *, dev_info_t *, scsi_hba_tran_t *,
131 struct scsi_device *);
132 static int vhci_pgr_register_start(scsi_vhci_lun_t *, struct scsi_pkt *);
133 static int vhci_scsi_start(struct scsi_address *, struct scsi_pkt *);
134 static int vhci_scsi_abort(struct scsi_address *, struct scsi_pkt *);
135 static int vhci_scsi_reset(struct scsi_address *, int);
136 static int vhci_scsi_reset_target(struct scsi_address *, int level,
137 uint8_t select_path);
138 static int vhci_scsi_reset_bus(struct scsi_address *);
139 static int vhci_scsi_reset_all_paths(struct scsi_address *);
140 static int vhci_scsi_getcap(struct scsi_address *, char *, int);
141 static int vhci_scsi_setcap(struct scsi_address *, char *, int, int);
142 static int vhci_commoncap(struct scsi_address *, char *, int, int, int);
143 static int vhci_pHCI_cap(struct scsi_address *ap, char *cap, int val, int whom,
144 mdi_pathinfo_t *pip);
145 static struct scsi_pkt *vhci_scsi_init_pkt(struct scsi_address *,
146 struct scsi_pkt *, struct buf *, int, int, int, int, int (*)(), caddr_t);
147 static void vhci_scsi_destroy_pkt(struct scsi_address *, struct scsi_pkt *);
148 static void vhci_scsi_dmafree(struct scsi_address *, struct scsi_pkt *);
149 static void vhci_scsi_sync_pkt(struct scsi_address *, struct scsi_pkt *);
150 static int vhci_scsi_reset_notify(struct scsi_address *, int, void (*)(caddr_t),
151 caddr_t);
152 static int vhci_scsi_get_bus_addr(struct scsi_device *, char *, int);
153 static int vhci_scsi_get_name(struct scsi_device *, char *, int);
154 static int vhci_scsi_bus_power(dev_info_t *, void *, pm_bus_power_op_t,
155 void *, void *);
156 static int vhci_scsi_bus_config(dev_info_t *, uint_t, ddi_bus_config_op_t,
157 void *, dev_info_t **);
158 static int vhci_scsi_bus_unconfig(dev_info_t *, uint_t, ddi_bus_config_op_t,
159 void *);
160 static struct scsi_failover_ops *vhci_dev_fo(dev_info_t *, struct scsi_device *,
161 void **, char **);
162
163 /*
164 * functions registered with the mpxio framework via mdi_vhci_ops_t
165 */
166 static int vhci_pathinfo_init(dev_info_t *, mdi_pathinfo_t *, int);
167 static int vhci_pathinfo_uninit(dev_info_t *, mdi_pathinfo_t *, int);
168 static int vhci_pathinfo_state_change(dev_info_t *, mdi_pathinfo_t *,
169 mdi_pathinfo_state_t, uint32_t, int);
170 static int vhci_pathinfo_online(dev_info_t *, mdi_pathinfo_t *, int);
171 static int vhci_pathinfo_offline(dev_info_t *, mdi_pathinfo_t *, int);
172 static int vhci_failover(dev_info_t *, dev_info_t *, int);
173 static void vhci_client_attached(dev_info_t *);
174 static int vhci_is_dev_supported(dev_info_t *, dev_info_t *, void *);
175
176 static int vhci_ctl(dev_t, int, intptr_t, int, cred_t *, int *);
177 static int vhci_devctl(dev_t, int, intptr_t, int, cred_t *, int *);
178 static int vhci_ioc_get_phci_path(sv_iocdata_t *, caddr_t, int, caddr_t);
179 static int vhci_ioc_get_client_path(sv_iocdata_t *, caddr_t, int, caddr_t);
180 static int vhci_ioc_get_paddr(sv_iocdata_t *, caddr_t, int, caddr_t);
181 static int vhci_ioc_send_client_path(caddr_t, sv_iocdata_t *, int, caddr_t);
182 static void vhci_ioc_devi_to_path(dev_info_t *, caddr_t);
183 static int vhci_get_phci_path_list(dev_info_t *, sv_path_info_t *, uint_t);
184 static int vhci_get_client_path_list(dev_info_t *, sv_path_info_t *, uint_t);
185 static int vhci_get_iocdata(const void *, sv_iocdata_t *, int, caddr_t);
186 static int vhci_get_iocswitchdata(const void *, sv_switch_to_cntlr_iocdata_t *,
187 int, caddr_t);
188 static int vhci_ioc_alloc_pathinfo(sv_path_info_t **, sv_path_info_t **,
189 uint_t, sv_iocdata_t *, int, caddr_t);
190 static void vhci_ioc_free_pathinfo(sv_path_info_t *, sv_path_info_t *, uint_t);
191 static int vhci_ioc_send_pathinfo(sv_path_info_t *, sv_path_info_t *, uint_t,
192 sv_iocdata_t *, int, caddr_t);
193 static int vhci_handle_ext_fo(struct scsi_pkt *, int);
194 static int vhci_efo_watch_cb(caddr_t, struct scsi_watch_result *);
195 static int vhci_quiesce_lun(struct scsi_vhci_lun *);
196 static int vhci_pgr_validate_and_register(scsi_vhci_priv_t *);
197 static void vhci_dispatch_scsi_start(void *);
198 static void vhci_efo_done(void *);
199 static void vhci_initiate_auto_failback(void *);
200 static void vhci_update_pHCI_pkt(struct vhci_pkt *, struct scsi_pkt *);
201 static int vhci_update_pathinfo(struct scsi_device *, mdi_pathinfo_t *,
202 struct scsi_failover_ops *, scsi_vhci_lun_t *, struct scsi_vhci *);
203 static void vhci_kstat_create_pathinfo(mdi_pathinfo_t *);
204 static int vhci_quiesce_paths(dev_info_t *, dev_info_t *,
205 scsi_vhci_lun_t *, char *, char *);
206
207 static char *vhci_devnm_to_guid(char *);
208 static int vhci_bind_transport(struct scsi_address *, struct vhci_pkt *,
209 int, int (*func)(caddr_t));
210 static void vhci_intr(struct scsi_pkt *);
211 static int vhci_do_prout(scsi_vhci_priv_t *);
212 static void vhci_run_cmd(void *);
213 static int vhci_do_prin(struct vhci_pkt **);
214 static struct scsi_pkt *vhci_create_retry_pkt(struct vhci_pkt *);
215 static struct vhci_pkt *vhci_sync_retry_pkt(struct vhci_pkt *);
216 static struct scsi_vhci_lun *vhci_lun_lookup(dev_info_t *);
217 static struct scsi_vhci_lun *vhci_lun_lookup_alloc(dev_info_t *, char *, int *);
218 static void vhci_lun_free(struct scsi_vhci_lun *dvlp, struct scsi_device *sd);
219 static int vhci_recovery_reset(scsi_vhci_lun_t *, struct scsi_address *,
220 uint8_t, uint8_t);
221 void vhci_update_pathstates(void *);
222
223 #ifdef DEBUG
224 static void vhci_print_prin_keys(vhci_prin_readkeys_t *, int);
225 static void vhci_print_cdb(dev_info_t *dip, uint_t level,
226 char *title, uchar_t *cdb);
227 static void vhci_clean_print(dev_info_t *dev, uint_t level,
228 char *title, uchar_t *data, int len);
229 #endif
230 static void vhci_print_prout_keys(scsi_vhci_lun_t *, char *);
231 static void vhci_uscsi_iodone(struct scsi_pkt *pkt);
232 static void vhci_invalidate_mpapi_lu(struct scsi_vhci *, scsi_vhci_lun_t *);
233
234 /*
235 * MP-API related functions
236 */
237 extern int vhci_mpapi_init(struct scsi_vhci *);
238 extern void vhci_mpapi_add_dev_prod(struct scsi_vhci *, char *);
239 extern int vhci_mpapi_ctl(dev_t, int, intptr_t, int, cred_t *, int *);
240 extern void vhci_update_mpapi_data(struct scsi_vhci *,
241 scsi_vhci_lun_t *, mdi_pathinfo_t *);
242 extern void* vhci_get_mpapi_item(struct scsi_vhci *, mpapi_list_header_t *,
243 uint8_t, void*);
244 extern void vhci_mpapi_set_path_state(dev_info_t *, mdi_pathinfo_t *, int);
245 extern int vhci_mpapi_update_tpg_acc_state_for_lu(struct scsi_vhci *,
246 scsi_vhci_lun_t *);
247
248 #define VHCI_DMA_MAX_XFER_CAP INT_MAX
249
250 #define VHCI_MAX_PGR_RETRIES 3
251
252 /*
253 * Macros for the device-type mpxio options
254 */
255 #define LOAD_BALANCE_OPTIONS "load-balance-options"
256 #define LOGICAL_BLOCK_REGION_SIZE "region-size"
257 #define MPXIO_OPTIONS_LIST "device-type-mpxio-options-list"
258 #define DEVICE_TYPE_STR "device-type"
259 #define isdigit(ch) ((ch) >= '0' && (ch) <= '9')
260
/*
 * Character-device entry points for the vhci control node.  scsi_vhci
 * exposes only open/close/ioctl; all other entry points are stubbed
 * with nodev/nochpoll.
 */
static struct cb_ops vhci_cb_ops = {
	vhci_open,			/* open */
	vhci_close,			/* close */
	nodev,				/* strategy */
	nodev,				/* print */
	nodev,				/* dump */
	nodev,				/* read */
	nodev,				/* write */
	vhci_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* chpoll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_NEW | D_MP,			/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* aread */
	nodev				/* awrite */
};
281
/*
 * Autoconfiguration entry points.  attach/detach carry the real work;
 * this virtual HBA provides no bus_ops or power management of its own.
 */
static struct dev_ops vhci_ops = {
	DEVO_REV,			/* devo_rev */
	0,				/* refcnt */
	vhci_getinfo,			/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	vhci_attach,			/* attach and detach are mandatory */
	vhci_detach,
	nodev,				/* reset */
	&vhci_cb_ops,			/* cb_ops */
	NULL,				/* bus_ops */
	NULL,				/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
296
extern struct mod_ops mod_driverops;

/*
 * Loadable-module linkage: a single driver module, installed by
 * _init() via mod_install(&modlinkage).
 */
static struct modldrv modldrv = {
	&mod_driverops,
	vhci_version_name,	/* module name */
	&vhci_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
310
/*
 * Callbacks registered with the mpxio (mdi) framework when vhci_attach()
 * calls mdi_vhci_register().
 */
static mdi_vhci_ops_t vhci_opinfo = {
	MDI_VHCI_OPS_REV,
	vhci_pathinfo_init,		/* Pathinfo node init callback */
	vhci_pathinfo_uninit,		/* Pathinfo uninit callback */
	vhci_pathinfo_state_change,	/* Pathinfo node state change */
	vhci_failover,			/* failover callback */
	vhci_client_attached,		/* client attached callback */
	vhci_is_dev_supported		/* is device supported by mdi */
};
320
321 /*
322 * The scsi_failover table defines an ordered set of 'fops' modules supported
323 * by scsi_vhci. Currently, initialize this table from the 'ddi-forceload'
324 * property specified in scsi_vhci.conf.
325 */
static struct scsi_failover {
	ddi_modhandle_t		sf_mod;		/* handle from ddi_modopen() */
	struct scsi_failover_ops	*sf_sfo;	/* ops exported by module */
} *scsi_failover_table;	/* NULL until vhci_failover_modopen() builds it */
static uint_t	scsi_nfailover;	/* number of 'ddi-forceload' entries */
331
/*
 * Module load entry point.  Sets up global state (soft-state handle,
 * SCSA registration, global locks and the target-port maps) before
 * making the driver visible via mod_install().  On any failure the
 * already-initialized pieces are torn down in reverse order.
 */
int
_init(void)
{
	int rval;

	/*
	 * Allocate soft state and prepare to do ddi_soft_state_zalloc()
	 * before registering with the transport first.
	 */
	if ((rval = ddi_soft_state_init(&vhci_softstate,
	    sizeof (struct scsi_vhci), 1)) != 0) {
		VHCI_DEBUG(1, (CE_NOTE, NULL,
		    "!_init:soft state init failed\n"));
		return (rval);
	}

	if ((rval = scsi_hba_init(&modlinkage)) != 0) {
		VHCI_DEBUG(1, (CE_NOTE, NULL,
		    "!_init: scsi hba init failed\n"));
		ddi_soft_state_fini(&vhci_softstate);
		return (rval);
	}

	/* Global lock/cv used while tgt_init waits for LUNs/paths. */
	mutex_init(&vhci_global_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vhci_cv, NULL, CV_DRIVER, NULL);

	/*
	 * Bidirectional 'target-port' <-> pid hashes used for the
	 * iostat(1M) -Xx/-Yx port-id support.
	 */
	mutex_init(&vhci_targetmap_mutex, NULL, MUTEX_DRIVER, NULL);
	vhci_targetmap_byport = mod_hash_create_strhash(
	    "vhci_targetmap_byport", 256, mod_hash_null_valdtor);
	vhci_targetmap_bypid = mod_hash_create_idhash(
	    "vhci_targetmap_bypid", 256, mod_hash_null_valdtor);

	if ((rval = mod_install(&modlinkage)) != 0) {
		VHCI_DEBUG(1, (CE_NOTE, NULL, "!_init: mod_install failed\n"));
		/* Unwind everything set up above, in reverse order. */
		if (vhci_targetmap_bypid)
			mod_hash_destroy_idhash(vhci_targetmap_bypid);
		if (vhci_targetmap_byport)
			mod_hash_destroy_strhash(vhci_targetmap_byport);
		mutex_destroy(&vhci_targetmap_mutex);
		cv_destroy(&vhci_cv);
		mutex_destroy(&vhci_global_mutex);
		scsi_hba_fini(&modlinkage);
		ddi_soft_state_fini(&vhci_softstate);
	}
	return (rval);
}
378
379
380 /*
381 * the system is done with us as a driver, so clean up
382 */
383 int
384 _fini(void)
385 {
386 int rval;
387
388 /*
389 * don't start cleaning up until we know that the module remove
390 * has worked -- if this works, then we know that each instance
391 * has successfully been DDI_DETACHed
392 */
393 if ((rval = mod_remove(&modlinkage)) != 0) {
394 VHCI_DEBUG(4, (CE_NOTE, NULL, "!_fini: mod_remove failed\n"));
395 return (rval);
396 }
397
398 if (vhci_targetmap_bypid)
399 mod_hash_destroy_idhash(vhci_targetmap_bypid);
400 if (vhci_targetmap_byport)
401 mod_hash_destroy_strhash(vhci_targetmap_byport);
402 mutex_destroy(&vhci_targetmap_mutex);
403 cv_destroy(&vhci_cv);
404 mutex_destroy(&vhci_global_mutex);
405 scsi_hba_fini(&modlinkage);
406 ddi_soft_state_fini(&vhci_softstate);
407
408 return (rval);
409 }
410
411 int
412 _info(struct modinfo *modinfop)
413 {
414 return (mod_info(&modlinkage, modinfop));
415 }
416
417 /*
418 * Lookup scsi_failover by "short name" of failover module.
419 */
420 struct scsi_failover_ops *
421 vhci_failover_ops_by_name(char *name)
422 {
423 struct scsi_failover *sf;
424
425 for (sf = scsi_failover_table; sf->sf_mod; sf++) {
426 if (sf->sf_sfo == NULL)
427 continue;
428 if (strcmp(sf->sf_sfo->sfo_name, name) == 0)
429 return (sf->sf_sfo);
430 }
431 return (NULL);
432 }
433
434 /*
435 * Load all scsi_failover_ops 'fops' modules.
436 */
437 static void
438 vhci_failover_modopen(struct scsi_vhci *vhci)
439 {
440 char **module;
441 int i;
442 struct scsi_failover *sf;
443 char **dt;
444 int e;
445
446 if (scsi_failover_table)
447 return;
448
449 /* Get the list of modules from scsi_vhci.conf */
450 if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY,
451 vhci->vhci_dip, DDI_PROP_DONTPASS, "ddi-forceload",
452 &module, &scsi_nfailover) != DDI_PROP_SUCCESS) {
453 cmn_err(CE_WARN, "scsi_vhci: "
454 "scsi_vhci.conf is missing 'ddi-forceload'");
455 return;
456 }
457 if (scsi_nfailover == 0) {
458 cmn_err(CE_WARN, "scsi_vhci: "
459 "scsi_vhci.conf has empty 'ddi-forceload'");
460 ddi_prop_free(module);
461 return;
462 }
463
464 /* allocate failover table based on number of modules */
465 scsi_failover_table = (struct scsi_failover *)
466 kmem_zalloc(sizeof (struct scsi_failover) * (scsi_nfailover + 1),
467 KM_SLEEP);
468
469 /* loop over modules specified in scsi_vhci.conf and open each module */
470 for (i = 0, sf = scsi_failover_table; i < scsi_nfailover; i++) {
471 if (module[i] == NULL)
472 continue;
473
474 sf->sf_mod = ddi_modopen(module[i], KRTLD_MODE_FIRST, &e);
475 if (sf->sf_mod == NULL) {
476 /*
477 * A module returns EEXIST if other software is
478 * supporting the intended function: for example
479 * the scsi_vhci_f_sum_emc module returns EEXIST
480 * from _init if EMC powerpath software is installed.
481 */
482 if (e != EEXIST)
483 cmn_err(CE_WARN, "scsi_vhci: unable to open "
484 "module '%s', error %d", module[i], e);
485 continue;
486 }
487 sf->sf_sfo = ddi_modsym(sf->sf_mod,
488 "scsi_vhci_failover_ops", &e);
489 if (sf->sf_sfo == NULL) {
490 cmn_err(CE_WARN, "scsi_vhci: "
491 "unable to import 'scsi_failover_ops' from '%s', "
492 "error %d", module[i], e);
493 (void) ddi_modclose(sf->sf_mod);
494 sf->sf_mod = NULL;
495 continue;
496 }
497
498 /* register vid/pid of devices supported with mpapi */
499 for (dt = sf->sf_sfo->sfo_devices; *dt; dt++)
500 vhci_mpapi_add_dev_prod(vhci, *dt);
501 sf++;
502 }
503
504 /* verify that at least the "well-known" modules were there */
505 if (vhci_failover_ops_by_name(SFO_NAME_SYM) == NULL)
506 cmn_err(CE_WARN, "scsi_vhci: well-known module \""
507 SFO_NAME_SYM "\" not defined in scsi_vhci.conf's "
508 "'ddi-forceload'");
509 if (vhci_failover_ops_by_name(SFO_NAME_TPGS) == NULL)
510 cmn_err(CE_WARN, "scsi_vhci: well-known module \""
511 SFO_NAME_TPGS "\" not defined in scsi_vhci.conf's "
512 "'ddi-forceload'");
513
514 /* call sfo_init for modules that need it */
515 for (sf = scsi_failover_table; sf->sf_mod; sf++) {
516 if (sf->sf_sfo && sf->sf_sfo->sfo_init)
517 sf->sf_sfo->sfo_init();
518 }
519
520 ddi_prop_free(module);
521 }
522
523 /*
524 * unload all loaded scsi_failover_ops modules
525 */
526 static void
527 vhci_failover_modclose()
528 {
529 struct scsi_failover *sf;
530
531 for (sf = scsi_failover_table; sf->sf_mod; sf++) {
532 if ((sf->sf_mod == NULL) || (sf->sf_sfo == NULL))
533 continue;
534 (void) ddi_modclose(sf->sf_mod);
535 sf->sf_mod = NULL;
536 sf->sf_sfo = NULL;
537 }
538
539 if (scsi_failover_table && scsi_nfailover)
540 kmem_free(scsi_failover_table,
541 sizeof (struct scsi_failover) * (scsi_nfailover + 1));
542 scsi_failover_table = NULL;
543 scsi_nfailover = 0;
544 }
545
546 /* ARGSUSED */
547 static int
548 vhci_open(dev_t *devp, int flag, int otype, cred_t *credp)
549 {
550 struct scsi_vhci *vhci;
551
552 if (otype != OTYP_CHR) {
553 return (EINVAL);
554 }
555
556 vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(*devp)));
557 if (vhci == NULL) {
558 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_open: failed ENXIO\n"));
559 return (ENXIO);
560 }
561
562 mutex_enter(&vhci->vhci_mutex);
563 if ((flag & FEXCL) && (vhci->vhci_state & VHCI_STATE_OPEN)) {
564 mutex_exit(&vhci->vhci_mutex);
565 vhci_log(CE_NOTE, vhci->vhci_dip,
566 "!vhci%d: Already open\n", getminor(*devp));
567 return (EBUSY);
568 }
569
570 vhci->vhci_state |= VHCI_STATE_OPEN;
571 mutex_exit(&vhci->vhci_mutex);
572 return (0);
573 }
574
575
576 /* ARGSUSED */
577 static int
578 vhci_close(dev_t dev, int flag, int otype, cred_t *credp)
579 {
580 struct scsi_vhci *vhci;
581
582 if (otype != OTYP_CHR) {
583 return (EINVAL);
584 }
585
586 vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(dev)));
587 if (vhci == NULL) {
588 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_close: failed ENXIO\n"));
589 return (ENXIO);
590 }
591
592 mutex_enter(&vhci->vhci_mutex);
593 vhci->vhci_state &= ~VHCI_STATE_OPEN;
594 mutex_exit(&vhci->vhci_mutex);
595
596 return (0);
597 }
598
599 /* ARGSUSED */
600 static int
601 vhci_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
602 cred_t *credp, int *rval)
603 {
604 if (IS_DEVCTL(cmd)) {
605 return (vhci_devctl(dev, cmd, data, mode, credp, rval));
606 } else if (cmd == MP_CMD) {
607 return (vhci_mpapi_ctl(dev, cmd, data, mode, credp, rval));
608 } else {
609 return (vhci_ctl(dev, cmd, data, mode, credp, rval));
610 }
611 }
612
613 /*
614 * attach the module
615 */
616 static int
617 vhci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
618 {
619 int rval = DDI_FAILURE;
620 int scsi_hba_attached = 0;
621 int vhci_attached = 0;
622 int mutex_initted = 0;
623 int instance;
624 struct scsi_vhci *vhci;
625 scsi_hba_tran_t *tran;
626 char cache_name_buf[64];
627 char *data;
628
629 VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_attach: cmd=0x%x\n", cmd));
630
631 instance = ddi_get_instance(dip);
632
633 switch (cmd) {
634 case DDI_ATTACH:
635 break;
636
637 case DDI_RESUME:
638 case DDI_PM_RESUME:
639 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_attach: resume not yet"
640 "implemented\n"));
641 return (rval);
642
643 default:
644 VHCI_DEBUG(1, (CE_NOTE, NULL,
645 "!vhci_attach: unknown ddi command\n"));
646 return (rval);
647 }
648
649 /*
650 * Allocate vhci data structure.
651 */
652 if (ddi_soft_state_zalloc(vhci_softstate, instance) != DDI_SUCCESS) {
653 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
654 "soft state alloc failed\n"));
655 return (DDI_FAILURE);
656 }
657
658 if ((vhci = ddi_get_soft_state(vhci_softstate, instance)) == NULL) {
659 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
660 "bad soft state\n"));
661 ddi_soft_state_free(vhci_softstate, instance);
662 return (DDI_FAILURE);
663 }
664
665 /* Allocate packet cache */
666 (void) snprintf(cache_name_buf, sizeof (cache_name_buf),
667 "vhci%d_cache", instance);
668
669 mutex_init(&vhci->vhci_mutex, NULL, MUTEX_DRIVER, NULL);
670 mutex_initted++;
671
672 /*
673 * Allocate a transport structure
674 */
675 tran = scsi_hba_tran_alloc(dip, SCSI_HBA_CANSLEEP);
676 ASSERT(tran != NULL);
677
678 vhci->vhci_tran = tran;
679 vhci->vhci_dip = dip;
680 vhci->vhci_instance = instance;
681
682 tran->tran_hba_private = vhci;
683 tran->tran_tgt_init = vhci_scsi_tgt_init;
684 tran->tran_tgt_probe = NULL;
685 tran->tran_tgt_free = vhci_scsi_tgt_free;
686
687 tran->tran_start = vhci_scsi_start;
688 tran->tran_abort = vhci_scsi_abort;
689 tran->tran_reset = vhci_scsi_reset;
690 tran->tran_getcap = vhci_scsi_getcap;
691 tran->tran_setcap = vhci_scsi_setcap;
692 tran->tran_init_pkt = vhci_scsi_init_pkt;
693 tran->tran_destroy_pkt = vhci_scsi_destroy_pkt;
694 tran->tran_dmafree = vhci_scsi_dmafree;
695 tran->tran_sync_pkt = vhci_scsi_sync_pkt;
696 tran->tran_reset_notify = vhci_scsi_reset_notify;
697
698 tran->tran_get_bus_addr = vhci_scsi_get_bus_addr;
699 tran->tran_get_name = vhci_scsi_get_name;
700 tran->tran_bus_reset = NULL;
701 tran->tran_quiesce = NULL;
702 tran->tran_unquiesce = NULL;
703
704 /*
705 * register event notification routines with scsa
706 */
707 tran->tran_get_eventcookie = NULL;
708 tran->tran_add_eventcall = NULL;
709 tran->tran_remove_eventcall = NULL;
710 tran->tran_post_event = NULL;
711
712 tran->tran_bus_power = vhci_scsi_bus_power;
713
714 tran->tran_bus_config = vhci_scsi_bus_config;
715 tran->tran_bus_unconfig = vhci_scsi_bus_unconfig;
716
717 /*
718 * Attach this instance with the mpxio framework
719 */
720 if (mdi_vhci_register(MDI_HCI_CLASS_SCSI, dip, &vhci_opinfo, 0)
721 != MDI_SUCCESS) {
722 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
723 "mdi_vhci_register failed\n"));
724 goto attach_fail;
725 }
726 vhci_attached++;
727
728 /*
729 * Attach this instance of the hba.
730 *
731 * Regarding dma attributes: Since scsi_vhci is a virtual scsi HBA
732 * driver, it has nothing to do with DMA. However, when calling
733 * scsi_hba_attach_setup() we need to pass something valid in the
734 * dma attributes parameter. So we just use scsi_alloc_attr.
735 * SCSA itself seems to care only for dma_attr_minxfer and
736 * dma_attr_burstsizes fields of dma attributes structure.
737 * It expects those fileds to be non-zero.
738 */
739 if (scsi_hba_attach_setup(dip, &scsi_alloc_attr, tran,
740 SCSI_HBA_ADDR_COMPLEX) != DDI_SUCCESS) {
741 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
742 "hba attach failed\n"));
743 goto attach_fail;
744 }
745 scsi_hba_attached++;
746
747 if (ddi_create_minor_node(dip, "devctl", S_IFCHR,
748 INST2DEVCTL(instance), DDI_NT_SCSI_NEXUS, 0) != DDI_SUCCESS) {
749 VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
750 " ddi_create_minor_node failed\n"));
751 goto attach_fail;
752 }
753
754 /*
755 * Set pm-want-child-notification property for
756 * power management of the phci and client
757 */
758 if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
759 "pm-want-child-notification?", NULL, NULL) != DDI_PROP_SUCCESS) {
760 cmn_err(CE_WARN,
761 "%s%d fail to create pm-want-child-notification? prop",
762 ddi_driver_name(dip), ddi_get_instance(dip));
763 goto attach_fail;
764 }
765
766 vhci->vhci_taskq = taskq_create("vhci_taskq", 1, MINCLSYSPRI, 1, 4, 0);
767 vhci->vhci_update_pathstates_taskq =
768 taskq_create("vhci_update_pathstates", VHCI_NUM_UPDATE_TASKQ,
769 MINCLSYSPRI, 1, 4, 0);
770 ASSERT(vhci->vhci_taskq);
771 ASSERT(vhci->vhci_update_pathstates_taskq);
772
773 /*
774 * Set appropriate configuration flags based on options set in
775 * conf file.
776 */
777 vhci->vhci_conf_flags = 0;
778 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, PROPFLAGS,
779 "auto-failback", &data) == DDI_SUCCESS) {
780 if (strcmp(data, "enable") == 0)
781 vhci->vhci_conf_flags |= VHCI_CONF_FLAGS_AUTO_FAILBACK;
782 ddi_prop_free(data);
783 }
784
785 if (!(vhci->vhci_conf_flags & VHCI_CONF_FLAGS_AUTO_FAILBACK))
786 vhci_log(CE_NOTE, dip, "!Auto-failback capability "
787 "disabled through scsi_vhci.conf file.");
788
789 /*
790 * Allocate an mpapi private structure
791 */
792 vhci->mp_priv = kmem_zalloc(sizeof (mpapi_priv_t), KM_SLEEP);
793 if (vhci_mpapi_init(vhci) != 0) {
794 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_attach: "
795 "vhci_mpapi_init() failed"));
796 }
797
798 vhci_failover_modopen(vhci); /* load failover modules */
799
800 ddi_report_dev(dip);
801 return (DDI_SUCCESS);
802
803 attach_fail:
804 if (vhci_attached)
805 (void) mdi_vhci_unregister(dip, 0);
806
807 if (scsi_hba_attached)
808 (void) scsi_hba_detach(dip);
809
810 if (vhci->vhci_tran)
811 scsi_hba_tran_free(vhci->vhci_tran);
812
813 if (mutex_initted) {
814 mutex_destroy(&vhci->vhci_mutex);
815 }
816
817 ddi_soft_state_free(vhci_softstate, instance);
818 return (DDI_FAILURE);
819 }
820
821
822 /*ARGSUSED*/
823 static int
824 vhci_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
825 {
826 int instance = ddi_get_instance(dip);
827 scsi_hba_tran_t *tran;
828 struct scsi_vhci *vhci;
829
830 VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_detach: cmd=0x%x\n", cmd));
831
832 if ((tran = ddi_get_driver_private(dip)) == NULL)
833 return (DDI_FAILURE);
834
835 vhci = TRAN2HBAPRIVATE(tran);
836 if (!vhci) {
837 return (DDI_FAILURE);
838 }
839
840 switch (cmd) {
841 case DDI_DETACH:
842 break;
843
844 case DDI_SUSPEND:
845 case DDI_PM_SUSPEND:
846 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_detach: suspend/pm not yet"
847 "implemented\n"));
848 return (DDI_FAILURE);
849
850 default:
851 VHCI_DEBUG(1, (CE_NOTE, NULL,
852 "!vhci_detach: unknown ddi command\n"));
853 return (DDI_FAILURE);
854 }
855
856 (void) mdi_vhci_unregister(dip, 0);
857 (void) scsi_hba_detach(dip);
858 scsi_hba_tran_free(tran);
859
860 if (ddi_prop_remove(DDI_DEV_T_NONE, dip,
861 "pm-want-child-notification?") != DDI_PROP_SUCCESS) {
862 cmn_err(CE_WARN,
863 "%s%d unable to remove prop pm-want_child_notification?",
864 ddi_driver_name(dip), ddi_get_instance(dip));
865 }
866 if (vhci_restart_timeid != 0) {
867 (void) untimeout(vhci_restart_timeid);
868 }
869 vhci_restart_timeid = 0;
870
871 mutex_destroy(&vhci->vhci_mutex);
872 vhci->vhci_dip = NULL;
873 vhci->vhci_tran = NULL;
874 taskq_destroy(vhci->vhci_taskq);
875 taskq_destroy(vhci->vhci_update_pathstates_taskq);
876 ddi_remove_minor_node(dip, NULL);
877 ddi_soft_state_free(vhci_softstate, instance);
878
879 vhci_failover_modclose(); /* unload failover modules */
880 return (DDI_SUCCESS);
881 }
882
883 /*
884 * vhci_getinfo()
885 * Given the device number, return the devinfo pointer or the
886 * instance number.
887 * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
888 */
889
890 /*ARGSUSED*/
891 static int
892 vhci_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
893 {
894 struct scsi_vhci *vhcip;
895 int instance = MINOR2INST(getminor((dev_t)arg));
896
897 switch (cmd) {
898 case DDI_INFO_DEVT2DEVINFO:
899 vhcip = ddi_get_soft_state(vhci_softstate, instance);
900 if (vhcip != NULL)
901 *result = vhcip->vhci_dip;
902 else {
903 *result = NULL;
904 return (DDI_FAILURE);
905 }
906 break;
907
908 case DDI_INFO_DEVT2INSTANCE:
909 *result = (void *)(uintptr_t)instance;
910 break;
911
912 default:
913 return (DDI_FAILURE);
914 }
915
916 return (DDI_SUCCESS);
917 }
918
919 /*ARGSUSED*/
920 static int
921 vhci_scsi_tgt_init(dev_info_t *hba_dip, dev_info_t *tgt_dip,
922 scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
923 {
924 char *guid;
925 scsi_vhci_lun_t *vlun;
926 struct scsi_vhci *vhci;
927 clock_t from_ticks;
928 mdi_pathinfo_t *pip;
929 int rval;
930
931 ASSERT(hba_dip != NULL);
932 ASSERT(tgt_dip != NULL);
933
934 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip, PROPFLAGS,
935 MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
936 /*
937 * This must be the .conf node without GUID property.
938 * The node under fp already inserts a delay, so we
939 * just return from here. We rely on this delay to have
940 * all dips be posted to the ndi hotplug thread's newdev
941 * list. This is necessary for the deferred attach
942 * mechanism to work and opens() done soon after boot to
943 * succeed.
944 */
945 VHCI_DEBUG(4, (CE_WARN, hba_dip, "tgt_init: lun guid "
946 "property failed"));
947 return (DDI_NOT_WELL_FORMED);
948 }
949
950 if (ndi_dev_is_persistent_node(tgt_dip) == 0) {
951 /*
952 * This must be .conf node with the GUID property. We don't
953 * merge property by ndi_merge_node() here because the
954 * devi_addr_buf of .conf node is "" always according the
955 * implementation of vhci_scsi_get_name_bus_addr().
956 */
957 ddi_set_name_addr(tgt_dip, NULL);
958 return (DDI_FAILURE);
959 }
960
961 vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(hba_dip));
962 ASSERT(vhci != NULL);
963
964 VHCI_DEBUG(4, (CE_NOTE, hba_dip,
965 "!tgt_init: called for %s (instance %d)\n",
966 ddi_driver_name(tgt_dip), ddi_get_instance(tgt_dip)));
967
968 vlun = vhci_lun_lookup(tgt_dip);
969
970 mutex_enter(&vhci_global_mutex);
971
972 from_ticks = ddi_get_lbolt();
973 if (vhci_to_ticks == 0) {
974 vhci_to_ticks = from_ticks +
975 drv_usectohz(vhci_init_wait_timeout);
976 }
977
978 #if DEBUG
979 if (vlun) {
980 VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
981 "vhci_scsi_tgt_init: guid %s : found vlun 0x%p "
982 "from_ticks %lx to_ticks %lx",
983 guid, (void *)vlun, from_ticks, vhci_to_ticks));
984 } else {
985 VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
986 "vhci_scsi_tgt_init: guid %s : vlun not found "
987 "from_ticks %lx to_ticks %lx", guid, from_ticks,
988 vhci_to_ticks));
989 }
990 #endif
991
992 rval = mdi_select_path(tgt_dip, NULL,
993 (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH), NULL, &pip);
994 if (rval == MDI_SUCCESS) {
995 mdi_rele_path(pip);
996 }
997
998 /*
999 * Wait for the following conditions :
1000 * 1. no vlun available yet
1001 * 2. no path established
1002 * 3. timer did not expire
1003 */
1004 while ((vlun == NULL) || (mdi_client_get_path_count(tgt_dip) == 0) ||
1005 (rval != MDI_SUCCESS)) {
1006 if (vlun && vlun->svl_not_supported) {
1007 VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
1008 "vlun 0x%p lun guid %s not supported!",
1009 (void *)vlun, guid));
1010 mutex_exit(&vhci_global_mutex);
1011 ddi_prop_free(guid);
1012 return (DDI_NOT_WELL_FORMED);
1013 }
1014 if ((vhci_first_time == 0) && (from_ticks >= vhci_to_ticks)) {
1015 vhci_first_time = 1;
1016 }
1017 if (vhci_first_time == 1) {
1018 VHCI_DEBUG(1, (CE_WARN, hba_dip, "vhci_scsi_tgt_init: "
1019 "no wait for %s. from_tick %lx, to_tick %lx",
1020 guid, from_ticks, vhci_to_ticks));
1021 mutex_exit(&vhci_global_mutex);
1022 ddi_prop_free(guid);
1023 return (DDI_NOT_WELL_FORMED);
1024 }
1025
1026 if (cv_timedwait(&vhci_cv,
1027 &vhci_global_mutex, vhci_to_ticks) == -1) {
1028 /* Timed out */
1029 #ifdef DEBUG
1030 if (vlun == NULL) {
1031 VHCI_DEBUG(1, (CE_WARN, hba_dip,
1032 "tgt_init: no vlun for %s!", guid));
1033 } else if (mdi_client_get_path_count(tgt_dip) == 0) {
1034 VHCI_DEBUG(1, (CE_WARN, hba_dip,
1035 "tgt_init: client path count is "
1036 "zero for %s!", guid));
1037 } else {
1038 VHCI_DEBUG(1, (CE_WARN, hba_dip,
1039 "tgt_init: client path not "
1040 "available yet for %s!", guid));
1041 }
1042 #endif /* DEBUG */
1043 mutex_exit(&vhci_global_mutex);
1044 ddi_prop_free(guid);
1045 return (DDI_NOT_WELL_FORMED);
1046 }
1047 vlun = vhci_lun_lookup(tgt_dip);
1048 rval = mdi_select_path(tgt_dip, NULL,
1049 (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
1050 NULL, &pip);
1051 if (rval == MDI_SUCCESS) {
1052 mdi_rele_path(pip);
1053 }
1054 from_ticks = ddi_get_lbolt();
1055 }
1056 mutex_exit(&vhci_global_mutex);
1057
1058 ASSERT(vlun != NULL);
1059 ddi_prop_free(guid);
1060
1061 scsi_device_hba_private_set(sd, vlun);
1062
1063 return (DDI_SUCCESS);
1064 }
1065
/*ARGSUSED*/
static void
vhci_scsi_tgt_free(dev_info_t *hba_dip, dev_info_t *tgt_dip,
    scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
{
	struct scsi_vhci_lun *dvlp;
	/* No client paths may remain when the target node is torn down. */
	ASSERT(mdi_client_get_path_count(tgt_dip) <= 0);
	/* The vlun was attached to sd in vhci_scsi_tgt_init(). */
	dvlp = (struct scsi_vhci_lun *)scsi_device_hba_private_get(sd);
	ASSERT(dvlp != NULL);

	vhci_lun_free(dvlp, sd);
}
1078
1079 /*
1080 * a PGR register command has started; copy the info we need
1081 */
1082 int
1083 vhci_pgr_register_start(scsi_vhci_lun_t *vlun, struct scsi_pkt *pkt)
1084 {
1085 struct vhci_pkt *vpkt = TGTPKT2VHCIPKT(pkt);
1086 void *addr;
1087
1088 if (!vpkt->vpkt_tgt_init_bp)
1089 return (TRAN_BADPKT);
1090
1091 addr = bp_mapin_common(vpkt->vpkt_tgt_init_bp,
1092 (vpkt->vpkt_flags & CFLAG_NOWAIT) ? VM_NOSLEEP : VM_SLEEP);
1093 if (addr == NULL)
1094 return (TRAN_BUSY);
1095
1096 mutex_enter(&vlun->svl_mutex);
1097
1098 vhci_print_prout_keys(vlun, "v_pgr_reg_start: before bcopy:");
1099
1100 bcopy(addr, &vlun->svl_prout, sizeof (vhci_prout_t) -
1101 (2 * MHIOC_RESV_KEY_SIZE*sizeof (char)));
1102 bcopy(pkt->pkt_cdbp, vlun->svl_cdb, sizeof (vlun->svl_cdb));
1103
1104 vhci_print_prout_keys(vlun, "v_pgr_reg_start: after bcopy:");
1105
1106 vlun->svl_time = pkt->pkt_time;
1107 vlun->svl_bcount = vpkt->vpkt_tgt_init_bp->b_bcount;
1108 vlun->svl_first_path = vpkt->vpkt_path;
1109 mutex_exit(&vlun->svl_mutex);
1110 return (0);
1111 }
1112
1113 /*
1114 * Function name : vhci_scsi_start()
1115 *
1116 * Return Values : TRAN_FATAL_ERROR - vhci has been shutdown
1117 * or other fatal failure
1118 * preventing packet transportation
1119 * TRAN_BUSY - request queue is full
1120 * TRAN_ACCEPT - pkt has been submitted to phci
1121 * (or is held in the waitQ)
1122 * Description : Implements SCSA's tran_start() entry point for
1123 * packet transport
1124 *
1125 */
1126 static int
1127 vhci_scsi_start(struct scsi_address *ap, struct scsi_pkt *pkt)
1128 {
1129 int rval = TRAN_ACCEPT;
1130 int instance, held;
1131 struct scsi_vhci *vhci = ADDR2VHCI(ap);
1132 struct scsi_vhci_lun *vlun = ADDR2VLUN(ap);
1133 struct vhci_pkt *vpkt = TGTPKT2VHCIPKT(pkt);
1134 int flags = 0;
1135 scsi_vhci_priv_t *svp, *svp_resrv;
1136 dev_info_t *cdip;
1137 client_lb_t lbp;
1138 int restore_lbp = 0;
1139 /* set if pkt is SCSI-II RESERVE cmd */
1140 int pkt_reserve_cmd = 0;
1141 int reserve_failed = 0;
1142 int resrv_instance = 0;
1143 mdi_pathinfo_t *pip;
1144 struct scsi_pkt *rel_pkt;
1145
1146 ASSERT(vhci != NULL);
1147 ASSERT(vpkt != NULL);
1148 ASSERT(vpkt->vpkt_state != VHCI_PKT_ISSUED);
1149 cdip = ADDR2DIP(ap);
1150
1151 /*
1152 * Block IOs if LUN is held or QUIESCED for IOs.
1153 */
1154 if ((VHCI_LUN_IS_HELD(vlun)) ||
1155 ((vlun->svl_flags & VLUN_QUIESCED_FLG) == VLUN_QUIESCED_FLG)) {
1156 return (TRAN_BUSY);
1157 }
1158
1159 /*
1160 * vhci_lun needs to be quiesced before SCSI-II RESERVE command
1161 * can be issued. This may require a cv_timedwait, which is
1162 * dangerous to perform in an interrupt context. So if this
1163 * is a RESERVE command a taskq is dispatched to service it.
1164 * This taskq shall again call vhci_scsi_start, but we shall be
1165 * sure its not in an interrupt context.
1166 */
1167 if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
1168 (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
1169 if (!(vpkt->vpkt_state & VHCI_PKT_THRU_TASKQ)) {
1170 if (taskq_dispatch(vhci->vhci_taskq,
1171 vhci_dispatch_scsi_start, (void *) vpkt,
1172 KM_NOSLEEP)) {
1173 return (TRAN_ACCEPT);
1174 } else {
1175 return (TRAN_BUSY);
1176 }
1177 }
1178
1179 /*
1180 * Here we ensure that simultaneous SCSI-II RESERVE cmds don't
1181 * get serviced for a lun.
1182 */
1183 VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
1184 if (!held) {
1185 return (TRAN_BUSY);
1186 } else if ((vlun->svl_flags & VLUN_QUIESCED_FLG) ==
1187 VLUN_QUIESCED_FLG) {
1188 VHCI_RELEASE_LUN(vlun);
1189 return (TRAN_BUSY);
1190 }
1191
1192 /*
1193 * To ensure that no IOs occur for this LUN for the duration
1194 * of this pkt set the VLUN_QUIESCED_FLG.
1195 * In case this routine needs to exit on error make sure that
1196 * this flag is cleared.
1197 */
1198 vlun->svl_flags |= VLUN_QUIESCED_FLG;
1199 pkt_reserve_cmd = 1;
1200
1201 /*
1202 * if this is a SCSI-II RESERVE command, set load balancing
1203 * policy to be ALTERNATE PATH to ensure that all subsequent
1204 * IOs are routed on the same path. This is because if commands
1205 * are routed across multiple paths then IOs on paths other than
1206 * the one on which the RESERVE was executed will get a
1207 * RESERVATION CONFLICT
1208 */
1209 lbp = mdi_get_lb_policy(cdip);
1210 if (lbp != LOAD_BALANCE_NONE) {
1211 if (vhci_quiesce_lun(vlun) != 1) {
1212 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1213 VHCI_RELEASE_LUN(vlun);
1214 return (TRAN_FATAL_ERROR);
1215 }
1216 vlun->svl_lb_policy_save = lbp;
1217 if (mdi_set_lb_policy(cdip, LOAD_BALANCE_NONE) !=
1218 MDI_SUCCESS) {
1219 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1220 VHCI_RELEASE_LUN(vlun);
1221 return (TRAN_FATAL_ERROR);
1222 }
1223 restore_lbp = 1;
1224 }
1225
1226 VHCI_DEBUG(2, (CE_NOTE, vhci->vhci_dip,
1227 "!vhci_scsi_start: sending SCSI-2 RESERVE, vlun 0x%p, "
1228 "svl_resrv_pip 0x%p, svl_flags: %x, lb_policy %x",
1229 (void *)vlun, (void *)vlun->svl_resrv_pip, vlun->svl_flags,
1230 mdi_get_lb_policy(cdip)));
1231
1232 /*
1233 * See comments for VLUN_RESERVE_ACTIVE_FLG in scsi_vhci.h
1234 * To narrow this window where a reserve command may be sent
1235 * down an inactive path the path states first need to be
1236 * updated. Before calling vhci_update_pathstates reset
1237 * VLUN_RESERVE_ACTIVE_FLG, just in case it was already set
1238 * for this lun. This shall prevent an unnecessary reset
1239 * from being sent out. Also remember currently reserved path
1240 * just for a case the new reservation will go to another path.
1241 */
1242 if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
1243 resrv_instance = mdi_pi_get_path_instance(
1244 vlun->svl_resrv_pip);
1245 }
1246 vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
1247 vhci_update_pathstates((void *)vlun);
1248 }
1249
1250 instance = ddi_get_instance(vhci->vhci_dip);
1251
1252 /*
1253 * If the command is PRIN with action of zero, then the cmd
1254 * is reading PR keys which requires filtering on completion.
1255 * Data cache sync must be guaranteed.
1256 */
1257 if ((pkt->pkt_cdbp[0] == SCMD_PRIN) && (pkt->pkt_cdbp[1] == 0) &&
1258 (vpkt->vpkt_org_vpkt == NULL)) {
1259 vpkt->vpkt_tgt_init_pkt_flags |= PKT_CONSISTENT;
1260 }
1261
1262 /*
1263 * Do not defer bind for PKT_DMA_PARTIAL
1264 */
1265 if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1266
1267 /* This is a non pkt_dma_partial case */
1268 if ((rval = vhci_bind_transport(
1269 ap, vpkt, vpkt->vpkt_tgt_init_pkt_flags, NULL_FUNC))
1270 != TRAN_ACCEPT) {
1271 VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1272 "!vhci%d %x: failed to bind transport: "
1273 "vlun 0x%p pkt_reserved %x restore_lbp %x,"
1274 "lbp %x", instance, rval, (void *)vlun,
1275 pkt_reserve_cmd, restore_lbp, lbp));
1276 if (restore_lbp)
1277 (void) mdi_set_lb_policy(cdip, lbp);
1278 if (pkt_reserve_cmd)
1279 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1280 return (rval);
1281 }
1282 VHCI_DEBUG(8, (CE_NOTE, NULL,
1283 "vhci_scsi_start: v_b_t called 0x%p\n", (void *)vpkt));
1284 }
1285 ASSERT(vpkt->vpkt_hba_pkt != NULL);
1286 ASSERT(vpkt->vpkt_path != NULL);
1287
1288 /*
1289 * This is the chance to adjust the pHCI's pkt and other information
1290 * from target driver's pkt.
1291 */
1292 VHCI_DEBUG(8, (CE_NOTE, vhci->vhci_dip, "vhci_scsi_start vpkt %p\n",
1293 (void *)vpkt));
1294 vhci_update_pHCI_pkt(vpkt, pkt);
1295
1296 if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
1297 if (vpkt->vpkt_path != vlun->svl_resrv_pip) {
1298 VHCI_DEBUG(1, (CE_WARN, vhci->vhci_dip,
1299 "!vhci_bind: reserve flag set for vlun 0x%p, but, "
1300 "pktpath 0x%p resrv path 0x%p differ. lb_policy %x",
1301 (void *)vlun, (void *)vpkt->vpkt_path,
1302 (void *)vlun->svl_resrv_pip,
1303 mdi_get_lb_policy(cdip)));
1304 reserve_failed = 1;
1305 }
1306 }
1307
1308 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
1309 if (svp == NULL || reserve_failed) {
1310 if (pkt_reserve_cmd) {
1311 VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1312 "!vhci_bind returned null svp vlun 0x%p",
1313 (void *)vlun));
1314 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1315 if (restore_lbp)
1316 (void) mdi_set_lb_policy(cdip, lbp);
1317 }
1318 pkt_cleanup:
1319 if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1320 scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1321 vpkt->vpkt_hba_pkt = NULL;
1322 if (vpkt->vpkt_path) {
1323 mdi_rele_path(vpkt->vpkt_path);
1324 vpkt->vpkt_path = NULL;
1325 }
1326 }
1327 if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
1328 (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
1329 ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
1330 sema_v(&vlun->svl_pgr_sema);
1331 }
1332 return (TRAN_BUSY);
1333 }
1334
1335 if ((resrv_instance != 0) && (resrv_instance !=
1336 mdi_pi_get_path_instance(vpkt->vpkt_path))) {
1337 /*
1338 * This is an attempt to reserve vpkt->vpkt_path. But the
1339 * previously reserved path referred by resrv_instance might
1340 * still be reserved. Hence we will send a release command
1341 * there in order to avoid a reservation conflict.
1342 */
1343 VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip, "!vhci_scsi_start: "
1344 "conflicting reservation on another path, vlun 0x%p, "
1345 "reserved instance %d, new instance: %d, pip: 0x%p",
1346 (void *)vlun, resrv_instance,
1347 mdi_pi_get_path_instance(vpkt->vpkt_path),
1348 (void *)vpkt->vpkt_path));
1349
1350 /*
1351 * In rare cases, the path referred by resrv_instance could
1352 * disappear in the meantime. Calling mdi_select_path() below
1353 * is an attempt to find out if the path still exists. It also
1354 * ensures that the path will be held when the release is sent.
1355 */
1356 rval = mdi_select_path(cdip, NULL, MDI_SELECT_PATH_INSTANCE,
1357 (void *)(intptr_t)resrv_instance, &pip);
1358
1359 if ((rval == MDI_SUCCESS) && (pip != NULL)) {
1360 svp_resrv = (scsi_vhci_priv_t *)
1361 mdi_pi_get_vhci_private(pip);
1362 rel_pkt = scsi_init_pkt(&svp_resrv->svp_psd->sd_address,
1363 NULL, NULL, CDB_GROUP0,
1364 sizeof (struct scsi_arq_status), 0, 0, SLEEP_FUNC,
1365 NULL);
1366
1367 if (rel_pkt == NULL) {
1368 char *p_path;
1369
1370 /*
1371 * This is very unlikely.
1372 * scsi_init_pkt(SLEEP_FUNC) does not fail
1373 * because of resources. But in theory it could
1374 * fail for some other reason. There is not an
1375 * easy way how to recover though. Log a warning
1376 * and return.
1377 */
1378 p_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1379 vhci_log(CE_WARN, vhci->vhci_dip, "!Sending "
1380 "RELEASE(6) to %s failed, a potential "
1381 "reservation conflict ahead.",
1382 ddi_pathname(mdi_pi_get_phci(pip), p_path));
1383 kmem_free(p_path, MAXPATHLEN);
1384
1385 if (restore_lbp)
1386 (void) mdi_set_lb_policy(cdip, lbp);
1387
1388 /* no need to check pkt_reserve_cmd here */
1389 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1390 return (TRAN_FATAL_ERROR);
1391 }
1392
1393 rel_pkt->pkt_cdbp[0] = SCMD_RELEASE;
1394 rel_pkt->pkt_time = vhci_io_time;
1395
1396 /*
1397 * Ignore the return value. If it will fail
1398 * then most likely it is no longer reserved
1399 * anyway.
1400 */
1401 (void) vhci_do_scsi_cmd(rel_pkt);
1402 VHCI_DEBUG(1, (CE_NOTE, NULL,
1403 "!vhci_scsi_start: path 0x%p, issued SCSI-2"
1404 " RELEASE\n", (void *)pip));
1405 scsi_destroy_pkt(rel_pkt);
1406 mdi_rele_path(pip);
1407 }
1408 }
1409
1410 VHCI_INCR_PATH_CMDCOUNT(svp);
1411
1412 /*
1413 * Ensure that no other IOs raced ahead, while a RESERVE cmd was
1414 * QUIESCING the same lun.
1415 */
1416 if ((!pkt_reserve_cmd) &&
1417 ((vlun->svl_flags & VLUN_QUIESCED_FLG) == VLUN_QUIESCED_FLG)) {
1418 VHCI_DECR_PATH_CMDCOUNT(svp);
1419 goto pkt_cleanup;
1420 }
1421
1422 if ((pkt->pkt_cdbp[0] == SCMD_PRIN) ||
1423 (pkt->pkt_cdbp[0] == SCMD_PROUT)) {
1424 /*
1425 * currently this thread only handles running PGR
1426 * commands, so don't bother creating it unless
1427 * something interesting is going to happen (like
1428 * either a PGR out, or a PGR in with enough space
1429 * to hold the keys that are getting returned)
1430 */
1431 mutex_enter(&vlun->svl_mutex);
1432 if (((vlun->svl_flags & VLUN_TASK_D_ALIVE_FLG) == 0) &&
1433 (pkt->pkt_cdbp[0] == SCMD_PROUT)) {
1434 vlun->svl_taskq = taskq_create("vlun_pgr_task_daemon",
1435 1, MINCLSYSPRI, 1, 4, 0);
1436 vlun->svl_flags |= VLUN_TASK_D_ALIVE_FLG;
1437 }
1438 mutex_exit(&vlun->svl_mutex);
1439 if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
1440 (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
1441 ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
1442 if (rval = vhci_pgr_register_start(vlun, pkt)) {
1443 /* an error */
1444 sema_v(&vlun->svl_pgr_sema);
1445 return (rval);
1446 }
1447 }
1448 }
1449
1450 /*
1451 * SCSI-II RESERVE cmd is not expected in polled mode.
1452 * If this changes it needs to be handled for the polled scenario.
1453 */
1454 flags = vpkt->vpkt_hba_pkt->pkt_flags;
1455
1456 /*
1457 * Set the path_instance *before* sending the scsi_pkt down the path
1458 * to mpxio's pHCI so that additional path abstractions at a pHCI
1459 * level (like maybe iSCSI at some point in the future) can update
1460 * the path_instance.
1461 */
1462 if (scsi_pkt_allocated_correctly(vpkt->vpkt_hba_pkt))
1463 vpkt->vpkt_hba_pkt->pkt_path_instance =
1464 mdi_pi_get_path_instance(vpkt->vpkt_path);
1465
1466 rval = scsi_transport(vpkt->vpkt_hba_pkt);
1467 if (rval == TRAN_ACCEPT) {
1468 if (flags & FLAG_NOINTR) {
1469 struct scsi_pkt *tpkt = vpkt->vpkt_tgt_pkt;
1470 struct scsi_pkt *pkt = vpkt->vpkt_hba_pkt;
1471
1472 ASSERT(tpkt != NULL);
1473 *(tpkt->pkt_scbp) = *(pkt->pkt_scbp);
1474 tpkt->pkt_resid = pkt->pkt_resid;
1475 tpkt->pkt_state = pkt->pkt_state;
1476 tpkt->pkt_statistics = pkt->pkt_statistics;
1477 tpkt->pkt_reason = pkt->pkt_reason;
1478
1479 if ((*(pkt->pkt_scbp) == STATUS_CHECK) &&
1480 (pkt->pkt_state & STATE_ARQ_DONE)) {
1481 bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
1482 vpkt->vpkt_tgt_init_scblen);
1483 }
1484
1485 VHCI_DECR_PATH_CMDCOUNT(svp);
1486 if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1487 scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1488 vpkt->vpkt_hba_pkt = NULL;
1489 if (vpkt->vpkt_path) {
1490 mdi_rele_path(vpkt->vpkt_path);
1491 vpkt->vpkt_path = NULL;
1492 }
1493 }
1494 /*
1495 * This path will not automatically retry pkts
1496 * internally, therefore, vpkt_org_vpkt should
1497 * never be set.
1498 */
1499 ASSERT(vpkt->vpkt_org_vpkt == NULL);
1500 scsi_hba_pkt_comp(tpkt);
1501 }
1502 return (rval);
1503 } else if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
1504 (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
1505 ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
1506 /* the command exited with bad status */
1507 sema_v(&vlun->svl_pgr_sema);
1508 } else if (vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PRIN) {
1509 /* the command exited with bad status */
1510 sema_v(&vlun->svl_pgr_sema);
1511 } else if (pkt_reserve_cmd) {
1512 VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1513 "!vhci_scsi_start: reserve failed vlun 0x%p",
1514 (void *)vlun));
1515 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1516 if (restore_lbp)
1517 (void) mdi_set_lb_policy(cdip, lbp);
1518 }
1519
1520 ASSERT(vpkt->vpkt_hba_pkt != NULL);
1521 VHCI_DECR_PATH_CMDCOUNT(svp);
1522
1523 /* Do not destroy phci packet information for PKT_DMA_PARTIAL */
1524 if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1525 scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1526 vpkt->vpkt_hba_pkt = NULL;
1527 if (vpkt->vpkt_path) {
1528 MDI_PI_ERRSTAT(vpkt->vpkt_path, MDI_PI_TRANSERR);
1529 mdi_rele_path(vpkt->vpkt_path);
1530 vpkt->vpkt_path = NULL;
1531 }
1532 }
1533 return (TRAN_BUSY);
1534 }
1535
1536 /*
1537 * Function name : vhci_scsi_reset()
1538 *
1539 * Return Values : 0 - reset failed
1540 * 1 - reset succeeded
1541 */
1542
1543 static int
1544 vhci_scsi_reset(struct scsi_address *ap, int level)
1545 {
1546 if ((level == RESET_TARGET) || (level == RESET_LUN)) {
1547 return (vhci_scsi_reset_target(ap, level, TRUE));
1548 } else if (level == RESET_ALL) {
1549 return (vhci_scsi_reset_bus(ap));
1550 } else {
1551 return (0);
1552 }
1553 }
1554
1555 /*
1556 * vhci_recovery_reset:
1557 * Issues reset to the device
1558 * Input:
1559 * vlun - vhci lun pointer of the device
1560 * ap - address of the device
1561 * select_path:
1562 * If select_path is FALSE, then the address specified in ap is
1563 * the path on which reset will be issued.
1564 * If select_path is TRUE, then path is obtained by calling
1565 * mdi_select_path.
1566 *
1567 * recovery_depth:
1568 * Caller can specify the level of reset.
1569 * VHCI_DEPTH_LUN -
1570 * Issues LUN RESET if device supports lun reset.
1571 * VHCI_DEPTH_TARGET -
1572 * If Lun Reset fails or the device does not support
1573 * Lun Reset, issues TARGET RESET
1574 * VHCI_DEPTH_ALL -
1575 * If Lun Reset fails or the device does not support
1576 * Lun Reset, issues TARGET RESET.
1577 * If TARGET RESET does not succeed, issues Bus Reset.
1578 */
1579
1580 static int
1581 vhci_recovery_reset(scsi_vhci_lun_t *vlun, struct scsi_address *ap,
1582 uint8_t select_path, uint8_t recovery_depth)
1583 {
1584 int ret = 0;
1585
1586 ASSERT(ap != NULL);
1587
1588 if (vlun && vlun->svl_support_lun_reset == 1) {
1589 ret = vhci_scsi_reset_target(ap, RESET_LUN,
1590 select_path);
1591 }
1592
1593 recovery_depth--;
1594
1595 if ((ret == 0) && recovery_depth) {
1596 ret = vhci_scsi_reset_target(ap, RESET_TARGET,
1597 select_path);
1598 recovery_depth--;
1599 }
1600
1601 if ((ret == 0) && recovery_depth) {
1602 (void) scsi_reset(ap, RESET_ALL);
1603 }
1604
1605 return (ret);
1606 }
1607
1608 /*
1609 * Note: The scsi_address passed to this routine could be the scsi_address
1610 * for the virtual device or the physical device. No assumptions should be
1611 * made in this routine about the contents of the ap structure.
1612 * Further, note that the child dip would be the dip of the ssd node regardless
1613 * of the scsi_address passed in.
1614 */
1615 static int
1616 vhci_scsi_reset_target(struct scsi_address *ap, int level, uint8_t select_path)
1617 {
1618 dev_info_t *vdip, *cdip = NULL;
1619 mdi_pathinfo_t *pip = NULL;
1620 mdi_pathinfo_t *npip = NULL;
1621 int rval = -1;
1622 scsi_vhci_priv_t *svp = NULL;
1623 struct scsi_address *pap = NULL;
1624 scsi_hba_tran_t *hba = NULL;
1625 int sps;
1626 struct scsi_vhci *vhci = NULL;
1627
1628 if (select_path != TRUE) {
1629 ASSERT(ap != NULL);
1630 if (level == RESET_LUN) {
1631 hba = ap->a_hba_tran;
1632 ASSERT(hba != NULL);
1633 return (hba->tran_reset(ap, RESET_LUN));
1634 }
1635 return (scsi_reset(ap, level));
1636 }
1637
1638 /*
1639 * SCSI address should be interpreted according to the pHBA flags.
1640 */
1641 if (ap->a_hba_tran->tran_hba_flags & SCSI_HBA_ADDR_COMPLEX)
1642 cdip = ADDR2DIP(ap);
1643 else if (ap->a_hba_tran->tran_hba_flags & SCSI_HBA_TRAN_CLONE)
1644 cdip = ap->a_hba_tran->tran_sd->sd_dev;
1645
1646 ASSERT(cdip != NULL);
1647 vdip = ddi_get_parent(cdip);
1648 ASSERT(vdip != NULL);
1649 vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
1650 ASSERT(vhci != NULL);
1651
1652 rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH, NULL, &pip);
1653 if ((rval != MDI_SUCCESS) || (pip == NULL)) {
1654 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
1655 "Unable to get a path, dip 0x%p", (void *)cdip));
1656 return (0);
1657 }
1658 again:
1659 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
1660 if (svp == NULL) {
1661 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
1662 "priv is NULL, pip 0x%p", (void *)pip));
1663 mdi_rele_path(pip);
1664 return (0);
1665 }
1666
1667 if (svp->svp_psd == NULL) {
1668 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
1669 "psd is NULL, pip 0x%p, svp 0x%p",
1670 (void *)pip, (void *)svp));
1671 mdi_rele_path(pip);
1672 return (0);
1673 }
1674
1675 pap = &svp->svp_psd->sd_address;
1676 hba = pap->a_hba_tran;
1677
1678 ASSERT(pap != NULL);
1679 ASSERT(hba != NULL);
1680
1681 if (hba->tran_reset != NULL) {
1682 if (hba->tran_reset(pap, level) == 0) {
1683 vhci_log(CE_WARN, vdip, "!%s%d: "
1684 "path %s, reset %d failed",
1685 ddi_driver_name(cdip), ddi_get_instance(cdip),
1686 mdi_pi_spathname(pip), level);
1687
1688 /*
1689 * Select next path and issue the reset, repeat
1690 * until all paths are exhausted
1691 */
1692 sps = mdi_select_path(cdip, NULL,
1693 MDI_SELECT_ONLINE_PATH, pip, &npip);
1694 if ((sps != MDI_SUCCESS) || (npip == NULL)) {
1695 mdi_rele_path(pip);
1696 return (0);
1697 }
1698 mdi_rele_path(pip);
1699 pip = npip;
1700 goto again;
1701 }
1702 mdi_rele_path(pip);
1703 mutex_enter(&vhci->vhci_mutex);
1704 scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
1705 &vhci->vhci_reset_notify_listf);
1706 mutex_exit(&vhci->vhci_mutex);
1707 VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_scsi_reset_target: "
1708 "reset %d sent down pip:%p for cdip:%p\n", level,
1709 (void *)pip, (void *)cdip));
1710 return (1);
1711 }
1712 mdi_rele_path(pip);
1713 return (0);
1714 }
1715
/* ARGSUSED */
static int
vhci_scsi_reset_bus(struct scsi_address *ap)
{
	/*
	 * A bus reset is meaningless for the virtual HBA; report
	 * success without touching any physical path.
	 */
	return (1);
}
1722
1723 /*
1724 * This is a special version of LUN reset routine
1725 * which sends a reset down all available paths.
1726 */
1727 static int
1728 vhci_scsi_reset_all_paths(struct scsi_address *ap)
1729 {
1730 dev_info_t *vdip, *cdip = NULL;
1731 mdi_pathinfo_t *pip = NULL;
1732 mdi_pathinfo_t *npip = NULL;
1733 int rval = -1;
1734 scsi_vhci_priv_t *svp = NULL;
1735 struct scsi_address *pap = NULL;
1736 scsi_hba_tran_t *hba = NULL;
1737 int sps = MDI_SUCCESS;
1738 int reset_result = 0;
1739 struct scsi_vhci *vhci = NULL;
1740
1741 /*
1742 * SCSI address should be interpreted according to the pHBA flags.
1743 */
1744 if (ap->a_hba_tran->tran_hba_flags & SCSI_HBA_ADDR_COMPLEX)
1745 cdip = ADDR2DIP(ap);
1746 else if (ap->a_hba_tran->tran_hba_flags & SCSI_HBA_TRAN_CLONE)
1747 cdip = ap->a_hba_tran->tran_sd->sd_dev;
1748
1749 if (cdip == NULL || (vdip = ddi_get_parent(cdip)) == NULL ||
1750 (vhci = ddi_get_soft_state(vhci_softstate,
1751 ddi_get_instance(vdip))) == NULL) {
1752 VHCI_DEBUG(2, (CE_WARN, NULL, "!%s: "
1753 "Child info pointer NULL, cdip 0x%p",
1754 __func__, (void *)cdip));
1755 return (0);
1756 }
1757
1758 rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH, NULL, &pip);
1759 if ((rval != MDI_SUCCESS) || (pip == NULL)) {
1760 VHCI_DEBUG(2, (CE_WARN, NULL, "!%s: "
1761 "Unable to get a path, dip 0x%p",
1762 __func__, (void *)cdip));
1763 return (0);
1764 }
1765 again:
1766 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
1767
1768 if (svp == NULL || svp->svp_psd == NULL) {
1769 VHCI_DEBUG(2, (CE_WARN, NULL, "!%s: "
1770 "private data is NULL, pip 0x%p",
1771 __func__, (void *)pip));
1772 mdi_rele_path(pip);
1773 return (0);
1774 }
1775
1776 pap = &svp->svp_psd->sd_address;
1777 hba = pap->a_hba_tran;
1778
1779 if (pap != NULL && hba != NULL && hba->tran_reset != NULL) {
1780 /*
1781 * The following sends reset down all available paths
1782 */
1783 if (sps == MDI_SUCCESS) {
1784 reset_result = hba->tran_reset(pap, RESET_LUN);
1785
1786 VHCI_DEBUG(2, (CE_WARN, vdip, "!%s%d: "
1787 "path %s, reset LUN %s",
1788 ddi_driver_name(cdip), ddi_get_instance(cdip),
1789 mdi_pi_spathname(pip),
1790 (reset_result ? "Success" : "Failed")));
1791
1792 /*
1793 * Select next path and issue the reset, repeat
1794 * until all paths are exhausted regardless of success
1795 * or failure of the previous reset.
1796 */
1797 sps = mdi_select_path(cdip, NULL,
1798 MDI_SELECT_ONLINE_PATH, pip, &npip);
1799 if ((sps != MDI_SUCCESS) || (npip == NULL)) {
1800 mdi_rele_path(pip);
1801 return (0);
1802 }
1803 mdi_rele_path(pip);
1804 pip = npip;
1805 goto again;
1806 }
1807 mdi_rele_path(pip);
1808 mutex_enter(&vhci->vhci_mutex);
1809 scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
1810 &vhci->vhci_reset_notify_listf);
1811 mutex_exit(&vhci->vhci_mutex);
1812 VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_scsi_reset_target: "
1813 "reset %d sent down pip:%p for cdip:%p\n", RESET_LUN,
1814 (void *)pip, (void *)cdip));
1815 return (1);
1816 }
1817 mdi_rele_path(pip);
1818 return (0);
1819 }
1820
1821 /*
1822 * called by vhci_getcap and vhci_setcap to get and set (respectively)
1823 * SCSI capabilities
1824 */
1825 /* ARGSUSED */
1826 static int
1827 vhci_commoncap(struct scsi_address *ap, char *cap,
1828 int val, int tgtonly, int doset)
1829 {
1830 struct scsi_vhci *vhci = ADDR2VHCI(ap);
1831 struct scsi_vhci_lun *vlun = ADDR2VLUN(ap);
1832 int cidx;
1833 int rval = 0;
1834
1835 if (cap == (char *)0) {
1836 VHCI_DEBUG(3, (CE_WARN, vhci->vhci_dip,
1837 "!vhci_commoncap: invalid arg"));
1838 return (rval);
1839 }
1840
1841 if (vlun == NULL) {
1842 VHCI_DEBUG(3, (CE_WARN, vhci->vhci_dip,
1843 "!vhci_commoncap: vlun is null"));
1844 return (rval);
1845 }
1846
1847 if ((cidx = scsi_hba_lookup_capstr(cap)) == -1) {
1848 return (UNDEFINED);
1849 }
1850
1851 /*
1852 * Process setcap request.
1853 */
1854 if (doset) {
1855 /*
1856 * At present, we can only set binary (0/1) values
1857 */
1858 switch (cidx) {
1859 case SCSI_CAP_ARQ:
1860 if (val == 0) {
1861 rval = 0;
1862 } else {
1863 rval = 1;
1864 }
1865 break;
1866
1867 case SCSI_CAP_LUN_RESET:
1868 if (tgtonly == 0) {
1869 VHCI_DEBUG(1, (CE_WARN, vhci->vhci_dip,
1870 "scsi_vhci_setcap: "
1871 "Returning error since whom = 0"));
1872 rval = -1;
1873 break;
1874 }
1875 /*
1876 * Set the capability accordingly.
1877 */
1878 mutex_enter(&vlun->svl_mutex);
1879 vlun->svl_support_lun_reset = val;
1880 rval = val;
1881 mutex_exit(&vlun->svl_mutex);
1882 break;
1883
1884 case SCSI_CAP_SECTOR_SIZE:
1885 mutex_enter(&vlun->svl_mutex);
1886 vlun->svl_sector_size = val;
1887 vlun->svl_setcap_done = 1;
1888 mutex_exit(&vlun->svl_mutex);
1889 (void) vhci_pHCI_cap(ap, cap, val, tgtonly, NULL);
1890
1891 /* Always return success */
1892 rval = 1;
1893 break;
1894
1895 default:
1896 VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1897 "!vhci_setcap: unsupported %d", cidx));
1898 rval = UNDEFINED;
1899 break;
1900 }
1901
1902 VHCI_DEBUG(6, (CE_NOTE, vhci->vhci_dip,
1903 "!set cap: cap=%s, val/tgtonly/doset/rval = "
1904 "0x%x/0x%x/0x%x/%d\n",
1905 cap, val, tgtonly, doset, rval));
1906
1907 } else {
1908 /*
1909 * Process getcap request.
1910 */
1911 switch (cidx) {
1912 case SCSI_CAP_DMA_MAX:
1913 /*
1914 * For X86 this capability is caught in scsi_ifgetcap().
1915 * XXX Should this be getting the value from the pHCI?
1916 */
1917 rval = (int)VHCI_DMA_MAX_XFER_CAP;
1918 break;
1919
1920 case SCSI_CAP_INITIATOR_ID:
1921 rval = 0x00;
1922 break;
1923
1924 case SCSI_CAP_ARQ:
1925 case SCSI_CAP_RESET_NOTIFICATION:
1926 case SCSI_CAP_TAGGED_QING:
1927 rval = 1;
1928 break;
1929
1930 case SCSI_CAP_SCSI_VERSION:
1931 rval = 3;
1932 break;
1933
1934 case SCSI_CAP_INTERCONNECT_TYPE:
1935 rval = INTERCONNECT_FABRIC;
1936 break;
1937
1938 case SCSI_CAP_LUN_RESET:
1939 /*
1940 * scsi_vhci will always return success for LUN reset.
1941 * When request for doing LUN reset comes
1942 * through scsi_reset entry point, at that time attempt
1943 * will be made to do reset through all the possible
1944 * paths.
1945 */
1946 mutex_enter(&vlun->svl_mutex);
1947 rval = vlun->svl_support_lun_reset;
1948 mutex_exit(&vlun->svl_mutex);
1949 VHCI_DEBUG(4, (CE_WARN, vhci->vhci_dip,
1950 "scsi_vhci_getcap:"
1951 "Getting the Lun reset capability %d", rval));
1952 break;
1953
1954 case SCSI_CAP_SECTOR_SIZE:
1955 mutex_enter(&vlun->svl_mutex);
1956 rval = vlun->svl_sector_size;
1957 mutex_exit(&vlun->svl_mutex);
1958 break;
1959
1960 case SCSI_CAP_CDB_LEN:
1961 rval = VHCI_SCSI_CDB_SIZE;
1962 break;
1963
1964 case SCSI_CAP_DMA_MAX_ARCH:
1965 /*
1966 * For X86 this capability is caught in scsi_ifgetcap().
1967 * XXX Should this be getting the value from the pHCI?
1968 */
1969 rval = 0;
1970 break;
1971
1972 default:
1973 VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1974 "!vhci_getcap: unsupported %d", cidx));
1975 rval = UNDEFINED;
1976 break;
1977 }
1978
1979 VHCI_DEBUG(6, (CE_NOTE, vhci->vhci_dip,
1980 "!get cap: cap=%s, val/tgtonly/doset/rval = "
1981 "0x%x/0x%x/0x%x/%d\n",
1982 cap, val, tgtonly, doset, rval));
1983 }
1984 return (rval);
1985 }
1986
1987
1988 /*
1989 * Function name : vhci_scsi_getcap()
1990 *
1991 */
static int
vhci_scsi_getcap(struct scsi_address *ap, char *cap, int whom)
{
	int rval;

	/* Read-only query: val is irrelevant, doset is 0. */
	rval = vhci_commoncap(ap, cap, 0, whom, 0);
	return (rval);
}
1997
static int
vhci_scsi_setcap(struct scsi_address *ap, char *cap, int value, int whom)
{
	int rval;

	/* Modify request: doset is 1, value carries the new setting. */
	rval = vhci_commoncap(ap, cap, value, whom, 1);
	return (rval);
}
2003
2004 /*
2005 * Function name : vhci_scsi_abort()
2006 */
2007 /* ARGSUSED */
2008 static int
2009 vhci_scsi_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
2010 {
2011 return (0);
2012 }
2013
2014 /*
2015 * Function name : vhci_scsi_init_pkt
2016 *
2017 * Return Values : pointer to scsi_pkt, or NULL
2018 */
2019 /* ARGSUSED */
static struct scsi_pkt *
vhci_scsi_init_pkt(struct scsi_address *ap, struct scsi_pkt *pkt,
    struct buf *bp, int cmdlen, int statuslen, int tgtlen,
    int flags, int (*callback)(caddr_t), caddr_t arg)
{
	struct scsi_vhci	*vhci = ADDR2VHCI(ap);
	struct vhci_pkt		*vpkt;
	int			rval;
	int			newpkt = 0;	/* 1 if we allocated pktp here */
	struct scsi_pkt		*pktp;


	if (pkt == NULL) {
		/*
		 * Only CDBs up to VHCI_SCSI_CDB_SIZE are supported, with
		 * one exception: the OSD CDB size when all of the OSD
		 * packet flags are also set.
		 */
		if (cmdlen > VHCI_SCSI_CDB_SIZE) {
			if ((cmdlen != VHCI_SCSI_OSD_CDB_SIZE) ||
			    ((flags & VHCI_SCSI_OSD_PKT_FLAGS) !=
			    VHCI_SCSI_OSD_PKT_FLAGS)) {
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!init pkt: cdb size not supported\n"));
				return (NULL);
			}
		}

		/*
		 * Allocate the target-driver packet with room for our
		 * per-packet private data (struct vhci_pkt).
		 */
		pktp = scsi_hba_pkt_alloc(vhci->vhci_dip,
		    ap, cmdlen, statuslen, tgtlen, sizeof (*vpkt), callback,
		    arg);

		if (pktp == NULL) {
			return (NULL);
		}

		/* Get the vhci's private structure */
		vpkt = (struct vhci_pkt *)(pktp->pkt_ha_private);
		ASSERT(vpkt);

		/* Save the target driver's packet */
		vpkt->vpkt_tgt_pkt = pktp;

		/*
		 * Save pkt_tgt_init_pkt fields if deferred binding
		 * is needed or for other purposes.
		 */
		vpkt->vpkt_tgt_init_pkt_flags = flags;
		vpkt->vpkt_flags = (callback == NULL_FUNC) ? CFLAG_NOWAIT : 0;
		vpkt->vpkt_state = VHCI_PKT_IDLE;
		vpkt->vpkt_tgt_init_cdblen = cmdlen;
		vpkt->vpkt_tgt_init_scblen = statuslen;
		newpkt = 1;
	} else { /* pkt not NULL */
		vpkt = pkt->pkt_ha_private;
	}

	VHCI_DEBUG(8, (CE_NOTE, NULL, "vhci_scsi_init_pkt "
	    "vpkt %p flags %x\n", (void *)vpkt, flags));

	/* Clear any stale error flags */
	if (bp) {
		bioerror(bp, 0);
	}

	vpkt->vpkt_tgt_init_bp = bp;

	if (flags & PKT_DMA_PARTIAL) {

		/*
		 * Immediate binding is needed.
		 * Target driver may not set this flag in next invocation.
		 * vhci has to remember this flag was set during first
		 * invocation of vhci_scsi_init_pkt.
		 */
		vpkt->vpkt_flags |= CFLAG_DMA_PARTIAL;
	}

	if (vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) {

		/*
		 * Re-initialize some of the target driver packet state
		 * information.
		 */
		vpkt->vpkt_tgt_pkt->pkt_state = 0;
		vpkt->vpkt_tgt_pkt->pkt_statistics = 0;
		vpkt->vpkt_tgt_pkt->pkt_reason = 0;

		/*
		 * Binding a vpkt->vpkt_path for this IO at init_time.
		 * If an IO error happens later, target driver will clear
		 * this vpkt->vpkt_path binding before re-init IO again.
		 */
		VHCI_DEBUG(8, (CE_NOTE, NULL,
		    "vhci_scsi_init_pkt: calling v_b_t %p, newpkt %d\n",
		    (void *)vpkt, newpkt));
		if (pkt && vpkt->vpkt_hba_pkt) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "v_s_i_p calling update_pHCI_pkt resid %ld\n",
			    pkt->pkt_resid));
			vhci_update_pHCI_pkt(vpkt, pkt);
		}
		/*
		 * Bind the transport now; only pass a sleeping callback
		 * down when the caller explicitly allowed sleeping.
		 */
		if (callback == SLEEP_FUNC) {
			rval = vhci_bind_transport(
			    ap, vpkt, flags, callback);
		} else {
			rval = vhci_bind_transport(
			    ap, vpkt, flags, NULL_FUNC);
		}
		VHCI_DEBUG(8, (CE_NOTE, NULL,
		    "vhci_scsi_init_pkt: v_b_t called 0x%p rval 0x%x\n",
		    (void *)vpkt, rval));
		if (bp) {
			if (rval == TRAN_FATAL_ERROR) {
				/*
				 * No paths available. Could not bind
				 * any pHCI. Setting EFAULT as a way
				 * to indicate no DMA is mapped.
				 */
				bioerror(bp, EFAULT);
			} else {
				/*
				 * Do not indicate any pHCI errors to
				 * target driver otherwise.
				 */
				bioerror(bp, 0);
			}
		}
		if (rval != TRAN_ACCEPT) {
			VHCI_DEBUG(8, (CE_NOTE, NULL,
			    "vhci_scsi_init_pkt: "
			    "v_b_t failed 0x%p newpkt %x\n",
			    (void *)vpkt, newpkt));
			/* Free the packet only if this call allocated it. */
			if (newpkt) {
				scsi_hba_pkt_free(ap,
				    vpkt->vpkt_tgt_pkt);
			}
			return (NULL);
		}
		ASSERT(vpkt->vpkt_hba_pkt != NULL);
		ASSERT(vpkt->vpkt_path != NULL);

		/* Update the resid for the target driver */
		vpkt->vpkt_tgt_pkt->pkt_resid =
		    vpkt->vpkt_hba_pkt->pkt_resid;
	}

	return (vpkt->vpkt_tgt_pkt);
}
2164
2165 /*
2166 * Function name : vhci_scsi_destroy_pkt
2167 *
2168 * Return Values : none
2169 */
2170 static void
2171 vhci_scsi_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
2172 {
2173 struct vhci_pkt *vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
2174
2175 VHCI_DEBUG(8, (CE_NOTE, NULL,
2176 "vhci_scsi_destroy_pkt: vpkt 0x%p\n", (void *)vpkt));
2177
2178 vpkt->vpkt_tgt_init_pkt_flags = 0;
2179 if (vpkt->vpkt_hba_pkt) {
2180 scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
2181 vpkt->vpkt_hba_pkt = NULL;
2182 }
2183 if (vpkt->vpkt_path) {
2184 mdi_rele_path(vpkt->vpkt_path);
2185 vpkt->vpkt_path = NULL;
2186 }
2187
2188 ASSERT(vpkt->vpkt_state != VHCI_PKT_ISSUED);
2189 scsi_hba_pkt_free(ap, vpkt->vpkt_tgt_pkt);
2190 }
2191
2192 /*
2193 * Function name : vhci_scsi_dmafree()
2194 *
2195 * Return Values : none
2196 */
2197 /*ARGSUSED*/
2198 static void
2199 vhci_scsi_dmafree(struct scsi_address *ap, struct scsi_pkt *pkt)
2200 {
2201 struct vhci_pkt *vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
2202
2203 VHCI_DEBUG(6, (CE_NOTE, NULL,
2204 "vhci_scsi_dmafree: vpkt 0x%p\n", (void *)vpkt));
2205
2206 ASSERT(vpkt != NULL);
2207 if (vpkt->vpkt_hba_pkt) {
2208 scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
2209 vpkt->vpkt_hba_pkt = NULL;
2210 }
2211 if (vpkt->vpkt_path) {
2212 mdi_rele_path(vpkt->vpkt_path);
2213 vpkt->vpkt_path = NULL;
2214 }
2215 }
2216
2217 /*
2218 * Function name : vhci_scsi_sync_pkt()
2219 *
2220 * Return Values : none
2221 */
2222 /*ARGSUSED*/
2223 static void
2224 vhci_scsi_sync_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
2225 {
2226 struct vhci_pkt *vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
2227
2228 ASSERT(vpkt != NULL);
2229 if (vpkt->vpkt_hba_pkt) {
2230 scsi_sync_pkt(vpkt->vpkt_hba_pkt);
2231 }
2232 }
2233
2234 /*
2235 * routine for reset notification setup, to register or cancel.
2236 */
2237 static int
2238 vhci_scsi_reset_notify(struct scsi_address *ap, int flag,
2239 void (*callback)(caddr_t), caddr_t arg)
2240 {
2241 struct scsi_vhci *vhci = ADDR2VHCI(ap);
2242 return (scsi_hba_reset_notify_setup(ap, flag, callback, arg,
2243 &vhci->vhci_mutex, &vhci->vhci_reset_notify_listf));
2244 }
2245
2246 static int
2247 vhci_scsi_get_name_bus_addr(struct scsi_device *sd,
2248 char *name, int len, int bus_addr)
2249 {
2250 dev_info_t *cdip;
2251 char *guid;
2252 scsi_vhci_lun_t *vlun;
2253
2254 ASSERT(sd != NULL);
2255 ASSERT(name != NULL);
2256
2257 *name = 0;
2258 cdip = sd->sd_dev;
2259
2260 ASSERT(cdip != NULL);
2261
2262 if (mdi_component_is_client(cdip, NULL) != MDI_SUCCESS)
2263 return (1);
2264
2265 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, PROPFLAGS,
2266 MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS)
2267 return (1);
2268
2269 /*
2270 * Message is "sd# at scsi_vhci0: unit-address <guid>: <bus_addr>".
2271 * <guid> bus_addr argument == 0
2272 * <bus_addr> bus_addr argument != 0
2273 * Since the <guid> is already provided with unit-address, we just
2274 * provide failover module in <bus_addr> to keep output shorter.
2275 */
2276 vlun = ADDR2VLUN(&sd->sd_address);
2277 if (bus_addr == 0) {
2278 /* report the guid: */
2279 (void) snprintf(name, len, "g%s", guid);
2280 } else if (vlun && vlun->svl_fops_name) {
2281 /* report the name of the failover module */
2282 (void) snprintf(name, len, "%s", vlun->svl_fops_name);
2283 }
2284
2285 ddi_prop_free(guid);
2286 return (1);
2287 }
2288
static int
vhci_scsi_get_bus_addr(struct scsi_device *sd, char *name, int len)
{
	/* bus_addr form (bus_addr != 0): reports the failover module name. */
	return (vhci_scsi_get_name_bus_addr(sd, name, len, 1));
}
2294
static int
vhci_scsi_get_name(struct scsi_device *sd, char *name, int len)
{
	/* name form (bus_addr == 0): reports "g<GUID>". */
	return (vhci_scsi_get_name_bus_addr(sd, name, len, 0));
}
2300
2301 /*
2302 * Return a pointer to the guid part of the devnm.
2303 * devnm format is "nodename@busaddr", busaddr format is "gGUID".
2304 */
static char *
vhci_devnm_to_guid(char *devnm)
{
	char	*cp;

	if (devnm == NULL)
		return (NULL);

	/* Scan forward to the '@' separating nodename from busaddr. */
	for (cp = devnm; *cp != '\0'; cp++) {
		if (*cp == '@')
			break;
	}

	/* The busaddr must be of the form "g<GUID>". */
	if (*cp == '@' && *(cp + 1) == 'g')
		return (cp + 2);
	return (NULL);
}
2319
static int
vhci_bind_transport(struct scsi_address *ap, struct vhci_pkt *vpkt, int flags,
    int (*func)(caddr_t))
{
	struct scsi_vhci	*vhci = ADDR2VHCI(ap);
	dev_info_t		*cdip = ADDR2DIP(ap);
	mdi_pathinfo_t		*pip = NULL;
	mdi_pathinfo_t		*npip = NULL;
	scsi_vhci_priv_t	*svp = NULL;
	struct scsi_device	*psd = NULL;
	struct scsi_address	*address = NULL;
	struct scsi_pkt		*pkt = NULL;
	int			rval = -1;
	int			pgr_sema_held = 0;	/* 1: we own svl_pgr_sema */
	int			held;
	int			mps_flag = MDI_SELECT_ONLINE_PATH;
	struct scsi_vhci_lun	*vlun;
	int			path_instance = 0;

	vlun = ADDR2VLUN(ap);
	ASSERT(vlun != 0);

	/*
	 * PROUT REGISTER / REGISTER-AND-IGNORE commands serialize on the
	 * per-LUN PGR semaphore and, when a first path was recorded for
	 * this registration, must be sent down that same path.
	 */
	if ((vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PROUT) &&
	    (((vpkt->vpkt_tgt_pkt->pkt_cdbp[1] & 0x1f) ==
	    VHCI_PROUT_REGISTER) ||
	    ((vpkt->vpkt_tgt_pkt->pkt_cdbp[1] & 0x1f) ==
	    VHCI_PROUT_R_AND_IGNORE))) {
		if (!sema_tryp(&vlun->svl_pgr_sema))
			return (TRAN_BUSY);
		pgr_sema_held = 1;
		if (vlun->svl_first_path != NULL) {
			/*
			 * Walk all ONLINE and STANDBY paths looking for
			 * svl_first_path; if found, bind to it.
			 */
			rval = mdi_select_path(cdip, NULL,
			    MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH,
			    NULL, &pip);
			if ((rval != MDI_SUCCESS) || (pip == NULL)) {
				VHCI_DEBUG(4, (CE_NOTE, NULL,
				    "vhci_bind_transport: path select fail\n"));
			} else {
				npip = pip;
				do {
					if (npip == vlun->svl_first_path) {
						VHCI_DEBUG(4, (CE_NOTE, NULL,
						    "vhci_bind_transport: "
						    "valid first path 0x%p\n",
						    (void *)
						    vlun->svl_first_path));
						pip = vlun->svl_first_path;
						goto bind_path;
					}
					pip = npip;
					rval = mdi_select_path(cdip, NULL,
					    MDI_SELECT_ONLINE_PATH |
					    MDI_SELECT_STANDBY_PATH,
					    pip, &npip);
					mdi_rele_path(pip);
				} while ((rval == MDI_SUCCESS) &&
				    (npip != NULL));
			}
		}

		/* svl_first_path no longer exists: forget it. */
		if (vlun->svl_first_path) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_bind_transport: invalid first path 0x%p\n",
			    (void *)vlun->svl_first_path));
			vlun->svl_first_path = NULL;
		}
	} else if (vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PRIN) {
		/*
		 * PRIN also serializes on the PGR semaphore, unless the
		 * packet came back through the taskq (VHCI_PKT_THRU_TASKQ),
		 * in which case the semaphore is already held for it.
		 */
		if ((vpkt->vpkt_state & VHCI_PKT_THRU_TASKQ) == 0) {
			if (!sema_tryp(&vlun->svl_pgr_sema))
				return (TRAN_BUSY);
		}
		pgr_sema_held = 1;
	}

	/*
	 * If the path is already bound for PKT_PARTIAL_DMA case,
	 * try to use the same path.
	 */
	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) && vpkt->vpkt_path) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_bind_transport: PKT_PARTIAL_DMA "
		    "vpkt 0x%p, path 0x%p\n",
		    (void *)vpkt, (void *)vpkt->vpkt_path));
		pip = vpkt->vpkt_path;
		goto bind_path;
	}

	/*
	 * Get path_instance. Non-zero with FLAG_PKT_PATH_INSTANCE set
	 * indicates that mdi_select_path should be called to select a
	 * specific instance.
	 *
	 * NB: Condition pkt_path_instance reference on proper allocation.
	 */
	if ((vpkt->vpkt_tgt_pkt->pkt_flags & FLAG_PKT_PATH_INSTANCE) &&
	    scsi_pkt_allocated_correctly(vpkt->vpkt_tgt_pkt)) {
		path_instance = vpkt->vpkt_tgt_pkt->pkt_path_instance;
	}

	/*
	 * If reservation is active bind the transport directly to the pip
	 * with the reservation.
	 */
	if (vpkt->vpkt_hba_pkt == NULL) {
		if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
			if (MDI_PI_IS_ONLINE(vlun->svl_resrv_pip)) {
				pip = vlun->svl_resrv_pip;
				mdi_hold_path(pip);
				vlun->svl_waiting_for_activepath = 0;
				rval = MDI_SUCCESS;
				goto bind_path;
			} else {
				/* Reserved path is gone; cannot proceed. */
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_BUSY);
			}
		}
try_again:
		rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
		    path_instance ? MDI_SELECT_PATH_INSTANCE : 0,
		    (void *)(intptr_t)path_instance, &pip);
		if (rval == MDI_BUSY) {
			if (pgr_sema_held) {
				sema_v(&vlun->svl_pgr_sema);
			}
			return (TRAN_BUSY);
		} else if (rval == MDI_DEVI_ONLINING) {
			/*
			 * if we are here then we are in the midst of
			 * an attach/probe of the client device.
			 * We attempt to bind to ONLINE path if available,
			 * else it is OK to bind to a STANDBY path (instead
			 * of triggering a failover) because IO associated
			 * with attach/probe (eg. INQUIRY, block 0 read)
			 * are completed by targets even on passive paths
			 * If no ONLINE paths available, it is important
			 * to set svl_waiting_for_activepath for two
			 * reasons: (1) avoid sense analysis in the
			 * "external failure detection" codepath in
			 * vhci_intr().  Failure to do so will result in
			 * infinite loop (unless an ONLINE path becomes
			 * available at some point) (2) avoid
			 * unnecessary failover (see "---Waiting For Active
			 * Path---" comment below).
			 */
			VHCI_DEBUG(1, (CE_NOTE, NULL, "!%p in onlining "
			    "state\n", (void *)cdip));
			pip = NULL;
			rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
			    mps_flag, NULL, &pip);
			if ((rval != MDI_SUCCESS) || (pip == NULL)) {
				if (vlun->svl_waiting_for_activepath == 0) {
					vlun->svl_waiting_for_activepath = 1;
					vlun->svl_wfa_time = gethrtime();
				}
				mps_flag |= MDI_SELECT_STANDBY_PATH;
				rval = mdi_select_path(cdip,
				    vpkt->vpkt_tgt_init_bp,
				    mps_flag, NULL, &pip);
				if ((rval != MDI_SUCCESS) || (pip == NULL)) {
					if (pgr_sema_held) {
						sema_v(&vlun->svl_pgr_sema);
					}
					return (TRAN_FATAL_ERROR);
				}
				goto bind_path;
			}
		} else if ((rval == MDI_FAILURE) ||
		    ((rval == MDI_NOPATH) && (path_instance))) {
			if (pgr_sema_held) {
				sema_v(&vlun->svl_pgr_sema);
			}
			return (TRAN_FATAL_ERROR);
		}

		if ((pip == NULL) || (rval == MDI_NOPATH)) {
			while (vlun->svl_waiting_for_activepath) {
				/*
				 * ---Waiting For Active Path---
				 * This device was discovered across a
				 * passive path; lets wait for a little
				 * bit, hopefully an active path will
				 * show up obviating the need for a
				 * failover
				 */
				if ((gethrtime() - vlun->svl_wfa_time) >=
				    (60 * NANOSEC)) {
					vlun->svl_waiting_for_activepath = 0;
				} else {
					drv_usecwait(1000);
					if (vlun->svl_waiting_for_activepath
					    == 0) {
						/*
						 * an active path has come
						 * online!
						 */
						goto try_again;
					}
				}
			}
			VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
			if (!held) {
				VHCI_DEBUG(4, (CE_NOTE, NULL,
				    "!Lun not held\n"));
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_BUSY);
			}
			/*
			 * now that the LUN is stable, one last check
			 * to make sure no other changes sneaked in
			 * (like a path coming online or a
			 * failover initiated by another thread)
			 */
			pip = NULL;
			rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
			    0, NULL, &pip);
			if (pip != NULL) {
				VHCI_RELEASE_LUN(vlun);
				vlun->svl_waiting_for_activepath = 0;
				goto bind_path;
			}

			/*
			 * Check if there is an ONLINE path OR a STANDBY path
			 * available. If none is available, do not attempt
			 * to do a failover, just return a fatal error at this
			 * point.
			 */
			npip = NULL;
			rval = mdi_select_path(cdip, NULL,
			    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
			    NULL, &npip);
			if ((npip == NULL) || (rval != MDI_SUCCESS)) {
				/*
				 * No paths available, just return FATAL error.
				 */
				VHCI_RELEASE_LUN(vlun);
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_FATAL_ERROR);
			}
			mdi_rele_path(npip);
			if (!(vpkt->vpkt_state & VHCI_PKT_IN_FAILOVER)) {
				VHCI_DEBUG(1, (CE_NOTE, NULL, "!invoking "
				    "mdi_failover\n"));
				rval = mdi_failover(vhci->vhci_dip, cdip,
				    MDI_FAILOVER_ASYNC);
			} else {
				rval = vlun->svl_failover_status;
			}
			if (rval == MDI_FAILURE) {
				VHCI_RELEASE_LUN(vlun);
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_FATAL_ERROR);
			} else if (rval == MDI_BUSY) {
				VHCI_RELEASE_LUN(vlun);
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_BUSY);
			} else {
				/*
				 * Async failover was kicked off (or is in
				 * progress); ask the caller to retry later.
				 */
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				vpkt->vpkt_state |= VHCI_PKT_IN_FAILOVER;
				return (TRAN_BUSY);
			}
		}
		vlun->svl_waiting_for_activepath = 0;
bind_path:
		vpkt->vpkt_path = pip;
		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
		ASSERT(svp != NULL);

		psd = svp->svp_psd;
		ASSERT(psd != NULL);
		address = &psd->sd_address;
	} else {
		/* A pHCI packet already exists; reuse its address. */
		pkt = vpkt->vpkt_hba_pkt;
		address = &pkt->pkt_address;
	}

	/* Verify match of specified path_instance and selected path_instance */
	ASSERT((path_instance == 0) ||
	    (path_instance == mdi_pi_get_path_instance(vpkt->vpkt_path)));

	/*
	 * For PKT_PARTIAL_DMA case, call pHCI's scsi_init_pkt whenever
	 * target driver calls vhci_scsi_init_pkt.
	 */
	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) &&
	    vpkt->vpkt_path && vpkt->vpkt_hba_pkt) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_bind_transport: PKT_PARTIAL_DMA "
		    "vpkt 0x%p, path 0x%p hba_pkt 0x%p\n",
		    (void *)vpkt, (void *)vpkt->vpkt_path, (void *)pkt));
		pkt = vpkt->vpkt_hba_pkt;
		address = &pkt->pkt_address;
	}

	if (pkt == NULL || (vpkt->vpkt_flags & CFLAG_DMA_PARTIAL)) {
		pkt = scsi_init_pkt(address, pkt,
		    vpkt->vpkt_tgt_init_bp, vpkt->vpkt_tgt_init_cdblen,
		    vpkt->vpkt_tgt_init_scblen, 0, flags, func, NULL);

		if (pkt == NULL) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "!bind transport: 0x%p 0x%p 0x%p\n",
			    (void *)vhci, (void *)psd, (void *)vpkt));
			if ((vpkt->vpkt_hba_pkt == NULL) && vpkt->vpkt_path) {
				MDI_PI_ERRSTAT(vpkt->vpkt_path,
				    MDI_PI_TRANSERR);
				mdi_rele_path(vpkt->vpkt_path);
				vpkt->vpkt_path = NULL;
			}
			if (pgr_sema_held) {
				sema_v(&vlun->svl_pgr_sema);
			}
			/*
			 * Consider it a fatal error if b_error is
			 * set as a result of DMA binding failure
			 * vs. a condition of being temporarily out of
			 * some resource
			 */
			if (vpkt->vpkt_tgt_init_bp == NULL ||
			    geterror(vpkt->vpkt_tgt_init_bp))
				return (TRAN_FATAL_ERROR);
			else
				return (TRAN_BUSY);
		}
	}

	/* Success: wire the pHCI packet to our vhci_pkt. */
	pkt->pkt_private = vpkt;
	vpkt->vpkt_hba_pkt = pkt;
	return (TRAN_ACCEPT);
}
2662
2663
2664 /*PRINTFLIKE3*/
2665 void
2666 vhci_log(int level, dev_info_t *dip, const char *fmt, ...)
2667 {
2668 char buf[256];
2669 va_list ap;
2670
2671 va_start(ap, fmt);
2672 (void) vsprintf(buf, fmt, ap);
2673 va_end(ap);
2674
2675 scsi_log(dip, "scsi_vhci", level, buf);
2676 }
2677
2678 /* do a PGR out with the information we've saved away */
static int
vhci_do_prout(scsi_vhci_priv_t *svp)
{

	struct scsi_pkt			*new_pkt;
	struct buf			*bp;
	scsi_vhci_lun_t			*vlun = svp->svp_svl;
	int				rval, retry, nr_retry, ua_retry;
	uint8_t				*sns, skey;

	/*
	 * Build a write buf around the PROUT parameter data saved in
	 * vlun->svl_prout and issue it down this path with FLAG_NOINTR.
	 * Returns 1 on success, 0 on failure, or VHCI_PGR_ILLEGALOP when
	 * the device reports ILLEGAL REQUEST.
	 */
	bp = getrbuf(KM_SLEEP);
	bp->b_flags = B_WRITE;
	bp->b_resid = 0;
	bp->b_un.b_addr = (caddr_t)&vlun->svl_prout;
	bp->b_bcount = vlun->svl_bcount;

	VHCI_INCR_PATH_CMDCOUNT(svp);

	new_pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
	    CDB_GROUP1, sizeof (struct scsi_arq_status), 0, 0,
	    SLEEP_FUNC, NULL);
	if (new_pkt == NULL) {
		VHCI_DECR_PATH_CMDCOUNT(svp);
		freerbuf(bp);
		cmn_err(CE_WARN, "!vhci_do_prout: scsi_init_pkt failed");
		return (0);
	}
	/* Copy the saved CDB/timeout under the LUN mutex. */
	mutex_enter(&vlun->svl_mutex);
	bp->b_un.b_addr = (caddr_t)&vlun->svl_prout;
	bp->b_bcount = vlun->svl_bcount;
	bcopy(vlun->svl_cdb, new_pkt->pkt_cdbp,
	    sizeof (vlun->svl_cdb));
	new_pkt->pkt_time = vlun->svl_time;
	mutex_exit(&vlun->svl_mutex);
	new_pkt->pkt_flags = FLAG_NOINTR;

	/* Separate retry budgets: NOT READY vs UNIT ATTENTION/other. */
	ua_retry = nr_retry = retry = 0;
again:
	rval = vhci_do_scsi_cmd(new_pkt);
	if (rval != 1) {
		/* Command failed; analyze ARQ sense data if present. */
		if ((new_pkt->pkt_reason == CMD_CMPLT) &&
		    (SCBP_C(new_pkt) == STATUS_CHECK) &&
		    (new_pkt->pkt_state & STATE_ARQ_DONE)) {
			sns = (uint8_t *)
			    &(((struct scsi_arq_status *)(uintptr_t)
			    (new_pkt->pkt_scbp))->sts_sensedata);
			skey = scsi_sense_key(sns);
			if ((skey == KEY_UNIT_ATTENTION) ||
			    (skey == KEY_NOT_READY)) {
				int max_retry;
				struct scsi_failover_ops *fops;
				fops = vlun->svl_fops;
				rval = fops->sfo_analyze_sense(svp->svp_psd,
				    sns, vlun->svl_fops_ctpriv);
				if (rval == SCSI_SENSE_NOT_READY) {
					/* NOT READY: wait 1s and retry. */
					max_retry = vhci_prout_not_ready_retry;
					retry = nr_retry++;
					delay(1*drv_usectohz(1000000));
				} else {
					/* chk for state change and update */
					if (rval == SCSI_SENSE_STATE_CHANGED) {
						int held;
						VHCI_HOLD_LUN(vlun,
						    VH_NOSLEEP, held);
						if (!held) {
							/*
							 * NOTE(review): this
							 * rval is overwritten
							 * by the retry path
							 * or the rval = 0
							 * below - confirm the
							 * intended effect.
							 */
							rval = TRAN_BUSY;
						} else {
							/* chk for alua first */
							vhci_update_pathstates(
							    (void *)vlun);
						}
					}
					retry = ua_retry++;
					max_retry = VHCI_MAX_PGR_RETRIES;
				}
				if (retry < max_retry) {
					VHCI_DEBUG(4, (CE_WARN, NULL,
					    "!vhci_do_prout retry 0x%x "
					    "(0x%x 0x%x 0x%x)",
					    SCBP_C(new_pkt),
					    new_pkt->pkt_cdbp[0],
					    new_pkt->pkt_cdbp[1],
					    new_pkt->pkt_cdbp[2]));
					goto again;
				}
				/* Retries exhausted: report failure. */
				rval = 0;
				VHCI_DEBUG(4, (CE_WARN, NULL,
				    "!vhci_do_prout 0x%x "
				    "(0x%x 0x%x 0x%x)",
				    SCBP_C(new_pkt),
				    new_pkt->pkt_cdbp[0],
				    new_pkt->pkt_cdbp[1],
				    new_pkt->pkt_cdbp[2]));
			} else if (skey == KEY_ILLEGAL_REQUEST)
				rval = VHCI_PGR_ILLEGALOP;
		}
	} else {
		rval = 1;
	}
	scsi_destroy_pkt(new_pkt);
	VHCI_DECR_PATH_CMDCOUNT(svp);
	freerbuf(bp);
	return (rval);
}
2783
static void
vhci_run_cmd(void *arg)
{
	struct scsi_pkt		*pkt = (struct scsi_pkt *)arg;
	struct scsi_pkt		*tpkt;
	scsi_vhci_priv_t	*svp;
	mdi_pathinfo_t		*pip, *npip;
	scsi_vhci_lun_t		*vlun;
	dev_info_t		*cdip;
	scsi_vhci_priv_t	*nsvp;
	int			fail = 0;
	int			rval;
	struct vhci_pkt		*vpkt;
	uchar_t			cdb_1;
	vhci_prout_t		*prout;

	/*
	 * Completion work for a PROUT registration: replicate the
	 * registration (as REGISTER AND IGNORE EXISTING KEY) on every
	 * other ONLINE/STANDBY path, then complete the target packet.
	 */
	vpkt = (struct vhci_pkt *)pkt->pkt_private;
	tpkt = vpkt->vpkt_tgt_pkt;
	pip = vpkt->vpkt_path;
	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
	if (svp == NULL) {
		/*
		 * NOTE(review): on this early path vlun is never assigned,
		 * yet the done: label below reads vlun->svl_first_path and
		 * posts vlun->svl_pgr_sema - confirm svp can never be NULL
		 * here, or initialize/guard vlun.
		 */
		tpkt->pkt_reason = CMD_TRAN_ERR;
		tpkt->pkt_statistics = STAT_ABORTED;
		goto done;
	}
	vlun = svp->svp_svl;
	prout = &vlun->svl_prout;
	if (SCBP_C(pkt) != STATUS_GOOD)
		fail++;
	cdip = vlun->svl_dip;
	pip = npip = NULL;
	rval = mdi_select_path(cdip, NULL,
	    MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH, NULL, &npip);
	if ((rval != MDI_SUCCESS) || (npip == NULL)) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_run_cmd: no path! 0x%p\n", (void *)svp));
		tpkt->pkt_reason = CMD_TRAN_ERR;
		tpkt->pkt_statistics = STAT_ABORTED;
		goto done;
	}

	/*
	 * Temporarily rewrite the saved CDB's service action to
	 * REGISTER AND IGNORE for the replication pass; restored below.
	 */
	cdb_1 = vlun->svl_cdb[1];
	vlun->svl_cdb[1] &= 0xe0;
	vlun->svl_cdb[1] |= VHCI_PROUT_R_AND_IGNORE;

	do {
		nsvp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(npip);
		if (nsvp == NULL) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_run_cmd: no "
			    "client priv! 0x%p offlined?\n",
			    (void *)npip));
			goto next_path;
		}
		if (vlun->svl_first_path == npip) {
			/* Original path already carries the registration. */
			goto next_path;
		} else {
			if (vhci_do_prout(nsvp) != 1)
				fail++;
		}
next_path:
		pip = npip;
		rval = mdi_select_path(cdip, NULL,
		    MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH,
		    pip, &npip);
		mdi_rele_path(pip);
	} while ((rval == MDI_SUCCESS) && (npip != NULL));

	/* Restore the original service action byte. */
	vlun->svl_cdb[1] = cdb_1;

	if (fail) {
		VHCI_DEBUG(4, (CE_WARN, NULL, "%s%d: key registration failed, "
		    "couldn't be replicated on all paths",
		    ddi_driver_name(cdip), ddi_get_instance(cdip)));
		vhci_print_prout_keys(vlun, "vhci_run_cmd: ");

		if (SCBP_C(pkt) != STATUS_GOOD) {
			tpkt->pkt_reason = CMD_TRAN_ERR;
			tpkt->pkt_statistics = STAT_ABORTED;
		}
	} else {
		/* All paths registered: record the now-active keys. */
		vlun->svl_pgr_active = 1;
		vhci_print_prout_keys(vlun, "vhci_run_cmd: before bcopy:");

		bcopy((const void *)prout->service_key,
		    (void *)prout->active_service_key, MHIOC_RESV_KEY_SIZE);
		bcopy((const void *)prout->res_key,
		    (void *)prout->active_res_key, MHIOC_RESV_KEY_SIZE);

		vhci_print_prout_keys(vlun, "vhci_run_cmd: after bcopy:");
	}
done:
	if (SCBP_C(pkt) == STATUS_GOOD)
		vlun->svl_first_path = NULL;

	if (svp)
		VHCI_DECR_PATH_CMDCOUNT(svp);

	/* Tear down the pHCI packet unless partial-DMA binding keeps it. */
	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
		scsi_destroy_pkt(pkt);
		vpkt->vpkt_hba_pkt = NULL;
		if (vpkt->vpkt_path) {
			mdi_rele_path(vpkt->vpkt_path);
			vpkt->vpkt_path = NULL;
		}
	}

	sema_v(&vlun->svl_pgr_sema);
	/*
	 * The PROUT commands are not included in the automatic retry
	 * mechanism, therefore, vpkt_org_vpkt should never be set here.
	 */
	ASSERT(vpkt->vpkt_org_vpkt == NULL);
	scsi_hba_pkt_comp(tpkt);
}
2899
2900 /*
2901 * Get the keys registered with this target. Since we will have
2902 * registered the same key with multiple initiators, strip out
2903 * any duplicate keys.
2904 *
2905 * The pointers which will be used to filter the registered keys from
2906 * the device will be stored in filter_prin and filter_pkt. If the
2907 * allocation length of the buffer was sufficient for the number of
2908 * parameter data bytes available to be returned by the device then the
2909 * key filtering will use the keylist returned from the original
2910 * request. If the allocation length of the buffer was not sufficient,
2911 * then the filtering will use the keylist returned from the request
2912 * that is resent below.
2913 *
2914 * If the device returns an additional length field that is greater than
2915 * the allocation length of the buffer, then allocate a new buffer which
2916 * can accommodate the number of parameter data bytes available to be
2917 * returned. Resend the scsi PRIN command, filter out the duplicate
2918 * keys and return as many of the unique keys found that was originally
2919 * requested and set the additional length field equal to the data bytes
2920 * of unique reservation keys available to be returned.
2921 *
2922 * If the device returns an additional length field that is less than or
2923 * equal to the allocation length of the buffer, then all the available
2924 * keys registered were returned by the device. Filter out the
2925 * duplicate keys and return all of the unique keys found and set the
2926 * additional length field equal to the data bytes of the reservation
2927 * keys to be returned.
2928 */
2929
2930 #define VHCI_PRIN_HEADER_SZ (sizeof (prin->length) + sizeof (prin->generation))
2931
2932 static int
2933 vhci_do_prin(struct vhci_pkt **intr_vpkt)
2934 {
2935 scsi_vhci_priv_t *svp;
2936 struct vhci_pkt *vpkt = *intr_vpkt;
2937 vhci_prin_readkeys_t *prin;
2938 scsi_vhci_lun_t *vlun;
2939 struct scsi_vhci *vhci = ADDR2VHCI(&vpkt->vpkt_tgt_pkt->pkt_address);
2940
2941 struct buf *new_bp = NULL;
2942 struct scsi_pkt *new_pkt = NULL;
2943 struct vhci_pkt *new_vpkt = NULL;
2944 uint32_t needed_length;
2945 int rval = VHCI_CMD_CMPLT;
2946 uint32_t prin_length = 0;
2947 uint32_t svl_prin_length = 0;
2948
2949 ASSERT(vpkt->vpkt_path);
2950 svp = mdi_pi_get_vhci_private(vpkt->vpkt_path);
2951 ASSERT(svp);
2952 vlun = svp->svp_svl;
2953 ASSERT(vlun);
2954
2955 /*
2956 * If the caller only asked for an amount of data that would not
2957 * be enough to include any key data it is likely that they will
2958 * send the next command with a buffer size based on the information
2959 * from this header. Doing recovery on this would be a duplication
2960 * of efforts.
2961 */
2962 if (vpkt->vpkt_tgt_init_bp->b_bcount <= VHCI_PRIN_HEADER_SZ) {
2963 rval = VHCI_CMD_CMPLT;
2964 goto exit;
2965 }
2966
2967 if (vpkt->vpkt_org_vpkt == NULL) {
2968 /*
2969 * Can fail as sleep is not allowed.
2970 */
2971 prin = (vhci_prin_readkeys_t *)
2972 bp_mapin_common(vpkt->vpkt_tgt_init_bp, VM_NOSLEEP);
2973 } else {
2974 /*
2975 * The retry buf doesn't need to be mapped in.
2976 */
2977 prin = (vhci_prin_readkeys_t *)
2978 vpkt->vpkt_tgt_init_bp->b_un.b_daddr;
2979 }
2980
2981 if (prin == NULL) {
2982 VHCI_DEBUG(5, (CE_WARN, NULL,
2983 "vhci_do_prin: bp_mapin_common failed."));
2984 rval = VHCI_CMD_ERROR;
2985 goto fail;
2986 }
2987
2988 prin_length = BE_32(prin->length);
2989
2990 /*
2991 * According to SPC-3r22, sec 4.3.4.6: "If the amount of
2992 * information to be transferred exceeds the maximum value
2993 * that the ALLOCATION LENGTH field is capable of specifying,
2994 * the device server shall...terminate the command with CHECK
2995 * CONDITION status". The ALLOCATION LENGTH field of the
2996 * PERSISTENT RESERVE IN command is 2 bytes. We should never
2997 * get here with an ADDITIONAL LENGTH greater than 0xFFFF
2998 * so if we do, then it is an error!
2999 */
3000
3001
3002 if ((prin_length + VHCI_PRIN_HEADER_SZ) > 0xFFFF) {
3003 VHCI_DEBUG(5, (CE_NOTE, NULL,
3004 "vhci_do_prin: Device returned invalid "
3005 "length 0x%x\n", prin_length));
3006 rval = VHCI_CMD_ERROR;
3007 goto fail;
3008 }
3009 needed_length = prin_length + VHCI_PRIN_HEADER_SZ;
3010
3011 /*
3012 * If prin->length is greater than the byte count allocated in the
3013 * original buffer, then resend the request with enough buffer
3014 * allocated to get all of the available registered keys.
3015 */
3016 if ((vpkt->vpkt_tgt_init_bp->b_bcount < needed_length) &&
3017 (vpkt->vpkt_org_vpkt == NULL)) {
3018
3019 new_pkt = vhci_create_retry_pkt(vpkt);
3020 if (new_pkt == NULL) {
3021 rval = VHCI_CMD_ERROR;
3022 goto fail;
3023 }
3024 new_vpkt = TGTPKT2VHCIPKT(new_pkt);
3025
3026 /*
3027 * This is the buf with buffer pointer
3028 * where the prin readkeys will be
3029 * returned from the device
3030 */
3031 new_bp = scsi_alloc_consistent_buf(&svp->svp_psd->sd_address,
3032 NULL, needed_length, B_READ, NULL_FUNC, NULL);
3033 if ((new_bp == NULL) || (new_bp->b_un.b_addr == NULL)) {
3034 if (new_bp) {
3035 scsi_free_consistent_buf(new_bp);
3036 }
3037 vhci_scsi_destroy_pkt(&new_pkt->pkt_address, new_pkt);
3038 rval = VHCI_CMD_ERROR;
3039 goto fail;
3040 }
3041 new_bp->b_bcount = needed_length;
3042 new_pkt->pkt_cdbp[7] = (uchar_t)(needed_length >> 8);
3043 new_pkt->pkt_cdbp[8] = (uchar_t)needed_length;
3044
3045 rval = VHCI_CMD_RETRY;
3046
3047 new_vpkt->vpkt_tgt_init_bp = new_bp;
3048 }
3049
3050 if (rval == VHCI_CMD_RETRY) {
3051
3052 /*
		 * There were more keys than the original request asked for.
3054 */
3055 mdi_pathinfo_t *path_holder = vpkt->vpkt_path;
3056
3057 /*
3058 * Release the old path because it does not matter which path
3059 * this command is sent down. This allows the normal bind
3060 * transport mechanism to be used.
3061 */
3062 if (vpkt->vpkt_path != NULL) {
3063 mdi_rele_path(vpkt->vpkt_path);
3064 vpkt->vpkt_path = NULL;
3065 }
3066
3067 /*
3068 * Dispatch the retry command
3069 */
3070 if (taskq_dispatch(vhci->vhci_taskq, vhci_dispatch_scsi_start,
3071 (void *) new_vpkt, KM_NOSLEEP) == NULL) {
3072 if (path_holder) {
3073 vpkt->vpkt_path = path_holder;
3074 mdi_hold_path(path_holder);
3075 }
3076 scsi_free_consistent_buf(new_bp);
3077 vhci_scsi_destroy_pkt(&new_pkt->pkt_address, new_pkt);
3078 rval = VHCI_CMD_ERROR;
3079 goto fail;
3080 }
3081
3082 /*
3083 * If we return VHCI_CMD_RETRY, that means the caller
3084 * is going to bail and wait for the reissued command
3085 * to complete. In that case, we need to decrement
3086 * the path command count right now. In any other
3087 * case, it'll be decremented by the caller.
3088 */
3089 VHCI_DECR_PATH_CMDCOUNT(svp);
3090 goto exit;
3091
3092 }
3093
3094 if (rval == VHCI_CMD_CMPLT) {
3095 /*
3096 * The original request got all of the keys or the recovery
3097 * packet returns.
3098 */
3099 int new;
3100 int old;
3101 int num_keys = prin_length / MHIOC_RESV_KEY_SIZE;
3102
3103 VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_do_prin: %d keys read\n",
3104 num_keys));
3105
3106 #ifdef DEBUG
3107 VHCI_DEBUG(5, (CE_NOTE, NULL, "vhci_do_prin: from storage\n"));
3108 if (vhci_debug == 5)
3109 vhci_print_prin_keys(prin, num_keys);
3110 VHCI_DEBUG(5, (CE_NOTE, NULL,
3111 "vhci_do_prin: MPxIO old keys:\n"));
3112 if (vhci_debug == 5)
3113 vhci_print_prin_keys(&vlun->svl_prin, num_keys);
3114 #endif
3115
3116 /*
3117 * Filter out all duplicate keys returned from the device
3118 * We know that we use a different key for every host, so we
3119 * can simply strip out duplicates. Otherwise we would need to
3120 * do more bookkeeping to figure out which keys to strip out.
3121 */
3122
3123 new = 0;
3124
3125 /*
3126 * If we got at least 1 key copy it.
3127 */
3128 if (num_keys > 0) {
3129 vlun->svl_prin.keylist[0] = prin->keylist[0];
3130 new++;
3131 }
3132
3133 /*
3134 * find next unique key.
3135 */
3136 for (old = 1; old < num_keys; old++) {
3137 int j;
3138 int match = 0;
3139
3140 if (new >= VHCI_NUM_RESV_KEYS)
3141 break;
3142 for (j = 0; j < new; j++) {
3143 if (bcmp(&prin->keylist[old],
3144 &vlun->svl_prin.keylist[j],
3145 sizeof (mhioc_resv_key_t)) == 0) {
3146 match = 1;
3147 break;
3148 }
3149 }
3150 if (!match) {
3151 vlun->svl_prin.keylist[new] =
3152 prin->keylist[old];
3153 new++;
3154 }
3155 }
3156
3157 /* Stored Big Endian */
3158 vlun->svl_prin.generation = prin->generation;
3159 svl_prin_length = new * sizeof (mhioc_resv_key_t);
3160 /* Stored Big Endian */
3161 vlun->svl_prin.length = BE_32(svl_prin_length);
3162 svl_prin_length += VHCI_PRIN_HEADER_SZ;
3163
3164 /*
3165 * If we arrived at this point after issuing a retry, make sure
3166 * that we put everything back the way it originally was so
3167 * that the target driver can complete the command correctly.
3168 */
3169 if (vpkt->vpkt_org_vpkt != NULL) {
3170 new_bp = vpkt->vpkt_tgt_init_bp;
3171
3172 scsi_free_consistent_buf(new_bp);
3173
3174 vpkt = vhci_sync_retry_pkt(vpkt);
3175 *intr_vpkt = vpkt;
3176
3177 /*
3178 * Make sure the original buffer is mapped into kernel
3179 * space before we try to copy the filtered keys into
3180 * it.
3181 */
3182 prin = (vhci_prin_readkeys_t *)bp_mapin_common(
3183 vpkt->vpkt_tgt_init_bp, VM_NOSLEEP);
3184 }
3185
3186 /*
3187 * Now copy the desired number of prin keys into the original
3188 * target buffer.
3189 */
3190 if (svl_prin_length <= vpkt->vpkt_tgt_init_bp->b_bcount) {
3191 /*
3192 * It is safe to return all of the available unique
3193 * keys
3194 */
3195 bcopy(&vlun->svl_prin, prin, svl_prin_length);
3196 } else {
3197 /*
3198 * Not all of the available keys were requested by the
3199 * original command.
3200 */
3201 bcopy(&vlun->svl_prin, prin,
3202 vpkt->vpkt_tgt_init_bp->b_bcount);
3203 }
3204 #ifdef DEBUG
3205 VHCI_DEBUG(5, (CE_NOTE, NULL,
3206 "vhci_do_prin: To Application:\n"));
3207 if (vhci_debug == 5)
3208 vhci_print_prin_keys(prin, new);
3209 VHCI_DEBUG(5, (CE_NOTE, NULL,
3210 "vhci_do_prin: MPxIO new keys:\n"));
3211 if (vhci_debug == 5)
3212 vhci_print_prin_keys(&vlun->svl_prin, new);
3213 #endif
3214 }
3215 fail:
3216 if (rval == VHCI_CMD_ERROR) {
3217 /*
3218 * If we arrived at this point after issuing a
3219 * retry, make sure that we put everything back
3220 * the way it originally was so that ssd can
3221 * complete the command correctly.
3222 */
3223
3224 if (vpkt->vpkt_org_vpkt != NULL) {
3225 new_bp = vpkt->vpkt_tgt_init_bp;
3226 if (new_bp != NULL) {
3227 scsi_free_consistent_buf(new_bp);
3228 }
3229
3230 new_vpkt = vpkt;
3231 vpkt = vpkt->vpkt_org_vpkt;
3232
3233 vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
3234 new_vpkt->vpkt_tgt_pkt);
3235 }
3236
3237 /*
3238 * Mark this command completion as having an error so that
3239 * ssd will retry the command.
3240 */
3241
3242 vpkt->vpkt_tgt_pkt->pkt_reason = CMD_ABORTED;
3243 vpkt->vpkt_tgt_pkt->pkt_statistics |= STAT_ABORTED;
3244
3245 rval = VHCI_CMD_CMPLT;
3246 }
3247 exit:
3248 /*
3249 * Make sure that the semaphore is only released once.
3250 */
3251 if (rval == VHCI_CMD_CMPLT) {
3252 sema_v(&vlun->svl_pgr_sema);
3253 }
3254
3255 return (rval);
3256 }
3257
3258 static void
3259 vhci_intr(struct scsi_pkt *pkt)
3260 {
3261 struct vhci_pkt *vpkt = (struct vhci_pkt *)pkt->pkt_private;
3262 struct scsi_pkt *tpkt;
3263 scsi_vhci_priv_t *svp;
3264 scsi_vhci_lun_t *vlun;
3265 int rval, held;
3266 struct scsi_failover_ops *fops;
3267 uint8_t *sns, skey, asc, ascq;
3268 mdi_pathinfo_t *lpath;
3269 static char *timeout_err = "Command Timeout";
3270 static char *parity_err = "Parity Error";
3271 char *err_str = NULL;
3272 dev_info_t *vdip, *cdip;
3273 char *cpath;
3274
3275 ASSERT(vpkt != NULL);
3276 tpkt = vpkt->vpkt_tgt_pkt;
3277 ASSERT(tpkt != NULL);
3278 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
3279 ASSERT(svp != NULL);
3280 vlun = svp->svp_svl;
3281 ASSERT(vlun != NULL);
3282 lpath = vpkt->vpkt_path;
3283
3284 /*
3285 * sync up the target driver's pkt with the pkt that
3286 * we actually used
3287 */
3288 *(tpkt->pkt_scbp) = *(pkt->pkt_scbp);
3289 tpkt->pkt_resid = pkt->pkt_resid;
3290 tpkt->pkt_state = pkt->pkt_state;
3291 tpkt->pkt_statistics = pkt->pkt_statistics;
3292 tpkt->pkt_reason = pkt->pkt_reason;
3293 tpkt->pkt_start = pkt->pkt_start;
3294 tpkt->pkt_stop = pkt->pkt_stop;
3295
3296 /* Return path_instance information back to the target driver. */
3297 if (scsi_pkt_allocated_correctly(tpkt)) {
3298 if (scsi_pkt_allocated_correctly(pkt)) {
3299 /*
3300 * If both packets were correctly allocated,
3301 * return path returned by pHCI.
3302 */
3303 tpkt->pkt_path_instance = pkt->pkt_path_instance;
3304 } else {
3305 /* Otherwise return path of pHCI we used */
3306 tpkt->pkt_path_instance =
3307 mdi_pi_get_path_instance(lpath);
3308 }
3309 }
3310
3311 if (pkt->pkt_cdbp[0] == SCMD_PROUT &&
3312 ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
3313 ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE)) {
3314 if ((SCBP_C(pkt) != STATUS_GOOD) ||
3315 (pkt->pkt_reason != CMD_CMPLT)) {
3316 sema_v(&vlun->svl_pgr_sema);
3317 }
3318 } else if (pkt->pkt_cdbp[0] == SCMD_PRIN) {
3319 if (pkt->pkt_reason != CMD_CMPLT ||
3320 (SCBP_C(pkt) != STATUS_GOOD)) {
3321 sema_v(&vlun->svl_pgr_sema);
3322 }
3323 }
3324
3325 switch (pkt->pkt_reason) {
3326 case CMD_CMPLT:
3327 /*
3328 * cmd completed successfully, check for scsi errors
3329 */
3330 switch (*(pkt->pkt_scbp)) {
3331 case STATUS_CHECK:
3332 if (pkt->pkt_state & STATE_ARQ_DONE) {
3333 sns = (uint8_t *)
3334 &(((struct scsi_arq_status *)(uintptr_t)
3335 (pkt->pkt_scbp))->sts_sensedata);
3336 skey = scsi_sense_key(sns);
3337 asc = scsi_sense_asc(sns);
3338 ascq = scsi_sense_ascq(sns);
3339 fops = vlun->svl_fops;
3340 ASSERT(fops != NULL);
3341 VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_intr: "
3342 "Received sns key %x esc %x escq %x\n",
3343 skey, asc, ascq));
3344
3345 if (vlun->svl_waiting_for_activepath == 1) {
3346 /*
3347 * if we are here it means we are
3348 * in the midst of a probe/attach
3349 * through a passive path; this
3350 * case is exempt from sense analysis
3351 * for detection of ext. failover
3352 * because that would unnecessarily
3353 * increase attach time.
3354 */
3355 bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3356 vpkt->vpkt_tgt_init_scblen);
3357 break;
3358 }
3359 if (asc == VHCI_SCSI_PERR) {
3360 /*
3361 * parity error
3362 */
3363 err_str = parity_err;
3364 bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3365 vpkt->vpkt_tgt_init_scblen);
3366 break;
3367 }
3368 rval = fops->sfo_analyze_sense(svp->svp_psd,
3369 sns, vlun->svl_fops_ctpriv);
3370 if ((rval == SCSI_SENSE_NOFAILOVER) ||
3371 (rval == SCSI_SENSE_UNKNOWN) ||
3372 (rval == SCSI_SENSE_NOT_READY)) {
3373 bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3374 vpkt->vpkt_tgt_init_scblen);
3375 break;
3376 } else if (rval == SCSI_SENSE_STATE_CHANGED) {
3377 struct scsi_vhci *vhci;
3378 vhci = ADDR2VHCI(&tpkt->pkt_address);
3379 VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
3380 if (!held) {
3381 /*
3382 * looks like some other thread
3383 * has already detected this
3384 * condition
3385 */
3386 tpkt->pkt_state &=
3387 ~STATE_ARQ_DONE;
3388 *(tpkt->pkt_scbp) =
3389 STATUS_BUSY;
3390 break;
3391 }
3392 (void) taskq_dispatch(
3393 vhci->vhci_update_pathstates_taskq,
3394 vhci_update_pathstates,
3395 (void *)vlun, KM_SLEEP);
3396 } else {
3397 /*
3398 * externally initiated failover
3399 * has occurred or is in progress
3400 */
3401 VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
3402 if (!held) {
3403 /*
3404 * looks like some other thread
3405 * has already detected this
3406 * condition
3407 */
3408 tpkt->pkt_state &=
3409 ~STATE_ARQ_DONE;
3410 *(tpkt->pkt_scbp) =
3411 STATUS_BUSY;
3412 break;
3413 } else {
3414 rval = vhci_handle_ext_fo
3415 (pkt, rval);
3416 if (rval == BUSY_RETURN) {
3417 tpkt->pkt_state &=
3418 ~STATE_ARQ_DONE;
3419 *(tpkt->pkt_scbp) =
3420 STATUS_BUSY;
3421 break;
3422 }
3423 bcopy(pkt->pkt_scbp,
3424 tpkt->pkt_scbp,
3425 vpkt->vpkt_tgt_init_scblen);
3426 break;
3427 }
3428 }
3429 }
3430 break;
3431
3432 /*
3433 * If this is a good SCSI-II RELEASE cmd completion then restore
3434 * the load balancing policy and reset VLUN_RESERVE_ACTIVE_FLG.
3435 * If this is a good SCSI-II RESERVE cmd completion then set
3436 * VLUN_RESERVE_ACTIVE_FLG.
3437 */
3438 case STATUS_GOOD:
3439 if ((pkt->pkt_cdbp[0] == SCMD_RELEASE) ||
3440 (pkt->pkt_cdbp[0] == SCMD_RELEASE_G1)) {
3441 (void) mdi_set_lb_policy(vlun->svl_dip,
3442 vlun->svl_lb_policy_save);
3443 vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
3444 VHCI_DEBUG(1, (CE_WARN, NULL,
3445 "!vhci_intr: vlun 0x%p release path 0x%p",
3446 (void *)vlun, (void *)vpkt->vpkt_path));
3447 }
3448
3449 if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3450 (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3451 vlun->svl_flags |= VLUN_RESERVE_ACTIVE_FLG;
3452 vlun->svl_resrv_pip = vpkt->vpkt_path;
3453 VHCI_DEBUG(1, (CE_WARN, NULL,
3454 "!vhci_intr: vlun 0x%p reserved path 0x%p",
3455 (void *)vlun, (void *)vpkt->vpkt_path));
3456 }
3457 break;
3458
3459 case STATUS_RESERVATION_CONFLICT:
3460 VHCI_DEBUG(1, (CE_WARN, NULL,
3461 "!vhci_intr: vlun 0x%p "
3462 "reserve conflict on path 0x%p",
3463 (void *)vlun, (void *)vpkt->vpkt_path));
3464 /* FALLTHROUGH */
3465 default:
3466 break;
3467 }
3468
3469 /*
3470 * Update I/O completion statistics for the path
3471 */
3472 mdi_pi_kstat_iosupdate(vpkt->vpkt_path, vpkt->vpkt_tgt_init_bp);
3473
3474 /*
3475 * Command completed successfully, release the dma binding and
3476 * destroy the transport side of the packet.
3477 */
3478 if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
3479 (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
3480 ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
3481 if (SCBP_C(pkt) == STATUS_GOOD) {
3482 ASSERT(vlun->svl_taskq);
3483 svp->svp_last_pkt_reason = pkt->pkt_reason;
3484 (void) taskq_dispatch(vlun->svl_taskq,
3485 vhci_run_cmd, pkt, KM_SLEEP);
3486 return;
3487 }
3488 }
3489 if ((SCBP_C(pkt) == STATUS_GOOD) &&
3490 (pkt->pkt_cdbp[0] == SCMD_PRIN) && vpkt->vpkt_tgt_init_bp) {
3491 /*
3492 * If the action (value in byte 1 of the cdb) is zero,
3493 * we're reading keys, and that's the only condition
3494 * where we need to be concerned with filtering keys
3495 * and potential retries. Otherwise, we simply signal
3496 * the semaphore and move on.
3497 */
3498 if (pkt->pkt_cdbp[1] == 0) {
3499 /*
3500 * If this is the completion of an internal
3501 * retry then we need to make sure that the
3502 * pkt and tpkt pointers are readjusted so
3503 * the calls to scsi_destroy_pkt and pkt_comp
3504 * below work * correctly.
3505 */
3506 if (vpkt->vpkt_org_vpkt != NULL) {
3507 pkt = vpkt->vpkt_org_vpkt->vpkt_hba_pkt;
3508 tpkt = vpkt->vpkt_org_vpkt->
3509 vpkt_tgt_pkt;
3510
3511 /*
3512 * If this command was issued through
3513 * the taskq then we need to clear
3514 * this flag for proper processing in
3515 * the case of a retry from the target
3516 * driver.
3517 */
3518 vpkt->vpkt_state &=
3519 ~VHCI_PKT_THRU_TASKQ;
3520 }
3521
3522 /*
3523 * if vhci_do_prin returns VHCI_CMD_CMPLT then
3524 * vpkt will contain the address of the
3525 * original vpkt
3526 */
3527 if (vhci_do_prin(&vpkt) == VHCI_CMD_RETRY) {
3528 /*
3529 * The command has been resent to get
3530 * all the keys from the device. Don't
3531 * complete the command with ssd until
3532 * the retry completes.
3533 */
3534 return;
3535 }
3536 } else {
3537 sema_v(&vlun->svl_pgr_sema);
3538 }
3539 }
3540
3541 break;
3542
3543 case CMD_TIMEOUT:
3544 if ((pkt->pkt_statistics &
3545 (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
3546
3547 VHCI_DEBUG(1, (CE_NOTE, NULL,
3548 "!scsi vhci timeout invoked\n"));
3549
3550 (void) vhci_recovery_reset(vlun, &pkt->pkt_address,
3551 FALSE, VHCI_DEPTH_ALL);
3552 }
3553 MDI_PI_ERRSTAT(lpath, MDI_PI_TRANSERR);
3554 tpkt->pkt_statistics |= STAT_ABORTED;
3555 err_str = timeout_err;
3556 break;
3557
3558 case CMD_TRAN_ERR:
3559 /*
3560 * This status is returned if the transport has sent the cmd
3561 * down the link to the target and then some error occurs.
3562 * In case of SCSI-II RESERVE cmd, we don't know if the
3563 * reservation been accepted by the target or not, so we need
3564 * to clear the reservation.
3565 */
3566 if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3567 (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3568 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_intr received"
3569 " cmd_tran_err for scsi-2 reserve cmd\n"));
3570 if (!vhci_recovery_reset(vlun, &pkt->pkt_address,
3571 TRUE, VHCI_DEPTH_TARGET)) {
3572 VHCI_DEBUG(1, (CE_WARN, NULL,
3573 "!vhci_intr cmd_tran_err reset failed!"));
3574 }
3575 }
3576 break;
3577
3578 case CMD_DEV_GONE:
3579 /*
3580 * If this is the last path then report CMD_DEV_GONE to the
3581 * target driver, otherwise report BUSY to triggger retry.
3582 */
3583 if (vlun->svl_dip &&
3584 (mdi_client_get_path_count(vlun->svl_dip) <= 1)) {
3585 struct scsi_vhci *vhci;
3586 vhci = ADDR2VHCI(&tpkt->pkt_address);
3587 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_intr received "
3588 "cmd_dev_gone on last path\n"));
3589 (void) vhci_invalidate_mpapi_lu(vhci, vlun);
3590 break;
3591 }
3592
3593 /* Report CMD_CMPLT-with-BUSY to cause retry. */
3594 VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_intr received "
3595 "cmd_dev_gone\n"));
3596 tpkt->pkt_reason = CMD_CMPLT;
3597 tpkt->pkt_state = STATE_GOT_BUS |
3598 STATE_GOT_TARGET | STATE_SENT_CMD |
3599 STATE_GOT_STATUS;
3600 *(tpkt->pkt_scbp) = STATUS_BUSY;
3601 break;
3602
3603 default:
3604 break;
3605 }
3606
3607 /*
3608 * SCSI-II RESERVE cmd has been serviced by the lower layers clear
3609 * the flag so the lun is not QUIESCED any longer.
3610 * Also clear the VHCI_PKT_THRU_TASKQ flag, to ensure that if this pkt
3611 * is retried, a taskq shall again be dispatched to service it. Else
3612 * it may lead to a system hang if the retry is within interrupt
3613 * context.
3614 */
3615 if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3616 (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3617 vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
3618 vpkt->vpkt_state &= ~VHCI_PKT_THRU_TASKQ;
3619 }
3620
3621 /*
3622 * vpkt_org_vpkt should always be NULL here if the retry command
3623 * has been successfully processed. If vpkt_org_vpkt != NULL at
3624 * this point, it is an error so restore the original vpkt and
3625 * return an error to the target driver so it can retry the
3626 * command as appropriate.
3627 */
3628 if (vpkt->vpkt_org_vpkt != NULL) {
3629 struct vhci_pkt *new_vpkt = vpkt;
3630 vpkt = vpkt->vpkt_org_vpkt;
3631
3632 vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
3633 new_vpkt->vpkt_tgt_pkt);
3634
3635 /*
3636 * Mark this command completion as having an error so that
3637 * ssd will retry the command.
3638 */
3639 vpkt->vpkt_tgt_pkt->pkt_reason = CMD_ABORTED;
3640 vpkt->vpkt_tgt_pkt->pkt_statistics |= STAT_ABORTED;
3641
3642 pkt = vpkt->vpkt_hba_pkt;
3643 tpkt = vpkt->vpkt_tgt_pkt;
3644 }
3645
3646 if ((err_str != NULL) && (pkt->pkt_reason !=
3647 svp->svp_last_pkt_reason)) {
3648 cdip = vlun->svl_dip;
3649 vdip = ddi_get_parent(cdip);
3650 cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3651 vhci_log(CE_WARN, vdip, "!%s (%s%d): %s on path %s",
3652 ddi_pathname(cdip, cpath), ddi_driver_name(cdip),
3653 ddi_get_instance(cdip), err_str,
3654 mdi_pi_spathname(vpkt->vpkt_path));
3655 kmem_free(cpath, MAXPATHLEN);
3656 }
3657 svp->svp_last_pkt_reason = pkt->pkt_reason;
3658 VHCI_DECR_PATH_CMDCOUNT(svp);
3659
3660 /*
3661 * For PARTIAL_DMA, vhci should not free the path.
3662 * Target driver will call into vhci_scsi_dmafree or
3663 * destroy pkt to release this path.
3664 */
3665 if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
3666 scsi_destroy_pkt(pkt);
3667 vpkt->vpkt_hba_pkt = NULL;
3668 if (vpkt->vpkt_path) {
3669 mdi_rele_path(vpkt->vpkt_path);
3670 vpkt->vpkt_path = NULL;
3671 }
3672 }
3673
3674 scsi_hba_pkt_comp(tpkt);
3675 }
3676
3677 /*
3678 * two possibilities: (1) failover has completed
3679 * or (2) is in progress; update our path states for
3680 * the former case; for the latter case,
3681 * initiate a scsi_watch request to
3682 * determine when failover completes - vlun is HELD
3683 * until failover completes; BUSY is returned to upper
3684 * layer in both the cases
3685 */
3686 static int
3687 vhci_handle_ext_fo(struct scsi_pkt *pkt, int fostat)
3688 {
3689 struct vhci_pkt *vpkt = (struct vhci_pkt *)pkt->pkt_private;
3690 struct scsi_pkt *tpkt;
3691 scsi_vhci_priv_t *svp;
3692 scsi_vhci_lun_t *vlun;
3693 struct scsi_vhci *vhci;
3694 scsi_vhci_swarg_t *swarg;
3695 char *path;
3696
3697 ASSERT(vpkt != NULL);
3698 tpkt = vpkt->vpkt_tgt_pkt;
3699 ASSERT(tpkt != NULL);
3700 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
3701 ASSERT(svp != NULL);
3702 vlun = svp->svp_svl;
3703 ASSERT(vlun != NULL);
3704 ASSERT(VHCI_LUN_IS_HELD(vlun));
3705
3706 vhci = ADDR2VHCI(&tpkt->pkt_address);
3707
3708 if (fostat == SCSI_SENSE_INACTIVE) {
3709 VHCI_DEBUG(1, (CE_NOTE, NULL, "!Failover "
3710 "detected for %s; updating path states...\n",
3711 vlun->svl_lun_wwn));
3712 /*
3713 * set the vlun flag to indicate to the task that the target
3714 * port group needs updating
3715 */
3716 vlun->svl_flags |= VLUN_UPDATE_TPG;
3717 (void) taskq_dispatch(vhci->vhci_update_pathstates_taskq,
3718 vhci_update_pathstates, (void *)vlun, KM_SLEEP);
3719 } else {
3720 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3721 vhci_log(CE_NOTE, ddi_get_parent(vlun->svl_dip),
3722 "!%s (%s%d): Waiting for externally initiated failover "
3723 "to complete", ddi_pathname(vlun->svl_dip, path),
3724 ddi_driver_name(vlun->svl_dip),
3725 ddi_get_instance(vlun->svl_dip));
3726 kmem_free(path, MAXPATHLEN);
3727 swarg = kmem_alloc(sizeof (*swarg), KM_NOSLEEP);
3728 if (swarg == NULL) {
3729 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_handle_ext_fo: "
3730 "request packet allocation for %s failed....\n",
3731 vlun->svl_lun_wwn));
3732 VHCI_RELEASE_LUN(vlun);
3733 return (PKT_RETURN);
3734 }
3735 swarg->svs_svp = svp;
3736 swarg->svs_tos = gethrtime();
3737 swarg->svs_pi = vpkt->vpkt_path;
3738 swarg->svs_release_lun = 0;
3739 swarg->svs_done = 0;
3740 /*
3741 * place a hold on the path...we don't want it to
3742 * vanish while scsi_watch is in progress
3743 */
3744 mdi_hold_path(vpkt->vpkt_path);
3745 svp->svp_sw_token = scsi_watch_request_submit(svp->svp_psd,
3746 VHCI_FOWATCH_INTERVAL, SENSE_LENGTH, vhci_efo_watch_cb,
3747 (caddr_t)swarg);
3748 }
3749 return (BUSY_RETURN);
3750 }
3751
3752 /*
3753 * vhci_efo_watch_cb:
3754 * Callback from scsi_watch request to check the failover status.
3755 * Completion is either due to successful failover or timeout.
3756 * Upon successful completion, vhci_update_path_states is called.
3757 * For timeout condition, vhci_efo_done is called.
3758 * Always returns 0 to scsi_watch to keep retrying till vhci_efo_done
3759 * terminates this request properly in a separate thread.
3760 */
3761
static int
vhci_efo_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct scsi_status		*statusp = resultp->statusp;
	uint8_t				*sensep = (uint8_t *)resultp->sensep;
	struct scsi_pkt			*pkt = resultp->pkt;
	scsi_vhci_swarg_t		*swarg;
	scsi_vhci_priv_t		*svp;
	scsi_vhci_lun_t			*vlun;
	struct scsi_vhci		*vhci;
	dev_info_t			*vdip;
	int				rval, updt_paths;

	swarg = (scsi_vhci_swarg_t *)(uintptr_t)arg;
	svp = swarg->svs_svp;
	if (swarg->svs_done) {
		/*
		 * Already completed failover or timedout.
		 * Waiting for vhci_efo_done to terminate this scsi_watch.
		 */
		return (0);
	}

	ASSERT(svp != NULL);
	vlun = svp->svp_svl;
	ASSERT(vlun != NULL);
	ASSERT(VHCI_LUN_IS_HELD(vlun));
	vlun->svl_efo_update_path = 0;
	vdip = ddi_get_parent(vlun->svl_dip);
	vhci = ddi_get_soft_state(vhci_softstate,
	    ddi_get_instance(vdip));

	updt_paths = 0;

	/*
	 * Transport-level failure: keep pinging until the EFO timeout
	 * expires, then give up (timeout path releases the lun hold).
	 */
	if (pkt->pkt_reason != CMD_CMPLT) {
		if ((gethrtime() - swarg->svs_tos) >= VHCI_EXTFO_TIMEOUT) {
			swarg->svs_release_lun = 1;
			goto done;
		}
		return (0);
	}
	if (*((unsigned char *)statusp) == STATUS_CHECK) {
		rval = vlun->svl_fops->sfo_analyze_sense(svp->svp_psd, sensep,
		    vlun->svl_fops_ctpriv);
		switch (rval) {
		/*
		 * Only update path states in case path is definitely
		 * inactive, or no failover occurred.  For all other
		 * check conditions continue pinging.  A unexpected
		 * check condition shouldn't cause pinging to complete
		 * prematurely.
		 */
		case SCSI_SENSE_INACTIVE:
		case SCSI_SENSE_NOFAILOVER:
			updt_paths = 1;
			break;
		default:
			if ((gethrtime() - swarg->svs_tos)
			    >= VHCI_EXTFO_TIMEOUT) {
				swarg->svs_release_lun = 1;
				goto done;
			}
			return (0);
		}
	} else if (*((unsigned char *)statusp) ==
	    STATUS_RESERVATION_CONFLICT) {
		updt_paths = 1;
	} else if ((*((unsigned char *)statusp)) &
	    (STATUS_BUSY | STATUS_QFULL)) {
		/*
		 * NOTE(review): this is a bitwise test, not an equality
		 * test, so any status byte sharing bits with STATUS_BUSY
		 * or STATUS_QFULL also lands here -- confirm this matches
		 * the intended set of "still busy, keep pinging" statuses.
		 */
		return (0);
	}
	if ((*((unsigned char *)statusp) == STATUS_GOOD) ||
	    (updt_paths == 1)) {
		/*
		 * we got here because we had detected an
		 * externally initiated failover; things
		 * have settled down now, so let's
		 * start up a task to update the
		 * path states and target port group
		 */
		vlun->svl_efo_update_path = 1;
		swarg->svs_done = 1;
		vlun->svl_swarg = swarg;
		vlun->svl_flags |= VLUN_UPDATE_TPG;
		(void) taskq_dispatch(vhci->vhci_update_pathstates_taskq,
		    vhci_update_pathstates, (void *)vlun,
		    KM_SLEEP);
		return (0);
	}
	/* Still indeterminate: time out or ping again on the next watch. */
	if ((gethrtime() - swarg->svs_tos) >= VHCI_EXTFO_TIMEOUT) {
		swarg->svs_release_lun = 1;
		goto done;
	}
	return (0);
done:
	/*
	 * Timed out: mark the watch finished and hand cleanup off to
	 * vhci_efo_done in taskq context (scsi_watch termination blocks).
	 */
	swarg->svs_done = 1;
	(void) taskq_dispatch(vhci->vhci_taskq,
	    vhci_efo_done, (void *)swarg, KM_SLEEP);
	return (0);
}
3862
3863 /*
3864 * vhci_efo_done:
3865 * cleanly terminates scsi_watch and free up resources.
3866 * Called as taskq function in vhci_efo_watch_cb for EFO timeout condition
3867 * or by vhci_update_path_states invoked during external initiated
3868 * failover completion.
3869 */
3870 static void
3871 vhci_efo_done(void *arg)
3872 {
3873 scsi_vhci_lun_t *vlun;
3874 scsi_vhci_swarg_t *swarg = (scsi_vhci_swarg_t *)arg;
3875 scsi_vhci_priv_t *svp = swarg->svs_svp;
3876 ASSERT(svp);
3877
3878 vlun = svp->svp_svl;
3879 ASSERT(vlun);
3880
3881 /* Wait for clean termination of scsi_watch */
3882 (void) scsi_watch_request_terminate(svp->svp_sw_token,
3883 SCSI_WATCH_TERMINATE_ALL_WAIT);
3884 svp->svp_sw_token = NULL;
3885
3886 /* release path and freeup resources to indicate failover completion */
3887 mdi_rele_path(swarg->svs_pi);
3888 if (swarg->svs_release_lun) {
3889 VHCI_RELEASE_LUN(vlun);
3890 }
3891 kmem_free((void *)swarg, sizeof (*swarg));
3892 }
3893
3894 /*
3895 * Update the path states
3896 * vlun should be HELD when this is invoked.
3897 * Calls vhci_efo_done to cleanup resources allocated for EFO.
3898 */
void
vhci_update_pathstates(void *arg)
{
	mdi_pathinfo_t			*pip, *npip;
	dev_info_t			*dip;
	struct scsi_failover_ops	*fo;
	struct scsi_vhci_priv		*svp;
	struct scsi_device		*psd;
	struct scsi_path_opinfo		opinfo;
	char				*pclass, *tptr;
	struct scsi_vhci_lun		*vlun = (struct scsi_vhci_lun *)arg;
	int				sps; /* mdi_select_path() status */
	char				*cpath;
	struct scsi_vhci		*vhci;
	struct scsi_pkt			*pkt;
	struct buf			*bp;
	/* path that reported a SCSI-II reservation conflict, if any */
	struct scsi_vhci_priv		*svp_conflict = NULL;

	ASSERT(VHCI_LUN_IS_HELD(vlun));
	dip  = vlun->svl_dip;
	pip = npip = NULL;

	vhci = ddi_get_soft_state(vhci_softstate,
	    ddi_get_instance(ddi_get_parent(dip)));

	/* Start the walk over every online/standby path of this client. */
	sps = mdi_select_path(dip, NULL, (MDI_SELECT_ONLINE_PATH |
	    MDI_SELECT_STANDBY_PATH | MDI_SELECT_NO_PREFERRED), NULL, &npip);
	if ((npip == NULL) || (sps != MDI_SUCCESS)) {
		goto done;
	}

	fo = vlun->svl_fops;
	do {
		pip = npip;
		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
		psd = svp->svp_psd;
		/*
		 * Ask the failover module for this path's current state;
		 * on failure just advance to the next path.
		 */
		if (fo->sfo_path_get_opinfo(psd, &opinfo,
		    vlun->svl_fops_ctpriv) != 0) {
			sps = mdi_select_path(dip, NULL,
			    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH |
			    MDI_SELECT_NO_PREFERRED), pip, &npip);
			mdi_rele_path(pip);
			continue;
		}

		if (mdi_prop_lookup_string(pip, "path-class", &pclass) !=
		    MDI_SUCCESS) {
			VHCI_DEBUG(1, (CE_NOTE, NULL,
			    "!vhci_update_pathstates: prop lookup failed for "
			    "path 0x%p\n", (void *)pip));
			sps = mdi_select_path(dip, NULL,
			    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH |
			    MDI_SELECT_NO_PREFERRED), pip, &npip);
			mdi_rele_path(pip);
			continue;
		}

		/*
		 * Need to update the "path-class" property
		 * value in the device tree if different
		 * from the existing value.
		 */
		if (strcmp(pclass, opinfo.opinfo_path_attr) != 0) {
			(void) mdi_prop_update_string(pip, "path-class",
			    opinfo.opinfo_path_attr);
		}

		/*
		 * Only change the state if needed. i.e. Don't call
		 * mdi_pi_set_state to ONLINE a path if its already
		 * ONLINE. Same for STANDBY paths.
		 */

		if ((opinfo.opinfo_path_state == SCSI_PATH_ACTIVE ||
		    opinfo.opinfo_path_state == SCSI_PATH_ACTIVE_NONOPT)) {
			if (!(MDI_PI_IS_ONLINE(pip))) {
				/*
				 * Path became active: mark it ONLINE and
				 * record its path class as the lun's active
				 * class.
				 */
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!vhci_update_pathstates: marking path"
				    " 0x%p as ONLINE\n", (void *)pip));
				cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
				vhci_log(CE_NOTE, ddi_get_parent(dip), "!%s "
				    "(%s%d): path %s "
				    "is now ONLINE because of "
				    "an externally initiated failover",
				    ddi_pathname(dip, cpath),
				    ddi_driver_name(dip),
				    ddi_get_instance(dip),
				    mdi_pi_spathname(pip));
				kmem_free(cpath, MAXPATHLEN);
				mdi_pi_set_state(pip,
				    MDI_PATHINFO_STATE_ONLINE);
				mdi_pi_set_preferred(pip,
				    opinfo.opinfo_preferred);
				tptr = kmem_alloc(strlen
				    (opinfo.opinfo_path_attr)+1, KM_SLEEP);
				(void) strlcpy(tptr, opinfo.opinfo_path_attr,
				    (strlen(opinfo.opinfo_path_attr)+1));
				mutex_enter(&vlun->svl_mutex);
				if (vlun->svl_active_pclass != NULL) {
					kmem_free(vlun->svl_active_pclass,
					    strlen(vlun->svl_active_pclass)+1);
				}
				vlun->svl_active_pclass = tptr;
				if (vlun->svl_waiting_for_activepath) {
					vlun->svl_waiting_for_activepath = 0;
				}
				mutex_exit(&vlun->svl_mutex);
			} else if (MDI_PI_IS_ONLINE(pip)) {
				/*
				 * Path already ONLINE but its class changed:
				 * refresh svl_active_pclass if it no longer
				 * matches.
				 */
				if (strcmp(pclass, opinfo.opinfo_path_attr)
				    != 0) {
					mdi_pi_set_preferred(pip,
					    opinfo.opinfo_preferred);
					mutex_enter(&vlun->svl_mutex);
					if (vlun->svl_active_pclass == NULL ||
					    strcmp(opinfo.opinfo_path_attr,
					    vlun->svl_active_pclass) != 0) {
						mutex_exit(&vlun->svl_mutex);
						tptr = kmem_alloc(strlen
						    (opinfo.opinfo_path_attr)+1,
						    KM_SLEEP);
						(void) strlcpy(tptr,
						    opinfo.opinfo_path_attr,
						    (strlen
						    (opinfo.opinfo_path_attr)
						    +1));
						mutex_enter(&vlun->svl_mutex);
					} else {
						/*
						 * No need to update
						 * svl_active_pclass
						 */
						tptr = NULL;
						mutex_exit(&vlun->svl_mutex);
					}
					if (tptr) {
						if (vlun->svl_active_pclass
						    != NULL) {
							kmem_free(vlun->
							    svl_active_pclass,
							    strlen(vlun->
							    svl_active_pclass)
							    +1);
						}
						vlun->svl_active_pclass = tptr;
						mutex_exit(&vlun->svl_mutex);
					}
				}
			}

			/* Check for Reservation Conflict */
			bp = scsi_alloc_consistent_buf(
			    &svp->svp_psd->sd_address, (struct buf *)NULL,
			    DEV_BSIZE, B_READ, NULL, NULL);
			if (!bp) {
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!vhci_update_pathstates: No resources "
				    "(buf)\n"));
				mdi_rele_path(pip);
				goto done;
			}
			/*
			 * Issue a small polled READ down this specific path;
			 * a RESERVATION CONFLICT status identifies a working
			 * path usable later for reservation recovery.
			 */
			pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
			    CDB_GROUP1, sizeof (struct scsi_arq_status), 0,
			    PKT_CONSISTENT, NULL, NULL);
			if (pkt) {
				(void) scsi_setup_cdb((union scsi_cdb *)
				    (uintptr_t)pkt->pkt_cdbp, SCMD_READ, 1, 1,
				    0);
				pkt->pkt_time = 2 * vhci_io_time;
				pkt->pkt_flags = FLAG_NOINTR;
				pkt->pkt_path_instance =
				    mdi_pi_get_path_instance(pip);

				if ((scsi_transport(pkt) == TRAN_ACCEPT) &&
				    (pkt->pkt_reason == CMD_CMPLT) &&
				    (SCBP_C(pkt) ==
				    STATUS_RESERVATION_CONFLICT)) {
					VHCI_DEBUG(1, (CE_NOTE, NULL,
					    "!vhci_update_pathstates: reserv. "
					    "conflict to be resolved on 0x%p\n",
					    (void *)pip));
					svp_conflict = svp;
				}
				scsi_destroy_pkt(pkt);
			}
			scsi_free_consistent_buf(bp);
		} else if ((opinfo.opinfo_path_state == SCSI_PATH_INACTIVE) &&
		    !(MDI_PI_IS_STANDBY(pip))) {
			/*
			 * Path went inactive: mark it STANDBY and forget it
			 * as the active path class if it was recorded as such.
			 */
			VHCI_DEBUG(1, (CE_NOTE, NULL,
			    "!vhci_update_pathstates: marking path"
			    " 0x%p as STANDBY\n", (void *)pip));
			cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			vhci_log(CE_NOTE, ddi_get_parent(dip), "!%s "
			    "(%s%d): path %s "
			    "is now STANDBY because of "
			    "an externally initiated failover",
			    ddi_pathname(dip, cpath),
			    ddi_driver_name(dip),
			    ddi_get_instance(dip),
			    mdi_pi_spathname(pip));
			kmem_free(cpath, MAXPATHLEN);
			mdi_pi_set_state(pip,
			    MDI_PATHINFO_STATE_STANDBY);
			mdi_pi_set_preferred(pip,
			    opinfo.opinfo_preferred);
			mutex_enter(&vlun->svl_mutex);
			if (vlun->svl_active_pclass != NULL) {
				if (strcmp(vlun->svl_active_pclass,
				    opinfo.opinfo_path_attr) == 0) {
					kmem_free(vlun->
					    svl_active_pclass,
					    strlen(vlun->
					    svl_active_pclass)+1);
					vlun->svl_active_pclass = NULL;
				}
			}
			mutex_exit(&vlun->svl_mutex);
		}
		(void) mdi_prop_free(pclass);
		sps = mdi_select_path(dip, NULL,
		    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH |
		    MDI_SELECT_NO_PREFERRED), pip, &npip);
		mdi_rele_path(pip);

	} while ((npip != NULL) && (sps == MDI_SUCCESS));

	/*
	 * Check to see if this vlun has an active SCSI-II RESERVE.  If so
	 * clear the reservation by sending a reset, so the host doesn't
	 * receive a reservation conflict.  The reset has to be sent via a
	 * working path.  Let's use a path referred to by svp_conflict as it
	 * should be working.
	 * Reset VLUN_RESERVE_ACTIVE_FLG for this vlun.  Also notify ssd
	 * of the reset, explicitly.
	 */
	if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
		if (svp_conflict && (vlun->svl_xlf_capable == 0)) {
			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_update_pathstates:"
			    " sending recovery reset on 0x%p, path_state: %x",
			    svp_conflict->svp_psd->sd_private,
			    mdi_pi_get_state((mdi_pathinfo_t *)
			    svp_conflict->svp_psd->sd_private)));

			(void) vhci_recovery_reset(vlun,
			    &svp_conflict->svp_psd->sd_address, FALSE,
			    VHCI_DEPTH_TARGET);
		}
		vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
		mutex_enter(&vhci->vhci_mutex);
		scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
		    &vhci->vhci_reset_notify_listf);
		mutex_exit(&vhci->vhci_mutex);
	}
	if (vlun->svl_flags & VLUN_UPDATE_TPG) {
		/*
		 * Update the AccessState of related MP-API TPGs
		 */
		(void) vhci_mpapi_update_tpg_acc_state_for_lu(vhci, vlun);
		vlun->svl_flags &= ~VLUN_UPDATE_TPG;
	}
done:
	/*
	 * If this task was dispatched from vhci_efo_watch_cb, finish the
	 * EFO scsi_watch teardown now that path states are updated.
	 */
	if (vlun->svl_efo_update_path) {
		vlun->svl_efo_update_path = 0;
		vhci_efo_done(vlun->svl_swarg);
		vlun->svl_swarg = 0;
	}
	VHCI_RELEASE_LUN(vlun);
}
4166
/*
 * vhci_pathinfo_init()
 *
 * mdi framework entry point, called when a new pathinfo node is created.
 * Allocates and wires up the vHCI per-path private data:
 *
 *  - looks up (or allocates) the scsi_vhci_lun_t for the client via its
 *    lun-guid property,
 *  - allocates a scsi_vhci_priv_t (svp) and a per-path scsi_device (psd),
 *  - plumbs the scsi_address according to the pHCI transport flavor
 *    (SCSI_HBA_ADDR_COMPLEX or SCSI_HBA_TRAN_CLONE; SPI unit-address
 *    pHCIs are rejected),
 *  - invokes the pHCI's tran_tgt_init(9E) if one is provided.
 *
 * Returns MDI_SUCCESS on success.  On any failure, all partially
 * allocated resources are unwound and MDI_FAILURE (or the tran_tgt_init
 * return value) is returned.
 */
/* ARGSUSED */
static int
vhci_pathinfo_init(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
{
	scsi_hba_tran_t *hba = NULL;
	struct scsi_device *psd = NULL;
	scsi_vhci_lun_t *vlun = NULL;
	dev_info_t *pdip = NULL;
	dev_info_t *tgt_dip;
	struct scsi_vhci *vhci;
	char *guid;
	scsi_vhci_priv_t *svp = NULL;
	int rval = MDI_FAILURE;
	int vlun_alloced = 0;

	ASSERT(vdip != NULL);
	ASSERT(pip != NULL);

	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
	ASSERT(vhci != NULL);

	pdip = mdi_pi_get_phci(pip);
	ASSERT(pdip != NULL);

	hba = ddi_get_driver_private(pdip);
	ASSERT(hba != NULL);

	tgt_dip = mdi_pi_get_client(pip);
	ASSERT(tgt_dip != NULL);

	/* The client's lun-guid property identifies the vlun. */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip, PROPFLAGS,
	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
		VHCI_DEBUG(1, (CE_WARN, NULL,
		    "vhci_pathinfo_init: lun guid property failed"));
		goto failure;
	}

	/* Find existing vlun for this guid, or allocate a fresh one. */
	vlun = vhci_lun_lookup_alloc(tgt_dip, guid, &vlun_alloced);
	ddi_prop_free(guid);

	vlun->svl_dip = tgt_dip;

	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
	svp->svp_svl = vlun;

	/*
	 * Initialize svl_lb_policy_save only for newly allocated vlun. Writing
	 * to svl_lb_policy_save later could accidentally overwrite saved lb
	 * policy.
	 */
	if (vlun_alloced) {
		vlun->svl_lb_policy_save = mdi_get_lb_policy(tgt_dip);
	}

	mutex_init(&svp->svp_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&svp->svp_cv, NULL, CV_DRIVER, NULL);

	psd = kmem_zalloc(sizeof (*psd), KM_SLEEP);
	mutex_init(&psd->sd_mutex, NULL, MUTEX_DRIVER, NULL);

	if (hba->tran_hba_flags & SCSI_HBA_ADDR_COMPLEX) {
		/*
		 * For a SCSI_HBA_ADDR_COMPLEX transport we store a pointer to
		 * scsi_device in the scsi_address structure. This allows an
		 * HBA driver to find its scsi_device(9S) and
		 * per-scsi_device(9S) HBA private data given a
		 * scsi_address(9S) by using scsi_address_device(9F) and
		 * scsi_device_hba_private_get(9F)).
		 */
		psd->sd_address.a.a_sd = psd;
	} else if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
		/*
		 * Clone the transport structure: self-enumerating HBAs
		 * require a private scsi_hba_tran(9S) per target so that
		 * tran_sd can point back at this path's scsi_device.
		 */
		scsi_hba_tran_t *clone =
		    kmem_alloc(sizeof (scsi_hba_tran_t), KM_SLEEP);
		bcopy(hba, clone, sizeof (scsi_hba_tran_t));
		hba = clone;
		hba->tran_sd = psd;
	} else {
		/*
		 * SPI pHCI unit-address. If we ever need to support this
		 * we could set a.spi.a_target/a.spi.a_lun based on pathinfo
		 * node unit-address properties. For now we fail...
		 */
		goto failure;
	}

	psd->sd_dev = tgt_dip;
	psd->sd_address.a_hba_tran = hba;

	/*
	 * Mark scsi_device as being associated with a pathinfo node. For
	 * a scsi_device structure associated with a devinfo node,
	 * scsi_ctlops_initchild sets this field to NULL.
	 */
	psd->sd_pathinfo = pip;

	/*
	 * LEGACY: sd_private: set for older mpxio-capable pHCI drivers with
	 * too much scsi_vhci/mdi/ndi knowledge. Remove this code when all
	 * mpxio-capable pHCI drivers use SCSA enumeration services (or at
	 * least have been changed to use sd_pathinfo instead).
	 */
	psd->sd_private = (caddr_t)pip;

	/* See scsi_hba.c for info on sd_tran_safe kludge */
	psd->sd_tran_safe = hba;

	svp->svp_psd = psd;
	mdi_pi_set_vhci_private(pip, (caddr_t)svp);

	/*
	 * call hba's target init entry point if it exists
	 */
	if (hba->tran_tgt_init != NULL) {
		psd->sd_tran_tgt_free_done = 0;
		if ((rval = (*hba->tran_tgt_init)(pdip, tgt_dip,
		    hba, psd)) != DDI_SUCCESS) {
			VHCI_DEBUG(1, (CE_WARN, pdip,
			    "!vhci_pathinfo_init: tran_tgt_init failed for "
			    "path=0x%p rval=%x", (void *)pip, rval));
			goto failure;
		}
	}

	svp->svp_new_path = 1;

	VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_pathinfo_init: path:%p\n",
	    (void *)pip));
	return (MDI_SUCCESS);

failure:
	/* Unwind in reverse order of allocation; each pointer may be NULL. */
	if (psd) {
		mutex_destroy(&psd->sd_mutex);
		kmem_free(psd, sizeof (*psd));
	}
	if (svp) {
		mdi_pi_set_vhci_private(pip, NULL);
		mutex_destroy(&svp->svp_mutex);
		cv_destroy(&svp->svp_cv);
		kmem_free(svp, sizeof (*svp));
	}
	/* Only a cloned tran was allocated by us; the original belongs to */
	/* the pHCI. */
	if (hba && (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE))
		kmem_free(hba, sizeof (scsi_hba_tran_t));

	/* Free the vlun only if this call allocated it. */
	if (vlun_alloced)
		vhci_lun_free(vlun, NULL);

	return (rval);
}
4319
/*
 * vhci_pathinfo_uninit()
 *
 * mdi framework entry point, called when a pathinfo node is being
 * destroyed.  Undoes vhci_pathinfo_init(): calls the pHCI's
 * tran_tgt_free(9E) (if not already done), releases the cloned
 * scsi_hba_tran (SCSI_HBA_TRAN_CLONE case), and frees the per-path
 * scsi_device (including its inquiry data) and the scsi_vhci_priv_t.
 *
 * Always returns MDI_SUCCESS; a NULL vhci-private pointer means the
 * path was already torn down and there is nothing to do.
 */
/* ARGSUSED */
static int
vhci_pathinfo_uninit(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
{
	scsi_hba_tran_t *hba = NULL;
	struct scsi_device *psd = NULL;
	dev_info_t *pdip = NULL;
	dev_info_t *cdip = NULL;
	scsi_vhci_priv_t *svp = NULL;

	ASSERT(vdip != NULL);
	ASSERT(pip != NULL);

	pdip = mdi_pi_get_phci(pip);
	ASSERT(pdip != NULL);

	cdip = mdi_pi_get_client(pip);
	ASSERT(cdip != NULL);

	hba = ddi_get_driver_private(pdip);
	ASSERT(hba != NULL);

	vhci_mpapi_set_path_state(vdip, pip, MP_DRVR_PATH_STATE_UNINIT);
	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
	if (svp == NULL) {
		/* path already freed. Nothing to do. */
		return (MDI_SUCCESS);
	}

	psd = svp->svp_psd;
	ASSERT(psd != NULL);

	if (hba->tran_hba_flags & SCSI_HBA_ADDR_COMPLEX) {
		/* Verify plumbing */
		ASSERT(psd->sd_address.a_hba_tran == hba);
		ASSERT(psd->sd_address.a.a_sd == psd);
	} else if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
		/* Switch to cloned scsi_hba_tran(9S) structure */
		hba = psd->sd_address.a_hba_tran;
		ASSERT(hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE);
		ASSERT(hba->tran_sd == psd);
	}

	/*
	 * Call tran_tgt_free(9E) unless it was already invoked (tracked
	 * via sd_tran_tgt_free_done to prevent a double free).
	 */
	if ((hba->tran_tgt_free != NULL) && !psd->sd_tran_tgt_free_done) {
		(*hba->tran_tgt_free) (pdip, cdip, hba, psd);
		psd->sd_tran_tgt_free_done = 1;
	}
	mutex_destroy(&psd->sd_mutex);
	/* The clone was allocated by vhci_pathinfo_init(); free it here. */
	if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
		kmem_free(hba, sizeof (*hba));
	}

	mdi_pi_set_vhci_private(pip, NULL);

	/*
	 * Free the pathinfo related scsi_device inquiry data. Note that this
	 * matches what happens for scsi_hba.c devinfo case at uninitchild time.
	 */
	if (psd->sd_inq)
		kmem_free((caddr_t)psd->sd_inq, sizeof (struct scsi_inquiry));
	kmem_free((caddr_t)psd, sizeof (*psd));

	mutex_destroy(&svp->svp_mutex);
	cv_destroy(&svp->svp_cv);
	kmem_free((caddr_t)svp, sizeof (*svp));

	VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_pathinfo_uninit: path=0x%p\n",
	    (void *)pip));
	return (MDI_SUCCESS);
}
4390
/*
 * vhci_pathinfo_state_change()
 *
 * mdi framework entry point invoked around pathinfo state transitions.
 *
 * For extended (MDI_EXT_STATE_CHANGE) notifications this handles the
 * enable/disable side effects: recovery reset of a reserved path being
 * disabled, auto-failback dispatch and PGR key revalidation when a
 * standby path is re-enabled, and LUN hold/release bracketing the
 * transition (held on MDI_BEFORE_STATE_CHANGE, released on
 * MDI_AFTER_STATE_CHANGE).
 *
 * For plain state changes it forwards to vhci_pathinfo_online() /
 * vhci_pathinfo_offline() and runs the scsi callback so target drivers
 * retry queued commands.
 */
/* ARGSUSED */
static int
vhci_pathinfo_state_change(dev_info_t *vdip, mdi_pathinfo_t *pip,
    mdi_pathinfo_state_t state, uint32_t ext_state, int flags)
{
	int rval = MDI_SUCCESS;
	scsi_vhci_priv_t *svp;
	scsi_vhci_lun_t *vlun;
	int held;
	/* Operation code (DRIVER_DISABLE etc.) is packed into bits 8-11. */
	int op = (flags & 0xf00) >> 8;
	struct scsi_vhci *vhci;

	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));

	if (flags & MDI_EXT_STATE_CHANGE) {
		/*
		 * We do not want to issue any commands down the path in case
		 * sync flag is set. Lower layers might not be ready to accept
		 * any I/O commands.
		 */
		if (op == DRIVER_DISABLE)
			return (MDI_SUCCESS);

		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
		if (svp == NULL) {
			return (MDI_FAILURE);
		}
		vlun = svp->svp_svl;

		if (flags & MDI_BEFORE_STATE_CHANGE) {
			/*
			 * Hold the LUN.
			 */
			VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
			if (flags & MDI_DISABLE_OP) {
				/*
				 * Issue scsi reset if it happens to be
				 * reserved path.
				 */
				if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
					/*
					 * if reservation pending on
					 * this path, dont' mark the
					 * path busy
					 */
					if (op == DRIVER_DISABLE_TRANSIENT) {
						/*
						 * NOTE(review): this returns
						 * with the LUN still held by
						 * VHCI_HOLD_LUN above; the
						 * matching VHCI_RELEASE_LUN
						 * only runs on the AFTER
						 * callback — confirm mdi
						 * still delivers it after
						 * this failure return.
						 */
						VHCI_DEBUG(1, (CE_NOTE, NULL,
						    "!vhci_pathinfo"
						    "_state_change (pip:%p): "
						    " reservation: fail busy\n",
						    (void *)pip));
						return (MDI_FAILURE);
					}
					if (pip == vlun->svl_resrv_pip) {
						if (vhci_recovery_reset(
						    svp->svp_svl,
						    &svp->svp_psd->sd_address,
						    TRUE,
						    VHCI_DEPTH_TARGET) == 0) {
							VHCI_DEBUG(1,
							    (CE_NOTE, NULL,
							    "!vhci_pathinfo"
							    "_state_change "
							    " (pip:%p): "
							    "reset failed, "
							    "give up!\n",
							    (void *)pip));
						}
						vlun->svl_flags &=
						    ~VLUN_RESERVE_ACTIVE_FLG;
					}
				}
			} else if (flags & MDI_ENABLE_OP) {
				if (((vhci->vhci_conf_flags &
				    VHCI_CONF_FLAGS_AUTO_FAILBACK) ==
				    VHCI_CONF_FLAGS_AUTO_FAILBACK) &&
				    MDI_PI_IS_USER_DISABLE(pip) &&
				    MDI_PI_IS_STANDBY(pip)) {
					struct scsi_failover_ops	*fo;
					char *best_pclass, *pclass = NULL;
					int  best_class, rv;
					/*
					 * Failback if enabling a standby path
					 * and it is the primary class or
					 * preferred class
					 */
					best_class = mdi_pi_get_preferred(pip);
					if (best_class == 0) {
						/*
						 * if not preferred - compare
						 * path-class with class
						 */
						fo = vlun->svl_fops;
						(void) fo->sfo_pathclass_next(
						    NULL, &best_pclass,
						    vlun->svl_fops_ctpriv);
						pclass = NULL;
						rv = mdi_prop_lookup_string(pip,
						    "path-class", &pclass);
						if (rv != MDI_SUCCESS ||
						    pclass == NULL) {
							vhci_log(CE_NOTE, vdip,
							    "!path-class "
							    " lookup "
							    "failed. rv: %d"
							    "class: %p", rv,
							    (void *)pclass);
						} else if (strncmp(pclass,
						    best_pclass,
						    strlen(best_pclass)) == 0) {
							best_class = 1;
						}
						if (rv == MDI_SUCCESS &&
						    pclass != NULL) {
							rv = mdi_prop_free(
							    pclass);
							if (rv !=
							    DDI_PROP_SUCCESS) {
								vhci_log(
								    CE_NOTE,
								    vdip,
								    "!path-"
								    "class"
								    " free"
								    " failed"
								    " rv: %d"
								    " class: "
								    "%p",
								    rv,
								    (void *)
								    pclass);
							}
						}
					}
					if (best_class == 1) {
						VHCI_DEBUG(1, (CE_NOTE, NULL,
						    "preferred path: %p "
						    "USER_DISABLE->USER_ENABLE "
						    "transition for lun %s\n",
						    (void *)pip,
						    vlun->svl_lun_wwn));
						(void) taskq_dispatch(
						    vhci->vhci_taskq,
						    vhci_initiate_auto_failback,
						    (void *) vlun, KM_SLEEP);
					}
				}
				/*
				 * if PGR is active, revalidate key and
				 * register on this path also, if key is
				 * still valid
				 */
				sema_p(&vlun->svl_pgr_sema);
				if (vlun->svl_pgr_active)
					(void)
					    vhci_pgr_validate_and_register(svp);
				sema_v(&vlun->svl_pgr_sema);
				/*
				 * Inform target driver about any
				 * reservations to be reinstated if target
				 * has dropped reservation during the busy
				 * period.
				 */
				mutex_enter(&vhci->vhci_mutex);
				scsi_hba_reset_notify_callback(
				    &vhci->vhci_mutex,
				    &vhci->vhci_reset_notify_listf);
				mutex_exit(&vhci->vhci_mutex);
			}
		}
		if (flags & MDI_AFTER_STATE_CHANGE) {
			if (flags & MDI_ENABLE_OP) {
				/* Wake up anyone waiting for an enabled path */
				mutex_enter(&vhci_global_mutex);
				cv_broadcast(&vhci_cv);
				mutex_exit(&vhci_global_mutex);
			}
			/* Re-propagate sector-size capability if it was set */
			if (vlun->svl_setcap_done) {
				(void) vhci_pHCI_cap(&svp->svp_psd->sd_address,
				    "sector-size", vlun->svl_sector_size,
				    1, pip);
			}

			/*
			 * Release the LUN
			 */
			VHCI_RELEASE_LUN(vlun);

			/*
			 * Path transition is complete.
			 * Run callback to indicate target driver to
			 * retry to prevent IO starvation.
			 */
			if (scsi_callback_id != 0) {
				ddi_run_callback(&scsi_callback_id);
			}
		}
	} else {
		switch (state) {
		case MDI_PATHINFO_STATE_ONLINE:
			rval = vhci_pathinfo_online(vdip, pip, flags);
			break;

		case MDI_PATHINFO_STATE_OFFLINE:
			rval = vhci_pathinfo_offline(vdip, pip, flags);
			break;

		default:
			break;
		}
		/*
		 * Path transition is complete.
		 * Run callback to indicate target driver to
		 * retry to prevent IO starvation.
		 */
		if ((rval == MDI_SUCCESS) && (scsi_callback_id != 0)) {
			ddi_run_callback(&scsi_callback_id);
		}
		return (rval);
	}

	return (MDI_SUCCESS);
}
4613
4614 /*
4615 * Parse the mpxio load balancing options. The datanameptr
4616 * will point to a string containing the load-balance-options value.
4617 * The load-balance-options value will be a property that
4618 * defines the load-balance algorithm and any arguments to that
4619 * algorithm.
4620 * For example:
4621 * device-type-mpxio-options-list=
4622 * "device-type=SUN SENA", "load-balance-options=logical-block-options"
4623 * "device-type=SUN SE6920", "round-robin-options";
4624 * logical-block-options="load-balance=logical-block", "region-size=15";
4625 * round-robin-options="load-balance=round-robin";
4626 *
4627 * If the load-balance is not defined the load balance algorithm will
4628 * default to the global setting. There will be default values assigned
4629 * to the arguments (region-size=18) and if an argument is one
4630 * that is not known, it will be ignored.
4631 */
4632 static void
4633 vhci_parse_mpxio_lb_options(dev_info_t *dip, dev_info_t *cdip,
4634 caddr_t datanameptr)
4635 {
4636 char *dataptr, *next_entry;
4637 caddr_t config_list = NULL;
4638 int config_list_len = 0, list_len = 0;
4639 int region_size = -1;
4640 client_lb_t load_balance;
4641
4642 if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, datanameptr,
4643 (caddr_t)&config_list, &config_list_len) != DDI_PROP_SUCCESS) {
4644 return;
4645 }
4646
4647 list_len = config_list_len;
4648 next_entry = config_list;
4649 while (config_list_len > 0) {
4650 dataptr = next_entry;
4651
4652 if (strncmp(mdi_load_balance, dataptr,
4653 strlen(mdi_load_balance)) == 0) {
4654 /* get the load-balance scheme */
4655 dataptr += strlen(mdi_load_balance) + 1;
4656 if (strcmp(dataptr, LOAD_BALANCE_PROP_RR) == 0) {
4657 (void) mdi_set_lb_policy(cdip, LOAD_BALANCE_RR);
4658 load_balance = LOAD_BALANCE_RR;
4659 } else if (strcmp(dataptr,
4660 LOAD_BALANCE_PROP_LBA) == 0) {
4661 (void) mdi_set_lb_policy(cdip,
4662 LOAD_BALANCE_LBA);
4663 load_balance = LOAD_BALANCE_LBA;
4664 } else if (strcmp(dataptr,
4665 LOAD_BALANCE_PROP_NONE) == 0) {
4666 (void) mdi_set_lb_policy(cdip,
4667 LOAD_BALANCE_NONE);
4668 load_balance = LOAD_BALANCE_NONE;
4669 }
4670 } else if (strncmp(dataptr, LOGICAL_BLOCK_REGION_SIZE,
4671 strlen(LOGICAL_BLOCK_REGION_SIZE)) == 0) {
4672 int i = 0;
4673 char *ptr;
4674 char *tmp;
4675
4676 tmp = dataptr + (strlen(LOGICAL_BLOCK_REGION_SIZE) + 1);
4677 /* check for numeric value */
4678 for (ptr = tmp; i < strlen(tmp); i++, ptr++) {
4679 if (!isdigit(*ptr)) {
4680 cmn_err(CE_WARN,
4681 "Illegal region size: %s."
4682 " Setting to default value: %d",
4683 tmp,
4684 LOAD_BALANCE_DEFAULT_REGION_SIZE);
4685 region_size =
4686 LOAD_BALANCE_DEFAULT_REGION_SIZE;
4687 break;
4688 }
4689 }
4690 if (i >= strlen(tmp)) {
4691 region_size = stoi(&tmp);
4692 }
4693 (void) mdi_set_lb_region_size(cdip, region_size);
4694 }
4695 config_list_len -= (strlen(next_entry) + 1);
4696 next_entry += strlen(next_entry) + 1;
4697 }
4698 #ifdef DEBUG
4699 if ((region_size >= 0) && (load_balance != LOAD_BALANCE_LBA)) {
4700 VHCI_DEBUG(1, (CE_NOTE, dip,
4701 "!vhci_parse_mpxio_lb_options: region-size: %d"
4702 "only valid for load-balance=logical-block\n",
4703 region_size));
4704 }
4705 #endif
4706 if ((region_size == -1) && (load_balance == LOAD_BALANCE_LBA)) {
4707 VHCI_DEBUG(1, (CE_NOTE, dip,
4708 "!vhci_parse_mpxio_lb_options: No region-size"
4709 " defined load-balance=logical-block."
4710 " Default to: %d\n", LOAD_BALANCE_DEFAULT_REGION_SIZE));
4711 (void) mdi_set_lb_region_size(cdip,
4712 LOAD_BALANCE_DEFAULT_REGION_SIZE);
4713 }
4714 if (list_len > 0) {
4715 kmem_free(config_list, list_len);
4716 }
4717 }
4718
4719 /*
4720 * Parse the device-type-mpxio-options-list looking for the key of
4721 * "load-balance-options". If found, parse the load balancing options.
4722 * Check the comment of the vhci_get_device_type_mpxio_options()
4723 * for the device-type-mpxio-options-list.
4724 */
4725 static void
4726 vhci_parse_mpxio_options(dev_info_t *dip, dev_info_t *cdip,
4727 caddr_t datanameptr, int list_len)
4728 {
4729 char *dataptr;
4730 int len;
4731
4732 /*
4733 * get the data list
4734 */
4735 dataptr = datanameptr;
4736 len = 0;
4737 while (len < list_len &&
4738 strncmp(dataptr, DEVICE_TYPE_STR, strlen(DEVICE_TYPE_STR))
4739 != 0) {
4740 if (strncmp(dataptr, LOAD_BALANCE_OPTIONS,
4741 strlen(LOAD_BALANCE_OPTIONS)) == 0) {
4742 len += strlen(LOAD_BALANCE_OPTIONS) + 1;
4743 dataptr += strlen(LOAD_BALANCE_OPTIONS) + 1;
4744 vhci_parse_mpxio_lb_options(dip, cdip, dataptr);
4745 }
4746 len += strlen(dataptr) + 1;
4747 dataptr += strlen(dataptr) + 1;
4748 }
4749 }
4750
4751 /*
 * Check the inquiry string returned from the device with the device-type
4753 * Check for the existence of the device-type-mpxio-options-list and
4754 * if found parse the list checking for a match with the device-type
4755 * value and the inquiry string returned from the device. If a match
4756 * is found, parse the mpxio options list. The format of the
4757 * device-type-mpxio-options-list is:
4758 * device-type-mpxio-options-list=
4759 * "device-type=SUN SENA", "load-balance-options=logical-block-options"
4760 * "device-type=SUN SE6920", "round-robin-options";
4761 * logical-block-options="load-balance=logical-block", "region-size=15";
4762 * round-robin-options="load-balance=round-robin";
4763 */
void
vhci_get_device_type_mpxio_options(dev_info_t *dip, dev_info_t *cdip,
    struct scsi_device *devp)
{

	caddr_t config_list = NULL;
	caddr_t vidptr, datanameptr;
	int vidlen, dupletlen = 0;
	int config_list_len = 0, len;
	struct scsi_inquiry *inq = devp->sd_inq;

	/*
	 * look up the device-type-mpxio-options-list and walk thru
	 * the list compare the vendor ids of the earlier inquiry command and
	 * with those vids in the list if there is a match, lookup
	 * the mpxio-options value
	 */
	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    MPXIO_OPTIONS_LIST,
	    (caddr_t)&config_list, &config_list_len) == DDI_PROP_SUCCESS) {

		/*
		 * Compare vids in each duplet - if it matches,
		 * parse the mpxio options list.
		 */
		for (len = config_list_len, vidptr = config_list; len > 0;
		    len -= dupletlen) {

			dupletlen = 0;

			/* A duplet starts with a "device-type=" entry. */
			if (strlen(vidptr) != 0 &&
			    strncmp(vidptr, DEVICE_TYPE_STR,
			    strlen(DEVICE_TYPE_STR)) == 0) {
				/* point to next duplet */
				datanameptr = vidptr + strlen(vidptr) + 1;
				/* add len of this duplet */
				dupletlen += strlen(vidptr) + 1;
				/* get to device type */
				vidptr += strlen(DEVICE_TYPE_STR) + 1;
				vidlen = strlen(vidptr);
				/*
				 * Prefix-match against the inquiry vendor
				 * id; on a hit, parse the options that
				 * follow this duplet and stop scanning.
				 */
				if ((vidlen != 0) &&
				    bcmp(inq->inq_vid, vidptr, vidlen) == 0) {
					vhci_parse_mpxio_options(dip, cdip,
					    datanameptr, len - dupletlen);
					break;
				}
				/* get to next duplet */
				vidptr += strlen(vidptr) + 1;
			}
			/* get to the next device-type */
			while (len - dupletlen > 0 &&
			    strlen(vidptr) != 0 &&
			    strncmp(vidptr, DEVICE_TYPE_STR,
			    strlen(DEVICE_TYPE_STR)) != 0) {
				dupletlen += strlen(vidptr) + 1;
				vidptr += strlen(vidptr) + 1;
			}
		}
		if (config_list_len > 0) {
			kmem_free(config_list, config_list_len);
		}
	}
}
4827
4828 static int
4829 vhci_update_pathinfo(struct scsi_device *psd, mdi_pathinfo_t *pip,
4830 struct scsi_failover_ops *fo,
4831 scsi_vhci_lun_t *vlun,
4832 struct scsi_vhci *vhci)
4833 {
4834 struct scsi_path_opinfo opinfo;
4835 char *pclass, *best_pclass;
4836 char *resrv_pclass = NULL;
4837 int force_rereserve = 0;
4838 int update_pathinfo_done = 0;
4839
4840 if (fo->sfo_path_get_opinfo(psd, &opinfo, vlun->svl_fops_ctpriv) != 0) {
4841 VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_update_pathinfo: "
4842 "Failed to get operation info for path:%p\n", (void *)pip));
4843 return (MDI_FAILURE);
4844 }
4845 /* set the xlf capable flag in the vlun for future use */
4846 vlun->svl_xlf_capable = opinfo.opinfo_xlf_capable;
4847 (void) mdi_prop_update_string(pip, "path-class",
4848 opinfo.opinfo_path_attr);
4849
4850 pclass = opinfo.opinfo_path_attr;
4851 if (opinfo.opinfo_path_state == SCSI_PATH_ACTIVE) {
4852 mutex_enter(&vlun->svl_mutex);
4853 if (vlun->svl_active_pclass != NULL) {
4854 if (strcmp(vlun->svl_active_pclass, pclass) != 0) {
4855 mutex_exit(&vlun->svl_mutex);
4856 /*
4857 * Externally initiated failover has happened;
4858 * force the path state to be STANDBY/ONLINE,
4859 * next IO will trigger failover and thus
4860 * sync-up the pathstates. Reason we don't
4861 * sync-up immediately by invoking
4862 * vhci_update_pathstates() is because it
4863 * needs a VHCI_HOLD_LUN() and we don't
4864 * want to block here.
4865 *
4866 * Further, if the device is an ALUA device,
4867 * then failure to exactly match 'pclass' and
4868 * 'svl_active_pclass'(as is the case here)
4869 * indicates that the currently active path
4870 * is a 'non-optimized' path - which means
4871 * that 'svl_active_pclass' needs to be
4872 * replaced with opinfo.opinfo_path_state
4873 * value.
4874 */
4875
4876 if (SCSI_FAILOVER_IS_TPGS(vlun->svl_fops)) {
4877 char *tptr;
4878
4879 /*
4880 * The device is ALUA compliant. The
4881 * state need to be changed to online
4882 * rather than standby state which is
4883 * done typically for a asymmetric
4884 * device that is non ALUA compliant.
4885 */
4886 mdi_pi_set_state(pip,
4887 MDI_PATHINFO_STATE_ONLINE);
4888 tptr = kmem_alloc(strlen
4889 (opinfo.opinfo_path_attr)+1,
4890 KM_SLEEP);
4891 (void) strlcpy(tptr,
4892 opinfo.opinfo_path_attr,
4893 (strlen(opinfo.opinfo_path_attr)
4894 +1));
4895 mutex_enter(&vlun->svl_mutex);
4896 kmem_free(vlun->svl_active_pclass,
4897 strlen(vlun->svl_active_pclass)+1);
4898 vlun->svl_active_pclass = tptr;
4899 mutex_exit(&vlun->svl_mutex);
4900 } else {
4901 /*
4902 * Non ALUA device case.
4903 */
4904 mdi_pi_set_state(pip,
4905 MDI_PATHINFO_STATE_STANDBY);
4906 }
4907 vlun->svl_fo_support = opinfo.opinfo_mode;
4908 mdi_pi_set_preferred(pip,
4909 opinfo.opinfo_preferred);
4910 update_pathinfo_done = 1;
4911 }
4912
4913 /*
4914 * Find out a class of currently reserved path if there
4915 * is any.
4916 */
4917 if ((vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) &&
4918 mdi_prop_lookup_string(vlun->svl_resrv_pip,
4919 "path-class", &resrv_pclass) != MDI_SUCCESS) {
4920 VHCI_DEBUG(1, (CE_NOTE, NULL,
4921 "!vhci_update_pathinfo: prop lookup "
4922 "failed for path 0x%p\n",
4923 (void *)vlun->svl_resrv_pip));
4924 /*
4925 * Something is wrong with the reserved path.
4926 * We can't do much with that right here. Just
4927 * force re-reservation to another path.
4928 */
4929 force_rereserve = 1;
4930 }
4931
4932 (void) fo->sfo_pathclass_next(NULL, &best_pclass,
4933 vlun->svl_fops_ctpriv);
4934 if ((force_rereserve == 1) || ((resrv_pclass != NULL) &&
4935 (strcmp(pclass, best_pclass) == 0) &&
4936 (strcmp(resrv_pclass, best_pclass) != 0))) {
4937 /*
4938 * Inform target driver that a reservation
4939 * should be reinstated because the reserved
4940 * path is not the most preferred one.
4941 */
4942 mutex_enter(&vhci->vhci_mutex);
4943 scsi_hba_reset_notify_callback(
4944 &vhci->vhci_mutex,
4945 &vhci->vhci_reset_notify_listf);
4946 mutex_exit(&vhci->vhci_mutex);
4947 }
4948
4949 if (update_pathinfo_done == 1) {
4950 return (MDI_SUCCESS);
4951 }
4952 } else {
4953 char *tptr;
4954
4955 /*
4956 * lets release the mutex before we try to
4957 * allocate since the potential to sleep is
4958 * possible.
4959 */
4960 mutex_exit(&vlun->svl_mutex);
4961 tptr = kmem_alloc(strlen(pclass)+1, KM_SLEEP);
4962 (void) strlcpy(tptr, pclass, (strlen(pclass)+1));
4963 mutex_enter(&vlun->svl_mutex);
4964 vlun->svl_active_pclass = tptr;
4965 }
4966 mutex_exit(&vlun->svl_mutex);
4967 mdi_pi_set_state(pip, MDI_PATHINFO_STATE_ONLINE);
4968 vlun->svl_waiting_for_activepath = 0;
4969 } else if (opinfo.opinfo_path_state == SCSI_PATH_ACTIVE_NONOPT) {
4970 mutex_enter(&vlun->svl_mutex);
4971 if (vlun->svl_active_pclass == NULL) {
4972 char *tptr;
4973
4974 mutex_exit(&vlun->svl_mutex);
4975 tptr = kmem_alloc(strlen(pclass)+1, KM_SLEEP);
4976 (void) strlcpy(tptr, pclass, (strlen(pclass)+1));
4977 mutex_enter(&vlun->svl_mutex);
4978 vlun->svl_active_pclass = tptr;
4979 }
4980 mutex_exit(&vlun->svl_mutex);
4981 mdi_pi_set_state(pip, MDI_PATHINFO_STATE_ONLINE);
4982 vlun->svl_waiting_for_activepath = 0;
4983 } else if (opinfo.opinfo_path_state == SCSI_PATH_INACTIVE) {
4984 mutex_enter(&vlun->svl_mutex);
4985 if (vlun->svl_active_pclass != NULL) {
4986 if (strcmp(vlun->svl_active_pclass, pclass) == 0) {
4987 mutex_exit(&vlun->svl_mutex);
4988 /*
4989 * externally initiated failover has happened;
4990 * force state to ONLINE (see comment above)
4991 */
4992 mdi_pi_set_state(pip,
4993 MDI_PATHINFO_STATE_ONLINE);
4994 vlun->svl_fo_support = opinfo.opinfo_mode;
4995 mdi_pi_set_preferred(pip,
4996 opinfo.opinfo_preferred);
4997 return (MDI_SUCCESS);
4998 }
4999 }
5000 mutex_exit(&vlun->svl_mutex);
5001 mdi_pi_set_state(pip, MDI_PATHINFO_STATE_STANDBY);
5002
5003 /*
5004 * Initiate auto-failback, if enabled, for path if path-state
5005 * is transitioning from OFFLINE->STANDBY and pathclass is the
5006 * preferred pathclass for this storage.
5007 * NOTE: In case where opinfo_path_state is SCSI_PATH_ACTIVE
5008 * (above), where the pi state is set to STANDBY, we don't
5009 * initiate auto-failback as the next IO shall take care of.
5010 * this. See comment above.
5011 */
5012 (void) fo->sfo_pathclass_next(NULL, &best_pclass,
5013 vlun->svl_fops_ctpriv);
5014 if (((vhci->vhci_conf_flags & VHCI_CONF_FLAGS_AUTO_FAILBACK) ==
5015 VHCI_CONF_FLAGS_AUTO_FAILBACK) &&
5016 (strcmp(pclass, best_pclass) == 0) &&
5017 ((MDI_PI_OLD_STATE(pip) == MDI_PATHINFO_STATE_OFFLINE)||
5018 (MDI_PI_OLD_STATE(pip) == MDI_PATHINFO_STATE_INIT))) {
5019 VHCI_DEBUG(1, (CE_NOTE, NULL, "%s pathclass path: %p"
5020 " OFFLINE->STANDBY transition for lun %s\n",
5021 best_pclass, (void *)pip, vlun->svl_lun_wwn));
5022 (void) taskq_dispatch(vhci->vhci_taskq,
5023 vhci_initiate_auto_failback, (void *) vlun,
5024 KM_SLEEP);
5025 }
5026 }
5027 vlun->svl_fo_support = opinfo.opinfo_mode;
5028 mdi_pi_set_preferred(pip, opinfo.opinfo_preferred);
5029
5030 VHCI_DEBUG(8, (CE_NOTE, NULL, "vhci_update_pathinfo: opinfo_rev = %x,"
5031 " opinfo_path_state = %x opinfo_preferred = %x, opinfo_mode = %x\n",
5032 opinfo.opinfo_rev, opinfo.opinfo_path_state,
5033 opinfo.opinfo_preferred, opinfo.opinfo_mode));
5034
5035 return (MDI_SUCCESS);
5036 }
5037
5038 /*
5039 * Form the kstat name and and call mdi_pi_kstat_create()
5040 */
void
vhci_kstat_create_pathinfo(mdi_pathinfo_t *pip)
{
	dev_info_t *tgt_dip;
	dev_info_t *pdip;
	char *guid;
	char *target_port, *target_port_dup;
	char ks_name[KSTAT_STRLEN];
	uint_t pid;
	int by_id;
	mod_hash_val_t hv;


	/* return if we have already allocated kstats */
	if (mdi_pi_kstat_exists(pip))
		return;

	/*
	 * We need instance numbers to create a kstat name, return if we don't
	 * have instance numbers assigned yet.
	 */
	tgt_dip = mdi_pi_get_client(pip);
	pdip = mdi_pi_get_phci(pip);
	if ((ddi_get_instance(tgt_dip) == -1) || (ddi_get_instance(pdip) == -1))
		return;

	/*
	 * A path oriented kstat has a ks_name of the form:
	 *
	 * <client-driver><instance>.t<pid>.<pHCI-driver><instance>
	 *
	 * We maintain a bidirectional 'target-port' to <pid> map,
	 * called targetmap. All pathinfo nodes with the same
	 * 'target-port' map to the same <pid>. The iostat(1M) code,
	 * when parsing a path oriented kstat name, uses the <pid> as
	 * a SCSI_VHCI_GET_TARGET_LONGNAME ioctl argument in order
	 * to get the 'target-port'. For KSTAT_FLAG_PERSISTENT kstats,
	 * this ioctl needs to translate a <pid> to a 'target-port'
	 * even after all pathinfo nodes associated with the
	 * 'target-port' have been destroyed. This is needed to support
	 * consistent first-iteration activity-since-boot iostat(1M)
	 * output. Because of this requirement, the mapping can't be
	 * based on pathinfo information in a devinfo snapshot.
	 */

	/* determine 'target-port' */
	if (mdi_prop_lookup_string(pip,
	    SCSI_ADDR_PROP_TARGET_PORT, &target_port) == MDI_SUCCESS) {
		target_port_dup = i_ddi_strdup(target_port, KM_SLEEP);
		(void) mdi_prop_free(target_port);
		by_id = 1;
	} else {
		/*
		 * If the pHCI did not set up 'target-port' on this
		 * pathinfo node, assume that our client is the only
		 * one with paths to the device by using the guid
		 * value as the 'target-port'. Since no other client
		 * will have the same guid, no other client will use
		 * the same <pid>. NOTE: a client with an instance
		 * number always has a guid.
		 */
		(void) ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip,
		    PROPFLAGS, MDI_CLIENT_GUID_PROP, &guid);
		target_port_dup = i_ddi_strdup(guid, KM_SLEEP);
		ddi_prop_free(guid);

		/*
		 * For this type of mapping we don't want the
		 * <id> -> 'target-port' mapping to be made. This
		 * will cause the SCSI_VHCI_GET_TARGET_LONGNAME ioctl
		 * to fail, and the iostat(1M) long '-n' output will
		 * still use the <pid>. We do this because we just
		 * made up the 'target-port' using the guid, and we
		 * don't want to expose that fact in iostat output.
		 */
		by_id = 0;
	}

	/* find/establish <pid> given 'target-port' */
	mutex_enter(&vhci_targetmap_mutex);
	if (mod_hash_find(vhci_targetmap_byport,
	    (mod_hash_key_t)target_port_dup, &hv) == 0) {
		pid = (int)(intptr_t)hv;	/* mapping exists */
	} else {
		pid = vhci_targetmap_pid++;	/* new mapping */

		(void) mod_hash_insert(vhci_targetmap_byport,
		    (mod_hash_key_t)target_port_dup,
		    (mod_hash_val_t)(intptr_t)pid);
		/* reverse map only for genuine pHCI-supplied ports */
		if (by_id) {
			(void) mod_hash_insert(vhci_targetmap_bypid,
			    (mod_hash_key_t)(uintptr_t)pid,
			    (mod_hash_val_t)(uintptr_t)target_port_dup);
		}
		target_port_dup = NULL;		/* owned by hash */
	}
	mutex_exit(&vhci_targetmap_mutex);

	/* form kstat name */
	(void) snprintf(ks_name, KSTAT_STRLEN, "%s%d.t%d.%s%d",
	    ddi_driver_name(tgt_dip), ddi_get_instance(tgt_dip),
	    pid, ddi_driver_name(pdip), ddi_get_instance(pdip));

	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_path_online: path:%p "
	    "kstat %s: pid %x <-> port %s\n", (void *)pip,
	    ks_name, pid, target_port_dup));
	/* non-NULL only when the hash already had a mapping; free our dup */
	if (target_port_dup)
		kmem_free(target_port_dup, strlen(target_port_dup) + 1);

	/* call mdi to create kstats with the name we built */
	(void) mdi_pi_kstat_create(pip, ks_name);
}
5153
/*
 * mdi pathinfo online entry point: bring one path to a multipathed LUN
 * online.  Probes the device through this path, (re)establishes the
 * failover module binding for the LUN, revalidates an active persistent
 * reservation (if any) on the new path, issues a one-time SCSI-2 RELEASE
 * on a brand-new path, and finally updates pathinfo state, MP-API data
 * and per-path kstats.
 *
 * Returns MDI_SUCCESS on success, MDI_NOT_SUPPORTED when no failover
 * module claims the device (or paths report contradictory modules),
 * MDI_FAILURE otherwise.
 */
/* ARGSUSED */
static int
vhci_pathinfo_online(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
{
	scsi_hba_tran_t			*hba = NULL;
	struct scsi_device		*psd = NULL;
	scsi_vhci_lun_t			*vlun = NULL;
	dev_info_t			*pdip = NULL;
	dev_info_t			*cdip;
	dev_info_t			*tgt_dip;
	struct scsi_vhci		*vhci;
	char				*guid;
	struct scsi_failover_ops	*sfo;
	scsi_vhci_priv_t		*svp = NULL;
	struct scsi_address		*ap;
	struct scsi_pkt			*pkt;
	int				rval = MDI_FAILURE;
	mpapi_item_list_t		*list_ptr;
	mpapi_lu_data_t			*ld;

	ASSERT(vdip != NULL);
	ASSERT(pip != NULL);

	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
	ASSERT(vhci != NULL);

	pdip = mdi_pi_get_phci(pip);
	hba = ddi_get_driver_private(pdip);
	ASSERT(hba != NULL);

	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
	ASSERT(svp != NULL);

	cdip = mdi_pi_get_client(pip);
	ASSERT(cdip != NULL);
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, PROPFLAGS,
	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
		VHCI_DEBUG(1, (CE_WARN, NULL, "vhci_path_online: lun guid "
		    "property failed"));
		goto failure;
	}

	/* NOTE(review): vlun presence is only ASSERTed, not checked. */
	vlun = vhci_lun_lookup(cdip);
	ASSERT(vlun != NULL);

	ddi_prop_free(guid);

	vlun->svl_dip = mdi_pi_get_client(pip);
	ASSERT(vlun->svl_dip != NULL);

	psd = svp->svp_psd;
	ASSERT(psd != NULL);

	ap = &psd->sd_address;

	/*
	 * Get inquiry data into pathinfo related scsi_device structure.
	 * Free sq_inq when pathinfo related scsi_device structure is destroyed
	 * by vhci_pathinfo_uninit(). In other words, vhci maintains its own
	 * copy of scsi_device and scsi_inquiry data on a per-path basis.
	 */
	if (scsi_probe(psd, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_pathinfo_online: "
		    "scsi_probe failed path:%p rval:%x\n", (void *)pip, rval));
		rval = MDI_FAILURE;
		goto failure;
	}

	/*
	 * See if we have a failover module to support the device.
	 *
	 * We re-probe to determine the failover ops for each path. This
	 * is done in case there are any path-specific side-effects associated
	 * with the sfo_device_probe implementation.
	 *
	 * Give the first successful sfo_device_probe the opportunity to
	 * establish 'ctpriv', vlun/client private data. The ctpriv will
	 * then be passed into the failover module on all other sfo_device_*()
	 * operations (and must be freed by sfo_device_unprobe implementation).
	 *
	 * NOTE: While sfo_device_probe is done once per path,
	 * sfo_device_unprobe only occurs once - when the vlun is destroyed.
	 *
	 * NOTE: We don't currently support per-path fops private data
	 * mechanism.
	 */
	sfo = vhci_dev_fo(vdip, psd,
	    &vlun->svl_fops_ctpriv, &vlun->svl_fops_name);

	/* check path configuration result with current vlun state */
	if (((sfo && vlun->svl_fops) && (sfo != vlun->svl_fops)) ||
	    (sfo && vlun->svl_not_supported) ||
	    ((sfo == NULL) && vlun->svl_fops)) {
		/* Getting different results for different paths. */
		VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip,
		    "!vhci_pathinfo_online: dev (path 0x%p) contradiction\n",
		    (void *)pip));
		cmn_err(CE_WARN, "scsi_vhci: failover contradiction: "
		    "'%s'.vs.'%s': path %s\n",
		    vlun->svl_fops ? vlun->svl_fops->sfo_name : "NULL",
		    sfo ? sfo->sfo_name  : "NULL", mdi_pi_pathname(pip));
		vlun->svl_not_supported = 1;
		rval = MDI_NOT_SUPPORTED;
		goto done;
	} else if (sfo == NULL) {
		/* No failover module - device not supported under vHCI.  */
		VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip,
		    "!vhci_pathinfo_online: dev (path 0x%p) not "
		    "supported\n", (void *)pip));

		/* XXX does this contradict vhci_is_dev_supported ? */
		vlun->svl_not_supported = 1;
		rval = MDI_NOT_SUPPORTED;
		goto done;
	}

	/* failover supported for device - save failover_ops in vlun */
	vlun->svl_fops = sfo;
	ASSERT(vlun->svl_fops_name != NULL);

	/*
	 * Obtain the device-type based mpxio options as specified in
	 * scsi_vhci.conf file.
	 *
	 * NOTE: currently, the end result is a call to
	 * mdi_set_lb_region_size().
	 */
	tgt_dip = psd->sd_dev;
	ASSERT(tgt_dip != NULL);
	vhci_get_device_type_mpxio_options(vdip, tgt_dip, psd);

	/*
	 * if PGR is active, revalidate key and register on this path also,
	 * if key is still valid
	 */
	sema_p(&vlun->svl_pgr_sema);
	if (vlun->svl_pgr_active) {
		rval = vhci_pgr_validate_and_register(svp);
		if (rval != 1) {
			rval = MDI_FAILURE;
			sema_v(&vlun->svl_pgr_sema);
			goto failure;
		}
	}
	sema_v(&vlun->svl_pgr_sema);

	if (svp->svp_new_path) {
		/*
		 * Last chance to perform any cleanup operations on this
		 * new path before making this path completely online.
		 */
		svp->svp_new_path = 0;

		/*
		 * If scsi_vhci knows the lun is already RESERVE'd,
		 * then skip the issue of RELEASE on new path.
		 */
		if ((vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) == 0) {
			/*
			 * Issue SCSI-2 RELEASE only for the first time on
			 * a new path just in case the host rebooted and
			 * a reservation is still pending on this path.
			 * IBM Shark storage does not clear RESERVE upon
			 * host reboot.
			 */
			pkt = scsi_init_pkt(ap, NULL, NULL, CDB_GROUP0,
			    sizeof (struct scsi_arq_status), 0, 0,
			    SLEEP_FUNC, NULL);
			if (pkt == NULL) {
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!vhci_pathinfo_online: "
				    "Release init_pkt failed :%p\n",
				    (void *)pip));
				rval = MDI_FAILURE;
				goto failure;
			}
			pkt->pkt_cdbp[0] = SCMD_RELEASE;
			pkt->pkt_time = vhci_io_time;

			VHCI_DEBUG(1, (CE_NOTE, NULL,
			    "!vhci_path_online: path:%p "
			    "Issued SCSI-2 RELEASE\n", (void *)pip));

			/* Ignore the return value */
			(void) vhci_do_scsi_cmd(pkt);
			scsi_destroy_pkt(pkt);
		}
	}

	rval = vhci_update_pathinfo(psd, pip, sfo, vlun, vhci);
	if (rval == MDI_FAILURE) {
		goto failure;
	}

	/* Initialize MP-API data */
	vhci_update_mpapi_data(vhci, vlun, pip);

	/*
	 * MP-API also needs the Inquiry data to be maintained in the
	 * mp_vendor_prop_t structure, so find the lun and update its
	 * structure with this data.
	 */
	list_ptr = (mpapi_item_list_t *)vhci_get_mpapi_item(vhci, NULL,
	    MP_OBJECT_TYPE_MULTIPATH_LU, (void *)vlun);
	/*
	 * NOTE(review): list_ptr is dereferenced without a NULL check;
	 * presumably vhci_update_mpapi_data() above guarantees the item
	 * exists - confirm before relying on this path.
	 */
	ld = (mpapi_lu_data_t *)list_ptr->item->idata;
	if (ld != NULL) {
		bcopy(psd->sd_inq->inq_vid, ld->prop.prodInfo.vendor, 8);
		bcopy(psd->sd_inq->inq_pid, ld->prop.prodInfo.product, 16);
		bcopy(psd->sd_inq->inq_revision, ld->prop.prodInfo.revision, 4);
	} else {
		VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_pathinfo_online: "
		    "mpapi_lu_data_t is NULL"));
	}

	/* create kstats for path */
	vhci_kstat_create_pathinfo(pip);

done:
	/* wake up any threads waiting (in vhci_scsi_start etc.) for paths */
	mutex_enter(&vhci_global_mutex);
	cv_broadcast(&vhci_cv);
	mutex_exit(&vhci_global_mutex);

	if (vlun->svl_setcap_done) {
		(void) vhci_pHCI_cap(ap, "sector-size",
		    vlun->svl_sector_size, 1, pip);
	}

	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_path_online: path:%p\n",
	    (void *)pip));

	/* success path also falls through here; rval carries the result */
failure:
	return (rval);
}
5387
5388 /*
5389 * path offline handler. Release all bindings that will not be
5390 * released by the normal packet transport/completion code path.
5391 * Since we don't (presently) keep any bindings alive outside of
5392 * the in-transport packets (which will be released on completion)
5393 * there is not much to do here.
5394 */
5395 /* ARGSUSED */
5396 static int
5397 vhci_pathinfo_offline(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
5398 {
5399 scsi_hba_tran_t *hba = NULL;
5400 struct scsi_device *psd = NULL;
5401 dev_info_t *pdip = NULL;
5402 dev_info_t *cdip = NULL;
5403 scsi_vhci_priv_t *svp = NULL;
5404
5405 ASSERT(vdip != NULL);
5406 ASSERT(pip != NULL);
5407
5408 pdip = mdi_pi_get_phci(pip);
5409 ASSERT(pdip != NULL);
5410 if (pdip == NULL) {
5411 VHCI_DEBUG(1, (CE_WARN, vdip, "Invalid path 0x%p: NULL "
5412 "phci dip", (void *)pip));
5413 return (MDI_FAILURE);
5414 }
5415
5416 cdip = mdi_pi_get_client(pip);
5417 ASSERT(cdip != NULL);
5418 if (cdip == NULL) {
5419 VHCI_DEBUG(1, (CE_WARN, vdip, "Invalid path 0x%p: NULL "
5420 "client dip", (void *)pip));
5421 return (MDI_FAILURE);
5422 }
5423
5424 hba = ddi_get_driver_private(pdip);
5425 ASSERT(hba != NULL);
5426
5427 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
5428 if (svp == NULL) {
5429 /*
5430 * mdi_pathinfo node in INIT state can have vHCI private
5431 * information set to null
5432 */
5433 VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5434 "svp is NULL for pip 0x%p\n", (void *)pip));
5435 return (MDI_SUCCESS);
5436 }
5437
5438 psd = svp->svp_psd;
5439 ASSERT(psd != NULL);
5440
5441 mutex_enter(&svp->svp_mutex);
5442
5443 VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5444 "%d cmds pending on path: 0x%p\n", svp->svp_cmds, (void *)pip));
5445 while (svp->svp_cmds != 0) {
5446 if (cv_reltimedwait(&svp->svp_cv, &svp->svp_mutex,
5447 drv_usectohz(vhci_path_quiesce_timeout * 1000000),
5448 TR_CLOCK_TICK) == -1) {
5449 /*
5450 * The timeout time reached without the condition
5451 * being signaled.
5452 */
5453 VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5454 "Timeout reached on path 0x%p without the cond\n",
5455 (void *)pip));
5456 VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5457 "%d cmds still pending on path: 0x%p\n",
5458 svp->svp_cmds, (void *)pip));
5459 break;
5460 }
5461 }
5462 mutex_exit(&svp->svp_mutex);
5463
5464 /*
5465 * Check to see if this vlun has an active SCSI-II RESERVE. And this
5466 * is the pip for the path that has been reserved.
5467 * If so clear the reservation by sending a reset, so the host will not
5468 * get a reservation conflict. Reset the flag VLUN_RESERVE_ACTIVE_FLG
5469 * for this lun. Also a reset notify is sent to the target driver
5470 * just in case the POR check condition is cleared by some other layer
5471 * in the stack.
5472 */
5473 if (svp->svp_svl->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
5474 if (pip == svp->svp_svl->svl_resrv_pip) {
5475 if (vhci_recovery_reset(svp->svp_svl,
5476 &svp->svp_psd->sd_address, TRUE,
5477 VHCI_DEPTH_TARGET) == 0) {
5478 VHCI_DEBUG(1, (CE_NOTE, NULL,
5479 "!vhci_pathinfo_offline (pip:%p):"
5480 "reset failed, retrying\n", (void *)pip));
5481 delay(1*drv_usectohz(1000000));
5482 if (vhci_recovery_reset(svp->svp_svl,
5483 &svp->svp_psd->sd_address, TRUE,
5484 VHCI_DEPTH_TARGET) == 0) {
5485 VHCI_DEBUG(1, (CE_NOTE, NULL,
5486 "!vhci_pathinfo_offline "
5487 "(pip:%p): reset failed, "
5488 "giving up!\n", (void *)pip));
5489 }
5490 }
5491 svp->svp_svl->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
5492 }
5493 }
5494
5495 mdi_pi_set_state(pip, MDI_PATHINFO_STATE_OFFLINE);
5496 vhci_mpapi_set_path_state(vdip, pip, MP_DRVR_PATH_STATE_REMOVED);
5497
5498 VHCI_DEBUG(1, (CE_NOTE, NULL,
5499 "!vhci_pathinfo_offline: offlined path 0x%p\n", (void *)pip));
5500 return (MDI_SUCCESS);
5501 }
5502
5503
5504 /*
5505 * routine for SCSI VHCI IOCTL implementation.
5506 */
5507 /* ARGSUSED */
5508 static int
5509 vhci_ctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *credp, int *rval)
5510 {
5511 struct scsi_vhci *vhci;
5512 dev_info_t *vdip;
5513 mdi_pathinfo_t *pip;
5514 int instance, held;
5515 int retval = 0;
5516 caddr_t phci_path = NULL, client_path = NULL;
5517 caddr_t paddr = NULL;
5518 sv_iocdata_t ioc;
5519 sv_iocdata_t *pioc = &ioc;
5520 sv_switch_to_cntlr_iocdata_t iocsc;
5521 sv_switch_to_cntlr_iocdata_t *piocsc = &iocsc;
5522 caddr_t s;
5523 scsi_vhci_lun_t *vlun;
5524 struct scsi_failover_ops *fo;
5525 char *pclass;
5526
5527 /* Check for validity of vhci structure */
5528 vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(dev)));
5529 if (vhci == NULL) {
5530 return (ENXIO);
5531 }
5532
5533 mutex_enter(&vhci->vhci_mutex);
5534 if ((vhci->vhci_state & VHCI_STATE_OPEN) == 0) {
5535 mutex_exit(&vhci->vhci_mutex);
5536 return (ENXIO);
5537 }
5538 mutex_exit(&vhci->vhci_mutex);
5539
5540 /* Get the vhci dip */
5541 vdip = vhci->vhci_dip;
5542 ASSERT(vdip != NULL);
5543 instance = ddi_get_instance(vdip);
5544
5545 /* Allocate memory for getting parameters from userland */
5546 phci_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
5547 client_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
5548 paddr = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
5549
5550 /*
5551 * Set a local variable indicating the ioctl name. Used for
5552 * printing debug strings.
5553 */
5554 switch (cmd) {
5555 case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5556 s = "GET_CLIENT_MULTIPATH_INFO";
5557 break;
5558
5559 case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5560 s = "GET_PHCI_MULTIPATH_INFO";
5561 break;
5562
5563 case SCSI_VHCI_GET_CLIENT_NAME:
5564 s = "GET_CLIENT_NAME";
5565 break;
5566
5567 case SCSI_VHCI_PATH_ONLINE:
5568 s = "PATH_ONLINE";
5569 break;
5570
5571 case SCSI_VHCI_PATH_OFFLINE:
5572 s = "PATH_OFFLINE";
5573 break;
5574
5575 case SCSI_VHCI_PATH_STANDBY:
5576 s = "PATH_STANDBY";
5577 break;
5578
5579 case SCSI_VHCI_PATH_TEST:
5580 s = "PATH_TEST";
5581 break;
5582
5583 case SCSI_VHCI_SWITCH_TO_CNTLR:
5584 s = "SWITCH_TO_CNTLR";
5585 break;
5586 case SCSI_VHCI_PATH_DISABLE:
5587 s = "PATH_DISABLE";
5588 break;
5589 case SCSI_VHCI_PATH_ENABLE:
5590 s = "PATH_ENABLE";
5591 break;
5592
5593 case SCSI_VHCI_GET_TARGET_LONGNAME:
5594 s = "GET_TARGET_LONGNAME";
5595 break;
5596
5597 #ifdef DEBUG
5598 case SCSI_VHCI_CONFIGURE_PHCI:
5599 s = "CONFIGURE_PHCI";
5600 break;
5601
5602 case SCSI_VHCI_UNCONFIGURE_PHCI:
5603 s = "UNCONFIGURE_PHCI";
5604 break;
5605 #endif
5606
5607 default:
5608 s = "Unknown";
5609 vhci_log(CE_NOTE, vdip,
5610 "!vhci%d: ioctl %x (unsupported ioctl)", instance, cmd);
5611 retval = ENOTSUP;
5612 break;
5613 }
5614 if (retval != 0) {
5615 goto end;
5616 }
5617
5618 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci%d: ioctl <%s>", instance, s));
5619
5620 /*
5621 * Get IOCTL parameters from userland
5622 */
5623 switch (cmd) {
5624 case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5625 case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5626 case SCSI_VHCI_GET_CLIENT_NAME:
5627 case SCSI_VHCI_PATH_ONLINE:
5628 case SCSI_VHCI_PATH_OFFLINE:
5629 case SCSI_VHCI_PATH_STANDBY:
5630 case SCSI_VHCI_PATH_TEST:
5631 case SCSI_VHCI_PATH_DISABLE:
5632 case SCSI_VHCI_PATH_ENABLE:
5633 case SCSI_VHCI_GET_TARGET_LONGNAME:
5634 #ifdef DEBUG
5635 case SCSI_VHCI_CONFIGURE_PHCI:
5636 case SCSI_VHCI_UNCONFIGURE_PHCI:
5637 #endif
5638 retval = vhci_get_iocdata((const void *)data, pioc, mode, s);
5639 break;
5640
5641 case SCSI_VHCI_SWITCH_TO_CNTLR:
5642 retval = vhci_get_iocswitchdata((const void *)data, piocsc,
5643 mode, s);
5644 break;
5645 }
5646 if (retval != 0) {
5647 goto end;
5648 }
5649
5650
5651 /*
5652 * Process the IOCTL
5653 */
5654 switch (cmd) {
5655 case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5656 {
5657 uint_t num_paths; /* Num paths to client dev */
5658 sv_path_info_t *upibuf = NULL; /* To keep userland values */
5659 sv_path_info_t *kpibuf = NULL; /* Kernel data for ioctls */
5660 dev_info_t *cdip; /* Client device dip */
5661
5662 if (pioc->ret_elem == NULL) {
5663 retval = EINVAL;
5664 break;
5665 }
5666
5667 /* Get client device path from user land */
5668 if (vhci_ioc_get_client_path(pioc, client_path, mode, s)) {
5669 retval = EFAULT;
5670 break;
5671 }
5672
5673 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5674 "client <%s>", s, client_path));
5675
5676 /* Get number of paths to this client device */
5677 if ((cdip = mdi_client_path2devinfo(vdip, client_path))
5678 == NULL) {
5679 retval = ENXIO;
5680 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5681 "client dip doesn't exist. invalid path <%s>",
5682 s, client_path));
5683 break;
5684 }
5685 num_paths = mdi_client_get_path_count(cdip);
5686
5687 if (ddi_copyout(&num_paths, pioc->ret_elem,
5688 sizeof (num_paths), mode)) {
5689 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5690 "num_paths copyout failed", s));
5691 retval = EFAULT;
5692 break;
5693 }
5694
5695 /* If user just wanted num_paths, then return */
5696 if (pioc->buf_elem == 0 || pioc->ret_buf == NULL ||
5697 num_paths == 0) {
5698 break;
5699 }
5700
5701 /* Set num_paths to value as much as can be sent to userland */
5702 if (num_paths > pioc->buf_elem) {
5703 num_paths = pioc->buf_elem;
5704 }
5705
5706 /* Allocate memory and get userland pointers */
5707 if (vhci_ioc_alloc_pathinfo(&upibuf, &kpibuf, num_paths,
5708 pioc, mode, s) != 0) {
5709 retval = EFAULT;
5710 break;
5711 }
5712 ASSERT(upibuf != NULL);
5713 ASSERT(kpibuf != NULL);
5714
5715 /*
5716 * Get the path information and send it to userland.
5717 */
5718 if (vhci_get_client_path_list(cdip, kpibuf, num_paths)
5719 != MDI_SUCCESS) {
5720 retval = ENXIO;
5721 vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5722 break;
5723 }
5724
5725 if (vhci_ioc_send_pathinfo(upibuf, kpibuf, num_paths,
5726 pioc, mode, s)) {
5727 retval = EFAULT;
5728 vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5729 break;
5730 }
5731
5732 /* Free the memory allocated for path information */
5733 vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5734 break;
5735 }
5736
5737 case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5738 {
5739 uint_t num_paths; /* Num paths to client dev */
5740 sv_path_info_t *upibuf = NULL; /* To keep userland values */
5741 sv_path_info_t *kpibuf = NULL; /* Kernel data for ioctls */
5742 dev_info_t *pdip; /* PHCI device dip */
5743
5744 if (pioc->ret_elem == NULL) {
5745 retval = EINVAL;
5746 break;
5747 }
5748
5749 /* Get PHCI device path from user land */
5750 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
5751 retval = EFAULT;
5752 break;
5753 }
5754
5755 VHCI_DEBUG(6, (CE_WARN, vdip,
5756 "!vhci_ioctl: ioctl <%s> phci <%s>", s, phci_path));
5757
5758 /* Get number of devices associated with this PHCI device */
5759 if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5760 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5761 "phci dip doesn't exist. invalid path <%s>",
5762 s, phci_path));
5763 retval = ENXIO;
5764 break;
5765 }
5766
5767 num_paths = mdi_phci_get_path_count(pdip);
5768
5769 if (ddi_copyout(&num_paths, pioc->ret_elem,
5770 sizeof (num_paths), mode)) {
5771 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5772 "num_paths copyout failed", s));
5773 retval = EFAULT;
5774 break;
5775 }
5776
5777 /* If user just wanted num_paths, then return */
5778 if (pioc->buf_elem == 0 || pioc->ret_buf == NULL ||
5779 num_paths == 0) {
5780 break;
5781 }
5782
5783 /* Set num_paths to value as much as can be sent to userland */
5784 if (num_paths > pioc->buf_elem) {
5785 num_paths = pioc->buf_elem;
5786 }
5787
5788 /* Allocate memory and get userland pointers */
5789 if (vhci_ioc_alloc_pathinfo(&upibuf, &kpibuf, num_paths,
5790 pioc, mode, s) != 0) {
5791 retval = EFAULT;
5792 break;
5793 }
5794 ASSERT(upibuf != NULL);
5795 ASSERT(kpibuf != NULL);
5796
5797 /*
5798 * Get the path information and send it to userland.
5799 */
5800 if (vhci_get_phci_path_list(pdip, kpibuf, num_paths)
5801 != MDI_SUCCESS) {
5802 retval = ENXIO;
5803 vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5804 break;
5805 }
5806
5807 if (vhci_ioc_send_pathinfo(upibuf, kpibuf, num_paths,
5808 pioc, mode, s)) {
5809 retval = EFAULT;
5810 vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5811 break;
5812 }
5813
5814 /* Free the memory allocated for path information */
5815 vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5816 break;
5817 }
5818
5819 case SCSI_VHCI_GET_CLIENT_NAME:
5820 {
5821 dev_info_t *cdip, *pdip;
5822
5823 /* Get PHCI path and device address from user land */
5824 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s) ||
5825 vhci_ioc_get_paddr(pioc, paddr, mode, s)) {
5826 retval = EFAULT;
5827 break;
5828 }
5829
5830 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5831 "phci <%s>, paddr <%s>", s, phci_path, paddr));
5832
5833 /* Get the PHCI dip */
5834 if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5835 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5836 "phci dip doesn't exist. invalid path <%s>",
5837 s, phci_path));
5838 retval = ENXIO;
5839 break;
5840 }
5841
5842 if ((pip = mdi_pi_find(pdip, NULL, paddr)) == NULL) {
5843 VHCI_DEBUG(1, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5844 "pathinfo doesn't exist. invalid device addr", s));
5845 retval = ENXIO;
5846 break;
5847 }
5848
5849 /* Get the client device pathname and send to userland */
5850 cdip = mdi_pi_get_client(pip);
5851 vhci_ioc_devi_to_path(cdip, client_path);
5852
5853 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5854 "client <%s>", s, client_path));
5855
5856 if (vhci_ioc_send_client_path(client_path, pioc, mode, s)) {
5857 retval = EFAULT;
5858 break;
5859 }
5860 break;
5861 }
5862
5863 case SCSI_VHCI_PATH_ONLINE:
5864 case SCSI_VHCI_PATH_OFFLINE:
5865 case SCSI_VHCI_PATH_STANDBY:
5866 case SCSI_VHCI_PATH_TEST:
5867 {
5868 dev_info_t *pdip; /* PHCI dip */
5869
5870 /* Get PHCI path and device address from user land */
5871 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s) ||
5872 vhci_ioc_get_paddr(pioc, paddr, mode, s)) {
5873 retval = EFAULT;
5874 break;
5875 }
5876
5877 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5878 "phci <%s>, paddr <%s>", s, phci_path, paddr));
5879
5880 /* Get the PHCI dip */
5881 if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5882 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5883 "phci dip doesn't exist. invalid path <%s>",
5884 s, phci_path));
5885 retval = ENXIO;
5886 break;
5887 }
5888
5889 if ((pip = mdi_pi_find(pdip, NULL, paddr)) == NULL) {
5890 VHCI_DEBUG(1, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5891 "pathinfo doesn't exist. invalid device addr", s));
5892 retval = ENXIO;
5893 break;
5894 }
5895
5896 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5897 "Calling MDI function to change device state", s));
5898
5899 switch (cmd) {
5900 case SCSI_VHCI_PATH_ONLINE:
5901 retval = mdi_pi_online(pip, 0);
5902 break;
5903
5904 case SCSI_VHCI_PATH_OFFLINE:
5905 retval = mdi_pi_offline(pip, 0);
5906 break;
5907
5908 case SCSI_VHCI_PATH_STANDBY:
5909 retval = mdi_pi_standby(pip, 0);
5910 break;
5911
5912 case SCSI_VHCI_PATH_TEST:
5913 break;
5914 }
5915 break;
5916 }
5917
5918 case SCSI_VHCI_SWITCH_TO_CNTLR:
5919 {
5920 dev_info_t *cdip;
5921 struct scsi_device *devp;
5922
5923 /* Get the client device pathname */
5924 if (ddi_copyin(piocsc->client, client_path,
5925 MAXPATHLEN, mode)) {
5926 VHCI_DEBUG(2, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5927 "client_path copyin failed", s));
5928 retval = EFAULT;
5929 break;
5930 }
5931
5932 /* Get the path class to which user wants to switch */
5933 if (ddi_copyin(piocsc->class, paddr, MAXNAMELEN, mode)) {
5934 VHCI_DEBUG(2, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5935 "controller_class copyin failed", s));
5936 retval = EFAULT;
5937 break;
5938 }
5939
5940 /* Perform validity checks */
5941 if ((cdip = mdi_client_path2devinfo(vdip,
5942 client_path)) == NULL) {
5943 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5944 "client dip doesn't exist. invalid path <%s>",
5945 s, client_path));
5946 retval = ENXIO;
5947 break;
5948 }
5949
5950 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: Calling MDI func "
5951 "to switch controller"));
5952 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: client <%s> "
5953 "class <%s>", client_path, paddr));
5954
5955 if (strcmp(paddr, PCLASS_PRIMARY) &&
5956 strcmp(paddr, PCLASS_SECONDARY)) {
5957 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5958 "invalid path class <%s>", s, paddr));
5959 retval = ENXIO;
5960 break;
5961 }
5962
5963 devp = ddi_get_driver_private(cdip);
5964 if (devp == NULL) {
5965 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5966 "invalid scsi device <%s>", s, client_path));
5967 retval = ENXIO;
5968 break;
5969 }
5970 vlun = ADDR2VLUN(&devp->sd_address);
5971 ASSERT(vlun);
5972
5973 /*
5974 * Checking to see if device has only one pclass, PRIMARY.
5975 * If so this device doesn't support failovers. Assumed
5976 * that the devices with one pclass is PRIMARY, as thats the
5977 * case today. If this is not true and in future other
5978 * symmetric devices are supported with other pclass, this
5979 * IOCTL shall have to be overhauled anyways as now the only
5980 * arguments it accepts are PRIMARY and SECONDARY.
5981 */
5982 fo = vlun->svl_fops;
5983 if (fo->sfo_pathclass_next(PCLASS_PRIMARY, &pclass,
5984 vlun->svl_fops_ctpriv)) {
5985 retval = ENOTSUP;
5986 break;
5987 }
5988
5989 VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
5990 mutex_enter(&vlun->svl_mutex);
5991 if (vlun->svl_active_pclass != NULL) {
5992 if (strcmp(vlun->svl_active_pclass, paddr) == 0) {
5993 mutex_exit(&vlun->svl_mutex);
5994 retval = EALREADY;
5995 VHCI_RELEASE_LUN(vlun);
5996 break;
5997 }
5998 }
5999 mutex_exit(&vlun->svl_mutex);
6000 /* Call mdi function to cause a switch over */
6001 retval = mdi_failover(vdip, cdip, MDI_FAILOVER_SYNC);
6002 if (retval == MDI_SUCCESS) {
6003 retval = 0;
6004 } else if (retval == MDI_BUSY) {
6005 retval = EBUSY;
6006 } else {
6007 retval = EIO;
6008 }
6009 VHCI_RELEASE_LUN(vlun);
6010 break;
6011 }
6012
6013 case SCSI_VHCI_PATH_ENABLE:
6014 case SCSI_VHCI_PATH_DISABLE:
6015 {
6016 dev_info_t *cdip, *pdip;
6017
6018 /*
6019 * Get client device path from user land
6020 */
6021 if (vhci_ioc_get_client_path(pioc, client_path, mode, s)) {
6022 retval = EFAULT;
6023 break;
6024 }
6025
6026 /*
6027 * Get Phci device path from user land
6028 */
6029 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
6030 retval = EFAULT;
6031 break;
6032 }
6033
6034 /*
6035 * Get the devinfo for the Phci.
6036 */
6037 if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
6038 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
6039 "phci dip doesn't exist. invalid path <%s>",
6040 s, phci_path));
6041 retval = ENXIO;
6042 break;
6043 }
6044
6045 /*
6046 * If the client path is set to /scsi_vhci then we need
6047 * to do the operation on all the clients so set cdip to NULL.
6048 * Else, try to get the client dip.
6049 */
6050 if (strcmp(client_path, "/scsi_vhci") == 0) {
6051 cdip = NULL;
6052 } else {
6053 if ((cdip = mdi_client_path2devinfo(vdip,
6054 client_path)) == NULL) {
6055 retval = ENXIO;
6056 VHCI_DEBUG(1, (CE_WARN, NULL,
6057 "!vhci_ioctl: ioctl <%s> client dip "
6058 "doesn't exist. invalid path <%s>",
6059 s, client_path));
6060 break;
6061 }
6062 }
6063
6064 if (cmd == SCSI_VHCI_PATH_ENABLE)
6065 retval = mdi_pi_enable(cdip, pdip, USER_DISABLE);
6066 else
6067 retval = mdi_pi_disable(cdip, pdip, USER_DISABLE);
6068
6069 break;
6070 }
6071
6072 case SCSI_VHCI_GET_TARGET_LONGNAME:
6073 {
6074 uint_t pid = pioc->buf_elem;
6075 char *target_port;
6076 mod_hash_val_t hv;
6077
6078 /* targetmap lookup of 'target-port' by <pid> */
6079 if (mod_hash_find(vhci_targetmap_bypid,
6080 (mod_hash_key_t)(uintptr_t)pid, &hv) != 0) {
6081 /*
6082 * NOTE: failure to find the mapping is OK for guid
6083 * based 'target-port' values.
6084 */
6085 VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
6086 "targetport mapping doesn't exist: pid %d",
6087 s, pid));
6088 retval = ENXIO;
6089 break;
6090 }
6091
6092 /* copyout 'target-port' result */
6093 target_port = (char *)hv;
6094 if (copyoutstr(target_port, pioc->addr, MAXNAMELEN, NULL)) {
6095 VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
6096 "targetport copyout failed: len: %d",
6097 s, (int)strlen(target_port)));
6098 retval = EFAULT;
6099 }
6100 break;
6101 }
6102
6103 #ifdef DEBUG
6104 case SCSI_VHCI_CONFIGURE_PHCI:
6105 {
6106 dev_info_t *pdip;
6107
6108 /* Get PHCI path and device address from user land */
6109 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
6110 retval = EFAULT;
6111 break;
6112 }
6113
6114 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
6115 "phci <%s>", s, phci_path));
6116
6117 /* Get the PHCI dip */
6118 if ((pdip = e_ddi_hold_devi_by_path(phci_path, 0)) == NULL) {
6119 VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
6120 "phci dip doesn't exist. invalid path <%s>",
6121 s, phci_path));
6122 retval = ENXIO;
6123 break;
6124 }
6125
6126 if (ndi_devi_config(pdip,
6127 NDI_DEVFS_CLEAN|NDI_DEVI_PERSIST) != NDI_SUCCESS) {
6128 retval = EIO;
6129 }
6130
6131 ddi_release_devi(pdip);
6132 break;
6133 }
6134
6135 case SCSI_VHCI_UNCONFIGURE_PHCI:
6136 {
6137 dev_info_t *pdip;
6138
6139 /* Get PHCI path and device address from user land */
6140 if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
6141 retval = EFAULT;
6142 break;
6143 }
6144
6145 VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
6146 "phci <%s>", s, phci_path));
6147
6148 /* Get the PHCI dip */
6149 if ((pdip = e_ddi_hold_devi_by_path(phci_path, 0)) == NULL) {
6150 VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
6151 "phci dip doesn't exist. invalid path <%s>",
6152 s, phci_path));
6153 retval = ENXIO;
6154 break;
6155 }
6156
6157 if (ndi_devi_unconfig(pdip,
6158 NDI_DEVI_REMOVE|NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
6159 retval = EBUSY;
6160 }
6161
6162 ddi_release_devi(pdip);
6163 break;
6164 }
6165 #endif
6166 }
6167
6168 end:
6169 /* Free the memory allocated above */
6170 if (phci_path != NULL) {
6171 kmem_free(phci_path, MAXPATHLEN);
6172 }
6173 if (client_path != NULL) {
6174 kmem_free(client_path, MAXPATHLEN);
6175 }
6176 if (paddr != NULL) {
6177 kmem_free(paddr, MAXNAMELEN);
6178 }
6179 return (retval);
6180 }
6181
6182 /*
6183 * devctl IOCTL support for client device DR
6184 */
6185 /* ARGSUSED */
6186 int
6187 vhci_devctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
6188 int *rvalp)
6189 {
6190 dev_info_t *self;
6191 dev_info_t *child;
6192 scsi_hba_tran_t *hba;
6193 struct devctl_iocdata *dcp;
6194 struct scsi_vhci *vhci;
6195 int rv = 0;
6196 int retval = 0;
6197 scsi_vhci_priv_t *svp;
6198 mdi_pathinfo_t *pip;
6199
6200 if ((vhci = ddi_get_soft_state(vhci_softstate,
6201 MINOR2INST(getminor(dev)))) == NULL)
6202 return (ENXIO);
6203
6204 /*
6205 * check if :devctl minor device has been opened
6206 */
6207 mutex_enter(&vhci->vhci_mutex);
6208 if ((vhci->vhci_state & VHCI_STATE_OPEN) == 0) {
6209 mutex_exit(&vhci->vhci_mutex);
6210 return (ENXIO);
6211 }
6212 mutex_exit(&vhci->vhci_mutex);
6213
6214 self = vhci->vhci_dip;
6215 hba = ddi_get_driver_private(self);
6216 if (hba == NULL)
6217 return (ENXIO);
6218
6219 /*
6220 * We can use the generic implementation for these ioctls
6221 */
6222 switch (cmd) {
6223 case DEVCTL_DEVICE_GETSTATE:
6224 case DEVCTL_DEVICE_ONLINE:
6225 case DEVCTL_DEVICE_OFFLINE:
6226 case DEVCTL_DEVICE_REMOVE:
6227 case DEVCTL_BUS_GETSTATE:
6228 return (ndi_devctl_ioctl(self, cmd, arg, mode, 0));
6229 }
6230
6231 /*
6232 * read devctl ioctl data
6233 */
6234 if (ndi_dc_allochdl((void *)arg, &dcp) != NDI_SUCCESS)
6235 return (EFAULT);
6236
6237 switch (cmd) {
6238
6239 case DEVCTL_DEVICE_RESET:
6240 /*
6241 * lookup and hold child device
6242 */
6243 if ((child = ndi_devi_find(self, ndi_dc_getname(dcp),
6244 ndi_dc_getaddr(dcp))) == NULL) {
6245 rv = ENXIO;
6246 break;
6247 }
6248 retval = mdi_select_path(child, NULL,
6249 (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
6250 NULL, &pip);
6251 if ((retval != MDI_SUCCESS) || (pip == NULL)) {
6252 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl:"
6253 "Unable to get a path, dip 0x%p", (void *)child));
6254 rv = ENXIO;
6255 break;
6256 }
6257 svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
6258
6259 VHCI_DEBUG(2, (CE_NOTE, NULL,
6260 "!reset %s@%s on all available paths",
6261 ndi_dc_getname(dcp), ndi_dc_getaddr(dcp)));
6262
6263 if (vhci_scsi_reset_all_paths(&svp->svp_psd->sd_address) != 0) {
6264 VHCI_DEBUG(2, (CE_WARN, NULL,
6265 "!vhci_ioctl(pip:%p): reset failed\n",
6266 (void *)pip));
6267 rv = ENXIO;
6268 }
6269 mdi_rele_path(pip);
6270 break;
6271
6272 case DEVCTL_BUS_QUIESCE:
6273 case DEVCTL_BUS_UNQUIESCE:
6274 case DEVCTL_BUS_RESET:
6275 case DEVCTL_BUS_RESETALL:
6276 #ifdef DEBUG
6277 case DEVCTL_BUS_CONFIGURE:
6278 case DEVCTL_BUS_UNCONFIGURE:
6279 #endif
6280 rv = ENOTSUP;
6281 break;
6282
6283 default:
6284 rv = ENOTTY;
6285 } /* end of outer switch */
6286
6287 ndi_dc_freehdl(dcp);
6288 return (rv);
6289 }
6290
6291 /*
6292 * Routine to get the PHCI pathname from ioctl structures in userland
6293 */
6294 /* ARGSUSED */
6295 static int
6296 vhci_ioc_get_phci_path(sv_iocdata_t *pioc, caddr_t phci_path,
6297 int mode, caddr_t s)
6298 {
6299 int retval = 0;
6300
6301 if (ddi_copyin(pioc->phci, phci_path, MAXPATHLEN, mode)) {
6302 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_phci: ioctl <%s> "
6303 "phci_path copyin failed", s));
6304 retval = EFAULT;
6305 }
6306 return (retval);
6307
6308 }
6309
6310
6311 /*
6312 * Routine to get the Client device pathname from ioctl structures in userland
6313 */
6314 /* ARGSUSED */
6315 static int
6316 vhci_ioc_get_client_path(sv_iocdata_t *pioc, caddr_t client_path,
6317 int mode, caddr_t s)
6318 {
6319 int retval = 0;
6320
6321 if (ddi_copyin(pioc->client, client_path, MAXPATHLEN, mode)) {
6322 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_client: "
6323 "ioctl <%s> client_path copyin failed", s));
6324 retval = EFAULT;
6325 }
6326 return (retval);
6327 }
6328
6329
6330 /*
6331 * Routine to get physical device address from ioctl structure in userland
6332 */
6333 /* ARGSUSED */
6334 static int
6335 vhci_ioc_get_paddr(sv_iocdata_t *pioc, caddr_t paddr, int mode, caddr_t s)
6336 {
6337 int retval = 0;
6338
6339 if (ddi_copyin(pioc->addr, paddr, MAXNAMELEN, mode)) {
6340 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_paddr: "
6341 "ioctl <%s> device addr copyin failed", s));
6342 retval = EFAULT;
6343 }
6344 return (retval);
6345 }
6346
6347
6348 /*
6349 * Routine to send client device pathname to userland.
6350 */
6351 /* ARGSUSED */
6352 static int
6353 vhci_ioc_send_client_path(caddr_t client_path, sv_iocdata_t *pioc,
6354 int mode, caddr_t s)
6355 {
6356 int retval = 0;
6357
6358 if (ddi_copyout(client_path, pioc->client, MAXPATHLEN, mode)) {
6359 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_send_client: "
6360 "ioctl <%s> client_path copyout failed", s));
6361 retval = EFAULT;
6362 }
6363 return (retval);
6364 }
6365
6366
6367 /*
6368 * Routine to translated dev_info pointer (dip) to device pathname.
6369 */
static void
vhci_ioc_devi_to_path(dev_info_t *dip, caddr_t path)
{
	/* Render the full /devices pathname of dip into 'path'. */
	(void) ddi_pathname(dip, path);
}
6375
6376
6377 /*
6378 * vhci_get_phci_path_list:
6379 * get information about devices associated with a
6380 * given PHCI device.
6381 *
6382 * Return Values:
6383 * path information elements
6384 */
int
vhci_get_phci_path_list(dev_info_t *pdip, sv_path_info_t *pibuf,
    uint_t num_elems)
{
	uint_t			count, done;
	mdi_pathinfo_t		*pip;
	sv_path_info_t		*ret_pip;
	int			status;
	size_t			prop_size;
	int			circular;

	/*
	 * Get the PHCI structure and retrieve the path information
	 * from the GUID hash table.
	 */

	ret_pip = pibuf;
	count = 0;

	/* Hold the pHCI devinfo so its path list is stable while we walk. */
	ndi_devi_enter(pdip, &circular);

	/* Fill at most num_elems entries of the caller-supplied array. */
	done = (count >= num_elems);
	pip = mdi_get_next_client_path(pdip, NULL);
	while (pip && !done) {
		mdi_pi_lock(pip);
		/* Record the pHCI pathname, unit address and path state. */
		(void) ddi_pathname(mdi_pi_get_phci(pip),
		    ret_pip->device.ret_phci);
		(void) strcpy(ret_pip->ret_addr, mdi_pi_get_addr(pip));
		(void) mdi_pi_get_state2(pip, &ret_pip->ret_state,
		    &ret_pip->ret_ext_state);

		/*
		 * Report the packed property size back to the caller, if a
		 * return-size pointer was supplied.
		 */
		status = mdi_prop_size(pip, &prop_size);
		if (status == MDI_SUCCESS && ret_pip->ret_prop.ret_buf_size) {
			*ret_pip->ret_prop.ret_buf_size = (uint_t)prop_size;
		}

#ifdef DEBUG
		if (status != MDI_SUCCESS) {
			VHCI_DEBUG(2, (CE_WARN, NULL,
			    "!vhci_get_phci_path_list: "
			    "phci <%s>, prop size failure 0x%x",
			    ret_pip->device.ret_phci, status));
		}
#endif /* DEBUG */


		/*
		 * Pack the path properties only when the caller supplied a
		 * buffer large enough to hold them.
		 */
		if (status == MDI_SUCCESS && ret_pip->ret_prop.buf &&
		    prop_size && ret_pip->ret_prop.buf_size >= prop_size) {
			status = mdi_prop_pack(pip,
			    &ret_pip->ret_prop.buf,
			    ret_pip->ret_prop.buf_size);

#ifdef DEBUG
			if (status != MDI_SUCCESS) {
				VHCI_DEBUG(2, (CE_WARN, NULL,
				    "!vhci_get_phci_path_list: "
				    "phci <%s>, prop pack failure 0x%x",
				    ret_pip->device.ret_phci, status));
			}
#endif /* DEBUG */
		}

		mdi_pi_unlock(pip);
		pip = mdi_get_next_client_path(pdip, pip);
		ret_pip++;
		count++;
		done = (count >= num_elems);
	}

	ndi_devi_exit(pdip, circular);

	return (MDI_SUCCESS);
}
6458
6459
6460 /*
6461 * vhci_get_client_path_list:
6462 * get information about various paths associated with a
6463 * given client device.
6464 *
6465 * Return Values:
6466 * path information elements
6467 */
int
vhci_get_client_path_list(dev_info_t *cdip, sv_path_info_t *pibuf,
    uint_t num_elems)
{
	uint_t			count, done;
	mdi_pathinfo_t		*pip;
	sv_path_info_t		*ret_pip;
	int			status;
	size_t			prop_size;
	int			circular;

	ret_pip = pibuf;
	count = 0;

	/* Hold the client devinfo so its path list is stable while we walk. */
	ndi_devi_enter(cdip, &circular);

	/* Fill at most num_elems entries of the caller-supplied array. */
	done = (count >= num_elems);
	pip = mdi_get_next_phci_path(cdip, NULL);
	while (pip && !done) {
		mdi_pi_lock(pip);
		/* Record the pHCI pathname, unit address and path state. */
		(void) ddi_pathname(mdi_pi_get_phci(pip),
		    ret_pip->device.ret_phci);
		(void) strcpy(ret_pip->ret_addr, mdi_pi_get_addr(pip));
		(void) mdi_pi_get_state2(pip, &ret_pip->ret_state,
		    &ret_pip->ret_ext_state);

		/*
		 * Report the packed property size back to the caller, if a
		 * return-size pointer was supplied.
		 */
		status = mdi_prop_size(pip, &prop_size);
		if (status == MDI_SUCCESS && ret_pip->ret_prop.ret_buf_size) {
			*ret_pip->ret_prop.ret_buf_size = (uint_t)prop_size;
		}

#ifdef DEBUG
		if (status != MDI_SUCCESS) {
			VHCI_DEBUG(2, (CE_WARN, NULL,
			    "!vhci_get_client_path_list: "
			    "phci <%s>, prop size failure 0x%x",
			    ret_pip->device.ret_phci, status));
		}
#endif /* DEBUG */


		/*
		 * Pack the path properties only when the caller supplied a
		 * buffer large enough to hold them.
		 */
		if (status == MDI_SUCCESS && ret_pip->ret_prop.buf &&
		    prop_size && ret_pip->ret_prop.buf_size >= prop_size) {
			status = mdi_prop_pack(pip,
			    &ret_pip->ret_prop.buf,
			    ret_pip->ret_prop.buf_size);

#ifdef DEBUG
			if (status != MDI_SUCCESS) {
				VHCI_DEBUG(2, (CE_WARN, NULL,
				    "!vhci_get_client_path_list: "
				    "phci <%s>, prop pack failure 0x%x",
				    ret_pip->device.ret_phci, status));
			}
#endif /* DEBUG */
		}

		mdi_pi_unlock(pip);
		pip = mdi_get_next_phci_path(cdip, pip);
		ret_pip++;
		count++;
		done = (count >= num_elems);
	}

	ndi_devi_exit(cdip, circular);

	return (MDI_SUCCESS);
}
6536
6537
6538 /*
6539 * Routine to get ioctl argument structure from userland.
6540 */
6541 /* ARGSUSED */
static int
vhci_get_iocdata(const void *data, sv_iocdata_t *pioc, int mode, caddr_t s)
{
	int	retval = 0;

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(mode & FMODELS)) {
	case DDI_MODEL_ILP32:
	{
		sv_iocdata32_t ioc32;

		/*
		 * 32-bit caller on a 64-bit kernel: copy in the ILP32
		 * layout and widen each pointer field individually.
		 */
		if (ddi_copyin(data, &ioc32, sizeof (ioc32), mode)) {
			retval = EFAULT;
			break;
		}
		pioc->client = (caddr_t)(uintptr_t)ioc32.client;
		pioc->phci = (caddr_t)(uintptr_t)ioc32.phci;
		pioc->addr = (caddr_t)(uintptr_t)ioc32.addr;
		pioc->buf_elem = (uint_t)ioc32.buf_elem;
		pioc->ret_buf = (sv_path_info_t *)(uintptr_t)ioc32.ret_buf;
		pioc->ret_elem = (uint_t *)(uintptr_t)ioc32.ret_elem;
		break;
	}

	case DDI_MODEL_NONE:
		/* Native data model: the structure copies over directly. */
		if (ddi_copyin(data, pioc, sizeof (*pioc), mode)) {
			retval = EFAULT;
			break;
		}
		break;
	}
#else /* _MULTI_DATAMODEL */
	if (ddi_copyin(data, pioc, sizeof (*pioc), mode)) {
		retval = EFAULT;
	}
#endif /* _MULTI_DATAMODEL */

#ifdef DEBUG
	if (retval) {
		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: cmd <%s> "
		    "iocdata copyin failed", s));
	}
#endif

	return (retval);
}
6588
6589
6590 /*
6591 * Routine to get the ioctl argument for ioctl causing controller switchover.
6592 */
6593 /* ARGSUSED */
static int
vhci_get_iocswitchdata(const void *data, sv_switch_to_cntlr_iocdata_t *piocsc,
    int mode, caddr_t s)
{
	int	retval = 0;

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(mode & FMODELS)) {
	case DDI_MODEL_ILP32:
	{
		sv_switch_to_cntlr_iocdata32_t ioc32;

		/*
		 * 32-bit caller on a 64-bit kernel: copy in the ILP32
		 * layout and widen each pointer field individually.
		 */
		if (ddi_copyin(data, &ioc32, sizeof (ioc32), mode)) {
			retval = EFAULT;
			break;
		}
		piocsc->client = (caddr_t)(uintptr_t)ioc32.client;
		piocsc->class = (caddr_t)(uintptr_t)ioc32.class;
		break;
	}

	case DDI_MODEL_NONE:
		/* Native data model: the structure copies over directly. */
		if (ddi_copyin(data, piocsc, sizeof (*piocsc), mode)) {
			retval = EFAULT;
		}
		break;
	}
#else /* _MULTI_DATAMODEL */
	if (ddi_copyin(data, piocsc, sizeof (*piocsc), mode)) {
		retval = EFAULT;
	}
#endif /* _MULTI_DATAMODEL */

#ifdef DEBUG
	if (retval) {
		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: cmd <%s> "
		    "switch_to_cntlr_iocdata copyin failed", s));
	}
#endif

	return (retval);
}
6636
6637
6638 /*
6639 * Routine to allocate memory for the path information structures.
6640 * It allocates two chunks of memory - one for keeping userland
6641 * pointers/values for path information and path properties, second for
6642 * keeping allocating kernel memory for path properties. These path
6643 * properties are finally copied to userland.
6644 */
6645 /* ARGSUSED */
static int
vhci_ioc_alloc_pathinfo(sv_path_info_t **upibuf, sv_path_info_t **kpibuf,
    uint_t num_paths, sv_iocdata_t *pioc, int mode, caddr_t s)
{
	sv_path_info_t	*pi;
	uint_t		bufsize;
	int		retval = 0;
	int		index;

	/* Allocate memory */
	*upibuf = (sv_path_info_t *)
	    kmem_zalloc(sizeof (sv_path_info_t) * num_paths, KM_SLEEP);
	ASSERT(*upibuf != NULL);
	*kpibuf = (sv_path_info_t *)
	    kmem_zalloc(sizeof (sv_path_info_t) * num_paths, KM_SLEEP);
	ASSERT(*kpibuf != NULL);

	/*
	 * Get the path info structure from the user space.
	 * We are interested in the following fields:
	 *	- user size of buffer for per path properties.
	 *	- user address of buffer for path info properties.
	 *	- user pointer for returning actual buffer size
	 * Keep these fields in the 'upibuf' structures.
	 * Allocate buffer for per path info properties in kernel
	 * structure ('kpibuf').
	 * Size of these buffers will be equal to the size of buffers
	 * in the user space.
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(mode & FMODELS)) {
	case DDI_MODEL_ILP32:
	{
		sv_path_info32_t	*src;
		sv_path_info32_t	pi32;

		/*
		 * 32-bit caller: copy in each entry and widen the
		 * embedded pointer fields.
		 */
		src = (sv_path_info32_t *)pioc->ret_buf;
		pi = (sv_path_info_t *)*upibuf;
		for (index = 0; index < num_paths; index++, src++, pi++) {
			if (ddi_copyin(src, &pi32, sizeof (pi32), mode)) {
				retval = EFAULT;
				break;
			}

			pi->ret_prop.buf_size =
			    (uint_t)pi32.ret_prop.buf_size;
			pi->ret_prop.ret_buf_size =
			    (uint_t *)(uintptr_t)pi32.ret_prop.ret_buf_size;
			pi->ret_prop.buf =
			    (caddr_t)(uintptr_t)pi32.ret_prop.buf;
		}
		break;
	}

	case DDI_MODEL_NONE:
		if (ddi_copyin(pioc->ret_buf, *upibuf,
		    sizeof (sv_path_info_t) * num_paths, mode)) {
			retval = EFAULT;
		}
		break;
	}
#else /* _MULTI_DATAMODEL */
	if (ddi_copyin(pioc->ret_buf, *upibuf,
	    sizeof (sv_path_info_t) * num_paths, mode)) {
		retval = EFAULT;
	}
#endif /* _MULTI_DATAMODEL */

	/* On copyin failure, unwind both allocations before returning. */
	if (retval != 0) {
		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_alloc_path_info: "
		    "ioctl <%s> normal: path_info copyin failed", s));
		kmem_free(*upibuf, sizeof (sv_path_info_t) * num_paths);
		kmem_free(*kpibuf, sizeof (sv_path_info_t) * num_paths);
		*upibuf = NULL;
		*kpibuf = NULL;
		return (retval);
	}

	/*
	 * Allocate memory for per path properties.
	 */
	for (index = 0, pi = *kpibuf; index < num_paths; index++, pi++) {
		bufsize = (*upibuf)[index].ret_prop.buf_size;

		/*
		 * Mirror the user's buffer size, but only up to
		 * SV_PROP_MAX_BUF_SIZE; oversized or zero requests get
		 * no kernel property buffer.
		 */
		if (bufsize && bufsize <= SV_PROP_MAX_BUF_SIZE) {
			pi->ret_prop.buf_size = bufsize;
			pi->ret_prop.buf = (caddr_t)
			    kmem_zalloc(bufsize, KM_SLEEP);
			ASSERT(pi->ret_prop.buf != NULL);
		} else {
			pi->ret_prop.buf_size = 0;
			pi->ret_prop.buf = NULL;
		}

		/*
		 * Allocate a kernel-side return-size word only when the
		 * user asked for the size back.
		 */
		if ((*upibuf)[index].ret_prop.ret_buf_size != NULL) {
			pi->ret_prop.ret_buf_size = (uint_t *)kmem_zalloc(
			    sizeof (*pi->ret_prop.ret_buf_size), KM_SLEEP);
			ASSERT(pi->ret_prop.ret_buf_size != NULL);
		} else {
			pi->ret_prop.ret_buf_size = NULL;
		}
	}

	return (0);
}
6751
6752
6753 /*
6754 * Routine to free memory for the path information structures.
6755 * This is the memory which was allocated earlier.
6756 */
6757 /* ARGSUSED */
6758 static void
6759 vhci_ioc_free_pathinfo(sv_path_info_t *upibuf, sv_path_info_t *kpibuf,
6760 uint_t num_paths)
6761 {
6762 sv_path_info_t *pi;
6763 int index;
6764
6765 /* Free memory for per path properties */
6766 for (index = 0, pi = kpibuf; index < num_paths; index++, pi++) {
6767 if (pi->ret_prop.ret_buf_size != NULL) {
6768 kmem_free(pi->ret_prop.ret_buf_size,
6769 sizeof (*pi->ret_prop.ret_buf_size));
6770 }
6771
6772 if (pi->ret_prop.buf != NULL) {
6773 kmem_free(pi->ret_prop.buf, pi->ret_prop.buf_size);
6774 }
6775 }
6776
6777 /* Free memory for path info structures */
6778 kmem_free(upibuf, sizeof (sv_path_info_t) * num_paths);
6779 kmem_free(kpibuf, sizeof (sv_path_info_t) * num_paths);
6780 }
6781
6782
6783 /*
6784 * Routine to copy path information and path properties to userland.
6785 */
6786 /* ARGSUSED */
6787 static int
6788 vhci_ioc_send_pathinfo(sv_path_info_t *upibuf, sv_path_info_t *kpibuf,
6789 uint_t num_paths, sv_iocdata_t *pioc, int mode, caddr_t s)
6790 {
6791 int retval = 0, index;
6792 sv_path_info_t *upi_ptr;
6793 sv_path_info32_t *upi32_ptr;
6794
6795 #ifdef _MULTI_DATAMODEL
6796 switch (ddi_model_convert_from(mode & FMODELS)) {
6797 case DDI_MODEL_ILP32:
6798 goto copy_32bit;
6799
6800 case DDI_MODEL_NONE:
6801 goto copy_normal;
6802 }
6803 #else /* _MULTI_DATAMODEL */
6804
6805 goto copy_normal;
6806
6807 #endif /* _MULTI_DATAMODEL */
6808
6809 copy_normal:
6810
6811 /*
6812 * Copy path information and path properties to user land.
6813 * Pointer fields inside the path property structure were
6814 * saved in the 'upibuf' structure earlier.
6815 */
6816 upi_ptr = pioc->ret_buf;
6817 for (index = 0; index < num_paths; index++) {
6818 if (ddi_copyout(kpibuf[index].device.ret_ct,
6819 upi_ptr[index].device.ret_ct, MAXPATHLEN, mode)) {
6820 retval = EFAULT;
6821 break;
6822 }
6823
6824 if (ddi_copyout(kpibuf[index].ret_addr,
6825 upi_ptr[index].ret_addr, MAXNAMELEN, mode)) {
6826 retval = EFAULT;
6827 break;
6828 }
6829
6830 if (ddi_copyout(&kpibuf[index].ret_state,
6831 &upi_ptr[index].ret_state, sizeof (kpibuf[index].ret_state),
6832 mode)) {
6833 retval = EFAULT;
6834 break;
6835 }
6836
6837 if (ddi_copyout(&kpibuf[index].ret_ext_state,
6838 &upi_ptr[index].ret_ext_state,
6839 sizeof (kpibuf[index].ret_ext_state), mode)) {
6840 retval = EFAULT;
6841 break;
6842 }
6843
6844 if ((kpibuf[index].ret_prop.ret_buf_size != NULL) &&
6845 ddi_copyout(kpibuf[index].ret_prop.ret_buf_size,
6846 upibuf[index].ret_prop.ret_buf_size,
6847 sizeof (*upibuf[index].ret_prop.ret_buf_size), mode)) {
6848 retval = EFAULT;
6849 break;
6850 }
6851
6852 if ((kpibuf[index].ret_prop.buf != NULL) &&
6853 ddi_copyout(kpibuf[index].ret_prop.buf,
6854 upibuf[index].ret_prop.buf,
6855 upibuf[index].ret_prop.buf_size, mode)) {
6856 retval = EFAULT;
6857 break;
6858 }
6859 }
6860
6861 #ifdef DEBUG
6862 if (retval) {
6863 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: ioctl <%s> "
6864 "normal: path_info copyout failed", s));
6865 }
6866 #endif
6867
6868 return (retval);
6869
6870 copy_32bit:
6871 /*
6872 * Copy path information and path properties to user land.
6873 * Pointer fields inside the path property structure were
6874 * saved in the 'upibuf' structure earlier.
6875 */
6876 upi32_ptr = (sv_path_info32_t *)pioc->ret_buf;
6877 for (index = 0; index < num_paths; index++) {
6878 if (ddi_copyout(kpibuf[index].device.ret_ct,
6879 upi32_ptr[index].device.ret_ct, MAXPATHLEN, mode)) {
6880 retval = EFAULT;
6881 break;
6882 }
6883
6884 if (ddi_copyout(kpibuf[index].ret_addr,
6885 upi32_ptr[index].ret_addr, MAXNAMELEN, mode)) {
6886 retval = EFAULT;
6887 break;
6888 }
6889
6890 if (ddi_copyout(&kpibuf[index].ret_state,
6891 &upi32_ptr[index].ret_state,
6892 sizeof (kpibuf[index].ret_state), mode)) {
6893 retval = EFAULT;
6894 break;
6895 }
6896
6897 if (ddi_copyout(&kpibuf[index].ret_ext_state,
6898 &upi32_ptr[index].ret_ext_state,
6899 sizeof (kpibuf[index].ret_ext_state), mode)) {
6900 retval = EFAULT;
6901 break;
6902 }
6903 if ((kpibuf[index].ret_prop.ret_buf_size != NULL) &&
6904 ddi_copyout(kpibuf[index].ret_prop.ret_buf_size,
6905 upibuf[index].ret_prop.ret_buf_size,
6906 sizeof (*upibuf[index].ret_prop.ret_buf_size), mode)) {
6907 retval = EFAULT;
6908 break;
6909 }
6910
6911 if ((kpibuf[index].ret_prop.buf != NULL) &&
6912 ddi_copyout(kpibuf[index].ret_prop.buf,
6913 upibuf[index].ret_prop.buf,
6914 upibuf[index].ret_prop.buf_size, mode)) {
6915 retval = EFAULT;
6916 break;
6917 }
6918 }
6919
6920 #ifdef DEBUG
6921 if (retval) {
6922 VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: ioctl <%s> "
6923 "normal: path_info copyout failed", s));
6924 }
6925 #endif
6926
6927 return (retval);
6928 }
6929
6930
6931 /*
6932 * vhci_failover()
6933 * This routine expects VHCI_HOLD_LUN before being invoked. It can be invoked
6934 * as MDI_FAILOVER_ASYNC or MDI_FAILOVER_SYNC. For Asynchronous failovers
6935 * this routine shall VHCI_RELEASE_LUN on exiting. For synchronous failovers
6936 * it is the callers responsibility to release lun.
6937 */
6938
6939 /* ARGSUSED */
static int
vhci_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	char			*guid;
	scsi_vhci_lun_t		*vlun = NULL;
	struct scsi_vhci	*vhci;
	mdi_pathinfo_t		*pip, *npip;
	char			*s_pclass, *pclass1, *pclass2, *pclass;
	char			active_pclass_copy[255], *active_pclass_ptr;
	char			*ptr1, *ptr2;
	mdi_pathinfo_state_t	pi_state;
	uint32_t		pi_ext_state;
	scsi_vhci_priv_t	*svp;
	struct scsi_device	*sd;
	struct scsi_failover_ops	*sfo;
	int			sps;	/* mdi_select_path() status */
	int			activation_done = 0;
	int			rval, retval = MDI_FAILURE;
	int			reserve_pending, check_condition, UA_condition;
	struct scsi_pkt		*pkt;
	struct buf		*bp;

	/* Map the client devinfo back to its vlun and announce the event. */
	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
	sd = ddi_get_driver_private(cdip);
	vlun = ADDR2VLUN(&sd->sd_address);
	ASSERT(vlun != 0);
	ASSERT(VHCI_LUN_IS_HELD(vlun));
	guid = vlun->svl_lun_wwn;
	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(1): guid %s\n", guid));
	vhci_log(CE_NOTE, vdip, "!Initiating failover for device %s "
	    "(GUID %s)", ddi_node_name(cdip), guid);

	/*
	 * Lets maintain a local copy of the vlun->svl_active_pclass
	 * for the rest of the processing. Accessing the field
	 * directly in the loop below causes loop logic to break
	 * especially when the field gets updated by other threads
	 * updating path status etc and causes 'paths are not currently
	 * available' condition to be declared prematurely.
	 */
	mutex_enter(&vlun->svl_mutex);
	if (vlun->svl_active_pclass != NULL) {
		(void) strlcpy(active_pclass_copy, vlun->svl_active_pclass,
		    sizeof (active_pclass_copy));
		active_pclass_ptr = &active_pclass_copy[0];
		mutex_exit(&vlun->svl_mutex);
		/* Drain outstanding I/O on the currently active class. */
		if (vhci_quiesce_paths(vdip, cdip, vlun, guid,
		    active_pclass_ptr) != 0) {
			retval = MDI_FAILURE;
		}
	} else {
		/*
		 * can happen only when the available path to device
		 * discovered is a STANDBY path.
		 */
		mutex_exit(&vlun->svl_mutex);
		active_pclass_copy[0] = '\0';
		active_pclass_ptr = NULL;
	}

	sfo = vlun->svl_fops;
	ASSERT(sfo != NULL);
	pclass1 = s_pclass = active_pclass_ptr;
	VHCI_DEBUG(1, (CE_NOTE, NULL, "!(%s)failing over from %s\n", guid,
	    (s_pclass == NULL ? "<none>" : s_pclass)));

	/*
	 * Walk the failover module's pathclass list, starting after the
	 * currently active class, looking for a class whose STANDBY path
	 * can be activated.
	 */
next_pathclass:

	rval = sfo->sfo_pathclass_next(pclass1, &pclass2,
	    vlun->svl_fops_ctpriv);
	if (rval == ENOENT) {
		if (s_pclass == NULL) {
			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(4)(%s): "
			    "failed, no more pathclasses\n", guid));
			goto done;
		} else {
			/* Wrap around to the first pathclass. */
			(void) sfo->sfo_pathclass_next(NULL, &pclass2,
			    vlun->svl_fops_ctpriv);
		}
	} else if (rval == EINVAL) {
		vhci_log(CE_NOTE, vdip, "!Failover operation failed for "
		    "device %s (GUID %s): Invalid path-class %s",
		    ddi_node_name(cdip), guid,
		    ((pclass1 == NULL) ? "<none>" : pclass1));
		goto done;
	}
	if ((s_pclass != NULL) && (strcmp(pclass2, s_pclass) == 0)) {
		/*
		 * paths are not currently available
		 */
		vhci_log(CE_NOTE, vdip, "!Failover path currently unavailable"
		    " for device %s (GUID %s)",
		    ddi_node_name(cdip), guid);
		goto done;
	}
	pip = npip = NULL;
	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(5.2)(%s): considering "
	    "%s as failover destination\n", guid, pclass2));
	sps = mdi_select_path(cdip, NULL, MDI_SELECT_STANDBY_PATH, NULL, &npip);
	if ((npip == NULL) || (sps != MDI_SUCCESS)) {
		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(%s): no "
		    "STANDBY paths found (status:%x)!\n", guid, sps));
		pclass1 = pclass2;
		goto next_pathclass;
	}
	/* Probe each STANDBY path of pclass2 and try to activate one. */
	do {
		pclass = NULL;
		if ((mdi_prop_lookup_string(npip, "path-class",
		    &pclass) != MDI_SUCCESS) || (strcmp(pclass2,
		    pclass) != 0)) {
			VHCI_DEBUG(1, (CE_NOTE, NULL,
			    "!vhci_failover(5.5)(%s): skipping path "
			    "%p(%s)...\n", guid, (void *)npip, pclass));
			pip = npip;
			sps = mdi_select_path(cdip, NULL,
			    MDI_SELECT_STANDBY_PATH, pip, &npip);
			mdi_rele_path(pip);
			(void) mdi_prop_free(pclass);
			continue;
		}
		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(npip);

		/*
		 * Issue READ at non-zero block on this STANDBY path.
		 * Purple returns
		 * 1. RESERVATION_CONFLICT if reservation is pending
		 * 2. POR check condition if a reset happened.
		 * 3. failover Check Conditions if one is already in progress.
		 */
		reserve_pending = 0;
		check_condition = 0;
		UA_condition = 0;

		bp = scsi_alloc_consistent_buf(&svp->svp_psd->sd_address,
		    (struct buf *)NULL, DEV_BSIZE, B_READ, NULL, NULL);
		if (!bp) {
			VHCI_DEBUG(1, (CE_NOTE, NULL,
			    "vhci_failover !No resources (buf)\n"));
			mdi_rele_path(npip);
			goto done;
		}
		pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
		    CDB_GROUP1, sizeof (struct scsi_arq_status), 0,
		    PKT_CONSISTENT, NULL, NULL);
		if (pkt) {
			(void) scsi_setup_cdb((union scsi_cdb *)(uintptr_t)
			    pkt->pkt_cdbp, SCMD_READ, 1, 1, 0);
			pkt->pkt_flags = FLAG_NOINTR;
check_path_again:
			/* Pin the probe I/O to this specific path. */
			pkt->pkt_path_instance = mdi_pi_get_path_instance(npip);
			pkt->pkt_time = 2 * vhci_io_time;

			if (scsi_transport(pkt) == TRAN_ACCEPT) {
				switch (pkt->pkt_reason) {
				case CMD_CMPLT:
					switch (SCBP_C(pkt)) {
					case STATUS_GOOD:
						/* Already failed over */
						activation_done = 1;
						break;
					case STATUS_RESERVATION_CONFLICT:
						reserve_pending = 1;
						break;
					case STATUS_CHECK:
						check_condition = 1;
						break;
					}
				}
			}
			if (check_condition &&
			    (pkt->pkt_state & STATE_ARQ_DONE)) {
				/* Decode the auto-request-sense data. */
				uint8_t *sns, skey, asc, ascq;
				sns = (uint8_t *)
				    &(((struct scsi_arq_status *)(uintptr_t)
				    (pkt->pkt_scbp))->sts_sensedata);
				skey = scsi_sense_key(sns);
				asc = scsi_sense_asc(sns);
				ascq = scsi_sense_ascq(sns);
				if (skey == KEY_UNIT_ATTENTION &&
				    asc == 0x29) {
					/* Already failed over */
					VHCI_DEBUG(1, (CE_NOTE, NULL,
					    "!vhci_failover(7)(%s): "
					    "path 0x%p POR UA condition\n",
					    guid, (void *)npip));
					/* Retry the probe once after POR UA. */
					if (UA_condition == 0) {
						UA_condition = 1;
						goto check_path_again;
					}
				} else {
					activation_done = 0;
					VHCI_DEBUG(1, (CE_NOTE, NULL,
					    "!vhci_failover(%s): path 0x%p "
					    "unhandled chkcond %x %x %x\n",
					    guid, (void *)npip, skey,
					    asc, ascq));
				}
			}
			scsi_destroy_pkt(pkt);
		}
		scsi_free_consistent_buf(bp);

		if (activation_done) {
			mdi_rele_path(npip);
			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(7)(%s): "
			    "path 0x%p already failedover\n", guid,
			    (void *)npip));
			break;
		}
		if (reserve_pending && (vlun->svl_xlf_capable == 0)) {
			(void) vhci_recovery_reset(vlun,
			    &svp->svp_psd->sd_address,
			    FALSE, VHCI_DEPTH_ALL);
		}
		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(6)(%s): "
		    "activating path 0x%p(psd:%p)\n", guid, (void *)npip,
		    (void *)svp->svp_psd));
		if (sfo->sfo_path_activate(svp->svp_psd, pclass2,
		    vlun->svl_fops_ctpriv) == 0) {
			activation_done = 1;
			mdi_rele_path(npip);
			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(7)(%s): "
			    "path 0x%p successfully activated\n", guid,
			    (void *)npip));
			break;
		}
		pip = npip;
		sps = mdi_select_path(cdip, NULL, MDI_SELECT_STANDBY_PATH,
		    pip, &npip);
		mdi_rele_path(pip);
	} while ((npip != NULL) && (sps == MDI_SUCCESS));
	if (activation_done == 0) {
		/* No path of this class could be activated; try the next. */
		pclass1 = pclass2;
		goto next_pathclass;
	}

	/*
	 * if we are here, we have succeeded in activating path npip of
	 * pathclass pclass2; let us validate all paths of pclass2 by
	 * "ping"-ing each one and mark the good ones ONLINE
	 * Also, set the state of the paths belonging to the previously
	 * active pathclass to STANDBY
	 */
	pip = npip = NULL;
	sps = mdi_select_path(cdip, NULL, (MDI_SELECT_ONLINE_PATH |
	    MDI_SELECT_STANDBY_PATH | MDI_SELECT_USER_DISABLE_PATH),
	    NULL, &npip);
	if (npip == NULL || sps != MDI_SUCCESS) {
		VHCI_DEBUG(1, (CE_NOTE, NULL, "!Failover operation failed for "
		    "device %s (GUID %s): paths may be busy\n",
		    ddi_node_name(cdip), guid));
		goto done;
	}
	do {
		(void) mdi_pi_get_state2(npip, &pi_state, &pi_ext_state);
		if (mdi_prop_lookup_string(npip, "path-class", &pclass)
		    != MDI_SUCCESS) {
			pip = npip;
			sps = mdi_select_path(cdip, NULL,
			    (MDI_SELECT_ONLINE_PATH |
			    MDI_SELECT_STANDBY_PATH |
			    MDI_SELECT_USER_DISABLE_PATH),
			    pip, &npip);
			mdi_rele_path(pip);
			continue;
		}
		if (strcmp(pclass, pclass2) == 0) {
			/* New active class: ping and mark good paths ONLINE */
			if (pi_state == MDI_PATHINFO_STATE_STANDBY) {
				svp = (scsi_vhci_priv_t *)
				    mdi_pi_get_vhci_private(npip);
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!vhci_failover(8)(%s): "
				    "pinging path 0x%p\n",
				    guid, (void *)npip));
				if (sfo->sfo_path_ping(svp->svp_psd,
				    vlun->svl_fops_ctpriv) == 1) {
					mdi_pi_set_state(npip,
					    MDI_PATHINFO_STATE_ONLINE);
					VHCI_DEBUG(1, (CE_NOTE, NULL,
					    "!vhci_failover(9)(%s): "
					    "path 0x%p ping successful, "
					    "marked online\n", guid,
					    (void *)npip));
					MDI_PI_ERRSTAT(npip, MDI_PI_FAILTO);
				}
			}
		} else if ((s_pclass != NULL) && (strcmp(pclass, s_pclass)
		    == 0)) {
			/* Previously active class: demote to STANDBY */
			if (pi_state == MDI_PATHINFO_STATE_ONLINE) {
				mdi_pi_set_state(npip,
				    MDI_PATHINFO_STATE_STANDBY);
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!vhci_failover(10)(%s): path 0x%p marked "
				    "STANDBY\n", guid, (void *)npip));
				MDI_PI_ERRSTAT(npip, MDI_PI_FAILFROM);
			}
		}
		(void) mdi_prop_free(pclass);
		pip = npip;
		sps = mdi_select_path(cdip, NULL, (MDI_SELECT_ONLINE_PATH |
		    MDI_SELECT_STANDBY_PATH|MDI_SELECT_USER_DISABLE_PATH),
		    pip, &npip);
		mdi_rele_path(pip);
	} while ((npip != NULL) && (sps == MDI_SUCCESS));

	/*
	 * Update the AccessState of related MP-API TPGs
	 */
	(void) vhci_mpapi_update_tpg_acc_state_for_lu(vhci, vlun);

	vhci_log(CE_NOTE, vdip, "!Failover operation completed successfully "
	    "for device %s (GUID %s): failed over from %s to %s",
	    ddi_node_name(cdip), guid, ((s_pclass == NULL) ? "<none>" :
	    s_pclass), pclass2);
	/* Record the new active pathclass; free the previous string. */
	ptr1 = kmem_alloc(strlen(pclass2)+1, KM_SLEEP);
	(void) strlcpy(ptr1, pclass2, (strlen(pclass2)+1));
	mutex_enter(&vlun->svl_mutex);
	ptr2 = vlun->svl_active_pclass;
	vlun->svl_active_pclass = ptr1;
	mutex_exit(&vlun->svl_mutex);
	if (ptr2) {
		kmem_free(ptr2, strlen(ptr2)+1);
	}
	/* Notify reset listeners; failover implies resets on the target. */
	mutex_enter(&vhci->vhci_mutex);
	scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
	    &vhci->vhci_reset_notify_listf);
	/* All reservations are cleared upon these resets. */
	vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
	mutex_exit(&vhci->vhci_mutex);
	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(11): DONE! Active "
	    "pathclass for %s is now %s\n", guid, pclass2));
	retval = MDI_SUCCESS;

done:
	vlun->svl_failover_status = retval;
	if (flags == MDI_FAILOVER_ASYNC) {
		VHCI_RELEASE_LUN(vlun);
		VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_failover(12): DONE! "
		    "releasing lun, as failover was ASYNC\n"));
	} else {
		VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_failover(12): DONE! "
		    "NOT releasing lun, as failover was SYNC\n"));
	}
	return (retval);
}
7285
7286 /*
7287 * vhci_client_attached is called after the successful attach of a
7288 * client devinfo node.
7289 */
7290 static void
7291 vhci_client_attached(dev_info_t *cdip)
7292 {
7293 mdi_pathinfo_t *pip;
7294 int circular;
7295
7296 /*
7297 * At this point the client has attached and it's instance number is
7298 * valid, so we can set up kstats. We need to do this here because it
7299 * is possible for paths to go online prior to client attach, in which
7300 * case the call to vhci_kstat_create_pathinfo in vhci_pathinfo_online
7301 * was a noop.
7302 */
7303 ndi_devi_enter(cdip, &circular);
7304 for (pip = mdi_get_next_phci_path(cdip, NULL); pip;
7305 pip = mdi_get_next_phci_path(cdip, pip))
7306 vhci_kstat_create_pathinfo(pip);
7307 ndi_devi_exit(cdip, circular);
7308 }
7309
7310 /*
7311 * quiesce all of the online paths
7312 */
static int
vhci_quiesce_paths(dev_info_t *vdip, dev_info_t *cdip, scsi_vhci_lun_t *vlun,
    char *guid, char *active_pclass_ptr)
{
	scsi_vhci_priv_t	*svp;
	char			*s_pclass = NULL;
	mdi_pathinfo_t		*npip, *pip;
	int			sps;

	/* quiesce currently active paths */
	s_pclass = NULL;
	pip = npip = NULL;
	sps = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH, NULL, &npip);
	if ((npip == NULL) || (sps != MDI_SUCCESS)) {
		/* No online path at all: nothing to quiesce, report failure. */
		return (1);
	}
	do {
		if (mdi_prop_lookup_string(npip, "path-class",
		    &s_pclass) != MDI_SUCCESS) {
			mdi_rele_path(npip);
			vhci_log(CE_NOTE, vdip, "!Failover operation failed "
			    "for device %s (GUID %s) due to an internal "
			    "error", ddi_node_name(cdip), guid);
			return (1);
		}
		if (strcmp(s_pclass, active_pclass_ptr) == 0) {
			/*
			 * quiesce path. Free s_pclass since
			 * we don't need it anymore
			 */
			VHCI_DEBUG(1, (CE_NOTE, NULL,
			    "!vhci_failover(2)(%s): failing over "
			    "from %s; quiescing path %p\n",
			    guid, s_pclass, (void *)npip));
			(void) mdi_prop_free(s_pclass);
			svp = (scsi_vhci_priv_t *)
			    mdi_pi_get_vhci_private(npip);
			if (svp == NULL) {
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!vhci_failover(2.5)(%s): no "
				    "client priv! %p offlined?\n",
				    guid, (void *)npip));
				pip = npip;
				sps = mdi_select_path(cdip, NULL,
				    MDI_SELECT_ONLINE_PATH, pip, &npip);
				mdi_rele_path(pip);
				continue;
			}
			/*
			 * Abort outstanding commands on this path; if the
			 * abort fails, escalate to a recovery reset.
			 */
			if (scsi_abort(&svp->svp_psd->sd_address, NULL)
			    == 0) {
				(void) vhci_recovery_reset(vlun,
				    &svp->svp_psd->sd_address, FALSE,
				    VHCI_DEPTH_TARGET);
			}
			/* Block until all commands on this path drain. */
			mutex_enter(&svp->svp_mutex);
			if (svp->svp_cmds == 0) {
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!vhci_failover(3)(%s):"
				    "quiesced path %p\n", guid, (void *)npip));
			} else {
				while (svp->svp_cmds != 0) {
					cv_wait(&svp->svp_cv, &svp->svp_mutex);
					VHCI_DEBUG(1, (CE_NOTE, NULL,
					    "!vhci_failover(3.cv)(%s):"
					    "quiesced path %p\n", guid,
					    (void *)npip));
				}
			}
			mutex_exit(&svp->svp_mutex);
		} else {
			/*
			 * make sure we freeup the memory
			 */
			(void) mdi_prop_free(s_pclass);
		}
		pip = npip;
		sps = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH,
		    pip, &npip);
		mdi_rele_path(pip);
	} while ((npip != NULL) && (sps == MDI_SUCCESS));
	return (0);
}
7395
7396 static struct scsi_vhci_lun *
7397 vhci_lun_lookup(dev_info_t *tgt_dip)
7398 {
7399 return ((struct scsi_vhci_lun *)
7400 mdi_client_get_vhci_private(tgt_dip));
7401 }
7402
7403 static struct scsi_vhci_lun *
7404 vhci_lun_lookup_alloc(dev_info_t *tgt_dip, char *guid, int *didalloc)
7405 {
7406 struct scsi_vhci_lun *svl;
7407
7408 if (svl = vhci_lun_lookup(tgt_dip)) {
7409 return (svl);
7410 }
7411
7412 svl = kmem_zalloc(sizeof (*svl), KM_SLEEP);
7413 svl->svl_lun_wwn = kmem_zalloc(strlen(guid)+1, KM_SLEEP);
7414 (void) strcpy(svl->svl_lun_wwn, guid);
7415 mutex_init(&svl->svl_mutex, NULL, MUTEX_DRIVER, NULL);
7416 cv_init(&svl->svl_cv, NULL, CV_DRIVER, NULL);
7417 sema_init(&svl->svl_pgr_sema, 1, NULL, SEMA_DRIVER, NULL);
7418 svl->svl_waiting_for_activepath = 1;
7419 svl->svl_sector_size = 1;
7420 mdi_client_set_vhci_private(tgt_dip, svl);
7421 *didalloc = 1;
7422 VHCI_DEBUG(1, (CE_NOTE, NULL,
7423 "vhci_lun_lookup_alloc: guid %s vlun 0x%p\n",
7424 guid, (void *)svl));
7425 return (svl);
7426 }
7427
/*
 * Tear down a scsi_vhci_lun: free the strings it owns, let the failover
 * module release its per-LUN private data, destroy the taskq (if one
 * was created) and the synchronization objects, and free the structure
 * itself.  'sd' may be NULL when called before tgt_dip initialization.
 */
static void
vhci_lun_free(struct scsi_vhci_lun *dvlp, struct scsi_device *sd)
{
	char *guid;

	guid = dvlp->svl_lun_wwn;
	ASSERT(guid != NULL);
	VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_lun_free: %s\n", guid));

	/* Drop the active path class string under the vlun lock. */
	mutex_enter(&dvlp->svl_mutex);
	if (dvlp->svl_active_pclass != NULL) {
		kmem_free(dvlp->svl_active_pclass,
		    strlen(dvlp->svl_active_pclass)+1);
	}
	dvlp->svl_active_pclass = NULL;
	mutex_exit(&dvlp->svl_mutex);

	/* NOTE: 'guid' aliases svl_lun_wwn and dangles after this free. */
	if (dvlp->svl_lun_wwn != NULL) {
		kmem_free(dvlp->svl_lun_wwn, strlen(dvlp->svl_lun_wwn)+1);
	}
	dvlp->svl_lun_wwn = NULL;

	if (dvlp->svl_fops_name) {
		kmem_free(dvlp->svl_fops_name, strlen(dvlp->svl_fops_name)+1);
	}
	dvlp->svl_fops_name = NULL;

	/* Give the failover module a chance to release its private data. */
	if (dvlp->svl_fops_ctpriv != NULL &&
	    dvlp->svl_fops != NULL) {
		dvlp->svl_fops->sfo_device_unprobe(sd, dvlp->svl_fops_ctpriv);
	}

	/* The taskq exists only if the "task alive" flag was set. */
	if (dvlp->svl_flags & VLUN_TASK_D_ALIVE_FLG)
		taskq_destroy(dvlp->svl_taskq);

	mutex_destroy(&dvlp->svl_mutex);
	cv_destroy(&dvlp->svl_cv);
	sema_destroy(&dvlp->svl_pgr_sema);
	kmem_free(dvlp, sizeof (*dvlp));
	/*
	 * vhci_lun_free may be called before the tgt_dip
	 * initialization so check if the sd is NULL.
	 */
	if (sd != NULL)
		scsi_device_hba_private_set(sd, NULL);
}
7474
/*
 * Synchronously issue 'pkt' via scsi_poll() with internal retries:
 * up to 6 retries on transport/poll failure, one retry on
 * CMD_INCOMPLETE/RESET/ABORTED/TRAN_ERR, and unbounded retries on
 * UNIT ATTENTION / NOT READY check conditions (retry_cnt is not
 * incremented on that path).  Returns 1 on success, 0 on failure.
 */
int
vhci_do_scsi_cmd(struct scsi_pkt *pkt)
{
	int err = 0;
	int retry_cnt = 0;
	uint8_t *sns, skey;

#ifdef DEBUG
	if (vhci_debug > 5) {
		vhci_print_cdb(pkt->pkt_address.a_hba_tran->tran_hba_dip,
		    CE_WARN, "Vhci command", pkt->pkt_cdbp);
	}
#endif

retry:
	err = scsi_poll(pkt);
	if (err) {
		/*
		 * A reservation conflict on a RELEASE is not retried:
		 * the reservation is simply not ours to release.
		 */
		if (pkt->pkt_cdbp[0] == SCMD_RELEASE) {
			if (SCBP_C(pkt) == STATUS_RESERVATION_CONFLICT) {
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!v_s_do_s_c: RELEASE conflict\n"));
				return (0);
			}
		}
		if (retry_cnt++ < 6) {
			VHCI_DEBUG(1, (CE_WARN, NULL,
			    "!v_s_do_s_c:retry packet 0x%p "
			    "status 0x%x reason %s",
			    (void *)pkt, SCBP_C(pkt),
			    scsi_rname(pkt->pkt_reason)));
			/* Log the sense key when auto-request-sense ran. */
			if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (SCBP_C(pkt) == STATUS_CHECK) &&
			    (pkt->pkt_state & STATE_ARQ_DONE)) {
				sns = (uint8_t *)
				    &(((struct scsi_arq_status *)(uintptr_t)
				    (pkt->pkt_scbp))->sts_sensedata);
				skey = scsi_sense_key(sns);
				VHCI_DEBUG(1, (CE_WARN, NULL,
				    "!v_s_do_s_c:retry "
				    "packet 0x%p sense data %s", (void *)pkt,
				    scsi_sname(skey)));
			}
			goto retry;
		}
		VHCI_DEBUG(1, (CE_WARN, NULL,
		    "!v_s_do_s_c: failed transport 0x%p 0x%x",
		    (void *)pkt, SCBP_C(pkt)));
		return (0);
	}

	/* scsi_poll succeeded; inspect the completion reason/status. */
	switch (pkt->pkt_reason) {
	case CMD_TIMEOUT:
		VHCI_DEBUG(1, (CE_WARN, NULL, "!pkt timed "
		    "out (pkt 0x%p)", (void *)pkt));
		return (0);
	case CMD_CMPLT:
		switch (SCBP_C(pkt)) {
		case STATUS_GOOD:
			break;
		case STATUS_CHECK:
			if (pkt->pkt_state & STATE_ARQ_DONE) {
				sns = (uint8_t *)&(((
				    struct scsi_arq_status *)
				    (uintptr_t)
				    (pkt->pkt_scbp))->
				    sts_sensedata);
				skey = scsi_sense_key(sns);
				/*
				 * UNIT ATTENTION / NOT READY are
				 * transient: retry without counting
				 * against retry_cnt.
				 */
				if ((skey ==
				    KEY_UNIT_ATTENTION) ||
				    (skey ==
				    KEY_NOT_READY)) {
					/*
					 * clear unit attn.
					 */

					VHCI_DEBUG(1,
					    (CE_WARN, NULL,
					    "!v_s_do_s_c: "
					    "retry "
					    "packet 0x%p sense "
					    "data %s",
					    (void *)pkt,
					    scsi_sname
					    (skey)));
					goto retry;
				}
				VHCI_DEBUG(4, (CE_WARN, NULL,
				    "!ARQ while "
				    "transporting "
				    "(pkt 0x%p)",
				    (void *)pkt));
				return (0);
			}
			/* CHECK CONDITION without sense data: fail. */
			return (0);
		default:
			VHCI_DEBUG(1, (CE_WARN, NULL,
			    "!Bad status returned "
			    "(pkt 0x%p, status %x)",
			    (void *)pkt, SCBP_C(pkt)));
			return (0);
		}
		break;
	case CMD_INCOMPLETE:
	case CMD_RESET:
	case CMD_ABORTED:
	case CMD_TRAN_ERR:
		/* One extra attempt for these transient transport errors. */
		if (retry_cnt++ < 1) {
			VHCI_DEBUG(1, (CE_WARN, NULL,
			    "!v_s_do_s_c: retry packet 0x%p %s",
			    (void *)pkt, scsi_rname(pkt->pkt_reason)));
			goto retry;
		}
		/* FALLTHROUGH */
	default:
		VHCI_DEBUG(1, (CE_WARN, NULL, "!pkt did not "
		    "complete successfully (pkt 0x%p,"
		    "reason %x)", (void *)pkt, pkt->pkt_reason));
		return (0);
	}
	return (1);
}
7596
/*
 * Wait for all outstanding commands (svp_cmds) on every ONLINE path of
 * 'vlun' to drain.  Each wait is bounded by vhci_path_quiesce_timeout
 * seconds.  Returns 1 when the LUN is quiesced, 0 on timeout.
 */
static int
vhci_quiesce_lun(struct scsi_vhci_lun *vlun)
{
	mdi_pathinfo_t *pip, *spip;
	dev_info_t *cdip;
	struct scsi_vhci_priv *svp;
	mdi_pathinfo_state_t pstate;
	uint32_t p_ext_state;
	int circular;

	cdip = vlun->svl_dip;
	pip = spip = NULL;
	/* Walk the pHCI path list under ndi_devi_enter protection. */
	ndi_devi_enter(cdip, &circular);
	pip = mdi_get_next_phci_path(cdip, NULL);
	while (pip != NULL) {
		(void) mdi_pi_get_state2(pip, &pstate, &p_ext_state);
		/* Only ONLINE paths can have commands to drain. */
		if (pstate != MDI_PATHINFO_STATE_ONLINE) {
			spip = pip;
			pip = mdi_get_next_phci_path(cdip, spip);
			continue;
		}
		/*
		 * Hold the path and drop the devi lock before blocking;
		 * re-acquire it below to continue the walk.
		 */
		mdi_hold_path(pip);
		ndi_devi_exit(cdip, circular);
		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
		mutex_enter(&svp->svp_mutex);
		while (svp->svp_cmds != 0) {
			if (cv_reltimedwait(&svp->svp_cv, &svp->svp_mutex,
			    drv_usectohz(vhci_path_quiesce_timeout * 1000000),
			    TR_CLOCK_TICK) == -1) {
				/* Timed out: give up on the quiesce. */
				mutex_exit(&svp->svp_mutex);
				mdi_rele_path(pip);
				VHCI_DEBUG(1, (CE_WARN, NULL,
				    "Quiesce of lun is not successful "
				    "vlun: 0x%p.", (void *)vlun));
				return (0);
			}
		}
		mutex_exit(&svp->svp_mutex);
		ndi_devi_enter(cdip, &circular);
		spip = pip;
		pip = mdi_get_next_phci_path(cdip, spip);
		mdi_rele_path(spip);
	}
	ndi_devi_exit(cdip, circular);
	return (1);
}
7643
/*
 * Validate and (re)register this LUN's persistent reservation key when
 * a path becomes usable:
 *
 *  1) Validate the existing key by issuing a normal PROUT register on
 *     some other online/standby path.  If no other path validates it,
 *     return 1 so the online can continue (first registration case).
 *  2) Force-register (Register-and-Ignore) the key on the new path.
 *  3) Re-validate on another pre-existing path; if that fails, back
 *     the key out by registering a zero service key.
 *
 * Returns 1 on success (or nothing to do), 0 on failure.
 */
static int
vhci_pgr_validate_and_register(scsi_vhci_priv_t *svp)
{
	scsi_vhci_lun_t *vlun;
	vhci_prout_t *prout;
	int rval, success;
	mdi_pathinfo_t *pip, *npip;
	scsi_vhci_priv_t *osvp;
	dev_info_t *cdip;
	uchar_t cdb_1;
	uchar_t temp_res_key[MHIOC_RESV_KEY_SIZE];


	/*
	 * see if there are any other paths available; if none,
	 * then there is nothing to do.
	 */
	cdip = svp->svp_svl->svl_dip;
	rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH |
	    MDI_SELECT_STANDBY_PATH, NULL, &pip);
	if ((rval != MDI_SUCCESS) || (pip == NULL)) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "%s%d: vhci_pgr_validate_and_register: first path\n",
		    ddi_driver_name(cdip), ddi_get_instance(cdip)));
		return (1);
	}

	vlun = svp->svp_svl;
	prout = &vlun->svl_prout;
	ASSERT(vlun->svl_pgr_active != 0);

	/*
	 * When the path was busy/offlined, some other host might have
	 * cleared this key. Validate key on some other path first.
	 * If it fails, return failure.
	 */

	npip = pip;
	pip = NULL;
	success = 0;

	/* Save the res key */
	bcopy(prout->res_key, temp_res_key, MHIOC_RESV_KEY_SIZE);

	/*
	 * Sometimes CDB from application can be a Register_And_Ignore.
	 * Instead of validation, this cdb would result in force registration.
	 * Convert it to normal cdb for validation.
	 * After that be sure to restore the cdb.
	 */
	cdb_1 = vlun->svl_cdb[1];
	vlun->svl_cdb[1] &= 0xe0;

	/* Phase 1: validate the key on some other path. */
	do {
		osvp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(npip);
		if (osvp == NULL) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_pgr_validate_and_register: no "
			    "client priv! 0x%p offlined?\n",
			    (void *)npip));
			goto next_path_1;
		}

		if (osvp == svp) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_pgr_validate_and_register: same svp 0x%p"
			    " npip 0x%p vlun 0x%p\n",
			    (void *)svp, (void *)npip, (void *)vlun));
			goto next_path_1;
		}

		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_pgr_validate_and_register: First validate on"
		    " osvp 0x%p being done. vlun 0x%p thread 0x%p Before bcopy"
		    " cdb1 %x\n", (void *)osvp, (void *)vlun,
		    (void *)curthread, vlun->svl_cdb[1]));
		vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy:");

		bcopy(prout->service_key, prout->res_key, MHIOC_RESV_KEY_SIZE);

		VHCI_DEBUG(4, (CE_WARN, NULL, "vlun 0x%p After bcopy",
		    (void *)vlun));
		vhci_print_prout_keys(vlun, "v_pgr_val_reg: after bcopy: ");

		rval = vhci_do_prout(osvp);
		if (rval == 1) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "%s%d: vhci_pgr_validate_and_register: key"
			    " validated thread 0x%p\n", ddi_driver_name(cdip),
			    ddi_get_instance(cdip), (void *)curthread));
			/* Keep the hold on this path; released on exit. */
			pip = npip;
			success = 1;
			break;
		} else {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_pgr_validate_and_register: First validation"
			    " on osvp 0x%p failed %x\n", (void *)osvp, rval));
			vhci_print_prout_keys(vlun, "v_pgr_val_reg: failed:");
		}

		/*
		 * Try other paths
		 */
next_path_1:
		pip = npip;
		rval = mdi_select_path(cdip, NULL,
		    MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH,
		    pip, &npip);
		mdi_rele_path(pip);
	} while ((rval == MDI_SUCCESS) && (npip != NULL));


	/* Be sure to restore original cdb */
	vlun->svl_cdb[1] = cdb_1;

	/* Restore the res_key */
	bcopy(temp_res_key, prout->res_key, MHIOC_RESV_KEY_SIZE);

	/*
	 * If key could not be registered on any path for the first time,
	 * return success as online should still continue.
	 */
	if (success == 0) {
		return (1);
	}

	ASSERT(pip != NULL);

	/*
	 * Force register on new path
	 */
	cdb_1 = vlun->svl_cdb[1]; /* store the cdb */

	vlun->svl_cdb[1] &= 0xe0;
	vlun->svl_cdb[1] |= VHCI_PROUT_R_AND_IGNORE;

	vhci_print_prout_keys(vlun, "v_pgr_val_reg: keys before bcopy: ");

	bcopy(prout->active_service_key, prout->service_key,
	    MHIOC_RESV_KEY_SIZE);
	bcopy(prout->active_res_key, prout->res_key, MHIOC_RESV_KEY_SIZE);

	vhci_print_prout_keys(vlun, "v_pgr_val_reg:keys after bcopy: ");

	rval = vhci_do_prout(svp);
	vlun->svl_cdb[1] = cdb_1; /* restore the cdb */
	if (rval != 1) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_pgr_validate_and_register: register on new"
		    " path 0x%p svp 0x%p failed %x\n",
		    (void *)pip, (void *)svp, rval));
		vhci_print_prout_keys(vlun, "v_pgr_val_reg: reg failed: ");
		mdi_rele_path(pip);
		return (0);
	}

	if (bcmp(prout->service_key, zero_key, MHIOC_RESV_KEY_SIZE) == 0) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_pgr_validate_and_register: zero service key\n"));
		mdi_rele_path(pip);
		return (rval);
	}

	/*
	 * While the key was force registered, some other host might have
	 * cleared the key. Re-validate key on another pre-existing path
	 * before declaring success.
	 */
	npip = pip;
	pip = NULL;

	/*
	 * Sometimes CDB from application can be Register and Ignore.
	 * Instead of validation, it would result in force registration.
	 * Convert it to normal cdb for validation.
	 * After that be sure to restore the cdb.
	 */
	cdb_1 = vlun->svl_cdb[1];
	vlun->svl_cdb[1] &= 0xe0;
	success = 0;

	/* Phase 3: re-validate the key on another pre-existing path. */
	do {
		osvp = (scsi_vhci_priv_t *)
		    mdi_pi_get_vhci_private(npip);
		if (osvp == NULL) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_pgr_validate_and_register: no "
			    "client priv! 0x%p offlined?\n",
			    (void *)npip));
			goto next_path_2;
		}

		if (osvp == svp) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_pgr_validate_and_register: same osvp 0x%p"
			    " npip 0x%p vlun 0x%p\n",
			    (void *)svp, (void *)npip, (void *)vlun));
			goto next_path_2;
		}

		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_pgr_validate_and_register: Re-validation on"
		    " osvp 0x%p being done. vlun 0x%p Before bcopy cdb1 %x\n",
		    (void *)osvp, (void *)vlun, vlun->svl_cdb[1]));
		vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy: ");

		bcopy(prout->service_key, prout->res_key, MHIOC_RESV_KEY_SIZE);

		vhci_print_prout_keys(vlun, "v_pgr_val_reg: after bcopy: ");

		rval = vhci_do_prout(osvp);
		if (rval == 1) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "%s%d: vhci_pgr_validate_and_register: key"
			    " validated thread 0x%p\n", ddi_driver_name(cdip),
			    ddi_get_instance(cdip), (void *)curthread));
			pip = npip;
			success = 1;
			break;
		} else {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_pgr_validate_and_register: Re-validation on"
			    " osvp 0x%p failed %x\n", (void *)osvp, rval));
			vhci_print_prout_keys(vlun,
			    "v_pgr_val_reg: reval failed: ");
		}

		/*
		 * Try other paths
		 */
next_path_2:
		pip = npip;
		rval = mdi_select_path(cdip, NULL,
		    MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH,
		    pip, &npip);
		mdi_rele_path(pip);
	} while ((rval == MDI_SUCCESS) && (npip != NULL));

	/* Be sure to restore original cdb */
	vlun->svl_cdb[1] = cdb_1;

	if (success == 1) {
		/* Successfully validated registration */
		mdi_rele_path(pip);
		return (1);
	}

	VHCI_DEBUG(4, (CE_WARN, NULL, "key validation failed"));

	/*
	 * key invalid, back out by registering key value of 0
	 */
	VHCI_DEBUG(4, (CE_NOTE, NULL,
	    "vhci_pgr_validate_and_register: backout on"
	    " svp 0x%p being done\n", (void *)svp));
	vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy: ");

	bcopy(prout->service_key, prout->res_key, MHIOC_RESV_KEY_SIZE);
	bzero(prout->service_key, MHIOC_RESV_KEY_SIZE);

	vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy: ");

	/*
	 * Get a new path
	 */
	rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH |
	    MDI_SELECT_STANDBY_PATH, NULL, &pip);
	if ((rval != MDI_SUCCESS) || (pip == NULL)) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "%s%d: vhci_pgr_validate_and_register: no valid pip\n",
		    ddi_driver_name(cdip), ddi_get_instance(cdip)));
		return (0);
	}

	if ((rval = vhci_do_prout(svp)) != 1) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_pgr_validate_and_register: backout on"
		    " svp 0x%p failed\n", (void *)svp));
		vhci_print_prout_keys(vlun, "backout failed");

		VHCI_DEBUG(4, (CE_WARN, NULL,
		    "%s%d: vhci_pgr_validate_and_register: key"
		    " validation and backout failed", ddi_driver_name(cdip),
		    ddi_get_instance(cdip)));
		/* ILLEGALOP means the key was already cleared: not an error. */
		if (rval == VHCI_PGR_ILLEGALOP) {
			VHCI_DEBUG(4, (CE_WARN, NULL,
			    "%s%d: vhci_pgr_validate_and_register: key"
			    " already cleared", ddi_driver_name(cdip),
			    ddi_get_instance(cdip)));
			rval = 1;
		} else
			rval = 0;
	} else {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "%s%d: vhci_pgr_validate_and_register: key"
		    " validation failed, key backed out\n",
		    ddi_driver_name(cdip), ddi_get_instance(cdip)));
		vhci_print_prout_keys(vlun, "v_pgr_val_reg: key backed out: ");
	}
	mdi_rele_path(pip);

	return (rval);
}
7947
7948 /*
7949 * taskq routine to dispatch a scsi cmd to vhci_scsi_start. This ensures
7950 * that vhci_scsi_start is not called in interrupt context.
7951 * As the upper layer gets TRAN_ACCEPT when the command is dispatched, we
7952 * need to complete the command if something goes wrong.
7953 */
static void
vhci_dispatch_scsi_start(void *arg)
{
	struct vhci_pkt *vpkt = (struct vhci_pkt *)arg;
	struct scsi_pkt *tpkt = vpkt->vpkt_tgt_pkt;
	int rval = TRAN_BUSY;

	VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_dispatch_scsi_start: sending"
	    " scsi-2 reserve for 0x%p\n",
	    (void *)ADDR2DIP(&(vpkt->vpkt_tgt_pkt->pkt_address))));

	/*
	 * To prevent the taskq from being called recursively we set
	 * the VHCI_PKT_THRU_TASKQ bit in the vhci_pkt_states.
	 */
	vpkt->vpkt_state |= VHCI_PKT_THRU_TASKQ;

	/*
	 * Wait for the transport to get ready to send packets
	 * and if it times out, it will return something other than
	 * TRAN_BUSY. The vhci_reserve_delay may want to
	 * get tuned for other transports and is therefore a global.
	 * Using delay since this routine is called by taskq dispatch
	 * and not called during interrupt context.
	 */
	while ((rval = vhci_scsi_start(&(vpkt->vpkt_tgt_pkt->pkt_address),
	    vpkt->vpkt_tgt_pkt)) == TRAN_BUSY) {
		delay(drv_usectohz(vhci_reserve_delay));
	}

	switch (rval) {
	case TRAN_ACCEPT:
		/* Dispatched successfully; completion comes via vhci_intr. */
		return;

	default:
		/*
		 * This pkt shall be retried, and to ensure another taskq
		 * is dispatched for it, clear the VHCI_PKT_THRU_TASKQ
		 * flag.
		 */
		vpkt->vpkt_state &= ~VHCI_PKT_THRU_TASKQ;

		/* Ensure that the pkt is retried without a reset */
		tpkt->pkt_reason = CMD_ABORTED;
		tpkt->pkt_statistics |= STAT_ABORTED;
		VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_dispatch_scsi_start: "
		    "TRAN_rval %d returned for dip 0x%p", rval,
		    (void *)ADDR2DIP(&(vpkt->vpkt_tgt_pkt->pkt_address))));
		break;
	}

	/*
	 * vpkt_org_vpkt should always be NULL here if the retry command
	 * has been successfully dispatched. If vpkt_org_vpkt != NULL at
	 * this point, it is an error so restore the original vpkt and
	 * return an error to the target driver so it can retry the
	 * command as appropriate.
	 */
	if (vpkt->vpkt_org_vpkt != NULL) {
		struct vhci_pkt *new_vpkt = vpkt;
		scsi_vhci_priv_t *svp = (scsi_vhci_priv_t *)
		    mdi_pi_get_vhci_private(vpkt->vpkt_path);

		vpkt = vpkt->vpkt_org_vpkt;

		/* Propagate the failure status into the original packet. */
		vpkt->vpkt_tgt_pkt->pkt_reason = tpkt->pkt_reason;
		vpkt->vpkt_tgt_pkt->pkt_statistics = tpkt->pkt_statistics;

		vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
		    new_vpkt->vpkt_tgt_pkt);

		tpkt = vpkt->vpkt_tgt_pkt;
	}

	/* Complete the (possibly restored) target packet back to its owner. */
	scsi_hba_pkt_comp(tpkt);
}
8030
/*
 * Taskq callback: fail the LUN back to its preferred path class via
 * mdi_failover(), unless the preferred class has become active in the
 * meantime (e.g. another host already triggered the failback).
 */
static void
vhci_initiate_auto_failback(void *arg)
{
	struct scsi_vhci_lun *vlun = (struct scsi_vhci_lun *)arg;
	dev_info_t *vdip, *cdip;
	int held;

	cdip = vlun->svl_dip;
	vdip = ddi_get_parent(cdip);

	/* Hold the LUN for the duration of the failback attempt. */
	VHCI_HOLD_LUN(vlun, VH_SLEEP, held);

	/*
	 * Perform a final check to see if the active path class is indeed
	 * not the preferred path class. As in the time the auto failback
	 * was dispatched, an external failover could have been detected.
	 * [Some other host could have detected this condition and triggered
	 * the auto failback before].
	 * In such a case if we go ahead with failover we will be negating the
	 * whole purpose of auto failback.
	 */
	mutex_enter(&vlun->svl_mutex);
	if (vlun->svl_active_pclass != NULL) {
		char *best_pclass;
		struct scsi_failover_ops *fo;

		fo = vlun->svl_fops;

		/* Ask the failover module for the preferred path class. */
		(void) fo->sfo_pathclass_next(NULL, &best_pclass,
		    vlun->svl_fops_ctpriv);
		if (strcmp(vlun->svl_active_pclass, best_pclass) == 0) {
			/* Preferred class already active: nothing to do. */
			mutex_exit(&vlun->svl_mutex);
			VHCI_RELEASE_LUN(vlun);
			VHCI_DEBUG(1, (CE_NOTE, NULL, "Not initiating "
			    "auto failback for %s as %s pathclass already "
			    "active.\n", vlun->svl_lun_wwn, best_pclass));
			return;
		}
	}
	mutex_exit(&vlun->svl_mutex);
	if (mdi_failover(vdip, vlun->svl_dip, MDI_FAILOVER_SYNC)
	    == MDI_SUCCESS) {
		vhci_log(CE_NOTE, vdip, "!Auto failback operation "
		    "succeeded for device %s (GUID %s)",
		    ddi_node_name(cdip), vlun->svl_lun_wwn);
	} else {
		vhci_log(CE_NOTE, vdip, "!Auto failback operation "
		    "failed for device %s (GUID %s)",
		    ddi_node_name(cdip), vlun->svl_lun_wwn);
	}
	VHCI_RELEASE_LUN(vlun);
}
8083
8084 #ifdef DEBUG
8085 static void
8086 vhci_print_prin_keys(vhci_prin_readkeys_t *prin, int numkeys)
8087 {
8088 vhci_clean_print(NULL, 5, "Current PGR Keys",
8089 (uchar_t *)prin, numkeys * 8);
8090 }
8091 #endif
8092
8093 static void
8094 vhci_print_prout_keys(scsi_vhci_lun_t *vlun, char *msg)
8095 {
8096 int i;
8097 vhci_prout_t *prout;
8098 char buf1[4*MHIOC_RESV_KEY_SIZE + 1];
8099 char buf2[4*MHIOC_RESV_KEY_SIZE + 1];
8100 char buf3[4*MHIOC_RESV_KEY_SIZE + 1];
8101 char buf4[4*MHIOC_RESV_KEY_SIZE + 1];
8102
8103 prout = &vlun->svl_prout;
8104
8105 for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
8106 (void) sprintf(&buf1[4*i], "[%02x]", prout->res_key[i]);
8107 for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
8108 (void) sprintf(&buf2[(4*i)], "[%02x]", prout->service_key[i]);
8109 for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
8110 (void) sprintf(&buf3[4*i], "[%02x]", prout->active_res_key[i]);
8111 for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
8112 (void) sprintf(&buf4[4*i], "[%02x]",
8113 prout->active_service_key[i]);
8114
8115 /* Printing all in one go. Otherwise it will jumble up */
8116 VHCI_DEBUG(5, (CE_CONT, NULL, "%s vlun 0x%p, thread 0x%p\n"
8117 "res_key: : %s\n"
8118 "service_key : %s\n"
8119 "active_res_key : %s\n"
8120 "active_service_key: %s\n",
8121 msg, (void *)vlun, (void *)curthread, buf1, buf2, buf3, buf4));
8122 }
8123
8124 /*
8125 * Called from vhci_scsi_start to update the pHCI pkt with target packet.
8126 */
8127 static void
8128 vhci_update_pHCI_pkt(struct vhci_pkt *vpkt, struct scsi_pkt *pkt)
8129 {
8130
8131 ASSERT(vpkt->vpkt_hba_pkt);
8132
8133 vpkt->vpkt_hba_pkt->pkt_flags = pkt->pkt_flags;
8134 vpkt->vpkt_hba_pkt->pkt_flags |= FLAG_NOQUEUE;
8135
8136 if ((vpkt->vpkt_hba_pkt->pkt_flags & FLAG_NOINTR) ||
8137 MDI_PI_IS_SUSPENDED(vpkt->vpkt_path)) {
8138 /*
8139 * Polled Command is requested or HBA is in
8140 * suspended state
8141 */
8142 vpkt->vpkt_hba_pkt->pkt_flags |= FLAG_NOINTR;
8143 vpkt->vpkt_hba_pkt->pkt_comp = NULL;
8144 } else {
8145 vpkt->vpkt_hba_pkt->pkt_comp = vhci_intr;
8146 }
8147 vpkt->vpkt_hba_pkt->pkt_time = pkt->pkt_time;
8148 bcopy(pkt->pkt_cdbp, vpkt->vpkt_hba_pkt->pkt_cdbp,
8149 vpkt->vpkt_tgt_init_cdblen);
8150 vpkt->vpkt_hba_pkt->pkt_resid = pkt->pkt_resid;
8151
8152 /* Re-initialize the following pHCI packet state information */
8153 vpkt->vpkt_hba_pkt->pkt_state = 0;
8154 vpkt->vpkt_hba_pkt->pkt_statistics = 0;
8155 vpkt->vpkt_hba_pkt->pkt_reason = 0;
8156 }
8157
8158 static int
8159 vhci_scsi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
8160 void *arg, void *result)
8161 {
8162 int ret = DDI_SUCCESS;
8163
8164 /*
8165 * Generic processing in MPxIO framework
8166 */
8167 ret = mdi_bus_power(parent, impl_arg, op, arg, result);
8168
8169 switch (ret) {
8170 case MDI_SUCCESS:
8171 ret = DDI_SUCCESS;
8172 break;
8173 case MDI_FAILURE:
8174 ret = DDI_FAILURE;
8175 break;
8176 default:
8177 break;
8178 }
8179
8180 return (ret);
8181 }
8182
/*
 * Set a SCSI capability on the pHCI side.  When 'pip' is non-NULL (the
 * call comes from vhci_pathinfo_state_change for a path becoming ready)
 * the capability is set only through 'ap'.  Otherwise the capability is
 * applied to every online/standby path of the client; returns 1 when
 * all paths were walked, otherwise the last scsi_ifsetcap() result
 * (0 when no path could be obtained).
 */
static int
vhci_pHCI_cap(struct scsi_address *ap, char *cap, int val, int whom,
    mdi_pathinfo_t *pip)
{
	dev_info_t *cdip;
	mdi_pathinfo_t *npip = NULL;
	scsi_vhci_priv_t *svp = NULL;
	struct scsi_address *pap = NULL;
	scsi_hba_tran_t *hba = NULL;
	int sps;
	int mps_flag;
	int rval = 0;

	mps_flag = (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH);
	if (pip) {
		/*
		 * If the call is from vhci_pathinfo_state_change,
		 * then this path was busy and is becoming ready to accept IO.
		 */
		ASSERT(ap != NULL);
		hba = ap->a_hba_tran;
		ASSERT(hba != NULL);
		rval = scsi_ifsetcap(ap, cap, val, whom);

		VHCI_DEBUG(2, (CE_NOTE, NULL,
		    "!vhci_pHCI_cap: only on path %p, ap %p, rval %x\n",
		    (void *)pip, (void *)ap, rval));

		return (rval);
	}

	/*
	 * Set capability on all the pHCIs.
	 * If any path is busy, then the capability would be set by
	 * vhci_pathinfo_state_change.
	 */

	cdip = ADDR2DIP(ap);
	ASSERT(cdip != NULL);
	sps = mdi_select_path(cdip, NULL, mps_flag, NULL, &pip);
	if ((sps != MDI_SUCCESS) || (pip == NULL)) {
		VHCI_DEBUG(2, (CE_WARN, NULL,
		    "!vhci_pHCI_cap: Unable to get a path, dip 0x%p",
		    (void *)cdip));
		return (0);
	}

again:
	/* 'pip' is held here; it must be released on every exit path. */
	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
	if (svp == NULL) {
		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_pHCI_cap: "
		    "priv is NULL, pip 0x%p", (void *)pip));
		mdi_rele_path(pip);
		return (rval);
	}

	if (svp->svp_psd == NULL) {
		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_pHCI_cap: "
		    "psd is NULL, pip 0x%p, svp 0x%p",
		    (void *)pip, (void *)svp));
		mdi_rele_path(pip);
		return (rval);
	}

	pap = &svp->svp_psd->sd_address;
	ASSERT(pap != NULL);
	hba = pap->a_hba_tran;
	ASSERT(hba != NULL);

	if (hba->tran_setcap != NULL) {
		rval = scsi_ifsetcap(pap, cap, val, whom);

		VHCI_DEBUG(2, (CE_NOTE, NULL,
		    "!vhci_pHCI_cap: path %p, ap %p, rval %x\n",
		    (void *)pip, (void *)ap, rval));

		/*
		 * Select next path and issue the setcap, repeat
		 * until all paths are exhausted
		 */
		sps = mdi_select_path(cdip, NULL, mps_flag, pip, &npip);
		if ((sps != MDI_SUCCESS) || (npip == NULL)) {
			mdi_rele_path(pip);
			return (1);
		}
		mdi_rele_path(pip);
		pip = npip;
		goto again;
	}
	mdi_rele_path(pip);
	return (rval);
}
8275
8276 static int
8277 vhci_scsi_bus_config(dev_info_t *pdip, uint_t flags, ddi_bus_config_op_t op,
8278 void *arg, dev_info_t **child)
8279 {
8280 char *guid;
8281
8282 if (vhci_bus_config_debug)
8283 flags |= NDI_DEVI_DEBUG;
8284
8285 if (op == BUS_CONFIG_ONE || op == BUS_UNCONFIG_ONE)
8286 guid = vhci_devnm_to_guid((char *)arg);
8287 else
8288 guid = NULL;
8289
8290 if (mdi_vhci_bus_config(pdip, flags, op, arg, child, guid)
8291 == MDI_SUCCESS)
8292 return (NDI_SUCCESS);
8293 else
8294 return (NDI_FAILURE);
8295 }
8296
8297 static int
8298 vhci_scsi_bus_unconfig(dev_info_t *pdip, uint_t flags, ddi_bus_config_op_t op,
8299 void *arg)
8300 {
8301 if (vhci_bus_config_debug)
8302 flags |= NDI_DEVI_DEBUG;
8303
8304 return (ndi_busop_bus_unconfig(pdip, flags, op, arg));
8305 }
8306
8307 /*
8308 * Take the original vhci_pkt, create a duplicate of the pkt for resending
8309 * as though it originated in ssd.
8310 */
8311 static struct scsi_pkt *
8312 vhci_create_retry_pkt(struct vhci_pkt *vpkt)
8313 {
8314 struct vhci_pkt *new_vpkt = NULL;
8315 struct scsi_pkt *pkt = NULL;
8316
8317 scsi_vhci_priv_t *svp = (scsi_vhci_priv_t *)
8318 mdi_pi_get_vhci_private(vpkt->vpkt_path);
8319
8320 /*
8321 * Ensure consistent data at completion time by setting PKT_CONSISTENT
8322 */
8323 pkt = vhci_scsi_init_pkt(&svp->svp_psd->sd_address, pkt,
8324 vpkt->vpkt_tgt_init_bp, vpkt->vpkt_tgt_init_cdblen,
8325 vpkt->vpkt_tgt_init_scblen, 0, PKT_CONSISTENT, NULL_FUNC, NULL);
8326 if (pkt != NULL) {
8327 new_vpkt = TGTPKT2VHCIPKT(pkt);
8328
8329 pkt->pkt_address = vpkt->vpkt_tgt_pkt->pkt_address;
8330 pkt->pkt_flags = vpkt->vpkt_tgt_pkt->pkt_flags;
8331 pkt->pkt_time = vpkt->vpkt_tgt_pkt->pkt_time;
8332 pkt->pkt_comp = vpkt->vpkt_tgt_pkt->pkt_comp;
8333
8334 pkt->pkt_resid = 0;
8335 pkt->pkt_statistics = 0;
8336 pkt->pkt_reason = 0;
8337
8338 bcopy(vpkt->vpkt_tgt_pkt->pkt_cdbp,
8339 pkt->pkt_cdbp, vpkt->vpkt_tgt_init_cdblen);
8340
8341 /*
8342 * Save a pointer to the original vhci_pkt
8343 */
8344 new_vpkt->vpkt_org_vpkt = vpkt;
8345 }
8346
8347 return (pkt);
8348 }
8349
8350 /*
8351 * Copy the successful completion information from the hba packet into
8352 * the original target pkt from the upper layer. Returns the original
8353 * vpkt and destroys the new vpkt from the internal retry.
8354 */
8355 static struct vhci_pkt *
8356 vhci_sync_retry_pkt(struct vhci_pkt *vpkt)
8357 {
8358 struct vhci_pkt *ret_vpkt = NULL;
8359 struct scsi_pkt *tpkt = NULL;
8360 struct scsi_pkt *hba_pkt = NULL;
8361 scsi_vhci_priv_t *svp = (scsi_vhci_priv_t *)
8362 mdi_pi_get_vhci_private(vpkt->vpkt_path);
8363
8364 ASSERT(vpkt->vpkt_org_vpkt != NULL);
8365 VHCI_DEBUG(0, (CE_NOTE, NULL, "vhci_sync_retry_pkt: Retry pkt "
8366 "completed successfully!\n"));
8367
8368 ret_vpkt = vpkt->vpkt_org_vpkt;
8369 tpkt = ret_vpkt->vpkt_tgt_pkt;
8370 hba_pkt = vpkt->vpkt_hba_pkt;
8371
8372 /*
8373 * Copy the good status into the target driver's packet
8374 */
8375 *(tpkt->pkt_scbp) = *(hba_pkt->pkt_scbp);
8376 tpkt->pkt_resid = hba_pkt->pkt_resid;
8377 tpkt->pkt_state = hba_pkt->pkt_state;
8378 tpkt->pkt_statistics = hba_pkt->pkt_statistics;
8379 tpkt->pkt_reason = hba_pkt->pkt_reason;
8380
8381 /*
8382 * Destroy the internally created vpkt for the retry
8383 */
8384 vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
8385 vpkt->vpkt_tgt_pkt);
8386
8387 return (ret_vpkt);
8388 }
8389
8390 /* restart the request sense request */
8391 static void
8392 vhci_uscsi_restart_sense(void *arg)
8393 {
8394 struct buf *rqbp;
8395 struct buf *bp;
8396 struct scsi_pkt *rqpkt = (struct scsi_pkt *)arg;
8397 mp_uscsi_cmd_t *mp_uscmdp;
8398
8399 VHCI_DEBUG(4, (CE_WARN, NULL,
8400 "vhci_uscsi_restart_sense: enter: rqpkt: %p", (void *)rqpkt));
8401
8402 if (scsi_transport(rqpkt) != TRAN_ACCEPT) {
8403 /* if it fails - need to wakeup the original command */
8404 mp_uscmdp = rqpkt->pkt_private;
8405 bp = mp_uscmdp->cmdbp;
8406 rqbp = mp_uscmdp->rqbp;
8407 ASSERT(mp_uscmdp && bp && rqbp);
8408 scsi_free_consistent_buf(rqbp);
8409 scsi_destroy_pkt(rqpkt);
8410 bp->b_resid = bp->b_bcount;
8411 bioerror(bp, EIO);
8412 biodone(bp);
8413 }
8414 }
8415
8416 /*
8417 * auto-rqsense is not enabled so we have to retrieve the request sense
8418 * manually.
8419 */
/*
 * Allocate and transport a REQUEST SENSE packet for 'pkt' (used when
 * auto-request-sense is not enabled).  On TRAN_BUSY, ownership of the
 * sense packet passes to a timeout that retries the transport.
 * Returns 0 when the sense command was dispatched (or queued for
 * retry), -1 on allocation or transport failure.
 */
static int
vhci_uscsi_send_sense(struct scsi_pkt *pkt, mp_uscsi_cmd_t *mp_uscmdp)
{
	struct buf *rqbp, *cmdbp;
	struct scsi_pkt *rqpkt;
	int rval = 0;

	cmdbp = mp_uscmdp->cmdbp;
	ASSERT(cmdbp != NULL);

	VHCI_DEBUG(4, (CE_WARN, NULL,
	    "vhci_uscsi_send_sense: enter: bp: %p pkt: %p scmd: %p",
	    (void *)cmdbp, (void *)pkt, (void *)mp_uscmdp));
	/* set up the packet information and cdb */
	if ((rqbp = scsi_alloc_consistent_buf(mp_uscmdp->ap, NULL,
	    SENSE_LENGTH, B_READ, NULL, NULL)) == NULL) {
		return (-1);
	}

	if ((rqpkt = scsi_init_pkt(mp_uscmdp->ap, NULL, rqbp,
	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, NULL, NULL)) == NULL) {
		/* Packet allocation failed; release the sense buffer. */
		scsi_free_consistent_buf(rqbp);
		return (-1);
	}

	(void) scsi_setup_cdb((union scsi_cdb *)(intptr_t)rqpkt->pkt_cdbp,
	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);

	/* Cross-link the sense buffer/packet with the uscsi command. */
	mp_uscmdp->rqbp = rqbp;
	rqbp->b_private = mp_uscmdp;
	rqpkt->pkt_flags |= FLAG_SENSING;
	rqpkt->pkt_time = vhci_io_time;
	rqpkt->pkt_comp = vhci_uscsi_iodone;
	rqpkt->pkt_private = mp_uscmdp;

	/*
	 * NOTE: This code path is related to MPAPI uscsi(7I), so path
	 * selection is not based on path_instance.
	 */
	if (scsi_pkt_allocated_correctly(rqpkt))
		rqpkt->pkt_path_instance = 0;

	switch (scsi_transport(rqpkt)) {
	case TRAN_ACCEPT:
		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_uscsi_send_sense: "
		    "transport accepted."));
		break;
	case TRAN_BUSY:
		/*
		 * Busy: retry in 5 seconds.  rqpkt/rqbp are now owned by
		 * vhci_uscsi_restart_sense, which frees them on failure.
		 */
		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_uscsi_send_sense: "
		    "transport busy, setting timeout."));
		vhci_restart_timeid = timeout(vhci_uscsi_restart_sense, rqpkt,
		    (drv_usectohz(5 * 1000000)));
		break;
	default:
		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_uscsi_send_sense: "
		    "transport failed"));
		scsi_free_consistent_buf(rqbp);
		scsi_destroy_pkt(rqpkt);
		rval = -1;
	}

	return (rval);
}
8483
8484 /*
8485 * done routine for the mpapi uscsi command - this is behaving as though
8486 * FLAG_DIAGNOSE is set meaning there are no retries except for a manual
8487 * request sense.
8488 */
void
vhci_uscsi_iodone(struct scsi_pkt *pkt)
{
	struct buf *bp;
	mp_uscsi_cmd_t *mp_uscmdp;
	struct uscsi_cmd *uscmdp;
	struct scsi_arq_status *arqstat;
	int err;

	mp_uscmdp = (mp_uscsi_cmd_t *)pkt->pkt_private;
	uscmdp = mp_uscmdp->uscmdp;
	bp = mp_uscmdp->cmdbp;
	ASSERT(bp != NULL);
	VHCI_DEBUG(4, (CE_WARN, NULL,
	    "vhci_uscsi_iodone: enter: bp: %p pkt: %p scmd: %p",
	    (void *)bp, (void *)pkt, (void *)mp_uscmdp));
	/* Save the status and the residual into the uscsi_cmd struct */
	uscmdp->uscsi_status = ((*(pkt)->pkt_scbp) & STATUS_MASK);
	uscmdp->uscsi_resid = bp->b_resid;

	/* return on a very successful command */
	if (pkt->pkt_reason == CMD_CMPLT &&
	    SCBP_C(pkt) == 0 && ((pkt->pkt_flags & FLAG_SENSING) == 0) &&
	    pkt->pkt_resid == 0) {
		mdi_pi_kstat_iosupdate(mp_uscmdp->pip, bp);
		scsi_destroy_pkt(pkt);
		biodone(bp);
		return;
	}
	VHCI_DEBUG(4, (CE_NOTE, NULL, "iodone: reason=0x%x "
	    " pkt_resid=%ld pkt_state: 0x%x b_count: %ld b_resid: %ld",
	    pkt->pkt_reason, pkt->pkt_resid,
	    pkt->pkt_state, bp->b_bcount, bp->b_resid));

	/* default to EIO unless a branch below decides otherwise */
	err = EIO;

	arqstat = (struct scsi_arq_status *)(intptr_t)(pkt->pkt_scbp);
	if (pkt->pkt_reason != CMD_CMPLT) {
		/*
		 * The command did not complete.
		 */
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_uscsi_iodone: command did not complete."
		    " reason: %x flag: %x", pkt->pkt_reason, pkt->pkt_flags));
		if (pkt->pkt_flags & FLAG_SENSING) {
			/* the manual request-sense itself failed */
			MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_TRANSERR);
		} else if (pkt->pkt_reason == CMD_TIMEOUT) {
			MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_HARDERR);
			err = ETIMEDOUT;
		}
	} else if (pkt->pkt_state & STATE_ARQ_DONE && mp_uscmdp->arq_enabled) {
		/*
		 * The auto-rqsense happened, and the packet has a filled-in
		 * scsi_arq_status structure, pointed to by pkt_scbp.
		 */
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_uscsi_iodone: received auto-requested sense"));
		if (uscmdp->uscsi_flags & USCSI_RQENABLE) {
			/* get the amount of data to copy into rqbuf */
			int rqlen = SENSE_LENGTH - arqstat->sts_rqpkt_resid;
			/* cap the copy at the caller-supplied rqbuf size */
			rqlen = min(((int)uscmdp->uscsi_rqlen), rqlen);
			uscmdp->uscsi_rqresid = uscmdp->uscsi_rqlen - rqlen;
			uscmdp->uscsi_rqstatus =
			    *((char *)&arqstat->sts_rqpkt_status);
			if (uscmdp->uscsi_rqbuf && uscmdp->uscsi_rqlen &&
			    rqlen != 0) {
				bcopy(&(arqstat->sts_sensedata),
				    uscmdp->uscsi_rqbuf, rqlen);
			}
			mdi_pi_kstat_iosupdate(mp_uscmdp->pip, bp);
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_uscsi_iodone: ARQ "
			    "uscsi_rqstatus=0x%x uscsi_rqresid=%d rqlen: %d "
			    "xfer: %d rqpkt_resid: %d\n",
			    uscmdp->uscsi_rqstatus, uscmdp->uscsi_rqresid,
			    uscmdp->uscsi_rqlen, rqlen,
			    arqstat->sts_rqpkt_resid));
		}
	} else if (pkt->pkt_flags & FLAG_SENSING) {
		/* completion of the packet issued by vhci_uscsi_send_sense() */
		struct buf *rqbp;
		struct scsi_status *rqstatus;

		rqstatus = (struct scsi_status *)pkt->pkt_scbp;
		/* a manual request sense was done - get the information */
		if (uscmdp->uscsi_flags & USCSI_RQENABLE) {
			int rqlen = SENSE_LENGTH - pkt->pkt_resid;

			rqbp = mp_uscmdp->rqbp;
			/* get the amount of data to copy into rqbuf */
			rqlen = min(((int)uscmdp->uscsi_rqlen), rqlen);
			uscmdp->uscsi_rqresid = uscmdp->uscsi_rqlen - rqlen;
			uscmdp->uscsi_rqstatus = *((char *)rqstatus);
			if (uscmdp->uscsi_rqlen && uscmdp->uscsi_rqbuf) {
				bcopy(rqbp->b_un.b_addr, uscmdp->uscsi_rqbuf,
				    rqlen);
			}
			MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_TRANSERR);
			/* release the buffer allocated in send_sense */
			scsi_free_consistent_buf(rqbp);
		}
		VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_uscsi_iodone: FLAG_SENSING"
		    "uscsi_rqstatus=0x%x uscsi_rqresid=%d\n",
		    uscmdp->uscsi_rqstatus, uscmdp->uscsi_rqresid));
	} else {
		struct scsi_status *status =
		    (struct scsi_status *)pkt->pkt_scbp;
		/*
		 * Command completed and we're not getting sense. Check for
		 * errors and decide what to do next.
		 */
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_uscsi_iodone: command appears complete: reason: %x",
		    pkt->pkt_reason));
		if (status->sts_chk) {
			/* need to manually get the request sense */
			if (vhci_uscsi_send_sense(pkt, mp_uscmdp) == 0) {
				/*
				 * sense packet issued; this completion
				 * routine will run again for it, so the
				 * original pkt can be destroyed now.
				 */
				scsi_destroy_pkt(pkt);
				return;
			}
		} else {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_chk_err: appears complete"));
			err = 0;
			mdi_pi_kstat_iosupdate(mp_uscmdp->pip, bp);
			if (pkt->pkt_resid) {
				bp->b_resid += pkt->pkt_resid;
			}
		}
	}

	if (err) {
		/* mark the buf as failed before completing it */
		if (bp->b_resid == 0)
			bp->b_resid = bp->b_bcount;
		bioerror(bp, err);
		bp->b_flags |= B_ERROR;
	}

	scsi_destroy_pkt(pkt);
	biodone(bp);

	VHCI_DEBUG(4, (CE_WARN, NULL, "vhci_uscsi_iodone: exit"));
}
8630
8631 /*
8632 * start routine for the mpapi uscsi command
8633 */
int
vhci_uscsi_iostart(struct buf *bp)
{
	struct scsi_pkt *pkt;
	struct uscsi_cmd *uscmdp;
	mp_uscsi_cmd_t *mp_uscmdp;
	int stat_size, rval;
	int retry = 0;

	ASSERT(bp->b_private != NULL);

	mp_uscmdp = (mp_uscsi_cmd_t *)bp->b_private;
	uscmdp = mp_uscmdp->uscmdp;
	/*
	 * Size the status area: full sense length if the caller wants
	 * auto request sense data, a single status byte otherwise.
	 */
	if (uscmdp->uscsi_flags & USCSI_RQENABLE) {
		stat_size = SENSE_LENGTH;
	} else {
		stat_size = 1;
	}

	pkt = scsi_init_pkt(mp_uscmdp->ap, NULL, bp, uscmdp->uscsi_cdblen,
	    stat_size, 0, 0, SLEEP_FUNC, NULL);
	if (pkt == NULL) {
		/* fail the buf with EINVAL; nothing was transferred */
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_uscsi_iostart: rval: EINVAL"));
		bp->b_resid = bp->b_bcount;
		uscmdp->uscsi_resid = bp->b_bcount;
		bioerror(bp, EINVAL);
		biodone(bp);
		return (EINVAL);
	}

	/* caller-specified timeout, or the vHCI default if unset */
	pkt->pkt_time = uscmdp->uscsi_timeout;
	if (pkt->pkt_time == 0)
		pkt->pkt_time = vhci_io_time;

	bcopy(uscmdp->uscsi_cdb, pkt->pkt_cdbp, (size_t)uscmdp->uscsi_cdblen);
	pkt->pkt_comp = vhci_uscsi_iodone;
	pkt->pkt_private = mp_uscmdp;
	/* translate uscsi flags into their pkt_flags equivalents */
	if (uscmdp->uscsi_flags & USCSI_SILENT)
		pkt->pkt_flags |= FLAG_SILENT;
	if (uscmdp->uscsi_flags & USCSI_ISOLATE)
		pkt->pkt_flags |= FLAG_ISOLATE;
	if (uscmdp->uscsi_flags & USCSI_DIAGNOSE)
		pkt->pkt_flags |= FLAG_DIAGNOSE;
	if (uscmdp->uscsi_flags & USCSI_RENEGOT) {
		pkt->pkt_flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
	}
	VHCI_DEBUG(4, (CE_WARN, NULL,
	    "vhci_uscsi_iostart: ap: %p pkt: %p pcdbp: %p uscmdp: %p"
	    " ucdbp: %p pcdblen: %d bp: %p count: %ld pip: %p"
	    " stat_size: %d",
	    (void *)mp_uscmdp->ap, (void *)pkt, (void *)pkt->pkt_cdbp,
	    (void *)uscmdp, (void *)uscmdp->uscsi_cdb, pkt->pkt_cdblen,
	    (void *)bp, bp->b_bcount, (void *)mp_uscmdp->pip, stat_size));

	/*
	 * NOTE: This code path is related to MPAPI uscsi(7I), so path
	 * selection is not based on path_instance.
	 */
	if (scsi_pkt_allocated_correctly(pkt))
		pkt->pkt_path_instance = 0;

	/* retry TRAN_BUSY up to vhci_uscsi_retry_count times, with a delay */
	while (((rval = scsi_transport(pkt)) == TRAN_BUSY) &&
	    retry < vhci_uscsi_retry_count) {
		delay(drv_usectohz(vhci_uscsi_delay));
		retry++;
	}
	if (retry >= vhci_uscsi_retry_count) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_uscsi_iostart: tran_busy - retry: %d", retry));
	}
	switch (rval) {
	case TRAN_ACCEPT:
		/* accepted; completion is delivered via vhci_uscsi_iodone() */
		rval = 0;
		break;

	default:
		/* transport refused the packet: fail the buf with EIO */
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_uscsi_iostart: rval: %d count: %ld res: %ld",
		    rval, bp->b_bcount, bp->b_resid));
		bp->b_resid = bp->b_bcount;
		uscmdp->uscsi_resid = bp->b_bcount;
		bioerror(bp, EIO);
		scsi_destroy_pkt(pkt);
		biodone(bp);
		rval = EIO;
		MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_TRANSERR);
		break;
	}
	VHCI_DEBUG(4, (CE_NOTE, NULL,
	    "vhci_uscsi_iostart: exit: rval: %d", rval));
	return (rval);
}
8727
/* ARGSUSED */
static struct scsi_failover_ops *
vhci_dev_fo(dev_info_t *vdip, struct scsi_device *psd,
    void **ctprivp, char **fo_namep)
{
	struct scsi_failover_ops *sfo;
	char *sfo_name;
	char *override;
	struct scsi_failover *sf;

	ASSERT(psd && psd->sd_inq);
	/* defensive re-check for non-DEBUG kernels where ASSERT is a no-op */
	if ((psd == NULL) || (psd->sd_inq == NULL)) {
		VHCI_DEBUG(1, (CE_NOTE, NULL,
		    "!vhci_dev_fo:return NULL no scsi_device or inquiry"));
		return (NULL);
	}

	/*
	 * Determine if device is supported under scsi_vhci, and select
	 * failover module.
	 *
	 * See if there is a scsi_vhci.conf file override for this devices's
	 * VID/PID. The following values can be returned:
	 *
	 * NULL		If the NULL is returned then there is no scsi_vhci.conf
	 *		override.  For NULL, we determine the failover_ops for
	 *		this device by checking the sfo_device_probe entry
	 *		point for each 'fops' module, in order.
	 *
	 *		NOTE: Correct operation may depend on module ordering
	 *		of 'specific' (failover modules that are completely
	 *		VID/PID table based) to 'generic' (failover modules
	 *		that based on T10 standards like TPGS).  Currently,
	 *		the value of 'ddi-forceload' in scsi_vhci.conf is used
	 *		to establish the module list and probe order.
	 *
	 * "NONE"	If value "NONE" is returned then there is a
	 *		scsi_vhci.conf VID/PID override to indicate the device
	 *		should not be supported under scsi_vhci (even if there
	 *		is an 'fops' module supporting the device).
	 *
	 * "<other>"	If another value is returned then that value is the
	 *		name of the 'fops' module that should be used.
	 */
	sfo = NULL;	/* "NONE" */
	override = scsi_get_device_type_string(
	    "scsi-vhci-failover-override", vdip, psd);
	if (override == NULL) {
		/* NULL: default: select based on sfo_device_probe results */
		for (sf = scsi_failover_table; sf->sf_mod; sf++) {
			if ((sf->sf_sfo == NULL) ||
			    sf->sf_sfo->sfo_device_probe(psd, psd->sd_inq,
			    ctprivp) == SFO_DEVICE_PROBE_PHCI)
				continue;

			/* found failover module, supported under scsi_vhci */
			sfo = sf->sf_sfo;
			/* hand the caller a copy of the module name */
			if (fo_namep && (*fo_namep == NULL)) {
				sfo_name = i_ddi_strdup(sfo->sfo_name,
				    KM_SLEEP);
				*fo_namep = sfo_name;
			}
			break;
		}
	} else if (strcasecmp(override, "NONE")) {
		/* !"NONE": select based on driver.conf specified name */
		for (sf = scsi_failover_table, sfo = NULL; sf->sf_mod; sf++) {
			if ((sf->sf_sfo == NULL) ||
			    (sf->sf_sfo->sfo_name == NULL) ||
			    strcmp(override, sf->sf_sfo->sfo_name))
				continue;

			/*
			 * NOTE: If sfo_device_probe() has side-effects,
			 * including setting *ctprivp, these are not going
			 * to occur with override config.
			 */

			/* found failover module, supported under scsi_vhci */
			sfo = sf->sf_sfo;
			/* name is prefixed "conf " to record its origin */
			if (fo_namep && (*fo_namep == NULL)) {
				sfo_name = kmem_alloc(strlen("conf ") +
				    strlen(sfo->sfo_name) + 1, KM_SLEEP);
				(void) sprintf(sfo_name, "conf %s",
				    sfo->sfo_name);
				*fo_namep = sfo_name;
			}
			break;
		}
	}
	/* free the override string allocated by scsi_get_device_type_string */
	if (override)
		kmem_free(override, strlen(override) + 1);
	return (sfo);
}
8822
8823 /*
8824 * Determine the device described by cinfo should be enumerated under
8825 * the vHCI or the pHCI - if there is a failover ops then device is
8826 * supported under vHCI. By agreement with SCSA cinfo is a pointer
8827 * to a scsi_device structure associated with a decorated pHCI probe node.
8828 */
8829 /* ARGSUSED */
8830 int
8831 vhci_is_dev_supported(dev_info_t *vdip, dev_info_t *pdip, void *cinfo)
8832 {
8833 struct scsi_device *psd = (struct scsi_device *)cinfo;
8834
8835 return (vhci_dev_fo(vdip, psd, NULL, NULL) ? MDI_SUCCESS : MDI_FAILURE);
8836 }
8837
8838
8839 #ifdef DEBUG
8840 extern struct scsi_key_strings scsi_cmds[];
8841
8842 static char *
8843 vhci_print_scsi_cmd(char cmd)
8844 {
8845 char tmp[64];
8846 char *cpnt;
8847
8848 cpnt = scsi_cmd_name(cmd, scsi_cmds, tmp);
8849 /* tmp goes out of scope on return and caller sees garbage */
8850 if (cpnt == tmp) {
8851 cpnt = "Unknown Command";
8852 }
8853 return (cpnt);
8854 }
8855
8856 extern uchar_t scsi_cdb_size[];
8857
8858 static void
8859 vhci_print_cdb(dev_info_t *dip, uint_t level, char *title, uchar_t *cdb)
8860 {
8861 int len = scsi_cdb_size[CDB_GROUPID(cdb[0])];
8862 char buf[256];
8863
8864 if (level == CE_NOTE) {
8865 vhci_log(level, dip, "path cmd %s\n",
8866 vhci_print_scsi_cmd(*cdb));
8867 return;
8868 }
8869
8870 (void) sprintf(buf, "%s for cmd(%s)", title, vhci_print_scsi_cmd(*cdb));
8871 vhci_clean_print(dip, level, buf, cdb, len);
8872 }
8873
8874 static void
8875 vhci_clean_print(dev_info_t *dev, uint_t level, char *title, uchar_t *data,
8876 int len)
8877 {
8878 int i;
8879 int c;
8880 char *format;
8881 char buf[256];
8882 uchar_t byte;
8883
8884 (void) sprintf(buf, "%s:\n", title);
8885 vhci_log(level, dev, "%s", buf);
8886 level = CE_CONT;
8887 for (i = 0; i < len; ) {
8888 buf[0] = 0;
8889 for (c = 0; c < 8 && i < len; c++, i++) {
8890 byte = (uchar_t)data[i];
8891 if (byte < 0x10)
8892 format = "0x0%x ";
8893 else
8894 format = "0x%x ";
8895 (void) sprintf(&buf[(int)strlen(buf)], format, byte);
8896 }
8897 (void) sprintf(&buf[(int)strlen(buf)], "\n");
8898
8899 vhci_log(level, dev, "%s\n", buf);
8900 }
8901 }
8902 #endif
8903 static void
8904 vhci_invalidate_mpapi_lu(struct scsi_vhci *vhci, scsi_vhci_lun_t *vlun)
8905 {
8906 char *svl_wwn;
8907 mpapi_item_list_t *ilist;
8908 mpapi_lu_data_t *ld;
8909
8910 if (vlun == NULL) {
8911 return;
8912 } else {
8913 svl_wwn = vlun->svl_lun_wwn;
8914 }
8915
8916 ilist = vhci->mp_priv->obj_hdr_list[MP_OBJECT_TYPE_MULTIPATH_LU]->head;
8917
8918 while (ilist != NULL) {
8919 ld = (mpapi_lu_data_t *)(ilist->item->idata);
8920 if ((ld != NULL) && (strncmp(ld->prop.name, svl_wwn,
8921 strlen(svl_wwn)) == 0)) {
8922 ld->valid = 0;
8923 VHCI_DEBUG(6, (CE_WARN, NULL,
8924 "vhci_invalidate_mpapi_lu: "
8925 "Invalidated LU(%s)", svl_wwn));
8926 return;
8927 }
8928 ilist = ilist->next;
8929 }
8930 VHCI_DEBUG(6, (CE_WARN, NULL, "vhci_invalidate_mpapi_lu: "
8931 "Could not find LU(%s) to invalidate.", svl_wwn));
8932 }